summaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorKOBAYASHI Shuji <[email protected]>2020-11-10 14:02:55 +0900
committerKOBAYASHI Shuji <[email protected]>2020-11-10 15:21:49 +0900
commitf2d8db39be487fcd710c5c5844ae47d3a6920e70 (patch)
tree229bb98ce32aab978047b37961e6189c75820962 /src
parent8455344bf3fd92ca00f598accfe6b3eb29ddcb0c (diff)
downloadmruby-f2d8db39be487fcd710c5c5844ae47d3a6920e70.tar.gz
mruby-f2d8db39be487fcd710c5c5844ae47d3a6920e70.zip
Reduce memory usage of Hash object
## Implementation Summary * Change entry list from segmented list to flat array. * Change value of hash bucket from pointer to entry to index of entry list, and represent it by variable length bits according to capacity of hash buckets. * Store management information about entry list and hash table to `struct RHash` as much as possible. ## Benchmark Summary Only the results of typical situations on 64-bit Word-boxing are present here. For more detailed information, including consideration, see below (although most of the body is written in Japanese). * https://shuujii.github.io/mruby-hash-benchmark ### Memory Usage Lower value is better. | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 344B | 256B | 0.74419x | | 40 | 1,464B | 840B | 0.57377x | | 200 | 8,056B | 3,784B | 0.46971x | | 500 | 17,169B | 9,944B | 0.57949x | ### Performance Higher value is better. #### `mrb_hash_set` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 1.41847M i/s | 1.36004M i/s | 0.95881x | | 40 | 0.39224M i/s | 0.31888M i/s | 0.81296x | | 200 | 0.03780M i/s | 0.04290M i/s | 1.13494x | | 500 | 0.01225M i/s | 0.01314M i/s | 1.07275x | #### `mrb_hash_get` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 26.05920M i/s | 30.19543M i/s | 1.15872x | | 40 | 44.26420M i/s | 32.75781M i/s | 0.74005x | | 200 | 44.55171M i/s | 31.56926M i/s | 0.70860x | | 500 | 39.19250M i/s | 29.73806M i/s | 0.75877x | #### `mrb_hash_each` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 25.11964M i/s | 30.34167M i/s | 1.20789x | | 40 | 11.74253M i/s | 13.25539M i/s | 1.12884x | | 200 | 2.01133M i/s | 2.97214M i/s | 1.47770x | | 500 | 0.87411M i/s | 1.21178M i/s | 1.38631x | #### `Hash#[]=` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 
0.50095M i/s | 0.56490M i/s | 1.12764x | | 40 | 0.19132M i/s | 0.18392M i/s | 0.96129x | | 200 | 0.03624M i/s | 0.03256M i/s | 0.89860x | | 500 | 0.01527M i/s | 0.01236M i/s | 0.80935x | #### `Hash#[]` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 11.53211M i/s | 12.78806M i/s | 1.10891x | | 40 | 15.26920M i/s | 13.37529M i/s | 0.87596x | | 200 | 15.28550M i/s | 13.36410M i/s | 0.87430x | | 500 | 14.57695M i/s | 12.75388M i/s | 0.87494x | #### `Hash#each` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.30462M i/s | 0.27080M i/s | 0.88898x | | 40 | 0.12912M i/s | 0.11704M i/s | 0.90642x | | 200 | 0.02638M i/s | 0.02402M i/s | 0.91071x | | 500 | 0.01066M i/s | 0.00959M i/s | 0.89953x | #### `Hash#delete` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 7.84167M i/s | 6.96419M i/s | 0.88810x | | 40 | 6.91292M i/s | 7.41427M i/s | 1.07252x | | 200 | 3.75952M i/s | 7.32080M i/s | 1.94727x | | 500 | 2.10754M i/s | 7.05963M i/s | 3.34970x | #### `Hash#shift` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 14.66444M i/s | 13.18876M i/s | 0.89937x | | 40 | 11.95124M i/s | 11.10420M i/s | 0.92913x | | 200 | 5.53681M i/s | 7.88155M i/s | 1.42348x | | 500 | 2.96728M i/s | 5.40405M i/s | 1.82121x | #### `Hash#dup` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.15063M i/s | 5.37889M i/s | 35.71024x | | 40 | 0.06515M i/s | 3.38196M i/s | 51.91279x | | 200 | 0.01359M i/s | 1.46538M i/s | 107.84056x | | 500 | 0.00559M i/s | 0.75411M i/s | 134.88057x | ### Binary Size Lower value is better. 
| File | Baseline | New | Factor | |:-----------|--------------:|--------------:|----------:| | mruby | 730,408B | 734,176B | 1.00519x | | libmruby.a | 1,068,134B | 1,072,846B | 1.00441x | ## Other Fixes The following issues have also been fixed in the parts where there was some change this time. * [Heap use-after-free in `Hash#value?`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-hash-value-md) * [Heap use-after-free in `ht_hash_equal`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-ht_hash_equal-md) * [Heap use-after-free in `ht_hash_func`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-ht_hash_func-md) * [Heap use-after-free in `mrb_hash_merge`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-mrb_hash_merge-md) * [Self-replacement does not work for `Hash#replace`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-self-replacement-does-not-work-for-hash-replace-md) * [Repeated deletes and inserts increase memory usage of `Hash`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-repeated-deletes-and-inserts-increase-memory-usage-of-hash-md) * [`Hash#rehash` does not reindex completely](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-hash-rehash-does-not-reindex-completely-md) * `mrb_hash_delete_key` does not cause an error for frozen object * `mrb_hash_new_capa` does not allocate required space first * [`mrb_os_memsize_of_hash_table` result is incorrect](https://github.com/mruby/mruby/pull/5032#discussion_r457994075)
Diffstat (limited to 'src')
-rw-r--r--src/hash.c1695
1 files changed, 1007 insertions, 688 deletions
diff --git a/src/hash.c b/src/hash.c
index d67fa6254..f797f8823 100644
--- a/src/hash.c
+++ b/src/hash.c
@@ -4,6 +4,7 @@
** See Copyright Notice in mruby.h
*/
+#include <string.h>
#include <mruby.h>
#include <mruby/array.h>
#include <mruby/class.h>
@@ -11,56 +12,285 @@
#include <mruby/string.h>
#include <mruby/variable.h>
-#ifndef MRB_NO_FLOAT
-/* a function to get hash value of a float number */
-mrb_int mrb_float_id(mrb_float f);
+/*
+ * === Glossary
+ *
+ * [EA]
+ * Entry Array. Store `Hash` entries in insertion order.
+ *
+ * [AR]
+ * Array Table Implementation. The structure of `Hash` that doesn't have a
+ * hash table and linearly searches EA. It is used when `Hash` size <= 16.
+ *
+ * [IB]
+ * Index Buckets. The buckets of hash table, where the bucket value is EA
+ * index. The index is represented by variable length bits according to
+ * the capacity.
+ *
+ * [HT]
+ * Hash Table Implementation. The structure of `Hash` that has IB and is
+ * searched by hash table algorithm. It is used when `Hash` size > 16. Collision
+ * resolution strategy is open addressing method.
+ *
+ * [size]
+ * The number of `Hash` entries (value of `Hash#size`).
+ *
+ * [slot]
+ * The generic term for EA or IB elements.
+ *
+ * [active]
+ * The state in which a slot is recognized as a `Hash` entry.
+ *
+ * [deleted]
+ * The state in which a slot is marked as deleted.
+ *
+ * [used]
+ * The state in which a slot is active or deleted.
+ *
+ * [empty]
+ * The state in which a slot is not used. Capacity is equal to the sum of
+ * the number of used slots and the number of empty slots.
+ */
+
+#define EA_INCREASE_RATIO 6 / 5 + 6
+#define EA_MAX_INCREASE UINT16_MAX
+#define EA_MAX_CAPA /* `- 2` means reserved indices (empty and deleted) */ \
+ U32(lesser(IB_MAX_CAPA - 2, MRB_INT_MAX))
+#define IB_MAX_CAPA (U32(1) << IB_MAX_BIT)
+#define IB_TYPE_BIT 32
+#define IB_INIT_BIT ( \
+ ib_upper_bound_for(32) <= AR_MAX_SIZE ? 6 : \
+ ib_upper_bound_for(16) <= AR_MAX_SIZE ? 5 : \
+ 4 \
+)
+#define IB_MAX_BIT (IB_TYPE_BIT - 1)
+#define AR_DEFAULT_CAPA 4
+#define AR_MAX_SIZE 16
+#define H_MAX_SIZE EA_MAX_CAPA
+
+mrb_static_assert1(offsetof(struct RHash, iv) == offsetof(struct RObject, iv));
+mrb_static_assert1(AR_MAX_SIZE < (1 << MRB_HASH_AR_EA_CAPA_BIT));
+
+typedef struct hash_entry {
+ mrb_value key;
+ mrb_value val;
+} hash_entry;
+
+typedef struct hash_table {
+ hash_entry *ea;
+#ifdef MRB_32BIT
+ uint32_t ea_capa;
+ uint32_t ea_n_used;
#endif
+ uint32_t ib[];
+} hash_table;
+
+typedef struct index_buckets_iter {
+ struct RHash *h;
+ uint32_t bit;
+ uint32_t mask;
+ uint32_t pos;
+ uint32_t ary_index;
+ uint32_t ea_index;
+ uint32_t shift1;
+ uint32_t shift2;
+ uint32_t step;
+} index_buckets_iter;
+
+/*
+ * `c_` :: receiver class (category)
+ * `n_` :: attribute name
+ * `t_` :: attribute type
+ * `p_` :: struct member path
+ * `k_` :: macro key
+ */
+#define DEFINE_GETTER(c_, n_, t_, p_) \
+ MRB_INLINE t_ c_##_##n_(const struct RHash *h) {return h->p_;}
+#define DEFINE_SETTER(c_, n_, t_, p_) \
+ MRB_INLINE void c_##_set_##n_(struct RHash *h, t_ v) {h->p_ = v;}
+#define DEFINE_ACCESSOR(c_, n_, t_, p_) \
+ DEFINE_GETTER(c_, n_, t_, p_) \
+ DEFINE_SETTER(c_, n_, t_, p_)
+#define DEFINE_FLAG_GETTER(c_, n_, t_, k_) \
+ MRB_INLINE t_ c_##_##n_(const struct RHash *h) { \
+ return (t_)((h->flags & MRB_HASH_##k_##_MASK) >> MRB_HASH_##k_##_SHIFT); \
+ }
+#define DEFINE_FLAG_SETTER(c_, n_, t_, k_) \
+ MRB_INLINE void c_##_set_##n_(struct RHash *h, t_ v) { \
+ h->flags &= ~MRB_HASH_##k_##_MASK; \
+ h->flags |= v << MRB_HASH_##k_##_SHIFT; \
+ }
+#define DEFINE_FLAG_ACCESSOR(c_, n_, t_, k_) \
+ DEFINE_FLAG_GETTER(c_, n_, t_, k_) \
+ DEFINE_FLAG_SETTER(c_, n_, t_, k_)
+#define DEFINE_INCREMENTER(c_, n_) \
+ MRB_INLINE void c_##_inc_##n_(struct RHash *h) { \
+ c_##_set_##n_(h, c_##_##n_(h) + 1); \
+ }
+#define DEFINE_DECREMENTER(c_, n_) \
+ MRB_INLINE void c_##_dec_##n_(struct RHash *h) { \
+ c_##_set_##n_(h, c_##_##n_(h) - 1); \
+ }
+#define DEFINE_SWITCHER(n_, k_) \
+ MRB_INLINE void h_##n_##_on(struct RHash *h) { \
+ h->flags |= MRB_HASH_##k_; \
+ } \
+ MRB_INLINE void h_##n_##_off(struct RHash *h) { \
+ h->flags &= ~MRB_HASH_##k_; \
+ } \
+ MRB_INLINE mrb_bool h_##n_##_p(const struct RHash *h) { \
+ return (h->flags & MRB_HASH_##k_) == MRB_HASH_##k_; \
+ }
-#ifndef MRB_HT_INIT_SIZE
-#define MRB_HT_INIT_SIZE 4
+#ifdef MRB_64BIT
+DEFINE_ACCESSOR(ar, ea_capa, uint32_t, ea_capa)
+DEFINE_ACCESSOR(ar, ea_n_used, uint32_t, ea_n_used)
+DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ea_capa)
+DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ea_n_used)
+#else
+DEFINE_FLAG_ACCESSOR(ar, ea_capa, uint32_t, AR_EA_CAPA)
+DEFINE_FLAG_ACCESSOR(ar, ea_n_used, uint32_t, AR_EA_N_USED)
+DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ht->ea_capa)
+DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ht->ea_n_used)
#endif
-#define HT_SEG_INCREASE_RATIO 6 / 5
+DEFINE_FLAG_ACCESSOR(ib, bit, uint32_t, IB_BIT)
+DEFINE_ACCESSOR(ar, size, uint32_t, size)
+DEFINE_ACCESSOR(ar, ea, hash_entry*, ea)
+DEFINE_DECREMENTER(ar, size)
+DEFINE_ACCESSOR(ht, size, uint32_t, size)
+DEFINE_ACCESSOR(ht, ea, hash_entry*, ht->ea)
+DEFINE_GETTER(ht, ib, uint32_t*, ht->ib)
+DEFINE_INCREMENTER(ht, size)
+DEFINE_DECREMENTER(ht, size)
+DEFINE_GETTER(h, size, uint32_t, size)
+DEFINE_ACCESSOR(h, ht, hash_table*, ht)
+DEFINE_SWITCHER(ht, HT)
+
+#define ea_each_used(ea, n_used, entry_var, code) do { \
+ hash_entry *entry_var = ea, *ea_end__ = entry_var + (n_used); \
+ for (; entry_var < ea_end__; ++entry_var) { \
+ code; \
+ } \
+} while (0)
-struct segkv {
- mrb_value key;
- mrb_value val;
-};
-
-typedef struct segment {
- uint16_t size;
- struct segment *next;
- struct segkv e[];
-} segment;
-
-typedef struct segindex {
- size_t size;
- size_t capa;
- struct segkv *table[];
-} segindex;
-
-/* hash table structure */
-typedef struct htable {
- segment *rootseg;
- segment *lastseg;
- mrb_int size;
- uint16_t last_len;
- segindex *index;
-} htable;
-
-static /* inline */ size_t
-ht_hash_func(mrb_state *mrb, htable *t, mrb_value key)
+#define ea_each(ea, size, entry_var, code) do { \
+ hash_entry *entry_var = ea; \
+ uint32_t size__ = size; \
+ for (; 0 < size__; ++entry_var) { \
+ if (entry_deleted_p(entry_var)) continue; \
+ --size__; \
+ code; \
+ } \
+} while (0)
+
+#define ib_cycle_by_key(mrb, h, key, it_var, code) do { \
+ index_buckets_iter it_var[1]; \
+ ib_it_init(mrb, it_var, h, key); \
+ for (;;) { \
+ ib_it_next(it_var); \
+ code; \
+ } \
+} while (0)
+
+#define ib_find_by_key(mrb, h_, key_, it_var, code) do { \
+ mrb_value ib_fbk_key__ = key_; \
+ ib_cycle_by_key(mrb, h_, ib_fbk_key__, it_var, { \
+ if (ib_it_empty_p(it_var)) break; \
+ if (ib_it_deleted_p(it_var)) continue; \
+ if (obj_eql(mrb, ib_fbk_key__, ib_it_entry(it_var)->key, it_var->h)) { \
+ code; \
+ break; \
+ } \
+ }); \
+} while (0)
+
+#define h_each(h, entry_var, code) do { \
+ struct RHash *h__ = h; \
+ hash_entry *h_e_ea__; \
+ uint32_t h_e_size__; \
+ h_ar_p(h) ? (h_e_ea__ = ar_ea(h__), h_e_size__ = ar_size(h__)) : \
+ (h_e_ea__ = ht_ea(h__), h_e_size__ = ht_size(h__)); \
+ ea_each(h_e_ea__, h_e_size__, entry_var, code); \
+} while (0)
+
+/*
+ * `h_check_modified` raises an exception when a dangerous modification is
+ * made to `h` by executing `code`.
+ *
+ * This macro is not called if `h->ht` (`h->ea`) is `NULL` (`Hash` size is
+ * zero). And because the `hash_entry` is rather large, `h->ht->ea` and
+ * `h->ht->ea_capa` are able to be safely accessed even in AR. This nature
+ * is used to eliminate branch of AR or HT.
+ *
+ * `HT_ASSERT_SAFE_READ` checks if members can be accessed according to its
+ * assumptions.
+ */
+#define HT_ASSERT_SAFE_READ(attr_name) \
+ mrb_static_assert1( \
+ offsetof(hash_table, attr_name) + sizeof(((hash_table*)0)->attr_name) <= \
+ sizeof(hash_entry))
+HT_ASSERT_SAFE_READ(ea);
+#ifdef MRB_32BIT
+HT_ASSERT_SAFE_READ(ea_capa);
+#endif
+#undef HT_ASSERT_SAFE_READ
+#define h_check_modified(mrb, h, code) do { \
+ struct RHash *h__ = h; \
+ uint32_t mask = MRB_HASH_HT|MRB_HASH_IB_BIT_MASK|MRB_HASH_AR_EA_CAPA_MASK; \
+ uint32_t flags = h__->flags & mask; \
+ void* tbl__ = (mrb_assert(h__->ht), h__->ht); \
+ uint32_t ht_ea_capa__ = ht_ea_capa(h__); \
+ hash_entry *ht_ea__ = ht_ea(h__); \
+ code; \
+ if (flags != (h__->flags & mask) || \
+ tbl__ != h__->ht || \
+ ht_ea_capa__ != ht_ea_capa(h__) || \
+ ht_ea__ != ht_ea(h__)) { \
+ mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); \
+ } \
+} while (0)
+
+#define U32(v) ((uint32_t)(v))
+#define U64(v) ((uint64_t)(v))
+#define h_ar_p(h) (!h_ht_p(h))
+#define h_ar_on(h) h_ht_off(h)
+#define lesser(a, b) ((a) < (b) ? (a) : (b))
+
+static uint32_t ib_upper_bound_for(uint32_t capa);
+static uint32_t ib_bit_to_capa(uint32_t bit);
+static void ht_init(
+ mrb_state *mrb, struct RHash *h, uint32_t size,
+ hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit);
+static void ht_set_without_ib_adjustment(
+ mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val);
+
+static uint32_t
+next_power2(uint32_t v)
{
- enum mrb_vtype tt = mrb_type(key);
- mrb_value hv;
- size_t h;
- segindex *index = t->index;
- size_t capa = index ? index->capa : 0;
+ mrb_assert(v != 0);
+#ifdef __GNUC__
+ return U32(1) << ((sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v));
+#else
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ ++v;
+ return v;
+#endif
+}
+static uint32_t
+obj_hash_code(mrb_state *mrb, mrb_value key, struct RHash *h)
+{
+ enum mrb_vtype tt = mrb_type(key);
+ uint32_t hash_code;
+ mrb_value hash_code_obj;
switch (tt) {
case MRB_TT_STRING:
- h = mrb_str_hash(mrb, key);
+ hash_code = mrb_str_hash(mrb, key);
break;
-
case MRB_TT_TRUE:
case MRB_TT_FALSE:
case MRB_TT_SYMBOL:
@@ -68,24 +298,24 @@ ht_hash_func(mrb_state *mrb, htable *t, mrb_value key)
#ifndef MRB_NO_FLOAT
case MRB_TT_FLOAT:
#endif
- h = (size_t)mrb_obj_id(key);
+ hash_code = U32(mrb_obj_id(key));
break;
-
default:
- hv = mrb_funcall_id(mrb, key, MRB_SYM(hash), 0);
- h = (size_t)tt ^ (size_t)mrb_integer(hv);
+ h_check_modified(mrb, h, {
+ hash_code_obj = mrb_funcall_argv(mrb, key, MRB_SYM(hash), 0, NULL);
+ });
+
+ hash_code = U32(tt) ^ U32(mrb_integer(hash_code_obj));
break;
}
- if (index && (index != t->index || capa != index->capa)) {
- mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified");
- }
- return ((h)^((h)<<2)^((h)>>2));
+ return hash_code ^ (hash_code << 2) ^ (hash_code >> 2);
}
-static inline mrb_bool
-ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b)
+static mrb_bool
+obj_eql(mrb_state *mrb, mrb_value a, mrb_value b, struct RHash *h)
{
enum mrb_vtype tt = mrb_type(a);
+ mrb_bool eql;
switch (tt) {
case MRB_TT_STRING:
@@ -106,603 +336,821 @@ ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b)
#endif
default:
- {
- segindex *index = t->index;
- size_t capa = index ? index->capa : 0;
- mrb_bool eql = mrb_eql(mrb, a, b);
- if (index && (index != t->index || capa != index->capa)) {
- mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified");
- }
- return eql;
- }
- }
+ h_check_modified(mrb, h, {eql = mrb_eql(mrb, a, b);});
+ return eql;
+ }
}
-/* Creates the hash table. */
-static htable*
-ht_new(mrb_state *mrb)
+static mrb_bool
+entry_deleted_p(const hash_entry* entry)
{
- htable *t;
+ return mrb_undef_p(entry->key);
+}
- t = (htable*)mrb_malloc(mrb, sizeof(htable));
- t->size = 0;
- t->rootseg = NULL;
- t->lastseg = NULL;
- t->last_len = 0;
- t->index = NULL;
+static void
+entry_delete(hash_entry* entry)
+{
+ entry->key = mrb_undef_value();
+}
- return t;
+static uint32_t
+ea_next_capa_for(uint32_t size, uint32_t max_capa)
+{
+ if (size < AR_DEFAULT_CAPA) {
+ return AR_DEFAULT_CAPA;
+ }
+ else {
+ uint64_t capa = U64(size) * EA_INCREASE_RATIO, inc = capa - size;
+ if (EA_MAX_INCREASE < inc) capa = size + EA_MAX_INCREASE;
+ return capa <= max_capa ? U32(capa) : max_capa;
+ }
}
-#define power2(v) do { \
- v--;\
- v |= v >> 1;\
- v |= v >> 2;\
- v |= v >> 4;\
- v |= v >> 8;\
- v |= v >> 16;\
- v++;\
-} while (0)
+static hash_entry*
+ea_resize(mrb_state *mrb, hash_entry *ea, uint32_t capa)
+{
+ return (hash_entry*)mrb_realloc(mrb, ea, sizeof(hash_entry) * capa);
+}
-#ifndef UPPER_BOUND
-#define UPPER_BOUND(x) ((x)>>2|(x)>>1)
-#endif
+static void
+ea_compress(hash_entry *ea, uint32_t n_used)
+{
+ hash_entry *w_entry = ea;
+ ea_each_used(ea, n_used, r_entry, {
+ if (entry_deleted_p(r_entry)) continue;
+ if (r_entry != w_entry) *w_entry = *r_entry;
+ ++w_entry;
+ });
+}
+
+/*
+ * Increase or decrease capacity of `ea` to a standard size that can
+ * accommodate `*capap + 1` entries (but, not exceed `max_capa`). Set the
+ * changed capacity to `*capap` and return a pointer to `mrb_realloc`ed EA.
+ */
+static hash_entry*
+ea_adjust(mrb_state *mrb, hash_entry *ea, uint32_t *capap, uint32_t max_capa)
+{
+ *capap = ea_next_capa_for(*capap, max_capa);
+ return ea_resize(mrb, ea, *capap);
+}
+
+static hash_entry*
+ea_dup(mrb_state *mrb, const hash_entry *ea, uint32_t capa)
+{
+ size_t byte_size = sizeof(hash_entry) * capa;
+ hash_entry *new_ea = (hash_entry*)mrb_malloc(mrb, byte_size);
+ return (hash_entry*)memcpy(new_ea, ea, byte_size);
+}
-#define HT_MASK(index) ((index->capa)-1)
+static hash_entry*
+ea_get_by_key(mrb_state *mrb, hash_entry *ea, uint32_t size, mrb_value key,
+ struct RHash *h)
+{
+ ea_each(ea, size, entry, {
+ if (obj_eql(mrb, key, entry->key, h)) return entry;
+ });
+ return NULL;
+}
+
+static hash_entry*
+ea_get(hash_entry *ea, uint32_t index)
+{
+ return &ea[index];
+}
-/* Build index for the hash table */
static void
-ht_index(mrb_state *mrb, htable *t)
+ea_set(hash_entry *ea, uint32_t index, mrb_value key, mrb_value val)
{
- size_t size = (size_t)t->size;
- size_t mask;
- segindex *index = t->index;
- segment *seg;
- size_t i;
+ ea[index].key = key;
+ ea[index].val = val;
+}
- if (size == 0) {
- t->index = NULL;
- mrb_free(mrb, index);
- return;
- }
- /* allocate index table */
- if (index && index->size >= UPPER_BOUND(index->capa)) {
- size = index->capa+1;
- }
- power2(size);
- if (!index || index->capa < size) {
- index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size);
- if (index == NULL) {
- mrb_free(mrb, t->index);
- t->index = NULL;
- return;
- }
- t->index = index;
- }
- index->size = t->size;
- index->capa = size;
- for (i=0; i<size; i++) {
- index->table[i] = NULL;
- }
+static void
+ar_init(struct RHash *h, uint32_t size,
+ hash_entry *ea, uint32_t ea_capa, uint32_t ea_n_used)
+{
+ h_ar_on(h);
+ ar_set_size(h, size);
+ ar_set_ea(h, ea);
+ ar_set_ea_capa(h, ea_capa);
+ ar_set_ea_n_used(h, ea_n_used);
+}
- /* rebuild index */
- mask = HT_MASK(index);
- seg = t->rootseg;
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value key;
- size_t k, step = 0;
+static void
+ar_free(mrb_state *mrb, struct RHash *h)
+{
+ mrb_free(mrb, ar_ea(h));
+}
- if (!seg->next && i >= (size_t)t->last_len) {
- return;
- }
- key = seg->e[i].key;
- if (mrb_undef_p(key)) continue;
- k = ht_hash_func(mrb, t, key) & mask;
- while (index->table[k]) {
- k = (k+(++step)) & mask;
- }
- index->table[k] = &seg->e[i];
- }
- seg = seg->next;
- }
+static void
+ar_adjust_ea(mrb_state *mrb, struct RHash *h, uint32_t size, uint32_t max_ea_capa)
+{
+ uint32_t ea_capa = size;
+ hash_entry *ea = ea_adjust(mrb, ar_ea(h), &ea_capa, max_ea_capa);
+ ar_set_ea(h, ea);
+ ar_set_ea_capa(h, ea_capa);
}
-/* Compacts the hash table removing deleted entries. */
static void
-ht_compact(mrb_state *mrb, htable *t)
+ar_compress(mrb_state *mrb, struct RHash *h)
{
- segment *seg;
- uint16_t i, i2;
- segment *seg2 = NULL;
- mrb_int size = 0;
+ uint32_t size = ar_size(h);
+ ea_compress(ar_ea(h), ar_ea_n_used(h));
+ ar_set_ea_n_used(h, size);
+ ar_adjust_ea(mrb, h, size, lesser(ar_ea_capa(h), AR_MAX_SIZE));
+}
- if (t == NULL) return;
- seg = t->rootseg;
- if (t->index && (size_t)t->size == t->index->size) {
- ht_index(mrb, t);
- return;
- }
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value k = seg->e[i].key;
+static mrb_bool
+ar_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp)
+{
+ ea_each(ar_ea(h), ar_size(h), entry, {
+ if (!obj_eql(mrb, key, entry->key, h)) continue;
+ *valp = entry->val;
+ return TRUE;
+ });
+ return FALSE;
+}
- if (!seg->next && i >= t->last_len) {
- goto exit;
- }
- if (mrb_undef_p(k)) { /* found deleted key */
- if (seg2 == NULL) {
- seg2 = seg;
- i2 = i;
+static void
+ar_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val)
+{
+ uint32_t size = ar_size(h);
+ hash_entry *entry;
+ if ((entry = ea_get_by_key(mrb, ar_ea(h), size, key, h))) {
+ entry->val = val;
+ }
+ else {
+ uint32_t ea_capa = ar_ea_capa(h), ea_n_used = ar_ea_n_used(h);
+ if (ea_capa == ea_n_used) {
+ if (size == ea_n_used) {
+ if (size == AR_MAX_SIZE) {
+ hash_entry *ea = ea_adjust(mrb, ar_ea(h), &ea_capa, EA_MAX_CAPA);
+ ea_set(ea, ea_n_used, key, val);
+ ht_init(mrb, h, ++size, ea, ea_capa, NULL, IB_INIT_BIT);
+ return;
+ }
+ else {
+ ar_adjust_ea(mrb, h, size, AR_MAX_SIZE);
}
}
else {
- size++;
- if (seg2 != NULL) {
- seg2->e[i2++] = seg->e[i];
- if (i2 >= seg2->size) {
- seg2 = seg2->next;
- i2 = 0;
- }
- }
+ ar_compress(mrb, h);
+ ea_n_used = size;
}
}
- seg = seg->next;
+ ea_set(ar_ea(h), ea_n_used, key, val);
+ ar_set_size(h, ++size);
+ ar_set_ea_n_used(h, ++ea_n_used);
}
- exit:
- /* reached at end */
- t->size = size;
- if (seg2) {
- seg = seg2->next;
- seg2->next = NULL;
- t->last_len = i2;
- t->lastseg = seg2;
- while (seg) {
- seg2 = seg->next;
- mrb_free(mrb, seg);
- seg = seg2;
+}
+
+static mrb_bool
+ar_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp)
+{
+ hash_entry *entry = ea_get_by_key(mrb, ar_ea(h), ar_size(h), key, h);
+ if (!entry) return FALSE;
+ *valp = entry->val;
+ entry_delete(entry);
+ ar_dec_size(h);
+ return TRUE;
+}
+
+static void
+ar_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp)
+{
+ uint32_t size = ar_size(h);
+ ea_each(ar_ea(h), size, entry, {
+ *keyp = entry->key;
+ *valp = entry->val;
+ entry_delete(entry);
+ ar_set_size(h, --size);
+ return;
+ });
+}
+
+static void
+ar_rehash(mrb_state *mrb, struct RHash *h)
+{
+ /* see comments in `h_rehash` */
+ uint32_t size = ar_size(h), w_size = 0, ea_capa = ar_ea_capa(h);
+ hash_entry *ea = ar_ea(h), *w_entry;
+ ea_each(ea, size, r_entry, {
+ if ((w_entry = ea_get_by_key(mrb, ea, w_size, r_entry->key, h))) {
+ w_entry->val = r_entry->val;
+ ar_set_size(h, --size);
+ entry_delete(r_entry);
}
- }
- if (t->index) {
- ht_index(mrb, t);
- }
+ else {
+ if (w_size != U32(r_entry - ea)) {
+ ea_set(ea, w_size, r_entry->key, r_entry->val);
+ entry_delete(r_entry);
+ }
+ ++w_size;
+ }
+ });
+ mrb_assert(size == w_size);
+ ar_set_ea_n_used(h, size);
+ ar_adjust_ea(mrb, h, size, ea_capa);
}
-static segment*
-segment_alloc(mrb_state *mrb, segment *seg)
+static uint32_t
+ib_it_pos_for(index_buckets_iter *it, uint32_t v)
{
- uint32_t size;
+ return v & it->mask;
+}
- if (!seg) size = MRB_HT_INIT_SIZE;
- else {
- size = seg->size*HT_SEG_INCREASE_RATIO + 1;
- if (size > UINT16_MAX) size = UINT16_MAX;
- }
+static uint32_t
+ib_it_empty_value(const index_buckets_iter *it)
+{
+ return it->mask;
+}
- seg = (segment*)mrb_malloc(mrb, sizeof(segment)+sizeof(struct segkv)*size);
- seg->size = size;
- seg->next = NULL;
+static uint32_t
+ib_it_deleted_value(const index_buckets_iter *it)
+{
+ return it->mask - 1;
+}
- return seg;
+static mrb_bool
+ib_it_empty_p(const index_buckets_iter *it)
+{
+ return it->ea_index == ib_it_empty_value(it);
+}
+
+static mrb_bool
+ib_it_deleted_p(const index_buckets_iter *it)
+{
+ return it->ea_index == ib_it_deleted_value(it);
+}
+
+static mrb_bool
+ib_it_active_p(const index_buckets_iter *it)
+{
+ return it->ea_index < ib_it_deleted_value(it);
}
-/* Set the value for the key in the indexed table. */
static void
-ht_index_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val)
+ib_it_init(mrb_state *mrb, index_buckets_iter *it, struct RHash *h, mrb_value key)
{
- segindex *index = t->index;
- size_t k, sp, step = 0, mask;
- segment *seg;
+ it->h = h;
+ it->bit = ib_bit(h);
+ it->mask = ib_bit_to_capa(it->bit) - 1;
+ it->pos = ib_it_pos_for(it, obj_hash_code(mrb, key, h));
+ it->step = 0;
+}
- if (index->size >= UPPER_BOUND(index->capa)) {
- /* need to expand table */
- ht_compact(mrb, t);
- index = t->index;
- }
- mask = HT_MASK(index);
- sp = index->capa;
- k = ht_hash_func(mrb, t, key) & mask;
- while (index->table[k]) {
- mrb_value key2 = index->table[k]->key;
- if (mrb_undef_p(key2)) {
- if (sp == index->capa) sp = k;
- }
- else if (ht_hash_equal(mrb, t, key, key2)) {
- index->table[k]->val = val;
- return;
- }
- k = (k+(++step)) & mask;
+static void
+ib_it_next(index_buckets_iter *it)
+{
+ /*
+ * [IB image]
+ *
+ * ary_index(1) --.
+ * \ .-- shift1(3) .-- shift2(29)
+ * pos(6) --. \ / /
+ * View | \ \ <-o-> <----------o---------->
+ * -------- +---------------------\----\--+-----------------------------+-----
+ * array | 0 `--. `-|--- o 1 | ...
+ * +---------+---------+-----+\--+-----+---------+---------+---+-----
+ * buckets | 0 | 1 | ... | o 6 | 7 | 8 | ...
+ * +---------+---------+-----+=========+---------+---------+---------
+ * bit set |1 1 1 0 0|0 0 0 1 1| ... |0 1 0 1 1|0 1 1 1 0|0 1 0 1 0| ...
+ * +---------+---------+-----+========*+---------+---------+---------
+ * <---o---> \
+ * \ `-- bit_pos(34)
+ * `-- bit(5)
+ */
+ uint64_t bit_pos;
+ bit_pos = U64(it->bit) * (it->pos + 1) - 1;
+ it->ary_index = U32(bit_pos / IB_TYPE_BIT);
+ it->shift2 = U32((U64(it->ary_index) + 1) * IB_TYPE_BIT - bit_pos - 1);
+ it->ea_index = (ht_ib(it->h)[it->ary_index] >> it->shift2) & it->mask;
+ if (IB_TYPE_BIT - it->bit < it->shift2) {
+ it->shift1 = IB_TYPE_BIT - it->shift2;
+ it->ea_index |= (ht_ib(it->h)[it->ary_index - 1] << it->shift1) & it->mask;
}
- if (sp < index->capa) {
- k = sp;
+ else {
+ it->shift1 = 0;
}
+ it->pos = ib_it_pos_for(it, it->pos + (++it->step));
+}
- /* put the value at the last */
- seg = t->lastseg;
- if (t->last_len < seg->size) {
- index->table[k] = &seg->e[t->last_len++];
- }
- else { /* append a new segment */
- seg->next = segment_alloc(mrb, seg);
- seg = seg->next;
- seg->next = NULL;
- t->lastseg = seg;
- t->last_len = 1;
- index->table[k] = &seg->e[0];
+static uint32_t
+ib_it_get(const index_buckets_iter *it)
+{
+ return it->ea_index;
+}
+
+static void
+ib_it_set(index_buckets_iter *it, uint32_t ea_index)
+{
+ uint32_t mask, i;
+ it->ea_index = ea_index;
+ if (it->shift1) {
+ i = it->ary_index - 1;
+ mask = it->mask >> it->shift1;
+ ht_ib(it->h)[i] = (ht_ib(it->h)[i] & ~mask) | (ea_index >> it->shift1);
}
- index->table[k]->key = key;
- index->table[k]->val = val;
- index->size++;
- t->size++;
+ i = it->ary_index;
+ mask = it->mask << it->shift2;
+ ht_ib(it->h)[i] = (ht_ib(it->h)[i] & ~mask) | (ea_index << it->shift2);
}
-/* Set the value for the key in the hash table. */
static void
-ht_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val)
+ib_it_delete(index_buckets_iter *it)
{
- segment *seg;
- mrb_int i, deleted = 0;
+ ib_it_set(it, ib_it_deleted_value(it));
+}
- if (t == NULL) return;
- if (t->index) {
- ht_index_put(mrb, t, key, val);
- return;
- }
- seg = t->rootseg;
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value k = seg->e[i].key;
- /* Found room in last segment after last_len */
- if (!seg->next && i >= t->last_len) {
- seg->e[i].key = key;
- seg->e[i].val = val;
- t->last_len = (uint16_t)i+1;
- t->size++;
- return;
- }
- if (mrb_undef_p(k)) {
- deleted++;
- continue;
- }
- if (ht_hash_equal(mrb, t, key, k)) {
- seg->e[i].val = val;
- return;
- }
- }
- seg = seg->next;
- }
- /* Not found */
+static hash_entry*
+ib_it_entry(index_buckets_iter *it)
+{
+ return ea_get(ht_ea(it->h), it->ea_index);
+}
- /* Compact if last segment has room */
- if (deleted > 0 && deleted > MRB_HT_INIT_SIZE) {
- ht_compact(mrb, t);
- }
- t->size++;
+static uint32_t
+ib_capa_to_bit(uint32_t capa)
+{
+#ifdef __GNUC__
+ return U32(__builtin_ctz(capa));
+#else
+ /* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */
+ static const uint32_t MultiplyDeBruijnBitPosition2[] = {
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+ };
+ return MultiplyDeBruijnBitPosition2[U32(capa * 0x077CB531U) >> 27];
+#endif
+}
- /* check if thre's room after compaction */
- if (t->lastseg && t->last_len < t->lastseg->size) {
- seg = t->lastseg;
- i = t->last_len;
- }
- else {
- /* append new segment */
- seg = segment_alloc(mrb, t->lastseg);
- i = 0;
- if (t->rootseg == NULL) {
- t->rootseg = seg;
- }
- else {
- t->lastseg->next = seg;
- }
- t->lastseg = seg;
- }
- seg->e[i].key = key;
- seg->e[i].val = val;
- t->last_len = (uint16_t)i+1;
- if (t->index == NULL && t->size > MRB_HT_INIT_SIZE*4) {
- ht_index(mrb, t);
- }
+static uint32_t
+ib_bit_to_capa(uint32_t bit)
+{
+ return U32(1) << bit;
}
-/* Get a value for a key from the indexed table. */
-static mrb_bool
-ht_index_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp)
-{
- segindex *index = t->index;
- size_t mask = HT_MASK(index);
- size_t k = ht_hash_func(mrb, t, key) & mask;
- size_t step = 0;
-
- while (index->table[k]) {
- mrb_value key2 = index->table[k]->key;
- if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) {
- if (vp) *vp = index->table[k]->val;
- return TRUE;
- }
- k = (k+(++step)) & mask;
- }
- return FALSE;
+static uint32_t
+ib_upper_bound_for(uint32_t capa)
+{
+ return (capa >> 2) | (capa >> 1); /* 3/4 */
}
-/* Get a value for a key from the hash table. */
-static mrb_bool
-ht_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp)
+static uint32_t
+ib_bit_for(uint32_t size)
{
- segment *seg;
- mrb_int i;
+ uint32_t capa = next_power2(size);
+ if (capa != IB_MAX_CAPA && ib_upper_bound_for(capa) < size) capa *= 2;
+ return ib_capa_to_bit(capa);
+}
- if (t == NULL) return FALSE;
- if (t->index) {
- return ht_index_get(mrb, t, key, vp);
- }
+static uint32_t
+ib_byte_size_for(uint32_t ib_bit)
+{
+ uint64_t ary_size = (U64(1) << ib_bit) * ib_bit / IB_TYPE_BIT;
+ return U32(sizeof(uint32_t) * ary_size);
+}
- seg = t->rootseg;
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value k = seg->e[i].key;
+static void
+ib_init(mrb_state *mrb, struct RHash *h, uint32_t ib_bit, size_t ib_byte_size)
+{
+ hash_entry *ea = ht_ea(h);
+ memset(ht_ib(h), 0xff, ib_byte_size);
+ ib_set_bit(h, ib_bit);
+ ea_each_used(ea, ht_ea_n_used(h), entry, {
+ ib_cycle_by_key(mrb, h, entry->key, it, {
+ if (!ib_it_empty_p(it)) continue;
+ ib_it_set(it, U32(entry - ea));
+ break;
+ });
+ });
+}
- if (!seg->next && i >= t->last_len) {
- return FALSE;
- }
- if (mrb_undef_p(k)) continue;
- if (ht_hash_equal(mrb, t, key, k)) {
- if (vp) *vp = seg->e[i].val;
- return TRUE;
- }
- }
- seg = seg->next;
- }
- return FALSE;
+static void
+ht_init(mrb_state *mrb, struct RHash *h, uint32_t size,
+ hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit)
+{
+ size_t ib_byte_size = ib_byte_size_for(ib_bit);
+ size_t ht_byte_size = sizeof(hash_table) + ib_byte_size;
+ h_ht_on(h);
+ h_set_ht(h, (hash_table*)mrb_realloc(mrb, ht, ht_byte_size));
+ ht_set_size(h, size);
+ ht_set_ea(h, ea);
+ ht_set_ea_capa(h, ea_capa);
+ ht_set_ea_n_used(h, size);
+ ib_init(mrb, h, ib_bit, ib_byte_size);
+}
+
+static void
+ht_free(mrb_state *mrb, struct RHash *h)
+{
+ mrb_free(mrb, ht_ea(h));
+ mrb_free(mrb, h_ht(h));
+}
+
+static hash_table*
+ht_dup(mrb_state *mrb, const struct RHash *h)
+{
+ size_t ib_byte_size = ib_byte_size_for(ib_bit(h));
+ size_t ht_byte_size = sizeof(hash_table) + ib_byte_size;
+ hash_table *new_ht = (hash_table*)mrb_malloc(mrb, ht_byte_size);
+ return (hash_table*)memcpy(new_ht, h_ht(h), ht_byte_size);
+}
+
+static void
+ht_adjust_ea(mrb_state *mrb, struct RHash *h, uint32_t size, uint32_t max_ea_capa)
+{
+ uint32_t ea_capa = size;
+ hash_entry *ea = ea_adjust(mrb, ht_ea(h), &ea_capa, max_ea_capa);
+ ht_set_ea(h, ea);
+ ht_set_ea_capa(h, ea_capa);
+}
+
+static void
+ht_to_ar(mrb_state *mrb, struct RHash *h)
+{
+ uint32_t size = ht_size(h), ea_capa = size;
+ hash_entry *ea = ht_ea(h);
+ ea_compress(ea, ht_ea_n_used(h));
+ ea = ea_adjust(mrb, ea, &ea_capa, AR_MAX_SIZE);
+ mrb_free(mrb, h_ht(h));
+ ar_init(h, size, ea, ea_capa, size);
}
-/* Deletes the value for the symbol from the hash table. */
-/* Deletion is done by overwriting keys by `undef`. */
static mrb_bool
-ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp)
+ht_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp)
{
- segment *seg;
- mrb_int i;
+ ib_find_by_key(mrb, h, key, it, {
+ *valp = ib_it_entry(it)->val;
+ return TRUE;
+ });
+ return FALSE;
+}
- if (t == NULL) return FALSE;
- seg = t->rootseg;
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value key2;
+static void
+ht_set_as_ar(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val)
+{
+ ht_to_ar(mrb, h);
+ ar_set(mrb, h, key, val);
+}
- if (!seg->next && i >= t->last_len) {
- /* not found */
- return FALSE;
- }
- key2 = seg->e[i].key;
- if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) {
- if (vp) *vp = seg->e[i].val;
- seg->e[i].key = mrb_undef_value();
- t->size--;
- return TRUE;
+static void
+ht_set_without_ib_adjustment(mrb_state *mrb, struct RHash *h,
+ mrb_value key, mrb_value val)
+{
+ mrb_assert(ht_size(h) < ib_bit_to_capa(ib_bit(h)));
+ ib_cycle_by_key(mrb, h, key, it, {
+ if (ib_it_active_p(it)) {
+ if (!obj_eql(mrb, key, ib_it_entry(it)->key, h)) continue;
+ ib_it_entry(it)->val = val;
+ }
+ else {
+ uint32_t ea_n_used = ht_ea_n_used(h);
+ if (ea_n_used == H_MAX_SIZE) {
+ mrb_assert(ht_size(h) == ea_n_used);
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "hash too big");
}
+ if (ea_n_used == ht_ea_capa(h)) ht_adjust_ea(mrb, h, ea_n_used, EA_MAX_CAPA);
+ ib_it_set(it, ea_n_used);
+ ea_set(ht_ea(h), ea_n_used, key, val);
+ ht_inc_size(h);
+ ht_set_ea_n_used(h, ++ea_n_used);
}
- seg = seg->next;
- }
- return FALSE;
+ return;
+ });
}
-/* Iterates over the hash table. */
static void
-ht_foreach(mrb_state *mrb, htable *t, mrb_hash_foreach_func *func, void *p)
-{
- segment *seg;
- mrb_int i;
-
- if (t == NULL) return;
- seg = t->rootseg;
- while (seg) {
- for (i=0; i<seg->size; i++) {
- /* no value in last segment after last_len */
- if (!seg->next && i >= t->last_len) {
- return;
- }
- if (mrb_undef_p(seg->e[i].key)) continue;
- if ((*func)(mrb, seg->e[i].key, seg->e[i].val, p) != 0)
- return;
+ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val)
+{
+ uint32_t size = ht_size(h);
+ uint32_t ib_bit_width = ib_bit(h), ib_capa = ib_bit_to_capa(ib_bit_width);
+ if (ib_upper_bound_for(ib_capa) <= size) {
+ if (size != ht_ea_n_used(h)) ea_compress(ht_ea(h), ht_ea_n_used(h));
+ ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ++ib_bit_width);
+ }
+ else if (ht_ea_capa(h) == ht_ea_n_used(h) && size != ht_ea_n_used(h)) {
+ if (size <= AR_MAX_SIZE) {ht_set_as_ar(mrb, h, key, val); return;}
+ if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) {
+ ea_compress(ht_ea(h), ht_ea_n_used(h));
+ ht_adjust_ea(mrb, h, size, ht_ea_capa(h));
+ ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width);
}
- seg = seg->next;
}
+ ht_set_without_ib_adjustment(mrb, h, key, val);
}
-size_t
-mrb_os_memsize_of_hash_table(mrb_value obj)
+static mrb_bool
+ht_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp)
{
- struct htable *h = mrb_hash_ptr(obj)->ht;
- size_t segkv_size = 0;
+ ib_find_by_key(mrb, h, key, it, {
+ hash_entry *entry = ib_it_entry(it);
+ *valp = entry->val;
+ ib_it_delete(it);
+ entry_delete(entry);
+ ht_dec_size(h);
+ return TRUE;
+ });
+ return FALSE;
+}
- if (h->index) segkv_size = (sizeof(struct segkv) * h->index->capa);
+static void
+ht_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp)
+{
+ hash_entry *ea = ht_ea(h);
+ ea_each(ea, ht_size(h), entry, {
+ ib_cycle_by_key(mrb, h, entry->key, it, {
+ if (ib_it_get(it) != U32(entry - ea)) continue;
+ *keyp = entry->key;
+ *valp = entry->val;
+ ib_it_delete(it);
+ entry_delete(entry);
+ ht_dec_size(h);
+ return;
+ });
+ });
+}
- return sizeof(htable) +
- sizeof(segindex) +
- (sizeof(segment) * h->size) +
- segkv_size;
+static void
+ht_rehash(mrb_state *mrb, struct RHash *h)
+{
+ /* see comments in `h_rehash` */
+ uint32_t size = ht_size(h), w_size = 0, ea_capa = ht_ea_capa(h);
+ hash_entry *ea = ht_ea(h);
+ ht_init(mrb, h, 0, ea, ea_capa, h_ht(h), ib_bit_for(size));
+ ht_set_size(h, size);
+ ht_set_ea_n_used(h, ht_ea_n_used(h));
+ ea_each(ea, size, r_entry, {
+ ib_cycle_by_key(mrb, h, r_entry->key, it, {
+ if (ib_it_active_p(it)) {
+ if (!obj_eql(mrb, r_entry->key, ib_it_entry(it)->key, h)) continue;
+ ib_it_entry(it)->val = r_entry->val;
+ ht_set_size(h, --size);
+ entry_delete(r_entry);
+ }
+ else {
+ if (w_size != U32(r_entry - ea)) {
+ ea_set(ea, w_size, r_entry->key, r_entry->val);
+ entry_delete(r_entry);
+ }
+ ib_it_set(it, w_size++);
+ }
+ break;
+ });
+ });
+ mrb_assert(size == w_size);
+ ht_set_ea_n_used(h, size);
+ size <= AR_MAX_SIZE ? ht_to_ar(mrb, h) : ht_adjust_ea(mrb, h, size, ea_capa);
}
-/* Iterates over the hash table. */
-MRB_API void
-mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, mrb_hash_foreach_func *func, void *p)
+static mrb_value
+h_key_for(mrb_state *mrb, mrb_value key)
{
- ht_foreach(mrb, hash->ht, func, p);
+ if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) {
+ key = mrb_str_dup(mrb, key);
+ MRB_SET_FROZEN_FLAG(mrb_str_ptr(key));
+ }
+ return key;
}
-/* Copy the hash table. */
-static htable*
-ht_copy(mrb_state *mrb, htable *t)
+static struct RHash*
+h_alloc(mrb_state *mrb)
{
- segment *seg;
- htable *t2;
- mrb_int i;
+ return (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
+}
- seg = t->rootseg;
- t2 = ht_new(mrb);
- if (t->size == 0) return t2;
+static void
+h_init(struct RHash *h)
+{
+ ar_init(h, 0, NULL, 0, 0);
+}
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value key = seg->e[i].key;
- mrb_value val = seg->e[i].val;
+static void
+h_free_table(mrb_state *mrb, struct RHash *h)
+{
+ (h_ar_p(h) ? ar_free : ht_free)(mrb, h);
+}
- if ((seg->next == NULL) && (i >= t->last_len)) {
- return t2;
- }
- if (mrb_undef_p(key)) continue; /* skip deleted key */
- ht_put(mrb, t2, key, val);
- }
- seg = seg->next;
- }
- return t2;
+static void
+h_clear(mrb_state *mrb, struct RHash *h)
+{
+ h_free_table(mrb, h);
+ h_init(h);
+}
+
+static mrb_bool
+h_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp)
+{
+ return (h_ar_p(h) ? ar_get : ht_get)(mrb, h, key, valp);
}
-/* Free memory of the hash table. */
static void
-ht_free(mrb_state *mrb, htable *t)
+h_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val)
{
- segment *seg;
+ (h_ar_p(h) ? ar_set : ht_set)(mrb, h, key, val);
+}
- if (!t) return;
- seg = t->rootseg;
- while (seg) {
- segment *p = seg;
- seg = seg->next;
- mrb_free(mrb, p);
- }
- if (t->index) mrb_free(mrb, t->index);
- mrb_free(mrb, t);
+static mrb_bool
+h_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp)
+{
+ return (h_ar_p(h) ? ar_delete : ht_delete)(mrb, h, key, valp);
}
-static void hash_modify(mrb_state *mrb, mrb_value hash);
+/* find first element in the table, and remove it. */
+static void
+h_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp)
+{
+ (h_ar_p(h) ? ar_shift : ht_shift)(mrb, h, keyp, valp);
+}
-static inline mrb_value
-ht_key(mrb_state *mrb, mrb_value key)
+static void
+h_rehash(mrb_state *mrb, struct RHash *h)
{
- if (mrb_string_p(key) && !mrb_frozen_p(mrb_str_ptr(key))) {
- key = mrb_str_dup(mrb, key);
- MRB_SET_FROZEN_FLAG(mrb_str_ptr(key));
+ /*
+ * ==== Comments common to `ar_rehash` and `ht_rehash`
+ *
+ * - Because reindex (such as elimination of duplicate keys) must be
+ * guaranteed, it is necessary to set one by one.
+ *
+ * - To prevent EA from breaking if an exception occurs in the middle,
+ * delete the slot before moving when moving the entry, and update size
+ * at any time when overwriting.
+ */
+ (h_size(h) == 0 ? h_clear : h_ar_p(h) ? ar_rehash : ht_rehash)(mrb, h);
+}
+
+static void
+h_replace(mrb_state *mrb, struct RHash *h, struct RHash *orig_h)
+{
+ uint32_t size = h_size(orig_h);
+ if (size == 0) {
+ h_clear(mrb, h);
+ }
+ else if (h_ar_p(orig_h)) {
+ uint32_t ea_capa = ar_ea_capa(orig_h);
+ hash_entry *ea = ea_dup(mrb, ar_ea(orig_h), ea_capa);
+ h_free_table(mrb, h);
+ ar_init(h, size, ea, ea_capa, ar_ea_n_used(orig_h));
+ }
+ else { /* HT */
+ uint32_t ea_capa = ht_ea_capa(orig_h);
+ hash_entry *ea = ea_dup(mrb, ht_ea(orig_h), ea_capa);
+ hash_table *ht = ht_dup(mrb, orig_h);
+ h_free_table(mrb, h);
+ h_ht_on(h);
+ h_set_ht(h, ht);
+ ht_set_size(h, size);
+ ht_set_ea(h, ea);
+#ifdef MRB_64BIT
+ ht_set_ea_capa(h, ea_capa);
+ ht_set_ea_n_used(h, ht_ea_n_used(orig_h));
+#endif
+ ib_set_bit(h, ib_bit(orig_h));
}
- return key;
}
-#define KEY(key) ht_key(mrb, key)
+void
+mrb_gc_mark_hash(mrb_state *mrb, struct RHash *h)
+{
+ h_each(h, entry, {
+ mrb_gc_mark_value(mrb, entry->key);
+ mrb_gc_mark_value(mrb, entry->val);
+ });
+}
-static int
-hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
+size_t
+mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *h)
{
- mrb_gc_mark_value(mrb, key);
- mrb_gc_mark_value(mrb, val);
- return 0;
+ return h_size(h) * 2;
}
void
-mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash)
+mrb_gc_free_hash(mrb_state *mrb, struct RHash *h)
{
- ht_foreach(mrb, hash->ht, hash_mark_i, NULL);
+ h_free_table(mrb, h);
}
size_t
-mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *hash)
+mrb_os_memsize_of_hash_table(mrb_value self)
{
- if (!hash->ht) return 0;
- return hash->ht->size*2;
+ struct RHash *h = mrb_hash_ptr(self);
+ return h_ar_p(h) ? (ar_ea_capa(h) * sizeof(hash_entry)) :
+ (ht_ea_capa(h) * sizeof(hash_entry) +
+ sizeof(hash_table) +
+ ib_byte_size_for(ib_bit(h)));
}
-void
-mrb_gc_free_hash(mrb_state *mrb, struct RHash *hash)
+/* Iterates over the key/value pairs. */
+MRB_API void
+mrb_hash_foreach(mrb_state *mrb, struct RHash *h, mrb_hash_foreach_func *func, void *data)
{
- ht_free(mrb, hash->ht);
+ h_each(h, entry, {
+ if (func(mrb, entry->key, entry->val, data) != 0) return;
+ });
}
MRB_API mrb_value
mrb_hash_new(mrb_state *mrb)
{
- struct RHash *h;
-
- h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
- h->ht = 0;
- h->iv = 0;
+ struct RHash *h = h_alloc(mrb);
return mrb_obj_value(h);
}
+/*
+ * Set the capacity of EA and IB to minimum capacity (and appropriate load
+ * factor) that does not cause expansion when inserting `capa` elements.
+ */
MRB_API mrb_value
mrb_hash_new_capa(mrb_state *mrb, mrb_int capa)
{
- struct RHash *h;
-
- h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
- /* preallocate hash table */
- h->ht = ht_new(mrb);
- /* capacity ignored */
- h->iv = 0;
- return mrb_obj_value(h);
+ if (capa < 0 || EA_MAX_CAPA < capa) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "hash too big");
+ return mrb_nil_value(); /* not reached */
+ }
+ else if (capa == 0) {
+ return mrb_hash_new(mrb);
+ }
+ else {
+ uint32_t size = U32(capa);
+ struct RHash *h = h_alloc(mrb);
+ hash_entry *ea = ea_resize(mrb, NULL, size);
+ if (size <= AR_MAX_SIZE) {
+ ar_init(h, 0, ea, size, 0);
+ }
+ else {
+ ht_init(mrb, h, 0, ea, size, NULL, ib_bit_for(size));
+ }
+ return mrb_obj_value(h);
+ }
}
static mrb_value mrb_hash_default(mrb_state *mrb, mrb_value hash);
-static mrb_value hash_default(mrb_state *mrb, mrb_value hash, mrb_value key);
-static mrb_value
-mrb_hash_init_copy(mrb_state *mrb, mrb_value self)
+static void
+hash_modify(mrb_state *mrb, mrb_value hash)
{
- mrb_value orig = mrb_get_arg1(mrb);
- struct RHash* copy;
- htable *orig_h;
- mrb_value ifnone, vret;
+ mrb_check_frozen(mrb, mrb_hash_ptr(hash));
+}
- if (mrb_obj_equal(mrb, self, orig)) return self;
- if ((mrb_type(self) != mrb_type(orig)) || (mrb_obj_class(mrb, self) != mrb_obj_class(mrb, orig))) {
- mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object");
+static mrb_value
+hash_default(mrb_state *mrb, mrb_value hash, mrb_value key)
+{
+ if (MRB_RHASH_DEFAULT_P(hash)) {
+ if (MRB_RHASH_PROCDEFAULT_P(hash)) {
+ return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key);
+ }
+ else {
+ return RHASH_IFNONE(hash);
+ }
}
+ return mrb_nil_value();
+}
- orig_h = RHASH_TBL(self);
- copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
- copy->ht = ht_copy(mrb, orig_h);
-
- if (MRB_RHASH_DEFAULT_P(self)) {
- copy->flags |= MRB_HASH_DEFAULT;
- }
- if (MRB_RHASH_PROCDEFAULT_P(self)) {
- copy->flags |= MRB_HASH_PROC_DEFAULT;
+static void
+hash_replace(mrb_state *mrb, mrb_value self, mrb_value orig)
+{
+ struct RHash *h = mrb_hash_ptr(self), *orig_h = mrb_hash_ptr(orig);
+ uint32_t mask = MRB_HASH_DEFAULT | MRB_HASH_PROC_DEFAULT;
+ mrb_sym name;
+ h_replace(mrb, h, orig_h);
+ name = MRB_SYM(ifnone);
+ if (orig_h->flags & MRB_HASH_DEFAULT) {
+ mrb_iv_set(mrb, self, name, mrb_iv_get(mrb, orig, name));
}
- vret = mrb_obj_value(copy);
- ifnone = RHASH_IFNONE(self);
- if (!mrb_nil_p(ifnone)) {
- mrb_iv_set(mrb, vret, MRB_SYM(ifnone), ifnone);
+ else {
+ mrb_iv_remove(mrb, self, name);
}
- return vret;
+ h->flags &= ~mask;
+ h->flags |= orig_h->flags & mask;
}
-static int
-check_kdict_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data)
+static mrb_value
+mrb_hash_init_copy(mrb_state *mrb, mrb_value self)
{
- if (!mrb_symbol_p(key)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys");
- }
- return 0;
+ mrb_value orig;
+ mrb_get_args(mrb, "H", &orig);
+ hash_modify(mrb, self);
+ if (mrb_hash_ptr(self) != mrb_hash_ptr(orig)) hash_replace(mrb, self, orig);
+ return self;
}
void
mrb_hash_check_kdict(mrb_state *mrb, mrb_value self)
{
- htable *t;
-
- t = RHASH_TBL(self);
- if (!t || t->size == 0) return;
- ht_foreach(mrb, t, check_kdict_i, NULL);
+ h_each(mrb_hash_ptr(self), entry, {
+ if (mrb_symbol_p(entry->key)) continue;
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys");
+ });
}
MRB_API mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value self)
{
- struct RHash* copy;
- htable *orig_h;
-
- orig_h = RHASH_TBL(self);
- copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
- copy->ht = orig_h ? ht_copy(mrb, orig_h) : NULL;
- return mrb_obj_value(copy);
+ struct RHash* copy_h = h_alloc(mrb);
+ mrb_value copy = mrb_obj_value(copy_h);
+ copy_h->c = mrb_hash_ptr(self)->c;
+ hash_replace(mrb, copy, self);
+ return copy;
}
MRB_API mrb_value
@@ -711,7 +1159,7 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key)
mrb_value val;
mrb_sym mid;
- if (ht_get(mrb, RHASH_TBL(hash), key, &val)) {
+ if (h_get(mrb, mrb_hash_ptr(hash), key, &val)) {
return val;
}
@@ -728,7 +1176,7 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def)
{
mrb_value val;
- if (ht_get(mrb, RHASH_TBL(hash), key, &val)) {
+ if (h_get(mrb, mrb_hash_ptr(hash), key, &val)) {
return val;
}
/* not found */
@@ -739,21 +1187,10 @@ MRB_API void
mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val)
{
hash_modify(mrb, hash);
-
- key = KEY(key);
- ht_put(mrb, RHASH_TBL(hash), key, val);
- mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key);
- mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val);
- return;
-}
-
-static void
-hash_modify(mrb_state *mrb, mrb_value hash)
-{
- mrb_check_frozen(mrb, mrb_hash_ptr(hash));
- if (!RHASH_TBL(hash)) {
- RHASH_TBL(hash) = ht_new(mrb);
- }
+ key = h_key_for(mrb, key);
+ h_set(mrb, mrb_hash_ptr(hash), key, val);
+ mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), key);
+ mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), val);
}
/* 15.2.13.4.16 */
@@ -837,20 +1274,6 @@ mrb_hash_aget(mrb_state *mrb, mrb_value self)
return mrb_hash_get(mrb, self, key);
}
-static mrb_value
-hash_default(mrb_state *mrb, mrb_value hash, mrb_value key)
-{
- if (MRB_RHASH_DEFAULT_P(hash)) {
- if (MRB_RHASH_PROCDEFAULT_P(hash)) {
- return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key);
- }
- else {
- return RHASH_IFNONE(hash);
- }
- }
- return mrb_nil_value();
-}
-
/* 15.2.13.4.5 */
/*
* call-seq:
@@ -945,7 +1368,6 @@ mrb_hash_set_default(mrb_state *mrb, mrb_value hash)
* a #=> [nil, nil, 4]
*/
-
static mrb_value
mrb_hash_default_proc(mrb_state *mrb, mrb_value hash)
{
@@ -990,10 +1412,10 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash)
MRB_API mrb_value
mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key)
{
- htable *t = RHASH_TBL(hash);
mrb_value del_val;
- if (ht_del(mrb, t, key, &del_val)) {
+ hash_modify(mrb, hash);
+ if (h_delete(mrb, mrb_hash_ptr(hash), key, &del_val)) {
return del_val;
}
@@ -1005,38 +1427,9 @@ static mrb_value
mrb_hash_delete(mrb_state *mrb, mrb_value self)
{
mrb_value key = mrb_get_arg1(mrb);
-
- hash_modify(mrb, self);
return mrb_hash_delete_key(mrb, self, key);
}
-/* find first element in the hash table, and remove it. */
-static void
-ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp)
-{
- segment *seg = t->rootseg;
- mrb_int i;
-
- while (seg) {
- for (i=0; i<seg->size; i++) {
- mrb_value key;
-
- if (!seg->next && i >= t->last_len) {
- return;
- }
- key = seg->e[i].key;
- if (mrb_undef_p(key)) continue;
- *kp = key;
- *vp = seg->e[i].val;
- /* delete element */
- seg->e[i].key = mrb_undef_value();
- t->size--;
- return;
- }
- seg = seg->next;
- }
-}
-
/* 15.2.13.4.24 */
/*
* call-seq:
@@ -1054,28 +1447,19 @@ ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp)
static mrb_value
mrb_hash_shift(mrb_state *mrb, mrb_value hash)
{
- htable *t = RHASH_TBL(hash);
+ struct RHash *h = mrb_hash_ptr(hash);
hash_modify(mrb, hash);
- if (t && t->size > 0) {
- mrb_value del_key = mrb_nil_value();
- mrb_value del_val = mrb_nil_value();
-
- ht_shift(mrb, t, &del_key, &del_val);
+ if (h_size(h) == 0) {
+ return hash_default(mrb, hash, mrb_nil_value());
+ }
+ else {
+ mrb_value del_key, del_val;
+ h_shift(mrb, h, &del_key, &del_val);
mrb_gc_protect(mrb, del_key);
mrb_gc_protect(mrb, del_val);
return mrb_assoc_new(mrb, del_key, del_val);
}
-
- if (MRB_RHASH_DEFAULT_P(hash)) {
- if (MRB_RHASH_PROCDEFAULT_P(hash)) {
- return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, mrb_nil_value());
- }
- else {
- return RHASH_IFNONE(hash);
- }
- }
- return mrb_nil_value();
}
/* 15.2.13.4.4 */
@@ -1093,13 +1477,8 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash)
MRB_API mrb_value
mrb_hash_clear(mrb_state *mrb, mrb_value hash)
{
- htable *t = RHASH_TBL(hash);
-
hash_modify(mrb, hash);
- if (t) {
- ht_free(mrb, t);
- RHASH_TBL(hash) = NULL;
- }
+ h_clear(mrb, mrb_hash_ptr(hash));
return hash;
}
@@ -1135,10 +1514,7 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self)
MRB_API mrb_int
mrb_hash_size(mrb_state *mrb, mrb_value hash)
{
- htable *t = RHASH_TBL(hash);
-
- if (!t) return 0;
- return t->size;
+ return (mrb_int)h_size(mrb_hash_ptr(hash));
}
/* 15.2.13.4.20 */
@@ -1165,10 +1541,7 @@ mrb_hash_size_m(mrb_state *mrb, mrb_value self)
MRB_API mrb_bool
mrb_hash_empty_p(mrb_state *mrb, mrb_value self)
{
- htable *t = RHASH_TBL(self);
-
- if (!t) return TRUE;
- return t->size == 0;
+ return h_size(mrb_hash_ptr(self)) == 0;
}
/* 15.2.13.4.12 */
@@ -1187,13 +1560,6 @@ mrb_hash_empty_m(mrb_state *mrb, mrb_value self)
return mrb_bool_value(mrb_hash_empty_p(mrb, self));
}
-static int
-hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
-{
- mrb_ary_push(mrb, *(mrb_value*)p, key);
- return 0;
-}
-
/* 15.2.13.4.19 */
/*
* call-seq:
@@ -1210,24 +1576,14 @@ hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
MRB_API mrb_value
mrb_hash_keys(mrb_state *mrb, mrb_value hash)
{
- htable *t = RHASH_TBL(hash);
- mrb_int size;
- mrb_value ary;
-
- if (!t || (size = t->size) == 0)
- return mrb_ary_new(mrb);
- ary = mrb_ary_new_capa(mrb, size);
- ht_foreach(mrb, t, hash_keys_i, (void*)&ary);
+ struct RHash *h = mrb_hash_ptr(hash);
+ mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h));
+ h_each(h, entry, {
+ mrb_ary_push(mrb, ary, entry->key);
+ });
return ary;
}
-static int
-hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
-{
- mrb_ary_push(mrb, *(mrb_value*)p, val);
- return 0;
-}
-
/* 15.2.13.4.28 */
/*
* call-seq:
@@ -1244,14 +1600,11 @@ hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
MRB_API mrb_value
mrb_hash_values(mrb_state *mrb, mrb_value hash)
{
- htable *t = RHASH_TBL(hash);
- mrb_int size;
- mrb_value ary;
-
- if (!t || (size = t->size) == 0)
- return mrb_ary_new(mrb);
- ary = mrb_ary_new_capa(mrb, size);
- ht_foreach(mrb, t, hash_vals_i, (void*)&ary);
+ struct RHash *h = mrb_hash_ptr(hash);
+ mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h));
+ h_each(h, entry, {
+ mrb_ary_push(mrb, ary, entry->val);
+ });
return ary;
}
@@ -1277,13 +1630,8 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash)
MRB_API mrb_bool
mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key)
{
- htable *t;
-
- t = RHASH_TBL(hash);
- if (ht_get(mrb, t, key, NULL)) {
- return TRUE;
- }
- return FALSE;
+ mrb_value val;
+ return h_get(mrb, mrb_hash_ptr(hash), key, &val);
}
static mrb_value
@@ -1296,23 +1644,6 @@ mrb_hash_has_key(mrb_state *mrb, mrb_value hash)
return mrb_bool_value(key_p);
}
-struct has_v_arg {
- mrb_bool found;
- mrb_value val;
-};
-
-static int
-hash_has_value_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
-{
- struct has_v_arg *arg = (struct has_v_arg*)p;
-
- if (mrb_equal(mrb, arg->val, val)) {
- arg->found = TRUE;
- return 1;
- }
- return 0;
-}
-
/* 15.2.13.4.14 */
/* 15.2.13.4.27 */
/*
@@ -1332,41 +1663,32 @@ static mrb_value
mrb_hash_has_value(mrb_state *mrb, mrb_value hash)
{
mrb_value val = mrb_get_arg1(mrb);
- struct has_v_arg arg;
-
- arg.found = FALSE;
- arg.val = val;
- ht_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg);
- return mrb_bool_value(arg.found);
-}
-
-static int
-merge_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data)
-{
- htable *h1 = (htable*)data;
-
- ht_put(mrb, h1, key, val);
- return 0;
+ struct RHash *h = mrb_hash_ptr(hash);
+ h_each(h, entry, {
+ h_check_modified(mrb, h, {
+ if (mrb_equal(mrb, val, entry->val)) return mrb_true_value();
+ });
+ });
+ return mrb_false_value();
}
MRB_API void
mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2)
{
- htable *h1, *h2;
+ struct RHash *h1, *h2;
hash_modify(mrb, hash1);
- hash2 = mrb_ensure_hash_type(mrb, hash2);
- h1 = RHASH_TBL(hash1);
- h2 = RHASH_TBL(hash2);
-
- if (!h2) return;
- if (!h1) {
- RHASH_TBL(hash1) = ht_copy(mrb, h2);
- return;
- }
- ht_foreach(mrb, h2, merge_i, h1);
- mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1));
- return;
+ mrb_ensure_hash_type(mrb, hash2);
+ h1 = mrb_hash_ptr(hash1);
+ h2 = mrb_hash_ptr(hash2);
+
+ if (h1 == h2) return;
+ if (h_size(h2) == 0) return;
+ h_each(h2, entry, {
+ h_check_modified(mrb, h2, {h_set(mrb, h1, entry->key, entry->val);});
+ mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->key);
+ mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->val);
+ });
}
/*
@@ -1381,14 +1703,10 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2)
* k = keys[0]
* h = {}
* keys.each{|key| h[key] = key[0]}
- * h #=> { [1]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7,
- * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14,
- * [15]=>15,[16]=>16,[17]=>17}
+ * h #=> { [1]=>1, [2]=>2, ... [16]=>16, [17]=>17}
* h[k] #=> 1
* k[0] = keys.size + 1
- * h #=> {[18]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7,
- * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14,
- * [15]=>15,[16]=>16,[17]=>17}
+ * h #=> {[18]=>1, [2]=>2, ... [16]=>16, [17]=>17}
* h[k] #=> nil
* h.rehash
* h[k] #=> 1
@@ -1396,7 +1714,7 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2)
static mrb_value
mrb_hash_rehash(mrb_state *mrb, mrb_value self)
{
- ht_compact(mrb, RHASH_TBL(self));
+ h_rehash(mrb, mrb_hash_ptr(self));
return self;
}
@@ -1408,11 +1726,10 @@ mrb_init_hash(mrb_state *mrb)
mrb->hash_class = h = mrb_define_class(mrb, "Hash", mrb->object_class); /* 15.2.13 */
MRB_SET_INSTANCE_TT(h, MRB_TT_HASH);
- mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1));
mrb_define_method(mrb, h, "[]", mrb_hash_aget, MRB_ARGS_REQ(1)); /* 15.2.13.4.2 */
mrb_define_method(mrb, h, "[]=", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.3 */
mrb_define_method(mrb, h, "clear", mrb_hash_clear, MRB_ARGS_NONE()); /* 15.2.13.4.4 */
- mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */
+ mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */
mrb_define_method(mrb, h, "default=", mrb_hash_set_default, MRB_ARGS_REQ(1)); /* 15.2.13.4.6 */
mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,MRB_ARGS_NONE()); /* 15.2.13.4.7 */
mrb_define_method(mrb, h, "default_proc=", mrb_hash_set_default_proc,MRB_ARGS_REQ(1)); /* 15.2.13.4.7 */
@@ -1422,10 +1739,12 @@ mrb_init_hash(mrb_state *mrb)
mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.14 */
mrb_define_method(mrb, h, "include?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.15 */
mrb_define_method(mrb, h, "initialize", mrb_hash_init, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); /* 15.2.13.4.16 */
+ mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.17 */
mrb_define_method(mrb, h, "key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.18 */
mrb_define_method(mrb, h, "keys", mrb_hash_keys, MRB_ARGS_NONE()); /* 15.2.13.4.19 */
mrb_define_method(mrb, h, "length", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.20 */
mrb_define_method(mrb, h, "member?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.21 */
+ mrb_define_method(mrb, h, "replace", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.23 */
mrb_define_method(mrb, h, "shift", mrb_hash_shift, MRB_ARGS_NONE()); /* 15.2.13.4.24 */
mrb_define_method(mrb, h, "size", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.25 */
mrb_define_method(mrb, h, "store", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.26 */