From f2d8db39be487fcd710c5c5844ae47d3a6920e70 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 10 Nov 2020 14:02:55 +0900 Subject: Reduce memory usage of Hash object ## Implementation Summary * Change entry list from segmented list to flat array. * Change value of hash bucket from pointer to entry to index of entry list, and represent it by variable length bits according to capacity of hash buckets. * Store management information about entry list and hash table to `struct RHash` as much as possible. ## Benchmark Summary Only the results of typical situations on 64-bit Word-boxing are present here. For more detailed information, including consideration, see below (although most of the body is written in Japanese). * https://shuujii.github.io/mruby-hash-benchmark ### Memory Usage Lower value is better. | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 344B | 256B | 0.74419x | | 40 | 1,464B | 840B | 0.57377x | | 200 | 8,056B | 3,784B | 0.46971x | | 500 | 17,169B | 9,944B | 0.57949x | ### Performance Higher value is better. #### `mrb_hash_set` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 1.41847M i/s | 1.36004M i/s | 0.95881x | | 40 | 0.39224M i/s | 0.31888M i/s | 0.81296x | | 200 | 0.03780M i/s | 0.04290M i/s | 1.13494x | | 500 | 0.01225M i/s | 0.01314M i/s | 1.07275x | #### `mrb_hash_get` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 26.05920M i/s | 30.19543M i/s | 1.15872x | | 40 | 44.26420M i/s | 32.75781M i/s | 0.74005x | | 200 | 44.55171M i/s | 31.56926M i/s | 0.70860x | | 500 | 39.19250M i/s | 29.73806M i/s | 0.75877x | #### `mrb_hash_each` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 25.11964M i/s | 30.34167M i/s | 1.20789x | | 40 | 11.74253M i/s | 13.25539M i/s | 1.12884x | | 200 | 2.01133M i/s | 2.97214M i/s | 1.47770x | | 500 | 0.87411M i/s | 1.21178M i/s | 1.38631x | #### `Hash#[]=` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.50095M i/s | 0.56490M i/s | 1.12764x | | 40 | 0.19132M i/s | 0.18392M i/s | 0.96129x | | 200 | 0.03624M i/s | 0.03256M i/s | 0.89860x | | 500 | 0.01527M i/s | 0.01236M i/s | 0.80935x | #### `Hash#[]` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 11.53211M i/s | 12.78806M i/s | 1.10891x | | 40 | 15.26920M i/s | 13.37529M i/s | 0.87596x | | 200 | 15.28550M i/s | 13.36410M i/s | 0.87430x | | 500 | 14.57695M i/s | 12.75388M i/s | 0.87494x | #### `Hash#each` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.30462M i/s | 0.27080M i/s | 0.88898x | | 40 | 0.12912M i/s | 0.11704M i/s | 0.90642x | | 200 | 0.02638M i/s | 0.02402M i/s | 0.91071x | | 500 | 0.01066M i/s | 0.00959M i/s | 0.89953x | #### `Hash#delete` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 7.84167M i/s | 6.96419M i/s | 0.88810x | | 40 | 6.91292M i/s | 7.41427M i/s | 1.07252x | | 200 | 3.75952M i/s | 7.32080M i/s | 1.94727x | | 500 | 2.10754M i/s | 7.05963M i/s | 3.34970x | #### `Hash#shift` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 14.66444M i/s | 13.18876M i/s | 0.89937x | | 40 | 11.95124M i/s | 11.10420M i/s | 0.92913x | | 200 | 5.53681M i/s | 7.88155M i/s | 1.42348x | | 500 | 2.96728M i/s | 5.40405M i/s | 1.82121x | #### `Hash#dup` | Hash Size | Baseline | New | Factor | |----------:|--------------:|--------------:|-----------:| | 16 | 0.15063M i/s | 5.37889M i/s | 35.71024x | | 40 | 0.06515M i/s | 3.38196M i/s | 51.91279x | | 200 | 0.01359M i/s | 1.46538M i/s | 107.84056x | | 500 | 0.00559M i/s | 0.75411M i/s | 134.88057x | ### Binary Size Lower value is better. | File | Baseline | New | Factor | |:-----------|--------------:|--------------:|----------:| | mruby | 730,408B | 734,176B | 1.00519x | | libmruby.a | 1,068,134B | 1,072,846B | 1.00441x | ## Other Fixes The following issues have also been fixed in the parts where there was some change this time. * [Heap use-after-free in `Hash#value?`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-hash-value-md) * [Heap use-after-free in `ht_hash_equal`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-ht_hash_equal-md) * [Heap use-after-free in `ht_hash_func`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-ht_hash_func-md) * [Heap use-after-free in `mrb_hash_merge`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-heap-use-after-free-in-mrb_hash_merge-md) * [Self-replacement does not work for `Hash#replace`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-self-replacement-does-not-work-for-hash-replace-md) * [Repeated deletes and inserts increase memory usage of `Hash`](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-repeated-deletes-and-inserts-increase-memory-usage-of-hash-md) * [`Hash#rehash` does not reindex completely](https://gist.github.com/shuujii/30e4fcd5844a4112a0ecd4a5b3483101#file-hash-rehash-does-not-reindex-completely-md) * `mrb_hash_delete_key` does not cause an error for frozen object * `mrb_hash_new_capa` does not allocate required space first * [`mrb_os_memsize_of_hash_table` result is incorrect](https://github.com/mruby/mruby/pull/5032#discussion_r457994075) --- src/hash.c | 1695 ++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 1007 insertions(+), 688 deletions(-) (limited to 'src') diff --git a/src/hash.c b/src/hash.c index d67fa6254..f797f8823 100644 --- a/src/hash.c +++ b/src/hash.c @@ -4,6 +4,7 @@ ** See Copyright Notice in mruby.h */ +#include #include #include #include @@ -11,56 +12,285 @@ #include #include -#ifndef MRB_NO_FLOAT -/* a function to get hash value of a float number */ -mrb_int mrb_float_id(mrb_float f); +/* + * === Glossary + * + * [EA] + * Entry Array. Store `Hash' entries in insertion order. + * + * [AR] + * Array Table Implementation. The structure of `Hash` that doesn't have a + * hash table and linearly searches EA. It is used when `Hash` size <= 16. + * + * [IB] + * Index Buckets. The buckets of hash table, where the bucket value is EA + * index. The index is represented by variable length bits according to + * the capacity. + * + * [HT] + * Hash Table Implementation. The structure of `Hash` that has IB and is + * searched by hash table algorithm. It is used when `Hash` > 16. Collision + * resolution strategy is open addressing method. + * + * [size] + * The number of `Hash` entries (value of `Hash#size`). + * + * [slot] + * The generic term for EA or IB elements. + * + * [active] + * The state in which a slot is recognized as a `Hash` entry. + * + * [deleted] + * The state in which a slot is marked as deleted. + * + * [used] + * The state in which a slot is active or deleted. + * + * [empty] + * The state in which a slot is not used. Capacity is equal to the sum of + * the number of used slots and the number of empty slots. + */ + +#define EA_INCREASE_RATIO 6 / 5 + 6 +#define EA_MAX_INCREASE UINT16_MAX +#define EA_MAX_CAPA /* `- 2` means reserved indices (empty and deleted) */ \ + U32(lesser(IB_MAX_CAPA - 2, MRB_INT_MAX)) +#define IB_MAX_CAPA (U32(1) << IB_MAX_BIT) +#define IB_TYPE_BIT 32 +#define IB_INIT_BIT ( \ + ib_upper_bound_for(32) <= AR_MAX_SIZE ? 6 : \ + ib_upper_bound_for(16) <= AR_MAX_SIZE ? 5 : \ + 4 \ +) +#define IB_MAX_BIT (IB_TYPE_BIT - 1) +#define AR_DEFAULT_CAPA 4 +#define AR_MAX_SIZE 16 +#define H_MAX_SIZE EA_MAX_CAPA + +mrb_static_assert1(offsetof(struct RHash, iv) == offsetof(struct RObject, iv)); +mrb_static_assert1(AR_MAX_SIZE < (1 << MRB_HASH_AR_EA_CAPA_BIT)); + +typedef struct hash_entry { + mrb_value key; + mrb_value val; +} hash_entry; + +typedef struct hash_table { + hash_entry *ea; +#ifdef MRB_32BIT + uint32_t ea_capa; + uint32_t ea_n_used; #endif + uint32_t ib[]; +} hash_table; + +typedef struct index_buckets_iter { + struct RHash *h; + uint32_t bit; + uint32_t mask; + uint32_t pos; + uint32_t ary_index; + uint32_t ea_index; + uint32_t shift1; + uint32_t shift2; + uint32_t step; +} index_buckets_iter; + +/* + * `c_` :: receiver class (category) + * `n_` :: attribute name + * `t_` :: attribute type + * `p_` :: struct member path + * `k_` :: macro key + */ +#define DEFINE_GETTER(c_, n_, t_, p_) \ + MRB_INLINE t_ c_##_##n_(const struct RHash *h) {return h->p_;} +#define DEFINE_SETTER(c_, n_, t_, p_) \ + MRB_INLINE void c_##_set_##n_(struct RHash *h, t_ v) {h->p_ = v;} +#define DEFINE_ACCESSOR(c_, n_, t_, p_) \ + DEFINE_GETTER(c_, n_, t_, p_) \ + DEFINE_SETTER(c_, n_, t_, p_) +#define DEFINE_FLAG_GETTER(c_, n_, t_, k_) \ + MRB_INLINE t_ c_##_##n_(const struct RHash *h) { \ + return (t_)((h->flags & MRB_HASH_##k_##_MASK) >> MRB_HASH_##k_##_SHIFT); \ + } +#define DEFINE_FLAG_SETTER(c_, n_, t_, k_) \ + MRB_INLINE void c_##_set_##n_(struct RHash *h, t_ v) { \ + h->flags &= ~MRB_HASH_##k_##_MASK; \ + h->flags |= v << MRB_HASH_##k_##_SHIFT; \ + } +#define DEFINE_FLAG_ACCESSOR(c_, n_, t_, k_) \ + DEFINE_FLAG_GETTER(c_, n_, t_, k_) \ + DEFINE_FLAG_SETTER(c_, n_, t_, k_) +#define DEFINE_INCREMENTER(c_, n_) \ + MRB_INLINE void c_##_inc_##n_(struct RHash *h) { \ + c_##_set_##n_(h, c_##_##n_(h) + 1); \ + } +#define DEFINE_DECREMENTER(c_, n_) \ + MRB_INLINE void c_##_dec_##n_(struct RHash *h) { \ + c_##_set_##n_(h, c_##_##n_(h) - 1); \ + } +#define DEFINE_SWITCHER(n_, k_) \ + MRB_INLINE void h_##n_##_on(struct RHash *h) { \ + h->flags |= MRB_HASH_##k_; \ + } \ + MRB_INLINE void h_##n_##_off(struct RHash *h) { \ + h->flags &= ~MRB_HASH_##k_; \ + } \ + MRB_INLINE mrb_bool h_##n_##_p(const struct RHash *h) { \ + return (h->flags & MRB_HASH_##k_) == MRB_HASH_##k_; \ + } -#ifndef MRB_HT_INIT_SIZE -#define MRB_HT_INIT_SIZE 4 +#ifdef MRB_64BIT +DEFINE_ACCESSOR(ar, ea_capa, uint32_t, ea_capa) +DEFINE_ACCESSOR(ar, ea_n_used, uint32_t, ea_n_used) +DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ea_capa) +DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ea_n_used) +#else +DEFINE_FLAG_ACCESSOR(ar, ea_capa, uint32_t, AR_EA_CAPA) +DEFINE_FLAG_ACCESSOR(ar, ea_n_used, uint32_t, AR_EA_N_USED) +DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ht->ea_capa) +DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ht->ea_n_used) #endif -#define HT_SEG_INCREASE_RATIO 6 / 5 +DEFINE_FLAG_ACCESSOR(ib, bit, uint32_t, IB_BIT) +DEFINE_ACCESSOR(ar, size, uint32_t, size) +DEFINE_ACCESSOR(ar, ea, hash_entry*, ea) +DEFINE_DECREMENTER(ar, size) +DEFINE_ACCESSOR(ht, size, uint32_t, size) +DEFINE_ACCESSOR(ht, ea, hash_entry*, ht->ea) +DEFINE_GETTER(ht, ib, uint32_t*, ht->ib) +DEFINE_INCREMENTER(ht, size) +DEFINE_DECREMENTER(ht, size) +DEFINE_GETTER(h, size, uint32_t, size) +DEFINE_ACCESSOR(h, ht, hash_table*, ht) +DEFINE_SWITCHER(ht, HT) + +#define ea_each_used(ea, n_used, entry_var, code) do { \ + hash_entry *entry_var = ea, *ea_end__ = entry_var + (n_used); \ + for (; entry_var < ea_end__; ++entry_var) { \ + code; \ + } \ +} while (0) -struct segkv { - mrb_value key; - mrb_value val; -}; - -typedef struct segment { - uint16_t size; - struct segment *next; - struct segkv e[]; -} segment; - -typedef struct segindex { - size_t size; - size_t capa; - struct segkv *table[]; -} segindex; - -/* hash table structure */ -typedef struct htable { - segment *rootseg; - segment *lastseg; - mrb_int size; - uint16_t last_len; - segindex *index; -} htable; - -static /* inline */ size_t -ht_hash_func(mrb_state *mrb, htable *t, mrb_value key) +#define ea_each(ea, size, entry_var, code) do { \ + hash_entry *entry_var = ea; \ + uint32_t size__ = size; \ + for (; 0 < size__; ++entry_var) { \ + if (entry_deleted_p(entry_var)) continue; \ + --size__; \ + code; \ + } \ +} while (0) + +#define ib_cycle_by_key(mrb, h, key, it_var, code) do { \ + index_buckets_iter it_var[1]; \ + ib_it_init(mrb, it_var, h, key); \ + for (;;) { \ + ib_it_next(it_var); \ + code; \ + } \ +} while (0) + +#define ib_find_by_key(mrb, h_, key_, it_var, code) do { \ + mrb_value ib_fbk_key__ = key_; \ + ib_cycle_by_key(mrb, h_, ib_fbk_key__, it_var, { \ + if (ib_it_empty_p(it_var)) break; \ + if (ib_it_deleted_p(it_var)) continue; \ + if (obj_eql(mrb, ib_fbk_key__, ib_it_entry(it_var)->key, it_var->h)) { \ + code; \ + break; \ + } \ + }); \ +} while (0) + +#define h_each(h, entry_var, code) do { \ + struct RHash *h__ = h; \ + hash_entry *h_e_ea__; \ + uint32_t h_e_size__; \ + h_ar_p(h) ? (h_e_ea__ = ar_ea(h__), h_e_size__ = ar_size(h__)) : \ + (h_e_ea__ = ht_ea(h__), h_e_size__ = ht_size(h__)); \ + ea_each(h_e_ea__, h_e_size__, entry_var, code); \ +} while (0) + +/* + * `h_check_modified` raises an exception when a dangerous modification is + * made to `h` by executing `code`. + * + * This macro is not called if `h->ht` (`h->ea`) is `NULL` (`Hash` size is + * zero). And because the `hash_entry` is rather large, `h->ht->ea` and + * `h->ht->ea_capa` are able to be safely accessed even in AR. This nature + * is used to eliminate branch of AR or HT. + * + * `HT_ASSERT_SAFE_READ` checks if members can be accessed according to its + * assumptions. + */ +#define HT_ASSERT_SAFE_READ(attr_name) \ + mrb_static_assert1( \ + offsetof(hash_table, attr_name) + sizeof(((hash_table*)0)->attr_name) <= \ + sizeof(hash_entry)) +HT_ASSERT_SAFE_READ(ea); +#ifdef MRB_32BIT +HT_ASSERT_SAFE_READ(ea_capa); +#endif +#undef HT_ASSERT_SAFE_READ +#define h_check_modified(mrb, h, code) do { \ + struct RHash *h__ = h; \ + uint32_t mask = MRB_HASH_HT|MRB_HASH_IB_BIT_MASK|MRB_HASH_AR_EA_CAPA_MASK; \ + uint32_t flags = h__->flags & mask; \ + void* tbl__ = (mrb_assert(h__->ht), h__->ht); \ + uint32_t ht_ea_capa__ = ht_ea_capa(h__); \ + hash_entry *ht_ea__ = ht_ea(h__); \ + code; \ + if (flags != (h__->flags & mask) || \ + tbl__ != h__->ht || \ + ht_ea_capa__ != ht_ea_capa(h__) || \ + ht_ea__ != ht_ea(h__)) { \ + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); \ + } \ +} while (0) + +#define U32(v) ((uint32_t)(v)) +#define U64(v) ((uint64_t)(v)) +#define h_ar_p(h) (!h_ht_p(h)) +#define h_ar_on(h) h_ht_off(h) +#define lesser(a, b) ((a) < (b) ? (a) : (b)) + +static uint32_t ib_upper_bound_for(uint32_t capa); +static uint32_t ib_bit_to_capa(uint32_t bit); +static void ht_init( + mrb_state *mrb, struct RHash *h, uint32_t size, + hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit); +static void ht_set_without_ib_adjustment( + mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val); + +static uint32_t +next_power2(uint32_t v) { - enum mrb_vtype tt = mrb_type(key); - mrb_value hv; - size_t h; - segindex *index = t->index; - size_t capa = index ? index->capa : 0; + mrb_assert(v != 0); +#ifdef __GNUC__ + return U32(1) << ((sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v)); +#else + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + ++v; + return v; +#endif +} +static uint32_t +obj_hash_code(mrb_state *mrb, mrb_value key, struct RHash *h) +{ + enum mrb_vtype tt = mrb_type(key); + uint32_t hash_code; + mrb_value hash_code_obj; switch (tt) { case MRB_TT_STRING: - h = mrb_str_hash(mrb, key); + hash_code = mrb_str_hash(mrb, key); break; - case MRB_TT_TRUE: case MRB_TT_FALSE: case MRB_TT_SYMBOL: @@ -68,24 +298,24 @@ ht_hash_func(mrb_state *mrb, htable *t, mrb_value key) #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: #endif - h = (size_t)mrb_obj_id(key); + hash_code = U32(mrb_obj_id(key)); break; - default: - hv = mrb_funcall_id(mrb, key, MRB_SYM(hash), 0); - h = (size_t)tt ^ (size_t)mrb_integer(hv); + h_check_modified(mrb, h, { + hash_code_obj = mrb_funcall_argv(mrb, key, MRB_SYM(hash), 0, NULL); + }); + + hash_code = U32(tt) ^ U32(mrb_integer(hash_code_obj)); break; } - if (index && (index != t->index || capa != index->capa)) { - mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); - } - return ((h)^((h)<<2)^((h)>>2)); + return hash_code ^ (hash_code << 2) ^ (hash_code >> 2); } -static inline mrb_bool -ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) +static mrb_bool +obj_eql(mrb_state *mrb, mrb_value a, mrb_value b, struct RHash *h) { enum mrb_vtype tt = mrb_type(a); + mrb_bool eql; switch (tt) { case MRB_TT_STRING: @@ -106,603 +336,821 @@ ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) #endif default: - { - segindex *index = t->index; - size_t capa = index ? index->capa : 0; - mrb_bool eql = mrb_eql(mrb, a, b); - if (index && (index != t->index || capa != index->capa)) { - mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); - } - return eql; - } - } + h_check_modified(mrb, h, {eql = mrb_eql(mrb, a, b);}); + return eql; + } } -/* Creates the hash table. */ -static htable* -ht_new(mrb_state *mrb) +static mrb_bool +entry_deleted_p(const hash_entry* entry) { - htable *t; + return mrb_undef_p(entry->key); +} - t = (htable*)mrb_malloc(mrb, sizeof(htable)); - t->size = 0; - t->rootseg = NULL; - t->lastseg = NULL; - t->last_len = 0; - t->index = NULL; +static void +entry_delete(hash_entry* entry) +{ + entry->key = mrb_undef_value(); +} - return t; +static uint32_t +ea_next_capa_for(uint32_t size, uint32_t max_capa) +{ + if (size < AR_DEFAULT_CAPA) { + return AR_DEFAULT_CAPA; + } + else { + uint64_t capa = U64(size) * EA_INCREASE_RATIO, inc = capa - size; + if (EA_MAX_INCREASE < inc) capa = size + EA_MAX_INCREASE; + return capa <= max_capa ? U32(capa) : max_capa; + } } -#define power2(v) do { \ - v--;\ - v |= v >> 1;\ - v |= v >> 2;\ - v |= v >> 4;\ - v |= v >> 8;\ - v |= v >> 16;\ - v++;\ -} while (0) +static hash_entry* +ea_resize(mrb_state *mrb, hash_entry *ea, uint32_t capa) +{ + return (hash_entry*)mrb_realloc(mrb, ea, sizeof(hash_entry) * capa); +} -#ifndef UPPER_BOUND -#define UPPER_BOUND(x) ((x)>>2|(x)>>1) -#endif +static void +ea_compress(hash_entry *ea, uint32_t n_used) +{ + hash_entry *w_entry = ea; + ea_each_used(ea, n_used, r_entry, { + if (entry_deleted_p(r_entry)) continue; + if (r_entry != w_entry) *w_entry = *r_entry; + ++w_entry; + }); +} + +/* + * Increase or decrease capacity of `ea` to a standard size that can + * accommodate `*capap + 1` entries (but, not exceed `max_capa`). Set the + * changed capacity to `*capap` and return a pointer to `mrb_realloc`ed EA. + */ +static hash_entry* +ea_adjust(mrb_state *mrb, hash_entry *ea, uint32_t *capap, uint32_t max_capa) +{ + *capap = ea_next_capa_for(*capap, max_capa); + return ea_resize(mrb, ea, *capap); +} + +static hash_entry* +ea_dup(mrb_state *mrb, const hash_entry *ea, uint32_t capa) +{ + size_t byte_size = sizeof(hash_entry) * capa; + hash_entry *new_ea = (hash_entry*)mrb_malloc(mrb, byte_size); + return (hash_entry*)memcpy(new_ea, ea, byte_size); +} -#define HT_MASK(index) ((index->capa)-1) +static hash_entry* +ea_get_by_key(mrb_state *mrb, hash_entry *ea, uint32_t size, mrb_value key, + struct RHash *h) +{ + ea_each(ea, size, entry, { + if (obj_eql(mrb, key, entry->key, h)) return entry; + }); + return NULL; +} + +static hash_entry* +ea_get(hash_entry *ea, uint32_t index) +{ + return &ea[index]; +} -/* Build index for the hash table */ static void -ht_index(mrb_state *mrb, htable *t) +ea_set(hash_entry *ea, uint32_t index, mrb_value key, mrb_value val) { - size_t size = (size_t)t->size; - size_t mask; - segindex *index = t->index; - segment *seg; - size_t i; + ea[index].key = key; + ea[index].val = val; +} - if (size == 0) { - t->index = NULL; - mrb_free(mrb, index); - return; - } - /* allocate index table */ - if (index && index->size >= UPPER_BOUND(index->capa)) { - size = index->capa+1; - } - power2(size); - if (!index || index->capa < size) { - index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); - if (index == NULL) { - mrb_free(mrb, t->index); - t->index = NULL; - return; - } - t->index = index; - } - index->size = t->size; - index->capa = size; - for (i=0; itable[i] = NULL; - } +static void +ar_init(struct RHash *h, uint32_t size, + hash_entry *ea, uint32_t ea_capa, uint32_t ea_n_used) +{ + h_ar_on(h); + ar_set_size(h, size); + ar_set_ea(h, ea); + ar_set_ea_capa(h, ea_capa); + ar_set_ea_n_used(h, ea_n_used); +} - /* rebuild index */ - mask = HT_MASK(index); - seg = t->rootseg; - while (seg) { - for (i=0; isize; i++) { - mrb_value key; - size_t k, step = 0; +static void +ar_free(mrb_state *mrb, struct RHash *h) +{ + mrb_free(mrb, ar_ea(h)); +} - if (!seg->next && i >= (size_t)t->last_len) { - return; - } - key = seg->e[i].key; - if (mrb_undef_p(key)) continue; - k = ht_hash_func(mrb, t, key) & mask; - while (index->table[k]) { - k = (k+(++step)) & mask; - } - index->table[k] = &seg->e[i]; - } - seg = seg->next; - } +static void +ar_adjust_ea(mrb_state *mrb, struct RHash *h, uint32_t size, uint32_t max_ea_capa) +{ + uint32_t ea_capa = size; + hash_entry *ea = ea_adjust(mrb, ar_ea(h), &ea_capa, max_ea_capa); + ar_set_ea(h, ea); + ar_set_ea_capa(h, ea_capa); } -/* Compacts the hash table removing deleted entries. */ static void -ht_compact(mrb_state *mrb, htable *t) +ar_compress(mrb_state *mrb, struct RHash *h) { - segment *seg; - uint16_t i, i2; - segment *seg2 = NULL; - mrb_int size = 0; + uint32_t size = ar_size(h); + ea_compress(ar_ea(h), ar_ea_n_used(h)); + ar_set_ea_n_used(h, size); + ar_adjust_ea(mrb, h, size, lesser(ar_ea_capa(h), AR_MAX_SIZE)); +} - if (t == NULL) return; - seg = t->rootseg; - if (t->index && (size_t)t->size == t->index->size) { - ht_index(mrb, t); - return; - } - while (seg) { - for (i=0; isize; i++) { - mrb_value k = seg->e[i].key; +static mrb_bool +ar_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + ea_each(ar_ea(h), ar_size(h), entry, { + if (!obj_eql(mrb, key, entry->key, h)) continue; + *valp = entry->val; + return TRUE; + }); + return FALSE; +} - if (!seg->next && i >= t->last_len) { - goto exit; - } - if (mrb_undef_p(k)) { /* found deleted key */ - if (seg2 == NULL) { - seg2 = seg; - i2 = i; +static void +ar_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) +{ + uint32_t size = ar_size(h); + hash_entry *entry; + if ((entry = ea_get_by_key(mrb, ar_ea(h), size, key, h))) { + entry->val = val; + } + else { + uint32_t ea_capa = ar_ea_capa(h), ea_n_used = ar_ea_n_used(h); + if (ea_capa == ea_n_used) { + if (size == ea_n_used) { + if (size == AR_MAX_SIZE) { + hash_entry *ea = ea_adjust(mrb, ar_ea(h), &ea_capa, EA_MAX_CAPA); + ea_set(ea, ea_n_used, key, val); + ht_init(mrb, h, ++size, ea, ea_capa, NULL, IB_INIT_BIT); + return; + } + else { + ar_adjust_ea(mrb, h, size, AR_MAX_SIZE); } } else { - size++; - if (seg2 != NULL) { - seg2->e[i2++] = seg->e[i]; - if (i2 >= seg2->size) { - seg2 = seg2->next; - i2 = 0; - } - } + ar_compress(mrb, h); + ea_n_used = size; } } - seg = seg->next; + ea_set(ar_ea(h), ea_n_used, key, val); + ar_set_size(h, ++size); + ar_set_ea_n_used(h, ++ea_n_used); } - exit: - /* reached at end */ - t->size = size; - if (seg2) { - seg = seg2->next; - seg2->next = NULL; - t->last_len = i2; - t->lastseg = seg2; - while (seg) { - seg2 = seg->next; - mrb_free(mrb, seg); - seg = seg2; +} + +static mrb_bool +ar_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + hash_entry *entry = ea_get_by_key(mrb, ar_ea(h), ar_size(h), key, h); + if (!entry) return FALSE; + *valp = entry->val; + entry_delete(entry); + ar_dec_size(h); + return TRUE; +} + +static void +ar_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) +{ + uint32_t size = ar_size(h); + ea_each(ar_ea(h), size, entry, { + *keyp = entry->key; + *valp = entry->val; + entry_delete(entry); + ar_set_size(h, --size); + return; + }); +} + +static void +ar_rehash(mrb_state *mrb, struct RHash *h) +{ + /* see comments in `h_rehash` */ + uint32_t size = ar_size(h), w_size = 0, ea_capa = ar_ea_capa(h); + hash_entry *ea = ar_ea(h), *w_entry; + ea_each(ea, size, r_entry, { + if ((w_entry = ea_get_by_key(mrb, ea, w_size, r_entry->key, h))) { + w_entry->val = r_entry->val; + ar_set_size(h, --size); + entry_delete(r_entry); } - } - if (t->index) { - ht_index(mrb, t); - } + else { + if (w_size != U32(r_entry - ea)) { + ea_set(ea, w_size, r_entry->key, r_entry->val); + entry_delete(r_entry); + } + ++w_size; + } + }); + mrb_assert(size == w_size); + ar_set_ea_n_used(h, size); + ar_adjust_ea(mrb, h, size, ea_capa); } -static segment* -segment_alloc(mrb_state *mrb, segment *seg) +static uint32_t +ib_it_pos_for(index_buckets_iter *it, uint32_t v) { - uint32_t size; + return v & it->mask; +} - if (!seg) size = MRB_HT_INIT_SIZE; - else { - size = seg->size*HT_SEG_INCREASE_RATIO + 1; - if (size > UINT16_MAX) size = UINT16_MAX; - } +static uint32_t +ib_it_empty_value(const index_buckets_iter *it) +{ + return it->mask; +} - seg = (segment*)mrb_malloc(mrb, sizeof(segment)+sizeof(struct segkv)*size); - seg->size = size; - seg->next = NULL; +static uint32_t +ib_it_deleted_value(const index_buckets_iter *it) +{ + return it->mask - 1; +} - return seg; +static mrb_bool +ib_it_empty_p(const index_buckets_iter *it) +{ + return it->ea_index == ib_it_empty_value(it); +} + +static mrb_bool +ib_it_deleted_p(const index_buckets_iter *it) +{ + return it->ea_index == ib_it_deleted_value(it); +} + +static mrb_bool +ib_it_active_p(const index_buckets_iter *it) +{ + return it->ea_index < ib_it_deleted_value(it); } -/* Set the value for the key in the indexed table. */ static void -ht_index_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) +ib_it_init(mrb_state *mrb, index_buckets_iter *it, struct RHash *h, mrb_value key) { - segindex *index = t->index; - size_t k, sp, step = 0, mask; - segment *seg; + it->h = h; + it->bit = ib_bit(h); + it->mask = ib_bit_to_capa(it->bit) - 1; + it->pos = ib_it_pos_for(it, obj_hash_code(mrb, key, h)); + it->step = 0; +} - if (index->size >= UPPER_BOUND(index->capa)) { - /* need to expand table */ - ht_compact(mrb, t); - index = t->index; - } - mask = HT_MASK(index); - sp = index->capa; - k = ht_hash_func(mrb, t, key) & mask; - while (index->table[k]) { - mrb_value key2 = index->table[k]->key; - if (mrb_undef_p(key2)) { - if (sp == index->capa) sp = k; - } - else if (ht_hash_equal(mrb, t, key, key2)) { - index->table[k]->val = val; - return; - } - k = (k+(++step)) & mask; +static void +ib_it_next(index_buckets_iter *it) +{ + /* + * [IB image] + * + * ary_index(1) --. + * \ .-- shift1(3) .-- shift2(29) + * pos(6) --. \ / / + * View | \ \ <-o-> <----------o----------> + * -------- +---------------------\----\--+-----------------------------+----- + * array | 0 `--. `-|--- o 1 | ... + * +---------+---------+-----+\--+-----+---------+---------+---+----- + * buckets | 0 | 1 | ... | o 6 | 7 | 8 | ... + * +---------+---------+-----+=========+---------+---------+--------- + * bit set |1 1 1 0 0|0 0 0 1 1| ... |0 1 0 1 1|0 1 1 1 0|0 1 0 1 0| ... + * +---------+---------+-----+========*+---------+---------+--------- + * <---o---> \ + * \ `-- bit_pos(34) + * `-- bit(5) + */ + uint64_t bit_pos; + bit_pos = U64(it->bit) * (it->pos + 1) - 1; + it->ary_index = U32(bit_pos / IB_TYPE_BIT); + it->shift2 = U32((U64(it->ary_index) + 1) * IB_TYPE_BIT - bit_pos - 1); + it->ea_index = (ht_ib(it->h)[it->ary_index] >> it->shift2) & it->mask; + if (IB_TYPE_BIT - it->bit < it->shift2) { + it->shift1 = IB_TYPE_BIT - it->shift2; + it->ea_index |= (ht_ib(it->h)[it->ary_index - 1] << it->shift1) & it->mask; } - if (sp < index->capa) { - k = sp; + else { + it->shift1 = 0; } + it->pos = ib_it_pos_for(it, it->pos + (++it->step)); +} - /* put the value at the last */ - seg = t->lastseg; - if (t->last_len < seg->size) { - index->table[k] = &seg->e[t->last_len++]; - } - else { /* append a new segment */ - seg->next = segment_alloc(mrb, seg); - seg = seg->next; - seg->next = NULL; - t->lastseg = seg; - t->last_len = 1; - index->table[k] = &seg->e[0]; +static uint32_t +ib_it_get(const index_buckets_iter *it) +{ + return it->ea_index; +} + +static void +ib_it_set(index_buckets_iter *it, uint32_t ea_index) +{ + uint32_t mask, i; + it->ea_index = ea_index; + if (it->shift1) { + i = it->ary_index - 1; + mask = it->mask >> it->shift1; + ht_ib(it->h)[i] = (ht_ib(it->h)[i] & ~mask) | (ea_index >> it->shift1); } - index->table[k]->key = key; - index->table[k]->val = val; - index->size++; - t->size++; + i = it->ary_index; + mask = it->mask << it->shift2; + ht_ib(it->h)[i] = (ht_ib(it->h)[i] & ~mask) | (ea_index << it->shift2); } -/* Set the value for the key in the hash table. */ static void -ht_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) +ib_it_delete(index_buckets_iter *it) { - segment *seg; - mrb_int i, deleted = 0; + ib_it_set(it, ib_it_deleted_value(it)); +} - if (t == NULL) return; - if (t->index) { - ht_index_put(mrb, t, key, val); - return; - } - seg = t->rootseg; - while (seg) { - for (i=0; isize; i++) { - mrb_value k = seg->e[i].key; - /* Found room in last segment after last_len */ - if (!seg->next && i >= t->last_len) { - seg->e[i].key = key; - seg->e[i].val = val; - t->last_len = (uint16_t)i+1; - t->size++; - return; - } - if (mrb_undef_p(k)) { - deleted++; - continue; - } - if (ht_hash_equal(mrb, t, key, k)) { - seg->e[i].val = val; - return; - } - } - seg = seg->next; - } - /* Not found */ +static hash_entry* +ib_it_entry(index_buckets_iter *it) +{ + return ea_get(ht_ea(it->h), it->ea_index); +} - /* Compact if last segment has room */ - if (deleted > 0 && deleted > MRB_HT_INIT_SIZE) { - ht_compact(mrb, t); - } - t->size++; +static uint32_t +ib_capa_to_bit(uint32_t capa) +{ +#ifdef __GNUC__ + return U32(__builtin_ctz(capa)); +#else + /* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */ + static const uint32_t MultiplyDeBruijnBitPosition2[] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return MultiplyDeBruijnBitPosition2[U32(capa * 0x077CB531U) >> 27]; +#endif +} - /* check if thre's room after compaction */ - if (t->lastseg && t->last_len < t->lastseg->size) { - seg = t->lastseg; - i = t->last_len; - } - else { - /* append new segment */ - seg = segment_alloc(mrb, t->lastseg); - i = 0; - if (t->rootseg == NULL) { - t->rootseg = seg; - } - else { - t->lastseg->next = seg; - } - t->lastseg = seg; - } - seg->e[i].key = key; - seg->e[i].val = val; - t->last_len = (uint16_t)i+1; - if (t->index == NULL && t->size > MRB_HT_INIT_SIZE*4) { - ht_index(mrb, t); - } +static uint32_t +ib_bit_to_capa(uint32_t bit) +{ + return U32(1) << bit; } -/* Get a value for a key from the indexed table. */ -static mrb_bool -ht_index_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) -{ - segindex *index = t->index; - size_t mask = HT_MASK(index); - size_t k = ht_hash_func(mrb, t, key) & mask; - size_t step = 0; - - while (index->table[k]) { - mrb_value key2 = index->table[k]->key; - if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { - if (vp) *vp = index->table[k]->val; - return TRUE; - } - k = (k+(++step)) & mask; - } - return FALSE; +static uint32_t +ib_upper_bound_for(uint32_t capa) +{ + return (capa >> 2) | (capa >> 1); /* 3/4 */ } -/* Get a value for a key from the hash table. */ -static mrb_bool -ht_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +static uint32_t +ib_bit_for(uint32_t size) { - segment *seg; - mrb_int i; + uint32_t capa = next_power2(size); + if (capa != IB_MAX_CAPA && ib_upper_bound_for(capa) < size) capa *= 2; + return ib_capa_to_bit(capa); +} - if (t == NULL) return FALSE; - if (t->index) { - return ht_index_get(mrb, t, key, vp); - } +static uint32_t +ib_byte_size_for(uint32_t ib_bit) +{ + uint64_t ary_size = (U64(1) << ib_bit) * ib_bit / IB_TYPE_BIT; + return U32(sizeof(uint32_t) * ary_size); +} - seg = t->rootseg; - while (seg) { - for (i=0; isize; i++) { - mrb_value k = seg->e[i].key; +static void +ib_init(mrb_state *mrb, struct RHash *h, uint32_t ib_bit, size_t ib_byte_size) +{ + hash_entry *ea = ht_ea(h); + memset(ht_ib(h), 0xff, ib_byte_size); + ib_set_bit(h, ib_bit); + ea_each_used(ea, ht_ea_n_used(h), entry, { + ib_cycle_by_key(mrb, h, entry->key, it, { + if (!ib_it_empty_p(it)) continue; + ib_it_set(it, U32(entry - ea)); + break; + }); + }); +} - if (!seg->next && i >= t->last_len) { - return FALSE; - } - if (mrb_undef_p(k)) continue; - if (ht_hash_equal(mrb, t, key, k)) { - if (vp) *vp = seg->e[i].val; - return TRUE; - } - } - seg = seg->next; - } - return FALSE; +static void +ht_init(mrb_state *mrb, struct RHash *h, uint32_t size, + hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit) +{ + size_t ib_byte_size = ib_byte_size_for(ib_bit); + size_t ht_byte_size = sizeof(hash_table) + ib_byte_size; + h_ht_on(h); + h_set_ht(h, (hash_table*)mrb_realloc(mrb, ht, ht_byte_size)); + ht_set_size(h, size); + ht_set_ea(h, ea); + ht_set_ea_capa(h, ea_capa); + ht_set_ea_n_used(h, size); + ib_init(mrb, h, ib_bit, ib_byte_size); +} + +static void +ht_free(mrb_state *mrb, struct RHash *h) +{ + mrb_free(mrb, ht_ea(h)); + mrb_free(mrb, h_ht(h)); +} + +static hash_table* +ht_dup(mrb_state *mrb, const struct RHash *h) +{ + size_t ib_byte_size = ib_byte_size_for(ib_bit(h)); + size_t ht_byte_size = sizeof(hash_table) + ib_byte_size; + hash_table *new_ht = (hash_table*)mrb_malloc(mrb, ht_byte_size); + return (hash_table*)memcpy(new_ht, h_ht(h), ht_byte_size); +} + +static void +ht_adjust_ea(mrb_state *mrb, struct RHash *h, uint32_t size, uint32_t max_ea_capa) +{ + uint32_t ea_capa = size; + hash_entry *ea = ea_adjust(mrb, ht_ea(h), &ea_capa, max_ea_capa); + ht_set_ea(h, ea); + ht_set_ea_capa(h, ea_capa); +} + +static void +ht_to_ar(mrb_state *mrb, struct RHash *h) +{ + uint32_t size = ht_size(h), ea_capa = size; + hash_entry *ea = ht_ea(h); + ea_compress(ea, ht_ea_n_used(h)); + ea = ea_adjust(mrb, ea, &ea_capa, AR_MAX_SIZE); + mrb_free(mrb, h_ht(h)); + ar_init(h, size, ea, ea_capa, size); } -/* Deletes the value for the symbol from the hash table. */ -/* Deletion is done by overwriting keys by `undef`. */ static mrb_bool -ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +ht_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - segment *seg; - mrb_int i; + ib_find_by_key(mrb, h, key, it, { + *valp = ib_it_entry(it)->val; + return TRUE; + }); + return FALSE; +} - if (t == NULL) return FALSE; - seg = t->rootseg; - while (seg) { - for (i=0; isize; i++) { - mrb_value key2; +static void +ht_set_as_ar(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) +{ + ht_to_ar(mrb, h); + ar_set(mrb, h, key, val); +} - if (!seg->next && i >= t->last_len) { - /* not found */ - return FALSE; - } - key2 = seg->e[i].key; - if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { - if (vp) *vp = seg->e[i].val; - seg->e[i].key = mrb_undef_value(); - t->size--; - return TRUE; +static void +ht_set_without_ib_adjustment(mrb_state *mrb, struct RHash *h, + mrb_value key, mrb_value val) +{ + mrb_assert(ht_size(h) < ib_bit_to_capa(ib_bit(h))); + ib_cycle_by_key(mrb, h, key, it, { + if (ib_it_active_p(it)) { + if (!obj_eql(mrb, key, ib_it_entry(it)->key, h)) continue; + ib_it_entry(it)->val = val; + } + else { + uint32_t ea_n_used = ht_ea_n_used(h); + if (ea_n_used == H_MAX_SIZE) { + mrb_assert(ht_size(h) == ea_n_used); + mrb_raise(mrb, E_ARGUMENT_ERROR, "hash too big"); } + if (ea_n_used == ht_ea_capa(h)) ht_adjust_ea(mrb, h, ea_n_used, EA_MAX_CAPA); + ib_it_set(it, ea_n_used); + ea_set(ht_ea(h), ea_n_used, key, val); + ht_inc_size(h); + ht_set_ea_n_used(h, ++ea_n_used); } - seg = seg->next; - } - return FALSE; + return; + }); } -/* Iterates over the hash table. */ static void -ht_foreach(mrb_state *mrb, htable *t, mrb_hash_foreach_func *func, void *p) -{ - segment *seg; - mrb_int i; - - if (t == NULL) return; - seg = t->rootseg; - while (seg) { - for (i=0; isize; i++) { - /* no value in last segment after last_len */ - if (!seg->next && i >= t->last_len) { - return; - } - if (mrb_undef_p(seg->e[i].key)) continue; - if ((*func)(mrb, seg->e[i].key, seg->e[i].val, p) != 0) - return; +ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) +{ + uint32_t size = ht_size(h); + uint32_t ib_bit_width = ib_bit(h), ib_capa = ib_bit_to_capa(ib_bit_width); + if (ib_upper_bound_for(ib_capa) <= size) { + if (size != ht_ea_n_used(h)) ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ++ib_bit_width); + } + else if (ht_ea_capa(h) == ht_ea_n_used(h) && size != ht_ea_n_used(h)) { + if (size <= AR_MAX_SIZE) {ht_set_as_ar(mrb, h, key, val); return;} + if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) { + ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_adjust_ea(mrb, h, size, ht_ea_capa(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width); } - seg = seg->next; } + ht_set_without_ib_adjustment(mrb, h, key, val); } -size_t -mrb_os_memsize_of_hash_table(mrb_value obj) +static mrb_bool +ht_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - struct htable *h = mrb_hash_ptr(obj)->ht; - size_t segkv_size = 0; + ib_find_by_key(mrb, h, key, it, { + hash_entry *entry = ib_it_entry(it); + *valp = entry->val; + ib_it_delete(it); + entry_delete(entry); + ht_dec_size(h); + return TRUE; + }); + return FALSE; +} - if (h->index) segkv_size = (sizeof(struct segkv) * h->index->capa); +static void +ht_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) +{ + hash_entry *ea = ht_ea(h); + ea_each(ea, ht_size(h), entry, { + ib_cycle_by_key(mrb, h, entry->key, it, { + if (ib_it_get(it) != U32(entry - ea)) continue; + *keyp = entry->key; + *valp = entry->val; + ib_it_delete(it); + entry_delete(entry); + ht_dec_size(h); + return; + }); + }); +} - return sizeof(htable) + - sizeof(segindex) + - (sizeof(segment) * h->size) + - segkv_size; +static void +ht_rehash(mrb_state *mrb, struct RHash *h) +{ + /* see comments in `h_rehash` */ + uint32_t size = ht_size(h), w_size = 0, ea_capa = ht_ea_capa(h); + hash_entry *ea = ht_ea(h); + ht_init(mrb, h, 0, ea, ea_capa, h_ht(h), ib_bit_for(size)); + ht_set_size(h, size); + ht_set_ea_n_used(h, ht_ea_n_used(h)); + ea_each(ea, size, r_entry, { + ib_cycle_by_key(mrb, h, r_entry->key, it, { + if (ib_it_active_p(it)) { + if (!obj_eql(mrb, r_entry->key, ib_it_entry(it)->key, h)) continue; + ib_it_entry(it)->val = r_entry->val; + ht_set_size(h, --size); + entry_delete(r_entry); + } + else { + if (w_size != U32(r_entry - ea)) { + ea_set(ea, w_size, r_entry->key, r_entry->val); + entry_delete(r_entry); + } + ib_it_set(it, w_size++); + } + break; + }); + }); + mrb_assert(size == w_size); + ht_set_ea_n_used(h, size); + size <= AR_MAX_SIZE ? ht_to_ar(mrb, h) : ht_adjust_ea(mrb, h, size, ea_capa); } -/* Iterates over the hash table. */ -MRB_API void -mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, mrb_hash_foreach_func *func, void *p) +static mrb_value +h_key_for(mrb_state *mrb, mrb_value key) { - ht_foreach(mrb, hash->ht, func, p); + if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { + key = mrb_str_dup(mrb, key); + MRB_SET_FROZEN_FLAG(mrb_str_ptr(key)); + } + return key; } -/* Copy the hash table. */ -static htable* -ht_copy(mrb_state *mrb, htable *t) +static struct RHash* +h_alloc(mrb_state *mrb) { - segment *seg; - htable *t2; - mrb_int i; + return (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); +} - seg = t->rootseg; - t2 = ht_new(mrb); - if (t->size == 0) return t2; +static void +h_init(struct RHash *h) +{ + ar_init(h, 0, NULL, 0, 0); +} - while (seg) { - for (i=0; isize; i++) { - mrb_value key = seg->e[i].key; - mrb_value val = seg->e[i].val; +static void +h_free_table(mrb_state *mrb, struct RHash *h) +{ + (h_ar_p(h) ? ar_free : ht_free)(mrb, h); +} - if ((seg->next == NULL) && (i >= t->last_len)) { - return t2; - } - if (mrb_undef_p(key)) continue; /* skip deleted key */ - ht_put(mrb, t2, key, val); - } - seg = seg->next; - } - return t2; +static void +h_clear(mrb_state *mrb, struct RHash *h) +{ + h_free_table(mrb, h); + h_init(h); +} + +static mrb_bool +h_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + return (h_ar_p(h) ? ar_get : ht_get)(mrb, h, key, valp); } -/* Free memory of the hash table. */ static void -ht_free(mrb_state *mrb, htable *t) +h_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) { - segment *seg; + (h_ar_p(h) ? ar_set : ht_set)(mrb, h, key, val); +} - if (!t) return; - seg = t->rootseg; - while (seg) { - segment *p = seg; - seg = seg->next; - mrb_free(mrb, p); - } - if (t->index) mrb_free(mrb, t->index); - mrb_free(mrb, t); +static mrb_bool +h_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + return (h_ar_p(h) ? ar_delete : ht_delete)(mrb, h, key, valp); } -static void hash_modify(mrb_state *mrb, mrb_value hash); +/* find first element in the table, and remove it. */ +static void +h_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) +{ + (h_ar_p(h) ? ar_shift : ht_shift)(mrb, h, keyp, valp); +} -static inline mrb_value -ht_key(mrb_state *mrb, mrb_value key) +static void +h_rehash(mrb_state *mrb, struct RHash *h) { - if (mrb_string_p(key) && !mrb_frozen_p(mrb_str_ptr(key))) { - key = mrb_str_dup(mrb, key); - MRB_SET_FROZEN_FLAG(mrb_str_ptr(key)); + /* + * ==== Comments common to `ar_rehash` and `ht_rehash` + * + * - Because reindex (such as elimination of duplicate keys) must be + * guaranteed, it is necessary to set one by one. + * + * - To prevent EA from breaking if an exception occurs in the middle, + * delete the slot before moving when moving the entry, and update size + * at any time when overwriting. + */ + (h_size(h) == 0 ? h_clear : h_ar_p(h) ? ar_rehash : ht_rehash)(mrb, h); +} + +static void +h_replace(mrb_state *mrb, struct RHash *h, struct RHash *orig_h) +{ + uint32_t size = h_size(orig_h); + if (size == 0) { + h_clear(mrb, h); + } + else if (h_ar_p(orig_h)) { + uint32_t ea_capa = ar_ea_capa(orig_h); + hash_entry *ea = ea_dup(mrb, ar_ea(orig_h), ea_capa); + h_free_table(mrb, h); + ar_init(h, size, ea, ea_capa, ar_ea_n_used(orig_h)); + } + else { /* HT */ + uint32_t ea_capa = ht_ea_capa(orig_h); + hash_entry *ea = ea_dup(mrb, ht_ea(orig_h), ea_capa); + hash_table *ht = ht_dup(mrb, orig_h); + h_free_table(mrb, h); + h_ht_on(h); + h_set_ht(h, ht); + ht_set_size(h, size); + ht_set_ea(h, ea); +#ifdef MRB_64BIT + ht_set_ea_capa(h, ea_capa); + ht_set_ea_n_used(h, ht_ea_n_used(orig_h)); +#endif + ib_set_bit(h, ib_bit(orig_h)); } - return key; } -#define KEY(key) ht_key(mrb, key) +void +mrb_gc_mark_hash(mrb_state *mrb, struct RHash *h) +{ + h_each(h, entry, { + mrb_gc_mark_value(mrb, entry->key); + mrb_gc_mark_value(mrb, entry->val); + }); +} -static int -hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +size_t +mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *h) { - mrb_gc_mark_value(mrb, key); - mrb_gc_mark_value(mrb, val); - return 0; + return h_size(h) * 2; } void -mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash) +mrb_gc_free_hash(mrb_state *mrb, struct RHash *h) { - ht_foreach(mrb, hash->ht, hash_mark_i, NULL); + h_free_table(mrb, h); } size_t -mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *hash) +mrb_os_memsize_of_hash_table(mrb_value self) { - if (!hash->ht) return 0; - return hash->ht->size*2; + struct RHash *h = mrb_hash_ptr(self); + return h_ar_p(h) ? (ar_ea_capa(h) * sizeof(hash_entry)) : + (ht_ea_capa(h) * sizeof(hash_entry) + + sizeof(hash_table) + + ib_byte_size_for(ib_bit(h))); } -void -mrb_gc_free_hash(mrb_state *mrb, struct RHash *hash) +/* Iterates over the key/value pairs. */ +MRB_API void +mrb_hash_foreach(mrb_state *mrb, struct RHash *h, mrb_hash_foreach_func *func, void *data) { - ht_free(mrb, hash->ht); + h_each(h, entry, { + if (func(mrb, entry->key, entry->val, data) != 0) return; + }); } MRB_API mrb_value mrb_hash_new(mrb_state *mrb) { - struct RHash *h; - - h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - h->ht = 0; - h->iv = 0; + struct RHash *h = h_alloc(mrb); return mrb_obj_value(h); } +/* + * Set the capacity of EA and IB to minimum capacity (and appropriate load + * factor) that does not cause expansion when inserting `capa` elements. + */ MRB_API mrb_value mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) { - struct RHash *h; - - h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - /* preallocate hash table */ - h->ht = ht_new(mrb); - /* capacity ignored */ - h->iv = 0; - return mrb_obj_value(h); + if (capa < 0 || EA_MAX_CAPA < capa) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "hash too big"); + return mrb_nil_value(); /* not reached */ + } + else if (capa == 0) { + return mrb_hash_new(mrb); + } + else { + uint32_t size = U32(capa); + struct RHash *h = h_alloc(mrb); + hash_entry *ea = ea_resize(mrb, NULL, size); + if (size <= AR_MAX_SIZE) { + ar_init(h, 0, ea, size, 0); + } + else { + ht_init(mrb, h, 0, ea, size, NULL, ib_bit_for(size)); + } + return mrb_obj_value(h); + } } static mrb_value mrb_hash_default(mrb_state *mrb, mrb_value hash); -static mrb_value hash_default(mrb_state *mrb, mrb_value hash, mrb_value key); -static mrb_value -mrb_hash_init_copy(mrb_state *mrb, mrb_value self) +static void +hash_modify(mrb_state *mrb, mrb_value hash) { - mrb_value orig = mrb_get_arg1(mrb); - struct RHash* copy; - htable *orig_h; - mrb_value ifnone, vret; + mrb_check_frozen(mrb, mrb_hash_ptr(hash)); +} - if (mrb_obj_equal(mrb, self, orig)) return self; - if ((mrb_type(self) != mrb_type(orig)) || (mrb_obj_class(mrb, self) != mrb_obj_class(mrb, orig))) { - mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object"); +static mrb_value +hash_default(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + if (MRB_RHASH_DEFAULT_P(hash)) { + if (MRB_RHASH_PROCDEFAULT_P(hash)) { + return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key); + } + else { + return RHASH_IFNONE(hash); + } } + return mrb_nil_value(); +} - orig_h = RHASH_TBL(self); - copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = ht_copy(mrb, orig_h); - - if (MRB_RHASH_DEFAULT_P(self)) { - copy->flags |= MRB_HASH_DEFAULT; - } - if (MRB_RHASH_PROCDEFAULT_P(self)) { - copy->flags |= MRB_HASH_PROC_DEFAULT; +static void +hash_replace(mrb_state *mrb, mrb_value self, mrb_value orig) +{ + struct RHash *h = mrb_hash_ptr(self), *orig_h = mrb_hash_ptr(orig); + uint32_t mask = MRB_HASH_DEFAULT | MRB_HASH_PROC_DEFAULT; + mrb_sym name; + h_replace(mrb, h, orig_h); + name = MRB_SYM(ifnone); + if (orig_h->flags & MRB_HASH_DEFAULT) { + mrb_iv_set(mrb, self, name, mrb_iv_get(mrb, orig, name)); } - vret = mrb_obj_value(copy); - ifnone = RHASH_IFNONE(self); - if (!mrb_nil_p(ifnone)) { - mrb_iv_set(mrb, vret, MRB_SYM(ifnone), ifnone); + else { + mrb_iv_remove(mrb, self, name); } - return vret; + h->flags &= ~mask; + h->flags |= orig_h->flags & mask; } -static int -check_kdict_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +static mrb_value +mrb_hash_init_copy(mrb_state *mrb, mrb_value self) { - if (!mrb_symbol_p(key)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); - } - return 0; + mrb_value orig; + mrb_get_args(mrb, "H", &orig); + hash_modify(mrb, self); + if (mrb_hash_ptr(self) != mrb_hash_ptr(orig)) hash_replace(mrb, self, orig); + return self; } void mrb_hash_check_kdict(mrb_state *mrb, mrb_value self) { - htable *t; - - t = RHASH_TBL(self); - if (!t || t->size == 0) return; - ht_foreach(mrb, t, check_kdict_i, NULL); + h_each(mrb_hash_ptr(self), entry, { + if (mrb_symbol_p(entry->key)) continue; + mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); + }); } MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value self) { - struct RHash* copy; - htable *orig_h; - - orig_h = RHASH_TBL(self); - copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = orig_h ? ht_copy(mrb, orig_h) : NULL; - return mrb_obj_value(copy); + struct RHash* copy_h = h_alloc(mrb); + mrb_value copy = mrb_obj_value(copy_h); + copy_h->c = mrb_hash_ptr(self)->c; + hash_replace(mrb, copy, self); + return copy; } MRB_API mrb_value @@ -711,7 +1159,7 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) mrb_value val; mrb_sym mid; - if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { + if (h_get(mrb, mrb_hash_ptr(hash), key, &val)) { return val; } @@ -728,7 +1176,7 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) { mrb_value val; - if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { + if (h_get(mrb, mrb_hash_ptr(hash), key, &val)) { return val; } /* not found */ @@ -739,21 +1187,10 @@ MRB_API void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) { hash_modify(mrb, hash); - - key = KEY(key); - ht_put(mrb, RHASH_TBL(hash), key, val); - mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key); - mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val); - return; -} - -static void -hash_modify(mrb_state *mrb, mrb_value hash) -{ - mrb_check_frozen(mrb, mrb_hash_ptr(hash)); - if (!RHASH_TBL(hash)) { - RHASH_TBL(hash) = ht_new(mrb); - } + key = h_key_for(mrb, key); + h_set(mrb, mrb_hash_ptr(hash), key, val); + mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), key); + mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), val); } /* 15.2.13.4.16 */ @@ -837,20 +1274,6 @@ mrb_hash_aget(mrb_state *mrb, mrb_value self) return mrb_hash_get(mrb, self, key); } -static mrb_value -hash_default(mrb_state *mrb, mrb_value hash, mrb_value key) -{ - if (MRB_RHASH_DEFAULT_P(hash)) { - if (MRB_RHASH_PROCDEFAULT_P(hash)) { - return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key); - } - else { - return RHASH_IFNONE(hash); - } - } - return mrb_nil_value(); -} - /* 15.2.13.4.5 */ /* * call-seq: @@ -945,7 +1368,6 @@ mrb_hash_set_default(mrb_state *mrb, mrb_value hash) * a #=> [nil, nil, 4] */ - static mrb_value mrb_hash_default_proc(mrb_state *mrb, mrb_value hash) { @@ -990,10 +1412,10 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) { - htable *t = RHASH_TBL(hash); mrb_value del_val; - if (ht_del(mrb, t, key, &del_val)) { + hash_modify(mrb, hash); + if (h_delete(mrb, mrb_hash_ptr(hash), key, &del_val)) { return del_val; } @@ -1005,38 +1427,9 @@ static mrb_value mrb_hash_delete(mrb_state *mrb, mrb_value self) { mrb_value key = mrb_get_arg1(mrb); - - hash_modify(mrb, self); return mrb_hash_delete_key(mrb, self, key); } -/* find first element in the hash table, and remove it. */ -static void -ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) -{ - segment *seg = t->rootseg; - mrb_int i; - - while (seg) { - for (i=0; isize; i++) { - mrb_value key; - - if (!seg->next && i >= t->last_len) { - return; - } - key = seg->e[i].key; - if (mrb_undef_p(key)) continue; - *kp = key; - *vp = seg->e[i].val; - /* delete element */ - seg->e[i].key = mrb_undef_value(); - t->size--; - return; - } - seg = seg->next; - } -} - /* 15.2.13.4.24 */ /* * call-seq: @@ -1054,28 +1447,19 @@ ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) static mrb_value mrb_hash_shift(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); + struct RHash *h = mrb_hash_ptr(hash); hash_modify(mrb, hash); - if (t && t->size > 0) { - mrb_value del_key = mrb_nil_value(); - mrb_value del_val = mrb_nil_value(); - - ht_shift(mrb, t, &del_key, &del_val); + if (h_size(h) == 0) { + return hash_default(mrb, hash, mrb_nil_value()); + } + else { + mrb_value del_key, del_val; + h_shift(mrb, h, &del_key, &del_val); mrb_gc_protect(mrb, del_key); mrb_gc_protect(mrb, del_val); return mrb_assoc_new(mrb, del_key, del_val); } - - if (MRB_RHASH_DEFAULT_P(hash)) { - if (MRB_RHASH_PROCDEFAULT_P(hash)) { - return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, mrb_nil_value()); - } - else { - return RHASH_IFNONE(hash); - } - } - return mrb_nil_value(); } /* 15.2.13.4.4 */ @@ -1093,13 +1477,8 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - hash_modify(mrb, hash); - if (t) { - ht_free(mrb, t); - RHASH_TBL(hash) = NULL; - } + h_clear(mrb, mrb_hash_ptr(hash)); return hash; } @@ -1135,10 +1514,7 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) MRB_API mrb_int mrb_hash_size(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - - if (!t) return 0; - return t->size; + return (mrb_int)h_size(mrb_hash_ptr(hash)); } /* 15.2.13.4.20 */ @@ -1165,10 +1541,7 @@ mrb_hash_size_m(mrb_state *mrb, mrb_value self) MRB_API mrb_bool mrb_hash_empty_p(mrb_state *mrb, mrb_value self) { - htable *t = RHASH_TBL(self); - - if (!t) return TRUE; - return t->size == 0; + return h_size(mrb_hash_ptr(self)) == 0; } /* 15.2.13.4.12 */ @@ -1187,13 +1560,6 @@ mrb_hash_empty_m(mrb_state *mrb, mrb_value self) return mrb_bool_value(mrb_hash_empty_p(mrb, self)); } -static int -hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) -{ - mrb_ary_push(mrb, *(mrb_value*)p, key); - return 0; -} - /* 15.2.13.4.19 */ /* * call-seq: @@ -1210,24 +1576,14 @@ hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) MRB_API mrb_value mrb_hash_keys(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - mrb_int size; - mrb_value ary; - - if (!t || (size = t->size) == 0) - return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, size); - ht_foreach(mrb, t, hash_keys_i, (void*)&ary); + struct RHash *h = mrb_hash_ptr(hash); + mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h)); + h_each(h, entry, { + mrb_ary_push(mrb, ary, entry->key); + }); return ary; } -static int -hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) -{ - mrb_ary_push(mrb, *(mrb_value*)p, val); - return 0; -} - /* 15.2.13.4.28 */ /* * call-seq: @@ -1244,14 +1600,11 @@ hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) MRB_API mrb_value mrb_hash_values(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - mrb_int size; - mrb_value ary; - - if (!t || (size = t->size) == 0) - return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, size); - ht_foreach(mrb, t, hash_vals_i, (void*)&ary); + struct RHash *h = mrb_hash_ptr(hash); + mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h)); + h_each(h, entry, { + mrb_ary_push(mrb, ary, entry->val); + }); return ary; } @@ -1277,13 +1630,8 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) MRB_API mrb_bool mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) { - htable *t; - - t = RHASH_TBL(hash); - if (ht_get(mrb, t, key, NULL)) { - return TRUE; - } - return FALSE; + mrb_value val; + return h_get(mrb, mrb_hash_ptr(hash), key, &val); } static mrb_value @@ -1296,23 +1644,6 @@ mrb_hash_has_key(mrb_state *mrb, mrb_value hash) return mrb_bool_value(key_p); } -struct has_v_arg { - mrb_bool found; - mrb_value val; -}; - -static int -hash_has_value_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) -{ - struct has_v_arg *arg = (struct has_v_arg*)p; - - if (mrb_equal(mrb, arg->val, val)) { - arg->found = TRUE; - return 1; - } - return 0; -} - /* 15.2.13.4.14 */ /* 15.2.13.4.27 */ /* @@ -1332,41 +1663,32 @@ static mrb_value mrb_hash_has_value(mrb_state *mrb, mrb_value hash) { mrb_value val = mrb_get_arg1(mrb); - struct has_v_arg arg; - - arg.found = FALSE; - arg.val = val; - ht_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg); - return mrb_bool_value(arg.found); -} - -static int -merge_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) -{ - htable *h1 = (htable*)data; - - ht_put(mrb, h1, key, val); - return 0; + struct RHash *h = mrb_hash_ptr(hash); + h_each(h, entry, { + h_check_modified(mrb, h, { + if (mrb_equal(mrb, val, entry->val)) return mrb_true_value(); + }); + }); + return mrb_false_value(); } MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) { - htable *h1, *h2; + struct RHash *h1, *h2; hash_modify(mrb, hash1); - hash2 = mrb_ensure_hash_type(mrb, hash2); - h1 = RHASH_TBL(hash1); - h2 = RHASH_TBL(hash2); - - if (!h2) return; - if (!h1) { - RHASH_TBL(hash1) = ht_copy(mrb, h2); - return; - } - ht_foreach(mrb, h2, merge_i, h1); - mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1)); - return; + mrb_ensure_hash_type(mrb, hash2); + h1 = mrb_hash_ptr(hash1); + h2 = mrb_hash_ptr(hash2); + + if (h1 == h2) return; + if (h_size(h2) == 0) return; + h_each(h2, entry, { + h_check_modified(mrb, h2, {h_set(mrb, h1, entry->key, entry->val);}); + mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->key); + mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->val); + }); } /* @@ -1381,14 +1703,10 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) * k = keys[0] * h = {} * keys.each{|key| h[key] = key[0]} - * h #=> { [1]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, - * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, - * [15]=>15,[16]=>16,[17]=>17} + * h #=> { [1]=>1, [2]=>2, ... [16]=>16, [17]=>17} * h[k] #=> 1 * k[0] = keys.size + 1 - * h #=> {[18]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, - * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, - * [15]=>15,[16]=>16,[17]=>17} + * h #=> {[18]=>1, [2]=>2, ... [16]=>16, [17]=>17} * h[k] #=> nil * h.rehash * h[k] #=> 1 @@ -1396,7 +1714,7 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) static mrb_value mrb_hash_rehash(mrb_state *mrb, mrb_value self) { - ht_compact(mrb, RHASH_TBL(self)); + h_rehash(mrb, mrb_hash_ptr(self)); return self; } @@ -1408,11 +1726,10 @@ mrb_init_hash(mrb_state *mrb) mrb->hash_class = h = mrb_define_class(mrb, "Hash", mrb->object_class); /* 15.2.13 */ MRB_SET_INSTANCE_TT(h, MRB_TT_HASH); - mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1)); mrb_define_method(mrb, h, "[]", mrb_hash_aget, MRB_ARGS_REQ(1)); /* 15.2.13.4.2 */ mrb_define_method(mrb, h, "[]=", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.3 */ mrb_define_method(mrb, h, "clear", mrb_hash_clear, MRB_ARGS_NONE()); /* 15.2.13.4.4 */ - mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */ + mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */ mrb_define_method(mrb, h, "default=", mrb_hash_set_default, MRB_ARGS_REQ(1)); /* 15.2.13.4.6 */ mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,MRB_ARGS_NONE()); /* 15.2.13.4.7 */ mrb_define_method(mrb, h, "default_proc=", mrb_hash_set_default_proc,MRB_ARGS_REQ(1)); /* 15.2.13.4.7 */ @@ -1422,10 +1739,12 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.14 */ mrb_define_method(mrb, h, "include?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.15 */ mrb_define_method(mrb, h, "initialize", mrb_hash_init, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); /* 15.2.13.4.16 */ + mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.17 */ mrb_define_method(mrb, h, "key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.18 */ mrb_define_method(mrb, h, "keys", mrb_hash_keys, MRB_ARGS_NONE()); /* 15.2.13.4.19 */ mrb_define_method(mrb, h, "length", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.20 */ mrb_define_method(mrb, h, "member?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.21 */ + mrb_define_method(mrb, h, "replace", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.23 */ mrb_define_method(mrb, h, "shift", mrb_hash_shift, MRB_ARGS_NONE()); /* 15.2.13.4.24 */ mrb_define_method(mrb, h, "size", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.25 */ mrb_define_method(mrb, h, "store", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.26 */ -- cgit v1.2.3