diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2020-11-10 21:56:42 +0900 |
|---|---|---|
| committer | GitHub <[email protected]> | 2020-11-10 21:56:42 +0900 |
| commit | 6a5e97b448e82fe55348ab1a7e96b70b2c45f6c1 (patch) | |
| tree | 89abbf8aaa8a4997648686b3d455db7e2373f206 | |
| parent | cc20c5ca3a5da0561be8644b7513d947badb1c56 (diff) | |
| parent | f2d8db39be487fcd710c5c5844ae47d3a6920e70 (diff) | |
| download | mruby-6a5e97b448e82fe55348ab1a7e96b70b2c45f6c1.tar.gz mruby-6a5e97b448e82fe55348ab1a7e96b70b2c45f6c1.zip | |
Merge pull request #5121 from shuujii/reduce-memory-usage-of-Hash-object
Reduce memory usage of Hash object
| -rw-r--r-- | include/mruby/hash.h | 47 | ||||
| -rw-r--r-- | mrblib/hash.rb | 20 | ||||
| -rw-r--r-- | src/hash.c | 1695 | ||||
| -rw-r--r-- | test/t/hash.rb | 1184 |
4 files changed, 1944 insertions, 1002 deletions
diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 7dab4a85c..c2af3b976 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -14,10 +14,22 @@ */ MRB_BEGIN_DECL +/* offset of `iv` must be 3 words */ struct RHash { MRB_OBJECT_HEADER; +#ifdef MRB_64BIT + uint32_t size; struct iv_tbl *iv; - struct htable *ht; + uint32_t ea_capa; + uint32_t ea_n_used; +#else + struct iv_tbl *iv; + uint32_t size; +#endif + union { + struct hash_entry *ea; + struct hash_table *ht; + }; }; #define mrb_hash_ptr(v) ((struct RHash*)(mrb_ptr(v))) @@ -179,8 +191,9 @@ MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash); MRB_API mrb_int mrb_hash_size(mrb_state *mrb, mrb_value hash); /* - * Copies the hash. - * + * Copies the hash. This function does NOT copy the instance variables + * (except for the default value). Use mrb_obj_dup() to copy the instance + * variables as well. * * @param mrb The mruby state reference. * @param hash The target hash. @@ -198,16 +211,24 @@ MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value hash); */ MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2); -/* RHASH_TBL allocates st_table if not available. */ -#define RHASH(obj) ((struct RHash*)(mrb_ptr(obj))) -#define RHASH_TBL(h) (RHASH(h)->ht) -#define RHASH_IFNONE(h) mrb_iv_get(mrb, (h), MRB_SYM(ifnone)) -#define RHASH_PROCDEFAULT(h) RHASH_IFNONE(h) - -#define MRB_HASH_DEFAULT 1 -#define MRB_HASH_PROC_DEFAULT 2 -#define MRB_RHASH_DEFAULT_P(h) (RHASH(h)->flags & MRB_HASH_DEFAULT) -#define MRB_RHASH_PROCDEFAULT_P(h) (RHASH(h)->flags & MRB_HASH_PROC_DEFAULT) +#define RHASH(hash) ((struct RHash*)(mrb_ptr(hash))) +#define RHASH_IFNONE(hash) mrb_iv_get(mrb, (hash), MRB_SYM(ifnone)) +#define RHASH_PROCDEFAULT(hash) RHASH_IFNONE(hash) + +#define MRB_HASH_IB_BIT_BIT 5 +#define MRB_HASH_AR_EA_CAPA_BIT 5 +#define MRB_HASH_IB_BIT_SHIFT 0 +#define MRB_HASH_AR_EA_CAPA_SHIFT 0 +#define MRB_HASH_AR_EA_N_USED_SHIFT MRB_HASH_AR_EA_CAPA_BIT +#define MRB_HASH_SIZE_FLAGS_SHIFT (MRB_HASH_AR_EA_CAPA_BIT * 2) +#define MRB_HASH_IB_BIT_MASK ((1 << MRB_HASH_IB_BIT_BIT) - 1) +#define MRB_HASH_AR_EA_CAPA_MASK ((1 << MRB_HASH_AR_EA_CAPA_BIT) - 1) +#define MRB_HASH_AR_EA_N_USED_MASK (MRB_HASH_AR_EA_CAPA_MASK << MRB_HASH_AR_EA_N_USED_SHIFT) +#define MRB_HASH_DEFAULT (1 << (MRB_HASH_SIZE_FLAGS_SHIFT + 0)) +#define MRB_HASH_PROC_DEFAULT (1 << (MRB_HASH_SIZE_FLAGS_SHIFT + 1)) +#define MRB_HASH_HT (1 << (MRB_HASH_SIZE_FLAGS_SHIFT + 2)) +#define MRB_RHASH_DEFAULT_P(hash) (RHASH(hash)->flags & MRB_HASH_DEFAULT) +#define MRB_RHASH_PROCDEFAULT_P(hash) (RHASH(hash)->flags & MRB_HASH_PROC_DEFAULT) /* GC functions */ void mrb_gc_mark_hash(mrb_state*, struct RHash*); diff --git a/mrblib/hash.rb b/mrblib/hash.rb index 7d64addd7..61ede8137 100644 --- a/mrblib/hash.rb +++ b/mrblib/hash.rb @@ -145,26 +145,6 @@ class Hash end ## - # Replaces the contents of <i>hsh</i> with the contents of other hash - # - # ISO 15.2.13.4.23 - def replace(hash) - raise TypeError, "Hash required (#{hash.class} given)" unless Hash === hash - self.clear - hash.each_key{|k| - self[k] = hash[k] - } - if hash.default_proc - self.default_proc = hash.default_proc - else - self.default = hash.default - end - self - end - # ISO 15.2.13.4.17 - alias initialize_copy replace - - ## # Return a hash which contains the content of # +self+ and +other+. If a block is given # it will be called for each element with diff --git a/src/hash.c b/src/hash.c index d67fa6254..f797f8823 100644 --- a/src/hash.c +++ b/src/hash.c @@ -4,6 +4,7 @@ ** See Copyright Notice in mruby.h */ +#include <string.h> #include <mruby.h> #include <mruby/array.h> #include <mruby/class.h> @@ -11,56 +12,285 @@ #include <mruby/string.h> #include <mruby/variable.h> -#ifndef MRB_NO_FLOAT -/* a function to get hash value of a float number */ -mrb_int mrb_float_id(mrb_float f); +/* + * === Glossary + * + * [EA] + * Entry Array. Store `Hash' entries in insertion order. + * + * [AR] + * Array Table Implementation. The structure of `Hash` that doesn't have a + * hash table and linearly searches EA. It is used when `Hash` size <= 16. + * + * [IB] + * Index Buckets. The buckets of hash table, where the bucket value is EA + * index. The index is represented by variable length bits according to + * the capacity. + * + * [HT] + * Hash Table Implementation. The structure of `Hash` that has IB and is + * searched by hash table algorithm. It is used when `Hash` > 16. Collision + * resolution strategy is open addressing method. + * + * [size] + * The number of `Hash` entries (value of `Hash#size`). + * + * [slot] + * The generic term for EA or IB elements. + * + * [active] + * The state in which a slot is recognized as a `Hash` entry. + * + * [deleted] + * The state in which a slot is marked as deleted. + * + * [used] + * The state in which a slot is active or deleted. + * + * [empty] + * The state in which a slot is not used. Capacity is equal to the sum of + * the number of used slots and the number of empty slots. + */ + +#define EA_INCREASE_RATIO 6 / 5 + 6 +#define EA_MAX_INCREASE UINT16_MAX +#define EA_MAX_CAPA /* `- 2` means reserved indices (empty and deleted) */ \ + U32(lesser(IB_MAX_CAPA - 2, MRB_INT_MAX)) +#define IB_MAX_CAPA (U32(1) << IB_MAX_BIT) +#define IB_TYPE_BIT 32 +#define IB_INIT_BIT ( \ + ib_upper_bound_for(32) <= AR_MAX_SIZE ? 6 : \ + ib_upper_bound_for(16) <= AR_MAX_SIZE ? 5 : \ + 4 \ +) +#define IB_MAX_BIT (IB_TYPE_BIT - 1) +#define AR_DEFAULT_CAPA 4 +#define AR_MAX_SIZE 16 +#define H_MAX_SIZE EA_MAX_CAPA + +mrb_static_assert1(offsetof(struct RHash, iv) == offsetof(struct RObject, iv)); +mrb_static_assert1(AR_MAX_SIZE < (1 << MRB_HASH_AR_EA_CAPA_BIT)); + +typedef struct hash_entry { + mrb_value key; + mrb_value val; +} hash_entry; + +typedef struct hash_table { + hash_entry *ea; +#ifdef MRB_32BIT + uint32_t ea_capa; + uint32_t ea_n_used; #endif + uint32_t ib[]; +} hash_table; + +typedef struct index_buckets_iter { + struct RHash *h; + uint32_t bit; + uint32_t mask; + uint32_t pos; + uint32_t ary_index; + uint32_t ea_index; + uint32_t shift1; + uint32_t shift2; + uint32_t step; +} index_buckets_iter; + +/* + * `c_` :: receiver class (category) + * `n_` :: attribute name + * `t_` :: attribute type + * `p_` :: struct member path + * `k_` :: macro key + */ +#define DEFINE_GETTER(c_, n_, t_, p_) \ + MRB_INLINE t_ c_##_##n_(const struct RHash *h) {return h->p_;} +#define DEFINE_SETTER(c_, n_, t_, p_) \ + MRB_INLINE void c_##_set_##n_(struct RHash *h, t_ v) {h->p_ = v;} +#define DEFINE_ACCESSOR(c_, n_, t_, p_) \ + DEFINE_GETTER(c_, n_, t_, p_) \ + DEFINE_SETTER(c_, n_, t_, p_) +#define DEFINE_FLAG_GETTER(c_, n_, t_, k_) \ + MRB_INLINE t_ c_##_##n_(const struct RHash *h) { \ + return (t_)((h->flags & MRB_HASH_##k_##_MASK) >> MRB_HASH_##k_##_SHIFT); \ + } +#define DEFINE_FLAG_SETTER(c_, n_, t_, k_) \ + MRB_INLINE void c_##_set_##n_(struct RHash *h, t_ v) { \ + h->flags &= ~MRB_HASH_##k_##_MASK; \ + h->flags |= v << MRB_HASH_##k_##_SHIFT; \ + } +#define DEFINE_FLAG_ACCESSOR(c_, n_, t_, k_) \ + DEFINE_FLAG_GETTER(c_, n_, t_, k_) \ + DEFINE_FLAG_SETTER(c_, n_, t_, k_) +#define DEFINE_INCREMENTER(c_, n_) \ + MRB_INLINE void c_##_inc_##n_(struct RHash *h) { \ + c_##_set_##n_(h, c_##_##n_(h) + 1); \ + } +#define DEFINE_DECREMENTER(c_, n_) \ + MRB_INLINE void c_##_dec_##n_(struct RHash *h) { \ + c_##_set_##n_(h, c_##_##n_(h) - 1); \ + } +#define DEFINE_SWITCHER(n_, k_) \ + MRB_INLINE void h_##n_##_on(struct RHash *h) { \ + h->flags |= MRB_HASH_##k_; \ + } \ + MRB_INLINE void h_##n_##_off(struct RHash *h) { \ + h->flags &= ~MRB_HASH_##k_; \ + } \ + MRB_INLINE mrb_bool h_##n_##_p(const struct RHash *h) { \ + return (h->flags & MRB_HASH_##k_) == MRB_HASH_##k_; \ + } -#ifndef MRB_HT_INIT_SIZE -#define MRB_HT_INIT_SIZE 4 +#ifdef MRB_64BIT +DEFINE_ACCESSOR(ar, ea_capa, uint32_t, ea_capa) +DEFINE_ACCESSOR(ar, ea_n_used, uint32_t, ea_n_used) +DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ea_capa) +DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ea_n_used) +#else +DEFINE_FLAG_ACCESSOR(ar, ea_capa, uint32_t, AR_EA_CAPA) +DEFINE_FLAG_ACCESSOR(ar, ea_n_used, uint32_t, AR_EA_N_USED) +DEFINE_ACCESSOR(ht, ea_capa, uint32_t, ht->ea_capa) +DEFINE_ACCESSOR(ht, ea_n_used, uint32_t, ht->ea_n_used) #endif -#define HT_SEG_INCREASE_RATIO 6 / 5 +DEFINE_FLAG_ACCESSOR(ib, bit, uint32_t, IB_BIT) +DEFINE_ACCESSOR(ar, size, uint32_t, size) +DEFINE_ACCESSOR(ar, ea, hash_entry*, ea) +DEFINE_DECREMENTER(ar, size) +DEFINE_ACCESSOR(ht, size, uint32_t, size) +DEFINE_ACCESSOR(ht, ea, hash_entry*, ht->ea) +DEFINE_GETTER(ht, ib, uint32_t*, ht->ib) +DEFINE_INCREMENTER(ht, size) +DEFINE_DECREMENTER(ht, size) +DEFINE_GETTER(h, size, uint32_t, size) +DEFINE_ACCESSOR(h, ht, hash_table*, ht) +DEFINE_SWITCHER(ht, HT) + +#define ea_each_used(ea, n_used, entry_var, code) do { \ + hash_entry *entry_var = ea, *ea_end__ = entry_var + (n_used); \ + for (; entry_var < ea_end__; ++entry_var) { \ + code; \ + } \ +} while (0) -struct segkv { - mrb_value key; - mrb_value val; -}; - -typedef struct segment { - uint16_t size; - struct segment *next; - struct segkv e[]; -} segment; - -typedef struct segindex { - size_t size; - size_t capa; - struct segkv *table[]; -} segindex; - -/* hash table structure */ -typedef struct htable { - segment *rootseg; - segment *lastseg; - mrb_int size; - uint16_t last_len; - segindex *index; -} htable; - -static /* inline */ size_t -ht_hash_func(mrb_state *mrb, htable *t, mrb_value key) +#define ea_each(ea, size, entry_var, code) do { \ + hash_entry *entry_var = ea; \ + uint32_t size__ = size; \ + for (; 0 < size__; ++entry_var) { \ + if (entry_deleted_p(entry_var)) continue; \ + --size__; \ + code; \ + } \ +} while (0) + +#define ib_cycle_by_key(mrb, h, key, it_var, code) do { \ + index_buckets_iter it_var[1]; \ + ib_it_init(mrb, it_var, h, key); \ + for (;;) { \ + ib_it_next(it_var); \ + code; \ + } \ +} while (0) + +#define ib_find_by_key(mrb, h_, key_, it_var, code) do { \ + mrb_value ib_fbk_key__ = key_; \ + ib_cycle_by_key(mrb, h_, ib_fbk_key__, it_var, { \ + if (ib_it_empty_p(it_var)) break; \ + if (ib_it_deleted_p(it_var)) continue; \ + if (obj_eql(mrb, ib_fbk_key__, ib_it_entry(it_var)->key, it_var->h)) { \ + code; \ + break; \ + } \ + }); \ +} while (0) + +#define h_each(h, entry_var, code) do { \ + struct RHash *h__ = h; \ + hash_entry *h_e_ea__; \ + uint32_t h_e_size__; \ + h_ar_p(h) ? (h_e_ea__ = ar_ea(h__), h_e_size__ = ar_size(h__)) : \ + (h_e_ea__ = ht_ea(h__), h_e_size__ = ht_size(h__)); \ + ea_each(h_e_ea__, h_e_size__, entry_var, code); \ +} while (0) + +/* + * `h_check_modified` raises an exception when a dangerous modification is + * made to `h` by executing `code`. + * + * This macro is not called if `h->ht` (`h->ea`) is `NULL` (`Hash` size is + * zero). And because the `hash_entry` is rather large, `h->ht->ea` and + * `h->ht->ea_capa` are able to be safely accessed even in AR. This nature + * is used to eliminate branch of AR or HT. + * + * `HT_ASSERT_SAFE_READ` checks if members can be accessed according to its + * assumptions. + */ +#define HT_ASSERT_SAFE_READ(attr_name) \ + mrb_static_assert1( \ + offsetof(hash_table, attr_name) + sizeof(((hash_table*)0)->attr_name) <= \ + sizeof(hash_entry)) +HT_ASSERT_SAFE_READ(ea); +#ifdef MRB_32BIT +HT_ASSERT_SAFE_READ(ea_capa); +#endif +#undef HT_ASSERT_SAFE_READ +#define h_check_modified(mrb, h, code) do { \ + struct RHash *h__ = h; \ + uint32_t mask = MRB_HASH_HT|MRB_HASH_IB_BIT_MASK|MRB_HASH_AR_EA_CAPA_MASK; \ + uint32_t flags = h__->flags & mask; \ + void* tbl__ = (mrb_assert(h__->ht), h__->ht); \ + uint32_t ht_ea_capa__ = ht_ea_capa(h__); \ + hash_entry *ht_ea__ = ht_ea(h__); \ + code; \ + if (flags != (h__->flags & mask) || \ + tbl__ != h__->ht || \ + ht_ea_capa__ != ht_ea_capa(h__) || \ + ht_ea__ != ht_ea(h__)) { \ + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); \ + } \ +} while (0) + +#define U32(v) ((uint32_t)(v)) +#define U64(v) ((uint64_t)(v)) +#define h_ar_p(h) (!h_ht_p(h)) +#define h_ar_on(h) h_ht_off(h) +#define lesser(a, b) ((a) < (b) ? (a) : (b)) + +static uint32_t ib_upper_bound_for(uint32_t capa); +static uint32_t ib_bit_to_capa(uint32_t bit); +static void ht_init( + mrb_state *mrb, struct RHash *h, uint32_t size, + hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit); +static void ht_set_without_ib_adjustment( + mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val); + +static uint32_t +next_power2(uint32_t v) { - enum mrb_vtype tt = mrb_type(key); - mrb_value hv; - size_t h; - segindex *index = t->index; - size_t capa = index ? index->capa : 0; + mrb_assert(v != 0); +#ifdef __GNUC__ + return U32(1) << ((sizeof(unsigned) * CHAR_BIT) - __builtin_clz(v)); +#else + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + ++v; + return v; +#endif +} +static uint32_t +obj_hash_code(mrb_state *mrb, mrb_value key, struct RHash *h) +{ + enum mrb_vtype tt = mrb_type(key); + uint32_t hash_code; + mrb_value hash_code_obj; switch (tt) { case MRB_TT_STRING: - h = mrb_str_hash(mrb, key); + hash_code = mrb_str_hash(mrb, key); break; - case MRB_TT_TRUE: case MRB_TT_FALSE: case MRB_TT_SYMBOL: @@ -68,24 +298,24 @@ ht_hash_func(mrb_state *mrb, htable *t, mrb_value key) #ifndef MRB_NO_FLOAT case MRB_TT_FLOAT: #endif - h = (size_t)mrb_obj_id(key); + hash_code = U32(mrb_obj_id(key)); break; - default: - hv = mrb_funcall_id(mrb, key, MRB_SYM(hash), 0); - h = (size_t)tt ^ (size_t)mrb_integer(hv); + h_check_modified(mrb, h, { + hash_code_obj = mrb_funcall_argv(mrb, key, MRB_SYM(hash), 0, NULL); + }); + + hash_code = U32(tt) ^ U32(mrb_integer(hash_code_obj)); break; } - if (index && (index != t->index || capa != index->capa)) { - mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); - } - return ((h)^((h)<<2)^((h)>>2)); + return hash_code ^ (hash_code << 2) ^ (hash_code >> 2); } -static inline mrb_bool -ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) +static mrb_bool +obj_eql(mrb_state *mrb, mrb_value a, mrb_value b, struct RHash *h) { enum mrb_vtype tt = mrb_type(a); + mrb_bool eql; switch (tt) { case MRB_TT_STRING: @@ -106,603 +336,821 @@ ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) #endif default: - { - segindex *index = t->index; - size_t capa = index ? index->capa : 0; - mrb_bool eql = mrb_eql(mrb, a, b); - if (index && (index != t->index || capa != index->capa)) { - mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); - } - return eql; - } - } + h_check_modified(mrb, h, {eql = mrb_eql(mrb, a, b);}); + return eql; + } } -/* Creates the hash table. */ -static htable* -ht_new(mrb_state *mrb) +static mrb_bool +entry_deleted_p(const hash_entry* entry) { - htable *t; + return mrb_undef_p(entry->key); +} - t = (htable*)mrb_malloc(mrb, sizeof(htable)); - t->size = 0; - t->rootseg = NULL; - t->lastseg = NULL; - t->last_len = 0; - t->index = NULL; +static void +entry_delete(hash_entry* entry) +{ + entry->key = mrb_undef_value(); +} - return t; +static uint32_t +ea_next_capa_for(uint32_t size, uint32_t max_capa) +{ + if (size < AR_DEFAULT_CAPA) { + return AR_DEFAULT_CAPA; + } + else { + uint64_t capa = U64(size) * EA_INCREASE_RATIO, inc = capa - size; + if (EA_MAX_INCREASE < inc) capa = size + EA_MAX_INCREASE; + return capa <= max_capa ? U32(capa) : max_capa; + } } -#define power2(v) do { \ - v--;\ - v |= v >> 1;\ - v |= v >> 2;\ - v |= v >> 4;\ - v |= v >> 8;\ - v |= v >> 16;\ - v++;\ -} while (0) +static hash_entry* +ea_resize(mrb_state *mrb, hash_entry *ea, uint32_t capa) +{ + return (hash_entry*)mrb_realloc(mrb, ea, sizeof(hash_entry) * capa); +} -#ifndef UPPER_BOUND -#define UPPER_BOUND(x) ((x)>>2|(x)>>1) -#endif +static void +ea_compress(hash_entry *ea, uint32_t n_used) +{ + hash_entry *w_entry = ea; + ea_each_used(ea, n_used, r_entry, { + if (entry_deleted_p(r_entry)) continue; + if (r_entry != w_entry) *w_entry = *r_entry; + ++w_entry; + }); +} + +/* + * Increase or decrease capacity of `ea` to a standard size that can + * accommodate `*capap + 1` entries (but, not exceed `max_capa`). Set the + * changed capacity to `*capap` and return a pointer to `mrb_realloc`ed EA. + */ +static hash_entry* +ea_adjust(mrb_state *mrb, hash_entry *ea, uint32_t *capap, uint32_t max_capa) +{ + *capap = ea_next_capa_for(*capap, max_capa); + return ea_resize(mrb, ea, *capap); +} + +static hash_entry* +ea_dup(mrb_state *mrb, const hash_entry *ea, uint32_t capa) +{ + size_t byte_size = sizeof(hash_entry) * capa; + hash_entry *new_ea = (hash_entry*)mrb_malloc(mrb, byte_size); + return (hash_entry*)memcpy(new_ea, ea, byte_size); +} -#define HT_MASK(index) ((index->capa)-1) +static hash_entry* +ea_get_by_key(mrb_state *mrb, hash_entry *ea, uint32_t size, mrb_value key, + struct RHash *h) +{ + ea_each(ea, size, entry, { + if (obj_eql(mrb, key, entry->key, h)) return entry; + }); + return NULL; +} + +static hash_entry* +ea_get(hash_entry *ea, uint32_t index) +{ + return &ea[index]; +} -/* Build index for the hash table */ static void -ht_index(mrb_state *mrb, htable *t) +ea_set(hash_entry *ea, uint32_t index, mrb_value key, mrb_value val) { - size_t size = (size_t)t->size; - size_t mask; - segindex *index = t->index; - segment *seg; - size_t i; + ea[index].key = key; + ea[index].val = val; +} - if (size == 0) { - t->index = NULL; - mrb_free(mrb, index); - return; - } - /* allocate index table */ - if (index && index->size >= UPPER_BOUND(index->capa)) { - size = index->capa+1; - } - power2(size); - if (!index || index->capa < size) { - index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); - if (index == NULL) { - mrb_free(mrb, t->index); - t->index = NULL; - return; - } - t->index = index; - } - index->size = t->size; - index->capa = size; - for (i=0; i<size; i++) { - index->table[i] = NULL; - } +static void +ar_init(struct RHash *h, uint32_t size, + hash_entry *ea, uint32_t ea_capa, uint32_t ea_n_used) +{ + h_ar_on(h); + ar_set_size(h, size); + ar_set_ea(h, ea); + ar_set_ea_capa(h, ea_capa); + ar_set_ea_n_used(h, ea_n_used); +} - /* rebuild index */ - mask = HT_MASK(index); - seg = t->rootseg; - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value key; - size_t k, step = 0; +static void +ar_free(mrb_state *mrb, struct RHash *h) +{ + mrb_free(mrb, ar_ea(h)); +} - if (!seg->next && i >= (size_t)t->last_len) { - return; - } - key = seg->e[i].key; - if (mrb_undef_p(key)) continue; - k = ht_hash_func(mrb, t, key) & mask; - while (index->table[k]) { - k = (k+(++step)) & mask; - } - index->table[k] = &seg->e[i]; - } - seg = seg->next; - } +static void +ar_adjust_ea(mrb_state *mrb, struct RHash *h, uint32_t size, uint32_t max_ea_capa) +{ + uint32_t ea_capa = size; + hash_entry *ea = ea_adjust(mrb, ar_ea(h), &ea_capa, max_ea_capa); + ar_set_ea(h, ea); + ar_set_ea_capa(h, ea_capa); } -/* Compacts the hash table removing deleted entries. */ static void -ht_compact(mrb_state *mrb, htable *t) +ar_compress(mrb_state *mrb, struct RHash *h) { - segment *seg; - uint16_t i, i2; - segment *seg2 = NULL; - mrb_int size = 0; + uint32_t size = ar_size(h); + ea_compress(ar_ea(h), ar_ea_n_used(h)); + ar_set_ea_n_used(h, size); + ar_adjust_ea(mrb, h, size, lesser(ar_ea_capa(h), AR_MAX_SIZE)); +} - if (t == NULL) return; - seg = t->rootseg; - if (t->index && (size_t)t->size == t->index->size) { - ht_index(mrb, t); - return; - } - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value k = seg->e[i].key; +static mrb_bool +ar_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + ea_each(ar_ea(h), ar_size(h), entry, { + if (!obj_eql(mrb, key, entry->key, h)) continue; + *valp = entry->val; + return TRUE; + }); + return FALSE; +} - if (!seg->next && i >= t->last_len) { - goto exit; - } - if (mrb_undef_p(k)) { /* found deleted key */ - if (seg2 == NULL) { - seg2 = seg; - i2 = i; +static void +ar_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) +{ + uint32_t size = ar_size(h); + hash_entry *entry; + if ((entry = ea_get_by_key(mrb, ar_ea(h), size, key, h))) { + entry->val = val; + } + else { + uint32_t ea_capa = ar_ea_capa(h), ea_n_used = ar_ea_n_used(h); + if (ea_capa == ea_n_used) { + if (size == ea_n_used) { + if (size == AR_MAX_SIZE) { + hash_entry *ea = ea_adjust(mrb, ar_ea(h), &ea_capa, EA_MAX_CAPA); + ea_set(ea, ea_n_used, key, val); + ht_init(mrb, h, ++size, ea, ea_capa, NULL, IB_INIT_BIT); + return; + } + else { + ar_adjust_ea(mrb, h, size, AR_MAX_SIZE); } } else { - size++; - if (seg2 != NULL) { - seg2->e[i2++] = seg->e[i]; - if (i2 >= seg2->size) { - seg2 = seg2->next; - i2 = 0; - } - } + ar_compress(mrb, h); + ea_n_used = size; } } - seg = seg->next; + ea_set(ar_ea(h), ea_n_used, key, val); + ar_set_size(h, ++size); + ar_set_ea_n_used(h, ++ea_n_used); } - exit: - /* reached at end */ - t->size = size; - if (seg2) { - seg = seg2->next; - seg2->next = NULL; - t->last_len = i2; - t->lastseg = seg2; - while (seg) { - seg2 = seg->next; - mrb_free(mrb, seg); - seg = seg2; +} + +static mrb_bool +ar_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + hash_entry *entry = ea_get_by_key(mrb, ar_ea(h), ar_size(h), key, h); + if (!entry) return FALSE; + *valp = entry->val; + entry_delete(entry); + ar_dec_size(h); + return TRUE; +} + +static void +ar_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) +{ + uint32_t size = ar_size(h); + ea_each(ar_ea(h), size, entry, { + *keyp = entry->key; + *valp = entry->val; + entry_delete(entry); + ar_set_size(h, --size); + return; + }); +} + +static void +ar_rehash(mrb_state *mrb, struct RHash *h) +{ + /* see comments in `h_rehash` */ + uint32_t size = ar_size(h), w_size = 0, ea_capa = ar_ea_capa(h); + hash_entry *ea = ar_ea(h), *w_entry; + ea_each(ea, size, r_entry, { + if ((w_entry = ea_get_by_key(mrb, ea, w_size, r_entry->key, h))) { + w_entry->val = r_entry->val; + ar_set_size(h, --size); + entry_delete(r_entry); } - } - if (t->index) { - ht_index(mrb, t); - } + else { + if (w_size != U32(r_entry - ea)) { + ea_set(ea, w_size, r_entry->key, r_entry->val); + entry_delete(r_entry); + } + ++w_size; + } + }); + mrb_assert(size == w_size); + ar_set_ea_n_used(h, size); + ar_adjust_ea(mrb, h, size, ea_capa); } -static segment* -segment_alloc(mrb_state *mrb, segment *seg) +static uint32_t +ib_it_pos_for(index_buckets_iter *it, uint32_t v) { - uint32_t size; + return v & it->mask; +} - if (!seg) size = MRB_HT_INIT_SIZE; - else { - size = seg->size*HT_SEG_INCREASE_RATIO + 1; - if (size > UINT16_MAX) size = UINT16_MAX; - } +static uint32_t +ib_it_empty_value(const index_buckets_iter *it) +{ + return it->mask; +} - seg = (segment*)mrb_malloc(mrb, sizeof(segment)+sizeof(struct segkv)*size); - seg->size = size; - seg->next = NULL; +static uint32_t +ib_it_deleted_value(const index_buckets_iter *it) +{ + return it->mask - 1; +} - return seg; +static mrb_bool +ib_it_empty_p(const index_buckets_iter *it) +{ + return it->ea_index == ib_it_empty_value(it); +} + +static mrb_bool +ib_it_deleted_p(const index_buckets_iter *it) +{ + return it->ea_index == ib_it_deleted_value(it); +} + +static mrb_bool +ib_it_active_p(const index_buckets_iter *it) +{ + return it->ea_index < ib_it_deleted_value(it); } -/* Set the value for the key in the indexed table. */ static void -ht_index_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) +ib_it_init(mrb_state *mrb, index_buckets_iter *it, struct RHash *h, mrb_value key) { - segindex *index = t->index; - size_t k, sp, step = 0, mask; - segment *seg; + it->h = h; + it->bit = ib_bit(h); + it->mask = ib_bit_to_capa(it->bit) - 1; + it->pos = ib_it_pos_for(it, obj_hash_code(mrb, key, h)); + it->step = 0; +} - if (index->size >= UPPER_BOUND(index->capa)) { - /* need to expand table */ - ht_compact(mrb, t); - index = t->index; - } - mask = HT_MASK(index); - sp = index->capa; - k = ht_hash_func(mrb, t, key) & mask; - while (index->table[k]) { - mrb_value key2 = index->table[k]->key; - if (mrb_undef_p(key2)) { - if (sp == index->capa) sp = k; - } - else if (ht_hash_equal(mrb, t, key, key2)) { - index->table[k]->val = val; - return; - } - k = (k+(++step)) & mask; +static void +ib_it_next(index_buckets_iter *it) +{ + /* + * [IB image] + * + * ary_index(1) --. + * \ .-- shift1(3) .-- shift2(29) + * pos(6) --. \ / / + * View | \ \ <-o-> <----------o----------> + * -------- +---------------------\----\--+-----------------------------+----- + * array | 0 `--. `-|--- o 1 | ... + * +---------+---------+-----+\--+-----+---------+---------+---+----- + * buckets | 0 | 1 | ... | o 6 | 7 | 8 | ... + * +---------+---------+-----+=========+---------+---------+--------- + * bit set |1 1 1 0 0|0 0 0 1 1| ... |0 1 0 1 1|0 1 1 1 0|0 1 0 1 0| ... + * +---------+---------+-----+========*+---------+---------+--------- + * <---o---> \ + * \ `-- bit_pos(34) + * `-- bit(5) + */ + uint64_t bit_pos; + bit_pos = U64(it->bit) * (it->pos + 1) - 1; + it->ary_index = U32(bit_pos / IB_TYPE_BIT); + it->shift2 = U32((U64(it->ary_index) + 1) * IB_TYPE_BIT - bit_pos - 1); + it->ea_index = (ht_ib(it->h)[it->ary_index] >> it->shift2) & it->mask; + if (IB_TYPE_BIT - it->bit < it->shift2) { + it->shift1 = IB_TYPE_BIT - it->shift2; + it->ea_index |= (ht_ib(it->h)[it->ary_index - 1] << it->shift1) & it->mask; } - if (sp < index->capa) { - k = sp; + else { + it->shift1 = 0; } + it->pos = ib_it_pos_for(it, it->pos + (++it->step)); +} - /* put the value at the last */ - seg = t->lastseg; - if (t->last_len < seg->size) { - index->table[k] = &seg->e[t->last_len++]; - } - else { /* append a new segment */ - seg->next = segment_alloc(mrb, seg); - seg = seg->next; - seg->next = NULL; - t->lastseg = seg; - t->last_len = 1; - index->table[k] = &seg->e[0]; +static uint32_t +ib_it_get(const index_buckets_iter *it) +{ + return it->ea_index; +} + +static void +ib_it_set(index_buckets_iter *it, uint32_t ea_index) +{ + uint32_t mask, i; + it->ea_index = ea_index; + if (it->shift1) { + i = it->ary_index - 1; + mask = it->mask >> it->shift1; + ht_ib(it->h)[i] = (ht_ib(it->h)[i] & ~mask) | (ea_index >> it->shift1); } - index->table[k]->key = key; - index->table[k]->val = val; - index->size++; - t->size++; + i = it->ary_index; + mask = it->mask << it->shift2; + ht_ib(it->h)[i] = (ht_ib(it->h)[i] & ~mask) | (ea_index << it->shift2); } -/* Set the value for the key in the hash table. */ static void -ht_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) +ib_it_delete(index_buckets_iter *it) { - segment *seg; - mrb_int i, deleted = 0; + ib_it_set(it, ib_it_deleted_value(it)); +} - if (t == NULL) return; - if (t->index) { - ht_index_put(mrb, t, key, val); - return; - } - seg = t->rootseg; - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value k = seg->e[i].key; - /* Found room in last segment after last_len */ - if (!seg->next && i >= t->last_len) { - seg->e[i].key = key; - seg->e[i].val = val; - t->last_len = (uint16_t)i+1; - t->size++; - return; - } - if (mrb_undef_p(k)) { - deleted++; - continue; - } - if (ht_hash_equal(mrb, t, key, k)) { - seg->e[i].val = val; - return; - } - } - seg = seg->next; - } - /* Not found */ +static hash_entry* +ib_it_entry(index_buckets_iter *it) +{ + return ea_get(ht_ea(it->h), it->ea_index); +} - /* Compact if last segment has room */ - if (deleted > 0 && deleted > MRB_HT_INIT_SIZE) { - ht_compact(mrb, t); - } - t->size++; +static uint32_t +ib_capa_to_bit(uint32_t capa) +{ +#ifdef __GNUC__ + return U32(__builtin_ctz(capa)); +#else + /* http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn */ + static const uint32_t MultiplyDeBruijnBitPosition2[] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return MultiplyDeBruijnBitPosition2[U32(capa * 0x077CB531U) >> 27]; +#endif +} - /* check if thre's room after compaction */ - if (t->lastseg && t->last_len < t->lastseg->size) { - seg = t->lastseg; - i = t->last_len; - } - else { - /* append new segment */ - seg = segment_alloc(mrb, t->lastseg); - i = 0; - if (t->rootseg == NULL) { - t->rootseg = seg; - } - else { - t->lastseg->next = seg; - } - t->lastseg = seg; - } - seg->e[i].key = key; - seg->e[i].val = val; - t->last_len = (uint16_t)i+1; - if (t->index == NULL && t->size > MRB_HT_INIT_SIZE*4) { - ht_index(mrb, t); - } +static uint32_t +ib_bit_to_capa(uint32_t bit) +{ + return U32(1) << bit; } -/* Get a value for a key from the indexed table. */ -static mrb_bool -ht_index_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) -{ - segindex *index = t->index; - size_t mask = HT_MASK(index); - size_t k = ht_hash_func(mrb, t, key) & mask; - size_t step = 0; - - while (index->table[k]) { - mrb_value key2 = index->table[k]->key; - if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { - if (vp) *vp = index->table[k]->val; - return TRUE; - } - k = (k+(++step)) & mask; - } - return FALSE; +static uint32_t +ib_upper_bound_for(uint32_t capa) +{ + return (capa >> 2) | (capa >> 1); /* 3/4 */ } -/* Get a value for a key from the hash table. */ -static mrb_bool -ht_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +static uint32_t +ib_bit_for(uint32_t size) { - segment *seg; - mrb_int i; + uint32_t capa = next_power2(size); + if (capa != IB_MAX_CAPA && ib_upper_bound_for(capa) < size) capa *= 2; + return ib_capa_to_bit(capa); +} - if (t == NULL) return FALSE; - if (t->index) { - return ht_index_get(mrb, t, key, vp); - } +static uint32_t +ib_byte_size_for(uint32_t ib_bit) +{ + uint64_t ary_size = (U64(1) << ib_bit) * ib_bit / IB_TYPE_BIT; + return U32(sizeof(uint32_t) * ary_size); +} - seg = t->rootseg; - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value k = seg->e[i].key; +static void +ib_init(mrb_state *mrb, struct RHash *h, uint32_t ib_bit, size_t ib_byte_size) +{ + hash_entry *ea = ht_ea(h); + memset(ht_ib(h), 0xff, ib_byte_size); + ib_set_bit(h, ib_bit); + ea_each_used(ea, ht_ea_n_used(h), entry, { + ib_cycle_by_key(mrb, h, entry->key, it, { + if (!ib_it_empty_p(it)) continue; + ib_it_set(it, U32(entry - ea)); + break; + }); + }); +} - if (!seg->next && i >= t->last_len) { - return FALSE; - } - if (mrb_undef_p(k)) continue; - if (ht_hash_equal(mrb, t, key, k)) { - if (vp) *vp = seg->e[i].val; - return TRUE; - } - } - seg = seg->next; - } - return FALSE; +static void +ht_init(mrb_state *mrb, struct RHash *h, uint32_t size, + hash_entry *ea, uint32_t ea_capa, hash_table *ht, uint32_t ib_bit) +{ + size_t ib_byte_size = ib_byte_size_for(ib_bit); + size_t ht_byte_size = sizeof(hash_table) + ib_byte_size; + h_ht_on(h); + h_set_ht(h, (hash_table*)mrb_realloc(mrb, ht, ht_byte_size)); + ht_set_size(h, size); + ht_set_ea(h, ea); + ht_set_ea_capa(h, ea_capa); + ht_set_ea_n_used(h, size); + ib_init(mrb, h, ib_bit, ib_byte_size); +} + +static void +ht_free(mrb_state *mrb, struct RHash *h) +{ + mrb_free(mrb, ht_ea(h)); + mrb_free(mrb, h_ht(h)); +} + +static hash_table* +ht_dup(mrb_state *mrb, const struct RHash *h) +{ + size_t ib_byte_size = ib_byte_size_for(ib_bit(h)); + size_t ht_byte_size = sizeof(hash_table) + ib_byte_size; + hash_table *new_ht = (hash_table*)mrb_malloc(mrb, ht_byte_size); + return (hash_table*)memcpy(new_ht, h_ht(h), ht_byte_size); +} + +static void +ht_adjust_ea(mrb_state *mrb, struct RHash *h, uint32_t size, uint32_t max_ea_capa) +{ + uint32_t ea_capa = size; + hash_entry *ea = ea_adjust(mrb, ht_ea(h), &ea_capa, max_ea_capa); + ht_set_ea(h, ea); + ht_set_ea_capa(h, ea_capa); +} + +static void +ht_to_ar(mrb_state *mrb, struct RHash *h) +{ + uint32_t size = ht_size(h), ea_capa = size; + hash_entry *ea = ht_ea(h); + ea_compress(ea, ht_ea_n_used(h)); + ea = ea_adjust(mrb, ea, &ea_capa, AR_MAX_SIZE); + mrb_free(mrb, h_ht(h)); + ar_init(h, size, ea, ea_capa, size); } -/* Deletes the value for the symbol from the hash table. */ -/* Deletion is done by overwriting keys by `undef`. */ static mrb_bool -ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +ht_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - segment *seg; - mrb_int i; + ib_find_by_key(mrb, h, key, it, { + *valp = ib_it_entry(it)->val; + return TRUE; + }); + return FALSE; +} - if (t == NULL) return FALSE; - seg = t->rootseg; - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value key2; +static void +ht_set_as_ar(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) +{ + ht_to_ar(mrb, h); + ar_set(mrb, h, key, val); +} - if (!seg->next && i >= t->last_len) { - /* not found */ - return FALSE; - } - key2 = seg->e[i].key; - if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { - if (vp) *vp = seg->e[i].val; - seg->e[i].key = mrb_undef_value(); - t->size--; - return TRUE; +static void +ht_set_without_ib_adjustment(mrb_state *mrb, struct RHash *h, + mrb_value key, mrb_value val) +{ + mrb_assert(ht_size(h) < ib_bit_to_capa(ib_bit(h))); + ib_cycle_by_key(mrb, h, key, it, { + if (ib_it_active_p(it)) { + if (!obj_eql(mrb, key, ib_it_entry(it)->key, h)) continue; + ib_it_entry(it)->val = val; + } + else { + uint32_t ea_n_used = ht_ea_n_used(h); + if (ea_n_used == H_MAX_SIZE) { + mrb_assert(ht_size(h) == ea_n_used); + mrb_raise(mrb, E_ARGUMENT_ERROR, "hash too big"); } + if (ea_n_used == ht_ea_capa(h)) ht_adjust_ea(mrb, h, ea_n_used, EA_MAX_CAPA); + ib_it_set(it, ea_n_used); + ea_set(ht_ea(h), ea_n_used, key, val); + ht_inc_size(h); + ht_set_ea_n_used(h, ++ea_n_used); } - seg = seg->next; - } - return FALSE; + return; + }); } -/* Iterates over the hash table. */ static void -ht_foreach(mrb_state *mrb, htable *t, mrb_hash_foreach_func *func, void *p) -{ - segment *seg; - mrb_int i; - - if (t == NULL) return; - seg = t->rootseg; - while (seg) { - for (i=0; i<seg->size; i++) { - /* no value in last segment after last_len */ - if (!seg->next && i >= t->last_len) { - return; - } - if (mrb_undef_p(seg->e[i].key)) continue; - if ((*func)(mrb, seg->e[i].key, seg->e[i].val, p) != 0) - return; +ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) +{ + uint32_t size = ht_size(h); + uint32_t ib_bit_width = ib_bit(h), ib_capa = ib_bit_to_capa(ib_bit_width); + if (ib_upper_bound_for(ib_capa) <= size) { + if (size != ht_ea_n_used(h)) ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ++ib_bit_width); + } + else if (ht_ea_capa(h) == ht_ea_n_used(h) && size != ht_ea_n_used(h)) { + if (size <= AR_MAX_SIZE) {ht_set_as_ar(mrb, h, key, val); return;} + if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) { + ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_adjust_ea(mrb, h, size, ht_ea_capa(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width); } - seg = seg->next; } + ht_set_without_ib_adjustment(mrb, h, key, val); } -size_t -mrb_os_memsize_of_hash_table(mrb_value obj) +static mrb_bool +ht_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) { - struct htable *h = mrb_hash_ptr(obj)->ht; - size_t segkv_size = 0; + ib_find_by_key(mrb, h, key, it, { + hash_entry *entry = ib_it_entry(it); + *valp = entry->val; + ib_it_delete(it); + entry_delete(entry); + ht_dec_size(h); + return TRUE; + }); + return FALSE; +} - if (h->index) segkv_size = (sizeof(struct segkv) * h->index->capa); +static void +ht_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) +{ + hash_entry *ea = ht_ea(h); + ea_each(ea, ht_size(h), entry, { + ib_cycle_by_key(mrb, h, entry->key, it, { + if (ib_it_get(it) != U32(entry - ea)) continue; + *keyp = entry->key; + *valp = entry->val; + ib_it_delete(it); + entry_delete(entry); + ht_dec_size(h); + return; + }); + }); +} - return sizeof(htable) + - sizeof(segindex) + - (sizeof(segment) * h->size) + - segkv_size; +static void +ht_rehash(mrb_state *mrb, struct RHash *h) +{ + /* see comments in `h_rehash` */ + uint32_t size = ht_size(h), w_size = 0, ea_capa = ht_ea_capa(h); + hash_entry *ea = ht_ea(h); + ht_init(mrb, h, 0, ea, ea_capa, h_ht(h), ib_bit_for(size)); + ht_set_size(h, size); + ht_set_ea_n_used(h, ht_ea_n_used(h)); + ea_each(ea, size, r_entry, { + ib_cycle_by_key(mrb, h, r_entry->key, it, { + if (ib_it_active_p(it)) { + if (!obj_eql(mrb, r_entry->key, ib_it_entry(it)->key, h)) continue; + ib_it_entry(it)->val = r_entry->val; + ht_set_size(h, --size); + entry_delete(r_entry); + } + else { + if (w_size != U32(r_entry - ea)) { + ea_set(ea, w_size, r_entry->key, r_entry->val); + entry_delete(r_entry); + } + ib_it_set(it, w_size++); + } + break; + }); + }); + mrb_assert(size == w_size); + ht_set_ea_n_used(h, size); + size <= AR_MAX_SIZE ? ht_to_ar(mrb, h) : ht_adjust_ea(mrb, h, size, ea_capa); } -/* Iterates over the hash table. */ -MRB_API void -mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, mrb_hash_foreach_func *func, void *p) +static mrb_value +h_key_for(mrb_state *mrb, mrb_value key) { - ht_foreach(mrb, hash->ht, func, p); + if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { + key = mrb_str_dup(mrb, key); + MRB_SET_FROZEN_FLAG(mrb_str_ptr(key)); + } + return key; } -/* Copy the hash table. */ -static htable* -ht_copy(mrb_state *mrb, htable *t) +static struct RHash* +h_alloc(mrb_state *mrb) { - segment *seg; - htable *t2; - mrb_int i; + return (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); +} - seg = t->rootseg; - t2 = ht_new(mrb); - if (t->size == 0) return t2; +static void +h_init(struct RHash *h) +{ + ar_init(h, 0, NULL, 0, 0); +} - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value key = seg->e[i].key; - mrb_value val = seg->e[i].val; +static void +h_free_table(mrb_state *mrb, struct RHash *h) +{ + (h_ar_p(h) ? ar_free : ht_free)(mrb, h); +} - if ((seg->next == NULL) && (i >= t->last_len)) { - return t2; - } - if (mrb_undef_p(key)) continue; /* skip deleted key */ - ht_put(mrb, t2, key, val); - } - seg = seg->next; - } - return t2; +static void +h_clear(mrb_state *mrb, struct RHash *h) +{ + h_free_table(mrb, h); + h_init(h); +} + +static mrb_bool +h_get(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + return (h_ar_p(h) ? ar_get : ht_get)(mrb, h, key, valp); } -/* Free memory of the hash table. */ static void -ht_free(mrb_state *mrb, htable *t) +h_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) { - segment *seg; + (h_ar_p(h) ? ar_set : ht_set)(mrb, h, key, val); +} - if (!t) return; - seg = t->rootseg; - while (seg) { - segment *p = seg; - seg = seg->next; - mrb_free(mrb, p); - } - if (t->index) mrb_free(mrb, t->index); - mrb_free(mrb, t); +static mrb_bool +h_delete(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value *valp) +{ + return (h_ar_p(h) ? ar_delete : ht_delete)(mrb, h, key, valp); } -static void hash_modify(mrb_state *mrb, mrb_value hash); +/* find first element in the table, and remove it. */ +static void +h_shift(mrb_state *mrb, struct RHash *h, mrb_value *keyp, mrb_value *valp) +{ + (h_ar_p(h) ? ar_shift : ht_shift)(mrb, h, keyp, valp); +} -static inline mrb_value -ht_key(mrb_state *mrb, mrb_value key) +static void +h_rehash(mrb_state *mrb, struct RHash *h) { - if (mrb_string_p(key) && !mrb_frozen_p(mrb_str_ptr(key))) { - key = mrb_str_dup(mrb, key); - MRB_SET_FROZEN_FLAG(mrb_str_ptr(key)); + /* + * ==== Comments common to `ar_rehash` and `ht_rehash` + * + * - Because reindex (such as elimination of duplicate keys) must be + * guaranteed, it is necessary to set one by one. + * + * - To prevent EA from breaking if an exception occurs in the middle, + * delete the slot before moving when moving the entry, and update size + * at any time when overwriting. + */ + (h_size(h) == 0 ? h_clear : h_ar_p(h) ? ar_rehash : ht_rehash)(mrb, h); +} + +static void +h_replace(mrb_state *mrb, struct RHash *h, struct RHash *orig_h) +{ + uint32_t size = h_size(orig_h); + if (size == 0) { + h_clear(mrb, h); + } + else if (h_ar_p(orig_h)) { + uint32_t ea_capa = ar_ea_capa(orig_h); + hash_entry *ea = ea_dup(mrb, ar_ea(orig_h), ea_capa); + h_free_table(mrb, h); + ar_init(h, size, ea, ea_capa, ar_ea_n_used(orig_h)); + } + else { /* HT */ + uint32_t ea_capa = ht_ea_capa(orig_h); + hash_entry *ea = ea_dup(mrb, ht_ea(orig_h), ea_capa); + hash_table *ht = ht_dup(mrb, orig_h); + h_free_table(mrb, h); + h_ht_on(h); + h_set_ht(h, ht); + ht_set_size(h, size); + ht_set_ea(h, ea); +#ifdef MRB_64BIT + ht_set_ea_capa(h, ea_capa); + ht_set_ea_n_used(h, ht_ea_n_used(orig_h)); +#endif + ib_set_bit(h, ib_bit(orig_h)); } - return key; } -#define KEY(key) ht_key(mrb, key) +void +mrb_gc_mark_hash(mrb_state *mrb, struct RHash *h) +{ + h_each(h, entry, { + mrb_gc_mark_value(mrb, entry->key); + mrb_gc_mark_value(mrb, entry->val); + }); +} -static int -hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +size_t +mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *h) { - mrb_gc_mark_value(mrb, key); - mrb_gc_mark_value(mrb, val); - return 0; + return h_size(h) * 2; } void -mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash) +mrb_gc_free_hash(mrb_state *mrb, struct RHash *h) { - ht_foreach(mrb, hash->ht, hash_mark_i, NULL); + h_free_table(mrb, h); } size_t -mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *hash) +mrb_os_memsize_of_hash_table(mrb_value self) { - if (!hash->ht) return 0; - return hash->ht->size*2; + struct RHash *h = mrb_hash_ptr(self); + return h_ar_p(h) ? (ar_ea_capa(h) * sizeof(hash_entry)) : + (ht_ea_capa(h) * sizeof(hash_entry) + + sizeof(hash_table) + + ib_byte_size_for(ib_bit(h))); } -void -mrb_gc_free_hash(mrb_state *mrb, struct RHash *hash) +/* Iterates over the key/value pairs. */ +MRB_API void +mrb_hash_foreach(mrb_state *mrb, struct RHash *h, mrb_hash_foreach_func *func, void *data) { - ht_free(mrb, hash->ht); + h_each(h, entry, { + if (func(mrb, entry->key, entry->val, data) != 0) return; + }); } MRB_API mrb_value mrb_hash_new(mrb_state *mrb) { - struct RHash *h; - - h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - h->ht = 0; - h->iv = 0; + struct RHash *h = h_alloc(mrb); return mrb_obj_value(h); } +/* + * Set the capacity of EA and IB to minimum capacity (and appropriate load + * factor) that does not cause expansion when inserting `capa` elements. + */ MRB_API mrb_value mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) { - struct RHash *h; - - h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - /* preallocate hash table */ - h->ht = ht_new(mrb); - /* capacity ignored */ - h->iv = 0; - return mrb_obj_value(h); + if (capa < 0 || EA_MAX_CAPA < capa) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "hash too big"); + return mrb_nil_value(); /* not reached */ + } + else if (capa == 0) { + return mrb_hash_new(mrb); + } + else { + uint32_t size = U32(capa); + struct RHash *h = h_alloc(mrb); + hash_entry *ea = ea_resize(mrb, NULL, size); + if (size <= AR_MAX_SIZE) { + ar_init(h, 0, ea, size, 0); + } + else { + ht_init(mrb, h, 0, ea, size, NULL, ib_bit_for(size)); + } + return mrb_obj_value(h); + } } static mrb_value mrb_hash_default(mrb_state *mrb, mrb_value hash); -static mrb_value hash_default(mrb_state *mrb, mrb_value hash, mrb_value key); -static mrb_value -mrb_hash_init_copy(mrb_state *mrb, mrb_value self) +static void +hash_modify(mrb_state *mrb, mrb_value hash) { - mrb_value orig = mrb_get_arg1(mrb); - struct RHash* copy; - htable *orig_h; - mrb_value ifnone, vret; + mrb_check_frozen(mrb, mrb_hash_ptr(hash)); +} - if (mrb_obj_equal(mrb, self, orig)) return self; - if ((mrb_type(self) != mrb_type(orig)) || (mrb_obj_class(mrb, self) != mrb_obj_class(mrb, orig))) { - mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object"); +static mrb_value +hash_default(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + if (MRB_RHASH_DEFAULT_P(hash)) { + if (MRB_RHASH_PROCDEFAULT_P(hash)) { + return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key); + } + else { + return RHASH_IFNONE(hash); + } } + return mrb_nil_value(); +} - orig_h = RHASH_TBL(self); - copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = ht_copy(mrb, orig_h); - - if (MRB_RHASH_DEFAULT_P(self)) { - copy->flags |= MRB_HASH_DEFAULT; - } - if (MRB_RHASH_PROCDEFAULT_P(self)) { - copy->flags |= MRB_HASH_PROC_DEFAULT; +static void +hash_replace(mrb_state *mrb, mrb_value self, mrb_value orig) +{ + struct RHash *h = mrb_hash_ptr(self), *orig_h = mrb_hash_ptr(orig); + uint32_t mask = MRB_HASH_DEFAULT | MRB_HASH_PROC_DEFAULT; + mrb_sym name; + h_replace(mrb, h, orig_h); + name = MRB_SYM(ifnone); + if (orig_h->flags & MRB_HASH_DEFAULT) { + mrb_iv_set(mrb, self, name, mrb_iv_get(mrb, orig, name)); } - vret = mrb_obj_value(copy); - ifnone = RHASH_IFNONE(self); - if (!mrb_nil_p(ifnone)) { - mrb_iv_set(mrb, vret, MRB_SYM(ifnone), ifnone); + else { + mrb_iv_remove(mrb, self, name); } - return vret; + h->flags &= ~mask; + h->flags |= orig_h->flags & mask; } -static int -check_kdict_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +static mrb_value +mrb_hash_init_copy(mrb_state *mrb, mrb_value self) { - if (!mrb_symbol_p(key)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); - } - return 0; + mrb_value orig; + mrb_get_args(mrb, "H", &orig); + hash_modify(mrb, self); + if (mrb_hash_ptr(self) != mrb_hash_ptr(orig)) hash_replace(mrb, self, orig); + return self; } void mrb_hash_check_kdict(mrb_state *mrb, mrb_value self) { - htable *t; - - t = RHASH_TBL(self); - if (!t || t->size == 0) return; - ht_foreach(mrb, t, check_kdict_i, NULL); + h_each(mrb_hash_ptr(self), entry, { + if (mrb_symbol_p(entry->key)) continue; + mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); + }); } MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value self) { - struct RHash* copy; - htable *orig_h; - - orig_h = RHASH_TBL(self); - copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = orig_h ? ht_copy(mrb, orig_h) : NULL; - return mrb_obj_value(copy); + struct RHash* copy_h = h_alloc(mrb); + mrb_value copy = mrb_obj_value(copy_h); + copy_h->c = mrb_hash_ptr(self)->c; + hash_replace(mrb, copy, self); + return copy; } MRB_API mrb_value @@ -711,7 +1159,7 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) mrb_value val; mrb_sym mid; - if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { + if (h_get(mrb, mrb_hash_ptr(hash), key, &val)) { return val; } @@ -728,7 +1176,7 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) { mrb_value val; - if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { + if (h_get(mrb, mrb_hash_ptr(hash), key, &val)) { return val; } /* not found */ @@ -739,21 +1187,10 @@ MRB_API void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) { hash_modify(mrb, hash); - - key = KEY(key); - ht_put(mrb, RHASH_TBL(hash), key, val); - mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key); - mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val); - return; -} - -static void -hash_modify(mrb_state *mrb, mrb_value hash) -{ - mrb_check_frozen(mrb, mrb_hash_ptr(hash)); - if (!RHASH_TBL(hash)) { - RHASH_TBL(hash) = ht_new(mrb); - } + key = h_key_for(mrb, key); + h_set(mrb, mrb_hash_ptr(hash), key, val); + mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), key); + mrb_field_write_barrier_value(mrb, mrb_basic_ptr(hash), val); } /* 15.2.13.4.16 */ @@ -837,20 +1274,6 @@ mrb_hash_aget(mrb_state *mrb, mrb_value self) return mrb_hash_get(mrb, self, key); } -static mrb_value -hash_default(mrb_state *mrb, mrb_value hash, mrb_value key) -{ - if (MRB_RHASH_DEFAULT_P(hash)) { - if (MRB_RHASH_PROCDEFAULT_P(hash)) { - return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, key); - } - else { - return RHASH_IFNONE(hash); - } - } - return mrb_nil_value(); -} - /* 15.2.13.4.5 */ /* * call-seq: @@ -945,7 +1368,6 @@ mrb_hash_set_default(mrb_state *mrb, mrb_value hash) * a #=> [nil, nil, 4] */ - static mrb_value mrb_hash_default_proc(mrb_state *mrb, mrb_value hash) { @@ -990,10 +1412,10 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) { - htable *t = RHASH_TBL(hash); mrb_value del_val; - if (ht_del(mrb, t, key, &del_val)) { + hash_modify(mrb, hash); + if (h_delete(mrb, mrb_hash_ptr(hash), key, &del_val)) { return del_val; } @@ -1005,38 +1427,9 @@ static mrb_value mrb_hash_delete(mrb_state *mrb, mrb_value self) { mrb_value key = mrb_get_arg1(mrb); - - hash_modify(mrb, self); return mrb_hash_delete_key(mrb, self, key); } -/* find first element in the hash table, and remove it. */ -static void -ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) -{ - segment *seg = t->rootseg; - mrb_int i; - - while (seg) { - for (i=0; i<seg->size; i++) { - mrb_value key; - - if (!seg->next && i >= t->last_len) { - return; - } - key = seg->e[i].key; - if (mrb_undef_p(key)) continue; - *kp = key; - *vp = seg->e[i].val; - /* delete element */ - seg->e[i].key = mrb_undef_value(); - t->size--; - return; - } - seg = seg->next; - } -} - /* 15.2.13.4.24 */ /* * call-seq: @@ -1054,28 +1447,19 @@ ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) static mrb_value mrb_hash_shift(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); + struct RHash *h = mrb_hash_ptr(hash); hash_modify(mrb, hash); - if (t && t->size > 0) { - mrb_value del_key = mrb_nil_value(); - mrb_value del_val = mrb_nil_value(); - - ht_shift(mrb, t, &del_key, &del_val); + if (h_size(h) == 0) { + return hash_default(mrb, hash, mrb_nil_value()); + } + else { + mrb_value del_key, del_val; + h_shift(mrb, h, &del_key, &del_val); mrb_gc_protect(mrb, del_key); mrb_gc_protect(mrb, del_val); return mrb_assoc_new(mrb, del_key, del_val); } - - if (MRB_RHASH_DEFAULT_P(hash)) { - if (MRB_RHASH_PROCDEFAULT_P(hash)) { - return mrb_funcall_id(mrb, RHASH_PROCDEFAULT(hash), MRB_SYM(call), 2, hash, mrb_nil_value()); - } - else { - return RHASH_IFNONE(hash); - } - } - return mrb_nil_value(); } /* 15.2.13.4.4 */ @@ -1093,13 +1477,8 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - hash_modify(mrb, hash); - if (t) { - ht_free(mrb, t); - RHASH_TBL(hash) = NULL; - } + h_clear(mrb, mrb_hash_ptr(hash)); return hash; } @@ -1135,10 +1514,7 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) MRB_API mrb_int mrb_hash_size(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - - if (!t) return 0; - return t->size; + return (mrb_int)h_size(mrb_hash_ptr(hash)); } /* 15.2.13.4.20 */ @@ -1165,10 +1541,7 @@ mrb_hash_size_m(mrb_state *mrb, mrb_value self) MRB_API mrb_bool mrb_hash_empty_p(mrb_state *mrb, mrb_value self) { - htable *t = RHASH_TBL(self); - - if (!t) return TRUE; - return t->size == 0; + return h_size(mrb_hash_ptr(self)) == 0; } /* 15.2.13.4.12 */ @@ -1187,13 +1560,6 @@ mrb_hash_empty_m(mrb_state *mrb, mrb_value self) return mrb_bool_value(mrb_hash_empty_p(mrb, self)); } -static int -hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) -{ - mrb_ary_push(mrb, *(mrb_value*)p, key); - return 0; -} - /* 15.2.13.4.19 */ /* * call-seq: @@ -1210,24 +1576,14 @@ hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) MRB_API mrb_value mrb_hash_keys(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - mrb_int size; - mrb_value ary; - - if (!t || (size = t->size) == 0) - return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, size); - ht_foreach(mrb, t, hash_keys_i, (void*)&ary); + struct RHash *h = mrb_hash_ptr(hash); + mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h)); + h_each(h, entry, { + mrb_ary_push(mrb, ary, entry->key); + }); return ary; } -static int -hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) -{ - mrb_ary_push(mrb, *(mrb_value*)p, val); - return 0; -} - /* 15.2.13.4.28 */ /* * call-seq: @@ -1244,14 +1600,11 @@ hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) MRB_API mrb_value mrb_hash_values(mrb_state *mrb, mrb_value hash) { - htable *t = RHASH_TBL(hash); - mrb_int size; - mrb_value ary; - - if (!t || (size = t->size) == 0) - return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, size); - ht_foreach(mrb, t, hash_vals_i, (void*)&ary); + struct RHash *h = mrb_hash_ptr(hash); + mrb_value ary = mrb_ary_new_capa(mrb, (mrb_int)h_size(h)); + h_each(h, entry, { + mrb_ary_push(mrb, ary, entry->val); + }); return ary; } @@ -1277,13 +1630,8 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) MRB_API mrb_bool mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) { - htable *t; - - t = RHASH_TBL(hash); - if (ht_get(mrb, t, key, NULL)) { - return TRUE; - } - return FALSE; + mrb_value val; + return h_get(mrb, mrb_hash_ptr(hash), key, &val); } static mrb_value @@ -1296,23 +1644,6 @@ mrb_hash_has_key(mrb_state *mrb, mrb_value hash) return mrb_bool_value(key_p); } -struct has_v_arg { - mrb_bool found; - mrb_value val; -}; - -static int -hash_has_value_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) -{ - struct has_v_arg *arg = (struct has_v_arg*)p; - - if (mrb_equal(mrb, arg->val, val)) { - arg->found = TRUE; - return 1; - } - return 0; -} - /* 15.2.13.4.14 */ /* 15.2.13.4.27 */ /* @@ -1332,41 +1663,32 @@ static mrb_value mrb_hash_has_value(mrb_state *mrb, mrb_value hash) { mrb_value val = mrb_get_arg1(mrb); - struct has_v_arg arg; - - arg.found = FALSE; - arg.val = val; - ht_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg); - return mrb_bool_value(arg.found); -} - -static int -merge_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) -{ - htable *h1 = (htable*)data; - - ht_put(mrb, h1, key, val); - return 0; + struct RHash *h = mrb_hash_ptr(hash); + h_each(h, entry, { + h_check_modified(mrb, h, { + if (mrb_equal(mrb, val, entry->val)) return mrb_true_value(); + }); + }); + return mrb_false_value(); } MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) { - htable *h1, *h2; + struct RHash *h1, *h2; hash_modify(mrb, hash1); - hash2 = mrb_ensure_hash_type(mrb, hash2); - h1 = RHASH_TBL(hash1); - h2 = RHASH_TBL(hash2); - - if (!h2) return; - if (!h1) { - RHASH_TBL(hash1) = ht_copy(mrb, h2); - return; - } - ht_foreach(mrb, h2, merge_i, h1); - mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1)); - return; + mrb_ensure_hash_type(mrb, hash2); + h1 = mrb_hash_ptr(hash1); + h2 = mrb_hash_ptr(hash2); + + if (h1 == h2) return; + if (h_size(h2) == 0) return; + h_each(h2, entry, { + h_check_modified(mrb, h2, {h_set(mrb, h1, entry->key, entry->val);}); + mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->key); + mrb_field_write_barrier_value(mrb, (struct RBasic *)h1, entry->val); + }); } /* @@ -1381,14 +1703,10 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) * k = keys[0] * h = {} * keys.each{|key| h[key] = key[0]} - * h #=> { [1]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, - * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, - * [15]=>15,[16]=>16,[17]=>17} + * h #=> { [1]=>1, [2]=>2, ... [16]=>16, [17]=>17} * h[k] #=> 1 * k[0] = keys.size + 1 - * h #=> {[18]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, - * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, - * [15]=>15,[16]=>16,[17]=>17} + * h #=> {[18]=>1, [2]=>2, ... [16]=>16, [17]=>17} * h[k] #=> nil * h.rehash * h[k] #=> 1 @@ -1396,7 +1714,7 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) static mrb_value mrb_hash_rehash(mrb_state *mrb, mrb_value self) { - ht_compact(mrb, RHASH_TBL(self)); + h_rehash(mrb, mrb_hash_ptr(self)); return self; } @@ -1408,11 +1726,10 @@ mrb_init_hash(mrb_state *mrb) mrb->hash_class = h = mrb_define_class(mrb, "Hash", mrb->object_class); /* 15.2.13 */ MRB_SET_INSTANCE_TT(h, MRB_TT_HASH); - mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1)); mrb_define_method(mrb, h, "[]", mrb_hash_aget, MRB_ARGS_REQ(1)); /* 15.2.13.4.2 */ mrb_define_method(mrb, h, "[]=", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.3 */ mrb_define_method(mrb, h, "clear", mrb_hash_clear, MRB_ARGS_NONE()); /* 15.2.13.4.4 */ - mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */ + mrb_define_method(mrb, h, "default", mrb_hash_default, MRB_ARGS_OPT(1)); /* 15.2.13.4.5 */ mrb_define_method(mrb, h, "default=", mrb_hash_set_default, MRB_ARGS_REQ(1)); /* 15.2.13.4.6 */ mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,MRB_ARGS_NONE()); /* 15.2.13.4.7 */ mrb_define_method(mrb, h, "default_proc=", mrb_hash_set_default_proc,MRB_ARGS_REQ(1)); /* 15.2.13.4.7 */ @@ -1422,10 +1739,12 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.14 */ mrb_define_method(mrb, h, "include?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.15 */ mrb_define_method(mrb, h, "initialize", mrb_hash_init, MRB_ARGS_OPT(1)|MRB_ARGS_BLOCK()); /* 15.2.13.4.16 */ + mrb_define_method(mrb, h, "initialize_copy", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.17 */ mrb_define_method(mrb, h, "key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.18 */ mrb_define_method(mrb, h, "keys", mrb_hash_keys, MRB_ARGS_NONE()); /* 15.2.13.4.19 */ mrb_define_method(mrb, h, "length", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.20 */ mrb_define_method(mrb, h, "member?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.21 */ + mrb_define_method(mrb, h, "replace", mrb_hash_init_copy, MRB_ARGS_REQ(1)); /* 15.2.13.4.23 */ mrb_define_method(mrb, h, "shift", mrb_hash_shift, MRB_ARGS_NONE()); /* 15.2.13.4.24 */ mrb_define_method(mrb, h, "size", mrb_hash_size_m, MRB_ARGS_NONE()); /* 15.2.13.4.25 */ mrb_define_method(mrb, h, "store", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.26 */ diff --git a/test/t/hash.rb b/test/t/hash.rb index cd47d251d..c51af03aa 100644 --- a/test/t/hash.rb +++ b/test/t/hash.rb @@ -1,378 +1,1000 @@ ## # Hash ISO Test -assert('Hash', '15.2.13') do - assert_equal Class, Hash.class -end +class HashKey + attr_accessor :value, :error, :callback -assert('Hash#==', '15.2.13.4.1') do - assert_true({ 'abc' => 'abc' } == { 'abc' => 'abc' }) - assert_false({ 'abc' => 'abc' } == { 'cba' => 'cba' }) - assert_false({ :a => 1 } == true) - skip unless Object.const_defined?(:Float) - assert_true({ :equal => 1 } == { :equal => 1.0 }) -end + self.class.alias_method :[], :new -assert('Hash#[]', '15.2.13.4.2') do - a = { 'abc' => 'abc' } + def initialize(value, error: nil, callback: nil) + @value = value + @error = error + @callback = callback + end - assert_equal 'abc', a['abc'] + def ==(other) + @callback.(:==, self, other) if @callback + return raise_error(:==) if @error == true || @error == :== + other.kind_of?(self.class) && @value == other.value + end - # Hash#[] should call #default (#3272) - hash = {} - def hash.default(k); self[k] = 1; end - hash[:foo] += 1 + def eql?(other) + @callback.(:eql?, self, other) if @callback + return raise_error(:eql?) if @error == true || @error == :eql? + other.kind_of?(self.class) && @value.eql?(other.value) + end - assert_equal 2, hash[:foo] -end + def hash + @callback.(:hash, self) if @callback + return raise_error(:hash) if @error == true || @error == :hash + @value % 3 + end -assert('Hash#[]=', '15.2.13.4.3') do - a = Hash.new - a['abc'] = 'abc' + def to_s + "#{self.class}[#{@value}]" + end + alias inspect to_s - assert_equal 'abc', a['abc'] + def raise_error(name) + raise "##{self}: #{name} error" + end end -assert('Hash#clear', '15.2.13.4.4') do - a = { 'abc' => 'abc' } - a.clear - - assert_equal({ }, a) +class HashEntries < Array + self.class.alias_method :[], :new + + def initialize(entries) self.replace(entries) end + def key(index, k=get=true) get ? self[index][0] : (self[index][0] = k) end + def value(index, v=get=true) get ? self[index][1] : (self[index][1] = v) end + def keys; map{|k, v| k} end + def values; map{|k, v| v} end + def each_key(&block) each{|k, v| block.(k)} end + def each_value(&block) each{|k, v| block.(v)} end + def dup2; self.class[*map{|k, v| [k.dup, v.dup]}] end + def to_s; "#{self.class}#{super}" end + alias inspect to_s + + def hash_for(hash={}, &block) + each{|k, v| hash[k] = v} + block.(hash) if block + hash + end end -assert('Hash#dup') do - a = { 'a' => 1 } - b = a.dup - a['a'] = 2 - assert_equal({'a' => 1}, b) - - c = Hash.new { |h, k| h[k] = k.upcase } - d = c.dup - assert_equal("FOO", d["foo"]) +def ar_entries + HashEntries[ + [1, "one"], + [HashKey[2], :two], + [nil, :two], + [:one, 1], + ["&", "&"], + [HashKey[6], :six], + [HashKey[5], :five], # same hash code as HashKey[2] + ] +end + +def ht_entries + ar_entries.dup.push( + ["id", 32], + [:date, "2020-05-02"], + [200, "OK"], + ["modifiers", ["left_shift", "control"]], + [:banana, :yellow], + ["JSON", "JavaScript Object Notation"], + [:size, :large], + ["key_code", "h"], + ["h", 0x04], + [[3, 2, 1], "three, two, one"], + [:auto, true], + [HashKey[12], "December"], + [:path, "/path/to/file"], + [:name, "Ruby"], + ) +end + +def merge_entries!(entries1, entries2) + entries2.each do |k2, v2| + entry1 = entries1.find{|k1, _| k1.eql?(k2)} + entry1 ? (entry1[1] = v2) : (entries1 << [k2, v2]) + end + entries1 +end + +def product(*arrays, &block) + sizes = Array.new(arrays.size+1, 1) + (arrays.size-1).downto(0){|i| sizes[i] = arrays[i].size * sizes[i+1]} + size = sizes[0] + results = Array.new(size){[]} + arrays.each_with_index do |array, arrays_i| + results_i = -1 + (size / sizes[arrays_i]).times do + array.each do |v| + sizes[arrays_i+1].times{results[results_i+=1] << v} + end + end + end + results.each{block.(_1)} end -assert('Hash#default', '15.2.13.4.5') do - a = Hash.new - b = Hash.new('abc') - c = Hash.new {|s,k| s[k] = k} - - assert_nil a.default - assert_equal 'abc', b.default - assert_nil c.default - assert_equal 'abc', c.default('abc') +def assert_iterator(exp, obj, meth) + params = [] + obj.__send__(meth) {|param| params << param} + assert_equal(exp, params) end -assert('Hash#default=', '15.2.13.4.6') do - a = { 'abc' => 'abc' } - a.default = 'cba' - - assert_equal 'abc', a['abc'] - assert_equal 'cba', a['notexist'] +def assert_nothing_crashed(&block) + block.call rescue nil + pass end -assert('Hash#default_proc', '15.2.13.4.7') do - a = Hash.new - b = Hash.new {|s,k| s[k] = k + k} - c = b[2] - d = b['cat'] - - assert_nil a.default_proc - assert_equal Proc, b.default_proc.class - assert_equal 4, c - assert_equal 'catcat', d +assert('Hash', '15.2.13') do + assert_equal(Class, Hash.class) +end + +[[:==, '15.2.13.4.1'], [:eql?, '']].each do |meth, iso| + assert("Hash##{meth}", iso) do + cls = Class.new(Hash){attr_accessor :foo} + [ar_entries, ht_entries].each do |entries| + h1 = entries.hash_for + h2 = entries.dup.reverse!.hash_for + assert_operator(h1, meth, h2) + assert_operator(h1, meth, h1) + assert_not_operator(h1, meth, true) + assert_operator({}, meth, Hash.new) + + h1 = entries.hash_for(cls.new(1)) {|h| h.foo = 1} + h2 = entries.hash_for(cls.new(2)) {|h| h.foo = 2} + assert_operator(h1, meth, h2) + + h1 = entries.hash_for + h2 = entries.hash_for(cls.new) + assert_operator(h1, meth, h2) + + h1 = (entries.dup << [:_k, 1]).hash_for + h2 = (entries.dup << [:_k, 2]).hash_for + assert_not_operator(h1, meth, h2) + + h1 = (entries.dup << [:_k1, 0]).hash_for + h2 = (entries.dup << [:_k2, 0]).hash_for + assert_not_operator(h1, meth, h2) + + h1 = entries.hash_for + h2 = (entries.dup << [:_k, 2]).hash_for + assert_not_operator(h1, meth, h2) + + k1, v1 = HashKey[-1], HashKey[-2] + k2, v2 = HashKey[-1], HashKey[-2] + h1 = (entries.dup << [k1, v1]).hash_for + h2 = (entries.dup << [k2, v2]).hash_for + product([h1, h2], [k1, k2], %i[eql? hash]) do |h, k, m| + [k1, k2].each{_1.callback = nil} + k.callback = ->(name, *){h.clear if name == m} + assert_nothing_crashed{h1.__send__(meth, h2)} + end + product([h1, h2], [v1, v2]) do |h, v| + [v1, v2].each{_1.callback = nil} + v.callback = ->(name, *){h.clear if name == meth} + assert_nothing_crashed{h1.__send__(meth, h2)} + end + + if Object.const_defined?(:Float) + h1 = (entries.dup << [-1, true]).hash_for + h2 = (entries.dup << [-1.0, true]).hash_for + assert_not_operator(h1, meth, h2) + h1 = (entries.dup << [-1.0, true]).hash_for + h2 = (entries.dup << [-1, true]).hash_for + assert_not_operator(h1, meth, h2) + + h1 = (entries.dup << [:_k, 1]).hash_for + h2 = (entries.dup << [:_k, 1.0]).hash_for + if meth == :== + assert_operator(h1, meth, h2) + else + assert_not_operator(h1, meth, h2) + end + end + end + end end -assert('Hash#delete', '15.2.13.4.8') do - a = { 'abc' => 'ABC' } - b = { 'abc' => 'ABC' } - b_tmp_1 = false - b_tmp_2 = false - - assert_equal 'ABC', a.delete('abc') - b.delete('abc') do |k| - b_tmp_1 = true - end - b.delete('abc') do |k| - b_tmp_2 = true +assert('Hash#[]', '15.2.13.4.2') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + assert_equal(entries.size, h.size) + entries.each{|k, v| assert_equal(v, h[k])} + assert_equal(nil, h["_not_found_"]) + assert_equal(nil, h[:_not_dound_]) + assert_equal(nil, h[-2]) + + k = HashKey[-4] + h[HashKey[-1]] = -1 + h[k] = -4 + h.delete(k) + assert_equal(nil, h[k]) + + if Object.const_defined?(:Float) + h[-2] = 22 + assert_equal(nil, h[-2.0]) + h[-3.0] = 33 + assert_equal(nil, h[-3]) + assert_equal(33, h[-3.0]) + end + + k = HashKey[-2] + k.callback = ->(name, *){h.clear if name == :eql?} + assert_nothing_crashed{h[k]} + k.callback = ->(name, *){h.clear if name == :hash} + assert_nothing_crashed{h[k]} end - assert_nil a.delete('cba') - assert_false a.has_key?('abc') - assert_false b_tmp_1 - assert_true b_tmp_2 + # Hash#[] should call #default (#3272) + h = {} + def h.default(k); self[k] = 1; end + h[:foo] += 1 + assert_equal(2, h[:foo]) +end + +[%w[[]= 3], %w[store 26]].each do |meth, no| + assert("Hash##{meth}", "15.2.13.4.#{no}") do + [{}, ht_entries.hash_for].each do |h| + # duplicated key + k = :_dup_key + h.__send__(meth, k, 1) + size = h.size + h.__send__(meth, k, 2) + assert_equal(size, h.size) + assert_equal(2, h[k]) + + # freeze string key + k = "_mutable" + h.__send__(meth, k, 1) + h_k = h.keys[-1] + assert_not_same(k, h_k) + assert_predicate(h_k, :frozen?) + assert_not_predicate(k, :frozen?) + + # frozen string key + k = "_immutable".freeze + h.__send__(meth, k, 2) + h_k = h.keys[-1] + assert_same(k, h_k) + assert_predicate(h_k, :frozen?) + + # numeric key + if Object.const_defined?(:Float) + h.__send__(meth, 3, :fixnum) + h.__send__(meth, 3.0, :float) + assert_equal(:fixnum, h[3]) + assert_equal(:float, h[3.0]) + h.__send__(meth, 4.0, :float) + h.__send__(meth, 4, :fixnum) + assert_equal(:fixnum, h[4]) + assert_equal(:float, h[4.0]) + end + + # other key + k = [:_array] + h.__send__(meth, k, :_array) + h_k = h.keys[-1] + assert_same(k, h_k) + assert_not_predicate(h_k, :frozen?) + assert_not_predicate(k, :frozen?) + + # deleted key + k1, k2, k3 = HashKey[-1], HashKey[-4], HashKey[-7] # same hash code + h.__send__(meth, k1, 1) + h.__send__(meth, k2, -4) + h.__send__(meth, k3, 73) + size = h.size + h.delete(k1) + h.delete(k2) + h.__send__(meth, k2, 40) + assert_equal(nil, h[k1]) + assert_equal(40, h[k2]) + assert_equal(73, h[k3]) + assert_equal(size - 1, h.size) + + # frozen + h.freeze + assert_raise(FrozenError){h.__send__(meth, -100, 1)} + end + + [ar_entries.hash_for, ht_entries.hash_for].each do |h| + k = HashKey[-2] + k.callback = ->(name, *){h.clear if name == :eql?} + assert_nothing_crashed{h.__send__(meth, k, 2)} + k.callback = ->(name, *){h.clear if name == :hash} + assert_nothing_crashed{h.__send__(meth, k, 2)} + end + end end -assert('Hash#each', '15.2.13.4.9') do - a = { 'abc_key' => 'abc_value' } - key = nil - value = nil - - a.each do |k,v| - key = k - value = v +assert('Hash#clear', '15.2.13.4.4') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + assert_same(h, h.clear) + assert_equal(0, h.size) + assert_nil(h[entries.key(3)]) + + h.freeze + assert_raise(FrozenError){h.clear} end - assert_equal 'abc_key', key - assert_equal 'abc_value', value + h = {}.freeze + assert_raise(FrozenError){h.clear} end -assert('Hash#each_key', '15.2.13.4.10') do - a = { 'abc_key' => 'abc_value' } - key = nil - - a.each_key do |k| - key = k +assert('Hash#dup') do + cls = Class.new(Hash){attr_accessor :foo} + [ar_entries, ht_entries].each do |entries| + h1 = entries.hash_for(cls.new(61)){|h| h.foo = 23}.freeze + h2 = h1.dup + assert_not_predicate(h2, :frozen?) + assert_equal(h1.class, h2.class) + assert_equal(entries, h2.to_a) + assert_equal(23, h2.foo) + assert_equal(61, h2["_not_found_"]) + h2[-10] = 10 + assert_equal(10, h2[-10]) + assert_not_operator(h1, :key?, -10) + + h = entries.hash_for + k = HashKey[-1] + h[k] = 1 + k.callback = ->(*){h.clear} + assert_nothing_crashed{h.dup} end - - assert_equal 'abc_key', key end -assert('Hash#each_value', '15.2.13.4.11') do - a = { 'abc_key' => 'abc_value' } - value = nil - - a.each_value do |v| - value = v +assert('Hash#default', '15.2.13.4.5') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for(Hash.new) + assert_equal(nil, h.default) + assert_equal(nil, h.default(-2)) + + h = entries.hash_for(Hash.new(-88)) + assert_equal(-88, h.default) + assert_equal(-88, h.default(-2)) + assert_not_operator(h, :key?, -2) + assert_raise(ArgumentError){h.default(-2,-2)} + + proc = ->(h, k){h[k] = k * 3} + h = entries.hash_for(Hash.new(proc)) + assert_equal(proc, h.default(-2)) + + h = entries.hash_for(Hash.new(&proc)) + assert_equal(nil, h.default) + assert_not_operator(h, :key?, -2) + assert_equal(-6, h.default(-2)) + assert_equal(-6, h[-2]) + h[-2] = -5 + assert_equal(-6, h.default(-2)) + assert_equal(-6, h[-2]) end - - assert_equal 'abc_value', value end -assert('Hash#empty?', '15.2.13.4.12') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new - - assert_false a.empty? - assert_true b.empty? -end +assert('Hash#default=', '15.2.13.4.6') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for(Hash.new) + h.default = 3 + assert_equal(3, h[-2]) + assert_equal(entries.value(0), h[entries.key(0)]) -assert('Hash#has_key?', '15.2.13.4.13') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new + h.default = 4 + assert_equal(4, h[-2]) - assert_true a.has_key?('abc_key') - assert_false b.has_key?('cba') -end + h.default = nil + assert_equal(nil, h[-2]) -assert('Hash#has_value?', '15.2.13.4.14') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new + h.default = [5] + assert_same(h[-2], h[-3]) - assert_true a.has_value?('abc_value') - assert_false b.has_value?('cba') + h.freeze + assert_raise(FrozenError){h.default = 3} + end end -assert('Hash#include?', '15.2.13.4.15') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new - - assert_true a.include?('abc_key') - assert_false b.include?('cba') +assert('Hash#default_proc', '15.2.13.4.7') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for({}) + assert_nil(h.default_proc) + + h = entries.hash_for(Hash.new(34)) + assert_nil(h.default_proc) + + h = entries.hash_for(Hash.new{|h, k| h[k] = k * 3}) + proc = h.default_proc + assert_equal(Proc, proc.class) + assert_equal(6, proc.(h, 2)) + assert_equal([2, 6], h.to_a[-1]) + end end -assert('Hash#initialize', '15.2.13.4.16') do - # Testing initialize by new. - h = Hash.new - h2 = Hash.new(:not_found) - - assert_true h.is_a? Hash - assert_equal({ }, h) - assert_nil h["hello"] - assert_equal :not_found, h2["hello"] -end +assert('Hash#delete', '15.2.13.4.8') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + pairs = entries.dup + [0, 2, -1].each do |i| + k, v = pairs.delete_at(i) + assert_equal(v, h.delete(k)) + assert_equal(nil, h[k]) + assert_equal(false, h.key?(k)) + end + [entries.key(0), "_not_found_"].each {|k|assert_equal(nil, h.delete(k))} + assert_equal(pairs.size, h.size) + assert_equal(pairs, h.to_a) + pairs.each {|k, v| assert_equal(v, h[k])} + + h = entries.hash_for + pairs = entries.dup + [pairs.delete_at(1), ["_not_found_", "_default"]].each do |k, v| + assert_equal(v, h.delete(k){"_default"}) + assert_equal(nil, h[k]) + assert_equal(false, h.key?(k)) + end + assert_equal(pairs.size, h.size) + assert_equal(pairs, h.to_a) + pairs.each {|k, v| assert_equal(v, h[k])} + + if Object.const_defined?(:Float) + h = entries.dup.push([-5, 1], [-5.0, 2], [-6.0, 3], [-6, 4]).hash_for + assert_equal(1, h.delete(-5)) + assert_equal(3, h.delete(-6.0)) + end + + # nil value with block + h = entries.hash_for + k = "_nil" + h[k] = nil + assert_equal(nil, h.delete(k){"blk"}) + assert_equal(false, h.key?(k)) + + k = HashKey[-31, callback: ->(*){h.clear}] + assert_nothing_crashed{h.delete(k)} + end -assert('Hash#initialize_copy', '15.2.13.4.17') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new.initialize_copy(a) + assert_raise(ArgumentError){{}.delete} + assert_raise(ArgumentError){{}.delete(1,2)} - assert_equal({ 'abc_key' => 'abc_value' }, b) + h = {}.freeze + assert_raise(FrozenError){h.delete(1)} +end + +[%w[each 9], %w[each_key 10], %w[each_value 11]].each do |meth, no| + assert("Hash##{meth}", "15.2.13.4.#{no}") do + [ar_entries, ht_entries].each do |entries| + exp = [] + entries.__send__(meth){|param| exp << param} + assert_iterator(exp, entries.hash_for, meth) + + h = entries.hash_for + entries.shift + h.shift + entry = entries.delete_at(1) + h.delete(entry[0]) + h.delete(entries.delete_at(-4)[0]) + entries << entry + h.store(*entry) + exp = [] + entries.__send__(meth){|param| exp << param} + assert_iterator(exp, h, meth) + end + + assert_iterator([], {}, meth) + end end -assert('Hash#key?', '15.2.13.4.18') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new +assert('Hash#empty?', '15.2.13.4.12') do + [ar_entries, ht_entries].each do |entries| + assert_not_predicate entries.hash_for, :empty? - assert_true a.key?('abc_key') - assert_false b.key?('cba') -end + h = entries.hash_for + h.shift + h.delete(entries.key(-1)) + assert_not_predicate h, :empty? -assert('Hash#keys', '15.2.13.4.19') do - a = { 'abc_key' => 'abc_value' } + h = entries.hash_for + entries.size.times{h.shift} + assert_predicate(h, :empty?) - assert_equal ['abc_key'], a.keys -end - -assert('Hash#length', '15.2.13.4.20') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new + h = entries.hash_for + entries.each {|k, v| h.delete(k)} + assert_predicate(h, :empty?) + end - assert_equal 1, a.length - assert_equal 0, b.length + assert_predicate(Hash.new, :empty?) + assert_predicate(Hash.new(1), :empty?) + assert_predicate(Hash.new{|h, k| h[k] = 2}, :empty?) end -assert('Hash#member?', '15.2.13.4.21') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new - - assert_true a.member?('abc_key') - assert_false b.member?('cba') -end +[%w[has_key? 13], %w[include? 15], %w[key? 18], %w[member? 21]].each do |meth,no| + assert("Hash##{meth}", "15.2.13.4.#{no}") do + [ar_entries, ht_entries].each do |entries| + pairs = entries.dup.push([HashKey[-3], 3], [nil, "NIL"]) + h = pairs.hash_for + pairs.each{|k, v| assert_operator(h, meth, k)} + assert_not_operator(h, meth, HashKey[-6]) + assert_not_operator(h, meth, 3) -assert('Hash#merge', '15.2.13.4.22') do - a = { 'abc_key' => 'abc_value', 'cba_key' => 'cba_value' } - b = { 'cba_key' => 'XXX', 'xyz_key' => 'xyz_value' } + if Object.const_defined?(:Float) + hh = entries.push([-7, :i], [-8.0, :f]).hash_for + assert_not_operator(hh, meth, -7.0) + assert_not_operator(hh, meth, -8) + assert_operator(hh, meth, -8.0) + end - result_1 = a.merge b - result_2 = a.merge(b) do |key, original, new| - original - end + h.shift + assert_not_operator(h, meth, pairs.key(0)) - assert_equal({'abc_key' => 'abc_value', 'cba_key' => 'XXX', - 'xyz_key' => 'xyz_value' }, result_1) - assert_equal({'abc_key' => 'abc_value', 'cba_key' => 'cba_value', - 'xyz_key' => 'xyz_value' }, result_2) + h.delete(pairs.key(3)) + assert_not_operator(h, meth, pairs.key(3)) - assert_raise(TypeError) do - { 'abc_key' => 'abc_value' }.merge "a" + k = HashKey[-31, callback: ->(*){h.clear}] + assert_nothing_crashed{h.__send__(meth, k)} + end end -end -assert('Hash#replace', '15.2.13.4.23') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new.replace(a) + h = Hash.new{|h, k| h[1] = 1} + assert_not_operator(h, meth, 1) +end - assert_equal({ 'abc_key' => 'abc_value' }, b) +[%w[has_value? 14], %w[value? 24]].each do |meth, no| + assert("Hash##{meth}", "15.2.13.4.#{no}") do + [ar_entries, ht_entries].each do |entries| + entries.push([HashKey[-5], -8], ["NIL", nil]) + h = entries.hash_for + entries.each{|k, v| assert_operator(h, meth, v)} + assert_operator(h, meth, -8.0) if Object.const_defined?(:Float) + assert_not_operator(h, meth, "-8") - a = Hash.new(42) - b = {} - b.replace(a) - assert_equal(42, b[1]) + h.shift + assert_not_operator(h, meth, entries.value(0)) - a = Hash.new{|h,x| x} - b.replace(a) - assert_equal(127, b[127]) + h.delete(entries.key(3)) + assert_not_operator(h, meth, entries.value(3)) - assert_raise(TypeError) do - { 'abc_key' => 'abc_value' }.replace "a" + v = HashKey[-31, callback: ->(*){h.clear}] + assert_nothing_crashed{h.__send__(meth, v)} + end end -end - -assert('Hash#shift', '15.2.13.4.24') do - a = { 'abc_key' => 'abc_value', 'cba_key' => 'cba_value' } - b = a.shift - - assert_equal Array, b.class - assert_equal 2, b.size - assert_equal 1, a.size - b = a.shift - - assert_equal Array, b.class - assert_equal 2, b.size - assert_equal 0, a.size + h = Hash.new{|h, k| h[1] = 1} + assert_not_operator(h, meth, 1) end -assert('Hash#size', '15.2.13.4.25') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new - - assert_equal 1, a.size - assert_equal 0, b.size +assert('Hash#initialize', '15.2.13.4.16') do + h = Hash.new + assert_equal(Hash, h.class) + assert_not_operator(h, :key?, 1) + assert_equal(nil, h[1]) + + h = Hash.new([8]) + assert_not_operator(h, :key?, 1) + assert_equal([8], h[1]) + assert_same(h[1], h[2]) + + k = "key" + h = Hash.new{|hash, key| [hash, key]} + assert_not_operator(h, :key?, k) + assert_equal([h, k], h[k]) + assert_same(h, h[k][0]) + assert_same(k, h[k][1]) + + assert_raise(ArgumentError){Hash.new(1,2)} + assert_raise(ArgumentError){Hash.new(1){}} +end + +[%w[keys 19], %w[values 28]].each do |meth, no| + assert("Hash##{meth}", "15.2.13.4.#{no}") do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + assert_equal(entries.__send__(meth), h.__send__(meth)) + + h.shift + entries.shift + h.delete(entries.delete_at(3)[0]) + assert_equal(entries.__send__(meth), h.__send__(meth)) + end + + assert_equal([], {}.__send__(meth)) + end end -assert('Hash#store', '15.2.13.4.26') do - a = Hash.new - a.store('abc', 'abc') +[%w[length 20], %w[size 25]].each do |meth, no| + assert("Hash##{meth}", "15.2.13.4.#{no}") do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + assert_equal(entries.size, h.__send__(meth)) + + h.shift + entries.shift + h.delete(entries.delete_at(3)[0]) + assert_equal(entries.size, h.__send__(meth)) + end - assert_equal 'abc', a['abc'] + assert_equal(0, Hash.new.__send__(meth)) + end end -assert('Hash#value?', '15.2.13.4.27') do - a = { 'abc_key' => 'abc_value' } - b = Hash.new +assert('Hash#merge', '15.2.13.4.22') do + cls = Class.new(Hash){attr_accessor :foo} + ar_pairs = HashEntries[ + ["id", 32], + [nil, :two], + ["&", "&"], + [:same_key, :AR], + [HashKey[2], 20], + ] + ht_pairs = HashEntries[ + *(1..20).map{[_1, _1.to_s]}, + [:same_key, :HT], + [:age, 32], + [HashKey[5], 500], + ] + + [[ar_pairs, ht_pairs], [ht_pairs, ar_pairs]].each do |entries1, entries2| + h1 = entries1.hash_for(cls.new(:dv1)){|h| h.foo = :iv1}.freeze + h2 = entries2.hash_for(Hash.new(:dv2)).freeze + h3 = h1.merge(h2) + assert_equal(entries1, h1.to_a) + assert_equal(merge_entries!(entries1.dup2, entries2), h3.to_a) + assert_equal(cls, h3.class) + assert_equal(:dv1, h3.default) + assert_equal(:iv1, h3.foo) + + h3 = {}.merge(entries2.hash_for(cls.new)) + assert_equal(merge_entries!([], entries2), h3.to_a) + assert_equal(Hash, h3.class) + + h3 = entries1.hash_for.merge({}) + assert_equal(merge_entries!(entries1.dup2, []), h3.to_a) + + h1 = entries1.hash_for + h2 = entries2.hash_for + h3 = h1.merge(h2){|k, v1, v2| [k, v1, v2]} + exp = merge_entries!(entries1.dup2, entries2) + exp.find{|k, _| k == :same_key}[1] = [ + :same_key, + entries1.find{|k, _| k == :same_key}[1], + entries2.find{|k, _| k == :same_key}[1], + ] + assert_equal(exp, h3.to_a) + + assert_raise(TypeError){entries1.hash_for.merge("str")} + + k2 = HashKey[-2] + entries2 << [k2, 234] + h1, h2 = entries1.hash_for, entries2.hash_for + k2.callback = ->(name, *){h1.clear if name == :eql?} + assert_nothing_crashed{h1.merge(h2)} + h1, h2 = entries1.hash_for, entries2.hash_for + k2.callback = ->(name, *){h2.clear if name == :eql?} + assert_nothing_crashed{h1.merge(h2)} + h1, h2 = entries1.hash_for, entries2.hash_for + k2.callback = ->(name, *){h1.clear if name == :hash} + assert_nothing_crashed{h1.merge(h2)} + h1, h2 = entries1.hash_for, entries2.hash_for + k2.callback = ->(name, *){h2.clear if name == :hash} + assert_nothing_crashed{h1.merge(h2)} + end +end - assert_true a.value?('abc_value') - assert_false b.value?('cba') +assert("Hash#replace", "15.2.13.4.23") do + cls = Class.new(Hash){attr_accessor :foo} + e = [ar_entries, ht_entries] + [e, e.reverse].each do |entries1, entries2| + h1 = entries1.hash_for + assert_same(h1, h1.replace(h1)) + assert_equal(entries1, h1.to_a) + + h1 = {} + assert_same(h1, h1.replace(entries2.hash_for)) + assert_equal(entries2, h1.to_a) + + h1 = entries1.hash_for + assert_same(h1, h1.replace({})) + assert_predicate(h1, :empty?) + + pairs2 = entries2.dup + h2 = pairs2.hash_for + pairs2.shift + h2.shift + h2.delete(pairs2.delete_at(2)[0]) + h2.delete(pairs2.delete_at(4)[0]) + h1 = entries1.hash_for + assert_same(h1, h1.replace(h2)) + assert_equal(pairs2, h1.to_a) + + h1 = entries1.hash_for(Hash.new(10)) + h2 = entries2.hash_for(Hash.new(20)) + assert_same(h1, h1.replace(h2)) + assert_equal(entries2, h1.to_a) + assert_equal(20, h1.default) + + h1 = entries1.hash_for(Hash.new{_2}) + h2 = entries2.hash_for(Hash.new{_2.to_s}) + assert_same(h1, h1.replace(h2)) + assert_equal(entries2, h1.to_a) + assert_equal("-11", h1[-11]) + + h1 = entries1.hash_for(Hash.new(10)) + h2 = entries2.hash_for(Hash.new{_2.to_s}) + assert_same(h1, h1.replace(h2)) + assert_equal(entries2, h1.to_a) + assert_equal("-11", h1[-11]) + + h1 = entries1.hash_for(Hash.new{_2}) + h2 = entries2.hash_for(Hash.new(20)) + assert_same(h1, h1.replace(h2)) + assert_equal(entries2, h1.to_a) + assert_equal(20, h1[-1]) + + h1 = entries1.hash_for(cls.new(10)){|h| h.foo = 41} + h2 = entries2.hash_for(cls.new(20)){|h| h.foo = 42}.freeze + assert_same(h1, h1.replace(h2)) + assert_equal(entries2, h1.to_a) + assert_equal(20, h1.default) + assert_equal(41, h1.foo) + + h1 = entries1.hash_for + h2 = entries2.hash_for(cls.new) + assert_same(h1, h1.replace(h2)) + assert_equal(entries2, h1.to_a) + + assert_raise(TypeError){entries1.hash_for.replace([])} + + k2 = HashKey[-2] + pairs2 = entries2.dup + pairs2 << [k2, 23] + h1 = entries1.hash_for + h2 = pairs2.hash_for + k2.callback = ->(*){h1.clear; h2.clear} + assert_nothing_crashed{h1.replace(h2)} + + assert_raise(FrozenError){h1.freeze.replace(h1)} + assert_raise(FrozenError){{}.freeze.replace({})} + end end -assert('Hash#values', '15.2.13.4.28') do - a = { 'abc_key' => 'abc_value' } +assert('Hash#shift', '15.2.13.4.24') do + [ar_entries, ht_entries].each do |entries| + pairs = entries.dup + h = pairs.hash_for + h.delete(pairs.delete_at(0)[0]) + h.delete(pairs.delete_at(3)[0]) + until pairs.empty? + exp = pairs.shift + act = h.shift + assert_equal(Array, act.class) + assert_equal(exp, act) + assert_equal(exp.size, act.size) + assert_not_operator(h, :key?, exp[0]) + end + + assert_equal(nil, h.shift) + assert_equal(0, h.size) + + h.default = -456 + assert_equal(-456, h.shift) + assert_equal(0, h.size) + + h.freeze + assert_raise(FrozenError){h.shift} + end - assert_equal ['abc_value'], a.values + h = Hash.new{|h, k| [h, k]} + assert_operator(h.shift, :eql?, [h, nil]) + assert_equal(0, h.size) end # Not ISO specified -assert('Hash#eql?') do - a = { 'a' => 1, 'b' => 2, 'c' => 3 } - b = { 'a' => 1, 'b' => 2, 'c' => 3 } - c = { 'a' => 1.0, 'b' => 2, 'c' => 3 } - assert_true(a.eql?(b)) - assert_false(a.eql?(c)) - assert_false(a.eql?(true)) -end - -assert('Hash#reject') do - h = {:one => 1, :two => 2, :three => 3, :four => 4} - ret = h.reject do |k,v| - v % 2 == 0 +%i[reject select].each do |meth| + assert("Hash##{meth}") do + cls = Class.new(Hash){attr_accessor :foo} + [ar_entries, ht_entries].each do |entries| + params = nil + filter = ->((k, v)) do + params << [k, v] + String === k + end + + h = entries.hash_for(cls.new(1)) + params = [] + ret = h.__send__(meth, &filter) + assert_equal(entries, params) + assert_equal(entries, h.to_a) + assert_equal(1, h.default) + assert_equal(entries.__send__(meth, &filter), ret.to_a) + assert_equal(Hash, ret.class) + assert_equal(nil, ret.default) + + params = [] + assert_predicate({}.__send__(meth, &filter), :empty?) + assert_predicate(params, :empty?) + end end - assert_equal({:one => 1, :three => 3}, ret) - assert_equal({:one => 1, :two => 2, :three => 3, :four => 4}, h) end -assert('Hash#reject!') do - h = {:one => 1, :two => 2, :three => 3, :four => 4} - ret = h.reject! do |k,v| - v % 2 == 0 +%i[reject! select!].each do |meth| + assert("Hash##{meth}") do + [ar_entries, ht_entries].each do |entries| + params = nil + filter = ->((k, v)) do + params << [k, v] + String === k + end + + pairs = entries.dup << ["_str", 5] + h = pairs.hash_for(Hash.new(1)) + params = [] + ret = h.__send__(meth, &filter) + assert_same(h, ret) + assert_equal(pairs, params) + assert_equal(pairs.__send__(meth.to_s[0..-2], &filter), h.to_a) + assert_equal(1, h.default) + + h = pairs.hash_for + ret = h.__send__(meth){meth == :select!} + assert_nil(ret) + assert_equal(pairs, h.to_a) + + assert_raise(FrozenError){h.freeze.__send__(meth, &filter)} + end + + h = {} + assert_nil(h.__send__(meth){}) + assert_predicate(h, :empty?) end - assert_equal({:one => 1, :three => 3}, ret) - assert_equal({:one => 1, :three => 3}, h) end -assert('Hash#select') do - h = {:one => 1, :two => 2, :three => 3, :four => 4} - ret = h.select do |k,v| - v % 2 == 0 +%i[inspect to_s].each do |meth| + assert("Hash##{meth}") do + assert_equal('{}', Hash.new.__send__(meth)) + + h1 = {:s => 0, :a => [1,2], 37 => :b, :d => "del", "c" => nil} + h1.shift + h1.delete(:d) + s1 = ':a=>[1, 2], 37=>:b, "c"=>nil' + h2 = Hash.new(100) + + (1..14).each{h2[_1] = _1 * 2} + h2 = {**h2, **h1} + s2 = "1=>2, 2=>4, 3=>6, 4=>8, 5=>10, 6=>12, 7=>14, 8=>16, " \ + "9=>18, 10=>20, 11=>22, 12=>24, 13=>26, 14=>28, #{s1}" + + [[h1, s1], [h2, s2]].each do |h, s| + assert_equal("{#{s}}", h.__send__(meth)) + + hh = {} + hh[:recur] = hh + h.each{|k, v| hh[k] = v} + assert_equal("{:recur=>{...}, #{s}}", hh.__send__(meth)) + + hh = h.dup + hh[hh] = :recur + assert_equal("{#{s}, {...}=>:recur}", hh.__send__(meth)) + end + + [ar_entries, ht_entries].each do |entries| + cls = Class.new do + attr_accessor :h + def inspect; @h.replace(@h.dup); to_s; end + end + v = cls.new + h = entries.hash_for({_k: v}) + v.h = h + assert_nothing_raised{h.__send__(meth)} + end end - assert_equal({:two => 2, :four => 4}, ret) - assert_equal({:one => 1, :two => 2, :three => 3, :four => 4}, h) end -assert('Hash#select!') do - h = {:one => 1, :two => 2, :three => 3, :four => 4} - ret = h.select! do |k,v| - v % 2 == 0 +assert('Hash#rehash') do + cls = Class.new(Hash){attr_accessor :foo} + [ar_entries, ht_entries].each do |entries| + k1, k2, k3 = HashKey[-1], HashKey[-2], HashKey[-3] + pairs = entries.dup.push( + [-4, -40], + [HashKey[-11], -5], + [:_del, "_del"], + [k1, :_k1], + ["_a", "_b"], + [k2, :_k2], + ["_c", "_d"], + [HashKey[-22], -21], + [k3, :_k3], + ) + h = pairs.hash_for(cls.new(:defvar)){|h| h.foo = "f"} + k1.value, k2.value, k3.value = -11, -11, -22 + pairs1 = pairs.dup + pairs1.delete([:_del, h.delete(:_del)]) + exp_pairs1 = pairs1.hash_for.to_a + h.freeze + assert_same(h, h.rehash) + assert_equal(exp_pairs1, h.to_a) + assert_equal(exp_pairs1.size, h.size) + assert_equal(:defvar, h.default) + assert_equal("f", h.foo) + exp_pairs1.each {|k, v| assert_equal(v, h[k])} + + # If an error occurs during rehash, at least the entry list is not broken. + k1.value, k2.value, k3.value = -1, -2, -3 + h = pairs.hash_for + k1.value = -11 + pairs2 = pairs.dup + pairs2.delete([:_del, h.delete(:_del)]) + exp_pairs2 = pairs2.hash_for.to_a + k2.error = :eql? + assert_raise{h.rehash} + act_pairs2 = h.to_a + unless pairs2 == act_pairs2 && pairs2.size == h.size + assert_equal(exp_pairs2, act_pairs2) + assert_equal(exp_pairs2.size, h.size) + end + + k1.value = -1 + k2.error = false + h = pairs.hash_for + k1.callback = ->(name, *){h.clear if name == :eql?} + assert_nothing_crashed{h.rehash} + k1.callback = ->(name, *){h.clear if name == :hash} + assert_nothing_crashed{h.rehash} end - assert_equal({:two => 2, :four => 4}, ret) - assert_equal({:two => 2, :four => 4}, h) -end - -# Not ISO specified - -assert('Hash#inspect') do - h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300 } - h["recur"] = h - ret = h.to_s - assert_include ret, '"c"=>300' - assert_include ret, '"a"=>100' - assert_include ret, '"d"=>400' - assert_include ret, '"recur"=>{...}' + h = {} + assert_same(h, h.rehash) + assert_predicate(h, :empty?) +end + +assert('#eql? receiver should be specified key') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + k0 = HashKey[-99] + h[k0] = 1 + + k1 = HashKey[-3, error: :eql?] + assert_raise{h[k1]} + k0.error = :eql? + k1.error = false + assert_nothing_raised{h[k1]} + + k0.error = false + k1.error = :eql? + assert_raise{h[k1] = 1} + k0.error = :eql? + k1.error = false + assert_nothing_raised{h[k1] = 1} + + k0.error = false + k2 = HashKey[-6, error: :eql?] + assert_raise{h.delete(k2)} + k0.error = :eql? + k2.error = false + assert_nothing_raised{h.delete(k2)} + + k0.error = false + k3 = HashKey[-9, error: :eql?] + %i[has_key? include? key? member?].each do |m| + assert_raise{h.__send__(m, k3)} + end + k0.error = :eql? + k3.error = false + %i[has_key? include? key? member?].each do |m| + assert_nothing_raised{h.__send__(m, k3)} + end + end end -assert('Hash#rehash') do - h = {[:a] => "b"} - # hash key modified - h.keys[0][0] = :b - # h[[:b]] => nil - h.rehash - assert_equal("b", h[[:b]]) -end +assert('#== receiver should be specified value') do + [ar_entries, ht_entries].each do |entries| + h = entries.hash_for + v0 = HashKey[-99] + h[-99] = v0 -assert('Hash#freeze') do - h = {}.freeze - assert_raise(FrozenError) do - h[:a] = 'b' + v1 = HashKey[-3, error: :==] + %i[has_value? value?].each{|m| assert_raise{h.__send__(m, v1)}} + v0.error = :== + v1.error = false + %i[has_value? value?].each{|m| assert_nothing_raised{h.__send__(m, v1)}} end end |
