diff options
Diffstat (limited to 'src/hash.c')
| -rw-r--r-- | src/hash.c | 982 |
1 files changed, 748 insertions, 234 deletions
diff --git a/src/hash.c b/src/hash.c index db9d1d9c8..2a0a19363 100644 --- a/src/hash.c +++ b/src/hash.c @@ -8,7 +8,6 @@ #include <mruby/array.h> #include <mruby/class.h> #include <mruby/hash.h> -#include <mruby/khash.h> #include <mruby/string.h> #include <mruby/variable.h> @@ -17,14 +16,47 @@ mrb_int mrb_float_id(mrb_float f); #endif -static inline khint_t -mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) +#ifndef MRB_HT_INIT_SIZE +#define MRB_HT_INIT_SIZE 4 +#endif +#define HT_SEG_INCREASE_RATIO 6 / 5 + +struct segkv { + mrb_value key; + mrb_value val; +}; + +typedef struct segment { + uint16_t size; + struct segment *next; + struct segkv e[]; +} segment; + +typedef struct segindex { + size_t size; + size_t capa; + struct segkv *table[]; +} segindex; + +/* hash table structure */ +typedef struct htable { + segment *rootseg; + segment *lastseg; + mrb_int size; + uint16_t last_len; + segindex *index; +} htable; + +static /* inline */ size_t +ht_hash_func(mrb_state *mrb, htable *t, mrb_value key) { - enum mrb_vtype t = mrb_type(key); + enum mrb_vtype tt = mrb_type(key); mrb_value hv; - khint_t h; + size_t h; + segindex *index = t->index; + size_t capa = index ? index->capa : 0; - switch (t) { + switch (tt) { case MRB_TT_STRING: h = mrb_str_hash(mrb, key); break; @@ -36,23 +68,26 @@ mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) #ifndef MRB_WITHOUT_FLOAT case MRB_TT_FLOAT: #endif - h = (khint_t)mrb_obj_id(key); + h = (size_t)mrb_obj_id(key); break; default: hv = mrb_funcall(mrb, key, "hash", 0); - h = (khint_t)t ^ (khint_t)mrb_fixnum(hv); + h = (size_t)t ^ (size_t)mrb_fixnum(hv); break; } - return kh_int_hash_func(mrb, h); + if (index && (index != t->index || capa != index->capa)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } + return ((h)^((h)<<2)^((h)>>2)); } -static inline khint_t -mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) +static inline mrb_bool +ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) { - enum mrb_vtype t = mrb_type(a); + enum mrb_vtype tt = mrb_type(a); - switch (t) { + switch (tt) { case MRB_TT_STRING: return mrb_str_equal(mrb, a, b); @@ -85,16 +120,461 @@ mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) #endif default: - return mrb_eql(mrb, a, b); + { + segindex *index = t->index; + size_t capa = index ? index->capa : 0; + mrb_bool eql = mrb_eql(mrb, a, b); + if (index && (index != t->index || capa != index->capa)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } + return eql; + } + } +} + +/* Creates the hash table. */ +static htable* +ht_new(mrb_state *mrb) +{ + htable *t; + + t = (htable*)mrb_malloc(mrb, sizeof(htable)); + t->size = 0; + t->rootseg = NULL; + t->lastseg = NULL; + t->last_len = 0; + t->index = NULL; + + return t; +} + +#define power2(v) do { \ + v--;\ + v |= v >> 1;\ + v |= v >> 2;\ + v |= v >> 4;\ + v |= v >> 8;\ + v |= v >> 16;\ + v++;\ +} while (0) + +#ifndef UPPER_BOUND +#define UPPER_BOUND(x) ((x)>>2|(x)>>1) +#endif + +#define HT_MASK(index) ((index->capa)-1) + +/* Build index for the hash table */ +static void +ht_index(mrb_state *mrb, htable *t) +{ + size_t size = (size_t)t->size; + size_t mask; + segindex *index = t->index; + segment *seg; + size_t i; + + /* allocate index table */ + if (index && index->size >= UPPER_BOUND(index->capa)) { + size = index->capa+1; + } + power2(size); + if (!index || index->capa < size) { + index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); + if (index == NULL) { + mrb_free(mrb, t->index); + t->index = NULL; + return; + } + t->index = index; + } + index->size = t->size; + index->capa = size; + for (i=0; i<size; i++) { + index->table[i] = NULL; + } + + /* rebuld index */ + mask = HT_MASK(index); + seg = t->rootseg; + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value key; + size_t k, step = 0; + + if (!seg->next && i >= (size_t)t->last_len) { + return; + } + key = seg->e[i].key; + if (mrb_undef_p(key)) continue; + k = ht_hash_func(mrb, t, key) & mask; + while (index->table[k]) { + k = (k+(++step)) & mask; + } + index->table[k] = &seg->e[i]; + } + seg = seg->next; + } +} + +/* Compacts the hash table removing deleted entries. */ +static void +ht_compact(mrb_state *mrb, htable *t) +{ + segment *seg; + mrb_int i; + segment *seg2 = NULL; + mrb_int i2; + mrb_int size = 0; + + if (t == NULL) return; + seg = t->rootseg; + if (t->index && (size_t)t->size == t->index->size) { + ht_index(mrb, t); + return; + } + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value k = seg->e[i].key; + + if (!seg->next && i >= t->last_len) { + goto exit; + } + if (mrb_undef_p(k)) { /* found deleted key */ + if (seg2 == NULL) { + seg2 = seg; + i2 = i; + } + } + else { + size++; + if (seg2 != NULL) { + seg2->e[i2++] = seg->e[i]; + if (i2 >= seg2->size) { + seg2 = seg2->next; + i2 = 0; + } + } + } + } + seg = seg->next; + } + exit: + /* reached at end */ + t->size = size; + if (seg2) { + seg = seg2->next; + seg2->next = NULL; + t->last_len = i2; + t->lastseg = seg2; + while (seg) { + seg2 = seg->next; + mrb_free(mrb, seg); + seg = seg2; + } + } + if (t->index) { + ht_index(mrb, t); + } +} + +static segment* +segment_alloc(mrb_state *mrb, segment *seg) +{ + uint32_t size; + + if (!seg) size = MRB_HT_INIT_SIZE; + else { + size = seg->size*HT_SEG_INCREASE_RATIO + 1; + if (size > UINT16_MAX) size = UINT16_MAX; + } + + seg = (segment*)mrb_malloc(mrb, sizeof(segment)+sizeof(struct segkv)*size); + seg->size = size; + seg->next = NULL; + + return seg; +} + +/* Set the value for the key in the indexed table. */ +static void +ht_index_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) +{ + segindex *index = t->index; + size_t k, sp, step = 0, mask; + segment *seg; + + if (index->size >= UPPER_BOUND(index->capa)) { + /* need to expand table */ + ht_compact(mrb, t); + index = t->index; + } + mask = HT_MASK(index); + sp = index->capa; + k = ht_hash_func(mrb, t, key) & mask; + while (index->table[k]) { + mrb_value key2 = index->table[k]->key; + if (mrb_undef_p(key2)) { + if (sp == index->capa) sp = k; + } + else if (ht_hash_equal(mrb, t, key, key2)) { + index->table[k]->val = val; + return; + } + k = (k+(++step)) & mask; + } + if (sp < index->capa) { + k = sp; + } + + /* put the value at the last */ + seg = t->lastseg; + if (t->last_len < seg->size) { + index->table[k] = &seg->e[t->last_len++]; + } + else { /* append a new segment */ + seg->next = segment_alloc(mrb, seg); + seg = seg->next; + seg->next = NULL; + t->lastseg = seg; + t->last_len = 1; + index->table[k] = &seg->e[0]; + } + index->table[k]->key = key; + index->table[k]->val = val; + index->size++; + t->size++; +} + +/* Set the value for the key in the hash table. */ +static void +ht_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) +{ + segment *seg; + mrb_int i, deleted = 0; + + if (t == NULL) return; + if (t->index) { + ht_index_put(mrb, t, key, val); + return; + } + seg = t->rootseg; + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value k = seg->e[i].key; + /* Found room in last segment after last_len */ + if (!seg->next && i >= t->last_len) { + seg->e[i].key = key; + seg->e[i].val = val; + t->last_len = i+1; + t->size++; + return; + } + if (mrb_undef_p(k)) { + deleted++; + continue; + } + if (ht_hash_equal(mrb, t, k, key)) { + seg->e[i].val = val; + return; + } + } + seg = seg->next; + } + /* Not found */ + + /* Compact if last segment has room */ + if (deleted > 0 && deleted > MRB_HT_INIT_SIZE) { + ht_compact(mrb, t); + } + t->size++; + + /* check if thre's room after compaction */ + if (t->lastseg && t->last_len < t->lastseg->size) { + seg = t->lastseg; + i = t->last_len; + } + else { + /* append new segment */ + seg = segment_alloc(mrb, t->lastseg); + i = 0; + if (t->rootseg == NULL) { + t->rootseg = seg; + } + else { + t->lastseg->next = seg; + } + t->lastseg = seg; + } + seg->e[i].key = key; + seg->e[i].val = val; + t->last_len = i+1; + if (t->index == NULL && t->size > MRB_HT_INIT_SIZE*4) { + ht_index(mrb, t); } } -KHASH_DEFINE (ht, mrb_value, mrb_hash_value, TRUE, mrb_hash_ht_hash_func, mrb_hash_ht_hash_equal) +/* Get a value for a key from the indexed table. */ +static mrb_bool +ht_index_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +{ + segindex *index = t->index; + size_t mask = HT_MASK(index); + size_t k = ht_hash_func(mrb, t, key) & mask; + size_t step = 0; + + while (index->table[k]) { + mrb_value key2 = index->table[k]->key; + if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { + if (vp) *vp = index->table[k]->val; + return TRUE; + } + k = (k+(++step)) & mask; + } + return FALSE; +} + +/* Get a value for a key from the hash table. */ +static mrb_bool +ht_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +{ + segment *seg; + mrb_int i; + + if (t == NULL) return FALSE; + if (t->index) { + return ht_index_get(mrb, t, key, vp); + } + + seg = t->rootseg; + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value k = seg->e[i].key; + + if (!seg->next && i >= t->last_len) { + return FALSE; + } + if (mrb_undef_p(k)) continue; + if (ht_hash_equal(mrb, t, k, key)) { + if (vp) *vp = seg->e[i].val; + return TRUE; + } + } + seg = seg->next; + } + return FALSE; +} + +/* Deletes the value for the symbol from the hash table. */ +/* Deletion is done by overwriting keys by `undef`. */ +static mrb_bool +ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) +{ + segment *seg; + mrb_int i; + + if (t == NULL) return FALSE; + seg = t->rootseg; + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value key2; + + if (!seg->next && i >= t->last_len) { + /* not found */ + return FALSE; + } + key2 = seg->e[i].key; + if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { + if (vp) *vp = seg->e[i].val; + seg->e[i].key = mrb_undef_value(); + t->size--; + return TRUE; + } + } + seg = seg->next; + } + return FALSE; +} + +/* Iterates over the hash table. */ +static void +ht_foreach(mrb_state *mrb, htable *t, mrb_hash_foreach_func *func, void *p) +{ + segment *seg; + mrb_int i; + + if (t == NULL) return; + seg = t->rootseg; + while (seg) { + for (i=0; i<seg->size; i++) { + /* no value in last segment after last_len */ + if (!seg->next && i >= t->last_len) { + return; + } + if (mrb_undef_p(seg->e[i].key)) continue; + if ((*func)(mrb, seg->e[i].key, seg->e[i].val, p) != 0) + return; + } + seg = seg->next; + } +} + +/* Iterates over the hash table. */ +MRB_API void +mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, mrb_hash_foreach_func *func, void *p) +{ + ht_foreach(mrb, hash->ht, func, p); +} + +/* Copy the hash table. */ +static htable* +ht_copy(mrb_state *mrb, htable *t) +{ + segment *seg; + htable *t2; + mrb_int i; + + seg = t->rootseg; + t2 = ht_new(mrb); + if (t->size == 0) return t2; + + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value key = seg->e[i].key; + mrb_value val = seg->e[i].val; + + if ((seg->next == NULL) && (i >= t->last_len)) { + return t2; + } + if (mrb_undef_p(key)) continue; /* skip deleted key */ + ht_put(mrb, t2, key, val); + } + seg = seg->next; + } + return t2; +} + +/* Free memory of the hash table. */ +static void +ht_free(mrb_state *mrb, htable *t) +{ + segment *seg; + + if (!t) return; + seg = t->rootseg; + while (seg) { + segment *p = seg; + seg = seg->next; + mrb_free(mrb, p); + } + if (t->index) mrb_free(mrb, t->index); + mrb_free(mrb, t); +} static void mrb_hash_modify(mrb_state *mrb, mrb_value hash); static inline mrb_value -mrb_hash_ht_key(mrb_state *mrb, mrb_value key) +ht_key(mrb_state *mrb, mrb_value key) { if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { key = mrb_str_dup(mrb, key); @@ -103,59 +583,57 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key) return key; } -#define KEY(key) mrb_hash_ht_key(mrb, key) +#define KEY(key) ht_key(mrb, key) + +static int +hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + mrb_gc_mark_value(mrb, key); + mrb_gc_mark_value(mrb, val); + return 0; +} void mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash) { - khiter_t k; - khash_t(ht) *h = hash->ht; - - if (!h) return; - for (k = kh_begin(h); k != kh_end(h); k++) { - if (kh_exist(h, k)) { - mrb_value key = kh_key(h, k); - mrb_value val = kh_value(h, k).v; - - mrb_gc_mark_value(mrb, key); - mrb_gc_mark_value(mrb, val); - } - } + ht_foreach(mrb, hash->ht, hash_mark_i, NULL); } size_t mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *hash) { if (!hash->ht) return 0; - return kh_size(hash->ht)*2; + return hash->ht->size*2; } void mrb_gc_free_hash(mrb_state *mrb, struct RHash *hash) { - if (hash->ht) kh_destroy(ht, mrb, hash->ht); + ht_free(mrb, hash->ht); } - MRB_API mrb_value -mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) +mrb_hash_new(mrb_state *mrb) { struct RHash *h; h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - /* khash needs 1/4 empty space so it is not resized immediately */ - if (capa == 0) - h->ht = 0; - else - h->ht = kh_init_size(ht, mrb, (khint_t)(capa*4/3)); + h->ht = 0; h->iv = 0; return mrb_obj_value(h); } MRB_API mrb_value -mrb_hash_new(mrb_state *mrb) +mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) { - return mrb_hash_new_capa(mrb, 0); + struct RHash *h; + + h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); + /* preallocate hash table */ + h->ht = ht_new(mrb); + /* capacity ignored */ + h->iv = 0; + return mrb_obj_value(h); } static mrb_value mrb_hash_default(mrb_state *mrb, mrb_value hash); @@ -166,7 +644,7 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) { mrb_value orig; struct RHash* copy; - khash_t(ht) *orig_h; + htable *orig_h; mrb_value ifnone, vret; mrb_get_args(mrb, "o", &orig); @@ -177,22 +655,7 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) orig_h = RHASH_TBL(self); copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = kh_init(ht, mrb); - - if (orig_h && kh_size(orig_h) > 0) { - khash_t(ht) *copy_h = copy->ht; - khiter_t k, copy_k; - - for (k = kh_begin(orig_h); k != kh_end(orig_h); k++) { - if (kh_exist(orig_h, k)) { - int ai = mrb_gc_arena_save(mrb); - copy_k = kh_put(ht, mrb, copy_h, KEY(kh_key(orig_h, k))); - mrb_gc_arena_restore(mrb, ai); - kh_val(copy_h, copy_k).v = kh_val(orig_h, k).v; - kh_val(copy_h, copy_k).n = kh_size(copy_h)-1; - } - } - } + copy->ht = ht_copy(mrb, orig_h); if (MRB_RHASH_DEFAULT_P(self)) { copy->flags |= MRB_HASH_DEFAULT; @@ -208,17 +671,45 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) return vret; } +static int +check_kdict_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +{ + if (!mrb_symbol_p(key)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); + } + return 0; +} + +void +mrb_hash_check_kdict(mrb_state *mrb, mrb_value self) +{ + htable *t; + + t = RHASH_TBL(self); + if (!t || t->size == 0) return; + ht_foreach(mrb, t, check_kdict_i, NULL); +} + +MRB_API mrb_value +mrb_hash_dup(mrb_state *mrb, mrb_value self) +{ + struct RHash* copy; + htable *orig_h; + + orig_h = RHASH_TBL(self); + copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); + copy->ht = orig_h ? ht_copy(mrb, orig_h) : NULL; + return mrb_obj_value(copy); +} + MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; + mrb_value val; mrb_sym mid; - if (h) { - k = kh_get(ht, mrb, h, key); - if (k != kh_end(h)) - return kh_value(h, k).v; + if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { + return val; } mid = mrb_intern_lit(mrb, "default"); @@ -232,15 +723,11 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) MRB_API mrb_value mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; + mrb_value val; - if (h) { - k = kh_get(ht, mrb, h, key); - if (k != kh_end(h)) - return kh_value(h, k).v; + if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { + return val; } - /* not found */ return def; } @@ -248,54 +735,22 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) MRB_API void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) { - khash_t(ht) *h; - khiter_t k; - int r; - mrb_hash_modify(mrb, hash); - h = RHASH_TBL(hash); - - if (!h) h = RHASH_TBL(hash) = kh_init(ht, mrb); - k = kh_put2(ht, mrb, h, key, &r); - kh_value(h, k).v = val; - - if (r != 0) { - /* expand */ - int ai = mrb_gc_arena_save(mrb); - key = kh_key(h, k) = KEY(key); - mrb_gc_arena_restore(mrb, ai); - kh_value(h, k).n = kh_size(h)-1; - } + key = KEY(key); + ht_put(mrb, RHASH_TBL(hash), key, val); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val); return; } -MRB_API mrb_value -mrb_check_hash_type(mrb_state *mrb, mrb_value hash) -{ - return mrb_check_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash"); -} - -MRB_API khash_t(ht)* -mrb_hash_tbl(mrb_state *mrb, mrb_value hash) -{ - khash_t(ht) *h = RHASH_TBL(hash); - - if (!h) { - return RHASH_TBL(hash) = kh_init(ht, mrb); - } - return h; -} - static void mrb_hash_modify(mrb_state *mrb, mrb_value hash) { - if (MRB_FROZEN_P(mrb_hash_ptr(hash))) { - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen hash"); + mrb_check_frozen(mrb, mrb_hash_ptr(hash)); + if (!RHASH_TBL(hash)) { + RHASH_TBL(hash) = ht_new(mrb); } - mrb_hash_tbl(mrb, hash); } /* 15.2.13.4.16 */ @@ -535,47 +990,17 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; - mrb_value delVal; - mrb_int n; - - if (h) { - k = kh_get(ht, mrb, h, key); - if (k != kh_end(h)) { - delVal = kh_value(h, k).v; - n = kh_value(h, k).n; - kh_del(ht, mrb, h, k); - for (k = kh_begin(h); k != kh_end(h); k++) { - if (!kh_exist(h, k)) continue; - if (kh_value(h, k).n > n) kh_value(h, k).n--; - } - return delVal; - } + htable *t = RHASH_TBL(hash); + mrb_value del_val; + + if (ht_del(mrb, t, key, &del_val)) { + return del_val; } /* not found */ return mrb_nil_value(); } -/* 15.2.13.4.8 */ -/* - * call-seq: - * hsh.delete(key) -> value - * hsh.delete(key) {| key | block } -> value - * - * Deletes and returns a key-value pair from <i>hsh</i> whose key is - * equal to <i>key</i>. If the key is not found, returns the - * <em>default value</em>. If the optional code block is given and the - * key is not found, pass in the key and return the result of - * <i>block</i>. - * - * h = { "a" => 100, "b" => 200 } - * h.delete("a") #=> 100 - * h.delete("z") #=> nil - * h.delete("z") { |el| "#{el} not found" } #=> "z not found" - * - */ static mrb_value mrb_hash_delete(mrb_state *mrb, mrb_value self) { @@ -586,6 +1011,33 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) return mrb_hash_delete_key(mrb, self, key); } +/* find first element in the hash table, and remove it. */ +static void +ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) +{ + segment *seg = t->rootseg; + mrb_int i; + + while (seg) { + for (i=0; i<seg->size; i++) { + mrb_value key; + + if (!seg->next && i >= t->last_len) { + return; + } + key = seg->e[i].key; + if (mrb_undef_p(key)) continue; + *kp = key; + *vp = seg->e[i].val; + /* delete element */ + seg->e[i].key = mrb_undef_value(); + t->size--; + return; + } + seg = seg->next; + } +} + /* 15.2.13.4.24 */ /* * call-seq: @@ -603,22 +1055,16 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) static mrb_value mrb_hash_shift(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; - mrb_value delKey, delVal; + htable *t = RHASH_TBL(hash); mrb_hash_modify(mrb, hash); - if (h && kh_size(h) > 0) { - for (k = kh_begin(h); k != kh_end(h); k++) { - if (!kh_exist(h, k)) continue; - - delKey = kh_key(h, k); - mrb_gc_protect(mrb, delKey); - delVal = mrb_hash_delete_key(mrb, hash, delKey); - mrb_gc_protect(mrb, delVal); + if (t && t->size > 0) { + mrb_value del_key, del_val; - return mrb_assoc_new(mrb, delKey, delVal); - } + ht_shift(mrb, t, &del_key, &del_val); + mrb_gc_protect(mrb, del_key); + mrb_gc_protect(mrb, del_val); + return mrb_assoc_new(mrb, del_key, del_val); } if (MRB_RHASH_DEFAULT_P(hash)) { @@ -647,10 +1093,13 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); + htable *t = RHASH_TBL(hash); mrb_hash_modify(mrb, hash); - if (h) kh_clear(ht, mrb, h); + if (t) { + ht_free(mrb, t); + RHASH_TBL(hash) = NULL; + } return hash; } @@ -683,6 +1132,15 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) return val; } +MRB_API mrb_int +mrb_hash_size(mrb_state *mrb, mrb_value hash) +{ + htable *t = RHASH_TBL(hash); + + if (!t) return 0; + return t->size; +} + /* 15.2.13.4.20 */ /* 15.2.13.4.25 */ /* @@ -700,10 +1158,17 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) static mrb_value mrb_hash_size_m(mrb_state *mrb, mrb_value self) { - khash_t(ht) *h = RHASH_TBL(self); + mrb_int size = mrb_hash_size(mrb, self); + return mrb_fixnum_value(size); +} + +MRB_API mrb_bool +mrb_hash_empty_p(mrb_state *mrb, mrb_value self) +{ + htable *t = RHASH_TBL(self); - if (!h) return mrb_fixnum_value(0); - return mrb_fixnum_value(kh_size(h)); + if (!t) return TRUE; + return t->size == 0; } /* 15.2.13.4.12 */ @@ -716,27 +1181,17 @@ mrb_hash_size_m(mrb_state *mrb, mrb_value self) * {}.empty? #=> true * */ -MRB_API mrb_value -mrb_hash_empty_p(mrb_state *mrb, mrb_value self) +static mrb_value +mrb_hash_empty_m(mrb_state *mrb, mrb_value self) { - khash_t(ht) *h = RHASH_TBL(self); - - if (h) return mrb_bool_value(kh_size(h) == 0); - return mrb_true_value(); + return mrb_bool_value(mrb_hash_empty_p(mrb, self)); } -/* 15.2.13.4.29 (x)*/ -/* - * call-seq: - * hsh.to_hash => hsh - * - * Returns +self+. - */ - -static mrb_value -mrb_hash_to_hash(mrb_state *mrb, mrb_value hash) +static int +hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) { - return hash; + mrb_ary_push(mrb, *(mrb_value*)p, key); + return 0; } /* 15.2.13.4.19 */ @@ -755,33 +1210,24 @@ mrb_hash_to_hash(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_keys(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; - mrb_int end; + htable *t = RHASH_TBL(hash); + mrb_int size; mrb_value ary; - mrb_value *p; - - if (!h || kh_size(h) == 0) return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, kh_size(h)); - end = kh_size(h)-1; - mrb_ary_set(mrb, ary, end, mrb_nil_value()); - p = RARRAY_PTR(ary); - for (k = kh_begin(h); k != kh_end(h); k++) { - if (kh_exist(h, k)) { - mrb_value kv = kh_key(h, k); - mrb_hash_value hv = kh_value(h, k); - - if (hv.n <= end) { - p[hv.n] = kv; - } - else { - p[end] = kv; - } - } - } + + if (!t || (size = t->size) == 0) + return mrb_ary_new(mrb); + ary = mrb_ary_new_capa(mrb, size); + ht_foreach(mrb, t, hash_keys_i, (void*)&ary); return ary; } +static int +hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + mrb_ary_push(mrb, *(mrb_value*)p, val); + return 0; +} + /* 15.2.13.4.28 */ /* * call-seq: @@ -798,19 +1244,14 @@ mrb_hash_keys(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_values(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; + htable *t = RHASH_TBL(hash); + mrb_int size; mrb_value ary; - if (!h) return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, kh_size(h)); - for (k = kh_begin(h); k != kh_end(h); k++) { - if (kh_exist(h, k)) { - mrb_hash_value hv = kh_value(h, k); - - mrb_ary_set(mrb, ary, hv.n, hv.v); - } - } + if (!t || (size = t->size) == 0) + return mrb_ary_new(mrb); + ary = mrb_ary_new_capa(mrb, size); + ht_foreach(mrb, t, hash_vals_i, (void*)&ary); return ary; } @@ -833,21 +1274,44 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) * */ +MRB_API mrb_bool +mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + htable *t; + + t = RHASH_TBL(hash); + if (ht_get(mrb, t, key, NULL)) { + return TRUE; + } + return FALSE; +} + static mrb_value mrb_hash_has_key(mrb_state *mrb, mrb_value hash) { mrb_value key; - khash_t(ht) *h; - khiter_t k; + mrb_bool key_p; mrb_get_args(mrb, "o", &key); + key_p = mrb_hash_key_p(mrb, hash, key); + return mrb_bool_value(key_p); +} - h = RHASH_TBL(hash); - if (h) { - k = kh_get(ht, mrb, h, key); - return mrb_bool_value(k != kh_end(h)); +struct has_v_arg { + mrb_bool found; + mrb_value val; +}; + +static int +hash_has_value_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + struct has_v_arg *arg = (struct has_v_arg*)p; + + if (mrb_equal(mrb, arg->val, val)) { + arg->found = TRUE; + return 1; } - return mrb_false_value(); + return 0; } /* 15.2.13.4.14 */ @@ -869,22 +1333,73 @@ static mrb_value mrb_hash_has_value(mrb_state *mrb, mrb_value hash) { mrb_value val; - khash_t(ht) *h; - khiter_t k; - + struct has_v_arg arg; + mrb_get_args(mrb, "o", &val); - h = RHASH_TBL(hash); + arg.found = FALSE; + arg.val = val; + ht_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg); + return mrb_bool_value(arg.found); +} - if (h) { - for (k = kh_begin(h); k != kh_end(h); k++) { - if (!kh_exist(h, k)) continue; +static int +merge_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +{ + htable *h1 = (htable*)data; - if (mrb_equal(mrb, kh_value(h, k).v, val)) { - return mrb_true_value(); - } - } + ht_put(mrb, h1, key, val); + return 0; +} + +MRB_API void +mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) +{ + htable *h1, *h2; + + mrb_hash_modify(mrb, hash1); + hash2 = mrb_ensure_hash_type(mrb, hash2); + h1 = RHASH_TBL(hash1); + h2 = RHASH_TBL(hash2); + + if (!h2) return; + if (!h1) { + RHASH_TBL(hash1) = ht_copy(mrb, h2); + return; } - return mrb_false_value(); + ht_foreach(mrb, h2, merge_i, h1); + mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1)); + return; +} + +/* + * call-seq: + * hsh.rehash -> hsh + * + * Rebuilds the hash based on the current hash values for each key. If + * values of key objects have changed since they were inserted, this + * method will reindex <i>hsh</i>. + * + * keys = (1..17).map{|n| [n]} + * k = keys[0] + * h = {} + * keys.each{|key| h[key] = key[0]} + * h #=> { [1]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, + * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, + * [15]=>15,[16]=>16,[17]=>17} + * h[k] #=> 1 + * k[0] = keys.size + 1 + * h #=> {[18]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, + * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, + * [15]=>15,[16]=>16,[17]=>17} + * h[k] #=> nil + * h.rehash + * h[k] #=> 1 + */ +static mrb_value +mrb_hash_rehash(mrb_state *mrb, mrb_value self) +{ + ht_compact(mrb, RHASH_TBL(self)); + return self; } void @@ -904,7 +1419,7 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,MRB_ARGS_NONE()); /* 15.2.13.4.7 */ mrb_define_method(mrb, h, "default_proc=", mrb_hash_set_default_proc,MRB_ARGS_REQ(1)); /* 15.2.13.4.7 */ mrb_define_method(mrb, h, "__delete", mrb_hash_delete, MRB_ARGS_REQ(1)); /* core of 15.2.13.4.8 */ - mrb_define_method(mrb, h, "empty?", mrb_hash_empty_p, MRB_ARGS_NONE()); /* 15.2.13.4.12 */ + mrb_define_method(mrb, h, "empty?", mrb_hash_empty_m, MRB_ARGS_NONE()); /* 15.2.13.4.12 */ mrb_define_method(mrb, h, "has_key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.13 */ mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.14 */ mrb_define_method(mrb, h, "include?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.15 */ @@ -918,6 +1433,5 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "store", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.26 */ mrb_define_method(mrb, h, "value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.27 */ mrb_define_method(mrb, h, "values", mrb_hash_values, MRB_ARGS_NONE()); /* 15.2.13.4.28 */ - - mrb_define_method(mrb, h, "to_hash", mrb_hash_to_hash, MRB_ARGS_NONE()); /* 15.2.13.4.29 (x)*/ + mrb_define_method(mrb, h, "rehash", mrb_hash_rehash, MRB_ARGS_NONE()); } |
