From 8c9e7127845f84fcbb249c45936c97a89ca7a80a Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 30 Jul 2018 22:07:31 +0900 Subject: Keyword argument implemented. --- src/hash.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 11 deletions(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index db9d1d9c8..0dce81677 100644 --- a/src/hash.c +++ b/src/hash.c @@ -208,6 +208,54 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) return vret; } +void +mrb_hash_check_kdict(mrb_state *mrb, mrb_value self) +{ + khash_t(ht) *orig_h; + khiter_t k; + int nosym = FALSE; + + orig_h = RHASH_TBL(self); + if (!orig_h || kh_size(orig_h) == 0) return; + for (k = kh_begin(orig_h); k != kh_end(orig_h); k++) { + if (kh_exist(orig_h, k)) { + mrb_value key = kh_key(orig_h, k); + + if (!mrb_symbol_p(key)) nosym = TRUE; + } + } + if (nosym) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); + } +} + +MRB_API mrb_value +mrb_hash_dup(mrb_state *mrb, mrb_value self) +{ + struct RHash* copy; + khash_t(ht) *orig_h; + + orig_h = RHASH_TBL(self); + copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); + copy->ht = kh_init(ht, mrb); + + if (orig_h && kh_size(orig_h) > 0) { + int ai = mrb_gc_arena_save(mrb); + khash_t(ht) *copy_h = copy->ht; + khiter_t k, copy_k; + + for (k = kh_begin(orig_h); k != kh_end(orig_h); k++) { + if (kh_exist(orig_h, k)) { + copy_k = kh_put(ht, mrb, copy_h, KEY(kh_key(orig_h, k))); + mrb_gc_arena_restore(mrb, ai); + kh_val(copy_h, copy_k).v = kh_val(orig_h, k).v; + kh_val(copy_h, copy_k).n = kh_size(copy_h)-1; + } + } + } + return mrb_obj_value(copy); +} + MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) { @@ -716,13 +764,21 @@ mrb_hash_size_m(mrb_state *mrb, mrb_value self) * {}.empty? #=> true * */ -MRB_API mrb_value +MRB_API mrb_bool mrb_hash_empty_p(mrb_state *mrb, mrb_value self) { khash_t(ht) *h = RHASH_TBL(self); - if (h) return mrb_bool_value(kh_size(h) == 0); - return mrb_true_value(); + if (h) return kh_size(h) == 0; + return TRUE; +} + +static mrb_value +mrb_hash_empty_m(mrb_state *mrb, mrb_value self) +{ + if (mrb_hash_empty_p(mrb, self)) + return mrb_true_value(); + return mrb_false_value(); } /* 15.2.13.4.29 (x)*/ @@ -833,21 +889,29 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) * */ -static mrb_value -mrb_hash_has_key(mrb_state *mrb, mrb_value hash) +MRB_API mrb_bool +mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) { - mrb_value key; khash_t(ht) *h; khiter_t k; - mrb_get_args(mrb, "o", &key); - h = RHASH_TBL(hash); if (h) { k = kh_get(ht, mrb, h, key); - return mrb_bool_value(k != kh_end(h)); + return k != kh_end(h); } - return mrb_false_value(); + return FALSE; +} + +static mrb_value +mrb_hash_has_key(mrb_state *mrb, mrb_value hash) +{ + mrb_value key; + mrb_bool key_p; + + mrb_get_args(mrb, "o", &key); + key_p = mrb_hash_key_p(mrb, hash, key); + return mrb_bool_value(key_p); } /* 15.2.13.4.14 */ @@ -904,7 +968,7 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,MRB_ARGS_NONE()); /* 15.2.13.4.7 */ mrb_define_method(mrb, h, "default_proc=", mrb_hash_set_default_proc,MRB_ARGS_REQ(1)); /* 15.2.13.4.7 */ mrb_define_method(mrb, h, "__delete", mrb_hash_delete, MRB_ARGS_REQ(1)); /* core of 15.2.13.4.8 */ - mrb_define_method(mrb, h, "empty?", mrb_hash_empty_p, MRB_ARGS_NONE()); /* 15.2.13.4.12 */ + mrb_define_method(mrb, h, "empty?", mrb_hash_empty_m, MRB_ARGS_NONE()); /* 15.2.13.4.12 */ mrb_define_method(mrb, h, "has_key?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.13 */ mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.14 */ mrb_define_method(mrb, h, "include?", mrb_hash_has_key, MRB_ARGS_REQ(1)); /* 15.2.13.4.15 */ -- cgit v1.2.3 From 3554fc54de68e2cf92121147df071b57b50663b1 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Sat, 25 Aug 2018 09:08:16 +0900 Subject: Add a new function `mrb_hash_merge()`. --- include/mruby/hash.h | 10 ++++++++++ src/hash.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 9a3812850..2026c8e0d 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -174,6 +174,16 @@ MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash); */ MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value hash); +/* + * Merges two hashes. The first hash will be modified by the + * second hash. + * + * @param mrb The mruby state reference. + * @param hash1 The target hash. + * @param hash2 Updating hash + */ +MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2); + /* declaration of struct kh_ht */ /* be careful when you touch the internal */ typedef struct { diff --git a/src/hash.c b/src/hash.c index 0dce81677..122f5b5d5 100644 --- a/src/hash.c +++ b/src/hash.c @@ -951,6 +951,39 @@ mrb_hash_has_value(mrb_state *mrb, mrb_value hash) return mrb_false_value(); } +MRB_API void +mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) +{ + khash_t(ht) *h1; + khash_t(ht) *h2; + khiter_t k; + + mrb_hash_modify(mrb, hash1); + hash2 = mrb_check_hash_type(mrb, hash2); + h1 = RHASH_TBL(hash1); + h2 = RHASH_TBL(hash2); + + if (!h1) { + RHASH_TBL(hash1) = kh_copy(ht, mrb, h2); + return; + } + for (k = kh_begin(h2); k != kh_end(h2); k++) { + khiter_t k1; + int r; + + if (!kh_exist(h2, k)) continue; + k1 = kh_put2(ht, mrb, h1, kh_key(h2, k), &r); + kh_value(h1, k1).v = kh_value(h2,k).v; + if (r != 0) { + /* expand */ + kh_key(h1, k1) = kh_key(h2, k); + kh_value(h1, k1).n = kh_size(h1)-1; + } + } + mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1)); + return; +} + void mrb_init_hash(mrb_state *mrb) { -- cgit v1.2.3 From f6564dd83e058fee6b264b5ce6db6d868c22d94a Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 30 Aug 2018 22:14:10 +0900 Subject: Add new function `mrb_ensure_hash_type()`; ref #4097 Unlike `mrb_check_hash_type()` that returns `nil` if the argument is not a `Hash`, `mrb_ensure_hash_type()` raises a `TypeError` exception. --- include/mruby/hash.h | 1 + src/hash.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 2026c8e0d..449a7d4bc 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -25,6 +25,7 @@ struct RHash { #define mrb_hash_value(p) mrb_obj_value((void*)(p)) MRB_API mrb_value mrb_hash_new_capa(mrb_state*, mrb_int); +MRB_API mrb_value mrb_ensure_hash_type(mrb_state *mrb, mrb_value hash); MRB_API mrb_value mrb_check_hash_type(mrb_state *mrb, mrb_value hash); /* diff --git a/src/hash.c b/src/hash.c index 122f5b5d5..7d81bc343 100644 --- a/src/hash.c +++ b/src/hash.c @@ -320,6 +320,12 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) return; } +MRB_API mrb_value +mrb_ensure_hash_type(mrb_state *mrb, mrb_value hash) +{ + return mrb_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash"); +} + MRB_API mrb_value mrb_check_hash_type(mrb_state *mrb, mrb_value hash) { @@ -959,7 +965,7 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) khiter_t k; mrb_hash_modify(mrb, hash1); - hash2 = mrb_check_hash_type(mrb, hash2); + hash2 = mrb_ensure_hash_type(mrb, hash2); h1 = RHASH_TBL(hash1); h2 = RHASH_TBL(hash2); -- cgit v1.2.3 From 0b44e802a95101bb7c5849382d669cf6cf4f2680 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 6 Sep 2018 23:30:32 +0900 Subject: Need to check if merging hash is empty; fix #4107 --- src/hash.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 7d81bc343..f6b61f4e1 100644 --- a/src/hash.c +++ b/src/hash.c @@ -969,6 +969,7 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) h1 = RHASH_TBL(hash1); h2 = RHASH_TBL(hash2); + if (!h2) return; if (!h1) { RHASH_TBL(hash1) = kh_copy(ht, mrb, h2); return; -- cgit v1.2.3 From e8dcfe17454eb88f266f90cf31ca562e30378291 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 6 Aug 2018 16:38:38 +0900 Subject: Use segmented list to implement `Hash` [Experimental] I know it's not hash at all, but reduce memory consumption. --- include/mruby/hash.h | 5 +- src/hash.c | 634 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 400 insertions(+), 239 deletions(-) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 449a7d4bc..dacdd9376 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -18,7 +18,7 @@ MRB_BEGIN_DECL struct RHash { MRB_OBJECT_HEADER; struct iv_tbl *iv; - struct kh_ht *ht; + struct seglist *ht; }; #define mrb_hash_ptr(v) ((struct RHash*)(mrb_ptr(v))) @@ -76,7 +76,7 @@ MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key); * * Equivalent to: * - * hash.hash_key?(key) ? hash[key] : def + * hash.key?(key) ? hash[key] : def * * @param mrb The mruby state reference. * @param hash The target hash. @@ -199,7 +199,6 @@ KHASH_DECLARE(ht, mrb_value, mrb_hash_value, TRUE) #define RHASH_TBL(h) (RHASH(h)->ht) #define RHASH_IFNONE(h) mrb_iv_get(mrb, (h), mrb_intern_lit(mrb, "ifnone")) #define RHASH_PROCDEFAULT(h) RHASH_IFNONE(h) -MRB_API struct kh_ht * mrb_hash_tbl(mrb_state *mrb, mrb_value hash); #define MRB_HASH_DEFAULT 1 #define MRB_HASH_PROC_DEFAULT 2 diff --git a/src/hash.c b/src/hash.c index f6b61f4e1..16d49af24 100644 --- a/src/hash.c +++ b/src/hash.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -47,7 +46,7 @@ mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) return kh_int_hash_func(mrb, h); } -static inline khint_t +static inline mrb_bool mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) { enum mrb_vtype t = mrb_type(a); @@ -89,7 +88,251 @@ mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) } } -KHASH_DEFINE (ht, mrb_value, mrb_hash_value, TRUE, mrb_hash_ht_hash_func, mrb_hash_ht_hash_equal) +/* return non zero to break the loop */ +typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key,mrb_value val, void *data); + +#ifndef MRB_SG_SEGMENT_SIZE +#define MRB_SG_SEGMENT_SIZE 5 +#endif + +typedef struct segment { + mrb_value key[MRB_SG_SEGMENT_SIZE]; + mrb_value val[MRB_SG_SEGMENT_SIZE]; + struct segment *next; +} segment; + +/* Instance variable table structure */ +typedef struct seglist { + segment *rootseg; + size_t size; + size_t last_len; +} seglist; + +/* Creates the instance variable table. */ +static seglist* +sg_new(mrb_state *mrb) +{ + seglist *t; + + t = (seglist*)mrb_malloc(mrb, sizeof(seglist)); + t->size = 0; + t->rootseg = NULL; + t->last_len = 0; + + return t; +} + +/* Set the value for the symbol in the instance variable table. */ +static void +sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +{ + segment *seg; + segment *prev = NULL; + size_t i; + + if (t == NULL) return; + seg = t->rootseg; + while (seg) { + for (i=0; ikey[i]; + /* Found room in last segment after last_len */ + if (!seg->next && i >= t->last_len) { + seg->key[i] = key; + seg->val[i] = val; + t->last_len = i+1; + t->size++; + return; + } + if (mrb_hash_ht_hash_equal(mrb, k, key)) { + seg->val[i] = val; + return; + } + } + prev = seg; + seg = seg->next; + } + + /* Not found */ + t->size++; + + seg = (segment*)mrb_malloc(mrb, sizeof(segment)); + if (!seg) return; + seg->next = NULL; + seg->key[0] = key; + seg->val[0] = val; + t->last_len = 1; + if (prev) { + prev->next = seg; + } + else { + t->rootseg = seg; + } +} + +/* Get a value for a symbol from the instance variable table. */ +static mrb_bool +sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +{ + segment *seg; + size_t i; + + if (t == NULL) return FALSE; + seg = t->rootseg; + while (seg) { + for (i=0; ikey[i]; + + if (!seg->next && i >= t->last_len) { + return FALSE; + } + if (mrb_hash_ht_hash_equal(mrb, k, key)) { + if (vp) *vp = seg->val[i]; + return TRUE; + } + } + seg = seg->next; + } + return FALSE; +} + +/* Deletes the value for the symbol from the instance variable table. */ +static mrb_bool +sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +{ + segment *seg; + size_t i; + + if (t == NULL) return FALSE; + seg = t->rootseg; + while (seg) { + for (i=0; ikey[i]; + + if (!seg->next && i >= t->last_len) { + /* not found */ + return FALSE; + } + if (mrb_hash_ht_hash_equal(mrb, k, key)) { + segment *sg0; + size_t i0; + + t->size--; + if (vp) *vp = seg->val[i]; + sg0 = seg; + i0 = i; + while (seg) { + for (i++; inext && i >= t->last_len) { + t->last_len--; + if (t->last_len == 0 && sg0 != seg) { + sg0->next = NULL; + t->last_len = MRB_SG_SEGMENT_SIZE; + mrb_free(mrb, seg); + } + return TRUE; + } + sg0->key[i0] = seg->key[i]; + sg0->val[i0] = seg->val[i]; + i0 = i; + } + sg0 = seg; + seg = seg->next; + } + t->last_len--; + return TRUE; + } + } + seg = seg->next; + } + return FALSE; +} + +/* Iterates over the instance variable table. */ +static void +sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) +{ + segment *seg; + size_t i; + + if (t == NULL) return; + seg = t->rootseg; + while (seg) { + for (i=0; inext && i >= t->last_len) { + return; + } + if ((*func)(mrb, seg->key[i], seg->val[i], p) != 0) + return; + } + seg = seg->next; + } +} + +/* Get the size of the instance variable table. */ +static size_t +sg_size(mrb_state *mrb, seglist *t) +{ + segment *seg; + size_t size = 0; + + if (t == NULL) return 0; + if (t->size > 0) return t->size; + seg = t->rootseg; + while (seg) { + if (seg->next == NULL) { + size += t->last_len; + return size; + } + seg = seg->next; + size += MRB_SG_SEGMENT_SIZE; + } + /* empty seglist */ + return 0; +} + +/* Copy the instance variable table. */ +static seglist* +sg_copy(mrb_state *mrb, seglist *t) +{ + segment *seg; + seglist *t2; + + size_t i; + + seg = t->rootseg; + t2 = sg_new(mrb); + + while (seg != NULL) { + for (i=0; ikey[i]; + mrb_value val = seg->val[i]; + + if ((seg->next == NULL) && (i >= t->last_len)) { + return t2; + } + sg_put(mrb, t2, key, val); + } + seg = seg->next; + } + return t2; +} + +/* Free memory of the instance variable table. */ +static void +sg_free(mrb_state *mrb, seglist *t) +{ + segment *seg; + + if (!t) return; + seg = t->rootseg; + while (seg) { + segment *p = seg; + seg = seg->next; + mrb_free(mrb, p); + } + mrb_free(mrb, t); +} static void mrb_hash_modify(mrb_state *mrb, mrb_value hash); @@ -105,57 +348,55 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key) #define KEY(key) mrb_hash_ht_key(mrb, key) +static int +hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + mrb_gc_mark_value(mrb, key); + mrb_gc_mark_value(mrb, val); + return 0; +} + void mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash) { - khiter_t k; - khash_t(ht) *h = hash->ht; - - if (!h) return; - for (k = kh_begin(h); k != kh_end(h); k++) { - if (kh_exist(h, k)) { - mrb_value key = kh_key(h, k); - mrb_value val = kh_value(h, k).v; - - mrb_gc_mark_value(mrb, key); - mrb_gc_mark_value(mrb, val); - } - } + sg_foreach(mrb, hash->ht, hash_mark_i, NULL); } size_t mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *hash) { if (!hash->ht) return 0; - return kh_size(hash->ht)*2; + return sg_size(mrb, hash->ht)*2; } void mrb_gc_free_hash(mrb_state *mrb, struct RHash *hash) { - if (hash->ht) kh_destroy(ht, mrb, hash->ht); + sg_free(mrb, hash->ht); } - MRB_API mrb_value -mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) +mrb_hash_new(mrb_state *mrb) { struct RHash *h; h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - /* khash needs 1/4 empty space so it is not resized immediately */ - if (capa == 0) - h->ht = 0; - else - h->ht = kh_init_size(ht, mrb, (khint_t)(capa*4/3)); + h->ht = 0; h->iv = 0; return mrb_obj_value(h); } MRB_API mrb_value -mrb_hash_new(mrb_state *mrb) +mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) { - return mrb_hash_new_capa(mrb, 0); + struct RHash *h; + + h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); + /* preallocate segment list */ + h->ht = sg_new(mrb); + /* capacity ignored */ + h->iv = 0; + return mrb_obj_value(h); } static mrb_value mrb_hash_default(mrb_state *mrb, mrb_value hash); @@ -166,7 +407,7 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) { mrb_value orig; struct RHash* copy; - khash_t(ht) *orig_h; + seglist *orig_h; mrb_value ifnone, vret; mrb_get_args(mrb, "o", &orig); @@ -177,22 +418,7 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) orig_h = RHASH_TBL(self); copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = kh_init(ht, mrb); - - if (orig_h && kh_size(orig_h) > 0) { - khash_t(ht) *copy_h = copy->ht; - khiter_t k, copy_k; - - for (k = kh_begin(orig_h); k != kh_end(orig_h); k++) { - if (kh_exist(orig_h, k)) { - int ai = mrb_gc_arena_save(mrb); - copy_k = kh_put(ht, mrb, copy_h, KEY(kh_key(orig_h, k))); - mrb_gc_arena_restore(mrb, ai); - kh_val(copy_h, copy_k).v = kh_val(orig_h, k).v; - kh_val(copy_h, copy_k).n = kh_size(copy_h)-1; - } - } - } + copy->ht = sg_copy(mrb, orig_h); if (MRB_RHASH_DEFAULT_P(self)) { copy->flags |= MRB_HASH_DEFAULT; @@ -208,65 +434,54 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) return vret; } +static int +check_kdict_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +{ + if (!mrb_symbol_p(key)) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); + } + return 0; +} + void mrb_hash_check_kdict(mrb_state *mrb, mrb_value self) { - khash_t(ht) *orig_h; - khiter_t k; - int nosym = FALSE; + seglist *sg; - orig_h = RHASH_TBL(self); - if (!orig_h || kh_size(orig_h) == 0) return; - for (k = kh_begin(orig_h); k != kh_end(orig_h); k++) { - if (kh_exist(orig_h, k)) { - mrb_value key = kh_key(orig_h, k); - - if (!mrb_symbol_p(key)) nosym = TRUE; - } - } - if (nosym) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "keyword argument hash with non symbol keys"); - } + sg = RHASH_TBL(self); + if (!sg || sg_size(mrb, sg) == 0) return; + sg_foreach(mrb, sg, check_kdict_i, NULL); } MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value self) { struct RHash* copy; - khash_t(ht) *orig_h; + seglist *orig_h; orig_h = RHASH_TBL(self); copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = kh_init(ht, mrb); - - if (orig_h && kh_size(orig_h) > 0) { - int ai = mrb_gc_arena_save(mrb); - khash_t(ht) *copy_h = copy->ht; - khiter_t k, copy_k; - - for (k = kh_begin(orig_h); k != kh_end(orig_h); k++) { - if (kh_exist(orig_h, k)) { - copy_k = kh_put(ht, mrb, copy_h, KEY(kh_key(orig_h, k))); - mrb_gc_arena_restore(mrb, ai); - kh_val(copy_h, copy_k).v = kh_val(orig_h, k).v; - kh_val(copy_h, copy_k).n = kh_size(copy_h)-1; - } - } - } + copy->ht = orig_h ? sg_copy(mrb, orig_h) : NULL; return mrb_obj_value(copy); } +MRB_API mrb_bool +mrb_hash_has_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) +{ + if (sg_get(mrb, RHASH_TBL(hash), key, NULL)) { + return TRUE; + } + return FALSE; +} + MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; + mrb_value val; mrb_sym mid; - if (h) { - k = kh_get(ht, mrb, h, key); - if (k != kh_end(h)) - return kh_value(h, k).v; + if (sg_get(mrb, RHASH_TBL(hash), key, &val)) { + return val; } mid = mrb_intern_lit(mrb, "default"); @@ -280,15 +495,11 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) MRB_API mrb_value mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; + mrb_value val; - if (h) { - k = kh_get(ht, mrb, h, key); - if (k != kh_end(h)) - return kh_value(h, k).v; + if (sg_get(mrb, RHASH_TBL(hash), key, &val)) { + return val; } - /* not found */ return def; } @@ -296,25 +507,9 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) MRB_API void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) { - khash_t(ht) *h; - khiter_t k; - int r; - mrb_hash_modify(mrb, hash); - h = RHASH_TBL(hash); - - if (!h) h = RHASH_TBL(hash) = kh_init(ht, mrb); - k = kh_put2(ht, mrb, h, key, &r); - kh_value(h, k).v = val; - - if (r != 0) { - /* expand */ - int ai = mrb_gc_arena_save(mrb); - key = kh_key(h, k) = KEY(key); - mrb_gc_arena_restore(mrb, ai); - kh_value(h, k).n = kh_size(h)-1; - } + sg_put(mrb, RHASH_TBL(hash), key, val); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val); return; @@ -332,24 +527,16 @@ mrb_check_hash_type(mrb_state *mrb, mrb_value hash) return mrb_check_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash"); } -MRB_API khash_t(ht)* -mrb_hash_tbl(mrb_state *mrb, mrb_value hash) -{ - khash_t(ht) *h = RHASH_TBL(hash); - - if (!h) { - return RHASH_TBL(hash) = kh_init(ht, mrb); - } - return h; -} - static void mrb_hash_modify(mrb_state *mrb, mrb_value hash) { if (MRB_FROZEN_P(mrb_hash_ptr(hash))) { mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen hash"); } - mrb_hash_tbl(mrb, hash); + + if (!RHASH_TBL(hash)) { + RHASH_TBL(hash) = sg_new(mrb); + } } /* 15.2.13.4.16 */ @@ -589,23 +776,11 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; - mrb_value delVal; - mrb_int n; - - if (h) { - k = kh_get(ht, mrb, h, key); - if (k != kh_end(h)) { - delVal = kh_value(h, k).v; - n = kh_value(h, k).n; - kh_del(ht, mrb, h, k); - for (k = kh_begin(h); k != kh_end(h); k++) { - if (!kh_exist(h, k)) continue; - if (kh_value(h, k).n > n) kh_value(h, k).n--; - } - return delVal; - } + seglist *sg = RHASH_TBL(hash); + mrb_value del_val; + + if (sg_del(mrb, sg, key, &del_val)) { + return del_val; } /* not found */ @@ -657,22 +832,14 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) static mrb_value mrb_hash_shift(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; - mrb_value delKey, delVal; + seglist *sg = RHASH_TBL(hash); mrb_hash_modify(mrb, hash); - if (h && kh_size(h) > 0) { - for (k = kh_begin(h); k != kh_end(h); k++) { - if (!kh_exist(h, k)) continue; - - delKey = kh_key(h, k); - mrb_gc_protect(mrb, delKey); - delVal = mrb_hash_delete_key(mrb, hash, delKey); - mrb_gc_protect(mrb, delVal); - - return mrb_assoc_new(mrb, delKey, delVal); - } + if (sg && sg_size(mrb, sg) > 0) { + mrb_value del_key = sg->rootseg->key[0]; + mrb_value del_val = sg->rootseg->val[0]; + sg_del(mrb, sg, del_key, NULL); + return mrb_assoc_new(mrb, del_key, del_val); } if (MRB_RHASH_DEFAULT_P(hash)) { @@ -701,10 +868,13 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); + seglist *sg = RHASH_TBL(hash); mrb_hash_modify(mrb, hash); - if (h) kh_clear(ht, mrb, h); + if (sg) { + sg_free(mrb, sg); + RHASH_TBL(hash) = NULL; + } return hash; } @@ -754,10 +924,19 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) static mrb_value mrb_hash_size_m(mrb_state *mrb, mrb_value self) { - khash_t(ht) *h = RHASH_TBL(self); + seglist *sg = RHASH_TBL(self); - if (!h) return mrb_fixnum_value(0); - return mrb_fixnum_value(kh_size(h)); + if (!sg) return mrb_fixnum_value(0); + return mrb_fixnum_value(sg_size(mrb, sg)); +} + +MRB_API mrb_bool +mrb_hash_empty_p(mrb_state *mrb, mrb_value self) +{ + seglist *sg = RHASH_TBL(self); + + if (!sg) return TRUE; + return sg_size(mrb, sg) == 0; } /* 15.2.13.4.12 */ @@ -770,21 +949,10 @@ mrb_hash_size_m(mrb_state *mrb, mrb_value self) * {}.empty? #=> true * */ -MRB_API mrb_bool -mrb_hash_empty_p(mrb_state *mrb, mrb_value self) -{ - khash_t(ht) *h = RHASH_TBL(self); - - if (h) return kh_size(h) == 0; - return TRUE; -} - static mrb_value mrb_hash_empty_m(mrb_state *mrb, mrb_value self) { - if (mrb_hash_empty_p(mrb, self)) - return mrb_true_value(); - return mrb_false_value(); + return mrb_bool_value(mrb_hash_empty_p(mrb, self)); } /* 15.2.13.4.29 (x)*/ @@ -801,6 +969,13 @@ mrb_hash_to_hash(mrb_state *mrb, mrb_value hash) return hash; } +static int +hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + mrb_ary_push(mrb, *(mrb_value*)p, key); + return 0; +} + /* 15.2.13.4.19 */ /* * call-seq: @@ -817,33 +992,24 @@ mrb_hash_to_hash(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_keys(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; - mrb_int end; + seglist *sg = RHASH_TBL(hash); + size_t size; mrb_value ary; - mrb_value *p; - - if (!h || kh_size(h) == 0) return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, kh_size(h)); - end = kh_size(h)-1; - mrb_ary_set(mrb, ary, end, mrb_nil_value()); - p = RARRAY_PTR(ary); - for (k = kh_begin(h); k != kh_end(h); k++) { - if (kh_exist(h, k)) { - mrb_value kv = kh_key(h, k); - mrb_hash_value hv = kh_value(h, k); - - if (hv.n <= end) { - p[hv.n] = kv; - } - else { - p[end] = kv; - } - } - } + + if (!sg || (size = sg_size(mrb, sg)) == 0) + return mrb_ary_new(mrb); + ary = mrb_ary_new_capa(mrb, size); + sg_foreach(mrb, sg, hash_keys_i, (void*)&ary); return ary; } +static int +hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + mrb_ary_push(mrb, *(mrb_value*)p, val); + return 0; +} + /* 15.2.13.4.28 */ /* * call-seq: @@ -860,19 +1026,14 @@ mrb_hash_keys(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_values(mrb_state *mrb, mrb_value hash) { - khash_t(ht) *h = RHASH_TBL(hash); - khiter_t k; + seglist *sg = RHASH_TBL(hash); + size_t size; mrb_value ary; - if (!h) return mrb_ary_new(mrb); - ary = mrb_ary_new_capa(mrb, kh_size(h)); - for (k = kh_begin(h); k != kh_end(h); k++) { - if (kh_exist(h, k)) { - mrb_hash_value hv = kh_value(h, k); - - mrb_ary_set(mrb, ary, hv.n, hv.v); - } - } + if (!sg || (size = sg_size(mrb, sg)) == 0) + return mrb_ary_new(mrb); + ary = mrb_ary_new_capa(mrb, size); + sg_foreach(mrb, sg, hash_vals_i, (void*)&ary); return ary; } @@ -898,13 +1059,11 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) MRB_API mrb_bool mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) { - khash_t(ht) *h; - khiter_t k; + seglist *sg; - h = RHASH_TBL(hash); - if (h) { - k = kh_get(ht, mrb, h, key); - return k != kh_end(h); + sg = RHASH_TBL(hash); + if (sg_get(mrb, sg, key, NULL)) { + return TRUE; } return FALSE; } @@ -920,6 +1079,23 @@ mrb_hash_has_key(mrb_state *mrb, mrb_value hash) return mrb_bool_value(key_p); } +struct has_v_arg { + mrb_bool found; + mrb_value val; +}; + +static int +hash_has_value_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) +{ + struct has_v_arg *arg = (struct has_v_arg*)p; + + if (mrb_equal(mrb, arg->val, val)) { + arg->found = TRUE; + return 1; + } + return 0; +} + /* 15.2.13.4.14 */ /* 15.2.13.4.27 */ /* @@ -939,30 +1115,28 @@ static mrb_value mrb_hash_has_value(mrb_state *mrb, mrb_value hash) { mrb_value val; - khash_t(ht) *h; - khiter_t k; - + struct has_v_arg arg; + mrb_get_args(mrb, "o", &val); - h = RHASH_TBL(hash); + arg.found = FALSE; + arg.val = val; + sg_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg); + return mrb_bool_value(arg.found); +} - if (h) { - for (k = kh_begin(h); k != kh_end(h); k++) { - if (!kh_exist(h, k)) continue; +static int +merge_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) +{ + seglist *h1 = (seglist*)data; - if (mrb_equal(mrb, kh_value(h, k).v, val)) { - return mrb_true_value(); - } - } - } - return mrb_false_value(); + sg_put(mrb, h1, key, val); + return 0; } MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) { - khash_t(ht) *h1; - khash_t(ht) *h2; - khiter_t k; + seglist *h1, *h2; mrb_hash_modify(mrb, hash1); hash2 = mrb_ensure_hash_type(mrb, hash2); @@ -971,22 +1145,10 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) if (!h2) return; if (!h1) { - RHASH_TBL(hash1) = kh_copy(ht, mrb, h2); + RHASH_TBL(hash1) = sg_copy(mrb, h2); return; } - for (k = kh_begin(h2); k != kh_end(h2); k++) { - khiter_t k1; - int r; - - if (!kh_exist(h2, k)) continue; - k1 = kh_put2(ht, mrb, h1, kh_key(h2, k), &r); - kh_value(h1, k1).v = kh_value(h2,k).v; - if (r != 0) { - /* expand */ - kh_key(h1, k1) = kh_key(h2, k); - kh_value(h1, k1).n = kh_size(h1)-1; - } - } + sg_foreach(mrb, h2, merge_i, h1); mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1)); return; } -- cgit v1.2.3 From 8ffd4e47fb1c18088e554d31e8af88881517a201 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Sat, 22 Sep 2018 23:05:52 +0900 Subject: Use `mrb_undef_value` for delete mark instead of shifting Hash entry table. That means entry table should be compacted periodically by `sg_compact()`. --- mrbgems/mruby-hash-ext/mrblib/hash.rb | 4 +- mrblib/hash.rb | 12 +-- src/hash.c | 176 ++++++++++++++++++---------------- 3 files changed, 97 insertions(+), 95 deletions(-) (limited to 'src/hash.c') diff --git a/mrbgems/mruby-hash-ext/mrblib/hash.rb b/mrbgems/mruby-hash-ext/mrblib/hash.rb index 61e4c890c..5bbbdf559 100644 --- a/mrbgems/mruby-hash-ext/mrblib/hash.rb +++ b/mrbgems/mruby-hash-ext/mrblib/hash.rb @@ -461,9 +461,9 @@ class Hash return to_enum :transform_keys! unless block self.keys.each do |k| value = self[k] - new_key = block.call(k) self.__delete(k) - self[new_key] = value + k = block.call(k) if block + self[k] = value end self end diff --git a/mrblib/hash.rb b/mrblib/hash.rb index 96029a230..eba92ba4a 100644 --- a/mrblib/hash.rb +++ b/mrblib/hash.rb @@ -55,10 +55,9 @@ class Hash # ISO 15.2.13.4.8 def delete(key, &block) if block && !self.has_key?(key) - block.call(key) - else - self.__delete(key) + return block.call(key) end + self.__delete(key) end ## @@ -335,11 +334,8 @@ class Hash # h["AA"] #=> "b" # def rehash - h = {} - self.each{|k,v| - h[k] = v - } - self.replace(h) + self.size + self end end diff --git a/src/hash.c b/src/hash.c index 16d49af24..820ee5a71 100644 --- a/src/hash.c +++ b/src/hash.c @@ -96,16 +96,18 @@ typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key,mrb_value val, void * #endif typedef struct segment { - mrb_value key[MRB_SG_SEGMENT_SIZE]; - mrb_value val[MRB_SG_SEGMENT_SIZE]; struct segment *next; + struct { + mrb_value key; + mrb_value val; + } e[MRB_SG_SEGMENT_SIZE]; } segment; /* Instance variable table structure */ typedef struct seglist { segment *rootseg; - size_t size; - size_t last_len; + mrb_int size; + mrb_int last_len; } seglist; /* Creates the instance variable table. */ @@ -128,23 +130,24 @@ sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) { segment *seg; segment *prev = NULL; - size_t i; + mrb_int i; if (t == NULL) return; seg = t->rootseg; while (seg) { for (i=0; ikey[i]; + mrb_value k = seg->e[i].key; /* Found room in last segment after last_len */ if (!seg->next && i >= t->last_len) { - seg->key[i] = key; - seg->val[i] = val; + seg->e[i].key = key; + seg->e[i].val = val; t->last_len = i+1; - t->size++; + if (t->size >= 0) t->size++; return; } + if (mrb_undef_p(k)) continue; if (mrb_hash_ht_hash_equal(mrb, k, key)) { - seg->val[i] = val; + seg->e[i].val = val; return; } } @@ -153,13 +156,13 @@ sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) } /* Not found */ - t->size++; + if (t->size >= 0) t->size++; seg = (segment*)mrb_malloc(mrb, sizeof(segment)); if (!seg) return; seg->next = NULL; - seg->key[0] = key; - seg->val[0] = val; + seg->e[0].key = key; + seg->e[0].val = val; t->last_len = 1; if (prev) { prev->next = seg; @@ -174,19 +177,20 @@ static mrb_bool sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) { segment *seg; - size_t i; + mrb_int i; if (t == NULL) return FALSE; seg = t->rootseg; while (seg) { for (i=0; ikey[i]; + mrb_value k = seg->e[i].key; if (!seg->next && i >= t->last_len) { return FALSE; } + if (mrb_undef_p(k)) continue; if (mrb_hash_ht_hash_equal(mrb, k, key)) { - if (vp) *vp = seg->val[i]; + if (vp) *vp = seg->e[i].val; return TRUE; } } @@ -195,50 +199,81 @@ sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) return FALSE; } +/* Compacts the hash removing delete entries. */ +static void +sg_compact(mrb_state *mrb, seglist *t) +{ + segment *seg = t->rootseg; + mrb_int i; + segment *seg2 = NULL; + mrb_int i2; + mrb_int size = 0; + + while (seg) { + for (i=0; ie[i].key; + + if (!seg->next && i >= t->last_len) { + goto exit; + } + if (mrb_undef_p(k)) { /* found delete key */ + if (seg2 == NULL) { + seg2 = seg; + i2 = i; + } + } + else { + size++; + if (seg2 != NULL) { + seg2->e[i2++] = seg->e[i]; + if (i2 >= MRB_SG_SEGMENT_SIZE) { + seg2 = seg2->next; + i2 = 0; + } + } + } + } + seg = seg->next; + } + exit: + /* reached at end */ + t->size = size; + t->last_len = i2; + if (seg != seg2) { + seg = seg2->next; + seg2->next = NULL; + while (seg) { + seg2 = seg->next; + mrb_free(mrb, seg); + seg = seg2; + } + } +} + /* Deletes the value for the symbol from the instance variable table. */ +/* Deletion is done by overwriting keys by `undef`. */ static mrb_bool sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) { segment *seg; - size_t i; + mrb_int i; if (t == NULL) return FALSE; seg = t->rootseg; while (seg) { for (i=0; ikey[i]; + mrb_value k = seg->e[i].key; if (!seg->next && i >= t->last_len) { /* not found */ return FALSE; } + if (mrb_undef_p(k)) continue; if (mrb_hash_ht_hash_equal(mrb, k, key)) { - segment *sg0; - size_t i0; - - t->size--; - if (vp) *vp = seg->val[i]; - sg0 = seg; - i0 = i; - while (seg) { - for (i++; inext && i >= t->last_len) { - t->last_len--; - if (t->last_len == 0 && sg0 != seg) { - sg0->next = NULL; - t->last_len = MRB_SG_SEGMENT_SIZE; - mrb_free(mrb, seg); - } - return TRUE; - } - sg0->key[i0] = seg->key[i]; - sg0->val[i0] = seg->val[i]; - i0 = i; - } - sg0 = seg; - seg = seg->next; - } - t->last_len--; + if (vp) *vp = k; + seg->e[i].key = mrb_undef_value(); + if (t->size > 0) t->size = -1; + else t->size--; /* count number of deleted */ return TRUE; } } @@ -252,7 +287,7 @@ static void sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) { segment *seg; - size_t i; + mrb_int i; if (t == NULL) return; seg = t->rootseg; @@ -262,7 +297,8 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) if (!seg->next && i >= t->last_len) { return; } - if ((*func)(mrb, seg->key[i], seg->val[i], p) != 0) + if (mrb_undef_p(seg->e[i].key)) continue; + if ((*func)(mrb, seg->e[i].key, seg->e[i].val, p) != 0) return; } seg = seg->next; @@ -270,25 +306,14 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) } /* Get the size of the instance variable table. */ -static size_t +static mrb_int sg_size(mrb_state *mrb, seglist *t) { - segment *seg; - size_t size = 0; - if (t == NULL) return 0; - if (t->size > 0) return t->size; - seg = t->rootseg; - while (seg) { - if (seg->next == NULL) { - size += t->last_len; - return size; - } - seg = seg->next; - size += MRB_SG_SEGMENT_SIZE; + if (t->size < 0) { + sg_compact(mrb, t); } - /* empty seglist */ - return 0; + return t->size; } /* Copy the instance variable table. */ @@ -297,16 +322,15 @@ sg_copy(mrb_state *mrb, seglist *t) { segment *seg; seglist *t2; - - size_t i; + mrb_int i; seg = t->rootseg; t2 = sg_new(mrb); while (seg != NULL) { for (i=0; ikey[i]; - mrb_value val = seg->val[i]; + mrb_value key = seg->e[i].key; + mrb_value val = seg->e[i].val; if ((seg->next == NULL) && (i >= t->last_len)) { return t2; @@ -787,24 +811,6 @@ mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) return mrb_nil_value(); } -/* 15.2.13.4.8 */ -/* - * call-seq: - * hsh.delete(key) -> value - * hsh.delete(key) {| key | block } -> value - * - * Deletes and returns a key-value pair from hsh whose key is - * equal to key. If the key is not found, returns the - * default value. If the optional code block is given and the - * key is not found, pass in the key and return the result of - * block. - * - * h = { "a" => 100, "b" => 200 } - * h.delete("a") #=> 100 - * h.delete("z") #=> nil - * h.delete("z") { |el| "#{el} not found" } #=> "z not found" - * - */ static mrb_value mrb_hash_delete(mrb_state *mrb, mrb_value self) { @@ -836,8 +842,8 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) mrb_hash_modify(mrb, hash); if (sg && sg_size(mrb, sg) > 0) { - mrb_value del_key = sg->rootseg->key[0]; - mrb_value del_val = sg->rootseg->val[0]; + mrb_value del_key = sg->rootseg->e[0].key; + mrb_value del_val = sg->rootseg->e[0].val; sg_del(mrb, sg, del_key, NULL); return mrb_assoc_new(mrb, del_key, del_val); } -- cgit v1.2.3 From d78acc7afed35813f25e3091150dab668c373f05 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 26 Sep 2018 11:14:36 +0900 Subject: Add index to larger segment lists for performance --- mrbgems/mruby-array-ext/mrblib/array.rb | 14 +- mrblib/hash.rb | 2 +- src/hash.c | 440 +++++++++++++++++++++++--------- 3 files changed, 329 insertions(+), 127 deletions(-) (limited to 'src/hash.c') diff --git a/mrbgems/mruby-array-ext/mrblib/array.rb b/mrbgems/mruby-array-ext/mrblib/array.rb index 8c2acc7ac..eac8d4718 100644 --- a/mrbgems/mruby-array-ext/mrblib/array.rb +++ b/mrbgems/mruby-array-ext/mrblib/array.rb @@ -1,3 +1,4 @@ +# coding: cp932 class Array ## # call-seq: @@ -41,26 +42,19 @@ class Array # c.uniq! { |s| s.first } # => [["student", "sam"], ["teacher", "matz"]] # def uniq!(&block) + hash = {} if block - hash = {} self.each do |val| key = block.call(val) hash[key] = val unless hash.key?(key) end result = hash.values - elsif self.size > 20 + else hash = {} self.each do |val| hash[val] = val end - result = hash.values - else - ary = self.dup - result = [] - while ary.size > 0 - result << ary.shift - ary.delete(result.last) - end + result = hash.keys end if result.size == self.size nil diff --git a/mrblib/hash.rb b/mrblib/hash.rb index eba92ba4a..245b76ee0 100644 --- a/mrblib/hash.rb +++ b/mrblib/hash.rb @@ -334,7 +334,7 @@ class Hash # h["AA"] #=> "b" # def rehash - self.size + # do nothing (for now) self end end diff --git a/src/hash.c b/src/hash.c index 820ee5a71..07a12be68 100644 --- a/src/hash.c +++ b/src/hash.c @@ -16,14 +16,48 @@ mrb_int mrb_float_id(mrb_float f); #endif -static inline khint_t -mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) +/* return non zero to break the loop */ +typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key, mrb_value val, void *data); + +#ifndef MRB_SG_SEGMENT_SIZE +#define MRB_SG_SEGMENT_SIZE 5 +#endif + +struct segkv { + mrb_value key; + mrb_value val; +}; + +typedef struct segment { + struct segment *next; + struct segkv e[MRB_SG_SEGMENT_SIZE]; +} segment; + +typedef struct segindex { + size_t size; + size_t capa; + struct segkv *table[]; +} segindex; + +/* Instance variable table structure */ +typedef struct seglist { + segment *rootseg; + segment *lastseg; + mrb_int size; + mrb_int last_len; + segindex *index; +} seglist; + +static /* inline */ size_t +sg_hash_func(mrb_state *mrb, seglist *t, mrb_value key) { - enum mrb_vtype t = mrb_type(key); + enum mrb_vtype tt = mrb_type(key); mrb_value hv; - khint_t h; + size_t h; + segindex *index = t->index; + size_t capa = index ? index->capa : 0; - switch (t) { + switch (tt) { case MRB_TT_STRING: h = mrb_str_hash(mrb, key); break; @@ -35,23 +69,26 @@ mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) #ifndef MRB_WITHOUT_FLOAT case MRB_TT_FLOAT: #endif - h = (khint_t)mrb_obj_id(key); + h = (size_t)mrb_obj_id(key); break; default: hv = mrb_funcall(mrb, key, "hash", 0); - h = (khint_t)t ^ (khint_t)mrb_fixnum(hv); + h = (size_t)t ^ (size_t)mrb_fixnum(hv); break; } - return kh_int_hash_func(mrb, h); + if (index && (index != t->index || capa != index->capa)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } + return ((h)^((h)<<2)^((h)>>2)); } static inline mrb_bool -mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) +sg_hash_equal(mrb_state *mrb, seglist *t, mrb_value a, mrb_value b) { - enum mrb_vtype t = mrb_type(a); + enum mrb_vtype tt = mrb_type(a); - switch (t) { + switch (tt) { case MRB_TT_STRING: return mrb_str_equal(mrb, a, b); @@ -84,32 +121,18 @@ mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) #endif default: - return mrb_eql(mrb, a, b); - } + { + segindex *index = t->index; + size_t capa = index ? index->capa : 0; + mrb_bool eql = mrb_eql(mrb, a, b); + if (index && (index != t->index || capa != index->capa)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } + return eql; + } + } } -/* return non zero to break the loop */ -typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key,mrb_value val, void *data); - -#ifndef MRB_SG_SEGMENT_SIZE -#define MRB_SG_SEGMENT_SIZE 5 -#endif - -typedef struct segment { - struct segment *next; - struct { - mrb_value key; - mrb_value val; - } e[MRB_SG_SEGMENT_SIZE]; -} segment; - -/* Instance variable table structure */ -typedef struct seglist { - segment *rootseg; - mrb_int size; - mrb_int last_len; -} seglist; - /* Creates the instance variable table. */ static seglist* sg_new(mrb_state *mrb) @@ -119,87 +142,87 @@ sg_new(mrb_state *mrb) t = (seglist*)mrb_malloc(mrb, sizeof(seglist)); t->size = 0; t->rootseg = NULL; + t->lastseg = NULL; t->last_len = 0; + t->index = NULL; return t; } -/* Set the value for the symbol in the instance variable table. */ +#define power2(v) do { \ + v--;\ + v |= v >> 1;\ + v |= v >> 2;\ + v |= v >> 4;\ + v |= v >> 8;\ + v |= v >> 16;\ + v++;\ +} while (0) + +#ifndef UPPER_BOUND +#define UPPER_BOUND(x) ((x)>>2|(x)>>1) +#endif + +#define SG_MASK(index) ((index->capa)-1) + +/* Build index for the segment list */ static void -sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +sg_index(mrb_state *mrb, seglist *t) { + size_t size = (size_t)t->size; + size_t mask; + segindex *index = t->index; segment *seg; - segment *prev = NULL; - mrb_int i; + size_t i; - if (t == NULL) return; - seg = t->rootseg; - while (seg) { - for (i=0; ie[i].key; - /* Found room in last segment after last_len */ - if (!seg->next && i >= t->last_len) { - seg->e[i].key = key; - seg->e[i].val = val; - t->last_len = i+1; - if (t->size >= 0) t->size++; - return; - } - if (mrb_undef_p(k)) continue; - if (mrb_hash_ht_hash_equal(mrb, k, key)) { - seg->e[i].val = val; - return; - } + if (size < MRB_SG_SEGMENT_SIZE) { + if (index) { + failed: + mrb_free(mrb, index); + t->index = NULL; } - prev = seg; - seg = seg->next; + return; } - - /* Not found */ - if (t->size >= 0) t->size++; - - seg = (segment*)mrb_malloc(mrb, sizeof(segment)); - if (!seg) return; - seg->next = NULL; - seg->e[0].key = key; - seg->e[0].val = val; - t->last_len = 1; - if (prev) { - prev->next = seg; + /* allocate index table */ + if (index && index->size >= UPPER_BOUND(index->capa)) { + size = index->capa+1; } - else { - t->rootseg = seg; + power2(size); + if (!index || index->capa < size) { + index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); + if (index == NULL) goto failed; + t->index = index; + } + index->size = t->size; + index->capa = size; + for (i=0; itable[i] = NULL; } -} - -/* Get a value for a symbol from the instance variable table. */ -static mrb_bool -sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) -{ - segment *seg; - mrb_int i; - if (t == NULL) return FALSE; + /* rebuld index */ + mask = SG_MASK(index); seg = t->rootseg; while (seg) { for (i=0; ie[i].key; + mrb_value key; + size_t k, step = 0; - if (!seg->next && i >= t->last_len) { - return FALSE; + if (!seg->next && i >= (size_t)t->last_len) { + return; } - if (mrb_undef_p(k)) continue; - if (mrb_hash_ht_hash_equal(mrb, k, key)) { - if (vp) *vp = seg->e[i].val; - return TRUE; + key = seg->e[i].key; + if (mrb_undef_p(key)) continue; + k = sg_hash_func(mrb, t, key) & mask; + while (index->table[k]) { + k = (k+(++step)) & mask; } + index->table[k] = &seg->e[i]; } seg = seg->next; } - return FALSE; } -/* Compacts the hash removing delete entries. */ +/* Compacts the segment list removing deleted entries. */ static void sg_compact(mrb_state *mrb, seglist *t) { @@ -209,6 +232,10 @@ sg_compact(mrb_state *mrb, seglist *t) mrb_int i2; mrb_int size = 0; + if (t->index && (size_t)t->size == t->index->size) { + sg_index(mrb, t); + return; + } while (seg) { for (i=0; ie[i].key; @@ -238,16 +265,179 @@ sg_compact(mrb_state *mrb, seglist *t) exit: /* reached at end */ t->size = size; - t->last_len = i2; - if (seg != seg2) { + if (seg2) { seg = seg2->next; seg2->next = NULL; + t->last_len = i2; + t->lastseg = seg2; while (seg) { seg2 = seg->next; mrb_free(mrb, seg); seg = seg2; } } + if (t->index) { + sg_index(mrb, t); + } +} + +/* Set the value for the key in the indexed segment list. */ +static void +sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +{ + segindex *index = t->index; + size_t k, sp, step = 0, mask; + segment *seg; + + if (index->size >= UPPER_BOUND(index->capa)) { + /* need to expand table */ + sg_compact(mrb, t); + index = t->index; + } + mask = SG_MASK(index); + sp = index->capa; + k = sg_hash_func(mrb, t, key) & mask; + while (index->table[k]) { + mrb_value key2 = index->table[k]->key; + if (mrb_undef_p(key2)) { + if (sp == index->capa) sp = k; + } + else if (sg_hash_equal(mrb, t, key, key2)) { + index->table[k]->val = val; + return; + } + k = (k+(++step)) & mask; + } + if (sp < index->capa) { + k = sp; + } + + /* put the value at the last */ + seg = t->lastseg; + if (t->last_len < MRB_SG_SEGMENT_SIZE) { + index->table[k] = &seg->e[t->last_len++]; + } + else { /* append a new segment */ + seg->next = (segment*)mrb_malloc(mrb, sizeof(segment)); + seg = seg->next; + seg->next = NULL; + t->lastseg = seg; + t->last_len = 1; + index->table[k] = &seg->e[0]; + } + index->table[k]->key = key; + index->table[k]->val = val; + index->size++; + t->size++; +} + +/* Set the value for the key in the segment list. */ +static void +sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +{ + segment *seg; + mrb_int i, deleted = 0; + + if (t == NULL) return; + if (t->index) { + sg_index_put(mrb, t, key, val); + return; + } + seg = t->rootseg; + while (seg) { + for (i=0; ie[i].key; + /* Found room in last segment after last_len */ + if (!seg->next && i >= t->last_len) { + seg->e[i].key = key; + seg->e[i].val = val; + t->last_len = i+1; + t->size++; + return; + } + if (mrb_undef_p(k)) { + deleted++; + continue; + } + if (sg_hash_equal(mrb, t, k, key)) { + seg->e[i].val = val; + return; + } + } + seg = seg->next; + } + + /* Not found */ + if (deleted > MRB_SG_SEGMENT_SIZE) { + sg_compact(mrb, t); + } + t->size++; + + seg = (segment*)mrb_malloc(mrb, sizeof(segment)); + seg->next = NULL; + seg->e[0].key = key; + seg->e[0].val = val; + t->last_len = 1; + if (t->rootseg == NULL) { + t->rootseg = seg; + } + else { + t->lastseg->next = seg; + } + t->lastseg = seg; + if (t->index == NULL && t->size > MRB_SG_SEGMENT_SIZE*4) { + sg_index(mrb, t); + } +} + +/* Get a value for a key from the indexed segment list. */ +static mrb_bool +sg_index_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +{ + segindex *index = t->index; + size_t mask = SG_MASK(index); + size_t k = sg_hash_func(mrb, t, key) & mask; + size_t step = 0; + + while (index->table[k]) { + if (sg_hash_equal(mrb, t, key, index->table[k]->key)) { + if (vp) *vp = index->table[k]->val; + return TRUE; + } + k = (k+(++step)) & mask; + } + return FALSE; +} + +/* Get a value for a key from the segment list. */ +static mrb_bool +sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +{ + segment *seg; + mrb_int i; + + if (t == NULL) return FALSE; + if (t->index) { + return sg_index_get(mrb, t, key, vp); + } + + seg = t->rootseg; + while (seg) { + for (i=0; ie[i].key; + + if (!seg->next && i >= t->last_len) { + return FALSE; + } + if (mrb_undef_p(k)) continue; + if (sg_hash_equal(mrb, t, k, key)) { + if (vp) *vp = seg->e[i].val; + return TRUE; + } + } + seg = seg->next; + } + return FALSE; } /* Deletes the value for the symbol from the instance variable table. */ @@ -262,18 +452,17 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) seg = t->rootseg; while (seg) { for (i=0; ie[i].key; + mrb_value key2; if (!seg->next && i >= t->last_len) { /* not found */ return FALSE; } - if (mrb_undef_p(k)) continue; - if (mrb_hash_ht_hash_equal(mrb, k, key)) { - if (vp) *vp = k; + key2 = seg->e[i].key; + if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) { + if (vp) *vp = key2; seg->e[i].key = mrb_undef_value(); - if (t->size > 0) t->size = -1; - else t->size--; /* count number of deleted */ + t->size--; return TRUE; } } @@ -290,6 +479,9 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) mrb_int i; if (t == NULL) return; + if (t->index && t->index->size-(size_t)t->size > MRB_SG_SEGMENT_SIZE) { + sg_compact(mrb, t); + } seg = t->rootseg; while (seg) { for (i=0; isize < 0) { - sg_compact(mrb, t); - } return t->size; } @@ -355,13 +544,14 @@ sg_free(mrb_state *mrb, seglist *t) seg = seg->next; mrb_free(mrb, p); } + if (t->index) mrb_free(mrb, t->index); mrb_free(mrb, t); } static void mrb_hash_modify(mrb_state *mrb, mrb_value hash); static inline mrb_value -mrb_hash_ht_key(mrb_state *mrb, mrb_value key) +ht_key(mrb_state *mrb, mrb_value key) { if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { key = mrb_str_dup(mrb, key); @@ -370,7 +560,7 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key) return key; } -#define KEY(key) mrb_hash_ht_key(mrb, key) +#define KEY(key) ht_key(mrb, key) static int hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) @@ -489,15 +679,6 @@ mrb_hash_dup(mrb_state *mrb, mrb_value self) return mrb_obj_value(copy); } -MRB_API mrb_bool -mrb_hash_has_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) -{ - if (sg_get(mrb, RHASH_TBL(hash), key, NULL)) { - return TRUE; - } - return FALSE; -} - MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) { @@ -821,6 +1002,33 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) return mrb_hash_delete_key(mrb, self, key); } +/* find first element in segment list, and remove it. */ +static void +sg_shift(mrb_state *mrb, seglist *t, mrb_value *kp, mrb_value *vp) +{ + segment *seg = t->rootseg; + mrb_int i; + + while (seg) { + for (i=0; inext && i >= t->last_len) { + return; + } + key = seg->e[i].key; + if (mrb_undef_p(key)) continue; + *kp = key; + *vp = seg->e[i].val; + /* delete element */ + seg->e[i].key = mrb_undef_value(); + t->size--; + return; + } + seg = seg->next; + } +} + /* 15.2.13.4.24 */ /* * call-seq: @@ -842,9 +1050,9 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) mrb_hash_modify(mrb, hash); if (sg && sg_size(mrb, sg) > 0) { - mrb_value del_key = sg->rootseg->e[0].key; - mrb_value del_val = sg->rootseg->e[0].val; - sg_del(mrb, sg, del_key, NULL); + mrb_value del_key, del_val; + + sg_shift(mrb, sg, &del_key, &del_val); return mrb_assoc_new(mrb, del_key, del_val); } -- cgit v1.2.3 From 54ec08c7f19e7c46de3dcb63e4f30cd65e4a56e7 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 26 Sep 2018 13:04:13 +0900 Subject: Implement `Hash#rehash` in C using `sg_compact()`. --- mrblib/hash.rb | 20 -------------------- src/hash.c | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 20 deletions(-) (limited to 'src/hash.c') diff --git a/mrblib/hash.rb b/mrblib/hash.rb index 245b76ee0..582cfbc6c 100644 --- a/mrblib/hash.rb +++ b/mrblib/hash.rb @@ -317,26 +317,6 @@ class Hash } h end - - ## - # call-seq: - # hsh.rehash -> hsh - # - # Rebuilds the hash based on the current hash values for each key. If - # values of key objects have changed since they were inserted, this - # method will reindex hsh. - # - # h = {"AAA" => "b"} - # h.keys[0].chop! - # h #=> {"AA"=>"b"} - # h["AA"] #=> nil - # h.rehash #=> {"AA"=>"b"} - # h["AA"] #=> "b" - # - def rehash - # do nothing (for now) - self - end end ## diff --git a/src/hash.c b/src/hash.c index 07a12be68..21dc846af 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1367,6 +1367,26 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) return; } +/* + * call-seq: + * hsh.rehash -> hsh + * + * Rebuilds the hash based on the current hash values for each key. If + * values of key objects have changed since they were inserted, this + * method will reindex hsh. + * + * h = {"AAA" => "b"} + * h.keys[0].chop! + * h.rehash #=> {"AA"=>"b"} + * h["AA"] #=> "b" + */ +static mrb_value +mrb_hash_rehash(mrb_state *mrb, mrb_value self) +{ + sg_compact(mrb, RHASH_TBL(self)); + return self; +} + void mrb_init_hash(mrb_state *mrb) { @@ -1398,6 +1418,7 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "store", mrb_hash_aset, MRB_ARGS_REQ(2)); /* 15.2.13.4.26 */ mrb_define_method(mrb, h, "value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.27 */ mrb_define_method(mrb, h, "values", mrb_hash_values, MRB_ARGS_NONE()); /* 15.2.13.4.28 */ + mrb_define_method(mrb, h, "rehash", mrb_hash_rehash, MRB_ARGS_NONE()); mrb_define_method(mrb, h, "to_hash", mrb_hash_to_hash, MRB_ARGS_NONE()); /* 15.2.13.4.29 (x)*/ } -- cgit v1.2.3 From 82e00ce60f358489ae2c3a7fdbe4da9bf264b286 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 12 Oct 2018 08:25:54 +0900 Subject: `Hash#delete` should return the deleted value; fix #4133 --- src/hash.c | 2 +- test/t/hash.rb | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 21dc846af..5bccb3091 100644 --- a/src/hash.c +++ b/src/hash.c @@ -460,7 +460,7 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) } key2 = seg->e[i].key; if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) { - if (vp) *vp = key2; + if (vp) *vp = seg->e[i].val; seg->e[i].key = mrb_undef_value(); t->size--; return TRUE; diff --git a/test/t/hash.rb b/test/t/hash.rb index 5403a5901..8088bfa21 100644 --- a/test/t/hash.rb +++ b/test/t/hash.rb @@ -82,12 +82,12 @@ assert('Hash#default_proc', '15.2.13.4.7') do end assert('Hash#delete', '15.2.13.4.8') do - a = { 'abc' => 'abc' } - b = { 'abc' => 'abc' } + a = { 'abc' => 'ABC' } + b = { 'abc' => 'ABC' } b_tmp_1 = false b_tmp_2 = false - a.delete('abc') + assert_equal 'ABC', a.delete('abc') b.delete('abc') do |k| b_tmp_1 = true end -- cgit v1.2.3 From 249fef7dc49ee5c22256aa7e9c36cd788e0ba323 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 12 Oct 2018 08:30:57 +0900 Subject: Add `NULL` check in `sg_compact()`; fix #4139 --- src/hash.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 5bccb3091..325c7e66f 100644 --- a/src/hash.c +++ b/src/hash.c @@ -226,12 +226,14 @@ sg_index(mrb_state *mrb, seglist *t) static void sg_compact(mrb_state *mrb, seglist *t) { - segment *seg = t->rootseg; + segment *seg; mrb_int i; segment *seg2 = NULL; mrb_int i2; mrb_int size = 0; + if (t == NULL) return; + seg = t->rootseg; if (t->index && (size_t)t->size == t->index->size) { sg_index(mrb, t); return; -- cgit v1.2.3 From e5f160dc7ec4f5ae596a7129f368ccc06186c3ee Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 12 Oct 2018 19:00:45 +0900 Subject: Should not compare `undef` (deleted) key in hashes; fix #4136 --- src/hash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 325c7e66f..e1ceec489 100644 --- a/src/hash.c +++ b/src/hash.c @@ -402,7 +402,8 @@ sg_index_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) size_t step = 0; while (index->table[k]) { - if (sg_hash_equal(mrb, t, key, index->table[k]->key)) { + mrb_value key2 = index->table[k]->key; + if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) { if (vp) *vp = index->table[k]->val; return TRUE; } -- cgit v1.2.3 From 80b5a56f5139c9bcc6f064ac77c7a69b7489137c Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Sat, 20 Oct 2018 19:02:12 +0900 Subject: Need to freeze string keys. --- src/hash.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index e1ceec489..03a95dbd8 100644 --- a/src/hash.c +++ b/src/hash.c @@ -717,6 +717,7 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) { mrb_hash_modify(mrb, hash); + key = KEY(key); sg_put(mrb, RHASH_TBL(hash), key, val); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val); -- cgit v1.2.3 From 180b73fec437e21e2e862fc47bff9ad07f581d2c Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Fri, 16 Nov 2018 01:04:57 +0900 Subject: The key or value object could be reclaimed by GC; fix #4164 The GC may occur between `sg_shift` and `mrb_assoc_new`, in which case `key` and `value` could be freed even tough they are still alive. The issue is found and fixed by https://hackerone.com/hexodus --- src/hash.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 03a95dbd8..376c054cb 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1057,6 +1057,8 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) mrb_value del_key, del_val; sg_shift(mrb, sg, &del_key, &del_val); + mrb_gc_protect(mrb, del_key); + mrb_gc_protect(mrb, del_val); return mrb_assoc_new(mrb, del_key, del_val); } -- cgit v1.2.3 From 25d390d6ccf3da7a853ddd8272dd318ecadca316 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 19 Nov 2018 09:30:15 +0900 Subject: Improve Hash table using variable sized segments. --- include/mruby/hash.h | 2 +- src/hash.c | 308 +++++++++++++++++++++++++++------------------------ 2 files changed, 162 insertions(+), 148 deletions(-) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index dacdd9376..7811894ae 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -18,7 +18,7 @@ MRB_BEGIN_DECL struct RHash { MRB_OBJECT_HEADER; struct iv_tbl *iv; - struct seglist *ht; + struct htable *ht; }; #define mrb_hash_ptr(v) ((struct RHash*)(mrb_ptr(v))) diff --git a/src/hash.c b/src/hash.c index 376c054cb..f43fd901c 100644 --- a/src/hash.c +++ b/src/hash.c @@ -17,11 +17,12 @@ mrb_int mrb_float_id(mrb_float f); #endif /* return non zero to break the loop */ -typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key, mrb_value val, void *data); +typedef int (ht_foreach_func)(mrb_state *mrb,mrb_value key, mrb_value val, void *data); -#ifndef MRB_SG_SEGMENT_SIZE -#define MRB_SG_SEGMENT_SIZE 5 +#ifndef MRB_HT_INIT_SIZE +#define MRB_HT_INIT_SIZE 4 #endif +#define HT_SEG_INCREASE_RATIO 1.2 struct segkv { mrb_value key; @@ -29,8 +30,9 @@ struct segkv { }; typedef struct segment { + uint16_t size; struct segment *next; - struct segkv e[MRB_SG_SEGMENT_SIZE]; + struct segkv e[]; } segment; typedef struct segindex { @@ -40,16 +42,16 @@ typedef struct segindex { } segindex; /* Instance variable table structure */ -typedef struct seglist { +typedef struct htable { segment *rootseg; segment *lastseg; mrb_int size; - mrb_int last_len; + uint16_t last_len; segindex *index; -} seglist; +} htable; static /* inline */ size_t -sg_hash_func(mrb_state *mrb, seglist *t, mrb_value key) +ht_hash_func(mrb_state *mrb, htable *t, mrb_value key) { enum mrb_vtype tt = mrb_type(key); mrb_value hv; @@ -84,7 +86,7 @@ sg_hash_func(mrb_state *mrb, seglist *t, mrb_value key) } static inline mrb_bool -sg_hash_equal(mrb_state *mrb, seglist *t, mrb_value a, mrb_value b) +ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) { enum mrb_vtype tt = mrb_type(a); @@ -134,12 +136,12 @@ sg_hash_equal(mrb_state *mrb, seglist *t, mrb_value a, mrb_value b) } /* Creates the instance variable table. */ -static seglist* -sg_new(mrb_state *mrb) +static htable* +ht_new(mrb_state *mrb) { - seglist *t; + htable *t; - t = (seglist*)mrb_malloc(mrb, sizeof(seglist)); + t = (htable*)mrb_malloc(mrb, sizeof(htable)); t->size = 0; t->rootseg = NULL; t->lastseg = NULL; @@ -163,11 +165,11 @@ sg_new(mrb_state *mrb) #define UPPER_BOUND(x) ((x)>>2|(x)>>1) #endif -#define SG_MASK(index) ((index->capa)-1) +#define HT_MASK(index) ((index->capa)-1) -/* Build index for the segment list */ +/* Build index for the hash table */ static void -sg_index(mrb_state *mrb, seglist *t) +ht_index(mrb_state *mrb, htable *t) { size_t size = (size_t)t->size; size_t mask; @@ -175,14 +177,6 @@ sg_index(mrb_state *mrb, seglist *t) segment *seg; size_t i; - if (size < MRB_SG_SEGMENT_SIZE) { - if (index) { - failed: - mrb_free(mrb, index); - t->index = NULL; - } - return; - } /* allocate index table */ if (index && index->size >= UPPER_BOUND(index->capa)) { size = index->capa+1; @@ -190,7 +184,11 @@ sg_index(mrb_state *mrb, seglist *t) power2(size); if (!index || index->capa < size) { index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); - if (index == NULL) goto failed; + if (index == NULL) { + mrb_free(mrb, index); + t->index = NULL; + return; + } t->index = index; } index->size = t->size; @@ -200,10 +198,10 @@ sg_index(mrb_state *mrb, seglist *t) } /* rebuld index */ - mask = SG_MASK(index); + mask = HT_MASK(index); seg = t->rootseg; while (seg) { - for (i=0; isize; i++) { mrb_value key; size_t k, step = 0; @@ -212,7 +210,7 @@ sg_index(mrb_state *mrb, seglist *t) } key = seg->e[i].key; if (mrb_undef_p(key)) continue; - k = sg_hash_func(mrb, t, key) & mask; + k = ht_hash_func(mrb, t, key) & mask; while (index->table[k]) { k = (k+(++step)) & mask; } @@ -222,9 +220,9 @@ sg_index(mrb_state *mrb, seglist *t) } } -/* Compacts the segment list removing deleted entries. */ +/* Compacts the hash table removing deleted entries. */ static void -sg_compact(mrb_state *mrb, seglist *t) +ht_compact(mrb_state *mrb, htable *t) { segment *seg; mrb_int i; @@ -235,11 +233,11 @@ sg_compact(mrb_state *mrb, seglist *t) if (t == NULL) return; seg = t->rootseg; if (t->index && (size_t)t->size == t->index->size) { - sg_index(mrb, t); + ht_index(mrb, t); return; } while (seg) { - for (i=0; isize; i++) { mrb_value k = seg->e[i].key; if (!seg->next && i >= t->last_len) { @@ -255,7 +253,7 @@ sg_compact(mrb_state *mrb, seglist *t) size++; if (seg2 != NULL) { seg2->e[i2++] = seg->e[i]; - if (i2 >= MRB_SG_SEGMENT_SIZE) { + if (i2 >= seg2->size) { seg2 = seg2->next; i2 = 0; } @@ -279,13 +277,31 @@ sg_compact(mrb_state *mrb, seglist *t) } } if (t->index) { - sg_index(mrb, t); + ht_index(mrb, t); } } -/* Set the value for the key in the indexed segment list. */ +static segment* +segment_alloc(mrb_state *mrb, segment *seg) +{ + uint32_t size; + + if (!seg) size = MRB_HT_INIT_SIZE; + else { + size = seg->size*HT_SEG_INCREASE_RATIO + 1; + if (size > UINT16_MAX) size = UINT16_MAX; + } + + seg = (segment*)mrb_malloc(mrb, sizeof(segment)+sizeof(struct segkv)*size); + seg->size = size; + seg->next = NULL; + + return seg; +} + +/* Set the value for the key in the indexed table. */ static void -sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +ht_index_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) { segindex *index = t->index; size_t k, sp, step = 0, mask; @@ -293,18 +309,18 @@ sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) if (index->size >= UPPER_BOUND(index->capa)) { /* need to expand table */ - sg_compact(mrb, t); + ht_compact(mrb, t); index = t->index; } - mask = SG_MASK(index); + mask = HT_MASK(index); sp = index->capa; - k = sg_hash_func(mrb, t, key) & mask; + k = ht_hash_func(mrb, t, key) & mask; while (index->table[k]) { mrb_value key2 = index->table[k]->key; if (mrb_undef_p(key2)) { if (sp == index->capa) sp = k; } - else if (sg_hash_equal(mrb, t, key, key2)) { + else if (ht_hash_equal(mrb, t, key, key2)) { index->table[k]->val = val; return; } @@ -316,11 +332,11 @@ sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) /* put the value at the last */ seg = t->lastseg; - if (t->last_len < MRB_SG_SEGMENT_SIZE) { + if (t->last_len < seg->size) { index->table[k] = &seg->e[t->last_len++]; } else { /* append a new segment */ - seg->next = (segment*)mrb_malloc(mrb, sizeof(segment)); + seg->next = segment_alloc(mrb, seg); seg = seg->next; seg->next = NULL; t->lastseg = seg; @@ -333,21 +349,21 @@ sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) t->size++; } -/* Set the value for the key in the segment list. */ +/* Set the value for the key in the table. */ static void -sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +ht_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) { segment *seg; mrb_int i, deleted = 0; if (t == NULL) return; if (t->index) { - sg_index_put(mrb, t, key, val); + ht_index_put(mrb, t, key, val); return; } seg = t->rootseg; while (seg) { - for (i=0; isize; i++) { mrb_value k = seg->e[i].key; /* Found room in last segment after last_len */ if (!seg->next && i >= t->last_len) { @@ -361,49 +377,58 @@ sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) deleted++; continue; } - if (sg_hash_equal(mrb, t, k, key)) { + if (ht_hash_equal(mrb, t, k, key)) { seg->e[i].val = val; return; } } seg = seg->next; } - /* Not found */ - if (deleted > MRB_SG_SEGMENT_SIZE) { - sg_compact(mrb, t); + + /* Compact if last segment has room */ + if (deleted > 0 && deleted > MRB_HT_INIT_SIZE) { + ht_compact(mrb, t); } t->size++; - seg = (segment*)mrb_malloc(mrb, sizeof(segment)); - seg->next = NULL; - seg->e[0].key = key; - seg->e[0].val = val; - t->last_len = 1; - if (t->rootseg == NULL) { - t->rootseg = seg; + /* check if thre's room after compaction */ + if (t->lastseg && t->last_len < t->lastseg->size) { + seg = t->lastseg; + i = t->last_len; } else { - t->lastseg->next = seg; + /* append new segment */ + seg = segment_alloc(mrb, t->lastseg); + i = 0; + if (t->rootseg == NULL) { + t->rootseg = seg; + } + else { + t->lastseg->next = seg; + } + t->lastseg = seg; } - t->lastseg = seg; - if (t->index == NULL && t->size > MRB_SG_SEGMENT_SIZE*4) { - sg_index(mrb, t); + seg->e[i].key = key; + seg->e[i].val = val; + t->last_len = i+1; + if (t->index == NULL && t->size > MRB_HT_INIT_SIZE*4) { + ht_index(mrb, t); } } -/* Get a value for a key from the indexed segment list. */ +/* Get a value for a key from the indexed table. */ static mrb_bool -sg_index_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +ht_index_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) { segindex *index = t->index; - size_t mask = SG_MASK(index); - size_t k = sg_hash_func(mrb, t, key) & mask; + size_t mask = HT_MASK(index); + size_t k = ht_hash_func(mrb, t, key) & mask; size_t step = 0; while (index->table[k]) { mrb_value key2 = index->table[k]->key; - if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) { + if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { if (vp) *vp = index->table[k]->val; return TRUE; } @@ -412,28 +437,28 @@ sg_index_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) return FALSE; } -/* Get a value for a key from the segment list. */ +/* Get a value for a key from the hash table. */ static mrb_bool -sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +ht_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) { segment *seg; mrb_int i; if (t == NULL) return FALSE; if (t->index) { - return sg_index_get(mrb, t, key, vp); + return ht_index_get(mrb, t, key, vp); } seg = t->rootseg; while (seg) { - for (i=0; isize; i++) { mrb_value k = seg->e[i].key; if (!seg->next && i >= t->last_len) { return FALSE; } if (mrb_undef_p(k)) continue; - if (sg_hash_equal(mrb, t, k, key)) { + if (ht_hash_equal(mrb, t, k, key)) { if (vp) *vp = seg->e[i].val; return TRUE; } @@ -446,7 +471,7 @@ sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) /* Deletes the value for the symbol from the instance variable table. */ /* Deletion is done by overwriting keys by `undef`. */ static mrb_bool -sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) { segment *seg; mrb_int i; @@ -454,7 +479,7 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) if (t == NULL) return FALSE; seg = t->rootseg; while (seg) { - for (i=0; isize; i++) { mrb_value key2; if (!seg->next && i >= t->last_len) { @@ -462,7 +487,7 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) return FALSE; } key2 = seg->e[i].key; - if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) { + if (!mrb_undef_p(key2) && ht_hash_equal(mrb, t, key, key2)) { if (vp) *vp = seg->e[i].val; seg->e[i].key = mrb_undef_value(); t->size--; @@ -476,18 +501,15 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) /* Iterates over the instance variable table. */ static void -sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) +ht_foreach(mrb_state *mrb, htable *t, ht_foreach_func *func, void *p) { segment *seg; mrb_int i; if (t == NULL) return; - if (t->index && t->index->size-(size_t)t->size > MRB_SG_SEGMENT_SIZE) { - sg_compact(mrb, t); - } seg = t->rootseg; while (seg) { - for (i=0; isize; i++) { /* no value in last segment after last_len */ if (!seg->next && i >= t->last_len) { return; @@ -500,34 +522,26 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) } } -/* Get the size of the instance variable table. */ -static mrb_int -sg_size(mrb_state *mrb, seglist *t) -{ - if (t == NULL) return 0; - return t->size; -} - /* Copy the instance variable table. */ -static seglist* -sg_copy(mrb_state *mrb, seglist *t) +static htable* +ht_copy(mrb_state *mrb, htable *t) { segment *seg; - seglist *t2; + htable *t2; mrb_int i; seg = t->rootseg; - t2 = sg_new(mrb); + t2 = ht_new(mrb); - while (seg != NULL) { - for (i=0; isize; i++) { mrb_value key = seg->e[i].key; mrb_value val = seg->e[i].val; if ((seg->next == NULL) && (i >= t->last_len)) { return t2; } - sg_put(mrb, t2, key, val); + ht_put(mrb, t2, key, val); } seg = seg->next; } @@ -536,7 +550,7 @@ sg_copy(mrb_state *mrb, seglist *t) /* Free memory of the instance variable table. */ static void -sg_free(mrb_state *mrb, seglist *t) +ht_free(mrb_state *mrb, htable *t) { segment *seg; @@ -576,20 +590,20 @@ hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) void mrb_gc_mark_hash(mrb_state *mrb, struct RHash *hash) { - sg_foreach(mrb, hash->ht, hash_mark_i, NULL); + ht_foreach(mrb, hash->ht, hash_mark_i, NULL); } size_t mrb_gc_mark_hash_size(mrb_state *mrb, struct RHash *hash) { if (!hash->ht) return 0; - return sg_size(mrb, hash->ht)*2; + return hash->ht->size*2; } void mrb_gc_free_hash(mrb_state *mrb, struct RHash *hash) { - sg_free(mrb, hash->ht); + ht_free(mrb, hash->ht); } MRB_API mrb_value @@ -609,8 +623,8 @@ mrb_hash_new_capa(mrb_state *mrb, mrb_int capa) struct RHash *h; h = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - /* preallocate segment list */ - h->ht = sg_new(mrb); + /* preallocate hash table */ + h->ht = ht_new(mrb); /* capacity ignored */ h->iv = 0; return mrb_obj_value(h); @@ -624,7 +638,7 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) { mrb_value orig; struct RHash* copy; - seglist *orig_h; + htable *orig_h; mrb_value ifnone, vret; mrb_get_args(mrb, "o", &orig); @@ -635,7 +649,7 @@ mrb_hash_init_copy(mrb_state *mrb, mrb_value self) orig_h = RHASH_TBL(self); copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = sg_copy(mrb, orig_h); + copy->ht = ht_copy(mrb, orig_h); if (MRB_RHASH_DEFAULT_P(self)) { copy->flags |= MRB_HASH_DEFAULT; @@ -663,22 +677,22 @@ check_kdict_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) void mrb_hash_check_kdict(mrb_state *mrb, mrb_value self) { - seglist *sg; + htable *t; - sg = RHASH_TBL(self); - if (!sg || sg_size(mrb, sg) == 0) return; - sg_foreach(mrb, sg, check_kdict_i, NULL); + t = RHASH_TBL(self); + if (!t || t->size == 0) return; + ht_foreach(mrb, t, check_kdict_i, NULL); } MRB_API mrb_value mrb_hash_dup(mrb_state *mrb, mrb_value self) { struct RHash* copy; - seglist *orig_h; + htable *orig_h; orig_h = RHASH_TBL(self); copy = (struct RHash*)mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class); - copy->ht = orig_h ? sg_copy(mrb, orig_h) : NULL; + copy->ht = orig_h ? ht_copy(mrb, orig_h) : NULL; return mrb_obj_value(copy); } @@ -688,7 +702,7 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) mrb_value val; mrb_sym mid; - if (sg_get(mrb, RHASH_TBL(hash), key, &val)) { + if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { return val; } @@ -705,7 +719,7 @@ mrb_hash_fetch(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def) { mrb_value val; - if (sg_get(mrb, RHASH_TBL(hash), key, &val)) { + if (ht_get(mrb, RHASH_TBL(hash), key, &val)) { return val; } /* not found */ @@ -718,7 +732,7 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) mrb_hash_modify(mrb, hash); key = KEY(key); - sg_put(mrb, RHASH_TBL(hash), key, val); + ht_put(mrb, RHASH_TBL(hash), key, val); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), key); mrb_field_write_barrier_value(mrb, (struct RBasic*)RHASH(hash), val); return; @@ -744,7 +758,7 @@ mrb_hash_modify(mrb_state *mrb, mrb_value hash) } if (!RHASH_TBL(hash)) { - RHASH_TBL(hash) = sg_new(mrb); + RHASH_TBL(hash) = ht_new(mrb); } } @@ -985,10 +999,10 @@ mrb_hash_set_default_proc(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key) { - seglist *sg = RHASH_TBL(hash); + htable *t = RHASH_TBL(hash); mrb_value del_val; - if (sg_del(mrb, sg, key, &del_val)) { + if (ht_del(mrb, t, key, &del_val)) { return del_val; } @@ -1006,15 +1020,15 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) return mrb_hash_delete_key(mrb, self, key); } -/* find first element in segment list, and remove it. */ +/* find first element in a hash table, and remove it. */ static void -sg_shift(mrb_state *mrb, seglist *t, mrb_value *kp, mrb_value *vp) +ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) { segment *seg = t->rootseg; mrb_int i; while (seg) { - for (i=0; isize; i++) { mrb_value key; if (!seg->next && i >= t->last_len) { @@ -1050,13 +1064,13 @@ sg_shift(mrb_state *mrb, seglist *t, mrb_value *kp, mrb_value *vp) static mrb_value mrb_hash_shift(mrb_state *mrb, mrb_value hash) { - seglist *sg = RHASH_TBL(hash); + htable *t = RHASH_TBL(hash); mrb_hash_modify(mrb, hash); - if (sg && sg_size(mrb, sg) > 0) { + if (t && t->size > 0) { mrb_value del_key, del_val; - sg_shift(mrb, sg, &del_key, &del_val); + ht_shift(mrb, t, &del_key, &del_val); mrb_gc_protect(mrb, del_key); mrb_gc_protect(mrb, del_val); return mrb_assoc_new(mrb, del_key, del_val); @@ -1088,11 +1102,11 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash) { - seglist *sg = RHASH_TBL(hash); + htable *t = RHASH_TBL(hash); mrb_hash_modify(mrb, hash); - if (sg) { - sg_free(mrb, sg); + if (t) { + ht_free(mrb, t); RHASH_TBL(hash) = NULL; } return hash; @@ -1144,19 +1158,19 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) static mrb_value mrb_hash_size_m(mrb_state *mrb, mrb_value self) { - seglist *sg = RHASH_TBL(self); + htable *t = RHASH_TBL(self); - if (!sg) return mrb_fixnum_value(0); - return mrb_fixnum_value(sg_size(mrb, sg)); + if (!t) return mrb_fixnum_value(0); + return mrb_fixnum_value(t->size); } MRB_API mrb_bool mrb_hash_empty_p(mrb_state *mrb, mrb_value self) { - seglist *sg = RHASH_TBL(self); + htable *t = RHASH_TBL(self); - if (!sg) return TRUE; - return sg_size(mrb, sg) == 0; + if (!t) return TRUE; + return t->size == 0; } /* 15.2.13.4.12 */ @@ -1212,14 +1226,14 @@ hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) MRB_API mrb_value mrb_hash_keys(mrb_state *mrb, mrb_value hash) { - seglist *sg = RHASH_TBL(hash); - size_t size; + htable *t = RHASH_TBL(hash); + mrb_int size; mrb_value ary; - if (!sg || (size = sg_size(mrb, sg)) == 0) + if (!t || (size = t->size) == 0) return mrb_ary_new(mrb); ary = mrb_ary_new_capa(mrb, size); - sg_foreach(mrb, sg, hash_keys_i, (void*)&ary); + ht_foreach(mrb, t, hash_keys_i, (void*)&ary); return ary; } @@ -1246,14 +1260,14 @@ hash_vals_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) MRB_API mrb_value mrb_hash_values(mrb_state *mrb, mrb_value hash) { - seglist *sg = RHASH_TBL(hash); - size_t size; + htable *t = RHASH_TBL(hash); + mrb_int size; mrb_value ary; - if (!sg || (size = sg_size(mrb, sg)) == 0) + if (!t || (size = t->size) == 0) return mrb_ary_new(mrb); ary = mrb_ary_new_capa(mrb, size); - sg_foreach(mrb, sg, hash_vals_i, (void*)&ary); + ht_foreach(mrb, t, hash_vals_i, (void*)&ary); return ary; } @@ -1279,10 +1293,10 @@ mrb_hash_values(mrb_state *mrb, mrb_value hash) MRB_API mrb_bool mrb_hash_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) { - seglist *sg; + htable *t; - sg = RHASH_TBL(hash); - if (sg_get(mrb, sg, key, NULL)) { + t = RHASH_TBL(hash); + if (ht_get(mrb, t, key, NULL)) { return TRUE; } return FALSE; @@ -1340,23 +1354,23 @@ mrb_hash_has_value(mrb_state *mrb, mrb_value hash) mrb_get_args(mrb, "o", &val); arg.found = FALSE; arg.val = val; - sg_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg); + ht_foreach(mrb, RHASH_TBL(hash), hash_has_value_i, &arg); return mrb_bool_value(arg.found); } static int merge_i(mrb_state *mrb, mrb_value key, mrb_value val, void *data) { - seglist *h1 = (seglist*)data; + htable *h1 = (htable*)data; - sg_put(mrb, h1, key, val); + ht_put(mrb, h1, key, val); return 0; } MRB_API void mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) { - seglist *h1, *h2; + htable *h1, *h2; mrb_hash_modify(mrb, hash1); hash2 = mrb_ensure_hash_type(mrb, hash2); @@ -1365,10 +1379,10 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) if (!h2) return; if (!h1) { - RHASH_TBL(hash1) = sg_copy(mrb, h2); + RHASH_TBL(hash1) = ht_copy(mrb, h2); return; } - sg_foreach(mrb, h2, merge_i, h1); + ht_foreach(mrb, h2, merge_i, h1); mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash1)); return; } @@ -1389,7 +1403,7 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) static mrb_value mrb_hash_rehash(mrb_state *mrb, mrb_value self) { - sg_compact(mrb, RHASH_TBL(self)); + ht_compact(mrb, RHASH_TBL(self)); return self; } -- cgit v1.2.3 From 610bcc88c2b4f3ca9bbfebb57279c25806fa0461 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 19 Sep 2018 22:53:48 +0900 Subject: Removed `to_hash` conversion method. --- mrbgems/mruby-hash-ext/mrblib/hash.rb | 30 +++++++----------------------- mrbgems/mruby-kernel-ext/src/kernel.c | 20 ++++++-------------- mrbgems/mruby-sprintf/src/sprintf.c | 2 +- mrblib/hash.rb | 14 ++++---------- src/class.c | 15 ++------------- src/hash.c | 28 ---------------------------- src/object.c | 17 +++++++++++++++++ 7 files changed, 37 insertions(+), 89 deletions(-) (limited to 'src/hash.c') diff --git a/mrbgems/mruby-hash-ext/mrblib/hash.rb b/mrbgems/mruby-hash-ext/mrblib/hash.rb index f1143e25b..547f3404a 100644 --- a/mrbgems/mruby-hash-ext/mrblib/hash.rb +++ b/mrbgems/mruby-hash-ext/mrblib/hash.rb @@ -27,9 +27,9 @@ class Hash length = object.length if length == 1 o = object[0] - if o.respond_to?(:to_hash) + if Hash === o h = self.new - object[0].to_hash.each { |k, v| h[k] = v } + o.each { |k, v| h[k] = v } return h elsif o.respond_to?(:to_a) h = self.new @@ -82,7 +82,7 @@ class Hash # def merge!(other, &block) - raise TypeError, "can't convert argument into Hash" unless other.respond_to?(:to_hash) + raise TypeError, "Hash required (#{other.class} given)" unless Hash === other if block other.each_key{|k| self[k] = (self.has_key?(k))? block.call(k, self[k], other[k]): other[k] @@ -310,11 +310,7 @@ class Hash # h1 < h1 #=> false # def <(hash) - begin - hash = hash.to_hash - rescue NoMethodError - raise TypeError, "can't convert #{hash.class} to Hash" - end + raise TypeError, "can't convert #{hash.class} to Hash" unless Hash === hash size < hash.size and all? {|key, val| hash.key?(key) and hash[key] == val } @@ -334,11 +330,7 @@ class Hash # h1 <= h1 #=> true # def <=(hash) - begin - hash = hash.to_hash - rescue NoMethodError - raise TypeError, "can't convert #{hash.class} to Hash" - end + raise TypeError, "can't convert #{hash.class} to Hash" unless Hash === hash size <= hash.size and all? {|key, val| hash.key?(key) and hash[key] == val } @@ -358,11 +350,7 @@ class Hash # h1 > h1 #=> false # def >(hash) - begin - hash = hash.to_hash - rescue NoMethodError - raise TypeError, "can't convert #{hash.class} to Hash" - end + raise TypeError, "can't convert #{hash.class} to Hash" unless Hash === hash size > hash.size and hash.all? {|key, val| key?(key) and self[key] == val } @@ -382,11 +370,7 @@ class Hash # h1 >= h1 #=> true # def >=(hash) - begin - hash = hash.to_hash - rescue NoMethodError - raise TypeError, "can't convert #{hash.class} to Hash" - end + raise TypeError, "can't convert #{hash.class} to Hash" unless Hash === hash size >= hash.size and hash.all? {|key, val| key?(key) and self[key] == val } diff --git a/mrbgems/mruby-kernel-ext/src/kernel.c b/mrbgems/mruby-kernel-ext/src/kernel.c index 324753f6e..99affbfa4 100644 --- a/mrbgems/mruby-kernel-ext/src/kernel.c +++ b/mrbgems/mruby-kernel-ext/src/kernel.c @@ -184,9 +184,9 @@ mrb_f_array(mrb_state *mrb, mrb_value self) * call-seq: * Hash(arg) -> hash * - * Converts arg to a Hash by calling - * arg.to_hash. Returns an empty Hash when - * arg is nil or []. + * Returns a Hash if arg is a Hash. + * Returns an empty Hash when arg is nil + * or []. * * Hash([]) #=> {} * Hash(nil) #=> {} @@ -197,21 +197,13 @@ mrb_f_array(mrb_state *mrb, mrb_value self) static mrb_value mrb_f_hash(mrb_state *mrb, mrb_value self) { - mrb_value arg, tmp; + mrb_value arg; mrb_get_args(mrb, "o", &arg); - if (mrb_nil_p(arg)) { + if (mrb_nil_p(arg) || (mrb_array_p(arg) && RARRAY_LEN(arg) == 0)) { return mrb_hash_new(mrb); } - tmp = mrb_check_convert_type(mrb, arg, MRB_TT_HASH, "Hash", "to_hash"); - if (mrb_nil_p(tmp)) { - if (mrb_array_p(arg) && RARRAY_LEN(arg) == 0) { - return mrb_hash_new(mrb); - } - mrb_raisef(mrb, E_TYPE_ERROR, "can't convert %S into Hash", - mrb_str_new_cstr(mrb, mrb_obj_classname(mrb, arg))); - } - return tmp; + return mrb_ensure_hash_type(mrb, arg); } /* diff --git a/mrbgems/mruby-sprintf/src/sprintf.c b/mrbgems/mruby-sprintf/src/sprintf.c index 15d7b5464..5a4a7899e 100644 --- a/mrbgems/mruby-sprintf/src/sprintf.c +++ b/mrbgems/mruby-sprintf/src/sprintf.c @@ -233,7 +233,7 @@ get_hash(mrb_state *mrb, mrb_value *hash, mrb_int argc, const mrb_value *argv) if (argc != 2) { mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required"); } - tmp = mrb_check_convert_type(mrb, argv[1], MRB_TT_HASH, "Hash", "to_hash"); + tmp = mrb_check_hash_type(mrb, argv[1]); if (mrb_nil_p(tmp)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required"); } diff --git a/mrblib/hash.rb b/mrblib/hash.rb index 6b61218ff..609883ecb 100644 --- a/mrblib/hash.rb +++ b/mrblib/hash.rb @@ -12,9 +12,7 @@ class Hash # ISO 15.2.13.4.1 def ==(hash) return true if self.equal?(hash) - begin - hash = hash.to_hash - rescue NoMethodError + unless Hash === hash return false end return false if self.size != hash.size @@ -32,9 +30,7 @@ class Hash # ISO 15.2.13.4.32 (x) def eql?(hash) return true if self.equal?(hash) - begin - hash = hash.to_hash - rescue NoMethodError + unless Hash === hash return false end return false if self.size != hash.size @@ -153,9 +149,8 @@ class Hash # # ISO 15.2.13.4.23 def replace(hash) - raise TypeError, "can't convert argument into Hash" unless hash.respond_to?(:to_hash) + raise TypeError, "Hash required (#{hash.class} given)" unless Hash === hash self.clear - hash = hash.to_hash hash.each_key{|k| self[k] = hash[k] } @@ -178,8 +173,7 @@ class Hash # # ISO 15.2.13.4.22 def merge(other, &block) - raise TypeError, "can't convert argument into Hash" unless other.respond_to?(:to_hash) - other = other.to_hash + raise TypeError, "Hash required (#{other.class} given)" unless Hash === other h = self.dup if block other.each_key{|k| diff --git a/src/class.c b/src/class.c index 5d6ff4b39..dd5b65cc3 100644 --- a/src/class.c +++ b/src/class.c @@ -492,18 +492,6 @@ mrb_notimplement_m(mrb_state *mrb, mrb_value self) return mrb_nil_value(); } -static mrb_value -check_type(mrb_state *mrb, mrb_value val, enum mrb_vtype t, const char *c, const char *m) -{ - mrb_value tmp; - - tmp = mrb_check_convert_type(mrb, val, t, c, m); - if (mrb_nil_p(tmp)) { - mrb_raisef(mrb, E_TYPE_ERROR, "expected %S", mrb_str_new_cstr(mrb, c)); - } - return tmp; -} - #define CHECK_TYPE(mrb, val, t, c) do { \ if (mrb_type(val) != (t)) {\ mrb_raisef(mrb, E_TYPE_ERROR, "expected %S", mrb_str_new_lit(mrb, c));\ @@ -527,7 +515,8 @@ to_ary(mrb_state *mrb, mrb_value val) static mrb_value to_hash(mrb_state *mrb, mrb_value val) { - return check_type(mrb, val, MRB_TT_HASH, "Hash", "to_hash"); + CHECK_TYPE(mrb, val, MRB_TT_HASH, "Hash"); + return val; } #define to_sym(mrb, ss) mrb_obj_to_sym(mrb, ss) diff --git a/src/hash.c b/src/hash.c index f43fd901c..467b20a51 100644 --- a/src/hash.c +++ b/src/hash.c @@ -738,18 +738,6 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) return; } -MRB_API mrb_value -mrb_ensure_hash_type(mrb_state *mrb, mrb_value hash) -{ - return mrb_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash"); -} - -MRB_API mrb_value -mrb_check_hash_type(mrb_state *mrb, mrb_value hash) -{ - return mrb_check_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash"); -} - static void mrb_hash_modify(mrb_state *mrb, mrb_value hash) { @@ -1189,20 +1177,6 @@ mrb_hash_empty_m(mrb_state *mrb, mrb_value self) return mrb_bool_value(mrb_hash_empty_p(mrb, self)); } -/* 15.2.13.4.29 (x)*/ -/* - * call-seq: - * hsh.to_hash => hsh - * - * Returns +self+. - */ - -static mrb_value -mrb_hash_to_hash(mrb_state *mrb, mrb_value hash) -{ - return hash; -} - static int hash_keys_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) { @@ -1439,6 +1413,4 @@ mrb_init_hash(mrb_state *mrb) mrb_define_method(mrb, h, "value?", mrb_hash_has_value, MRB_ARGS_REQ(1)); /* 15.2.13.4.27 */ mrb_define_method(mrb, h, "values", mrb_hash_values, MRB_ARGS_NONE()); /* 15.2.13.4.28 */ mrb_define_method(mrb, h, "rehash", mrb_hash_rehash, MRB_ARGS_NONE()); - - mrb_define_method(mrb, h, "to_hash", mrb_hash_to_hash, MRB_ARGS_NONE()); /* 15.2.13.4.29 (x)*/ } diff --git a/src/object.c b/src/object.c index a105c62f0..66dfa0f97 100644 --- a/src/object.c +++ b/src/object.c @@ -623,6 +623,23 @@ mrb_check_array_type(mrb_state *mrb, mrb_value ary) return ary; } +MRB_API mrb_value +mrb_ensure_hash_type(mrb_state *mrb, mrb_value hash) +{ + if (!mrb_hash_p(hash)) { + mrb_raisef(mrb, E_TYPE_ERROR, "%S cannot be converted to Hash", + inspect_type(mrb, hash)); + } + return hash; +} + +MRB_API mrb_value +mrb_check_hash_type(mrb_state *mrb, mrb_value hash) +{ + if (!mrb_hash_p(hash)) return mrb_nil_value(); + return hash; +} + MRB_API mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj) { -- cgit v1.2.3 From c0d91a14e78de39b3940e56d4cccdbdb17b65694 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 11 Dec 2018 09:01:05 +0900 Subject: Add API function `mrb_hash_foreach()` to iterate over items in a hash. --- include/mruby/hash.h | 4 ++++ src/hash.c | 10 +++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 7811894ae..9b20bf9e6 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -210,6 +210,10 @@ void mrb_gc_mark_hash(mrb_state*, struct RHash*); size_t mrb_gc_mark_hash_size(mrb_state*, struct RHash*); void mrb_gc_free_hash(mrb_state*, struct RHash*); +/* return non zero to break the loop */ +typedef int (ht_foreach_func)(mrb_state *mrb, mrb_value key, mrb_value val, void *data); +MRB_API void mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, ht_foreach_func *func, void *p); + MRB_END_DECL #endif /* MRUBY_HASH_H */ diff --git a/src/hash.c b/src/hash.c index 467b20a51..00d5ad2b1 100644 --- a/src/hash.c +++ b/src/hash.c @@ -16,9 +16,6 @@ mrb_int mrb_float_id(mrb_float f); #endif -/* return non zero to break the loop */ -typedef int (ht_foreach_func)(mrb_state *mrb,mrb_value key, mrb_value val, void *data); - #ifndef MRB_HT_INIT_SIZE #define MRB_HT_INIT_SIZE 4 #endif @@ -522,6 +519,13 @@ ht_foreach(mrb_state *mrb, htable *t, ht_foreach_func *func, void *p) } } +/* Iterates over the instance variable table. */ +MRB_API void +mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, ht_foreach_func *func, void *p) +{ + ht_foreach(mrb, hash->ht, func, p); +} + /* Copy the instance variable table. */ static htable* ht_copy(mrb_state *mrb, htable *t) -- cgit v1.2.3 From 378c728338c7dd828daf32468b4ce4f73fae4cb6 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 11 Dec 2018 09:04:08 +0900 Subject: Update comments. --- src/hash.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 00d5ad2b1..5920fe2c3 100644 --- a/src/hash.c +++ b/src/hash.c @@ -38,7 +38,7 @@ typedef struct segindex { struct segkv *table[]; } segindex; -/* Instance variable table structure */ +/* hash table structure */ typedef struct htable { segment *rootseg; segment *lastseg; @@ -132,7 +132,7 @@ ht_hash_equal(mrb_state *mrb, htable *t, mrb_value a, mrb_value b) } } -/* Creates the instance variable table. */ +/* Creates the hash table. */ static htable* ht_new(mrb_state *mrb) { @@ -346,7 +346,7 @@ ht_index_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) t->size++; } -/* Set the value for the key in the table. */ +/* Set the value for the key in the hash table. */ static void ht_put(mrb_state *mrb, htable *t, mrb_value key, mrb_value val) { @@ -465,7 +465,7 @@ ht_get(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) return FALSE; } -/* Deletes the value for the symbol from the instance variable table. */ +/* Deletes the value for the symbol from the hash table. */ /* Deletion is done by overwriting keys by `undef`. */ static mrb_bool ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) @@ -496,7 +496,7 @@ ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) return FALSE; } -/* Iterates over the instance variable table. */ +/* Iterates over the hash table. */ static void ht_foreach(mrb_state *mrb, htable *t, ht_foreach_func *func, void *p) { @@ -519,14 +519,14 @@ ht_foreach(mrb_state *mrb, htable *t, ht_foreach_func *func, void *p) } } -/* Iterates over the instance variable table. */ +/* Iterates over the hash table. */ MRB_API void mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, ht_foreach_func *func, void *p) { ht_foreach(mrb, hash->ht, func, p); } -/* Copy the instance variable table. */ +/* Copy the hash table. */ static htable* ht_copy(mrb_state *mrb, htable *t) { @@ -552,7 +552,7 @@ ht_copy(mrb_state *mrb, htable *t) return t2; } -/* Free memory of the instance variable table. */ +/* Free memory of the hash table. */ static void ht_free(mrb_state *mrb, htable *t) { @@ -1012,7 +1012,7 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) return mrb_hash_delete_key(mrb, self, key); } -/* find first element in a hash table, and remove it. */ +/* find first element in the hash table, and remove it. */ static void ht_shift(mrb_state *mrb, htable *t, mrb_value *kp, mrb_value *vp) { -- cgit v1.2.3 From 265171a28c2805ae49d0ad70d0f35220b4cb4d4b Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 11 Dec 2018 10:41:26 +0900 Subject: Rename `ht_foreach_func` to `mrb_hash_foreach_func`. --- include/mruby/hash.h | 4 ++-- src/hash.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 9b20bf9e6..911a96042 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -211,8 +211,8 @@ size_t mrb_gc_mark_hash_size(mrb_state*, struct RHash*); void mrb_gc_free_hash(mrb_state*, struct RHash*); /* return non zero to break the loop */ -typedef int (ht_foreach_func)(mrb_state *mrb, mrb_value key, mrb_value val, void *data); -MRB_API void mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, ht_foreach_func *func, void *p); +typedef int (mrb_hash_foreach_func)(mrb_state *mrb, mrb_value key, mrb_value val, void *data); +MRB_API void mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, mrb_hash_foreach_func *func, void *p); MRB_END_DECL diff --git a/src/hash.c b/src/hash.c index 5920fe2c3..cb4a274e8 100644 --- a/src/hash.c +++ b/src/hash.c @@ -498,7 +498,7 @@ ht_del(mrb_state *mrb, htable *t, mrb_value key, mrb_value *vp) /* Iterates over the hash table. */ static void -ht_foreach(mrb_state *mrb, htable *t, ht_foreach_func *func, void *p) +ht_foreach(mrb_state *mrb, htable *t, mrb_hash_foreach_func *func, void *p) { segment *seg; mrb_int i; @@ -521,7 +521,7 @@ ht_foreach(mrb_state *mrb, htable *t, ht_foreach_func *func, void *p) /* Iterates over the hash table. */ MRB_API void -mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, ht_foreach_func *func, void *p) +mrb_hash_foreach(mrb_state *mrb, struct RHash *hash, mrb_hash_foreach_func *func, void *p) { ht_foreach(mrb, hash->ht, func, p); } -- cgit v1.2.3 From 60da293d5c4f4d00798f06ad955ea12311b6fd68 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Tue, 11 Dec 2018 10:54:47 +0900 Subject: Avoid using floating point number for HT_SEG_INCREASE_RATIO; ref #4182 --- src/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index cb4a274e8..3d6c2ed7d 100644 --- a/src/hash.c +++ b/src/hash.c @@ -19,7 +19,7 @@ mrb_int mrb_float_id(mrb_float f); #ifndef MRB_HT_INIT_SIZE #define MRB_HT_INIT_SIZE 4 #endif -#define HT_SEG_INCREASE_RATIO 1.2 +#define HT_SEG_INCREASE_RATIO 6 / 5 struct segkv { mrb_value key; -- cgit v1.2.3 From 62dd4d89fc6da2c38a9bc1913c1f25566a9e443e Mon Sep 17 00:00:00 2001 From: dearblue Date: Fri, 14 Dec 2018 21:41:07 +0900 Subject: Add `mrb_hash_size()` function. --- include/mruby/hash.h | 13 +++++++++++++ src/hash.c | 14 ++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'src/hash.c') diff --git a/include/mruby/hash.h b/include/mruby/hash.h index 911a96042..ce51b2016 100644 --- a/include/mruby/hash.h +++ b/include/mruby/hash.h @@ -165,6 +165,19 @@ MRB_API mrb_value mrb_hash_values(mrb_state *mrb, mrb_value hash); */ MRB_API mrb_value mrb_hash_clear(mrb_state *mrb, mrb_value hash); +/* + * Get hash size. + * + * Equivalent to: + * + * hash.size + * + * @param mrb The mruby state reference. + * @param hash The target hash. + * @return The hash size. + */ +MRB_API mrb_int mrb_hash_size(mrb_state *mrb, mrb_value hash); + /* * Copies the hash. * diff --git a/src/hash.c b/src/hash.c index 3d6c2ed7d..2f60eb74a 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1133,6 +1133,15 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) return val; } +MRB_API mrb_int +mrb_hash_size(mrb_state *mrb, mrb_value hash) +{ + htable *t = RHASH_TBL(hash); + + if (!t) return 0; + return t->size; +} + /* 15.2.13.4.20 */ /* 15.2.13.4.25 */ /* @@ -1150,10 +1159,7 @@ mrb_hash_aset(mrb_state *mrb, mrb_value self) static mrb_value mrb_hash_size_m(mrb_state *mrb, mrb_value self) { - htable *t = RHASH_TBL(self); - - if (!t) return mrb_fixnum_value(0); - return mrb_fixnum_value(t->size); + return mrb_fixnum_value(mrb_hash_size(mrb, self)); } MRB_API mrb_bool -- cgit v1.2.3 From ccc5edc60dd8f60d4e829586ac5541cb6f4e9461 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 17 Dec 2018 16:00:22 +0900 Subject: Small refactoring of #4188 --- src/hash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 2f60eb74a..6b92344c3 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1159,7 +1159,8 @@ mrb_hash_size(mrb_state *mrb, mrb_value hash) static mrb_value mrb_hash_size_m(mrb_state *mrb, mrb_value self) { - return mrb_fixnum_value(mrb_hash_size(mrb, self)); + mrb_int size = mrb_hash_size(mrb, self); + return mrb_fixnum_value(size); } MRB_API mrb_bool -- cgit v1.2.3 From ef93ff64054f2aa7e4451387fb768fe2307247a9 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Mon, 11 Feb 2019 15:19:20 +0900 Subject: Should not copy keys&values when a hash table is empty; fix #4270 --- src/hash.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index 6b92344c3..fd963c3de 100644 --- a/src/hash.c +++ b/src/hash.c @@ -536,6 +536,7 @@ ht_copy(mrb_state *mrb, htable *t) seg = t->rootseg; t2 = ht_new(mrb); + if (t->size == 0) return t2; while (seg) { for (i=0; isize; i++) { -- cgit v1.2.3 From e3beef065c2de80a843f329599b424676d83086c Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Tue, 9 Apr 2019 18:23:11 +0900 Subject: Extract frozen checking to function --- include/mruby.h | 6 ++++++ mrbgems/mruby-struct/src/struct.c | 5 +---- src/array.c | 4 +--- src/class.c | 7 +------ src/error.c | 7 +++++++ src/hash.c | 5 +---- src/string.c | 12 ++---------- src/variable.c | 4 +--- 8 files changed, 20 insertions(+), 30 deletions(-) (limited to 'src/hash.c') diff --git a/include/mruby.h b/include/mruby.h index 5b0d84cd3..939601b4d 100644 --- a/include/mruby.h +++ b/include/mruby.h @@ -1143,6 +1143,7 @@ MRB_API mrb_noreturn void mrb_exc_raise(mrb_state *mrb, mrb_value exc); MRB_API mrb_noreturn void mrb_raise(mrb_state *mrb, struct RClass *c, const char *msg); MRB_API mrb_noreturn void mrb_raisef(mrb_state *mrb, struct RClass *c, const char *fmt, ...); MRB_API mrb_noreturn void mrb_name_error(mrb_state *mrb, mrb_sym id, const char *fmt, ...); +MRB_API mrb_noreturn void mrb_frozen_error(mrb_state *mrb, void *frozen_obj); MRB_API void mrb_warn(mrb_state *mrb, const char *fmt, ...); MRB_API mrb_noreturn void mrb_bug(mrb_state *mrb, const char *fmt, ...); MRB_API void mrb_print_backtrace(mrb_state *mrb); @@ -1196,6 +1197,11 @@ MRB_API mrb_value mrb_to_int(mrb_state *mrb, mrb_value val); MRB_API mrb_value mrb_to_str(mrb_state *mrb, mrb_value val); MRB_API void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t); +static inline void mrb_check_frozen(mrb_state *mrb, void *o) +{ + if (MRB_FROZEN_P((struct RBasic*)o)) mrb_frozen_error(mrb, o); +} + typedef enum call_type { CALL_PUBLIC, CALL_FCALL, diff --git a/mrbgems/mruby-struct/src/struct.c b/mrbgems/mruby-struct/src/struct.c index c0ce71219..1df135a9f 100644 --- a/mrbgems/mruby-struct/src/struct.c +++ b/mrbgems/mruby-struct/src/struct.c @@ -87,10 +87,7 @@ mrb_struct_s_members_m(mrb_state *mrb, mrb_value klass) static void mrb_struct_modify(mrb_state *mrb, mrb_value strct) { - if (MRB_FROZEN_P(mrb_basic_ptr(strct))) { - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen struct"); - } - + mrb_check_frozen(mrb, mrb_basic_ptr(strct)); mrb_write_barrier(mrb, mrb_basic_ptr(strct)); } diff --git a/src/array.c b/src/array.c index 43f4c98b5..d4302cb22 100644 --- a/src/array.c +++ b/src/array.c @@ -120,9 +120,7 @@ ary_fill_with_nil(mrb_value *ptr, mrb_int size) static void ary_modify_check(mrb_state *mrb, struct RArray *a) { - if (MRB_FROZEN_P(a)) { - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen array"); - } + mrb_check_frozen(mrb, a); } static void diff --git a/src/class.c b/src/class.c index eaef787f7..fd56fa399 100644 --- a/src/class.c +++ b/src/class.c @@ -441,12 +441,7 @@ mrb_define_method_raw(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_method_ MRB_CLASS_ORIGIN(c); h = c->mt; - if (MRB_FROZEN_P(c)) { - if (c->tt == MRB_TT_MODULE) - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen module"); - else - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen class"); - } + mrb_check_frozen(mrb, c); if (!h) h = c->mt = kh_init(mt, mrb); k = kh_put(mt, mrb, h, mid); kh_value(h, k) = m; diff --git a/src/error.c b/src/error.c index e69812dda..4c1b67c99 100644 --- a/src/error.c +++ b/src/error.c @@ -484,6 +484,13 @@ mrb_no_method_error(mrb_state *mrb, mrb_sym id, mrb_value args, char const* fmt, mrb_exc_raise(mrb, exc); } +MRB_API mrb_noreturn void +mrb_frozen_error(mrb_state *mrb, void *frozen_obj) +{ + mrb_raisef(mrb, E_FROZEN_ERROR, "can't modify frozen %S", + mrb_obj_value(mrb_class(mrb, mrb_obj_value(frozen_obj)))); +} + void mrb_init_exception(mrb_state *mrb) { diff --git a/src/hash.c b/src/hash.c index fd963c3de..c4820513b 100644 --- a/src/hash.c +++ b/src/hash.c @@ -746,10 +746,7 @@ mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val) static void mrb_hash_modify(mrb_state *mrb, mrb_value hash) { - if (MRB_FROZEN_P(mrb_hash_ptr(hash))) { - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen hash"); - } - + mrb_check_frozen(mrb, mrb_hash_ptr(hash)); if (!RHASH_TBL(hash)) { RHASH_TBL(hash) = ht_new(mrb); } diff --git a/src/string.c b/src/string.c index 63c592d59..f7a805a94 100644 --- a/src/string.c +++ b/src/string.c @@ -493,20 +493,12 @@ str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset) return mrb_str_index(mrb, str, ptr, len, offset); } -static void -check_frozen(mrb_state *mrb, struct RString *s) -{ - if (MRB_FROZEN_P(s)) { - mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen string"); - } -} - static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) { mrb_int len; - check_frozen(mrb, s1); + mrb_check_frozen(mrb, s1); if (s1 == s2) return mrb_obj_value(s1); s1->flags &= ~MRB_STR_NO_UTF; s1->flags |= s2->flags&MRB_STR_NO_UTF; @@ -646,7 +638,7 @@ mrb_locale_from_utf8(const char *utf8, int len) MRB_API void mrb_str_modify(mrb_state *mrb, struct RString *s) { - check_frozen(mrb, s); + mrb_check_frozen(mrb, s); s->flags &= ~MRB_STR_NO_UTF; if (RSTR_SHARED_P(s)) { mrb_shared_string *shared = s->as.heap.aux.shared; diff --git a/src/variable.c b/src/variable.c index 9edfb32bd..cf89a4a02 100644 --- a/src/variable.c +++ b/src/variable.c @@ -346,9 +346,7 @@ mrb_obj_iv_set(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v) { iv_tbl *t; - if (MRB_FROZEN_P(obj)) { - mrb_raisef(mrb, E_FROZEN_ERROR, "can't modify frozen %S", mrb_obj_value(obj)); - } + mrb_check_frozen(mrb, obj); assign_class_name(mrb, obj, sym, v); if (!obj->iv) { obj->iv = iv_new(mrb); -- cgit v1.2.3 From d328808892bf866f9ad72ba476fcee00edd06293 Mon Sep 17 00:00:00 2001 From: dearblue Date: Sun, 14 Apr 2019 17:55:20 +0900 Subject: Fix memory leak for hash table index if occur out of memory --- src/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index c4820513b..a9367a426 100644 --- a/src/hash.c +++ b/src/hash.c @@ -182,7 +182,7 @@ ht_index(mrb_state *mrb, htable *t) if (!index || index->capa < size) { index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); if (index == NULL) { - mrb_free(mrb, index); + mrb_free(mrb, t->index); t->index = NULL; return; } -- cgit v1.2.3 From 28de6b0da195e1ebf8f6ce30de462f44fb761b8b Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Sat, 22 Jun 2019 19:05:42 +0900 Subject: Refine `Hash#rehash` example [ci skip] Previous example doesn't work because string key (frozen) can't be modified. --- src/hash.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index a9367a426..b4d96251c 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1378,10 +1378,21 @@ mrb_hash_merge(mrb_state *mrb, mrb_value hash1, mrb_value hash2) * values of key objects have changed since they were inserted, this * method will reindex hsh. * - * h = {"AAA" => "b"} - * h.keys[0].chop! - * h.rehash #=> {"AA"=>"b"} - * h["AA"] #=> "b" + * keys = (1..17).map{|n| [n]} + * k = keys[0] + * h = {} + * keys.each{|key| h[key] = key[0]} + * h #=> { [1]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, + * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, + * [15]=>15,[16]=>16,[17]=>17} + * h[k] #=> 1 + * k[0] = keys.size + 1 + * h #=> {[18]=> 1, [2]=> 2, [3]=> 3, [4]=> 4, [5]=> 5, [6]=> 6, [7]=> 7, + * [8]=> 8, [9]=> 9,[10]=>10,[11]=>11,[12]=>12,[13]=>13,[14]=>14, + * [15]=>15,[16]=>16,[17]=>17} + * h[k] #=> nil + * h.rehash + * h[k] #=> 1 */ static mrb_value mrb_hash_rehash(mrb_state *mrb, mrb_value self) -- cgit v1.2.3 From 23783a44300a39efbbc312a6ca22fe61d94db857 Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 27 Jun 2019 09:34:40 +0900 Subject: Skip copying delete keys in a hash; fix #4534 --- src/hash.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index b4d96251c..ac6a7cd56 100644 --- a/src/hash.c +++ b/src/hash.c @@ -240,7 +240,7 @@ ht_compact(mrb_state *mrb, htable *t) if (!seg->next && i >= t->last_len) { goto exit; } - if (mrb_undef_p(k)) { /* found delete key */ + if (mrb_undef_p(k)) { /* found deleted key */ if (seg2 == NULL) { seg2 = seg; i2 = i; @@ -543,6 +543,7 @@ ht_copy(mrb_state *mrb, htable *t) mrb_value key = seg->e[i].key; mrb_value val = seg->e[i].val; + if (mrb_undef_p(key)) continue; /* skip deleted key */ if ((seg->next == NULL) && (i >= t->last_len)) { return t2; } -- cgit v1.2.3 From 8294ce9fd458a0a1acf8fcdcb6161b4a020866ad Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Thu, 4 Jul 2019 23:11:57 +0900 Subject: It was too early to check `key` for `undef`; ref #4534 --- src/hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/hash.c') diff --git a/src/hash.c b/src/hash.c index ac6a7cd56..2a0a19363 100644 --- a/src/hash.c +++ b/src/hash.c @@ -543,10 +543,10 @@ ht_copy(mrb_state *mrb, htable *t) mrb_value key = seg->e[i].key; mrb_value val = seg->e[i].val; - if (mrb_undef_p(key)) continue; /* skip deleted key */ if ((seg->next == NULL) && (i >= t->last_len)) { return t2; } + if (mrb_undef_p(key)) continue; /* skip deleted key */ ht_put(mrb, t2, key, val); } seg = seg->next; -- cgit v1.2.3