diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2018-09-26 11:14:36 +0900 |
|---|---|---|
| committer | Yukihiro "Matz" Matsumoto <[email protected]> | 2018-09-26 23:09:48 +0900 |
| commit | d78acc7afed35813f25e3091150dab668c373f05 (patch) | |
| tree | 5d9c5651b8b00aa2f7f3245817d67dd3662b4ab3 /src | |
| parent | 8ffd4e47fb1c18088e554d31e8af88881517a201 (diff) | |
| download | mruby-d78acc7afed35813f25e3091150dab668c373f05.tar.gz mruby-d78acc7afed35813f25e3091150dab668c373f05.zip | |
Add index to larger segment lists for performance
Diffstat (limited to 'src')
| -rw-r--r-- | src/hash.c | 440 |
1 files changed, 324 insertions, 116 deletions
diff --git a/src/hash.c b/src/hash.c index 820ee5a71..07a12be68 100644 --- a/src/hash.c +++ b/src/hash.c @@ -16,14 +16,48 @@ mrb_int mrb_float_id(mrb_float f); #endif -static inline khint_t -mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) +/* return non zero to break the loop */ +typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key, mrb_value val, void *data); + +#ifndef MRB_SG_SEGMENT_SIZE +#define MRB_SG_SEGMENT_SIZE 5 +#endif + +struct segkv { + mrb_value key; + mrb_value val; +}; + +typedef struct segment { + struct segment *next; + struct segkv e[MRB_SG_SEGMENT_SIZE]; +} segment; + +typedef struct segindex { + size_t size; + size_t capa; + struct segkv *table[]; +} segindex; + +/* Instance variable table structure */ +typedef struct seglist { + segment *rootseg; + segment *lastseg; + mrb_int size; + mrb_int last_len; + segindex *index; +} seglist; + +static /* inline */ size_t +sg_hash_func(mrb_state *mrb, seglist *t, mrb_value key) { - enum mrb_vtype t = mrb_type(key); + enum mrb_vtype tt = mrb_type(key); mrb_value hv; - khint_t h; + size_t h; + segindex *index = t->index; + size_t capa = index ? index->capa : 0; - switch (t) { + switch (tt) { case MRB_TT_STRING: h = mrb_str_hash(mrb, key); break; @@ -35,23 +69,26 @@ mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) #ifndef MRB_WITHOUT_FLOAT case MRB_TT_FLOAT: #endif - h = (khint_t)mrb_obj_id(key); + h = (size_t)mrb_obj_id(key); break; default: hv = mrb_funcall(mrb, key, "hash", 0); - h = (khint_t)t ^ (khint_t)mrb_fixnum(hv); + h = (size_t)t ^ (size_t)mrb_fixnum(hv); break; } - return kh_int_hash_func(mrb, h); + if (index && (index != t->index || capa != index->capa)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } + return ((h)^((h)<<2)^((h)>>2)); } static inline mrb_bool -mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) +sg_hash_equal(mrb_state *mrb, seglist *t, mrb_value a, mrb_value b) { - enum mrb_vtype t = mrb_type(a); + enum mrb_vtype tt = mrb_type(a); - switch (t) { + switch (tt) { case MRB_TT_STRING: return mrb_str_equal(mrb, a, b); @@ -84,32 +121,18 @@ mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) #endif default: - return mrb_eql(mrb, a, b); - } + { + segindex *index = t->index; + size_t capa = index ? index->capa : 0; + mrb_bool eql = mrb_eql(mrb, a, b); + if (index && (index != t->index || capa != index->capa)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified"); + } + return eql; + } + } } -/* return non zero to break the loop */ -typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key,mrb_value val, void *data); - -#ifndef MRB_SG_SEGMENT_SIZE -#define MRB_SG_SEGMENT_SIZE 5 -#endif - -typedef struct segment { - struct segment *next; - struct { - mrb_value key; - mrb_value val; - } e[MRB_SG_SEGMENT_SIZE]; -} segment; - -/* Instance variable table structure */ -typedef struct seglist { - segment *rootseg; - mrb_int size; - mrb_int last_len; -} seglist; - /* Creates the instance variable table. */ static seglist* sg_new(mrb_state *mrb) @@ -119,87 +142,87 @@ sg_new(mrb_state *mrb) t = (seglist*)mrb_malloc(mrb, sizeof(seglist)); t->size = 0; t->rootseg = NULL; + t->lastseg = NULL; t->last_len = 0; + t->index = NULL; return t; } -/* Set the value for the symbol in the instance variable table. */ +#define power2(v) do { \ + v--;\ + v |= v >> 1;\ + v |= v >> 2;\ + v |= v >> 4;\ + v |= v >> 8;\ + v |= v >> 16;\ + v++;\ +} while (0) + +#ifndef UPPER_BOUND +#define UPPER_BOUND(x) ((x)>>2|(x)>>1) +#endif + +#define SG_MASK(index) ((index->capa)-1) + +/* Build index for the segment list */ static void -sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +sg_index(mrb_state *mrb, seglist *t) { + size_t size = (size_t)t->size; + size_t mask; + segindex *index = t->index; segment *seg; - segment *prev = NULL; - mrb_int i; + size_t i; - if (t == NULL) return; - seg = t->rootseg; - while (seg) { - for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { - mrb_value k = seg->e[i].key; - /* Found room in last segment after last_len */ - if (!seg->next && i >= t->last_len) { - seg->e[i].key = key; - seg->e[i].val = val; - t->last_len = i+1; - if (t->size >= 0) t->size++; - return; - } - if (mrb_undef_p(k)) continue; - if (mrb_hash_ht_hash_equal(mrb, k, key)) { - seg->e[i].val = val; - return; - } + if (size < MRB_SG_SEGMENT_SIZE) { + if (index) { + failed: + mrb_free(mrb, index); + t->index = NULL; } - prev = seg; - seg = seg->next; + return; } - - /* Not found */ - if (t->size >= 0) t->size++; - - seg = (segment*)mrb_malloc(mrb, sizeof(segment)); - if (!seg) return; - seg->next = NULL; - seg->e[0].key = key; - seg->e[0].val = val; - t->last_len = 1; - if (prev) { - prev->next = seg; + /* allocate index table */ + if (index && index->size >= UPPER_BOUND(index->capa)) { + size = index->capa+1; } - else { - t->rootseg = seg; + power2(size); + if (!index || index->capa < size) { + index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size); + if (index == NULL) goto failed; + t->index = index; + } + index->size = t->size; + index->capa = size; + for (i=0; i<size; i++) { + index->table[i] = NULL; } -} - -/* Get a value for a symbol from the instance variable table. */ -static mrb_bool -sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) -{ - segment *seg; - mrb_int i; - if (t == NULL) return FALSE; + /* rebuld index */ + mask = SG_MASK(index); seg = t->rootseg; while (seg) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { - mrb_value k = seg->e[i].key; + mrb_value key; + size_t k, step = 0; - if (!seg->next && i >= t->last_len) { - return FALSE; + if (!seg->next && i >= (size_t)t->last_len) { + return; } - if (mrb_undef_p(k)) continue; - if (mrb_hash_ht_hash_equal(mrb, k, key)) { - if (vp) *vp = seg->e[i].val; - return TRUE; + key = seg->e[i].key; + if (mrb_undef_p(key)) continue; + k = sg_hash_func(mrb, t, key) & mask; + while (index->table[k]) { + k = (k+(++step)) & mask; } + index->table[k] = &seg->e[i]; } seg = seg->next; } - return FALSE; } -/* Compacts the hash removing delete entries. */ +/* Compacts the segment list removing deleted entries. */ static void sg_compact(mrb_state *mrb, seglist *t) { @@ -209,6 +232,10 @@ sg_compact(mrb_state *mrb, seglist *t) mrb_int i2; mrb_int size = 0; + if (t->index && (size_t)t->size == t->index->size) { + sg_index(mrb, t); + return; + } while (seg) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { mrb_value k = seg->e[i].key; @@ -238,16 +265,179 @@ sg_compact(mrb_state *mrb, seglist *t) exit: /* reached at end */ t->size = size; - t->last_len = i2; - if (seg != seg2) { + if (seg2) { seg = seg2->next; seg2->next = NULL; + t->last_len = i2; + t->lastseg = seg2; while (seg) { seg2 = seg->next; mrb_free(mrb, seg); seg = seg2; } } + if (t->index) { + sg_index(mrb, t); + } +} + +/* Set the value for the key in the indexed segment list. */ +static void +sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +{ + segindex *index = t->index; + size_t k, sp, step = 0, mask; + segment *seg; + + if (index->size >= UPPER_BOUND(index->capa)) { + /* need to expand table */ + sg_compact(mrb, t); + index = t->index; + } + mask = SG_MASK(index); + sp = index->capa; + k = sg_hash_func(mrb, t, key) & mask; + while (index->table[k]) { + mrb_value key2 = index->table[k]->key; + if (mrb_undef_p(key2)) { + if (sp == index->capa) sp = k; + } + else if (sg_hash_equal(mrb, t, key, key2)) { + index->table[k]->val = val; + return; + } + k = (k+(++step)) & mask; + } + if (sp < index->capa) { + k = sp; + } + + /* put the value at the last */ + seg = t->lastseg; + if (t->last_len < MRB_SG_SEGMENT_SIZE) { + index->table[k] = &seg->e[t->last_len++]; + } + else { /* append a new segment */ + seg->next = (segment*)mrb_malloc(mrb, sizeof(segment)); + seg = seg->next; + seg->next = NULL; + t->lastseg = seg; + t->last_len = 1; + index->table[k] = &seg->e[0]; + } + index->table[k]->key = key; + index->table[k]->val = val; + index->size++; + t->size++; +} + +/* Set the value for the key in the segment list. */ +static void +sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) +{ + segment *seg; + mrb_int i, deleted = 0; + + if (t == NULL) return; + if (t->index) { + sg_index_put(mrb, t, key, val); + return; + } + seg = t->rootseg; + while (seg) { + for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { + mrb_value k = seg->e[i].key; + /* Found room in last segment after last_len */ + if (!seg->next && i >= t->last_len) { + seg->e[i].key = key; + seg->e[i].val = val; + t->last_len = i+1; + t->size++; + return; + } + if (mrb_undef_p(k)) { + deleted++; + continue; + } + if (sg_hash_equal(mrb, t, k, key)) { + seg->e[i].val = val; + return; + } + } + seg = seg->next; + } + + /* Not found */ + if (deleted > MRB_SG_SEGMENT_SIZE) { + sg_compact(mrb, t); + } + t->size++; + + seg = (segment*)mrb_malloc(mrb, sizeof(segment)); + seg->next = NULL; + seg->e[0].key = key; + seg->e[0].val = val; + t->last_len = 1; + if (t->rootseg == NULL) { + t->rootseg = seg; + } + else { + t->lastseg->next = seg; + } + t->lastseg = seg; + if (t->index == NULL && t->size > MRB_SG_SEGMENT_SIZE*4) { + sg_index(mrb, t); + } +} + +/* Get a value for a key from the indexed segment list. */ +static mrb_bool +sg_index_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +{ + segindex *index = t->index; + size_t mask = SG_MASK(index); + size_t k = sg_hash_func(mrb, t, key) & mask; + size_t step = 0; + + while (index->table[k]) { + if (sg_hash_equal(mrb, t, key, index->table[k]->key)) { + if (vp) *vp = index->table[k]->val; + return TRUE; + } + k = (k+(++step)) & mask; + } + return FALSE; +} + +/* Get a value for a key from the segment list. */ +static mrb_bool +sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) +{ + segment *seg; + mrb_int i; + + if (t == NULL) return FALSE; + if (t->index) { + return sg_index_get(mrb, t, key, vp); + } + + seg = t->rootseg; + while (seg) { + for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { + mrb_value k = seg->e[i].key; + + if (!seg->next && i >= t->last_len) { + return FALSE; + } + if (mrb_undef_p(k)) continue; + if (sg_hash_equal(mrb, t, k, key)) { + if (vp) *vp = seg->e[i].val; + return TRUE; + } + } + seg = seg->next; + } + return FALSE; } /* Deletes the value for the symbol from the instance variable table. */ @@ -262,18 +452,17 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) seg = t->rootseg; while (seg) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { - mrb_value k = seg->e[i].key; + mrb_value key2; if (!seg->next && i >= t->last_len) { /* not found */ return FALSE; } - if (mrb_undef_p(k)) continue; - if (mrb_hash_ht_hash_equal(mrb, k, key)) { - if (vp) *vp = k; + key2 = seg->e[i].key; + if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) { + if (vp) *vp = key2; seg->e[i].key = mrb_undef_value(); - if (t->size > 0) t->size = -1; - else t->size--; /* count number of deleted */ + t->size--; return TRUE; } } @@ -290,6 +479,9 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) mrb_int i; if (t == NULL) return; + if (t->index && t->index->size-(size_t)t->size > MRB_SG_SEGMENT_SIZE) { + sg_compact(mrb, t); + } seg = t->rootseg; while (seg) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { @@ -310,9 +502,6 @@ static mrb_int sg_size(mrb_state *mrb, seglist *t) { if (t == NULL) return 0; - if (t->size < 0) { - sg_compact(mrb, t); - } return t->size; } @@ -355,13 +544,14 @@ sg_free(mrb_state *mrb, seglist *t) seg = seg->next; mrb_free(mrb, p); } + if (t->index) mrb_free(mrb, t->index); mrb_free(mrb, t); } static void mrb_hash_modify(mrb_state *mrb, mrb_value hash); static inline mrb_value -mrb_hash_ht_key(mrb_state *mrb, mrb_value key) +ht_key(mrb_state *mrb, mrb_value key) { if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { key = mrb_str_dup(mrb, key); @@ -370,7 +560,7 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key) return key; } -#define KEY(key) mrb_hash_ht_key(mrb, key) +#define KEY(key) ht_key(mrb, key) static int hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) @@ -489,15 +679,6 @@ mrb_hash_dup(mrb_state *mrb, mrb_value self) return mrb_obj_value(copy); } -MRB_API mrb_bool -mrb_hash_has_key_p(mrb_state *mrb, mrb_value hash, mrb_value key) -{ - if (sg_get(mrb, RHASH_TBL(hash), key, NULL)) { - return TRUE; - } - return FALSE; -} - MRB_API mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) { @@ -821,6 +1002,33 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) return mrb_hash_delete_key(mrb, self, key); } +/* find first element in segment list, and remove it. */ +static void +sg_shift(mrb_state *mrb, seglist *t, mrb_value *kp, mrb_value *vp) +{ + segment *seg = t->rootseg; + mrb_int i; + + while (seg) { + for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { + mrb_value key; + + if (!seg->next && i >= t->last_len) { + return; + } + key = seg->e[i].key; + if (mrb_undef_p(key)) continue; + *kp = key; + *vp = seg->e[i].val; + /* delete element */ + seg->e[i].key = mrb_undef_value(); + t->size--; + return; + } + seg = seg->next; + } +} + /* 15.2.13.4.24 */ /* * call-seq: @@ -842,9 +1050,9 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) mrb_hash_modify(mrb, hash); if (sg && sg_size(mrb, sg) > 0) { - mrb_value del_key = sg->rootseg->e[0].key; - mrb_value del_val = sg->rootseg->e[0].val; - sg_del(mrb, sg, del_key, NULL); + mrb_value del_key, del_val; + + sg_shift(mrb, sg, &del_key, &del_val); return mrb_assoc_new(mrb, del_key, del_val); } |
