From dd81de49a6114dc725a1991d052a056dc5b53649 Mon Sep 17 00:00:00 2001 From: KOBAYASHI Shuji Date: Mon, 18 Jan 2021 21:32:10 +0900 Subject: Fix that Hash may not contain any empty buckets The Hash implementation assumed that there were always empty buckets, but sometimes there were only active or deleted buckets (no empty buckets). Therefore, fix it so that this situation does not occur. ### Example ```ruby # example.rb class A attr_reader :v def initialize(v) @v = v end def ==(o) @v == o.v end def hash; @v end def to_s; "#{self.class}[#{@v}]" end alias eql? == alias inspect to_s end keys = (0..31).map{A.new(_1)} h = {} (0..16).each{h[keys[_1]] = _1} (17..31).each do k = keys[_1] h[k] = _1 h.delete(k) end p h.keys ``` #### Before this patch: ```console $ bin/mruby example.rb [A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[8], A[9], A[10], A[11], A[12], A[13], A[14], A[15], A[16], A[30], A[31]] ``` #### After this patch: ```console $ bin/mruby example.rb [A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[8], A[9], A[10], A[11], A[12], A[13], A[14], A[15], A[16]] ``` --- src/hash.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/hash.c b/src/hash.c index 8ca2f666e..3195c3e7e 100644 --- a/src/hash.c +++ b/src/hash.c @@ -52,10 +52,10 @@ * the number of used slots and the number of empty slots. */ +#define EA_N_RESERVED_INDICES 2 /* empty and deleted */ #define EA_INCREASE_RATIO 6 / 5 + 6 #define EA_MAX_INCREASE UINT16_MAX -#define EA_MAX_CAPA /* `- 2` means reserved indices (empty and deleted) */ \ - U32(lesser(IB_MAX_CAPA - 2, MRB_INT_MAX)) +#define EA_MAX_CAPA U32(lesser(IB_MAX_CAPA - EA_N_RESERVED_INDICES, MRB_INT_MAX)) #define IB_MAX_CAPA (U32(1) << IB_MAX_BIT) #define IB_TYPE_BIT 32 #define IB_INIT_BIT ( \ @@ -360,7 +360,7 @@ ea_next_capa_for(uint32_t size, uint32_t max_capa) } else { /* - * For 32-bit CPU, the theoretical value of max EA capa is + * For 32-bit CPU, the theoretical value of maximum EA capacity is * `UINT32_MAX / sizeof (hash_entry)`. At this time, if * `EA_INCREASE_RATIO` is the current value, 32-bit range will not be * exceeded during the calculation of `capa`, so `size_t` is used. @@ -839,12 +839,16 @@ ht_set(mrb_state *mrb, struct RHash *h, mrb_value key, mrb_value val) if (size != ht_ea_n_used(h)) ea_compress(ht_ea(h), ht_ea_n_used(h)); ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ++ib_bit_width); } - else if (ht_ea_capa(h) == ht_ea_n_used(h) && size != ht_ea_n_used(h)) { - if (size <= AR_MAX_SIZE) {ht_set_as_ar(mrb, h, key, val); return;} - if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) { - ea_compress(ht_ea(h), ht_ea_n_used(h)); - ht_adjust_ea(mrb, h, size, ht_ea_capa(h)); - ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width); + else if (size != ht_ea_n_used(h)) { + if (ib_capa - EA_N_RESERVED_INDICES <= ht_ea_n_used(h)) goto compress; + if (ht_ea_capa(h) == ht_ea_n_used(h)) { + if (size <= AR_MAX_SIZE) {ht_set_as_ar(mrb, h, key, val); return;} + if (ea_next_capa_for(size, EA_MAX_CAPA) <= ht_ea_capa(h)) { + compress: + ea_compress(ht_ea(h), ht_ea_n_used(h)); + ht_adjust_ea(mrb, h, size, ht_ea_capa(h)); + ht_init(mrb, h, size, ht_ea(h), ht_ea_capa(h), h_ht(h), ib_bit_width); + } } } ht_set_without_ib_adjustment(mrb, h, key, val); -- cgit v1.2.3