diff options
Diffstat (limited to 'src/symbol.c')
| -rw-r--r-- | src/symbol.c | 270 |
1 files changed, 184 insertions, 86 deletions
diff --git a/src/symbol.c b/src/symbol.c index 5e1f9f561..53f4f6e1d 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -15,54 +15,158 @@ /* ------------------------------------------------------ */ typedef struct symbol_name { mrb_bool lit : 1; + uint8_t prev; uint16_t len; const char *name; } symbol_name; -static inline khint_t -sym_hash_func(mrb_state *mrb, mrb_sym s) +#define SYMBOL_INLINE_BIT 1 +#define SYMBOL_INLINE_LOWER_BIT 2 +#define SYMBOL_INLINE (1 << (SYMBOL_INLINE_BIT - 1)) +#define SYMBOL_INLINE_LOWER (1 << (SYMBOL_INLINE_LOWER_BIT - 1)) +#define SYMBOL_NORMAL_SHIFT SYMBOL_INLINE_BIT +#define SYMBOL_INLINE_SHIFT SYMBOL_INLINE_LOWER_BIT +#ifdef MRB_ENABLE_ALL_SYMBOLS +# define SYMBOL_INLINE_P(sym) FALSE +# define SYMBOL_INLINE_LOWER_P(sym) FALSE +# define sym_inline_pack(name, len) 0 +# define sym_inline_unpack(sym, buf, lenp) NULL +#else +# define SYMBOL_INLINE_P(sym) ((sym) & SYMBOL_INLINE) +# define SYMBOL_INLINE_LOWER_P(sym) ((sym) & SYMBOL_INLINE_LOWER) +#endif + +static void +sym_validate_len(mrb_state *mrb, size_t len) { - khint_t h = 0; - size_t i, len = mrb->symtbl[s].len; - const char *p = mrb->symtbl[s].name; + if (len >= RITE_LV_NULL_MARK) { + mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long"); + } +} +#ifndef MRB_ENABLE_ALL_SYMBOLS +static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + +static mrb_sym +sym_inline_pack(const char *name, uint16_t len) +{ + const int lower_length_max = (MRB_SYMBOL_BITSIZE - 2) / 5; + const int mix_length_max = (MRB_SYMBOL_BITSIZE - 2) / 6; + + char c; + const char *p; + int i; + mrb_sym sym = 0; + mrb_bool lower = TRUE; + + if (len > lower_length_max) return 0; /* too long */ for (i=0; i<len; i++) { - h = (h << 5) - h + *p++; + uint32_t bits; + + c = name[i]; + if (c == 0) return 0; /* NUL in name */ + p = strchr(pack_table, (int)c); + if (p == 0) return 0; /* non alnum char */ + bits = (uint32_t)(p - pack_table)+1; + if (bits > 27) lower = FALSE; + if (i >= mix_length_max) break; + sym |= bits<<(i*6+SYMBOL_INLINE_SHIFT); } - return h; + if (lower) { + sym = 0; + for (i=0; i<len; i++) { + uint32_t bits; + + c = name[i]; + p = strchr(pack_table, (int)c); + bits = (uint32_t)(p - pack_table)+1; + sym |= bits<<(i*5+SYMBOL_INLINE_SHIFT); + } + return sym | SYMBOL_INLINE | SYMBOL_INLINE_LOWER; + } + if (len > mix_length_max) return 0; + return sym | SYMBOL_INLINE; } -#define sym_hash_equal(mrb,a, b) (mrb->symtbl[a].len == mrb->symtbl[b].len && memcmp(mrb->symtbl[a].name, mrb->symtbl[b].name, mrb->symtbl[a].len) == 0) - -KHASH_DECLARE(n2s, mrb_sym, mrb_sym, FALSE) -KHASH_DEFINE (n2s, mrb_sym, mrb_sym, FALSE, sym_hash_func, sym_hash_equal) -/* ------------------------------------------------------ */ -static void -sym_validate_len(mrb_state *mrb, size_t len) +static const char* +sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp) { - if (len >= RITE_LV_NULL_MARK) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long"); + int bit_per_char = SYMBOL_INLINE_LOWER_P(sym) ? 5 : 6; + int i; + + mrb_assert(SYMBOL_INLINE_P(sym)); + + for (i=0; i<30/bit_per_char; i++) { + uint32_t bits = sym>>(i*bit_per_char+SYMBOL_INLINE_SHIFT) & ((1<<bit_per_char)-1); + if (bits == 0) break; + buf[i] = pack_table[bits-1];; } + buf[i] = '\0'; + if (lenp) *lenp = i; + return buf; +} +#endif + +static uint8_t +symhash(const char *key, size_t len) +{ + uint32_t hash, i; + + for(hash = i = 0; i < len; ++i) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash & 0xff; +} + +static mrb_sym +find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash) +{ + mrb_sym i; + symbol_name *sname; + + /* inline symbol */ + i = sym_inline_pack(name, len); + if (i > 0) return i; + + i = mrb->symhash[hash]; + if (i == 0) return 0; + do { + sname = &mrb->symtbl[i]; + if (sname->len == len && memcmp(sname->name, name, len) == 0) { + return i<<SYMBOL_NORMAL_SHIFT; + } + if (sname->prev == 0xff) { + i -= 0xff; + sname = &mrb->symtbl[i]; + while (mrb->symtbl < sname) { + if (sname->len == len && memcmp(sname->name, name, len) == 0) { + return (mrb_sym)(sname - mrb->symtbl)<<SYMBOL_NORMAL_SHIFT; + } + sname--; + } + return 0; + } + i -= sname->prev; + } while (sname->prev > 0); + return 0; } static mrb_sym sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) { - khash_t(n2s) *h = mrb->name2sym; - symbol_name *sname = mrb->symtbl; /* symtbl[0] for working memory */ - khiter_t k; mrb_sym sym; - char *p; + symbol_name *sname; + uint8_t hash; sym_validate_len(mrb, len); - if (sname) { - sname->lit = lit; - sname->len = (uint16_t)len; - sname->name = name; - k = kh_get(n2s, mrb, h, 0); - if (k != kh_end(h)) - return kh_key(h, k); - } + hash = symhash(name, len); + sym = find_symbol(mrb, name, len, hash); + if (sym > 0) return sym; /* registering a new symbol */ sym = ++mrb->symidx; @@ -78,15 +182,25 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) sname->lit = TRUE; } else { - p = (char *)mrb_malloc(mrb, len+1); + char *p = (char *)mrb_malloc(mrb, len+1); memcpy(p, name, len); p[len] = 0; sname->name = (const char*)p; sname->lit = FALSE; } - kh_put(n2s, mrb, h, sym); + if (mrb->symhash[hash]) { + mrb_sym i = sym - mrb->symhash[hash]; + if (i > 0xff) + sname->prev = 0xff; + else + sname->prev = i; + } + else { + sname->prev = 0; + } + mrb->symhash[hash] = sym; - return sym; + return sym<<SYMBOL_NORMAL_SHIFT; } MRB_API mrb_sym @@ -116,25 +230,18 @@ mrb_intern_str(mrb_state *mrb, mrb_value str) MRB_API mrb_value mrb_check_intern(mrb_state *mrb, const char *name, size_t len) { - khash_t(n2s) *h = mrb->name2sym; - symbol_name *sname = mrb->symtbl; - khiter_t k; + mrb_sym sym; sym_validate_len(mrb, len); - sname->len = (uint16_t)len; - sname->name = name; - - k = kh_get(n2s, mrb, h, 0); - if (k != kh_end(h)) { - return mrb_symbol_value(kh_key(h, k)); - } + sym = find_symbol(mrb, name, len, symhash(name, len)); + if (sym > 0) return mrb_symbol_value(sym); return mrb_nil_value(); } MRB_API mrb_value mrb_check_intern_cstr(mrb_state *mrb, const char *name) { - return mrb_check_intern(mrb, name, (mrb_int)strlen(name)); + return mrb_check_intern(mrb, name, strlen(name)); } MRB_API mrb_value @@ -143,10 +250,12 @@ mrb_check_intern_str(mrb_state *mrb, mrb_value str) return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); } -/* lenp must be a pointer to a size_t variable */ -MRB_API const char* -mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) +static const char* +sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp) { + if (SYMBOL_INLINE_P(sym)) return sym_inline_unpack(sym, buf, lenp); + + sym >>= SYMBOL_NORMAL_SHIFT; if (sym == 0 || mrb->symidx < sym) { if (lenp) *lenp = 0; return NULL; @@ -156,6 +265,12 @@ mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) return mrb->symtbl[sym].name; } +MRB_API const char* +mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) +{ + return sym2name_len(mrb, sym, mrb->symbuf, lenp); +} + void mrb_free_symtbl(mrb_state *mrb) { @@ -167,13 +282,11 @@ mrb_free_symtbl(mrb_state *mrb) } } mrb_free(mrb, mrb->symtbl); - kh_destroy(n2s, mrb, mrb->name2sym); } void mrb_init_symtbl(mrb_state *mrb) { - mrb->name2sym = kh_init(n2s, mrb); } /********************************************************************** @@ -209,26 +322,6 @@ mrb_init_symtbl(mrb_state *mrb) * */ - -/* 15.2.11.3.1 */ -/* - * call-seq: - * sym == obj -> true or false - * - * Equality---If <i>sym</i> and <i>obj</i> are exactly the same - * symbol, returns <code>true</code>. - */ - -static mrb_value -sym_equal(mrb_state *mrb, mrb_value sym1) -{ - mrb_value sym2; - - mrb_get_args(mrb, "o", &sym2); - - return mrb_bool_value(mrb_obj_equal(mrb, sym1, sym2)); -} - /* 15.2.11.3.2 */ /* 15.2.11.3.3 */ /* @@ -241,14 +334,9 @@ sym_equal(mrb_state *mrb, mrb_value sym1) * :fred.id2name #=> "fred" */ static mrb_value -mrb_sym_to_s(mrb_state *mrb, mrb_value sym) +sym_to_s(mrb_state *mrb, mrb_value sym) { - mrb_sym id = mrb_symbol(sym); - const char *p; - mrb_int len; - - p = mrb_sym2name_len(mrb, id, &len); - return mrb_str_new_static(mrb, p, len); + return mrb_sym2str(mrb, mrb_symbol(sym)); } /* 15.2.11.3.4 */ @@ -387,8 +475,8 @@ id: switch (*m) { case '!': case '?': case '=': ++m; default: break; - } } + } break; } return *m ? FALSE : TRUE; @@ -406,7 +494,7 @@ sym_inspect(mrb_state *mrb, mrb_value sym) name = mrb_sym2name_len(mrb, id, &len); str = mrb_str_new(mrb, 0, len+1); sp = RSTRING_PTR(str); - RSTRING_PTR(str)[0] = ':'; + sp[0] = ':'; memcpy(sp+1, name, len); mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX); if (!symname_p(name) || strlen(name) != (size_t)len) { @@ -415,6 +503,9 @@ sym_inspect(mrb_state *mrb, mrb_value sym) sp[0] = ':'; sp[1] = '"'; } +#ifdef MRB_UTF8_STRING + if (SYMBOL_INLINE_P(id)) RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); +#endif return str; } @@ -425,6 +516,11 @@ mrb_sym2str(mrb_state *mrb, mrb_sym sym) const char *name = mrb_sym2name_len(mrb, sym, &len); if (!name) return mrb_undef_value(); /* can't happen */ + if (SYMBOL_INLINE_P(sym)) { + mrb_value str = mrb_str_new(mrb, name, len); + RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); + return str; + } return mrb_str_new_static(mrb, name, len); } @@ -439,7 +535,9 @@ mrb_sym2name(mrb_state *mrb, mrb_sym sym) return name; } else { - mrb_value str = mrb_str_dump(mrb, mrb_str_new_static(mrb, name, len)); + mrb_value str = SYMBOL_INLINE_P(sym) ? + mrb_str_new(mrb, name, len) : mrb_str_new_static(mrb, name, len); + str = mrb_str_dump(mrb, str); return RSTRING_PTR(str); } } @@ -461,9 +559,10 @@ sym_cmp(mrb_state *mrb, mrb_value s1) const char *p1, *p2; int retval; mrb_int len, len1, len2; + char buf1[8], buf2[8]; - p1 = mrb_sym2name_len(mrb, sym1, &len1); - p2 = mrb_sym2name_len(mrb, sym2, &len2); + p1 = sym2name_len(mrb, sym1, buf1, &len1); + p2 = sym2name_len(mrb, sym2, buf2, &len2); len = lesser(len1, len2); retval = memcmp(p1, p2, len); if (retval == 0) { @@ -481,14 +580,13 @@ mrb_init_symbol(mrb_state *mrb) { struct RClass *sym; - mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */ + mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */ MRB_SET_INSTANCE_TT(sym, MRB_TT_SYMBOL); mrb_undef_class_method(mrb, sym, "new"); - mrb_define_method(mrb, sym, "===", sym_equal, MRB_ARGS_REQ(1)); /* 15.2.11.3.1 */ - mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */ - mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */ - mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */ - mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */ - mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, sym, "id2name", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */ + mrb_define_method(mrb, sym, "to_s", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */ + mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */ + mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */ + mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1)); } |
