From 06ba5905f019ca1a8d8e498fe6dce7a94702c9ad Mon Sep 17 00:00:00 2001 From: "Yukihiro \"Matz\" Matsumoto" Date: Wed, 6 Feb 2019 22:01:45 +0900 Subject: Implement inline packed symbols. Small symbols with all alphanumeric characters (<5) are packed in 32bit symbol integer a la base64. This means those small symbols are not listed in `Symbol.all_symbols`. --- src/symbol.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/symbol.c b/src/symbol.c index 4242f3d8e..ab7504772 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -28,6 +28,79 @@ sym_validate_len(mrb_state *mrb, size_t len) } } +static char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + +static mrb_sym +sym_inline_pack(const char *name, uint16_t len) +{ + char c; + char *p; + int i; + mrb_sym sym = 0; + int lower = 1; + + if (len > 6) return 0; /* too long */ + for (i=0; i 27) lower = 0; + sym |= bits<<(i*6+2); + } + if (lower) { + sym = 0; + for (i=0; i>(i*5+2) & 31; + if (bits == 0) break; + c = pack_table[bits-1]; + buf[i] = c; + } + buf[i] = '\0'; + return buf; + } + + for (i=0; i<5; i++) { + uint32_t bits; + char c; + + bits = sym>>(i*6+2) & 63; + if (bits == 0) break; + c = pack_table[bits-1]; + buf[i] = c; + } + buf[i] = '\0'; + return buf; +} + uint8_t symhash(const char *key, size_t len) { @@ -50,19 +123,23 @@ find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash) mrb_sym i; symbol_name *sname; + /* inline symbol */ + i = sym_inline_pack(name, len); + if (i > 0) return i; + i = mrb->symhash[hash]; if (i == 0) return 0; do { sname = &mrb->symtbl[i]; if (sname->len == len && memcmp(sname->name, name, len) == 0) { - return i; + return i<<1; } if (sname->prev == 0xff) { i -= 0xff; sname = &mrb->symtbl[i]; while (mrb->symtbl < sname) { if (sname->len == len && memcmp(sname->name, name, len) == 0) { - return (mrb_sym)(sname - mrb->symtbl); + return (mrb_sym)(sname - mrb->symtbl)<<1; } sname--; } @@ -117,7 +194,7 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) } mrb->symhash[hash] = sym; - return sym; + return sym<<1; } MRB_API mrb_sym @@ -170,6 +247,13 @@ mrb_check_intern_str(mrb_state *mrb, mrb_value str) MRB_API const char* mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) { + if (sym & 1) { /* inline packed symbol */ + sym_inline_unpack(sym, mrb->symbuf); + if (lenp) *lenp = strlen(mrb->symbuf); + return mrb->symbuf; + } + + sym >>= 1; if (sym == 0 || mrb->symidx < sym) { if (lenp) *lenp = 0; return NULL; @@ -446,6 +530,9 @@ mrb_sym2str(mrb_state *mrb, mrb_sym sym) const char *name = mrb_sym2name_len(mrb, sym, &len); if (!name) return mrb_undef_value(); /* can't happen */ + if (sym&1) { /* inline symbol */ + return mrb_str_new(mrb, name, len); + } return mrb_str_new_static(mrb, name, len); } -- cgit v1.2.3