diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2019-02-06 22:01:45 +0900 |
|---|---|---|
| committer | Yukihiro "Matz" Matsumoto <[email protected]> | 2019-02-12 17:30:34 +0900 |
| commit | 06ba5905f019ca1a8d8e498fe6dce7a94702c9ad (patch) | |
| tree | ca7dacf0eee23b1256cda2fad2e7b69a975af47a /src | |
| parent | 3a95bf0ddb2ce9c15655afc14073669bf4968c12 (diff) | |
| download | mruby-06ba5905f019ca1a8d8e498fe6dce7a94702c9ad.tar.gz mruby-06ba5905f019ca1a8d8e498fe6dce7a94702c9ad.zip | |
Implement inline packed symbols.
Small symbols with all alphanumeric characters (<5) are packed in 32bit
symbol integer a la base64. This means those small symbols are not
listed in `Symbol.all_symbols`.
Diffstat (limited to 'src')
| -rw-r--r-- | src/symbol.c | 93 |
1 files changed, 90 insertions, 3 deletions
diff --git a/src/symbol.c b/src/symbol.c index 4242f3d8e..ab7504772 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -28,6 +28,79 @@ sym_validate_len(mrb_state *mrb, size_t len) } } +static char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + +static mrb_sym +sym_inline_pack(const char *name, uint16_t len) +{ + char c; + char *p; + int i; + mrb_sym sym = 0; + int lower = 1; + + if (len > 6) return 0; /* too long */ + for (i=0; i<len; i++) { + uint32_t bits; + + c = name[i]; + if (c == 0) return 0; /* NUL in name */ + p = strchr(pack_table, (int)c); + if (p == 0) return 0; /* non alnum char */ + bits = (uint32_t)(p - pack_table)+1; + if (bits > 27) lower = 0; + sym |= bits<<(i*6+2); + } + if (lower) { + sym = 0; + for (i=0; i<len; i++) { + uint32_t bits; + + c = name[i]; + p = strchr(pack_table, (int)c); + bits = (uint32_t)(p - pack_table)+1; + sym |= bits<<(i*5+2); + } + return sym | 3; + } + if (len == 6) return 0; + return sym | 1; +} + +static const char* +sym_inline_unpack(mrb_sym sym, char *buf) +{ + int i; + + if (sym == 0) return NULL; + if ((sym&1) == 0) return NULL; /* need to be inline sym */ + if (sym&2) { /* all lower case (5bits/char) */ + for (i=0; i<6; i++) { + uint32_t bits; + char c; + + bits = sym>>(i*5+2) & 31; + if (bits == 0) break; + c = pack_table[bits-1]; + buf[i] = c; + } + buf[i] = '\0'; + return buf; + } + + for (i=0; i<5; i++) { + uint32_t bits; + char c; + + bits = sym>>(i*6+2) & 63; + if (bits == 0) break; + c = pack_table[bits-1]; + buf[i] = c; + } + buf[i] = '\0'; + return buf; +} + uint8_t symhash(const char *key, size_t len) { @@ -50,19 +123,23 @@ find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash) mrb_sym i; symbol_name *sname; + /* inline symbol */ + i = sym_inline_pack(name, len); + if (i > 0) return i; + i = mrb->symhash[hash]; if (i == 0) return 0; do { sname = &mrb->symtbl[i]; if (sname->len == len && memcmp(sname->name, name, len) == 0) { - return i; + return i<<1; } if (sname->prev == 0xff) { i -= 0xff; sname = &mrb->symtbl[i]; while (mrb->symtbl < sname) { if (sname->len == len && memcmp(sname->name, name, len) == 0) { - return (mrb_sym)(sname - mrb->symtbl); + return (mrb_sym)(sname - mrb->symtbl)<<1; } sname--; } @@ -117,7 +194,7 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) } mrb->symhash[hash] = sym; - return sym; + return sym<<1; } MRB_API mrb_sym @@ -170,6 +247,13 @@ mrb_check_intern_str(mrb_state *mrb, mrb_value str) MRB_API const char* mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) { + if (sym & 1) { /* inline packed symbol */ + sym_inline_unpack(sym, mrb->symbuf); + if (lenp) *lenp = strlen(mrb->symbuf); + return mrb->symbuf; + } + + sym >>= 1; if (sym == 0 || mrb->symidx < sym) { if (lenp) *lenp = 0; return NULL; @@ -446,6 +530,9 @@ mrb_sym2str(mrb_state *mrb, mrb_sym sym) const char *name = mrb_sym2name_len(mrb, sym, &len); if (!name) return mrb_undef_value(); /* can't happen */ + if (sym&1) { /* inline symbol */ + return mrb_str_new(mrb, name, len); + } return mrb_str_new_static(mrb, name, len); } |
