summaryrefslogtreecommitdiffhomepage
path: root/src/symbol.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/symbol.c')
-rw-r--r--src/symbol.c270
1 files changed, 184 insertions, 86 deletions
diff --git a/src/symbol.c b/src/symbol.c
index 5e1f9f561..53f4f6e1d 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,54 +15,158 @@
/* ------------------------------------------------------ */
typedef struct symbol_name {
mrb_bool lit : 1;
+ uint8_t prev;
uint16_t len;
const char *name;
} symbol_name;
-static inline khint_t
-sym_hash_func(mrb_state *mrb, mrb_sym s)
+#define SYMBOL_INLINE_BIT 1
+#define SYMBOL_INLINE_LOWER_BIT 2
+#define SYMBOL_INLINE (1 << (SYMBOL_INLINE_BIT - 1))
+#define SYMBOL_INLINE_LOWER (1 << (SYMBOL_INLINE_LOWER_BIT - 1))
+#define SYMBOL_NORMAL_SHIFT SYMBOL_INLINE_BIT
+#define SYMBOL_INLINE_SHIFT SYMBOL_INLINE_LOWER_BIT
+#ifdef MRB_ENABLE_ALL_SYMBOLS
+# define SYMBOL_INLINE_P(sym) FALSE
+# define SYMBOL_INLINE_LOWER_P(sym) FALSE
+# define sym_inline_pack(name, len) 0
+# define sym_inline_unpack(sym, buf, lenp) NULL
+#else
+# define SYMBOL_INLINE_P(sym) ((sym) & SYMBOL_INLINE)
+# define SYMBOL_INLINE_LOWER_P(sym) ((sym) & SYMBOL_INLINE_LOWER)
+#endif
+
+static void
+sym_validate_len(mrb_state *mrb, size_t len)
{
- khint_t h = 0;
- size_t i, len = mrb->symtbl[s].len;
- const char *p = mrb->symtbl[s].name;
+ if (len >= RITE_LV_NULL_MARK) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
+ }
+}
+#ifndef MRB_ENABLE_ALL_SYMBOLS
+static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+static mrb_sym
+sym_inline_pack(const char *name, uint16_t len)
+{
+ const int lower_length_max = (MRB_SYMBOL_BITSIZE - 2) / 5;
+ const int mix_length_max = (MRB_SYMBOL_BITSIZE - 2) / 6;
+
+ char c;
+ const char *p;
+ int i;
+ mrb_sym sym = 0;
+ mrb_bool lower = TRUE;
+
+ if (len > lower_length_max) return 0; /* too long */
for (i=0; i<len; i++) {
- h = (h << 5) - h + *p++;
+ uint32_t bits;
+
+ c = name[i];
+ if (c == 0) return 0; /* NUL in name */
+ p = strchr(pack_table, (int)c);
+ if (p == 0) return 0; /* non alnum char */
+ bits = (uint32_t)(p - pack_table)+1;
+ if (bits > 27) lower = FALSE;
+ if (i >= mix_length_max) break;
+ sym |= bits<<(i*6+SYMBOL_INLINE_SHIFT);
}
- return h;
+ if (lower) {
+ sym = 0;
+ for (i=0; i<len; i++) {
+ uint32_t bits;
+
+ c = name[i];
+ p = strchr(pack_table, (int)c);
+ bits = (uint32_t)(p - pack_table)+1;
+ sym |= bits<<(i*5+SYMBOL_INLINE_SHIFT);
+ }
+ return sym | SYMBOL_INLINE | SYMBOL_INLINE_LOWER;
+ }
+ if (len > mix_length_max) return 0;
+ return sym | SYMBOL_INLINE;
}
-#define sym_hash_equal(mrb,a, b) (mrb->symtbl[a].len == mrb->symtbl[b].len && memcmp(mrb->symtbl[a].name, mrb->symtbl[b].name, mrb->symtbl[a].len) == 0)
-
-KHASH_DECLARE(n2s, mrb_sym, mrb_sym, FALSE)
-KHASH_DEFINE (n2s, mrb_sym, mrb_sym, FALSE, sym_hash_func, sym_hash_equal)
-/* ------------------------------------------------------ */
-static void
-sym_validate_len(mrb_state *mrb, size_t len)
+static const char*
+sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp)
{
- if (len >= RITE_LV_NULL_MARK) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
+ int bit_per_char = SYMBOL_INLINE_LOWER_P(sym) ? 5 : 6;
+ int i;
+
+ mrb_assert(SYMBOL_INLINE_P(sym));
+
+ for (i=0; i<30/bit_per_char; i++) {
+ uint32_t bits = sym>>(i*bit_per_char+SYMBOL_INLINE_SHIFT) & ((1<<bit_per_char)-1);
+ if (bits == 0) break;
+ buf[i] = pack_table[bits-1];;
}
+ buf[i] = '\0';
+ if (lenp) *lenp = i;
+ return buf;
+}
+#endif
+
+static uint8_t
+symhash(const char *key, size_t len)
+{
+ uint32_t hash, i;
+
+ for(hash = i = 0; i < len; ++i) {
+ hash += key[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash & 0xff;
+}
+
+static mrb_sym
+find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash)
+{
+ mrb_sym i;
+ symbol_name *sname;
+
+ /* inline symbol */
+ i = sym_inline_pack(name, len);
+ if (i > 0) return i;
+
+ i = mrb->symhash[hash];
+ if (i == 0) return 0;
+ do {
+ sname = &mrb->symtbl[i];
+ if (sname->len == len && memcmp(sname->name, name, len) == 0) {
+ return i<<SYMBOL_NORMAL_SHIFT;
+ }
+ if (sname->prev == 0xff) {
+ i -= 0xff;
+ sname = &mrb->symtbl[i];
+ while (mrb->symtbl < sname) {
+ if (sname->len == len && memcmp(sname->name, name, len) == 0) {
+ return (mrb_sym)(sname - mrb->symtbl)<<SYMBOL_NORMAL_SHIFT;
+ }
+ sname--;
+ }
+ return 0;
+ }
+ i -= sname->prev;
+ } while (sname->prev > 0);
+ return 0;
}
static mrb_sym
sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
{
- khash_t(n2s) *h = mrb->name2sym;
- symbol_name *sname = mrb->symtbl; /* symtbl[0] for working memory */
- khiter_t k;
mrb_sym sym;
- char *p;
+ symbol_name *sname;
+ uint8_t hash;
sym_validate_len(mrb, len);
- if (sname) {
- sname->lit = lit;
- sname->len = (uint16_t)len;
- sname->name = name;
- k = kh_get(n2s, mrb, h, 0);
- if (k != kh_end(h))
- return kh_key(h, k);
- }
+ hash = symhash(name, len);
+ sym = find_symbol(mrb, name, len, hash);
+ if (sym > 0) return sym;
/* registering a new symbol */
sym = ++mrb->symidx;
@@ -78,15 +182,25 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
sname->lit = TRUE;
}
else {
- p = (char *)mrb_malloc(mrb, len+1);
+ char *p = (char *)mrb_malloc(mrb, len+1);
memcpy(p, name, len);
p[len] = 0;
sname->name = (const char*)p;
sname->lit = FALSE;
}
- kh_put(n2s, mrb, h, sym);
+ if (mrb->symhash[hash]) {
+ mrb_sym i = sym - mrb->symhash[hash];
+ if (i > 0xff)
+ sname->prev = 0xff;
+ else
+ sname->prev = i;
+ }
+ else {
+ sname->prev = 0;
+ }
+ mrb->symhash[hash] = sym;
- return sym;
+ return sym<<SYMBOL_NORMAL_SHIFT;
}
MRB_API mrb_sym
@@ -116,25 +230,18 @@ mrb_intern_str(mrb_state *mrb, mrb_value str)
MRB_API mrb_value
mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
{
- khash_t(n2s) *h = mrb->name2sym;
- symbol_name *sname = mrb->symtbl;
- khiter_t k;
+ mrb_sym sym;
sym_validate_len(mrb, len);
- sname->len = (uint16_t)len;
- sname->name = name;
-
- k = kh_get(n2s, mrb, h, 0);
- if (k != kh_end(h)) {
- return mrb_symbol_value(kh_key(h, k));
- }
+ sym = find_symbol(mrb, name, len, symhash(name, len));
+ if (sym > 0) return mrb_symbol_value(sym);
return mrb_nil_value();
}
MRB_API mrb_value
mrb_check_intern_cstr(mrb_state *mrb, const char *name)
{
- return mrb_check_intern(mrb, name, (mrb_int)strlen(name));
+ return mrb_check_intern(mrb, name, strlen(name));
}
MRB_API mrb_value
@@ -143,10 +250,12 @@ mrb_check_intern_str(mrb_state *mrb, mrb_value str)
return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
}
-/* lenp must be a pointer to a size_t variable */
-MRB_API const char*
-mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
+static const char*
+sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp)
{
+ if (SYMBOL_INLINE_P(sym)) return sym_inline_unpack(sym, buf, lenp);
+
+ sym >>= SYMBOL_NORMAL_SHIFT;
if (sym == 0 || mrb->symidx < sym) {
if (lenp) *lenp = 0;
return NULL;
@@ -156,6 +265,12 @@ mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
return mrb->symtbl[sym].name;
}
+MRB_API const char*
+mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
+{
+ return sym2name_len(mrb, sym, mrb->symbuf, lenp);
+}
+
void
mrb_free_symtbl(mrb_state *mrb)
{
@@ -167,13 +282,11 @@ mrb_free_symtbl(mrb_state *mrb)
}
}
mrb_free(mrb, mrb->symtbl);
- kh_destroy(n2s, mrb, mrb->name2sym);
}
void
mrb_init_symtbl(mrb_state *mrb)
{
- mrb->name2sym = kh_init(n2s, mrb);
}
/**********************************************************************
@@ -209,26 +322,6 @@ mrb_init_symtbl(mrb_state *mrb)
*
*/
-
-/* 15.2.11.3.1 */
-/*
- * call-seq:
- * sym == obj -> true or false
- *
- * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
- * symbol, returns <code>true</code>.
- */
-
-static mrb_value
-sym_equal(mrb_state *mrb, mrb_value sym1)
-{
- mrb_value sym2;
-
- mrb_get_args(mrb, "o", &sym2);
-
- return mrb_bool_value(mrb_obj_equal(mrb, sym1, sym2));
-}
-
/* 15.2.11.3.2 */
/* 15.2.11.3.3 */
/*
@@ -241,14 +334,9 @@ sym_equal(mrb_state *mrb, mrb_value sym1)
* :fred.id2name #=> "fred"
*/
static mrb_value
-mrb_sym_to_s(mrb_state *mrb, mrb_value sym)
+sym_to_s(mrb_state *mrb, mrb_value sym)
{
- mrb_sym id = mrb_symbol(sym);
- const char *p;
- mrb_int len;
-
- p = mrb_sym2name_len(mrb, id, &len);
- return mrb_str_new_static(mrb, p, len);
+ return mrb_sym2str(mrb, mrb_symbol(sym));
}
/* 15.2.11.3.4 */
@@ -387,8 +475,8 @@ id:
switch (*m) {
case '!': case '?': case '=': ++m;
default: break;
- }
}
+ }
break;
}
return *m ? FALSE : TRUE;
@@ -406,7 +494,7 @@ sym_inspect(mrb_state *mrb, mrb_value sym)
name = mrb_sym2name_len(mrb, id, &len);
str = mrb_str_new(mrb, 0, len+1);
sp = RSTRING_PTR(str);
- RSTRING_PTR(str)[0] = ':';
+ sp[0] = ':';
memcpy(sp+1, name, len);
mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX);
if (!symname_p(name) || strlen(name) != (size_t)len) {
@@ -415,6 +503,9 @@ sym_inspect(mrb_state *mrb, mrb_value sym)
sp[0] = ':';
sp[1] = '"';
}
+#ifdef MRB_UTF8_STRING
+ if (SYMBOL_INLINE_P(id)) RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
+#endif
return str;
}
@@ -425,6 +516,11 @@ mrb_sym2str(mrb_state *mrb, mrb_sym sym)
const char *name = mrb_sym2name_len(mrb, sym, &len);
if (!name) return mrb_undef_value(); /* can't happen */
+ if (SYMBOL_INLINE_P(sym)) {
+ mrb_value str = mrb_str_new(mrb, name, len);
+ RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
+ return str;
+ }
return mrb_str_new_static(mrb, name, len);
}
@@ -439,7 +535,9 @@ mrb_sym2name(mrb_state *mrb, mrb_sym sym)
return name;
}
else {
- mrb_value str = mrb_str_dump(mrb, mrb_str_new_static(mrb, name, len));
+ mrb_value str = SYMBOL_INLINE_P(sym) ?
+ mrb_str_new(mrb, name, len) : mrb_str_new_static(mrb, name, len);
+ str = mrb_str_dump(mrb, str);
return RSTRING_PTR(str);
}
}
@@ -461,9 +559,10 @@ sym_cmp(mrb_state *mrb, mrb_value s1)
const char *p1, *p2;
int retval;
mrb_int len, len1, len2;
+ char buf1[8], buf2[8];
- p1 = mrb_sym2name_len(mrb, sym1, &len1);
- p2 = mrb_sym2name_len(mrb, sym2, &len2);
+ p1 = sym2name_len(mrb, sym1, buf1, &len1);
+ p2 = sym2name_len(mrb, sym2, buf2, &len2);
len = lesser(len1, len2);
retval = memcmp(p1, p2, len);
if (retval == 0) {
@@ -481,14 +580,13 @@ mrb_init_symbol(mrb_state *mrb)
{
struct RClass *sym;
- mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */
+ mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */
MRB_SET_INSTANCE_TT(sym, MRB_TT_SYMBOL);
mrb_undef_class_method(mrb, sym, "new");
- mrb_define_method(mrb, sym, "===", sym_equal, MRB_ARGS_REQ(1)); /* 15.2.11.3.1 */
- mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
- mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
- mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
- mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
- mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, sym, "id2name", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
+ mrb_define_method(mrb, sym, "to_s", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
+ mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
+ mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
+ mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));
}