diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/class.c | 2 | ||||
| -rw-r--r-- | src/string.c | 79 | ||||
| -rw-r--r-- | src/symbol.c | 113 |
3 files changed, 108 insertions, 86 deletions
diff --git a/src/class.c b/src/class.c index dd5b65cc3..359bb4fea 100644 --- a/src/class.c +++ b/src/class.c @@ -1815,7 +1815,7 @@ mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const * show information on the thing we're attached to as well. */ -static mrb_value +mrb_value mrb_mod_to_s(mrb_state *mrb, mrb_value klass) { mrb_value str; diff --git a/src/string.c b/src/string.c index 52941473c..19962fb30 100644 --- a/src/string.c +++ b/src/string.c @@ -20,6 +20,7 @@ #include <mruby/class.h> #include <mruby/range.h> #include <mruby/string.h> +#include <mruby/numeric.h> #include <mruby/re.h> typedef struct mrb_shared_string { @@ -156,13 +157,6 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len) return mrb_obj_value(str_new(mrb, p, len)); } -/* - * call-seq: (Caution! NULL string) - * String.new(str="") => new_str - * - * Returns a new string object containing a copy of <i>str</i>. - */ - MRB_API mrb_value mrb_str_new_cstr(mrb_state *mrb, const char *p) { @@ -238,27 +232,36 @@ utf8len(const char* p, const char* e) return len; } -static mrb_int -utf8_strlen(mrb_value str, mrb_int len) +mrb_int +mrb_utf8_len(const char *str, mrb_int byte_len) { mrb_int total = 0; - char* p = RSTRING_PTR(str); - char* e = p; - if (RSTRING(str)->flags & MRB_STR_NO_UTF) { - return RSTRING_LEN(str); - } - e += len < 0 ? RSTRING_LEN(str) : len; - while (p<e) { + const char *p = str; + const char *e = p + byte_len; + + while (p < e) { p += utf8len(p, e); total++; } - if (RSTRING_LEN(str) == total) { - RSTRING(str)->flags |= MRB_STR_NO_UTF; - } return total; } -#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1) +static mrb_int +utf8_strlen(mrb_value str) +{ + mrb_int byte_len = RSTRING_LEN(str); + + if (RSTRING(str)->flags & MRB_STR_NO_UTF) { + return byte_len; + } + else { + mrb_int utf8_len = mrb_utf8_len(RSTRING_PTR(str), byte_len); + if (byte_len == utf8_len) RSTRING(str)->flags |= MRB_STR_NO_UTF; + return utf8_len; + } +} + +#define RSTRING_CHAR_LEN(s) utf8_strlen(s) /* map character index to byte offset index */ static mrb_int @@ -739,12 +742,6 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) return RSTR_PTR(s); } -/* - * call-seq: (Caution! String("abcd") change) - * String("abcdefg") = String("abcd") + String("efg") - * - * Returns a new string object containing a copy of <i>str</i>. - */ MRB_API void mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) { @@ -752,12 +749,6 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) mrb_str_cat_str(mrb, self, other); } -/* - * call-seq: (Caution! String("abcd") remain) - * String("abcdefg") = String("abcd") + String("efg") - * - * Returns a new string object containing a copy of <i>str</i>. - */ MRB_API mrb_value mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) { @@ -775,10 +766,13 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) /* 15.2.10.5.2 */ /* - * call-seq: (Caution! String("abcd") remain) for stack_argument - * String("abcdefg") = String("abcd") + String("efg") + * call-seq: + * str + other_str -> new_str * - * Returns a new string object containing a copy of <i>str</i>. + * Concatenation---Returns a new <code>String</code> containing + * <i>other_str</i> concatenated to <i>str</i>. + * + * "Hello from " + self.to_s #=> "Hello from main" */ static mrb_value mrb_str_plus_m(mrb_state *mrb, mrb_value self) @@ -979,13 +973,22 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1) return mrb_bool_value(mrb_str_equal(mrb, str1, str2)); } /* ---------------------------------- */ +mrb_value mrb_mod_to_s(mrb_state *mrb, mrb_value klass); + MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str) { - if (!mrb_string_p(str)) { + switch (mrb_type(str)) { + case MRB_TT_STRING: + return str; + case MRB_TT_FIXNUM: + return mrb_fixnum_to_str(mrb, str, 10); + case MRB_TT_CLASS: + case MRB_TT_MODULE: + return mrb_mod_to_s(mrb, str); + default: return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); } - return str; } MRB_API const char* @@ -1587,8 +1590,6 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) return mrb_fixnum_value(pos); } -#define STR_REPLACE_SHARED_MIN 10 - /* 15.2.10.5.24 */ /* 15.2.10.5.28 */ /* diff --git a/src/symbol.c b/src/symbol.c index 6b4c7200c..4242f3d8e 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -15,28 +15,11 @@ /* ------------------------------------------------------ */ typedef struct symbol_name { mrb_bool lit : 1; + uint8_t prev; uint16_t len; const char *name; } symbol_name; -static inline khint_t -sym_hash_func(mrb_state *mrb, mrb_sym s) -{ - khint_t h = 0; - size_t i, len = mrb->symtbl[s].len; - const char *p = mrb->symtbl[s].name; - - for (i=0; i<len; i++) { - h = (h << 5) - h + *p++; - } - return h; -} -#define sym_hash_equal(mrb,a, b) (mrb->symtbl[a].len == mrb->symtbl[b].len && memcmp(mrb->symtbl[a].name, mrb->symtbl[b].name, mrb->symtbl[a].len) == 0) - -KHASH_DECLARE(n2s, mrb_sym, mrb_sym, FALSE) -KHASH_DEFINE (n2s, mrb_sym, mrb_sym, FALSE, sym_hash_func, sym_hash_equal) -/* ------------------------------------------------------ */ - static void sym_validate_len(mrb_state *mrb, size_t len) { @@ -45,24 +28,62 @@ sym_validate_len(mrb_state *mrb, size_t len) } } +uint8_t +symhash(const char *key, size_t len) +{ + uint32_t hash, i; + + for(hash = i = 0; i < len; ++i) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash & 0xff; +} + +static mrb_sym +find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash) +{ + mrb_sym i; + symbol_name *sname; + + i = mrb->symhash[hash]; + if (i == 0) return 0; + do { + sname = &mrb->symtbl[i]; + if (sname->len == len && memcmp(sname->name, name, len) == 0) { + return i; + } + if (sname->prev == 0xff) { + i -= 0xff; + sname = &mrb->symtbl[i]; + while (mrb->symtbl < sname) { + if (sname->len == len && memcmp(sname->name, name, len) == 0) { + return (mrb_sym)(sname - mrb->symtbl); + } + sname--; + } + return 0; + } + i -= sname->prev; + } while (sname->prev > 0); + return 0; +} + static mrb_sym sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) { - khash_t(n2s) *h = mrb->name2sym; - symbol_name *sname = mrb->symtbl; /* symtbl[0] for working memory */ - khiter_t k; mrb_sym sym; - char *p; + symbol_name *sname; + uint8_t hash; sym_validate_len(mrb, len); - if (sname) { - sname->lit = lit; - sname->len = (uint16_t)len; - sname->name = name; - k = kh_get(n2s, mrb, h, 0); - if (k != kh_end(h)) - return kh_key(h, k); - } + hash = symhash(name, len); + sym = find_symbol(mrb, name, len, hash); + if (sym > 0) return sym; /* registering a new symbol */ sym = ++mrb->symidx; @@ -78,13 +99,23 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) sname->lit = TRUE; } else { - p = (char *)mrb_malloc(mrb, len+1); + char *p = (char *)mrb_malloc(mrb, len+1); memcpy(p, name, len); p[len] = 0; sname->name = (const char*)p; sname->lit = FALSE; } - kh_put(n2s, mrb, h, sym); + if (mrb->symhash[hash]) { + mrb_sym i = sym - mrb->symhash[hash]; + if (i > 0xff) + sname->prev = 0xff; + else + sname->prev = i; + } + else { + sname->prev = 0; + } + mrb->symhash[hash] = sym; return sym; } @@ -116,25 +147,18 @@ mrb_intern_str(mrb_state *mrb, mrb_value str) MRB_API mrb_value mrb_check_intern(mrb_state *mrb, const char *name, size_t len) { - khash_t(n2s) *h = mrb->name2sym; - symbol_name *sname = mrb->symtbl; - khiter_t k; + mrb_sym sym; sym_validate_len(mrb, len); - sname->len = (uint16_t)len; - sname->name = name; - - k = kh_get(n2s, mrb, h, 0); - if (k != kh_end(h)) { - return mrb_symbol_value(kh_key(h, k)); - } + sym = find_symbol(mrb, name, len, symhash(name, len)); + if (sym > 0) return mrb_symbol_value(sym); return mrb_nil_value(); } MRB_API mrb_value mrb_check_intern_cstr(mrb_state *mrb, const char *name) { - return mrb_check_intern(mrb, name, (mrb_int)strlen(name)); + return mrb_check_intern(mrb, name, strlen(name)); } MRB_API mrb_value @@ -143,7 +167,6 @@ mrb_check_intern_str(mrb_state *mrb, mrb_value str) return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); } -/* lenp must be a pointer to a size_t variable */ MRB_API const char* mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp) { @@ -167,13 +190,11 @@ mrb_free_symtbl(mrb_state *mrb) } } mrb_free(mrb, mrb->symtbl); - kh_destroy(n2s, mrb, mrb->name2sym); } void mrb_init_symtbl(mrb_state *mrb) { - mrb->name2sym = kh_init(n2s, mrb); } /********************************************************************** |
