diff options
| -rw-r--r-- | lib/mruby/build.rb | 19 | ||||
| -rw-r--r-- | src/string.c | 76 | ||||
| -rw-r--r-- | test/t/string.rb | 11 |
3 files changed, 86 insertions, 20 deletions
diff --git a/lib/mruby/build.rb b/lib/mruby/build.rb index 887a5518e..d9d52948b 100644 --- a/lib/mruby/build.rb +++ b/lib/mruby/build.rb @@ -22,7 +22,6 @@ module MRuby def initialize(name, &block) @name, @initializer = name.to_s, block - MRuby::Toolchain.toolchains ||= {} MRuby::Toolchain.toolchains[@name] = self end @@ -30,13 +29,8 @@ module MRuby conf.instance_exec(conf, params, &@initializer) end - def self.load - Dir.glob("#{MRUBY_ROOT}/tasks/toolchains/*.rake").each do |file| - Kernel.load file - end - end + self.toolchains = {} end - Toolchain.load class Build class << self @@ -196,10 +190,15 @@ EOS end def toolchain(name, params={}) - tc = Toolchain.toolchains[name.to_s] - fail "Unknown #{name} toolchain" unless tc + name = name.to_s + tc = Toolchain.toolchains[name] || begin + path = "#{MRUBY_ROOT}/tasks/toolchains/#{name}.rake" + fail "Unknown #{name} toolchain" unless File.exist?(path) + load path + Toolchain.toolchains[name] + end tc.setup(self, params) - @toolchains.unshift name.to_s + @toolchains.unshift name end def primary_toolchain diff --git a/src/string.c b/src/string.c index 078c028d8..056348921 100644 --- a/src/string.c +++ b/src/string.c @@ -83,7 +83,7 @@ str_new(mrb_state *mrb, const char *p, size_t len) } static inline void -str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj) +str_with_class(struct RString *s, mrb_value obj) { s->c = mrb_str_ptr(obj)->c; } @@ -93,7 +93,7 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str) { struct RString *s = str_new(mrb, 0, 0); - str_with_class(mrb, s, str); + str_with_class(s, str); return mrb_obj_value(s); } @@ -304,12 +304,71 @@ bytes2chars(char *p, mrb_int bi) return i; } +static mrb_int +str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, const char *s, const mrb_int slen, mrb_int off) +{ + /* Based on Quick Search algorithm (Boyer-Moore-Horspool algorithm) */ + + ptrdiff_t qstable[1 << CHAR_BIT]; + + /* Preprocessing */ + { + size_t i; + + for (i = 0; i < 1 << CHAR_BIT; i ++) { + qstable[i] = slen; + } + for (i = 0; i < slen; i ++) { + qstable[(unsigned char)s[i]] = slen - (i + 1); + } + } + + /* Searching */ + while (p < pend && pend - p >= slen) { + const char *pivot; + + if (memcmp(p, s, slen) == 0) { + return off; + } + + pivot = p + qstable[(unsigned char)p[slen - 1]]; + if (pivot > pend || pivot < p /* overflowed */) { return -1; } + + do { + p += utf8len(p, pend); + off ++; + } while (p < pivot); + } + + return -1; +} + +static mrb_int +str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) +{ + const char *p = RSTRING_PTR(str); + const char *pend = p + RSTRING_LEN(str); + const char *s = RSTRING_PTR(sub); + const mrb_int slen = RSTRING_LEN(sub); + mrb_int off = pos; + + for (; pos > 0; pos --) { + if (pend - p < 1) { return -1; } + p += utf8len(p, pend); + } + + if (slen < 1) { return off; } + + return str_index_str_by_char_search(mrb, p, pend, s, slen, off); +} + #define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value(); #else #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s) #define chars2bytes(p, off, ci) (ci) #define bytes2chars(p, bi) (bi) #define BYTES_ALIGN_CHECK(pos) +#define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos) #endif static inline mrb_int @@ -830,7 +889,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self) len = RSTRING_LEN(self)*times; str2 = str_new(mrb, 0, len); - str_with_class(mrb, str2, self); + str_with_class(str2, self); p = RSTR_PTR(str2); if (len > 0) { n = RSTRING_LEN(self); @@ -1011,7 +1070,7 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) struct RString *s = mrb_str_ptr(str); struct RString *dup = str_new(mrb, 0, 0); - str_with_class(mrb, dup, str); + str_with_class(dup, str); return str_replace(mrb, dup, s); } @@ -1680,15 +1739,13 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) else sub = mrb_nil_value(); } - clen = RSTRING_CHAR_LEN(str); if (pos < 0) { + clen = RSTRING_CHAR_LEN(str); pos += clen; if (pos < 0) { return mrb_nil_value(); } } - if (pos > clen) return mrb_nil_value(); - pos = chars2bytes(str, 0, pos); switch (mrb_type(sub)) { default: { @@ -1702,12 +1759,11 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) } /* fall through */ case MRB_TT_STRING: - pos = str_index_str(mrb, str, sub, pos); + pos = str_index_str_by_char(mrb, str, sub, pos); break; } if (pos == -1) return mrb_nil_value(); - pos = bytes2chars(RSTRING_PTR(str), pos); BYTES_ALIGN_CHECK(pos); return mrb_fixnum_value(pos); } @@ -2561,7 +2617,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) } result = str_new(mrb, 0, len); - str_with_class(mrb, result, str); + str_with_class(result, str); p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); q = RSTR_PTR(result); *q++ = '"'; diff --git a/test/t/string.rb b/test/t/string.rb index 87d60ed72..cf145f97e 100644 --- a/test/t/string.rb +++ b/test/t/string.rb @@ -468,6 +468,17 @@ assert('String#index', '15.2.10.5.22') do assert_equal nil, "hello".index("", 6) end +assert('String#index(UTF-8)', '15.2.10.5.22') do + assert_equal 0, '⓿➊➋➌➍➎'.index('⓿') + assert_nil '⓿➊➋➌➍➎'.index('➓') + assert_equal 6, '⓿➊➋➌➍➎⓿➊➋➌➍➎'.index('⓿', 1) + assert_equal 6, "⓿➊➋➌➍➎".index("", 6) + assert_equal nil, "⓿➊➋➌➍➎".index("", 7) + assert_equal 0, '⓿➊➋➌➍➎'.index("\xe2") + assert_equal nil, '⓿➊➋➌➍➎'.index("\xe3") + assert_equal 6, "\xd1\xd1\xd1\xd1\xd1\xd1⓿➊➋➌➍➎".index('⓿') +end if UTF8STRING + assert('String#initialize', '15.2.10.5.23') do a = '' a.initialize('abc') |
