summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--lib/mruby/build.rb19
-rw-r--r--src/string.c76
-rw-r--r--test/t/string.rb11
3 files changed, 86 insertions, 20 deletions
diff --git a/lib/mruby/build.rb b/lib/mruby/build.rb
index 887a5518e..d9d52948b 100644
--- a/lib/mruby/build.rb
+++ b/lib/mruby/build.rb
@@ -22,7 +22,6 @@ module MRuby
def initialize(name, &block)
@name, @initializer = name.to_s, block
- MRuby::Toolchain.toolchains ||= {}
MRuby::Toolchain.toolchains[@name] = self
end
@@ -30,13 +29,8 @@ module MRuby
conf.instance_exec(conf, params, &@initializer)
end
- def self.load
- Dir.glob("#{MRUBY_ROOT}/tasks/toolchains/*.rake").each do |file|
- Kernel.load file
- end
- end
+ self.toolchains = {}
end
- Toolchain.load
class Build
class << self
@@ -196,10 +190,15 @@ EOS
end
def toolchain(name, params={})
- tc = Toolchain.toolchains[name.to_s]
- fail "Unknown #{name} toolchain" unless tc
+ name = name.to_s
+ tc = Toolchain.toolchains[name] || begin
+ path = "#{MRUBY_ROOT}/tasks/toolchains/#{name}.rake"
+ fail "Unknown #{name} toolchain" unless File.exist?(path)
+ load path
+ Toolchain.toolchains[name]
+ end
tc.setup(self, params)
- @toolchains.unshift name.to_s
+ @toolchains.unshift name
end
def primary_toolchain
diff --git a/src/string.c b/src/string.c
index 078c028d8..056348921 100644
--- a/src/string.c
+++ b/src/string.c
@@ -83,7 +83,7 @@ str_new(mrb_state *mrb, const char *p, size_t len)
}
static inline void
-str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
+str_with_class(struct RString *s, mrb_value obj)
{
s->c = mrb_str_ptr(obj)->c;
}
@@ -93,7 +93,7 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str)
{
struct RString *s = str_new(mrb, 0, 0);
- str_with_class(mrb, s, str);
+ str_with_class(s, str);
return mrb_obj_value(s);
}
@@ -304,12 +304,71 @@ bytes2chars(char *p, mrb_int bi)
return i;
}
+static mrb_int
+str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, const char *s, const mrb_int slen, mrb_int off)
+{
+ /* Based on Quick Search algorithm (Boyer-Moore-Horspool algorithm) */
+
+ ptrdiff_t qstable[1 << CHAR_BIT];
+
+ /* Preprocessing */
+ {
+ size_t i;
+
+ for (i = 0; i < 1 << CHAR_BIT; i ++) {
+ qstable[i] = slen;
+ }
+ for (i = 0; i < slen; i ++) {
+ qstable[(unsigned char)s[i]] = slen - (i + 1);
+ }
+ }
+
+ /* Searching */
+ while (p < pend && pend - p >= slen) {
+ const char *pivot;
+
+ if (memcmp(p, s, slen) == 0) {
+ return off;
+ }
+
+ pivot = p + qstable[(unsigned char)p[slen - 1]];
+ if (pivot > pend || pivot < p /* overflowed */) { return -1; }
+
+ do {
+ p += utf8len(p, pend);
+ off ++;
+ } while (p < pivot);
+ }
+
+ return -1;
+}
+
+static mrb_int
+str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+{
+ const char *p = RSTRING_PTR(str);
+ const char *pend = p + RSTRING_LEN(str);
+ const char *s = RSTRING_PTR(sub);
+ const mrb_int slen = RSTRING_LEN(sub);
+ mrb_int off = pos;
+
+ for (; pos > 0; pos --) {
+ if (pend - p < 1) { return -1; }
+ p += utf8len(p, pend);
+ }
+
+ if (slen < 1) { return off; }
+
+ return str_index_str_by_char_search(mrb, p, pend, s, slen, off);
+}
+
#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
#else
#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
#define chars2bytes(p, off, ci) (ci)
#define bytes2chars(p, bi) (bi)
#define BYTES_ALIGN_CHECK(pos)
+#define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos)
#endif
static inline mrb_int
@@ -830,7 +889,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
len = RSTRING_LEN(self)*times;
str2 = str_new(mrb, 0, len);
- str_with_class(mrb, str2, self);
+ str_with_class(str2, self);
p = RSTR_PTR(str2);
if (len > 0) {
n = RSTRING_LEN(self);
@@ -1011,7 +1070,7 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
struct RString *s = mrb_str_ptr(str);
struct RString *dup = str_new(mrb, 0, 0);
- str_with_class(mrb, dup, str);
+ str_with_class(dup, str);
return str_replace(mrb, dup, s);
}
@@ -1680,15 +1739,13 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
else
sub = mrb_nil_value();
}
- clen = RSTRING_CHAR_LEN(str);
if (pos < 0) {
+ clen = RSTRING_CHAR_LEN(str);
pos += clen;
if (pos < 0) {
return mrb_nil_value();
}
}
- if (pos > clen) return mrb_nil_value();
- pos = chars2bytes(str, 0, pos);
switch (mrb_type(sub)) {
default: {
@@ -1702,12 +1759,11 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
}
/* fall through */
case MRB_TT_STRING:
- pos = str_index_str(mrb, str, sub, pos);
+ pos = str_index_str_by_char(mrb, str, sub, pos);
break;
}
if (pos == -1) return mrb_nil_value();
- pos = bytes2chars(RSTRING_PTR(str), pos);
BYTES_ALIGN_CHECK(pos);
return mrb_fixnum_value(pos);
}
@@ -2561,7 +2617,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
}
result = str_new(mrb, 0, len);
- str_with_class(mrb, result, str);
+ str_with_class(result, str);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
q = RSTR_PTR(result);
*q++ = '"';
diff --git a/test/t/string.rb b/test/t/string.rb
index 87d60ed72..cf145f97e 100644
--- a/test/t/string.rb
+++ b/test/t/string.rb
@@ -468,6 +468,17 @@ assert('String#index', '15.2.10.5.22') do
assert_equal nil, "hello".index("", 6)
end
+assert('String#index(UTF-8)', '15.2.10.5.22') do
+ assert_equal 0, '⓿➊➋➌➍➎'.index('⓿')
+ assert_nil '⓿➊➋➌➍➎'.index('➓')
+ assert_equal 6, '⓿➊➋➌➍➎⓿➊➋➌➍➎'.index('⓿', 1)
+ assert_equal 6, "⓿➊➋➌➍➎".index("", 6)
+ assert_equal nil, "⓿➊➋➌➍➎".index("", 7)
+ assert_equal 0, '⓿➊➋➌➍➎'.index("\xe2")
+ assert_equal nil, '⓿➊➋➌➍➎'.index("\xe3")
+ assert_equal 6, "\xd1\xd1\xd1\xd1\xd1\xd1⓿➊➋➌➍➎".index('⓿')
+end if UTF8STRING
+
assert('String#initialize', '15.2.10.5.23') do
a = ''
a.initialize('abc')