diff options
Diffstat (limited to 'mrbgems/mruby-string-utf8')
| -rw-r--r-- | mrbgems/mruby-string-utf8/mrbgem.rake | 1 | ||||
| -rw-r--r-- | mrbgems/mruby-string-utf8/src/string.c | 133 | ||||
| -rw-r--r-- | mrbgems/mruby-string-utf8/test/string.rb | 41 |
3 files changed, 134 insertions, 41 deletions
diff --git a/mrbgems/mruby-string-utf8/mrbgem.rake b/mrbgems/mruby-string-utf8/mrbgem.rake index 86d0a6da3..7642d4e07 100644 --- a/mrbgems/mruby-string-utf8/mrbgem.rake +++ b/mrbgems/mruby-string-utf8/mrbgem.rake @@ -2,4 +2,5 @@ MRuby::Gem::Specification.new('mruby-string-utf8') do |spec| spec.license = 'MIT' spec.author = 'mruby developers' spec.summary = 'UTF-8 support in String class' + spec.add_dependency('mruby-string-ext', :core => 'mruby-string-ext') end diff --git a/mrbgems/mruby-string-utf8/src/string.c b/mrbgems/mruby-string-utf8/src/string.c index edda491fc..874fa8dbb 100644 --- a/mrbgems/mruby-string-utf8/src/string.c +++ b/mrbgems/mruby-string-utf8/src/string.c @@ -1,17 +1,12 @@ #include "mruby.h" #include "mruby/array.h" +#include "mruby/class.h" #include "mruby/string.h" #include "mruby/range.h" #include "mruby/re.h" #include <ctype.h> #include <string.h> -#define STR_EMBED_P(s) ((s)->flags & MRB_STR_EMBED) -#define STR_EMBED_LEN(s)\ - (mrb_int)(((s)->flags & MRB_STR_EMBED_LEN_MASK) >> MRB_STR_EMBED_LEN_SHIFT) -#define STR_PTR(s) ((STR_EMBED_P(s)) ? (s)->as.ary : (s)->as.heap.ptr) -#define STR_LEN(s) ((STR_EMBED_P(s)) ? STR_EMBED_LEN(s) : (mrb_int)(s)->as.heap.len) - static const char utf8len_codepage[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -36,27 +31,6 @@ static char utf8len_codepage_zero[256] = 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, }; -static const char isspacetable[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -#define ascii_isspace(c) isspacetable[(unsigned char)(c)] - static mrb_int utf8code(unsigned char* p) { @@ -127,9 +101,7 @@ mrb_utf8_strlen(mrb_value str, mrb_int len) static mrb_value mrb_str_size(mrb_state *mrb, mrb_value str) { - mrb_int size = mrb_utf8_strlen(str, -1); - - return mrb_fixnum_value(size); + return mrb_fixnum_value(mrb_utf8_strlen(str, -1)); } #define RSTRING_LEN_UTF8(s) mrb_utf8_strlen(s, -1) @@ -161,10 +133,10 @@ mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mr qstable[i] = m + 1; for (; x < xe; ++x) qstable[*x] = xe - x; - /* Searching */ + /* Searching */ for (; y + m <= ys + n; y += *(qstable + y[m])) { if (*xs == *y && memcmp(xs, y, m) == 0) - return y - ys; + return y - ys; } return -1; } @@ -272,17 +244,17 @@ str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) mrb_int len = RSTRING_LEN(sub); /* substring longer than string */ - if (STR_LEN(ps) < len) return -1; - if (STR_LEN(ps) - pos < len) { - pos = STR_LEN(ps) - len; + if (RSTR_LEN(ps) < len) return -1; + if (RSTR_LEN(ps) - pos < len) { + pos = RSTR_LEN(ps) - len; } - sbeg = STR_PTR(ps); - s = STR_PTR(ps) + pos; + sbeg = RSTR_PTR(ps); + s = RSTR_PTR(ps) + pos; t = RSTRING_PTR(sub); if (len) { while (sbeg <= s) { if (memcmp(s, t, len) == 0) { - return s - STR_PTR(ps); + return s - RSTR_PTR(ps); } s--; } @@ -572,7 +544,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) if (mrb_string_p(spat)) { split_type = string; if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ - split_type = awk; + split_type = awk; } } else { @@ -594,7 +566,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) int ai = mrb_gc_arena_save(mrb); c = (unsigned char)*ptr++; if (skip) { - if (ascii_isspace(c)) { + if (ISSPACE(c)) { beg = ptr - bptr; } else { @@ -603,7 +575,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) if (lim_p && lim <= i) break; } } - else if (ascii_isspace(c)) { + else if (ISSPACE(c)) { mrb_ary_push(mrb, result, str_subseq(mrb, str, beg, end-beg)); mrb_gc_arena_restore(mrb, ai); skip = 1; @@ -667,6 +639,80 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) return result; } +static mrb_value +mrb_str_chr(mrb_state *mrb, mrb_value self) +{ + return str_substr(mrb, self, 0, 1); +} + +static mrb_value +mrb_str_chars(mrb_state *mrb, mrb_value self) +{ + mrb_value result; + mrb_value blk; + int ai; + mrb_int len; + mrb_value arg; + char *p = RSTRING_PTR(self); + char *e = p + RSTRING_LEN(self); + + mrb_get_args(mrb, "&", &blk); + + result = mrb_ary_new(mrb); + + if (!mrb_nil_p(blk)) { + while (p < e) { + len = utf8len((unsigned char*) p); + arg = mrb_str_new(mrb, p, len); + mrb_yield_argv(mrb, blk, 1, &arg); + p += len; + } + return self; + } + while (p < e) { + ai = mrb_gc_arena_save(mrb); + len = utf8len((unsigned char*) p); + mrb_ary_push(mrb, result, mrb_str_new(mrb, p, len)); + mrb_gc_arena_restore(mrb, ai); + p += len; + } + return result; +} + +static mrb_value +mrb_str_codepoints(mrb_state *mrb, mrb_value self) +{ + mrb_value result; + mrb_value blk; + int ai; + mrb_int len; + mrb_value arg; + char *p = RSTRING_PTR(self); + char *e = p + RSTRING_LEN(self); + + mrb_get_args(mrb, "&", &blk); + + result = mrb_ary_new(mrb); + + if (!mrb_nil_p(blk)) { + while (p < e) { + len = utf8len((unsigned char*) p); + arg = mrb_fixnum_value(utf8code((unsigned char*) p)); + mrb_yield_argv(mrb, blk, 1, &arg); + p += len; + } + return self; + } + while (p < e) { + ai = mrb_gc_arena_save(mrb); + len = utf8len((unsigned char*) p); + mrb_ary_push(mrb, result, mrb_fixnum_value(utf8code((unsigned char*) p))); + mrb_gc_arena_restore(mrb, ai); + p += len; + } + return result; +} + void mrb_mruby_string_utf8_gem_init(mrb_state* mrb) { @@ -682,6 +728,11 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); + mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "chars", mrb_str_chars, MRB_ARGS_NONE()); + mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "each_char"), mrb_intern_lit(mrb, "chars")); + mrb_define_method(mrb, s, "codepoints", mrb_str_codepoints, MRB_ARGS_NONE()); + mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "each_codepoint"), mrb_intern_lit(mrb, "codepoints")); mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE()); } diff --git a/mrbgems/mruby-string-utf8/test/string.rb b/mrbgems/mruby-string-utf8/test/string.rb index 1bfa8512c..5b4180037 100644 --- a/mrbgems/mruby-string-utf8/test/string.rb +++ b/mrbgems/mruby-string-utf8/test/string.rb @@ -66,3 +66,44 @@ assert('String#rindex') do assert_equal 12, str.rindex('ち') assert_equal 3, str.rindex('ち', 10) end + +assert('String#chr(utf-8)') do + assert_equal "こ", "こんにちは世界!".chr +end + +assert('String#chars') do + expect = ['こ', 'ん', 'に', 'ち', 'は', '世', '界', '!'] + assert_equal expect, "こんにちは世界!".chars + s = "" + "こんにちは世界!".chars do |x| + s += x + end + assert_equal "こんにちは世界!", s +end + +assert('String#each_char') do + expect = ['こ', 'ん', 'に', 'ち', 'は', '世', '界', '!'] + s = "" + "こんにちは世界!".each_char do |x| + s += x + end + assert_equal "こんにちは世界!", s +end +assert('String#codepoints') do + expect = [12371, 12435, 12395, 12385, 12399, 19990, 30028, 33] + assert_equal expect, "こんにちは世界!".codepoints + cp = [] + "こんにちは世界!".codepoints do |x| + cp << x + end + assert_equal expect, cp +end + +assert('String#each_codepoint') do + expect = [12371, 12435, 12395, 12385, 12399, 19990, 30028, 33] + cp = [] + "こんにちは世界!".each_codepoint do |x| + cp << x + end + assert_equal expect, cp +end |
