From ba5a995c23cbac1daafe538a84d8498c8ee1746c Mon Sep 17 00:00:00 2001 From: mattn Date: Thu, 17 Apr 2014 19:47:01 +0900 Subject: Handle utf-8 code in index/rindex --- mrbgems/mruby-string-utf8/src/string.c | 166 +++++++++++++++++++++++++++++---- 1 file changed, 150 insertions(+), 16 deletions(-) (limited to 'mrbgems/mruby-string-utf8') diff --git a/mrbgems/mruby-string-utf8/src/string.c b/mrbgems/mruby-string-utf8/src/string.c index 7f323415f..35a853d83 100644 --- a/mrbgems/mruby-string-utf8/src/string.c +++ b/mrbgems/mruby-string-utf8/src/string.c @@ -5,6 +5,12 @@ #include #include +#define STR_EMBED_P(s) ((s)->flags & MRB_STR_EMBED) +#define STR_EMBED_LEN(s)\ + (size_t)(((s)->flags & MRB_STR_EMBED_LEN_MASK) >> MRB_STR_EMBED_LEN_SHIFT) +#define STR_PTR(s) ((STR_EMBED_P(s)) ? (s)->as.ary : (s)->as.heap.ptr) +#define STR_LEN(s) ((STR_EMBED_P(s)) ? STR_EMBED_LEN(s) : (size_t)(s)->as.heap.len) + static const char utf8len_codepage[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -17,6 +23,8 @@ static const char utf8len_codepage[256] = 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1, }; +static mrb_value mrb_fixnum_chr(mrb_state*, mrb_value); + static mrb_int utf8len(unsigned char* p) { @@ -119,7 +127,6 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) mrb_int i; unsigned char *p = (unsigned char*) RSTRING_PTR(str), *t; unsigned char *e = p + RSTRING_LEN(str); - for (i = 0; i < beg && p 0) + sub = argv[0]; + else + sub = mrb_nil_value(); + + } + regexp_check(mrb, sub); + if (pos < 0) { + pos += RSTRING_LEN(str); + if (pos < 0) { + return mrb_nil_value(); + } + } + + if (mrb_type(sub) == MRB_TT_FIXNUM) { + sub = mrb_fixnum_chr(mrb, sub); + } + + switch (mrb_type(sub)) { + default: { + mrb_value tmp; + + tmp = mrb_check_string_type(mrb, sub); + if (mrb_nil_p(tmp)) { + mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); + } + sub = tmp; + } + /* fall through */ + case MRB_TT_STRING: + pos = str_index(mrb, str, sub, pos); + break; + } + + if (pos == -1) return mrb_nil_value(); + return mrb_fixnum_value(mrb_utf8_strlen(str, pos)); } static mrb_value @@ -269,11 +345,12 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) char *buf = (char *)mrb_malloc(mrb, (size_t)len); unsigned char* p = (unsigned char*)buf; unsigned char* e = (unsigned char*)buf + len; - unsigned char* r = (unsigned char*)RSTRING_END(str); - + unsigned char* r; + memcpy(buf, RSTRING_PTR(str), len); mrb_str_modify(mrb, mrb_str_ptr(str)); - + r = (unsigned char*)RSTRING_PTR(str) + len; + while (p len) pos = len; + } + else { + pos = len; + if (argc > 0) + sub = argv[0]; + else + sub = mrb_nil_value(); + } + regexp_check(mrb, sub); + + if (mrb_type(sub) == MRB_TT_FIXNUM) { + sub = mrb_fixnum_chr(mrb, sub); + } + + switch (mrb_type(sub)) { + default: { + mrb_value tmp; + + tmp = mrb_check_string_type(mrb, sub); + if (mrb_nil_p(tmp)) { + mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); + } + sub = tmp; + } + /* fall through */ + case MRB_TT_STRING: + pos = str_rindex(mrb, str, sub, pos); + break; + } + + if (pos == -1) return mrb_nil_value(); + return mrb_fixnum_value(mrb_utf8_strlen(str, pos)); +} + static mrb_value mrb_str_reverse(mrb_state *mrb, mrb_value str) { @@ -334,11 +467,12 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); - mrb_define_method(mrb, s, "index", mrb_str_index, MRB_ARGS_ANY()); + mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); - mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE()); } -- cgit v1.2.3