diff options
Diffstat (limited to 'mrbgems/mruby-string-ext/src/string.c')
| -rw-r--r-- | mrbgems/mruby-string-ext/src/string.c | 314 |
1 files changed, 309 insertions, 5 deletions
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c index f04f12c4b..c2ee1fc23 100644 --- a/mrbgems/mruby-string-ext/src/string.c +++ b/mrbgems/mruby-string-ext/src/string.c @@ -1,9 +1,9 @@ -#include <ctype.h> #include <string.h> -#include "mruby.h" -#include "mruby/array.h" -#include "mruby/class.h" -#include "mruby/string.h" +#include <mruby.h> +#include <mruby/array.h> +#include <mruby/class.h> +#include <mruby/string.h> +#include <mruby/range.h> static mrb_value mrb_str_getbyte(mrb_state *mrb, mrb_value str) @@ -19,6 +19,59 @@ mrb_str_getbyte(mrb_state *mrb, mrb_value str) return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]); } +static mrb_value +mrb_str_setbyte(mrb_state *mrb, mrb_value str) +{ + mrb_int pos, byte; + long len = RSTRING_LEN(str); + + mrb_get_args(mrb, "ii", &pos, &byte); + + if (pos < -len || len <= pos) + mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos)); + if (pos < 0) + pos += len; + + mrb_str_modify(mrb, mrb_str_ptr(str)); + byte &= 0xff; + RSTRING_PTR(str)[pos] = byte; + return mrb_fixnum_value((unsigned char)byte); +} + +static mrb_value +mrb_str_byteslice(mrb_state *mrb, mrb_value str) +{ + mrb_value a1; + mrb_int len; + int argc; + + argc = mrb_get_args(mrb, "o|i", &a1, &len); + if (argc == 2) { + return mrb_str_substr(mrb, str, mrb_fixnum(a1), len); + } + switch (mrb_type(a1)) { + case MRB_TT_RANGE: + { + mrb_int beg; + + len = RSTRING_LEN(str); + if (mrb_range_beg_len(mrb, a1, &beg, &len, len)) { + return mrb_str_substr(mrb, str, beg, len); + } + return mrb_nil_value(); + } + case MRB_TT_FLOAT: + a1 = mrb_fixnum_value((mrb_int)mrb_float(a1)); + /* fall through */ + case MRB_TT_FIXNUM: + return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); + default: + mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); + } + /* not reached */ + return mrb_nil_value(); +} + /* * call-seq: * str.swapcase! -> str or nil @@ -192,6 +245,51 @@ mrb_str_chr(mrb_state *mrb, mrb_value self) return mrb_str_substr(mrb, self, 0, 1); } +static mrb_value +mrb_fixnum_chr(mrb_state *mrb, mrb_value num) +{ + mrb_int cp = mrb_fixnum(num); +#ifdef MRB_UTF8_STRING + char utf8[4]; + mrb_int len; + + if (cp < 0 || 0x10FFFF < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); + } + if (cp < 0x80) { + utf8[0] = (char)cp; + len = 1; + } + else if (cp < 0x800) { + utf8[0] = (char)(0xC0 | (cp >> 6)); + utf8[1] = (char)(0x80 | (cp & 0x3F)); + len = 2; + } + else if (cp < 0x10000) { + utf8[0] = (char)(0xE0 | (cp >> 12)); + utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[2] = (char)(0x80 | ( cp & 0x3F)); + len = 3; + } + else { + utf8[0] = (char)(0xF0 | (cp >> 18)); + utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); + utf8[3] = (char)(0x80 | ( cp & 0x3F)); + len = 4; + } + return mrb_str_new(mrb, utf8, len); +#else + char c; + + if (cp < 0 || 0xff < cp) { + mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); + } + c = (char)cp; + return mrb_str_new(mrb, &c, 1); +#endif +} + /* * call-seq: * string.lines -> array of string @@ -239,6 +337,202 @@ mrb_str_lines(mrb_state *mrb, mrb_value self) return result; } +/* + * call-seq: + * string.succ -> string + * + * Returns next sequence of the string; + * + * a = "abc" + * a.succ #=> "abd" + */ +static mrb_value +mrb_str_succ_bang(mrb_state *mrb, mrb_value self) +{ + mrb_value result; + unsigned char *p, *e, *b, *t; + const char *prepend; + struct RString *s = mrb_str_ptr(self); + size_t l; + + if (RSTRING_LEN(self) == 0) + return self; + + mrb_str_modify(mrb, s); + l = RSTRING_LEN(self); + b = p = (unsigned char*) RSTRING_PTR(self); + t = e = p + l; + *(e--) = 0; + + // find trailing ascii/number + while (e >= b) { + if (ISALNUM(*e)) + break; + e--; + } + if (e < b) { + e = p + l - 1; + result = mrb_str_new_lit(mrb, ""); + } else { + // find leading letter of the ascii/number + b = e; + while (b > p) { + if (!ISALNUM(*b) || (ISALNUM(*b) && *b != '9' && *b != 'z' && *b != 'Z')) + break; + b--; + } + if (!ISALNUM(*b)) + b++; + result = mrb_str_new(mrb, (char*) p, b - p); + } + + while (e >= b) { + if (!ISALNUM(*e)) { + if (*e == 0xff) { + mrb_str_cat_lit(mrb, result, "\x01"); + (*e) = 0; + } else + (*e)++; + break; + } + prepend = NULL; + if (*e == '9') { + if (e == b) prepend = "1"; + *e = '0'; + } else if (*e == 'z') { + if (e == b) prepend = "a"; + *e = 'a'; + } else if (*e == 'Z') { + if (e == b) prepend = "A"; + *e = 'A'; + } else { + (*e)++; + break; + } + if (prepend) mrb_str_cat_cstr(mrb, result, prepend); + e--; + } + result = mrb_str_cat(mrb, result, (char*) b, t - b); + l = RSTRING_LEN(result); + mrb_str_resize(mrb, self, l); + memcpy(RSTRING_PTR(self), RSTRING_PTR(result), l); + return self; +} + +static mrb_value +mrb_str_succ(mrb_state *mrb, mrb_value self) +{ + mrb_value str; + + str = mrb_str_dup(mrb, self); + mrb_str_succ_bang(mrb, str); + return str; +} + +/* + * call-seq: + * str.prepend(other_str) -> str + * + * Prepend---Prepend the given string to <i>str</i>. + * + * a = "world" + * a.prepend("hello ") #=> "hello world" + * a #=> "hello world" + */ +static mrb_value +mrb_str_prepend(mrb_state *mrb, mrb_value self) +{ + struct RString *s1 = mrb_str_ptr(self), *s2, *temp_s; + mrb_int len; + mrb_value other, temp_str; + + mrb_get_args(mrb, "S", &other); + + mrb_str_modify(mrb, s1); + if (!mrb_string_p(other)) { + other = mrb_str_to_str(mrb, other); + } + s2 = mrb_str_ptr(other); + len = RSTR_LEN(s1) + RSTR_LEN(s2); + temp_str = mrb_str_new(mrb, NULL, RSTR_LEN(s1)); + temp_s = mrb_str_ptr(temp_str); + memcpy(RSTR_PTR(temp_s), RSTR_PTR(s1), RSTR_LEN(s1)); + if (RSTRING_CAPA(self) < len) { + mrb_str_resize(mrb, self, len); + } + memcpy(RSTR_PTR(s1), RSTR_PTR(s2), RSTR_LEN(s2)); + memcpy(RSTR_PTR(s1) + RSTR_LEN(s2), RSTR_PTR(temp_s), RSTR_LEN(temp_s)); + RSTR_SET_LEN(s1, len); + RSTR_PTR(s1)[len] = '\0'; + return self; +} + +#ifdef MRB_UTF8_STRING +static const char utf8len_codepage_zero[256] = +{ + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, +}; + +static mrb_int +utf8code(unsigned char* p) +{ + mrb_int len; + + if (p[0] < 0x80) + return p[0]; + + len = utf8len_codepage_zero[p[0]]; + if (len > 1 && (p[1] & 0xc0) == 0x80) { + if (len == 2) + return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); + if ((p[2] & 0xc0) == 0x80) { + if (len == 3) + return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + + (p[2] & 0x3f); + if ((p[3] & 0xc0) == 0x80) { + if (len == 4) + return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) + + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f); + if ((p[4] & 0xc0) == 0x80) { + if (len == 5) + return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18) + + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) + + (p[4] & 0x3f); + if ((p[5] & 0xc0) == 0x80 && len == 6) + return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24) + + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12) + + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f); + } + } + } + } + return p[0]; +} + +static mrb_value +mrb_str_ord(mrb_state* mrb, mrb_value str) +{ + if (RSTRING_LEN(str) == 0) + mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); + return mrb_fixnum_value(utf8code((unsigned char*) RSTRING_PTR(str))); +} +#else +static mrb_value +mrb_str_ord(mrb_state* mrb, mrb_value str) +{ + if (RSTRING_LEN(str) == 0) + mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string"); + return mrb_fixnum_value(RSTRING_PTR(str)[0]); +} +#endif + void mrb_mruby_string_ext_gem_init(mrb_state* mrb) { @@ -246,6 +540,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); + mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2)); + mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1)); mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "concat", mrb_str_concat2, MRB_ARGS_REQ(1)); @@ -256,6 +552,14 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb) mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "lines", mrb_str_lines, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); + mrb_define_method(mrb, s, "prepend", mrb_str_prepend, MRB_ARGS_REQ(1)); + mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ")); + mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!")); + mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); + + mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE()); } void |
