diff options
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 512 |
1 files changed, 262 insertions, 250 deletions
diff --git a/src/string.c b/src/string.c index 26ed7b052..aef0ac88f 100644 --- a/src/string.c +++ b/src/string.c @@ -10,56 +10,65 @@ #include <string.h> #include "mruby/string.h" #include <ctype.h> -#include "mruby/numeric.h" +#include <limits.h> #include "mruby/range.h" #include "mruby/array.h" #include "mruby/class.h" -#include "mruby/variable.h" #include <stdio.h> +#ifdef ENABLE_REGEXP #include "re.h" -#ifdef INCLUDE_REGEXP #include "regex.h" -#include "st.h" -#endif //INCLUDE_REGEXP - -#ifndef FALSE -#define FALSE 0 -#endif - -#ifndef TRUE -#define TRUE 1 -#endif +#endif //ENABLE_REGEXP const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2); -static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len); +static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len); #define RESIZE_CAPA(s,capacity) do {\ - s->buf = mrb_realloc(mrb, s->buf, (capacity)+1);\ + s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\ s->aux.capa = capacity;\ } while (0) +void +mrb_str_decref(mrb_state *mrb, struct mrb_shared_string *shared) +{ + shared->refcnt--; + if (shared->refcnt == 0) { + mrb_free(mrb, shared->ptr); + mrb_free(mrb, shared); + } +} + static void str_modify(mrb_state *mrb, struct RString *s) { if (s->flags & MRB_STR_SHARED) { - char *ptr, *p; - long len; + struct mrb_shared_string *shared = s->aux.shared; - p = s->buf; - len = s->len; - ptr = mrb_malloc(mrb, sizeof(char)*(len+1)); - if (p) { - memcpy(ptr, p, len); + if (shared->refcnt == 1 && s->ptr == shared->ptr) { + s->ptr = shared->ptr; + s->aux.capa = shared->len; + mrb_free(mrb, shared); + } + else { + char *ptr, *p; + long len; + + p = s->ptr; + len = s->len; + ptr = (char *)mrb_malloc(mrb, len+1); + if (p) { + memcpy(ptr, p, len); + } + ptr[len] = 0; + s->ptr = ptr; + s->aux.capa = len; + mrb_str_decref(mrb, shared); } - ptr[len] = 0; - s->buf = ptr; - s->len = len; - s->aux.capa = len; s->flags &= ~MRB_STR_SHARED; } } @@ -74,13 +83,11 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, int len) slen = s->len; if (len != slen) { if (slen < len || slen -len > 1024) { - s->buf = mrb_realloc(mrb, s->buf, len+1); - } - if (!(s->flags & MRB_STR_SHARED)) { - s->aux.capa = len; + s->ptr = (char *)mrb_realloc(mrb, s->ptr, len+1); } + s->aux.capa = len; s->len = len; - s->buf[len] = '\0'; /* sentinel */ + s->ptr[len] = '\0'; /* sentinel */ } return str; } @@ -90,8 +97,8 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len) { struct RString *s = mrb_str_ptr(str); - if (s->buf != p || s->len != len) { - mrb_raise(mrb, mrb->eRuntimeError_class, "string modified"); + if (s->ptr != p || s->len != len) { + mrb_raise(mrb, E_RUNTIME_ERROR, "string modified"); } } @@ -105,7 +112,7 @@ str_alloc(mrb_state *mrb, struct RClass *c) s = mrb_obj_alloc_string(mrb); s->c = c; - s->buf = 0; + s->ptr = 0; s->len = 0; s->aux.capa = 0; @@ -126,11 +133,11 @@ str_new(mrb_state *mrb, const char *p, int len) s->len = len; s->aux.capa = len; - s->buf = mrb_malloc(mrb, len+1); + s->ptr = (char *)mrb_malloc(mrb, len+1); if (p) { - memcpy(s->buf, p, len); + memcpy(s->ptr, p, len); } - s->buf[len] = '\0'; + s->ptr[len] = '\0'; return s; } @@ -161,8 +168,8 @@ mrb_str_buf_new(mrb_state *mrb, int capa) } s->len = 0; s->aux.capa = capa; - s->buf = mrb_malloc(mrb, capa+1); - s->buf[0] = '\0'; + s->ptr = (char *)mrb_malloc(mrb, capa+1); + s->ptr[0] = '\0'; return mrb_obj_value(s); } @@ -173,18 +180,18 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len) long capa, total, off = -1; str_modify(mrb, s); - if (ptr >= s->buf && ptr <= s->buf + s->len) { - off = ptr - s->buf; + if (ptr >= s->ptr && ptr <= s->ptr + s->len) { + off = ptr - s->ptr; } if (len == 0) return; capa = s->aux.capa; - if (s->len >= LONG_MAX - len) { + if (s->len >= INT_MAX - len) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big"); } total = s->len+len; if (capa <= total) { while (total > capa) { - if (capa + 1 >= LONG_MAX / 2) { + if (capa + 1 >= INT_MAX / 2) { capa = (total + 4095) / 4096; break; } @@ -193,11 +200,11 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len) RESIZE_CAPA(s, capa); } if (off != -1) { - ptr = s->buf + off; + ptr = s->ptr + off; } - memcpy(s->buf + s->len, ptr, len); + memcpy(s->ptr + s->len, ptr, len); s->len = total; - s->buf[total] = '\0'; /* sentinel */ + s->ptr[total] = '\0'; /* sentinel */ } mrb_value @@ -211,7 +218,9 @@ mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len) mrb_value mrb_str_new(mrb_state *mrb, const char *p, int len) { - struct RString *s = str_new(mrb, p, len); + struct RString *s; + + s = str_new(mrb, p, len); return mrb_obj_value(s); } @@ -240,39 +249,32 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p) int len = strlen(p); s = mrb_obj_alloc_string(mrb); - s->buf = mrb_malloc(mrb, len+1); - memcpy(s->buf, p, len); - s->buf[len] = 0; + s->ptr = (char *)mrb_malloc(mrb, len+1); + memcpy(s->ptr, p, len); + s->ptr[len] = 0; s->len = len; s->aux.capa = len; return mrb_obj_value(s); } -static struct RString* -str_make_shared(mrb_state *mrb, mrb_value str) +static void +str_make_shared(mrb_state *mrb, struct RString *s) { - struct RString *orig, *s; + if (!(s->flags & MRB_STR_SHARED)) { + struct mrb_shared_string *shared = (struct mrb_shared_string *)mrb_malloc(mrb, sizeof(struct mrb_shared_string)); - s = str_new(mrb, 0, 0); - str_with_class(mrb, s, str); - orig = mrb_str_ptr(str); - if (!(orig->flags & MRB_STR_SHARED)) { - struct RString *shared = mrb_obj_alloc_string(mrb); - - shared->buf = orig->buf; - shared->len = orig->len; - shared->aux.capa = orig->aux.capa; - - orig->aux.shared = shared; - orig->flags |= MRB_STR_SHARED; + shared->refcnt = 1; + if (s->aux.capa > s->len) { + s->ptr = shared->ptr = (char *)mrb_realloc(mrb, s->ptr, s->len+1); + } + else { + shared->ptr = s->ptr; + } + shared->len = s->len; + s->aux.shared = shared; + s->flags |= MRB_STR_SHARED; } - s->buf = orig->buf; - s->len = orig->len; - s->aux.shared = orig->aux.shared; - s->flags |= MRB_STR_SHARED; - - return s; } /* @@ -285,16 +287,19 @@ str_make_shared(mrb_state *mrb, mrb_value str) mrb_value mrb_str_literal(mrb_state *mrb, mrb_value str) { - struct RString *orig, *s; + struct RString *s, *orig; + struct mrb_shared_string *shared; - s = str_new(mrb, 0, 0); + s = str_alloc(mrb, mrb->string_class); orig = mrb_str_ptr(str); - while (orig->flags & MRB_STR_SHARED) { - orig = orig->aux.shared; + if (!(orig->flags & MRB_STR_SHARED)) { + str_make_shared(mrb, mrb_str_ptr(str)); } - s->buf = orig->buf; - s->len = orig->len; - s->aux.shared = orig; + shared = orig->aux.shared; + shared->refcnt++; + s->ptr = shared->ptr; + s->len = shared->len; + s->aux.shared = shared; s->flags |= MRB_STR_SHARED; return mrb_obj_value(s); @@ -312,7 +317,7 @@ mrb_str_body(mrb_value str, int *len_p) struct RString *s = mrb_str_ptr(str); *len_p = s->len; - return s->buf; + return s->ptr; } /* @@ -336,11 +341,11 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) if (s1->aux.capa < len) { s1->aux.capa = len; - s1->buf = mrb_realloc(mrb, s1->buf, len+1); + s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, len+1); } - memcpy(s1->buf+s1->len, s2->buf, s2->len); + memcpy(s1->ptr+s1->len, s2->ptr, s2->len); s1->len = len; - s1->buf[len] = 0; + s1->ptr[len] = 0; } /* @@ -357,8 +362,8 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) struct RString *t; t = str_new(mrb, 0, s->len + s2->len); - memcpy(t->buf, s->buf, s->len); - memcpy(t->buf + s->len, s2->buf, s2->len); + memcpy(t->ptr, s->ptr, s->len); + memcpy(t->ptr + s->len, s2->ptr, s2->len); return mrb_obj_value(t); } @@ -426,14 +431,14 @@ mrb_str_times(mrb_state *mrb, mrb_value self) if (times < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); } - if (times && INT32_MAX/times < RSTRING_LEN(self)) { + if (times && INT_MAX/times < RSTRING_LEN(self)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); } len = RSTRING_LEN(self)*times; str2 = str_new(mrb, 0, len); str_with_class(mrb, str2, self); - p = str2->buf; + p = str2->ptr; if (len > 0) { n = RSTRING_LEN(self); memcpy(p, RSTRING_PTR(self), n); @@ -468,7 +473,7 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2) struct RString *s2 = mrb_str_ptr(str2); len = lesser(s1->len, s2->len); - retval = memcmp(s1->buf, s2->buf, len); + retval = memcmp(s1->ptr, s2->ptr, len); if (retval == 0) { if (s1->len == s2->len) return 0; if (s1->len > s2->len) return 1; @@ -659,7 +664,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long static int mrb_memsearch(const void *x0, int m, const void *y0, int n) { - const unsigned char *x = x0, *y = y0; + const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0; if (m > n) return -1; else if (m == n) { @@ -676,7 +681,7 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n) } return -1; } - return mrb_memsearch_qs(x0, m, y0, n); + return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n); } static mrb_int @@ -712,7 +717,7 @@ mrb_str_dup(mrb_state *mrb, mrb_value str) /* should return shared string */ struct RString *s = mrb_str_ptr(str); - return mrb_str_new(mrb, s->buf, s->len); + return mrb_str_new(mrb, s->ptr, s->len); } static mrb_value @@ -730,12 +735,12 @@ num_index: return str; case MRB_TT_REGEX: -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP return mrb_str_subpat(mrb, str, indx, 0); //mrb_str_subpat(str, indx, INT2FIX(0)); #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); return mrb_nil_value(); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP case MRB_TT_STRING: if (mrb_str_index(mrb, str, indx, 0) != -1) @@ -750,7 +755,7 @@ num_index: len = RSTRING_LEN(str); switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) { - case 0/*FLASE*/: + case FALSE: break; case 2/*OTHER*/: return mrb_nil_value(); @@ -822,12 +827,12 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str) argc = mrb_get_args(mrb, "o|o", &a1, &a2); if (argc == 2) { if (mrb_type(a1) == MRB_TT_REGEX) { -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1])); #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); return mrb_nil_value(); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP } return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2)); } @@ -858,8 +863,8 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str) struct RString *s = mrb_str_ptr(str); str_modify(mrb, s); - if (s->len == 0 || !s->buf) return mrb_nil_value(); - p = s->buf; pend = s->buf + s->len; + if (s->len == 0 || !s->ptr) return mrb_nil_value(); + p = s->ptr; pend = s->ptr + s->len; if (ISLOWER(*p)) { *p = toupper(*p); modify = 1; @@ -918,14 +923,14 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) if (mrb_get_args(mrb, "|S", &rs) == 0) { if (len == 0) return mrb_nil_value(); smart_chomp: - if (s->buf[len-1] == '\n') { + if (s->ptr[len-1] == '\n') { s->len--; if (s->len > 0 && - s->buf[s->len-1] == '\r') { + s->ptr[s->len-1] == '\r') { s->len--; } } - else if (s->buf[len-1] == '\r') { + else if (s->ptr[len-1] == '\r') { s->len--; } else { @@ -935,7 +940,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) } if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value(); - p = s->buf; + p = s->ptr; rslen = RSTRING_LEN(rs); if (rslen == 0) { while (len>0 && p[len-1] == '\n') { @@ -1015,14 +1020,14 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str) if (s->len > 0) { int len; len = s->len - 1; - if (s->buf[len] == '\n') { + if (s->ptr[len] == '\n') { if (len > 0 && - s->buf[len-1] == '\r') { + s->ptr[len-1] == '\r') { len--; } } s->len = len; - s->buf[len] = '\0'; + s->ptr[len] = '\0'; return str; } return mrb_nil_value(); @@ -1070,8 +1075,8 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str) struct RString *s = mrb_str_ptr(str); str_modify(mrb, s); - p = s->buf; - pend = s->buf + s->len; + p = s->ptr; + pend = s->ptr + s->len; while (p < pend) { if (ISUPPER(*p)) { *p = tolower(*p); @@ -1184,13 +1189,20 @@ mrb_str_eql(mrb_state *mrb, mrb_value self) } static mrb_value -mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len) +mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len) { - struct RString *s; + struct RString *orig, *s; + struct mrb_shared_string *shared; - s = str_make_shared(mrb, str); - s->buf += beg; + orig = mrb_str_ptr(str); + str_make_shared(mrb, orig); + shared = orig->aux.shared; + s = mrb_obj_alloc_string(mrb); + s->ptr = orig->ptr + beg; s->len = len; + s->aux.shared = shared; + s->flags |= MRB_STR_SHARED; + shared->refcnt++; return mrb_obj_value(s); } @@ -1226,7 +1238,7 @@ mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2) return str; } -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) { @@ -1342,7 +1354,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) static mrb_value mrb_str_gsub(mrb_state *mrb, mrb_value self) { - //return str_gsub(argc, argv, self, 0); return str_gsub(mrb, self, 0); } @@ -1363,7 +1374,7 @@ mrb_str_gsub_bang(mrb_state *mrb, mrb_value self) str_modify(mrb, s); return str_gsub(mrb, s, 1); } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP mrb_int mrb_str_hash(mrb_state *mrb, mrb_value str) @@ -1371,7 +1382,7 @@ mrb_str_hash(mrb_state *mrb, mrb_value str) /* 1-8-7 */ struct RString *s = mrb_str_ptr(str); long len = s->len; - char *p = s->buf; + char *p = s->ptr; mrb_int key = 0; while (len--) { @@ -1457,10 +1468,8 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) int argc; mrb_value sub; - //mrb_value initpos; mrb_int pos; - //if (mrb_scan_args(argc, argv, "11", &sub, &initpos) == 2) { mrb_get_args(mrb, "*", &argv, &argc); if (argc == 2) { pos = mrb_fixnum(argv[1]); @@ -1486,7 +1495,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_REGEX: -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP if (pos > RSTRING_LEN(str)) return mrb_nil_value(); pos = mrb_str_offset(mrb, str, pos); @@ -1494,7 +1503,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) pos = mrb_str_sublen(mrb, str, pos); #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP break; case MRB_TT_FIXNUM: { @@ -1528,16 +1537,43 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) return mrb_fixnum_value(pos); } +#define STR_REPLACE_SHARED_MIN 10 + static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) { - int len = s2->len; - - str_modify(mrb, s1); - s1->buf = mrb_realloc(mrb, s1->buf, len); - memcpy(s1->buf, s2->buf, len); - s1->len = s2->len; - s2->aux.capa = s2->len; + if (s2->flags & MRB_STR_SHARED) { + L_SHARE: + if (s1->flags & MRB_STR_SHARED){ + mrb_str_decref(mrb, s1->aux.shared); + } + else { + mrb_free(mrb, s1->ptr); + } + s1->ptr = s2->ptr; + s1->len = s2->len; + s1->aux.shared = s2->aux.shared; + s1->flags |= MRB_STR_SHARED; + s1->aux.shared->refcnt++; + } + else if (s2->len > STR_REPLACE_SHARED_MIN) { + str_make_shared(mrb, s2); + goto L_SHARE; + } + else { + if (s1->flags & MRB_STR_SHARED) { + mrb_str_decref(mrb, s1->aux.shared); + s1->flags &= ~MRB_STR_SHARED; + s1->ptr = (char *)mrb_malloc(mrb, s2->len+1); + } + else { + s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, s2->len+1); + } + memcpy(s1->ptr, s2->ptr, s2->len); + s1->ptr[s2->len] = 0; + s1->len = s2->len; + s1->aux.capa = s2->len; + } return mrb_obj_value(s1); } @@ -1577,12 +1613,6 @@ mrb_str_init(mrb_state *mrb, mrb_value self) return self; } -mrb_sym -mrb_intern_str(mrb_state *mrb, mrb_value str) -{ - return mrb_intern(mrb, RSTRING_PTR(str)); -} - /* 15.2.10.5.25 */ /* 15.2.10.5.41 */ /* @@ -1635,7 +1665,7 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str) return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); } -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote) { @@ -1663,7 +1693,7 @@ get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote) return mrb_reg_regcomp(mrb, pat); } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP /* 15.2.10.5.27 */ /* @@ -1678,7 +1708,7 @@ get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote) * 'hello'.match(/(.)\1/)[0] #=> "ll" * 'hello'.match('xx') #=> nil */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value mrb_str_match_m(mrb_state *mrb, mrb_value self) { @@ -1698,7 +1728,7 @@ mrb_str_match_m(mrb_state *mrb, mrb_value self) } return result; } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP /* ---------------------------------- */ /* 15.2.10.5.29 */ @@ -1721,7 +1751,7 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str) s2 = str_new(mrb, 0, RSTRING(str)->len); str_with_class(mrb, s2, str); s = RSTRING_PTR(str); e = RSTRING_END(str) - 1; - p = s2->buf; + p = s2->ptr; while (e >= s) { *p++ = *e--; @@ -1745,7 +1775,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) str_modify(mrb, s); if (s->len > 1) { - p = s->buf; + p = s->ptr; e = p + s->len - 1; while (p < e) { c = *p; @@ -1787,13 +1817,13 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) if (ps->len - pos < len) { pos = ps->len - len; } - sbeg = ps->buf; - s = ps->buf + pos; - t = psub->buf; + sbeg = ps->ptr; + s = ps->ptr + pos; + t = psub->ptr; if (len) { while (sbeg <= s) { if (memcmp(s, t, len) == 0) { - return s - ps->buf; + return s - ps->ptr; } s--; } @@ -1850,11 +1880,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) pos += len; if (pos < 0) { if (mrb_type(sub) == MRB_TT_REGEX) { -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP mrb_backref_set(mrb, mrb_nil_value()); #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP } return mrb_nil_value(); } @@ -1871,7 +1901,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) switch (mrb_type(sub)) { case MRB_TT_REGEX: -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP pos = mrb_str_offset(mrb, str, pos); if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) { pos = mrb_reg_search(mrb, sub, str, pos, 1); @@ -1880,7 +1910,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) if (pos >= 0) return mrb_fixnum_value(pos); #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP break; case MRB_TT_FIXNUM: { @@ -1914,7 +1944,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) { @@ -1926,7 +1956,6 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) if (mrb_reg_search(mrb, pat, str, *start, 0) >= 0) { match = mrb_backref_get(mrb); - //regs = RMATCH(match)->regs; pmatch = mrb_match_ptr(match); regs = &pmatch->rmatch->regs; if (regs->beg[0] == regs->end[0]) { @@ -1944,7 +1973,7 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) if (regs->num_regs == 1) { return mrb_reg_nth_match(mrb, 0, match); } - result = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs); + result = mrb_ary_new_capa(mrb, regs->num_regs); for (i=1; i < regs->num_regs; i++) { mrb_ary_push(mrb, result, mrb_reg_nth_match(mrb, i, match)); } @@ -1953,7 +1982,7 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) } return mrb_nil_value(); } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP /* 15.2.10.5.32 */ /* @@ -1986,7 +2015,7 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) * <<cruel>> <<world>> * rceu lowlr */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value mrb_str_scan(mrb_state *mrb, mrb_value str) { @@ -1995,7 +2024,7 @@ mrb_str_scan(mrb_state *mrb, mrb_value str) mrb_int start = 0; mrb_value match = mrb_nil_value(); struct RString *ps = mrb_str_ptr(str); - char *p = ps->buf; + char *p = ps->ptr; long len = ps->len; mrb_get_args(mrb, "o&", &pat, &b); @@ -2020,7 +2049,7 @@ mrb_str_scan(mrb_state *mrb, mrb_value str) mrb_backref_set(mrb, match); return str; } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP static const char isspacetable[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, @@ -2089,27 +2118,16 @@ static const char isspacetable[256] = { static mrb_value mrb_str_split_m(mrb_state *mrb, mrb_value str) { - mrb_value *argv; int argc; mrb_value spat = mrb_nil_value(); - mrb_value limit; enum {awk, string, regexp} split_type = string; long beg, end, i = 0; - int lim = 0; + int lim = -1; mrb_value result, tmp; - mrb_get_args(mrb, "*", &argv, &argc); - if (argc > 0) - spat = argv[0]; - if (argc > 1) - limit = argv[1]; - else - limit = mrb_nil_value(); - + argc = mrb_get_args(mrb, "|oi", &spat, &lim); if (argc == 2) { - lim = mrb_fixnum(limit); - if (lim <= 0) limit = mrb_nil_value(); - else if (lim == 1) { + if (lim == 1) { if (RSTRING_LEN(str) == 0) return mrb_ary_new_capa(mrb, 0); return mrb_ary_new_from_values(mrb, 1, &str); @@ -2117,35 +2135,34 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) i = 1; } - if (argc == 0) { + if (argc == 0 || mrb_nil_p(spat)) { split_type = awk; } else { -//fs_set: if (mrb_type(spat) == MRB_TT_STRING) { split_type = string; -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP if (RSTRING_LEN(spat) == 0) { /* Special case - split into chars */ spat = mrb_reg_regcomp(mrb, spat); split_type = regexp; } else { -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ split_type = awk; } -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP } else { -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP spat = get_pat(mrb, spat, 1); split_type = regexp; #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP } } @@ -2168,22 +2185,47 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { end = ptr - bptr; skip = 0; - if (!mrb_nil_p(limit) && lim <= i) break; + if (lim >= 0 && lim <= i) break; } } else if (ascii_isspace(c)) { mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); skip = 1; beg = ptr - bptr; - if (!mrb_nil_p(limit)) ++i; + if (lim >= 0) ++i; } else { end = ptr - bptr; } } } + else if (split_type == string) { + char *ptr = RSTRING_PTR(str); + char *temp = ptr; + char *eptr = RSTRING_END(str); + long slen = RSTRING_LEN(spat); + + if (slen == 0) { + while (ptr < eptr) { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1)); + ptr++; + if (lim >= 0 && lim <= ++i) break; + } + } + else { + char *sptr = RSTRING_PTR(spat); + + while (ptr < eptr && + (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) { + mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end)); + ptr += end + slen; + if (lim >= 0 && lim <= ++i) break; + } + } + beg = ptr - temp; + } else { -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP char *ptr = RSTRING_PTR(str); long len = RSTRING_LEN(str); long start = beg; @@ -2225,20 +2267,20 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx)); mrb_ary_push(mrb, result, tmp); } - if (!mrb_nil_p(limit) && lim <= ++i) break; + if (lim >= 0 && lim <= ++i) break; } #else mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP } - if (RSTRING_LEN(str) > 0 && (!mrb_nil_p(limit) || RSTRING_LEN(str) > beg || lim < 0)) { + if (RSTRING_LEN(str) > 0 && (lim >= 0 || RSTRING_LEN(str) > beg || lim < 0)) { if (RSTRING_LEN(str) == beg) tmp = mrb_str_new_empty(mrb, str); else tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); mrb_ary_push(mrb, result, tmp); } - if (mrb_nil_p(limit) && lim == 0) { + if (lim < 0) { long len; while ((len = RARRAY_LEN(result)) > 0 && (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0)) @@ -2267,14 +2309,14 @@ mrb_block_given_p() * returning <i>str</i>, or <code>nil</code> if no substitutions were * performed. */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value mrb_str_sub_bang(mrb_state *mrb, mrb_value str) { str_modify(mrb, str); return mrb_nil_value(); } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP /* 15.2.10.5.36 */ @@ -2315,7 +2357,7 @@ mrb_str_sub_bang(mrb_state *mrb, mrb_value str) * #=> "Is /bin/bash your preferred shell?" */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP static mrb_value mrb_str_sub(mrb_state *mrb, mrb_value self) { @@ -2324,7 +2366,7 @@ mrb_str_sub(mrb_state *mrb, mrb_value self) mrb_str_sub_bang(mrb, str); return str; } -#endif //INCLUDE_REGEXP +#endif //ENABLE_REGEXP mrb_value mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) @@ -2332,17 +2374,10 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) #define BDIGIT unsigned int #define BDIGIT_DBL unsigned long -// const char *s = str; char *end; char sign = 1; -// char nondigit = 0; int c; -// BDIGIT_DBL num; long len; -// long blen = 1; -// long i; -// mrb_value z; -// BDIGIT *zds; unsigned long val; #undef ISDIGIT @@ -2459,28 +2494,30 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) } len *= strlen(str)*sizeof(char); - val = strtoul((char*)str, &end, base); + val = strtoul((char*)str, &end, base); - if (badcheck) { - if (end == str) goto bad; /* no number */ - while (*end && ISSPACE(*end)) end++; - if (*end) goto bad; /* trailing garbage */ - } + if (badcheck) { + if (end == str) goto bad; /* no number */ + while (*end && ISSPACE(*end)) end++; + if (*end) goto bad; /* trailing garbage */ + } - if (sign) return mrb_fixnum_value(val); - else { - long result = -(long)val; - return mrb_fixnum_value(result); - } + if (sign) return mrb_fixnum_value(val); + else { + long result = -(long)val; + return mrb_fixnum_value(result); + } bad: - printf("Integer"); + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalide string for number(%s)", str); + /* not reached */ return mrb_fixnum_value(0); } + char * mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) { struct RString *ps = mrb_str_ptr(*ptr); - char *s = ps->buf; + char *s = ps->ptr; if (!s || ps->len != strlen(s)) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); @@ -2494,10 +2531,8 @@ mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck) char *s; int len; - //StringValue(str); mrb_string_value(mrb, &str); if (badcheck) { - //s = StringValueCStr(str); s = mrb_string_value_cstr(mrb, &str); } else { @@ -2506,8 +2541,7 @@ mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck) if (s) { len = RSTRING_LEN(str); if (s[len]) { /* no sentinel somehow */ - //char *p = ALLOCA_N(char, len+1); - char *p = mrb_malloc(mrb, len+1); + char *p = (char *)mrb_malloc(mrb, len+1); //MEMCPY(p, s, char, len); memcpy(p, s, sizeof(char)*len); @@ -2544,10 +2578,8 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self) { mrb_value *argv; int argc; - //mrb_value b; int base; - //mrb_scan_args(argc, *argv, "01", &b); mrb_get_args(mrb, "*", &argv, &argc); if (argc == 0) base = 10; @@ -2583,8 +2615,8 @@ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck) if (p == end) { if (badcheck) { bad: - //mrb_invalid_str(q, "Float()"); - printf("Float()\n"); + mrb_raise(mrb, E_ARGUMENT_ERROR, "invalide string for float(%s)", p); + /* not reached */ } return d; } @@ -2643,7 +2675,7 @@ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck) mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); } if (s[len]) { /* no sentinel somehow */ - char *p = mrb_malloc(mrb, len+1); + char *p = (char *)mrb_malloc(mrb, len+1); memcpy(p, s, sizeof(char)*len); p[len] = '\0'; @@ -2670,7 +2702,6 @@ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck) static mrb_value mrb_str_to_f(mrb_state *mrb, mrb_value self) { - //return mrb_float_new(mrb_str_to_dbl(self, 0/*Qfalse*/)); return mrb_float_value(mrb_str_to_dbl(mrb, self, 0/*Qfalse*/)); } @@ -2778,7 +2809,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) len++; } else { - len += 4; /* \xNN */ + len += 4; /* \NNN */ } break; } @@ -2787,7 +2818,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) result = str_new(mrb, 0, len); str_with_class(mrb, result, str); p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); - q = result->buf; + q = result->ptr; *q++ = '"'; while (p < pend) { @@ -2849,19 +2880,10 @@ mrb_str_dump(mrb_state *mrb, mrb_value str) mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len) { - struct RString *s = mrb_str_ptr(str); - if (len < 0) { mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)"); } - if (0/*STR_ASSOC_P(str)*/) { - mrb_realloc(mrb, s->buf, s->len+len+1); - memcpy(s->buf + s->len, ptr, len); - s->len += len; - s->buf[s->len] = '\0'; /* sentinel */ - return str; - } - str_buf_cat(mrb, s, ptr, len); + str_buf_cat(mrb, mrb_str_ptr(str), ptr, len); return str; } @@ -2915,18 +2937,15 @@ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2) mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str) { - const char *p, *pend, *prev; + const char *p, *pend; char buf[CHAR_ESC_LEN + 1]; - mrb_value result = mrb_str_new_cstr(mrb, "\""); + mrb_value result = mrb_str_new(mrb, "\"", 1); p = RSTRING_PTR(str); pend = RSTRING_END(str); - prev = p; - while (p < pend) { + for (;p < pend; p++) { unsigned int c, cc; - int n; - c = *p++; - n = 1; + c = *p; if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) { buf[0] = '\\'; buf[1] = c; mrb_str_buf_cat(mrb, result, buf, 2); @@ -2949,25 +2968,18 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str) default: cc = 0; break; } if (cc) { - if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); buf[0] = '\\'; buf[1] = (char)cc; mrb_str_buf_cat(mrb, result, buf, 2); - prev = p; continue; } else { - if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev); - sprintf(buf, "\\%03o", c & 0377); - mrb_str_buf_cat(mrb, result, buf, strlen(buf)); - prev = p; + int n = sprintf(buf, "\\%03o", c & 0377); + mrb_str_buf_cat(mrb, result, buf, n); continue; } } -#ifdef INCLUDE_ENCODING - if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev); -#endif - mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); + mrb_str_buf_cat(mrb, result, "\"", 1); return result; } @@ -3002,7 +3014,7 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */ mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */ mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */ mrb_define_method(mrb, s, "gsub!", mrb_str_gsub_bang, ARGS_REQ(1)); /* 15.2.10.5.19 */ #endif @@ -3012,19 +3024,19 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "initialize", mrb_str_init, ARGS_REQ(1)); /* 15.2.10.5.23 */ mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.24 */ mrb_define_method(mrb, s, "intern", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.25 */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP mrb_define_method(mrb, s, "match", mrb_str_match_m, ARGS_REQ(1)); /* 15.2.10.5.27 */ #endif mrb_define_method(mrb, s, "replace", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.28 */ mrb_define_method(mrb, s, "reverse", mrb_str_reverse, ARGS_NONE()); /* 15.2.10.5.29 */ mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, ARGS_NONE()); /* 15.2.10.5.30 */ mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, ARGS_ANY()); /* 15.2.10.5.31 */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP mrb_define_method(mrb, s, "scan", mrb_str_scan, ARGS_REQ(1)); /* 15.2.10.5.32 */ #endif mrb_define_method(mrb, s, "slice", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.34 */ mrb_define_method(mrb, s, "split", mrb_str_split_m, ARGS_ANY()); /* 15.2.10.5.35 */ -#ifdef INCLUDE_REGEXP +#ifdef ENABLE_REGEXP mrb_define_method(mrb, s, "sub", mrb_str_sub, ARGS_REQ(1)); /* 15.2.10.5.36 */ mrb_define_method(mrb, s, "sub!", mrb_str_sub_bang, ARGS_REQ(1)); /* 15.2.10.5.37 */ #endif |
