diff options
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 108 |
1 files changed, 49 insertions, 59 deletions
diff --git a/src/string.c b/src/string.c index 6ab9a1ff7..724aad745 100644 --- a/src/string.c +++ b/src/string.c @@ -246,6 +246,28 @@ str_decref(mrb_state *mrb, mrb_shared_string *shared) } static void +str_modify_keep_ascii(mrb_state *mrb, struct RString *s) +{ + if (RSTR_SHARED_P(s)) { + mrb_shared_string *shared = s->as.heap.aux.shared; + + if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { + s->as.heap.aux.capa = shared->capa; + s->as.heap.ptr[s->as.heap.len] = '\0'; + RSTR_UNSET_SHARED_FLAG(s); + mrb_free(mrb, shared); + } + else { + str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len); + str_decref(mrb, shared); + } + } + else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) { + str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len); + } +} + +static void check_null_byte(mrb_state *mrb, mrb_value str) { mrb_to_str(mrb, str); @@ -330,16 +352,21 @@ utf8_strlen(mrb_value str) static mrb_int chars2bytes(mrb_value s, mrb_int off, mrb_int idx) { - mrb_int i, b, n; - const char *p = RSTRING_PTR(s) + off; - const char *e = RSTRING_END(s); + if (RSTR_ASCII_P(mrb_str_ptr(s))) { + return idx; + } + else { + mrb_int i, b, n; + const char *p = RSTRING_PTR(s) + off; + const char *e = RSTRING_END(s); - for (b=i=0; p<e && i<idx; i++) { - n = utf8len(p, e); - b += n; - p += n; + for (b=i=0; p<e && i<idx; i++) { + n = utf8len(p, e); + b += n; + p += n; + } + return b; } - return b; } /* map byte offset to character index */ @@ -809,22 +836,7 @@ MRB_API void mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s) { mrb_check_frozen(mrb, s); - if (RSTR_SHARED_P(s)) { - mrb_shared_string *shared = s->as.heap.aux.shared; - - if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { - s->as.heap.aux.capa = shared->capa; - s->as.heap.ptr[s->as.heap.len] = '\0'; - mrb_free(mrb, shared); - } - else { - str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len); - str_decref(mrb, shared); - } - } - else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) { - str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len); - } + str_modify_keep_ascii(mrb, s); } MRB_API void @@ -1223,8 +1235,6 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) * str[fixnum] => fixnum or nil * str[fixnum, fixnum] => new_str or nil * str[range] => new_str or nil - * str[regexp] => new_str or nil - * str[regexp, fixnum] => new_str or nil * str[other_str] => new_str or nil * str.slice(fixnum) => fixnum or nil * str.slice(fixnum, fixnum) => new_str or nil @@ -1432,8 +1442,6 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_ * str[fixnum] = replace * str[fixnum, fixnum] = replace * str[range] = replace - * str[regexp] = replace - * str[regexp, fixnum] = replace * str[other_str] = replace * * Modify +self+ by replacing the content of +self+. @@ -1941,15 +1949,10 @@ mrb_str_intern(mrb_state *mrb, mrb_value self) MRB_API mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj) { - mrb_value str; - if (mrb_string_p(obj)) { return obj; } - str = mrb_funcall(mrb, obj, "to_s", 0); - if (!mrb_string_p(str)) - return mrb_any_to_s(mrb, obj); - return str; + return mrb_str_to_str(mrb, obj); } MRB_API mrb_value @@ -2103,23 +2106,18 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str) /* * call-seq: - * str.split(pattern="\n", [limit]) => anArray + * str.split(separator=nil, [limit]) => anArray * * Divides <i>str</i> into substrings based on a delimiter, returning an array * of these substrings. * - * If <i>pattern</i> is a <code>String</code>, then its contents are used as - * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single + * If <i>separator</i> is a <code>String</code>, then its contents are used as + * the delimiter when splitting <i>str</i>. If <i>separator</i> is a single * space, <i>str</i> is split on whitespace, with leading whitespace and runs * of contiguous whitespace characters ignored. * - * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the - * pattern matches. Whenever the pattern matches a zero-length string, - * <i>str</i> is split into individual characters. - * - * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If - * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is - * split on whitespace as if ' ' were specified. + * If <i>separator</i> is omitted or <code>nil</code> (which is the default), + * <i>str</i> is split on whitespace as if ' ' were specified. * * If the <i>limit</i> parameter is omitted, trailing null fields are * suppressed. If <i>limit</i> is a positive number, at most that number of @@ -2130,9 +2128,6 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str) * * " now's the time".split #=> ["now's", "the", "time"] * " now's the time".split(' ') #=> ["now's", "the", "time"] - * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"] - * "hello".split(//) #=> ["h", "e", "l", "l", "o"] - * "hello".split(//, 3) #=> ["h", "e", "llo"] * * "mellow yellow".split("ello") #=> ["m", "w y", "w"] * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"] @@ -2145,7 +2140,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) { mrb_int argc; mrb_value spat = mrb_nil_value(); - enum {awk, string, regexp} split_type = string; + enum {awk, string} split_type = string; mrb_int i = 0; mrb_int beg; mrb_int end; @@ -2434,15 +2429,12 @@ mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) if (p[len] == '\0') { return p; } - if (mrb_frozen_p(ps) || RSTR_CAPA(ps) == len) { - ps = str_new(mrb, NULL, len+1); - memcpy(RSTR_PTR(ps), p, len); - RSTR_SET_LEN(ps, len); - *ptr = mrb_obj_value(ps); - } - else { - mrb_str_modify(mrb, ps); - } + + /* + * Even after str_modify_keep_ascii(), NULL termination is not ensured if + * RSTR_SET_LEN() is used explicitly (e.g. String#delete_suffix!). + */ + str_modify_keep_ascii(mrb, ps); RSTR_PTR(ps)[len] = '\0'; return RSTR_PTR(ps); } @@ -2506,8 +2498,6 @@ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck) char buf[DBL_DIG * 4 + 10]; double d; - enum {max_width = 20}; - if (!p) return 0.0; while (ISSPACE(*p)) p++; |
