summaryrefslogtreecommitdiffhomepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r--src/string.c108
1 files changed, 49 insertions, 59 deletions
diff --git a/src/string.c b/src/string.c
index 6ab9a1ff7..724aad745 100644
--- a/src/string.c
+++ b/src/string.c
@@ -246,6 +246,28 @@ str_decref(mrb_state *mrb, mrb_shared_string *shared)
}
static void
+str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
+{
+ if (RSTR_SHARED_P(s)) {
+ mrb_shared_string *shared = s->as.heap.aux.shared;
+
+ if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
+ s->as.heap.aux.capa = shared->capa;
+ s->as.heap.ptr[s->as.heap.len] = '\0';
+ RSTR_UNSET_SHARED_FLAG(s);
+ mrb_free(mrb, shared);
+ }
+ else {
+ str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
+ str_decref(mrb, shared);
+ }
+ }
+ else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
+ str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
+ }
+}
+
+static void
check_null_byte(mrb_state *mrb, mrb_value str)
{
mrb_to_str(mrb, str);
@@ -330,16 +352,21 @@ utf8_strlen(mrb_value str)
static mrb_int
chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
{
- mrb_int i, b, n;
- const char *p = RSTRING_PTR(s) + off;
- const char *e = RSTRING_END(s);
+ if (RSTR_ASCII_P(mrb_str_ptr(s))) {
+ return idx;
+ }
+ else {
+ mrb_int i, b, n;
+ const char *p = RSTRING_PTR(s) + off;
+ const char *e = RSTRING_END(s);
- for (b=i=0; p<e && i<idx; i++) {
- n = utf8len(p, e);
- b += n;
- p += n;
+ for (b=i=0; p<e && i<idx; i++) {
+ n = utf8len(p, e);
+ b += n;
+ p += n;
+ }
+ return b;
}
- return b;
}
/* map byte offset to character index */
@@ -809,22 +836,7 @@ MRB_API void
mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
{
mrb_check_frozen(mrb, s);
- if (RSTR_SHARED_P(s)) {
- mrb_shared_string *shared = s->as.heap.aux.shared;
-
- if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
- s->as.heap.aux.capa = shared->capa;
- s->as.heap.ptr[s->as.heap.len] = '\0';
- mrb_free(mrb, shared);
- }
- else {
- str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
- str_decref(mrb, shared);
- }
- }
- else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
- str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
- }
+ str_modify_keep_ascii(mrb, s);
}
MRB_API void
@@ -1223,8 +1235,6 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen)
* str[fixnum] => fixnum or nil
* str[fixnum, fixnum] => new_str or nil
* str[range] => new_str or nil
- * str[regexp] => new_str or nil
- * str[regexp, fixnum] => new_str or nil
* str[other_str] => new_str or nil
* str.slice(fixnum) => fixnum or nil
* str.slice(fixnum, fixnum) => new_str or nil
@@ -1432,8 +1442,6 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_
* str[fixnum] = replace
* str[fixnum, fixnum] = replace
* str[range] = replace
- * str[regexp] = replace
- * str[regexp, fixnum] = replace
* str[other_str] = replace
*
* Modify +self+ by replacing the content of +self+.
@@ -1941,15 +1949,10 @@ mrb_str_intern(mrb_state *mrb, mrb_value self)
MRB_API mrb_value
mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
{
- mrb_value str;
-
if (mrb_string_p(obj)) {
return obj;
}
- str = mrb_funcall(mrb, obj, "to_s", 0);
- if (!mrb_string_p(str))
- return mrb_any_to_s(mrb, obj);
- return str;
+ return mrb_str_to_str(mrb, obj);
}
MRB_API mrb_value
@@ -2103,23 +2106,18 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
/*
* call-seq:
- * str.split(pattern="\n", [limit]) => anArray
+ * str.split(separator=nil, [limit]) => anArray
*
* Divides <i>str</i> into substrings based on a delimiter, returning an array
* of these substrings.
*
- * If <i>pattern</i> is a <code>String</code>, then its contents are used as
- * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
+ * If <i>separator</i> is a <code>String</code>, then its contents are used as
+ * the delimiter when splitting <i>str</i>. If <i>separator</i> is a single
* space, <i>str</i> is split on whitespace, with leading whitespace and runs
* of contiguous whitespace characters ignored.
*
- * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
- * pattern matches. Whenever the pattern matches a zero-length string,
- * <i>str</i> is split into individual characters.
- *
- * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
- * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
- * split on whitespace as if ' ' were specified.
+ * If <i>separator</i> is omitted or <code>nil</code> (which is the default),
+ * <i>str</i> is split on whitespace as if ' ' were specified.
*
* If the <i>limit</i> parameter is omitted, trailing null fields are
* suppressed. If <i>limit</i> is a positive number, at most that number of
@@ -2130,9 +2128,6 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
*
* " now's the time".split #=> ["now's", "the", "time"]
* " now's the time".split(' ') #=> ["now's", "the", "time"]
- * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
- * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
- * "hello".split(//, 3) #=> ["h", "e", "llo"]
*
* "mellow yellow".split("ello") #=> ["m", "w y", "w"]
* "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
@@ -2145,7 +2140,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
{
mrb_int argc;
mrb_value spat = mrb_nil_value();
- enum {awk, string, regexp} split_type = string;
+ enum {awk, string} split_type = string;
mrb_int i = 0;
mrb_int beg;
mrb_int end;
@@ -2434,15 +2429,12 @@ mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
if (p[len] == '\0') {
return p;
}
- if (mrb_frozen_p(ps) || RSTR_CAPA(ps) == len) {
- ps = str_new(mrb, NULL, len+1);
- memcpy(RSTR_PTR(ps), p, len);
- RSTR_SET_LEN(ps, len);
- *ptr = mrb_obj_value(ps);
- }
- else {
- mrb_str_modify(mrb, ps);
- }
+
+ /*
+ * Even after str_modify_keep_ascii(), NULL termination is not ensured if
+ * RSTR_SET_LEN() is used explicitly (e.g. String#delete_suffix!).
+ */
+ str_modify_keep_ascii(mrb, ps);
RSTR_PTR(ps)[len] = '\0';
return RSTR_PTR(ps);
}
@@ -2506,8 +2498,6 @@ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
char buf[DBL_DIG * 4 + 10];
double d;
- enum {max_width = 20};
-
if (!p) return 0.0;
while (ISSPACE(*p)) p++;