summaryrefslogtreecommitdiffhomepage
path: root/src/string.c
diff options
context:
space:
mode:
authorFelix Jones <[email protected]>2017-02-16 13:33:46 +0000
committerFelix Jones <[email protected]>2017-02-16 13:33:46 +0000
commitd83aad8d570e4bbffa3bd3ce64e210f78afa425f (patch)
tree5389a87c135b1bdf3e23a1ba02e02400b7cf80fc /src/string.c
parent70aa6dc38d75dd6b1e2c76f290bc576e36e36ea3 (diff)
parentb165708c8deba00685f9a27926c554aaa7f3b0fb (diff)
downloadmruby-d83aad8d570e4bbffa3bd3ce64e210f78afa425f.tar.gz
mruby-d83aad8d570e4bbffa3bd3ce64e210f78afa425f.zip
Merge branch 'master' into android.rake-ndk-clang
Diffstat (limited to 'src/string.c')
-rw-r--r--src/string.c379
1 files changed, 311 insertions, 68 deletions
diff --git a/src/string.c b/src/string.c
index 15fcc502a..acf32167d 100644
--- a/src/string.c
+++ b/src/string.c
@@ -118,9 +118,12 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa)
return mrb_obj_value(s);
}
-static inline void
-resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
+static void
+resize_capa(mrb_state *mrb, struct RString *s, size_t capacity)
{
+#if SIZE_MAX > MRB_INT_MAX
+ mrb_assert(capacity < MRB_INT_MAX);
+#endif
if (RSTR_EMBED_P(s)) {
if (RSTRING_EMBED_LEN_MAX < capacity) {
char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
@@ -129,12 +132,12 @@ resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
RSTR_UNSET_EMBED_FLAG(s);
s->as.heap.ptr = tmp;
s->as.heap.len = len;
- s->as.heap.aux.capa = capacity;
+ s->as.heap.aux.capa = (mrb_int)capacity;
}
}
else {
- s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
- s->as.heap.aux.capa = capacity;
+ s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
+ s->as.heap.aux.capa = (mrb_int)capacity;
}
}
@@ -151,22 +154,26 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
off = ptr - RSTR_PTR(s);
}
- if (RSTR_EMBED_P(s))
- capa = RSTRING_EMBED_LEN_MAX;
- else
- capa = s->as.heap.aux.capa;
+ capa = RSTR_CAPA(s);
+ if (capa <= RSTRING_EMBED_LEN_MAX)
+ capa = RSTRING_EMBED_LEN_MAX+1;
- if (RSTR_LEN(s) >= MRB_INT_MAX - (mrb_int)len) {
+ total = RSTR_LEN(s)+len;
+ if (total >= MRB_INT_MAX) {
+ size_error:
mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
}
- total = RSTR_LEN(s)+len;
if (capa <= total) {
while (total > capa) {
- if (capa + 1 >= MRB_INT_MAX / 2) {
- capa = (total + 4095) / 4096;
- break;
+ if (capa <= MRB_INT_MAX / 2) {
+ capa *= 2;
}
- capa = (capa + 1) * 2;
+ else {
+ capa = total;
+ }
+ }
+ if (capa < total || capa > MRB_INT_MAX) {
+ goto size_error;
}
resize_capa(mrb, s, capa);
}
@@ -361,7 +368,7 @@ mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
return 0;
}
else if (m == 1) {
- const unsigned char *ys = memchr(y, *x, n);
+ const unsigned char *ys = (const unsigned char *)memchr(y, *x, n);
if (ys)
return ys - y;
@@ -416,7 +423,7 @@ byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
mrb_shared_string *shared;
orig = mrb_str_ptr(str);
- if (RSTR_EMBED_P(orig)) {
+ if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) {
s = str_new(mrb, orig->as.ary+beg, len);
}
else {
@@ -501,7 +508,7 @@ str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
static void
check_frozen(mrb_state *mrb, struct RString *s)
{
- if (RSTR_FROZEN_P(s)) {
+ if (MRB_FROZEN_P(s)) {
mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string");
}
}
@@ -512,6 +519,9 @@ str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
long len;
check_frozen(mrb, s1);
+ s1->flags &= ~MRB_STR_NO_UTF;
+ s1->flags |= s2->flags&MRB_STR_NO_UTF;
+ if (s1 == s2) return mrb_obj_value(s1);
len = RSTR_LEN(s2);
if (RSTR_SHARED_P(s1)) {
str_decref(mrb, s1->as.heap.aux.shared);
@@ -662,7 +672,7 @@ mrb_str_modify(mrb_state *mrb, struct RString *s)
if (RSTR_SHARED_P(s)) {
mrb_shared_string *shared = s->as.heap.aux.shared;
- if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
+ if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
s->as.heap.ptr = shared->ptr;
s->as.heap.aux.capa = shared->len;
RSTR_PTR(s)[s->as.heap.len] = '\0';
@@ -688,27 +698,25 @@ mrb_str_modify(mrb_state *mrb, struct RString *s)
}
if (RSTR_NOFREE_P(s)) {
char *p = s->as.heap.ptr;
+ mrb_int len = s->as.heap.len;
- s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1);
+ RSTR_UNSET_NOFREE_FLAG(s);
+ if (len < RSTRING_EMBED_LEN_MAX) {
+ RSTR_SET_EMBED_FLAG(s);
+ RSTR_SET_EMBED_LEN(s, len);
+ }
+ else {
+ s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
+ s->as.heap.aux.capa = len;
+ }
if (p) {
- memcpy(RSTR_PTR(s), p, s->as.heap.len);
+ memcpy(RSTR_PTR(s), p, len);
}
- RSTR_PTR(s)[s->as.heap.len] = '\0';
- s->as.heap.aux.capa = s->as.heap.len;
- RSTR_UNSET_NOFREE_FLAG(s);
+ RSTR_PTR(s)[len] = '\0';
return;
}
}
-static mrb_value
-mrb_str_freeze(mrb_state *mrb, mrb_value str)
-{
- struct RString *s = mrb_str_ptr(str);
-
- RSTR_SET_FROZEN_FLAG(s);
- return str;
-}
-
MRB_API mrb_value
mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
{
@@ -760,8 +768,14 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
other = mrb_str_to_str(mrb, other);
}
s2 = mrb_str_ptr(other);
+ if (RSTR_LEN(s2) == 0) {
+ return;
+ }
len = RSTR_LEN(s1) + RSTR_LEN(s2);
+ if (len < 0 || len >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
if (RSTRING_CAPA(self) < len) {
resize_capa(mrb, s1, len);
}
@@ -1079,22 +1093,25 @@ num_index:
return mrb_nil_value();
case MRB_TT_RANGE:
- /* check if indx is Range */
- {
- mrb_int beg, len;
+ goto range_arg;
- len = RSTRING_CHAR_LEN(str);
- if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
- return str_subseq(mrb, str, beg, len);
- }
- else {
- return mrb_nil_value();
- }
- }
- case MRB_TT_FLOAT:
default:
indx = mrb_Integer(mrb, indx);
if (mrb_nil_p(indx)) {
+ range_arg:
+ {
+ mrb_int beg, len;
+
+ len = RSTRING_CHAR_LEN(str);
+ switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) {
+ case 1:
+ return str_subseq(mrb, str, beg, len);
+ case 2:
+ return mrb_nil_value();
+ default:
+ break;
+ }
+ }
mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
}
idx = mrb_fixnum(indx);
@@ -1235,11 +1252,13 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
char *p, *pp;
mrb_int rslen;
mrb_int len;
+ mrb_int argc;
struct RString *s = mrb_str_ptr(str);
mrb_str_modify(mrb, s);
+ argc = mrb_get_args(mrb, "|S", &rs);
len = RSTR_LEN(s);
- if (mrb_get_args(mrb, "|S", &rs) == 0) {
+ if (argc == 0) {
if (len == 0) return mrb_nil_value();
smart_chomp:
if (RSTR_PTR(s)[len-1] == '\n') {
@@ -1530,22 +1549,12 @@ mrb_str_hash_m(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_include(mrb_state *mrb, mrb_value self)
{
- mrb_int i;
mrb_value str2;
- mrb_bool include_p;
-
- mrb_get_args(mrb, "o", &str2);
- if (mrb_fixnum_p(str2)) {
- include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
- }
- else {
- str2 = mrb_str_to_str(mrb, str2);
- i = str_index(mrb, self, str2, 0);
- include_p = (i != -1);
- }
-
- return mrb_bool_value(include_p);
+ mrb_get_args(mrb, "S", &str2);
+ if (str_index(mrb, self, str2, 0) < 0)
+ return mrb_bool_value(FALSE);
+ return mrb_bool_value(TRUE);
}
/* 15.2.10.5.22 */
@@ -1764,7 +1773,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
mrb_str_modify(mrb, mrb_str_ptr(str));
len = RSTRING_LEN(str);
- buf = mrb_malloc(mrb, (size_t)len);
+ buf = (char*)mrb_malloc(mrb, (size_t)len);
p = buf;
e = buf + len;
@@ -2225,7 +2234,7 @@ mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
char *p = RSTR_PTR(ps);
if (!p || p[len] != '\0') {
- if (RSTR_FROZEN_P(ps)) {
+ if (MRB_FROZEN_P(ps)) {
*ptr = str = mrb_str_dup(mrb, str);
ps = mrb_str_ptr(str);
}
@@ -2294,7 +2303,7 @@ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
return 0.0;
}
- d = strtod(p, &end);
+ d = mrb_float_read(p, &end);
if (p == end) {
if (badcheck) {
bad:
@@ -2332,7 +2341,7 @@ bad:
return 0.0;
}
- d = strtod(p, &end);
+ d = mrb_float_read(p, &end);
if (badcheck) {
if (!end || p == end) goto bad;
while (*end && ISSPACE(*end)) end++;
@@ -2637,7 +2646,7 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
}
#endif
c = *p;
- if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
+ if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
buf[0] = '\\'; buf[1] = c;
mrb_str_cat(mrb, result, buf, 2);
continue;
@@ -2723,8 +2732,8 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
- mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
- mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
+ mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */
+ mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */
mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
@@ -2754,6 +2763,240 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
+}
+
+/*
+ * Source code for the "strtod" library procedure.
+ *
+ * Copyright (c) 1988-1993 The Regents of the University of California.
+ * Copyright (c) 1994 Sun Microsystems, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this
+ * software and its documentation for any purpose and without
+ * fee is hereby granted, provided that the above copyright
+ * notice appear in all copies. The University of California
+ * makes no representations about the suitability of this
+ * software for any purpose. It is provided "as is" without
+ * express or implied warranty.
+ *
+ * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $
+ */
- mrb_define_method(mrb, s, "freeze", mrb_str_freeze, MRB_ARGS_NONE());
+#include <ctype.h>
+#include <errno.h>
+
+static const int maxExponent = 511; /* Largest possible base 10 exponent. Any
+ * exponent larger than this will already
+ * produce underflow or overflow, so there's
+ * no need to worry about additional digits.
+ */
+static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */
+ 10., /* is 10^2^i. Used to convert decimal */
+ 100., /* exponents into floating-point numbers. */
+ 1.0e4,
+ 1.0e8,
+ 1.0e16,
+ 1.0e32,
+ 1.0e64,
+ 1.0e128,
+ 1.0e256
+};
+
+MRB_API double
+mrb_float_read(const char *string, char **endPtr)
+/* const char *string; A decimal ASCII floating-point number,
+ * optionally preceded by white space.
+ * Must have form "-I.FE-X", where I is the
+ * integer part of the mantissa, F is the
+ * fractional part of the mantissa, and X
+ * is the exponent. Either of the signs
+ * may be "+", "-", or omitted. Either I
+ * or F may be omitted, or both. The decimal
+ * point isn't necessary unless F is present.
+ * The "E" may actually be an "e". E and X
+ * may both be omitted (but not just one).
+ */
+/* char **endPtr; If non-NULL, store terminating character's
+ * address here. */
+{
+ int sign, expSign = FALSE;
+ double fraction, dblExp;
+ const double *d;
+ register const char *p;
+ register int c;
+ int exp = 0; /* Exponent read from "EX" field. */
+ int fracExp = 0; /* Exponent that derives from the fractional
+ * part. Under normal circumstatnces, it is
+ * the negative of the number of digits in F.
+ * However, if I is very long, the last digits
+ * of I get dropped (otherwise a long I with a
+ * large negative exponent could cause an
+ * unnecessary overflow on I alone). In this
+ * case, fracExp is incremented one for each
+ * dropped digit. */
+ int mantSize; /* Number of digits in mantissa. */
+ int decPt; /* Number of mantissa digits BEFORE decimal
+ * point. */
+ const char *pExp; /* Temporarily holds location of exponent
+ * in string. */
+
+ /*
+ * Strip off leading blanks and check for a sign.
+ */
+
+ p = string;
+ while (isspace(*p)) {
+ p += 1;
+ }
+ if (*p == '-') {
+ sign = TRUE;
+ p += 1;
+ } else {
+ if (*p == '+') {
+ p += 1;
+ }
+ sign = FALSE;
+ }
+
+ /*
+ * Count the number of digits in the mantissa (including the decimal
+ * point), and also locate the decimal point.
+ */
+
+ decPt = -1;
+ for (mantSize = 0; ; mantSize += 1)
+ {
+ c = *p;
+ if (!isdigit(c)) {
+ if ((c != '.') || (decPt >= 0)) {
+ break;
+ }
+ decPt = mantSize;
+ }
+ p += 1;
+ }
+
+ /*
+ * Now suck up the digits in the mantissa. Use two integers to
+ * collect 9 digits each (this is faster than using floating-point).
+ * If the mantissa has more than 18 digits, ignore the extras, since
+ * they can't affect the value anyway.
+ */
+
+ pExp = p;
+ p -= mantSize;
+ if (decPt < 0) {
+ decPt = mantSize;
+ } else {
+ mantSize -= 1; /* One of the digits was the point. */
+ }
+ if (mantSize > 18) {
+ if (decPt - 18 > 29999) {
+ fracExp = 29999;
+ } else {
+ fracExp = decPt - 18;
+ }
+ mantSize = 18;
+ } else {
+ fracExp = decPt - mantSize;
+ }
+ if (mantSize == 0) {
+ fraction = 0.0;
+ p = string;
+ goto done;
+ } else {
+ int frac1, frac2;
+ frac1 = 0;
+ for ( ; mantSize > 9; mantSize -= 1)
+ {
+ c = *p;
+ p += 1;
+ if (c == '.') {
+ c = *p;
+ p += 1;
+ }
+ frac1 = 10*frac1 + (c - '0');
+ }
+ frac2 = 0;
+ for (; mantSize > 0; mantSize -= 1)
+ {
+ c = *p;
+ p += 1;
+ if (c == '.') {
+ c = *p;
+ p += 1;
+ }
+ frac2 = 10*frac2 + (c - '0');
+ }
+ fraction = (1.0e9 * frac1) + frac2;
+ }
+
+ /*
+ * Skim off the exponent.
+ */
+
+ p = pExp;
+ if ((*p == 'E') || (*p == 'e')) {
+ p += 1;
+ if (*p == '-') {
+ expSign = TRUE;
+ p += 1;
+ } else {
+ if (*p == '+') {
+ p += 1;
+ }
+ expSign = FALSE;
+ }
+ while (isdigit(*p)) {
+ exp = exp * 10 + (*p - '0');
+ if (exp > 19999) {
+ exp = 19999;
+ }
+ p += 1;
+ }
+ }
+ if (expSign) {
+ exp = fracExp - exp;
+ } else {
+ exp = fracExp + exp;
+ }
+
+ /*
+ * Generate a floating-point number that represents the exponent.
+ * Do this by processing the exponent one bit at a time to combine
+ * many powers of 2 of 10. Then combine the exponent with the
+ * fraction.
+ */
+
+ if (exp < 0) {
+ expSign = TRUE;
+ exp = -exp;
+ } else {
+ expSign = FALSE;
+ }
+ if (exp > maxExponent) {
+ exp = maxExponent;
+ errno = ERANGE;
+ }
+ dblExp = 1.0;
+ for (d = powersOf10; exp != 0; exp >>= 1, d += 1) {
+ if (exp & 01) {
+ dblExp *= *d;
+ }
+ }
+ if (expSign) {
+ fraction /= dblExp;
+ } else {
+ fraction *= dblExp;
+ }
+
+done:
+ if (endPtr != NULL) {
+ *endPtr = (char *) p;
+ }
+
+ if (sign) {
+ return -fraction;
+ }
+ return fraction;
}