summary | refs | log | tree | commit | diff | homepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r-- src/string.c 416
1 files changed, 102 insertions, 314 deletions
diff --git a/src/string.c b/src/string.c
index e440bff8c..ac0f4a920 100644
--- a/src/string.c
+++ b/src/string.c
@@ -8,14 +8,6 @@
# define _CRT_NONSTDC_NO_DEPRECATE
#endif
-#ifndef MRB_NO_FLOAT
-#include <float.h>
-#include <math.h>
-#endif
-#include <limits.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include <mruby.h>
#include <mruby/array.h>
#include <mruby/class.h>
@@ -23,6 +15,7 @@
#include <mruby/string.h>
#include <mruby/numeric.h>
#include <mruby/presym.h>
+#include <string.h>
typedef struct mrb_shared_string {
int refcnt;
@@ -32,7 +25,7 @@ typedef struct mrb_shared_string {
const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
+#define mrb_obj_alloc_string(mrb) MRB_OBJ_ALLOC((mrb), MRB_TT_STRING, (mrb)->string_class)
static struct RString*
str_init_normal_capa(mrb_state *mrb, struct RString *s,
@@ -244,7 +237,7 @@ str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
static void
check_null_byte(mrb_state *mrb, mrb_value str)
{
- mrb_to_str(mrb, str);
+ mrb_ensure_string_type(mrb, str);
if (memchr(RSTRING_PTR(str), '\0', RSTRING_LEN(str))) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
}
@@ -839,8 +832,15 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
{
struct RString *s;
+ const char *p = RSTRING_PTR(str0);
+ size_t len = RSTRING_LEN(str0);
check_null_byte(mrb, str0);
- s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
+ if (RSTR_EMBEDDABLE_P(len)) {
+ s = str_init_embed(mrb_obj_alloc_string(mrb), p, len);
+ }
+ else {
+ s = str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len);
+ }
return RSTR_PTR(s);
}
@@ -897,14 +897,14 @@ static mrb_value
mrb_str_size(mrb_state *mrb, mrb_value self)
{
mrb_int len = RSTRING_CHAR_LEN(self);
- return mrb_fixnum_value(len);
+ return mrb_int_value(mrb, len);
}
static mrb_value
mrb_str_bytesize(mrb_state *mrb, mrb_value self)
{
mrb_int len = RSTRING_LEN(self);
- return mrb_fixnum_value(len);
+ return mrb_int_value(mrb, len);
}
/* 15.2.10.5.1 */
@@ -1018,7 +1018,7 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
else {
result = mrb_str_cmp(mrb, str1, str2);
}
- return mrb_fixnum_value(result);
+ return mrb_int_value(mrb, result);
}
static mrb_bool
@@ -1071,7 +1071,7 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value str)
MRB_API mrb_int
mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
{
- mrb_to_str(mrb, ptr);
+ mrb_ensure_string_type(mrb, ptr);
return RSTRING_LEN(ptr);
}
@@ -1102,8 +1102,8 @@ static enum str_convert_range
str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len)
{
if (!mrb_undef_p(alen)) {
- *beg = mrb_int(mrb, indx);
- *len = mrb_int(mrb, alen);
+ *beg = mrb_as_int(mrb, indx);
+ *len = mrb_as_int(mrb, alen);
return STR_CHAR_RANGE;
}
else {
@@ -1123,7 +1123,7 @@ str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen,
goto range_arg;
default:
- indx = mrb_to_int(mrb, indx);
+ indx = mrb_to_integer(mrb, indx);
if (mrb_integer_p(indx)) {
*beg = mrb_integer(indx);
*len = 1;
@@ -1175,14 +1175,14 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen)
/* 15.2.10.5.34 */
/*
* call-seq:
- * str[fixnum] => fixnum or nil
- * str[fixnum, fixnum] => new_str or nil
- * str[range] => new_str or nil
- * str[other_str] => new_str or nil
- * str.slice(fixnum) => fixnum or nil
- * str.slice(fixnum, fixnum) => new_str or nil
- * str.slice(range) => new_str or nil
- * str.slice(other_str) => new_str or nil
+ * str[int] => int or nil
+ * str[int, int] => new_str or nil
+ * str[range] => new_str or nil
+ * str[other_str] => new_str or nil
+ * str.slice(int) => int or nil
+ * str.slice(int, int) => new_str or nil
+ * str.slice(range) => new_str or nil
+ * str.slice(other_str) => new_str or nil
*
* Element Reference---If passed a single <code>Integer</code>, returns the code
* of the character at that position. If passed two <code>Integer</code>
@@ -1239,13 +1239,11 @@ str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb
if (end > len) { end = len; }
if (pos < 0 || pos > len) {
- str_out_of_index(mrb, mrb_fixnum_value(pos));
+ str_out_of_index(mrb, mrb_int_value(mrb, pos));
}
replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep));
- newlen = replen + (len - (end - pos));
-
- if (newlen >= MRB_SSIZE_MAX || newlen < replen /* overflowed */) {
+ if (mrb_int_add_overflow(replen, len - (end - pos), &newlen) || newlen >= MRB_SSIZE_MAX) {
mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big");
}
@@ -1358,8 +1356,7 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_
{
mrb_int beg, len, charlen;
- mrb_to_str(mrb, replace);
-
+ mrb_ensure_string_type(mrb, replace);
switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
case STR_OUT_OF_RANGE:
default:
@@ -1376,14 +1373,17 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_
str_range_to_bytes(str, &beg, &len);
/* fall through */
case STR_BYTE_RANGE_CORRECTED:
- str_replace_partial(mrb, str, beg, beg + len, replace);
+ if (mrb_int_add_overflow(beg, len, &len)) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "string index too big");
+ }
+ str_replace_partial(mrb, str, beg, len, replace);
}
}
/*
* call-seq:
- * str[fixnum] = replace
- * str[fixnum, fixnum] = replace
+ * str[int] = replace
+ * str[int, int] = replace
* str[range] = replace
* str[other_str] = replace
*
@@ -1754,7 +1754,7 @@ mrb_str_hash(mrb_state *mrb, mrb_value str)
/* 15.2.10.5.20 */
/*
* call-seq:
- * str.hash => fixnum
+ * str.hash => int
*
* Return a hash based on the string's length and content.
*/
@@ -1762,14 +1762,14 @@ static mrb_value
mrb_str_hash_m(mrb_state *mrb, mrb_value self)
{
mrb_int key = mrb_str_hash(mrb, self);
- return mrb_fixnum_value(key);
+ return mrb_int_value(mrb, key);
}
/* 15.2.10.5.21 */
/*
* call-seq:
* str.include? other_str => true or false
- * str.include? fixnum => true or false
+ * str.include? int => true or false
*
* Returns <code>true</code> if <i>str</i> contains the given string or
* character.
@@ -1792,7 +1792,7 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
/* 15.2.10.5.22 */
/*
* call-seq:
- * str.index(substring [, offset]) => fixnum or nil
+ * str.index(substring [, offset]) => int or nil
*
* Returns the index of the first occurrence of the given
* <i>substring</i>. Returns <code>nil</code> if not found.
@@ -1824,7 +1824,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
if (pos == -1) return mrb_nil_value();
BYTES_ALIGN_CHECK(pos);
- return mrb_fixnum_value(pos);
+ return mrb_int_value(mrb, pos);
}
/* 15.2.10.5.24 */
@@ -1873,7 +1873,7 @@ mrb_str_init(mrb_state *mrb, mrb_value self)
* str.to_sym => symbol
*
* Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
- * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
+ * symbol if it did not previously exist.
*
* "Koala".intern #=> :Koala
* s = 'cat'.to_sym #=> :cat
@@ -1901,7 +1901,7 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
case MRB_TT_SYMBOL:
return mrb_sym_str(mrb, mrb_symbol(obj));
case MRB_TT_INTEGER:
- return mrb_fixnum_to_str(mrb, obj, 10);
+ return mrb_integer_to_str(mrb, obj, 10);
case MRB_TT_SCLASS:
case MRB_TT_CLASS:
case MRB_TT_MODULE:
@@ -2018,7 +2018,7 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
/* 15.2.10.5.31 */
/*
* call-seq:
- * str.rindex(substring [, offset]) => fixnum or nil
+ * str.rindex(substring [, offset]) => int or nil
*
* Returns the index of the last occurrence of the given <i>substring</i>.
* Returns <code>nil</code> if not found. If the second parameter is
@@ -2053,7 +2053,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
if (pos >= 0) {
pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos);
BYTES_ALIGN_CHECK(pos);
- return mrb_fixnum_value(pos);
+ return mrb_int_value(mrb, pos);
}
return mrb_nil_value();
}
@@ -2199,8 +2199,8 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
return result;
}
-mrb_value
-mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, mrb_int base, int badcheck)
+static mrb_value
+mrb_str_len_to_integer(mrb_state *mrb, const char *str, size_t len, mrb_int base, int badcheck)
{
const char *p = str;
const char *pend = str + len;
@@ -2363,12 +2363,6 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, mrb_int base, i
return mrb_fixnum_value(0);
}
-MRB_API mrb_value
-mrb_cstr_to_inum(mrb_state *mrb, const char *str, mrb_int base, mrb_bool badcheck)
-{
- return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
-}
-
/* obsolete: use RSTRING_CSTR() or mrb_string_cstr() */
MRB_API const char*
mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
@@ -2401,15 +2395,15 @@ mrb_string_cstr(mrb_state *mrb, mrb_value str)
}
MRB_API mrb_value
-mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
+mrb_str_to_integer(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
{
const char *s;
mrb_int len;
- mrb_to_str(mrb, str);
+ mrb_ensure_string_type(mrb, str);
s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
- return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
+ return mrb_str_len_to_integer(mrb, s, len, base, badcheck);
}
/* 15.2.10.5.38 */
@@ -2439,14 +2433,14 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self)
mrb_int base = 10;
mrb_get_args(mrb, "|i", &base);
- if (base < 0) {
+ if (base < 0 || 36 < base) {
mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
}
- return mrb_str_to_inum(mrb, self, base, FALSE);
+ return mrb_str_to_integer(mrb, self, base, FALSE);
}
#ifndef MRB_NO_FLOAT
-double
+static double
mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck)
{
char buf[DBL_DIG * 4 + 20];
@@ -2466,7 +2460,7 @@ mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck)
mrb_value x;
if (!badcheck) return 0.0;
- x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck);
+ x = mrb_str_len_to_integer(mrb, p, pend-p, 0, badcheck);
if (mrb_integer_p(x))
d = (double)mrb_integer(x);
else /* if (mrb_float_p(x)) */
@@ -2536,12 +2530,6 @@ bad:
}
MRB_API double
-mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck)
-{
- return mrb_str_len_to_dbl(mrb, s, strlen(s), badcheck);
-}
-
-MRB_API double
mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
{
return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck);
@@ -2711,7 +2699,7 @@ mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
MRB_API mrb_value
mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
{
- mrb_to_str(mrb, str2);
+ mrb_ensure_string_type(mrb, str2);
return mrb_str_cat_str(mrb, str1, str2);
}
@@ -2734,7 +2722,7 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
/*
* call-seq:
- * str.bytes -> array of fixnums
+ * str.bytes -> array of int
*
* Returns an array of bytes in _str_.
*
@@ -2843,7 +2831,7 @@ mrb_str_byteslice(mrb_state *mrb, mrb_value str)
}
}
else {
- beg = mrb_integer(mrb_to_int(mrb, a1));
+ beg = mrb_integer(mrb_to_integer(mrb, a1));
len = 1;
empty = FALSE;
}
@@ -2860,6 +2848,51 @@ mrb_str_byteslice(mrb_state *mrb, mrb_value str)
}
}
+static mrb_value
+sub_replace(mrb_state *mrb, mrb_value self)
+{
+ char *p, *match;
+ mrb_int plen, mlen;
+ mrb_int found, offset;
+ mrb_value result;
+
+ mrb_get_args(mrb, "ssi", &p, &plen, &match, &mlen, &found);
+ result = mrb_str_new(mrb, 0, 0);
+ for (mrb_int i=0; i<plen; i++) {
+ if (p[i] != '\\' || i+1==plen) {
+ mrb_str_cat(mrb, result, p+i, 1);
+ continue;
+ }
+ i++;
+ switch (p[i]) {
+ case '\\':
+ mrb_str_cat(mrb, result, "\\", 1);
+ break;
+ case '`':
+ mrb_str_cat(mrb, result, RSTRING_PTR(self), chars2bytes(self, 0, found));
+ break;
+ case '&': case '0':
+ mrb_str_cat(mrb, result, match, mlen);
+ break;
+ case '\'':
+ offset = chars2bytes(self, 0, found) + mlen;
+ if (RSTRING_LEN(self) > offset) {
+ mrb_str_cat(mrb, result, RSTRING_PTR(self)+offset, RSTRING_LEN(self)-offset);
+ }
+ break;
+ case '1': case '2': case '3':
+ case '4': case '5': case '6':
+ case '7': case '8': case '9':
+ /* ignore sub-group match (no Regexp supported) */
+ break;
+ default:
+ mrb_str_cat(mrb, result, &p[i-1], 2);
+ break;
+ }
+ }
+ return result;
+}
+
/* ---------------------------*/
void
mrb_init_string(mrb_state *mrb)
@@ -2921,251 +2954,6 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));
mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1));
-}
-
-#ifndef MRB_NO_FLOAT
-/*
- * Source code for the "strtod" library procedure.
- *
- * Copyright (c) 1988-1993 The Regents of the University of California.
- * Copyright (c) 1994 Sun Microsystems, Inc.
- *
- * Permission to use, copy, modify, and distribute this
- * software and its documentation for any purpose and without
- * fee is hereby granted, provided that the above copyright
- * notice appear in all copies. The University of California
- * makes no representations about the suitability of this
- * software for any purpose. It is provided "as is" without
- * express or implied warranty.
- *
- * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $
- */
-
-#include <ctype.h>
-#include <errno.h>
-
-static const int maxExponent = 511; /* Largest possible base 10 exponent. Any
- * exponent larger than this will already
- * produce underflow or overflow, so there's
- * no need to worry about additional digits.
- */
-static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */
- 10., /* is 10^2^i. Used to convert decimal */
- 100., /* exponents into floating-point numbers. */
- 1.0e4,
- 1.0e8,
- 1.0e16,
- 1.0e32,
- 1.0e64,
- 1.0e128,
- 1.0e256
-};
-
-MRB_API double
-mrb_float_read(const char *string, char **endPtr)
-/* const char *string; A decimal ASCII floating-point number,
- * optionally preceded by white space.
- * Must have form "-I.FE-X", where I is the
- * integer part of the mantissa, F is the
- * fractional part of the mantissa, and X
- * is the exponent. Either of the signs
- * may be "+", "-", or omitted. Either I
- * or F may be omitted, or both. The decimal
- * point isn't necessary unless F is present.
- * The "E" may actually be an "e". E and X
- * may both be omitted (but not just one).
- */
-/* char **endPtr; If non-NULL, store terminating character's
- * address here. */
-{
- int sign, expSign = FALSE;
- double fraction, dblExp;
- const double *d;
- const char *p;
- int c;
- int exp = 0; /* Exponent read from "EX" field. */
- int fracExp = 0; /* Exponent that derives from the fractional
- * part. Under normal circumstances, it is
- * the negative of the number of digits in F.
- * However, if I is very long, the last digits
- * of I get dropped (otherwise a long I with a
- * large negative exponent could cause an
- * unnecessary overflow on I alone). In this
- * case, fracExp is incremented one for each
- * dropped digit. */
- int mantSize; /* Number of digits in mantissa. */
- int decPt; /* Number of mantissa digits BEFORE decimal
- * point. */
- const char *pExp; /* Temporarily holds location of exponent
- * in string. */
-
- /*
- * Strip off leading blanks and check for a sign.
- */
-
- p = string;
- while (ISSPACE(*p)) {
- p += 1;
- }
- if (*p == '-') {
- sign = TRUE;
- p += 1;
- }
- else {
- if (*p == '+') {
- p += 1;
- }
- sign = FALSE;
- }
-
- /*
- * Count the number of digits in the mantissa (including the decimal
- * point), and also locate the decimal point.
- */
-
- decPt = -1;
- for (mantSize = 0; ; mantSize += 1)
- {
- c = *p;
- if (!ISDIGIT(c)) {
- if ((c != '.') || (decPt >= 0)) {
- break;
- }
- decPt = mantSize;
- }
- p += 1;
- }
-
- /*
- * Now suck up the digits in the mantissa. Use two integers to
- * collect 9 digits each (this is faster than using floating-point).
- * If the mantissa has more than 18 digits, ignore the extras, since
- * they can't affect the value anyway.
- */
-
- pExp = p;
- p -= mantSize;
- if (decPt < 0) {
- decPt = mantSize;
- }
- else {
- mantSize -= 1; /* One of the digits was the point. */
- }
- if (mantSize > 18) {
- if (decPt - 18 > 29999) {
- fracExp = 29999;
- }
- else {
- fracExp = decPt - 18;
- }
- mantSize = 18;
- }
- else {
- fracExp = decPt - mantSize;
- }
- if (mantSize == 0) {
- fraction = 0.0;
- p = string;
- goto done;
- }
- else {
- int frac1, frac2;
- frac1 = 0;
- for ( ; mantSize > 9; mantSize -= 1)
- {
- c = *p;
- p += 1;
- if (c == '.') {
- c = *p;
- p += 1;
- }
- frac1 = 10*frac1 + (c - '0');
- }
- frac2 = 0;
- for (; mantSize > 0; mantSize -= 1)
- {
- c = *p;
- p += 1;
- if (c == '.') {
- c = *p;
- p += 1;
- }
- frac2 = 10*frac2 + (c - '0');
- }
- fraction = (1.0e9 * frac1) + frac2;
- }
-
- /*
- * Skim off the exponent.
- */
- p = pExp;
- if ((*p == 'E') || (*p == 'e')) {
- p += 1;
- if (*p == '-') {
- expSign = TRUE;
- p += 1;
- }
- else {
- if (*p == '+') {
- p += 1;
- }
- expSign = FALSE;
- }
- while (ISDIGIT(*p)) {
- exp = exp * 10 + (*p - '0');
- if (exp > 19999) {
- exp = 19999;
- }
- p += 1;
- }
- }
- if (expSign) {
- exp = fracExp - exp;
- }
- else {
- exp = fracExp + exp;
- }
-
- /*
- * Generate a floating-point number that represents the exponent.
- * Do this by processing the exponent one bit at a time to combine
- * many powers of 2 of 10. Then combine the exponent with the
- * fraction.
- */
-
- if (exp < 0) {
- expSign = TRUE;
- exp = -exp;
- }
- else {
- expSign = FALSE;
- }
- if (exp > maxExponent) {
- exp = maxExponent;
- errno = ERANGE;
- }
- dblExp = 1.0;
- for (d = powersOf10; exp != 0; exp >>= 1, d += 1) {
- if (exp & 01) {
- dblExp *= *d;
- }
- }
- if (expSign) {
- fraction /= dblExp;
- }
- else {
- fraction *= dblExp;
- }
-
-done:
- if (endPtr != NULL) {
- *endPtr = (char *) p;
- }
-
- if (sign) {
- return -fraction;
- }
- return fraction;
+ mrb_define_method(mrb, s, "__sub_replace", sub_replace, MRB_ARGS_REQ(3)); /* internal */
}
-#endif