diff options
Diffstat (limited to 'src/string.c')
| -rw-r--r-- | src/string.c | 416 |
1 files changed, 102 insertions, 314 deletions
diff --git a/src/string.c b/src/string.c index e440bff8c..ac0f4a920 100644 --- a/src/string.c +++ b/src/string.c @@ -8,14 +8,6 @@ # define _CRT_NONSTDC_NO_DEPRECATE #endif -#ifndef MRB_NO_FLOAT -#include <float.h> -#include <math.h> -#endif -#include <limits.h> -#include <stddef.h> -#include <stdlib.h> -#include <string.h> #include <mruby.h> #include <mruby/array.h> #include <mruby/class.h> @@ -23,6 +15,7 @@ #include <mruby/string.h> #include <mruby/numeric.h> #include <mruby/presym.h> +#include <string.h> typedef struct mrb_shared_string { int refcnt; @@ -32,7 +25,7 @@ typedef struct mrb_shared_string { const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; -#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) +#define mrb_obj_alloc_string(mrb) MRB_OBJ_ALLOC((mrb), MRB_TT_STRING, (mrb)->string_class) static struct RString* str_init_normal_capa(mrb_state *mrb, struct RString *s, @@ -244,7 +237,7 @@ str_modify_keep_ascii(mrb_state *mrb, struct RString *s) static void check_null_byte(mrb_state *mrb, mrb_value str) { - mrb_to_str(mrb, str); + mrb_ensure_string_type(mrb, str); if (memchr(RSTRING_PTR(str), '\0', RSTRING_LEN(str))) { mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); } @@ -839,8 +832,15 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0) { struct RString *s; + const char *p = RSTRING_PTR(str0); + size_t len = RSTRING_LEN(str0); check_null_byte(mrb, str0); - s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0)); + if (RSTR_EMBEDDABLE_P(len)) { + s = str_init_embed(mrb_obj_alloc_string(mrb), p, len); + } + else { + s = str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len); + } return RSTR_PTR(s); } @@ -897,14 +897,14 @@ static mrb_value mrb_str_size(mrb_state *mrb, mrb_value self) { mrb_int len = RSTRING_CHAR_LEN(self); - return mrb_fixnum_value(len); + return mrb_int_value(mrb, len); } static mrb_value mrb_str_bytesize(mrb_state *mrb, mrb_value self) { mrb_int len = RSTRING_LEN(self); - return mrb_fixnum_value(len); + return mrb_int_value(mrb, len); } /* 15.2.10.5.1 */ @@ -1018,7 +1018,7 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1) else { result = mrb_str_cmp(mrb, str1, str2); } - return mrb_fixnum_value(result); + return mrb_int_value(mrb, result); } static mrb_bool @@ -1071,7 +1071,7 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value str) MRB_API mrb_int mrb_string_value_len(mrb_state *mrb, mrb_value ptr) { - mrb_to_str(mrb, ptr); + mrb_ensure_string_type(mrb, ptr); return RSTRING_LEN(ptr); } @@ -1102,8 +1102,8 @@ static enum str_convert_range str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len) { if (!mrb_undef_p(alen)) { - *beg = mrb_int(mrb, indx); - *len = mrb_int(mrb, alen); + *beg = mrb_as_int(mrb, indx); + *len = mrb_as_int(mrb, alen); return STR_CHAR_RANGE; } else { @@ -1123,7 +1123,7 @@ str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, goto range_arg; default: - indx = mrb_to_int(mrb, indx); + indx = mrb_to_integer(mrb, indx); if (mrb_integer_p(indx)) { *beg = mrb_integer(indx); *len = 1; @@ -1175,14 +1175,14 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) /* 15.2.10.5.34 */ /* * call-seq: - * str[fixnum] => fixnum or nil - * str[fixnum, fixnum] => new_str or nil - * str[range] => new_str or nil - * str[other_str] => new_str or nil - * str.slice(fixnum) => fixnum or nil - * str.slice(fixnum, fixnum) => new_str or nil - * str.slice(range) => new_str or nil - * str.slice(other_str) => new_str or nil + * str[int] => int or nil + * str[int, int] => new_str or nil + * str[range] => new_str or nil + * str[other_str] => new_str or nil + * str.slice(int) => int or nil + * str.slice(int, int) => new_str or nil + * str.slice(range) => new_str or nil + * str.slice(other_str) => new_str or nil * * Element Reference---If passed a single <code>Integer</code>, returns the code * of the character at that position. If passed two <code>Integer</code> @@ -1239,13 +1239,11 @@ str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb if (end > len) { end = len; } if (pos < 0 || pos > len) { - str_out_of_index(mrb, mrb_fixnum_value(pos)); + str_out_of_index(mrb, mrb_int_value(mrb, pos)); } replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep)); - newlen = replen + (len - (end - pos)); - - if (newlen >= MRB_SSIZE_MAX || newlen < replen /* overflowed */) { + if (mrb_int_add_overflow(replen, len - (end - pos), &newlen) || newlen >= MRB_SSIZE_MAX) { mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big"); } @@ -1358,8 +1356,7 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_ { mrb_int beg, len, charlen; - mrb_to_str(mrb, replace); - + mrb_ensure_string_type(mrb, replace); switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { case STR_OUT_OF_RANGE: default: @@ -1376,14 +1373,17 @@ mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_ str_range_to_bytes(str, &beg, &len); /* fall through */ case STR_BYTE_RANGE_CORRECTED: - str_replace_partial(mrb, str, beg, beg + len, replace); + if (mrb_int_add_overflow(beg, len, &len)) { + mrb_raise(mrb, E_RUNTIME_ERROR, "string index too big"); + } + str_replace_partial(mrb, str, beg, len, replace); } } /* * call-seq: - * str[fixnum] = replace - * str[fixnum, fixnum] = replace + * str[int] = replace + * str[int, int] = replace * str[range] = replace * str[other_str] = replace * @@ -1754,7 +1754,7 @@ mrb_str_hash(mrb_state *mrb, mrb_value str) /* 15.2.10.5.20 */ /* * call-seq: - * str.hash => fixnum + * str.hash => int * * Return a hash based on the string's length and content. */ @@ -1762,14 +1762,14 @@ static mrb_value mrb_str_hash_m(mrb_state *mrb, mrb_value self) { mrb_int key = mrb_str_hash(mrb, self); - return mrb_fixnum_value(key); + return mrb_int_value(mrb, key); } /* 15.2.10.5.21 */ /* * call-seq: * str.include? other_str => true or false - * str.include? fixnum => true or false + * str.include? int => true or false * * Returns <code>true</code> if <i>str</i> contains the given string or * character. @@ -1792,7 +1792,7 @@ mrb_str_include(mrb_state *mrb, mrb_value self) /* 15.2.10.5.22 */ /* * call-seq: - * str.index(substring [, offset]) => fixnum or nil + * str.index(substring [, offset]) => int or nil * * Returns the index of the first occurrence of the given * <i>substring</i>. Returns <code>nil</code> if not found. @@ -1824,7 +1824,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) if (pos == -1) return mrb_nil_value(); BYTES_ALIGN_CHECK(pos); - return mrb_fixnum_value(pos); + return mrb_int_value(mrb, pos); } /* 15.2.10.5.24 */ @@ -1873,7 +1873,7 @@ mrb_str_init(mrb_state *mrb, mrb_value self) * str.to_sym => symbol * * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the - * symbol if it did not previously exist. See <code>Symbol#id2name</code>. + * symbol if it did not previously exist. * * "Koala".intern #=> :Koala * s = 'cat'.to_sym #=> :cat @@ -1901,7 +1901,7 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj) case MRB_TT_SYMBOL: return mrb_sym_str(mrb, mrb_symbol(obj)); case MRB_TT_INTEGER: - return mrb_fixnum_to_str(mrb, obj, 10); + return mrb_integer_to_str(mrb, obj, 10); case MRB_TT_SCLASS: case MRB_TT_CLASS: case MRB_TT_MODULE: @@ -2018,7 +2018,7 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str) /* 15.2.10.5.31 */ /* * call-seq: - * str.rindex(substring [, offset]) => fixnum or nil + * str.rindex(substring [, offset]) => int or nil * * Returns the index of the last occurrence of the given <i>substring</i>. * Returns <code>nil</code> if not found. If the second parameter is @@ -2053,7 +2053,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str) if (pos >= 0) { pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos); BYTES_ALIGN_CHECK(pos); - return mrb_fixnum_value(pos); + return mrb_int_value(mrb, pos); } return mrb_nil_value(); } @@ -2199,8 +2199,8 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) return result; } -mrb_value -mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, mrb_int base, int badcheck) +static mrb_value +mrb_str_len_to_integer(mrb_state *mrb, const char *str, size_t len, mrb_int base, int badcheck) { const char *p = str; const char *pend = str + len; @@ -2363,12 +2363,6 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, mrb_int base, i return mrb_fixnum_value(0); } -MRB_API mrb_value -mrb_cstr_to_inum(mrb_state *mrb, const char *str, mrb_int base, mrb_bool badcheck) -{ - return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck); -} - /* obslete: use RSTRING_CSTR() or mrb_string_cstr() */ MRB_API const char* mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) @@ -2401,15 +2395,15 @@ mrb_string_cstr(mrb_state *mrb, mrb_value str) } MRB_API mrb_value -mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck) +mrb_str_to_integer(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck) { const char *s; mrb_int len; - mrb_to_str(mrb, str); + mrb_ensure_string_type(mrb, str); s = RSTRING_PTR(str); len = RSTRING_LEN(str); - return mrb_str_len_to_inum(mrb, s, len, base, badcheck); + return mrb_str_len_to_integer(mrb, s, len, base, badcheck); } /* 15.2.10.5.38 */ @@ -2439,14 +2433,14 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self) mrb_int base = 10; mrb_get_args(mrb, "|i", &base); - if (base < 0) { + if (base < 0 || 36 < base) { mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base); } - return mrb_str_to_inum(mrb, self, base, FALSE); + return mrb_str_to_integer(mrb, self, base, FALSE); } #ifndef MRB_NO_FLOAT -double +static double mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) { char buf[DBL_DIG * 4 + 20]; @@ -2466,7 +2460,7 @@ mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) mrb_value x; if (!badcheck) return 0.0; - x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck); + x = mrb_str_len_to_integer(mrb, p, pend-p, 0, badcheck); if (mrb_integer_p(x)) d = (double)mrb_integer(x); else /* if (mrb_float_p(x)) */ @@ -2536,12 +2530,6 @@ bad: } MRB_API double -mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck) -{ - return mrb_str_len_to_dbl(mrb, s, strlen(s), badcheck); -} - -MRB_API double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) { return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck); @@ -2711,7 +2699,7 @@ mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) MRB_API mrb_value mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) { - mrb_to_str(mrb, str2); + mrb_ensure_string_type(mrb, str2); return mrb_str_cat_str(mrb, str1, str2); } @@ -2734,7 +2722,7 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str) /* * call-seq: - * str.bytes -> array of fixnums + * str.bytes -> array of int * * Returns an array of bytes in _str_. * @@ -2843,7 +2831,7 @@ mrb_str_byteslice(mrb_state *mrb, mrb_value str) } } else { - beg = mrb_integer(mrb_to_int(mrb, a1)); + beg = mrb_integer(mrb_to_integer(mrb, a1)); len = 1; empty = FALSE; } @@ -2860,6 +2848,51 @@ mrb_str_byteslice(mrb_state *mrb, mrb_value str) } } +static mrb_value +sub_replace(mrb_state *mrb, mrb_value self) +{ + char *p, *match; + mrb_int plen, mlen; + mrb_int found, offset; + mrb_value result; + + mrb_get_args(mrb, "ssi", &p, &plen, &match, &mlen, &found); + result = mrb_str_new(mrb, 0, 0); + for (mrb_int i=0; i<plen; i++) { + if (p[i] != '\\' || i+1==plen) { + mrb_str_cat(mrb, result, p+i, 1); + continue; + } + i++; + switch (p[i]) { + case '\\': + mrb_str_cat(mrb, result, "\\", 1); + break; + case '`': + mrb_str_cat(mrb, result, RSTRING_PTR(self), chars2bytes(self, 0, found)); + break; + case '&': case '0': + mrb_str_cat(mrb, result, match, mlen); + break; + case '\'': + offset = chars2bytes(self, 0, found) + mlen; + if (RSTRING_LEN(self) > offset) { + mrb_str_cat(mrb, result, RSTRING_PTR(self)+offset, RSTRING_LEN(self)-offset); + } + break; + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + /* ignore sub-group match (no Regexp supported) */ + break; + default: + mrb_str_cat(mrb, result, &p[i-1], 2); + break; + } + } + return result; +} + /* ---------------------------*/ void mrb_init_string(mrb_state *mrb) @@ -2921,251 +2954,6 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2)); mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1)); -} - -#ifndef MRB_NO_FLOAT -/* - * Source code for the "strtod" library procedure. - * - * Copyright (c) 1988-1993 The Regents of the University of California. - * Copyright (c) 1994 Sun Microsystems, Inc. - * - * Permission to use, copy, modify, and distribute this - * software and its documentation for any purpose and without - * fee is hereby granted, provided that the above copyright - * notice appear in all copies. The University of California - * makes no representations about the suitability of this - * software for any purpose. It is provided "as is" without - * express or implied warranty. - * - * RCS: @(#) $Id: strtod.c 11708 2007-02-12 23:01:19Z shyouhei $ - */ - -#include <ctype.h> -#include <errno.h> - -static const int maxExponent = 511; /* Largest possible base 10 exponent. Any - * exponent larger than this will already - * produce underflow or overflow, so there's - * no need to worry about additional digits. - */ -static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */ - 10., /* is 10^2^i. Used to convert decimal */ - 100., /* exponents into floating-point numbers. */ - 1.0e4, - 1.0e8, - 1.0e16, - 1.0e32, - 1.0e64, - 1.0e128, - 1.0e256 -}; - -MRB_API double -mrb_float_read(const char *string, char **endPtr) -/* const char *string; A decimal ASCII floating-point number, - * optionally preceded by white space. - * Must have form "-I.FE-X", where I is the - * integer part of the mantissa, F is the - * fractional part of the mantissa, and X - * is the exponent. Either of the signs - * may be "+", "-", or omitted. Either I - * or F may be omitted, or both. The decimal - * point isn't necessary unless F is present. - * The "E" may actually be an "e". E and X - * may both be omitted (but not just one). - */ -/* char **endPtr; If non-NULL, store terminating character's - * address here. */ -{ - int sign, expSign = FALSE; - double fraction, dblExp; - const double *d; - const char *p; - int c; - int exp = 0; /* Exponent read from "EX" field. */ - int fracExp = 0; /* Exponent that derives from the fractional - * part. Under normal circumstances, it is - * the negative of the number of digits in F. - * However, if I is very long, the last digits - * of I get dropped (otherwise a long I with a - * large negative exponent could cause an - * unnecessary overflow on I alone). In this - * case, fracExp is incremented one for each - * dropped digit. */ - int mantSize; /* Number of digits in mantissa. */ - int decPt; /* Number of mantissa digits BEFORE decimal - * point. */ - const char *pExp; /* Temporarily holds location of exponent - * in string. */ - - /* - * Strip off leading blanks and check for a sign. - */ - - p = string; - while (ISSPACE(*p)) { - p += 1; - } - if (*p == '-') { - sign = TRUE; - p += 1; - } - else { - if (*p == '+') { - p += 1; - } - sign = FALSE; - } - - /* - * Count the number of digits in the mantissa (including the decimal - * point), and also locate the decimal point. - */ - - decPt = -1; - for (mantSize = 0; ; mantSize += 1) - { - c = *p; - if (!ISDIGIT(c)) { - if ((c != '.') || (decPt >= 0)) { - break; - } - decPt = mantSize; - } - p += 1; - } - - /* - * Now suck up the digits in the mantissa. Use two integers to - * collect 9 digits each (this is faster than using floating-point). - * If the mantissa has more than 18 digits, ignore the extras, since - * they can't affect the value anyway. - */ - - pExp = p; - p -= mantSize; - if (decPt < 0) { - decPt = mantSize; - } - else { - mantSize -= 1; /* One of the digits was the point. */ - } - if (mantSize > 18) { - if (decPt - 18 > 29999) { - fracExp = 29999; - } - else { - fracExp = decPt - 18; - } - mantSize = 18; - } - else { - fracExp = decPt - mantSize; - } - if (mantSize == 0) { - fraction = 0.0; - p = string; - goto done; - } - else { - int frac1, frac2; - frac1 = 0; - for ( ; mantSize > 9; mantSize -= 1) - { - c = *p; - p += 1; - if (c == '.') { - c = *p; - p += 1; - } - frac1 = 10*frac1 + (c - '0'); - } - frac2 = 0; - for (; mantSize > 0; mantSize -= 1) - { - c = *p; - p += 1; - if (c == '.') { - c = *p; - p += 1; - } - frac2 = 10*frac2 + (c - '0'); - } - fraction = (1.0e9 * frac1) + frac2; - } - - /* - * Skim off the exponent. - */ - p = pExp; - if ((*p == 'E') || (*p == 'e')) { - p += 1; - if (*p == '-') { - expSign = TRUE; - p += 1; - } - else { - if (*p == '+') { - p += 1; - } - expSign = FALSE; - } - while (ISDIGIT(*p)) { - exp = exp * 10 + (*p - '0'); - if (exp > 19999) { - exp = 19999; - } - p += 1; - } - } - if (expSign) { - exp = fracExp - exp; - } - else { - exp = fracExp + exp; - } - - /* - * Generate a floating-point number that represents the exponent. - * Do this by processing the exponent one bit at a time to combine - * many powers of 2 of 10. Then combine the exponent with the - * fraction. - */ - - if (exp < 0) { - expSign = TRUE; - exp = -exp; - } - else { - expSign = FALSE; - } - if (exp > maxExponent) { - exp = maxExponent; - errno = ERANGE; - } - dblExp = 1.0; - for (d = powersOf10; exp != 0; exp >>= 1, d += 1) { - if (exp & 01) { - dblExp *= *d; - } - } - if (expSign) { - fraction /= dblExp; - } - else { - fraction *= dblExp; - } - -done: - if (endPtr != NULL) { - *endPtr = (char *) p; - } - - if (sign) { - return -fraction; - } - return fraction; + mrb_define_method(mrb, s, "__sub_replace", sub_replace, MRB_ARGS_REQ(3)); /* internal */ } -#endif |
