summary | refs | log | tree | commit | diff | homepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r-- src/string.c 146
1 files changed, 55 insertions, 91 deletions
diff --git a/src/string.c b/src/string.c
index 2668a2c85..f043bfd5a 100644
--- a/src/string.c
+++ b/src/string.c
@@ -20,6 +20,7 @@
#include <mruby/class.h>
#include <mruby/range.h>
#include <mruby/string.h>
+#include <mruby/numeric.h>
#include <mruby/re.h>
typedef struct mrb_shared_string {
@@ -156,13 +157,6 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len)
return mrb_obj_value(str_new(mrb, p, len));
}
-/*
- * call-seq: (Caution! NULL string)
- * String.new(str="") => new_str
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
-
MRB_API mrb_value
mrb_str_new_cstr(mrb_state *mrb, const char *p)
{
@@ -238,27 +232,36 @@ utf8len(const char* p, const char* e)
return len;
}
-static mrb_int
-utf8_strlen(mrb_value str, mrb_int len)
+mrb_int
+mrb_utf8_len(const char *str, mrb_int byte_len)
{
mrb_int total = 0;
- char* p = RSTRING_PTR(str);
- char* e = p;
- if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
- return RSTRING_LEN(str);
- }
- e += len < 0 ? RSTRING_LEN(str) : len;
- while (p<e) {
+ const char *p = str;
+ const char *e = p + byte_len;
+
+ while (p < e) {
p += utf8len(p, e);
total++;
}
- if (RSTRING_LEN(str) == total) {
- RSTRING(str)->flags |= MRB_STR_NO_UTF;
- }
return total;
}
-#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+static mrb_int
+utf8_strlen(mrb_value str)
+{
+ mrb_int byte_len = RSTRING_LEN(str);
+
+ if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
+ return byte_len;
+ }
+ else {
+ mrb_int utf8_len = mrb_utf8_len(RSTRING_PTR(str), byte_len);
+ if (byte_len == utf8_len) RSTRING(str)->flags |= MRB_STR_NO_UTF;
+ return utf8_len;
+ }
+}
+
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s)
/* map character index to byte offset index */
static mrb_int
@@ -490,20 +493,12 @@ str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset)
return mrb_str_index(mrb, str, ptr, len, offset);
}
-static void
-check_frozen(mrb_state *mrb, struct RString *s)
-{
- if (MRB_FROZEN_P(s)) {
- mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen string");
- }
-}
-
static mrb_value
str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
{
mrb_int len;
- check_frozen(mrb, s1);
+ mrb_check_frozen(mrb, s1);
if (s1 == s2) return mrb_obj_value(s1);
s1->flags &= ~MRB_STR_NO_UTF;
s1->flags |= s2->flags&MRB_STR_NO_UTF;
@@ -643,7 +638,7 @@ mrb_locale_from_utf8(const char *utf8, int len)
MRB_API void
mrb_str_modify(mrb_state *mrb, struct RString *s)
{
- check_frozen(mrb, s);
+ mrb_check_frozen(mrb, s);
s->flags &= ~MRB_STR_NO_UTF;
if (RSTR_SHARED_P(s)) {
mrb_shared_string *shared = s->as.heap.aux.shared;
@@ -739,12 +734,6 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
return RSTR_PTR(s);
}
-/*
- * call-seq: (Caution! String("abcd") change)
- * String("abcdefg") = String("abcd") + String("efg")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
MRB_API void
mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
{
@@ -752,12 +741,6 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
mrb_str_cat_str(mrb, self, other);
}
-/*
- * call-seq: (Caution! String("abcd") remain)
- * String("abcdefg") = String("abcd") + String("efg")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
MRB_API mrb_value
mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
{
@@ -775,10 +758,13 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
/* 15.2.10.5.2 */
/*
- * call-seq: (Caution! String("abcd") remain) for stack_argument
- * String("abcdefg") = String("abcd") + String("efg")
+ * call-seq:
+ * str + other_str -> new_str
*
- * Returns a new string object containing a copy of <i>str</i>.
+ * Concatenation---Returns a new <code>String</code> containing
+ * <i>other_str</i> concatenated to <i>str</i>.
+ *
+ * "Hello from " + self.to_s #=> "Hello from main"
*/
static mrb_value
mrb_str_plus_m(mrb_state *mrb, mrb_value self)
@@ -979,13 +965,22 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
}
/* ---------------------------------- */
+mrb_value mrb_mod_to_s(mrb_state *mrb, mrb_value klass);
+
MRB_API mrb_value
mrb_str_to_str(mrb_state *mrb, mrb_value str)
{
- if (!mrb_string_p(str)) {
+ switch (mrb_type(str)) {
+ case MRB_TT_STRING:
+ return str;
+ case MRB_TT_FIXNUM:
+ return mrb_fixnum_to_str(mrb, str, 10);
+ case MRB_TT_CLASS:
+ case MRB_TT_MODULE:
+ return mrb_mod_to_s(mrb, str);
+ default:
return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
}
- return str;
}
MRB_API const char*
@@ -1002,20 +997,6 @@ mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
return RSTRING_LEN(ptr);
}
-void
-mrb_noregexp(mrb_state *mrb, mrb_value self)
-{
- mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
-}
-
-void
-mrb_regexp_check(mrb_state *mrb, mrb_value obj)
-{
- if (mrb_regexp_p(mrb, obj)) {
- mrb_noregexp(mrb, obj);
- }
-}
-
MRB_API mrb_value
mrb_str_dup(mrb_state *mrb, mrb_value str)
{
@@ -1031,7 +1012,6 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
mrb_int idx;
- mrb_regexp_check(mrb, indx);
switch (mrb_type(indx)) {
case MRB_TT_FIXNUM:
idx = mrb_fixnum(indx);
@@ -1124,13 +1104,9 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
if (argc == 2) {
mrb_int n1, n2;
- mrb_regexp_check(mrb, a1);
mrb_get_args(mrb, "ii", &n1, &n2);
return str_substr(mrb, str, n1, n2);
}
- if (argc != 1) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
- }
return mrb_str_aref(mrb, str, a1);
}
@@ -1554,7 +1530,6 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
else
sub = mrb_nil_value();
}
- mrb_regexp_check(mrb, sub);
clen = RSTRING_CHAR_LEN(str);
if (pos < 0) {
pos += clen;
@@ -1587,8 +1562,6 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
return mrb_fixnum_value(pos);
}
-#define STR_REPLACE_SHARED_MIN 10
-
/* 15.2.10.5.24 */
/* 15.2.10.5.28 */
/*
@@ -1808,7 +1781,6 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
if (pos < 0) {
pos += len;
if (pos < 0) {
- mrb_regexp_check(mrb, sub);
return mrb_nil_value();
}
}
@@ -1822,7 +1794,6 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
sub = mrb_nil_value();
}
pos = chars2bytes(str, 0, pos);
- mrb_regexp_check(mrb, sub);
switch (mrb_type(sub)) {
default: {
@@ -1916,16 +1887,11 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
if (argc == 0 || mrb_nil_p(spat)) {
split_type = awk;
}
- else {
- if (mrb_string_p(spat)) {
- split_type = string;
- if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
- split_type = awk;
- }
- }
- else {
- mrb_noregexp(mrb, str);
- }
+ else if (!mrb_string_p(spat)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+ }
+ else if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
+ split_type = awk;
}
result = mrb_ary_new(mrb);
@@ -1962,7 +1928,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
}
- else if (split_type == string) {
+ else { /* split_type == string */
mrb_int str_len = RSTRING_LEN(str);
mrb_int pat_len = RSTRING_LEN(spat);
mrb_int idx = 0;
@@ -1983,9 +1949,6 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
beg = idx;
}
- else {
- mrb_noregexp(mrb, str);
- }
if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg) {
tmp = mrb_str_new_empty(mrb, str);
@@ -2005,7 +1968,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
return result;
}
-MRB_API mrb_value
+static mrb_value
mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck)
{
const char *p = str;
@@ -2149,7 +2112,7 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base,
else
#endif
{
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer",
+ mrb_raisef(mrb, E_RANGE_ERROR, "string (%S) too big for integer",
mrb_str_new(mrb, str, pend-str));
}
}
@@ -2173,7 +2136,7 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base,
}
MRB_API mrb_value
-mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+mrb_cstr_to_inum(mrb_state *mrb, const char *str, mrb_int base, mrb_bool badcheck)
{
return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
}
@@ -2754,6 +2717,7 @@ mrb_init_string(mrb_state *mrb)
#endif
mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
+ mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
@@ -2842,7 +2806,7 @@ mrb_float_read(const char *string, char **endPtr)
*/
p = string;
- while (isspace(*p)) {
+ while (ISSPACE(*p)) {
p += 1;
}
if (*p == '-') {
@@ -2865,7 +2829,7 @@ mrb_float_read(const char *string, char **endPtr)
for (mantSize = 0; ; mantSize += 1)
{
c = *p;
- if (!isdigit(c)) {
+ if (!ISDIGIT(c)) {
if ((c != '.') || (decPt >= 0)) {
break;
}
@@ -2950,7 +2914,7 @@ mrb_float_read(const char *string, char **endPtr)
}
expSign = FALSE;
}
- while (isdigit(*p)) {
+ while (ISDIGIT(*p)) {
exp = exp * 10 + (*p - '0');
if (exp > 19999) {
exp = 19999;