summaryrefslogtreecommitdiffhomepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r--src/string.c1384
1 files changed, 791 insertions, 593 deletions
diff --git a/src/string.c b/src/string.c
index 6570c89fb..6664eabd6 100644
--- a/src/string.c
+++ b/src/string.c
@@ -4,20 +4,21 @@
** See Copyright Notice in mruby.h
*/
-#include <ctype.h>
+#ifdef _MSC_VER
+# define _CRT_NONSTDC_NO_DEPRECATE
+#endif
+
#include <float.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
-#include "mruby.h"
-#include "mruby/array.h"
-#include "mruby/class.h"
-#include "mruby/range.h"
-#include "mruby/string.h"
-#include "mruby/re.h"
-
-const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+#include <mruby.h>
+#include <mruby/array.h>
+#include <mruby/class.h>
+#include <mruby/range.h>
+#include <mruby/string.h>
+#include <mruby/re.h>
typedef struct mrb_shared_string {
mrb_bool nofree : 1;
@@ -26,129 +27,39 @@ typedef struct mrb_shared_string {
mrb_int len;
} mrb_shared_string;
-static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
-static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
-
-mrb_int
-mrb_str_strlen(mrb_state *mrb, struct RString *s)
-{
- mrb_int i, max = RSTR_LEN(s);
- char *p = RSTR_PTR(s);
-
- if (!p) return 0;
- for (i=0; i<max; i++) {
- if (p[i] == '\0') {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
- }
- }
- return max;
-}
-
-static inline void
-resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
-{
- if (RSTR_EMBED_P(s)) {
- if (RSTRING_EMBED_LEN_MAX < capacity) {
- char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
- const mrb_int len = RSTR_EMBED_LEN(s);
- memcpy(tmp, s->as.ary, len);
- RSTR_UNSET_EMBED_FLAG(s);
- s->as.heap.ptr = tmp;
- s->as.heap.len = len;
- s->as.heap.aux.capa = capacity;
- }
- }
- else {
- s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
- s->as.heap.aux.capa = capacity;
- }
-}
+const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-static void
-str_decref(mrb_state *mrb, mrb_shared_string *shared)
-{
- shared->refcnt--;
- if (shared->refcnt == 0) {
- if (!shared->nofree) {
- mrb_free(mrb, shared->ptr);
- }
- mrb_free(mrb, shared);
- }
-}
+#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
-void
-mrb_str_modify(mrb_state *mrb, struct RString *s)
+static struct RString*
+str_new_static(mrb_state *mrb, const char *p, size_t len)
{
- if (RSTR_SHARED_P(s)) {
- mrb_shared_string *shared = s->as.heap.aux.shared;
-
- if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
- s->as.heap.ptr = shared->ptr;
- s->as.heap.aux.capa = shared->len;
- RSTR_PTR(s)[s->as.heap.len] = '\0';
- mrb_free(mrb, shared);
- }
- else {
- char *ptr, *p;
- mrb_int len;
-
- p = RSTR_PTR(s);
- len = s->as.heap.len;
- ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
- if (p) {
- memcpy(ptr, p, len);
- }
- ptr[len] = '\0';
- s->as.heap.ptr = ptr;
- s->as.heap.aux.capa = len;
- str_decref(mrb, shared);
- }
- RSTR_UNSET_SHARED_FLAG(s);
- return;
- }
- if (RSTR_NOFREE_P(s)) {
- char *p = s->as.heap.ptr;
+ struct RString *s;
- s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1);
- if (p) {
- memcpy(RSTR_PTR(s), p, s->as.heap.len);
- }
- RSTR_PTR(s)[s->as.heap.len] = '\0';
- s->as.heap.aux.capa = s->as.heap.len;
- RSTR_UNSET_NOFREE_FLAG(s);
- return;
+ if (len >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
}
-}
-
-mrb_value
-mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
-{
- mrb_int slen;
- struct RString *s = mrb_str_ptr(str);
+ s = mrb_obj_alloc_string(mrb);
+ s->as.heap.len = len;
+ s->as.heap.aux.capa = 0; /* nofree */
+ s->as.heap.ptr = (char *)p;
+ s->flags = MRB_STR_NOFREE;
- mrb_str_modify(mrb, s);
- slen = RSTR_LEN(s);
- if (len != slen) {
- if (slen < len || slen - len > 256) {
- resize_capa(mrb, s, len);
- }
- RSTR_SET_LEN(s, len);
- RSTR_PTR(s)[len] = '\0'; /* sentinel */
- }
- return str;
+ return s;
}
-#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
-
static struct RString*
str_new(mrb_state *mrb, const char *p, size_t len)
{
struct RString *s;
+ if (p && mrb_ro_data_p(p)) {
+ return str_new_static(mrb, p, len);
+ }
s = mrb_obj_alloc_string(mrb);
if (len < RSTRING_EMBED_LEN_MAX) {
RSTR_SET_EMBED_FLAG(s);
- RSTR_SET_EMBED_LEN(s,len);
+ RSTR_SET_EMBED_LEN(s, len);
if (p) {
memcpy(s->as.ary, p, len);
}
@@ -167,7 +78,7 @@ str_new(mrb_state *mrb, const char *p, size_t len)
return s;
}
-static void
+static inline void
str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
{
s->c = mrb_str_ptr(obj)->c;
@@ -186,7 +97,7 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str)
# define MRB_STR_BUF_MIN_SIZE 128
#endif
-mrb_value
+MRB_API mrb_value
mrb_str_buf_new(mrb_state *mrb, size_t capa)
{
struct RString *s;
@@ -207,6 +118,26 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa)
return mrb_obj_value(s);
}
+static inline void
+resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
+{
+ if (RSTR_EMBED_P(s)) {
+ if (RSTRING_EMBED_LEN_MAX < capacity) {
+ char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
+ const mrb_int len = RSTR_EMBED_LEN(s);
+ memcpy(tmp, s->as.ary, len);
+ RSTR_UNSET_EMBED_FLAG(s);
+ s->as.heap.ptr = tmp;
+ s->as.heap.len = len;
+ s->as.heap.aux.capa = capacity;
+ }
+ }
+ else {
+ s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
+ s->as.heap.aux.capa = capacity;
+ }
+}
+
static void
str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
{
@@ -248,7 +179,7 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
RSTR_PTR(s)[total] = '\0'; /* sentinel */
}
-mrb_value
+MRB_API mrb_value
mrb_str_new(mrb_state *mrb, const char *p, size_t len)
{
return mrb_obj_value(str_new(mrb, p, len));
@@ -261,7 +192,7 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len)
* Returns a new string object containing a copy of <i>str</i>.
*/
-mrb_value
+MRB_API mrb_value
mrb_str_new_cstr(mrb_state *mrb, const char *p)
{
struct RString *s;
@@ -279,20 +210,23 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p)
return mrb_obj_value(s);
}
-mrb_value
+MRB_API mrb_value
mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
{
- struct RString *s;
+ struct RString *s = str_new_static(mrb, p, len);
+ return mrb_obj_value(s);
+}
- if (len >= MRB_INT_MAX) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+static void
+str_decref(mrb_state *mrb, mrb_shared_string *shared)
+{
+ shared->refcnt--;
+ if (shared->refcnt == 0) {
+ if (!shared->nofree) {
+ mrb_free(mrb, shared->ptr);
+ }
+ mrb_free(mrb, shared);
}
- s = mrb_obj_alloc_string(mrb);
- s->as.heap.len = len;
- s->as.heap.aux.capa = 0; /* nofree */
- s->as.heap.ptr = (char *)p;
- s->flags = MRB_STR_NOFREE;
- return mrb_obj_value(s);
}
void
@@ -306,20 +240,126 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str)
mrb_free(mrb, str->as.heap.ptr);
}
-char *
-mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
+#ifdef MRB_UTF8_STRING
+static const char utf8len_codepage[256] =
{
- struct RString *s;
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
+};
- if (!mrb_string_p(str0)) {
- mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+static mrb_int
+utf8len(const char* p, const char* e)
+{
+ mrb_int len;
+ mrb_int i;
+
+ len = utf8len_codepage[(unsigned char)*p];
+ if (p + len > e) return 1;
+ for (i = 1; i < len; ++i)
+ if ((p[i] & 0xc0) != 0x80)
+ return 1;
+ return len;
+}
+
+static mrb_int
+utf8_strlen(mrb_value str, mrb_int len)
+{
+ mrb_int total = 0;
+ char* p = RSTRING_PTR(str);
+ char* e = p;
+ e += len < 0 ? RSTRING_LEN(str) : len;
+ while (p<e) {
+ p += utf8len(p, e);
+ total++;
}
+ return total;
+}
- s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
- if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+
+/* map character index to byte offset index */
+static mrb_int
+chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
+{
+ mrb_int i, b, n;
+ const char *p = RSTRING_PTR(s) + off;
+ const char *e = RSTRING_END(s);
+
+ for (b=i=0; p<e && i<idx; i++) {
+ n = utf8len(p, e);
+ b += n;
+ p += n;
}
- return RSTR_PTR(s);
+ return b;
+}
+
+/* map byte offset to character index */
+static mrb_int
+bytes2chars(char *p, mrb_int bi)
+{
+ mrb_int i, b, n;
+
+ for (b=i=0; b<bi; i++) {
+ n = utf8len(p, p+bi);
+ b += n;
+ p += n;
+ }
+ return i;
+}
+
+#else
+#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
+#define chars2bytes(p, off, ci) (ci)
+#define bytes2chars(p, bi) (bi)
+#endif
+
+static inline mrb_int
+mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
+{
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys;
+ int i, qstable[256];
+
+ /* Preprocessing */
+ for (i = 0; i < 256; ++i)
+ qstable[i] = m + 1;
+ for (; x < xe; ++x)
+ qstable[*x] = xe - x;
+ /* Searching */
+ for (; y + m <= ys + n; y += *(qstable + y[m])) {
+ if (*xs == *y && memcmp(xs, y, m) == 0)
+ return y - ys;
+ }
+ return -1;
+}
+
+static mrb_int
+mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
+{
+ const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
+
+ if (m > n) return -1;
+ else if (m == n) {
+ return memcmp(x0, y0, m) == 0 ? 0 : -1;
+ }
+ else if (m < 1) {
+ return 0;
+ }
+ else if (m == 1) {
+ const unsigned char *ys = y, *ye = ys + n;
+ for (; y < ye; ++y) {
+ if (*x == *y)
+ return y - ys;
+ }
+ return -1;
+ }
+ return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
}
static void
@@ -360,18 +400,336 @@ str_make_shared(mrb_state *mrb, struct RString *s)
}
}
-/*
- * call-seq:
- * char* str = String("abcd"), len=strlen("abcd")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
-const char*
-mrb_str_body(mrb_value str, int *len_p)
+static mrb_value
+byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ struct RString *orig, *s;
+ mrb_shared_string *shared;
+
+ orig = mrb_str_ptr(str);
+ if (RSTR_EMBED_P(orig)) {
+ s = str_new(mrb, orig->as.ary+beg, len);
+ }
+ else {
+ str_make_shared(mrb, orig);
+ shared = orig->as.heap.aux.shared;
+ s = mrb_obj_alloc_string(mrb);
+ s->as.heap.ptr = orig->as.heap.ptr + beg;
+ s->as.heap.len = len;
+ s->as.heap.aux.shared = shared;
+ RSTR_SET_SHARED_FLAG(s);
+ shared->refcnt++;
+ }
+
+ return mrb_obj_value(s);
+}
+#ifdef MRB_UTF8_STRING
+static inline mrb_value
+str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ beg = chars2bytes(str, 0, beg);
+ len = chars2bytes(str, beg, len);
+
+ return byte_subseq(mrb, str, beg, len);
+}
+#else
+#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
+#endif
+
+static mrb_value
+str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ mrb_int clen = RSTRING_CHAR_LEN(str);
+
+ if (len < 0) return mrb_nil_value();
+ if (clen == 0) {
+ len = 0;
+ }
+ else if (beg < 0) {
+ beg = clen + beg;
+ }
+ if (beg > clen) return mrb_nil_value();
+ if (beg < 0) {
+ beg += clen;
+ if (beg < 0) return mrb_nil_value();
+ }
+ if (beg + len > clen)
+ len = clen - beg;
+ if (len <= 0) {
+ len = 0;
+ }
+ return str_subseq(mrb, str, beg, len);
+}
+
+static mrb_int
+str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
+{
+ mrb_int pos;
+ char *s, *sptr;
+ mrb_int len, slen;
+
+ len = RSTRING_LEN(str);
+ slen = RSTRING_LEN(sub);
+ if (offset < 0) {
+ offset += len;
+ if (offset < 0) return -1;
+ }
+ if (len - offset < slen) return -1;
+ s = RSTRING_PTR(str);
+ if (offset) {
+ s += offset;
+ }
+ if (slen == 0) return offset;
+ /* need proceed one character at a time */
+ sptr = RSTRING_PTR(sub);
+ slen = RSTRING_LEN(sub);
+ len = RSTRING_LEN(str) - offset;
+ pos = mrb_memsearch(sptr, slen, s, len);
+ if (pos < 0) return pos;
+ return pos + offset;
+}
+
+static void
+check_frozen(mrb_state *mrb, struct RString *s)
+{
+ if (RSTR_FROZEN_P(s)) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string");
+ }
+}
+
+static mrb_value
+str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
+{
+ long len;
+
+ check_frozen(mrb, s1);
+ len = RSTR_LEN(s2);
+ if (RSTR_SHARED_P(s1)) {
+ str_decref(mrb, s1->as.heap.aux.shared);
+ }
+ else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
+ mrb_free(mrb, s1->as.heap.ptr);
+ }
+
+ RSTR_UNSET_NOFREE_FLAG(s1);
+
+ if (RSTR_SHARED_P(s2)) {
+L_SHARE:
+ RSTR_UNSET_EMBED_FLAG(s1);
+ s1->as.heap.ptr = s2->as.heap.ptr;
+ s1->as.heap.len = len;
+ s1->as.heap.aux.shared = s2->as.heap.aux.shared;
+ RSTR_SET_SHARED_FLAG(s1);
+ s1->as.heap.aux.shared->refcnt++;
+ }
+ else {
+ if (len <= RSTRING_EMBED_LEN_MAX) {
+ RSTR_UNSET_SHARED_FLAG(s1);
+ RSTR_SET_EMBED_FLAG(s1);
+ memcpy(s1->as.ary, RSTR_PTR(s2), len);
+ RSTR_SET_EMBED_LEN(s1, len);
+ }
+ else {
+ str_make_shared(mrb, s2);
+ goto L_SHARE;
+ }
+ }
+
+ return mrb_obj_value(s1);
+}
+
+static mrb_int
+str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+{
+ char *s, *sbeg, *t;
+ struct RString *ps = mrb_str_ptr(str);
+ mrb_int len = RSTRING_LEN(sub);
+
+ /* substring longer than string */
+ if (RSTR_LEN(ps) < len) return -1;
+ if (RSTR_LEN(ps) - pos < len) {
+ pos = RSTR_LEN(ps) - len;
+ }
+ sbeg = RSTR_PTR(ps);
+ s = RSTR_PTR(ps) + pos;
+ t = RSTRING_PTR(sub);
+ if (len) {
+ while (sbeg <= s) {
+ if (memcmp(s, t, len) == 0) {
+ return s - RSTR_PTR(ps);
+ }
+ s--;
+ }
+ return -1;
+ }
+ else {
+ return pos;
+ }
+}
+
+MRB_API mrb_int
+mrb_str_strlen(mrb_state *mrb, struct RString *s)
+{
+ mrb_int i, max = RSTR_LEN(s);
+ char *p = RSTR_PTR(s);
+
+ if (!p) return 0;
+ for (i=0; i<max; i++) {
+ if (p[i] == '\0') {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ }
+ }
+ return max;
+}
+
+#ifdef _WIN32
+#include <windows.h>
+
+char*
+mrb_utf8_from_locale(const char *str, size_t len)
+{
+ wchar_t* wcsp;
+ char* mbsp;
+ size_t mbssize, wcssize;
+
+ if (len == 0)
+ return strdup("");
+ if (len == -1)
+ len = strlen(str);
+ wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0);
+ wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
+ if (!wcsp)
+ return NULL;
+ wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1);
+ wcsp[wcssize] = 0;
+
+ mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
+ mbsp = (char*) malloc((mbssize + 1));
+ if (!mbsp) {
+ free(wcsp);
+ return NULL;
+ }
+ mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
+ mbsp[mbssize] = 0;
+ free(wcsp);
+ return mbsp;
+}
+
+char*
+mrb_locale_from_utf8(const char *utf8, size_t len)
+{
+ wchar_t* wcsp;
+ char* mbsp;
+ size_t mbssize, wcssize;
+
+ if (len == 0)
+ return strdup("");
+ if (len == -1)
+ len = strlen(utf8);
+ wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0);
+ wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
+ if (!wcsp)
+ return NULL;
+ wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1);
+ wcsp[wcssize] = 0;
+ mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
+ mbsp = (char*) malloc((mbssize + 1));
+ if (!mbsp) {
+ free(wcsp);
+ return NULL;
+ }
+ mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
+ mbsp[mbssize] = 0;
+ free(wcsp);
+ return mbsp;
+}
+#endif
+
+MRB_API void
+mrb_str_modify(mrb_state *mrb, struct RString *s)
+{
+ check_frozen(mrb, s);
+ if (RSTR_SHARED_P(s)) {
+ mrb_shared_string *shared = s->as.heap.aux.shared;
+
+ if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
+ s->as.heap.ptr = shared->ptr;
+ s->as.heap.aux.capa = shared->len;
+ RSTR_PTR(s)[s->as.heap.len] = '\0';
+ mrb_free(mrb, shared);
+ }
+ else {
+ char *ptr, *p;
+ mrb_int len;
+
+ p = RSTR_PTR(s);
+ len = s->as.heap.len;
+ ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
+ if (p) {
+ memcpy(ptr, p, len);
+ }
+ ptr[len] = '\0';
+ s->as.heap.ptr = ptr;
+ s->as.heap.aux.capa = len;
+ str_decref(mrb, shared);
+ }
+ RSTR_UNSET_SHARED_FLAG(s);
+ return;
+ }
+ if (RSTR_NOFREE_P(s)) {
+ char *p = s->as.heap.ptr;
+
+ s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1);
+ if (p) {
+ memcpy(RSTR_PTR(s), p, s->as.heap.len);
+ }
+ RSTR_PTR(s)[s->as.heap.len] = '\0';
+ s->as.heap.aux.capa = s->as.heap.len;
+ RSTR_UNSET_NOFREE_FLAG(s);
+ return;
+ }
+}
+
+static mrb_value
+mrb_str_freeze(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = mrb_str_ptr(str);
+
+ RSTR_SET_FROZEN_FLAG(s);
+ return str;
+}
+
+MRB_API mrb_value
+mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
{
+ mrb_int slen;
struct RString *s = mrb_str_ptr(str);
- *len_p = RSTR_LEN(s);
+ mrb_str_modify(mrb, s);
+ slen = RSTR_LEN(s);
+ if (len != slen) {
+ if (slen < len || slen - len > 256) {
+ resize_capa(mrb, s, len);
+ }
+ RSTR_SET_LEN(s, len);
+ RSTR_PTR(s)[len] = '\0'; /* sentinel */
+ }
+ return str;
+}
+
+MRB_API char*
+mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
+{
+ struct RString *s;
+
+ if (!mrb_string_p(str0)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+ }
+
+ s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
+ if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ }
return RSTR_PTR(s);
}
@@ -381,7 +739,7 @@ mrb_str_body(mrb_value str, int *len_p)
*
* Returns a new string object containing a copy of <i>str</i>.
*/
-void
+MRB_API void
mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
{
struct RString *s1 = mrb_str_ptr(self), *s2;
@@ -408,7 +766,7 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
*
* Returns a new string object containing a copy of <i>str</i>.
*/
-mrb_value
+MRB_API mrb_value
mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
{
struct RString *s = mrb_str_ptr(a);
@@ -439,32 +797,26 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self)
return mrb_str_plus(mrb, self, str);
}
+/* 15.2.10.5.26 */
+/* 15.2.10.5.33 */
/*
* call-seq:
- * len = strlen(String("abcd"))
+ * "abcd".size => int
*
- * Returns a new string object containing a copy of <i>str</i>.
+ * Returns the length of string.
*/
static mrb_value
-mrb_str_bytesize(mrb_state *mrb, mrb_value self)
+mrb_str_size(mrb_state *mrb, mrb_value self)
{
- struct RString *s = mrb_str_ptr(self);
- return mrb_fixnum_value(RSTR_LEN(s));
+ mrb_int len = RSTRING_CHAR_LEN(self);
+ return mrb_fixnum_value(len);
}
-/* 15.2.10.5.26 */
-/* 15.2.10.5.33 */
-/*
- * call-seq:
- * len = strlen(String("abcd"))
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
-mrb_value
-mrb_str_size(mrb_state *mrb, mrb_value self)
+static mrb_value
+mrb_str_bytesize(mrb_state *mrb, mrb_value self)
{
- struct RString *s = mrb_str_ptr(self);
- return mrb_fixnum_value(RSTR_LEN(s));
+ mrb_int len = RSTRING_LEN(self);
+ return mrb_fixnum_value(len);
}
/* 15.2.10.5.1 */
@@ -521,7 +873,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
* = 0
* < -1
*/
-int
+MRB_API int
mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
{
mrb_int len;
@@ -607,7 +959,7 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
return FALSE;
}
-mrb_bool
+MRB_API mrb_bool
mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
{
if (mrb_immediate_p(str2)) return FALSE;
@@ -643,7 +995,7 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
}
/* ---------------------------------- */
-mrb_value
+MRB_API mrb_value
mrb_str_to_str(mrb_state *mrb, mrb_value str)
{
mrb_value s;
@@ -658,100 +1010,35 @@ mrb_str_to_str(mrb_state *mrb, mrb_value str)
return str;
}
-char *
+MRB_API const char*
mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
{
mrb_value str = mrb_str_to_str(mrb, ptr);
return RSTRING_PTR(str);
}
-static mrb_value
-noregexp(mrb_state *mrb, mrb_value self)
-{
- mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
- return mrb_nil_value();
-}
-
-static void
-regexp_check(mrb_state *mrb, mrb_value obj)
+MRB_API mrb_int
+mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
{
- if (mrb_regexp_p(mrb, obj)) {
- noregexp(mrb, obj);
- }
-}
-
-static inline mrb_int
-mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
-{
- const unsigned char *x = xs, *xe = xs + m;
- const unsigned char *y = ys;
- int i, qstable[256];
-
- /* Preprocessing */
- for (i = 0; i < 256; ++i)
- qstable[i] = m + 1;
- for (; x < xe; ++x)
- qstable[*x] = xe - x;
- /* Searching */
- for (; y + m <= ys + n; y += *(qstable + y[m])) {
- if (*xs == *y && memcmp(xs, y, m) == 0)
- return y - ys;
- }
- return -1;
+ mrb_value str = mrb_str_to_str(mrb, ptr);
+ return RSTRING_LEN(str);
}
-static mrb_int
-mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
+void
+mrb_noregexp(mrb_state *mrb, mrb_value self)
{
- const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
-
- if (m > n) return -1;
- else if (m == n) {
- return memcmp(x0, y0, m) == 0 ? 0 : -1;
- }
- else if (m < 1) {
- return 0;
- }
- else if (m == 1) {
- const unsigned char *ys = y, *ye = ys + n;
- for (; y < ye; ++y) {
- if (*x == *y)
- return y - ys;
- }
- return -1;
- }
- return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
}
-static mrb_int
-mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
+void
+mrb_regexp_check(mrb_state *mrb, mrb_value obj)
{
- mrb_int pos;
- char *s, *sptr;
- mrb_int len, slen;
-
- len = RSTRING_LEN(str);
- slen = RSTRING_LEN(sub);
- if (offset < 0) {
- offset += len;
- if (offset < 0) return -1;
- }
- if (len - offset < slen) return -1;
- s = RSTRING_PTR(str);
- if (offset) {
- s += offset;
+ if (mrb_regexp_p(mrb, obj)) {
+ mrb_noregexp(mrb, obj);
}
- if (slen == 0) return offset;
- /* need proceed one character at a time */
- sptr = RSTRING_PTR(sub);
- slen = RSTRING_LEN(sub);
- len = RSTRING_LEN(str) - offset;
- pos = mrb_memsearch(sptr, slen, s, len);
- if (pos < 0) return pos;
- return pos + offset;
}
-mrb_value
+MRB_API mrb_value
mrb_str_dup(mrb_state *mrb, mrb_value str)
{
struct RString *s = mrb_str_ptr(str);
@@ -766,18 +1053,18 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
mrb_int idx;
- regexp_check(mrb, indx);
+ mrb_regexp_check(mrb, indx);
switch (mrb_type(indx)) {
case MRB_TT_FIXNUM:
idx = mrb_fixnum(indx);
num_index:
- str = mrb_str_substr(mrb, str, idx, 1);
+ str = str_substr(mrb, str, idx, 1);
if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
return str;
case MRB_TT_STRING:
- if (mrb_str_index(mrb, str, indx, 0) != -1)
+ if (str_index(mrb, str, indx, 0) != -1)
return mrb_str_dup(mrb, indx);
return mrb_nil_value();
@@ -786,15 +1073,20 @@ num_index:
{
mrb_int beg, len;
- len = RSTRING_LEN(str);
+ len = RSTRING_CHAR_LEN(str);
if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
- return mrb_str_subseq(mrb, str, beg, len);
+ return str_subseq(mrb, str, beg, len);
}
else {
return mrb_nil_value();
}
}
+ case MRB_TT_FLOAT:
default:
+ indx = mrb_Integer(mrb, indx);
+ if (mrb_nil_p(indx)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
+ }
idx = mrb_fixnum(indx);
goto num_index;
}
@@ -831,6 +1123,7 @@ num_index:
*
* a = "hello there"
* a[1] #=> 101(1.8.7) "e"(1.9.2)
+ * a[1.1] #=> "e"(1.9.2)
* a[1,3] #=> "ell"
* a[1..3] #=> "ell"
* a[-3,2] #=> "er"
@@ -848,8 +1141,8 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
argc = mrb_get_args(mrb, "o|o", &a1, &a2);
if (argc == 2) {
- regexp_check(mrb, a1);
- return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
+ mrb_regexp_check(mrb, a1);
+ return str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
}
if (argc != 1) {
mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
@@ -919,7 +1212,7 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self)
/* 15.2.10.5.10 */
/*
* call-seq:
- * str.chomp!(separator=$/) => str or nil
+ * str.chomp!(separator="\n") => str or nil
*
* Modifies <i>str</i> in place as described for <code>String#chomp</code>,
* returning <i>str</i>, or <code>nil</code> if no modifications were made.
@@ -993,7 +1286,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
/* 15.2.10.5.9 */
/*
* call-seq:
- * str.chomp(separator=$/) => new_str
+ * str.chomp(separator="\n") => new_str
*
* Returns a new <code>String</code> with the given record separator removed
* from the end of <i>str</i> (if present). If <code>$/</code> has not been
@@ -1036,7 +1329,18 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
mrb_str_modify(mrb, s);
if (RSTR_LEN(s) > 0) {
mrb_int len;
+#ifdef MRB_UTF8_STRING
+ const char* t = RSTR_PTR(s), *p = t;
+ const char* e = p + RSTR_LEN(s);
+ while (p<e) {
+ mrb_int clen = utf8len(p, e);
+ if (p + clen>=e) break;
+ p += clen;
+ }
+ len = p - t;
+#else
len = RSTR_LEN(s) - 1;
+#endif
if (RSTR_PTR(s)[len] == '\n') {
if (len > 0 &&
RSTR_PTR(s)[len-1] == '\r') {
@@ -1113,7 +1417,7 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
*
* Returns a copy of <i>str</i> with all uppercase letters replaced with their
* lowercase counterparts. The operation is locale insensitive---only
- * characters ``A'' to ``Z'' are affected.
+ * characters 'A' to 'Z' are affected.
*
* "hEllO".downcase #=> "hello"
*/
@@ -1164,47 +1468,10 @@ mrb_str_eql(mrb_state *mrb, mrb_value self)
return mrb_bool_value(eql_p);
}
-static mrb_value
-mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
-{
- struct RString *orig, *s;
- mrb_shared_string *shared;
-
- orig = mrb_str_ptr(str);
- if (RSTR_EMBED_P(orig)) {
- s = str_new(mrb, orig->as.ary+beg, len);
- } else {
- str_make_shared(mrb, orig);
- shared = orig->as.heap.aux.shared;
- s = mrb_obj_alloc_string(mrb);
- s->as.heap.ptr = orig->as.heap.ptr + beg;
- s->as.heap.len = len;
- s->as.heap.aux.shared = shared;
- RSTR_SET_SHARED_FLAG(s);
- shared->refcnt++;
- }
-
- return mrb_obj_value(s);
-}
-
-mrb_value
+MRB_API mrb_value
mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
- if (len < 0) return mrb_nil_value();
- if (!RSTRING_LEN(str)) {
- len = 0;
- }
- if (beg > RSTRING_LEN(str)) return mrb_nil_value();
- if (beg < 0) {
- beg += RSTRING_LEN(str);
- if (beg < 0) return mrb_nil_value();
- }
- if (beg + len > RSTRING_LEN(str))
- len = RSTRING_LEN(str) - beg;
- if (len <= 0) {
- len = 0;
- }
- return mrb_str_subseq(mrb, str, beg, len);
+ return str_substr(mrb, str, beg, len);
}
mrb_int
@@ -1263,7 +1530,7 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
}
else {
str2 = mrb_str_to_str(mrb, str2);
- i = mrb_str_index(mrb, self, str2, 0);
+ i = str_index(mrb, self, str2, 0);
include_p = (i != -1);
}
@@ -1293,12 +1560,12 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
* "hello".index(/[aeiou]/, -3) #=> 4
*/
static mrb_value
-mrb_str_index_m(mrb_state *mrb, mrb_value str)
+mrb_str_index(mrb_state *mrb, mrb_value str)
{
mrb_value *argv;
mrb_int argc;
mrb_value sub;
- mrb_int pos;
+ mrb_int pos, clen;
mrb_get_args(mrb, "*", &argv, &argc);
if (argc == 2) {
@@ -1312,26 +1579,18 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
else
sub = mrb_nil_value();
}
- regexp_check(mrb, sub);
+ mrb_regexp_check(mrb, sub);
+ clen = RSTRING_CHAR_LEN(str);
if (pos < 0) {
- pos += RSTRING_LEN(str);
+ pos += clen;
if (pos < 0) {
return mrb_nil_value();
}
}
+ if (pos >= clen) return mrb_nil_value();
+ pos = chars2bytes(str, 0, pos);
switch (mrb_type(sub)) {
- case MRB_TT_FIXNUM: {
- int c = mrb_fixnum(sub);
- mrb_int len = RSTRING_LEN(str);
- unsigned char *p = (unsigned char*)RSTRING_PTR(str);
-
- for (;pos<len;pos++) {
- if (p[pos] == c) return mrb_fixnum_value(pos);
- }
- return mrb_nil_value();
- }
-
default: {
mrb_value tmp;
@@ -1343,56 +1602,17 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
}
/* fall through */
case MRB_TT_STRING:
- pos = mrb_str_index(mrb, str, sub, pos);
+ pos = str_index(mrb, str, sub, pos);
break;
}
if (pos == -1) return mrb_nil_value();
+ pos = bytes2chars(RSTRING_PTR(str), pos);
return mrb_fixnum_value(pos);
}
#define STR_REPLACE_SHARED_MIN 10
-static mrb_value
-str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
-{
- long len;
-
- len = RSTR_LEN(s2);
- if (RSTR_SHARED_P(s1)) {
- str_decref(mrb, s1->as.heap.aux.shared);
- }
- else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
- mrb_free(mrb, s1->as.heap.ptr);
- }
-
- RSTR_UNSET_NOFREE_FLAG(s1);
-
- if (RSTR_SHARED_P(s2)) {
-L_SHARE:
- RSTR_UNSET_EMBED_FLAG(s1);
- s1->as.heap.ptr = s2->as.heap.ptr;
- s1->as.heap.len = len;
- s1->as.heap.aux.shared = s2->as.heap.aux.shared;
- RSTR_SET_SHARED_FLAG(s1);
- s1->as.heap.aux.shared->refcnt++;
- }
- else {
- if (len <= RSTRING_EMBED_LEN_MAX) {
- RSTR_UNSET_SHARED_FLAG(s1);
- RSTR_SET_EMBED_FLAG(s1);
- memcpy(s1->as.ary, RSTR_PTR(s2), len);
- RSTR_SET_EMBED_LEN(s1, len);
- }
- else {
- str_make_shared(mrb, s2);
- goto L_SHARE;
- }
- }
-
- return mrb_obj_value(s1);
-}
-
/* 15.2.10.5.24 */
/* 15.2.10.5.28 */
/*
@@ -1450,13 +1670,13 @@ mrb_str_init(mrb_state *mrb, mrb_value self)
*
* 'cat and dog'.to_sym #=> :"cat and dog"
*/
-mrb_value
+MRB_API mrb_value
mrb_str_intern(mrb_state *mrb, mrb_value self)
{
return mrb_symbol_value(mrb_intern_str(mrb, self));
}
/* ---------------------------------- */
-mrb_value
+MRB_API mrb_value
mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
{
mrb_value str;
@@ -1470,7 +1690,7 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
return str;
}
-mrb_value
+MRB_API mrb_value
mrb_ptr_to_str(mrb_state *mrb, void *p)
{
struct RString *p_str;
@@ -1500,119 +1720,93 @@ mrb_ptr_to_str(mrb_state *mrb, void *p)
return mrb_obj_value(p_str);
}
-mrb_value
+MRB_API mrb_value
mrb_string_type(mrb_state *mrb, mrb_value str)
{
return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
}
-mrb_value
+MRB_API mrb_value
mrb_check_string_type(mrb_state *mrb, mrb_value str)
{
return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
}
-/* ---------------------------------- */
-/* 15.2.10.5.29 */
+/* 15.2.10.5.30 */
/*
* call-seq:
- * str.reverse => new_str
- *
- * Returns a new string with the characters from <i>str</i> in reverse order.
+ * str.reverse! => str
*
- * "stressed".reverse #=> "desserts"
+ * Reverses <i>str</i> in place.
*/
static mrb_value
-mrb_str_reverse(mrb_state *mrb, mrb_value str)
+mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
- struct RString *s2;
- char *s, *e, *p;
+#ifdef MRB_UTF8_STRING
+ mrb_int utf8_len = RSTRING_CHAR_LEN(str);
+ mrb_int len = RSTRING_LEN(str);
+
+ if (utf8_len == len) goto bytes;
+ if (utf8_len > 1) {
+ char *buf;
+ char *p, *e, *r;
- if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str);
+ mrb_str_modify(mrb, mrb_str_ptr(str));
+ len = RSTRING_LEN(str);
+ buf = mrb_malloc(mrb, (size_t)len);
+ p = buf;
+ e = buf + len;
- s2 = str_new(mrb, 0, RSTRING_LEN(str));
- str_with_class(mrb, s2, str);
- s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
- p = RSTR_PTR(s2);
+ memcpy(buf, RSTRING_PTR(str), len);
+ r = RSTRING_PTR(str) + len;
- while (e >= s) {
- *p++ = *e--;
+ while (p<e) {
+ mrb_int clen = utf8len(p, e);
+ r -= clen;
+ memcpy(r, p, clen);
+ p += clen;
+ }
+ mrb_free(mrb, buf);
}
- return mrb_obj_value(s2);
-}
+ return str;
-/* 15.2.10.5.30 */
-/*
- * call-seq:
- * str.reverse! => str
- *
- * Reverses <i>str</i> in place.
- */
-static mrb_value
-mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
-{
- struct RString *s = mrb_str_ptr(str);
- char *p, *e;
- char c;
+ bytes:
+#endif
+ {
+ struct RString *s = mrb_str_ptr(str);
+ char *p, *e;
+ char c;
- mrb_str_modify(mrb, s);
- if (RSTR_LEN(s) > 1) {
- p = RSTR_PTR(s);
- e = p + RSTR_LEN(s) - 1;
- while (p < e) {
+ mrb_str_modify(mrb, s);
+ if (RSTR_LEN(s) > 1) {
+ p = RSTR_PTR(s);
+ e = p + RSTR_LEN(s) - 1;
+ while (p < e) {
c = *p;
*p++ = *e;
*e-- = c;
+ }
}
+ return str;
}
- return str;
}
+/* ---------------------------------- */
+/* 15.2.10.5.29 */
/*
* call-seq:
- * str.rindex(substring [, fixnum]) => fixnum or nil
- * str.rindex(fixnum [, fixnum]) => fixnum or nil
- * str.rindex(regexp [, fixnum]) => fixnum or nil
+ * str.reverse => new_str
*
- * Returns the index of the last occurrence of the given <i>substring</i>,
- * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
- * <code>nil</code> if not found. If the second parameter is present, it
- * specifies the position in the string to end the search---characters beyond
- * this point will not be considered.
+ * Returns a new string with the characters from <i>str</i> in reverse order.
*
- * "hello".rindex('e') #=> 1
- * "hello".rindex('l') #=> 3
- * "hello".rindex('a') #=> nil
- * "hello".rindex(101) #=> 1
- * "hello".rindex(/[aeiou]/, -2) #=> 1
+ * "stressed".reverse #=> "desserts"
*/
-static mrb_int
-mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+static mrb_value
+mrb_str_reverse(mrb_state *mrb, mrb_value str)
{
- char *s, *sbeg, *t;
- struct RString *ps = mrb_str_ptr(str);
- mrb_int len = RSTRING_LEN(sub);
-
- /* substring longer than string */
- if (RSTR_LEN(ps) < len) return -1;
- if (RSTR_LEN(ps) - pos < len) {
- pos = RSTR_LEN(ps) - len;
- }
- sbeg = RSTR_PTR(ps);
- s = RSTR_PTR(ps) + pos;
- t = RSTRING_PTR(sub);
- if (len) {
- while (sbeg <= s) {
- if (memcmp(s, t, len) == 0) {
- return s - RSTR_PTR(ps);
- }
- s--;
- }
- return -1;
- }
- else {
- return pos;
- }
+ mrb_value str2 = mrb_str_dup(mrb, str);
+ mrb_str_reverse_bang(mrb, str2);
+ return str2;
}
/* 15.2.10.5.31 */
@@ -1635,13 +1829,13 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
* "hello".rindex(/[aeiou]/, -2) #=> 1
*/
static mrb_value
-mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
+mrb_str_rindex(mrb_state *mrb, mrb_value str)
{
mrb_value *argv;
mrb_int argc;
mrb_value sub;
mrb_value vpos;
- mrb_int pos, len = RSTRING_LEN(str);
+ mrb_int pos, len = RSTRING_CHAR_LEN(str);
mrb_get_args(mrb, "*", &argv, &argc);
if (argc == 2) {
@@ -1651,7 +1845,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
if (pos < 0) {
pos += len;
if (pos < 0) {
- regexp_check(mrb, sub);
+ mrb_regexp_check(mrb, sub);
return mrb_nil_value();
}
}
@@ -1664,20 +1858,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
else
sub = mrb_nil_value();
}
- regexp_check(mrb, sub);
+ pos = chars2bytes(str, 0, pos);
+ len = chars2bytes(str, pos, len);
+ mrb_regexp_check(mrb, sub);
switch (mrb_type(sub)) {
- case MRB_TT_FIXNUM: {
- int c = mrb_fixnum(sub);
- mrb_int len = RSTRING_LEN(str);
- unsigned char *p = (unsigned char*)RSTRING_PTR(str);
-
- for (pos=len-1;pos>=0;pos--) {
- if (p[pos] == c) return mrb_fixnum_value(pos);
- }
- return mrb_nil_value();
- }
-
default: {
mrb_value tmp;
@@ -1689,8 +1874,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
}
/* fall through */
case MRB_TT_STRING:
- pos = mrb_str_rindex(mrb, str, sub, pos);
- if (pos >= 0) return mrb_fixnum_value(pos);
+ pos = str_rindex(mrb, str, sub, pos);
+ if (pos >= 0) {
+ pos = bytes2chars(RSTRING_PTR(str), pos);
+ return mrb_fixnum_value(pos);
+ }
break;
} /* end of switch (TYPE(sub)) */
@@ -1701,7 +1889,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
/*
* call-seq:
- * str.split(pattern=$;, [limit]) => anArray
+ * str.split(pattern="\n", [limit]) => anArray
*
* Divides <i>str</i> into substrings based on a delimiter, returning an array
* of these substrings.
@@ -1717,7 +1905,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
*
* If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
* <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
- * split on whitespace as if ` ' were specified.
+ * split on whitespace as if ' ' were specified.
*
* If the <i>limit</i> parameter is omitted, trailing null fields are
* suppressed. If <i>limit</i> is a positive number, at most that number of
@@ -1729,10 +1917,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
* " now's the time".split #=> ["now's", "the", "time"]
* " now's the time".split(' ') #=> ["now's", "the", "time"]
* " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
- * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
* "hello".split(//) #=> ["h", "e", "l", "l", "o"]
* "hello".split(//, 3) #=> ["h", "e", "llo"]
- * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
*
* "mellow yellow".split("ello") #=> ["m", "w y", "w"]
* "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
@@ -1774,83 +1960,73 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
else {
- noregexp(mrb, str);
+ mrb_noregexp(mrb, str);
}
}
result = mrb_ary_new(mrb);
beg = 0;
if (split_type == awk) {
- char *ptr = RSTRING_PTR(str);
- char *eptr = RSTRING_END(str);
- char *bptr = ptr;
mrb_bool skip = TRUE;
+ mrb_int idx = 0;
+ mrb_int str_len = RSTRING_LEN(str);
unsigned int c;
+ int ai = mrb_gc_arena_save(mrb);
- end = beg;
- while (ptr < eptr) {
- int ai = mrb_gc_arena_save(mrb);
- c = (unsigned char)*ptr++;
+ idx = end = beg;
+ while (idx < str_len) {
+ c = (unsigned char)RSTRING_PTR(str)[idx++];
if (skip) {
if (ISSPACE(c)) {
- beg = ptr - bptr;
+ beg = idx;
}
else {
- end = ptr - bptr;
+ end = idx;
skip = FALSE;
if (lim_p && lim <= i) break;
}
}
else if (ISSPACE(c)) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+ mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai);
skip = TRUE;
- beg = ptr - bptr;
+ beg = idx;
if (lim_p) ++i;
}
else {
- end = ptr - bptr;
+ end = idx;
}
}
}
else if (split_type == string) {
- char *ptr = RSTRING_PTR(str); // s->as.ary
- char *temp = ptr;
- char *eptr = RSTRING_END(str);
- mrb_int slen = RSTRING_LEN(spat);
-
- if (slen == 0) {
- int ai = mrb_gc_arena_save(mrb);
- while (ptr < eptr) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
- mrb_gc_arena_restore(mrb, ai);
- ptr++;
- if (lim_p && lim <= ++i) break;
+ mrb_int str_len = RSTRING_LEN(str);
+ mrb_int pat_len = RSTRING_LEN(spat);
+ mrb_int idx = 0;
+ int ai = mrb_gc_arena_save(mrb);
+
+ while (idx < str_len) {
+ if (pat_len > 0) {
+ end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
+ if (end < 0) break;
+ } else {
+ end = chars2bytes(str, idx, 1);
}
+ mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
+ mrb_gc_arena_restore(mrb, ai);
+ idx += end + pat_len;
+ if (lim_p && lim <= ++i) break;
}
- else {
- char *sptr = RSTRING_PTR(spat);
- int ai = mrb_gc_arena_save(mrb);
-
- while (ptr < eptr &&
- (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
- mrb_gc_arena_restore(mrb, ai);
- ptr += end + slen;
- if (lim_p && lim <= ++i) break;
- }
- }
- beg = ptr - temp;
+ beg = idx;
}
else {
- noregexp(mrb, str);
+ mrb_noregexp(mrb, str);
}
if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg) {
tmp = mrb_str_new_empty(mrb, str);
}
else {
- tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+ tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
}
mrb_ary_push(mrb, result, tmp);
}
@@ -1864,44 +2040,39 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
return result;
}
-mrb_value
-mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+MRB_API mrb_value
+mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, int base, int badcheck)
{
- const char *p;
+ const char *p = str;
+ const char *pend = str + len;
char sign = 1;
- int c, uscore;
- unsigned long n = 0;
+ int c;
+ uint64_t n = 0;
mrb_int val;
-#undef ISDIGIT
-#define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
#define conv_digit(c) \
- (!ISASCII(c) ? -1 : \
- isdigit(c) ? ((c) - '0') : \
- islower(c) ? ((c) - 'a' + 10) : \
- isupper(c) ? ((c) - 'A' + 10) : \
+ (ISDIGIT(c) ? ((c) - '0') : \
+ ISLOWER(c) ? ((c) - 'a' + 10) : \
+ ISUPPER(c) ? ((c) - 'A' + 10) : \
-1)
- if (!str) {
+ if (!p) {
if (badcheck) goto bad;
return mrb_fixnum_value(0);
}
- while (ISSPACE(*str)) str++;
+ while (p<pend && ISSPACE(*p))
+ p++;
- if (str[0] == '+') {
- str++;
+ if (p[0] == '+') {
+ p++;
}
- else if (str[0] == '-') {
- str++;
+ else if (p[0] == '-') {
+ p++;
sign = 0;
}
- if (str[0] == '+' || str[0] == '-') {
- if (badcheck) goto bad;
- return mrb_fixnum_value(0);
- }
if (base <= 0) {
- if (str[0] == '0') {
- switch (str[1]) {
+ if (p[0] == '0') {
+ switch (p[1]) {
case 'x': case 'X':
base = 16;
break;
@@ -1916,6 +2087,7 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
break;
default:
base = 8;
+ break;
}
}
else if (base < -1) {
@@ -1927,27 +2099,27 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
}
switch (base) {
case 2:
- if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'b'||p[1] == 'B')) {
+ p += 2;
}
break;
case 3:
break;
case 8:
- if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'o'||p[1] == 'O')) {
+ p += 2;
}
case 4: case 5: case 6: case 7:
break;
case 10:
- if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'd'||p[1] == 'D')) {
+ p += 2;
}
case 9: case 11: case 12: case 13: case 14: case 15:
break;
case 16:
- if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'x'||p[1] == 'X')) {
+ p += 2;
}
break;
default:
@@ -1956,96 +2128,109 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
}
break;
} /* end of switch (base) { */
- if (*str == '0') { /* squeeze preceeding 0s */
- uscore = 0;
- while ((c = *++str) == '0' || c == '_') {
+ if (p>=pend) {
+ if (badcheck) goto bad;
+ return mrb_fixnum_value(0);
+ }
+ if (*p == '0') { /* squeeze preceding 0s */
+ p++;
+ while (p<pend) {
+ c = *p++;
if (c == '_') {
- if (++uscore >= 2)
+ if (p<pend && *p == '_') {
+ if (badcheck) goto bad;
break;
+ }
+ continue;
+ }
+ if (c != '0') {
+ p--;
+ break;
}
- else
- uscore = 0;
}
- if (!(c = *str) || ISSPACE(c)) --str;
}
- c = *str;
- c = conv_digit(c);
- if (c < 0 || c >= base) {
+ if (p == pend) {
if (badcheck) goto bad;
return mrb_fixnum_value(0);
}
-
- uscore = 0;
- for (p=str;*p;p++) {
+ for ( ;p<pend;p++) {
if (*p == '_') {
- if (uscore == 0) {
- uscore++;
+ p++;
+ if (p==pend) {
+ if (badcheck) goto bad;
continue;
}
- if (badcheck) goto bad;
- break;
+ if (*p == '_') {
+ if (badcheck) goto bad;
+ break;
+ }
+ }
+ if (badcheck && *p == '\0') {
+ goto nullbyte;
}
- uscore = 0;
c = conv_digit(*p);
if (c < 0 || c >= base) {
- if (badcheck) goto bad;
break;
}
n *= base;
n += c;
+ if (n > (uint64_t)MRB_INT_MAX + (sign ? 0 : 1)) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer",
+ mrb_str_new(mrb, str, pend-str));
+ }
}
- if (n > MRB_INT_MAX) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
- }
- val = n;
+ val = (mrb_int)n;
if (badcheck) {
if (p == str) goto bad; /* no number */
- while (*p && ISSPACE(*p)) p++;
- if (*p) goto bad; /* trailing garbage */
+ while (p<pend && ISSPACE(*p)) p++;
+ if (p<pend) goto bad; /* trailing garbage */
}
return mrb_fixnum_value(sign ? val : -val);
-bad:
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
+ nullbyte:
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ /* not reached */
+ bad:
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)",
+ mrb_inspect(mrb, mrb_str_new(mrb, str, pend-str)));
/* not reached */
return mrb_fixnum_value(0);
}
-char *
+MRB_API mrb_value
+mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+{
+ return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
+}
+
+MRB_API const char*
mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
{
- struct RString *ps = mrb_str_ptr(*ptr);
+ mrb_value str = mrb_str_to_str(mrb, *ptr);
+ struct RString *ps = mrb_str_ptr(str);
mrb_int len = mrb_str_strlen(mrb, ps);
char *p = RSTR_PTR(ps);
if (!p || p[len] != '\0') {
+ if (RSTR_FROZEN_P(ps)) {
+ *ptr = str = mrb_str_dup(mrb, str);
+ ps = mrb_str_ptr(str);
+ }
mrb_str_modify(mrb, ps);
return RSTR_PTR(ps);
}
return p;
}
-mrb_value
+MRB_API mrb_value
mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
{
- char *s;
+ const char *s;
mrb_int len;
- str = mrb_str_to_str(mrb, str);
- if (badcheck) {
- s = mrb_string_value_cstr(mrb, &str);
- }
- else {
- s = RSTRING_PTR(str);
- }
- if (s) {
- len = RSTRING_LEN(str);
- if (s[len]) { /* no sentinel somehow */
- struct RString *temp_str = str_new(mrb, s, len);
- s = RSTR_PTR(temp_str);
- }
- }
- return mrb_cstr_to_inum(mrb, s, base, badcheck);
+ s = mrb_string_value_ptr(mrb, str);
+ len = RSTRING_LEN(str);
+ return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
}
/* 15.2.10.5.38 */
@@ -2081,16 +2266,14 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self)
return mrb_str_to_inum(mrb, self, base, FALSE);
}
-double
+MRB_API double
mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
{
char *end;
+ char buf[DBL_DIG * 4 + 10];
double d;
enum {max_width = 20};
-#define OutOfRange() (((w = end - p) > max_width) ? \
- (w = max_width, ellipsis = "...") : \
- (w = (int)(end - p), ellipsis = ""))
if (!p) return 0.0;
while (ISSPACE(*p)) p++;
@@ -2108,7 +2291,6 @@ bad:
return d;
}
if (*end) {
- char buf[DBL_DIG * 4 + 10];
char *n = buf;
char *e = buf + sizeof(buf) - 1;
char prev = 0;
@@ -2147,7 +2329,7 @@ bad:
return d;
}
-double
+MRB_API double
mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
{
char *s;
@@ -2242,7 +2424,7 @@ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
*
* Returns a copy of <i>str</i> with all lowercase letters replaced with their
* uppercase counterparts. The operation is locale insensitive---only
- * characters ``a'' to ``z'' are affected.
+ * characters 'a' to 'z' are affected.
*
* "hEllO".upcase #=> "HELLO"
*/
@@ -2377,30 +2559,30 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
return mrb_obj_value(result);
}
-mrb_value
+MRB_API mrb_value
mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
{
str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
return str;
}
-mrb_value
+MRB_API mrb_value
mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
{
return mrb_str_cat(mrb, str, ptr, strlen(ptr));
}
-mrb_value
+MRB_API mrb_value
mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
{
return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
}
-mrb_value
-mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
+MRB_API mrb_value
+mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
{
str2 = mrb_str_to_str(mrb, str2);
- return mrb_str_cat_str(mrb, str, str2);
+ return mrb_str_cat_str(mrb, str1, str2);
}
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
@@ -2426,7 +2608,21 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
p = RSTRING_PTR(str); pend = RSTRING_END(str);
for (;p < pend; p++) {
unsigned char c, cc;
+#ifdef MRB_UTF8_STRING
+ mrb_int clen;
+ clen = utf8len(p, pend);
+ if (clen > 1) {
+ mrb_int i;
+
+ for (i=0; i<clen; i++) {
+ buf[i] = p[i];
+ }
+ mrb_str_cat(mrb, result, buf, clen);
+ p += clen-1;
+ continue;
+ }
+#endif
c = *p;
if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
buf[0] = '\\'; buf[1] = c;
@@ -2500,7 +2696,7 @@ mrb_init_string(mrb_state *mrb)
mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string");
- s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
+ mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
@@ -2511,7 +2707,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
- mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.8 */
+ mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
@@ -2521,9 +2717,9 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
- mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.20 */
+ mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */
mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
- mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
+ mrb_define_method(mrb, s, "index", mrb_str_index, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
@@ -2531,7 +2727,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
- mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
+ mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
@@ -2541,8 +2737,10 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
- mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_REQ(1)); /* 15.2.10.5.42 */
- mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.43 */
+ mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
+ mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, s, "freeze", mrb_str_freeze, MRB_ARGS_NONE());
}