summaryrefslogtreecommitdiffhomepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r--src/string.c2672
1 files changed, 1528 insertions, 1144 deletions
diff --git a/src/string.c b/src/string.c
index 23cd76747..03d9c8a9c 100644
--- a/src/string.c
+++ b/src/string.c
@@ -4,65 +4,209 @@
** See Copyright Notice in mruby.h
*/
-#include <ctype.h>
+#ifdef _MSC_VER
+# define _CRT_NONSTDC_NO_DEPRECATE
+#endif
+
+#ifndef MRB_NO_FLOAT
#include <float.h>
+#include <math.h>
+#endif
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
-#include "mruby.h"
-#include "mruby/array.h"
-#include "mruby/class.h"
-#include "mruby/range.h"
-#include "mruby/string.h"
-#include "mruby/numeric.h"
-#include "mruby/re.h"
-
-const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+#include <mruby.h>
+#include <mruby/array.h>
+#include <mruby/class.h>
+#include <mruby/range.h>
+#include <mruby/string.h>
+#include <mruby/numeric.h>
+#include <mruby/presym.h>
typedef struct mrb_shared_string {
- mrb_bool nofree : 1;
int refcnt;
+ mrb_ssize capa;
char *ptr;
- mrb_int len;
} mrb_shared_string;
-static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
-static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
+const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-MRB_API mrb_int
-mrb_str_strlen(mrb_state *mrb, struct RString *s)
+#define mrb_obj_alloc_string(mrb) MRB_OBJ_ALLOC((mrb), MRB_TT_STRING, (mrb)->string_class)
+
+static struct RString*
+str_init_normal_capa(mrb_state *mrb, struct RString *s,
+ const char *p, size_t len, size_t capa)
+{
+ char *dst = (char *)mrb_malloc(mrb, capa + 1);
+ if (p) memcpy(dst, p, len);
+ dst[len] = '\0';
+ s->as.heap.ptr = dst;
+ s->as.heap.len = (mrb_ssize)len;
+ s->as.heap.aux.capa = (mrb_ssize)capa;
+ RSTR_UNSET_TYPE_FLAG(s);
+ return s;
+}
+
+static struct RString*
+str_init_normal(mrb_state *mrb, struct RString *s, const char *p, size_t len)
{
- mrb_int i, max = RSTR_LEN(s);
- char *p = RSTR_PTR(s);
+ return str_init_normal_capa(mrb, s, p, len, len);
+}
- if (!p) return 0;
- for (i=0; i<max; i++) {
- if (p[i] == '\0') {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
- }
+static struct RString*
+str_init_embed(struct RString *s, const char *p, size_t len)
+{
+ if (p) memcpy(RSTR_EMBED_PTR(s), p, len);
+ RSTR_EMBED_PTR(s)[len] = '\0';
+ RSTR_SET_TYPE_FLAG(s, EMBED);
+ RSTR_SET_EMBED_LEN(s, len);
+ return s;
+}
+
+static struct RString*
+str_init_nofree(struct RString *s, const char *p, size_t len)
+{
+ s->as.heap.ptr = (char *)p;
+ s->as.heap.len = (mrb_ssize)len;
+ s->as.heap.aux.capa = 0; /* nofree */
+ RSTR_SET_TYPE_FLAG(s, NOFREE);
+ return s;
+}
+
+static struct RString*
+str_init_shared(mrb_state *mrb, const struct RString *orig, struct RString *s, mrb_shared_string *shared)
+{
+ if (shared) {
+ shared->refcnt++;
}
- return max;
+ else {
+ shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
+ shared->refcnt = 1;
+ shared->ptr = orig->as.heap.ptr;
+ shared->capa = orig->as.heap.aux.capa;
+ }
+ s->as.heap.ptr = orig->as.heap.ptr;
+ s->as.heap.len = orig->as.heap.len;
+ s->as.heap.aux.shared = shared;
+ RSTR_SET_TYPE_FLAG(s, SHARED);
+ return s;
}
-static inline void
-resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
+static struct RString*
+str_init_fshared(const struct RString *orig, struct RString *s, struct RString *fshared)
+{
+ s->as.heap.ptr = orig->as.heap.ptr;
+ s->as.heap.len = orig->as.heap.len;
+ s->as.heap.aux.fshared = fshared;
+ RSTR_SET_TYPE_FLAG(s, FSHARED);
+ return s;
+}
+
+static struct RString*
+str_init_modifiable(mrb_state *mrb, struct RString *s, const char *p, size_t len)
+{
+ if (RSTR_EMBEDDABLE_P(len)) {
+ return str_init_embed(s, p, len);
+ }
+ else {
+ return str_init_normal(mrb, s, p, len);
+ }
+}
+
+static struct RString*
+str_new_static(mrb_state *mrb, const char *p, size_t len)
+{
+ if (RSTR_EMBEDDABLE_P(len)) {
+ return str_init_embed(mrb_obj_alloc_string(mrb), p, len);
+ }
+ if (len >= MRB_SSIZE_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ return str_init_nofree(mrb_obj_alloc_string(mrb), p, len);
+}
+
+static struct RString*
+str_new(mrb_state *mrb, const char *p, size_t len)
+{
+ if (RSTR_EMBEDDABLE_P(len)) {
+ return str_init_embed(mrb_obj_alloc_string(mrb), p, len);
+ }
+ if (len >= MRB_SSIZE_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ if (p && mrb_ro_data_p(p)) {
+ return str_init_nofree(mrb_obj_alloc_string(mrb), p, len);
+ }
+ return str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len);
+}
+
+MRB_API mrb_value
+mrb_str_new_capa(mrb_state *mrb, size_t capa)
+{
+ struct RString *s;
+
+ if (RSTR_EMBEDDABLE_P(capa)) {
+ s = str_init_embed(mrb_obj_alloc_string(mrb), NULL, 0);
+ }
+ else if (capa >= MRB_SSIZE_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
+ /* not reached */
+ s = NULL;
+ }
+ else {
+ s = str_init_normal_capa(mrb, mrb_obj_alloc_string(mrb), NULL, 0, capa);
+ }
+
+ return mrb_obj_value(s);
+}
+
+static void
+resize_capa(mrb_state *mrb, struct RString *s, size_t capacity)
{
+#if SIZE_MAX > MRB_SSIZE_MAX
+ mrb_assert(capacity < MRB_SSIZE_MAX);
+#endif
if (RSTR_EMBED_P(s)) {
- if (RSTRING_EMBED_LEN_MAX < capacity) {
- char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
- const mrb_int len = RSTR_EMBED_LEN(s);
- memcpy(tmp, s->as.ary, len);
- RSTR_UNSET_EMBED_FLAG(s);
- s->as.heap.ptr = tmp;
- s->as.heap.len = len;
- s->as.heap.aux.capa = capacity;
+ if (!RSTR_EMBEDDABLE_P(capacity)) {
+ str_init_normal_capa(mrb, s, RSTR_EMBED_PTR(s), RSTR_EMBED_LEN(s), capacity);
}
}
else {
- s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
- s->as.heap.aux.capa = capacity;
+ s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
+ s->as.heap.aux.capa = (mrb_ssize)capacity;
+ }
+}
+
+MRB_API mrb_value
+mrb_str_new(mrb_state *mrb, const char *p, size_t len)
+{
+ return mrb_obj_value(str_new(mrb, p, len));
+}
+
+MRB_API mrb_value
+mrb_str_new_cstr(mrb_state *mrb, const char *p)
+{
+ struct RString *s;
+ size_t len;
+
+ if (p) {
+ len = strlen(p);
+ }
+ else {
+ len = 0;
}
+
+ s = str_new(mrb, p, len);
+
+ return mrb_obj_value(s);
+}
+
+MRB_API mrb_value
+mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
+{
+ struct RString *s = str_new_static(mrb, p, len);
+ return mrb_obj_value(s);
}
static void
@@ -70,341 +214,643 @@ str_decref(mrb_state *mrb, mrb_shared_string *shared)
{
shared->refcnt--;
if (shared->refcnt == 0) {
- if (!shared->nofree) {
- mrb_free(mrb, shared->ptr);
- }
+ mrb_free(mrb, shared->ptr);
mrb_free(mrb, shared);
}
}
-MRB_API void
-mrb_str_modify(mrb_state *mrb, struct RString *s)
+static void
+str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
{
if (RSTR_SHARED_P(s)) {
mrb_shared_string *shared = s->as.heap.aux.shared;
if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
- s->as.heap.ptr = shared->ptr;
- s->as.heap.aux.capa = shared->len;
- RSTR_PTR(s)[s->as.heap.len] = '\0';
+ s->as.heap.aux.capa = shared->capa;
+ s->as.heap.ptr[s->as.heap.len] = '\0';
+ RSTR_UNSET_SHARED_FLAG(s);
mrb_free(mrb, shared);
}
else {
- char *ptr, *p;
- mrb_int len;
-
- p = RSTR_PTR(s);
- len = s->as.heap.len;
- ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
- if (p) {
- memcpy(ptr, p, len);
- }
- ptr[len] = '\0';
- s->as.heap.ptr = ptr;
- s->as.heap.aux.capa = len;
+ str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
str_decref(mrb, shared);
}
- RSTR_UNSET_SHARED_FLAG(s);
- return;
}
- if (RSTR_NOFREE_P(s)) {
- char *p = s->as.heap.ptr;
+ else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
+ str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
+ }
+}
- s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1);
- if (p) {
- memcpy(RSTR_PTR(s), p, s->as.heap.len);
- }
- RSTR_PTR(s)[s->as.heap.len] = '\0';
- s->as.heap.aux.capa = s->as.heap.len;
- RSTR_UNSET_NOFREE_FLAG(s);
- return;
+static void
+check_null_byte(mrb_state *mrb, mrb_value str)
+{
+ mrb_to_str(mrb, str);
+ if (memchr(RSTRING_PTR(str), '\0', RSTRING_LEN(str))) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
}
}
-MRB_API mrb_value
-mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
+void
+mrb_gc_free_str(mrb_state *mrb, struct RString *str)
+{
+ if (RSTR_EMBED_P(str))
+ /* no code */;
+ else if (RSTR_SHARED_P(str))
+ str_decref(mrb, str->as.heap.aux.shared);
+ else if (!RSTR_NOFREE_P(str) && !RSTR_FSHARED_P(str))
+ mrb_free(mrb, str->as.heap.ptr);
+}
+
+#ifdef MRB_UTF8_STRING
+static const char utf8len_codepage[256] =
+{
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
+};
+
+#define utf8_islead(c) ((unsigned char)((c)&0xc0) != 0x80)
+
+mrb_int
+mrb_utf8len(const char* p, const char* e)
+{
+ mrb_int len;
+ mrb_int i;
+
+ if ((unsigned char)*p < 0x80) return 1;
+ len = utf8len_codepage[(unsigned char)*p];
+ if (len == 1) return 1;
+ if (len > e - p) return 1;
+ for (i = 1; i < len; ++i)
+ if (utf8_islead(p[i]))
+ return 1;
+ return len;
+}
+
+mrb_int
+mrb_utf8_strlen(const char *str, mrb_int byte_len)
+{
+ mrb_int len = 0;
+ const char *p = str;
+ const char *e = p + byte_len;
+
+ while (p < e) {
+ p += mrb_utf8len(p, e);
+ len++;
+ }
+ return len;
+}
+
+static mrb_int
+utf8_strlen(mrb_value str)
{
- mrb_int slen;
struct RString *s = mrb_str_ptr(str);
+ mrb_int byte_len = RSTR_LEN(s);
- mrb_str_modify(mrb, s);
- slen = RSTR_LEN(s);
- if (len != slen) {
- if (slen < len || slen - len > 256) {
- resize_capa(mrb, s, len);
- }
- RSTR_SET_LEN(s, len);
- RSTR_PTR(s)[len] = '\0'; /* sentinel */
+ if (RSTR_ASCII_P(s)) {
+ return byte_len;
+ }
+ else {
+ mrb_int utf8_len = mrb_utf8_strlen(RSTR_PTR(s), byte_len);
+ if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s);
+ return utf8_len;
}
- return str;
}
-#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s)
-static struct RString*
-str_new_static(mrb_state *mrb, const char *p, size_t len)
+/* map character index to byte offset index */
+static mrb_int
+chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
{
- struct RString *s;
-
- if (len >= MRB_INT_MAX) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ if (RSTR_ASCII_P(mrb_str_ptr(s))) {
+ return idx;
}
- s = mrb_obj_alloc_string(mrb);
- s->as.heap.len = len;
- s->as.heap.aux.capa = 0; /* nofree */
- s->as.heap.ptr = (char *)p;
- s->flags = MRB_STR_NOFREE;
+ else {
+ mrb_int i, b, n;
+ const char *p = RSTRING_PTR(s) + off;
+ const char *e = RSTRING_END(s);
- return s;
+ for (b=i=0; p<e && i<idx; i++) {
+ n = mrb_utf8len(p, e);
+ b += n;
+ p += n;
+ }
+ return b;
+ }
}
-static struct RString*
-str_new(mrb_state *mrb, const char *p, size_t len)
+/* map byte offset to character index */
+static mrb_int
+bytes2chars(char *p, mrb_int len, mrb_int bi)
{
- struct RString *s;
+ const char *e = p + (size_t)len;
+ const char *pivot = p + bi;
+ mrb_int i;
- if (mrb_ro_data_p(p)) {
- return str_new_static(mrb, p, len);
+ for (i = 0; p < pivot; i ++) {
+ p += mrb_utf8len(p, e);
}
- s = mrb_obj_alloc_string(mrb);
- if (len < RSTRING_EMBED_LEN_MAX) {
- RSTR_SET_EMBED_FLAG(s);
- RSTR_SET_EMBED_LEN(s, len);
- if (p) {
- memcpy(s->as.ary, p, len);
- }
- } else {
- if (len >= MRB_INT_MAX) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ if (p != pivot) return -1;
+ return i;
+}
+
+static const char *
+char_adjust(const char *beg, const char *end, const char *ptr)
+{
+ if ((ptr > beg || ptr < end) && (*ptr & 0xc0) == 0x80) {
+ const int utf8_adjust_max = 3;
+ const char *p;
+
+ if (ptr - beg > utf8_adjust_max) {
+ beg = ptr - utf8_adjust_max;
}
- s->as.heap.len = len;
- s->as.heap.aux.capa = len;
- s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1);
- if (p) {
- memcpy(s->as.heap.ptr, p, len);
+
+ p = ptr;
+ while (p > beg) {
+ p --;
+ if ((*p & 0xc0) != 0x80) {
+ int clen = mrb_utf8len(p, end);
+ if (clen > ptr - p) return p;
+ break;
+ }
}
}
- RSTR_PTR(s)[len] = '\0';
- return s;
-}
-static inline void
-str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
-{
- s->c = mrb_str_ptr(obj)->c;
+ return ptr;
}
-static mrb_value
-mrb_str_new_empty(mrb_state *mrb, mrb_value str)
+static const char *
+char_backtrack(const char *ptr, const char *end)
{
- struct RString *s = str_new(mrb, 0, 0);
+ if (ptr < end) {
+ const int utf8_bytelen_max = 4;
+ const char *p;
- str_with_class(mrb, s, str);
- return mrb_obj_value(s);
-}
+ if (end - ptr > utf8_bytelen_max) {
+ ptr = end - utf8_bytelen_max;
+ }
-#ifndef MRB_STR_BUF_MIN_SIZE
-# define MRB_STR_BUF_MIN_SIZE 128
-#endif
+ p = end;
+ while (p > ptr) {
+ p --;
+ if ((*p & 0xc0) != 0x80) {
+ int clen = utf8len_codepage[(unsigned char)*p];
+ if (clen == end - p) { return p; }
+ break;
+ }
+ }
+ }
-MRB_API mrb_value
-mrb_str_buf_new(mrb_state *mrb, size_t capa)
+ return end - 1;
+}
+
+static mrb_int
+str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, const char *s, const mrb_int slen, mrb_int off)
{
- struct RString *s;
+ /* Based on Quick Search algorithm (Boyer-Moore-Horspool algorithm) */
- s = mrb_obj_alloc_string(mrb);
+ ptrdiff_t qstable[1 << CHAR_BIT];
- if (capa >= MRB_INT_MAX) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
+ /* Preprocessing */
+ {
+ mrb_int i;
+
+ for (i = 0; i < 1 << CHAR_BIT; i ++) {
+ qstable[i] = slen;
+ }
+ for (i = 0; i < slen; i ++) {
+ qstable[(unsigned char)s[i]] = slen - (i + 1);
+ }
}
- if (capa < MRB_STR_BUF_MIN_SIZE) {
- capa = MRB_STR_BUF_MIN_SIZE;
+
+ /* Searching */
+ while (p < pend && pend - p >= slen) {
+ const char *pivot;
+
+ if (memcmp(p, s, slen) == 0) {
+ return off;
+ }
+
+ pivot = p + qstable[(unsigned char)p[slen - 1]];
+ if (pivot >= pend || pivot < p /* overflowed */) { return -1; }
+
+ do {
+ p += mrb_utf8len(p, pend);
+ off ++;
+ } while (p < pivot);
}
- s->as.heap.len = 0;
- s->as.heap.aux.capa = capa;
- s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1);
- RSTR_PTR(s)[0] = '\0';
- return mrb_obj_value(s);
+ return -1;
}
-static void
-str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
+static mrb_int
+str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
{
- size_t capa;
- size_t total;
- ptrdiff_t off = -1;
+ const char *p = RSTRING_PTR(str);
+ const char *pend = p + RSTRING_LEN(str);
+ const char *s = RSTRING_PTR(sub);
+ const mrb_int slen = RSTRING_LEN(sub);
+ mrb_int off = pos;
- if (len == 0) return;
- mrb_str_modify(mrb, s);
- if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
- off = ptr - RSTR_PTR(s);
+ for (; pos > 0; pos --) {
+ if (pend - p < 1) { return -1; }
+ p += mrb_utf8len(p, pend);
}
- if (RSTR_EMBED_P(s))
- capa = RSTRING_EMBED_LEN_MAX;
- else
- capa = s->as.heap.aux.capa;
+ if (slen < 1) { return off; }
- if (RSTR_LEN(s) >= MRB_INT_MAX - (mrb_int)len) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
- }
- total = RSTR_LEN(s)+len;
- if (capa <= total) {
- while (total > capa) {
- if (capa + 1 >= MRB_INT_MAX / 2) {
- capa = (total + 4095) / 4096;
- break;
+ return str_index_str_by_char_search(mrb, p, pend, s, slen, off);
+}
+
+#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
+#else
+#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
+#define chars2bytes(p, off, ci) (ci)
+#define bytes2chars(p, end, bi) (bi)
+#define char_adjust(beg, end, ptr) (ptr)
+#define char_backtrack(ptr, end) ((end) - 1)
+#define BYTES_ALIGN_CHECK(pos)
+#define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos)
+#endif
+
+#ifndef MRB_QS_SHORT_STRING_LENGTH
+#define MRB_QS_SHORT_STRING_LENGTH 2048
+#endif
+
+static inline mrb_int
+mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
+{
+ if (n + m < MRB_QS_SHORT_STRING_LENGTH) {
+ const unsigned char *y = ys;
+ const unsigned char *ye = ys+n-m+1;
+
+ for (;;) {
+ y = (const unsigned char*)memchr(y, xs[0], (size_t)(ye-y));
+ if (y == NULL) return -1;
+ if (memcmp(xs, y, m) == 0) {
+ return (mrb_int)(y - ys);
}
- capa = (capa + 1) * 2;
+ y++;
}
- resize_capa(mrb, s, capa);
+ return -1;
}
- if (off != -1) {
- ptr = RSTR_PTR(s) + off;
+ else {
+ const unsigned char *x = xs, *xe = xs + m;
+ const unsigned char *y = ys;
+ int i;
+ ptrdiff_t qstable[256];
+
+ /* Preprocessing */
+ for (i = 0; i < 256; ++i)
+ qstable[i] = m + 1;
+ for (; x < xe; ++x)
+ qstable[*x] = xe - x;
+ /* Searching */
+ for (; y + m <= ys + n; y += *(qstable + y[m])) {
+ if (*xs == *y && memcmp(xs, y, m) == 0)
+ return (mrb_int)(y - ys);
+ }
+ return -1;
}
- memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
- mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX);
- RSTR_SET_LEN(s, total);
- RSTR_PTR(s)[total] = '\0'; /* sentinel */
}
-MRB_API mrb_value
-mrb_str_new(mrb_state *mrb, const char *p, size_t len)
+static mrb_int
+mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
{
- return mrb_obj_value(str_new(mrb, p, len));
-}
+ const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
-/*
- * call-seq: (Caution! NULL string)
- * String.new(str="") => new_str
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
+ if (m > n) return -1;
+ else if (m == n) {
+ return memcmp(x0, y0, m) == 0 ? 0 : -1;
+ }
+ else if (m < 1) {
+ return 0;
+ }
+ else if (m == 1) {
+ const unsigned char *ys = (const unsigned char *)memchr(y, *x, n);
-MRB_API mrb_value
-mrb_str_new_cstr(mrb_state *mrb, const char *p)
+ if (ys)
+ return (mrb_int)(ys - y);
+ else
+ return -1;
+ }
+ return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
+}
+
+static void
+str_share(mrb_state *mrb, struct RString *orig, struct RString *s)
{
- struct RString *s;
- size_t len;
+ size_t len = (size_t)orig->as.heap.len;
- if (p) {
- len = strlen(p);
+ mrb_assert(!RSTR_EMBED_P(orig));
+ if (RSTR_NOFREE_P(orig)) {
+ str_init_nofree(s, orig->as.heap.ptr, len);
+ }
+ else if (RSTR_SHARED_P(orig)) {
+ str_init_shared(mrb, orig, s, orig->as.heap.aux.shared);
+ }
+ else if (RSTR_FSHARED_P(orig)) {
+ str_init_fshared(orig, s, orig->as.heap.aux.fshared);
}
else {
- len = 0;
+ if (orig->as.heap.aux.capa > orig->as.heap.len) {
+ orig->as.heap.ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1);
+ orig->as.heap.aux.capa = (mrb_ssize)len;
+ }
+ str_init_shared(mrb, orig, s, NULL);
+ str_init_shared(mrb, orig, orig, s->as.heap.aux.shared);
}
+}
- s = str_new(mrb, p, len);
+mrb_value
+mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ struct RString *orig, *s;
+ orig = mrb_str_ptr(str);
+ s = mrb_obj_alloc_string(mrb);
+ if (RSTR_EMBEDDABLE_P(len)) {
+ str_init_embed(s, RSTR_PTR(orig)+beg, len);
+ }
+ else {
+ str_share(mrb, orig, s);
+ s->as.heap.ptr += (mrb_ssize)beg;
+ s->as.heap.len = (mrb_ssize)len;
+ }
+ RSTR_COPY_ASCII_FLAG(s, orig);
return mrb_obj_value(s);
}
-MRB_API mrb_value
-mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
+static void
+str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len)
{
- struct RString *s = str_new_static(mrb, p, len);
- return mrb_obj_value(s);
+ *pos = chars2bytes(str, 0, *pos);
+ *len = chars2bytes(str, *pos, *len);
+}
+#ifdef MRB_UTF8_STRING
+static inline mrb_value
+str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ str_range_to_bytes(str, &beg, &len);
+ return mrb_str_byte_subseq(mrb, str, beg, len);
}
+#else
+#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
+#endif
-void
-mrb_gc_free_str(mrb_state *mrb, struct RString *str)
+mrb_bool
+mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
{
- if (RSTR_EMBED_P(str))
- /* no code */;
- else if (RSTR_SHARED_P(str))
- str_decref(mrb, str->as.heap.aux.shared);
- else if (!RSTR_NOFREE_P(str))
- mrb_free(mrb, str->as.heap.ptr);
+ if (str_len < *begp || *lenp < 0) return FALSE;
+ if (*begp < 0) {
+ *begp += str_len;
+ if (*begp < 0) return FALSE;
+ }
+ if (*lenp > str_len - *begp)
+ *lenp = str_len - *begp;
+ if (*lenp <= 0) {
+ *lenp = 0;
+ }
+ return TRUE;
}
-MRB_API char*
-mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
+static mrb_value
+str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
- struct RString *s;
+ return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ?
+ str_subseq(mrb, str, beg, len) : mrb_nil_value();
+}
- if (!mrb_string_p(str0)) {
- mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+MRB_API mrb_int
+mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset)
+{
+ mrb_int pos;
+ char *s;
+ mrb_int len;
+
+ len = RSTRING_LEN(str);
+ if (offset < 0) {
+ offset += len;
+ if (offset < 0) return -1;
+ }
+ if (len - offset < slen) return -1;
+ s = RSTRING_PTR(str);
+ if (offset) {
+ s += offset;
+ }
+ if (slen == 0) return offset;
+ /* need proceed one character at a time */
+ len = RSTRING_LEN(str) - offset;
+ pos = mrb_memsearch(sptr, slen, s, len);
+ if (pos < 0) return pos;
+ return pos + offset;
+}
+
+static mrb_int
+str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset)
+{
+ const char *ptr;
+ mrb_int len;
+
+ ptr = RSTRING_PTR(str2);
+ len = RSTRING_LEN(str2);
+
+ return mrb_str_index(mrb, str, ptr, len, offset);
+}
+
+static mrb_value
+str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
+{
+ size_t len;
+
+ mrb_check_frozen(mrb, s1);
+ if (s1 == s2) return mrb_obj_value(s1);
+ RSTR_COPY_ASCII_FLAG(s1, s2);
+ if (RSTR_SHARED_P(s1)) {
+ str_decref(mrb, s1->as.heap.aux.shared);
+ }
+ else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1)
+ && s1->as.heap.ptr) {
+ mrb_free(mrb, s1->as.heap.ptr);
}
- s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
- if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ len = (size_t)RSTR_LEN(s2);
+ if (RSTR_EMBEDDABLE_P(len)) {
+ str_init_embed(s1, RSTR_PTR(s2), len);
}
- return RSTR_PTR(s);
+ else {
+ str_share(mrb, s2, s1);
+ }
+
+ return mrb_obj_value(s1);
}
-static void
-str_make_shared(mrb_state *mrb, struct RString *s)
+static mrb_int
+str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
{
- if (!RSTR_SHARED_P(s)) {
- mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
+ const char *s, *sbeg, *t;
+ struct RString *ps = mrb_str_ptr(str);
+ mrb_int len = RSTRING_LEN(sub);
- shared->refcnt = 1;
- if (RSTR_EMBED_P(s)) {
- const mrb_int len = RSTR_EMBED_LEN(s);
- char *const tmp = (char *)mrb_malloc(mrb, len+1);
- memcpy(tmp, s->as.ary, len);
- tmp[len] = '\0';
- RSTR_UNSET_EMBED_FLAG(s);
- s->as.heap.ptr = tmp;
- s->as.heap.len = len;
- shared->nofree = FALSE;
- shared->ptr = s->as.heap.ptr;
- }
- else if (RSTR_NOFREE_P(s)) {
- shared->nofree = TRUE;
- shared->ptr = s->as.heap.ptr;
- RSTR_UNSET_NOFREE_FLAG(s);
- }
- else {
- shared->nofree = FALSE;
- if (s->as.heap.aux.capa > s->as.heap.len) {
- s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
- }
- else {
- shared->ptr = s->as.heap.ptr;
+ /* substring longer than string */
+ if (RSTR_LEN(ps) < len) return -1;
+ if (RSTR_LEN(ps) - pos < len) {
+ pos = RSTR_LEN(ps) - len;
+ }
+ sbeg = RSTR_PTR(ps);
+ s = RSTR_PTR(ps) + pos;
+ t = RSTRING_PTR(sub);
+ if (len) {
+ s = char_adjust(sbeg, sbeg + RSTR_LEN(ps), s);
+ while (sbeg <= s) {
+ if (memcmp(s, t, len) == 0) {
+ return (mrb_int)(s - RSTR_PTR(ps));
}
+ s = char_backtrack(sbeg, s);
}
- shared->len = s->as.heap.len;
- s->as.heap.aux.shared = shared;
- RSTR_SET_SHARED_FLAG(s);
+ return -1;
+ }
+ else {
+ return pos;
}
}
-/*
- * call-seq: (Caution! String("abcd") change)
- * String("abcdefg") = String("abcd") + String("efg")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
-MRB_API void
-mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
+MRB_API mrb_int
+mrb_str_strlen(mrb_state *mrb, struct RString *s)
{
- struct RString *s1 = mrb_str_ptr(self), *s2;
- mrb_int len;
+ mrb_int i, max = RSTR_LEN(s);
+ char *p = RSTR_PTR(s);
- mrb_str_modify(mrb, s1);
- if (!mrb_string_p(other)) {
- other = mrb_str_to_str(mrb, other);
+ if (!p) return 0;
+ for (i=0; i<max; i++) {
+ if (p[i] == '\0') {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ }
}
- s2 = mrb_str_ptr(other);
- len = RSTR_LEN(s1) + RSTR_LEN(s2);
+ return max;
+}
+
+#ifdef _WIN32
+#include <windows.h>
+
+char*
+mrb_utf8_from_locale(const char *str, int len)
+{
+ wchar_t* wcsp;
+ char* mbsp;
+ int mbssize, wcssize;
+
+ if (len == 0)
+ return strdup("");
+ if (len == -1)
+ len = (int)strlen(str);
+ wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0);
+ wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
+ if (!wcsp)
+ return NULL;
+ wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1);
+ wcsp[wcssize] = 0;
+
+ mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
+ mbsp = (char*) malloc((mbssize + 1));
+ if (!mbsp) {
+ free(wcsp);
+ return NULL;
+ }
+ mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
+ mbsp[mbssize] = 0;
+ free(wcsp);
+ return mbsp;
+}
+
+char*
+mrb_locale_from_utf8(const char *utf8, int len)
+{
+ wchar_t* wcsp;
+ char* mbsp;
+ int mbssize, wcssize;
+
+ if (len == 0)
+ return strdup("");
+ if (len == -1)
+ len = (int)strlen(utf8);
+ wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0);
+ wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
+ if (!wcsp)
+ return NULL;
+ wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1);
+ wcsp[wcssize] = 0;
+ mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
+ mbsp = (char*) malloc((mbssize + 1));
+ if (!mbsp) {
+ free(wcsp);
+ return NULL;
+ }
+ mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
+ mbsp[mbssize] = 0;
+ free(wcsp);
+ return mbsp;
+}
+#endif
+
+MRB_API void
+mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
+{
+ mrb_check_frozen(mrb, s);
+ str_modify_keep_ascii(mrb, s);
+}
- if (RSTRING_CAPA(self) < len) {
- resize_capa(mrb, s1, len);
+MRB_API void
+mrb_str_modify(mrb_state *mrb, struct RString *s)
+{
+ mrb_str_modify_keep_ascii(mrb, s);
+ RSTR_UNSET_ASCII_FLAG(s);
+}
+
+MRB_API mrb_value
+mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
+{
+ mrb_int slen;
+ struct RString *s = mrb_str_ptr(str);
+
+ if (len < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "negative (or overflowed) string size");
}
- memcpy(RSTR_PTR(s1)+RSTR_LEN(s1), RSTR_PTR(s2), RSTR_LEN(s2));
- RSTR_SET_LEN(s1, len);
- RSTR_PTR(s1)[len] = '\0';
+ mrb_str_modify(mrb, s);
+ slen = RSTR_LEN(s);
+ if (len != slen) {
+ if (slen < len || slen - len > 256) {
+ resize_capa(mrb, s, len);
+ }
+ RSTR_SET_LEN(s, len);
+ RSTR_PTR(s)[len] = '\0'; /* sentinel */
+ }
+ return str;
+}
+
+MRB_API char*
+mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
+{
+ struct RString *s;
+
+ check_null_byte(mrb, str0);
+ s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
+ return RSTR_PTR(s);
+}
+
+MRB_API void
+mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
+{
+ other = mrb_obj_as_string(mrb, other);
+ mrb_str_cat_str(mrb, self, other);
}
-/*
- * call-seq: (Caution! String("abcd") remain)
- * String("abcdefg") = String("abcd") + String("efg")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
MRB_API mrb_value
mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
{
@@ -422,10 +868,13 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
/* 15.2.10.5.2 */
/*
- * call-seq: (Caution! String("abcd") remain) for stack_argument
- * String("abcdefg") = String("abcd") + String("efg")
+ * call-seq:
+ * str + other_str -> new_str
*
- * Returns a new string object containing a copy of <i>str</i>.
+ * Concatenation---Returns a new <code>String</code> containing
+ * <i>other_str</i> concatenated to <i>str</i>.
+ *
+ * "Hello from " + self.to_s #=> "Hello from main"
*/
static mrb_value
mrb_str_plus_m(mrb_state *mrb, mrb_value self)
@@ -440,15 +889,22 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self)
/* 15.2.10.5.33 */
/*
* call-seq:
- * len = strlen(String("abcd"))
+ * "abcd".size => int
*
* Returns the length of string.
*/
static mrb_value
mrb_str_size(mrb_state *mrb, mrb_value self)
{
- struct RString *s = mrb_str_ptr(self);
- return mrb_fixnum_value(RSTR_LEN(s));
+ mrb_int len = RSTRING_CHAR_LEN(self);
+ return mrb_fixnum_value(len);
+}
+
+static mrb_value
+mrb_str_bytesize(mrb_state *mrb, mrb_value self)
+{
+ mrb_int len = RSTRING_LEN(self);
+ return mrb_fixnum_value(len);
}
/* 15.2.10.5.1 */
@@ -472,13 +928,12 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
if (times < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
}
- if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
+ if (times && MRB_SSIZE_MAX / times < RSTRING_LEN(self)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
}
len = RSTRING_LEN(self)*times;
str2 = str_new(mrb, 0, len);
- str_with_class(mrb, str2, self);
p = RSTR_PTR(str2);
if (len > 0) {
n = RSTRING_LEN(self);
@@ -490,6 +945,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
memcpy(p + n, p, len-n);
}
p[RSTR_LEN(str2)] = '\0';
+ RSTR_COPY_ASCII_FLAG(str2, mrb_str_ptr(self));
return mrb_obj_value(str2);
}
@@ -553,26 +1009,11 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
static mrb_value
mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
{
- mrb_value str2;
+ mrb_value str2 = mrb_get_arg1(mrb);
mrb_int result;
- mrb_get_args(mrb, "o", &str2);
if (!mrb_string_p(str2)) {
- if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) {
- return mrb_nil_value();
- }
- else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) {
- return mrb_nil_value();
- }
- else {
- mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
-
- if (mrb_nil_p(tmp)) return mrb_nil_value();
- if (!mrb_fixnum(tmp)) {
- return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
- }
- result = -mrb_fixnum(tmp);
- }
+ return mrb_nil_value();
}
else {
result = mrb_str_cmp(mrb, str1, str2);
@@ -594,15 +1035,7 @@ str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
MRB_API mrb_bool
mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
{
- if (mrb_immediate_p(str2)) return FALSE;
- if (!mrb_string_p(str2)) {
- if (mrb_nil_p(str2)) return FALSE;
- if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) {
- return FALSE;
- }
- str2 = mrb_funcall(mrb, str2, "to_str", 0);
- return mrb_equal(mrb, str2, str1);
- }
+ if (!mrb_string_p(str2)) return FALSE;
return str_eql(mrb, str1, str2);
}
@@ -620,171 +1053,122 @@ mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
static mrb_value
mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
{
- mrb_value str2;
-
- mrb_get_args(mrb, "o", &str2);
+ mrb_value str2 = mrb_get_arg1(mrb);
return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
}
/* ---------------------------------- */
-MRB_API mrb_value
-mrb_str_to_str(mrb_state *mrb, mrb_value str)
-{
- mrb_value s;
-
- if (!mrb_string_p(str)) {
- s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
- if (mrb_nil_p(s)) {
- s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
- }
- return s;
- }
- return str;
-}
+/* obslete: use RSTRING_PTR() */
MRB_API const char*
-mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
+mrb_string_value_ptr(mrb_state *mrb, mrb_value str)
{
- mrb_value str = mrb_str_to_str(mrb, ptr);
+ str = mrb_obj_as_string(mrb, str);
return RSTRING_PTR(str);
}
-void
-mrb_noregexp(mrb_state *mrb, mrb_value self)
+/* obslete: use RSTRING_LEN() */
+MRB_API mrb_int
+mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
{
- mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
+ mrb_to_str(mrb, ptr);
+ return RSTRING_LEN(ptr);
}
-void
-mrb_regexp_check(mrb_state *mrb, mrb_value obj)
+MRB_API mrb_value
+mrb_str_dup(mrb_state *mrb, mrb_value str)
{
- if (mrb_regexp_p(mrb, obj)) {
- mrb_noregexp(mrb, obj);
- }
+ struct RString *s = mrb_str_ptr(str);
+ struct RString *dup = str_new(mrb, 0, 0);
+
+ return str_replace(mrb, dup, s);
}
-static inline mrb_int
-mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
-{
- const unsigned char *x = xs, *xe = xs + m;
- const unsigned char *y = ys;
- int i, qstable[256];
+enum str_convert_range {
+ /* `beg` and `len` are byte unit in `0 ... str.bytesize` */
+ STR_BYTE_RANGE_CORRECTED = 1,
- /* Preprocessing */
- for (i = 0; i < 256; ++i)
- qstable[i] = m + 1;
- for (; x < xe; ++x)
- qstable[*x] = xe - x;
- /* Searching */
- for (; y + m <= ys + n; y += *(qstable + y[m])) {
- if (*xs == *y && memcmp(xs, y, m) == 0)
- return y - ys;
- }
- return -1;
-}
+ /* `beg` and `len` are char unit in any range */
+ STR_CHAR_RANGE = 2,
-static mrb_int
-mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
-{
- const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
+ /* `beg` and `len` are char unit in `0 ... str.size` */
+ STR_CHAR_RANGE_CORRECTED = 3,
- if (m > n) return -1;
- else if (m == n) {
- return memcmp(x0, y0, m) == 0 ? 0 : -1;
- }
- else if (m < 1) {
- return 0;
- }
- else if (m == 1) {
- const unsigned char *ys = y, *ye = ys + n;
- for (; y < ye; ++y) {
- if (*x == *y)
- return y - ys;
- }
- return -1;
- }
- return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
-}
+ /* `beg` is out of range */
+ STR_OUT_OF_RANGE = -1
+};
-static mrb_int
-mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
+static enum str_convert_range
+str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len)
{
- mrb_int pos;
- char *s, *sptr;
- mrb_int len, slen;
-
- len = RSTRING_LEN(str);
- slen = RSTRING_LEN(sub);
- if (offset < 0) {
- offset += len;
- if (offset < 0) return -1;
+ if (!mrb_undef_p(alen)) {
+ *beg = mrb_as_int(mrb, indx);
+ *len = mrb_as_int(mrb, alen);
+ return STR_CHAR_RANGE;
}
- if (len - offset < slen) return -1;
- s = RSTRING_PTR(str);
- if (offset) {
- s += offset;
- }
- if (slen == 0) return offset;
- /* need proceed one character at a time */
- sptr = RSTRING_PTR(sub);
- slen = RSTRING_LEN(sub);
- len = RSTRING_LEN(str) - offset;
- pos = mrb_memsearch(sptr, slen, s, len);
- if (pos < 0) return pos;
- return pos + offset;
-}
+ else {
+ switch (mrb_type(indx)) {
+ case MRB_TT_INTEGER:
+ *beg = mrb_integer(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
-MRB_API mrb_value
-mrb_str_dup(mrb_state *mrb, mrb_value str)
-{
- struct RString *s = mrb_str_ptr(str);
- struct RString *dup = str_new(mrb, 0, 0);
+ case MRB_TT_STRING:
+ *beg = str_index_str(mrb, str, indx, 0);
+ if (*beg < 0) { break; }
+ *len = RSTRING_LEN(indx);
+ return STR_BYTE_RANGE_CORRECTED;
- str_with_class(mrb, dup, str);
- return str_replace(mrb, dup, s);
+ case MRB_TT_RANGE:
+ goto range_arg;
+
+ default:
+ indx = mrb_to_int(mrb, indx);
+ if (mrb_integer_p(indx)) {
+ *beg = mrb_integer(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
+ }
+range_arg:
+ *len = RSTRING_CHAR_LEN(str);
+ switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) {
+ case MRB_RANGE_OK:
+ return STR_CHAR_RANGE_CORRECTED;
+ case MRB_RANGE_OUT:
+ return STR_OUT_OF_RANGE;
+ default:
+ break;
+ }
+
+ mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Integer");
+ }
+ }
+ return STR_OUT_OF_RANGE;
}
static mrb_value
-mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
+mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen)
{
- mrb_int idx;
-
- mrb_regexp_check(mrb, indx);
- switch (mrb_type(indx)) {
- case MRB_TT_FLOAT:
- indx = mrb_flo_to_fixnum(mrb, indx);
- /* fall through */
- case MRB_TT_FIXNUM:
- idx = mrb_fixnum(indx);
+ mrb_int beg, len;
-num_index:
- str = mrb_str_substr(mrb, str, idx, 1);
- if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_CHAR_RANGE_CORRECTED:
+ return str_subseq(mrb, str, beg, len);
+ case STR_CHAR_RANGE:
+ str = str_substr(mrb, str, beg, len);
+ if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
return str;
-
- case MRB_TT_STRING:
- if (mrb_str_index(mrb, str, indx, 0) != -1)
+ case STR_BYTE_RANGE_CORRECTED:
+ if (mrb_string_p(indx)) {
return mrb_str_dup(mrb, indx);
- return mrb_nil_value();
-
- case MRB_TT_RANGE:
- /* check if indx is Range */
- {
- mrb_int beg, len;
-
- len = RSTRING_LEN(str);
- if (mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
- return mrb_str_subseq(mrb, str, beg, len);
- }
- else {
- return mrb_nil_value();
- }
}
+ else {
+ return mrb_str_byte_subseq(mrb, str, beg, len);
+ }
+ case STR_OUT_OF_RANGE:
default:
- idx = mrb_fixnum(indx);
- goto num_index;
+ return mrb_nil_value();
}
- return mrb_nil_value(); /* not reached */
}
/* 15.2.10.5.6 */
@@ -794,16 +1178,14 @@ num_index:
* str[fixnum] => fixnum or nil
* str[fixnum, fixnum] => new_str or nil
* str[range] => new_str or nil
- * str[regexp] => new_str or nil
- * str[regexp, fixnum] => new_str or nil
* str[other_str] => new_str or nil
* str.slice(fixnum) => fixnum or nil
* str.slice(fixnum, fixnum) => new_str or nil
* str.slice(range) => new_str or nil
* str.slice(other_str) => new_str or nil
*
- * Element Reference---If passed a single <code>Fixnum</code>, returns the code
- * of the character at that position. If passed two <code>Fixnum</code>
+ * Element Reference---If passed a single <code>Integer</code>, returns the code
+ * of the character at that position. If passed two <code>Integer</code>
* objects, returns a substring starting at the offset given by the first, and
* a length given by the second. If given a range, a substring containing
* characters at offsets given by the range is returned. In all three cases, if
@@ -831,17 +1213,198 @@ static mrb_value
mrb_str_aref_m(mrb_state *mrb, mrb_value str)
{
mrb_value a1, a2;
- int argc;
- argc = mrb_get_args(mrb, "o|o", &a1, &a2);
- if (argc == 2) {
- mrb_regexp_check(mrb, a1);
- return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
+ if (mrb_get_args(mrb, "o|o", &a1, &a2) == 1) {
+ a2 = mrb_undef_value();
+ }
+
+ return mrb_str_aref(mrb, str, a1, a2);
+}
+
+static mrb_noreturn void
+str_out_of_index(mrb_state *mrb, mrb_value index)
+{
+ mrb_raisef(mrb, E_INDEX_ERROR, "index %v out of string", index);
+}
+
+static mrb_value
+str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep)
+{
+ const mrb_int shrink_threshold = 256;
+ struct RString *str = mrb_str_ptr(src);
+ mrb_int len = RSTR_LEN(str);
+ mrb_int replen, newlen;
+ char *strp;
+
+ if (end > len) { end = len; }
+
+ if (pos < 0 || pos > len) {
+ str_out_of_index(mrb, mrb_fixnum_value(pos));
+ }
+
+ replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep));
+ newlen = replen + (len - (end - pos));
+
+ if (newlen >= MRB_SSIZE_MAX || newlen < replen /* overflowed */) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big");
+ }
+
+ mrb_str_modify(mrb, str);
+
+ if (len < newlen) {
+ resize_capa(mrb, str, newlen);
+ }
+
+ strp = RSTR_PTR(str);
+
+ memmove(strp + newlen - (len - end), strp + end, len - end);
+ if (!mrb_nil_p(rep)) {
+ memmove(strp + pos, RSTRING_PTR(rep), replen);
+ }
+ RSTR_SET_LEN(str, newlen);
+ strp[newlen] = '\0';
+
+ if (len - newlen >= shrink_threshold) {
+ resize_capa(mrb, str, newlen);
+ }
+
+ return src;
+}
+
+#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
+
+static mrb_value
+str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect)
+{
+ const char *p, *pend;
+ char buf[4]; /* `\x??` or UTF-8 character */
+ mrb_value result = mrb_str_new_lit(mrb, "\"");
+#ifdef MRB_UTF8_STRING
+ uint32_t ascii_flag = MRB_STR_ASCII;
+#endif
+
+ p = RSTRING_PTR(str); pend = RSTRING_END(str);
+ for (;p < pend; p++) {
+ unsigned char c, cc;
+#ifdef MRB_UTF8_STRING
+ if (inspect) {
+ mrb_int clen = mrb_utf8len(p, pend);
+ if (clen > 1) {
+ mrb_int i;
+
+ for (i=0; i<clen; i++) {
+ buf[i] = p[i];
+ }
+ mrb_str_cat(mrb, result, buf, clen);
+ p += clen-1;
+ ascii_flag = 0;
+ continue;
+ }
+ }
+#endif
+ c = *p;
+ if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
+ buf[0] = '\\'; buf[1] = c;
+ mrb_str_cat(mrb, result, buf, 2);
+ continue;
+ }
+ if (ISPRINT(c)) {
+ buf[0] = c;
+ mrb_str_cat(mrb, result, buf, 1);
+ continue;
+ }
+ switch (c) {
+ case '\n': cc = 'n'; break;
+ case '\r': cc = 'r'; break;
+ case '\t': cc = 't'; break;
+ case '\f': cc = 'f'; break;
+ case '\013': cc = 'v'; break;
+ case '\010': cc = 'b'; break;
+ case '\007': cc = 'a'; break;
+ case 033: cc = 'e'; break;
+ default: cc = 0; break;
+ }
+ if (cc) {
+ buf[0] = '\\';
+ buf[1] = (char)cc;
+ mrb_str_cat(mrb, result, buf, 2);
+ continue;
+ }
+ else {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[3] = mrb_digitmap[c % 16]; c /= 16;
+ buf[2] = mrb_digitmap[c % 16];
+ mrb_str_cat(mrb, result, buf, 4);
+ continue;
+ }
+ }
+ mrb_str_cat_lit(mrb, result, "\"");
+#ifdef MRB_UTF8_STRING
+ if (inspect) {
+ mrb_str_ptr(str)->flags |= ascii_flag;
+ mrb_str_ptr(result)->flags |= ascii_flag;
+ }
+ else {
+ RSTR_SET_ASCII_FLAG(mrb_str_ptr(result));
+ }
+#endif
+
+ return result;
+}
+
+static void
+mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
+{
+ mrb_int beg, len, charlen;
+
+ mrb_to_str(mrb, replace);
+
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_OUT_OF_RANGE:
+ default:
+ mrb_raise(mrb, E_INDEX_ERROR, "string not matched");
+ case STR_CHAR_RANGE:
+ if (len < 0) {
+ mrb_raisef(mrb, E_INDEX_ERROR, "negative length %v", alen);
+ }
+ charlen = RSTRING_CHAR_LEN(str);
+ if (beg < 0) { beg += charlen; }
+ if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); }
+ /* fall through */
+ case STR_CHAR_RANGE_CORRECTED:
+ str_range_to_bytes(str, &beg, &len);
+ /* fall through */
+ case STR_BYTE_RANGE_CORRECTED:
+ str_replace_partial(mrb, str, beg, beg + len, replace);
}
- if (argc != 1) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
+}
+
+/*
+ * call-seq:
+ * str[fixnum] = replace
+ * str[fixnum, fixnum] = replace
+ * str[range] = replace
+ * str[other_str] = replace
+ *
+ * Modify +self+ by replacing the content of +self+.
+ * The portion of the string affected is determined using the same criteria as +String#[]+.
+ */
+static mrb_value
+mrb_str_aset_m(mrb_state *mrb, mrb_value str)
+{
+ mrb_value indx, alen, replace;
+
+ switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) {
+ case 2:
+ replace = alen;
+ alen = mrb_undef_value();
+ break;
+ case 3:
+ break;
}
- return mrb_str_aref(mrb, str, a1);
+ mrb_str_aset(mrb, str, indx, alen, replace);
+ return str;
}
/* 15.2.10.5.8 */
@@ -864,7 +1427,7 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
mrb_bool modify = FALSE;
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value();
p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s);
if (ISLOWER(*p)) {
@@ -906,7 +1469,7 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self)
/* 15.2.10.5.10 */
/*
* call-seq:
- * str.chomp!(separator=$/) => str or nil
+ * str.chomp!(separator="\n") => str or nil
*
* Modifies <i>str</i> in place as described for <code>String#chomp</code>,
* returning <i>str</i>, or <code>nil</code> if no modifications were made.
@@ -919,11 +1482,13 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
char *p, *pp;
mrb_int rslen;
mrb_int len;
+ mrb_int argc;
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ argc = mrb_get_args(mrb, "|S", &rs);
+ mrb_str_modify_keep_ascii(mrb, s);
len = RSTR_LEN(s);
- if (mrb_get_args(mrb, "|S", &rs) == 0) {
+ if (argc == 0) {
if (len == 0) return mrb_nil_value();
smart_chomp:
if (RSTR_PTR(s)[len-1] == '\n') {
@@ -980,12 +1545,11 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
/* 15.2.10.5.9 */
/*
* call-seq:
- * str.chomp(separator=$/) => new_str
+ * str.chomp(separator="\n") => new_str
*
* Returns a new <code>String</code> with the given record separator removed
- * from the end of <i>str</i> (if present). If <code>$/</code> has not been
- * changed from the default Ruby record separator, then <code>chomp</code> also
- * removes carriage return characters (that is it will remove <code>\n</code>,
+ * from the end of <i>str</i> (if present). <code>chomp</code> also removes
+ * carriage return characters (that is it will remove <code>\n</code>,
* <code>\r</code>, and <code>\r\n</code>).
*
* "hello".chomp #=> "hello"
@@ -1020,10 +1584,21 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
{
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
if (RSTR_LEN(s) > 0) {
mrb_int len;
+#ifdef MRB_UTF8_STRING
+ const char* t = RSTR_PTR(s), *p = t;
+ const char* e = p + RSTR_LEN(s);
+ while (p<e) {
+ mrb_int clen = mrb_utf8len(p, e);
+ if (p + clen>=e) break;
+ p += clen;
+ }
+ len = p - t;
+#else
len = RSTR_LEN(s) - 1;
+#endif
if (RSTR_PTR(s)[len] == '\n') {
if (len > 0 &&
RSTR_PTR(s)[len-1] == '\r') {
@@ -1078,7 +1653,7 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
mrb_bool modify = FALSE;
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
p = RSTR_PTR(s);
pend = RSTR_PTR(s) + RSTR_LEN(s);
while (p < pend) {
@@ -1100,7 +1675,7 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
*
* Returns a copy of <i>str</i> with all uppercase letters replaced with their
* lowercase counterparts. The operation is locale insensitive---only
- * characters ``A'' to ``Z'' are affected.
+ * characters 'A' to 'Z' are affected.
*
* "hEllO".downcase #=> "hello"
*/
@@ -1142,72 +1717,38 @@ mrb_str_empty_p(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_eql(mrb_state *mrb, mrb_value self)
{
- mrb_value str2;
+ mrb_value str2 = mrb_get_arg1(mrb);
mrb_bool eql_p;
- mrb_get_args(mrb, "o", &str2);
- eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
+ eql_p = (mrb_string_p(str2)) && str_eql(mrb, self, str2);
return mrb_bool_value(eql_p);
}
-static mrb_value
-mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
-{
- struct RString *orig, *s;
- mrb_shared_string *shared;
-
- orig = mrb_str_ptr(str);
- if (RSTR_EMBED_P(orig)) {
- s = str_new(mrb, orig->as.ary+beg, len);
- } else {
- str_make_shared(mrb, orig);
- shared = orig->as.heap.aux.shared;
- s = mrb_obj_alloc_string(mrb);
- s->as.heap.ptr = orig->as.heap.ptr + beg;
- s->as.heap.len = len;
- s->as.heap.aux.shared = shared;
- RSTR_SET_SHARED_FLAG(s);
- shared->refcnt++;
- }
-
- return mrb_obj_value(s);
-}
-
MRB_API mrb_value
mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
- if (len < 0) return mrb_nil_value();
- if (!RSTRING_LEN(str)) {
- len = 0;
- }
- if (beg > RSTRING_LEN(str)) return mrb_nil_value();
- if (beg < 0) {
- beg += RSTRING_LEN(str);
- if (beg < 0) return mrb_nil_value();
- }
- if (beg + len > RSTRING_LEN(str))
- len = RSTRING_LEN(str) - beg;
- if (len <= 0) {
- len = 0;
- }
- return mrb_str_subseq(mrb, str, beg, len);
+ return str_substr(mrb, str, beg, len);
}
-mrb_int
+uint32_t
mrb_str_hash(mrb_state *mrb, mrb_value str)
{
/* 1-8-7 */
struct RString *s = mrb_str_ptr(str);
mrb_int len = RSTR_LEN(s);
char *p = RSTR_PTR(s);
- mrb_int key = 0;
+ uint32_t hash = 0;
- while (len--) {
- key = key*65599 + *p;
- p++;
+ for(int i = 0; i < len; ++i) {
+ hash += p[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
}
- return key + (key>>5);
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+ return hash;
}
/* 15.2.10.5.20 */
@@ -1240,146 +1781,52 @@ mrb_str_hash_m(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_include(mrb_state *mrb, mrb_value self)
{
- mrb_int i;
mrb_value str2;
- mrb_bool include_p;
-
- mrb_get_args(mrb, "o", &str2);
- if (mrb_fixnum_p(str2)) {
- include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
- }
- else {
- str2 = mrb_str_to_str(mrb, str2);
- i = mrb_str_index(mrb, self, str2, 0);
- include_p = (i != -1);
- }
-
- return mrb_bool_value(include_p);
+ mrb_get_args(mrb, "S", &str2);
+ if (str_index_str(mrb, self, str2, 0) < 0)
+ return mrb_bool_value(FALSE);
+ return mrb_bool_value(TRUE);
}
/* 15.2.10.5.22 */
/*
* call-seq:
* str.index(substring [, offset]) => fixnum or nil
- * str.index(fixnum [, offset]) => fixnum or nil
- * str.index(regexp [, offset]) => fixnum or nil
*
* Returns the index of the first occurrence of the given
- * <i>substring</i>,
- * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
- * Returns
- * <code>nil</code> if not found.
+ * <i>substring</i>. Returns <code>nil</code> if not found.
* If the second parameter is present, it
* specifies the position in the string to begin the search.
*
- * "hello".index('e') #=> 1
+ * "hello".index('l') #=> 2
* "hello".index('lo') #=> 3
* "hello".index('a') #=> nil
- * "hello".index(101) #=> 1(101=0x65='e')
- * "hello".index(/[aeiou]/, -3) #=> 4
+ * "hello".index('l', -2) #=> 3
*/
static mrb_value
mrb_str_index_m(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- mrb_int argc;
mrb_value sub;
mrb_int pos;
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 2) {
- pos = mrb_fixnum(argv[1]);
- sub = argv[0];
- }
- else {
+ if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) {
pos = 0;
- if (argc > 0)
- sub = argv[0];
- else
- sub = mrb_nil_value();
}
- mrb_regexp_check(mrb, sub);
- if (pos < 0) {
- pos += RSTRING_LEN(str);
+ else if (pos < 0) {
+ mrb_int clen = RSTRING_CHAR_LEN(str);
+ pos += clen;
if (pos < 0) {
return mrb_nil_value();
}
}
-
- switch (mrb_type(sub)) {
- case MRB_TT_FIXNUM: {
- int c = mrb_fixnum(sub);
- mrb_int len = RSTRING_LEN(str);
- unsigned char *p = (unsigned char*)RSTRING_PTR(str);
-
- for (;pos<len;pos++) {
- if (p[pos] == c) return mrb_fixnum_value(pos);
- }
- return mrb_nil_value();
- }
-
- default: {
- mrb_value tmp;
-
- tmp = mrb_check_string_type(mrb, sub);
- if (mrb_nil_p(tmp)) {
- mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
- }
- sub = tmp;
- }
- /* fall through */
- case MRB_TT_STRING:
- pos = mrb_str_index(mrb, str, sub, pos);
- break;
- }
+ pos = str_index_str_by_char(mrb, str, sub, pos);
if (pos == -1) return mrb_nil_value();
+ BYTES_ALIGN_CHECK(pos);
return mrb_fixnum_value(pos);
}
-#define STR_REPLACE_SHARED_MIN 10
-
-static mrb_value
-str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
-{
- long len;
-
- len = RSTR_LEN(s2);
- if (RSTR_SHARED_P(s1)) {
- str_decref(mrb, s1->as.heap.aux.shared);
- }
- else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
- mrb_free(mrb, s1->as.heap.ptr);
- }
-
- RSTR_UNSET_NOFREE_FLAG(s1);
-
- if (RSTR_SHARED_P(s2)) {
-L_SHARE:
- RSTR_UNSET_EMBED_FLAG(s1);
- s1->as.heap.ptr = s2->as.heap.ptr;
- s1->as.heap.len = len;
- s1->as.heap.aux.shared = s2->as.heap.aux.shared;
- RSTR_SET_SHARED_FLAG(s1);
- s1->as.heap.aux.shared->refcnt++;
- }
- else {
- if (len <= RSTRING_EMBED_LEN_MAX) {
- RSTR_UNSET_SHARED_FLAG(s1);
- RSTR_SET_EMBED_FLAG(s1);
- memcpy(s1->as.ary, RSTR_PTR(s2), len);
- RSTR_SET_EMBED_LEN(s1, len);
- }
- else {
- str_make_shared(mrb, s2);
- goto L_SHARE;
- }
- }
-
- return mrb_obj_value(s1);
-}
-
/* 15.2.10.5.24 */
/* 15.2.10.5.28 */
/*
@@ -1410,9 +1857,11 @@ mrb_str_init(mrb_state *mrb, mrb_value self)
{
mrb_value str2;
- if (mrb_get_args(mrb, "|S", &str2) == 1) {
- str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
+ if (mrb_get_args(mrb, "|S", &str2) == 0) {
+ struct RString *s = str_new(mrb, 0, 0);
+ str2 = mrb_obj_value(s);
}
+ str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
return self;
}
@@ -1424,7 +1873,7 @@ mrb_str_init(mrb_state *mrb, mrb_value self)
* str.to_sym => symbol
*
* Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
- * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
+ * symbol if it did not previously exist.
*
* "Koala".intern #=> :Koala
* s = 'cat'.to_sym #=> :cat
@@ -1446,15 +1895,20 @@ mrb_str_intern(mrb_state *mrb, mrb_value self)
MRB_API mrb_value
mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
{
- mrb_value str;
-
- if (mrb_string_p(obj)) {
+ switch (mrb_type(obj)) {
+ case MRB_TT_STRING:
return obj;
+ case MRB_TT_SYMBOL:
+ return mrb_sym_str(mrb, mrb_symbol(obj));
+ case MRB_TT_INTEGER:
+ return mrb_integer_to_str(mrb, obj, 10);
+ case MRB_TT_SCLASS:
+ case MRB_TT_CLASS:
+ case MRB_TT_MODULE:
+ return mrb_mod_to_s(mrb, obj);
+ default:
+ return mrb_type_convert(mrb, obj, MRB_TT_STRING, MRB_SYM(to_s));
}
- str = mrb_funcall(mrb, obj, "to_s", 0);
- if (!mrb_string_p(str))
- return mrb_any_to_s(mrb, obj);
- return str;
}
MRB_API mrb_value
@@ -1487,45 +1941,16 @@ mrb_ptr_to_str(mrb_state *mrb, void *p)
return mrb_obj_value(p_str);
}
-MRB_API mrb_value
-mrb_string_type(mrb_state *mrb, mrb_value str)
-{
- return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
-}
-
-MRB_API mrb_value
-mrb_check_string_type(mrb_state *mrb, mrb_value str)
-{
- return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
-}
-
-/* ---------------------------------- */
-/* 15.2.10.5.29 */
-/*
- * call-seq:
- * str.reverse => new_str
- *
- * Returns a new string with the characters from <i>str</i> in reverse order.
- *
- * "stressed".reverse #=> "desserts"
- */
-static mrb_value
-mrb_str_reverse(mrb_state *mrb, mrb_value str)
+static inline void
+str_reverse(char *p, char *e)
{
- struct RString *s2;
- char *s, *e, *p;
-
- if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str);
-
- s2 = str_new(mrb, 0, RSTRING_LEN(str));
- str_with_class(mrb, s2, str);
- s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
- p = RSTR_PTR(s2);
+ char c;
- while (e >= s) {
- *p++ = *e--;
+ while (p < e) {
+ c = *p;
+ *p++ = *e;
+ *e-- = c;
}
- return mrb_obj_value(s2);
}
/* 15.2.10.5.30 */
@@ -1540,146 +1965,96 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
struct RString *s = mrb_str_ptr(str);
char *p, *e;
- char c;
- mrb_str_modify(mrb, s);
- if (RSTR_LEN(s) > 1) {
+#ifdef MRB_UTF8_STRING
+ mrb_int utf8_len = RSTRING_CHAR_LEN(str);
+ mrb_int len = RSTR_LEN(s);
+
+ if (utf8_len < 2) return str;
+ if (utf8_len < len) {
+ mrb_str_modify(mrb, s);
p = RSTR_PTR(s);
- e = p + RSTR_LEN(s) - 1;
- while (p < e) {
- c = *p;
- *p++ = *e;
- *e-- = c;
+ e = p + RSTR_LEN(s);
+ while (p<e) {
+ mrb_int clen = mrb_utf8len(p, e);
+ str_reverse(p, p + clen - 1);
+ p += clen;
}
+ goto bytes;
+ }
+#endif
+
+ if (RSTR_LEN(s) > 1) {
+ mrb_str_modify(mrb, s);
+ goto bytes;
}
return str;
+
+ bytes:
+ p = RSTR_PTR(s);
+ e = p + RSTR_LEN(s) - 1;
+ str_reverse(p, e);
+ return str;
}
+/* ---------------------------------- */
+/* 15.2.10.5.29 */
/*
* call-seq:
- * str.rindex(substring [, fixnum]) => fixnum or nil
- * str.rindex(fixnum [, fixnum]) => fixnum or nil
- * str.rindex(regexp [, fixnum]) => fixnum or nil
+ * str.reverse => new_str
*
- * Returns the index of the last occurrence of the given <i>substring</i>,
- * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
- * <code>nil</code> if not found. If the second parameter is present, it
- * specifies the position in the string to end the search---characters beyond
- * this point will not be considered.
+ * Returns a new string with the characters from <i>str</i> in reverse order.
*
- * "hello".rindex('e') #=> 1
- * "hello".rindex('l') #=> 3
- * "hello".rindex('a') #=> nil
- * "hello".rindex(101) #=> 1
- * "hello".rindex(/[aeiou]/, -2) #=> 1
+ * "stressed".reverse #=> "desserts"
*/
-static mrb_int
-mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+static mrb_value
+mrb_str_reverse(mrb_state *mrb, mrb_value str)
{
- char *s, *sbeg, *t;
- struct RString *ps = mrb_str_ptr(str);
- mrb_int len = RSTRING_LEN(sub);
-
- /* substring longer than string */
- if (RSTR_LEN(ps) < len) return -1;
- if (RSTR_LEN(ps) - pos < len) {
- pos = RSTR_LEN(ps) - len;
- }
- sbeg = RSTR_PTR(ps);
- s = RSTR_PTR(ps) + pos;
- t = RSTRING_PTR(sub);
- if (len) {
- while (sbeg <= s) {
- if (memcmp(s, t, len) == 0) {
- return s - RSTR_PTR(ps);
- }
- s--;
- }
- return -1;
- }
- else {
- return pos;
- }
+ mrb_value str2 = mrb_str_dup(mrb, str);
+ mrb_str_reverse_bang(mrb, str2);
+ return str2;
}
/* 15.2.10.5.31 */
/*
* call-seq:
- * str.rindex(substring [, fixnum]) => fixnum or nil
- * str.rindex(fixnum [, fixnum]) => fixnum or nil
- * str.rindex(regexp [, fixnum]) => fixnum or nil
+ * str.rindex(substring [, offset]) => fixnum or nil
*
- * Returns the index of the last occurrence of the given <i>substring</i>,
- * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
- * <code>nil</code> if not found. If the second parameter is present, it
- * specifies the position in the string to end the search---characters beyond
- * this point will not be considered.
+ * Returns the index of the last occurrence of the given <i>substring</i>.
+ * Returns <code>nil</code> if not found. If the second parameter is
+ * present, it specifies the position in the string to end the
+ * search---characters beyond this point will not be considered.
*
* "hello".rindex('e') #=> 1
* "hello".rindex('l') #=> 3
* "hello".rindex('a') #=> nil
- * "hello".rindex(101) #=> 1
- * "hello".rindex(/[aeiou]/, -2) #=> 1
+ * "hello".rindex('l', 2) #=> 2
*/
static mrb_value
-mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
+mrb_str_rindex(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- mrb_int argc;
mrb_value sub;
- mrb_value vpos;
- mrb_int pos, len = RSTRING_LEN(str);
+ mrb_int pos, len = RSTRING_CHAR_LEN(str);
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 2) {
- sub = argv[0];
- vpos = argv[1];
- pos = mrb_fixnum(vpos);
+ if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) {
+ pos = len;
+ }
+ else {
if (pos < 0) {
pos += len;
if (pos < 0) {
- mrb_regexp_check(mrb, sub);
return mrb_nil_value();
}
}
if (pos > len) pos = len;
}
- else {
- pos = len;
- if (argc > 0)
- sub = argv[0];
- else
- sub = mrb_nil_value();
+ pos = chars2bytes(str, 0, pos);
+ pos = str_rindex(mrb, str, sub, pos);
+ if (pos >= 0) {
+ pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos);
+ BYTES_ALIGN_CHECK(pos);
+ return mrb_fixnum_value(pos);
}
- mrb_regexp_check(mrb, sub);
-
- switch (mrb_type(sub)) {
- case MRB_TT_FIXNUM: {
- int c = mrb_fixnum(sub);
- unsigned char *p = (unsigned char*)RSTRING_PTR(str);
-
- for (pos=len-1;pos>=0;pos--) {
- if (p[pos] == c) return mrb_fixnum_value(pos);
- }
- return mrb_nil_value();
- }
-
- default: {
- mrb_value tmp;
-
- tmp = mrb_check_string_type(mrb, sub);
- if (mrb_nil_p(tmp)) {
- mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
- }
- sub = tmp;
- }
- /* fall through */
- case MRB_TT_STRING:
- pos = mrb_str_rindex(mrb, str, sub, pos);
- if (pos >= 0) return mrb_fixnum_value(pos);
- break;
-
- } /* end of switch (TYPE(sub)) */
return mrb_nil_value();
}
@@ -1687,23 +2062,18 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
/*
* call-seq:
- * str.split(pattern=$;, [limit]) => anArray
+ * str.split(separator=nil, [limit]) => anArray
*
* Divides <i>str</i> into substrings based on a delimiter, returning an array
* of these substrings.
*
- * If <i>pattern</i> is a <code>String</code>, then its contents are used as
- * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
+ * If <i>separator</i> is a <code>String</code>, then its contents are used as
+ * the delimiter when splitting <i>str</i>. If <i>separator</i> is a single
* space, <i>str</i> is split on whitespace, with leading whitespace and runs
* of contiguous whitespace characters ignored.
*
- * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
- * pattern matches. Whenever the pattern matches a zero-length string,
- * <i>str</i> is split into individual characters.
- *
- * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
- * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
- * split on whitespace as if ` ' were specified.
+ * If <i>separator</i> is omitted or <code>nil</code> (which is the default),
+ * <i>str</i> is split on whitespace as if ' ' were specified.
*
* If the <i>limit</i> parameter is omitted, trailing null fields are
* suppressed. If <i>limit</i> is a positive number, at most that number of
@@ -1714,11 +2084,6 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
*
* " now's the time".split #=> ["now's", "the", "time"]
* " now's the time".split(' ') #=> ["now's", "the", "time"]
- * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
- * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
- * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
- * "hello".split(//, 3) #=> ["h", "e", "llo"]
- * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
*
* "mellow yellow".split("ello") #=> ["m", "w y", "w"]
* "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
@@ -1729,13 +2094,14 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_split_m(mrb_state *mrb, mrb_value str)
{
- int argc;
+ mrb_int argc;
mrb_value spat = mrb_nil_value();
- enum {awk, string, regexp} split_type = string;
- long i = 0, lim_p;
+ enum {awk, string} split_type = string;
+ mrb_int i = 0;
mrb_int beg;
mrb_int end;
mrb_int lim = 0;
+ mrb_bool lim_p;
mrb_value result, tmp;
argc = mrb_get_args(mrb, "|oi", &spat, &lim);
@@ -1752,91 +2118,74 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
if (argc == 0 || mrb_nil_p(spat)) {
split_type = awk;
}
- else {
- if (mrb_string_p(spat)) {
- split_type = string;
- if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
- split_type = awk;
- }
- }
- else {
- mrb_noregexp(mrb, str);
- }
+ else if (!mrb_string_p(spat)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+ }
+ else if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
+ split_type = awk;
}
result = mrb_ary_new(mrb);
beg = 0;
if (split_type == awk) {
- char *ptr = RSTRING_PTR(str);
- char *eptr = RSTRING_END(str);
- char *bptr = ptr;
mrb_bool skip = TRUE;
+ mrb_int idx = 0;
+ mrb_int str_len = RSTRING_LEN(str);
unsigned int c;
+ int ai = mrb_gc_arena_save(mrb);
- end = beg;
- while (ptr < eptr) {
- int ai = mrb_gc_arena_save(mrb);
- c = (unsigned char)*ptr++;
+ idx = end = beg;
+ while (idx < str_len) {
+ c = (unsigned char)RSTRING_PTR(str)[idx++];
if (skip) {
if (ISSPACE(c)) {
- beg = ptr - bptr;
+ beg = idx;
}
else {
- end = ptr - bptr;
+ end = idx;
skip = FALSE;
if (lim_p && lim <= i) break;
}
}
else if (ISSPACE(c)) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai);
skip = TRUE;
- beg = ptr - bptr;
+ beg = idx;
if (lim_p) ++i;
}
else {
- end = ptr - bptr;
+ end = idx;
}
}
}
- else if (split_type == string) {
- char *ptr = RSTRING_PTR(str); /* s->as.ary */
- char *temp = ptr;
- char *eptr = RSTRING_END(str);
- mrb_int slen = RSTRING_LEN(spat);
+ else { /* split_type == string */
+ mrb_int str_len = RSTRING_LEN(str);
+ mrb_int pat_len = RSTRING_LEN(spat);
+ mrb_int idx = 0;
+ int ai = mrb_gc_arena_save(mrb);
- if (slen == 0) {
- int ai = mrb_gc_arena_save(mrb);
- while (ptr < eptr) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
- mrb_gc_arena_restore(mrb, ai);
- ptr++;
- if (lim_p && lim <= ++i) break;
+ while (idx < str_len) {
+ if (pat_len > 0) {
+ end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
+ if (end < 0) break;
}
- }
- else {
- char *sptr = RSTRING_PTR(spat);
- int ai = mrb_gc_arena_save(mrb);
-
- while (ptr < eptr &&
- (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
- mrb_gc_arena_restore(mrb, ai);
- ptr += end + slen;
- if (lim_p && lim <= ++i) break;
+ else {
+ end = chars2bytes(str, idx, 1);
}
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
+ mrb_gc_arena_restore(mrb, ai);
+ idx += end + pat_len;
+ if (lim_p && lim <= ++i) break;
}
- beg = ptr - temp;
- }
- else {
- mrb_noregexp(mrb, str);
+ beg = idx;
}
if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg) {
- tmp = mrb_str_new_empty(mrb, str);
+ tmp = mrb_str_new(mrb, 0, 0);
}
else {
- tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+ tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
}
mrb_ary_push(mrb, result, tmp);
}
@@ -1850,13 +2199,14 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
return result;
}
-MRB_API mrb_value
-mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+static mrb_value
+mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, mrb_int base, int badcheck)
{
- const char *p;
+ const char *p = str;
+ const char *pend = str + len;
char sign = 1;
- int c, uscore;
- unsigned long n = 0;
+ int c;
+ mrb_int n = 0;
mrb_int val;
#define conv_digit(c) \
@@ -1865,26 +2215,23 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
ISUPPER(c) ? ((c) - 'A' + 10) : \
-1)
- if (!str) {
+ if (!p) {
if (badcheck) goto bad;
return mrb_fixnum_value(0);
}
- while (ISSPACE(*str)) str++;
+ while (p<pend && ISSPACE(*p))
+ p++;
- if (str[0] == '+') {
- str++;
+ if (p[0] == '+') {
+ p++;
}
- else if (str[0] == '-') {
- str++;
+ else if (p[0] == '-') {
+ p++;
sign = 0;
}
- if (str[0] == '+' || str[0] == '-') {
- if (badcheck) goto bad;
- return mrb_fixnum_value(0);
- }
if (base <= 0) {
- if (str[0] == '0') {
- switch (str[1]) {
+ if (p[0] == '0') {
+ switch (p[1]) {
case 'x': case 'X':
base = 16;
break;
@@ -1899,6 +2246,7 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
break;
default:
base = 8;
+ break;
}
}
else if (base < -1) {
@@ -1910,102 +2258,140 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
}
switch (base) {
case 2:
- if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'b'||p[1] == 'B')) {
+ p += 2;
}
break;
case 3:
break;
case 8:
- if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'o'||p[1] == 'O')) {
+ p += 2;
}
case 4: case 5: case 6: case 7:
break;
case 10:
- if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'd'||p[1] == 'D')) {
+ p += 2;
}
case 9: case 11: case 12: case 13: case 14: case 15:
break;
case 16:
- if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
- str += 2;
+ if (p[0] == '0' && (p[1] == 'x'||p[1] == 'X')) {
+ p += 2;
}
break;
default:
if (base < 2 || 36 < base) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
}
break;
} /* end of switch (base) { */
- if (*str == '0') { /* squeeze preceeding 0s */
- uscore = 0;
- while ((c = *++str) == '0' || c == '_') {
+ if (p>=pend) {
+ if (badcheck) goto bad;
+ return mrb_fixnum_value(0);
+ }
+ if (*p == '0') { /* squeeze preceding 0s */
+ p++;
+ while (p<pend) {
+ c = *p++;
if (c == '_') {
- if (++uscore >= 2)
+ if (p<pend && *p == '_') {
+ if (badcheck) goto bad;
break;
+ }
+ continue;
+ }
+ if (c != '0') {
+ p--;
+ break;
}
- else
- uscore = 0;
}
- if (!(c = *str) || ISSPACE(c)) --str;
+ if (*(p - 1) == '0')
+ p--;
}
- c = *str;
- c = conv_digit(c);
- if (c < 0 || c >= base) {
+ if (p == pend || *p == '_') {
if (badcheck) goto bad;
return mrb_fixnum_value(0);
}
-
- uscore = 0;
- for (p=str;*p;p++) {
+ for ( ;p<pend;p++) {
if (*p == '_') {
- if (uscore == 0) {
- uscore++;
+ p++;
+ if (p==pend) {
+ if (badcheck) goto bad;
continue;
}
- if (badcheck) goto bad;
- break;
+ if (*p == '_') {
+ if (badcheck) goto bad;
+ break;
+ }
+ }
+ if (badcheck && *p == '\0') {
+ goto nullbyte;
}
- uscore = 0;
c = conv_digit(*p);
if (c < 0 || c >= base) {
- if (badcheck) goto bad;
break;
}
- n *= base;
+ if (mrb_int_mul_overflow(n, base, &n)) goto overflow;
+ if (MRB_INT_MAX - c < n) {
+ if (sign == 0 && MRB_INT_MAX - n == c - 1) {
+ n = MRB_INT_MIN;
+ sign = 1;
+ break;
+ }
+ overflow:
+ mrb_raisef(mrb, E_RANGE_ERROR, "string (%l) too big for integer", str, pend-str);
+ }
n += c;
}
- if (n > MRB_INT_MAX) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", mrb_str_new_cstr(mrb, str));
- }
- val = n;
+ val = (mrb_int)n;
if (badcheck) {
- if (p == str) goto bad; /* no number */
- while (*p && ISSPACE(*p)) p++;
- if (*p) goto bad; /* trailing garbage */
+ if (p == str) goto bad; /* no number */
+ if (*(p - 1) == '_') goto bad; /* trailing '_' */
+ while (p<pend && ISSPACE(*p)) p++;
+ if (p<pend) goto bad; /* trailing garbage */
}
- return mrb_fixnum_value(sign ? val : -val);
-bad:
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", mrb_str_new_cstr(mrb, str));
+ return mrb_int_value(mrb, sign ? val : -val);
+ nullbyte:
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ /* not reached */
+ bad:
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%!l)", str, pend-str);
/* not reached */
return mrb_fixnum_value(0);
}
+/* obslete: use RSTRING_CSTR() or mrb_string_cstr() */
MRB_API const char*
mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
{
- struct RString *ps = mrb_str_ptr(*ptr);
- mrb_int len = mrb_str_strlen(mrb, ps);
- char *p = RSTR_PTR(ps);
+ struct RString *ps;
+ const char *p;
+ mrb_int len;
- if (!p || p[len] != '\0') {
- mrb_str_modify(mrb, ps);
- return RSTR_PTR(ps);
+ check_null_byte(mrb, *ptr);
+ ps = mrb_str_ptr(*ptr);
+ p = RSTR_PTR(ps);
+ len = RSTR_LEN(ps);
+ if (p[len] == '\0') {
+ return p;
}
- return p;
+
+ /*
+ * Even after str_modify_keep_ascii(), NULL termination is not ensured if
+ * RSTR_SET_LEN() is used explicitly (e.g. String#delete_suffix!).
+ */
+ str_modify_keep_ascii(mrb, ps);
+ RSTR_PTR(ps)[len] = '\0';
+ return RSTR_PTR(ps);
+}
+
+MRB_API const char*
+mrb_string_cstr(mrb_state *mrb, mrb_value str)
+{
+ return mrb_string_value_cstr(mrb, &str);
}
MRB_API mrb_value
@@ -2014,21 +2400,10 @@ mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
const char *s;
mrb_int len;
- str = mrb_str_to_str(mrb, str);
- if (badcheck) {
- s = mrb_string_value_cstr(mrb, &str);
- }
- else {
- s = RSTRING_PTR(str);
- }
- if (s) {
- len = RSTRING_LEN(str);
- if (s[len]) { /* no sentinel somehow */
- struct RString *temp_str = str_new(mrb, s, len);
- s = RSTR_PTR(temp_str);
- }
- }
- return mrb_cstr_to_inum(mrb, s, base, badcheck);
+ mrb_to_str(mrb, str);
+ s = RSTRING_PTR(str);
+ len = RSTRING_LEN(str);
+ return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
}
/* 15.2.10.5.38 */
@@ -2058,71 +2433,98 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self)
mrb_int base = 10;
mrb_get_args(mrb, "|i", &base);
- if (base < 0) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+ if (base < 0 || 36 < base) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
}
return mrb_str_to_inum(mrb, self, base, FALSE);
}
-MRB_API double
-mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
+#ifndef MRB_NO_FLOAT
+static double
+mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck)
{
+ char buf[DBL_DIG * 4 + 20];
+ const char *p = s, *p2;
+ const char *pend = p + len;
char *end;
+ char *n;
+ char prev = 0;
double d;
-
- enum {max_width = 20};
+ mrb_bool dot = FALSE;
if (!p) return 0.0;
- while (ISSPACE(*p)) p++;
-
- if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- return 0.0;
+ while (p<pend && ISSPACE(*p)) p++;
+ p2 = p;
+
+ if (pend - p > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ mrb_value x;
+
+ if (!badcheck) return 0.0;
+ x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck);
+ if (mrb_integer_p(x))
+ d = (double)mrb_integer(x);
+ else /* if (mrb_float_p(x)) */
+ d = mrb_float(x);
+ return d;
}
- d = strtod(p, &end);
- if (p == end) {
- if (badcheck) {
-bad:
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
- /* not reached */
+ while (p < pend) {
+ if (!*p) {
+ if (badcheck) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
+ /* not reached */
+ }
+ pend = p;
+ p = p2;
+ goto nocopy;
}
- return d;
+ if (!badcheck && *p == ' ') {
+ pend = p;
+ p = p2;
+ goto nocopy;
+ }
+ if (*p == '_') break;
+ p++;
}
- if (*end) {
- char buf[DBL_DIG * 4 + 10];
- char *n = buf;
- char *e = buf + sizeof(buf) - 1;
- char prev = 0;
-
- while (p < end && n < e) prev = *n++ = *p++;
- while (*p) {
- if (*p == '_') {
- /* remove underscores between digits */
- if (badcheck) {
- if (n == buf || !ISDIGIT(prev)) goto bad;
- ++p;
- if (!ISDIGIT(*p)) goto bad;
- }
- else {
- while (*++p == '_');
- continue;
- }
+ p = p2;
+ n = buf;
+ while (p < pend) {
+ char c = *p++;
+ if (c == '.') dot = TRUE;
+ if (c == '_') {
+ /* remove an underscore between digits */
+ if (n == buf || !ISDIGIT(prev) || p == pend) {
+ if (badcheck) goto bad;
+ break;
}
- prev = *p++;
- if (n < e) *n++ = prev;
}
- *n = '\0';
- p = buf;
-
- if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
- return 0.0;
+ else if (badcheck && prev == '_' && !ISDIGIT(c)) goto bad;
+ else {
+ const char *bend = buf+sizeof(buf)-1;
+ if (n==bend) { /* buffer overflow */
+ if (dot) break; /* cut off remaining fractions */
+ return INFINITY;
+ }
+ *n++ = c;
}
-
- d = strtod(p, &end);
+ prev = c;
+ }
+ *n = '\0';
+ p = buf;
+ pend = n;
+nocopy:
+ d = mrb_float_read(p, &end);
+ if (p == end) {
if (badcheck) {
- if (!end || p == end) goto bad;
- while (*end && ISSPACE(*end)) end++;
- if (*end) goto bad;
+bad:
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%!s)", s);
+ /* not reached */
}
+ return d;
+ }
+ if (badcheck) {
+ if (!end || p == end) goto bad;
+ while (end<pend && ISSPACE(*end)) end++;
+ if (end<pend) goto bad;
}
return d;
}
@@ -2130,22 +2532,7 @@ bad:
MRB_API double
mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
{
- char *s;
- mrb_int len;
-
- str = mrb_str_to_str(mrb, str);
- s = RSTRING_PTR(str);
- len = RSTRING_LEN(str);
- if (s) {
- if (badcheck && memchr(s, '\0', len)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
- }
- if (s[len]) { /* no sentinel somehow */
- struct RString *temp_str = str_new(mrb, s, len);
- s = RSTR_PTR(temp_str);
- }
- }
- return mrb_cstr_to_dbl(mrb, s, badcheck);
+ return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck);
}
/* 15.2.10.5.39 */
@@ -2154,7 +2541,7 @@ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
* str.to_f => float
*
* Returns the result of interpreting leading characters in <i>str</i> as a
- * floating point number. Extraneous characters past the end of a valid number
+ * floating-point number. Extraneous characters past the end of a valid number
* are ignored. If there is not a valid number at the start of <i>str</i>,
* <code>0.0</code> is returned. This method never raises an exception.
*
@@ -2167,12 +2554,12 @@ mrb_str_to_f(mrb_state *mrb, mrb_value self)
{
return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE));
}
+#endif
/* 15.2.10.5.40 */
/*
* call-seq:
* str.to_s => str
- * str.to_str => str
*
* Returns the receiver.
*/
@@ -2200,7 +2587,7 @@ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
char *p, *pend;
mrb_bool modify = FALSE;
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
p = RSTRING_PTR(str);
pend = RSTRING_END(str);
while (p < pend) {
@@ -2222,7 +2609,7 @@ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
*
* Returns a copy of <i>str</i> with all lowercase letters replaced with their
* uppercase counterparts. The operation is locale insensitive---only
- * characters ``a'' to ``z'' are affected.
+ * characters 'a' to 'z' are affected.
*
* "hEllO".upcase #=> "HELLO"
*/
@@ -2236,8 +2623,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
return str;
}
-#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
-
/*
* call-seq:
* str.dump -> new_str
@@ -2248,143 +2633,76 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
mrb_value
mrb_str_dump(mrb_state *mrb, mrb_value str)
{
- mrb_int len;
- const char *p, *pend;
- char *q;
- struct RString *result;
-
- len = 2; /* "" */
- p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- while (p < pend) {
- unsigned char c = *p++;
- switch (c) {
- case '"': case '\\':
- case '\n': case '\r':
- case '\t': case '\f':
- case '\013': case '\010': case '\007': case '\033':
- len += 2;
- break;
+ return str_escape(mrb, str, FALSE);
+}
- case '#':
- len += IS_EVSTR(p, pend) ? 2 : 1;
- break;
+MRB_API mrb_value
+mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+{
+ struct RString *s = mrb_str_ptr(str);
+ size_t capa;
+ size_t total;
+ ptrdiff_t off = -1;
- default:
- if (ISPRINT(c)) {
- len++;
- }
- else {
- len += 4; /* \NNN */
- }
- break;
- }
+ if (len == 0) return str;
+ mrb_str_modify(mrb, s);
+ if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
+ off = ptr - RSTR_PTR(s);
}
- result = str_new(mrb, 0, len);
- str_with_class(mrb, result, str);
- p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- q = RSTR_PTR(result);
- *q++ = '"';
- while (p < pend) {
- unsigned char c = *p++;
-
- switch (c) {
- case '"':
- case '\\':
- *q++ = '\\';
- *q++ = c;
- break;
-
- case '\n':
- *q++ = '\\';
- *q++ = 'n';
- break;
-
- case '\r':
- *q++ = '\\';
- *q++ = 'r';
- break;
-
- case '\t':
- *q++ = '\\';
- *q++ = 't';
- break;
-
- case '\f':
- *q++ = '\\';
- *q++ = 'f';
- break;
-
- case '\013':
- *q++ = '\\';
- *q++ = 'v';
- break;
-
- case '\010':
- *q++ = '\\';
- *q++ = 'b';
- break;
-
- case '\007':
- *q++ = '\\';
- *q++ = 'a';
- break;
-
- case '\033':
- *q++ = '\\';
- *q++ = 'e';
- break;
-
- case '#':
- if (IS_EVSTR(p, pend)) *q++ = '\\';
- *q++ = '#';
- break;
-
- default:
- if (ISPRINT(c)) {
- *q++ = c;
- }
- else {
- *q++ = '\\';
- q[2] = '0' + c % 8; c /= 8;
- q[1] = '0' + c % 8; c /= 8;
- q[0] = '0' + c % 8;
- q += 3;
- }
+ capa = RSTR_CAPA(s);
+ total = RSTR_LEN(s)+len;
+ if (total >= MRB_SSIZE_MAX) {
+ size_error:
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ if (capa <= total) {
+ if (capa == 0) capa = 1;
+ while (capa <= total) {
+ if (capa <= MRB_SSIZE_MAX / 2) {
+ capa *= 2;
+ }
+ else {
+ capa = total+1;
+ }
}
+ if (capa <= total || capa > MRB_SSIZE_MAX) {
+ goto size_error;
+ }
+ resize_capa(mrb, s, capa);
}
- *q = '"';
- return mrb_obj_value(result);
-}
-
-MRB_API mrb_value
-mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
-{
- str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
+ if (off != -1) {
+ ptr = RSTR_PTR(s) + off;
+ }
+ memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
+ mrb_assert_int_fit(size_t, total, mrb_ssize, MRB_SSIZE_MAX);
+ RSTR_SET_LEN(s, total);
+ RSTR_PTR(s)[total] = '\0'; /* sentinel */
return str;
}
MRB_API mrb_value
mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
{
- return mrb_str_cat(mrb, str, ptr, strlen(ptr));
+ return mrb_str_cat(mrb, str, ptr, ptr ? strlen(ptr) : 0);
}
MRB_API mrb_value
mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
{
+ if (mrb_str_ptr(str) == mrb_str_ptr(str2)) {
+ mrb_str_modify(mrb, mrb_str_ptr(str));
+ }
return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
}
MRB_API mrb_value
-mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
+mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
{
- str2 = mrb_str_to_str(mrb, str2);
- return mrb_str_cat_str(mrb, str, str2);
+ mrb_to_str(mrb, str2);
+ return mrb_str_cat_str(mrb, str1, str2);
}
-#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
-
/*
* call-seq:
* str.inspect -> string
@@ -2399,54 +2717,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
mrb_value
mrb_str_inspect(mrb_state *mrb, mrb_value str)
{
- const char *p, *pend;
- char buf[CHAR_ESC_LEN + 1];
- mrb_value result = mrb_str_new_lit(mrb, "\"");
-
- p = RSTRING_PTR(str); pend = RSTRING_END(str);
- for (;p < pend; p++) {
- unsigned char c, cc;
-
- c = *p;
- if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
- buf[0] = '\\'; buf[1] = c;
- mrb_str_cat(mrb, result, buf, 2);
- continue;
- }
- if (ISPRINT(c)) {
- buf[0] = c;
- mrb_str_cat(mrb, result, buf, 1);
- continue;
- }
- switch (c) {
- case '\n': cc = 'n'; break;
- case '\r': cc = 'r'; break;
- case '\t': cc = 't'; break;
- case '\f': cc = 'f'; break;
- case '\013': cc = 'v'; break;
- case '\010': cc = 'b'; break;
- case '\007': cc = 'a'; break;
- case 033: cc = 'e'; break;
- default: cc = 0; break;
- }
- if (cc) {
- buf[0] = '\\';
- buf[1] = (char)cc;
- mrb_str_cat(mrb, result, buf, 2);
- continue;
- }
- else {
- buf[0] = '\\';
- buf[3] = '0' + c % 8; c /= 8;
- buf[2] = '0' + c % 8; c /= 8;
- buf[1] = '0' + c % 8;
- mrb_str_cat(mrb, result, buf, 4);
- continue;
- }
- }
- mrb_str_cat_lit(mrb, result, "\"");
-
- return result;
+ return str_escape(mrb, str, TRUE);
}
/*
@@ -2472,30 +2743,137 @@ mrb_str_bytes(mrb_state *mrb, mrb_value str)
return a;
}
+/*
+ * call-seq:
+ * str.getbyte(index) -> 0 .. 255
+ *
+ * returns the <i>index</i>th byte as an integer.
+ */
+static mrb_value
+mrb_str_getbyte(mrb_state *mrb, mrb_value str)
+{
+ mrb_int pos;
+ mrb_get_args(mrb, "i", &pos);
+
+ if (pos < 0)
+ pos += RSTRING_LEN(str);
+ if (pos < 0 || RSTRING_LEN(str) <= pos)
+ return mrb_nil_value();
+
+ return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
+}
+
+/*
+ * call-seq:
+ * str.setbyte(index, integer) -> integer
+ *
+ * modifies the <i>index</i>th byte as <i>integer</i>.
+ */
+static mrb_value
+mrb_str_setbyte(mrb_state *mrb, mrb_value str)
+{
+ mrb_int pos, byte;
+ mrb_int len;
+
+ mrb_get_args(mrb, "ii", &pos, &byte);
+
+ len = RSTRING_LEN(str);
+ if (pos < -len || len <= pos)
+ mrb_raisef(mrb, E_INDEX_ERROR, "index %i out of string", pos);
+ if (pos < 0)
+ pos += len;
+
+ mrb_str_modify(mrb, mrb_str_ptr(str));
+ byte &= 0xff;
+ RSTRING_PTR(str)[pos] = (unsigned char)byte;
+ return mrb_fixnum_value((unsigned char)byte);
+}
+
+/*
+ * call-seq:
+ * str.byteslice(integer) -> new_str or nil
+ * str.byteslice(integer, integer) -> new_str or nil
+ * str.byteslice(range) -> new_str or nil
+ *
+ * Byte Reference---If passed a single Integer, returns a
+ * substring of one byte at that position. If passed two Integer
+ * objects, returns a substring starting at the offset given by the first, and
+ * a length given by the second. If given a Range, a substring containing
+ * bytes at offsets given by the range is returned. In all three cases, if
+ * an offset is negative, it is counted from the end of <i>str</i>. Returns
+ * <code>nil</code> if the initial offset falls outside the string, the length
+ * is negative, or the beginning of the range is greater than the end.
+ * The encoding of the resulted string keeps original encoding.
+ *
+ * "hello".byteslice(1) #=> "e"
+ * "hello".byteslice(-1) #=> "o"
+ * "hello".byteslice(1, 2) #=> "el"
+ * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
+ * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
+ */
+static mrb_value
+mrb_str_byteslice(mrb_state *mrb, mrb_value str)
+{
+ mrb_value a1;
+ mrb_int str_len = RSTRING_LEN(str), beg, len;
+ mrb_bool empty = TRUE;
+
+ len = mrb_get_argc(mrb);
+ switch (len) {
+ case 2:
+ mrb_get_args(mrb, "ii", &beg, &len);
+ break;
+ case 1:
+ a1 = mrb_get_arg1(mrb);
+ if (mrb_range_p(a1)) {
+ if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) != MRB_RANGE_OK) {
+ return mrb_nil_value();
+ }
+ }
+ else {
+ beg = mrb_integer(mrb_to_int(mrb, a1));
+ len = 1;
+ empty = FALSE;
+ }
+ break;
+ default:
+ mrb_argnum_error(mrb, len, 1, 2);
+ break;
+ }
+ if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) {
+ return mrb_str_byte_subseq(mrb, str, beg, len);
+ }
+ else {
+ return mrb_nil_value();
+ }
+}
+
/* ---------------------------*/
void
mrb_init_string(mrb_state *mrb)
{
struct RClass *s;
- mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string");
+ mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << MRB_STR_EMBED_LEN_BIT),
+ "pointer size too big for embedded string");
- s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
+ mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
- mrb_define_method(mrb, s, "bytesize", mrb_str_size, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
+ mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
- mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_REQ(1)); /* 15.2.10.5.11 */
- mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_REQ(1)); /* 15.2.10.5.12 */
+ mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */
+ mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */
mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
@@ -2503,7 +2881,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */
mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
- mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
+ mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ARG(1,1)); /* 15.2.10.5.22 */
mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
@@ -2511,12 +2889,14 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
- mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
+ mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
+#ifndef MRB_NO_FLOAT
mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
+#endif
mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
@@ -2525,4 +2905,8 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));
+ mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1));
}