summaryrefslogtreecommitdiffhomepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r--src/string.c1151
1 files changed, 694 insertions, 457 deletions
diff --git a/src/string.c b/src/string.c
index 2668a2c85..190e87123 100644
--- a/src/string.c
+++ b/src/string.c
@@ -20,13 +20,12 @@
#include <mruby/class.h>
#include <mruby/range.h>
#include <mruby/string.h>
-#include <mruby/re.h>
+#include <mruby/numeric.h>
typedef struct mrb_shared_string {
- mrb_bool nofree : 1;
int refcnt;
+ mrb_int capa;
char *ptr;
- mrb_int len;
} mrb_shared_string;
const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
@@ -34,55 +33,114 @@ const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
static struct RString*
-str_new_static(mrb_state *mrb, const char *p, size_t len)
+str_init_normal_capa(mrb_state *mrb, struct RString *s,
+ const char *p, size_t len, size_t capa)
{
- struct RString *s;
+ char *dst = (char *)mrb_malloc(mrb, capa + 1);
+ if (p) memcpy(dst, p, len);
+ dst[len] = '\0';
+ s->as.heap.ptr = dst;
+ s->as.heap.len = (mrb_int)len;
+ s->as.heap.aux.capa = (mrb_int)capa;
+ RSTR_UNSET_TYPE_FLAG(s);
+ return s;
+}
- if (len >= MRB_INT_MAX) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
- }
- s = mrb_obj_alloc_string(mrb);
+static struct RString*
+str_init_normal(mrb_state *mrb, struct RString *s, const char *p, size_t len)
+{
+ return str_init_normal_capa(mrb, s, p, len, len);
+}
+
+static struct RString*
+str_init_embed(struct RString *s, const char *p, size_t len)
+{
+ if (p) memcpy(RSTR_EMBED_PTR(s), p, len);
+ RSTR_EMBED_PTR(s)[len] = '\0';
+ RSTR_SET_TYPE_FLAG(s, EMBED);
+ RSTR_SET_EMBED_LEN(s, len);
+ return s;
+}
+
+static struct RString*
+str_init_nofree(struct RString *s, const char *p, size_t len)
+{
+ s->as.heap.ptr = (char *)p;
s->as.heap.len = (mrb_int)len;
s->as.heap.aux.capa = 0; /* nofree */
- s->as.heap.ptr = (char *)p;
- s->flags = MRB_STR_NOFREE;
+ RSTR_SET_TYPE_FLAG(s, NOFREE);
+ return s;
+}
+static struct RString*
+str_init_shared(mrb_state *mrb, const struct RString *orig, struct RString *s, mrb_shared_string *shared)
+{
+ if (shared) {
+ shared->refcnt++;
+ }
+ else {
+ shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
+ shared->refcnt = 1;
+ shared->ptr = orig->as.heap.ptr;
+ shared->capa = orig->as.heap.aux.capa;
+ }
+ s->as.heap.ptr = orig->as.heap.ptr;
+ s->as.heap.len = orig->as.heap.len;
+ s->as.heap.aux.shared = shared;
+ RSTR_SET_TYPE_FLAG(s, SHARED);
return s;
}
static struct RString*
-str_new(mrb_state *mrb, const char *p, size_t len)
+str_init_fshared(const struct RString *orig, struct RString *s, struct RString *fshared)
{
- struct RString *s;
+ s->as.heap.ptr = orig->as.heap.ptr;
+ s->as.heap.len = orig->as.heap.len;
+ s->as.heap.aux.fshared = fshared;
+ RSTR_SET_TYPE_FLAG(s, FSHARED);
+ return s;
+}
- if (p && mrb_ro_data_p(p)) {
- return str_new_static(mrb, p, len);
- }
- s = mrb_obj_alloc_string(mrb);
- if (len <= RSTRING_EMBED_LEN_MAX) {
- RSTR_SET_EMBED_FLAG(s);
- RSTR_SET_EMBED_LEN(s, len);
- if (p) {
- memcpy(s->as.ary, p, len);
- }
+static struct RString*
+str_init_modifiable(mrb_state *mrb, struct RString *s, const char *p, size_t len)
+{
+ if (RSTR_EMBEDDABLE_P(len)) {
+ return str_init_embed(s, p, len);
}
else {
- if (len >= MRB_INT_MAX) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
- }
- s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1);
- s->as.heap.len = (mrb_int)len;
- s->as.heap.aux.capa = (mrb_int)len;
- if (p) {
- memcpy(s->as.heap.ptr, p, len);
- }
+ return str_init_normal(mrb, s, p, len);
}
- RSTR_PTR(s)[len] = '\0';
- return s;
+}
+
+static struct RString*
+str_new_static(mrb_state *mrb, const char *p, size_t len)
+{
+ if (RSTR_EMBEDDABLE_P(len)) {
+ return str_init_embed(mrb_obj_alloc_string(mrb), p, len);
+ }
+ if (len >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ return str_init_nofree(mrb_obj_alloc_string(mrb), p, len);
+}
+
+static struct RString*
+str_new(mrb_state *mrb, const char *p, size_t len)
+{
+ if (RSTR_EMBEDDABLE_P(len)) {
+ return str_init_embed(mrb_obj_alloc_string(mrb), p, len);
+ }
+ if (len >= MRB_INT_MAX) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
+ }
+ if (p && mrb_ro_data_p(p)) {
+ return str_init_nofree(mrb_obj_alloc_string(mrb), p, len);
+ }
+ return str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len);
}
static inline void
-str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
+str_with_class(struct RString *s, mrb_value obj)
{
s->c = mrb_str_ptr(obj)->c;
}
@@ -92,7 +150,7 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str)
{
struct RString *s = str_new(mrb, 0, 0);
- str_with_class(mrb, s, str);
+ str_with_class(s, str);
return mrb_obj_value(s);
}
@@ -101,15 +159,15 @@ mrb_str_new_capa(mrb_state *mrb, size_t capa)
{
struct RString *s;
- s = mrb_obj_alloc_string(mrb);
-
- if (capa >= MRB_INT_MAX) {
+ if (RSTR_EMBEDDABLE_P(capa)) {
+ s = str_init_embed(mrb_obj_alloc_string(mrb), NULL, 0);
+ }
+ else if (capa >= MRB_INT_MAX) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
}
- s->as.heap.len = 0;
- s->as.heap.aux.capa = (mrb_int)capa;
- s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1);
- RSTR_PTR(s)[0] = '\0';
+ else {
+ s = str_init_normal_capa(mrb, mrb_obj_alloc_string(mrb), NULL, 0, capa);
+ }
return mrb_obj_value(s);
}
@@ -134,14 +192,8 @@ resize_capa(mrb_state *mrb, struct RString *s, size_t capacity)
mrb_assert(capacity < MRB_INT_MAX);
#endif
if (RSTR_EMBED_P(s)) {
- if (RSTRING_EMBED_LEN_MAX < capacity) {
- char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
- const mrb_int len = RSTR_EMBED_LEN(s);
- memcpy(tmp, s->as.ary, len);
- RSTR_UNSET_EMBED_FLAG(s);
- s->as.heap.ptr = tmp;
- s->as.heap.len = len;
- s->as.heap.aux.capa = (mrb_int)capacity;
+ if (!RSTR_EMBEDDABLE_P(capacity)) {
+ str_init_normal_capa(mrb, s, RSTR_EMBED_PTR(s), RSTR_EMBED_LEN(s), capacity);
}
}
else {
@@ -156,13 +208,6 @@ mrb_str_new(mrb_state *mrb, const char *p, size_t len)
return mrb_obj_value(str_new(mrb, p, len));
}
-/*
- * call-seq: (Caution! NULL string)
- * String.new(str="") => new_str
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
-
MRB_API mrb_value
mrb_str_new_cstr(mrb_state *mrb, const char *p)
{
@@ -193,13 +238,20 @@ str_decref(mrb_state *mrb, mrb_shared_string *shared)
{
shared->refcnt--;
if (shared->refcnt == 0) {
- if (!shared->nofree) {
- mrb_free(mrb, shared->ptr);
- }
+ mrb_free(mrb, shared->ptr);
mrb_free(mrb, shared);
}
}
+static void
+check_null_byte(mrb_state *mrb, mrb_value str)
+{
+ mrb_to_str(mrb, str);
+ if (memchr(RSTRING_PTR(str), '\0', RSTRING_LEN(str))) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+ }
+}
+
void
mrb_gc_free_str(mrb_state *mrb, struct RString *str)
{
@@ -230,35 +282,47 @@ utf8len(const char* p, const char* e)
mrb_int len;
mrb_int i;
+ if ((unsigned char)*p < 0x80) return 1;
len = utf8len_codepage[(unsigned char)*p];
- if (p + len > e) return 1;
+ if (len == 1) return 1;
+ if (len > e - p) return 1;
for (i = 1; i < len; ++i)
if ((p[i] & 0xc0) != 0x80)
return 1;
return len;
}
-static mrb_int
-utf8_strlen(mrb_value str, mrb_int len)
+mrb_int
+mrb_utf8_len(const char *str, mrb_int byte_len)
{
mrb_int total = 0;
- char* p = RSTRING_PTR(str);
- char* e = p;
- if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
- return RSTRING_LEN(str);
- }
- e += len < 0 ? RSTRING_LEN(str) : len;
- while (p<e) {
+ const char *p = str;
+ const char *e = p + byte_len;
+
+ while (p < e) {
p += utf8len(p, e);
total++;
}
- if (RSTRING_LEN(str) == total) {
- RSTRING(str)->flags |= MRB_STR_NO_UTF;
- }
return total;
}
-#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
+static mrb_int
+utf8_strlen(mrb_value str)
+{
+ struct RString *s = mrb_str_ptr(str);
+ mrb_int byte_len = RSTR_LEN(s);
+
+ if (RSTR_ASCII_P(s)) {
+ return byte_len;
+ }
+ else {
+ mrb_int utf8_len = mrb_utf8_len(RSTR_PTR(s), byte_len);
+ if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s);
+ return utf8_len;
+ }
+}
+
+#define RSTRING_CHAR_LEN(s) utf8_strlen(s)
/* map character index to byte offset index */
static mrb_int
@@ -278,25 +342,136 @@ chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
/* map byte offset to character index */
static mrb_int
-bytes2chars(char *p, mrb_int bi)
+bytes2chars(char *p, mrb_int len, mrb_int bi)
{
- mrb_int i, b, n;
+ const char *e = p + (size_t)len;
+ const char *pivot = p + bi;
+ mrb_int i;
- for (b=i=0; b<bi; i++) {
- n = utf8len_codepage[(unsigned char)*p];
- b += n;
- p += n;
+ for (i = 0; p < pivot; i ++) {
+ p += utf8len(p, e);
}
- if (b != bi) return -1;
+ if (p != pivot) return -1;
return i;
}
+static const char *
+char_adjust(const char *beg, const char *end, const char *ptr)
+{
+ if ((ptr > beg || ptr < end) && (*ptr & 0xc0) == 0x80) {
+ const int utf8_adjust_max = 3;
+ const char *p;
+
+ if (ptr - beg > utf8_adjust_max) {
+ beg = ptr - utf8_adjust_max;
+ }
+
+ p = ptr;
+ while (p > beg) {
+ p --;
+ if ((*p & 0xc0) != 0x80) {
+ int clen = utf8len(p, end);
+ if (clen > ptr - p) return p;
+ break;
+ }
+ }
+ }
+
+ return ptr;
+}
+
+static const char *
+char_backtrack(const char *ptr, const char *end)
+{
+ if (ptr < end) {
+ const int utf8_bytelen_max = 4;
+ const char *p;
+
+ if (end - ptr > utf8_bytelen_max) {
+ ptr = end - utf8_bytelen_max;
+ }
+
+ p = end;
+ while (p > ptr) {
+ p --;
+ if ((*p & 0xc0) != 0x80) {
+ int clen = utf8len_codepage[(unsigned char)*p];
+ if (clen == end - p) { return p; }
+ break;
+ }
+ }
+ }
+
+ return end - 1;
+}
+
+static mrb_int
+str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, const char *s, const mrb_int slen, mrb_int off)
+{
+ /* Based on Quick Search algorithm (Boyer-Moore-Horspool algorithm) */
+
+ ptrdiff_t qstable[1 << CHAR_BIT];
+
+ /* Preprocessing */
+ {
+ mrb_int i;
+
+ for (i = 0; i < 1 << CHAR_BIT; i ++) {
+ qstable[i] = slen;
+ }
+ for (i = 0; i < slen; i ++) {
+ qstable[(unsigned char)s[i]] = slen - (i + 1);
+ }
+ }
+
+ /* Searching */
+ while (p < pend && pend - p >= slen) {
+ const char *pivot;
+
+ if (memcmp(p, s, slen) == 0) {
+ return off;
+ }
+
+ pivot = p + qstable[(unsigned char)p[slen - 1]];
+ if (pivot > pend || pivot < p /* overflowed */) { return -1; }
+
+ do {
+ p += utf8len(p, pend);
+ off ++;
+ } while (p < pivot);
+ }
+
+ return -1;
+}
+
+static mrb_int
+str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+{
+ const char *p = RSTRING_PTR(str);
+ const char *pend = p + RSTRING_LEN(str);
+ const char *s = RSTRING_PTR(sub);
+ const mrb_int slen = RSTRING_LEN(sub);
+ mrb_int off = pos;
+
+ for (; pos > 0; pos --) {
+ if (pend - p < 1) { return -1; }
+ p += utf8len(p, pend);
+ }
+
+ if (slen < 1) { return off; }
+
+ return str_index_str_by_char_search(mrb, p, pend, s, slen, off);
+}
+
#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
#else
#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
#define chars2bytes(p, off, ci) (ci)
-#define bytes2chars(p, bi) (bi)
+#define bytes2chars(p, end, bi) (bi)
+#define char_adjust(beg, end, ptr) (ptr)
+#define char_backtrack(ptr, end) ((end) - 1)
#define BYTES_ALIGN_CHECK(pos)
+#define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos)
#endif
static inline mrb_int
@@ -346,111 +521,114 @@ mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
static void
str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s)
{
- mrb_shared_string *shared;
- mrb_int len = RSTR_LEN(orig);
+ size_t len = (size_t)orig->as.heap.len;
mrb_assert(!RSTR_EMBED_P(orig));
- if (RSTR_SHARED_P(orig)) {
- shared = orig->as.heap.aux.shared;
- shared->refcnt++;
- s->as.heap.ptr = orig->as.heap.ptr;
- s->as.heap.len = len;
- s->as.heap.aux.shared = shared;
- RSTR_SET_SHARED_FLAG(s);
- RSTR_UNSET_EMBED_FLAG(s);
+ if (RSTR_NOFREE_P(orig)) {
+ str_init_nofree(s, orig->as.heap.ptr, len);
+ }
+ else if (RSTR_SHARED_P(orig)) {
+ str_init_shared(mrb, orig, s, orig->as.heap.aux.shared);
}
else if (RSTR_FSHARED_P(orig)) {
- struct RString *fs;
-
- fs = orig->as.heap.aux.fshared;
- s->as.heap.ptr = orig->as.heap.ptr;
- s->as.heap.len = len;
- s->as.heap.aux.fshared = fs;
- RSTR_SET_FSHARED_FLAG(s);
- RSTR_UNSET_EMBED_FLAG(s);
+ str_init_fshared(orig, s, orig->as.heap.aux.fshared);
}
else if (MRB_FROZEN_P(orig) && !RSTR_POOL_P(orig)) {
- s->as.heap.ptr = orig->as.heap.ptr;
- s->as.heap.len = len;
- s->as.heap.aux.fshared = orig;
- RSTR_SET_FSHARED_FLAG(s);
- RSTR_UNSET_EMBED_FLAG(s);
+ str_init_fshared(orig, s, orig);
}
else {
- shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
- shared->refcnt = 2;
- shared->nofree = !!RSTR_NOFREE_P(orig);
- if (!shared->nofree && orig->as.heap.aux.capa > orig->as.heap.len) {
- shared->ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1);
- orig->as.heap.ptr = shared->ptr;
- }
- else {
- shared->ptr = orig->as.heap.ptr;
+ if (orig->as.heap.aux.capa > orig->as.heap.len) {
+ orig->as.heap.ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1);
+ orig->as.heap.aux.capa = len;
}
- orig->as.heap.aux.shared = shared;
- RSTR_SET_SHARED_FLAG(orig);
- shared->len = len;
- s->as.heap.aux.shared = shared;
- s->as.heap.ptr = shared->ptr;
- s->as.heap.len = len;
- RSTR_SET_SHARED_FLAG(s);
- RSTR_UNSET_EMBED_FLAG(s);
+ str_init_shared(mrb, orig, s, NULL);
+ str_init_shared(mrb, orig, orig, s->as.heap.aux.shared);
}
}
-static mrb_value
-byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+mrb_value
+mrb_str_pool(mrb_state *mrb, mrb_value str)
+{
+ struct RString *s = (struct RString *)mrb_malloc(mrb, sizeof(struct RString));
+ struct RString *orig = mrb_str_ptr(str);
+ const char *p = RSTR_PTR(orig);
+ size_t len = (size_t)RSTR_LEN(orig);
+
+ s->tt = MRB_TT_STRING;
+ s->c = mrb->string_class;
+ s->flags = 0;
+
+ if (RSTR_EMBEDDABLE_P(len)) {
+ str_init_embed(s, p, len);
+ }
+ else if (RSTR_NOFREE_P(orig)) {
+ str_init_nofree(s, p, len);
+ }
+ else {
+ str_init_normal(mrb, s, p, len);
+ }
+ RSTR_SET_POOL_FLAG(s);
+ MRB_SET_FROZEN_FLAG(s);
+ return mrb_obj_value(s);
+}
+
+mrb_value
+mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
struct RString *orig, *s;
orig = mrb_str_ptr(str);
- if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0 || len <= RSTRING_EMBED_LEN_MAX) {
- s = str_new(mrb, RSTR_PTR(orig)+beg, len);
+ s = mrb_obj_alloc_string(mrb);
+ if (RSTR_EMBEDDABLE_P(len)) {
+ str_init_embed(s, RSTR_PTR(orig)+beg, len);
}
else {
- s = mrb_obj_alloc_string(mrb);
str_make_shared(mrb, orig, s);
s->as.heap.ptr += beg;
s->as.heap.len = len;
}
+ RSTR_COPY_ASCII_FLAG(s, orig);
return mrb_obj_value(s);
}
+
+static void
+str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len)
+{
+ *pos = chars2bytes(str, 0, *pos);
+ *len = chars2bytes(str, *pos, *len);
+}
#ifdef MRB_UTF8_STRING
static inline mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
- beg = chars2bytes(str, 0, beg);
- len = chars2bytes(str, beg, len);
-
- return byte_subseq(mrb, str, beg, len);
+ str_range_to_bytes(str, &beg, &len);
+ return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
-#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
+#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
#endif
-static mrb_value
-str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+mrb_bool
+mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
{
- mrb_int clen = RSTRING_CHAR_LEN(str);
-
- if (len < 0) return mrb_nil_value();
- if (clen == 0) {
- len = 0;
+ if (str_len < *begp || *lenp < 0) return FALSE;
+ if (*begp < 0) {
+ *begp += str_len;
+ if (*begp < 0) return FALSE;
}
- else if (beg < 0) {
- beg = clen + beg;
- }
- if (beg > clen) return mrb_nil_value();
- if (beg < 0) {
- beg += clen;
- if (beg < 0) return mrb_nil_value();
- }
- if (len > clen - beg)
- len = clen - beg;
- if (len <= 0) {
- len = 0;
+ if (*lenp > str_len - *begp)
+ *lenp = str_len - *begp;
+ if (*lenp <= 0) {
+ *lenp = 0;
}
- return str_subseq(mrb, str, beg, len);
+ return TRUE;
+}
+
+static mrb_value
+str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
+{
+ return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ?
+ str_subseq(mrb, str, beg, len) : mrb_nil_value();
}
MRB_API mrb_int
@@ -490,41 +668,25 @@ str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset)
return mrb_str_index(mrb, str, ptr, len, offset);
}
-static void
-check_frozen(mrb_state *mrb, struct RString *s)
-{
- if (MRB_FROZEN_P(s)) {
- mrb_raise(mrb, E_FROZEN_ERROR, "can't modify frozen string");
- }
-}
-
static mrb_value
str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
{
- mrb_int len;
+ size_t len;
- check_frozen(mrb, s1);
+ mrb_check_frozen(mrb, s1);
if (s1 == s2) return mrb_obj_value(s1);
- s1->flags &= ~MRB_STR_NO_UTF;
- s1->flags |= s2->flags&MRB_STR_NO_UTF;
- len = RSTR_LEN(s2);
+ RSTR_COPY_ASCII_FLAG(s1, s2);
if (RSTR_SHARED_P(s1)) {
str_decref(mrb, s1->as.heap.aux.shared);
- RSTR_UNSET_SHARED_FLAG(s1);
}
else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1)
&& s1->as.heap.ptr) {
mrb_free(mrb, s1->as.heap.ptr);
}
- RSTR_UNSET_FSHARED_FLAG(s1);
- RSTR_UNSET_NOFREE_FLAG(s1);
- if (len <= RSTRING_EMBED_LEN_MAX) {
- RSTR_UNSET_SHARED_FLAG(s1);
- RSTR_UNSET_FSHARED_FLAG(s1);
- RSTR_SET_EMBED_FLAG(s1);
- memcpy(s1->as.ary, RSTR_PTR(s2), len);
- RSTR_SET_EMBED_LEN(s1, len);
+ len = (size_t)RSTR_LEN(s2);
+ if (RSTR_EMBEDDABLE_P(len)) {
+ str_init_embed(s1, RSTR_PTR(s2), len);
}
else {
str_make_shared(mrb, s2, s1);
@@ -536,7 +698,7 @@ str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
static mrb_int
str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
{
- char *s, *sbeg, *t;
+ const char *s, *sbeg, *t;
struct RString *ps = mrb_str_ptr(str);
mrb_int len = RSTRING_LEN(sub);
@@ -549,11 +711,12 @@ str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
s = RSTR_PTR(ps) + pos;
t = RSTRING_PTR(sub);
if (len) {
+ s = char_adjust(sbeg, sbeg + RSTR_LEN(ps), s);
while (sbeg <= s) {
if (memcmp(s, t, len) == 0) {
return (mrb_int)(s - RSTR_PTR(ps));
}
- s--;
+ s = char_backtrack(sbeg, s);
}
return -1;
}
@@ -641,67 +804,34 @@ mrb_locale_from_utf8(const char *utf8, int len)
#endif
MRB_API void
-mrb_str_modify(mrb_state *mrb, struct RString *s)
+mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s)
{
- check_frozen(mrb, s);
- s->flags &= ~MRB_STR_NO_UTF;
+ mrb_check_frozen(mrb, s);
if (RSTR_SHARED_P(s)) {
mrb_shared_string *shared = s->as.heap.aux.shared;
- if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
- s->as.heap.ptr = shared->ptr;
- s->as.heap.aux.capa = shared->len;
- RSTR_PTR(s)[s->as.heap.len] = '\0';
+ if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
+ s->as.heap.aux.capa = shared->capa;
+ s->as.heap.ptr[s->as.heap.len] = '\0';
mrb_free(mrb, shared);
}
else {
- char *ptr, *p;
- mrb_int len;
-
- p = RSTR_PTR(s);
- len = s->as.heap.len;
- if (len < RSTRING_EMBED_LEN_MAX) {
- RSTR_SET_EMBED_FLAG(s);
- RSTR_SET_EMBED_LEN(s, len);
- ptr = RSTR_PTR(s);
- }
- else {
- ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
- s->as.heap.ptr = ptr;
- s->as.heap.aux.capa = len;
- }
- if (p) {
- memcpy(ptr, p, len);
- }
- ptr[len] = '\0';
+ str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
str_decref(mrb, shared);
}
- RSTR_UNSET_SHARED_FLAG(s);
- return;
}
- if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
- char *p = s->as.heap.ptr;
- mrb_int len = s->as.heap.len;
-
- RSTR_UNSET_FSHARED_FLAG(s);
- RSTR_UNSET_NOFREE_FLAG(s);
- RSTR_UNSET_FSHARED_FLAG(s);
- if (len < RSTRING_EMBED_LEN_MAX) {
- RSTR_SET_EMBED_FLAG(s);
- RSTR_SET_EMBED_LEN(s, len);
- }
- else {
- s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
- s->as.heap.aux.capa = len;
- }
- if (p) {
- memcpy(RSTR_PTR(s), p, len);
- }
- RSTR_PTR(s)[len] = '\0';
- return;
+ else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
+ str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len);
}
}
+MRB_API void
+mrb_str_modify(mrb_state *mrb, struct RString *s)
+{
+ mrb_str_modify_keep_ascii(mrb, s);
+ RSTR_UNSET_ASCII_FLAG(s);
+}
+
MRB_API mrb_value
mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
{
@@ -728,23 +858,11 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
{
struct RString *s;
- if (!mrb_string_p(str0)) {
- mrb_raise(mrb, E_TYPE_ERROR, "expected String");
- }
-
+ check_null_byte(mrb, str0);
s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
- if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
- }
return RSTR_PTR(s);
}
-/*
- * call-seq: (Caution! String("abcd") change)
- * String("abcdefg") = String("abcd") + String("efg")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
MRB_API void
mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
{
@@ -752,12 +870,6 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
mrb_str_cat_str(mrb, self, other);
}
-/*
- * call-seq: (Caution! String("abcd") remain)
- * String("abcdefg") = String("abcd") + String("efg")
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
MRB_API mrb_value
mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
{
@@ -775,10 +887,13 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
/* 15.2.10.5.2 */
/*
- * call-seq: (Caution! String("abcd") remain) for stack_argument
- * String("abcdefg") = String("abcd") + String("efg")
+ * call-seq:
+ * str + other_str -> new_str
*
- * Returns a new string object containing a copy of <i>str</i>.
+ * Concatenation---Returns a new <code>String</code> containing
+ * <i>other_str</i> concatenated to <i>str</i>.
+ *
+ * "Hello from " + self.to_s #=> "Hello from main"
*/
static mrb_value
mrb_str_plus_m(mrb_state *mrb, mrb_value self)
@@ -838,7 +953,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
len = RSTRING_LEN(self)*times;
str2 = str_new(mrb, 0, len);
- str_with_class(mrb, str2, self);
+ str_with_class(str2, self);
p = RSTR_PTR(str2);
if (len > 0) {
n = RSTRING_LEN(self);
@@ -850,6 +965,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
memcpy(p + n, p, len-n);
}
p[RSTR_LEN(str2)] = '\0';
+ RSTR_COPY_ASCII_FLAG(str2, mrb_str_ptr(self));
return mrb_obj_value(str2);
}
@@ -979,15 +1095,27 @@ mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
}
/* ---------------------------------- */
+mrb_value mrb_mod_to_s(mrb_state *mrb, mrb_value klass);
+
MRB_API mrb_value
mrb_str_to_str(mrb_state *mrb, mrb_value str)
{
- if (!mrb_string_p(str)) {
+ switch (mrb_type(str)) {
+ case MRB_TT_STRING:
+ return str;
+ case MRB_TT_SYMBOL:
+ return mrb_sym2str(mrb, mrb_symbol(str));
+ case MRB_TT_FIXNUM:
+ return mrb_fixnum_to_str(mrb, str, 10);
+ case MRB_TT_CLASS:
+ case MRB_TT_MODULE:
+ return mrb_mod_to_s(mrb, str);
+ default:
return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
}
- return str;
}
+/* obslete: use RSTRING_PTR() */
MRB_API const char*
mrb_string_value_ptr(mrb_state *mrb, mrb_value str)
{
@@ -995,6 +1123,7 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value str)
return RSTRING_PTR(str);
}
+/* obslete: use RSTRING_LEN() */
MRB_API mrb_int
mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
{
@@ -1002,76 +1131,101 @@ mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
return RSTRING_LEN(ptr);
}
-void
-mrb_noregexp(mrb_state *mrb, mrb_value self)
-{
- mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
-}
-
-void
-mrb_regexp_check(mrb_state *mrb, mrb_value obj)
-{
- if (mrb_regexp_p(mrb, obj)) {
- mrb_noregexp(mrb, obj);
- }
-}
-
MRB_API mrb_value
mrb_str_dup(mrb_state *mrb, mrb_value str)
{
struct RString *s = mrb_str_ptr(str);
struct RString *dup = str_new(mrb, 0, 0);
- str_with_class(mrb, dup, str);
+ str_with_class(dup, str);
return str_replace(mrb, dup, s);
}
-static mrb_value
-mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
-{
- mrb_int idx;
+enum str_convert_range {
+ /* `beg` and `len` are byte unit in `0 ... str.bytesize` */
+ STR_BYTE_RANGE_CORRECTED = 1,
- mrb_regexp_check(mrb, indx);
- switch (mrb_type(indx)) {
- case MRB_TT_FIXNUM:
- idx = mrb_fixnum(indx);
+ /* `beg` and `len` are char unit in any range */
+ STR_CHAR_RANGE = 2,
-num_index:
- str = str_substr(mrb, str, idx, 1);
- if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
- return str;
+ /* `beg` and `len` are char unit in `0 ... str.size` */
+ STR_CHAR_RANGE_CORRECTED = 3,
- case MRB_TT_STRING:
- if (str_index_str(mrb, str, indx, 0) != -1)
- return mrb_str_dup(mrb, indx);
- return mrb_nil_value();
+ /* `beg` is out of range */
+ STR_OUT_OF_RANGE = -1
+};
- case MRB_TT_RANGE:
- goto range_arg;
+static enum str_convert_range
+str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len)
+{
+ if (!mrb_undef_p(alen)) {
+ *beg = mrb_int(mrb, indx);
+ *len = mrb_int(mrb, alen);
+ return STR_CHAR_RANGE;
+ }
+ else {
+ switch (mrb_type(indx)) {
+ case MRB_TT_FIXNUM:
+ *beg = mrb_fixnum(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
- default:
- indx = mrb_Integer(mrb, indx);
- if (mrb_nil_p(indx)) {
- range_arg:
- {
- mrb_int beg, len;
-
- len = RSTRING_CHAR_LEN(str);
- switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) {
- case 1:
- return str_subseq(mrb, str, beg, len);
- case 2:
- return mrb_nil_value();
+ case MRB_TT_STRING:
+ *beg = str_index_str(mrb, str, indx, 0);
+ if (*beg < 0) { break; }
+ *len = RSTRING_LEN(indx);
+ return STR_BYTE_RANGE_CORRECTED;
+
+ case MRB_TT_RANGE:
+ goto range_arg;
+
+ default:
+ indx = mrb_to_int(mrb, indx);
+ if (mrb_fixnum_p(indx)) {
+ *beg = mrb_fixnum(indx);
+ *len = 1;
+ return STR_CHAR_RANGE;
+ }
+range_arg:
+ *len = RSTRING_CHAR_LEN(str);
+ switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) {
+ case MRB_RANGE_OK:
+ return STR_CHAR_RANGE_CORRECTED;
+ case MRB_RANGE_OUT:
+ return STR_OUT_OF_RANGE;
default:
break;
- }
}
+
mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
+ }
+ }
+ return STR_OUT_OF_RANGE;
+}
+
+static mrb_value
+mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen)
+{
+ mrb_int beg, len;
+
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_CHAR_RANGE_CORRECTED:
+ return str_subseq(mrb, str, beg, len);
+ case STR_CHAR_RANGE:
+ str = str_substr(mrb, str, beg, len);
+ if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
+ return str;
+ case STR_BYTE_RANGE_CORRECTED:
+ if (mrb_string_p(indx)) {
+ return mrb_str_dup(mrb, indx);
+ }
+ else {
+ return mrb_str_byte_subseq(mrb, str, beg, len);
}
- idx = mrb_fixnum(indx);
- goto num_index;
+ case STR_OUT_OF_RANGE:
+ default:
+ return mrb_nil_value();
}
- return mrb_nil_value(); /* not reached */
}
/* 15.2.10.5.6 */
@@ -1118,20 +1272,118 @@ static mrb_value
mrb_str_aref_m(mrb_state *mrb, mrb_value str)
{
mrb_value a1, a2;
- mrb_int argc;
- argc = mrb_get_args(mrb, "o|o", &a1, &a2);
- if (argc == 2) {
- mrb_int n1, n2;
+ if (mrb_get_args(mrb, "o|o", &a1, &a2) == 1) {
+ a2 = mrb_undef_value();
+ }
+
+ return mrb_str_aref(mrb, str, a1, a2);
+}
+
+static mrb_noreturn void
+str_out_of_index(mrb_state *mrb, mrb_value index)
+{
+ mrb_raisef(mrb, E_INDEX_ERROR, "index %v out of string", index);
+}
- mrb_regexp_check(mrb, a1);
- mrb_get_args(mrb, "ii", &n1, &n2);
- return str_substr(mrb, str, n1, n2);
+static mrb_value
+str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep)
+{
+ const mrb_int shrink_threshold = 256;
+ struct RString *str = mrb_str_ptr(src);
+ mrb_int len = RSTR_LEN(str);
+ mrb_int replen, newlen;
+ char *strp;
+
+ if (end > len) { end = len; }
+
+ if (pos < 0 || pos > len) {
+ str_out_of_index(mrb, mrb_fixnum_value(pos));
}
- if (argc != 1) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
+
+ replen = (mrb_nil_p(rep) ? 0 : RSTRING_LEN(rep));
+ newlen = replen + len - (end - pos);
+
+ if (newlen >= MRB_INT_MAX || newlen < replen /* overflowed */) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big");
+ }
+
+ mrb_str_modify(mrb, str);
+
+ if (len < newlen) {
+ resize_capa(mrb, str, newlen);
+ }
+
+ strp = RSTR_PTR(str);
+
+ memmove(strp + newlen - (len - end), strp + end, len - end);
+ if (!mrb_nil_p(rep)) {
+ memmove(strp + pos, RSTRING_PTR(rep), replen);
+ }
+ RSTR_SET_LEN(str, newlen);
+ strp[newlen] = '\0';
+
+ if (len - newlen >= shrink_threshold) {
+ resize_capa(mrb, str, newlen);
+ }
+
+ return src;
+}
+
+static void
+mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
+{
+ mrb_int beg, len, charlen;
+
+ mrb_to_str(mrb, replace);
+
+ switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) {
+ case STR_OUT_OF_RANGE:
+ default:
+ mrb_raise(mrb, E_INDEX_ERROR, "string not matched");
+ case STR_CHAR_RANGE:
+ if (len < 0) {
+ mrb_raisef(mrb, E_INDEX_ERROR, "negative length %v", alen);
+ }
+ charlen = RSTRING_CHAR_LEN(str);
+ if (beg < 0) { beg += charlen; }
+ if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); }
+ /* fall through */
+ case STR_CHAR_RANGE_CORRECTED:
+ str_range_to_bytes(str, &beg, &len);
+ /* fall through */
+ case STR_BYTE_RANGE_CORRECTED:
+ str_replace_partial(mrb, str, beg, beg + len, replace);
+ }
+}
+
+/*
+ * call-seq:
+ * str[fixnum] = replace
+ * str[fixnum, fixnum] = replace
+ * str[range] = replace
+ * str[regexp] = replace
+ * str[regexp, fixnum] = replace
+ * str[other_str] = replace
+ *
+ * Modify +self+ by replacing the content of +self+.
+ * The portion of the string affected is determined using the same criteria as +String#[]+.
+ */
+static mrb_value
+mrb_str_aset_m(mrb_state *mrb, mrb_value str)
+{
+ mrb_value indx, alen, replace;
+
+ switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) {
+ case 2:
+ replace = alen;
+ alen = mrb_undef_value();
+ break;
+ case 3:
+ break;
}
- return mrb_str_aref(mrb, str, a1);
+ mrb_str_aset(mrb, str, indx, alen, replace);
+ return str;
}
/* 15.2.10.5.8 */
@@ -1154,7 +1406,7 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
mrb_bool modify = FALSE;
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value();
p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s);
if (ISLOWER(*p)) {
@@ -1213,7 +1465,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
struct RString *s = mrb_str_ptr(str);
argc = mrb_get_args(mrb, "|S", &rs);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
len = RSTR_LEN(s);
if (argc == 0) {
if (len == 0) return mrb_nil_value();
@@ -1312,7 +1564,7 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
{
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
if (RSTR_LEN(s) > 0) {
mrb_int len;
#ifdef MRB_UTF8_STRING
@@ -1381,7 +1633,7 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
mrb_bool modify = FALSE;
struct RString *s = mrb_str_ptr(str);
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
p = RSTR_PTR(s);
pend = RSTR_PTR(s) + RSTR_LEN(s);
while (p < pend) {
@@ -1538,32 +1790,26 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_index_m(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- mrb_int argc;
mrb_value sub;
- mrb_int pos, clen;
+ mrb_int pos;
- mrb_get_args(mrb, "*!", &argv, &argc);
- if (argc == 2) {
- mrb_get_args(mrb, "oi", &sub, &pos);
- }
- else {
- pos = 0;
- if (argc > 0)
- sub = argv[0];
- else
+ switch (mrb_get_args(mrb, "|oi", &sub, &pos)) {
+ case 0:
sub = mrb_nil_value();
+ /* fall through */
+ case 1:
+ pos = 0;
+ break;
+ case 2:
+ if (pos < 0) {
+ mrb_int clen = RSTRING_CHAR_LEN(str);
+ pos += clen;
+ if (pos < 0) {
+ return mrb_nil_value();
+ }
+ }
+ break;
}
- mrb_regexp_check(mrb, sub);
- clen = RSTRING_CHAR_LEN(str);
- if (pos < 0) {
- pos += clen;
- if (pos < 0) {
- return mrb_nil_value();
- }
- }
- if (pos > clen) return mrb_nil_value();
- pos = chars2bytes(str, 0, pos);
switch (mrb_type(sub)) {
default: {
@@ -1571,24 +1817,21 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
tmp = mrb_check_string_type(mrb, sub);
if (mrb_nil_p(tmp)) {
- mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
+ mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %v given", sub);
}
sub = tmp;
}
/* fall through */
case MRB_TT_STRING:
- pos = str_index_str(mrb, str, sub, pos);
+ pos = str_index_str_by_char(mrb, str, sub, pos);
break;
}
if (pos == -1) return mrb_nil_value();
- pos = bytes2chars(RSTRING_PTR(str), pos);
BYTES_ALIGN_CHECK(pos);
return mrb_fixnum_value(pos);
}
-#define STR_REPLACE_SHARED_MIN 10
-
/* 15.2.10.5.24 */
/* 15.2.10.5.28 */
/*
@@ -1698,6 +1941,18 @@ mrb_ptr_to_str(mrb_state *mrb, void *p)
return mrb_obj_value(p_str);
}
+static inline void
+str_reverse(char *p, char *e)
+{
+ char c;
+
+ while (p < e) {
+ c = *p;
+ *p++ = *e;
+ *e-- = c;
+ }
+}
+
/* 15.2.10.5.30 */
/*
* call-seq:
@@ -1708,53 +1963,38 @@ mrb_ptr_to_str(mrb_state *mrb, void *p)
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
+ struct RString *s = mrb_str_ptr(str);
+ char *p, *e;
+
#ifdef MRB_UTF8_STRING
mrb_int utf8_len = RSTRING_CHAR_LEN(str);
- mrb_int len = RSTRING_LEN(str);
-
- if (utf8_len == len) goto bytes;
- if (utf8_len > 1) {
- char *buf;
- char *p, *e, *r;
-
- mrb_str_modify(mrb, mrb_str_ptr(str));
- len = RSTRING_LEN(str);
- buf = (char*)mrb_malloc(mrb, (size_t)len);
- p = buf;
- e = buf + len;
-
- memcpy(buf, RSTRING_PTR(str), len);
- r = RSTRING_PTR(str) + len;
+ mrb_int len = RSTR_LEN(s);
+ if (utf8_len < 2) return str;
+ if (utf8_len < len) {
+ mrb_str_modify(mrb, s);
+ p = RSTR_PTR(s);
+ e = p + RSTR_LEN(s);
while (p<e) {
mrb_int clen = utf8len(p, e);
- r -= clen;
- memcpy(r, p, clen);
+ str_reverse(p, p + clen - 1);
p += clen;
}
- mrb_free(mrb, buf);
+ goto bytes;
}
- return str;
-
- bytes:
#endif
- {
- struct RString *s = mrb_str_ptr(str);
- char *p, *e;
- char c;
+ if (RSTR_LEN(s) > 1) {
mrb_str_modify(mrb, s);
- if (RSTR_LEN(s) > 1) {
- p = RSTR_PTR(s);
- e = p + RSTR_LEN(s) - 1;
- while (p < e) {
- c = *p;
- *p++ = *e;
- *e-- = c;
- }
- }
- return str;
+ goto bytes;
}
+ return str;
+
+ bytes:
+ p = RSTR_PTR(s);
+ e = p + RSTR_LEN(s) - 1;
+ str_reverse(p, e);
+ return str;
}
/* ---------------------------------- */
@@ -1797,32 +2037,27 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_rindex(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- mrb_int argc;
mrb_value sub;
mrb_int pos, len = RSTRING_CHAR_LEN(str);
- mrb_get_args(mrb, "*!", &argv, &argc);
- if (argc == 2) {
- mrb_get_args(mrb, "oi", &sub, &pos);
- if (pos < 0) {
- pos += len;
+ switch (mrb_get_args(mrb, "|oi", &sub, &pos)) {
+ case 0:
+ sub = mrb_nil_value();
+ /* fall through */
+ case 1:
+ pos = len;
+ break;
+ case 2:
if (pos < 0) {
- mrb_regexp_check(mrb, sub);
- return mrb_nil_value();
+ pos += len;
+ if (pos < 0) {
+ return mrb_nil_value();
+ }
}
- }
- if (pos > len) pos = len;
- }
- else {
- pos = len;
- if (argc > 0)
- sub = argv[0];
- else
- sub = mrb_nil_value();
+ if (pos > len) pos = len;
+ break;
}
pos = chars2bytes(str, 0, pos);
- mrb_regexp_check(mrb, sub);
switch (mrb_type(sub)) {
default: {
@@ -1830,7 +2065,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
tmp = mrb_check_string_type(mrb, sub);
if (mrb_nil_p(tmp)) {
- mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
+ mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %v given", sub);
}
sub = tmp;
}
@@ -1838,7 +2073,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
case MRB_TT_STRING:
pos = str_rindex(mrb, str, sub, pos);
if (pos >= 0) {
- pos = bytes2chars(RSTRING_PTR(str), pos);
+ pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos);
BYTES_ALIGN_CHECK(pos);
return mrb_fixnum_value(pos);
}
@@ -1916,16 +2151,11 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
if (argc == 0 || mrb_nil_p(spat)) {
split_type = awk;
}
- else {
- if (mrb_string_p(spat)) {
- split_type = string;
- if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
- split_type = awk;
- }
- }
- else {
- mrb_noregexp(mrb, str);
- }
+ else if (!mrb_string_p(spat)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+ }
+ else if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
+ split_type = awk;
}
result = mrb_ary_new(mrb);
@@ -1951,7 +2181,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
else if (ISSPACE(c)) {
- mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai);
skip = TRUE;
beg = idx;
@@ -1962,7 +2192,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
}
- else if (split_type == string) {
+ else { /* split_type == string */
mrb_int str_len = RSTRING_LEN(str);
mrb_int pat_len = RSTRING_LEN(spat);
mrb_int idx = 0;
@@ -1976,22 +2206,19 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else {
end = chars2bytes(str, idx, 1);
}
- mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
+ mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
mrb_gc_arena_restore(mrb, ai);
idx += end + pat_len;
if (lim_p && lim <= ++i) break;
}
beg = idx;
}
- else {
- mrb_noregexp(mrb, str);
- }
if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg) {
tmp = mrb_str_new_empty(mrb, str);
}
else {
- tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+ tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
}
mrb_ary_push(mrb, result, tmp);
}
@@ -2005,7 +2232,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
return result;
}
-MRB_API mrb_value
+static mrb_value
mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck)
{
const char *p = str;
@@ -2089,7 +2316,7 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base,
break;
default:
if (base < 2 || 36 < base) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
}
break;
} /* end of switch (base) { */
@@ -2149,8 +2376,7 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base,
else
#endif
{
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer",
- mrb_str_new(mrb, str, pend-str));
+ mrb_raisef(mrb, E_RANGE_ERROR, "string (%l) too big for integer", str, pend-str);
}
}
}
@@ -2166,35 +2392,49 @@ mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base,
mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
/* not reached */
bad:
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)",
- mrb_inspect(mrb, mrb_str_new(mrb, str, pend-str)));
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%!l)", str, pend-str);
/* not reached */
return mrb_fixnum_value(0);
}
MRB_API mrb_value
-mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+mrb_cstr_to_inum(mrb_state *mrb, const char *str, mrb_int base, mrb_bool badcheck)
{
return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
}
+/* obslete: use RSTRING_CSTR() or mrb_string_cstr() */
MRB_API const char*
mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
{
- mrb_value str = mrb_to_str(mrb, *ptr);
- struct RString *ps = mrb_str_ptr(str);
- mrb_int len = mrb_str_strlen(mrb, ps);
- char *p = RSTR_PTR(ps);
+ struct RString *ps;
+ const char *p;
+ mrb_int len;
- if (!p || p[len] != '\0') {
- if (MRB_FROZEN_P(ps)) {
- *ptr = str = mrb_str_dup(mrb, str);
- ps = mrb_str_ptr(str);
- }
+ check_null_byte(mrb, *ptr);
+ ps = mrb_str_ptr(*ptr);
+ p = RSTR_PTR(ps);
+ len = RSTR_LEN(ps);
+ if (p[len] == '\0') {
+ return p;
+ }
+ if (MRB_FROZEN_P(ps) || RSTR_CAPA(ps) == len) {
+ ps = str_new(mrb, NULL, len+1);
+ memcpy(RSTR_PTR(ps), p, len);
+ RSTR_SET_LEN(ps, len);
+ *ptr = mrb_obj_value(ps);
+ }
+ else {
mrb_str_modify(mrb, ps);
- return RSTR_PTR(ps);
}
- return p;
+ RSTR_PTR(ps)[len] = '\0';
+ return RSTR_PTR(ps);
+}
+
+MRB_API const char*
+mrb_string_cstr(mrb_state *mrb, mrb_value str)
+{
+ return mrb_string_value_cstr(mrb, &str);
}
MRB_API mrb_value
@@ -2203,7 +2443,8 @@ mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
const char *s;
mrb_int len;
- s = mrb_string_value_ptr(mrb, str);
+ mrb_to_str(mrb, str);
+ s = RSTRING_PTR(str);
len = RSTRING_LEN(str);
return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
}
@@ -2236,7 +2477,7 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self)
mrb_get_args(mrb, "|i", &base);
if (base < 0) {
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base);
}
return mrb_str_to_inum(mrb, self, base, FALSE);
}
@@ -2261,7 +2502,7 @@ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
if (p == end) {
if (badcheck) {
bad:
- mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%s)", p);
/* not reached */
}
return d;
@@ -2308,22 +2549,7 @@ bad:
MRB_API double
mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
{
- char *s;
- mrb_int len;
-
- mrb_to_str(mrb, str);
- s = RSTRING_PTR(str);
- len = RSTRING_LEN(str);
- if (s) {
- if (badcheck && memchr(s, '\0', len)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
- }
- if (s[len]) { /* no sentinel somehow */
- struct RString *temp_str = str_new(mrb, s, len);
- s = RSTR_PTR(temp_str);
- }
- }
- return mrb_cstr_to_dbl(mrb, s, badcheck);
+ return mrb_cstr_to_dbl(mrb, RSTRING_CSTR(mrb, str), badcheck);
}
/* 15.2.10.5.39 */
@@ -2378,7 +2604,7 @@ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
char *p, *pend;
mrb_bool modify = FALSE;
- mrb_str_modify(mrb, s);
+ mrb_str_modify_keep_ascii(mrb, s);
p = RSTRING_PTR(str);
pend = RSTRING_END(str);
while (p < pend) {
@@ -2459,7 +2685,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
}
result = str_new(mrb, 0, len);
- str_with_class(mrb, result, str);
+ str_with_class(result, str);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
q = RSTR_PTR(result);
*q++ = '"';
@@ -2621,6 +2847,9 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
const char *p, *pend;
char buf[CHAR_ESC_LEN + 1];
mrb_value result = mrb_str_new_lit(mrb, "\"");
+#ifdef MRB_UTF8_STRING
+ uint32_t ascii_flag = MRB_STR_ASCII;
+#endif
p = RSTRING_PTR(str); pend = RSTRING_END(str);
for (;p < pend; p++) {
@@ -2637,6 +2866,7 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
}
mrb_str_cat(mrb, result, buf, clen);
p += clen-1;
+ ascii_flag = 0;
continue;
}
#endif
@@ -2678,6 +2908,10 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
}
}
mrb_str_cat_lit(mrb, result, "\"");
+#ifdef MRB_UTF8_STRING
+ mrb_str_ptr(str)->flags |= ascii_flag;
+ mrb_str_ptr(result)->flags |= ascii_flag;
+#endif
return result;
}
@@ -2711,7 +2945,8 @@ mrb_init_string(mrb_state *mrb)
{
struct RClass *s;
- mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string");
+ mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << MRB_STR_EMBED_LEN_BITSIZE),
+ "pointer size too big for embedded string");
mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
@@ -2723,6 +2958,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
+ mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
@@ -2754,6 +2990,7 @@ mrb_init_string(mrb_state *mrb)
#endif
mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
+ mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
@@ -2842,7 +3079,7 @@ mrb_float_read(const char *string, char **endPtr)
*/
p = string;
- while (isspace(*p)) {
+ while (ISSPACE(*p)) {
p += 1;
}
if (*p == '-') {
@@ -2865,7 +3102,7 @@ mrb_float_read(const char *string, char **endPtr)
for (mantSize = 0; ; mantSize += 1)
{
c = *p;
- if (!isdigit(c)) {
+ if (!ISDIGIT(c)) {
if ((c != '.') || (decPt >= 0)) {
break;
}
@@ -2950,7 +3187,7 @@ mrb_float_read(const char *string, char **endPtr)
}
expSign = FALSE;
}
- while (isdigit(*p)) {
+ while (ISDIGIT(*p)) {
exp = exp * 10 + (*p - '0');
if (exp > 19999) {
exp = 19999;