summary refs log tree commit diff homepage
path: root/src/string.c
diff options
context:
space:
mode:
author Paolo Bosetti <[email protected]> 2012-08-06 15:02:03 +0200
committer Paolo Bosetti <[email protected]> 2012-08-06 15:02:56 +0200
commit aa0d2f91447c49363059f2e95cb9023f65a6fbef (patch)
tree 2cfa325956e62648f2161564adfdf6dddc45b737 /src/string.c
parent fd097b8aff7b91bd105fc1daec5a4050a947b763 (diff)
parent 193c98ae540d43d082795fd77ea81a4f6f7fd0f6 (diff)
download mruby-aa0d2f91447c49363059f2e95cb9023f65a6fbef.tar.gz
mruby-aa0d2f91447c49363059f2e95cb9023f65a6fbef.zip
Updated Xcode project build settings in conformity with 10.8/Xcode 4.4
Diffstat (limited to 'src/string.c')
-rw-r--r-- src/string.c 512
1 files changed, 262 insertions, 250 deletions
diff --git a/src/string.c b/src/string.c
index 26ed7b052..aef0ac88f 100644
--- a/src/string.c
+++ b/src/string.c
@@ -10,56 +10,65 @@
#include <string.h>
#include "mruby/string.h"
#include <ctype.h>
-#include "mruby/numeric.h"
+#include <limits.h>
#include "mruby/range.h"
#include "mruby/array.h"
#include "mruby/class.h"
-#include "mruby/variable.h"
#include <stdio.h>
+#ifdef ENABLE_REGEXP
#include "re.h"
-#ifdef INCLUDE_REGEXP
#include "regex.h"
-#include "st.h"
-#endif //INCLUDE_REGEXP
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
+#endif //ENABLE_REGEXP
const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote);
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
-static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len);
+static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len);
#define RESIZE_CAPA(s,capacity) do {\
- s->buf = mrb_realloc(mrb, s->buf, (capacity)+1);\
+ s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\
s->aux.capa = capacity;\
} while (0)
+void
+mrb_str_decref(mrb_state *mrb, struct mrb_shared_string *shared)
+{
+ shared->refcnt--;
+ if (shared->refcnt == 0) {
+ mrb_free(mrb, shared->ptr);
+ mrb_free(mrb, shared);
+ }
+}
+
static void
str_modify(mrb_state *mrb, struct RString *s)
{
if (s->flags & MRB_STR_SHARED) {
- char *ptr, *p;
- long len;
+ struct mrb_shared_string *shared = s->aux.shared;
- p = s->buf;
- len = s->len;
- ptr = mrb_malloc(mrb, sizeof(char)*(len+1));
- if (p) {
- memcpy(ptr, p, len);
+ if (shared->refcnt == 1 && s->ptr == shared->ptr) {
+ s->ptr = shared->ptr;
+ s->aux.capa = shared->len;
+ mrb_free(mrb, shared);
+ }
+ else {
+ char *ptr, *p;
+ long len;
+
+ p = s->ptr;
+ len = s->len;
+ ptr = (char *)mrb_malloc(mrb, len+1);
+ if (p) {
+ memcpy(ptr, p, len);
+ }
+ ptr[len] = 0;
+ s->ptr = ptr;
+ s->aux.capa = len;
+ mrb_str_decref(mrb, shared);
}
- ptr[len] = 0;
- s->buf = ptr;
- s->len = len;
- s->aux.capa = len;
s->flags &= ~MRB_STR_SHARED;
}
}
@@ -74,13 +83,11 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, int len)
slen = s->len;
if (len != slen) {
if (slen < len || slen -len > 1024) {
- s->buf = mrb_realloc(mrb, s->buf, len+1);
- }
- if (!(s->flags & MRB_STR_SHARED)) {
- s->aux.capa = len;
+ s->ptr = (char *)mrb_realloc(mrb, s->ptr, len+1);
}
+ s->aux.capa = len;
s->len = len;
- s->buf[len] = '\0'; /* sentinel */
+ s->ptr[len] = '\0'; /* sentinel */
}
return str;
}
@@ -90,8 +97,8 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
{
struct RString *s = mrb_str_ptr(str);
- if (s->buf != p || s->len != len) {
- mrb_raise(mrb, mrb->eRuntimeError_class, "string modified");
+ if (s->ptr != p || s->len != len) {
+ mrb_raise(mrb, E_RUNTIME_ERROR, "string modified");
}
}
@@ -105,7 +112,7 @@ str_alloc(mrb_state *mrb, struct RClass *c)
s = mrb_obj_alloc_string(mrb);
s->c = c;
- s->buf = 0;
+ s->ptr = 0;
s->len = 0;
s->aux.capa = 0;
@@ -126,11 +133,11 @@ str_new(mrb_state *mrb, const char *p, int len)
s->len = len;
s->aux.capa = len;
- s->buf = mrb_malloc(mrb, len+1);
+ s->ptr = (char *)mrb_malloc(mrb, len+1);
if (p) {
- memcpy(s->buf, p, len);
+ memcpy(s->ptr, p, len);
}
- s->buf[len] = '\0';
+ s->ptr[len] = '\0';
return s;
}
@@ -161,8 +168,8 @@ mrb_str_buf_new(mrb_state *mrb, int capa)
}
s->len = 0;
s->aux.capa = capa;
- s->buf = mrb_malloc(mrb, capa+1);
- s->buf[0] = '\0';
+ s->ptr = (char *)mrb_malloc(mrb, capa+1);
+ s->ptr[0] = '\0';
return mrb_obj_value(s);
}
@@ -173,18 +180,18 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len)
long capa, total, off = -1;
str_modify(mrb, s);
- if (ptr >= s->buf && ptr <= s->buf + s->len) {
- off = ptr - s->buf;
+ if (ptr >= s->ptr && ptr <= s->ptr + s->len) {
+ off = ptr - s->ptr;
}
if (len == 0) return;
capa = s->aux.capa;
- if (s->len >= LONG_MAX - len) {
+ if (s->len >= INT_MAX - len) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
}
total = s->len+len;
if (capa <= total) {
while (total > capa) {
- if (capa + 1 >= LONG_MAX / 2) {
+ if (capa + 1 >= INT_MAX / 2) {
capa = (total + 4095) / 4096;
break;
}
@@ -193,11 +200,11 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len)
RESIZE_CAPA(s, capa);
}
if (off != -1) {
- ptr = s->buf + off;
+ ptr = s->ptr + off;
}
- memcpy(s->buf + s->len, ptr, len);
+ memcpy(s->ptr + s->len, ptr, len);
s->len = total;
- s->buf[total] = '\0'; /* sentinel */
+ s->ptr[total] = '\0'; /* sentinel */
}
mrb_value
@@ -211,7 +218,9 @@ mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len)
mrb_value
mrb_str_new(mrb_state *mrb, const char *p, int len)
{
- struct RString *s = str_new(mrb, p, len);
+ struct RString *s;
+
+ s = str_new(mrb, p, len);
return mrb_obj_value(s);
}
@@ -240,39 +249,32 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p)
int len = strlen(p);
s = mrb_obj_alloc_string(mrb);
- s->buf = mrb_malloc(mrb, len+1);
- memcpy(s->buf, p, len);
- s->buf[len] = 0;
+ s->ptr = (char *)mrb_malloc(mrb, len+1);
+ memcpy(s->ptr, p, len);
+ s->ptr[len] = 0;
s->len = len;
s->aux.capa = len;
return mrb_obj_value(s);
}
-static struct RString*
-str_make_shared(mrb_state *mrb, mrb_value str)
+static void
+str_make_shared(mrb_state *mrb, struct RString *s)
{
- struct RString *orig, *s;
+ if (!(s->flags & MRB_STR_SHARED)) {
+ struct mrb_shared_string *shared = (struct mrb_shared_string *)mrb_malloc(mrb, sizeof(struct mrb_shared_string));
- s = str_new(mrb, 0, 0);
- str_with_class(mrb, s, str);
- orig = mrb_str_ptr(str);
- if (!(orig->flags & MRB_STR_SHARED)) {
- struct RString *shared = mrb_obj_alloc_string(mrb);
-
- shared->buf = orig->buf;
- shared->len = orig->len;
- shared->aux.capa = orig->aux.capa;
-
- orig->aux.shared = shared;
- orig->flags |= MRB_STR_SHARED;
+ shared->refcnt = 1;
+ if (s->aux.capa > s->len) {
+ s->ptr = shared->ptr = (char *)mrb_realloc(mrb, s->ptr, s->len+1);
+ }
+ else {
+ shared->ptr = s->ptr;
+ }
+ shared->len = s->len;
+ s->aux.shared = shared;
+ s->flags |= MRB_STR_SHARED;
}
- s->buf = orig->buf;
- s->len = orig->len;
- s->aux.shared = orig->aux.shared;
- s->flags |= MRB_STR_SHARED;
-
- return s;
}
/*
@@ -285,16 +287,19 @@ str_make_shared(mrb_state *mrb, mrb_value str)
mrb_value
mrb_str_literal(mrb_state *mrb, mrb_value str)
{
- struct RString *orig, *s;
+ struct RString *s, *orig;
+ struct mrb_shared_string *shared;
- s = str_new(mrb, 0, 0);
+ s = str_alloc(mrb, mrb->string_class);
orig = mrb_str_ptr(str);
- while (orig->flags & MRB_STR_SHARED) {
- orig = orig->aux.shared;
+ if (!(orig->flags & MRB_STR_SHARED)) {
+ str_make_shared(mrb, mrb_str_ptr(str));
}
- s->buf = orig->buf;
- s->len = orig->len;
- s->aux.shared = orig;
+ shared = orig->aux.shared;
+ shared->refcnt++;
+ s->ptr = shared->ptr;
+ s->len = shared->len;
+ s->aux.shared = shared;
s->flags |= MRB_STR_SHARED;
return mrb_obj_value(s);
@@ -312,7 +317,7 @@ mrb_str_body(mrb_value str, int *len_p)
struct RString *s = mrb_str_ptr(str);
*len_p = s->len;
- return s->buf;
+ return s->ptr;
}
/*
@@ -336,11 +341,11 @@ mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
if (s1->aux.capa < len) {
s1->aux.capa = len;
- s1->buf = mrb_realloc(mrb, s1->buf, len+1);
+ s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, len+1);
}
- memcpy(s1->buf+s1->len, s2->buf, s2->len);
+ memcpy(s1->ptr+s1->len, s2->ptr, s2->len);
s1->len = len;
- s1->buf[len] = 0;
+ s1->ptr[len] = 0;
}
/*
@@ -357,8 +362,8 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
struct RString *t;
t = str_new(mrb, 0, s->len + s2->len);
- memcpy(t->buf, s->buf, s->len);
- memcpy(t->buf + s->len, s2->buf, s2->len);
+ memcpy(t->ptr, s->ptr, s->len);
+ memcpy(t->ptr + s->len, s2->ptr, s2->len);
return mrb_obj_value(t);
}
@@ -426,14 +431,14 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
if (times < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
}
- if (times && INT32_MAX/times < RSTRING_LEN(self)) {
+ if (times && INT_MAX/times < RSTRING_LEN(self)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
}
len = RSTRING_LEN(self)*times;
str2 = str_new(mrb, 0, len);
str_with_class(mrb, str2, self);
- p = str2->buf;
+ p = str2->ptr;
if (len > 0) {
n = RSTRING_LEN(self);
memcpy(p, RSTRING_PTR(self), n);
@@ -468,7 +473,7 @@ mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
struct RString *s2 = mrb_str_ptr(str2);
len = lesser(s1->len, s2->len);
- retval = memcmp(s1->buf, s2->buf, len);
+ retval = memcmp(s1->ptr, s2->ptr, len);
if (retval == 0) {
if (s1->len == s2->len) return 0;
if (s1->len > s2->len) return 1;
@@ -659,7 +664,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long
static int
mrb_memsearch(const void *x0, int m, const void *y0, int n)
{
- const unsigned char *x = x0, *y = y0;
+ const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
if (m > n) return -1;
else if (m == n) {
@@ -676,7 +681,7 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n)
}
return -1;
}
- return mrb_memsearch_qs(x0, m, y0, n);
+ return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
}
static mrb_int
@@ -712,7 +717,7 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
/* should return shared string */
struct RString *s = mrb_str_ptr(str);
- return mrb_str_new(mrb, s->buf, s->len);
+ return mrb_str_new(mrb, s->ptr, s->len);
}
static mrb_value
@@ -730,12 +735,12 @@ num_index:
return str;
case MRB_TT_REGEX:
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
return mrb_str_subpat(mrb, str, indx, 0); //mrb_str_subpat(str, indx, INT2FIX(0));
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
return mrb_nil_value();
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
case MRB_TT_STRING:
if (mrb_str_index(mrb, str, indx, 0) != -1)
@@ -750,7 +755,7 @@ num_index:
len = RSTRING_LEN(str);
switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) {
- case 0/*FLASE*/:
+ case FALSE:
break;
case 2/*OTHER*/:
return mrb_nil_value();
@@ -822,12 +827,12 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
argc = mrb_get_args(mrb, "o|o", &a1, &a2);
if (argc == 2) {
if (mrb_type(a1) == MRB_TT_REGEX) {
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1]));
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
return mrb_nil_value();
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
}
return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
}
@@ -858,8 +863,8 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
struct RString *s = mrb_str_ptr(str);
str_modify(mrb, s);
- if (s->len == 0 || !s->buf) return mrb_nil_value();
- p = s->buf; pend = s->buf + s->len;
+ if (s->len == 0 || !s->ptr) return mrb_nil_value();
+ p = s->ptr; pend = s->ptr + s->len;
if (ISLOWER(*p)) {
*p = toupper(*p);
modify = 1;
@@ -918,14 +923,14 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
if (mrb_get_args(mrb, "|S", &rs) == 0) {
if (len == 0) return mrb_nil_value();
smart_chomp:
- if (s->buf[len-1] == '\n') {
+ if (s->ptr[len-1] == '\n') {
s->len--;
if (s->len > 0 &&
- s->buf[s->len-1] == '\r') {
+ s->ptr[s->len-1] == '\r') {
s->len--;
}
}
- else if (s->buf[len-1] == '\r') {
+ else if (s->ptr[len-1] == '\r') {
s->len--;
}
else {
@@ -935,7 +940,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
}
if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
- p = s->buf;
+ p = s->ptr;
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
while (len>0 && p[len-1] == '\n') {
@@ -1015,14 +1020,14 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
if (s->len > 0) {
int len;
len = s->len - 1;
- if (s->buf[len] == '\n') {
+ if (s->ptr[len] == '\n') {
if (len > 0 &&
- s->buf[len-1] == '\r') {
+ s->ptr[len-1] == '\r') {
len--;
}
}
s->len = len;
- s->buf[len] = '\0';
+ s->ptr[len] = '\0';
return str;
}
return mrb_nil_value();
@@ -1070,8 +1075,8 @@ mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
struct RString *s = mrb_str_ptr(str);
str_modify(mrb, s);
- p = s->buf;
- pend = s->buf + s->len;
+ p = s->ptr;
+ pend = s->ptr + s->len;
while (p < pend) {
if (ISUPPER(*p)) {
*p = tolower(*p);
@@ -1184,13 +1189,20 @@ mrb_str_eql(mrb_state *mrb, mrb_value self)
}
static mrb_value
-mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len)
+mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len)
{
- struct RString *s;
+ struct RString *orig, *s;
+ struct mrb_shared_string *shared;
- s = str_make_shared(mrb, str);
- s->buf += beg;
+ orig = mrb_str_ptr(str);
+ str_make_shared(mrb, orig);
+ shared = orig->aux.shared;
+ s = mrb_obj_alloc_string(mrb);
+ s->ptr = orig->ptr + beg;
s->len = len;
+ s->aux.shared = shared;
+ s->flags |= MRB_STR_SHARED;
+ shared->refcnt++;
return mrb_obj_value(s);
}
@@ -1226,7 +1238,7 @@ mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
return str;
}
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
{
@@ -1342,7 +1354,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
static mrb_value
mrb_str_gsub(mrb_state *mrb, mrb_value self)
{
- //return str_gsub(argc, argv, self, 0);
return str_gsub(mrb, self, 0);
}
@@ -1363,7 +1374,7 @@ mrb_str_gsub_bang(mrb_state *mrb, mrb_value self)
str_modify(mrb, s);
return str_gsub(mrb, s, 1);
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
mrb_int
mrb_str_hash(mrb_state *mrb, mrb_value str)
@@ -1371,7 +1382,7 @@ mrb_str_hash(mrb_state *mrb, mrb_value str)
/* 1-8-7 */
struct RString *s = mrb_str_ptr(str);
long len = s->len;
- char *p = s->buf;
+ char *p = s->ptr;
mrb_int key = 0;
while (len--) {
@@ -1457,10 +1468,8 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
int argc;
mrb_value sub;
- //mrb_value initpos;
mrb_int pos;
- //if (mrb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
mrb_get_args(mrb, "*", &argv, &argc);
if (argc == 2) {
pos = mrb_fixnum(argv[1]);
@@ -1486,7 +1495,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_REGEX:
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
if (pos > RSTRING_LEN(str))
return mrb_nil_value();
pos = mrb_str_offset(mrb, str, pos);
@@ -1494,7 +1503,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
pos = mrb_str_sublen(mrb, str, pos);
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
break;
case MRB_TT_FIXNUM: {
@@ -1528,16 +1537,43 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
return mrb_fixnum_value(pos);
}
+#define STR_REPLACE_SHARED_MIN 10
+
static mrb_value
str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
{
- int len = s2->len;
-
- str_modify(mrb, s1);
- s1->buf = mrb_realloc(mrb, s1->buf, len);
- memcpy(s1->buf, s2->buf, len);
- s1->len = s2->len;
- s2->aux.capa = s2->len;
+ if (s2->flags & MRB_STR_SHARED) {
+ L_SHARE:
+ if (s1->flags & MRB_STR_SHARED){
+ mrb_str_decref(mrb, s1->aux.shared);
+ }
+ else {
+ mrb_free(mrb, s1->ptr);
+ }
+ s1->ptr = s2->ptr;
+ s1->len = s2->len;
+ s1->aux.shared = s2->aux.shared;
+ s1->flags |= MRB_STR_SHARED;
+ s1->aux.shared->refcnt++;
+ }
+ else if (s2->len > STR_REPLACE_SHARED_MIN) {
+ str_make_shared(mrb, s2);
+ goto L_SHARE;
+ }
+ else {
+ if (s1->flags & MRB_STR_SHARED) {
+ mrb_str_decref(mrb, s1->aux.shared);
+ s1->flags &= ~MRB_STR_SHARED;
+ s1->ptr = (char *)mrb_malloc(mrb, s2->len+1);
+ }
+ else {
+ s1->ptr = (char *)mrb_realloc(mrb, s1->ptr, s2->len+1);
+ }
+ memcpy(s1->ptr, s2->ptr, s2->len);
+ s1->ptr[s2->len] = 0;
+ s1->len = s2->len;
+ s1->aux.capa = s2->len;
+ }
return mrb_obj_value(s1);
}
@@ -1577,12 +1613,6 @@ mrb_str_init(mrb_state *mrb, mrb_value self)
return self;
}
-mrb_sym
-mrb_intern_str(mrb_state *mrb, mrb_value str)
-{
- return mrb_intern(mrb, RSTRING_PTR(str));
-}
-
/* 15.2.10.5.25 */
/* 15.2.10.5.41 */
/*
@@ -1635,7 +1665,7 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str)
return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
}
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote)
{
@@ -1663,7 +1693,7 @@ get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote)
return mrb_reg_regcomp(mrb, pat);
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
/* 15.2.10.5.27 */
/*
@@ -1678,7 +1708,7 @@ get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote)
* 'hello'.match(/(.)\1/)[0] #=> "ll"
* 'hello'.match('xx') #=> nil
*/
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_match_m(mrb_state *mrb, mrb_value self)
{
@@ -1698,7 +1728,7 @@ mrb_str_match_m(mrb_state *mrb, mrb_value self)
}
return result;
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
/* ---------------------------------- */
/* 15.2.10.5.29 */
@@ -1721,7 +1751,7 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
s2 = str_new(mrb, 0, RSTRING(str)->len);
str_with_class(mrb, s2, str);
s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
- p = s2->buf;
+ p = s2->ptr;
while (e >= s) {
*p++ = *e--;
@@ -1745,7 +1775,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
str_modify(mrb, s);
if (s->len > 1) {
- p = s->buf;
+ p = s->ptr;
e = p + s->len - 1;
while (p < e) {
c = *p;
@@ -1787,13 +1817,13 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
if (ps->len - pos < len) {
pos = ps->len - len;
}
- sbeg = ps->buf;
- s = ps->buf + pos;
- t = psub->buf;
+ sbeg = ps->ptr;
+ s = ps->ptr + pos;
+ t = psub->ptr;
if (len) {
while (sbeg <= s) {
if (memcmp(s, t, len) == 0) {
- return s - ps->buf;
+ return s - ps->ptr;
}
s--;
}
@@ -1850,11 +1880,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
pos += len;
if (pos < 0) {
if (mrb_type(sub) == MRB_TT_REGEX) {
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
mrb_backref_set(mrb, mrb_nil_value());
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
}
return mrb_nil_value();
}
@@ -1871,7 +1901,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_REGEX:
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
pos = mrb_str_offset(mrb, str, pos);
if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
pos = mrb_reg_search(mrb, sub, str, pos, 1);
@@ -1880,7 +1910,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
if (pos >= 0) return mrb_fixnum_value(pos);
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
break;
case MRB_TT_FIXNUM: {
@@ -1914,7 +1944,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
return mrb_nil_value();
}
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
{
@@ -1926,7 +1956,6 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
if (mrb_reg_search(mrb, pat, str, *start, 0) >= 0) {
match = mrb_backref_get(mrb);
- //regs = RMATCH(match)->regs;
pmatch = mrb_match_ptr(match);
regs = &pmatch->rmatch->regs;
if (regs->beg[0] == regs->end[0]) {
@@ -1944,7 +1973,7 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
if (regs->num_regs == 1) {
return mrb_reg_nth_match(mrb, 0, match);
}
- result = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs);
+ result = mrb_ary_new_capa(mrb, regs->num_regs);
for (i=1; i < regs->num_regs; i++) {
mrb_ary_push(mrb, result, mrb_reg_nth_match(mrb, i, match));
}
@@ -1953,7 +1982,7 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
}
return mrb_nil_value();
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
/* 15.2.10.5.32 */
/*
@@ -1986,7 +2015,7 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
* <<cruel>> <<world>>
* rceu lowlr
*/
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_scan(mrb_state *mrb, mrb_value str)
{
@@ -1995,7 +2024,7 @@ mrb_str_scan(mrb_state *mrb, mrb_value str)
mrb_int start = 0;
mrb_value match = mrb_nil_value();
struct RString *ps = mrb_str_ptr(str);
- char *p = ps->buf;
+ char *p = ps->ptr;
long len = ps->len;
mrb_get_args(mrb, "o&", &pat, &b);
@@ -2020,7 +2049,7 @@ mrb_str_scan(mrb_state *mrb, mrb_value str)
mrb_backref_set(mrb, match);
return str;
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
static const char isspacetable[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
@@ -2089,27 +2118,16 @@ static const char isspacetable[256] = {
static mrb_value
mrb_str_split_m(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
int argc;
mrb_value spat = mrb_nil_value();
- mrb_value limit;
enum {awk, string, regexp} split_type = string;
long beg, end, i = 0;
- int lim = 0;
+ int lim = -1;
mrb_value result, tmp;
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc > 0)
- spat = argv[0];
- if (argc > 1)
- limit = argv[1];
- else
- limit = mrb_nil_value();
-
+ argc = mrb_get_args(mrb, "|oi", &spat, &lim);
if (argc == 2) {
- lim = mrb_fixnum(limit);
- if (lim <= 0) limit = mrb_nil_value();
- else if (lim == 1) {
+ if (lim == 1) {
if (RSTRING_LEN(str) == 0)
return mrb_ary_new_capa(mrb, 0);
return mrb_ary_new_from_values(mrb, 1, &str);
@@ -2117,35 +2135,34 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
i = 1;
}
- if (argc == 0) {
+ if (argc == 0 || mrb_nil_p(spat)) {
split_type = awk;
}
else {
-//fs_set:
if (mrb_type(spat) == MRB_TT_STRING) {
split_type = string;
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
if (RSTRING_LEN(spat) == 0) {
/* Special case - split into chars */
spat = mrb_reg_regcomp(mrb, spat);
split_type = regexp;
}
else {
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
split_type = awk;
}
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
}
else {
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
spat = get_pat(mrb, spat, 1);
split_type = regexp;
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
}
}
@@ -2168,22 +2185,47 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else {
end = ptr - bptr;
skip = 0;
- if (!mrb_nil_p(limit) && lim <= i) break;
+ if (lim >= 0 && lim <= i) break;
}
}
else if (ascii_isspace(c)) {
mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
skip = 1;
beg = ptr - bptr;
- if (!mrb_nil_p(limit)) ++i;
+ if (lim >= 0) ++i;
}
else {
end = ptr - bptr;
}
}
}
+ else if (split_type == string) {
+ char *ptr = RSTRING_PTR(str);
+ char *temp = ptr;
+ char *eptr = RSTRING_END(str);
+ long slen = RSTRING_LEN(spat);
+
+ if (slen == 0) {
+ while (ptr < eptr) {
+ mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr-temp, 1));
+ ptr++;
+ if (lim >= 0 && lim <= ++i) break;
+ }
+ }
+ else {
+ char *sptr = RSTRING_PTR(spat);
+
+ while (ptr < eptr &&
+ (end = mrb_memsearch(sptr, slen, ptr, eptr - ptr)) >= 0) {
+ mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
+ ptr += end + slen;
+ if (lim >= 0 && lim <= ++i) break;
+ }
+ }
+ beg = ptr - temp;
+ }
else {
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
char *ptr = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
long start = beg;
@@ -2225,20 +2267,20 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx));
mrb_ary_push(mrb, result, tmp);
}
- if (!mrb_nil_p(limit) && lim <= ++i) break;
+ if (lim >= 0 && lim <= ++i) break;
}
#else
mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
}
- if (RSTRING_LEN(str) > 0 && (!mrb_nil_p(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
+ if (RSTRING_LEN(str) > 0 && (lim >= 0 || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg)
tmp = mrb_str_new_empty(mrb, str);
else
tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
mrb_ary_push(mrb, result, tmp);
}
- if (mrb_nil_p(limit) && lim == 0) {
+ if (lim < 0) {
long len;
while ((len = RARRAY_LEN(result)) > 0 &&
(tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
@@ -2267,14 +2309,14 @@ mrb_block_given_p()
* returning <i>str</i>, or <code>nil</code> if no substitutions were
* performed.
*/
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
{
str_modify(mrb, str);
return mrb_nil_value();
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
/* 15.2.10.5.36 */
@@ -2315,7 +2357,7 @@ mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
* #=> "Is /bin/bash your preferred shell?"
*/
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_sub(mrb_state *mrb, mrb_value self)
{
@@ -2324,7 +2366,7 @@ mrb_str_sub(mrb_state *mrb, mrb_value self)
mrb_str_sub_bang(mrb, str);
return str;
}
-#endif //INCLUDE_REGEXP
+#endif //ENABLE_REGEXP
mrb_value
mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
@@ -2332,17 +2374,10 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
#define BDIGIT unsigned int
#define BDIGIT_DBL unsigned long
-// const char *s = str;
char *end;
char sign = 1;
-// char nondigit = 0;
int c;
-// BDIGIT_DBL num;
long len;
-// long blen = 1;
-// long i;
-// mrb_value z;
-// BDIGIT *zds;
unsigned long val;
#undef ISDIGIT
@@ -2459,28 +2494,30 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
}
len *= strlen(str)*sizeof(char);
- val = strtoul((char*)str, &end, base);
+ val = strtoul((char*)str, &end, base);
- if (badcheck) {
- if (end == str) goto bad; /* no number */
- while (*end && ISSPACE(*end)) end++;
- if (*end) goto bad; /* trailing garbage */
- }
+ if (badcheck) {
+ if (end == str) goto bad; /* no number */
+ while (*end && ISSPACE(*end)) end++;
+ if (*end) goto bad; /* trailing garbage */
+ }
- if (sign) return mrb_fixnum_value(val);
- else {
- long result = -(long)val;
- return mrb_fixnum_value(result);
- }
+ if (sign) return mrb_fixnum_value(val);
+ else {
+ long result = -(long)val;
+ return mrb_fixnum_value(result);
+ }
bad:
- printf("Integer");
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalide string for number(%s)", str);
+ /* not reached */
return mrb_fixnum_value(0);
}
+
char *
mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
{
struct RString *ps = mrb_str_ptr(*ptr);
- char *s = ps->buf;
+ char *s = ps->ptr;
if (!s || ps->len != strlen(s)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
@@ -2494,10 +2531,8 @@ mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
char *s;
int len;
- //StringValue(str);
mrb_string_value(mrb, &str);
if (badcheck) {
- //s = StringValueCStr(str);
s = mrb_string_value_cstr(mrb, &str);
}
else {
@@ -2506,8 +2541,7 @@ mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
if (s) {
len = RSTRING_LEN(str);
if (s[len]) { /* no sentinel somehow */
- //char *p = ALLOCA_N(char, len+1);
- char *p = mrb_malloc(mrb, len+1);
+ char *p = (char *)mrb_malloc(mrb, len+1);
//MEMCPY(p, s, char, len);
memcpy(p, s, sizeof(char)*len);
@@ -2544,10 +2578,8 @@ mrb_str_to_i(mrb_state *mrb, mrb_value self)
{
mrb_value *argv;
int argc;
- //mrb_value b;
int base;
- //mrb_scan_args(argc, *argv, "01", &b);
mrb_get_args(mrb, "*", &argv, &argc);
if (argc == 0)
base = 10;
@@ -2583,8 +2615,8 @@ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck)
if (p == end) {
if (badcheck) {
bad:
- //mrb_invalid_str(q, "Float()");
- printf("Float()\n");
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalide string for float(%s)", p);
+ /* not reached */
}
return d;
}
@@ -2643,7 +2675,7 @@ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
}
if (s[len]) { /* no sentinel somehow */
- char *p = mrb_malloc(mrb, len+1);
+ char *p = (char *)mrb_malloc(mrb, len+1);
memcpy(p, s, sizeof(char)*len);
p[len] = '\0';
@@ -2670,7 +2702,6 @@ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
static mrb_value
mrb_str_to_f(mrb_state *mrb, mrb_value self)
{
- //return mrb_float_new(mrb_str_to_dbl(self, 0/*Qfalse*/));
return mrb_float_value(mrb_str_to_dbl(mrb, self, 0/*Qfalse*/));
}
@@ -2778,7 +2809,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
len++;
}
else {
- len += 4; /* \xNN */
+ len += 4; /* \NNN */
}
break;
}
@@ -2787,7 +2818,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
result = str_new(mrb, 0, len);
str_with_class(mrb, result, str);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- q = result->buf;
+ q = result->ptr;
*q++ = '"';
while (p < pend) {
@@ -2849,19 +2880,10 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
mrb_value
mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
{
- struct RString *s = mrb_str_ptr(str);
-
if (len < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
}
- if (0/*STR_ASSOC_P(str)*/) {
- mrb_realloc(mrb, s->buf, s->len+len+1);
- memcpy(s->buf + s->len, ptr, len);
- s->len += len;
- s->buf[s->len] = '\0'; /* sentinel */
- return str;
- }
- str_buf_cat(mrb, s, ptr, len);
+ str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
return str;
}
@@ -2915,18 +2937,15 @@ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
mrb_value
mrb_str_inspect(mrb_state *mrb, mrb_value str)
{
- const char *p, *pend, *prev;
+ const char *p, *pend;
char buf[CHAR_ESC_LEN + 1];
- mrb_value result = mrb_str_new_cstr(mrb, "\"");
+ mrb_value result = mrb_str_new(mrb, "\"", 1);
p = RSTRING_PTR(str); pend = RSTRING_END(str);
- prev = p;
- while (p < pend) {
+ for (;p < pend; p++) {
unsigned int c, cc;
- int n;
- c = *p++;
- n = 1;
+ c = *p;
if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
buf[0] = '\\'; buf[1] = c;
mrb_str_buf_cat(mrb, result, buf, 2);
@@ -2949,25 +2968,18 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
default: cc = 0; break;
}
if (cc) {
- if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
buf[0] = '\\';
buf[1] = (char)cc;
mrb_str_buf_cat(mrb, result, buf, 2);
- prev = p;
continue;
}
else {
- if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
- sprintf(buf, "\\%03o", c & 0377);
- mrb_str_buf_cat(mrb, result, buf, strlen(buf));
- prev = p;
+ int n = sprintf(buf, "\\%03o", c & 0377);
+ mrb_str_buf_cat(mrb, result, buf, n);
continue;
}
}
-#ifdef INCLUDE_ENCODING
- if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev);
-#endif
- mrb_str_buf_cat(mrb, result, "\"", strlen("\""));
+ mrb_str_buf_cat(mrb, result, "\"", 1);
return result;
}
@@ -3002,7 +3014,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */
mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */
mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */
mrb_define_method(mrb, s, "gsub!", mrb_str_gsub_bang, ARGS_REQ(1)); /* 15.2.10.5.19 */
#endif
@@ -3012,19 +3024,19 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "initialize", mrb_str_init, ARGS_REQ(1)); /* 15.2.10.5.23 */
mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.24 */
mrb_define_method(mrb, s, "intern", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.25 */
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
mrb_define_method(mrb, s, "match", mrb_str_match_m, ARGS_REQ(1)); /* 15.2.10.5.27 */
#endif
mrb_define_method(mrb, s, "replace", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.28 */
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, ARGS_NONE()); /* 15.2.10.5.29 */
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, ARGS_NONE()); /* 15.2.10.5.30 */
mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, ARGS_ANY()); /* 15.2.10.5.31 */
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
mrb_define_method(mrb, s, "scan", mrb_str_scan, ARGS_REQ(1)); /* 15.2.10.5.32 */
#endif
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.34 */
mrb_define_method(mrb, s, "split", mrb_str_split_m, ARGS_ANY()); /* 15.2.10.5.35 */
-#ifdef INCLUDE_REGEXP
+#ifdef ENABLE_REGEXP
mrb_define_method(mrb, s, "sub", mrb_str_sub, ARGS_REQ(1)); /* 15.2.10.5.36 */
mrb_define_method(mrb, s, "sub!", mrb_str_sub_bang, ARGS_REQ(1)); /* 15.2.10.5.37 */
#endif