summary refs log tree commit diff homepage
path: root/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/string.c')
-rw-r--r-- src/string.c 242
1 files changed, 131 insertions, 111 deletions
diff --git a/src/string.c b/src/string.c
index cfef5730c..09777ac69 100644
--- a/src/string.c
+++ b/src/string.c
@@ -6,6 +6,8 @@
#include "mruby.h"
+#include <stddef.h>
+#include <stdlib.h>
#include <string.h>
#include "mruby/string.h"
#include "mruby/class.h"
@@ -20,19 +22,13 @@
const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
-static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len);
+static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
#define RESIZE_CAPA(s,capacity) do {\
s->ptr = (char *)mrb_realloc(mrb, s->ptr, (capacity)+1);\
s->aux.capa = capacity;\
} while (0)
-static const char*
-_obj_classname(mrb_state *mrb, mrb_value obj)
-{
- return mrb_class_name(mrb, mrb_obj_class(mrb, obj));
-}
-
void
mrb_str_decref(mrb_state *mrb, mrb_shared_string *shared)
{
@@ -56,11 +52,11 @@ str_modify(mrb_state *mrb, struct RString *s)
}
else {
char *ptr, *p;
- long len;
+ mrb_int len;
p = s->ptr;
len = s->len;
- ptr = (char *)mrb_malloc(mrb, len+1);
+ ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
if (p) {
memcpy(ptr, p, len);
}
@@ -104,21 +100,6 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
-static struct RString*
-str_alloc(mrb_state *mrb, struct RClass *c)
-{
- struct RString* s;
-
- s = mrb_obj_alloc_string(mrb);
-
- s->c = c;
- s->ptr = 0;
- s->len = 0;
- s->aux.capa = 0;
-
- return s;
-}
-
/* char offset to byte offset */
int
mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
@@ -129,8 +110,9 @@ mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
static struct RString*
str_new(mrb_state *mrb, const char *p, int len)
{
- struct RString *s = str_alloc(mrb, mrb->string_class);
+ struct RString *s;
+ s = mrb_obj_alloc_string(mrb);
s->len = len;
s->aux.capa = len;
s->ptr = (char *)mrb_malloc(mrb, len+1);
@@ -156,6 +138,10 @@ mrb_str_new_empty(mrb_state *mrb, mrb_value str)
return mrb_obj_value(s);
}
+#ifndef MRB_STR_BUF_MIN_SIZE
+# define MRB_STR_BUF_MIN_SIZE 128
+#endif
+
mrb_value
mrb_str_buf_new(mrb_state *mrb, int capa)
{
@@ -163,8 +149,8 @@ mrb_str_buf_new(mrb_state *mrb, int capa)
s = mrb_obj_alloc_string(mrb);
- if (capa < STR_BUF_MIN_SIZE) {
- capa = STR_BUF_MIN_SIZE;
+ if (capa < MRB_STR_BUF_MIN_SIZE) {
+ capa = MRB_STR_BUF_MIN_SIZE;
}
s->len = 0;
s->aux.capa = capa;
@@ -175,9 +161,11 @@ mrb_str_buf_new(mrb_state *mrb, int capa)
}
static void
-str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len)
+str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
{
- long capa, total, off = -1;
+ mrb_int capa;
+ mrb_int total;
+ ptrdiff_t off = -1;
str_modify(mrb, s);
if (ptr >= s->ptr && ptr <= s->ptr + s->len) {
@@ -185,13 +173,13 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len)
}
if (len == 0) return;
capa = s->aux.capa;
- if (s->len >= INT_MAX - len) {
+ if (s->len >= MRB_INT_MAX - len) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
}
total = s->len+len;
if (capa <= total) {
while (total > capa) {
- if (capa + 1 >= INT_MAX / 2) {
+ if (capa + 1 >= MRB_INT_MAX / 2) {
capa = (total + 4095) / 4096;
break;
}
@@ -208,7 +196,7 @@ str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, int len)
}
mrb_value
-mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len)
+mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
{
if (len == 0) return str;
str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
@@ -216,7 +204,7 @@ mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len)
}
mrb_value
-mrb_str_new(mrb_state *mrb, const char *p, int len)
+mrb_str_new(mrb_state *mrb, const char *p, size_t len)
{
struct RString *s;
@@ -224,17 +212,6 @@ mrb_str_new(mrb_state *mrb, const char *p, int len)
return mrb_obj_value(s);
}
-mrb_value
-mrb_str_new2(mrb_state *mrb, const char *ptr)
-{
- struct RString *s;
- if (!ptr) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given");
- }
- s = str_new(mrb, ptr, strlen(ptr));
- return mrb_obj_value(s);
-}
-
/*
* call-seq: (Caution! NULL string)
* String.new(str="") => new_str
@@ -246,11 +223,23 @@ mrb_value
mrb_str_new_cstr(mrb_state *mrb, const char *p)
{
struct RString *s;
- int len = strlen(p);
+ size_t len;
+
+ if (p) {
+ len = strlen(p);
+ if ((mrb_int)len < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
+ }
+ }
+ else {
+ len = 0;
+ }
s = mrb_obj_alloc_string(mrb);
s->ptr = (char *)mrb_malloc(mrb, len+1);
- memcpy(s->ptr, p, len);
+ if (p) {
+ memcpy(s->ptr, p, len);
+ }
s->ptr[len] = 0;
s->len = len;
s->aux.capa = len;
@@ -263,6 +252,10 @@ mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
{
mrb_value str;
+ if (!mrb_string_p(str0)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "expected String");
+ }
+
str = mrb_str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
if (strlen(RSTRING_PTR(str)) != RSTRING_LEN(str)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
@@ -302,7 +295,7 @@ mrb_str_literal(mrb_state *mrb, mrb_value str)
struct RString *s, *orig;
mrb_shared_string *shared;
- s = str_alloc(mrb, mrb->string_class);
+ s = mrb_obj_alloc_string(mrb);
orig = mrb_str_ptr(str);
if (!(orig->flags & MRB_STR_SHARED)) {
str_make_shared(mrb, mrb_str_ptr(str));
@@ -311,6 +304,7 @@ mrb_str_literal(mrb_state *mrb, mrb_value str)
shared->refcnt++;
s->ptr = shared->ptr;
s->len = shared->len;
+ s->aux.capa = 0;
s->aux.shared = shared;
s->flags |= MRB_STR_SHARED;
@@ -446,7 +440,7 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
if (times < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
}
- if (times && INT_MAX/times < RSTRING_LEN(self)) {
+ if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
}
@@ -532,10 +526,10 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
mrb_get_args(mrb, "o", &str2);
if (!mrb_string_p(str2)) {
- if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s"))) {
+ if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "to_s", 4))) {
return mrb_nil_value();
}
- else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) {
+ else if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "<=>", 3))) {
return mrb_nil_value();
}
else {
@@ -557,8 +551,9 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
static int
str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
{
- const long len = RSTRING_LEN(str1);
+ const size_t len = RSTRING_LEN(str1);
+ /* assert(SIZE_MAX >= MRB_INT_MAX) */
if (len != RSTRING_LEN(str2)) return FALSE;
if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0)
return TRUE;
@@ -571,7 +566,7 @@ mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
if (mrb_obj_equal(mrb, str1, str2)) return TRUE;
if (!mrb_string_p(str2)) {
if (mrb_nil_p(str2)) return FALSE;
- if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_str"))) {
+ if (!mrb_respond_to(mrb, str2, mrb_intern2(mrb, "to_str", 6))) {
return FALSE;
}
str2 = mrb_funcall(mrb, str2, "to_str", 0);
@@ -595,11 +590,12 @@ static mrb_value
mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
{
mrb_value str2;
+ mrb_bool equal_p;
mrb_get_args(mrb, "o", &str2);
- if (mrb_str_equal(mrb, str1, str2))
- return mrb_true_value();
- return mrb_false_value();
+ equal_p = mrb_str_equal(mrb, str1, str2);
+
+ return mrb_bool_value(equal_p);
}
/* ---------------------------------- */
mrb_value
@@ -657,8 +653,8 @@ mrb_str_match(mrb_state *mrb, mrb_value self/* x */)
return mrb_nil_value();
}
-static inline long
-mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
+static inline mrb_int
+mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
{
const unsigned char *x = xs, *xe = xs + m;
const unsigned char *y = ys;
@@ -677,8 +673,8 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long
return -1;
}
-static int
-mrb_memsearch(const void *x0, int m, const void *y0, int n)
+static mrb_int
+mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
{
const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
@@ -705,7 +701,8 @@ mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
{
mrb_int pos;
char *s, *sptr;
- int len, slen;
+ mrb_int len, slen;
+
len = RSTRING_LEN(str);
slen = RSTRING_LEN(sub);
if (offset < 0) {
@@ -739,9 +736,9 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
- long idx;
+ mrb_int idx;
- if (!strcmp(_obj_classname(mrb, indx), REGEXP_CLASS)) {
+ if (!strcmp(mrb_obj_classname(mrb, indx), REGEXP_CLASS)) {
mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
}
switch (mrb_type(indx)) {
@@ -920,7 +917,8 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
mrb_value rs;
mrb_int newline;
char *p, *pp;
- long len, rslen;
+ mrb_int rslen;
+ mrb_int len;
struct RString *s = mrb_str_ptr(str);
str_modify(mrb, s);
@@ -1131,9 +1129,7 @@ mrb_str_empty_p(mrb_state *mrb, mrb_value self)
{
struct RString *s = mrb_str_ptr(self);
- if (s->len == 0)
- return mrb_true_value();
- return mrb_false_value();
+ return mrb_bool_value(s->len == 0);
}
/* 15.2.10.5.17 */
@@ -1147,17 +1143,16 @@ static mrb_value
mrb_str_eql(mrb_state *mrb, mrb_value self)
{
mrb_value str2;
+ mrb_bool eql_p;
mrb_get_args(mrb, "o", &str2);
- if (mrb_type(str2) != MRB_TT_STRING)
- return mrb_false_value();
- if (str_eql(mrb, self, str2))
- return mrb_true_value();
- return mrb_false_value();
+ eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
+
+ return mrb_bool_value(eql_p);
}
static mrb_value
-mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len)
+mrb_str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
struct RString *orig, *s;
mrb_shared_string *shared;
@@ -1176,7 +1171,7 @@ mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len)
}
mrb_value
-mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len)
+mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
mrb_value str2;
@@ -1267,7 +1262,7 @@ mrb_str_hash(mrb_state *mrb, mrb_value str)
{
/* 1-8-7 */
struct RString *s = mrb_str_ptr(str);
- long len = s->len;
+ mrb_int len = s->len;
char *p = s->ptr;
mrb_int key = 0;
@@ -1311,18 +1306,20 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
 {
   mrb_int i;
   mrb_value str2;
+  mrb_bool include_p;
   mrb_get_args(mrb, "o", &str2);
   if (mrb_type(str2) == MRB_TT_FIXNUM) {
-    if (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)))
-      return mrb_true_value();
-    return mrb_false_value();
+    /* Fixnum argument: look for that byte value anywhere in the receiver.
+     * memchr() returns a pointer, so compare it against NULL explicitly
+     * instead of storing the pointer in an mrb_bool; if mrb_bool is a narrow
+     * integer type (its definition is not visible here -- confirm), the
+     * implicit conversion could truncate a non-NULL pointer to 0. */
+    include_p = (memchr(RSTRING_PTR(self), mrb_fixnum(str2), RSTRING_LEN(self)) != NULL);
+  }
+  else {
+    /* any other argument goes through mrb_string_value() and is then
+     * searched for from offset 0 with mrb_str_index() */
+    mrb_string_value(mrb, &str2);
+    i = mrb_str_index(mrb, self, str2, 0);
+
+    /* a result of -1 from mrb_str_index() is treated as "not found" */
+    include_p = (i != -1);
   }
-  mrb_string_value(mrb, &str2);
-  i = mrb_str_index(mrb, self, str2, 0);
-  if (i == -1) return mrb_false_value();
-  return mrb_true_value();
+  return mrb_bool_value(include_p);
 }
/* 15.2.10.5.22 */
@@ -1381,7 +1378,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_FIXNUM: {
int c = mrb_fixnum(sub);
- long len = RSTRING_LEN(str);
+ mrb_int len = RSTRING_LEN(str);
unsigned char *p = (unsigned char*)RSTRING_PTR(str);
for (;pos<len;pos++) {
@@ -1396,7 +1393,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
tmp = mrb_check_string_type(mrb, sub);
if (mrb_nil_p(tmp)) {
mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %s given",
- _obj_classname(mrb, sub));
+ mrb_obj_classname(mrb, sub));
}
sub = tmp;
}
@@ -1404,10 +1401,10 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
case MRB_TT_STRING:
pos = mrb_str_index(mrb, str, sub, pos);
break;
- }
+ }
- if (pos == -1) return mrb_nil_value();
- return mrb_fixnum_value(pos);
+ if (pos == -1) return mrb_nil_value();
+ return mrb_fixnum_value(pos);
}
#define STR_REPLACE_SHARED_MIN 10
@@ -1532,6 +1529,36 @@ mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
}
+/*
+ * Builds a new String holding the address in `p` written in lowercase
+ * hexadecimal with a "0x" prefix (a NULL pointer that converts to 0 yields
+ * "0x0"). No leading zeros are emitted.
+ *
+ *   mrb - interpreter state used for the allocation
+ *   p   - pointer whose address is formatted; it is never dereferenced
+ *
+ * Returns the new String wrapped with mrb_obj_value().
+ */
 mrb_value
+mrb_ptr_to_str(mrb_state *mrb, void *p)
+{
+  struct RString *p_str;
+  char *p1;
+  char *p2;
+  /* Unsigned on purpose: with a signed intptr_t an address whose top bit is
+   * set becomes negative, so `n % 16` would be negative (an out-of-bounds
+   * index into mrb_digitmap) and `n > 0` would end the loop after a single
+   * digit. */
+  uintptr_t n = (uintptr_t)p;
+
+  /* room for "0x" plus one hex digit per 4 bits of a pointer-sized integer;
+   * str_new() allocates one extra byte for the terminator */
+  p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
+  p1 = p_str->ptr;
+  *p1++ = '0';
+  *p1++ = 'x';
+  p2 = p1;
+
+  /* emit the digits least-significant first; they are reversed below */
+  do {
+    *p2++ = mrb_digitmap[n % 16];
+    n /= 16;
+  } while (n > 0);
+  *p2 = '\0';
+  /* actual length: prefix plus the digits really written */
+  p_str->len = (mrb_int)(p2 - p_str->ptr);
+
+  /* reverse the digit run in place; the "0x" prefix before p1 is untouched */
+  while (p1 < p2) {
+    const char c = *p1;
+    *p1++ = *--p2;
+    *p2 = c;
+  }
+
+  return mrb_obj_value(p_str);
+}
+
+mrb_value
mrb_check_string_type(mrb_state *mrb, mrb_value str)
{
return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
@@ -1637,7 +1664,7 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
char *s, *sbeg, *t;
struct RString *ps = mrb_str_ptr(str);
struct RString *psub = mrb_str_ptr(sub);
- long len = psub->len;
+ mrb_int len = psub->len;
/* substring longer than string */
if (ps->len < len) return -1;
@@ -1661,15 +1688,6 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
}
}
-#ifdef INCLUDE_ENCODING
-/* byte offset to char offset */
-int
-mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos)
-{
- return pos;
-}
-#endif //INCLUDE_ENCODING
-
/* 15.2.10.5.31 */
/*
* call-seq:
@@ -1728,7 +1746,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_FIXNUM: {
int c = mrb_fixnum(sub);
- long len = RSTRING_LEN(str);
+ mrb_int len = RSTRING_LEN(str);
unsigned char *p = (unsigned char*)RSTRING_PTR(str);
for (pos=len;pos>=0;pos--) {
@@ -1865,7 +1883,9 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
int argc;
mrb_value spat = mrb_nil_value();
enum {awk, string, regexp} split_type = string;
- long beg, end, i = 0, lim_p;
+ long i = 0, lim_p;
+ mrb_int beg;
+ mrb_int end;
mrb_int lim = 0;
mrb_value result, tmp;
@@ -1934,7 +1954,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
char *ptr = RSTRING_PTR(str);
char *temp = ptr;
char *eptr = RSTRING_END(str);
- long slen = RSTRING_LEN(spat);
+ mrb_int slen = RSTRING_LEN(spat);
if (slen == 0) {
int ai = mrb_gc_arena_save(mrb);
@@ -1972,7 +1992,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
mrb_ary_push(mrb, result, tmp);
}
if (!lim_p && lim == 0) {
- long len;
+ mrb_int len;
while ((len = RARRAY_LEN(result)) > 0 &&
(tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
mrb_ary_pop(mrb, result);
@@ -2054,7 +2074,8 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
char *end;
char sign = 1;
int c;
- unsigned long val;
+ unsigned long n;
+ mrb_int val;
#undef ISDIGIT
#define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
@@ -2158,19 +2179,18 @@ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
return mrb_fixnum_value(0);
}
- val = strtoul((char*)str, &end, base);
-
+ n = strtoul((char*)str, &end, base);
+ if (n > MRB_INT_MAX) {
+ mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%s) too big for integer", str);
+ }
+ val = n;
if (badcheck) {
if (end == str) goto bad; /* no number */
while (*end && ISSPACE(*end)) end++;
if (*end) goto bad; /* trailing garbage */
}
- if (sign) return mrb_fixnum_value(val);
- else {
- long result = -(long)val;
- return mrb_fixnum_value(result);
- }
+ return mrb_fixnum_value(sign ? val : -val);
bad:
mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%s)", str);
/* not reached */
@@ -2440,7 +2460,7 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
mrb_value
mrb_str_dump(mrb_state *mrb, mrb_value str)
{
- long len;
+ mrb_int len;
const char *p, *pend;
char *q;
struct RString *result;
@@ -2542,7 +2562,7 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
}
mrb_value
-mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
+mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, mrb_int len)
{
if (len < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
@@ -2552,7 +2572,7 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
}
/*
 * Appends the NUL-terminated C string `ptr` to `str`. The length is taken
 * with strlen() and the call is forwarded to mrb_str_cat(), whose result is
 * returned unchanged. `ptr` is handed straight to strlen(), so it must not
 * be NULL.
 */
mrb_value
-mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr)
+mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
{
return mrb_str_cat(mrb, str, ptr, strlen(ptr));
}