summaryrefslogtreecommitdiffhomepage
path: root/mrbgems/mruby-string-ext/src/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'mrbgems/mruby-string-ext/src/string.c')
-rw-r--r--mrbgems/mruby-string-ext/src/string.c314
1 files changed, 309 insertions, 5 deletions
diff --git a/mrbgems/mruby-string-ext/src/string.c b/mrbgems/mruby-string-ext/src/string.c
index f04f12c4b..c2ee1fc23 100644
--- a/mrbgems/mruby-string-ext/src/string.c
+++ b/mrbgems/mruby-string-ext/src/string.c
@@ -1,9 +1,9 @@
-#include <ctype.h>
#include <string.h>
-#include "mruby.h"
-#include "mruby/array.h"
-#include "mruby/class.h"
-#include "mruby/string.h"
+#include <mruby.h>
+#include <mruby/array.h>
+#include <mruby/class.h>
+#include <mruby/string.h>
+#include <mruby/range.h>
static mrb_value
mrb_str_getbyte(mrb_state *mrb, mrb_value str)
@@ -19,6 +19,59 @@ mrb_str_getbyte(mrb_state *mrb, mrb_value str)
return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
}
+static mrb_value
+mrb_str_setbyte(mrb_state *mrb, mrb_value str)
+{
+ mrb_int pos, byte;
+ long len = RSTRING_LEN(str);
+
+ mrb_get_args(mrb, "ii", &pos, &byte);
+
+ if (pos < -len || len <= pos)
+ mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos));
+ if (pos < 0)
+ pos += len;
+
+ mrb_str_modify(mrb, mrb_str_ptr(str));
+ byte &= 0xff;
+ RSTRING_PTR(str)[pos] = byte;
+ return mrb_fixnum_value((unsigned char)byte);
+}
+
+static mrb_value
+mrb_str_byteslice(mrb_state *mrb, mrb_value str)
+{
+ mrb_value a1;
+ mrb_int len;
+ int argc;
+
+ argc = mrb_get_args(mrb, "o|i", &a1, &len);
+ if (argc == 2) {
+ return mrb_str_substr(mrb, str, mrb_fixnum(a1), len);
+ }
+ switch (mrb_type(a1)) {
+ case MRB_TT_RANGE:
+ {
+ mrb_int beg;
+
+ len = RSTRING_LEN(str);
+ if (mrb_range_beg_len(mrb, a1, &beg, &len, len)) {
+ return mrb_str_substr(mrb, str, beg, len);
+ }
+ return mrb_nil_value();
+ }
+ case MRB_TT_FLOAT:
+ a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
+ /* fall through */
+ case MRB_TT_FIXNUM:
+ return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);
+ default:
+ mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");
+ }
+ /* not reached */
+ return mrb_nil_value();
+}
+
/*
* call-seq:
* str.swapcase! -> str or nil
@@ -192,6 +245,51 @@ mrb_str_chr(mrb_state *mrb, mrb_value self)
return mrb_str_substr(mrb, self, 0, 1);
}
+static mrb_value
+mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
+{
+ mrb_int cp = mrb_fixnum(num);
+#ifdef MRB_UTF8_STRING
+ char utf8[4];
+ mrb_int len;
+
+ if (cp < 0 || 0x10FFFF < cp) {
+ mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
+ }
+ if (cp < 0x80) {
+ utf8[0] = (char)cp;
+ len = 1;
+ }
+ else if (cp < 0x800) {
+ utf8[0] = (char)(0xC0 | (cp >> 6));
+ utf8[1] = (char)(0x80 | (cp & 0x3F));
+ len = 2;
+ }
+ else if (cp < 0x10000) {
+ utf8[0] = (char)(0xE0 | (cp >> 12));
+ utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
+ utf8[2] = (char)(0x80 | ( cp & 0x3F));
+ len = 3;
+ }
+ else {
+ utf8[0] = (char)(0xF0 | (cp >> 18));
+ utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
+ utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
+ utf8[3] = (char)(0x80 | ( cp & 0x3F));
+ len = 4;
+ }
+ return mrb_str_new(mrb, utf8, len);
+#else
+ char c;
+
+ if (cp < 0 || 0xff < cp) {
+ mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
+ }
+ c = (char)cp;
+ return mrb_str_new(mrb, &c, 1);
+#endif
+}
+
/*
* call-seq:
* string.lines -> array of string
@@ -239,6 +337,202 @@ mrb_str_lines(mrb_state *mrb, mrb_value self)
return result;
}
+/*
+ * call-seq:
+ * string.succ -> string
+ *
+ * Returns next sequence of the string;
+ *
+ * a = "abc"
+ * a.succ #=> "abd"
+ */
+static mrb_value
+mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
+{
+ mrb_value result;
+ unsigned char *p, *e, *b, *t;
+ const char *prepend;
+ struct RString *s = mrb_str_ptr(self);
+ size_t l;
+
+ if (RSTRING_LEN(self) == 0)
+ return self;
+
+ mrb_str_modify(mrb, s);
+ l = RSTRING_LEN(self);
+ b = p = (unsigned char*) RSTRING_PTR(self);
+ t = e = p + l;
+ *(e--) = 0;
+
+ // find trailing ascii/number
+ while (e >= b) {
+ if (ISALNUM(*e))
+ break;
+ e--;
+ }
+ if (e < b) {
+ e = p + l - 1;
+ result = mrb_str_new_lit(mrb, "");
+ } else {
+ // find leading letter of the ascii/number
+ b = e;
+ while (b > p) {
+ if (!ISALNUM(*b) || (ISALNUM(*b) && *b != '9' && *b != 'z' && *b != 'Z'))
+ break;
+ b--;
+ }
+ if (!ISALNUM(*b))
+ b++;
+ result = mrb_str_new(mrb, (char*) p, b - p);
+ }
+
+ while (e >= b) {
+ if (!ISALNUM(*e)) {
+ if (*e == 0xff) {
+ mrb_str_cat_lit(mrb, result, "\x01");
+ (*e) = 0;
+ } else
+ (*e)++;
+ break;
+ }
+ prepend = NULL;
+ if (*e == '9') {
+ if (e == b) prepend = "1";
+ *e = '0';
+ } else if (*e == 'z') {
+ if (e == b) prepend = "a";
+ *e = 'a';
+ } else if (*e == 'Z') {
+ if (e == b) prepend = "A";
+ *e = 'A';
+ } else {
+ (*e)++;
+ break;
+ }
+ if (prepend) mrb_str_cat_cstr(mrb, result, prepend);
+ e--;
+ }
+ result = mrb_str_cat(mrb, result, (char*) b, t - b);
+ l = RSTRING_LEN(result);
+ mrb_str_resize(mrb, self, l);
+ memcpy(RSTRING_PTR(self), RSTRING_PTR(result), l);
+ return self;
+}
+
+static mrb_value
+mrb_str_succ(mrb_state *mrb, mrb_value self)
+{
+ mrb_value str;
+
+ str = mrb_str_dup(mrb, self);
+ mrb_str_succ_bang(mrb, str);
+ return str;
+}
+
+/*
+ * call-seq:
+ * str.prepend(other_str) -> str
+ *
+ * Prepend---Prepend the given string to <i>str</i>.
+ *
+ * a = "world"
+ * a.prepend("hello ") #=> "hello world"
+ * a #=> "hello world"
+ */
+static mrb_value
+mrb_str_prepend(mrb_state *mrb, mrb_value self)
+{
+ struct RString *s1 = mrb_str_ptr(self), *s2, *temp_s;
+ mrb_int len;
+ mrb_value other, temp_str;
+
+ mrb_get_args(mrb, "S", &other);
+
+ mrb_str_modify(mrb, s1);
+ if (!mrb_string_p(other)) {
+ other = mrb_str_to_str(mrb, other);
+ }
+ s2 = mrb_str_ptr(other);
+ len = RSTR_LEN(s1) + RSTR_LEN(s2);
+ temp_str = mrb_str_new(mrb, NULL, RSTR_LEN(s1));
+ temp_s = mrb_str_ptr(temp_str);
+ memcpy(RSTR_PTR(temp_s), RSTR_PTR(s1), RSTR_LEN(s1));
+ if (RSTRING_CAPA(self) < len) {
+ mrb_str_resize(mrb, self, len);
+ }
+ memcpy(RSTR_PTR(s1), RSTR_PTR(s2), RSTR_LEN(s2));
+ memcpy(RSTR_PTR(s1) + RSTR_LEN(s2), RSTR_PTR(temp_s), RSTR_LEN(temp_s));
+ RSTR_SET_LEN(s1, len);
+ RSTR_PTR(s1)[len] = '\0';
+ return self;
+}
+
+#ifdef MRB_UTF8_STRING
+static const char utf8len_codepage_zero[256] =
+{
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
+};
+
+static mrb_int
+utf8code(unsigned char* p)
+{
+ mrb_int len;
+
+ if (p[0] < 0x80)
+ return p[0];
+
+ len = utf8len_codepage_zero[p[0]];
+ if (len > 1 && (p[1] & 0xc0) == 0x80) {
+ if (len == 2)
+ return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
+ if ((p[2] & 0xc0) == 0x80) {
+ if (len == 3)
+ return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
+ + (p[2] & 0x3f);
+ if ((p[3] & 0xc0) == 0x80) {
+ if (len == 4)
+ return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
+ + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
+ if ((p[4] & 0xc0) == 0x80) {
+ if (len == 5)
+ return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
+ + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6)
+ + (p[4] & 0x3f);
+ if ((p[5] & 0xc0) == 0x80 && len == 6)
+ return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
+ + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12)
+ + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f);
+ }
+ }
+ }
+ }
+ return p[0];
+}
+
+static mrb_value
+mrb_str_ord(mrb_state* mrb, mrb_value str)
+{
+ if (RSTRING_LEN(str) == 0)
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
+ return mrb_fixnum_value(utf8code((unsigned char*) RSTRING_PTR(str)));
+}
+#else
+static mrb_value
+mrb_str_ord(mrb_state* mrb, mrb_value str)
+{
+ if (RSTRING_LEN(str) == 0)
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
+ return mrb_fixnum_value(RSTRING_PTR(str)[0]);
+}
+#endif
+
void
mrb_mruby_string_ext_gem_init(mrb_state* mrb)
{
@@ -246,6 +540,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));
+ mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));
+ mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1));
mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "concat", mrb_str_concat2, MRB_ARGS_REQ(1));
@@ -256,6 +552,14 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "lines", mrb_str_lines, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE());
+ mrb_define_method(mrb, s, "prepend", mrb_str_prepend, MRB_ARGS_REQ(1));
+ mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ"));
+ mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!"));
+ mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE());
+
+ mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
}
void