summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Paolo Bosetti <[email protected]> 2012-05-31 18:52:33 -0700
committer: Paolo Bosetti <[email protected]> 2012-05-31 18:52:33 -0700
commit: 9c0bfd343679fcd84090b7611ed582ae31e0e3b9 (patch)
tree: 75e6ac394862821a0e466ccfee361655c40ae749 /src
parent: 6dbba7b799e0cf1a86ec86f347bbc1b40420d372 (diff)
parent: 8180fee1808c56048b9fa18a8dd16014e694e48e (diff)
download: mruby-9c0bfd343679fcd84090b7611ed582ae31e0e3b9.tar.gz
download: mruby-9c0bfd343679fcd84090b7611ed582ae31e0e3b9.zip
Merge branch 'master' of git://github.com/mruby/mruby into XCode
Diffstat (limited to 'src')
-rw-r--r-- src/array.c 107
-rw-r--r-- src/ascii.c 96
-rw-r--r-- src/class.c 53
-rw-r--r-- src/codegen.c 25
-rw-r--r-- src/encoding.c 1685
-rw-r--r-- src/encoding.h 9
-rw-r--r-- src/gc.c 30
-rw-r--r-- src/hash.c 115
-rw-r--r-- src/init.c 2
-rw-r--r-- src/kernel.c 8
-rw-r--r-- src/load.c 1
-rw-r--r-- src/object.c 19
-rw-r--r-- src/re.c 793
-rw-r--r-- src/sprintf.c 34
-rw-r--r-- src/string.c 2544
-rw-r--r-- src/struct.c 2
-rw-r--r-- src/symbol.c 148
-rw-r--r-- src/transcode.c 4386
-rw-r--r-- src/transcode_data.h 109
-rw-r--r-- src/unicode.c 2607
-rw-r--r-- src/us_ascii.c 34
-rw-r--r-- src/utf_8.c 460
22 files changed, 653 insertions, 12614 deletions
diff --git a/src/array.c b/src/array.c
index 7b486430f..187a8404d 100644
--- a/src/array.c
+++ b/src/array.c
@@ -10,9 +10,6 @@
#include "mruby/string.h"
#include "mruby/class.h"
-mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
- mrb_value obj, mrb_value paired_obj, void* arg);
-
//#define ARY_DEFAULT_LEN 16
#define ARY_DEFAULT_LEN 4
#define ARY_SHRINK_RATIO 5 /* must be larger than 2 */
@@ -30,8 +27,8 @@ ary_elt(mrb_value ary, long offset)
return RARRAY_PTR(ary)[offset];
}
-mrb_value
-mrb_ary_new_capa(mrb_state *mrb, size_t capa)
+static struct RArray*
+ary_new_capa(mrb_state *mrb, size_t capa)
{
struct RArray *a;
size_t blen;
@@ -55,6 +52,13 @@ mrb_ary_new_capa(mrb_state *mrb, size_t capa)
a->capa = capa;
a->len = 0;
+ return a;
+}
+
+mrb_value
+mrb_ary_new_capa(mrb_state *mrb, size_t capa)
+{
+ struct RArray *a = ary_new_capa(mrb, capa);
return mrb_obj_value(a);
}
@@ -65,7 +69,7 @@ mrb_ary_new(mrb_state *mrb)
}
mrb_value
-mrb_ary_new_from_values(mrb_state *mrb, mrb_value *vals, size_t size)
+mrb_ary_new_from_values(mrb_state *mrb, size_t size, mrb_value *vals)
{
mrb_value ary;
struct RArray *a;
@@ -84,7 +88,7 @@ mrb_assoc_new(mrb_state *mrb, mrb_value car, mrb_value cdr)
mrb_value arv[2];
arv[0] = car;
arv[1] = cdr;
- return mrb_ary_new_from_values(mrb, arv, 2);
+ return mrb_ary_new_from_values(mrb, 2, arv);
}
void
@@ -156,7 +160,7 @@ mrb_ary_s_create(mrb_state *mrb, mrb_value self)
int len;
mrb_get_args(mrb, "*", &vals, &len);
- return mrb_ary_new_from_values(mrb, vals, (size_t)len);
+ return mrb_ary_new_from_values(mrb, (size_t)len, vals);
}
void
@@ -200,25 +204,6 @@ mrb_ary_plus(mrb_state *mrb, mrb_value self)
return ary;
}
-static mrb_value
-recursive_cmp(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur)
-{
- long i, len;
-
- if (recur) return mrb_undef_value(); /* Subtle! */
- len = RARRAY_LEN(ary1);
- if (len > RARRAY_LEN(ary2)) {
- len = RARRAY_LEN(ary2);
- }
-
- for (i=0; i<len; i++) {
- mrb_value r = mrb_funcall(mrb, ary_elt(ary1, i), "<=>", 1, ary_elt(ary2, i));
- if (mrb_type(r) != MRB_TT_FIXNUM || mrb_fixnum(r) != 0) return r;
- }
-
- return mrb_undef_value();
-}
-
/*
* call-seq:
* ary <=> other_ary -> -1, 0, +1 or nil
@@ -242,15 +227,23 @@ mrb_ary_cmp(mrb_state *mrb, mrb_value ary1)
{
mrb_value ary2;
struct RArray *a1, *a2;
- mrb_value r;
- long len;
+ mrb_value r = mrb_nil_value();
+ long i, len;
mrb_get_args(mrb, "o", &ary2);
if (mrb_type(ary2) != MRB_TT_ARRAY) return mrb_nil_value();
a1 = RARRAY(ary1); a2 = RARRAY(ary2);
if (a1->len == a2->len && a1->buf == a2->buf) return mrb_fixnum_value(0);
- r = mrb_exec_recursive_paired(mrb, recursive_cmp, ary1, ary2, &ary2);
- if (mrb_type(r) != MRB_TT_UNDEF) return r;
+ else {
+ len = RARRAY_LEN(ary1);
+ if (len > RARRAY_LEN(ary2)) {
+ len = RARRAY_LEN(ary2);
+ }
+ for (i=0; i<len; i++) {
+ r = mrb_funcall(mrb, ary_elt(ary1, i), "<=>", 1, ary_elt(ary2, i));
+ if (mrb_type(r) != MRB_TT_FIXNUM || mrb_fixnum(r) != 0) return r;
+ }
+ }
len = a1->len - a2->len;
return mrb_fixnum_value((len == 0)? 0: (len > 0)? 1: -1);
}
@@ -569,7 +562,7 @@ mrb_ary_aget(mrb_state *mrb, mrb_value self)
if ((len = mrb_fixnum(argv[0])) < 0) return mrb_nil_value();
if (a->len == (size_t)index) return mrb_ary_new(mrb);
if ((size_t)len > a->len - index) len = a->len - index;
- return mrb_ary_new_from_values(mrb, a->buf + index, len);
+ return mrb_ary_new_from_values(mrb, len, a->buf + index);
default:
mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
@@ -654,7 +647,7 @@ mrb_ary_first(mrb_state *mrb, mrb_value self)
/* len == 1 */
size = mrb_fixnum(*vals);
if (size > a->len) size = a->len;
- return mrb_ary_new_from_values(mrb, a->buf, size);
+ return mrb_ary_new_from_values(mrb, size, a->buf);
}
mrb_value
@@ -676,7 +669,7 @@ mrb_ary_last(mrb_state *mrb, mrb_value self)
/* len == 1 */
size = mrb_fixnum(*vals);
if (size > a->len) size = a->len;
- return mrb_ary_new_from_values(mrb, a->buf + a->len - size, size);
+ return mrb_ary_new_from_values(mrb, size, a->buf + a->len - size);
}
mrb_value
@@ -716,7 +709,7 @@ mrb_ary_splat(mrb_state *mrb, mrb_value v)
return v;
}
else {
- return mrb_ary_new_from_values(mrb, &v, 1);
+ return mrb_ary_new_from_values(mrb, 1, &v);
}
}
@@ -923,19 +916,6 @@ mrb_ary_join_m(mrb_state *mrb, mrb_value ary)
return mrb_ary_join(mrb, ary, sep);
}
-static mrb_value
-recursive_equal(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur)
-{
- long i;
-
- if (recur) return mrb_true_value(); /* Subtle! */
- for (i=0; i<RARRAY_LEN(ary1); i++) {
- if (!mrb_equal(mrb, ary_elt(ary1, i), ary_elt(ary2, i)))
- return mrb_false_value();
- }
- return mrb_true_value();
-}
-
/* 15.2.12.5.33 (x) */
/*
* call-seq:
@@ -970,20 +950,15 @@ mrb_ary_equal(mrb_state *mrb, mrb_value ary1)
}
}
if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return mrb_false_value();
- return mrb_exec_recursive_paired(mrb, recursive_equal, ary1, ary2, &ary2);
-}
-
-static mrb_value
-recursive_eql(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur)
-{
- long i;
+ else {
+ long i;
- if (recur) return mrb_true_value(); /* Subtle! */
- for (i=0; i<RARRAY_LEN(ary1); i++) {
- if (!mrb_eql(mrb, ary_elt(ary1, i), ary_elt(ary2, i)))
- return mrb_false_value();
+ for (i=0; i<RARRAY_LEN(ary1); i++) {
+ if (!mrb_equal(mrb, ary_elt(ary1, i), ary_elt(ary2, i)))
+ return mrb_false_value();
+ }
+ return mrb_true_value();
}
- return mrb_true_value();
}
/* 15.2.12.5.34 (x) */
@@ -1001,10 +976,18 @@ mrb_ary_eql(mrb_state *mrb, mrb_value ary1)
mrb_value ary2;
mrb_get_args(mrb, "o", &ary2);
- if (mrb_obj_equal(mrb, ary1,ary2)) return mrb_true_value();
+ if (mrb_obj_equal(mrb, ary1, ary2)) return mrb_true_value();
if (mrb_type(ary2) != MRB_TT_ARRAY) return mrb_false_value();
if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return mrb_false_value();
- return mrb_exec_recursive_paired(mrb, recursive_eql, ary1, ary2, &ary2);
+ else {
+ long i;
+
+ for (i=0; i<RARRAY_LEN(ary1); i++) {
+ if (!mrb_eql(mrb, ary_elt(ary1, i), ary_elt(ary2, i)))
+ return mrb_false_value();
+ }
+ return mrb_true_value();
+ }
}
void
diff --git a/src/ascii.c b/src/ascii.c
deleted file mode 100644
index 91bd54073..000000000
--- a/src/ascii.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/**********************************************************************
- ascii.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include "regenc.h"
-
-OnigEncodingDefine(ascii, ASCII) = {
- onigenc_single_byte_mbc_enc_len,
- "ASCII-8BIT",/* name */
- 1, /* max byte length */
- 1, /* min byte length */
- onigenc_is_mbc_newline_0x0a,
- onigenc_single_byte_mbc_to_code,
- onigenc_single_byte_code_to_mbclen,
- onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_case_fold,
- onigenc_ascii_apply_all_case_fold,
- onigenc_ascii_get_case_fold_codes_by_str,
- onigenc_minimum_property_name_to_ctype,
- onigenc_ascii_is_code_ctype,
- onigenc_not_support_get_ctype_code_range,
- onigenc_single_byte_left_adjust_char_head,
- onigenc_always_true_is_allowed_reverse_match
-};
-ENC_ALIAS("BINARY", "ASCII-8BIT")
-ENC_REPLICATE("IBM437", "ASCII-8BIT")
-ENC_ALIAS("CP437", "IBM437")
-ENC_REPLICATE("IBM737", "ASCII-8BIT")
-ENC_ALIAS("CP737", "IBM737")
-ENC_REPLICATE("IBM775", "ASCII-8BIT")
-ENC_ALIAS("CP775", "IBM775")
-ENC_REPLICATE("CP850", "ASCII-8BIT")
-ENC_ALIAS("IBM850", "CP850")
-ENC_REPLICATE("IBM852", "ASCII-8BIT")
-ENC_REPLICATE("CP852", "IBM852")
-ENC_REPLICATE("IBM855", "ASCII-8BIT")
-ENC_REPLICATE("CP855", "IBM855")
-ENC_REPLICATE("IBM857", "ASCII-8BIT")
-ENC_ALIAS("CP857", "IBM857")
-ENC_REPLICATE("IBM860", "ASCII-8BIT")
-ENC_ALIAS("CP860", "IBM860")
-ENC_REPLICATE("IBM861", "ASCII-8BIT")
-ENC_ALIAS("CP861", "IBM861")
-ENC_REPLICATE("IBM862", "ASCII-8BIT")
-ENC_ALIAS("CP862", "IBM862")
-ENC_REPLICATE("IBM863", "ASCII-8BIT")
-ENC_ALIAS("CP863", "IBM863")
-ENC_REPLICATE("IBM864", "ASCII-8BIT")
-ENC_ALIAS("CP864", "IBM864")
-ENC_REPLICATE("IBM865", "ASCII-8BIT")
-ENC_ALIAS("CP865", "IBM865")
-ENC_REPLICATE("IBM866", "ASCII-8BIT")
-ENC_ALIAS("CP866", "IBM866")
-ENC_REPLICATE("IBM869", "ASCII-8BIT")
-ENC_ALIAS("CP869", "IBM869")
-ENC_REPLICATE("Windows-1258", "ASCII-8BIT")
-ENC_ALIAS("CP1258", "Windows-1258")
-ENC_REPLICATE("GB1988", "ASCII-8BIT")
-ENC_REPLICATE("macCentEuro", "ASCII-8BIT")
-ENC_REPLICATE("macCroatian", "ASCII-8BIT")
-ENC_REPLICATE("macCyrillic", "ASCII-8BIT")
-ENC_REPLICATE("macGreek", "ASCII-8BIT")
-ENC_REPLICATE("macIceland", "ASCII-8BIT")
-ENC_REPLICATE("macRoman", "ASCII-8BIT")
-ENC_REPLICATE("macRomania", "ASCII-8BIT")
-ENC_REPLICATE("macThai", "ASCII-8BIT")
-ENC_REPLICATE("macTurkish", "ASCII-8BIT")
-ENC_REPLICATE("macUkraine", "ASCII-8BIT")
-#endif //INCLUDE_ENCODING
diff --git a/src/class.c b/src/class.c
index f96922f4b..b13ab2288 100644
--- a/src/class.c
+++ b/src/class.c
@@ -17,12 +17,6 @@
#include "mruby/khash.h"
-#ifdef INCLUDE_REGEXP
- #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-#else
- #define mrb_usascii_str_new2 mrb_str_new_cstr
-#endif
-
KHASH_MAP_INIT_INT(mt, struct RProc*);
KHASH_MAP_INIT_INT(iv, mrb_value);
@@ -1052,7 +1046,7 @@ mrb_mod_to_s(mrb_state *mrb, mrb_value klass)
{
//if (FL_TEST(klass, FL_SINGLETON)) {
if (mrb_type(klass) == MRB_TT_SCLASS) {
- mrb_value s = mrb_usascii_str_new2(mrb, "#<");
+ mrb_value s = mrb_str_new_cstr(mrb, "#<");
mrb_value v = mrb_iv_get(mrb, klass, mrb_intern(mrb, "__attached__"));
mrb_str_cat2(mrb, s, "Class:");
@@ -1140,6 +1134,48 @@ mrb_mod_undef(mrb_state *mrb, mrb_value mod)
return mrb_nil_value();
}
+static mrb_sym
+mrb_sym_value(mrb_state *mrb, mrb_value val)
+{
+ if(val.tt == MRB_TT_STRING) {
+ return mrb_intern(mrb, RSTRING_PTR(val));
+ }
+ else if(val.tt != MRB_TT_SYMBOL) {
+ mrb_value obj = mrb_funcall(mrb, val, "inspect", 0);
+ mrb_raise(mrb, E_TYPE_ERROR, "%s is not a symbol",
+ mrb_string_value_ptr(mrb, obj));
+ }
+ return mrb_symbol(val);
+}
+
+mrb_value
+mrb_mod_const_defined(mrb_state *mrb, mrb_value mod)
+{
+ mrb_value sym;
+ mrb_get_args(mrb, "o", &sym);
+ if(mrb_const_defined(mrb, mod, mrb_sym_value(mrb, sym))) {
+ return mrb_true_value();
+ }
+ return mrb_false_value();
+}
+
+mrb_value
+mrb_mod_const_get(mrb_state *mrb, mrb_value mod)
+{
+ mrb_value sym;
+ mrb_get_args(mrb, "o", &sym);
+ return mrb_const_get(mrb, mod, mrb_sym_value(mrb, sym));
+}
+
+mrb_value
+mrb_mod_const_set(mrb_state *mrb, mrb_value mod)
+{
+ mrb_value sym, value;
+ mrb_get_args(mrb, "oo", &sym, &value);
+ mrb_const_set(mrb, mod, mrb_sym_value(mrb, sym), value);
+ return value;
+}
+
static mrb_value
mrb_mod_eqq(mrb_state *mrb, mrb_value mod)
@@ -1197,6 +1233,9 @@ mrb_init_class(mrb_state *mrb)
mrb_define_method(mrb, mod, "to_s", mrb_mod_to_s, ARGS_NONE());
mrb_define_method(mrb, mod, "alias_method", mrb_mod_alias, ARGS_ANY());
mrb_define_method(mrb, mod, "undef_method", mrb_mod_undef, ARGS_ANY());
+ mrb_define_method(mrb, mod, "const_defined?", mrb_mod_const_defined, ARGS_REQ(1));
+ mrb_define_method(mrb, mod, "const_get", mrb_mod_const_get, ARGS_REQ(1));
+ mrb_define_method(mrb, mod, "const_set", mrb_mod_const_set, ARGS_REQ(2));
mrb_define_method(mrb, mod, "===", mrb_mod_eqq, ARGS_REQ(1));
}
diff --git a/src/codegen.c b/src/codegen.c
index 11e9eb236..1f4fa818c 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -1225,15 +1225,30 @@ codegen(codegen_scope *s, node *tree, int val)
break;
case NODE_OP_ASGN:
- codegen(s, tree->car, VAL);
- codegen(s, tree->cdr->cdr->car, VAL);
- genop(s, MKOP_A(OP_LOADNIL, cursp()));
- pop(); pop();
{
mrb_sym sym = (mrb_sym)tree->cdr->car;
const char *name = mrb_sym2name(s->mrb, sym);
- int idx = new_msym(s, sym);
+ int idx;
+
+ codegen(s, tree->car, VAL);
+ if ((name[0] == '|' && strlen(name) == 2 && name[1] == '|') ||
+ (name[0] == '&' && strlen(name) == 2 && name[1] == '&')) {
+ int pos;
+
+ pop();
+ pos = new_label(s);
+ genop(s, MKOP_AsBx(name[0] == '|' ? OP_JMPIF : OP_JMPNOT, cursp(), 0));
+ codegen(s, tree->cdr->cdr->car, VAL);
+ pop();
+ gen_assignment(s, tree->car, cursp(), val);
+ dispatch(s, pos);
+ break;
+ }
+ codegen(s, tree->cdr->cdr->car, VAL);
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop(); pop();
+ idx = new_msym(s, sym);
if (name[0] == '+' && strlen(name) == 1) {
genop(s, MKOP_ABC(OP_ADD, cursp(), idx, 1));
}
diff --git a/src/encoding.c b/src/encoding.c
deleted file mode 100644
index 8e4257829..000000000
--- a/src/encoding.c
+++ /dev/null
@@ -1,1685 +0,0 @@
-/*
-** encoding.c - Encoding class
-**
-** See Copyright Notice in mruby.h
-*/
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include <ctype.h>
-#ifndef NO_LOCALE_CHARMAP
-#ifdef __CYGWIN__
-#include <windows.h>
-#endif
-#ifdef HAVE_LANGINFO_H
-#include <langinfo.h>
-#endif
-#endif
-
-#define USE_UPPER_CASE_TABLE
-
-#include <ctype.h>
-#include <stdio.h>
-#include "regenc.h"
-#include "regint.h"
-#include "encoding.h"
-#include "st.h"
-#include <string.h>
-#include "mruby/numeric.h"
-#include "mruby/string.h"
-#include "mruby/array.h"
-#include "mruby/variable.h"
-#include "mruby/hash.h"
-
-#define pprintf printf
-#define mrb_warning printf
-#define mrb_bug printf
-#ifndef INT_MAX
-#define INT_MAX 2147483647
-#endif
-#define mrb_isascii(c) ((unsigned long)(c) < 128)
-#define OBJ_FREEZE(a)
-static mrb_sym id_encoding;
-//mrb_value mrb_cEncoding;
-static mrb_value mrb_encoding_list;
-
-struct mrb_encoding_entry {
- const char *name;
- mrb_encoding *enc;
- mrb_encoding *base;
-};
-
-static struct {
- struct mrb_encoding_entry *list;
- int count;
- int size;
- st_table *names;
-} enc_table;
-
-void mrb_enc_init(mrb_state *mrb);
-
-enum {
- ENCINDEX_ASCII,
- ENCINDEX_UTF_8,
- ENCINDEX_US_ASCII,
- ENCINDEX_BUILTIN_MAX
-};
-#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
-#define ENCODING_NAMELEN_MAX 63
-#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
-#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2))
-
-//#define BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & T_MASK)
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef OTHER
-#define OTHER 2
-#endif
-
-#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-
-static const struct mrb_data_type encoding_data_type = {
- "encoding", 0,
-};
-#define is_data_encoding(obj) (DATA_TYPE(obj) == &encoding_data_type)
-
-// RUBY_IMMEDIATE_MASK = 0x03,
-//#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK
-//#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
-//#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x))
-
-static mrb_value
-enc_new(mrb_state *mrb, mrb_encoding *encoding)
-{
- return mrb_obj_value(Data_Wrap_Struct(mrb, ENCODE_CLASS, &encoding_data_type, encoding));
-}
-
-#define enc_autoload_p(enc) (!mrb_enc_mbmaxlen(enc))
-
-#define UNSPECIFIED_ENCODING INT_MAX
-
-
-static mrb_value
-mrb_enc_from_encoding_index(mrb_state *mrb, int idx)
-{
- mrb_value list, enc;
-
- if (mrb_nil_p(list = mrb_encoding_list)) {
- mrb_bug("mrb_enc_from_encoding_index(%d): no mrb_encoding_list", idx);
- }
- enc = mrb_ary_ref(mrb, list, idx);//mrb_ary_entry(list, idx);
- if (mrb_nil_p(enc)) {
- mrb_bug("mrb_enc_from_encoding_index(%d): not created yet", idx);
- }
- return enc;
-}
-
-mrb_value
-mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *encoding)
-{
- int idx;
- if (!encoding) return mrb_nil_value();
- idx = ENC_TO_ENCINDEX(encoding);
- return mrb_enc_from_encoding_index(mrb, idx);
-}
-
-static int enc_autoload(mrb_state *mrb, mrb_encoding *enc);
-static int
-check_encoding(mrb_state *mrb, mrb_encoding *enc)
-{
- int index = mrb_enc_to_index(enc);
- if (mrb_enc_from_index(mrb, index) != enc)
- return -1;
- if (enc_autoload_p(enc)) {
- index = enc_autoload(mrb, enc);
- }
- return index;
-}
-
-static int
-enc_check_encoding(mrb_state *mrb, mrb_value obj)
-{
- if (SPECIAL_CONST_P(obj) || !is_data_encoding(obj)) {
- return -1;
- }
- return check_encoding(mrb, RDATA(obj)->data);
-}
-
-static int
-must_encoding(mrb_state *mrb, mrb_value enc)
-{
- int index = enc_check_encoding(mrb, enc);
- if (index < 0) {
- mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected Encoding)",
- mrb_obj_classname(mrb, enc));
- }
- return index;
-}
-
-int
-mrb_to_encoding_index(mrb_state *mrb, mrb_value enc)
-{
- int idx;
-
- idx = enc_check_encoding(mrb, enc);
- if (idx >= 0) {
- return idx;
- }
- else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) {
- return -1;
- }
- if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
- return -1;
- }
- //return mrb_enc_find_index(StringValueCStr(enc));
- return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
-
-}
-
-static mrb_encoding *
-to_encoding(mrb_state *mrb, mrb_value enc)
-{
- int idx;
-
- //StringValue(enc);
- mrb_string_value(mrb, &enc);
-
- if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)");
- }
- //idx = mrb_enc_find_index(StringValueCStr(enc));
- idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
- if (idx < 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc));
- }
- return mrb_enc_from_index(mrb, idx);
-}
-
-mrb_encoding *
-mrb_to_encoding(mrb_state *mrb, mrb_value enc)
-{
- if (enc_check_encoding(mrb, enc) >= 0) return RDATA(enc)->data;
- return to_encoding(mrb, enc);
-}
-
-static int
-enc_table_expand(int newsize)
-{
- struct mrb_encoding_entry *ent;
- int count = newsize;
-
- if (enc_table.size >= newsize) return newsize;
- newsize = (newsize + 7) / 8 * 8;
- ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
- if (!ent) return -1;
- memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
- enc_table.list = ent;
- enc_table.size = newsize;
- return count;
-}
-
-static int
-enc_register_at(mrb_state *mrb, int index, const char *name, mrb_encoding *encoding)
-{
- struct mrb_encoding_entry *ent = &enc_table.list[index];
- mrb_value list;
- mrb_value ref_ary;
-
- if (!valid_encoding_name_p(name)) return -1;
- if (!ent->name) {
- ent->name = name = strdup(name);
- }
- else if (STRCASECMP(name, ent->name)) {
- return -1;
- }
- if (!ent->enc) {
- ent->enc = xmalloc(sizeof(mrb_encoding));
- }
- if (encoding) {
- *ent->enc = *encoding;
- }
- else {
- memset(ent->enc, 0, sizeof(*ent->enc));
- }
- encoding = ent->enc;
- encoding->name = name;
- encoding->ruby_encoding_index = index;
- st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
- list = mrb_encoding_list;
- //if (list && mrb_nil_p((mrb_ary_ref(mrb, list, index)))) {
- if (list.tt) {
- ref_ary = mrb_ary_ref(mrb, list, index);
- if mrb_nil_p(ref_ary) {
- /* initialize encoding data */
- mrb_ary_set(mrb, list, index, enc_new(mrb, encoding));//rb_ary_store(list, index, enc_new(encoding));
- }
- }
- return index;
-}
-
-
-static int
-enc_register(mrb_state *mrb, const char *name, mrb_encoding *encoding)
-{
- int index = enc_table.count;
-
- if ((index = enc_table_expand(index + 1)) < 0) return -1;
- enc_table.count = index;
- return enc_register_at(mrb, index - 1, name, encoding);
-}
-
-static void set_encoding_const(mrb_state *, const char*, mrb_encoding*);
-int mrb_enc_registered(const char*);
-
-static void
-enc_check_duplication(mrb_state *mrb, const char *name)
-{
- if (mrb_enc_registered(name) >= 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is already registered", name);
- }
-}
-static mrb_encoding*
-set_base_encoding(int index, mrb_encoding *base)
-{
- mrb_encoding *enc = enc_table.list[index].enc;
-
- enc_table.list[index].base = base;
- if (mrb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
- return enc;
-}
-
-int
-mrb_enc_replicate(mrb_state *mrb, const char *name, mrb_encoding *encoding)
-{
- int idx;
-
- enc_check_duplication(mrb, name);
- idx = enc_register(mrb, name, encoding);
- set_base_encoding(idx, encoding);
- set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx));
- return idx;
-}
-
-/* 15.2.40.2.17 */
-/*
- * call-seq:
- * enc.replicate(name) -> encoding
- *
- * Returns a replicated encoding of _enc_ whose name is _name_.
- * The new encoding should have the same byte structure of _enc_.
- * If _name_ is used by another encoding, raise ArgumentError.
- *
- */
-static mrb_value
-enc_replicate(mrb_state *mrb, mrb_value encoding)
-{
- mrb_value name;
- mrb_get_args(mrb, "o", &name);
- return mrb_enc_from_encoding_index(mrb,
- //mrb_enc_replicate(mrb, StringValueCStr(name),
- mrb_enc_replicate(mrb, mrb_string_value_cstr(mrb, &name),
- mrb_to_encoding(mrb, encoding)));
-}
-static int
-enc_replicate_with_index(mrb_state *mrb, const char *name, mrb_encoding *origenc, int idx)
-{
- if (idx < 0) {
- idx = enc_register(mrb, name, origenc);
- }
- else {
- idx = enc_register_at(mrb, idx, name, origenc);
- }
- if (idx >= 0) {
- set_base_encoding(idx, origenc);
- set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx));
- }
- return idx;
-}
-int
-mrb_encdb_replicate(mrb_state *mrb, const char *name, const char *orig)
-{
- int origidx = mrb_enc_registered(orig);
- int idx = mrb_enc_registered(name);
-
- if (origidx < 0) {
- origidx = enc_register(mrb, orig, 0);
- }
- return enc_replicate_with_index(mrb, name, mrb_enc_from_index(mrb, origidx), idx);
-}
-int
-mrb_define_dummy_encoding(mrb_state *mrb, const char *name)
-{
- int index = mrb_enc_replicate(mrb, name, mrb_ascii8bit_encoding(mrb));
- mrb_encoding *enc = enc_table.list[index].enc;
-
- ENC_SET_DUMMY(enc);
- return index;
-}
-
-int
-mrb_encdb_dummy(mrb_state *mrb, const char *name)
-{
- int index = enc_replicate_with_index(mrb, name, mrb_ascii8bit_encoding(mrb),
- mrb_enc_registered(name));
- mrb_encoding *enc = enc_table.list[index].enc;
-
- ENC_SET_DUMMY(enc);
- return index;
-}
-
-/* 15.2.40.2.13 */
-/*
- * call-seq:
- * enc.dummy? -> true or false
- *
- * Returns true for dummy encodings.
- * A dummy encoding is an encoding for which character handling is not properly
- * implemented.
- * It is used for stateful encodings.
- *
- * Encoding::ISO_2022_JP.dummy? #=> true
- * Encoding::UTF_8.dummy? #=> false
- *
- */
-static mrb_value
-enc_dummy_p(mrb_state *mrb, mrb_value enc)
-{
- return ENC_DUMMY_P(enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value();
-}
-
-/* 15.2.40.2.12 */
-/*
- * call-seq:
- * enc.ascii_compatible? -> true or false
- *
- * Returns whether ASCII-compatible or not.
- *
- * Encoding::UTF_8.ascii_compatible? #=> true
- * Encoding::UTF_16BE.ascii_compatible? #=> false
- *
- */
-static mrb_value
-enc_ascii_compatible_p(mrb_state *mrb, mrb_value enc)
-{
- return mrb_enc_asciicompat(mrb, enc_table.list[must_encoding(mrb, enc)].enc) ? mrb_true_value() : mrb_false_value();
-}
-
-static const char *
-enc_alias_internal(const char *alias, int idx)
-{
- alias = strdup(alias);
- st_insert(enc_table.names, (st_data_t)alias, (st_data_t)idx);
- return alias;
-}
-
-/*
- * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
- */
-int
-mrb_enc_unicode_p(mrb_encoding *enc)
-{
- const char *name = mrb_enc_name(enc);
- return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7';
-}
-
-extern mrb_encoding OnigEncodingUTF_8;
-extern mrb_encoding OnigEncodingUS_ASCII;
-
-void
-mrb_enc_init(mrb_state *mrb)
-{
- enc_table_expand(ENCODING_COUNT + 1);
- if (!enc_table.names) {
- enc_table.names = st_init_strcasetable();
- }
-#define ENC_REGISTER(enc) enc_register_at(mrb, ENCINDEX_##enc, mrb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
- ENC_REGISTER(ASCII);
- ENC_REGISTER(UTF_8);
- ENC_REGISTER(US_ASCII);
-#undef ENC_REGISTER
- enc_table.count = ENCINDEX_BUILTIN_MAX;
-}
-
-mrb_encoding *
-mrb_enc_from_index(mrb_state *mrb, int index)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- if (index < 0 || enc_table.count <= index) {
- return 0;
- }
- return enc_table.list[index].enc;
-}
-
-int
-mrb_enc_registered(const char *name)
-{
- st_data_t idx = 0;
-
- if (!name) return -1;
- if (!enc_table.list) return -1;
- if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
- return (int)idx;
- }
- return -1;
-}
-
-mrb_value
-mrb_require_safe(mrb_value fname, int safe)
-{
- mrb_value result = mrb_nil_value();
- return result;
-}
-static int
-load_encoding(const char *name)
-{
- mrb_value enclib;// = mrb_sprintf("enc/%s.so", name);
- //mrb_value verbose;// = ruby_verbose;
- //mrb_value debug;// = ruby_debug;
- //mrb_value loaded;
- char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
- int idx;
-
- while (s < e) {
- if (!ISALNUM(*s)) *s = '_';
- else if (ISUPPER(*s)) *s = TOLOWER(*s);
- ++s;
- }
- OBJ_FREEZE(enclib);
- //ruby_verbose = mrb_false_value();
- //ruby_debug = mrb_false_value();
- //loaded = mrb_protect(require_enc, enclib, 0);
- //ruby_verbose = verbose;
- //ruby_debug = debug;
- //rb_set_errinfo(mrb_nil_value());
- //if (mrb_nil_p(loaded)) return -1;
- if ((idx = mrb_enc_registered(name)) < 0) return -1;
- if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
- return idx;
-}
-
-static int
-enc_autoload(mrb_state *mrb, mrb_encoding *enc)
-{
- int i;
- mrb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
-
- if (base) {
- i = 0;
- do {
- if (i >= enc_table.count) return -1;
- } while (enc_table.list[i].enc != base && (++i, 1));
- if (enc_autoload_p(base)) {
- if (enc_autoload(mrb, base) < 0) return -1;
- }
- i = ENC_TO_ENCINDEX(enc);
- enc_register_at(mrb, i, mrb_enc_name(enc), base);
- }
- else {
- i = load_encoding(mrb_enc_name(enc));
- }
- return i;
-}
-
-int
-mrb_enc_find_index(mrb_state *mrb, const char *name)
-{
- int i = mrb_enc_registered(name);
- mrb_encoding *enc;
-
- if (i < 0) {
- i = load_encoding(name);
- }
- else if (!(enc = mrb_enc_from_index(mrb, i))) {
- if (i != UNSPECIFIED_ENCODING) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is not registered", name);
- }
- }
- else if (enc_autoload_p(enc)) {
- if (enc_autoload(mrb, enc) < 0) {
- //mrb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
- printf("failed to load encoding (%s); use ASCII-8BIT instead",
- name);
- return 0;
- }
- }
- return i;
-}
-
-mrb_encoding *
-mrb_enc_find(mrb_state *mrb, const char *name)
-{
- int idx = mrb_enc_find_index(mrb, name);
- if (idx < 0) idx = 0;
- return mrb_enc_from_index(mrb, idx);
-}
-
-static inline int
-enc_capable(mrb_value obj)
-{
- if (SPECIAL_CONST_P(obj)) return (mrb_type(obj) == MRB_TT_SYMBOL);
- switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) {
- case MRB_TT_STRING:
- case MRB_TT_REGEX:
- case MRB_TT_FILE:
- return TRUE;
- case MRB_TT_DATA:
- if (is_data_encoding(obj)) return TRUE;
- default:
- return FALSE;
- }
-}
-
-mrb_sym
-mrb_id_encoding(mrb_state *mrb)
-{
- //CONST_ID(id_encoding, "encoding");
- id_encoding = mrb_intern(mrb, "encoding");
- return id_encoding;
-}
-
-int
-mrb_enc_get_index(mrb_state *mrb, mrb_value obj)
-{
- int i = -1;
- mrb_value tmp;
- struct RString *ps;
-
- if (SPECIAL_CONST_P(obj)) {
- if (mrb_type(obj) != MRB_TT_SYMBOL) return -1;
- //obj = mrb_id2str(SYM2ID(obj));
- obj = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, SYM2ID(obj)));
- }
- switch (mrb_type(obj)/*BUILTIN_TYPE(obj)*/) {
- as_default:
- default:
- case MRB_TT_STRING:
- case MRB_TT_REGEX:
- i = (int)ENCODING_GET_INLINED(obj);
- ps = mrb_str_ptr(obj);
- if (i == ENCODING_INLINE_MAX) {
- mrb_value iv;
-
- //iv = rb_ivar_get(obj, mrb_id_encoding(mrb));
- iv = mrb_iv_get(mrb, obj, mrb_id_encoding(mrb));
- i = mrb_fixnum(iv);
- }
- break;
-
- case MRB_TT_FILE:
- tmp = mrb_funcall(mrb, obj, "internal_encoding", 0, 0);
- if (mrb_nil_p(tmp)) obj = mrb_funcall(mrb, obj, "external_encoding", 0, 0);
- else obj = tmp;
- if (mrb_nil_p(obj)) break;
- case MRB_TT_DATA:
- if (is_data_encoding(obj)) {
- i = enc_check_encoding(mrb, obj);
- }
- else {
- goto as_default;
- }
- break;
- }
- return i;
-}
-
-void
-mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int idx)
-{
- if (idx < ENCODING_INLINE_MAX) {
- ENCODING_SET_INLINED(obj, idx);
- return;
- }
- ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
- //mrb_ivar_set(obj, mrb_id_encoding(mrb), INT2NUM(idx));
- mrb_iv_set(mrb, obj, mrb_id_encoding(mrb), mrb_fixnum_value(idx));
- return;
-}
-
-mrb_value
-mrb_enc_associate_index(mrb_state *mrb, mrb_value obj, int idx)
-{
-/* enc_check_capable(obj);*/
- if (mrb_enc_get_index(mrb, obj) == idx)
- return obj;
- if (SPECIAL_CONST_P(obj)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot set encoding");
- }
- if (!ENC_CODERANGE_ASCIIONLY(obj) ||
- !mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx))) {
- ENC_CODERANGE_CLEAR(obj);
- }
- mrb_enc_set_index(mrb, obj, idx);
- return obj;
-}
-
-mrb_value
-mrb_enc_associate(mrb_state *mrb, mrb_value obj, mrb_encoding *enc)
-{
- return mrb_enc_associate_index(mrb, obj, mrb_enc_to_index(enc));
-}
-
-mrb_encoding*
-mrb_enc_get(mrb_state *mrb, mrb_value obj)
-{
- return mrb_enc_from_index(mrb, mrb_enc_get_index(mrb, obj));
-}
-
-mrb_encoding*
-mrb_enc_check(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- mrb_encoding *enc = mrb_enc_compatible(mrb, str1, str2);
- if (!enc)
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
- mrb_enc_name(mrb_enc_get(mrb, str1)),
- mrb_enc_name(mrb_enc_get(mrb, str2)));
- return enc;
-}
-
-mrb_encoding*
-mrb_enc_compatible(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- int idx1, idx2;
- mrb_encoding *enc1, *enc2;
-
- idx1 = mrb_enc_get_index(mrb, str1);
- idx2 = mrb_enc_get_index(mrb, str2);
-
- if (idx1 < 0 || idx2 < 0)
- return 0;
-
- if (idx1 == idx2) {
- return mrb_enc_from_index(mrb, idx1);
- }
- enc1 = mrb_enc_from_index(mrb, idx1);
- enc2 = mrb_enc_from_index(mrb, idx2);
-
- if (mrb_type(str2) == MRB_TT_STRING && RSTRING_LEN(str2) == 0)
- //return (idx1 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc2)) ? enc2 : enc1;
- return enc1;
- if (mrb_type(str1) == MRB_TT_STRING && RSTRING_LEN(str1) == 0)
- //return (idx2 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc1)) ? enc1 : enc2;
- return enc2;
- if (!mrb_enc_asciicompat(mrb, enc1) || !mrb_enc_asciicompat(mrb, enc2)) {
- return 0;
- }
-
- /* objects whose encoding is the same of contents */
- //if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ != MRB_TT_STRING && idx2 == ENCINDEX_US_ASCII)
- //return enc1;
- //if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING && idx1 == ENCINDEX_US_ASCII)
- //return enc2;
-
- if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING) {
- mrb_value tmp = str1;
- int idx0 = idx1;
- str1 = str2;
- str2 = tmp;
- idx1 = idx2;
- idx2 = idx0;
- }
- if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ == MRB_TT_STRING) {
- int cr1, cr2;
-
- cr1 = mrb_enc_str_coderange(mrb, str1);
- if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ == MRB_TT_STRING) {
- cr2 = mrb_enc_str_coderange(mrb, str2);
- if (cr1 != cr2) {
- /* may need to handle ENC_CODERANGE_BROKEN */
- if (cr1 == ENC_CODERANGE_7BIT) return enc2;
- if (cr2 == ENC_CODERANGE_7BIT) return enc1;
- }
- if (cr2 == ENC_CODERANGE_7BIT) {
- if (idx1 == ENCINDEX_ASCII) return enc2;
- return enc1;
- }
- }
- if (cr1 == ENC_CODERANGE_7BIT)
- return enc2;
- }
- return 0;
-}
-
-void
-mrb_enc_copy(mrb_state *mrb, mrb_value obj1, mrb_value obj2)
-{
- mrb_enc_associate_index(mrb, obj1, mrb_enc_get_index(mrb, obj2));
-}
-
-
-/*
- * call-seq:
- * obj.encoding -> encoding
- *
- * Returns the Encoding object that represents the encoding of obj.
- */
-
-mrb_value
-mrb_obj_encoding(mrb_state *mrb, mrb_value obj)
-{
- mrb_encoding *enc = mrb_enc_get(mrb, obj);
- if (!enc) {
- mrb_raise(mrb, E_TYPE_ERROR, "unknown encoding");
- }
- return mrb_enc_from_encoding(mrb, enc);
-}
-
-int
-mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc)
-{
- return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
-}
-
-int
-mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc)
-{
- int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
- if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
- return MBCLEN_CHARFOUND_LEN(n);
- else {
- int min = mrb_enc_mbminlen(enc);
- return min <= e-p ? min : (int)(e-p);
- }
-}
-
-int
-mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc)
-{
- int n;
- if (e <= p)
- return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
- n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
- if (e-p < n)
- return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
- return n;
-}
-
-int
-mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc)
-{
- unsigned int c, l;
- if (e <= p)
- return -1;
- if (mrb_enc_asciicompat(mrb, enc)) {
- c = (unsigned char)*p;
- if (!ISASCII(c))
- return -1;
- if (len) *len = 1;
- return c;
- }
- l = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(l))
- return -1;
- c = mrb_enc_mbc_to_codepoint(p, e, enc);
- if (!mrb_enc_isascii(c, enc))
- return -1;
- if (len) *len = l;
- return c;
-}
-
-unsigned int
-mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len_p, mrb_encoding *enc)
-{
- int r;
- if (e <= p)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
- r = mrb_enc_precise_mbclen(p, e, enc);
- if (MBCLEN_CHARFOUND_P(r)) {
- if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
- return mrb_enc_mbc_to_codepoint(p, e, enc);
- }
- else
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(enc));
- return 0;
-}
-
-#undef mrb_enc_codepoint
-unsigned int
-mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc)
-{
- return mrb_enc_codepoint_len(mrb, p, e, 0, enc);
-}
-
-int
-mrb_enc_codelen(mrb_state *mrb, int c, mrb_encoding *enc)
-{
- int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
- if (n == 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid codepoint 0x%x in %s", c, mrb_enc_name(enc));
- }
- return n;
-}
-
-int
-mrb_enc_toupper(int c, mrb_encoding *enc)
-{
- return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_UPPER_CASE(c):(c));
-}
-
-int
-mrb_enc_tolower(int c, mrb_encoding *enc)
-{
- return (ONIGENC_IS_ASCII_CODE(c)?ONIGENC_ASCII_CODE_TO_LOWER_CASE(c):(c));
-}
-
-/* 15.2.40.2.14 */
-/*
- * call-seq:
- * enc.inspect -> string
- *
- * Returns a string which represents the encoding for programmers.
- *
- * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
- * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
- */
-static mrb_value
-enc_inspect(mrb_state *mrb, mrb_value self)
-{
- mrb_value str;
- //mrb_value str = mrb_sprintf("#<%s:%s%s>", mrb_obj_classname(mrb, self),
- // mrb_enc_name((mrb_encoding*)(DATA_PTR(self))),
- // (mrb_fixnum(enc_dummy_p(mrb, self)) ? " (dummy)" : ""));
- char buf[256];
- sprintf(buf, "#<%s:%s%s>", mrb_obj_classname(mrb, self),
- mrb_enc_name((mrb_encoding*)(DATA_PTR(self))),
- (mrb_enc_dummy_p((mrb_encoding*)(DATA_PTR(self))) ? " (dummy)" : ""));
- str = mrb_str_new(mrb, buf, strlen(buf));
- ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
- return str;
-}
-
-/* 15.2.40.2.15 */
-/* 15.2.40.2.18 */
-/*
- * call-seq:
- * enc.name -> string
- *
- * Returns the name of the encoding.
- *
- * Encoding::UTF_8.name #=> "UTF-8"
- */
-static mrb_value
-enc_name(mrb_state *mrb, mrb_value self)
-{
- return mrb_usascii_str_new2(mrb, mrb_enc_name((mrb_encoding*)DATA_PTR(self)));
-}
-
-struct fn_arg {
- mrb_state *mrb;
- enum st_retval (*func)(ANYARGS);
- void *a;
-};
-
-static enum st_retval
-fn_i(st_data_t key, st_data_t val, st_data_t arg) {
- struct fn_arg *a = (struct fn_arg*)arg;
-
- return (*a->func)(a->mrb, key, val, a->a);
-}
-
-static int
-st_foreachNew(mrb_state *mrb, st_table *tbl, enum st_retval (*func)(ANYARGS), void *a)
-{
- struct fn_arg arg = {
- mrb,
- func,
- a,
- };
-
- return st_foreach(tbl, fn_i, (st_data_t)&arg);
-}
-
-static enum st_retval
-enc_names_i(mrb_state *mrb, st_data_t name, st_data_t idx, st_data_t args)
-{
- mrb_value *arg = (mrb_value*)args;
- int iargs = mrb_fixnum(arg[0]);
- //if ((int)idx == (int)arg[0]) {
- if ((int)idx == iargs) {
- mrb_value str = mrb_usascii_str_new2(mrb, (char*)name);
- //OBJ_FREEZE(str);
- mrb_ary_push(mrb, arg[1], str);
- }
- return ST_CONTINUE;
-}
-
-/* 15.2.40.2.16 */
-/*
- * call-seq:
- * enc.names -> array
- *
- * Returns the list of name and aliases of the encoding.
- *
- * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
- */
-static mrb_value
-enc_names(mrb_state *mrb, mrb_value self)
-{
- mrb_value args[2];
-
- args[0] = mrb_fixnum_value(mrb_to_encoding_index(mrb, self));
- args[1] = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0);
- st_foreachNew(mrb, enc_table.names, enc_names_i, args);
- return args[1];
-}
-
-/* 15.2.40.2.8 */
-/*
- * call-seq:
- * Encoding.list -> [enc1, enc2, ...]
- *
- * Returns the list of loaded encodings.
- *
- * Encoding.list
- * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
- * #<Encoding:ISO-2022-JP (dummy)>]
- *
- * Encoding.find("US-ASCII")
- * #=> #<Encoding:US-ASCII>
- *
- * Encoding.list
- * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
- * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
- *
- */
-static mrb_value
-enc_list(mrb_state *mrb, mrb_value klass)
-{
- struct RArray *ar = (struct RArray*)mrb_encoding_list.value.p;
- mrb_value ary = mrb_ary_new_capa(mrb, 0);//mrb_ary_new2(0);
- //mrb_ary_replace_m(mrb, ary/*, mmrb_encoding_list*/);
- mrb_ary_replace(mrb, mrb_ary_ptr(ary), ar->buf, enc_table.count);
- return ary;
-}
-
-/* 15.2.40.2.7 */
-/*
- * call-seq:
- * Encoding.find(string) -> enc
- * Encoding.find(symbol) -> enc
- *
- * Search the encoding with specified <i>name</i>.
- * <i>name</i> should be a string or symbol.
- *
- * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
- * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
- *
- * Names which this method accept are encoding names and aliases
- * including following special aliases
- *
- * "external":: default external encoding
- * "internal":: default internal encoding
- * "locale":: locale encoding
- * "filesystem":: filesystem encoding
- *
- * An ArgumentError is raised when no encoding with <i>name</i>.
- * Only <code>Encoding.find("internal")</code> however returns nil
- * when no encoding named "internal", in other words, when Ruby has no
- * default internal encoding.
- */
-static mrb_value
-enc_find(mrb_state *mrb, mrb_value klass)
-{
- mrb_value enc;
-
- mrb_get_args(mrb, "o", &enc);
- return mrb_enc_from_encoding(mrb, to_encoding(mrb, enc));
-}
-
-/* 15.2.40.2.2 */
-/*
- * call-seq:
- * Encoding.compatible?(str1, str2) -> enc or nil
- *
- * Checks the compatibility of two strings.
- * If they are compatible, means concatenatable,
- * returns an encoding which the concatenated string will be.
- * If they are not compatible, nil is returned.
- *
- * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
- * #=> #<Encoding:ISO-8859-1>
- *
- * Encoding.compatible?(
- * "\xa1".force_encoding("iso-8859-1"),
- * "\xa1\xa1".force_encoding("euc-jp"))
- * #=> nil
- *
- */
-static mrb_value
-enc_compatible_p(mrb_state *mrb, mrb_value klass)
-{
- mrb_value str1;
- mrb_value str2;
- mrb_encoding *enc;
-
- mrb_get_args(mrb, "oo", &str1, &str2);
- if (!enc_capable(str1)) return mrb_nil_value();
- if (!enc_capable(str2)) return mrb_nil_value();
- enc = mrb_enc_compatible(mrb, str1, str2);
- if (!enc) return mrb_nil_value();
- return mrb_enc_from_encoding(mrb, enc);
-}
-
-/* 15.2.40.2.19 */
-/* :nodoc: */
-static mrb_value
-enc_dump(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
-{
- //mrb_scan_args(argc, argv, "01", 0);
- return enc_name(mrb, self);
-}
-
-/* 15.2.40.2.11 */
-/* :nodoc: */
-static mrb_value
-enc_load(mrb_state *mrb, mrb_value klass)
-{
- mrb_value str;
-
- mrb_get_args(mrb, "o", &str);
- return enc_find(mrb, str);
-}
-
-mrb_encoding *
-mrb_ascii8bit_encoding(mrb_state *mrb)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- return enc_table.list[ENCINDEX_ASCII].enc;
-}
-
-int
-mrb_ascii8bit_encindex(void)
-{
- return ENCINDEX_ASCII;
-}
-
-mrb_encoding *
-mrb_utf8_encoding(mrb_state *mrb)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- return enc_table.list[ENCINDEX_UTF_8].enc;
-}
-
-int
-mrb_utf8_encindex(void)
-{
- return ENCINDEX_UTF_8;
-}
-
-mrb_encoding *
-mrb_usascii_encoding(mrb_state *mrb)
-{
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
- return enc_table.list[ENCINDEX_US_ASCII].enc;
-}
-
-int
-mrb_usascii_encindex(void)
-{
- return ENCINDEX_US_ASCII;
-}
-
-int
-mrb_locale_encindex(mrb_state *mrb)
-{
- mrb_value charmap = mrb_locale_charmap(mrb, mrb_obj_value(ENCODE_CLASS));
- int idx;
-
- if (mrb_nil_p(charmap))
- idx = mrb_usascii_encindex();
- //else if ((idx = mrb_enc_find_index(StringValueCStr(charmap))) < 0)
- else if ((idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &charmap))) < 0)
- idx = mrb_ascii8bit_encindex();
-
- if (mrb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
-
- return idx;
-}
-
-mrb_encoding *
-mrb_locale_encoding(mrb_state *mrb)
-{
- return mrb_enc_from_index(mrb, mrb_locale_encindex(mrb));
-}
-
-static int
-enc_set_filesystem_encoding(mrb_state *mrb)
-{
- int idx;
-#if defined NO_LOCALE_CHARMAP
- idx = mrb_enc_to_index(mrb_default_external_encoding(mrb));
-#elif defined _WIN32 || defined __CYGWIN__
- char cp[sizeof(int) * 8 / 3 + 4];
- //snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
- idx = mrb_enc_find_index(mrb, cp);
- if (idx < 0) idx = mrb_ascii8bit_encindex();
-#else
- idx = mrb_enc_to_index(mrb_default_external_encoding(mrb));
-#endif
-
- enc_alias_internal("filesystem", idx);
- return idx;
-}
-
-int
-mrb_filesystem_encindex(void)
-{
- int idx = mrb_enc_registered("filesystem");
- if (idx < 0)
- idx = mrb_ascii8bit_encindex();
- return idx;
-}
-
-mrb_encoding *
-mrb_filesystem_encoding(mrb_state *mrb)
-{
- return mrb_enc_from_index(mrb, mrb_filesystem_encindex());
-}
-
-struct default_encoding {
- int index; /* -2 => not yet set, -1 => nil */
- mrb_encoding *enc;
-};
-
-static struct default_encoding default_external = {0};
-
-static int
-enc_set_default_encoding(mrb_state *mrb, struct default_encoding *def, mrb_value encoding, const char *name)
-{
- int overridden = FALSE;
-
- if (def->index != -2)
- /* Already set */
- overridden = TRUE;
-
- if (mrb_nil_p(encoding)) {
- def->index = -1;
- def->enc = 0;
- st_insert(enc_table.names, (st_data_t)strdup(name),
- (st_data_t)UNSPECIFIED_ENCODING);
- }
- else {
- def->index = mrb_enc_to_index(mrb_to_encoding(mrb, encoding));
- def->enc = 0;
- enc_alias_internal(name, def->index);
- }
-
- if (def == &default_external)
- enc_set_filesystem_encoding(mrb);
-
- return overridden;
-}
-
-mrb_encoding *
-mrb_default_external_encoding(mrb_state *mrb)
-{
- if (default_external.enc) return default_external.enc;
-
- if (default_external.index >= 0) {
- default_external.enc = mrb_enc_from_index(mrb, default_external.index);
- return default_external.enc;
- }
- else {
- return mrb_locale_encoding(mrb);
- }
-}
-
-mrb_value
-mrb_enc_default_external(mrb_state *mrb)
-{
- return mrb_enc_from_encoding(mrb, mrb_default_external_encoding(mrb));
-}
-
-/* 15.2.40.2.3 */
-/*
- * call-seq:
- * Encoding.default_external -> enc
- *
- * Returns default external encoding.
- *
- * It is initialized by the locale or -E option.
- */
-static mrb_value
-get_default_external(mrb_state *mrb, mrb_value klass)
-{
- return mrb_enc_default_external(mrb);
-}
-
-void
-mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding)
-{
- if (mrb_nil_p(encoding)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "default external can not be nil");
- }
- enc_set_default_encoding(mrb, &default_external, encoding,
- "external");
-}
-
-/* 15.2.40.2.4 */
-/*
- * call-seq:
- * Encoding.default_external = enc
- *
- * Sets default external encoding.
- */
-static mrb_value
-set_default_external(mrb_state *mrb, mrb_value klass)
-{
- mrb_value encoding;
-
- mrb_get_args(mrb, "o", &encoding);
- mrb_warning("setting Encoding.default_external");
- mrb_enc_set_default_external(mrb, encoding);
- return encoding;
-}
-
-static struct default_encoding default_internal = {-2};
-
-mrb_encoding *
-mrb_default_internal_encoding(mrb_state *mrb)
-{
- if (!default_internal.enc && default_internal.index >= 0) {
- default_internal.enc = mrb_enc_from_index(mrb, default_internal.index);
- }
- return default_internal.enc; /* can be NULL */
-}
-
-mrb_value
-mrb_enc_default_internal(mrb_state *mrb)
-{
- /* Note: These functions cope with default_internal not being set */
- return mrb_enc_from_encoding(mrb, mrb_default_internal_encoding(mrb));
-}
-
-/* 15.2.40.2.5 */
-/*
- * call-seq:
- * Encoding.default_internal -> enc
- *
- * Returns default internal encoding.
- *
- * It is initialized by the source internal_encoding or -E option.
- */
-static mrb_value
-get_default_internal(mrb_state *mrb, mrb_value klass)
-{
- return mrb_enc_default_internal(mrb);
-}
-
-void
-mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding)
-{
- enc_set_default_encoding(mrb, &default_internal, encoding,
- "internal");
-}
-
-/* 15.2.40.2.6 */
-/*
- * call-seq:
- * Encoding.default_internal = enc or nil
- *
- * Sets default internal encoding.
- * Or removes default internal encoding when passed nil.
- */
-static mrb_value
-set_default_internal(mrb_state *mrb, mrb_value klass)
-{
- mrb_value encoding;
-
- mrb_get_args(mrb, "o", &encoding);
- mrb_warning("setting Encoding.default_internal");
- mrb_enc_set_default_internal(mrb, encoding);
- return encoding;
-}
-
-#define digit(x) ((x) >= '0' && (x) <= '9')
-#ifndef _MSC_VER
-#define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0)
-#else
-#define strstart(s, n) (_stricmp(s, n) == 0)
-#endif
-#define C_CODESET "US-ASCII" /* Return this as the encoding of the
- * C/POSIX locale. Could as well one day
- * become "UTF-8". */
-#if defined _WIN32 || defined __CYGWIN__
-#define JA_CODESET "Windows-31J"
-#else
-#define JA_CODESET "EUC-JP"
-#endif
-
-static char buf[16];
-
-const char *
-nl_langinfo_codeset(void)
-{
- const char *l, *p;
- int n;
-
- if (((l = getenv("LC_ALL")) && *l) ||
- ((l = getenv("LC_CTYPE")) && *l) ||
- ((l = getenv("LANG")) && *l)) {
- /* check standardized locales */
- if (!strcmp(l, "C") || !strcmp(l, "POSIX"))
- return C_CODESET;
- /* check for encoding name fragment */
- p = strchr(l, '.');
- if (!p++) p = l;
- if (strstart(p, "UTF"))
- return "UTF-8";
- if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) {
- if (digit(p[n])) {
- p += n;
- memcpy(buf, "ISO-8859-\0\0", 12);
- buf[9] = *p++;
- if (digit(*p)) buf[10] = *p++;
- return buf;
- }
- }
- if (strstart(p, "KOI8-R")) return "KOI8-R";
- if (strstart(p, "KOI8-U")) return "KOI8-U";
- if (strstart(p, "620")) return "TIS-620";
- if (strstart(p, "2312")) return "GB2312";
- if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */
- if (strstart(p, "BIG5")) return "Big5";
- if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */
- if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */
- if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J";
- /* check for conclusive modifier */
- if (strstart(p, "euro")) return "ISO-8859-15";
- /* check for language (and perhaps country) codes */
- if (strstart(l, "zh_TW")) return "Big5";
- if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */
- if (strstart(l, "zh")) return "GB2312";
- if (strstart(l, "ja")) return JA_CODESET;
- if (strstart(l, "ko")) return "EUC-KR";
- if (strstart(l, "ru")) return "KOI8-R";
- if (strstart(l, "uk")) return "KOI8-U";
- if (strstart(l, "pl") || strstart(l, "hr") ||
- strstart(l, "hu") || strstart(l, "cs") ||
- strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2";
- if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3";
- if (strstart(l, "el")) return "ISO-8859-7";
- if (strstart(l, "he")) return "ISO-8859-8";
- if (strstart(l, "tr")) return "ISO-8859-9";
- if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */
- if (strstart(l, "lt")) return "ISO-8859-13";
- if (strstart(l, "cy")) return "ISO-8859-14";
- if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */
- if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8";
- /* Send me further rules if you like, but don't forget that we are
- * *only* interested in locale naming conventions on platforms
- * that do not already provide an nl_langinfo(CODESET) implementation. */
- }
- return NULL;
-}
-
-/* 15.2.40.2.9 */
-/*
- * call-seq:
- * Encoding.locale_charmap -> string
- *
- * Returns the locale charmap name.
- *
- * Debian GNU/Linux
- * LANG=C
- * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
- * LANG=ja_JP.EUC-JP
- * Encoding.locale_charmap #=> "EUC-JP"
- *
- * SunOS 5
- * LANG=C
- * Encoding.locale_charmap #=> "646"
- * LANG=ja
- * Encoding.locale_charmap #=> "eucJP"
- *
- * The result is highly platform dependent.
- * So Encoding.find(Encoding.locale_charmap) may cause an error.
- * If you need some encoding object even for unknown locale,
- * Encoding.find("locale") can be used.
- *
- */
-mrb_value
-mrb_locale_charmap(mrb_state *mrb, mrb_value klass)
-{
-#if defined NO_LOCALE_CHARMAP
- return mrb_usascii_str_new2(mrb, "ASCII-8BIT");
-#elif defined _WIN32 || defined __CYGWIN__
- const char *nl_langinfo_codeset(void);
- const char *codeset = nl_langinfo_codeset();
- char cp[sizeof(int) * 3 + 4];
- if (!codeset) {
- //snprintf(cp, sizeof(cp), "CP%d", GetConsoleCP());
- codeset = cp;
- }
- return mrb_usascii_str_new2(mrb, codeset);
-#elif defined HAVE_LANGINFO_H
- char *codeset;
- codeset = nl_langinfo(CODESET);
- return mrb_usascii_str_new2(mrb, codeset);
-#else
- return mrb_nil_value();
-#endif
-}
-static void
-set_encoding_const(mrb_state *mrb, const char *name, mrb_encoding *enc)
-{
- mrb_value encoding = mrb_enc_from_encoding(mrb, enc);
- char *s = (char*)name;
- int haslower = 0, hasupper = 0, valid = 0;
-
- if (ISDIGIT(*s)) return;
- if (ISUPPER(*s)) {
- hasupper = 1;
- while (*++s && (ISALNUM(*s) || *s == '_')) {
- if (ISLOWER(*s)) haslower = 1;
- }
- }
- if (!*s) {
- if (s - name > ENCODING_NAMELEN_MAX) return;
- valid = 1;
- //mrb_define_const(mrb_cEncoding, name, encoding);
- mrb_define_const(mrb, ENCODE_CLASS, name, encoding);
- }
- if (!valid || haslower) {
- size_t len = s - name;
- if (len > ENCODING_NAMELEN_MAX) return;
- if (!haslower || !hasupper) {
- do {
- if (ISLOWER(*s)) haslower = 1;
- if (ISUPPER(*s)) hasupper = 1;
- } while (*++s && (!haslower || !hasupper));
- len = s - name;
- }
- len += strlen(s);
- if (len++ > ENCODING_NAMELEN_MAX) return;
- //MEMCPY(s = ALLOCA_N(char, len), name, char, len);
- memcpy(s = mrb_malloc(mrb, len), name, len);
- name = s;
- if (!valid) {
- if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
- for (; *s; ++s) {
- if (!ISALNUM(*s)) *s = '_';
- }
- if (hasupper) {
- mrb_define_const(mrb, ENCODE_CLASS, name, encoding);
- }
- }
- if (haslower) {
- for (s = (char*)name; *s; ++s) {
- if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
- }
- mrb_define_const(mrb, ENCODE_CLASS, name, encoding);
- }
- }
-}
-static enum st_retval
-mrb_enc_name_list_i(mrb_state *mrb, st_data_t name, st_data_t idx, mrb_value *arg)
-{
- mrb_value ary = *arg;
- mrb_value str = mrb_usascii_str_new2(mrb, (char*)name);
- //OBJ_FREEZE(str);
- mrb_ary_push(mrb, ary, str);
- return ST_CONTINUE;
-}
-
-/* 15.2.40.2.10 */
-/*
- * call-seq:
- * Encoding.name_list -> ["enc1", "enc2", ...]
- *
- * Returns the list of available encoding names.
- *
- * Encoding.name_list
- * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
- * "ISO-8859-1", "Shift_JIS", "EUC-JP",
- * "Windows-31J",
- * "BINARY", "CP932", "eucJP"]
- *
- */
-
-static mrb_value
-mrb_enc_name_list(mrb_state *mrb, mrb_value klass)
-{
- mrb_value ary = mrb_ary_new_capa(mrb, enc_table.names->num_entries);//mrb_ary_new2(enc_table.names->num_entries);
- st_foreachNew(mrb, enc_table.names, mrb_enc_name_list_i, &ary);
- return ary;
-}
-
-static enum st_retval
-mrb_enc_aliases_enc_i(mrb_state *mrb, st_data_t name, st_data_t orig, st_data_t arg)
-{
- mrb_value *p = (mrb_value*)arg;
- mrb_value aliases = p[0], ary = p[1];
- int idx = (int)orig;
- mrb_value key, str = mrb_ary_ref(mrb, ary, idx);//mrb_ary_entry(ary, idx);
-
- if (mrb_nil_p(str)) {
- mrb_encoding *enc = mrb_enc_from_index(mrb, idx);
-
- if (!enc) return ST_CONTINUE;
- if (STRCASECMP((char*)name, mrb_enc_name(enc)) == 0) {
- return ST_CONTINUE;
- }
- str = mrb_usascii_str_new2(mrb, mrb_enc_name(enc));
- OBJ_FREEZE(str);
- mrb_ary_set(mrb, ary, idx, str);//rb_ary_store(ary, idx, str);
- }
- key = mrb_usascii_str_new2(mrb, (char*)name);
- OBJ_FREEZE(key);
- mrb_hash_set(mrb, aliases, key, str);
- return ST_CONTINUE;
-}
-
-/* 15.2.40.2.1 */
-/*
- * call-seq:
- * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
- *
- * Returns the hash of available encoding alias and original encoding name.
- *
- * Encoding.aliases
- * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII",
- * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
- *
- */
-
-static mrb_value
-mrb_enc_aliases(mrb_state *mrb, mrb_value klass)
-{
- mrb_value aliases[2];
- aliases[0] = mrb_hash_new_capa(mrb, 0);
- aliases[1] = mrb_ary_new(mrb);
- st_foreachNew(mrb, enc_table.names, mrb_enc_aliases_enc_i, aliases);
- return aliases[0];
-}
-
-void
-mrb_init_encoding(mrb_state *mrb)
-{
-#undef mrb_intern
-#define mrb_intern(str) mrb_intern_const(str)
- mrb_value list;
- int i;
- struct RClass *s;
-
- s = mrb_define_class(mrb, "Encoding", mrb->object_class);
- //mrb_undef_alloc_func(mrb_cEncoding);
- //mrb_undef_method(CLASS_OF(mrb_cEncoding), "new");
- mrb_define_class_method(mrb, s, "aliases", mrb_enc_aliases, ARGS_NONE()); /* 15.2.40.2.1 */
- mrb_define_class_method(mrb, s, "compatible?", enc_compatible_p, ARGS_REQ(2)); /* 15.2.40.2.2 */
- mrb_define_class_method(mrb, s, "default_external", get_default_external, ARGS_NONE()); /* 15.2.40.2.3 */
- mrb_define_class_method(mrb, s, "default_external=", set_default_external, ARGS_REQ(1)); /* 15.2.40.2.4 */
- mrb_define_class_method(mrb, s, "default_internal", get_default_internal, ARGS_NONE()); /* 15.2.40.2.5 */
- mrb_define_class_method(mrb, s, "default_internal=", set_default_internal, ARGS_REQ(1)); /* 15.2.40.2.6 */
- mrb_define_class_method(mrb, s, "find", enc_find, ARGS_REQ(1)); /* 15.2.40.2.7 */
- mrb_define_class_method(mrb, s, "list", enc_list, ARGS_NONE()); /* 15.2.40.2.8 */
- mrb_define_class_method(mrb, s, "locale_charmap", mrb_locale_charmap, ARGS_NONE()); /* 15.2.40.2.9 */
- mrb_define_class_method(mrb, s, "name_list", mrb_enc_name_list, ARGS_NONE()); /* 15.2.40.2.10 */
- mrb_define_class_method(mrb, s, "_load", enc_load, ARGS_REQ(1)); /* 15.2.40.2.11 */
- mrb_define_method(mrb, s, "ascii_compatible?", enc_ascii_compatible_p, ARGS_NONE()); /* 15.2.40.2.12 */
- mrb_define_method(mrb, s, "dummy?", enc_dummy_p, ARGS_NONE()); /* 15.2.40.2.13 */
- mrb_define_method(mrb, s, "inspect", enc_inspect, ARGS_NONE()); /* 15.2.40.2.14 */
- mrb_define_method(mrb, s, "name", enc_name, ARGS_NONE()); /* 15.2.40.2.15 */
- mrb_define_method(mrb, s, "names", enc_names, ARGS_NONE()); /* 15.2.40.2.16 */
- mrb_define_method(mrb, s, "replicate", enc_replicate, ARGS_REQ(1)); /* 15.2.40.2.17 */
- mrb_define_method(mrb, s, "to_s", enc_name, ARGS_NONE()); /* 15.2.40.2.18 */
- mrb_define_method(mrb, s, "_dump", enc_dump, ARGS_ANY()); /* 15.2.40.2.19 */
-
-/* add kusuda --> */
- if (!enc_table.list) {
- mrb_enc_init(mrb);
- }
-/* add kusuda --< */
- list = mrb_ary_new_capa(mrb, enc_table.count);//mrb_ary_new2(enc_table.count);
- RBASIC(list)->c = 0;
- mrb_encoding_list = list;
- //mrb_gc_register_mark_object(list);
-
- for (i = 0; i < enc_table.count; ++i) {
- mrb_ary_push(mrb, list, enc_new(mrb, enc_table.list[i].enc));
- }
-}
-
-/* locale insensitive functions */
-
-#define ctype_test(c, ctype) \
- (mrb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype))
-
-int mrb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
-int mrb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
-int mrb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
-int mrb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
-int mrb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
-int mrb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
-int mrb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
-int mrb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
-int mrb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
-int mrb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
-int mrb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
-int mrb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
-
-int
-mrb_tolower(int c)
-{
- return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c;
-}
-
-int
-mrb_toupper(int c)
-{
- return mrb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c;
-}
-#endif //INCLUDE_ENCODING
diff --git a/src/encoding.h b/src/encoding.h
index c6c470644..1312fb947 100644
--- a/src/encoding.h
+++ b/src/encoding.h
@@ -174,11 +174,7 @@ int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc);
#endif //INCLUDE_ENCODING
/* code,ptr,encoding -> write buf */
-#ifdef INCLUDE_ENCODING
-#define mrb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf))
-#else
-#define mrb_enc_mbcput(c,buf,enc) *(buf) = (char)(c)
-#endif //INCLUDE_ENCODING
+#define mrb_enc_mbcput(c,buf,enc) ((*(buf) = (char)(c)),1)
/* start, ptr, end, encoding -> prev_char */
#define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
@@ -232,9 +228,6 @@ mrb_value mrb_enc_default_internal(mrb_state *mrb);
void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding);
void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding);
mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass);
-#ifdef INCLUDE_ENCODING
-int mrb_memsearch(mrb_state *mrb, const void*,int,const void*,int,mrb_encoding*);
-#endif //INCLUDE_ENCODING
mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr);
int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p);
diff --git a/src/gc.c b/src/gc.c
index 999a1a7d2..04be437c1 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -242,6 +242,24 @@ mrb_init_heap(mrb_state *mrb)
#endif
}
+static void
+gc_protect(mrb_state *mrb, struct RBasic *p)
+{
+ if (mrb->arena_idx > MRB_ARENA_SIZE) {
+ /* arena overflow error */
+ mrb->arena_idx = MRB_ARENA_SIZE - 4; /* force room in arena */
+ mrb_raise(mrb, mrb->eRuntimeError_class, "arena overflow error");
+ }
+ mrb->arena[mrb->arena_idx++] = p;
+}
+
+void
+mrb_gc_protect(mrb_state *mrb, mrb_value obj)
+{
+ if (SPECIAL_CONST_P(obj)) return;
+ gc_protect(mrb, RBASIC(obj));
+}
+
struct RBasic*
mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls)
{
@@ -264,12 +282,7 @@ mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls)
}
mrb->live++;
- if (mrb->arena_idx > MRB_ARENA_SIZE) {
- /* arena overflow error */
- mrb->arena_idx = MRB_ARENA_SIZE - 2; /* force room in arena */
- mrb_raise(mrb, mrb->eRuntimeError_class, "arena overflow error");
- }
- mrb->arena[mrb->arena_idx++] = p;
+ gc_protect(mrb, p);
memset(p, 0, sizeof(RVALUE));
p->tt = ttype;
p->c = cls;
@@ -362,9 +375,8 @@ gc_mark_children(mrb_state *mrb, struct RBasic *obj)
{
struct RString *s = (struct RString*)obj;
- while (s->flags & MRB_STR_SHARED) {
- s = s->aux.shared;
- if (!s) break;
+ if (s->flags & MRB_STR_SHARED) {
+ mrb_gc_mark(mrb, (struct RBasic*)s->aux.shared);
}
}
break;
diff --git a/src/hash.c b/src/hash.c
index 28e718c0d..a06becd91 100644
--- a/src/hash.c
+++ b/src/hash.c
@@ -11,34 +11,28 @@
#include "mruby/array.h"
#include "mruby/string.h"
#include "mruby/variable.h"
-#include "st.h"
-#include <errno.h>
#include <string.h>
-
-
#include <stdio.h>
-static khint_t
+static inline khint_t
mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key)
{
- char type = mrb_type(key);
- mrb_value s1 = mrb_str_new(mrb, &type, 1);
- mrb_value s2 = mrb_inspect(mrb, key);
- s1 = mrb_str_cat(mrb, s1, RSTRING_PTR(s2), RSTRING_LEN(s2));
- return kh_str_hash_func(mrb, RSTRING_PTR(s1));
+ khint_t h = mrb_type(key) << 24;
+ mrb_value h2;
+
+ h2 = mrb_funcall(mrb, key, "hash", 0, 0);
+ h ^= h2.value.i;
+ return h;
}
-static khint_t
+static inline khint_t
mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b)
{
- return mrb_equal(mrb, a, b);
+ return mrb_eql(mrb, a, b);
}
KHASH_INIT(ht, mrb_value, mrb_value, 1, mrb_hash_ht_hash_func, mrb_hash_ht_hash_equal);
-mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
- mrb_value obj, mrb_value paired_obj, void* arg);
-
#ifndef FALSE
#define FALSE 0
#endif
@@ -61,12 +55,11 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key)
#define KEY(key) mrb_hash_ht_key(mrb, key)
void
-mrb_gc_mark_ht(mrb_state *mrb, struct RHash *c)
+mrb_gc_mark_ht(mrb_state *mrb, struct RHash *hash)
{
khiter_t k;
- khash_t(ht) *h = ((struct RHash*)c)->ht;
+ khash_t(ht) *h = hash->ht;
- if (!h) return;
for (k = kh_begin(h); k != kh_end(h); k++)
if (kh_exist(h, k)) {
mrb_gc_mark_value(mrb, kh_key(h, k));
@@ -75,23 +68,15 @@ mrb_gc_mark_ht(mrb_state *mrb, struct RHash *c)
}
size_t
-mrb_gc_mark_ht_size(mrb_state *mrb, struct RHash *c)
+mrb_gc_mark_ht_size(mrb_state *mrb, struct RHash *hash)
{
- size_t ht_size = 0;
- khash_t(ht) *h = c->ht;
-
- /* ((struct RHash*)c)->ht */
- if (h) ht_size += kh_size(h)*2;
-
- return ht_size;
+ return kh_size(hash->ht)*2;
}
void
-mrb_gc_free_ht(mrb_state *mrb, struct RHash *c)
+mrb_gc_free_ht(mrb_state *mrb, struct RHash *hash)
{
- khash_t(ht) *h = c->ht;
-
- kh_destroy(ht, h);
+ kh_destroy(ht, hash->ht);
}
@@ -119,11 +104,9 @@ mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) /* mrb_hash_aref */
khash_t(ht) *h = RHASH_TBL(hash);
khiter_t k;
- if (h) {
- k = kh_get(ht, h, key);
- if (k != kh_end(h))
- return kh_value(h, k);
- }
+ k = kh_get(ht, h, key);
+ if (k != kh_end(h))
+ return kh_value(h, k);
/* not found */
if (MRB_RHASH_PROCDEFAULT_P(hash)) {
@@ -176,21 +159,6 @@ mrb_hash_freeze(mrb_value hash)
}
mrb_value
-mrb_hash(mrb_state *mrb, mrb_value obj)
-{
- mrb_value hval = mrb_funcall(mrb, obj, "Hash", 0);
-retry:
- switch (mrb_type(hval)) {
- case MRB_TT_FIXNUM:
- return hval;
-
- default:
- hval = mrb_to_int(mrb, hval);
- goto retry;
- }
-}
-
-mrb_value
mrb_hash_dup(mrb_state *mrb, mrb_value hash)
{
struct RHash* ret;
@@ -675,7 +643,7 @@ mrb_hash_values_at(mrb_state *mrb, int argc, mrb_value *argv, mrb_value hash)
long i;
for (i=0; i<argc; i++) {
- mrb_ary_push(mrb, result, KEY(mrb_hash_get(mrb, hash, argv[i])));
+ mrb_ary_push(mrb, result, mrb_hash_get(mrb, hash, argv[i]));
}
return result;
}
@@ -1136,28 +1104,6 @@ mrb_hash_has_value(mrb_state *mrb, mrb_value hash)
}
static mrb_value
-recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur)
-{
- khash_t(ht) *h1 = RHASH_TBL(hash);
- khash_t(ht) *h2 = RHASH_TBL(dt);
- khiter_t k1, k2;
- mrb_value key1;
-
- for (k1 = kh_begin(h1); k1 != kh_end(h1); k1++) {
- if (!kh_exist(h1, k1)) continue;
- key1 = kh_key(h1,k1);
- k2 = kh_get(ht, h2, key1);
- if ( k2 != kh_end(h2)) {
- if (mrb_equal(mrb, kh_value(h1,k1), kh_value(h2,k2))) {
- continue; /* next key */
- }
- }
- return mrb_false_value();
- }
- return mrb_true_value();
-}
-
-static mrb_value
hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
{
if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value();
@@ -1171,9 +1117,25 @@ hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
return mrb_fixnum_value(mrb_equal(mrb, hash2, hash1));
}
if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return mrb_false_value();
- if (!RHASH(hash1)->ht || !RHASH(hash2)->ht) return mrb_true_value();
+ else {
+ khash_t(ht) *h1 = RHASH_TBL(hash1);
+ khash_t(ht) *h2 = RHASH_TBL(hash2);
+ khiter_t k1, k2;
+ mrb_value key;
- return mrb_exec_recursive_paired(mrb, recursive_eql, hash1, hash2, (void*)0);
+ for (k1 = kh_begin(h1); k1 != kh_end(h1); k1++) {
+ if (!kh_exist(h1, k1)) continue;
+ key = kh_key(h1,k1);
+ k2 = kh_get(ht, h2, key);
+ if (k2 != kh_end(h2)) {
+ if (mrb_equal(mrb, kh_value(h1,k1), kh_value(h2,k2))) {
+ continue; /* next key */
+ }
+ }
+ return mrb_false_value();
+ }
+ }
+ return mrb_true_value();
}
/* 15.2.13.4.1 */
@@ -1319,9 +1281,6 @@ mrb_hash_rassoc(mrb_state *mrb, mrb_value hash)
mrb_value key, value, has_key;
mrb_get_args(mrb, "o", &key);
- if (mrb_nil_p(key))
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
-
has_key = mrb_hash_has_keyWithKey(mrb, hash, key);
if (mrb_test(has_key)) {
value = mrb_hash_get(mrb, hash, key);
diff --git a/src/init.c b/src/init.c
index 5aab8d6ae..17ce24313 100644
--- a/src/init.c
+++ b/src/init.c
@@ -20,7 +20,6 @@ void mrb_init_proc(mrb_state*);
void mrb_init_range(mrb_state*);
void mrb_init_string(mrb_state*);
void mrb_init_regexp(mrb_state*);
-void mrb_init_encoding(mrb_state*);
void mrb_init_exception(mrb_state*);
void mrb_init_time(mrb_state*);
void mrb_init_io(mrb_state*);
@@ -54,7 +53,6 @@ mrb_init_core(mrb_state *mrb)
mrb_init_gc(mrb);
#ifdef INCLUDE_REGEXP
mrb_init_regexp(mrb);
- mrb_init_encoding(mrb);
#endif
mrb_init_exception(mrb);
mrb_init_print(mrb);
diff --git a/src/kernel.c b/src/kernel.c
index 17951afc7..9ee9e8d34 100644
--- a/src/kernel.c
+++ b/src/kernel.c
@@ -274,11 +274,11 @@ mrb_f_send(int argc, mrb_value *argv, mrb_value recv)
static mrb_value
mrb_f_send_m(mrb_state *mrb, mrb_value self)
{
- mrb_value *argv;
+ mrb_value name, block, *argv;
int argc;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- return mrb_f_send(argc, argv, self);
+
+ mrb_get_args(mrb, "&o*", &block, &name, &argv, &argc);
+ return mrb_funcall_with_block(mrb,self, mrb_string_value_ptr(mrb, name), argc, argv, block);
}
/* 15.3.1.2.1 */
diff --git a/src/load.c b/src/load.c
index 28f52433a..c0684f1aa 100644
--- a/src/load.c
+++ b/src/load.c
@@ -336,7 +336,6 @@ read_rite_irep_record(mrb_state *mrb, unsigned char *src, mrb_irep *irep, uint32
uint16_t crc, tt, pdl, snl, offset, bufsize=MRB_DUMP_DEFAULT_STR_LEN;
mrb_int fix_num;
mrb_float f;
- mrb_value str;
int ai = mrb_gc_arena_save(mrb);
recordStart = src;
diff --git a/src/object.c b/src/object.c
index 1d84909ec..81e3867a7 100644
--- a/src/object.c
+++ b/src/object.c
@@ -11,13 +11,6 @@
#include "mruby/class.h"
#include "mruby/numeric.h"
-#ifdef INCLUDE_REGEXP
- #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-#else
- #define mrb_usascii_str_new2 mrb_str_new_cstr
- #define mrb_usascii_str_new mrb_str_new
-#endif
-
#ifndef FALSE
#define FALSE 0
#endif
@@ -106,7 +99,7 @@ mrb_true(mrb_state *mrb, mrb_value obj)
static mrb_value
nil_to_s(mrb_state *mrb, mrb_value obj)
{
- return mrb_usascii_str_new(mrb, 0, 0);
+ return mrb_str_new(mrb, 0, 0);
}
/***********************************************************************
@@ -166,7 +159,7 @@ true_xor(mrb_state *mrb, mrb_value obj)
static mrb_value
true_to_s(mrb_state *mrb, mrb_value obj)
{
- return mrb_usascii_str_new2(mrb, "true");
+ return mrb_str_new_cstr(mrb, "true");
}
/* 15.2.5.3.4 */
@@ -279,7 +272,7 @@ false_or(mrb_state *mrb, mrb_value obj)
static mrb_value
false_to_s(mrb_state *mrb, mrb_value obj)
{
- return mrb_usascii_str_new2(mrb, "false");
+ return mrb_str_new_cstr(mrb, "false");
}
void
@@ -626,12 +619,12 @@ mrb_Float(mrb_state *mrb, mrb_value val)
mrb_value
mrb_inspect(mrb_state *mrb, mrb_value obj)
{
- return mrb_obj_as_string(mrb, mrb_funcall(mrb, obj, "inspect", 0, 0));
+ return mrb_obj_as_string(mrb, mrb_funcall(mrb, obj, "inspect", 0, 0));
}
int
mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2)
{
- return RTEST(mrb_funcall(mrb, obj1, "eql?", 1, obj2));
+ if (mrb_obj_eq(mrb, obj1, obj2)) return TRUE;
+ return RTEST(mrb_funcall(mrb, obj1, "eql?", 1, obj2));
}
-
diff --git a/src/re.c b/src/re.c
index aea60ec17..b4134c81c 100644
--- a/src/re.c
+++ b/src/re.c
@@ -7,16 +7,11 @@
#include "mruby.h"
#include <string.h>
#include "mruby/string.h"
-#include "mruby/khash.h"
#include "encoding.h"
#include "re.h"
-#include "mruby/numeric.h"
-#include "mruby/range.h"
#include "mruby/array.h"
#include "regint.h"
#include "mruby/class.h"
-#include "mruby/hash.h"
-#include "mruby/variable.h"
#include "error.h"
#ifdef INCLUDE_REGEXP
@@ -54,13 +49,10 @@ unsigned long ruby_scan_oct(const char*, size_t, size_t*);
unsigned long ruby_scan_hex(const char*, size_t, size_t*);
static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match);
-static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
- mrb_encoding **fixed_enc, onig_errmsg_buffer err);
-static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
- mrb_encoding *enc, mrb_encoding *resenc);
+static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err);
+static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len);
static char * option_to_str(char str[4], int options);
-static mrb_value reg_cache;
//static int may_need_recompile;
//static int reg_kcode = DEFAULT_KCODE;
/* ------------------------------------------------------------------------- */
@@ -94,22 +86,20 @@ mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value
re->usecnt = 0;
return mrb_funcall_argv(mrb, mrb_obj_value(re), "initialize", argc, argv);
}
-//#define mrb_enc_mbcput(a,b,c) a
+
mrb_value
mrb_reg_quote(mrb_state *mrb, mrb_value str)
{
- mrb_encoding *enc = mrb_enc_get(mrb, str);
char *s, *send, *t;
mrb_value tmp;
- int c,clen;
- int ascii_only = mrb_enc_str_asciionly_p(mrb, str);
+ int c;
s = RSTRING_PTR(str);
send = s + RSTRING_LEN(str);
while (s < send) {
- c = mrb_enc_ascget(mrb, s, send, &clen, enc);
+ c = *s;
if (c == -1) {
- s += mbclen(s, send, enc);
+ s += send - s;
continue;
}
switch (c) {
@@ -121,38 +111,28 @@ mrb_reg_quote(mrb_state *mrb, mrb_value str)
case '\t': case '\f': case '\n': case '\r':
goto meta_found;
}
- s += clen;
+ s++;
}
- //tmp = mrb_str_new3(str);
tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
- if (ascii_only) {
- mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb));
- }
return tmp;
meta_found:
tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2);
- if (ascii_only) {
- mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb));
- }
- else {
- mrb_enc_copy(mrb, tmp, str);
- }
t = RSTRING_PTR(tmp);
/* copy upto metacharacter */
memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str));
t += s - RSTRING_PTR(str);
while (s < send) {
- c = mrb_enc_ascget(mrb, s, send, &clen, enc);
+ c = *s;
if (c == -1) {
- int n = mbclen(s, send, enc);
+ int n = send - s;
while (n--)
*t++ = *s++;
continue;
}
- s += clen;
+ s++;
switch (c) {
case '[': case ']': case '{': case '}':
case '(': case ')': case '|': case '-':
@@ -263,7 +243,7 @@ mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match)
if (start == -1) return mrb_nil_value();
end = m->rmatch->regs.end[nth];
len = end - start;
- str = mrb_str_substr(mrb, mrb_obj_value(m->str), start, len);
+ str = mrb_str_subseq(mrb, mrb_obj_value(m->str), start, len);
return str;
}
@@ -379,75 +359,13 @@ mrb_reg_options(mrb_state *mrb, mrb_value re)
return options;
}
-static void
-reg_enc_error(mrb_state *mrb, mrb_value re, mrb_value str)
-{
- mrb_raise(mrb, E_ENCODING_ERROR,
- "incompatible encoding regexp match (%s regexp with %s string)",
- mrb_enc_name(mrb_enc_get(mrb, re)),
- mrb_enc_name(mrb_enc_get(mrb, str)));
-}
-
-static int
-mrb_reg_fixed_encoding_p(mrb_value re)
-{
- /*if (FL_TEST(re, KCODE_FIXED))
- return Qtrue;
- else */
- return 0/*Qfalse*/;
-}
-
-static mrb_encoding*
-mrb_reg_prepare_enc(mrb_state *mrb, mrb_value re, mrb_value str, int warn)
-{
- mrb_encoding *enc = 0;
-
- if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) {
- mrb_raise(mrb, E_ARGUMENT_ERROR,
- "invalid byte sequence in %s",
- mrb_enc_name(mrb_enc_get(mrb, str)));
- }
-
- mrb_reg_check(mrb, re);
- enc = mrb_enc_get(mrb, str);
- if (!mrb_enc_str_asciicompat_p(mrb, str)) {
- if (RREGEXP(re)->ptr->enc != enc) {
- reg_enc_error(mrb, re, str);
- }
- }
- else if (mrb_reg_fixed_encoding_p(re)) {
- if (RREGEXP(re)->ptr->enc != enc &&
- (!mrb_enc_asciicompat(mrb, RREGEXP(re)->ptr->enc) ||
- mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT)) {
- reg_enc_error(mrb, re, str);
- }
- enc = RREGEXP(re)->ptr->enc;
- }
- if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
- enc != mrb_ascii8bit_encoding(mrb) &&
- mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
- mrb_warn("regexp match /.../n against to %s string",
- mrb_enc_name(enc));
- }
- return enc;
-}
-
static mrb_value
mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re)
{
- mrb_encoding *enc = mrb_enc_get(mrb, re);
mrb_value str = mrb_str_new_cstr(mrb, "/");//mrb_str_buf_new2("/");
- mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
- if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
- if (re.tt && mrb_enc_asciicompat(mrb, enc)) {
- mrb_enc_copy(mrb, str, re);
- }
- else {
- mrb_enc_associate(mrb, str, mrb_usascii_encoding(mrb));
- }
- mrb_reg_expr_str(mrb, str, s, len, enc, resenc);
- mrb_str_buf_cat(mrb, str, "/", strlen("/"));//mrb_str_buf_cat2(str, "/");
+ mrb_reg_expr_str(mrb, str, s, len);
+ mrb_str_buf_cat(mrb, str, "/", strlen("/"));
if (re.tt) {
char opts[4];
mrb_reg_check(mrb, re);
@@ -476,18 +394,14 @@ mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str)
OnigErrorInfo einfo;
const char *pattern;
mrb_value unescaped;
- mrb_encoding *fixed_enc = 0;
- mrb_encoding *enc = mrb_reg_prepare_enc(mrb, re, str, 1);
-
- if (reg->enc == enc) return reg;
+ mrb_encoding *enc = mrb_ascii8bit_encoding(mrb);
mrb_reg_check(mrb, re);
reg = RREGEXP(re)->ptr;
pattern = RREGEXP_SRC_PTR(re);
unescaped = mrb_reg_preprocess(mrb,
- pattern, pattern + RREGEXP(re)->src->len, enc,
- &fixed_enc, err);
+ pattern, pattern + RREGEXP(re)->src->len, err);
if (mrb_nil_p(unescaped)) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err);
@@ -675,18 +589,6 @@ ruby_scan_hex(const char *start, size_t len, size_t *retlen)
return retval;
}
-static int
-check_unicode_range(unsigned long code, onig_errmsg_buffer err)
-{
- if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
- 0x10ffff < code) {
- //errcpy(err, "invalid Unicode range");
- printf("invalid Unicode range");
- return -1;
- }
- return 0;
-}
-
#define BYTEWIDTH 8
int
@@ -735,59 +637,6 @@ mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv)
return 0;
}
-static int
-append_utf8(mrb_state *mrb, unsigned long uv,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- if (check_unicode_range(uv, err) != 0)
- return -1;
- if (uv < 0x80) {
- char escbuf[5];
- snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
- mrb_str_buf_cat(mrb, buf, escbuf, 4);
- }
- else {
- int len;
- char utf8buf[6];
- len = mrb_uv_to_utf8(mrb, utf8buf, uv);
- mrb_str_buf_cat(mrb, buf, utf8buf, len);
-
- if (*encp == 0)
- *encp = mrb_utf8_encoding(mrb);
- else if (*encp != mrb_utf8_encoding(mrb)) {
- //errcpy(err, "UTF-8 character in non UTF-8 regexp");
- printf("UTF-8 character in non UTF-8 regexp");
- return -1;
- }
- }
- return 0;
-}
-
-static int
-unescape_unicode_bmp(mrb_state *mrb, const char **pp, const char *end,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- size_t len;
- unsigned long code;
-
- if (end < p+4) {
- //errcpy(err, "invalid Unicode escape");
- printf("invalid Unicode escape");
- return -1;
- }
- code = ruby_scan_hex(p, 4, &len);
- if (len != 4) {
- //errcpy(err, "invalid Unicode escape");
- printf("invalid Unicode escape");
- return -1;
- }
- if (append_utf8(mrb, code, buf, encp, err) != 0)
- return -1;
- *pp = p + 4;
- return 0;
-}
-
unsigned long
ruby_scan_oct(const char *start, size_t len, size_t *retlen)
{
@@ -802,400 +651,29 @@ ruby_scan_oct(const char *start, size_t len, size_t *retlen)
return retval;
}
-static int
-read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- int code;
- int meta_prefix = 0, ctrl_prefix = 0;
- size_t len;
-
- if (p == end || *p++ != '\\') {
- //errcpy(err, "too short escaped multibyte character");
- printf("too short escaped multibyte character");
- return -1;
- }
-
-again:
- if (p == end) {
- //errcpy(err, "too short escape sequence");
- printf("too short escape sequence");
- return -1;
- }
- switch (*p++) {
- case '\\': code = '\\'; break;
- case 'n': code = '\n'; break;
- case 't': code = '\t'; break;
- case 'r': code = '\r'; break;
- case 'f': code = '\f'; break;
- case 'v': code = '\013'; break;
- case 'a': code = '\007'; break;
- case 'e': code = '\033'; break;
-
- /* \OOO */
- case '0': case '1': case '2': case '3':
- case '4': case '5': case '6': case '7':
- p--;
- code = scan_oct(p, end < p+3 ? end-p : 3, &len);
- p += len;
- break;
-
- case 'x': /* \xHH */
- code = scan_hex(p, end < p+2 ? end-p : 2, &len);
- if (len < 1) {
- //errcpy(err, "invalid hex escape");
- printf("invalid hex escape");
- return -1;
- }
- p += len;
- break;
-
- case 'M': /* \M-X, \M-\C-X, \M-\cX */
- if (meta_prefix) {
- //errcpy(err, "duplicate meta escape");
- printf("duplicate meta escape");
- return -1;
- }
- meta_prefix = 1;
- if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) {
- if (*p == '\\') {
- p++;
- goto again;
- }
- else {
- code = *p++;
- break;
- }
- }
- //errcpy(err, "too short meta escape");
- printf("too short meta escape");
- return -1;
-
- case 'C': /* \C-X, \C-\M-X */
- if (p == end || *p++ != '-') {
- //errcpy(err, "too short control escape");
- printf("too short control escape");
- return -1;
- }
- case 'c': /* \cX, \c\M-X */
- if (ctrl_prefix) {
- //errcpy(err, "duplicate control escape");
- printf("duplicate control escape");
- return -1;
- }
- ctrl_prefix = 1;
- if (p < end && (*p & 0x80) == 0) {
- if (*p == '\\') {
- p++;
- goto again;
- }
- else {
- code = *p++;
- break;
- }
- }
- //errcpy(err, "too short control escape");
- printf("too short control escape");
- return -1;
-
- default:
- //errcpy(err, "unexpected escape sequence");
- printf("unexpected escape sequence");
- return -1;
- }
- if (code < 0 || 0xff < code) {
- //errcpy(err, "invalid escape code");
- printf("invalid escape code");
- return -1;
- }
-
- if (ctrl_prefix)
- code &= 0x1f;
- if (meta_prefix)
- code |= 0x80;
-
- *pp = p;
- return code;
-}
-
-static int
-unescape_escaped_nonascii(mrb_state *mrb, const char **pp, const char *end, mrb_encoding *enc,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- int chmaxlen = mrb_enc_mbmaxlen(enc);
- //char *chbuf = ALLOCA_N(char, chmaxlen);
- char *chbuf = mrb_malloc(mrb, chmaxlen);
- int chlen = 0;
- int byte;
- int l;
-
- memset(chbuf, 0, chmaxlen);
-
- byte = read_escaped_byte(&p, end, err);
- if (byte == -1) {
- return -1;
- }
-
- chbuf[chlen++] = byte;
- while (chlen < chmaxlen &&
- MBCLEN_NEEDMORE_P(mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
- byte = read_escaped_byte(&p, end, err);
- if (byte == -1) {
- return -1;
- }
- chbuf[chlen++] = byte;
- }
-
- l = mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
- if (MBCLEN_INVALID_P(l)) {
- //errcpy(err, "invalid multibyte escape");
- printf("invalid multibyte escape");
- return -1;
- }
- if (1 < chlen || (chbuf[0] & 0x80)) {
- mrb_str_buf_cat(mrb, buf, chbuf, chlen);
-
- if (*encp == 0)
- *encp = enc;
- else if (*encp != enc) {
- //errcpy(err, "escaped non ASCII character in UTF-8 regexp");
- printf("escaped non ASCII character in UTF-8 regexp");
- return -1;
- }
- }
- else {
- char escbuf[5];
- snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff);
- mrb_str_buf_cat(mrb, buf, escbuf, 4);
- }
- *pp = p;
- return 0;
-}
-
-static int
-unescape_unicode_list(mrb_state *mrb, const char **pp, const char *end,
- mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
-{
- const char *p = *pp;
- int has_unicode = 0;
- unsigned long code;
- size_t len;
-
- while (p < end && ISSPACE(*p)) p++;
-
- while (1) {
- code = ruby_scan_hex(p, end-p, &len);
- if (len == 0)
- break;
- if (6 < len) { /* max 10FFFF */
- //errcpy(err, "invalid Unicode range");
- printf("invalid Unicode range");
- return -1;
- }
- p += len;
- if (append_utf8(mrb, code, buf, encp, err) != 0)
- return -1;
- has_unicode = 1;
-
- while (p < end && ISSPACE(*p)) p++;
- }
-
- if (has_unicode == 0) {
- //errcpy(err, "invalid Unicode list");
- printf("invalid Unicode list");
- return -1;
- }
-
- *pp = p;
-
- return 0;
-}
-
-static int
-unescape_nonascii(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
- mrb_value buf, mrb_encoding **encp, int *has_property,
- onig_errmsg_buffer err)
-{
- char c;
- char smallbuf[2];
-
- while (p < end) {
- int chlen = mrb_enc_precise_mbclen(p, end, enc);
- if (!MBCLEN_CHARFOUND_P(chlen)) {
- //errcpy(err, "invalid multibyte character");
- printf("invalid multibyte character");
- return -1;
- }
- chlen = MBCLEN_CHARFOUND_LEN(chlen);
- if (1 < chlen || (*p & 0x80)) {
- mrb_str_buf_cat(mrb, buf, p, chlen);
- p += chlen;
- if (*encp == 0)
- *encp = enc;
- else if (*encp != enc) {
- //errcpy(err, "non ASCII character in UTF-8 regexp");
- printf("non ASCII character in UTF-8 regexp");
- return -1;
- }
- continue;
- }
-
- switch (c = *p++) {
- case '\\':
- if (p == end) {
- //errcpy(err, "too short escape sequence");
- printf("too short escape sequence");
- return -1;
- }
- switch (c = *p++) {
- case '1': case '2': case '3':
- case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
- {
- size_t octlen;
- if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) {
- /* backref or 7bit octal.
- no need to unescape anyway.
- re-escaping may break backref */
- goto escape_asis;
- }
- }
- /* xxx: How about more than 199 subexpressions? */
-
- case '0': /* \0, \0O, \0OO */
-
- case 'x': /* \xHH */
- case 'c': /* \cX, \c\M-X */
- case 'C': /* \C-X, \C-\M-X */
- case 'M': /* \M-X, \M-\C-X, \M-\cX */
- p = p-2;
- if (unescape_escaped_nonascii(mrb, &p, end, enc, buf, encp, err) != 0)
- return -1;
- break;
-
- case 'u':
- if (p == end) {
- //errcpy(err, "too short escape sequence");
- printf("too short escape sequence");
- return -1;
- }
- if (*p == '{') {
- /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
- p++;
- if (unescape_unicode_list(mrb, &p, end, buf, encp, err) != 0)
- return -1;
- if (p == end || *p++ != '}') {
- //errcpy(err, "invalid Unicode list");
- printf("invalid Unicode list");
- return -1;
- }
- break;
- }
- else {
- /* \uHHHH */
- if (unescape_unicode_bmp(mrb, &p, end, buf, encp, err) != 0)
- return -1;
- break;
- }
-
- case 'p': /* \p{Hiragana} */
- case 'P':
- if (!*encp) {
- *has_property = 1;
- }
- goto escape_asis;
-
- default: /* \n, \\, \d, \9, etc. */
-escape_asis:
- smallbuf[0] = '\\';
- smallbuf[1] = c;
- mrb_str_buf_cat(mrb, buf, smallbuf, 2);
- break;
- }
- break;
-
- default:
- mrb_str_buf_cat(mrb, buf, &c, 1);
- break;
- }
- }
-
- return 0;
-}
-
-
static mrb_value
-mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
- mrb_encoding **fixed_enc, onig_errmsg_buffer err)
+mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err)
{
- mrb_value buf;
- int has_property = 0;
-
- //buf = mrb_str_buf_new(0);
- buf = mrb_str_buf_new(mrb, 0);
-
- if (mrb_enc_asciicompat(mrb, enc))
- *fixed_enc = 0;
- else {
- *fixed_enc = enc;
- mrb_enc_associate(mrb, buf, enc);
- }
-
- if (unescape_nonascii(mrb, p, end, enc, buf, fixed_enc, &has_property, err) != 0)
- return mrb_nil_value();
-
- if (has_property && !*fixed_enc) {
- *fixed_enc = enc;
- }
-
- if (*fixed_enc) {
- mrb_enc_associate(mrb, buf, *fixed_enc);
- }
-
- return buf;
+ return mrb_nil_value();
}
static int
-mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_encoding *enc,
+mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len,
int options, onig_errmsg_buffer err,
const char *sourcefile, int sourceline)
{
struct RRegexp *re = RREGEXP(obj);
mrb_value unescaped;
- mrb_encoding *fixed_enc = 0;
- mrb_encoding *a_enc = mrb_ascii8bit_encoding(mrb);
+ mrb_encoding *enc = mrb_ascii8bit_encoding(mrb);
if (re->ptr)
mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp");
re->ptr = 0;
- if (mrb_enc_dummy_p(enc)) {
- //errcpy(err, "can't make regexp with dummy encoding");
- printf("can't make regexp with dummy encoding");
- return -1;
- }
-
- unescaped = mrb_reg_preprocess(mrb, s, s+len, enc, &fixed_enc, err);
+ unescaped = mrb_reg_preprocess(mrb, s, s+len, err);
if (mrb_nil_p(unescaped))
return -1;
- if (fixed_enc) {
- if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
- (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
- //errcpy(err, "incompatible character encoding");
- printf("incompatible character encoding");
- return -1;
- }
- if (fixed_enc != a_enc) {
- options |= ARG_ENCODING_FIXED;
- enc = fixed_enc;
- }
- }
- else if (!(options & ARG_ENCODING_FIXED)) {
- enc = mrb_usascii_encoding(mrb);
- }
-
- mrb_enc_associate(mrb, mrb_obj_value(re), enc);
- if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
+ if ((options & ARG_ENCODING_FIXED)) {
//re->basic.flags |= KCODE_FIXED;
re->flags|= KCODE_FIXED;
}
@@ -1207,7 +685,7 @@ mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_e
options & ARG_REG_OPTION_MASK, err,
sourcefile, sourceline);
if (!re->ptr) return -1;
- re->src = mrb_str_ptr(mrb_enc_str_new(mrb, s, len, enc));
+ re->src = mrb_str_ptr(mrb_str_new(mrb, s, len));
return 0;
}
@@ -1217,8 +695,8 @@ mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options
const char *sourcefile, int sourceline)
{
int ret;
- mrb_encoding *enc = mrb_enc_get(mrb, str);
+#if 0
if (options & ARG_ENCODING_NONE) {
mrb_encoding *ascii8bit = mrb_ascii8bit_encoding(mrb);
if (enc != ascii8bit) {
@@ -1230,8 +708,9 @@ mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options
enc = ascii8bit;
}
}
+#endif
- ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
+ ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str),
options, err, sourcefile, sourceline);
return ret;
@@ -1267,7 +746,6 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
onig_errmsg_buffer err = "";
int flags = 0;
mrb_value str;
- mrb_encoding *enc;
const char *ptr;
long len;
@@ -1286,10 +764,7 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
flags = mrb_reg_options(mrb, re);
ptr = RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
- enc = mrb_enc_get(mrb, re);
- if (mrb_reg_initialize(mrb, self, ptr, len, enc, flags, err, NULL, 0)) {
- /*str = mrb_enc_str_new(mrb, ptr, len, enc);
- mrb_reg_raise_str(str, flags, err);*/
+ if (mrb_reg_initialize(mrb, self, ptr, len, flags, err, NULL, 0)) {
printf("mrb_reg_raise_str(str, flags, err);");
}
}
@@ -1298,12 +773,10 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
if (mrb_type(argv[1]) == MRB_TT_FIXNUM) flags = mrb_fixnum(argv[1]);
else if (mrb_test(argv[1])) flags = ONIG_OPTION_IGNORECASE;
}
- enc = 0;
if (argc == 3 && !mrb_nil_p(argv[2])) {
//char *kcode = StringValuePtr(argv[2]);
char *kcode = mrb_string_value_ptr(mrb, argv[2]);
if (kcode[0] == 'n' || kcode[0] == 'N') {
- enc = mrb_ascii8bit_encoding(mrb);
flags |= ARG_ENCODING_NONE;
}
else {
@@ -1314,9 +787,7 @@ mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value se
str = argv[0];
//ptr = StringValuePtr(str);
ptr = mrb_string_value_ptr(mrb, str);
- if (enc
- ? mrb_reg_initialize(mrb, self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0)
- : mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) {
+ if (mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) {
//mrb_reg_raise_str(str, flags, err);
}
}
@@ -1346,7 +817,7 @@ mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/)
mrb_reg_check(mrb, copy);
s = RREGEXP_SRC_PTR(copy);
len = RREGEXP_SRC_LEN(copy);
- if (mrb_reg_initialize(mrb, re, s, len, mrb_enc_get(mrb, copy), mrb_reg_options(mrb, copy),
+ if (mrb_reg_initialize(mrb, re, s, len, mrb_reg_options(mrb, copy),
err, 0/*NULL*/, 0) != 0) {
mrb_reg_raise(mrb, s, len, err, re);
}
@@ -1628,7 +1099,7 @@ mrb_reg_source(mrb_state *mrb, mrb_value re)
mrb_value str;
mrb_reg_check(mrb, re);
- str = mrb_enc_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re), mrb_enc_get(mrb, re));
+ str = mrb_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re));
return str;
}
@@ -1757,23 +1228,12 @@ typedef struct {
long char_pos;
} pair_t;
-static int
-pair_byte_cmp(const void *pair1, const void *pair2)
-{
- long diff = ((pair_t*)pair1)->byte_pos - ((pair_t*)pair2)->byte_pos;
- return diff ? diff > 0 ? 1 : -1 : 0;
-}
-
static void
update_char_offset(mrb_state *mrb, mrb_value match)
{
struct rmatch *rm = RMATCH(match)->rmatch;
struct re_registers *regs;
- int i, num_regs, num_pos;
- long c;
- char *s, *p, *q;
- mrb_encoding *enc;
- pair_t *pairs;
+ int i, num_regs;
if (rm->char_offset_updated)
return;
@@ -1787,55 +1247,12 @@ update_char_offset(mrb_state *mrb, mrb_value match)
rm->char_offset_num_allocated = num_regs;
}
- enc = mrb_enc_get(mrb, mrb_obj_value(RMATCH(match)->str));
- if (mrb_enc_mbmaxlen(enc) == 1) {
- for (i = 0; i < num_regs; i++) {
- rm->char_offset[i].beg = BEG(i);
- rm->char_offset[i].end = END(i);
- }
- rm->char_offset_updated = 1;
- return;
- }
-
- //pairs = ALLOCA_N(pair_t, num_regs*2);
- pairs = mrb_malloc(mrb, sizeof(pair_t)*num_regs*2);
-
- num_pos = 0;
for (i = 0; i < num_regs; i++) {
- if (BEG(i) < 0)
- continue;
- pairs[num_pos++].byte_pos = BEG(i);
- pairs[num_pos++].byte_pos = END(i);
- }
- qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
-
- s = p = RMATCH(match)->str->buf;
- c = 0;
- for (i = 0; i < num_pos; i++) {
- q = s + pairs[i].byte_pos;
- c += mrb_enc_strlen(p, q, enc);
- pairs[i].char_pos = c;
- p = q;
- }
-
- for (i = 0; i < num_regs; i++) {
- pair_t key, *found;
- if (BEG(i) < 0) {
- rm->char_offset[i].beg = -1;
- rm->char_offset[i].end = -1;
- continue;
- }
-
- key.byte_pos = BEG(i);
- found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
- rm->char_offset[i].beg = found->char_pos;
-
- key.byte_pos = END(i);
- found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
- rm->char_offset[i].end = found->char_pos;
+ rm->char_offset[i].beg = BEG(i);
+ rm->char_offset[i].end = END(i);
}
-
rm->char_offset_updated = 1;
+ return;
}
/* 15.2.16.3.2 */
@@ -2235,49 +1652,36 @@ option_to_str(char str[4], int options)
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
static void
-mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
- mrb_encoding *enc, mrb_encoding *resenc)
+mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len)
{
const char *p, *pend;
int need_escape = 0;
- int c, clen;
+ int c;
p = s; pend = p + len;
- if (mrb_enc_asciicompat(mrb, enc)) {
- while (p < pend) {
- c = mrb_enc_ascget(mrb, p, pend, &clen, enc);
- if (c == -1) {
- if (enc == resenc) {
- p += mbclen(p, pend, enc);
- }
- else {
- need_escape = 1;
- break;
- }
- }
- else if (c != '/' && mrb_enc_isprint(c, enc)) {
- p += clen;
- }
- else {
- need_escape = 1;
- break;
- }
+ while (p < pend) {
+ c = *p;
+ if (c == -1) {
+ p += pend - p;
+ }
+ else if (c != '/' && ISPRINT(c)) {
+ p++;
+ }
+ else {
+ need_escape = 1;
+ break;
}
- }
- else {
- need_escape = 1;
}
if (!need_escape) {
mrb_str_buf_cat(mrb, str, s, len);
}
else {
- int unicode_p = mrb_enc_unicode_p(enc);
p = s;
while (p<pend) {
- c = mrb_enc_ascget(mrb, p, pend, &clen, enc);
- if (c == '\\' && p+clen < pend) {
- int n = clen + mbclen(p+clen, pend, enc);
+ c = *p;
+ if (c == '\\' && p+1 < pend) {
+ int n = 1 + pend - (p+1);
mrb_str_buf_cat(mrb, str, p, n);
p += n;
continue;
@@ -2285,38 +1689,21 @@ mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
else if (c == '/') {
char c = '\\';
mrb_str_buf_cat(mrb, str, &c, 1);
- mrb_str_buf_cat(mrb, str, p, clen);
- }
- else if (c == -1) {
- clen = mrb_enc_precise_mbclen(p, pend, enc);
- if (!MBCLEN_CHARFOUND_P(clen)) {
- c = (unsigned char)*p;
- clen = 1;
- goto hex;
- }
- if (resenc) {
- unsigned int c = mrb_enc_mbc_to_codepoint(p, pend, enc);
- mrb_str_buf_cat_escaped_char(mrb, str, c, unicode_p);
- }
- else {
- clen = MBCLEN_CHARFOUND_LEN(clen);
- mrb_str_buf_cat(mrb, str, p, clen);
- }
+ mrb_str_buf_cat(mrb, str, p, 1);
}
- else if (mrb_enc_isprint(c, enc)) {
- mrb_str_buf_cat(mrb, str, p, clen);
+ else if (ISPRINT(c)) {
+ mrb_str_buf_cat(mrb, str, p, 1);
}
- else if (!mrb_enc_isspace(c, enc)) {
+ else if (!ISSPACE(c)) {
char b[8];
- hex:
snprintf(b, sizeof(b), "\\x%02X", c);
mrb_str_buf_cat(mrb, str, b, 4);
}
else {
- mrb_str_buf_cat(mrb, str, p, clen);
+ mrb_str_buf_cat(mrb, str, p, 1);
}
- p += clen;
+ p++;
}
}
}
@@ -2355,7 +1742,6 @@ mrb_reg_to_s(mrb_state *mrb, mrb_value re)
mrb_reg_check(mrb, re);
memset(optbuf, 0, 5);
- mrb_enc_copy(mrb, str, re);
options = RREGEXP(re)->ptr->options;
ptr = (UChar*)RREGEXP_SRC_PTR(re);
len = RREGEXP_SRC_LEN(re);
@@ -2399,7 +1785,7 @@ again:
++ptr;
len -= 2;
- err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
+ err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
enc, OnigDefaultSyntax, NULL);
onig_free(rp);
}
@@ -2419,9 +1805,8 @@ again:
}
mrb_str_buf_cat(mrb, str, ":", strlen(":"));
- mrb_reg_expr_str(mrb, str, (char*)ptr, len, enc, NULL);
+ mrb_reg_expr_str(mrb, str, (char*)ptr, len);
mrb_str_buf_cat(mrb, str, ")", strlen(")"));
- mrb_enc_copy(mrb, str, re);
return str;
}
@@ -2663,8 +2048,6 @@ mrb_init_regexp(mrb_state *mrb)
mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED));
- //mrb_global_variable(&reg_cache);
-
s = mrb_define_class(mrb, "MatchData", mrb->object_class);
//mrb_undef_method(CLASS_OF(rb_cMatch), "new");
@@ -2705,27 +2088,23 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
{
mrb_value val;
char *p, *s, *e;
- int no, clen;
- mrb_encoding *str_enc = mrb_enc_get(mrb, str);
- mrb_encoding *src_enc = mrb_enc_get(mrb, src);
- int acompat = mrb_enc_asciicompat(mrb, str_enc);
-#define ASCGET(mrb,s,e,cl) (acompat ? (*cl=1,ISASCII(s[0])?s[0]:-1) : mrb_enc_ascget(mrb, s, e, cl, str_enc))
struct RString *ps = mrb_str_ptr(str);
+ int no;
val.tt = 0;
p = s = ps->buf;
e = s + ps->len;
while (s < e) {
- int c = ASCGET(mrb, s, e, &clen);
+ int c = *s;
char *ss;
if (c == -1) {
- s += mbclen(s, e, str_enc);
+ s += e - s;
continue;
}
ss = s;
- s += clen;
+ s++;
if (c != '\\' || s == e) continue;
@@ -2733,16 +2112,16 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
if (!val.tt) {
val = mrb_str_buf_new(mrb, ss-p);
}
- mrb_enc_str_buf_cat(mrb, val, p, ss-p, str_enc);
+ mrb_str_buf_cat(mrb, val, p, ss-p);
- c = ASCGET(mrb, s, e, &clen);
+ c = *s;
if (c == -1) {
- s += mbclen(s, e, str_enc);
- mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ s += e - s;
+ mrb_str_buf_cat(mrb, val, ss, s-ss);
p = s;
continue;
}
- s += clen;
+ s++;
p = s;
switch (c) {
@@ -2757,18 +2136,18 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
break;
case 'k':
- if (s < e && ASCGET(mrb, s, e, &clen) == '<') {
+ if (s < e && *s == '<') {
char *name, *name_end;
- name_end = name = s + clen;
+ name_end = name = s + 1;
while (name_end < e) {
- c = ASCGET(mrb, name_end, e, &clen);
+ c = *name_end;
if (c == '>') break;
- name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
+ name_end += c == -1 ? e - name_end : 1;
}
if (name_end < e) {
no = name_to_backref_number(mrb, regs, RREGEXP(regexp), name, name_end);
- p = s = name_end + clen;
+ p = s = name_end + 1;
break;
}
else {
@@ -2776,7 +2155,7 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
}
}
- mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ mrb_str_buf_cat(mrb, val, ss, s-ss);
continue;
case '0':
@@ -2785,11 +2164,11 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
break;
case '`':
- mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0), src_enc);
+ mrb_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0));
continue;
case '\'':
- mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
+ mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0));
continue;
case '+':
@@ -2799,31 +2178,29 @@ mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers
break;
case '\\':
- mrb_enc_str_buf_cat(mrb, val, s-clen, clen, str_enc);
+ mrb_str_buf_cat(mrb, val, s-1, 1);
continue;
default:
- mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ mrb_str_buf_cat(mrb, val, ss, s-ss);
continue;
}
if (no >= 0) {
if (no >= regs->num_regs) continue;
if (BEG(no) == -1) continue;
- mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
+ mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no));
}
} /* while (s < e) { */
if (!val.tt) return str;
if (p < e) {
- mrb_enc_str_buf_cat(mrb, val, p, e-p, str_enc);
+ mrb_str_buf_cat(mrb, val, p, e-p);
}
return val;
}
-//#define NEW_NODE(t,a0,a1,a2) mrb_node_newnode((t),(int)(a0),(int)(a1),(int)(a2))
-//#define NEW_IF(c,t,e) NEW_NODE(NODE_IF,c,t,e)
static inline NODE *
lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp)
{
@@ -3038,9 +2415,6 @@ mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_
}
return -1;
}
- else if (enc == mrb_utf8_encoding(mrb)) {
- return mrb_memsearch_qs_utf8(x0, m, y0, n);
- }
else {
return mrb_memsearch_qs(x0, m, y0, n);
}
@@ -3077,12 +2451,7 @@ mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options)
mrb_value
mrb_reg_regcomp(mrb_state *mrb, mrb_value str)
{
- mrb_value save_str = str;
- if (reg_cache.tt && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
- && ENCODING_GET(mrb, reg_cache) == ENCODING_GET(mrb, str)
- && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
- return reg_cache;
- return reg_cache = mrb_reg_new_str(mrb, save_str, 0);
+ return mrb_reg_new_str(mrb, str, 0);
}
int
@@ -3143,7 +2512,7 @@ is_special_global_name(const char *m, const char *e, mrb_encoding *enc)
++m;
if (m < e && is_identchar(m, e, enc)) {
if (!ISASCII(*m)) mb = 1;
- m += mrb_enc_mbclen(m, e, enc);
+ m += e - m;
}
break;
default:
@@ -3228,7 +2597,7 @@ mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc)
id:
if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m)))
return FALSE;
- while (m < e && is_identchar(m, e, enc)) m += mrb_enc_mbclen(m, e, enc);
+ while (m < e && is_identchar(m, e, enc)) m += e - m;
if (localid) {
switch (*m) {
case '!': case '?': case '=': ++m;
diff --git a/src/sprintf.c b/src/sprintf.c
index dc9b83dec..79bd101ad 100644
--- a/src/sprintf.c
+++ b/src/sprintf.c
@@ -520,7 +520,6 @@ mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt)
++argc;
--argv;
mrb_string_value(mrb, &fmt);
- fmt = mrb_str_new4(mrb, fmt);
p = RSTRING_PTR(fmt);
end = p + RSTRING_LEN(fmt);
blen = 0;
@@ -668,44 +667,37 @@ retry:
mrb_value tmp;
unsigned int c;
int n;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, fmt);
-#endif //INCLUDE_ENCODING
tmp = mrb_check_string_type(mrb, val);
if (!mrb_nil_p(tmp)) {
if (RSTRING_LEN(tmp) != 1 ) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character");
}
-#ifdef INCLUDE_ENCODING
- c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
-#else
c = RSTRING_PTR(tmp)[0];
n = 1;
-#endif //INCLUDE_ENCODING
}
else {
c = mrb_fixnum(val);
- n = mrb_enc_codelen(mrb, c, enc);
+ n = 1;
}
if (n <= 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
}
if (!(flags & FWIDTH)) {
CHECK(n);
- mrb_enc_mbcput(c, &buf[blen], enc);
+ buf[blen] = c;
blen += n;
}
else if ((flags & FMINUS)) {
CHECK(n);
- mrb_enc_mbcput(c, &buf[blen], enc);
+ buf[blen] = c;
blen += n;
FILL(' ', width-1);
}
else {
FILL(' ', width-1);
CHECK(n);
- mrb_enc_mbcput(c, &buf[blen], enc);
+ buf[blen] = c;
blen += n;
}
}
@@ -717,25 +709,18 @@ format_s:
{
mrb_value arg = GETARG();
long len, slen;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, fmt);
-#endif //INCLUDE_ENCODING
if (*p == 'p') arg = mrb_inspect(mrb, arg);
str = mrb_obj_as_string(mrb, arg);
len = RSTRING_LEN(str);
- mrb_str_set_len(mrb, result, blen);
+ RSTRING_LEN(result) = blen;
if (flags&(FPREC|FWIDTH)) {
slen = RSTRING_LEN(str);
if (slen < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
}
if ((flags&FPREC) && (prec < slen)) {
-#ifdef INCLUDE_ENCODING
- char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc);
-#else
char *p = RSTRING_PTR(str) + prec;
-#endif //INCLUDE_ENCODING
slen = prec;
len = p - RSTRING_PTR(str);
}
@@ -757,12 +742,10 @@ format_s:
buf[blen++] = ' ';
}
}
- mrb_enc_associate(mrb, result, enc);
break;
}
}
PUSH(RSTRING_PTR(str), len);
- mrb_enc_associate(mrb, result, enc);
}
break;
@@ -915,15 +898,8 @@ bin_retry:
if (*p == 'X') {
char *pp = s;
int c;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, fmt);
-#endif //INCLUDE_ENCODING
while ((c = (int)(unsigned char)*pp) != 0) {
-#ifdef INCLUDE_ENCODING
- *pp = mrb_enc_toupper(c, enc);
-#else
*pp = toupper(c);
-#endif //INCLUDE_ENCODING
pp++;
}
}
diff --git a/src/string.c b/src/string.c
index 83d78ccb9..b6ca9e489 100644
--- a/src/string.c
+++ b/src/string.c
@@ -9,13 +9,12 @@
#include <stdarg.h>
#include <string.h>
#include "mruby/string.h"
+#include <ctype.h>
#include "mruby/numeric.h"
#include "mruby/range.h"
-#include <ctype.h>
#include "mruby/array.h"
#include "mruby/class.h"
#include "mruby/variable.h"
-#include "mruby/hash.h"
#include <stdio.h>
#include "re.h"
#ifdef INCLUDE_REGEXP
@@ -23,8 +22,6 @@
#include "st.h"
#endif //INCLUDE_REGEXP
-#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
-
#ifndef FALSE
#define FALSE 0
#endif
@@ -38,33 +35,12 @@ const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
#ifdef INCLUDE_REGEXP
static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote);
#endif //INCLUDE_REGEXP
-#ifdef INCLUDE_ENCODING
-static void mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src);
-#else
-#define mrb_enc_cr_str_copy_for_substr(mrb, dest, src)
-#endif //INCLUDE_ENCODING
-static mrb_value str_replace(mrb_state *mrb, mrb_value str, mrb_value str2);
-#ifdef INCLUDE_ENCODING
-static long str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc);
-#endif //INCLUDE_ENCODING
-#ifdef INCLUDE_ENCODING
-#define is_ascii_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT)
-#define is_broken_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN)
-#define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str))
-#endif //INCLUDE_ENCODING
-
-void
-mrb_str_set_len(mrb_state *mrb, mrb_value str, long len)
-{
- mrb_str_modify(mrb, str);
- RSTRING_LEN(str) = len;
- RSTRING_PTR(str)[len] = '\0';
-}
+static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
+static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len);
#define RESIZE_CAPA(str,capacity) do {\
RSTRING(str)->buf = mrb_realloc(mrb, RSTRING(str)->buf, (capacity)+1);\
- if (!MRB_STR_NOCAPA_P(str))\
- RSTRING_CAPA(str) = capacity;\
+ RSTRING_CAPA(str) = capacity;\
} while (0)
#define STR_SET_LEN(str, n) do { \
@@ -75,86 +51,42 @@ mrb_str_set_len(mrb_state *mrb, mrb_value str, long len)
RSTRING(str)->len--;\
} while (0)
-#ifdef INCLUDE_ENCODING
-static mrb_value mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len,
- int ptr_encindex, int ptr_cr, int *ptr_cr_ret);
-#endif //INCLUDE_ENCODING
-
-#ifdef INCLUDE_ENCODING
-mrb_value
-mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr)
-{
- mrb_value str = mrb_str_new_cstr(mrb, ptr);//mrb_str_new2(ptr);
- ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
- return str;
-}
-
-mrb_value
-mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *eenc)
+static void
+str_modify(mrb_state *mrb, mrb_value str)
{
- mrb_value str;
-
- str = mrb_str_new(mrb, ptr, len);
- if (eenc == mrb_usascii_encoding(mrb) &&
- mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
- mrb_enc_associate(mrb, str, mrb_ascii8bit_encoding(mrb));
- return str;
- }
- mrb_enc_associate(mrb, str, eenc);
- return mrb_str_conv_enc(mrb, str, eenc, mrb_default_internal_encoding(mrb));
-}
+ struct RString *s = mrb_str_ptr(str);
-mrb_value
-mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len)
-{
- return mrb_external_str_new_with_enc(mrb, ptr, len, mrb_locale_encoding(mrb));
-}
+ if (MRB_STR_SHARED_P(str)) {
+ char *ptr, *p;
+ long len;
-mrb_value
-mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr)
-{
- /* ptr must reference NUL terminated ASCII string. */
- int encindex = ENCODING_GET(mrb, str);
- mrb_encoding *enc = mrb_enc_from_index(mrb, encindex);
- if (mrb_enc_asciicompat(mrb, enc)) {
- return mrb_enc_cr_str_buf_cat(mrb, str, ptr, strlen(ptr),
- encindex, ENC_CODERANGE_7BIT, 0);
- }
- else {
- //char *buf = ALLOCA_N(char, mrb_enc_mbmaxlen(enc));
- char *buf = mrb_malloc(mrb, mrb_enc_mbmaxlen(enc));
- while (*ptr) {
- unsigned int c = (unsigned char)*ptr;
- int len = mrb_enc_codelen(mrb, c, enc);
- mrb_enc_mbcput(c, buf, enc);
- mrb_enc_cr_str_buf_cat(mrb, str, buf, len,
- encindex, ENC_CODERANGE_VALID, 0);
- ptr++;
+ p = s->buf;
+ len = s->len;
+ ptr = mrb_malloc(mrb, sizeof(char)*(len+1));
+ if (p) {
+ memcpy(ptr, p, len);
}
- return str;
+ ptr[len] = 0;
+ s->buf = ptr;
+ s->len = len;
+ s->aux.capa = len;
+ MRB_STR_UNSET_NOCAPA(str);
}
}
mrb_value
-mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr)
-{
- return mrb_external_str_new_with_enc(mrb, ptr, strlen(ptr), mrb_filesystem_encoding(mrb));
-}
-#endif //INCLUDE_ENCODING
-
-mrb_value
-mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len)
+mrb_str_resize(mrb_state *mrb, mrb_value str, int len)
{
- size_t slen;
+ int slen;
- mrb_str_modify(mrb, str);
+ str_modify(mrb, str);
slen = RSTRING_LEN(str);
if (len != slen) {
if (slen < len || slen -len > 1024) {
RSTRING_PTR(str) = mrb_realloc(mrb, RSTRING_PTR(str), len+1);
}
if (!MRB_STR_NOCAPA_P(str)) {
- RSTRING(str)->aux.capa = len;
+ RSTRING_CAPA(str) = len;
}
RSTRING(str)->len = len;
RSTRING(str)->buf[len] = '\0'; /* sentinel */
@@ -162,16 +94,6 @@ mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len)
return str;
}
-#ifdef INCLUDE_ENCODING
-mrb_value
-mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len)
-{
- mrb_value str = mrb_str_new(mrb, ptr, len);
- ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
- return str;
-}
-#endif //INCLUDE_ENCODING
-
static inline void
str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
{
@@ -182,360 +104,62 @@ str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
}
}
-#ifdef INCLUDE_ENCODING
-static inline int
-single_byte_optimizable(mrb_state *mrb, mrb_value str)
-{
- mrb_encoding *enc;
- /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
- if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
- return 1;
-
- enc = STR_ENC_GET(mrb, str);
- if (mrb_enc_mbmaxlen(enc) == 1)
- return 1;
-
- /* Conservative. Possibly single byte.
- * "\xa1" in Shift_JIS for example. */
- return 0;
-}
-
-static inline const char *
-search_nonascii(const char *p, const char *e)
-{
- while (p < e) {
- if (!ISASCII(*p))
- return p;
- p++;
- }
- return NULL;
-}
-#endif //INCLUDE_ENCODING
-
-static inline void
-str_modifiable(mrb_value str)
-{
- ;
-}
-
-static inline int
-str_independent(mrb_value str)
-{
- str_modifiable(str);
- if (!MRB_STR_SHARED_P(str)) return 1;
- return 0;
-}
-
-#ifdef INCLUDE_ENCODING
-static inline void
-str_enc_copy(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- mrb_enc_set_index(mrb, str1, ENCODING_GET(mrb, str2));
-}
-
-static inline long
-enc_strlen(const char *p, const char *e, mrb_encoding *enc, int cr)
-{
- long c;
- const char *q;
-
- if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc);
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- c = 0;
- if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
- while (p < e) {
- if (ISASCII(*p)) {
- q = search_nonascii(p, e);
- if (!q)
- return c + (e - p);
- c += q - p;
- p = q;
- }
- p += mrb_enc_fast_mbclen(p, e, enc);
- c++;
- }
- }
- else {
- while (p < e) {
- if (ISASCII(*p)) {
- q = search_nonascii(p, e);
- if (!q)
- return c + (e - p);
- c += q - p;
- p = q;
- }
- p += mrb_enc_mbclen(p, e, enc);
- c++;
- }
- }
- return c;
- }
-
- for (c=0; p<e; c++) {
- p += mrb_enc_mbclen(p, e, enc);
- }
- return c;
-}
-
-size_t
-mrb_str_capacity(mrb_value str)
-{
- if (MRB_STR_NOCAPA_P(str)) {
- return RSTRING_LEN(str);
- }
- else {
- return RSTRING_CAPA(str);
- }
-}
-#endif //INCLUDE_ENCODING
-
#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
-static inline mrb_value
-str_alloc(mrb_state *mrb)
+static struct RString*
+str_alloc(mrb_state *mrb, struct RClass *c)
{
struct RString* s;
s = mrb_obj_alloc_string(mrb);
- //NEWOBJ(str, struct RString);
- //OBJSETUP(str, klass, T_STRING);
+ s->c = c;
s->buf = 0;
s->len = 0;
s->aux.capa = 0;
- return mrb_obj_value(s);
-}
-
-#ifdef INCLUDE_ENCODING
-long
-mrb_enc_strlen(const char *p, const char *e, mrb_encoding *enc)
-{
- return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
-}
-#endif //INCLUDE_ENCODING
-
-static void
-str_make_independent(mrb_state *mrb, mrb_value str)
-{
- char *ptr;
- long len = RSTRING_LEN(str);
-
- ptr = mrb_malloc(mrb, sizeof(char)*(len+1));
- if (RSTRING_PTR(str)) {
- memcpy(ptr, RSTRING_PTR(str), len);
- }
- ptr[len] = 0;
- RSTRING(str)->buf = ptr;
- RSTRING(str)->len = len;
- RSTRING(str)->aux.capa = len;
- MRB_STR_UNSET_NOCAPA(str);
-}
-
-#ifdef INCLUDE_ENCODING
-static int
-coderange_scan(const char *p, long len, mrb_encoding *enc)
-{
- const char *e = p + len;
-
- if (mrb_enc_to_index(enc) == 0) {
- /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
- p = search_nonascii(p, e);
- return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
- }
-
- if (mrb_enc_asciicompat(mrb, enc)) {
- p = search_nonascii(p, e);
- if (!p) {
- return ENC_CODERANGE_7BIT;
- }
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(ret)) {
- return ENC_CODERANGE_BROKEN;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- if (p < e) {
- p = search_nonascii(p, e);
- if (!p) {
- return ENC_CODERANGE_VALID;
- }
- }
- }
- if (e < p) {
- return ENC_CODERANGE_BROKEN;
- }
- return ENC_CODERANGE_VALID;
- }
-
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
-
- if (!MBCLEN_CHARFOUND_P(ret)) {
- return ENC_CODERANGE_BROKEN;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- if (e < p) {
- return ENC_CODERANGE_BROKEN;
- }
- return ENC_CODERANGE_VALID;
-}
-
-int
-mrb_enc_str_coderange(mrb_state *mrb, mrb_value str)
-{
- int cr = ENC_CODERANGE(str);
-
- if (cr == ENC_CODERANGE_UNKNOWN) {
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
- cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
- ENC_CODERANGE_SET(str, cr);
- }
- return cr;
-}
-
-char*
-mrb_enc_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc)
-{
- if (mrb_enc_mbmaxlen(enc) == 1) {
- p += nth;
- }
- else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- p += nth * mrb_enc_mbmaxlen(enc);
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- const char *p2, *e2;
- int n;
-
- while (p < e && 0 < nth) {
- e2 = p + nth;
- if (e < e2)
- return (char*)e;
- if (ISASCII(*p)) {
- p2 = search_nonascii(p, e2);
- if (!p2)
- return (char*)e2;
- nth -= p2 - p;
- p = p2;
- }
- n = mrb_enc_mbclen(p, e, enc);
- p += n;
- nth--;
- }
- if (nth != 0)
- return (char*)e;
- return (char*)p;
- }
- else {
- while (p<e && nth--) {
- p += mrb_enc_mbclen(p, e, enc);
- }
- }
- if (p > e) p = e;
- return (char*)p;
-}
-
-static char*
-str_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte)
-{
- if (singlebyte)
- p += nth;
- else {
- p = mrb_enc_nth(mrb, p, e, nth, enc);
- }
- if (!p) return 0;
- if (p > e) p = e;
- return (char*)p;
+ return s;
}
/* char offset to byte offset */
-static long
-str_offset(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte)
-{
- const char *pp = str_nth(mrb, p, e, nth, enc, singlebyte);
- if (!pp) return e - p;
- return pp - p;
-}
-
-long
-mrb_str_offset(mrb_state *mrb, mrb_value str, long pos)
-{
- return str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
- STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str));
-}
-
-static void
-mrb_enc_cr_str_exact_copy(mrb_state *mrb, mrb_value dest, mrb_value src)
-{
- str_enc_copy(mrb, dest, src);
- ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
-}
-#else
-#define mrb_enc_cr_str_exact_copy(mrb, dest, src)
-#endif //INCLUDE_ENCODING
-
-mrb_value
-str_new4(mrb_state *mrb, mrb_value str)
+int
+mrb_str_offset(mrb_state *mrb, mrb_value str, int pos)
{
- mrb_value str2;
-
- str2 = mrb_obj_value(mrb_obj_alloc_string(mrb));
- RSTRING(str2)->len = RSTRING_LEN(str);
- RSTRING(str2)->buf = RSTRING_PTR(str);
-
- if (MRB_STR_SHARED_P(str)) {
- struct RString *shared = RSTRING_SHARED(str);
- FL_SET(str2, MRB_STR_SHARED);
- RSTRING_SHARED(str2) = shared;
- }
- else {
- FL_SET(str, MRB_STR_SHARED);
- RSTRING_SHARED(str) = mrb_str_ptr(str2);
- }
- mrb_enc_cr_str_exact_copy(mrb, str2, str);
- return str2;
+ return pos;
}
-static mrb_value
-str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len)
+static struct RString*
+str_new(mrb_state *mrb, const char *p, int len)
{
- mrb_value str;
+ struct RString *s = str_alloc(mrb, mrb->string_class);
- //str = str_alloc(mrb);
- str = mrb_str_buf_new(mrb, len);
-#ifdef INCLUDE_ENCODING
- if (len == 0) {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
- }
-#endif //INCLUDE_ENCODING
+ s->len = len;
+ s->aux.capa = len;
+ s->buf = mrb_malloc(mrb, len+1);
if (p) {
- memcpy(RSTRING_PTR(str), p, len);
+ memcpy(s->buf, p, len);
}
- STR_SET_LEN(str, len);
- RSTRING_PTR(str)[len] = '\0';
- return str;
+ s->buf[len] = '\0';
+ return s;
}
-mrb_value
-mrb_str_new_with_class(mrb_state *mrb, mrb_value obj, const char *ptr, long len)
+void
+str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
{
- return str_new(mrb, mrb_type(obj), ptr, len);
+ s->c = mrb_str_ptr(obj)->c;
}
-#define mrb_str_new5 mrb_str_new_with_class
-
static mrb_value
-str_new_empty(mrb_state *mrb, mrb_value str)
+mrb_str_new_empty(mrb_state *mrb, mrb_value str)
{
- mrb_value v = mrb_str_new5(mrb, str, 0, 0);
- return v;
+ struct RString *s = str_new(mrb, 0, 0);
+
+ str_with_class(mrb, s, str);
+ return mrb_obj_value(s);
}
mrb_value
-mrb_str_buf_new(mrb_state *mrb, size_t capa)
+mrb_str_buf_new(mrb_state *mrb, int capa)
{
struct RString *s;
@@ -553,14 +177,14 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa)
}
mrb_value
-str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len)
{
long capa, total, off = -1;
+ str_modify(mrb, str);
if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
off = ptr - RSTRING_PTR(str);
}
- mrb_str_modify(mrb, str);
if (len == 0) return mrb_fixnum_value(0);
capa = RSTRING_CAPA(str);
if (RSTRING_LEN(str) >= LONG_MAX - len) {
@@ -588,61 +212,29 @@ str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
}
mrb_value
-mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, int len)
{
if (len == 0) return str;
return str_buf_cat(mrb, str, ptr, len);
}
-/*
- * call-seq:
- * String.new(str="") => new_str
- *
- * Returns a new string object containing a copy of <i>str</i>.
- */
-
mrb_value
-mrb_str_new(mrb_state *mrb, const char *p, size_t len)
+mrb_str_new(mrb_state *mrb, const char *p, int len)
{
- struct RString *s;
-
- if (len == 0) {
- return mrb_str_buf_new(mrb, len);
- }
- s = mrb_obj_alloc_string(mrb);
- s->buf = mrb_malloc(mrb, len+1);
- if (p) {
- memcpy(s->buf, p, len);
- }
- s->len = len;
- s->aux.capa = len;
- s->buf[len] ='\0';
+ struct RString *s = str_new(mrb, p, len);
return mrb_obj_value(s);
}
-/* ptr==0 is error */
mrb_value
mrb_str_new2(mrb_state *mrb, const char *ptr)
{
+ struct RString *s;
if (!ptr) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given");
}
-#ifdef INCLUDE_ENCODING
- return mrb_usascii_str_new2(mrb, ptr);
-#else
- return mrb_str_new(mrb, ptr, strlen(ptr));
-#endif //INCLUDE_ENCODING
-}
-
-#ifdef INCLUDE_ENCODING
-mrb_value
-mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc)
-{
- mrb_value str = mrb_str_new(mrb, ptr, len);
- mrb_enc_associate(mrb, str, enc);
- return str;
+ s = str_new(mrb, ptr, strlen(ptr));
+ return mrb_obj_value(s);
}
-#endif //INCLUDE_ENCODING
/*
* call-seq: (Caution! NULL string)
@@ -655,7 +247,7 @@ mrb_value
mrb_str_new_cstr(mrb_state *mrb, const char *p)
{
struct RString *s;
- size_t len = strlen(p);
+ int len = strlen(p);
s = mrb_obj_alloc_string(mrb);
s->buf = mrb_malloc(mrb, len+1);
@@ -667,6 +259,32 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p)
return mrb_obj_value(s);
}
+static struct RString*
+str_make_shared(mrb_state *mrb, mrb_value str)
+{
+ struct RString *orig, *s;
+
+ s = str_new(mrb, 0, 0);
+ str_with_class(mrb, s, str);
+ orig = mrb_str_ptr(str);
+ if (!(orig->flags & MRB_STR_SHARED)) {
+ struct RString *shared = mrb_obj_alloc_string(mrb);
+
+ shared->buf = orig->buf;
+ shared->len = orig->len;
+ shared->aux.capa = orig->aux.capa;
+
+ orig->aux.shared = shared;
+ orig->flags |= MRB_STR_SHARED;
+ }
+ s->buf = orig->buf;
+ s->len = orig->len;
+ s->aux.shared = orig->aux.shared;
+ s->flags |= MRB_STR_SHARED;
+
+ return s;
+}
+
/*
* call-seq: (Caution! string literal)
* String.new(str="") => new_str
@@ -675,11 +293,21 @@ mrb_str_new_cstr(mrb_state *mrb, const char *p)
*/
mrb_value
-mrb_str_literal(mrb_state *mrb, mrb_value lit)
+mrb_str_literal(mrb_state *mrb, mrb_value str)
{
- struct RString *s = mrb_str_ptr(lit);
+ struct RString *orig, *s;
- return mrb_str_new(mrb, s->buf, s->len);
+ s = str_new(mrb, 0, 0);
+ orig = mrb_str_ptr(str);
+ while (orig->flags & MRB_STR_SHARED) {
+ orig = orig->aux.shared;
+ }
+ s->buf = orig->buf;
+ s->len = orig->len;
+ s->aux.shared = orig;
+ s->flags |= MRB_STR_SHARED;
+
+ return mrb_obj_value(s);
}
/*
@@ -707,7 +335,7 @@ void
mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
{
struct RString *s1 = mrb_str_ptr(self), *s2;
- size_t len;
+ int len;
if (mrb_type(other) != MRB_TT_STRING) {
other = mrb_str_to_str(mrb, other);
@@ -736,14 +364,12 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
struct RString *s = mrb_str_ptr(a);
struct RString *s2 = mrb_str_ptr(b);
struct RString *t;
- mrb_value r;
- r = mrb_str_new(mrb, 0, s->len + s2->len);
- t = mrb_str_ptr(r);
+ t = str_new(mrb, 0, s->len + s2->len);
memcpy(t->buf, s->buf, s->len);
memcpy(t->buf + s->len, s2->buf, s2->len);
- return r;
+ return mrb_obj_value(t);
}
/* 15.2.10.5.2 */
@@ -757,30 +383,7 @@ mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
static mrb_value
mrb_str_plus_m(mrb_state *mrb, mrb_value self)
{
- mrb_value str3;
- mrb_value str2;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
-
- //mrb_get_args(mrb, "s", &p, &len);
- mrb_get_args(mrb, "o", &str2);
-
- mrb_string_value(mrb, &str2);
-#ifdef INCLUDE_ENCODING
- enc = mrb_enc_check(mrb, self, str2);
-#endif //INCLUDE_ENCODING
- str3 = mrb_str_new(mrb, 0, RSTRING_LEN(self)+RSTRING_LEN(str2));
- memcpy(RSTRING_PTR(str3), RSTRING_PTR(self), RSTRING_LEN(self));
- memcpy(RSTRING_PTR(str3) + RSTRING_LEN(self),
- RSTRING_PTR(str2), RSTRING_LEN(str2));
- RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
-#ifdef INCLUDE_ENCODING
- ENCODING_CODERANGE_SET(mrb, str3, mrb_enc_to_index(enc),
- ENC_CODERANGE_AND(ENC_CODERANGE(self), ENC_CODERANGE(str2)));
-#endif //INCLUDE_ENCODING
-
- return str3;
+ return mrb_nil_value();
}
/*
@@ -793,7 +396,6 @@ static mrb_value
mrb_str_bytesize(mrb_state *mrb, mrb_value self)
{
struct RString *s = mrb_str_ptr(self);
-
return mrb_fixnum_value(s->len);
}
@@ -808,26 +410,11 @@ mrb_str_bytesize(mrb_state *mrb, mrb_value self)
mrb_value
mrb_str_size(mrb_state *mrb, mrb_value self)
{
-#ifdef INCLUDE_ENCODING
- long len;
-
- len = str_strlen(mrb, self, STR_ENC_GET(mrb, self));
- return mrb_fixnum_value(len);
-#else
- return mrb_str_bytesize(mrb, self);
-#endif //INCLUDE_ENCODING
-}
-
-void
-mrb_str_modify(mrb_state *mrb, mrb_value str)
-{
- if (!str_independent(str))
- str_make_independent(mrb, str);
+ struct RString *s = mrb_str_ptr(self);
+ return mrb_fixnum_value(s->len);
}
-
/* 15.2.10.5.1 */
-
/*
* call-seq:
* str * integer => new_str
@@ -840,12 +427,11 @@ mrb_str_modify(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_times(mrb_state *mrb, mrb_value self)
{
- mrb_value str2;
mrb_int n,len,times;
- char *ptr2;
+ struct RString *str2;
+ char *p;
mrb_get_args(mrb, "i", &times);
-
if (times < 0) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
}
@@ -853,22 +439,22 @@ mrb_str_times(mrb_state *mrb, mrb_value self)
mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
}
- str2 = mrb_str_new5(mrb, self, 0, len = RSTRING_LEN(self)*times);
- ptr2 = RSTRING_PTR(str2);
+ len = RSTRING_LEN(self)*times;
+ str2 = str_new(mrb, 0, len);
+ str_with_class(mrb, str2, self);
+ p = str2->buf;
if (len > 0) {
n = RSTRING_LEN(self);
- memcpy(ptr2, RSTRING_PTR(self), n);
+ memcpy(p, RSTRING_PTR(self), n);
while (n <= len/2) {
- memcpy(ptr2 + n, ptr2, n);
+ memcpy(p + n, p, n);
n *= 2;
}
- memcpy(ptr2 + n, ptr2, len-n);
+ memcpy(p + n, p, len-n);
}
- ptr2[RSTRING_LEN(str2)] = '\0';
-
- mrb_enc_cr_str_copy_for_substr(mrb, str2, self);
+ p[str2->len] = '\0';
- return str2;
+ return mrb_obj_value(str2);
}
/* -------------------------------------------------------------- */
@@ -941,8 +527,7 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) {
return mrb_nil_value();
}
- else
- {
+ else {
mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
if (mrb_nil_p(tmp)) return mrb_nil_value();
@@ -958,55 +543,12 @@ mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
return mrb_fixnum_value(result);
}
-#ifdef INCLUDE_ENCODING
-int
-mrb_str_comparable(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- int idx1, idx2;
- int rc1, rc2;
-
- if (RSTRING_LEN(str1) == 0) return TRUE;
- if (RSTRING_LEN(str2) == 0) return TRUE;
- idx1 = ENCODING_GET(mrb, str1);
- idx2 = ENCODING_GET(mrb, str2);
- if (idx1 == idx2) return TRUE;
- rc1 = mrb_enc_str_coderange(mrb, str1);
- rc2 = mrb_enc_str_coderange(mrb, str2);
- if (rc1 == ENC_CODERANGE_7BIT) {
- if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
- if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx2)))
- return TRUE;
- }
- if (rc2 == ENC_CODERANGE_7BIT) {
- if (mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx1)))
- return TRUE;
- }
- return FALSE;
-}
-
-int
-mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
-{
- long len;
-
- if (!mrb_str_comparable(mrb, str1, str2)) return 1;
- if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
- memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
- return 0;
- }
- return 1;
-}
-#endif //INCLUDE_ENCODING
-
static int
str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
{
const long len = RSTRING_LEN(str1);
if (len != RSTRING_LEN(str2)) return FALSE;
-#ifdef INCLUDE_ENCODING
- if (!mrb_str_comparable(mrb, str1, str2)) return FALSE;
-#endif //INCLUDE_ENCODING
if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0)
return TRUE;
return FALSE;
@@ -1100,202 +642,9 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
static mrb_value
mrb_str_match(mrb_state *mrb, mrb_value self/* x */)
{
- mrb_value y;
-
- mrb_get_args(mrb, "o", &y);
- switch (mrb_type(y)) {
- case MRB_TT_STRING:
- mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: String given");
- case MRB_TT_REGEX:
-#ifdef INCLUDE_REGEXP
- return mrb_reg_match_str(mrb, y, self);
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
- default:
- if (mrb_respond_to(mrb, y, mrb_intern(mrb, "=~"))) {
- return mrb_funcall(mrb, y, "=~", 1, self);
- }
- else {
- return mrb_nil_value();
- }
- }
-}
-/* ---------------------------------- */
-mrb_value
-mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len)
-{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
- mrb_value str2;
-#ifdef INCLUDE_ENCODING
- char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
-#else
- char *p, *s = RSTRING_PTR(str);
-#endif //INCLUDE_ENCODING
-
- if (len < 0) return mrb_nil_value();
- if (!RSTRING_LEN(str)) {
- len = 0;
- }
-#ifdef INCLUDE_ENCODING
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- if (beg > RSTRING_LEN(str)) return mrb_nil_value();
- if (beg < 0) {
- beg += RSTRING_LEN(str);
- if (beg < 0) return mrb_nil_value();
- }
- if (beg + len > RSTRING_LEN(str))
- len = RSTRING_LEN(str) - beg;
- if (len <= 0) {
- len = 0;
- p = 0;
- }
- else
- p = s + beg;
-#ifdef INCLUDE_ENCODING
- goto sub;
- }
- if (beg < 0) {
- if (len > -beg) len = -beg;
- if (-beg * mrb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
- beg = -beg;
- while (beg-- > len && (e = mrb_enc_prev_char(s, e, e, enc)) != 0);
- p = e;
- if (!p) return mrb_nil_value();
- while (len-- > 0 && (p = mrb_enc_prev_char(s, p, e, enc)) != 0);
- if (!p) return mrb_nil_value();
- len = e - p;
- goto sub;
- }
- else {
- beg += str_strlen(mrb, str, enc);
- if (beg < 0) return mrb_nil_value();
- }
- }
- else if (beg > 0 && beg > str_strlen(mrb, str, enc)) {
- return mrb_nil_value();
- }
- if (len == 0) {
- p = 0;
- }
- else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- int char_sz = mrb_enc_mbmaxlen(enc);
-
- p = s + beg * char_sz;
- if (p > e) {
- p = e;
- len = 0;
- }
- else if (len * char_sz > e - p)
- len = e - p;
- else
- len *= char_sz;
- }
- else if ((p = str_nth(mrb, s, e, beg, enc, 0)) == e) {
- len = 0;
- }
- else {
- len = str_offset(mrb, p, e, len, enc, 0);
- }
-sub:
-#endif //INCLUDE_ENCODING
- if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) {
-#ifdef INCLUDE_ENCODING
- str2 = mrb_str_new4(mrb, str);
- str2 = str_new3(mrb, mrb_obj_class(mrb, str2), str2);
-#else
- str2 = mrb_str_new(mrb, s, RSTRING_LEN(str));
-#endif //INCLUDE_ENCODING
- RSTRING(str2)->buf += RSTRING(str2)->len - len;
- RSTRING(str2)->len = len;
- }
- else {
- str2 = mrb_str_new5(mrb, str, p, len);
- mrb_enc_cr_str_copy_for_substr(mrb, str2, str);
- }
-
- return str2;
-}
-
-#ifdef INCLUDE_REGEXP
-static mrb_value
-mrb_str_subpat(mrb_state *mrb, mrb_value str, mrb_value re, mrb_int backref)
-{
- if (mrb_reg_search(mrb, re, str, 0, 0) >= 0) {
- mrb_value match = mrb_backref_get(mrb);
- int nth = mrb_reg_backref_number(mrb, match, mrb_fixnum_value(backref));
- return mrb_reg_nth_match(mrb, nth, mrb_backref_get(mrb));
- }
return mrb_nil_value();
}
-#endif //INCLUDE_REGEXP
-
-/* --- 1-8-7parse.c --> */
-
-#ifdef INCLUDE_ENCODING
-long
-mrb_enc_strlen_cr(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc, int *cr)
-{
- long c;
- const char *q;
- int ret;
-
- *cr = 0;
- if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
- return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc);
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- c = 0;
- while (p < e) {
- if (ISASCII(*p)) {
- q = search_nonascii(p, e);
- if (!q) {
- if (!*cr) *cr = ENC_CODERANGE_7BIT;
- return c + (e - p);
- }
- c += q - p;
- p = q;
- }
- ret = mrb_enc_precise_mbclen(p, e, enc);
- if (MBCLEN_CHARFOUND_P(ret)) {
- *cr |= ENC_CODERANGE_VALID;
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- else {
- *cr = ENC_CODERANGE_BROKEN;
- p++;
- }
- c++;
- }
- if (!*cr) *cr = ENC_CODERANGE_7BIT;
- return c;
- }
-
- for (c=0; p<e; c++) {
- ret = mrb_enc_precise_mbclen(p, e, enc);
- if (MBCLEN_CHARFOUND_P(ret)) {
- *cr |= ENC_CODERANGE_VALID;
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- else {
- *cr = ENC_CODERANGE_BROKEN;
- if (p + mrb_enc_mbminlen(enc) <= e)
- p += mrb_enc_mbminlen(enc);
- else
- p = e;
- }
- }
- if (!*cr) *cr = ENC_CODERANGE_7BIT;
- return c;
-}
-#endif //INCLUDE_ENCODING
-/* --- 1-8-7parse.c --< */
-
-#ifndef INCLUDE_ENCODING
static inline long
mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
@@ -1308,7 +657,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long
qstable[i] = m + 1;
for (; x < xe; ++x)
qstable[*x] = xe - x;
- /* Searching */
+ /* Searching */
for (; y + m <= ys + n; y += *(qstable + y[m])) {
if (*xs == *y && memcmp(xs, y, m) == 0)
return y - ys;
@@ -1316,7 +665,7 @@ mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long
return -1;
}
-int
+static int
mrb_memsearch(const void *x0, int m, const void *y0, int n)
{
const unsigned char *x = x0, *y = y0;
@@ -1328,7 +677,7 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n)
else if (m < 1) {
return 0;
}
- else if (m == 1) {
+ else if (m == 1) {
const unsigned char *ys = y, *ye = ys + n;
for (; y < ye; ++y) {
if (*x == *y)
@@ -1338,60 +687,22 @@ mrb_memsearch(const void *x0, int m, const void *y0, int n)
}
return mrb_memsearch_qs(x0, m, y0, n);
}
-#endif //INCLUDE_ENCODING
-
-/* --- 1-8-7parse.c --< */
-#ifdef INCLUDE_ENCODING
-static long
-str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc)
-{
- const char *p, *e;
- long n;
- int cr;
-
- if (single_byte_optimizable(mrb, str)) return RSTRING_LEN(str);
- if (!enc) enc = STR_ENC_GET(mrb, str);
- p = RSTRING_PTR(str);
- e = RSTRING_END(str);
- cr = ENC_CODERANGE(str);
- n = mrb_enc_strlen_cr(mrb, p, e, enc, &cr);
- if (cr) {
- ENC_CODERANGE_SET(str, cr);
- }
- return n;
-}
-#endif //INCLUDE_ENCODING
static mrb_int
mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
{
mrb_int pos;
- char *s, *sptr, *e;
+ char *s, *sptr;
int len, slen;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-
- enc = mrb_enc_check(mrb, str, sub);
- if (is_broken_string(mrb, sub)) {
- return -1;
- }
- len = str_strlen(mrb, str, enc);
- slen = str_strlen(mrb, sub, enc);
-#else
len = RSTRING_LEN(str);
slen = RSTRING_LEN(sub);
-#endif //INCLUDE_ENCODING
if (offset < 0) {
offset += len;
if (offset < 0) return -1;
}
if (len - offset < slen) return -1;
s = RSTRING_PTR(str);
- e = s + RSTRING_LEN(str);
if (offset) {
-#ifdef INCLUDE_ENCODING
- offset = str_offset(mrb, s, RSTRING_END(str), offset, enc, single_byte_optimizable(mrb, str));
-#endif //INCLUDE_ENCODING
s += offset;
}
if (slen == 0) return offset;
@@ -1399,39 +710,18 @@ mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
sptr = RSTRING_PTR(sub);
slen = RSTRING_LEN(sub);
len = RSTRING_LEN(str) - offset;
-#ifdef INCLUDE_ENCODING
- for (;;) {
- char *t;
- pos = mrb_memsearch(mrb, sptr, slen, s, len, enc);
- if (pos < 0) return pos;
- t = mrb_enc_right_char_head(s, s+pos, e, enc);
- if (t == s + pos) break;
- if ((len -= t - s) <= 0) return -1;
- offset += t - s;
- s = t;
- }
-#else
- pos = mrb_memsearch(sptr, slen, s+offset, len-offset);
+ pos = mrb_memsearch(sptr, slen, s, len);
if (pos < 0) return pos;
-#endif //INCLUDE_ENCODING
return pos + offset;
}
mrb_value
mrb_str_dup(mrb_state *mrb, mrb_value str)
{
+ /* should return shared string */
struct RString *s = mrb_str_ptr(str);
- struct RString *dup;
- dup = mrb_obj_alloc_string(mrb);
- dup->buf = mrb_malloc(mrb, s->len+1);
- if (s->buf) {
- memcpy(dup->buf, s->buf, s->len);
- dup->buf[s->len] = 0;
- }
- dup->len = s->len;
- dup->aux.capa = s->len;
- return mrb_obj_value(dup);
+ return mrb_str_new(mrb, s->buf, s->len);
}
static mrb_value
@@ -1467,18 +757,14 @@ num_index:
mrb_int beg, len;
mrb_value tmp;
-#ifdef INCLUDE_ENCODING
- len = str_strlen(mrb, str, STR_ENC_GET(mrb, str));
-#else
len = RSTRING_LEN(str);
-#endif //INCLUDE_ENCODING
switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) {
case 0/*FLASE*/:
break;
case 2/*OTHER*/:
return mrb_nil_value();
default:
- tmp = mrb_str_substr(mrb, str, beg, len);
+ tmp = mrb_str_subseq(mrb, str, beg, len);
return tmp;
}
}
@@ -1539,12 +825,12 @@ num_index:
static mrb_value
mrb_str_aref_m(mrb_state *mrb, mrb_value str)
{
+ mrb_value a1, a2;
int argc;
- mrb_value *argv;
- mrb_get_args(mrb, "*", &argv, &argc);
+ argc = mrb_get_args(mrb, "o|o", &a1, &a2);
if (argc == 2) {
- if (mrb_type(argv[0]) == MRB_TT_REGEX) {
+ if (mrb_type(a1) == MRB_TT_REGEX) {
#ifdef INCLUDE_REGEXP
return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1]));
#else
@@ -1552,37 +838,13 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
return mrb_nil_value();
#endif //INCLUDE_REGEXP
}
- return mrb_str_substr(mrb, str, mrb_fixnum(argv[0]), mrb_fixnum(argv[1]));
+ return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
}
if (argc != 1) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1)", argc);
}
- return mrb_str_aref(mrb, str, argv[0]);
-}
-
-#ifdef INCLUDE_ENCODING
-/* As mrb_str_modify(), but don't clear coderange */
-static void
-str_modify_keep_cr(mrb_state *mrb, mrb_value str)
-{
- if (!str_independent(str))
- str_make_independent(mrb, str);
- if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
- /* Force re-scan later */
- ENC_CODERANGE_CLEAR(str);
-}
-
-static void
-mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc)
-{
- if (mrb_enc_dummy_p(enc)) {
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible encoding with this operation: %s",
- mrb_enc_name(enc));
- }
+ return mrb_str_aref(mrb, str, a1);
}
-#else
-#define str_modify_keep_cr(mrb, str) mrb_str_modify((mrb), (str))
-#endif //INCLUDE_ENCODING
/* 15.2.10.5.8 */
/*
@@ -1600,39 +862,12 @@ mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc)
static mrb_value
mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
char *s, *send;
int modify = 0;
-#ifdef INCLUDE_ENCODING
- unsigned int c;
- int n;
-#endif //INCLUDE_ENCODING
- str_modify_keep_cr(mrb, str);
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
- mrb_str_check_dummy_enc(mrb, enc);
-#endif //INCLUDE_ENCODING
+ str_modify(mrb, str);
if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return mrb_nil_value();
s = RSTRING_PTR(str); send = RSTRING_END(str);
-#ifdef INCLUDE_ENCODING
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_islower(c, enc)) {
- mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- while (s < send) {
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_isupper(c, enc)) {
- mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- }
-#else
if (ISLOWER(*s)) {
*s = toupper(*s);
modify = 1;
@@ -1643,7 +878,6 @@ mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
modify = 1;
}
}
-#endif //INCLUDE_ENCODING
if (modify) return str;
return mrb_nil_value();
}
@@ -1681,71 +915,34 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- int argc;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
mrb_value rs;
mrb_int newline;
- char *p, *pp, *e;
+ char *p, *pp;
long len, rslen;
- str_modify_keep_cr(mrb, str);
+ str_modify(mrb, str);
len = RSTRING_LEN(str);
- if (len == 0) return mrb_nil_value();
- p = RSTRING_PTR(str);
- e = p + len;
- //if (mrb_scan_args(argc, argv, "01", &rs) == 0) {
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 0) {
- rs = mrb_str_new2(mrb, "\n");
-smart_chomp:
-#ifdef INCLUDE_ENCODING
- enc = mrb_enc_get(mrb, str);
- if (mrb_enc_mbminlen(enc) > 1) {
- pp = mrb_enc_left_char_head(p, e-mrb_enc_mbminlen(enc), e, enc);
- if (mrb_enc_is_newline(pp, e, enc)) {
- e = pp;
+ if (mrb_get_args(mrb, "|S", &rs) == 0) {
+ if (len == 0) return mrb_nil_value();
+ smart_chomp:
+ if (RSTRING_PTR(str)[len-1] == '\n') {
+ STR_DEC_LEN(str);
+ if (RSTRING_LEN(str) > 0 &&
+ RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
+ STR_DEC_LEN(str);
}
- pp = e - mrb_enc_mbminlen(enc);
- if (pp >= p) {
- pp = mrb_enc_left_char_head(p, pp, e, enc);
- if (mrb_enc_ascget(mrb, pp, e, 0, enc) == '\r') {
- e = pp;
- }
- }
- if (e == RSTRING_END(str)) {
- return mrb_nil_value();
- }
- len = e - RSTRING_PTR(str);
- STR_SET_LEN(str, len);
+ }
+ else if (RSTRING_PTR(str)[len-1] == '\r') {
+ STR_DEC_LEN(str);
}
else {
-#endif //INCLUDE_ENCODING
- if (RSTRING_PTR(str)[len-1] == '\n') {
- STR_DEC_LEN(str);
- if (RSTRING_LEN(str) > 0 &&
- RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
- STR_DEC_LEN(str);
- }
- }
- else if (RSTRING_PTR(str)[len-1] == '\r') {
- STR_DEC_LEN(str);
- }
- else {
- return mrb_nil_value();
- }
-#ifdef INCLUDE_ENCODING
+ return mrb_nil_value();
}
-#endif //INCLUDE_ENCODING
- RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
return str;
}
- rs = argv[0];
- if (mrb_nil_p(rs)) return mrb_nil_value();
- //StringValue(rs);
- mrb_string_value(mrb, &rs);
+
+ if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
+ p = RSTRING_PTR(str);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
while (len>0 && p[len-1] == '\n') {
@@ -1755,7 +952,7 @@ smart_chomp:
}
if (len < RSTRING_LEN(str)) {
STR_SET_LEN(str, len);
- RSTRING_PTR(str)[len] = '\0';
+ p[len] = '\0';
return str;
}
return mrb_nil_value();
@@ -1763,29 +960,16 @@ smart_chomp:
if (rslen > len) return mrb_nil_value();
newline = RSTRING_PTR(rs)[rslen-1];
if (rslen == 1 && newline == '\n')
+ newline = RSTRING_PTR(rs)[rslen-1];
+ if (rslen == 1 && newline == '\n')
goto smart_chomp;
-#ifdef INCLUDE_ENCODING
- enc = mrb_enc_check(mrb, str, rs);
- if (is_broken_string(mrb, rs)) {
- return mrb_nil_value();
- }
- pp = e - rslen;
-#else
pp = p + len - rslen;
-#endif //INCLUDE_ENCODING
if (p[len-1] == newline &&
(rslen <= 1 ||
memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
-#ifdef INCLUDE_ENCODING
- if (mrb_enc_left_char_head(p, pp, e, enc) != pp)
- return mrb_nil_value();
- if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
- ENC_CODERANGE_CLEAR(str);
- }
-#endif //INCLUDE_ENCODING
- STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
- RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
+ STR_SET_LEN(str, len - rslen);
+ p[len] = '\0';
return str;
}
return mrb_nil_value();
@@ -1820,26 +1004,6 @@ mrb_str_chomp(mrb_state *mrb, mrb_value self)
return str;
}
-#ifdef INCLUDE_ENCODING
-static long
-chopped_length(mrb_state *mrb, mrb_value str)
-{
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
- const char *p, *p2, *beg, *end;
-
- beg = RSTRING_PTR(str);
- end = beg + RSTRING_LEN(str);
- if (beg > end) return 0;
- p = mrb_enc_prev_char(beg, end, end, enc);
- if (!p) return 0;
- if (p > beg && mrb_enc_ascget(mrb, p, end, 0, enc) == '\n') {
- p2 = mrb_enc_prev_char(beg, p, end, enc);
- if (p2 && mrb_enc_ascget(mrb, p2, end, 0, enc) == '\r') p = p2;
- }
- return p - beg;
-}
-#endif //INCLUDE_ENCODING
-
/* 15.2.10.5.12 */
/*
* call-seq:
@@ -1852,13 +1016,9 @@ chopped_length(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
{
- str_modify_keep_cr(mrb, str);
+ str_modify(mrb, str);
if (RSTRING_LEN(str) > 0) {
-#ifdef INCLUDE_ENCODING
- long len;
- len = chopped_length(mrb, str);
-#else
- size_t len;
+ int len;
len = RSTRING_LEN(str) - 1;
if (RSTRING_PTR(str)[len] == '\n') {
if (len > 0 &&
@@ -1866,14 +1026,8 @@ mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
len--;
}
}
-#endif //INCLUDE_ENCODING
STR_SET_LEN(str, len);
RSTRING_PTR(str)[len] = '\0';
-#ifdef INCLUDE_ENCODING
- if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
- ENC_CODERANGE_CLEAR(str);
- }
-#endif //INCLUDE_ENCODING
return str;
}
return mrb_nil_value();
@@ -1900,13 +1054,8 @@ static mrb_value
mrb_str_chop(mrb_state *mrb, mrb_value self)
{
mrb_value str;
-#ifdef INCLUDE_ENCODING
- str = mrb_str_new5(mrb, self, RSTRING_PTR(self), chopped_length(mrb, self));
- mrb_enc_cr_str_copy_for_substr(mrb, str, self);
-#else
str = mrb_str_dup(mrb, self);
mrb_str_chop_bang(mrb, str);
-#endif //INCLUDE_ENCODING
return str;
}
@@ -1921,62 +1070,20 @@ mrb_str_chop(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
char *s, *send;
int modify = 0;
- str_modify_keep_cr(mrb, str);
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
- mrb_str_check_dummy_enc(mrb, enc);
-#endif //INCLUDE_ENCODING
- s = RSTRING_PTR(str); send = RSTRING_END(str);
-#ifdef INCLUDE_ENCODING
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- while (s < send) {
- unsigned int c = *(unsigned char*)s;
-
-#ifdef INCLUDE_ENCODING
- if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
-#else
- if ('A' <= c && c <= 'Z') {
-#endif //INCLUDE_ENCODING
- *s = 'a' + (c - 'A');
- modify = 1;
- }
- s++;
+ str_modify(mrb, str);
+ s = RSTRING_PTR(str);
+ send = RSTRING_END(str);
+ while (s < send) {
+ if (ISUPPER(*s)) {
+ *s = tolower(*s);
+ modify = 1;
}
-#ifdef INCLUDE_ENCODING
+ s++;
}
- else {
- int ascompat = mrb_enc_asciicompat(mrb, enc);
- while (s < send) {
- unsigned int c;
- int n;
-
- if (ascompat && (c = *(unsigned char*)s) < 0x80) {
- if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
- *s = 'a' + (c - 'A');
- modify = 1;
- }
- s++;
- }
- else {
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_isupper(c, enc)) {
- /* assuming toupper returns codepoint with same size */
- mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- }
- }
- }
-#endif //INCLUDE_ENCODING
if (modify) return str;
return mrb_nil_value();
}
@@ -2037,62 +1144,7 @@ mrb_str_downcase(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_each_line(mrb_state *mrb, mrb_value str)
{
- mrb_value rs;
- int newline;
- struct RString *ps = mrb_str_ptr(str);
- char *p = ps->buf, *pend = p + ps->len, *s;
- char *ptr = p;
- long len = ps->len, rslen;
- mrb_value line;
- struct RString *prs;
- mrb_value *argv, b;
- int argc;
-
- //if (mrb_scan_args(argc, argv, "01", &rs) == 0) {
- mrb_get_args(mrb, "*&", &argv, &argc, &b);
- if (argc > 0) {
- rs = argv[0];
- } else {
- rs = mrb_str_new2(mrb, "\n");
- }
- /*RETURN_ENUMERATOR(str, argc, argv);*/
- if (mrb_nil_p(rs)) {
- mrb_yield(mrb, b, str);
- return str;
- }
- //StringValue(rs);
- mrb_string_value(mrb, &rs);
- prs = mrb_str_ptr(rs);
- rslen = prs->len;
- if (rslen == 0) {
- newline = '\n';
- }
- else {
- newline = prs->buf[rslen-1];
- }
-
- for (s = p, p += rslen; p < pend; p++) {
- if (rslen == 0 && *p == '\n') {
- if (*++p != '\n') continue;
- while (*p == '\n') p++;
- }
- if (ps->buf < p && p[-1] == newline &&
- (rslen <= 1 ||
- memcmp(prs->buf, p-rslen, rslen) == 0)) {
- line = mrb_str_new5(mrb, str, s, p - s);
- mrb_yield(mrb, b, line);
- str_mod_check(mrb, str, ptr, len);
- s = p;
- }
- }
-
- if (s != pend) {
- if (p > pend) p = pend;
- line = mrb_str_new5(mrb, str, s, p - s);
- mrb_yield(mrb, b, line);
- }
-
- return str;
+ return mrb_nil_value();
}
/* 15.2.10.5.16 */
@@ -2106,7 +1158,7 @@ mrb_str_each_line(mrb_state *mrb, mrb_value str)
* "".empty? #=> true
*/
static mrb_value
-mrb_str_empty(mrb_state *mrb, mrb_value self)
+mrb_str_empty_p(mrb_state *mrb, mrb_value self)
{
struct RString *s = mrb_str_ptr(self);
@@ -2135,308 +1187,48 @@ mrb_str_eql(mrb_state *mrb, mrb_value self)
return mrb_false_value();
}
-#ifdef INCLUDE_ENCODING
-static void
-mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src)
-{
- /* this function is designed for copying encoding and coderange
- * from src to new string "dest" which is made from the part of src.
- */
- str_enc_copy(mrb, dest, src);
- switch (ENC_CODERANGE(src)) {
- case ENC_CODERANGE_7BIT:
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
- break;
- case ENC_CODERANGE_VALID:
- if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)) ||
- search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
- else
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
- break;
- default:
- if (RSTRING_LEN(dest) == 0) {
- if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)))
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_VALID);
- else
- ENC_CODERANGE_SET(dest, ENC_CODERANGE_7BIT);
- }
- break;
- }
-}
-#endif //INCLUDE_ENCODING
-
static mrb_value
-str_replace_shared(mrb_state *mrb, mrb_value str2, mrb_value str)
-{
- str = mrb_str_new_frozen(mrb, str);
- RSTRING(str2)->len = RSTRING_LEN(str);
- RSTRING(str2)->buf = RSTRING_PTR(str);
- RSTRING_SHARED(str2) = mrb_str_ptr(str);
- FL_SET(str2, MRB_STR_SHARED);
- mrb_enc_cr_str_exact_copy(mrb, str2, str);
-
- return str2;
-}
-
-static mrb_value
-str_new_shared(mrb_state *mrb, struct RClass* klass, mrb_value str)
-{
- return str_replace_shared(mrb, str_alloc(mrb), str);
-}
-
-mrb_value
-str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str)
-{
- return str_new_shared(mrb, klass, str);
-}
-
-mrb_value
-mrb_str_new_shared(mrb_state *mrb, mrb_value str)
-{
- mrb_value str2 = str_new3(mrb, mrb_obj_class(mrb, str), str);
-
- return str2;
-}
-
-mrb_value
-mrb_str_new_frozen(mrb_state *mrb, mrb_value orig)
+mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len)
{
- struct RClass* klass;
- mrb_value str;
-
- klass = mrb_obj_class(mrb, orig);
+ struct RString *s;
- if (MRB_STR_SHARED_P(orig) && RSTRING_SHARED(orig)) {
- long ofs;
- ofs = RSTRING_LEN(str) - RSTRING_SHARED(orig)->len;
-#ifdef INCLUDE_ENCODING
- if ((ofs > 0) || (klass != RBASIC(str)->c) ||
- ENCODING_GET(mrb, str) != ENCODING_GET(mrb, orig)) {
-#else
- if ((ofs > 0) || (klass != RBASIC(str)->c)) {
-#endif //INCLUDE_ENCODING
- str = str_new3(mrb, klass, str);
- RSTRING_PTR(str) += ofs;
- RSTRING_LEN(str) -= ofs;
- mrb_enc_cr_str_exact_copy(mrb, str, orig);
- }
- }
- else {
- str = str_new4(mrb, orig);
- }
- return str;
-}
+ s = str_make_shared(mrb, str);
+ s->buf += beg;
+ s->len = len;
-mrb_value
-mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len)
-{
- char *ptr = RSTRING_PTR(str);
- long olen = RSTRING_LEN(str), nlen;
-
- str_modifiable(str);
- if (len > olen) len = olen;
- nlen = olen - len;
- if (!MRB_STR_SHARED_P(str)) mrb_str_new4(mrb, str);
- ptr = RSTRING(str)->buf += len;
- RSTRING(str)->len = nlen;
- ptr[nlen] = 0;
- //ENC_CODERANGE_CLEAR(str);
- return str;
+ return mrb_obj_value(s);
}
mrb_value
-mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len)
+mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len)
{
mrb_value str2;
- if (RSTRING_LEN(str) == beg + len &&
- STR_BUF_MIN_SIZE < len) {
- str2 = mrb_str_new_shared(mrb, mrb_str_new_frozen(mrb, str));
- mrb_str_drop_bytes(mrb, str2, beg);
- }
- else {
- str2 = mrb_str_new5(mrb, str, RSTRING_PTR(str)+beg, len);
- }
- mrb_enc_cr_str_copy_for_substr(mrb, str2, str);
-
- return str2;
-}
-
-#ifdef INCLUDE_ENCODING
-int
-mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value str)
-{
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
-
- if (!mrb_enc_asciicompat(mrb, enc))
- return 0/*FALSE*/;
- else if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT)
- return 1/*TRUE*/;
- return 0/*FALSE*/;
-}
-
-static mrb_value
-mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len,
- int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
-{
- int str_encindex = ENCODING_GET(mrb, str);
- int res_encindex;
- int str_cr, res_cr;
- int str_a8 = ENCODING_IS_ASCII8BIT(str);
- int ptr_a8 = ptr_encindex == 0;
-
- str_cr = ENC_CODERANGE(str);
-
- if (str_encindex == ptr_encindex) {
- if (str_cr == ENC_CODERANGE_UNKNOWN ||
- (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) {
- ptr_cr = ENC_CODERANGE_UNKNOWN;
- }
- else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
- ptr_cr = coderange_scan(ptr, len, mrb_enc_from_index(mrb, ptr_encindex));
- }
- }
- else {
- mrb_encoding *str_enc = mrb_enc_from_index(mrb, str_encindex);
- mrb_encoding *ptr_enc = mrb_enc_from_index(mrb, ptr_encindex);
- if (!mrb_enc_asciicompat(mrb, str_enc) || !mrb_enc_asciicompat(mrb, ptr_enc)) {
- if (len == 0)
- return str;
- if (RSTRING_LEN(str) == 0) {
- mrb_str_buf_cat(mrb, str, ptr, len);
- ENCODING_CODERANGE_SET(mrb, str, ptr_encindex, ptr_cr);
- return str;
- }
- goto incompatible;
- }
- if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
- ptr_cr = coderange_scan(ptr, len, ptr_enc);
- }
- if (str_cr == ENC_CODERANGE_UNKNOWN) {
- if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
- str_cr = mrb_enc_str_coderange(mrb, str);
- }
- }
- }
- if (ptr_cr_ret)
- *ptr_cr_ret = ptr_cr;
-
- if (str_encindex != ptr_encindex &&
- str_cr != ENC_CODERANGE_7BIT &&
- ptr_cr != ENC_CODERANGE_7BIT) {
-incompatible:
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
- mrb_enc_name(mrb_enc_from_index(mrb, str_encindex)),
- mrb_enc_name(mrb_enc_from_index(mrb, ptr_encindex)));
- }
- if (str_cr == ENC_CODERANGE_UNKNOWN) {
- res_encindex = str_encindex;
- res_cr = ENC_CODERANGE_UNKNOWN;
- }
- else if (str_cr == ENC_CODERANGE_7BIT) {
- if (ptr_cr == ENC_CODERANGE_7BIT) {
- res_encindex = !str_a8 ? str_encindex : ptr_encindex;
- res_cr = ENC_CODERANGE_7BIT;
- }
- else {
- res_encindex = ptr_encindex;
- res_cr = ptr_cr;
- }
- }
- else if (str_cr == ENC_CODERANGE_VALID) {
- res_encindex = str_encindex;
- if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
- res_cr = str_cr;
- else
- res_cr = ptr_cr;
+ if (len < 0) return mrb_nil_value();
+ if (!RSTRING_LEN(str)) {
+ len = 0;
}
- else { /* str_cr == ENC_CODERANGE_BROKEN */
- res_encindex = str_encindex;
- res_cr = str_cr;
- if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
+ if (beg > RSTRING_LEN(str)) return mrb_nil_value();
+ if (beg < 0) {
+ beg += RSTRING_LEN(str);
+ if (beg < 0) return mrb_nil_value();
}
-
- if (len < 0) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
+ if (beg + len > RSTRING_LEN(str))
+ len = RSTRING_LEN(str) - beg;
+ if (len <= 0) {
+ len = 0;
}
- str_buf_cat(mrb, str, ptr, len);
- ENCODING_CODERANGE_SET(mrb, str, res_encindex, res_cr);
- return str;
-}
+ str2 = mrb_str_subseq(mrb, str, beg, len);
-mrb_value
-mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *ptr_enc)
-{
- return mrb_enc_cr_str_buf_cat(mrb, str, ptr, len,
- mrb_enc_to_index(ptr_enc), ENC_CODERANGE_UNKNOWN, NULL);
+ return str2;
}
mrb_value
mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
{
- int str2_cr;
-
- str2_cr = ENC_CODERANGE(str2);
-
- mrb_enc_cr_str_buf_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2),
- ENCODING_GET(mrb, str2), str2_cr, &str2_cr);
-
- ENC_CODERANGE_SET(str2, str2_cr);
-
- return str;
-}
-#else
-mrb_value
-mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
-{
mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
return str;
}
-#endif //INCLUDE_ENCODING
-
-static inline void
-str_discard(mrb_state *mrb, mrb_value str)
-{
- str_modifiable(str);
- if (!MRB_STR_SHARED_P(str)) {
- mrb_free(mrb, RSTRING_PTR(str));
- RSTRING(str)->buf = 0;
- RSTRING(str)->len = 0;
- }
-}
-
-void
-mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2)
-{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
- int cr;
-#endif //INCLUDE_ENCODING
-
- if (mrb_obj_equal(mrb, str, str2)) return;
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str2);
- cr = ENC_CODERANGE(str2);
-#endif //INCLUDE_ENCODING
- str_discard(mrb, str);
- MRB_STR_UNSET_NOCAPA(str);
- RSTRING_PTR(str) = RSTRING_PTR(str2);
- RSTRING_LEN(str) = RSTRING_LEN(str2);
- if (MRB_STR_NOCAPA_P(str2)) {
- FL_SET(str, RBASIC(str2)->flags & MRB_STR_NOCAPA);
- RSTRING_SHARED(str) = RSTRING_SHARED(str2);
- }
- else {
- RSTRING_CAPA(str) = RSTRING_CAPA(str2);
- }
-
- MRB_STR_UNSET_NOCAPA(str2); /* abandon str2 */
- RSTRING_PTR(str2)[0] = 0;
- RSTRING_LEN(str2) = 0;
- mrb_enc_associate(mrb, str, enc);
- ENC_CODERANGE_SET(str, cr);
-}
#ifdef INCLUDE_REGEXP
static mrb_value
@@ -2450,7 +1242,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
mrb_int beg0, end0;
mrb_int offset, blen, len, last;
char *sp, *cp;
- mrb_encoding *str_enc;
mrb_get_args(mrb, "*", &argv, &argc);
switch (argc) {
@@ -2478,7 +1269,6 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
dest = mrb_str_buf_new(mrb, blen);
sp = RSTRING_PTR(str);
cp = sp;
- str_enc = STR_ENC_GET(mrb, str);
do {
n++;
@@ -2490,7 +1280,7 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
len = beg - offset; /* copy pre-match substr */
if (len) {
- mrb_enc_str_buf_cat(mrb, dest, cp, len, str_enc);
+ mrb_str_buf_cat(mrb, dest, cp, len);
}
mrb_str_buf_append(mrb, dest, val);
@@ -2503,8 +1293,8 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
* in order to prevent infinite loops.
*/
if (RSTRING_LEN(str) <= end0) break;
- len = mrb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
- mrb_enc_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len, str_enc);
+ len = RSTRING_LEN(str)-end0;
+ mrb_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len);
offset = end0 + len;
}
cp = RSTRING_PTR(str) + offset;
@@ -2512,17 +1302,10 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
beg = mrb_reg_search(mrb, pat, str, offset, 0);
} while (beg >= 0);
if (RSTRING_LEN(str) > offset) {
- mrb_enc_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset, str_enc);
+ mrb_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset);
}
mrb_reg_search(mrb, pat, str, last, 0);
- if (bang) {
- mrb_str_shared_replace(mrb, str, dest);
- }
- else {
- RBASIC(dest)->c = mrb_obj_class(mrb, str);
- str = dest;
- }
-
+ RBASIC(dest)->c = mrb_obj_class(mrb, str);
return str;
}
@@ -2578,8 +1361,7 @@ mrb_str_gsub(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_gsub_bang(mrb_state *mrb, mrb_value self)
{
- str_modify_keep_cr(mrb, self);
- //return str_gsub(argc, argv, self, 1);
+ str_modify(mrb, self);
return str_gsub(mrb, self, 1);
}
#endif //INCLUDE_REGEXP
@@ -2694,18 +1476,10 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
}
if (pos < 0) {
-#ifdef INCLUDE_ENCODING
- pos += str_strlen(mrb, str, STR_ENC_GET(mrb, str));
-#else
pos += RSTRING_LEN(str);
-#endif //INCLUDE_ENCODING
if (pos < 0) {
if (mrb_type(sub) == MRB_TT_REGEX) {
-#ifdef INCLUDE_REGEXP
- mrb_backref_set(mrb, mrb_nil_value());
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //INCLUDE_REGEXP
+ mrb_raise(mrb, E_TYPE_ERROR, "Regexp class not supported");
}
return mrb_nil_value();
}
@@ -2714,11 +1488,9 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_REGEX:
#ifdef INCLUDE_REGEXP
- if (pos > str_strlen(mrb, str, STR_ENC_GET(mrb, str)))
+ if (pos > RSTRING_LEN(str))
return mrb_nil_value();
- pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
- mrb_enc_check(mrb, str, sub), single_byte_optimizable(mrb, str));
-
+ pos = mrb_str_offset(mrb, str, pos);
pos = mrb_reg_search(mrb, sub, str, pos, 0);
pos = mrb_str_sublen(mrb, str, pos);
#else
@@ -2750,9 +1522,6 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
/* fall through */
case MRB_TT_STRING:
pos = mrb_str_index(mrb, str, sub, pos);
-#ifdef INCLUDE_ENCODING
- pos = mrb_str_sublen(mrb, str, pos);
-#endif //INCLUDE_ENCODING
break;
}
@@ -2761,24 +1530,15 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
}
static mrb_value
-str_replace(mrb_state *mrb, mrb_value str, mrb_value str2)
+str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
{
- long len;
+ int len = s2->len;
- len = RSTRING_LEN(str2);
- if (MRB_STR_SHARED_P(str2)) {
- struct RString *shared = RSTRING_SHARED(str2);
- RSTRING_LEN(str) = len;
- RSTRING_PTR(str) = shared->buf;
- FL_SET(str, MRB_STR_SHARED);
- RSTRING_SHARED(str) = shared;
- }
- else {
- str_replace_shared(mrb, str, str2);
- }
-
- mrb_enc_cr_str_exact_copy(mrb, str, str2);
- return str;
+ s1->buf = mrb_realloc(mrb, s1->buf, len);
+ memcpy(s1->buf, s2->buf, len);
+ s1->len = s2->len;
+ s2->aux.capa = s2->len;
+ return mrb_obj_value(s1);
}
/* 15.2.10.5.24 */
@@ -2795,14 +1555,8 @@ mrb_str_replace(mrb_state *mrb, mrb_value str)
{
mrb_value str2;
- mrb_get_args(mrb, "o", &str2);
- str_modifiable(str);
- if (mrb_obj_equal(mrb, str, str2)) return str;
-
- //StringValue(str2);
- mrb_string_value(mrb, &str2);
- //str_discard(str);
- return str_replace(mrb, str, str2);
+ mrb_get_args(mrb, "S", &str2);
+ return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
}
/* 15.2.10.5.23 */
@@ -2815,43 +1569,18 @@ mrb_str_replace(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_init(mrb_state *mrb, mrb_value self)
{
- //mrb_value orig;
- mrb_value *argv;
- int argc;
+ mrb_value str2;
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 1)
- mrb_str_replace(mrb, self);
+ if (mrb_get_args(mrb, "|S", &str2) == 1) {
+ str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
+ }
return self;
}
-#ifdef INCLUDE_ENCODING
-mrb_sym
-mrb_intern3(mrb_state *mrb, const char *name, long len, mrb_encoding *enc)
-{
- return mrb_intern(mrb, name);
-}
-#endif //INCLUDE_ENCODING
-
mrb_sym
mrb_intern_str(mrb_state *mrb, mrb_value str)
{
- mrb_sym id;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-
- if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) {
- enc = mrb_usascii_encoding(mrb);
- }
- else {
- enc = mrb_enc_get(mrb, str);
- }
- id = mrb_intern3(mrb, RSTRING_PTR(str), RSTRING_LEN(str), enc);
-#else
- id = mrb_intern(mrb, RSTRING_PTR(str));
-#endif //INCLUDE_ENCODING
- str = RB_GC_GUARD(str);
- return id;
+ return mrb_intern(mrb, RSTRING_PTR(str));
}
/* 15.2.10.5.25 */
@@ -2984,66 +1713,20 @@ mrb_str_match_m(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_reverse(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
- mrb_value rev;
+ struct RString *s2;
char *s, *e, *p;
-#ifdef INCLUDE_ENCODING
- int single = 1;
-#endif //INCLUDE_ENCODING
- if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str);
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
- rev = mrb_str_new5(mrb, str, 0, RSTRING_LEN(str));
- s = RSTRING_PTR(str); e = RSTRING_END(str);
- p = RSTRING_END(rev);
+ if (RSTRING(str)->len <= 1) return mrb_str_dup(mrb, str);
- if (RSTRING_LEN(str) > 1) {
-#ifdef INCLUDE_ENCODING
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- while (s < e) {
- *--p = *s++;
- }
-#ifdef INCLUDE_ENCODING
- }
- else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
- while (s < e) {
- int clen = mrb_enc_fast_mbclen(s, e, enc);
-
- if (clen > 1 || (*s & 0x80)) single = 0;
- p -= clen;
- memcpy(p, s, clen);
- s += clen;
- }
- }
- else {
- while (s < e) {
- int clen = mrb_enc_mbclen(s, e, enc);
+ s2 = str_new(mrb, 0, RSTRING(str)->len);
+ str_with_class(mrb, s2, str);
+ s = RSTRING_PTR(str); e = RSTRING_END(str) - 1;
+ p = s2->buf;
- if (clen > 1 || (*s & 0x80)) single = 0;
- p -= clen;
- memcpy(p, s, clen);
- s += clen;
- }
- }
- }
- STR_SET_LEN(rev, RSTRING_LEN(str));
- if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
- if (single) {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
- }
- else {
- ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
- }
-#endif //INCLUDE_ENCODING
+ while (e >= s) {
+ *p++ = *e--;
}
- mrb_enc_cr_str_copy_for_substr(mrb, rev, str);
-
- return rev;
+ return mrb_obj_value(s2);
}
/* 15.2.10.5.30 */
@@ -3056,29 +1739,19 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- if (RSTRING_LEN(str) > 1) {
- if (single_byte_optimizable(mrb, str)) {
-#endif //INCLUDE_ENCODING
- char *s, *e, c;
- str_modify_keep_cr(mrb, str);
- s = RSTRING_PTR(str);
- e = RSTRING_END(str) - 1;
- while (s < e) {
- c = *s;
- *s++ = *e;
- *e-- = c;
- }
-#ifdef INCLUDE_ENCODING
- }
- else {
- mrb_str_shared_replace(mrb, str, mrb_str_reverse(mrb, str));
+ char *s, *e;
+ char c;
+
+ str_modify(mrb, str);
+ if (RSTRING(str)->len > 1) {
+ s = RSTRING(str)->buf;
+ e = s + RSTRING(str)->len - 1;
+ while (s < e) {
+ c = *s;
+ *s++ = *e;
+ *e-- = c;
}
}
- else {
- str_modify_keep_cr(mrb, str);
- }
-#endif //INCLUDE_ENCODING
return str;
}
@@ -3132,15 +1805,10 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
#ifdef INCLUDE_ENCODING
/* byte offset to char offset */
-size_t
+int
mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos)
{
- if (single_byte_optimizable(mrb, str) || pos < 0)
- return pos;
- else {
- char *p = RSTRING_PTR(str);
- return enc_strlen(p, p + pos, STR_ENC_GET(mrb, str), ENC_CODERANGE(str));
- }
+ return pos;
}
#endif //INCLUDE_ENCODING
@@ -3170,14 +1838,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
int argc;
mrb_value sub;
mrb_value vpos;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
- int pos, len = str_strlen(mrb, str, enc);
-#else
int pos, len = RSTRING_LEN(str);
-#endif //INCLUDE_ENCODING
- //if (mrb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
mrb_get_args(mrb, "*", &argv, &argc);
if (argc == 2) {
sub = argv[0];
@@ -3209,9 +1871,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
switch (mrb_type(sub)) {
case MRB_TT_REGEX:
#ifdef INCLUDE_REGEXP
- pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
- STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str));
-
+ pos = mrb_str_offset(mrb, str, pos);
if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
pos = mrb_reg_search(mrb, sub, str, pos, 1);
pos = mrb_str_sublen(mrb, str, pos);
@@ -3269,12 +1929,11 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
pmatch = mrb_match_ptr(match);
regs = &pmatch->rmatch->regs;
if (regs->beg[0] == regs->end[0]) {
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
/*
* Always consume at least one character of the input string
*/
if (ps->len > regs->end[0])
- *start = regs->end[0] + mrb_enc_fast_mbclen(RSTRING_PTR(str)+regs->end[0],RSTRING_END(str), enc);
+ *start = regs->end[0] + RSTRING_LEN(str)-regs->end[0];
else
*start = regs->end[0] + 1;
}
@@ -3426,19 +2085,14 @@ static const char isspacetable[256] = {
* "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
*/
-//static mrb_value
-//mrb_str_split_m(int argc, mrb_value *argv, mrb_value str)
static mrb_value
mrb_str_split_m(mrb_state *mrb, mrb_value str)
{
mrb_value *argv;
int argc;
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
- mrb_value spat;
+ mrb_value spat = mrb_nil_value();
mrb_value limit;
- enum {awk, string, regexp} split_type;
+ enum {awk, string, regexp} split_type = string;
long beg, end, i = 0;
int lim = 0;
mrb_value result, tmp;
@@ -3457,26 +2111,17 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else if (lim == 1) {
if (RSTRING_LEN(str) == 0)
return mrb_ary_new_capa(mrb, 0);
- return mrb_ary_new_from_values(mrb, &str, 1);
+ return mrb_ary_new_from_values(mrb, 1, &str);
}
i = 1;
}
-#ifdef INCLUDE_ENCODING
- enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
- //if (mrb_nil_p(spat)) {
if (argc == 0) {
-// spat = mrb_nil_value();
-// goto fs_set;
split_type = awk;
}
else {
//fs_set:
if (mrb_type(spat) == MRB_TT_STRING) {
-#ifdef INCLUDE_REGEXP
- mrb_encoding *enc2 = STR_ENC_GET(mrb, spat);
-#endif //INCLUDE_REGEXP
split_type = string;
#ifdef INCLUDE_REGEXP
if (RSTRING_LEN(spat) == 0) {
@@ -3484,20 +2129,13 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
spat = mrb_reg_regcomp(mrb, spat);
split_type = regexp;
}
- else if (mrb_enc_asciicompat(mrb, enc2) == 1) {
+ else {
#endif //INCLUDE_REGEXP
if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
split_type = awk;
}
#ifdef INCLUDE_REGEXP
}
- else {
- int l;
- if (mrb_enc_ascget(mrb, RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
- RSTRING_LEN(spat) == l) {
- split_type = awk;
- }
- }
#endif //INCLUDE_REGEXP
}
else {
@@ -3520,89 +2158,28 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
unsigned int c;
end = beg;
-#ifdef INCLUDE_ENCODING
- if (is_ascii_string(mrb, str)) {
-#endif //INCLUDE_ENCODING
- while (ptr < eptr) {
- c = (unsigned char)*ptr++;
- if (skip) {
- if (ascii_isspace(c)) {
- beg = ptr - bptr;
- }
- else {
- end = ptr - bptr;
- skip = 0;
- if (!mrb_nil_p(limit) && lim <= i) break;
- }
- }
- else if (ascii_isspace(c)) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
- skip = 1;
- beg = ptr - bptr;
- if (!mrb_nil_p(limit)) ++i;
- }
- else {
- end = ptr - bptr;
- }
- }
-#ifdef INCLUDE_ENCODING
- }
- else {
- while (ptr < eptr) {
- int n;
-
- c = mrb_enc_codepoint_len(mrb, ptr, eptr, &n, enc);
- ptr += n;
- if (skip) {
- if (mrb_isspace(c)) {
- beg = ptr - bptr;
- }
- else {
- end = ptr - bptr;
- skip = 0;
- if (!mrb_nil_p(limit) && lim <= i) break;
- }
- }
- else if (mrb_isspace(c)) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
- skip = 1;
- beg = ptr - bptr;
- if (!mrb_nil_p(limit)) ++i;
- }
- else {
- end = ptr - bptr;
- }
+ while (ptr < eptr) {
+ c = (unsigned char)*ptr++;
+ if (skip) {
+ if (ascii_isspace(c)) {
+ beg = ptr - bptr;
+ }
+ else {
+ end = ptr - bptr;
+ skip = 0;
+ if (!mrb_nil_p(limit) && lim <= i) break;
+ }
+ }
+ else if (ascii_isspace(c)) {
+ mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+ skip = 1;
+ beg = ptr - bptr;
+ if (!mrb_nil_p(limit)) ++i;
}
- }
- }
- else if (split_type == string) {
- char *ptr = RSTRING_PTR(str);
- char *temp = ptr;
- char *eptr = RSTRING_END(str);
- char *sptr = RSTRING_PTR(spat);
- long slen = RSTRING_LEN(spat);
-
- if (is_broken_string(mrb, str)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, str)));
- }
- if (is_broken_string(mrb, spat)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, spat)));
- }
- enc = mrb_enc_check(mrb, str, spat);
- while (ptr < eptr &&
- (end = mrb_memsearch(mrb, sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
- /* Check we are at the start of a char */
- char *t = mrb_enc_right_char_head(ptr, ptr + end, eptr, enc);
- if (t != ptr + end) {
- ptr = t;
- continue;
+ else {
+ end = ptr - bptr;
}
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
- ptr += end + slen;
- if (!mrb_nil_p(limit) && lim <= ++i) break;
}
- beg = ptr - temp;
-#endif //INCLUDE_ENCODING
}
else {
#ifdef INCLUDE_REGEXP
@@ -3617,21 +2194,18 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
regs = RMATCH_REGS(mrb_backref_get(mrb));
if (start == end && BEG(0) == END(0)) {
if (!ptr) {
- mrb_ary_push(mrb, result, str_new_empty(mrb, str));
+ mrb_ary_push(mrb, result, mrb_str_new_empty(mrb, str));
break;
}
else if (last_null == 1) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg,
- mrb_enc_fast_mbclen(ptr+beg,
- ptr+len,
- enc)));
+ mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, len));
beg = start;
}
else {
if (ptr+start == ptr+len)
start++;
else
- start += mrb_enc_fast_mbclen(ptr+start,ptr+len,enc);
+ start += len;
last_null = 1;
continue;
}
@@ -3645,7 +2219,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
for (idx=1; idx < regs->num_regs; idx++) {
if (BEG(idx) == -1) continue;
if (BEG(idx) == END(idx))
- tmp = str_new_empty(mrb, str);
+ tmp = mrb_str_new_empty(mrb, str);
else
tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx));
mrb_ary_push(mrb, result, tmp);
@@ -3658,7 +2232,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
if (RSTRING_LEN(str) > 0 && (!mrb_nil_p(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg)
- tmp = str_new_empty(mrb, str);
+ tmp = mrb_str_new_empty(mrb, str);
else
tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
mrb_ary_push(mrb, result, tmp);
@@ -3696,77 +2270,7 @@ mrb_block_given_p()
static mrb_value
mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
{
- mrb_value *argv;
- int argc;
- mrb_value pat, repl;
- long plen;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 1 && mrb_block_given_p()) {
- /* do nothing */
- }
- else if (argc == 2) {
- repl = argv[1];
- //StringValue(repl);
- mrb_string_value(mrb, &repl);
- }
- else {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc);
- }
-
- pat = get_pat(mrb, argv[0], 1);
- str_modifiable(str);
- if (mrb_reg_search(mrb, pat, str, 0, 0) >= 0) {
- mrb_encoding *enc;
- int cr = ENC_CODERANGE(str);
- mrb_value match = mrb_backref_get(mrb);
- struct re_registers *regs = RMATCH_REGS(match);
- long beg0 = BEG(0);
- long end0 = END(0);
- char *p, *rp;
- long len, rlen;
-
- repl = mrb_reg_regsub(mrb, repl, str, regs, pat);
- enc = mrb_enc_compatible(mrb, str, repl);
- if (!enc) {
- mrb_encoding *str_enc = STR_ENC_GET(mrb, str);
- p = RSTRING_PTR(str); len = RSTRING_LEN(str);
- if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
- coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
- mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
- mrb_enc_name(str_enc),
- mrb_enc_name(STR_ENC_GET(mrb, repl)));
- }
- enc = STR_ENC_GET(mrb, repl);
- }
- mrb_str_modify(mrb, str);
- mrb_enc_associate(mrb, str, enc);
- if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
- int cr2 = ENC_CODERANGE(repl);
- if (cr2 == ENC_CODERANGE_BROKEN ||
- (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
- cr = ENC_CODERANGE_UNKNOWN;
- else
- cr = cr2;
- }
- plen = end0 - beg0;
- rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
- len = RSTRING_LEN(str);
- if (rlen > plen) {
- RESIZE_CAPA(str, len + rlen - plen);
- }
- p = RSTRING_PTR(str);
- if (rlen != plen) {
- memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
- }
- memcpy(p + beg0, rp, rlen);
- len += rlen - plen;
- STR_SET_LEN(str, len);
- RSTRING_PTR(str)[len] = '\0';
- ENC_CODERANGE_SET(str, cr);
-
- return str;
- }
+ str_modify(mrb, str);
return mrb_nil_value();
}
#endif //INCLUDE_REGEXP
@@ -3987,7 +2491,7 @@ mrb_value
mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
{
char *s;
- size_t len;
+ int len;
//StringValue(str);
mrb_string_value(mrb, &str);
@@ -4127,7 +2631,7 @@ double
mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
{
char *s;
- size_t len;
+ int len;
//StringValue(str);
mrb_string_value(mrb, &str);
@@ -4197,66 +2701,20 @@ mrb_str_to_s(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc;
-#endif //INCLUDE_ENCODING
char *s, *send;
int modify = 0;
-#ifdef INCLUDE_ENCODING
- int n;
-
- str_modify_keep_cr(mrb, str);
- enc = STR_ENC_GET(mrb, str);
- mrb_str_check_dummy_enc(mrb, enc);
- s = RSTRING_PTR(str); send = RSTRING_END(str);
- if (single_byte_optimizable(mrb, str)) {
- while (s < send) {
- unsigned int c = *(unsigned char*)s;
-
- if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
- *s = 'A' + (c - 'a');
- modify = 1;
- }
- s++;
- }
- }
- else {
- int ascompat = mrb_enc_asciicompat(mrb, enc);
-
- while (s < send) {
- unsigned int c;
- if (ascompat && (c = *(unsigned char*)s) < 0x80) {
- if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
- *s = 'A' + (c - 'a');
- modify = 1;
- }
- s++;
- }
- else {
- c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
- if (mrb_enc_islower(c, enc)) {
- /* assuming toupper returns codepoint with same size */
- mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc);
- modify = 1;
- }
- s += n;
- }
- }
- }
-#else
- mrb_str_modify(mrb, str);
- s = RSTRING_PTR(str); send = RSTRING_END(str);
+ str_modify(mrb, str);
+ s = RSTRING_PTR(str);
+ send = RSTRING_END(str);
while (s < send) {
- unsigned int c = *(unsigned char*)s;
-
- if ('a' <= c && c <= 'z') {
- *s = 'A' + (c - 'a');
+ if (ISLOWER(*s)) {
+ *s = toupper(*s);
modify = 1;
}
s++;
}
-#endif //INCLUDE_ENCODING
+
if (modify) return str;
return mrb_nil_value();
}
@@ -4282,252 +2740,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
return str;
}
-/* 15.2.10.5.xx */
-/*
- * call-seq:
- * str.force_encoding(encoding) -> str
- *
- * Changes the encoding to +encoding+ and returns self.
- */
-#ifdef INCLUDE_ENCODING
-static mrb_value
-mrb_str_force_encoding(mrb_state *mrb, mrb_value self)
-{
- mrb_value enc;
-
- mrb_get_args(mrb, "o", &enc);
- str_modifiable(self);
- mrb_enc_associate(mrb, self, mrb_to_encoding(mrb, enc));
- ENC_CODERANGE_CLEAR(self);
- return self;
-}
-
-long
-mrb_str_coderange_scan_restartable(const char *s, const char *e, mrb_encoding *enc, int *cr)
-{
- const char *p = s;
-
- if (*cr == ENC_CODERANGE_BROKEN)
- return e - s;
-
- if (mrb_enc_to_index(enc) == 0) {
- /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
- p = search_nonascii(p, e);
- *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
- return e - s;
- }
- else if (mrb_enc_asciicompat(mrb, enc)) {
- p = search_nonascii(p, e);
- if (!p) {
- if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
- return e - s;
- }
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(ret)) {
- *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
- return p - s;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- if (p < e) {
- p = search_nonascii(p, e);
- if (!p) {
- *cr = ENC_CODERANGE_VALID;
- return e - s;
- }
- }
- }
- *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
- return p - s;
- }
- else {
- while (p < e) {
- int ret = mrb_enc_precise_mbclen(p, e, enc);
- if (!MBCLEN_CHARFOUND_P(ret)) {
- *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
- return p - s;
- }
- p += MBCLEN_CHARFOUND_LEN(ret);
- }
- *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
- return p - s;
- }
-}
-
-mrb_value
-mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts)
-{
- mrb_econv_t *ec;
- mrb_econv_result_t ret;
- long len;
- mrb_value newstr;
- const unsigned char *sp;
- unsigned char *dp;
-
- if (!to) return str;
- if (from == to) return str;
- if ((mrb_enc_asciicompat(mrb, to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
- to == mrb_ascii8bit_encoding(mrb)) {
- if (STR_ENC_GET(mrb, str) != to) {
- str = mrb_str_dup(mrb, str);
- mrb_enc_associate(mrb, str, to);
- }
- return str;
- }
-
- len = RSTRING_LEN(str);
- newstr = mrb_str_new(mrb, 0, len);
-
- retry:
- ec = mrb_econv_open_opts(mrb, from->name, to->name, ecflags, ecopts);
- if (!ec) return str;
-
- sp = (unsigned char*)RSTRING_PTR(str);
- dp = (unsigned char*)RSTRING_PTR(newstr);
- ret = mrb_econv_convert(mrb, ec, &sp, (unsigned char*)RSTRING_END(str),
- &dp, (unsigned char*)RSTRING_END(newstr), 0);
- mrb_econv_close(ec);
- switch (ret) {
- case econv_destination_buffer_full:
- /* destination buffer short */
- len = len < 2 ? 2 : len * 2;
- mrb_str_resize(mrb, newstr, len);
- goto retry;
-
- case econv_finished:
- len = dp - (unsigned char*)RSTRING_PTR(newstr);
- mrb_str_set_len(mrb, newstr, len);
- mrb_enc_associate(mrb, newstr, to);
- return newstr;
-
- case econv_invalid_byte_sequence:
- case econv_undefined_conversion:
- case econv_source_buffer_empty:
- case econv_after_output:
- case econv_incomplete_input:
- /* some error, return original */
- return str;
-
- default:
- mrb_bug("Internal Error: Invalid return value mrb_econv_convert.");
- return str;
- }
-}
-
-mrb_value
-mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to)
-{
- return mrb_str_conv_enc_opts(mrb, str, from, to, 0, mrb_nil_value());
-}
-#endif //INCLUDE_ENCODING
-
-#ifndef INCLUDE_ENCODING
-#undef SIGN_EXTEND_CHAR
-#if __STDC__
-# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
-#else /* not __STDC__ */
-/* As in Harbison and Steele. */
-# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
-#endif
-#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
-
-static int
-is_special_global_name(m)
- const char *m;
-{
- switch (*m) {
- case '~': case '*': case '$': case '?': case '!': case '@':
- case '/': case '\\': case ';': case ',': case '.': case '=':
- case ':': case '<': case '>': case '\"':
- case '&': case '`': case '\'': case '+':
- case '0':
- ++m;
- break;
- case '-':
- ++m;
- if (is_identchar(*m)) m += 1;
- break;
- default:
- if (!ISDIGIT(*m)) return 0;
- do ++m; while (ISDIGIT(*m));
- }
- return !*m;
-}
-
-int
-mrb_symname_p(const char *name)
-{
- const char *m = name;
- int localid = FALSE;
-
- if (!m) return FALSE;
- switch (*m) {
- case '\0':
- return FALSE;
-
- case '$':
- if (is_special_global_name(++m)) return TRUE;
- goto id;
-
- case '@':
- if (*++m == '@') ++m;
- goto id;
-
- case '<':
- switch (*++m) {
- case '<': ++m; break;
- case '=': if (*++m == '>') ++m; break;
- default: break;
- }
- break;
-
- case '>':
- switch (*++m) {
- case '>': case '=': ++m; break;
- }
- break;
-
- case '=':
- switch (*++m) {
- case '~': ++m; break;
- case '=': if (*++m == '=') ++m; break;
- default: return FALSE;
- }
- break;
-
- case '*':
- if (*++m == '*') ++m;
- break;
-
- case '+': case '-':
- if (*++m == '@') ++m;
- break;
-
- case '|': case '^': case '&': case '/': case '%': case '~': case '`':
- ++m;
- break;
-
- case '[':
- if (*++m != ']') return FALSE;
- if (*++m == '=') ++m;
- break;
-
- default:
- localid = !ISUPPER(*m);
-id:
- if (*m != '_' && !ISALPHA(*m)) return FALSE;
- while (is_identchar(*m)) m += 1;
- if (localid) {
- switch (*m) {
- case '!': case '?': case '=': ++m;
- }
- }
- break;
- }
- return *m ? FALSE : TRUE;
-}
-#endif //INCLUDE_ENCODING
-
/*
* call-seq:
* str.dump -> new_str
@@ -4538,16 +2750,10 @@ id:
mrb_value
mrb_str_dump(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = mrb_enc_get(mrb, str);
-#endif //INCLUDE_ENCODING
long len;
const char *p, *pend;
- char *q, *qend;
- mrb_value result;
-#ifdef INCLUDE_ENCODING
- int u8 = (enc == mrb_utf8_encoding(mrb));
-#endif //INCLUDE_ENCODING
+ char *q;
+ struct RString *result;
len = 2; /* "" */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
@@ -4570,33 +2776,16 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
len++;
}
else {
-#ifdef INCLUDE_ENCODING
- if (u8) { /* \u{NN} */
- int n = mrb_enc_precise_mbclen(p-1, pend, enc);
- if (MBCLEN_CHARFOUND_P(n-1)) {
- unsigned int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc);
- while (cc >>= 4) len++;
- len += 5;
- p += MBCLEN_CHARFOUND_LEN(n)-1;
- break;
- }
- }
-#endif //INCLUDE_ENCODING
len += 4; /* \xNN */
}
break;
}
}
-#ifdef INCLUDE_ENCODING
- if (!mrb_enc_asciicompat(mrb, enc)) {
- len += 19; /* ".force_encoding('')" */
- len += strlen(enc->name);
- }
-#endif //INCLUDE_ENCODING
- result = mrb_str_new5(mrb, str, 0, len);
+ result = str_new(mrb, 0, len);
+ str_with_class(mrb, result, str);
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
- q = RSTRING_PTR(result); qend = q + len + 1;
+ q = result->buf;
*q++ = '"';
while (p < pend) {
@@ -4647,36 +2836,12 @@ mrb_str_dump(mrb_state *mrb, mrb_value str)
}
else {
*q++ = '\\';
-#ifdef INCLUDE_ENCODING
- if (u8) {
- int n = mrb_enc_precise_mbclen(p-1, pend, enc) - 1;
- if (MBCLEN_CHARFOUND_P(n)) {
- int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc);
- p += n;
- snprintf(q, qend-q, "u{%x}", cc);
- q += strlen(q);
- continue;
- }
- }
- snprintf(q, qend-q, "x%02X", c);
-#else
sprintf(q, "%03o", c&0xff);
-#endif //INCLUDE_ENCODING
q += 3;
}
}
*q++ = '"';
-#ifdef INCLUDE_ENCODING
- *q = '\0';
- if (!mrb_enc_asciicompat(mrb, enc)) {
- snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
- enc = mrb_ascii8bit_encoding(mrb);
- }
- /* result from dump is ASCII */
- mrb_enc_associate(mrb, result, enc);
- ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
-#endif //INCLUDE_ENCODING
- return result;
+ return mrb_obj_value(result);
}
mrb_value
@@ -4686,8 +2851,6 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
}
if (0/*STR_ASSOC_P(str)*/) {
- mrb_str_modify(mrb, str);
- //if (STR_EMBED_P(str)) str_make_independent(mrb, str);
mrb_realloc(mrb, RSTRING(str)->buf, RSTRING(str)->len+len+1);
memcpy(RSTRING(str)->buf + RSTRING(str)->len, ptr, len);
RSTRING(str)->len += len;
@@ -4701,18 +2864,13 @@ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
mrb_value
mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr)
{
- return mrb_str_cat(mrb, str, ptr, strlen(ptr));
+ return mrb_str_cat(mrb, str, ptr, strlen(ptr));
}
-mrb_value
+static mrb_value
mrb_str_vcatf(mrb_state *mrb, mrb_value str, const char *fmt, va_list ap)
{
- //mrb_printf_buffer f;
- //mrb_value klass;
-
- //StringValue(str);
mrb_string_value(mrb, &str);
- mrb_str_modify(mrb, str);
mrb_str_resize(mrb, str, (char*)RSTRING_END(str) - RSTRING_PTR(str));
return str;
@@ -4730,12 +2888,6 @@ mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, ...)
return str;
}
-void
-mrb_lastline_set(mrb_value val)
-{
- //vm_svar_set(0, val);
-}
-
mrb_value
mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
{
@@ -4743,69 +2895,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
return mrb_str_buf_append(mrb, str, str2);
}
-void
-mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var)
-{
- if (!mrb_nil_p(val) && (mrb_type(val) != MRB_TT_STRING)) {
- mrb_raise(mrb, E_TYPE_ERROR, "value of %s must be String", mrb_sym2name(mrb, id));
- }
- *var = val;
-}
-
-#ifdef INCLUDE_ENCODING
-/*
- * call-seq:
- * str.ascii_only? -> true or false
- *
- * Returns true for a string which has only ASCII characters.
- *
- * "abc".force_encoding("UTF-8").ascii_only? #=> true
- * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
- */
-
-int
-mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str)
-{
- int cr = mrb_enc_str_coderange(mrb, str);
-
- return cr == ENC_CODERANGE_7BIT ? TRUE : FALSE;
-}
-
-#endif //INCLUDE_ENCODING
-
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
-int
-mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p)
-{
- char buf[CHAR_ESC_LEN + 1];
- int l;
-
- if (sizeof(c) > 4) {
- c &= 0xffffffff;
- }
- if (unicode_p) {
- if (c < 0x7F && ISPRINT(c)) {
- snprintf(buf, CHAR_ESC_LEN, "%c", c);
- }
- else if (c < 0x10000) {
- snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
- }
- else {
- snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
- }
- }
- else {
- if (c < 0x100) {
- snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
- }
- else {
- snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
- }
- }
- l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
- mrb_str_buf_cat(mrb, result, buf, l);
- return l;
-}
/*
* call-seq:
@@ -4821,24 +2911,9 @@ mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, i
mrb_value
mrb_str_inspect(mrb_state *mrb, mrb_value str)
{
-#ifdef INCLUDE_ENCODING
- mrb_encoding *enc = STR_ENC_GET(mrb, str);
-#endif //INCLUDE_ENCODING
const char *p, *pend, *prev;
char buf[CHAR_ESC_LEN + 1];
-#ifdef INCLUDE_ENCODING
- mrb_value result = mrb_str_buf_new(mrb, 0);
- mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
- int unicode_p = mrb_enc_unicode_p(enc);
- int asciicompat = mrb_enc_asciicompat(mrb, enc);
-
- if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
- if (!mrb_enc_asciicompat(mrb, resenc)) resenc = mrb_usascii_encoding(mrb);
- mrb_enc_associate(mrb, result, resenc);
- mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); //str_buf_cat2(result, "\"");
-#else
- mrb_value result = mrb_str_new_cstr(mrb, "\"");//mrb_str_buf_new2("\"");
-#endif //INCLUDE_ENCODING
+ mrb_value result = mrb_str_new_cstr(mrb, "\"");
p = RSTRING_PTR(str); pend = RSTRING_END(str);
prev = p;
@@ -4846,37 +2921,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
unsigned int c, cc;
int n;
-#ifdef INCLUDE_ENCODING
- n = mrb_enc_precise_mbclen(p, pend, enc);
- if (!MBCLEN_CHARFOUND_P(n)) {
- if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev);
- n = mrb_enc_mbminlen(enc);
- if (pend < p + n)
- n = (int)(pend - p);
- while (n--) {
- snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
- mrb_str_buf_cat(mrb, result, buf, strlen(buf));
- prev = ++p;
- }
- continue;
- }
- n = MBCLEN_CHARFOUND_LEN(n);
- c = mrb_enc_mbc_to_codepoint(p, pend, enc);
- p += n;
- if (c == '"'|| c == '\\' ||
- (c == '#' &&
- p < pend &&
- MBCLEN_CHARFOUND_P(mrb_enc_precise_mbclen(p,pend,enc)) &&
- (cc = mrb_enc_codepoint(mrb, p, pend, enc),
- (cc == '$' || cc == '@' || cc == '{')))) {
- if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
- mrb_str_buf_cat(mrb, result, "\\", strlen("\\")); //str_buf_cat2(result, "\\");
- if (asciicompat || enc == resenc) {
- prev = p - n;
- continue;
- }
- }
-#else
c = *p++;
n = 1;
if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
@@ -4889,7 +2933,6 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
mrb_str_buf_cat(mrb, result, buf, 1);
continue;
}
-#endif //INCLUDE_ENCODING
switch (c) {
case '\n': cc = 'n'; break;
case '\r': cc = 'r'; break;
@@ -4909,45 +2952,22 @@ mrb_str_inspect(mrb_state *mrb, mrb_value str)
prev = p;
continue;
}
-#ifdef INCLUDE_ENCODING
- if ((enc == resenc && mrb_enc_isprint(c, enc)) ||
- (asciicompat && mrb_enc_isascii(c, enc) && ISPRINT(c))) {
- continue;
- }
-#endif //INCLUDE_ENCODING
else {
if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
-#ifdef INCLUDE_ENCODING
- mrb_str_buf_cat_escaped_char(mrb, result, c, unicode_p);
-#else
sprintf(buf, "\\%03o", c & 0377);
mrb_str_buf_cat(mrb, result, buf, strlen(buf));
-#endif //INCLUDE_ENCODING
prev = p;
continue;
}
}
+#ifdef INCLUDE_ENCODING
if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev);
- mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); //str_buf_cat2(result, "\"");
+#endif
+ mrb_str_buf_cat(mrb, result, "\"", strlen("\""));
return result;
}
-#ifdef INCLUDE_ENCODING
-int
-sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc)
-{
- while (s < send) {
- int n;
- int c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
-
- if (!mrb_enc_isprint(c, enc)) return FALSE;
- s += n;
- }
- return TRUE;
-}
-#endif //INCLUDE_ENCODING
-
/* ---------------------------*/
void
mrb_init_string(mrb_state *mrb)
@@ -4976,7 +2996,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "downcase", mrb_str_downcase, ARGS_NONE()); /* 15.2.10.5.13 */
mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */
mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */
- mrb_define_method(mrb, s, "empty?", mrb_str_empty, ARGS_NONE()); /* 15.2.10.5.16 */
+ mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */
mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */
#ifdef INCLUDE_REGEXP
mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */
@@ -5011,9 +3031,5 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "to_sym", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.41 */
mrb_define_method(mrb, s, "upcase", mrb_str_upcase, ARGS_REQ(1)); /* 15.2.10.5.42 */
mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, ARGS_REQ(1)); /* 15.2.10.5.43 */
-#ifdef INCLUDE_ENCODING
- mrb_define_method(mrb, s, "encoding", mrb_obj_encoding, ARGS_NONE()); /* 15.2.10.5.44(x) */
- mrb_define_method(mrb, s, "force_encoding", mrb_str_force_encoding, ARGS_REQ(1)); /* 15.2.10.5.45(x) */
-#endif
mrb_define_method(mrb, s, "inspect", mrb_str_inspect, ARGS_NONE()); /* 15.2.10.5.46(x) */
}
diff --git a/src/struct.c b/src/struct.c
index 699825cff..d06124b50 100644
--- a/src/struct.c
+++ b/src/struct.c
@@ -379,7 +379,7 @@ mrb_struct_s_def(mrb_state *mrb, mrb_value klass)
pargv = &argv[0];
argcnt++;
}
- rest = mrb_ary_new_from_values(mrb, pargv, argcnt);
+ rest = mrb_ary_new_from_values(mrb, argcnt, pargv);
}
st = make_struct(mrb, name, rest, struct_class(mrb));
if (!mrb_nil_p(b)) {
diff --git a/src/symbol.c b/src/symbol.c
index b4ffc19e6..89e81af0e 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -149,13 +149,7 @@ mrb_sym_to_s(mrb_state *mrb, mrb_value sym)
{
mrb_sym id = SYM2ID(sym);
-#ifdef INCLUDE_REGEXP
- //return str_new3(mrb_cString, mrb_id2str(id));
- return str_new3(mrb, mrb_obj_class(mrb, sym), mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)));
-#else
- return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)); //mrb_str_new2(mrb_id2name(SYM2ID(sym)));
-#endif
-
+ return mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));
}
/* 15.2.11.3.4 */
@@ -185,42 +179,113 @@ sym_to_sym(mrb_state *mrb, mrb_value sym)
* :fred.inspect #=> ":fred"
*/
+#if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
+#endif
+#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
+
+static int
+is_special_global_name(m)
+ const char *m;
+{
+ switch (*m) {
+ case '~': case '*': case '$': case '?': case '!': case '@':
+ case '/': case '\\': case ';': case ',': case '.': case '=':
+ case ':': case '<': case '>': case '\"':
+ case '&': case '`': case '\'': case '+':
+ case '0':
+ ++m;
+ break;
+ case '-':
+ ++m;
+ if (is_identchar(*m)) m += 1;
+ break;
+ default:
+ if (!ISDIGIT(*m)) return 0;
+ do ++m; while (ISDIGIT(*m));
+ }
+ return !*m;
+}
+
+static int
+symname_p(const char *name)
+{
+ const char *m = name;
+ int localid = FALSE;
+
+ if (!m) return FALSE;
+ switch (*m) {
+ case '\0':
+ return FALSE;
+
+ case '$':
+ if (is_special_global_name(++m)) return TRUE;
+ goto id;
+
+ case '@':
+ if (*++m == '@') ++m;
+ goto id;
+
+ case '<':
+ switch (*++m) {
+ case '<': ++m; break;
+ case '=': if (*++m == '>') ++m; break;
+ default: break;
+ }
+ break;
+
+ case '>':
+ switch (*++m) {
+ case '>': case '=': ++m; break;
+ }
+ break;
+
+ case '=':
+ switch (*++m) {
+ case '~': ++m; break;
+ case '=': if (*++m == '=') ++m; break;
+ default: return FALSE;
+ }
+ break;
+
+ case '*':
+ if (*++m == '*') ++m;
+ break;
+
+ case '+': case '-':
+ if (*++m == '@') ++m;
+ break;
+
+ case '|': case '^': case '&': case '/': case '%': case '~': case '`':
+ ++m;
+ break;
+
+ case '[':
+ if (*++m != ']') return FALSE;
+ if (*++m == '=') ++m;
+ break;
+
+ default:
+ localid = !ISUPPER(*m);
+id:
+ if (*m != '_' && !ISALPHA(*m)) return FALSE;
+ while (is_identchar(*m)) m += 1;
+ if (localid) {
+ switch (*m) {
+ case '!': case '?': case '=': ++m;
+ }
+ }
+ break;
+ }
+ return *m ? FALSE : TRUE;
+}
+
static mrb_value
sym_inspect(mrb_state *mrb, mrb_value sym)
{
-#ifdef INCLUDE_ENCODING
- #define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str))
- mrb_value str;
- mrb_sym id = SYM2ID(sym);
- mrb_encoding *enc;
- const char *ptr;
- long len;
- char *dest;
- mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
-
- if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
- sym = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));//mrb_id2str(id);
- enc = STR_ENC_GET(mrb, sym);
- ptr = RSTRING_PTR(sym);
- len = RSTRING_LEN(sym);
- if ((resenc != enc && !mrb_str_is_ascii_only_p(mrb, sym)) || len != (long)strlen(ptr) ||
- !mrb_enc_symname_p(ptr, enc) || !sym_printable(mrb, ptr, ptr + len, enc)) {
- str = mrb_str_inspect(mrb, sym);
- len = RSTRING_LEN(str);
- mrb_str_resize(mrb, str, len + 1);
- dest = RSTRING_PTR(str);
- memmove(dest + 1, dest, len);
- dest[0] = ':';
- }
- else {
- char *dest;
- str = mrb_enc_str_new(mrb, 0, len + 1, enc);
- dest = RSTRING_PTR(str);
- dest[0] = ':';
- memcpy(dest + 1, ptr, len);
- }
- return str;
-#else
mrb_value str;
const char *name;
mrb_sym id = SYM2ID(sym);
@@ -229,12 +294,11 @@ sym_inspect(mrb_state *mrb, mrb_value sym)
str = mrb_str_new(mrb, 0, strlen(name)+1);
RSTRING(str)->buf[0] = ':';
strcpy(RSTRING(str)->buf+1, name);
- if (!mrb_symname_p(name)) {
+ if (!symname_p(name)) {
str = mrb_str_dump(mrb, str);
strncpy(RSTRING(str)->buf, ":\"", 2);
}
return str;
-#endif
}
diff --git a/src/transcode.c b/src/transcode.c
deleted file mode 100644
index d9f0ce896..000000000
--- a/src/transcode.c
+++ /dev/null
@@ -1,4386 +0,0 @@
-/**********************************************************************
-
- transcode.c -
-
- $Author: usa $
- created at: Tue Oct 30 16:10:22 JST 2007
-
- Copyright (C) 2007 Martin Duerst
-
-**********************************************************************/
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include "encoding.h"
-#include <sys/types.h> /* for ssize_t */
-#ifdef _MSC_VER
-typedef int ssize_t;
-#endif
-#include "transcode_data.h"
-#include <ctype.h>
-#include "st.h"
-#include "mruby/variable.h"
-#include <string.h>
-#include "mruby/string.h"
-#include "mruby/array.h"
-#include "mruby/hash.h"
-#include "error.h"
-#include "mruby/numeric.h"
-//#include "mio.h"
-#include <stdio.h>
-
-
-#define TYPE(o) (o).tt//mrb_type(o)
-
-#define E_CONVERTERNOTFOUND_ERROR (mrb_class_obj_get(mrb, "ConverterNotFoundError"))
-#define E_INVALIDBYTESEQUENCE_ERROR (mrb_class_obj_get(mrb, "InvalidByteSequenceError"))
-#define E_UNDEFINEDCONVERSION_ERROR (mrb_class_obj_get(mrb, "UndefinedConversionError"))
-
-/* mrb_value mrb_cEncoding = rb_define_class("Encoding", rb_cObject); */
-mrb_value rb_eUndefinedConversionError;
-mrb_value mrb_eInvalidByteSequenceError;
-mrb_value rb_eConverterNotFoundError;
-
-mrb_value mrb_cEncodingConverter;
-
-static mrb_value sym_invalid, sym_undef, sym_replace, sym_fallback;
-static mrb_value sym_xml, sym_text, sym_attr;
-static mrb_value sym_universal_newline;
-static mrb_value sym_crlf_newline;
-static mrb_value sym_cr_newline;
-static mrb_value sym_partial_input;
-
-static mrb_value sym_invalid_byte_sequence;
-static mrb_value sym_undefined_conversion;
-static mrb_value sym_destination_buffer_full;
-static mrb_value sym_source_buffer_empty;
-static mrb_value sym_finished;
-static mrb_value sym_after_output;
-static mrb_value sym_incomplete_input;
-
-static unsigned char *
-allocate_converted_string(mrb_state *mrb,
- const char *sname, const char *dname,
- const unsigned char *str, size_t len,
- unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
- size_t *dst_len_ptr);
-
-union mrb_transcoding_state_t { /* opaque data for stateful encoding */
- void *ptr;
- char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)];
- double dummy_for_alignment;
-};
-
-/* dynamic structure, one per conversion (similar to iconv_t) */
-/* may carry conversion state (e.g. for iso-2022-jp) */
-typedef struct mrb_transcoding {
- const mrb_transcoder *transcoder;
-
- int flags;
-
- int resume_position;
- unsigned int next_table;
- mrb_value next_info;
- unsigned char next_byte;
- unsigned int output_index;
-
- ssize_t recognized_len; /* already interpreted */
- ssize_t readagain_len; /* not yet interpreted */
- union {
- unsigned char ary[8]; /* max_input <= sizeof(ary) */
- unsigned char *ptr; /* length: max_input */
- } readbuf; /* recognized_len + readagain_len used */
-
- ssize_t writebuf_off;
- ssize_t writebuf_len;
- union {
- unsigned char ary[8]; /* max_output <= sizeof(ary) */
- unsigned char *ptr; /* length: max_output */
- } writebuf;
-
- union mrb_transcoding_state_t state;
-} mrb_transcoding;
-#define TRANSCODING_READBUF(tc) \
- ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
- (tc)->readbuf.ary : \
- (tc)->readbuf.ptr)
-#define TRANSCODING_WRITEBUF(tc) \
- ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
- (tc)->writebuf.ary : \
- (tc)->writebuf.ptr)
-#define TRANSCODING_WRITEBUF_SIZE(tc) \
- ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
- sizeof((tc)->writebuf.ary) : \
- (size_t)(tc)->transcoder->max_output)
-#define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union mrb_transcoding_state_t))
-#define TRANSCODING_STATE(tc) \
- ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
- (tc)->state.ary : \
- (tc)->state.ptr)
-
-typedef struct {
- struct mrb_transcoding *tc;
- unsigned char *out_buf_start;
- unsigned char *out_data_start;
- unsigned char *out_data_end;
- unsigned char *out_buf_end;
- mrb_econv_result_t last_result;
-} mrb_econv_elem_t;
-
-struct mrb_econv_t {
- int flags;
- const char *source_encoding_name;
- const char *destination_encoding_name;
-
- int started;
-
- const unsigned char *replacement_str;
- size_t replacement_len;
- const char *replacement_enc;
- int replacement_allocated;
-
- unsigned char *in_buf_start;
- unsigned char *in_data_start;
- unsigned char *in_data_end;
- unsigned char *in_buf_end;
- mrb_econv_elem_t *elems;
- int num_allocated;
- int num_trans;
- int num_finished;
- struct mrb_transcoding *last_tc;
-
- /* last error */
- struct {
- mrb_econv_result_t result;
- struct mrb_transcoding *error_tc;
- const char *source_encoding;
- const char *destination_encoding;
- const unsigned char *error_bytes_start;
- size_t error_bytes_len;
- size_t readagain_len;
- } last_error;
-
- /* The following fields are only for Encoding::Converter.
- * mrb_econv_open set them NULL. */
- mrb_encoding *source_encoding;
- mrb_encoding *destination_encoding;
-};
-
-/*
- * Dispatch data and logic
- */
-
-#define DECORATOR_P(sname, dname) (*(sname) == '\0')
-
-typedef struct {
- const char *sname;
- const char *dname;
- const char *lib; /* null means means no need to load a library */
- const mrb_transcoder *transcoder;
-} transcoder_entry_t;
-
-static st_table *transcoder_table;
-
-static transcoder_entry_t *
-make_transcoder_entry(const char *sname, const char *dname)
-{
- st_data_t val;
- st_table *table2;
-
- if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
- val = (st_data_t)st_init_strcasetable();
- st_add_direct(transcoder_table, (st_data_t)sname, val);
- }
- table2 = (st_table*)val;
- if (!st_lookup(table2, (st_data_t)dname, &val)) {
- transcoder_entry_t *entry = malloc(sizeof(transcoder_entry_t));
- entry->sname = sname;
- entry->dname = dname;
- entry->lib = NULL;
- entry->transcoder = NULL;
- val = (st_data_t)entry;
- st_add_direct(table2, (st_data_t)dname, val);
- }
- return (transcoder_entry_t*)val;
-}
-
-static transcoder_entry_t *
-get_transcoder_entry(const char *sname, const char *dname)
-{
- st_data_t val;
- st_table *table2;
-
- if (!st_lookup(transcoder_table, (st_data_t)sname, &val)) {
- return NULL;
- }
- table2 = (st_table*)val;
- if (!st_lookup(table2, (st_data_t)dname, &val)) {
- return NULL;
- }
- return (transcoder_entry_t*)val;
-}
-
-void
-mrb_register_transcoder(mrb_state *mrb, const mrb_transcoder *tr)
-{
- const char *const sname = tr->src_encoding;
- const char *const dname = tr->dst_encoding;
-
- transcoder_entry_t *entry;
-
- entry = make_transcoder_entry(sname, dname);
- if (entry->transcoder) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "transcoder from %s to %s has been already registered",
- sname, dname);
- }
-
- entry->transcoder = tr;
-}
-
-static void
-declare_transcoder(const char *sname, const char *dname, const char *lib)
-{
- transcoder_entry_t *entry;
-
- entry = make_transcoder_entry(sname, dname);
- entry->lib = lib;
-}
-
-#define MAX_TRANSCODER_LIBNAME_LEN 64
-static const char transcoder_lib_prefix[] = "enc/trans/";
-
-void
-mrb_declare_transcoder(mrb_state *mrb, const char *enc1, const char *enc2, const char *lib)
-{
- if (!lib || strlen(lib) > MAX_TRANSCODER_LIBNAME_LEN) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid library name - %s",
- lib ? lib : "(null)");
- }
- declare_transcoder(enc1, enc2, lib);
-}
-
-#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
-
-typedef struct search_path_queue_tag {
- struct search_path_queue_tag *next;
- const char *enc;
-} search_path_queue_t;
-
-typedef struct {
- st_table *visited;
- search_path_queue_t *queue;
- search_path_queue_t **queue_last_ptr;
- const char *base_enc;
-} search_path_bfs_t;
-
-static enum st_retval
-transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
-{
- const char *dname = (const char*)key;
- search_path_bfs_t *bfs = (search_path_bfs_t*)arg;
- search_path_queue_t *q;
-
- if (st_lookup(bfs->visited, (st_data_t)dname, &val)) {
- return ST_CONTINUE;
- }
-
- q = malloc(sizeof(search_path_queue_t));
- q->enc = dname;
- q->next = NULL;
- *bfs->queue_last_ptr = q;
- bfs->queue_last_ptr = &q->next;
-
- st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
- return ST_CONTINUE;
-}
-
-static int
-transcode_search_path(mrb_state *mrb, const char *sname, const char *dname,
- void (*callback)(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg),
- void *arg)
-{
- search_path_bfs_t bfs;
- search_path_queue_t *q;
- st_data_t val;
- st_table *table2;
- int found;
- int pathlen = -1;
-
- if (encoding_equal(sname, dname))
- return -1;
-
- q = malloc(sizeof(search_path_queue_t));//ALLOC(search_path_queue_t);
- q->enc = sname;
- q->next = NULL;
- bfs.queue_last_ptr = &q->next;
- bfs.queue = q;
-
- bfs.visited = st_init_strcasetable();
- st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
-
- while (bfs.queue) {
- q = bfs.queue;
- bfs.queue = q->next;
- if (!bfs.queue)
- bfs.queue_last_ptr = &bfs.queue;
-
- if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
- xfree(q);
- continue;
- }
- table2 = (st_table*)val;
-
- if (st_lookup(table2, (st_data_t)dname, &val)) {
- st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
- xfree(q);
- found = 1;
- goto cleanup;
- }
-
- bfs.base_enc = q->enc;
- st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
- bfs.base_enc = NULL;
-
- xfree(q);
- }
- found = 0;
-
- cleanup:
- while (bfs.queue) {
- q = bfs.queue;
- bfs.queue = q->next;
- xfree(q);
- }
-
- if (found) {
- const char *enc = dname;
- int depth;
- pathlen = 0;
- while (1) {
- st_lookup(bfs.visited, (st_data_t)enc, &val);
- if (!val)
- break;
- pathlen++;
- enc = (const char*)val;
- }
- depth = pathlen;
- enc = dname;
- while (1) {
- st_lookup(bfs.visited, (st_data_t)enc, &val);
- if (!val)
- break;
- callback(mrb, (const char*)val, enc, --depth, arg);
- enc = (const char*)val;
- }
- }
-
- st_free_table(bfs.visited);
-
- return pathlen; /* is -1 if not found */
-}
-
-int
-mrb_require(mrb_state *mrb, const char *fname)
-{
- //mrb_value fn = mrb_str_new2(mrb, fname);
- //OBJ_FREEZE(fn);
- //return mrb_require_safe(fn, mrb_safe_level());
- mrb_str_new2(mrb, fname);
- return 1/* OK */;
-}
-
-static const mrb_transcoder *
-load_transcoder_entry(mrb_state *mrb, transcoder_entry_t *entry)
-{
- if (entry->transcoder)
- return entry->transcoder;
-
- if (entry->lib) {
- const char *lib = entry->lib;
- size_t len = strlen(lib);
- char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
-
- entry->lib = NULL;
-
- if (len > MAX_TRANSCODER_LIBNAME_LEN)
- return NULL;
- memcpy(path, transcoder_lib_prefix, sizeof(transcoder_lib_prefix) - 1);
- memcpy(path + sizeof(transcoder_lib_prefix) - 1, lib, len + 1);
- if (!mrb_require(mrb, path))
- return NULL;
- }
-
- if (entry->transcoder)
- return entry->transcoder;
-
- return NULL;
-}
-
-static const char*
-get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
-{
- if (encoding_equal(encname, "UTF-8")) {
- *len_ret = 3;
- *repl_encname_ptr = "UTF-8";
- return "\xEF\xBF\xBD";
- }
- else {
- *len_ret = 1;
- *repl_encname_ptr = "US-ASCII";
- return "?";
- }
-}
-
-/*
- * Transcoding engine logic
- */
-
-static const unsigned char *
-transcode_char_start(mrb_transcoding *tc,
- const unsigned char *in_start,
- const unsigned char *inchar_start,
- const unsigned char *in_p,
- size_t *char_len_ptr)
-{
- const unsigned char *ptr;
- if (inchar_start - in_start < tc->recognized_len) {
- memcpy(TRANSCODING_READBUF(tc) + tc->recognized_len,
- inchar_start, in_p - inchar_start);
- ptr = TRANSCODING_READBUF(tc);
- }
- else {
- ptr = inchar_start - tc->recognized_len;
- }
- *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
- return ptr;
-}
-
-static mrb_econv_result_t
-transcode_restartable0(mrb_state *mrb,
- const unsigned char **in_pos, unsigned char **out_pos,
- const unsigned char *in_stop, unsigned char *out_stop,
- mrb_transcoding *tc,
- const int opt)
-{
- const mrb_transcoder *tr = tc->transcoder;
- int unitlen = tr->input_unit_length;
- ssize_t readagain_len = 0;
-
- const unsigned char *inchar_start;
- const unsigned char *in_p;
-
- unsigned char *out_p;
-
- in_p = inchar_start = *in_pos;
-
- out_p = *out_pos;
-
-#define SUSPEND(ret, num) \
- do { \
- tc->resume_position = (num); \
- if (0 < in_p - inchar_start) \
- memmove(TRANSCODING_READBUF(tc)+tc->recognized_len, \
- inchar_start, in_p - inchar_start); \
- *in_pos = in_p; \
- *out_pos = out_p; \
- tc->recognized_len += in_p - inchar_start; \
- if (readagain_len) { \
- tc->recognized_len -= readagain_len; \
- tc->readagain_len = readagain_len; \
- } \
- return ret; \
- resume_label ## num:; \
- } while (0)
-#define SUSPEND_OBUF(num) \
- do { \
- while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
- } while (0)
-
-#define SUSPEND_AFTER_OUTPUT(num) \
- if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
- SUSPEND(econv_after_output, num); \
- }
-
-#define next_table (tc->next_table)
-#define next_info (tc->next_info)
-#define next_byte (tc->next_byte)
-#define writebuf_len (tc->writebuf_len)
-#define writebuf_off (tc->writebuf_off)
-
- switch (tc->resume_position) {
- case 0: break;
- case 1: goto resume_label1;
- case 2: goto resume_label2;
- case 3: goto resume_label3;
- case 4: goto resume_label4;
- case 5: goto resume_label5;
- case 6: goto resume_label6;
- case 7: goto resume_label7;
- case 8: goto resume_label8;
- case 9: goto resume_label9;
- case 10: goto resume_label10;
- case 11: goto resume_label11;
- case 12: goto resume_label12;
- case 13: goto resume_label13;
- case 14: goto resume_label14;
- case 15: goto resume_label15;
- case 16: goto resume_label16;
- case 17: goto resume_label17;
- case 18: goto resume_label18;
- case 19: goto resume_label19;
- case 20: goto resume_label20;
- case 21: goto resume_label21;
- case 22: goto resume_label22;
- case 23: goto resume_label23;
- case 24: goto resume_label24;
- case 25: goto resume_label25;
- case 26: goto resume_label26;
- case 27: goto resume_label27;
- case 28: goto resume_label28;
- case 29: goto resume_label29;
- case 30: goto resume_label30;
- case 31: goto resume_label31;
- case 32: goto resume_label32;
- case 33: goto resume_label33;
- case 34: goto resume_label34;
- default: break;
- }
-
- while (1) {
- inchar_start = in_p;
- tc->recognized_len = 0;
- next_table = tr->conv_tree_start;
-
- SUSPEND_AFTER_OUTPUT(24);
-
- if (in_stop <= in_p) {
- if (!(opt & ECONV_PARTIAL_INPUT))
- break;
- SUSPEND(econv_source_buffer_empty, 7);
- continue;
- }
-
-#define BYTE_ADDR(index) (tr->byte_array + (index))
-#define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
-#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
-#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
-#define BL_MIN_BYTE (BL_BASE[0])
-#define BL_MAX_BYTE (BL_BASE[1])
-#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
-#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
-
- next_byte = (unsigned char)*in_p++;
- follow_byte:
- if (next_byte < BL_MIN_BYTE || BL_MAX_BYTE < next_byte)
- next_info = mrb_fixnum_value(INVALID);
- else {
- next_info = mrb_fixnum_value(BL_ACTION(next_byte));
- }
- follow_info:
- switch (mrb_fixnum(next_info) & 0x1F) {
- case NOMAP:
- {
- {
- const unsigned char *p = inchar_start;
- writebuf_off = 0;
- while (p < in_p) {
- TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++;
- }
- }
- writebuf_len = writebuf_off;
- writebuf_off = 0;
- while (writebuf_off < writebuf_len) {
- SUSPEND_OBUF(3);
- *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
- }
- }
- continue;
- case 0x00: case 0x04: case 0x08: case 0x0C:
- case 0x10: case 0x14: case 0x18: case 0x1C:
- SUSPEND_AFTER_OUTPUT(25);
- while (in_p >= in_stop) {
- if (!(opt & ECONV_PARTIAL_INPUT))
- goto incomplete;
- SUSPEND(econv_source_buffer_empty, 5);
- }
- next_byte = (unsigned char)*in_p++;
- next_table = (unsigned int)mrb_fixnum(next_info);
- goto follow_byte;
- case ZERObt: /* drop input */
- continue;
- case ONEbt:
- SUSPEND_OBUF(9); *out_p++ = getBT1(mrb_fixnum(next_info));
- continue;
- case TWObt:
- SUSPEND_OBUF(10); *out_p++ = getBT1(mrb_fixnum(next_info));
- SUSPEND_OBUF(21); *out_p++ = getBT2(mrb_fixnum(next_info));
- continue;
- case THREEbt:
- SUSPEND_OBUF(11); *out_p++ = getBT1(mrb_fixnum(next_info));
- SUSPEND_OBUF(15); *out_p++ = getBT2(mrb_fixnum(next_info));
- SUSPEND_OBUF(16); *out_p++ = getBT3(mrb_fixnum(next_info));
- continue;
- case FOURbt:
- SUSPEND_OBUF(12); *out_p++ = getBT0(mrb_fixnum(next_info));
- SUSPEND_OBUF(17); *out_p++ = getBT1(mrb_fixnum(next_info));
- SUSPEND_OBUF(18); *out_p++ = getBT2(mrb_fixnum(next_info));
- SUSPEND_OBUF(19); *out_p++ = getBT3(mrb_fixnum(next_info));
- continue;
- case GB4bt:
- SUSPEND_OBUF(29); *out_p++ = getGB4bt0((unsigned char)mrb_fixnum(next_info));
- SUSPEND_OBUF(30); *out_p++ = getGB4bt1((mrb_fixnum(next_info)));
- SUSPEND_OBUF(31); *out_p++ = getGB4bt2((unsigned char)mrb_fixnum(next_info));
- SUSPEND_OBUF(32); *out_p++ = getGB4bt3(mrb_fixnum(next_info));
- continue;
- case STR1:
- tc->output_index = 0;
- while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(mrb_fixnum(next_info))))) {
- SUSPEND_OBUF(28); *out_p++ = BYTE_ADDR(STR1_BYTEINDEX(mrb_fixnum(next_info)))[1+tc->output_index];
- tc->output_index++;
- }
- continue;
- case FUNii:
- next_info = (*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
- goto follow_info;
- case FUNsi:
- {
- const unsigned char *char_start;
- size_t char_len;
- char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
- next_info = (*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
- goto follow_info;
- }
- case FUNio:
- SUSPEND_OBUF(13);
- if (tr->max_output <= out_stop - out_p)
- out_p += tr->func_io(TRANSCODING_STATE(tc),
- next_info, out_p, out_stop - out_p);
- else {
- writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
- next_info,
- TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
- writebuf_off = 0;
- while (writebuf_off < writebuf_len) {
- SUSPEND_OBUF(20);
- *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
- }
- }
- break;
- case FUNso:
- {
- const unsigned char *char_start;
- size_t char_len;
- SUSPEND_OBUF(14);
- if (tr->max_output <= out_stop - out_p) {
- char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
- out_p += tr->func_so(TRANSCODING_STATE(tc),
- char_start, (size_t)char_len,
- out_p, out_stop - out_p);
- }
- else {
- char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
- writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
- char_start, (size_t)char_len,
- TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
- writebuf_off = 0;
- while (writebuf_off < writebuf_len) {
- SUSPEND_OBUF(22);
- *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
- }
- }
- break;
- }
- case FUNsio:
- {
- const unsigned char *char_start;
- size_t char_len;
- SUSPEND_OBUF(33);
- if (tr->max_output <= out_stop - out_p) {
- char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
- out_p += tr->func_sio(TRANSCODING_STATE(tc),
- char_start, (size_t)char_len, next_info,
- out_p, out_stop - out_p);
- }
- else {
- char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
- writebuf_len = tr->func_sio(TRANSCODING_STATE(tc),
- char_start, (size_t)char_len, next_info,
- TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
- writebuf_off = 0;
- while (writebuf_off < writebuf_len) {
- SUSPEND_OBUF(34);
- *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
- }
- }
- break;
- }
- case INVALID:
- if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
- if (tc->recognized_len + (in_p - inchar_start) < unitlen)
- SUSPEND_AFTER_OUTPUT(26);
- while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
- in_p = in_stop;
- SUSPEND(econv_source_buffer_empty, 8);
- }
- if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
- in_p = in_stop;
- }
- else {
- in_p = inchar_start + (unitlen - tc->recognized_len);
- }
- }
- else {
- ssize_t invalid_len; /* including the last byte which causes invalid */
- ssize_t discard_len;
- invalid_len = tc->recognized_len + (in_p - inchar_start);
- discard_len = ((invalid_len - 1) / unitlen) * unitlen;
- readagain_len = invalid_len - discard_len;
- }
- goto invalid;
- case UNDEF:
- goto undef;
- default:
- mrb_raise(mrb, mrb->eRuntimeError_class, "unknown transcoding instruction");
- }
- continue;
-
- invalid:
- SUSPEND(econv_invalid_byte_sequence, 1);
- continue;
-
- incomplete:
- SUSPEND(econv_incomplete_input, 27);
- continue;
-
- undef:
- SUSPEND(econv_undefined_conversion, 2);
- continue;
- }
-
- /* cleanup */
- if (tr->finish_func) {
- SUSPEND_OBUF(4);
- if (tr->max_output <= out_stop - out_p) {
- out_p += tr->finish_func(TRANSCODING_STATE(tc),
- out_p, out_stop - out_p);
- }
- else {
- writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
- TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
- writebuf_off = 0;
- while (writebuf_off < writebuf_len) {
- SUSPEND_OBUF(23);
- *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
- }
- }
- }
- while (1)
- SUSPEND(econv_finished, 6);
-#undef SUSPEND
-#undef next_table
-#undef next_info
-#undef next_byte
-#undef writebuf_len
-#undef writebuf_off
-}
-
-static mrb_econv_result_t
-transcode_restartable(mrb_state *mrb,
- const unsigned char **in_pos, unsigned char **out_pos,
- const unsigned char *in_stop, unsigned char *out_stop,
- mrb_transcoding *tc,
- const int opt)
-{
- if (tc->readagain_len) {
- unsigned char *readagain_buf = malloc(tc->readagain_len);//ALLOCA_N(unsigned char, tc->readagain_len);
- const unsigned char *readagain_pos = readagain_buf;
- const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
- mrb_econv_result_t res;
-
- memcpy(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
- tc->readagain_len);
- tc->readagain_len = 0;
- res = transcode_restartable0(mrb, &readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
- if (res != econv_source_buffer_empty) {
- memcpy(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len,
- readagain_pos, readagain_stop - readagain_pos);
- tc->readagain_len += readagain_stop - readagain_pos;
- return res;
- }
- }
- return transcode_restartable0(mrb, in_pos, out_pos, in_stop, out_stop, tc, opt);
-}
-
-static mrb_transcoding *
-mrb_transcoding_open_by_transcoder(const mrb_transcoder *tr, int flags)
-{
- mrb_transcoding *tc;
-
- tc = malloc(sizeof(mrb_transcoding));
- tc->transcoder = tr;
- tc->flags = flags;
- if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
- tc->state.ptr = xmalloc(tr->state_size);
- if (tr->state_init_func) {
- (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
- }
- tc->resume_position = 0;
- tc->recognized_len = 0;
- tc->readagain_len = 0;
- tc->writebuf_len = 0;
- tc->writebuf_off = 0;
- if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
- tc->readbuf.ptr = xmalloc(tr->max_input);
- }
- if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
- tc->writebuf.ptr = xmalloc(tr->max_output);
- }
- return tc;
-}
-
-static mrb_econv_result_t
-mrb_transcoding_convert(mrb_state *mrb, mrb_transcoding *tc,
- const unsigned char **input_ptr, const unsigned char *input_stop,
- unsigned char **output_ptr, unsigned char *output_stop,
- int flags)
-{
- return transcode_restartable(mrb,
- input_ptr, output_ptr,
- input_stop, output_stop,
- tc, flags);
-}
-
-static void
-mrb_transcoding_close(mrb_transcoding *tc)
-{
- const mrb_transcoder *tr = tc->transcoder;
- if (tr->state_fini_func) {
- (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */
- }
- if (TRANSCODING_STATE_EMBED_MAX < tr->state_size)
- xfree(tc->state.ptr);
- if ((int)sizeof(tc->readbuf.ary) < tr->max_input)
- xfree(tc->readbuf.ptr);
- if ((int)sizeof(tc->writebuf.ary) < tr->max_output)
- xfree(tc->writebuf.ptr);
- xfree(tc);
-}
-
-static size_t
-mrb_transcoding_memsize(mrb_transcoding *tc)
-{
- size_t size = sizeof(mrb_transcoding);
- const mrb_transcoder *tr = tc->transcoder;
-
- if (TRANSCODING_STATE_EMBED_MAX < tr->state_size) {
- size += tr->state_size;
- }
- if ((int)sizeof(tc->readbuf.ary) < tr->max_input) {
- size += tr->max_input;
- }
- if ((int)sizeof(tc->writebuf.ary) < tr->max_output) {
- size += tr->max_output;
- }
- return size;
-}
-
-static mrb_econv_t *
-mrb_econv_alloc(int n_hint)
-{
- mrb_econv_t *ec;
-
- if (n_hint <= 0)
- n_hint = 1;
-
- ec = malloc(sizeof(mrb_econv_t));//ALLOC(mrb_econv_t);
- ec->flags = 0;
- ec->source_encoding_name = NULL;
- ec->destination_encoding_name = NULL;
- ec->started = 0;
- ec->replacement_str = NULL;
- ec->replacement_len = 0;
- ec->replacement_enc = NULL;
- ec->replacement_allocated = 0;
- ec->in_buf_start = NULL;
- ec->in_data_start = NULL;
- ec->in_data_end = NULL;
- ec->in_buf_end = NULL;
- ec->num_allocated = n_hint;
- ec->num_trans = 0;
- ec->elems = malloc(sizeof(mrb_econv_elem_t)*ec->num_allocated);//ALLOC_N(mrb_econv_elem_t, ec->num_allocated);
- ec->num_finished = 0;
- ec->last_tc = NULL;
- ec->last_error.result = econv_source_buffer_empty;
- ec->last_error.error_tc = NULL;
- ec->last_error.source_encoding = NULL;
- ec->last_error.destination_encoding = NULL;
- ec->last_error.error_bytes_start = NULL;
- ec->last_error.error_bytes_len = 0;
- ec->last_error.readagain_len = 0;
- ec->source_encoding = NULL;
- ec->destination_encoding = NULL;
- return ec;
-}
-
-static int
-mrb_econv_add_transcoder_at(mrb_state *mrb, mrb_econv_t *ec, const mrb_transcoder *tr, int i)
-{
- int n, j;
- int bufsize = 4096;
- unsigned char *p;
-
- if (ec->num_trans == ec->num_allocated) {
- n = ec->num_allocated * 2;
- mrb_realloc(mrb, ec->elems, sizeof(mrb_econv_elem_t)*n);//REALLOC_N(ec->elems, mrb_econv_elem_t, n);
- ec->num_allocated = n;
- }
-
- p = xmalloc(bufsize);
-
- memmove(ec->elems+i+1, ec->elems+i, sizeof(mrb_econv_elem_t)*(ec->num_trans-i));
-
- ec->elems[i].tc = mrb_transcoding_open_by_transcoder(tr, 0);
- ec->elems[i].out_buf_start = p;
- ec->elems[i].out_buf_end = p + bufsize;
- ec->elems[i].out_data_start = p;
- ec->elems[i].out_data_end = p;
- ec->elems[i].last_result = econv_source_buffer_empty;
-
- ec->num_trans++;
-
- if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
- for (j = ec->num_trans-1; i <= j; j--) {
- mrb_transcoding *tc = ec->elems[j].tc;
- const mrb_transcoder *tr2 = tc->transcoder;
- if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
- ec->last_tc = tc;
- break;
- }
- }
-
- return 0;
-}
-
-static mrb_econv_t *
-mrb_econv_open_by_transcoder_entries(mrb_state *mrb, int n, transcoder_entry_t **entries)
-{
- mrb_econv_t *ec;
- int i, ret;
-
- for (i = 0; i < n; i++) {
- const mrb_transcoder *tr;
- tr = load_transcoder_entry(mrb, entries[i]);
- if (!tr)
- return NULL;
- }
-
- ec = mrb_econv_alloc(n);
-
- for (i = 0; i < n; i++) {
- const mrb_transcoder *tr = load_transcoder_entry(mrb, entries[i]);
- ret = mrb_econv_add_transcoder_at(mrb, ec, tr, ec->num_trans);
- if (ret == -1) {
- mrb_econv_close(ec);
- return NULL;
- }
- }
-
- return ec;
-}
-
-struct trans_open_t {
- transcoder_entry_t **entries;
- int num_additional;
-};
-
-static void
-trans_open_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg)
-{
- struct trans_open_t *toarg = arg;
-
- if (!toarg->entries) {
- toarg->entries = malloc(sizeof(transcoder_entry_t*)*depth+1+toarg->num_additional);//ALLOC_N(transcoder_entry_t *, depth+1+toarg->num_additional);
- }
- toarg->entries[depth] = get_transcoder_entry(sname, dname);
-}
-
-static mrb_econv_t *
-mrb_econv_open0(mrb_state *mrb, const char *sname, const char *dname, int ecflags)
-{
- transcoder_entry_t **entries = NULL;
- int num_trans;
- mrb_econv_t *ec;
-
- mrb_encoding *senc, *denc;
- int sidx, didx;
-
- senc = NULL;
- if (*sname) {
- sidx = mrb_enc_find_index(mrb, sname);
- if (0 <= sidx) {
- senc = mrb_enc_from_index(mrb, sidx);
- }
- }
-
- denc = NULL;
- if (*dname) {
- didx = mrb_enc_find_index(mrb, dname);
- if (0 <= didx) {
- denc = mrb_enc_from_index(mrb, didx);
- }
- }
-
- if (*sname == '\0' && *dname == '\0') {
- num_trans = 0;
- entries = NULL;
- }
- else {
- struct trans_open_t toarg;
- toarg.entries = NULL;
- toarg.num_additional = 0;
- num_trans = transcode_search_path(mrb, sname, dname, trans_open_i, (void*)&toarg);
- entries = toarg.entries;
- if (num_trans < 0) {
- xfree(entries);
- return NULL;
- }
- }
-
- ec = mrb_econv_open_by_transcoder_entries(mrb, num_trans, entries);
- xfree(entries);
- if (!ec)
- return NULL;
-
- ec->flags = ecflags;
- ec->source_encoding_name = sname;
- ec->destination_encoding_name = dname;
-
- return ec;
-}
-
-#define MAX_ECFLAGS_DECORATORS 32
-
-static int
-decorator_names(int ecflags, const char **decorators_ret)
-{
- int num_decorators;
-
- if ((ecflags & ECONV_CRLF_NEWLINE_DECORATOR) &&
- (ecflags & ECONV_CR_NEWLINE_DECORATOR))
- return -1;
-
- if ((ecflags & (ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR)) &&
- (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))
- return -1;
-
- if ((ecflags & ECONV_XML_TEXT_DECORATOR) &&
- (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR))
- return -1;
-
- num_decorators = 0;
-
- if (ecflags & ECONV_XML_TEXT_DECORATOR)
- decorators_ret[num_decorators++] = "xml_text_escape";
- if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR)
- decorators_ret[num_decorators++] = "xml_attr_content_escape";
- if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR)
- decorators_ret[num_decorators++] = "xml_attr_quote";
-
- if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR)
- decorators_ret[num_decorators++] = "crlf_newline";
- if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
- decorators_ret[num_decorators++] = "cr_newline";
- if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
- decorators_ret[num_decorators++] = "universal_newline";
-
- return num_decorators;
-}
-
-mrb_econv_t *
-mrb_econv_open(mrb_state *mrb, const char *sname, const char *dname, int ecflags)
-{
- mrb_econv_t *ec;
- int num_decorators;
- const char *decorators[MAX_ECFLAGS_DECORATORS];
- int i;
-
- num_decorators = decorator_names(ecflags, decorators);
- if (num_decorators == -1)
- return NULL;
-
- ec = mrb_econv_open0(mrb, sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
- if (!ec)
- return NULL;
-
- for (i = 0; i < num_decorators; i++)
- if (mrb_econv_decorate_at_last(mrb, ec, decorators[i]) == -1) {
- mrb_econv_close(ec);
- return NULL;
- }
-
- ec->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
-
- return ec;
-}
-
-static int
-trans_sweep(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char **input_ptr, const unsigned char *input_stop,
- unsigned char **output_ptr, unsigned char *output_stop,
- int flags,
- int start)
-{
- int should_try;
- int i, f;
-
- const unsigned char **ipp, *is, *iold;
- unsigned char **opp, *os, *oold;
- mrb_econv_result_t res;
-
- should_try = 1;
- while (should_try) {
- should_try = 0;
- for (i = start; i < ec->num_trans; i++) {
- mrb_econv_elem_t *te = &ec->elems[i];
-
- if (i == 0) {
- ipp = input_ptr;
- is = input_stop;
- }
- else {
- mrb_econv_elem_t *prev_te = &ec->elems[i-1];
- ipp = (const unsigned char **)&prev_te->out_data_start;
- is = prev_te->out_data_end;
- }
-
- if (i == ec->num_trans-1) {
- opp = output_ptr;
- os = output_stop;
- }
- else {
- if (te->out_buf_start != te->out_data_start) {
- ssize_t len = te->out_data_end - te->out_data_start;
- ssize_t off = te->out_data_start - te->out_buf_start;
- memmove(te->out_buf_start, te->out_data_start, len);
- te->out_data_start = te->out_buf_start;
- te->out_data_end -= off;
- }
- opp = &te->out_data_end;
- os = te->out_buf_end;
- }
-
- f = flags;
- if (ec->num_finished != i)
- f |= ECONV_PARTIAL_INPUT;
- if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
- start = 1;
- flags &= ~ECONV_AFTER_OUTPUT;
- }
- if (i != 0)
- f &= ~ECONV_AFTER_OUTPUT;
- iold = *ipp;
- oold = *opp;
- te->last_result = res = mrb_transcoding_convert(mrb, te->tc, ipp, is, opp, os, f);
- if (iold != *ipp || oold != *opp)
- should_try = 1;
-
- switch (res) {
- case econv_invalid_byte_sequence:
- case econv_incomplete_input:
- case econv_undefined_conversion:
- case econv_after_output:
- return i;
-
- case econv_destination_buffer_full:
- case econv_source_buffer_empty:
- break;
-
- case econv_finished:
- ec->num_finished = i+1;
- break;
-
- default:
- mrb_bug("Internal Error: invalid return value from mrb_transcoding_convert().");
- break;
- }
- }
- }
- return -1;
-}
-
-static mrb_econv_result_t
-mrb_trans_conv(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char **input_ptr, const unsigned char *input_stop,
- unsigned char **output_ptr, unsigned char *output_stop,
- int flags,
- int *result_position_ptr)
-{
- int i;
- int needreport_index;
- int sweep_start;
-
- unsigned char empty_buf;
- unsigned char *empty_ptr = &empty_buf;
-
- if (!input_ptr) {
- input_ptr = (const unsigned char **)&empty_ptr;
- input_stop = empty_ptr;
- }
-
- if (!output_ptr) {
- output_ptr = &empty_ptr;
- output_stop = empty_ptr;
- }
-
- if (ec->elems[0].last_result == econv_after_output)
- ec->elems[0].last_result = econv_source_buffer_empty;
-
- needreport_index = -1;
- for (i = ec->num_trans-1; 0 <= i; i--) {
- switch (ec->elems[i].last_result) {
- case econv_invalid_byte_sequence:
- case econv_incomplete_input:
- case econv_undefined_conversion:
- case econv_after_output:
- case econv_finished:
- sweep_start = i+1;
- needreport_index = i;
- goto found_needreport;
-
- case econv_destination_buffer_full:
- case econv_source_buffer_empty:
- break;
-
- default:
- mrb_bug("unexpected transcode last result");
- }
- }
-
- /* /^[sd]+$/ is confirmed. but actually /^s*d*$/. */
-
- if (ec->elems[ec->num_trans-1].last_result == econv_destination_buffer_full &&
- (flags & ECONV_AFTER_OUTPUT)) {
- mrb_econv_result_t res;
-
- res = mrb_trans_conv(mrb, ec, NULL, NULL, output_ptr, output_stop,
- (flags & ~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT,
- result_position_ptr);
-
- if (res == econv_source_buffer_empty)
- return econv_after_output;
- return res;
- }
-
- sweep_start = 0;
-
- found_needreport:
-
- do {
- needreport_index = trans_sweep(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
- sweep_start = needreport_index + 1;
- } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
-
- for (i = ec->num_trans-1; 0 <= i; i--) {
- if (ec->elems[i].last_result != econv_source_buffer_empty) {
- mrb_econv_result_t res = ec->elems[i].last_result;
- if (res == econv_invalid_byte_sequence ||
- res == econv_incomplete_input ||
- res == econv_undefined_conversion ||
- res == econv_after_output) {
- ec->elems[i].last_result = econv_source_buffer_empty;
- }
- if (result_position_ptr)
- *result_position_ptr = i;
- return res;
- }
- }
- if (result_position_ptr)
- *result_position_ptr = -1;
- return econv_source_buffer_empty;
-}
-
-static mrb_econv_result_t
-mrb_econv_convert0(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char **input_ptr, const unsigned char *input_stop,
- unsigned char **output_ptr, unsigned char *output_stop,
- int flags)
-{
- mrb_econv_result_t res;
- int result_position;
- int has_output = 0;
-
- memset(&ec->last_error, 0, sizeof(ec->last_error));
-
- if (ec->num_trans == 0) {
- size_t len;
- if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) {
- if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) {
- len = output_stop - *output_ptr;
- memcpy(*output_ptr, ec->in_data_start, len);
- *output_ptr = output_stop;
- ec->in_data_start += len;
- res = econv_destination_buffer_full;
- goto gotresult;
- }
- len = ec->in_data_end - ec->in_data_start;
- memcpy(*output_ptr, ec->in_data_start, len);
- *output_ptr += len;
- ec->in_data_start = ec->in_data_end = ec->in_buf_start;
- if (flags & ECONV_AFTER_OUTPUT) {
- res = econv_after_output;
- goto gotresult;
- }
- }
- if (output_stop - *output_ptr < input_stop - *input_ptr) {
- len = output_stop - *output_ptr;
- }
- else {
- len = input_stop - *input_ptr;
- }
- if (0 < len && (flags & ECONV_AFTER_OUTPUT)) {
- *(*output_ptr)++ = *(*input_ptr)++;
- res = econv_after_output;
- goto gotresult;
- }
- memcpy(*output_ptr, *input_ptr, len);
- *output_ptr += len;
- *input_ptr += len;
- if (*input_ptr != input_stop)
- res = econv_destination_buffer_full;
- else if (flags & ECONV_PARTIAL_INPUT)
- res = econv_source_buffer_empty;
- else
- res = econv_finished;
- goto gotresult;
- }
-
- if (ec->elems[ec->num_trans-1].out_data_start) {
- unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
- unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
- if (data_start != data_end) {
- size_t len;
- if (output_stop - *output_ptr < data_end - data_start) {
- len = output_stop - *output_ptr;
- memcpy(*output_ptr, data_start, len);
- *output_ptr = output_stop;
- ec->elems[ec->num_trans-1].out_data_start += len;
- res = econv_destination_buffer_full;
- goto gotresult;
- }
- len = data_end - data_start;
- memcpy(*output_ptr, data_start, len);
- *output_ptr += len;
- ec->elems[ec->num_trans-1].out_data_start =
- ec->elems[ec->num_trans-1].out_data_end =
- ec->elems[ec->num_trans-1].out_buf_start;
- has_output = 1;
- }
- }
-
- if (ec->in_buf_start &&
- ec->in_data_start != ec->in_data_end) {
- res = mrb_trans_conv(mrb, ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
- (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
- if (res != econv_source_buffer_empty)
- goto gotresult;
- }
-
- if (has_output &&
- (flags & ECONV_AFTER_OUTPUT) &&
- *input_ptr != input_stop) {
- input_stop = *input_ptr;
- res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
- if (res == econv_source_buffer_empty)
- res = econv_after_output;
- }
- else if ((flags & ECONV_AFTER_OUTPUT) ||
- ec->num_trans == 1) {
- res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
- }
- else {
- flags |= ECONV_AFTER_OUTPUT;
- do {
- res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
- } while (res == econv_after_output);
- }
-
- gotresult:
- ec->last_error.result = res;
- if (res == econv_invalid_byte_sequence ||
- res == econv_incomplete_input ||
- res == econv_undefined_conversion) {
- mrb_transcoding *error_tc = ec->elems[result_position].tc;
- ec->last_error.error_tc = error_tc;
- ec->last_error.source_encoding = error_tc->transcoder->src_encoding;
- ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding;
- ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc);
- ec->last_error.error_bytes_len = error_tc->recognized_len;
- ec->last_error.readagain_len = error_tc->readagain_len;
- }
-
- return res;
-}
-
-static int output_replacement_character(mrb_state *mrb, mrb_econv_t *ec);
-
-static int
-output_hex_charref(mrb_state *mrb, mrb_econv_t *ec)
-{
- int ret;
- unsigned char utfbuf[1024];
- const unsigned char *utf;
- size_t utf_len;
- int utf_allocated = 0;
- char charef_buf[16];
- const unsigned char *p;
-
- if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) {
- utf = ec->last_error.error_bytes_start;
- utf_len = ec->last_error.error_bytes_len;
- }
- else {
- utf = allocate_converted_string(mrb,
- ec->last_error.source_encoding, "UTF-32BE",
- ec->last_error.error_bytes_start, ec->last_error.error_bytes_len,
- utfbuf, sizeof(utfbuf),
- &utf_len);
- if (!utf)
- return -1;
- if (utf != utfbuf && utf != ec->last_error.error_bytes_start)
- utf_allocated = 1;
- }
-
- if (utf_len % 4 != 0)
- goto fail;
-
- p = utf;
- while (4 <= utf_len) {
- unsigned int u = 0;
- u += p[0] << 24;
- u += p[1] << 16;
- u += p[2] << 8;
- u += p[3];
- snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u);
-
- ret = mrb_econv_insert_output(mrb, ec, (unsigned char*)charef_buf, strlen(charef_buf), "US-ASCII");
- if (ret == -1)
- goto fail;
-
- p += 4;
- utf_len -= 4;
- }
-
- if (utf_allocated)
- xfree((void*)utf);
- return 0;
-
- fail:
- if (utf_allocated)
- xfree((void*)utf);
- return -1;
-}
-
-mrb_econv_result_t
-mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char **input_ptr, const unsigned char *input_stop,
- unsigned char **output_ptr, unsigned char *output_stop,
- int flags)
-{
- mrb_econv_result_t ret;
-
- unsigned char empty_buf;
- unsigned char *empty_ptr = &empty_buf;
-
- ec->started = 1;
-
- if (!input_ptr) {
- input_ptr = (const unsigned char **)&empty_ptr;
- input_stop = empty_ptr;
- }
-
- if (!output_ptr) {
- output_ptr = &empty_ptr;
- output_stop = empty_ptr;
- }
-
- resume:
- ret = mrb_econv_convert0(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags);
-
- if (ret == econv_invalid_byte_sequence ||
- ret == econv_incomplete_input) {
- /* deal with invalid byte sequence */
- /* todo: add more alternative behaviors */
- switch (ec->flags & ECONV_INVALID_MASK) {
- case ECONV_INVALID_REPLACE:
- if (output_replacement_character(mrb, ec) == 0)
- goto resume;
-
- default:
- mrb_bug("Internal error: Unhandled ECONV_INVALID_xxx.");
- break;
- }
- }
-
- if (ret == econv_undefined_conversion) {
- /* valid character in source encoding
- * but no related character(s) in destination encoding */
- /* todo: add more alternative behaviors */
- switch (ec->flags & ECONV_UNDEF_MASK) {
- case ECONV_UNDEF_REPLACE:
- if (output_replacement_character(mrb, ec) == 0)
- goto resume;
- break;
-
- case ECONV_UNDEF_HEX_CHARREF:
- if (output_hex_charref(mrb, ec) == 0)
- goto resume;
- break;
-
- default:
- mrb_bug("Internal error: Unhandled ECONV_UNDEF_xxx.");
- break;
- }
- }
-
- return ret;
-}
-
-const char *
-mrb_econv_encoding_to_insert_output(mrb_econv_t *ec)
-{
- mrb_transcoding *tc = ec->last_tc;
- const mrb_transcoder *tr;
-
- if (tc == NULL)
- return "";
-
- tr = tc->transcoder;
-
- if (tr->asciicompat_type == asciicompat_encoder)
- return tr->src_encoding;
- return tr->dst_encoding;
-}
-
-static unsigned char *
-allocate_converted_string(mrb_state *mrb,
- const char *sname, const char *dname,
- const unsigned char *str, size_t len,
- unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
- size_t *dst_len_ptr)
-{
- unsigned char *dst_str;
- size_t dst_len;
- size_t dst_bufsize;
-
- mrb_econv_t *ec;
- mrb_econv_result_t res;
-
- const unsigned char *sp;
- unsigned char *dp;
-
- if (caller_dst_buf)
- dst_bufsize = caller_dst_bufsize;
- else if (len == 0)
- dst_bufsize = 1;
- else
- dst_bufsize = len;
-
- ec = mrb_econv_open(mrb, sname, dname, 0);
- if (ec == NULL)
- return NULL;
- if (caller_dst_buf)
- dst_str = caller_dst_buf;
- else
- dst_str = xmalloc(dst_bufsize);
- dst_len = 0;
- sp = str;
- dp = dst_str+dst_len;
- res = mrb_econv_convert(mrb, ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
- dst_len = dp - dst_str;
- while (res == econv_destination_buffer_full) {
- if (SIZE_MAX/2 < dst_bufsize) {
- goto fail;
- }
- dst_bufsize *= 2;
- if (dst_str == caller_dst_buf) {
- unsigned char *tmp;
- tmp = xmalloc(dst_bufsize);
- memcpy(tmp, dst_str, dst_bufsize/2);
- dst_str = tmp;
- }
- else {
- dst_str = xrealloc(dst_str, dst_bufsize);
- }
- dp = dst_str+dst_len;
- res = mrb_econv_convert(mrb, ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
- dst_len = dp - dst_str;
- }
- if (res != econv_finished) {
- goto fail;
- }
- mrb_econv_close(ec);
- *dst_len_ptr = dst_len;
- return dst_str;
-
- fail:
- if (dst_str != caller_dst_buf)
- xfree(dst_str);
- mrb_econv_close(ec);
- return NULL;
-}
-
-/* result: 0:success -1:failure */
-int
-mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char *str, size_t len, const char *str_encoding)
-{
- const char *insert_encoding = mrb_econv_encoding_to_insert_output(ec);
- unsigned char insert_buf[4096];
- const unsigned char *insert_str = NULL;
- size_t insert_len;
-
- int last_trans_index;
- mrb_transcoding *tc;
-
- unsigned char **buf_start_p;
- unsigned char **data_start_p;
- unsigned char **data_end_p;
- unsigned char **buf_end_p;
-
- size_t need;
-
- ec->started = 1;
-
- if (len == 0)
- return 0;
-
- if (encoding_equal(insert_encoding, str_encoding)) {
- insert_str = str;
- insert_len = len;
- }
- else {
- insert_str = allocate_converted_string(mrb, str_encoding, insert_encoding,
- str, len, insert_buf, sizeof(insert_buf), &insert_len);
- if (insert_str == NULL)
- return -1;
- }
-
- need = insert_len;
-
- last_trans_index = ec->num_trans-1;
- if (ec->num_trans == 0) {
- tc = NULL;
- buf_start_p = &ec->in_buf_start;
- data_start_p = &ec->in_data_start;
- data_end_p = &ec->in_data_end;
- buf_end_p = &ec->in_buf_end;
- }
- else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) {
- tc = ec->elems[last_trans_index].tc;
- need += tc->readagain_len;
- if (need < insert_len)
- goto fail;
- if (last_trans_index == 0) {
- buf_start_p = &ec->in_buf_start;
- data_start_p = &ec->in_data_start;
- data_end_p = &ec->in_data_end;
- buf_end_p = &ec->in_buf_end;
- }
- else {
- mrb_econv_elem_t *ee = &ec->elems[last_trans_index-1];
- buf_start_p = &ee->out_buf_start;
- data_start_p = &ee->out_data_start;
- data_end_p = &ee->out_data_end;
- buf_end_p = &ee->out_buf_end;
- }
- }
- else {
- mrb_econv_elem_t *ee = &ec->elems[last_trans_index];
- buf_start_p = &ee->out_buf_start;
- data_start_p = &ee->out_data_start;
- data_end_p = &ee->out_data_end;
- buf_end_p = &ee->out_buf_end;
- tc = ec->elems[last_trans_index].tc;
- }
-
- if (*buf_start_p == NULL) {
- unsigned char *buf = xmalloc(need);
- *buf_start_p = buf;
- *data_start_p = buf;
- *data_end_p = buf;
- *buf_end_p = buf+need;
- }
- else if ((size_t)(*buf_end_p - *data_end_p) < need) {
- memmove(*buf_start_p, *data_start_p, *data_end_p - *data_start_p);
- *data_end_p = *buf_start_p + (*data_end_p - *data_start_p);
- *data_start_p = *buf_start_p;
- if ((size_t)(*buf_end_p - *data_end_p) < need) {
- unsigned char *buf;
- size_t s = (*data_end_p - *buf_start_p) + need;
- if (s < need)
- goto fail;
- buf = xrealloc(*buf_start_p, s);
- *data_start_p = buf;
- *data_end_p = buf + (*data_end_p - *buf_start_p);
- *buf_start_p = buf;
- *buf_end_p = buf + s;
- }
- }
-
- memcpy(*data_end_p, insert_str, insert_len);
- *data_end_p += insert_len;
- if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
- memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
- *data_end_p += tc->readagain_len;
- tc->readagain_len = 0;
- }
-
- if (insert_str != str && insert_str != insert_buf)
- xfree((void*)insert_str);
- return 0;
-
- fail:
- if (insert_str != str && insert_str != insert_buf)
- xfree((void*)insert_str);
- return -1;
-}
-
-void
-mrb_econv_close(mrb_econv_t *ec)
-{
- int i;
-
- if (ec->replacement_allocated) {
- xfree((void*)ec->replacement_str);
- }
- for (i = 0; i < ec->num_trans; i++) {
- mrb_transcoding_close(ec->elems[i].tc);
- if (ec->elems[i].out_buf_start)
- xfree(ec->elems[i].out_buf_start);
- }
- xfree(ec->in_buf_start);
- xfree(ec->elems);
- xfree(ec);
-}
-
-size_t
-mrb_econv_memsize(mrb_econv_t *ec)
-{
- size_t size = sizeof(mrb_econv_t);
- int i;
-
- if (ec->replacement_allocated) {
- size += ec->replacement_len;
- }
- for (i = 0; i < ec->num_trans; i++) {
- size += mrb_transcoding_memsize(ec->elems[i].tc);
-
- if (ec->elems[i].out_buf_start) {
- size += ec->elems[i].out_buf_end - ec->elems[i].out_buf_start;
- }
- }
- size += ec->in_buf_end - ec->in_buf_start;
- size += sizeof(mrb_econv_elem_t) * ec->num_allocated;
-
- return size;
-}
-
-int
-mrb_econv_putbackable(mrb_econv_t *ec)
-{
- if (ec->num_trans == 0)
- return 0;
- if (sizeof(size_t) > sizeof(int)) {
- if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX;
- }
- return (int)ec->elems[0].tc->readagain_len;
-}
-
-void
-mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n)
-{
- mrb_transcoding *tc;
- if (ec->num_trans == 0 || n == 0)
- return;
- tc = ec->elems[0].tc;
- memcpy(p, TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len - n, n);
- tc->readagain_len -= n;
-}
-
-struct asciicompat_encoding_t {
- const char *ascii_compat_name;
- const char *ascii_incompat_name;
-};
-
-static enum st_retval
-asciicompat_encoding_i(mrb_state *mrb, st_data_t key, st_data_t val, st_data_t arg)
-{
- struct asciicompat_encoding_t *data = (struct asciicompat_encoding_t*)arg;
- transcoder_entry_t *entry = (transcoder_entry_t*)val;
- const mrb_transcoder *tr;
-
- if (DECORATOR_P(entry->sname, entry->dname))
- return ST_CONTINUE;
- tr = load_transcoder_entry(mrb, entry);
- if (tr && tr->asciicompat_type == asciicompat_decoder) {
- data->ascii_compat_name = tr->dst_encoding;
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-const char *
-mrb_econv_asciicompat_encoding(const char *ascii_incompat_name)
-{
- st_data_t v;
- st_table *table2;
- struct asciicompat_encoding_t data;
-
- if (!st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &v))
- return NULL;
- table2 = (st_table*)v;
-
- /*
- * Assumption:
- * There is at most one transcoder for
- * converting from ASCII incompatible encoding.
- *
- * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
- */
- if (table2->num_entries != 1)
- return NULL;
-
- data.ascii_incompat_name = ascii_incompat_name;
- data.ascii_compat_name = NULL;
- st_foreach(table2, asciicompat_encoding_i, (st_data_t)&data);
- return data.ascii_compat_name;
-}
-
-mrb_value
-mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long off, long len, mrb_value dst, int flags)
-{
- unsigned const char *ss, *sp, *se;
- unsigned char *ds, *dp, *de;
- mrb_econv_result_t res;
- int max_output;
-
- if (mrb_nil_p(dst)) {
- dst = mrb_str_buf_new(mrb, len);
- if (ec->destination_encoding)
- mrb_enc_associate(mrb, dst, ec->destination_encoding);
- }
-
- if (ec->last_tc)
- max_output = ec->last_tc->transcoder->max_output;
- else
- max_output = 1;
-
- res = econv_destination_buffer_full;
- while (res == econv_destination_buffer_full) {
- long dlen = RSTRING_LEN(dst);
- if (mrb_str_capacity(dst) - dlen < (size_t)len + max_output) {
- unsigned long new_capa = (unsigned long)dlen + len + max_output;
- if (LONG_MAX < new_capa)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "too long string");
- mrb_str_resize(mrb, dst, new_capa);
- mrb_str_set_len(mrb, dst, dlen);
- }
- ss = sp = (const unsigned char*)RSTRING_PTR(src) + off;
- se = ss + len;
- ds = (unsigned char*)RSTRING_PTR(dst);
- de = ds + mrb_str_capacity(dst);
- dp = ds += dlen;
- res = mrb_econv_convert(mrb, ec, &sp, se, &dp, de, flags);
- off += sp - ss;
- len -= sp - ss;
- mrb_str_set_len(mrb, dst, dlen + (dp - ds));
- mrb_econv_check_error(mrb, ec);
- }
-
- return dst;
-}
-
-mrb_value
-mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags)
-{
- return mrb_econv_substr_append(mrb, ec, src, 0, RSTRING_LEN(src), dst, flags);
-}
-
-mrb_value
-mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags)
-{
- return mrb_econv_substr_append(mrb, ec, src, byteoff, bytesize, mrb_nil_value(), flags);
-}
-
-mrb_value
-mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags)
-{
- return mrb_econv_substr_append(mrb, ec, src, 0, RSTRING_LEN(src), mrb_nil_value(), flags);
-}
-
-static int
-mrb_econv_add_converter(mrb_state *mrb, mrb_econv_t *ec, const char *sname, const char *dname, int n)
-{
- transcoder_entry_t *entry;
- const mrb_transcoder *tr;
-
- if (ec->started != 0)
- return -1;
-
- entry = get_transcoder_entry(sname, dname);
- if (!entry)
- return -1;
-
- tr = load_transcoder_entry(mrb, entry);
-
- return mrb_econv_add_transcoder_at(mrb, ec, tr, n);
-}
-
-static int
-mrb_econv_decorate_at(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name, int n)
-{
- return mrb_econv_add_converter(mrb, ec, "", decorator_name, n);
-}
-
-int
-mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name)
-{
- const mrb_transcoder *tr;
-
- if (ec->num_trans == 0)
- return mrb_econv_decorate_at(mrb, ec, decorator_name, 0);
-
- tr = ec->elems[0].tc->transcoder;
-
- if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
- tr->asciicompat_type == asciicompat_decoder)
- return mrb_econv_decorate_at(mrb, ec, decorator_name, 1);
-
- return mrb_econv_decorate_at(mrb, ec, decorator_name, 0);
-}
-
-int
-mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name)
-{
- const mrb_transcoder *tr;
-
- if (ec->num_trans == 0)
- return mrb_econv_decorate_at(mrb, ec, decorator_name, 0);
-
- tr = ec->elems[ec->num_trans-1].tc->transcoder;
-
- if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
- tr->asciicompat_type == asciicompat_encoder)
- return mrb_econv_decorate_at(mrb, ec, decorator_name, ec->num_trans-1);
-
- return mrb_econv_decorate_at(mrb, ec, decorator_name, ec->num_trans);
-}
-
-void
-mrb_econv_binmode(mrb_econv_t *ec)
-{
- const mrb_transcoder *trs[3];
- int n, i, j;
- transcoder_entry_t *entry;
- int num_trans;
-
- n = 0;
- if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
- entry = get_transcoder_entry("", "universal_newline");
- if (entry->transcoder)
- trs[n++] = entry->transcoder;
- }
- if (ec->flags & ECONV_CRLF_NEWLINE_DECORATOR) {
- entry = get_transcoder_entry("", "crlf_newline");
- if (entry->transcoder)
- trs[n++] = entry->transcoder;
- }
- if (ec->flags & ECONV_CR_NEWLINE_DECORATOR) {
- entry = get_transcoder_entry("", "cr_newline");
- if (entry->transcoder)
- trs[n++] = entry->transcoder;
- }
-
- num_trans = ec->num_trans;
- j = 0;
- for (i = 0; i < num_trans; i++) {
- int k;
- for (k = 0; k < n; k++)
- if (trs[k] == ec->elems[i].tc->transcoder)
- break;
- if (k == n) {
- ec->elems[j] = ec->elems[i];
- j++;
- }
- else {
- mrb_transcoding_close(ec->elems[i].tc);
- xfree(ec->elems[i].out_buf_start);
- ec->num_trans--;
- }
- }
-
- ec->flags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR);
-
-}
-
-static mrb_value
-econv_description(mrb_state *mrb, const char *sname, const char *dname, int ecflags, mrb_value mesg)
-{
- int has_description = 0;
-
- if (mrb_nil_p(mesg))
- mesg = mrb_str_new(mrb, NULL, 0);
-
- if (*sname != '\0' || *dname != '\0') {
- if (*sname == '\0')
- mrb_str_cat2(mrb, mesg, dname);
- else if (*dname == '\0')
- mrb_str_cat2(mrb, mesg, sname);
- else
- mrb_str_catf(mrb, mesg, "%s to %s", sname, dname);
- has_description = 1;
- }
-
- if (ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR|
- ECONV_CRLF_NEWLINE_DECORATOR|
- ECONV_CR_NEWLINE_DECORATOR|
- ECONV_XML_TEXT_DECORATOR|
- ECONV_XML_ATTR_CONTENT_DECORATOR|
- ECONV_XML_ATTR_QUOTE_DECORATOR)) {
- const char *pre = "";
- if (has_description)
- mrb_str_cat2(mrb, mesg, " with ");
- if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
- mrb_str_cat2(mrb, mesg, pre); pre = ",";
- mrb_str_cat2(mrb, mesg, "universal_newline");
- }
- if (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) {
- mrb_str_cat2(mrb, mesg, pre); pre = ",";
- mrb_str_cat2(mrb, mesg, "crlf_newline");
- }
- if (ecflags & ECONV_CR_NEWLINE_DECORATOR) {
- mrb_str_cat2(mrb, mesg, pre); pre = ",";
- mrb_str_cat2(mrb, mesg, "cr_newline");
- }
- if (ecflags & ECONV_XML_TEXT_DECORATOR) {
- mrb_str_cat2(mrb, mesg, pre); pre = ",";
- mrb_str_cat2(mrb, mesg, "xml_text");
- }
- if (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) {
- mrb_str_cat2(mrb, mesg, pre); pre = ",";
- mrb_str_cat2(mrb, mesg, "xml_attr_content");
- }
- if (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) {
- mrb_str_cat2(mrb, mesg, pre); pre = ",";
- mrb_str_cat2(mrb, mesg, "xml_attr_quote");
- }
- has_description = 1;
- }
- if (!has_description) {
- mrb_str_cat2(mrb, mesg, "no-conversion");
- }
-
- return mesg;
-}
-
-mrb_value
-mrb_econv_open_exc(mrb_state *mrb, const char *sname, const char *dname, int ecflags)
-{
- mrb_value mesg, exc;
- mesg = mrb_str_new_cstr(mrb, "code converter not found (");
- econv_description(mrb, sname, dname, ecflags, mesg);
- mrb_str_cat2(mrb, mesg, ")");
- exc = mrb_exc_new3(mrb, E_CONVERTERNOTFOUND_ERROR, mesg);
- return exc;
-}
-
-static mrb_value
-make_econv_exception(mrb_state *mrb, mrb_econv_t *ec)
-{
- mrb_value mesg, exc;
- if (ec->last_error.result == econv_invalid_byte_sequence ||
- ec->last_error.result == econv_incomplete_input) {
- {
- const char *err = (const char*)ec->last_error.error_bytes_start;
- size_t error_len = ec->last_error.error_bytes_len;
- mrb_value bytes = mrb_str_new(mrb, err, error_len);
- mrb_value dumped = mrb_str_dump(mrb, bytes);
- size_t readagain_len = ec->last_error.readagain_len;
- mrb_value bytes2 = mrb_nil_value();
- mrb_value dumped2;
- if (ec->last_error.result == econv_incomplete_input) {
- mesg = mrb_sprintf(mrb, "incomplete %s on %s",
- //StringValueCStr(dumped),
- mrb_string_value_cstr(mrb, &dumped),
- ec->last_error.source_encoding);
- }
- else if (readagain_len) {
- bytes2 = mrb_str_new(mrb, err+error_len, readagain_len);
- dumped2 = mrb_str_dump(mrb, bytes2);
- mesg = mrb_sprintf(mrb, "%s followed by %s on %s",
- //StringValueCStr(dumped),
- mrb_string_value_cstr(mrb, &dumped),
- //StringValueCStr(dumped2),
- mrb_string_value_cstr(mrb, &dumped2),
- ec->last_error.source_encoding);
- }
- else {
- mesg = mrb_sprintf(mrb, "%s on %s",
- //StringValueCStr(dumped),
- mrb_string_value_cstr(mrb, &dumped),
- ec->last_error.source_encoding);
- }
-
- exc = mrb_exc_new3(mrb, E_INVALIDBYTESEQUENCE_ERROR, mesg);
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "error_bytes"), bytes);
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "readagain_bytes"), bytes2);
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "incomplete_input"), ec->last_error.result == econv_incomplete_input ? mrb_true_value() : mrb_false_value());
- }
-
-set_encs:
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "source_encoding_name"), mrb_str_new2(mrb, ec->last_error.source_encoding));
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "destination_encoding_name"), mrb_str_new2(mrb, ec->last_error.destination_encoding));
- {
- int idx = mrb_enc_find_index(mrb, ec->last_error.source_encoding);
- if (0 <= idx)
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "source_encoding"), mrb_enc_from_encoding(mrb, mrb_enc_from_index(mrb, idx)));
- idx = mrb_enc_find_index(mrb, ec->last_error.destination_encoding);
- if (0 <= idx)
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "destination_encoding"), mrb_enc_from_encoding(mrb, mrb_enc_from_index(mrb, idx)));
- }
- return exc;
- }
- if (ec->last_error.result == econv_undefined_conversion) {
- mrb_value bytes = mrb_str_new(mrb, (const char*)ec->last_error.error_bytes_start,
- ec->last_error.error_bytes_len);
- mrb_value dumped = mrb_nil_value();
- int idx;
- if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) {
- mrb_encoding *utf8 = mrb_utf8_encoding(mrb);
- const char *start, *end;
- int n;
- start = (const char*)ec->last_error.error_bytes_start;
- end = start + ec->last_error.error_bytes_len;
- n = mrb_enc_precise_mbclen(start, end, utf8);
- if (MBCLEN_CHARFOUND_P(n) &&
- (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
- unsigned int cc = mrb_enc_mbc_to_codepoint(start, end, utf8);
- dumped = mrb_sprintf(mrb, "U+%04X", cc);
- }
- }
- if (mrb_obj_equal(mrb, dumped, mrb_nil_value()))
- dumped = mrb_str_dump(mrb, bytes);
- if (strcmp(ec->last_error.source_encoding,
- ec->source_encoding_name) == 0 &&
- strcmp(ec->last_error.destination_encoding,
- ec->destination_encoding_name) == 0) {
- mesg = mrb_sprintf(mrb, "%s from %s to %s",
- //StringValueCStr(dumped),
- mrb_string_value_cstr(mrb, &dumped),
- ec->last_error.source_encoding,
- ec->last_error.destination_encoding);
- }
- else {
- int i;
- mesg = mrb_sprintf(mrb, "%s to %s in conversion from %s",
- //StringValueCStr(dumped),
- mrb_string_value_cstr(mrb, &dumped),
- ec->last_error.destination_encoding,
- ec->source_encoding_name);
- for (i = 0; i < ec->num_trans; i++) {
- const mrb_transcoder *tr = ec->elems[i].tc->transcoder;
- if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
- mrb_str_catf(mrb, mesg, " to %s",
- ec->elems[i].tc->transcoder->dst_encoding);
- }
- }
- exc = mrb_exc_new3(mrb, E_UNDEFINEDCONVERSION_ERROR, mesg);
- idx = mrb_enc_find_index(mrb, ec->last_error.source_encoding);
- if (0 <= idx)
- mrb_enc_associate_index(mrb, bytes, idx);
- mrb_iv_set(mrb, exc, mrb_intern(mrb, "error_char"), bytes);
- goto set_encs;
- }
- return mrb_nil_value();
-}
-
-/*
- * Enlarge the destination buffer in the middle of a conversion.
- *
- * The number of bytes already produced is preserved; the new capacity is
- * twice the sum of that count and max_output.  All three buffer pointers are
- * re-based on whatever resize_destination returns.
- */
-static void
-more_output_buffer(mrb_state *mrb,
-        mrb_value destination,
-        unsigned char *(*resize_destination)(mrb_state *, mrb_value, size_t, size_t),
-        int max_output,
-        unsigned char **out_start_ptr,
-        unsigned char **out_pos,
-        unsigned char **out_stop_ptr)
-{
-  const size_t used = (*out_pos - *out_start_ptr);
-  const size_t grown = (used + max_output) * 2;
-  unsigned char *base = resize_destination(mrb, destination, used, grown);
-
-  *out_start_ptr = base;
-  *out_pos = base + used;
-  *out_stop_ptr = base + grown;
-}
-
-/*
- * Install a default replacement sequence on ec when none is set yet.
- *
- * Nothing is done if ec->replacement_str is already non-NULL.  Otherwise the
- * replacement is taken from get_replacement_character() when the name
- * returned by mrb_econv_encoding_to_insert_output() is non-empty, and is the
- * single byte "?" with an empty encoding name when it is empty.  The stored
- * replacement is flagged as not allocated (replacement_allocated = 0).
- *
- * Returns 0 on every path in this function.
- */
-static int
-make_replacement(mrb_state *mrb, mrb_econv_t *ec)
-{
- mrb_transcoding *tc;
- const mrb_transcoder *tr;
- mrb_encoding *enc;
- const unsigned char *replacement;
- const char *repl_enc;
- const char *ins_enc;
- size_t len;
-
- if (ec->replacement_str)
- return 0;
-
- ins_enc = mrb_econv_encoding_to_insert_output(ec);
-
- tc = ec->last_tc;
- if (*ins_enc) {
- /* NOTE(review): tr and enc are assigned here but never read again in this
-  * function; tc is dereferenced without a NULL check -- confirm last_tc is
-  * always set whenever ins_enc is non-empty. */
- tr = tc->transcoder;
- enc = mrb_enc_find(mrb, tr->dst_encoding);
- replacement = (const unsigned char*)get_replacement_character(ins_enc, &len, &repl_enc);
- }
- else {
- /* no encoding to insert into: fall back to a plain "?" */
- replacement = (unsigned char*)"?";
- len = 1;
- repl_enc = "";
- }
-
- ec->replacement_str = replacement;
- ec->replacement_len = len;
- ec->replacement_enc = repl_enc;
- ec->replacement_allocated = 0;
- return 0;
-}
-
-/*
- * Set the replacement sequence of ec to a private copy of str.
- *
- * When encname already equals the converter's insert-output encoding the
- * bytes are copied verbatim; otherwise they are converted through
- * allocate_converted_string().  A previously allocated replacement is freed.
- *
- * Returns 0 on success, -1 when the converted copy could not be produced.
- */
-int
-mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec,
-    const unsigned char *str, size_t len, const char *encname)
-{
-  const char *target_enc = mrb_econv_encoding_to_insert_output(ec);
-  unsigned char *copy;
-  size_t copy_len;
-
-  if (!encoding_equal(encname, target_enc)) {
-    copy = allocate_converted_string(mrb, encname, target_enc, str, len, NULL, 0, &copy_len);
-    if (!copy)
-      return -1;
-  }
-  else {
-    copy = xmalloc(len);
-    memcpy(copy, str, len); /* xxx: str may be invalid */
-    copy_len = len;
-    target_enc = encname;
-  }
-
-  /* drop the old replacement only if this converter owns it */
-  if (ec->replacement_allocated) {
-    xfree((void*)ec->replacement_str);
-  }
-  ec->replacement_str = copy;
-  ec->replacement_len = copy_len;
-  ec->replacement_enc = target_enc;
-  ec->replacement_allocated = 1;
-  return 0;
-}
-
-/*
- * Emit the converter's replacement sequence into its output.
- *
- * The replacement is prepared with make_replacement() first.
- * Returns 0 on success and -1 if either step reports -1.
- */
-static int
-output_replacement_character(mrb_state *mrb, mrb_econv_t *ec)
-{
-  int status;
-
-  if (make_replacement(mrb, ec) == -1)
-    return -1;
-
-  status = mrb_econv_insert_output(mrb, ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc);
-  return (status == -1) ? -1 : 0;
-}
-
-/*
- * Convert the bytes in [*in_pos, in_stop) from src_encoding to dst_encoding,
- * writing at *out_pos and enlarging the destination through
- * resize_destination whenever the converter reports a full output buffer.
- *
- * Raises the exception built by mrb_econv_open_exc() when no converter can
- * be opened, and the one built by make_econv_exception() on an invalid byte
- * sequence, incomplete input or undefined conversion.  If ecopts is a hash
- * holding a non-nil fallback entry, an undefined conversion is first looked
- * up in that fallback and, when an entry exists, its string value is
- * inserted into the output instead of raising.
- */
-static void
-transcode_loop(mrb_state *mrb,
- const unsigned char **in_pos, unsigned char **out_pos,
- const unsigned char *in_stop, unsigned char *out_stop,
- mrb_value destination,
- unsigned char *(*resize_destination)(mrb_state *, mrb_value, size_t, size_t),
- const char *src_encoding,
- const char *dst_encoding,
- int ecflags,
- mrb_value ecopts)
-{
- mrb_econv_t *ec;
- mrb_transcoding *last_tc;
- mrb_econv_result_t ret;
- unsigned char *out_start = *out_pos;
- int max_output;
- mrb_value exc;
- mrb_value fallback = mrb_nil_value();
- /* Sentinel passed as the default to the fallback lookup to detect a missing entry. */
- /* NOTE(review): only the tt member is initialised before the value is copied and compared. */
- mrb_value Qundef;
- Qundef.tt = 0;
-
- ec = mrb_econv_open_opts(mrb, src_encoding, dst_encoding, ecflags, ecopts);
- if (!ec)
- mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, src_encoding, dst_encoding, ecflags));
-
- if (!mrb_nil_p(ecopts) && TYPE(ecopts) == MRB_TT_HASH)
- fallback = mrb_hash_get(mrb, ecopts, sym_fallback);
- last_tc = ec->last_tc;
- /* growth hint for more_output_buffer(); 1 when the converter has no last transcoding */
- max_output = last_tc ? last_tc->transcoder->max_output : 1;
-
- resume:
- ret = mrb_econv_convert(mrb, ec, in_pos, in_stop, out_pos, out_stop, 0);
-
- if (!mrb_nil_p(fallback) && ret == econv_undefined_conversion) {
- /* look the offending bytes up in the fallback, tagged with the source encoding of the error */
- mrb_value rep = mrb_enc_str_new(mrb,
- (const char*)ec->last_error.error_bytes_start,
- ec->last_error.error_bytes_len,
- mrb_enc_find(mrb, ec->last_error.source_encoding));
- rep = mrb_hash_getWithDef(mrb, fallback, rep, Qundef);//mrb_hash_lookup2(fallback, rep, Qundef);
- if (!mrb_obj_equal(mrb, rep, Qundef)) {
- //StringValue(rep);
- mrb_string_value(mrb, &rep);
- ret = mrb_econv_insert_output(mrb, ec, (const unsigned char*)RSTRING_PTR(rep),
- RSTRING_LEN(rep), mrb_enc_name(mrb_enc_get(mrb, rep)));
- if ((int)ret == -1) {
- /* NOTE(review): this raise is not preceded by mrb_econv_close(ec), unlike the error path below */
- mrb_raise(mrb, E_ARGUMENT_ERROR, "too big fallback string");
- }
- goto resume;
- }
- }
-
- if (ret == econv_invalid_byte_sequence ||
- ret == econv_incomplete_input ||
- ret == econv_undefined_conversion) {
- /* build the exception while ec is still open, then close it before raising */
- exc = make_econv_exception(mrb, ec);
- mrb_econv_close(ec);
- mrb_exc_raise(mrb, exc);
- }
-
- if (ret == econv_destination_buffer_full) {
- /* out_start/out_stop are locals; *out_pos is the caller's cursor and is re-based too */
- more_output_buffer(mrb, destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
- goto resume;
- }
-
- mrb_econv_close(ec);
- return;
-}
-
-/*
- * String-specific code
- */
-
-/*
- * Resize callback used when the destination is a String: resizes destination
- * to new_len bytes and hands back its data pointer.  The len argument is not
- * used here.
- */
-static unsigned char *
-str_transcoding_resize(mrb_state *mrb, mrb_value destination, size_t len, size_t new_len)
-{
-  unsigned char *data;
-
-  mrb_str_resize(mrb, destination, new_len);
-  data = (unsigned char*)RSTRING_PTR(destination);
-  return data;
-}
-
-/*
- * Translate the entries of the options hash opt into ECONV_* flag bits.
- *
- * Raises ArgumentError for an :invalid or :undef value other than :replace
- * and for an :xml value other than :text or :attr.
- */
-static int
-econv_opts(mrb_state *mrb, mrb_value opt)
-{
-  int flags = 0;
-  mrb_value val;
-
-  /* :invalid => :replace */
-  val = mrb_hash_get(mrb, opt, sym_invalid);
-  if (!mrb_nil_p(val)) {
-    if (mrb_obj_equal(mrb, val, sym_replace))
-      flags |= ECONV_INVALID_REPLACE;
-    else
-      mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown value for invalid character option");
-  }
-
-  /* :undef => :replace */
-  val = mrb_hash_get(mrb, opt, sym_undef);
-  if (!mrb_nil_p(val)) {
-    if (mrb_obj_equal(mrb, val, sym_replace))
-      flags |= ECONV_UNDEF_REPLACE;
-    else
-      mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown value for undefined character option");
-  }
-
-  /* a :replace entry switches on undefined-character replacement unless
-     invalid-sequence replacement was requested above */
-  val = mrb_hash_get(mrb, opt, sym_replace);
-  if (!(flags & ECONV_INVALID_REPLACE) && !mrb_nil_p(val))
-    flags |= ECONV_UNDEF_REPLACE;
-
-  /* :xml => :text | :attr */
-  val = mrb_hash_get(mrb, opt, sym_xml);
-  if (!mrb_nil_p(val)) {
-    if (mrb_obj_equal(mrb, val, sym_text))
-      flags |= ECONV_XML_TEXT_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
-    else if (mrb_obj_equal(mrb, val, sym_attr))
-      flags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
-    else if (TYPE(val) != MRB_TT_SYMBOL)
-      mrb_raise(mrb, E_ARGUMENT_ERROR, "unexpected value for xml option");
-    else
-      mrb_raise(mrb, E_ARGUMENT_ERROR, "unexpected value for xml option: %s", mrb_sym2name(mrb, SYM2ID(val)));
-  }
-
-  /* newline decorators: any truthy value enables them */
-  val = mrb_hash_get(mrb, opt, sym_universal_newline);
-  if (RTEST(val)) {
-    flags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
-  }
-
-  val = mrb_hash_get(mrb, opt, sym_crlf_newline);
-  if (RTEST(val)) {
-    flags |= ECONV_CRLF_NEWLINE_DECORATOR;
-  }
-
-  val = mrb_hash_get(mrb, opt, sym_cr_newline);
-  if (RTEST(val)) {
-    flags |= ECONV_CR_NEWLINE_DECORATOR;
-  }
-
-  return flags;
-}
-
-/*
- * Split a user-supplied options hash into flag bits (the return value) and a
- * reduced hash stored in *opts that carries only the :replace string and the
- * :fallback hash, when present.
- *
- * A nil opthash yields 0 and a nil *opts.  Raises ArgumentError when the
- * :replace string has a broken code range.
- */
-int
-mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *opts)
-{
-  mrb_value reduced = mrb_nil_value();
-  mrb_value item;
-  int flags;
-
-  if (mrb_nil_p(opthash)) {
-    *opts = mrb_nil_value();
-    return 0;
-  }
-  flags = econv_opts(mrb, opthash);
-
-  /* :replace -- must be a well-formed string; a frozen copy is kept */
-  item = mrb_hash_get(mrb, opthash, sym_replace);
-  if (!mrb_nil_p(item)) {
-    //StringValue(v);
-    mrb_string_value(mrb, &item);
-    if (mrb_enc_str_coderange(mrb, item) == ENC_CODERANGE_BROKEN) {
-      mrb_value dumped = mrb_str_dump(mrb, item);
-      mrb_raise(mrb, E_ARGUMENT_ERROR, "replacement string is broken: %s as %s",
-                //StringValueCStr(dumped),
-                mrb_string_value_cstr(mrb, &dumped),
-                mrb_enc_name(mrb_enc_get(mrb, item)));
-    }
-    item = mrb_str_new_frozen(mrb, item);
-    reduced = mrb_hash_new_capa(mrb, 0);
-    mrb_hash_set(mrb, reduced, sym_replace, item);
-  }
-
-  /* :fallback -- coerced to a Hash through to_hash */
-  item = mrb_hash_get(mrb, opthash, sym_fallback);
-  if (!mrb_nil_p(item)) {
-    item = mrb_convert_type(mrb, item, MRB_TT_HASH, "Hash", "to_hash");
-    if (!mrb_nil_p(item)) {
-      if (mrb_nil_p(reduced)) {
-        reduced = mrb_hash_new_capa(mrb, 0);
-      }
-      mrb_hash_set(mrb, reduced, sym_fallback, item);
-    }
-  }
-
-  //if (!mrb_nil_p(newhash))
-  //  mrb_hash_freeze(newhash);
-  *opts = reduced;
-
-  return flags;
-}
-
-/*
- * Open a converter and, when opthash carries a :replace entry, install that
- * string as the converter's replacement.
- *
- * opthash must be nil or a Hash (anything else is reported via mrb_bug).
- * Returns NULL when the converter cannot be opened or the replacement cannot
- * be set; in the latter case the converter is closed first.
- */
-mrb_econv_t *
-mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value opthash)
-{
-  mrb_econv_t *conv;
-  mrb_value repl = mrb_nil_value();
-  mrb_encoding *repl_enc;
-  int status;
-
-  if (!mrb_nil_p(opthash)) {
-    if (TYPE(opthash) != MRB_TT_HASH /*|| !OBJ_FROZEN(opthash)*/)
-      mrb_bug("mrb_econv_open_opts called with invalid opthash");
-    repl = mrb_hash_get(mrb, opthash, sym_replace);
-  }
-
-  conv = mrb_econv_open(mrb, source_encoding, destination_encoding, ecflags);
-  if (!conv)
-    return conv;
-
-  if (mrb_nil_p(repl))
-    return conv;
-
-  repl_enc = mrb_enc_get(mrb, repl);
-  status = mrb_econv_set_replacement(mrb, conv,
-                                     (const unsigned char*)RSTRING_PTR(repl),
-                                     RSTRING_LEN(repl),
-                                     mrb_enc_name(repl_enc));
-  if (status == -1) {
-    mrb_econv_close(conv);
-    return NULL;
-  }
-  return conv;
-}
-
-/*
- * Resolve *arg to an encoding.
- *
- * When *arg maps to a known encoding, *enc_p receives it, *name_p its name,
- * and the encoding index is returned.  Otherwise *enc_p is NULL, *name_p is
- * the C string value of *arg, and 0 is returned.
- */
-static int
-enc_arg(mrb_state *mrb, mrb_value *arg, const char **name_p, mrb_encoding **enc_p)
-{
-  mrb_encoding *found = NULL;
-  const char *label;
-  int idx = mrb_to_encoding_index(mrb, *arg);
-
-  if (idx >= 0)
-    found = mrb_enc_from_index(mrb, idx);
-
-  if (found) {
-    label = mrb_enc_name(found);
-  }
-  else {
-    idx = 0;
-    //n = StringValueCStr(*arg);
-    label = mrb_string_value_cstr(mrb, arg);
-  }
-
-  *name_p = label;
-  *enc_p = found;
-
-  return idx;
-}
-
-/*
- * Work out source and destination encodings for a String transcode.
- *
- * arg1 names the destination.  arg2 names the source; when *arg2 is nil the
- * source is the encoding currently associated with str.  Names and encoding
- * objects are stored through the four output pointers; the value returned is
- * whatever enc_arg() returned for the destination.
- */
-static int
-str_transcode_enc_args(mrb_state *mrb,
-        mrb_value str, mrb_value *arg1, mrb_value *arg2,
-        const char **sname_p, mrb_encoding **senc_p,
-        const char **dname_p, mrb_encoding **denc_p)
-{
-  mrb_encoding *src_enc, *dst_enc;
-  const char *src_name, *dst_name;
-  int dst_idx;
-
-  dst_idx = enc_arg(mrb, arg1, &dst_name, &dst_enc);
-
-  if (!mrb_nil_p(*arg2)) {
-    enc_arg(mrb, arg2, &src_name, &src_enc);
-  }
-  else {
-    int src_idx = mrb_enc_get_index(mrb, str);
-    src_enc = mrb_enc_from_index(mrb, src_idx);
-    src_name = mrb_enc_name(src_enc);
-  }
-
-  *sname_p = src_name;
-  *senc_p = src_enc;
-  *dname_p = dst_name;
-  *denc_p = dst_enc;
-  return dst_idx;
-}
-
-/*
- * Create a string of length len by calling mrb_str_new() with a null source
- * pointer; used below as scratch space for transcoding output.
- */
-mrb_value
-mrb_str_tmp_new(mrb_state *mrb, long len)
-{
-  mrb_value scratch = mrb_str_new(mrb, 0, len);
-
-  return scratch;
-}
-
-/*
- * Core of String#encode / String#encode!.
- *
- * argv[0] (optional) is the destination encoding and argv[1] (optional) the
- * source encoding; with no arguments the destination is the default internal
- * encoding.  On an actual conversion *self is replaced by a new string and
- * the value returned by str_transcode_enc_args() for the destination is
- * returned (or the index of a freshly defined dummy encoding when the
- * destination has no encoding object).
- *
- * Returns -1 when nothing needs to be done.  Several early exits return the
- * destination index without touching *self; callers can detect that by
- * comparing *self with the original string.
- *
- * Raises ArgumentError for more than two arguments or when the input was not
- * fully consumed; transcode_loop() may raise conversion errors.
- */
-static int
-str_transcode0(mrb_state *mrb, int argc, mrb_value *argv, mrb_value *self, int ecflags, mrb_value ecopts)
-{
-
- mrb_value dest;
- mrb_value str = *self;
- mrb_value arg1, arg2;
- long blen, slen;
- unsigned char *buf, *bp, *sp;
- const unsigned char *fromp;
- mrb_encoding *senc, *denc;
- const char *sname, *dname;
- int dencidx;
-
- if (argc <0 || argc > 2) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 0..2)", argc);
- }
-
- if (argc == 0) {
- arg1 = mrb_enc_default_internal(mrb);
- if (mrb_nil_p(arg1)) {
- /* no default internal encoding and no flags: nothing to do */
- if (!ecflags) return -1;
- arg1 = mrb_obj_encoding(mrb, str);
- }
- /* the argument-less form replaces bad input instead of raising */
- ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
- }
- else {
- arg1 = argv[0];
- }
- arg2 = argc<=1 ? mrb_nil_value() : argv[1];
- dencidx = str_transcode_enc_args(mrb, str, &arg1, &arg2, &sname, &senc, &dname, &denc);
-
- if ((ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR|
- ECONV_CRLF_NEWLINE_DECORATOR|
- ECONV_CR_NEWLINE_DECORATOR|
- ECONV_XML_TEXT_DECORATOR|
- ECONV_XML_ATTR_CONTENT_DECORATOR|
- ECONV_XML_ATTR_QUOTE_DECORATOR)) == 0) {
- /* no decorators requested: look for cases where the bytes need no conversion */
- if (senc && senc == denc) {
- return mrb_nil_p(arg2) ? -1 : dencidx;
- }
- if (senc && denc && mrb_enc_asciicompat(mrb, senc) && mrb_enc_asciicompat(mrb, denc)) {
- if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) {
- return dencidx;
- }
- }
- if (encoding_equal(sname, dname)) {
- return mrb_nil_p(arg2) ? -1 : dencidx;
- }
- }
- else {
- /* decorators only: same source and destination means both names become empty */
- if (encoding_equal(sname, dname)) {
- sname = "";
- dname = "";
- }
- }
-
- fromp = sp = (unsigned char*)RSTRING_PTR(str);
- slen = RSTRING_LEN(str);
- blen = slen + 30; /* len + margin */
- dest = mrb_str_tmp_new(mrb, blen);
- bp = (unsigned char*)RSTRING_PTR(dest);
-
- transcode_loop(mrb, &fromp, &bp, (sp+slen), (bp+blen), dest, str_transcoding_resize, sname, dname, ecflags, ecopts);
- if (fromp != sp+slen) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "not fully converted, %td bytes left", sp+slen-fromp);
- }
- /* dest may have been resized during the loop, so fetch its pointer again */
- buf = (unsigned char*)RSTRING_PTR(dest);
- /* NOTE(review): writes one byte at the output cursor; assumes the string buffer has room for a terminator -- confirm */
- *bp = '\0';
- mrb_str_set_len(mrb, dest, bp - buf);
-
- /* set encoding */
- if (!denc) {
- dencidx = mrb_define_dummy_encoding(mrb, dname);
- }
- *self = dest;
-
- return dencidx;
-}
-
-/*
- * Front end of str_transcode0(): if the last argument converts to a Hash it
- * is removed from the argument count and turned into conversion flags and
- * options through mrb_econv_prepare_opts().
- */
-static int
-str_transcode(mrb_state *mrb, int argc, mrb_value *argv, mrb_value *self)
-{
-  mrb_value ecopts = mrb_nil_value();
-  int ecflags = 0;
-
-  if (argc > 0) {
-    mrb_value last = mrb_check_convert_type(mrb, argv[argc-1], MRB_TT_HASH, "Hash", "to_hash");
-
-    if (!mrb_nil_p(last)) {
-      argc -= 1;
-      ecflags = mrb_econv_prepare_opts(mrb, last, &ecopts);
-    }
-  }
-  return str_transcode0(mrb, argc, argv, self, ecflags, ecopts);
-}
-
-/*
- * Tag str with encoding index encidx and record its code range.
- *
- * For an ASCII-compatible encoding the range is obtained by scanning the
- * bytes; otherwise it is set to ENC_CODERANGE_VALID.  Returns str.
- */
-static inline mrb_value
-str_encode_associate(mrb_state *mrb, mrb_value str, int encidx)
-{
-  int coderange = 0;
-
-  mrb_enc_associate_index(mrb, str, encidx);
-
-  /* a transcoded string is never broken */
-  if (!mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, encidx))) {
-    coderange = ENC_CODERANGE_VALID;
-  }
-  else {
-    mrb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &coderange);
-  }
-  ENC_CODERANGE_SET(str, coderange);
-  return str;
-}
-
-/*
- * call-seq:
- * str.encode!(encoding [, options] ) -> str
- * str.encode!(dst_encoding, src_encoding [, options] ) -> str
- *
- * The first form transcodes the contents of <i>str</i> from
- * str.encoding to +encoding+.
- * The second form transcodes the contents of <i>str</i> from
- * src_encoding to dst_encoding.
- * The options Hash gives details for conversion. See String#encode
- * for details.
- * Returns the string even if no changes were made.
- */
-
-/*
- * Implementation note: the method arguments are fetched with the "*" format
- * of mrb_get_args(), which stores a pointer to the argument values and their
- * count.  argv is therefore declared as a pointer; a local array here would
- * have its first bytes overwritten by that pointer and then be read as
- * argument values.
- *
- * Returns str itself, whether or not a conversion took place.
- */
-static mrb_value
-str_encode_bang(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value str)
-{
-  mrb_value *argv = NULL;
-  int argc = 0;
-  mrb_value newstr;
-  int encidx;
-
-  mrb_get_args(mrb, "*", &argv, &argc);
-
-  newstr = str;
-  encidx = str_transcode(mrb, argc, argv, &newstr);
-
-  /* negative index: nothing to convert, leave the receiver untouched */
-  if (encidx < 0) return str;
-  mrb_str_shared_replace(mrb, str, newstr);
-  return str_encode_associate(mrb, str, encidx);
-}
-
-/*
- * call-seq:
- * str.encode(encoding [, options] ) -> str
- * str.encode(dst_encoding, src_encoding [, options] ) -> str
- * str.encode([options]) -> str
- *
- * The first form returns a copy of <i>str</i> transcoded
- * to encoding +encoding+.
- * The second form returns a copy of <i>str</i> transcoded
- * from src_encoding to dst_encoding.
- * The last form returns a copy of <i>str</i> transcoded to
- * <code>Encoding.default_internal</code>.
- * By default, the first and second form raise
- * Encoding::UndefinedConversionError for characters that are
- * undefined in the destination encoding, and
- * Encoding::InvalidByteSequenceError for invalid byte sequences
- * in the source encoding. The last form by default does not raise
- * exceptions but uses replacement strings.
- * The <code>options</code> Hash gives details for conversion.
- *
- * === options
- * The hash <code>options</code> can have the following keys:
- * :invalid ::
- * If the value is <code>:replace</code>, <code>#encode</code> replaces
- * invalid byte sequences in <code>str</code> with the replacement character.
- * The default is to raise the exception
- * :undef ::
- * If the value is <code>:replace</code>, <code>#encode</code> replaces
- * characters which are undefined in the destination encoding with
- * the replacement character.
- * :replace ::
- * Sets the replacement string to the value. The default replacement
- * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
- * :fallback ::
- * Sets the replacement strings for undefined characters by a hash.
- * Each key is such an undefined character, encoded in the source encoding
- * of the current transcoder. Each value can be in any encoding as long as it
- * can be converted into the destination encoding of the transcoder.
- * :xml ::
- * The value must be <code>:text</code> or <code>:attr</code>.
- * If the value is <code>:text</code> <code>#encode</code> replaces
- * undefined characters with their (upper-case hexadecimal) numeric
- * character references. '&', '<', and '>' are converted to "&amp;",
- * "&lt;", and "&gt;", respectively.
- * If the value is <code>:attr</code>, <code>#encode</code> also quotes
- * the replacement result (using '"'), and replaces '"' with "&quot;".
- * :cr_newline ::
- * Replaces LF ("\n") with CR ("\r") if value is true.
- * :crlf_newline ::
- * Replaces LF ("\n") with CRLF ("\r\n") if value is true.
- * :universal_newline ::
- * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
- */
-
-/*
- * Implementation note: the method arguments are fetched with the "*" format
- * of mrb_get_args(), which stores a pointer to the argument values and their
- * count.  argv is therefore declared as a pointer; a local array here would
- * have its first bytes overwritten by that pointer and then be read as
- * argument values.
- *
- * Always returns a string distinct from the receiver: a duplicate when no
- * conversion was needed, otherwise the converted string with the receiver's
- * class.
- */
-static mrb_value
-str_encode(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value str)
-{
-  mrb_value *argv = NULL;
-  int argc = 0;
-  mrb_value newstr;
-  int encidx;
-
-  mrb_get_args(mrb, "*", &argv, &argc);
-  newstr = str;
-  encidx = str_transcode(mrb, argc, argv, &newstr);
-
-  if (encidx < 0) return mrb_str_dup(mrb, str);
-  if (mrb_obj_equal(mrb, newstr, str)) {
-    /* str_transcode left the receiver in place: copy before re-tagging */
-    newstr = mrb_str_dup(mrb, str);
-  }
-  else {
-    RBASIC(newstr)->c = mrb_obj_class(mrb, str);
-  }
-  return str_encode_associate(mrb, newstr, encidx);
-}
-
-/*
- * C-level counterpart of String#encode with a single destination argument
- * and pre-computed flags/options.  Returns a new string: a duplicate of str
- * when no conversion was performed, otherwise the converted string carrying
- * the class of str.
- */
-mrb_value
-mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts)
-{
-  mrb_value converted = str;
-  int encidx = str_transcode0(mrb, 1, &to, &converted, ecflags, ecopts);
-
-  if (encidx < 0)
-    return mrb_str_dup(mrb, str);
-
-  if (!mrb_obj_equal(mrb, converted, str)) {
-    RBASIC(converted)->c = mrb_obj_class(mrb, str);
-  }
-  else {
-    converted = mrb_str_dup(mrb, str);
-  }
-  return str_encode_associate(mrb, converted, encidx);
-}
-
-/* Free callback referenced by econv_data_type: closes the converter held in ptr. */
-static void
-econv_free(mrb_state *mrb, void *ptr)
-{
-  mrb_econv_close((mrb_econv_t*)ptr);
-}
-
-/*
- * Data-type descriptor used with Data_Get_Struct()/mrb_check_datatype() for
- * converter objects.  It pairs the string "econv" with econv_free.
- * NOTE(review): presumably these fill the name and free-function members of
- * struct mrb_data_type -- confirm against its declaration.
- */
-static const struct mrb_data_type econv_data_type = {
- "econv", econv_free,
-};
-
-/* Define a dummy encoding called name and return its encoding object. */
-static mrb_encoding *
-make_dummy_encoding(mrb_state *mrb, const char *name)
-{
-  const int idx = mrb_define_dummy_encoding(mrb, name);
-
-  return mrb_enc_from_index(mrb, idx);
-}
-
-/* Look up the encoding called name, defining a dummy one if it is unknown. */
-static mrb_encoding *
-make_encoding(mrb_state *mrb, const char *name)
-{
-  mrb_encoding *found = mrb_enc_find(mrb, name);
-
-  if (found)
-    return found;
-  return make_dummy_encoding(mrb, name);
-}
-
-/* Return the Encoding object for name (see make_encoding for unknown names). */
-static mrb_value
-make_encobj(mrb_state *mrb, const char *name)
-{
-  mrb_encoding *enc = make_encoding(mrb, name);
-
-  return mrb_enc_from_encoding(mrb, enc);
-}
-
-/*
- * call-seq:
- * Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
- * Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
- *
- * Returns the corresponding ASCII compatible encoding.
- *
- * Returns nil if the argument is an ASCII compatible encoding.
- *
- * "corresponding ASCII compatible encoding" is a ASCII compatible encoding which
- * can represent exactly the same characters as the given ASCII incompatible encoding.
- * So, no undefined conversion error occurs when converting between the two encodings.
- *
- * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
- * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
- * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
- *
- */
-/*
- * Takes one argument (string or encoding), resolves it to an encoding name
- * and returns the Encoding object named by mrb_econv_asciicompat_encoding(),
- * or nil when that lookup yields NULL.
- */
-static mrb_value
-econv_s_asciicompat_encoding(mrb_state *mrb, mrb_value klass)
-{
-  mrb_value arg;
-  const char *given_name;
-  const char *compat_name;
-  mrb_encoding *given_enc;
-  mrb_encoding *compat_enc;
-
-  mrb_get_args(mrb, "o", &arg);
-  enc_arg(mrb, &arg, &given_name, &given_enc);
-
-  compat_name = mrb_econv_asciicompat_encoding(given_name);
-  if (!compat_name) {
-    return mrb_nil_value();
-  }
-
-  compat_enc = make_encoding(mrb, compat_name);
-  return mrb_enc_from_encoding(mrb, compat_enc);
-}
-
-/*
- * Decode the (source, destination [, opt]) argument list shared by
- * Encoding::Converter.new and Encoding::Converter.search_convpath.
- *
- * argv[0] and argv[1] name the source and destination; each may be anything
- * mrb_to_encoding_index() accepts, otherwise it is converted to a string and
- * used as a bare name.  The optional argv[2] is either an integer of ECONV_*
- * flags or an options hash handled by mrb_econv_prepare_opts().
- *
- * Outputs: the (possibly string-converted) name values, the C names, the
- * encoding objects (NULL when only a name is known), the flags and the
- * reduced options hash (nil unless a hash was given).
- *
- * Raises ArgumentError when fewer than two arguments are supplied.  The
- * option argument is read only when it is actually present, so argv is never
- * indexed beyond argc.
- */
-static void
-econv_args(mrb_state *mrb,
-    int argc, mrb_value *argv,
-    mrb_value *snamev_p, mrb_value *dnamev_p,
-    const char **sname_p, const char **dname_p,
-    mrb_encoding **senc_p, mrb_encoding **denc_p,
-    int *ecflags_p,
-    mrb_value *ecopts_p)
-{
-  mrb_value opt, opthash, flags_v, ecopts;
-  int sidx, didx;
-  const char *sname, *dname;
-  mrb_encoding *senc, *denc;
-  int ecflags;
-
-  //mrb_scan_args(argc, argv, "21", snamev_p, dnamev_p, &opt);
-  if (argc < 2) {
-    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2..3)", argc);
-  }
-  *snamev_p = argv[0];
-  *dnamev_p = argv[1];
-
-  if (argc < 3) {//mrb_nil_p(opt)) {
-    ecflags = 0;
-    ecopts = mrb_nil_value();
-  }
-  else {
-    opt = argv[2];
-    /* keep the call out of the mrb_nil_p() argument in case that is a macro
-       that evaluates its argument more than once */
-    flags_v = mrb_check_to_integer(mrb, opt, "to_int");
-    if (!mrb_nil_p(flags_v)) {
-      ecflags = mrb_fixnum(flags_v);
-      ecopts = mrb_nil_value();
-    }
-    else {
-      opthash = mrb_convert_type(mrb, opt, MRB_TT_HASH, "Hash", "to_hash");
-      ecflags = mrb_econv_prepare_opts(mrb, opthash, &ecopts);
-    }
-  }
-
-  senc = NULL;
-  sidx = mrb_to_encoding_index(mrb, *snamev_p);
-  if (0 <= sidx) {
-    senc = mrb_enc_from_index(mrb, sidx);
-  }
-  else {
-    //StringValue(*snamev_p);
-    mrb_string_value(mrb, snamev_p);
-  }
-
-  denc = NULL;
-  didx = mrb_to_encoding_index(mrb, *dnamev_p);
-  if (0 <= didx) {
-    denc = mrb_enc_from_index(mrb, didx);
-  }
-  else {
-    //StringValue(*dnamev_p);
-    mrb_string_value(mrb, dnamev_p);
-  }
-
-  //sname = senc ? mrb_enc_name(senc) : StringValueCStr(*snamev_p);
-  sname = senc ? mrb_enc_name(senc) : mrb_string_value_cstr(mrb, snamev_p);
-  //dname = denc ? mrb_enc_name(denc) : StringValueCStr(*dnamev_p);
-  dname = denc ? mrb_enc_name(denc) : mrb_string_value_cstr(mrb, dnamev_p);
-
-  *sname_p = sname;
-  *dname_p = dname;
-  *senc_p = senc;
-  *denc_p = denc;
-  *ecflags_p = ecflags;
-  *ecopts_p = ecopts;
-}
-
-/*
- * Insert the decorator names selected by ecflags into the conversion path
- * array convpath.
- *
- * The decorators are stored starting at index n.  n is the original length,
- * except that when the last element is an encoding pair whose transcoder is
- * not itself a decorator and has asciicompat_type == asciicompat_encoder, n
- * is decremented and that pair is re-stored after the decorators, so the
- * decorators end up in front of the final conversion.
- *
- * Returns 0 on success, -1 when decorator_names() reports -1 or the
- * transcoder for the last pair cannot be loaded.
- */
-static int
-decorate_convpath(mrb_state *mrb, mrb_value convpath, int ecflags)
-{
- int num_decorators;
- const char *decorators[MAX_ECFLAGS_DECORATORS];
- int i;
- int n, len;
-
- num_decorators = decorator_names(ecflags, decorators);
- if (num_decorators == -1)
- return -1;
-
- len = n = RARRAY_LEN(convpath);//RARRAY_LENINT(convpath);
- if (n != 0) {
- /* inspect the last step of the path */
- mrb_value pair = RARRAY_PTR(convpath)[n-1];
- if (TYPE(pair) == MRB_TT_ARRAY) {
- const char *sname = mrb_enc_name(mrb_to_encoding(mrb, RARRAY_PTR(pair)[0]));
- const char *dname = mrb_enc_name(mrb_to_encoding(mrb, RARRAY_PTR(pair)[1]));
- /* NOTE(review): entry is handed to load_transcoder_entry() without a NULL check here -- confirm that is handled there */
- transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
- const mrb_transcoder *tr = load_transcoder_entry(mrb, entry);
- if (!tr)
- return -1;
- if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
- tr->asciicompat_type == asciicompat_encoder) {
- /* move the final conversion behind the decorators */
- n--;
- mrb_ary_set(mrb, convpath, len + num_decorators - 1, pair);
- }
- }
- else {
- /* last element is not an array: it is re-stored at the new last index; n is left unchanged */
- mrb_ary_set(mrb, convpath, len + num_decorators - 1, pair);
- }
- }
-
- for (i = 0; i < num_decorators; i++)
- mrb_ary_set(mrb, convpath, n + i, mrb_str_new_cstr(mrb, decorators[i]));
-
- return 0;
-}
-
-/*
- * Callback for transcode_search_path(): records one step of the path in the
- * array pointed to by arg, creating the array on first use.  A decorator
- * step is stored as its name string, any other step as a
- * [source encoding, destination encoding] pair, at index depth.
- */
-static void
-search_convpath_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg)
-{
-  mrb_value *path = arg;
-  mrb_value step;
-
-  if (mrb_obj_equal(mrb, *path, mrb_nil_value())) {
-    *path = mrb_ary_new(mrb);
-  }
-
-  step = DECORATOR_P(sname, dname)
-    ? mrb_str_new_cstr(mrb, dname)
-    : mrb_assoc_new(mrb, make_encobj(mrb, sname), make_encobj(mrb, dname));
-
-  mrb_ary_set(mrb, *path, depth, step);
-}
-
-/*
- * call-seq:
- * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
- * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
- *
- * Returns a conversion path.
- *
- * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
- * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
- * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
- *
- * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
- * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
- * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
- * # "universal_newline"]
- *
- * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
- * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
- * # "universal_newline",
- * # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
- */
-/*
- * Implementation note: the method arguments are fetched with the "*" format
- * of mrb_get_args(), which stores a pointer to the argument values and their
- * count.  argv is therefore declared as a pointer; a local array here would
- * have its first bytes overwritten by that pointer and then be read as
- * argument values.
- *
- * Raises ArgumentError when fewer than two arguments are given, and the
- * exception built by mrb_econv_open_exc() when no path exists or the
- * decorators cannot be applied.
- */
-static mrb_value
-econv_s_search_convpath(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value klass)
-{
-  mrb_value snamev, dnamev;
-  const char *sname, *dname;
-  mrb_encoding *senc, *denc;
-  int ecflags;
-  mrb_value ecopts;
-  mrb_value convpath;
-
-  mrb_value *argv = NULL;
-  int argc = 0;
-
-  mrb_get_args(mrb, "*", &argv, &argc);
-  /* econv_args() indexes argv[0] and argv[1]; make sure they exist */
-  if (argc < 2) {
-    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2..3)", argc);
-  }
-  econv_args(mrb, argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
-
-  /* stays nil unless the search callback is invoked at least once */
-  convpath = mrb_nil_value();
-  transcode_search_path(mrb, sname, dname, search_convpath_i, &convpath);
-
-  if (mrb_nil_p(convpath))
-    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags));
-
-  if (decorate_convpath(mrb, convpath, ecflags) == -1)
-    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags));
-
-  return convpath;
-}
-
-/*
- * Check the existence of a conversion path.
- *
- * Runs transcode_search_path() with search_convpath_i, which replaces the
- * initially nil convpath with an array when it is invoked.
- * result: RTEST(convpath) -- true if a path array was built, false otherwise.
- *
- * NOTE(review): this comment previously promised "the number of converters
- * in the conversion path (>=0: success, -1: failure)"; the code below returns
- * a truth value, not a count, and never -1.
- */
-int
-mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding)
-{
- mrb_value convpath = mrb_nil_value();
- transcode_search_path(mrb, from_encoding, to_encoding, search_convpath_i,
- &convpath);
- return RTEST(convpath);
-}
-
-/* State handed to mrb_econv_init_by_convpath_i through its void* argument. */
-struct mrb_econv_init_by_convpath_t {
- mrb_econv_t *ec; /* converter that receives the new conversion steps */
- int index; /* position passed to mrb_econv_add_converter() */
- int ret; /* result of the last add; once -1, further steps are skipped */
-};
-
-/*
- * Callback for transcode_search_path(): adds the sname -> dname step to the
- * converter described by arg.  Once an addition has failed (ret == -1) every
- * later call is a no-op.  The depth argument is not used.
- */
-static void
-mrb_econv_init_by_convpath_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg)
-{
-  struct mrb_econv_init_by_convpath_t *state = (struct mrb_econv_init_by_convpath_t*)arg;
-
-  if (state->ret != -1) {
-    state->ret = mrb_econv_add_converter(mrb, state->ec, sname, dname, state->index);
-  }
-}
-
-/*
- * Build a converter from an explicit conversion path.
- *
- * Each element of convpath is either a two-element array naming a source and
- * destination encoding, or a value used as a decorator name.  The new
- * converter is stored in DATA_PTR(self) right after allocation, before any
- * element is processed.
- *
- * On return *sname_p / *senc_p describe the source of the first
- * non-decorator step and *dname_p / *denc_p the destination of the last one;
- * if the path holds no such step they are "" and NULL.  The same names are
- * copied into the converter.
- *
- * Raises ArgumentError for a pair that does not have exactly two elements
- * and when a decorator or conversion cannot be added.
- */
-static mrb_econv_t *
-mrb_econv_init_by_convpath(mrb_state *mrb, mrb_value self, mrb_value convpath,
- const char **sname_p, const char **dname_p,
- mrb_encoding **senc_p, mrb_encoding**denc_p)
-{
- mrb_econv_t *ec;
- long i;
- int ret, first=1;
- mrb_value elt;
- mrb_encoding *senc = 0, *denc = 0;
- const char *sname, *dname;
-
- ec = mrb_econv_alloc(RARRAY_LEN/*INT*/(convpath));
- DATA_PTR(self) = ec;
-
- for (i = 0; i < RARRAY_LEN(convpath); i++) {
- mrb_value snamev, dnamev;
- mrb_value pair;
- elt = mrb_ary_ref(mrb, convpath, i);
- /* NOTE(review): the assignment sits inside the mrb_nil_p() argument; if mrb_nil_p is a macro that evaluates its argument more than once the check runs more than once -- confirm */
- if (!mrb_nil_p(pair = mrb_check_array_type(mrb, elt))) {
- if (RARRAY_LEN(pair) != 2)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "not a 2-element array in convpath");
- snamev = mrb_ary_ref(mrb, pair, 0);
- enc_arg(mrb, &snamev, &sname, &senc);
- dnamev = mrb_ary_ref(mrb, pair, 1);
- enc_arg(mrb, &dnamev, &dname, &denc);
- }
- else {
- /* not an array: treat the element as a decorator name (empty source name) */
- sname = "";
- //dname = StringValueCStr(elt);
- dname = mrb_string_value_cstr(mrb, &elt);
- }
- if (DECORATOR_P(sname, dname)) {
- ret = mrb_econv_add_converter(mrb, ec, sname, dname, ec->num_trans);
- if (ret == -1)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "decoration failed: %s", dname);
- }
- else {
- /* j remembers where this step's converters start in ec->elems */
- int j = ec->num_trans;
- struct mrb_econv_init_by_convpath_t arg;
- arg.ec = ec;
- arg.index = ec->num_trans;
- arg.ret = 0;
- ret = transcode_search_path(mrb, sname, dname, mrb_econv_init_by_convpath_i, &arg);
- if (ret == -1 || arg.ret == -1)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "adding conversion failed: %s to %s", sname, dname);
- if (first) {
- /* the first real conversion fixes the overall source encoding */
- first = 0;
- *senc_p = senc;
- *sname_p = ec->elems[j].tc->transcoder->src_encoding;
- }
- /* every real conversion updates the overall destination encoding */
- *denc_p = denc;
- *dname_p = ec->elems[ec->num_trans-1].tc->transcoder->dst_encoding;
- }
- }
-
- if (first) {
- /* the path contained decorators only (or was empty) */
- *senc_p = NULL;
- *denc_p = NULL;
- *sname_p = "";
- *dname_p = "";
- }
-
- ec->source_encoding_name = *sname_p;
- ec->destination_encoding_name = *dname_p;
-
- return ec;
-}
-
-/*
- * call-seq:
- * Encoding::Converter.new(source_encoding, destination_encoding)
- * Encoding::Converter.new(source_encoding, destination_encoding, opt)
- * Encoding::Converter.new(convpath)
- *
- * possible options elements:
- * hash form:
- * :invalid => nil # raise error on invalid byte sequence (default)
- * :invalid => :replace # replace invalid byte sequence
- * :undef => nil # raise error on undefined conversion (default)
- * :undef => :replace # replace undefined conversion
- * :replace => string # replacement string ("?" or "\uFFFD" if not specified)
- * :universal_newline => true # decorator for converting CRLF and CR to LF
- * :crlf_newline => true # decorator for converting LF to CRLF
- * :cr_newline => true # decorator for converting LF to CR
- * :xml => :text # escape as XML CharData.
- * :xml => :attr # escape as XML AttValue
- * integer form:
- * Encoding::Converter::INVALID_REPLACE
- * Encoding::Converter::UNDEF_REPLACE
- * Encoding::Converter::UNDEF_HEX_CHARREF
- * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
- * Encoding::Converter::CRLF_NEWLINE_DECORATOR
- * Encoding::Converter::CR_NEWLINE_DECORATOR
- * Encoding::Converter::XML_TEXT_DECORATOR
- * Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
- * Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
- *
- * Encoding::Converter.new creates an instance of Encoding::Converter.
- *
- * Source_encoding and destination_encoding should be a string or
- * Encoding object.
- *
- * opt should be nil, a hash or an integer.
- *
- * convpath should be an array.
- * convpath may contain
- * - two-element arrays which contain encodings or encoding names, or
- * - strings representing decorator names.
- *
- * Encoding::Converter.new optionally takes an option.
- * The option should be a hash or an integer.
- * The option hash can contain :invalid => nil, etc.
- * The option integer should be logical-or of constants such as
- * Encoding::Converter::INVALID_REPLACE, etc.
- *
- * [:invalid => nil]
- * Raise error on invalid byte sequence. This is a default behavior.
- * [:invalid => :replace]
- * Replace invalid byte sequence by replacement string.
- * [:undef => nil]
- * Raise an error if a character in source_encoding is not defined in destination_encoding.
- * This is a default behavior.
- * [:undef => :replace]
- * Replace undefined character in destination_encoding with replacement string.
- * [:replace => string]
- * Specify the replacement string.
- * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others.
- * [:universal_newline => true]
- * Convert CRLF and CR to LF.
- * [:crlf_newline => true]
- * Convert LF to CRLF.
- * [:cr_newline => true]
- * Convert LF to CR.
- * [:xml => :text]
- * Escape as XML CharData.
- * This form can be used as a HTML 4.0 #PCDATA.
- * - '&' -> '&amp;'
- * - '<' -> '&lt;'
- * - '>' -> '&gt;'
- * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
- * [:xml => :attr]
- * Escape as XML AttValue.
- * The converted result is quoted as "...".
- * This form can be used as a HTML 4.0 attribute value.
- * - '&' -> '&amp;'
- * - '<' -> '&lt;'
- * - '>' -> '&gt;'
- * - '"' -> '&quot;'
- * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
- *
- * Examples:
- * # UTF-16BE to UTF-8
- * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
- *
- * # Usually, decorators such as newline conversion are inserted last.
- * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
- * p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
- * # "universal_newline"]
- *
- * # But, if the last encoding is ASCII incompatible,
- * # decorators are inserted before the last conversion.
- * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
- * p ec.convpath #=> ["crlf_newline",
- * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
- *
- * # Conversion path can be specified directly.
- * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
- * p ec.convpath #=> ["universal_newline",
- * # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
- * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
- */
-/*
- * Implementation note: the method arguments are fetched with the "*" format
- * of mrb_get_args(), which stores a pointer to the argument values and their
- * count.  argv is therefore declared as a pointer; a local array here would
- * have its first bytes overwritten by that pointer and then be read as
- * argument values.
- *
- * Raises TypeError when self already holds a converter, ArgumentError when
- * the arguments are neither a single convpath array nor at least two values,
- * and the exception built by mrb_econv_open_exc() when no converter can be
- * opened.  Returns self.
- */
-static mrb_value
-econv_init(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
-{
-  mrb_value ecopts;
-  mrb_value snamev, dnamev;
-  const char *sname, *dname;
-  mrb_encoding *senc, *denc;
-  mrb_econv_t *ec;
-  int ecflags;
-  mrb_value convpath = mrb_nil_value();
-  mrb_value *argv = NULL;
-  int argc = 0;
-
-  mrb_get_args(mrb, "*", &argv, &argc);
-  if (mrb_check_datatype(mrb, self, &econv_data_type)) {
-    mrb_raise(mrb, E_TYPE_ERROR, "already initialized");
-  }
-
-  /* keep the call out of the mrb_nil_p() argument in case that is a macro
-     that evaluates its argument more than once */
-  if (argc == 1) {
-    convpath = mrb_check_array_type(mrb, argv[0]);
-  }
-
-  if (!mrb_nil_p(convpath)) {
-    /* Encoding::Converter.new(convpath) */
-    ec = mrb_econv_init_by_convpath(mrb, self, convpath, &sname, &dname, &senc, &denc);
-    ecflags = 0;
-    ecopts = mrb_nil_value();
-  }
-  else {
-    /* Encoding::Converter.new(source, destination [, opt]);
-       econv_args() indexes argv[0] and argv[1], so make sure they exist */
-    if (argc < 2) {
-      mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2..3)", argc);
-    }
-    econv_args(mrb, argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
-    ec = mrb_econv_open_opts(mrb, sname, dname, ecflags, ecopts);
-  }
-
-  if (!ec) {
-    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags));
-  }
-
-  /* names without an encoding object get a dummy encoding, except for a
-     pure decorator converter */
-  if (!DECORATOR_P(sname, dname)) {
-    if (!senc)
-      senc = make_dummy_encoding(mrb, sname);
-    if (!denc)
-      denc = make_dummy_encoding(mrb, dname);
-  }
-
-  ec->source_encoding = senc;
-  ec->destination_encoding = denc;
-
-  DATA_PTR(self) = ec;
-
-  return self;
-}
-
-/*
- * call-seq:
- * ec.inspect -> string
- *
- * Returns a printable version of <i>ec</i>
- *
- * ec = Encoding::Converter.new("iso-8859-1", "utf-8")
- * puts ec.inspect #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
- *
- */
-static mrb_value
-econv_inspect(mrb_state *mrb, mrb_value self)
-{
- const char *cname = mrb_obj_classname(mrb, self);
- mrb_econv_t *ec;
-
- Data_Get_Struct(mrb, self, &econv_data_type, ec);
- if (!ec)
- return mrb_sprintf(mrb, "#<%s: uninitialized>", cname);
- else {
- const char *sname = ec->source_encoding_name;
- const char *dname = ec->destination_encoding_name;
- mrb_value str;
- str = mrb_sprintf(mrb, "#<%s: ", cname);
- econv_description(mrb, sname, dname, ec->flags, str);
- mrb_str_cat2(mrb, str, ">");
- return str;
- }
-}
-
-static mrb_econv_t *
-check_econv(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec;
-
- Data_Get_Struct(mrb, self, &econv_data_type, ec);
- if (!ec) {
- mrb_raise(mrb, E_TYPE_ERROR, "uninitialized encoding converter");
- }
- return ec;
-}
-
-/*
- * call-seq:
- * ec.source_encoding -> encoding
- *
- * Returns the source encoding as an Encoding object.
- */
-static mrb_value
-econv_source_encoding(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
- if (!ec->source_encoding)
- return mrb_nil_value();
- return mrb_enc_from_encoding(mrb, ec->source_encoding);
-}
-
-/*
- * call-seq:
- * ec.destination_encoding -> encoding
- *
- * Returns the destination encoding as an Encoding object.
- */
-static mrb_value
-econv_destination_encoding(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
- if (!ec->destination_encoding)
- return mrb_nil_value();
- return mrb_enc_from_encoding(mrb, ec->destination_encoding);
-}
-
-/*
- * call-seq:
- * ec.convpath -> ary
- *
- * Returns the conversion path of ec.
- *
- * The result is an array of conversions.
- *
- * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
- * p ec.convpath
- * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
- * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
- * # "crlf_newline"]
- *
- * Each element of the array is a pair of encodings or a string.
- * A pair means an encoding conversion.
- * A string means a decorator.
- *
- * In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means
- * a converter from ISO-8859-1 to UTF-8.
- * "crlf_newline" means newline converter from LF to CRLF.
- */
-static mrb_value
-econv_convpath(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
- mrb_value result;
- int i;
-
- result = mrb_ary_new(mrb);
- for (i = 0; i < ec->num_trans; i++) {
- const mrb_transcoder *tr = ec->elems[i].tc->transcoder;
- mrb_value v;
- if (DECORATOR_P(tr->src_encoding, tr->dst_encoding))
- v = mrb_str_new_cstr(mrb, tr->dst_encoding);
- else
- v = mrb_assoc_new(mrb, make_encobj(mrb, tr->src_encoding), make_encobj(mrb, tr->dst_encoding));
- mrb_ary_push(mrb, result, v);
- }
- return result;
-}
-
-static mrb_value
-econv_result_to_symbol(mrb_econv_result_t res)
-{
- switch (res) {
- case econv_invalid_byte_sequence: return sym_invalid_byte_sequence;
- case econv_incomplete_input: return sym_incomplete_input;
- case econv_undefined_conversion: return sym_undefined_conversion;
- case econv_destination_buffer_full: return sym_destination_buffer_full;
- case econv_source_buffer_empty: return sym_source_buffer_empty;
- case econv_finished: return sym_finished;
- case econv_after_output: return sym_after_output;
- default: return mrb_fixnum_value(res); /* should not be reached */
- }
-}
-
-mrb_value econv_primitive_cnvproc(mrb_state *mrb, int argc, mrb_value *argv, mrb_value self)
-{
- mrb_value input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
- mrb_econv_t *ec = check_econv(mrb, self);
- mrb_econv_result_t res;
- const unsigned char *ip, *is;
- unsigned char *op, *os;
- long output_byteoffset, output_bytesize;
- unsigned long output_byteend;
- int flags;
-
- //mrb_scan_args(argc, argv, "23", &input, &output, &output_byteoffset_v, &output_bytesize_v, &opt);
- input = argv[0];
- output = argv[1];
- output_byteoffset_v = argv[2];
- output_bytesize_v = argv[3];
- opt = argv[4];
-
- if (argc < 3)//mrb_nil_p(output_byteoffset_v))
- output_byteoffset = 0; /* dummy */
- else
- output_byteoffset = mrb_fixnum(output_byteoffset_v);
-
- if (argc < 4)//mrb_nil_p(output_bytesize_v))
- output_bytesize = 0; /* dummy */
- else
- output_bytesize = mrb_fixnum(output_bytesize_v);
-
- if (argc < 5) {//mrb_nil_p(opt)) {
- flags = 0;
- }
- else if (!mrb_nil_p(flags_v = mrb_check_to_integer(mrb, opt, "to_int"))) {
- flags = mrb_fixnum(flags_v);
- }
- else {
- mrb_value v;
- opt = mrb_convert_type(mrb, opt, MRB_TT_HASH, "Hash", "to_hash");
- flags = 0;
- v = mrb_hash_get(mrb, opt, sym_partial_input);
- if (RTEST(v))
- flags |= ECONV_PARTIAL_INPUT;
- v = mrb_hash_get(mrb, opt, sym_after_output);
- if (RTEST(v))
- flags |= ECONV_AFTER_OUTPUT;
- }
-
- //StringValue(output);
- mrb_string_value(mrb, &output);
- if (!mrb_nil_p(input))
- //StringValue(input);
- mrb_string_value(mrb, &input);
- mrb_str_modify(mrb, output);
-
- if (mrb_nil_p(output_bytesize_v)) {
- output_bytesize = STR_BUF_MIN_SIZE;
- if (!mrb_nil_p(input) && output_bytesize < RSTRING_LEN(input))
- output_bytesize = RSTRING_LEN(input);
- }
-
- retry:
-
- if (mrb_nil_p(output_byteoffset_v))
- output_byteoffset = RSTRING_LEN(output);
-
- if (output_byteoffset < 0)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "negative output_byteoffset");
-
- if (RSTRING_LEN(output) < output_byteoffset)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "output_byteoffset too big");
-
- if (output_bytesize < 0)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "negative output_bytesize");
-
- output_byteend = (unsigned long)output_byteoffset +
- (unsigned long)output_bytesize;
-
- if (output_byteend < (unsigned long)output_byteoffset ||
- LONG_MAX < output_byteend)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "output_byteoffset+output_bytesize too big");
-
- if (mrb_str_capacity(output) < output_byteend)
- mrb_str_resize(mrb, output, output_byteend);
-
- if (mrb_nil_p(input)) {
- ip = is = NULL;
- }
- else {
- ip = (const unsigned char*)RSTRING_PTR(input);
- is = ip + RSTRING_LEN(input);
- }
-
- op = (unsigned char*)RSTRING_PTR(output) + output_byteoffset;
- os = op + output_bytesize;
-
- res = mrb_econv_convert(mrb, ec, &ip, is, &op, os, flags);
- mrb_str_set_len(mrb, output, op-(unsigned char*)RSTRING_PTR(output));
- if (!mrb_nil_p(input))
- mrb_str_drop_bytes(mrb, input, ip - (unsigned char*)RSTRING_PTR(input));
-
- if (mrb_nil_p(output_bytesize_v) && res == econv_destination_buffer_full) {
- if (LONG_MAX / 2 < output_bytesize)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "too long conversion result");
- output_bytesize *= 2;
- output_byteoffset_v = mrb_nil_value();
- goto retry;
- }
-
- if (ec->destination_encoding) {
- mrb_enc_associate(mrb, output, ec->destination_encoding);
- }
-
- return econv_result_to_symbol(res);
-}
-
-/*
- * call-seq:
- * ec.primitive_convert(source_buffer, destination_buffer) -> symbol
- * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
- * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
- * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
- *
- * possible opt elements:
- * hash form:
- * :partial_input => true # source buffer may be part of larger source
- * :after_output => true # stop conversion after output before input
- * integer form:
- * Encoding::Converter::PARTIAL_INPUT
- * Encoding::Converter::AFTER_OUTPUT
- *
- * possible results:
- * :invalid_byte_sequence
- * :incomplete_input
- * :undefined_conversion
- * :after_output
- * :destination_buffer_full
- * :source_buffer_empty
- * :finished
- *
- * primitive_convert converts source_buffer into destination_buffer.
- *
- * source_buffer should be a string or nil.
- * nil means a empty string.
- *
- * destination_buffer should be a string.
- *
- * destination_byteoffset should be an integer or nil.
- * nil means the end of destination_buffer.
- * If it is omitted, nil is assumed.
- *
- * destination_bytesize should be an integer or nil.
- * nil means unlimited.
- * If it is omitted, nil is assumed.
- *
- * opt should be nil, a hash or an integer.
- * nil means no flags.
- * If it is omitted, nil is assumed.
- *
- * primitive_convert converts the content of source_buffer from beginning
- * and store the result into destination_buffer.
- *
- * destination_byteoffset and destination_bytesize specify the region which
- * the converted result is stored.
- * destination_byteoffset specifies the start position in destination_buffer in bytes.
- * If destination_byteoffset is nil,
- * destination_buffer.bytesize is used for appending the result.
- * destination_bytesize specifies maximum number of bytes.
- * If destination_bytesize is nil,
- * destination size is unlimited.
- * After conversion, destination_buffer is resized to
- * destination_byteoffset + actually produced number of bytes.
- * Also destination_buffer's encoding is set to destination_encoding.
- *
- * primitive_convert drops the converted part of source_buffer.
- * the dropped part is converted in destination_buffer or
- * buffered in Encoding::Converter object.
- *
- * primitive_convert stops conversion when one of following condition met.
- * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
- * - unexpected end of source buffer (:incomplete_input)
- * this occur only when :partial_input is not specified.
- * - character not representable in output encoding (:undefined_conversion)
- * - after some output is generated, before input is done (:after_output)
- * this occur only when :after_output is specified.
- * - destination buffer is full (:destination_buffer_full)
- * this occur only when destination_bytesize is non-nil.
- * - source buffer is empty (:source_buffer_empty)
- * this occur only when :partial_input is specified.
- * - conversion is finished (:finished)
- *
- * example:
- * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
- * ret = ec.primitive_convert(src="pi", dst="", nil, 100)
- * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
- *
- * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
- * ret = ec.primitive_convert(src="pi", dst="", nil, 1)
- * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
- * ret = ec.primitive_convert(src, dst="", nil, 1)
- * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
- * ret = ec.primitive_convert(src, dst="", nil, 1)
- * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
- * ret = ec.primitive_convert(src, dst="", nil, 1)
- * p [ret, src, dst] #=> [:finished, "", "i"]
- *
- */
-static mrb_value
-econv_primitive_convert(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
-{
- mrb_value argv[16];
- int argc;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- return econv_primitive_cnvproc(mrb, argc, argv, self);
-}
-
-/*
- * call-seq:
- * ec.convert(source_string) -> destination_string
- *
- * Convert source_string and return destination_string.
- *
- * source_string is assumed as a part of source.
- * i.e. :partial_input=>true is specified internally.
- * finish method should be used last.
- *
- * ec = Encoding::Converter.new("utf-8", "euc-jp")
- * puts ec.convert("\u3042").dump #=> "\xA4\xA2"
- * puts ec.finish.dump #=> ""
- *
- * ec = Encoding::Converter.new("euc-jp", "utf-8")
- * puts ec.convert("\xA4").dump #=> ""
- * puts ec.convert("\xA2").dump #=> "\xE3\x81\x82"
- * puts ec.finish.dump #=> ""
- *
- * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
- * puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP")
- * puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP")
- * puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP")
- * puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP")
- *
- * If a conversion error occur,
- * Encoding::UndefinedConversionError or
- * Encoding::InvalidByteSequenceError is raised.
- * Encoding::Converter#convert doesn't supply methods to recover or restart
- * from these exceptions.
- * When you want to handle these conversion errors,
- * use Encoding::Converter#primitive_convert.
- *
- */
-static mrb_value
-econv_convert(mrb_state *mrb, mrb_value self)
-{
- mrb_value source_string;
- mrb_value ret, dst;
- mrb_value av[5];
- int ac;
- mrb_econv_t *ec = check_econv(mrb, self);
-
- mrb_get_args(mrb, "o", &source_string);
- //StringValue(source_string);
- mrb_string_value(mrb, &source_string);
-
- dst = mrb_str_new(mrb, NULL, 0);
-
- av[0] = mrb_str_dup(mrb, source_string);
- av[1] = dst;
- av[2] = mrb_nil_value();
- av[3] = mrb_nil_value();
- av[4] = mrb_fixnum_value(ECONV_PARTIAL_INPUT);
- ac = 5;
-
- ret = econv_primitive_cnvproc(mrb, ac, av, self);
-
- if (mrb_obj_equal(mrb, ret, sym_invalid_byte_sequence) ||
- mrb_obj_equal(mrb, ret, sym_undefined_conversion) ||
- mrb_obj_equal(mrb, ret, sym_incomplete_input)) {
- mrb_value exc = make_econv_exception(mrb, ec);
- mrb_exc_raise(mrb, exc);
- }
-
- if (mrb_obj_equal(mrb, ret, sym_finished)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "converter already finished");
- }
-
- if (!mrb_obj_equal(mrb, ret, sym_source_buffer_empty)) {
- mrb_bug("unexpected result of econv_primitive_convert");
- }
-
- return dst;
-}
-
-/*
- * call-seq:
- * ec.finish -> string
- *
- * Finishes the converter.
- * It returns the last part of the converted string.
- *
- * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
- * p ec.convert("\u3042") #=> "\e$B$\""
- * p ec.finish #=> "\e(B"
- */
-static mrb_value
-econv_finish(mrb_state *mrb, mrb_value self)
-{
- mrb_value ret, dst;
- mrb_value av[5];
- int ac;
- mrb_econv_t *ec = check_econv(mrb, self);
-
- dst = mrb_str_new(mrb, NULL, 0);
-
- av[0] = mrb_nil_value();
- av[1] = dst;
- av[2] = mrb_nil_value();
- av[3] = mrb_nil_value();
- av[4] = mrb_fixnum_value(0);
- ac = 5;
-
- ret = econv_primitive_cnvproc(mrb, ac, av, self);
-
- if (mrb_obj_equal(mrb, ret, sym_invalid_byte_sequence) ||
- mrb_obj_equal(mrb, ret, sym_undefined_conversion) ||
- mrb_obj_equal(mrb, ret, sym_incomplete_input)) {
- mrb_value exc = make_econv_exception(mrb, ec);
- mrb_exc_raise(mrb, exc);
- }
-
- if (!mrb_obj_equal(mrb, ret, sym_finished)) {
- mrb_bug("unexpected result of econv_primitive_convert");
- }
-
- return dst;
-}
-
-/*
- * call-seq:
- * ec.primitive_errinfo -> array
- *
- * primitive_errinfo returns important information regarding the last error
- * as a 5-element array:
- *
- * [result, enc1, enc2, error_bytes, readagain_bytes]
- *
- * result is the last result of primitive_convert.
- *
- * Other elements are only meaningful when result is
- * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
- *
- * enc1 and enc2 indicate a conversion step as a pair of strings.
- * For example, a converter from EUC-JP to ISO-8859-1 converts
- * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1.
- * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"].
- *
- * error_bytes and readagain_bytes indicate the byte sequences which caused the error.
- * error_bytes is discarded portion.
- * readagain_bytes is buffered portion which is read again on next conversion.
- *
- * Example:
- *
- * # \xff is invalid as EUC-JP.
- * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
- * ec.primitive_convert(src="\xff", dst="", nil, 10)
- * p ec.primitive_errinfo
- * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""]
- *
- * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
- * # Since this error is occur in UTF-8 to ISO-8859-1 conversion,
- * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
- * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
- * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
- * p ec.primitive_errinfo
- * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
- *
- * # partial character is invalid
- * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
- * ec.primitive_convert(src="\xa4", dst="", nil, 10)
- * p ec.primitive_errinfo
- * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
- *
- * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
- * # partial characters.
- * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
- * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
- * p ec.primitive_errinfo
- * #=> [:source_buffer_empty, nil, nil, nil, nil]
- *
- * # \xd8\x00\x00@ is invalid as UTF-16BE because
- * # no low surrogate after high surrogate (\xd8\x00).
- * # It is detected by 3rd byte (\00) which is part of next character.
- * # So the high surrogate (\xd8\x00) is discarded and
- * # the 3rd byte is read again later.
- * # Since the byte is buffered in ec, it is dropped from src.
- * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
- * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
- * p ec.primitive_errinfo
- * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
- * p src
- * #=> "@"
- *
- * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
- * # The problem is detected by 4th byte.
- * ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
- * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
- * p ec.primitive_errinfo
- * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
- * p src
- * #=> ""
- *
- */
-static mrb_value
-econv_primitive_errinfo(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
-
- mrb_value ary;
-
- ary = mrb_ary_new_capa(mrb, 5);//mrb_ary_new2(5);
-
- mrb_ary_set(mrb, ary, 0, econv_result_to_symbol(ec->last_error.result));//rb_ary_store(ary, 0, econv_result_to_symbol(ec->last_error.result));
- mrb_ary_set(mrb, ary, 4, mrb_nil_value());//rb_ary_store(ary, 4, mrb_nil_value());
-
- if (ec->last_error.source_encoding)
- mrb_ary_set(mrb, ary, 1, mrb_str_new2(mrb, ec->last_error.source_encoding));//rb_ary_store(ary, 1, mrb_str_new2(mrb, ec->last_error.source_encoding));
-
- if (ec->last_error.destination_encoding)
- mrb_ary_set(mrb, ary, 2, mrb_str_new2(mrb, ec->last_error.destination_encoding));//rb_ary_store(ary, 2, mrb_str_new2(mrb, ec->last_error.destination_encoding));
-
- if (ec->last_error.error_bytes_start) {
- //rb_ary_store(ary, 3, mrb_str_new(mrb, (const char*)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len));
- mrb_ary_set(mrb, ary, 3, mrb_str_new(mrb, (const char*)ec->last_error.error_bytes_start, ec->last_error.error_bytes_len));
- //rb_ary_store(ary, 4, mrb_str_new(mrb, (const char*)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len));
- mrb_ary_set(mrb, ary, 4, mrb_str_new(mrb, (const char*)ec->last_error.error_bytes_start + ec->last_error.error_bytes_len, ec->last_error.readagain_len));
- }
-
- return ary;
-}
-
-/*
- * call-seq:
- * ec.insert_output(string) -> nil
- *
- * Inserts string into the encoding converter.
- * The string will be converted to the destination encoding and
- * output on later conversions.
- *
- * If the destination encoding is stateful,
- * string is converted according to the state and the state is updated.
- *
- * This method should be used only when a conversion error occurs.
- *
- * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
- * src = "HIRAGANA LETTER A is \u{3042}."
- * dst = ""
- * p ec.primitive_convert(src, dst) #=> :undefined_conversion
- * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
- * ec.insert_output("<err>")
- * p ec.primitive_convert(src, dst) #=> :finished
- * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is <err>.", ""]
- *
- * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
- * src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
- * dst = ""
- * p ec.primitive_convert(src, dst) #=> :undefined_conversion
- * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
- * ec.insert_output "?" # state change required to output "?".
- * p ec.primitive_convert(src, dst) #=> :finished
- * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
- *
- */
-static mrb_value
-econv_insert_output(mrb_state *mrb, mrb_value self)
-{
- mrb_value string;
- const char *insert_enc;
- mrb_econv_t *ec;
- int ret;
-
- mrb_get_args(mrb, "o", &string);
- ec = check_econv(mrb, self);
-
- //StringValue(string);
- mrb_string_value(mrb, &string);
- insert_enc = mrb_econv_encoding_to_insert_output(ec);
- string = mrb_str_encode(mrb, string, mrb_enc_from_encoding(mrb, mrb_enc_find(mrb, insert_enc)), 0, mrb_nil_value());
-
- ret = mrb_econv_insert_output(mrb, ec, (const unsigned char*)RSTRING_PTR(string), RSTRING_LEN(string), insert_enc);
- if (ret == -1) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "too big string");
- }
-
- return mrb_nil_value();
-}
-
-/*
- * call-seq
- * ec.putback -> string
- * ec.putback(max_numbytes) -> string
- *
- * Put back the bytes which will be converted.
- *
- * The bytes are caused by invalid_byte_sequence error.
- * When invalid_byte_sequence error, some bytes are discarded and
- * some bytes are buffered to be converted later.
- * The latter bytes can be put back.
- * It can be observed by
- * Encoding::InvalidByteSequenceError#readagain_bytes and
- * Encoding::Converter#primitive_errinfo.
- *
- * ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
- * src = "\x00\xd8\x61\x00"
- * dst = ""
- * p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
- * p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
- * p ec.putback #=> "a\x00"
- * p ec.putback #=> "" # no more bytes to put back
- *
- */
-static mrb_value
-econv_putback(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
- int n;
- int putbackable;
- mrb_value str, max;
-
- mrb_value argv[16];
- int argc;
-
- //mrb_scan_args(argc, argv, "01", &max);
- mrb_get_args(mrb, "*", &argv, &argc);
-
- if (argc == 0)//mrb_nil_p(max))
- n = mrb_econv_putbackable(ec);
- else {
- max = argv[0];
- n = mrb_fixnum(max);
- putbackable = mrb_econv_putbackable(ec);
- if (putbackable < n)
- n = putbackable;
- }
-
- str = mrb_str_new(mrb, NULL, n);
- mrb_econv_putback(ec, (unsigned char*)RSTRING_PTR(str), n);
-
- if (ec->source_encoding) {
- mrb_enc_associate(mrb, str, ec->source_encoding);
- }
-
- return str;
-}
-
-/*
- * call-seq:
- * ec.last_error -> exception or nil
- *
- * Returns an exception object for the last conversion.
- * Returns nil if the last conversion did not produce an error.
- *
- * "error" means that
- * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for
- * Encoding::Converter#convert and
- * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
- * Encoding::Converter#primitive_convert.
- *
- * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
- * p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
- * p ec.last_error #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
- * p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
- * p ec.last_error #=> nil
- *
- */
-static mrb_value
-econv_last_error(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
- mrb_value exc;
-
- exc = make_econv_exception(mrb, ec);
- if (mrb_nil_p(exc))
- return mrb_nil_value();
- return exc;
-}
-
-/*
- * call-seq:
- * ec.replacement -> string
- *
- * Returns the replacement string.
- *
- * ec = Encoding::Converter.new("euc-jp", "us-ascii")
- * p ec.replacement #=> "?"
- *
- * ec = Encoding::Converter.new("euc-jp", "utf-8")
- * p ec.replacement #=> "\uFFFD"
- */
-static mrb_value
-econv_get_replacement(mrb_state *mrb, mrb_value self)
-{
- mrb_econv_t *ec = check_econv(mrb, self);
- int ret;
- mrb_encoding *enc;
-
- ret = make_replacement(mrb, ec);
- if (ret == -1) {
- mrb_raise(mrb, E_UNDEFINEDCONVERSION_ERROR, "replacement character setup failed");
- }
-
- enc = mrb_enc_find(mrb, ec->replacement_enc);
- return mrb_enc_str_new(mrb, (const char*)ec->replacement_str, (long)ec->replacement_len, enc);
-}
-
-/*
- * call-seq:
- * ec.replacement = string
- *
- * Sets the replacement string.
- *
- * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
- * ec.replacement = "<undef>"
- * p ec.convert("a \u3042 b") #=> "a <undef> b"
- */
-static mrb_value
-econv_set_replacement(mrb_state *mrb, mrb_value self)
-{
- mrb_value arg;
- mrb_econv_t *ec = check_econv(mrb, self);
- mrb_value string = arg;
- int ret;
- mrb_encoding *enc;
- mrb_get_args(mrb, "o", &arg);
-
- //StringValue(string);
- mrb_string_value(mrb, &string);
- enc = mrb_enc_get(mrb, string);
-
- ret = mrb_econv_set_replacement(mrb, ec,
- (const unsigned char*)RSTRING_PTR(string),
- RSTRING_LEN(string),
- mrb_enc_name(enc));
-
- if (ret == -1) {
- /* xxx: mrb_eInvalidByteSequenceError? */
- mrb_raise(mrb, E_UNDEFINEDCONVERSION_ERROR, "replacement character setup failed");
- }
-
- return arg;
-}
-
-mrb_value
-mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec)
-{
- return make_econv_exception(mrb, ec);
-}
-
-void
-mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec)
-{
- mrb_value exc;
-
- exc = make_econv_exception(mrb, ec);
- if (mrb_nil_p(exc))
- return;
- mrb_exc_raise(mrb, exc);
-}
-
-/*
- * call-seq:
- * ecerr.source_encoding_name -> string
- *
- * Returns the source encoding name as a string.
- */
-static mrb_value
-ecerr_source_encoding_name(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "source_encoding_name"));
-}
-
-/*
- * call-seq:
- * ecerr.source_encoding -> encoding
- *
- * Returns the source encoding as an encoding object.
- *
- * Note that the result may not be equal to the source encoding of
- * the encoding converter if the conversion has multiple steps.
- *
- * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
- * begin
- * ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
- * rescue Encoding::UndefinedConversionError
- * p $!.source_encoding #=> #<Encoding:UTF-8>
- * p $!.destination_encoding #=> #<Encoding:EUC-JP>
- * p $!.source_encoding_name #=> "UTF-8"
- * p $!.destination_encoding_name #=> "EUC-JP"
- * end
- *
- */
-static mrb_value
-ecerr_source_encoding(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "source_encoding"));
-}
-
-/*
- * call-seq:
- * ecerr.destination_encoding_name -> string
- *
- * Returns the destination encoding name as a string.
- */
-static mrb_value
-ecerr_destination_encoding_name(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "destination_encoding_name"));
-}
-
-/*
- * call-seq:
- * ecerr.destination_encoding -> string
- *
- * Returns the destination encoding as an encoding object.
- */
-static mrb_value
-ecerr_destination_encoding(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "destination_encoding"));
-}
-
-/*
- * call-seq:
- * ecerr.error_char -> string
- *
- * Returns the one-character string which cause Encoding::UndefinedConversionError.
- *
- * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
- * begin
- * ec.convert("\xa0")
- * rescue Encoding::UndefinedConversionError
- * puts $!.error_char.dump #=> "\xC2\xA0"
- * p $!.error_char.encoding #=> #<Encoding:UTF-8>
- * end
- *
- */
-static mrb_value
-ecerr_error_char(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "error_char"));
-}
-
-/*
- * call-seq:
- * ecerr.error_bytes -> string
- *
- * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
- *
- * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
- * begin
- * ec.convert("abc\xA1\xFFdef")
- * rescue Encoding::InvalidByteSequenceError
- * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
- * puts $!.error_bytes.dump #=> "\xA1"
- * puts $!.readagain_bytes.dump #=> "\xFF"
- * end
- */
-static mrb_value
-ecerr_error_bytes(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "error_bytes"));
-}
-
-/*
- * call-seq:
- * ecerr.readagain_bytes -> string
- *
- * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
- */
-static mrb_value
-ecerr_readagain_bytes(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "readagain_bytes"));
-}
-
-/*
- * call-seq:
- * ecerr.incomplete_input? -> true or false
- *
- * Returns true if the invalid byte sequence error is caused by
- * premature end of string.
- *
- * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
- *
- * begin
- * ec.convert("abc\xA1z")
- * rescue Encoding::InvalidByteSequenceError
- * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
- * p $!.incomplete_input? #=> false
- * end
- *
- * begin
- * ec.convert("abc\xA1")
- * ec.finish
- * rescue Encoding::InvalidByteSequenceError
- * p $! #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
- * p $!.incomplete_input? #=> true
- * end
- */
-static mrb_value
-ecerr_incomplete_input(mrb_state *mrb, mrb_value self)
-{
- return mrb_attr_get(mrb, self, mrb_intern(mrb, "incomplete_input"));
-}
-
-extern void Init_newline(void);
-
-/*
- * Document-class: Encoding::UndefinedConversionError
- *
- * Raised by Encoding and String methods when a transcoding operation
- * fails.
- */
-
-/*
- * Document-class: Encoding::InvalidByteSequenceError
- *
- * Raised by Encoding and String methods when the string being
- * transcoded contains a byte invalid for the either the source or
- * target encoding.
- */
-
-/*
- * Document-class: Encoding::ConverterNotFoundError
- *
- * Raised by transcoding methods when a named encoding does not
- * correspond with a known converter.
- */
-
-void
-mrb_init_transcode(mrb_state *mrb)
-{
- struct RClass *s;
- struct RClass *c;
- struct RClass *u;
- struct RClass *i;
- struct RClass *eConverterNotFoundError_class;
- struct RClass *eInvalidByteSequenceError_class;
- struct RClass *eUndefinedConversionError_class;
-
- eUndefinedConversionError_class = mrb_define_class(mrb, "UndefinedConversionError", E_ENCODING_ERROR);
- eInvalidByteSequenceError_class = mrb_define_class(mrb, "InvalidByteSequenceError", E_ENCODING_ERROR);
- eConverterNotFoundError_class = mrb_define_class(mrb, "ConverterNotFoundError", E_ENCODING_ERROR);
-
- transcoder_table = st_init_strcasetable();
-
- //sym_invalid = ID2SYM(mrb_intern("invalid"));
- //sym_undef = ID2SYM(mrb_intern("undef"));
- //sym_replace = ID2SYM(mrb_intern("replace"));
- //sym_fallback = ID2SYM(mrb_intern("fallback"));
- //sym_xml = ID2SYM(mrb_intern("xml"));
- //sym_text = ID2SYM(mrb_intern("text"));
- //sym_attr = ID2SYM(mrb_intern("attr"));
-
- //sym_invalid_byte_sequence = ID2SYM(mrb_intern("invalid_byte_sequence"));
- //sym_undefined_conversion = ID2SYM(mrb_intern("undefined_conversion"));
- //sym_destination_buffer_full = ID2SYM(mrb_intern("destination_buffer_full"));
- //sym_source_buffer_empty = ID2SYM(mrb_intern("source_buffer_empty"));
- //sym_finished = ID2SYM(mrb_intern("finished"));
- //sym_after_output = ID2SYM(mrb_intern("after_output"));
- //sym_incomplete_input = ID2SYM(mrb_intern("incomplete_input"));
- //sym_universal_newline = ID2SYM(mrb_intern("universal_newline"));
- //sym_crlf_newline = ID2SYM(mrb_intern("crlf_newline"));
- //sym_cr_newline = ID2SYM(mrb_intern("cr_newline"));
- //sym_partial_input = ID2SYM(mrb_intern("partial_input"));
-
- s = mrb->string_class;
- mrb_define_method(mrb, s, "encode", str_encode, ARGS_ANY());
- mrb_define_method(mrb, s, "encode!", str_encode_bang, ARGS_ANY());
-
- c = mrb_define_class(mrb, "Converter", ENCODE_CLASS);
- //mrb_cEncodingConverter = rb_define_class_under(mrb_cEncoding, "Converter", rb_cData);
- //mrb_define_alloc_func(mrb_cEncodingConverter, econv_s_allocate);
- mrb_define_class_method(mrb, c, "asciicompat_encoding", econv_s_asciicompat_encoding, ARGS_REQ(1)); /* 1 */
- mrb_define_class_method(mrb, c, "search_convpath", econv_s_search_convpath, ARGS_ANY()); /* 2 */
- mrb_define_method(mrb, s, "initialize", econv_init, ARGS_ANY());
- mrb_define_method(mrb, s, "inspect", econv_inspect, ARGS_NONE());
- mrb_define_method(mrb, s, "convpath", econv_convpath, ARGS_NONE());
- mrb_define_method(mrb, s, "source_encoding", econv_source_encoding, ARGS_NONE());
- mrb_define_method(mrb, s, "destination_encoding", econv_destination_encoding, ARGS_NONE());
- mrb_define_method(mrb, s, "primitive_convert", econv_primitive_convert, ARGS_ANY());
- mrb_define_method(mrb, s, "convert", econv_convert, ARGS_REQ(1));
- mrb_define_method(mrb, s, "finish", econv_finish, ARGS_NONE());
- mrb_define_method(mrb, s, "primitive_errinfo", econv_primitive_errinfo, ARGS_NONE());
- mrb_define_method(mrb, s, "insert_output", econv_insert_output, ARGS_REQ(1));
- mrb_define_method(mrb, s, "putback", econv_putback, ARGS_ANY());
- mrb_define_method(mrb, s, "last_error", econv_last_error, ARGS_NONE());
- mrb_define_method(mrb, s, "replacement", econv_get_replacement, ARGS_NONE());
- mrb_define_method(mrb, s, "replacement=", econv_set_replacement, ARGS_REQ(1));
-
- mrb_define_const(mrb, s, "INVALID_MASK", mrb_fixnum_value(ECONV_INVALID_MASK));
- mrb_define_const(mrb, s, "INVALID_REPLACE", mrb_fixnum_value(ECONV_INVALID_REPLACE));
- mrb_define_const(mrb, s, "UNDEF_MASK", mrb_fixnum_value(ECONV_UNDEF_MASK));
- mrb_define_const(mrb, s, "UNDEF_REPLACE", mrb_fixnum_value(ECONV_UNDEF_REPLACE));
- mrb_define_const(mrb, s, "UNDEF_HEX_CHARREF", mrb_fixnum_value(ECONV_UNDEF_HEX_CHARREF));
- mrb_define_const(mrb, s, "PARTIAL_INPUT", mrb_fixnum_value(ECONV_PARTIAL_INPUT));
- mrb_define_const(mrb, s, "AFTER_OUTPUT", mrb_fixnum_value(ECONV_AFTER_OUTPUT));
- mrb_define_const(mrb, s, "UNIVERSAL_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
- mrb_define_const(mrb, s, "CRLF_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_CRLF_NEWLINE_DECORATOR));
- mrb_define_const(mrb, s, "CR_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_CR_NEWLINE_DECORATOR));
- mrb_define_const(mrb, s, "XML_TEXT_DECORATOR", mrb_fixnum_value(ECONV_XML_TEXT_DECORATOR));
- mrb_define_const(mrb, s, "XML_ATTR_CONTENT_DECORATOR", mrb_fixnum_value(ECONV_XML_ATTR_CONTENT_DECORATOR));
- mrb_define_const(mrb, s, "XML_ATTR_QUOTE_DECORATOR", mrb_fixnum_value(ECONV_XML_ATTR_QUOTE_DECORATOR));
-
- u = E_UNDEFINEDCONVERSION_ERROR;
- mrb_define_method(mrb, u, "source_encoding_name", ecerr_source_encoding_name, ARGS_NONE());
- mrb_define_method(mrb, u, "destination_encoding_name", ecerr_destination_encoding_name, ARGS_NONE());
- mrb_define_method(mrb, u, "source_encoding", ecerr_source_encoding, ARGS_NONE());
- mrb_define_method(mrb, u, "destination_encoding", ecerr_destination_encoding, ARGS_NONE());
- mrb_define_method(mrb, u, "error_char", ecerr_error_char, ARGS_NONE());
-
- i = E_INVALIDBYTESEQUENCE_ERROR;
- mrb_define_method(mrb, i, "source_encoding_name", ecerr_source_encoding_name, ARGS_NONE());
- mrb_define_method(mrb, i, "destination_encoding_name", ecerr_destination_encoding_name, ARGS_NONE());
- mrb_define_method(mrb, i, "source_encoding", ecerr_source_encoding, ARGS_NONE());
- mrb_define_method(mrb, i, "destination_encoding", ecerr_destination_encoding, ARGS_NONE());
- mrb_define_method(mrb, i, "error_bytes", ecerr_error_bytes, ARGS_NONE());
- mrb_define_method(mrb, i, "readagain_bytes", ecerr_readagain_bytes, ARGS_NONE());
- mrb_define_method(mrb, i, "incomplete_input?", ecerr_incomplete_input, ARGS_NONE());
-
- //Init_newline();
-}
-#endif //INCLUDE_ENCODING
diff --git a/src/transcode_data.h b/src/transcode_data.h
deleted file mode 100644
index 62051701a..000000000
--- a/src/transcode_data.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/**********************************************************************
-
- transcode_data.h -
-
- $Author: duerst $
- created at: Mon 10 Dec 2007 14:01:47 JST 2007
-
- Copyright (C) 2007 Martin Duerst
-
-**********************************************************************/
-
-//#include "ruby/ruby.h"
-
-#ifndef RUBY_TRANSCODE_DATA_H
-#define RUBY_TRANSCODE_DATA_H 1
-
-#define WORDINDEX_SHIFT_BITS 2
-#define WORDINDEX2INFO(widx) ((widx) << WORDINDEX_SHIFT_BITS)
-#define INFO2WORDINDEX(info) ((info) >> WORDINDEX_SHIFT_BITS)
-#define BYTE_LOOKUP_BASE(bl) ((bl)[0])
-#define BYTE_LOOKUP_INFO(bl) ((bl)[1])
-
-#define PType (unsigned int)
-
-#define NOMAP (PType 0x01) /* direct map */
-#define ONEbt (0x02) /* one byte payload */
-#define TWObt (0x03) /* two bytes payload */
-#define THREEbt (0x05) /* three bytes payload */
-#define FOURbt (0x06) /* four bytes payload, UTF-8 only, macros start at getBT0 */
-#define INVALID (PType 0x07) /* invalid byte sequence */
-#define UNDEF (PType 0x09) /* legal but undefined */
-#define ZERObt (PType 0x0A) /* zero bytes of payload, i.e. remove */
-#define FUNii (PType 0x0B) /* function from info to info */
-#define FUNsi (PType 0x0D) /* function from start to info */
-#define FUNio (PType 0x0E) /* function from info to output */
-#define FUNso (PType 0x0F) /* function from start to output */
-#define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */
-#define GB4bt (PType 0x12) /* GB18030 four bytes payload */
-#define FUNsio (PType 0x13) /* function from start and info to output */
-
-#define STR1_LENGTH(byte_addr) (unsigned int)(*(byte_addr) + 4)
-#define STR1_BYTEINDEX(w) ((w) >> 6)
-#define makeSTR1(bi) (((bi) << 6) | STR1)
-#define makeSTR1LEN(len) ((len)-4)
-
-#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
-#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
-#define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned int)(unsigned char)(b3))<<24)|THREEbt)&0xffffffffU))
-#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU))
-#define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|(((unsigned char)(b2))<<16)|((((unsigned char)(b1))&0x0f)<<24)|((((unsigned int)(unsigned char)(b3))&0x0f)<<28)|GB4bt)&0xffffffffU))
-#define funsio(diff) (PType((((unsigned int)(diff))<<8)|FUNsio))
-
-#define getBT1(a) ((unsigned char)((a)>> 8))
-#define getBT2(a) ((unsigned char)((a)>>16))
-#define getBT3(a) ((unsigned char)((a)>>24))
-#define getBT0(a) (((unsigned char)((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! */
-
-#define getGB4bt0(a) ((unsigned char)((a)>> 8))
-#define getGB4bt1(a) ((((unsigned char)((a)>>24))&0x0F)|0x30)
-#define getGB4bt2(a) ((unsigned char)((a)>>16))
-#define getGB4bt3(a) ((((unsigned char)((a)>>28))&0x0F)|0x30)
-
-#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))
-
-/* do we need these??? maybe not, can be done with simple tables */
-#define ONETRAIL /* legal but undefined if one more trailing UTF-8 */
-#define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */
-#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
-
-typedef enum {
- asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */
- asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */
- asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */
- /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
-} mrb_transcoder_asciicompat_type_t;
-
-typedef struct mrb_transcoder mrb_transcoder;
-
-/* static structure, one per supported encoding pair */
-struct mrb_transcoder {
- const char *src_encoding;
- const char *dst_encoding;
- unsigned int conv_tree_start;
- const unsigned char *byte_array;
- unsigned int byte_array_length;
- const unsigned int *word_array;
- unsigned int word_array_length;
- int word_size;
- int input_unit_length;
- int max_input;
- int max_output;
- mrb_transcoder_asciicompat_type_t asciicompat_type;
- size_t state_size;
- int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
- int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */
- mrb_value (*func_ii)(void*, mrb_value); /* info -> info */
- mrb_value (*func_si)(void*, const unsigned char*, size_t); /* start -> info */
- ssize_t (*func_io)(void*, mrb_value, const unsigned char*, size_t); /* info -> output */
- ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */
- ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */
- ssize_t (*resetsize_func)(void*); /* -> len */
- ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */
- ssize_t (*func_sio)(void*, const unsigned char*, size_t, mrb_value, unsigned char*, size_t); /* start -> output */
-};
-
-void mrb_declare_transcoder(mrb_state *mrb, const char *enc1, const char *enc2, const char *lib);
-void mrb_register_transcoder(mrb_state *mrb, const mrb_transcoder *);
-
-#endif /* RUBY_TRANSCODE_DATA_H */
diff --git a/src/unicode.c b/src/unicode.c
deleted file mode 100644
index dec692500..000000000
--- a/src/unicode.c
+++ /dev/null
@@ -1,2607 +0,0 @@
-/**********************************************************************
- unicode.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include <string.h>
-#include "regint.h"
-
-#include "encoding.h" //#define TOLOWER(c)
-
-#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
- ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
-
-static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
- 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
- 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
- 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
- 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
- 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
- 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
- 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
- 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
- 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
-};
-
-#include "name2ctype.h"
-
-typedef struct {
- int n;
- OnigCodePoint code[3];
-} CodePointList3;
-
-typedef struct {
- OnigCodePoint from;
- CodePointList3 to;
-} CaseFold_11_Type;
-
-typedef struct {
- OnigCodePoint from;
- CodePointList3 to;
-} CaseUnfold_11_Type;
-
-typedef struct {
- int n;
- OnigCodePoint code[2];
-} CodePointList2;
-
-typedef struct {
- OnigCodePoint from[2];
- CodePointList2 to;
-} CaseUnfold_12_Type;
-
-typedef struct {
- OnigCodePoint from[3];
- CodePointList2 to;
-} CaseUnfold_13_Type;
-
-static const CaseFold_11_Type CaseFold[] = {
- { 0x0041, {1, {0x0061}}},
- { 0x0042, {1, {0x0062}}},
- { 0x0043, {1, {0x0063}}},
- { 0x0044, {1, {0x0064}}},
- { 0x0045, {1, {0x0065}}},
- { 0x0046, {1, {0x0066}}},
- { 0x0047, {1, {0x0067}}},
- { 0x0048, {1, {0x0068}}},
- { 0x004a, {1, {0x006a}}},
- { 0x004b, {1, {0x006b}}},
- { 0x004c, {1, {0x006c}}},
- { 0x004d, {1, {0x006d}}},
- { 0x004e, {1, {0x006e}}},
- { 0x004f, {1, {0x006f}}},
- { 0x0050, {1, {0x0070}}},
- { 0x0051, {1, {0x0071}}},
- { 0x0052, {1, {0x0072}}},
- { 0x0053, {1, {0x0073}}},
- { 0x0054, {1, {0x0074}}},
- { 0x0055, {1, {0x0075}}},
- { 0x0056, {1, {0x0076}}},
- { 0x0057, {1, {0x0077}}},
- { 0x0058, {1, {0x0078}}},
- { 0x0059, {1, {0x0079}}},
- { 0x005a, {1, {0x007a}}},
- { 0x00b5, {1, {0x03bc}}},
- { 0x00c0, {1, {0x00e0}}},
- { 0x00c1, {1, {0x00e1}}},
- { 0x00c2, {1, {0x00e2}}},
- { 0x00c3, {1, {0x00e3}}},
- { 0x00c4, {1, {0x00e4}}},
- { 0x00c5, {1, {0x00e5}}},
- { 0x00c6, {1, {0x00e6}}},
- { 0x00c7, {1, {0x00e7}}},
- { 0x00c8, {1, {0x00e8}}},
- { 0x00c9, {1, {0x00e9}}},
- { 0x00ca, {1, {0x00ea}}},
- { 0x00cb, {1, {0x00eb}}},
- { 0x00cc, {1, {0x00ec}}},
- { 0x00cd, {1, {0x00ed}}},
- { 0x00ce, {1, {0x00ee}}},
- { 0x00cf, {1, {0x00ef}}},
- { 0x00d0, {1, {0x00f0}}},
- { 0x00d1, {1, {0x00f1}}},
- { 0x00d2, {1, {0x00f2}}},
- { 0x00d3, {1, {0x00f3}}},
- { 0x00d4, {1, {0x00f4}}},
- { 0x00d5, {1, {0x00f5}}},
- { 0x00d6, {1, {0x00f6}}},
- { 0x00d8, {1, {0x00f8}}},
- { 0x00d9, {1, {0x00f9}}},
- { 0x00da, {1, {0x00fa}}},
- { 0x00db, {1, {0x00fb}}},
- { 0x00dc, {1, {0x00fc}}},
- { 0x00dd, {1, {0x00fd}}},
- { 0x00de, {1, {0x00fe}}},
- { 0x00df, {2, {0x0073, 0x0073}}},
- { 0x0100, {1, {0x0101}}},
- { 0x0102, {1, {0x0103}}},
- { 0x0104, {1, {0x0105}}},
- { 0x0106, {1, {0x0107}}},
- { 0x0108, {1, {0x0109}}},
- { 0x010a, {1, {0x010b}}},
- { 0x010c, {1, {0x010d}}},
- { 0x010e, {1, {0x010f}}},
- { 0x0110, {1, {0x0111}}},
- { 0x0112, {1, {0x0113}}},
- { 0x0114, {1, {0x0115}}},
- { 0x0116, {1, {0x0117}}},
- { 0x0118, {1, {0x0119}}},
- { 0x011a, {1, {0x011b}}},
- { 0x011c, {1, {0x011d}}},
- { 0x011e, {1, {0x011f}}},
- { 0x0120, {1, {0x0121}}},
- { 0x0122, {1, {0x0123}}},
- { 0x0124, {1, {0x0125}}},
- { 0x0126, {1, {0x0127}}},
- { 0x0128, {1, {0x0129}}},
- { 0x012a, {1, {0x012b}}},
- { 0x012c, {1, {0x012d}}},
- { 0x012e, {1, {0x012f}}},
- { 0x0132, {1, {0x0133}}},
- { 0x0134, {1, {0x0135}}},
- { 0x0136, {1, {0x0137}}},
- { 0x0139, {1, {0x013a}}},
- { 0x013b, {1, {0x013c}}},
- { 0x013d, {1, {0x013e}}},
- { 0x013f, {1, {0x0140}}},
- { 0x0141, {1, {0x0142}}},
- { 0x0143, {1, {0x0144}}},
- { 0x0145, {1, {0x0146}}},
- { 0x0147, {1, {0x0148}}},
- { 0x0149, {2, {0x02bc, 0x006e}}},
- { 0x014a, {1, {0x014b}}},
- { 0x014c, {1, {0x014d}}},
- { 0x014e, {1, {0x014f}}},
- { 0x0150, {1, {0x0151}}},
- { 0x0152, {1, {0x0153}}},
- { 0x0154, {1, {0x0155}}},
- { 0x0156, {1, {0x0157}}},
- { 0x0158, {1, {0x0159}}},
- { 0x015a, {1, {0x015b}}},
- { 0x015c, {1, {0x015d}}},
- { 0x015e, {1, {0x015f}}},
- { 0x0160, {1, {0x0161}}},
- { 0x0162, {1, {0x0163}}},
- { 0x0164, {1, {0x0165}}},
- { 0x0166, {1, {0x0167}}},
- { 0x0168, {1, {0x0169}}},
- { 0x016a, {1, {0x016b}}},
- { 0x016c, {1, {0x016d}}},
- { 0x016e, {1, {0x016f}}},
- { 0x0170, {1, {0x0171}}},
- { 0x0172, {1, {0x0173}}},
- { 0x0174, {1, {0x0175}}},
- { 0x0176, {1, {0x0177}}},
- { 0x0178, {1, {0x00ff}}},
- { 0x0179, {1, {0x017a}}},
- { 0x017b, {1, {0x017c}}},
- { 0x017d, {1, {0x017e}}},
- { 0x017f, {1, {0x0073}}},
- { 0x0181, {1, {0x0253}}},
- { 0x0182, {1, {0x0183}}},
- { 0x0184, {1, {0x0185}}},
- { 0x0186, {1, {0x0254}}},
- { 0x0187, {1, {0x0188}}},
- { 0x0189, {1, {0x0256}}},
- { 0x018a, {1, {0x0257}}},
- { 0x018b, {1, {0x018c}}},
- { 0x018e, {1, {0x01dd}}},
- { 0x018f, {1, {0x0259}}},
- { 0x0190, {1, {0x025b}}},
- { 0x0191, {1, {0x0192}}},
- { 0x0193, {1, {0x0260}}},
- { 0x0194, {1, {0x0263}}},
- { 0x0196, {1, {0x0269}}},
- { 0x0197, {1, {0x0268}}},
- { 0x0198, {1, {0x0199}}},
- { 0x019c, {1, {0x026f}}},
- { 0x019d, {1, {0x0272}}},
- { 0x019f, {1, {0x0275}}},
- { 0x01a0, {1, {0x01a1}}},
- { 0x01a2, {1, {0x01a3}}},
- { 0x01a4, {1, {0x01a5}}},
- { 0x01a6, {1, {0x0280}}},
- { 0x01a7, {1, {0x01a8}}},
- { 0x01a9, {1, {0x0283}}},
- { 0x01ac, {1, {0x01ad}}},
- { 0x01ae, {1, {0x0288}}},
- { 0x01af, {1, {0x01b0}}},
- { 0x01b1, {1, {0x028a}}},
- { 0x01b2, {1, {0x028b}}},
- { 0x01b3, {1, {0x01b4}}},
- { 0x01b5, {1, {0x01b6}}},
- { 0x01b7, {1, {0x0292}}},
- { 0x01b8, {1, {0x01b9}}},
- { 0x01bc, {1, {0x01bd}}},
- { 0x01c4, {1, {0x01c6}}},
- { 0x01c5, {1, {0x01c6}}},
- { 0x01c7, {1, {0x01c9}}},
- { 0x01c8, {1, {0x01c9}}},
- { 0x01ca, {1, {0x01cc}}},
- { 0x01cb, {1, {0x01cc}}},
- { 0x01cd, {1, {0x01ce}}},
- { 0x01cf, {1, {0x01d0}}},
- { 0x01d1, {1, {0x01d2}}},
- { 0x01d3, {1, {0x01d4}}},
- { 0x01d5, {1, {0x01d6}}},
- { 0x01d7, {1, {0x01d8}}},
- { 0x01d9, {1, {0x01da}}},
- { 0x01db, {1, {0x01dc}}},
- { 0x01de, {1, {0x01df}}},
- { 0x01e0, {1, {0x01e1}}},
- { 0x01e2, {1, {0x01e3}}},
- { 0x01e4, {1, {0x01e5}}},
- { 0x01e6, {1, {0x01e7}}},
- { 0x01e8, {1, {0x01e9}}},
- { 0x01ea, {1, {0x01eb}}},
- { 0x01ec, {1, {0x01ed}}},
- { 0x01ee, {1, {0x01ef}}},
- { 0x01f0, {2, {0x006a, 0x030c}}},
- { 0x01f1, {1, {0x01f3}}},
- { 0x01f2, {1, {0x01f3}}},
- { 0x01f4, {1, {0x01f5}}},
- { 0x01f6, {1, {0x0195}}},
- { 0x01f7, {1, {0x01bf}}},
- { 0x01f8, {1, {0x01f9}}},
- { 0x01fa, {1, {0x01fb}}},
- { 0x01fc, {1, {0x01fd}}},
- { 0x01fe, {1, {0x01ff}}},
- { 0x0200, {1, {0x0201}}},
- { 0x0202, {1, {0x0203}}},
- { 0x0204, {1, {0x0205}}},
- { 0x0206, {1, {0x0207}}},
- { 0x0208, {1, {0x0209}}},
- { 0x020a, {1, {0x020b}}},
- { 0x020c, {1, {0x020d}}},
- { 0x020e, {1, {0x020f}}},
- { 0x0210, {1, {0x0211}}},
- { 0x0212, {1, {0x0213}}},
- { 0x0214, {1, {0x0215}}},
- { 0x0216, {1, {0x0217}}},
- { 0x0218, {1, {0x0219}}},
- { 0x021a, {1, {0x021b}}},
- { 0x021c, {1, {0x021d}}},
- { 0x021e, {1, {0x021f}}},
- { 0x0220, {1, {0x019e}}},
- { 0x0222, {1, {0x0223}}},
- { 0x0224, {1, {0x0225}}},
- { 0x0226, {1, {0x0227}}},
- { 0x0228, {1, {0x0229}}},
- { 0x022a, {1, {0x022b}}},
- { 0x022c, {1, {0x022d}}},
- { 0x022e, {1, {0x022f}}},
- { 0x0230, {1, {0x0231}}},
- { 0x0232, {1, {0x0233}}},
- { 0x023b, {1, {0x023c}}},
- { 0x023d, {1, {0x019a}}},
- { 0x0241, {1, {0x0294}}},
- { 0x0345, {1, {0x03b9}}},
- { 0x0386, {1, {0x03ac}}},
- { 0x0388, {1, {0x03ad}}},
- { 0x0389, {1, {0x03ae}}},
- { 0x038a, {1, {0x03af}}},
- { 0x038c, {1, {0x03cc}}},
- { 0x038e, {1, {0x03cd}}},
- { 0x038f, {1, {0x03ce}}},
- { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}},
- { 0x0391, {1, {0x03b1}}},
- { 0x0392, {1, {0x03b2}}},
- { 0x0393, {1, {0x03b3}}},
- { 0x0394, {1, {0x03b4}}},
- { 0x0395, {1, {0x03b5}}},
- { 0x0396, {1, {0x03b6}}},
- { 0x0397, {1, {0x03b7}}},
- { 0x0398, {1, {0x03b8}}},
- { 0x0399, {1, {0x03b9}}},
- { 0x039a, {1, {0x03ba}}},
- { 0x039b, {1, {0x03bb}}},
- { 0x039c, {1, {0x03bc}}},
- { 0x039d, {1, {0x03bd}}},
- { 0x039e, {1, {0x03be}}},
- { 0x039f, {1, {0x03bf}}},
- { 0x03a0, {1, {0x03c0}}},
- { 0x03a1, {1, {0x03c1}}},
- { 0x03a3, {1, {0x03c3}}},
- { 0x03a4, {1, {0x03c4}}},
- { 0x03a5, {1, {0x03c5}}},
- { 0x03a6, {1, {0x03c6}}},
- { 0x03a7, {1, {0x03c7}}},
- { 0x03a8, {1, {0x03c8}}},
- { 0x03a9, {1, {0x03c9}}},
- { 0x03aa, {1, {0x03ca}}},
- { 0x03ab, {1, {0x03cb}}},
- { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}},
- { 0x03c2, {1, {0x03c3}}},
- { 0x03d0, {1, {0x03b2}}},
- { 0x03d1, {1, {0x03b8}}},
- { 0x03d5, {1, {0x03c6}}},
- { 0x03d6, {1, {0x03c0}}},
- { 0x03d8, {1, {0x03d9}}},
- { 0x03da, {1, {0x03db}}},
- { 0x03dc, {1, {0x03dd}}},
- { 0x03de, {1, {0x03df}}},
- { 0x03e0, {1, {0x03e1}}},
- { 0x03e2, {1, {0x03e3}}},
- { 0x03e4, {1, {0x03e5}}},
- { 0x03e6, {1, {0x03e7}}},
- { 0x03e8, {1, {0x03e9}}},
- { 0x03ea, {1, {0x03eb}}},
- { 0x03ec, {1, {0x03ed}}},
- { 0x03ee, {1, {0x03ef}}},
- { 0x03f0, {1, {0x03ba}}},
- { 0x03f1, {1, {0x03c1}}},
- { 0x03f4, {1, {0x03b8}}},
- { 0x03f5, {1, {0x03b5}}},
- { 0x03f7, {1, {0x03f8}}},
- { 0x03f9, {1, {0x03f2}}},
- { 0x03fa, {1, {0x03fb}}},
- { 0x0400, {1, {0x0450}}},
- { 0x0401, {1, {0x0451}}},
- { 0x0402, {1, {0x0452}}},
- { 0x0403, {1, {0x0453}}},
- { 0x0404, {1, {0x0454}}},
- { 0x0405, {1, {0x0455}}},
- { 0x0406, {1, {0x0456}}},
- { 0x0407, {1, {0x0457}}},
- { 0x0408, {1, {0x0458}}},
- { 0x0409, {1, {0x0459}}},
- { 0x040a, {1, {0x045a}}},
- { 0x040b, {1, {0x045b}}},
- { 0x040c, {1, {0x045c}}},
- { 0x040d, {1, {0x045d}}},
- { 0x040e, {1, {0x045e}}},
- { 0x040f, {1, {0x045f}}},
- { 0x0410, {1, {0x0430}}},
- { 0x0411, {1, {0x0431}}},
- { 0x0412, {1, {0x0432}}},
- { 0x0413, {1, {0x0433}}},
- { 0x0414, {1, {0x0434}}},
- { 0x0415, {1, {0x0435}}},
- { 0x0416, {1, {0x0436}}},
- { 0x0417, {1, {0x0437}}},
- { 0x0418, {1, {0x0438}}},
- { 0x0419, {1, {0x0439}}},
- { 0x041a, {1, {0x043a}}},
- { 0x041b, {1, {0x043b}}},
- { 0x041c, {1, {0x043c}}},
- { 0x041d, {1, {0x043d}}},
- { 0x041e, {1, {0x043e}}},
- { 0x041f, {1, {0x043f}}},
- { 0x0420, {1, {0x0440}}},
- { 0x0421, {1, {0x0441}}},
- { 0x0422, {1, {0x0442}}},
- { 0x0423, {1, {0x0443}}},
- { 0x0424, {1, {0x0444}}},
- { 0x0425, {1, {0x0445}}},
- { 0x0426, {1, {0x0446}}},
- { 0x0427, {1, {0x0447}}},
- { 0x0428, {1, {0x0448}}},
- { 0x0429, {1, {0x0449}}},
- { 0x042a, {1, {0x044a}}},
- { 0x042b, {1, {0x044b}}},
- { 0x042c, {1, {0x044c}}},
- { 0x042d, {1, {0x044d}}},
- { 0x042e, {1, {0x044e}}},
- { 0x042f, {1, {0x044f}}},
- { 0x0460, {1, {0x0461}}},
- { 0x0462, {1, {0x0463}}},
- { 0x0464, {1, {0x0465}}},
- { 0x0466, {1, {0x0467}}},
- { 0x0468, {1, {0x0469}}},
- { 0x046a, {1, {0x046b}}},
- { 0x046c, {1, {0x046d}}},
- { 0x046e, {1, {0x046f}}},
- { 0x0470, {1, {0x0471}}},
- { 0x0472, {1, {0x0473}}},
- { 0x0474, {1, {0x0475}}},
- { 0x0476, {1, {0x0477}}},
- { 0x0478, {1, {0x0479}}},
- { 0x047a, {1, {0x047b}}},
- { 0x047c, {1, {0x047d}}},
- { 0x047e, {1, {0x047f}}},
- { 0x0480, {1, {0x0481}}},
- { 0x048a, {1, {0x048b}}},
- { 0x048c, {1, {0x048d}}},
- { 0x048e, {1, {0x048f}}},
- { 0x0490, {1, {0x0491}}},
- { 0x0492, {1, {0x0493}}},
- { 0x0494, {1, {0x0495}}},
- { 0x0496, {1, {0x0497}}},
- { 0x0498, {1, {0x0499}}},
- { 0x049a, {1, {0x049b}}},
- { 0x049c, {1, {0x049d}}},
- { 0x049e, {1, {0x049f}}},
- { 0x04a0, {1, {0x04a1}}},
- { 0x04a2, {1, {0x04a3}}},
- { 0x04a4, {1, {0x04a5}}},
- { 0x04a6, {1, {0x04a7}}},
- { 0x04a8, {1, {0x04a9}}},
- { 0x04aa, {1, {0x04ab}}},
- { 0x04ac, {1, {0x04ad}}},
- { 0x04ae, {1, {0x04af}}},
- { 0x04b0, {1, {0x04b1}}},
- { 0x04b2, {1, {0x04b3}}},
- { 0x04b4, {1, {0x04b5}}},
- { 0x04b6, {1, {0x04b7}}},
- { 0x04b8, {1, {0x04b9}}},
- { 0x04ba, {1, {0x04bb}}},
- { 0x04bc, {1, {0x04bd}}},
- { 0x04be, {1, {0x04bf}}},
- { 0x04c1, {1, {0x04c2}}},
- { 0x04c3, {1, {0x04c4}}},
- { 0x04c5, {1, {0x04c6}}},
- { 0x04c7, {1, {0x04c8}}},
- { 0x04c9, {1, {0x04ca}}},
- { 0x04cb, {1, {0x04cc}}},
- { 0x04cd, {1, {0x04ce}}},
- { 0x04d0, {1, {0x04d1}}},
- { 0x04d2, {1, {0x04d3}}},
- { 0x04d4, {1, {0x04d5}}},
- { 0x04d6, {1, {0x04d7}}},
- { 0x04d8, {1, {0x04d9}}},
- { 0x04da, {1, {0x04db}}},
- { 0x04dc, {1, {0x04dd}}},
- { 0x04de, {1, {0x04df}}},
- { 0x04e0, {1, {0x04e1}}},
- { 0x04e2, {1, {0x04e3}}},
- { 0x04e4, {1, {0x04e5}}},
- { 0x04e6, {1, {0x04e7}}},
- { 0x04e8, {1, {0x04e9}}},
- { 0x04ea, {1, {0x04eb}}},
- { 0x04ec, {1, {0x04ed}}},
- { 0x04ee, {1, {0x04ef}}},
- { 0x04f0, {1, {0x04f1}}},
- { 0x04f2, {1, {0x04f3}}},
- { 0x04f4, {1, {0x04f5}}},
- { 0x04f6, {1, {0x04f7}}},
- { 0x04f8, {1, {0x04f9}}},
- { 0x0500, {1, {0x0501}}},
- { 0x0502, {1, {0x0503}}},
- { 0x0504, {1, {0x0505}}},
- { 0x0506, {1, {0x0507}}},
- { 0x0508, {1, {0x0509}}},
- { 0x050a, {1, {0x050b}}},
- { 0x050c, {1, {0x050d}}},
- { 0x050e, {1, {0x050f}}},
- { 0x0531, {1, {0x0561}}},
- { 0x0532, {1, {0x0562}}},
- { 0x0533, {1, {0x0563}}},
- { 0x0534, {1, {0x0564}}},
- { 0x0535, {1, {0x0565}}},
- { 0x0536, {1, {0x0566}}},
- { 0x0537, {1, {0x0567}}},
- { 0x0538, {1, {0x0568}}},
- { 0x0539, {1, {0x0569}}},
- { 0x053a, {1, {0x056a}}},
- { 0x053b, {1, {0x056b}}},
- { 0x053c, {1, {0x056c}}},
- { 0x053d, {1, {0x056d}}},
- { 0x053e, {1, {0x056e}}},
- { 0x053f, {1, {0x056f}}},
- { 0x0540, {1, {0x0570}}},
- { 0x0541, {1, {0x0571}}},
- { 0x0542, {1, {0x0572}}},
- { 0x0543, {1, {0x0573}}},
- { 0x0544, {1, {0x0574}}},
- { 0x0545, {1, {0x0575}}},
- { 0x0546, {1, {0x0576}}},
- { 0x0547, {1, {0x0577}}},
- { 0x0548, {1, {0x0578}}},
- { 0x0549, {1, {0x0579}}},
- { 0x054a, {1, {0x057a}}},
- { 0x054b, {1, {0x057b}}},
- { 0x054c, {1, {0x057c}}},
- { 0x054d, {1, {0x057d}}},
- { 0x054e, {1, {0x057e}}},
- { 0x054f, {1, {0x057f}}},
- { 0x0550, {1, {0x0580}}},
- { 0x0551, {1, {0x0581}}},
- { 0x0552, {1, {0x0582}}},
- { 0x0553, {1, {0x0583}}},
- { 0x0554, {1, {0x0584}}},
- { 0x0555, {1, {0x0585}}},
- { 0x0556, {1, {0x0586}}},
- { 0x0587, {2, {0x0565, 0x0582}}},
- { 0x10a0, {1, {0x2d00}}},
- { 0x10a1, {1, {0x2d01}}},
- { 0x10a2, {1, {0x2d02}}},
- { 0x10a3, {1, {0x2d03}}},
- { 0x10a4, {1, {0x2d04}}},
- { 0x10a5, {1, {0x2d05}}},
- { 0x10a6, {1, {0x2d06}}},
- { 0x10a7, {1, {0x2d07}}},
- { 0x10a8, {1, {0x2d08}}},
- { 0x10a9, {1, {0x2d09}}},
- { 0x10aa, {1, {0x2d0a}}},
- { 0x10ab, {1, {0x2d0b}}},
- { 0x10ac, {1, {0x2d0c}}},
- { 0x10ad, {1, {0x2d0d}}},
- { 0x10ae, {1, {0x2d0e}}},
- { 0x10af, {1, {0x2d0f}}},
- { 0x10b0, {1, {0x2d10}}},
- { 0x10b1, {1, {0x2d11}}},
- { 0x10b2, {1, {0x2d12}}},
- { 0x10b3, {1, {0x2d13}}},
- { 0x10b4, {1, {0x2d14}}},
- { 0x10b5, {1, {0x2d15}}},
- { 0x10b6, {1, {0x2d16}}},
- { 0x10b7, {1, {0x2d17}}},
- { 0x10b8, {1, {0x2d18}}},
- { 0x10b9, {1, {0x2d19}}},
- { 0x10ba, {1, {0x2d1a}}},
- { 0x10bb, {1, {0x2d1b}}},
- { 0x10bc, {1, {0x2d1c}}},
- { 0x10bd, {1, {0x2d1d}}},
- { 0x10be, {1, {0x2d1e}}},
- { 0x10bf, {1, {0x2d1f}}},
- { 0x10c0, {1, {0x2d20}}},
- { 0x10c1, {1, {0x2d21}}},
- { 0x10c2, {1, {0x2d22}}},
- { 0x10c3, {1, {0x2d23}}},
- { 0x10c4, {1, {0x2d24}}},
- { 0x10c5, {1, {0x2d25}}},
- { 0x1e00, {1, {0x1e01}}},
- { 0x1e02, {1, {0x1e03}}},
- { 0x1e04, {1, {0x1e05}}},
- { 0x1e06, {1, {0x1e07}}},
- { 0x1e08, {1, {0x1e09}}},
- { 0x1e0a, {1, {0x1e0b}}},
- { 0x1e0c, {1, {0x1e0d}}},
- { 0x1e0e, {1, {0x1e0f}}},
- { 0x1e10, {1, {0x1e11}}},
- { 0x1e12, {1, {0x1e13}}},
- { 0x1e14, {1, {0x1e15}}},
- { 0x1e16, {1, {0x1e17}}},
- { 0x1e18, {1, {0x1e19}}},
- { 0x1e1a, {1, {0x1e1b}}},
- { 0x1e1c, {1, {0x1e1d}}},
- { 0x1e1e, {1, {0x1e1f}}},
- { 0x1e20, {1, {0x1e21}}},
- { 0x1e22, {1, {0x1e23}}},
- { 0x1e24, {1, {0x1e25}}},
- { 0x1e26, {1, {0x1e27}}},
- { 0x1e28, {1, {0x1e29}}},
- { 0x1e2a, {1, {0x1e2b}}},
- { 0x1e2c, {1, {0x1e2d}}},
- { 0x1e2e, {1, {0x1e2f}}},
- { 0x1e30, {1, {0x1e31}}},
- { 0x1e32, {1, {0x1e33}}},
- { 0x1e34, {1, {0x1e35}}},
- { 0x1e36, {1, {0x1e37}}},
- { 0x1e38, {1, {0x1e39}}},
- { 0x1e3a, {1, {0x1e3b}}},
- { 0x1e3c, {1, {0x1e3d}}},
- { 0x1e3e, {1, {0x1e3f}}},
- { 0x1e40, {1, {0x1e41}}},
- { 0x1e42, {1, {0x1e43}}},
- { 0x1e44, {1, {0x1e45}}},
- { 0x1e46, {1, {0x1e47}}},
- { 0x1e48, {1, {0x1e49}}},
- { 0x1e4a, {1, {0x1e4b}}},
- { 0x1e4c, {1, {0x1e4d}}},
- { 0x1e4e, {1, {0x1e4f}}},
- { 0x1e50, {1, {0x1e51}}},
- { 0x1e52, {1, {0x1e53}}},
- { 0x1e54, {1, {0x1e55}}},
- { 0x1e56, {1, {0x1e57}}},
- { 0x1e58, {1, {0x1e59}}},
- { 0x1e5a, {1, {0x1e5b}}},
- { 0x1e5c, {1, {0x1e5d}}},
- { 0x1e5e, {1, {0x1e5f}}},
- { 0x1e60, {1, {0x1e61}}},
- { 0x1e62, {1, {0x1e63}}},
- { 0x1e64, {1, {0x1e65}}},
- { 0x1e66, {1, {0x1e67}}},
- { 0x1e68, {1, {0x1e69}}},
- { 0x1e6a, {1, {0x1e6b}}},
- { 0x1e6c, {1, {0x1e6d}}},
- { 0x1e6e, {1, {0x1e6f}}},
- { 0x1e70, {1, {0x1e71}}},
- { 0x1e72, {1, {0x1e73}}},
- { 0x1e74, {1, {0x1e75}}},
- { 0x1e76, {1, {0x1e77}}},
- { 0x1e78, {1, {0x1e79}}},
- { 0x1e7a, {1, {0x1e7b}}},
- { 0x1e7c, {1, {0x1e7d}}},
- { 0x1e7e, {1, {0x1e7f}}},
- { 0x1e80, {1, {0x1e81}}},
- { 0x1e82, {1, {0x1e83}}},
- { 0x1e84, {1, {0x1e85}}},
- { 0x1e86, {1, {0x1e87}}},
- { 0x1e88, {1, {0x1e89}}},
- { 0x1e8a, {1, {0x1e8b}}},
- { 0x1e8c, {1, {0x1e8d}}},
- { 0x1e8e, {1, {0x1e8f}}},
- { 0x1e90, {1, {0x1e91}}},
- { 0x1e92, {1, {0x1e93}}},
- { 0x1e94, {1, {0x1e95}}},
- { 0x1e96, {2, {0x0068, 0x0331}}},
- { 0x1e97, {2, {0x0074, 0x0308}}},
- { 0x1e98, {2, {0x0077, 0x030a}}},
- { 0x1e99, {2, {0x0079, 0x030a}}},
- { 0x1e9a, {2, {0x0061, 0x02be}}},
- { 0x1e9b, {1, {0x1e61}}},
- { 0x1ea0, {1, {0x1ea1}}},
- { 0x1ea2, {1, {0x1ea3}}},
- { 0x1ea4, {1, {0x1ea5}}},
- { 0x1ea6, {1, {0x1ea7}}},
- { 0x1ea8, {1, {0x1ea9}}},
- { 0x1eaa, {1, {0x1eab}}},
- { 0x1eac, {1, {0x1ead}}},
- { 0x1eae, {1, {0x1eaf}}},
- { 0x1eb0, {1, {0x1eb1}}},
- { 0x1eb2, {1, {0x1eb3}}},
- { 0x1eb4, {1, {0x1eb5}}},
- { 0x1eb6, {1, {0x1eb7}}},
- { 0x1eb8, {1, {0x1eb9}}},
- { 0x1eba, {1, {0x1ebb}}},
- { 0x1ebc, {1, {0x1ebd}}},
- { 0x1ebe, {1, {0x1ebf}}},
- { 0x1ec0, {1, {0x1ec1}}},
- { 0x1ec2, {1, {0x1ec3}}},
- { 0x1ec4, {1, {0x1ec5}}},
- { 0x1ec6, {1, {0x1ec7}}},
- { 0x1ec8, {1, {0x1ec9}}},
- { 0x1eca, {1, {0x1ecb}}},
- { 0x1ecc, {1, {0x1ecd}}},
- { 0x1ece, {1, {0x1ecf}}},
- { 0x1ed0, {1, {0x1ed1}}},
- { 0x1ed2, {1, {0x1ed3}}},
- { 0x1ed4, {1, {0x1ed5}}},
- { 0x1ed6, {1, {0x1ed7}}},
- { 0x1ed8, {1, {0x1ed9}}},
- { 0x1eda, {1, {0x1edb}}},
- { 0x1edc, {1, {0x1edd}}},
- { 0x1ede, {1, {0x1edf}}},
- { 0x1ee0, {1, {0x1ee1}}},
- { 0x1ee2, {1, {0x1ee3}}},
- { 0x1ee4, {1, {0x1ee5}}},
- { 0x1ee6, {1, {0x1ee7}}},
- { 0x1ee8, {1, {0x1ee9}}},
- { 0x1eea, {1, {0x1eeb}}},
- { 0x1eec, {1, {0x1eed}}},
- { 0x1eee, {1, {0x1eef}}},
- { 0x1ef0, {1, {0x1ef1}}},
- { 0x1ef2, {1, {0x1ef3}}},
- { 0x1ef4, {1, {0x1ef5}}},
- { 0x1ef6, {1, {0x1ef7}}},
- { 0x1ef8, {1, {0x1ef9}}},
- { 0x1f08, {1, {0x1f00}}},
- { 0x1f09, {1, {0x1f01}}},
- { 0x1f0a, {1, {0x1f02}}},
- { 0x1f0b, {1, {0x1f03}}},
- { 0x1f0c, {1, {0x1f04}}},
- { 0x1f0d, {1, {0x1f05}}},
- { 0x1f0e, {1, {0x1f06}}},
- { 0x1f0f, {1, {0x1f07}}},
- { 0x1f18, {1, {0x1f10}}},
- { 0x1f19, {1, {0x1f11}}},
- { 0x1f1a, {1, {0x1f12}}},
- { 0x1f1b, {1, {0x1f13}}},
- { 0x1f1c, {1, {0x1f14}}},
- { 0x1f1d, {1, {0x1f15}}},
- { 0x1f28, {1, {0x1f20}}},
- { 0x1f29, {1, {0x1f21}}},
- { 0x1f2a, {1, {0x1f22}}},
- { 0x1f2b, {1, {0x1f23}}},
- { 0x1f2c, {1, {0x1f24}}},
- { 0x1f2d, {1, {0x1f25}}},
- { 0x1f2e, {1, {0x1f26}}},
- { 0x1f2f, {1, {0x1f27}}},
- { 0x1f38, {1, {0x1f30}}},
- { 0x1f39, {1, {0x1f31}}},
- { 0x1f3a, {1, {0x1f32}}},
- { 0x1f3b, {1, {0x1f33}}},
- { 0x1f3c, {1, {0x1f34}}},
- { 0x1f3d, {1, {0x1f35}}},
- { 0x1f3e, {1, {0x1f36}}},
- { 0x1f3f, {1, {0x1f37}}},
- { 0x1f48, {1, {0x1f40}}},
- { 0x1f49, {1, {0x1f41}}},
- { 0x1f4a, {1, {0x1f42}}},
- { 0x1f4b, {1, {0x1f43}}},
- { 0x1f4c, {1, {0x1f44}}},
- { 0x1f4d, {1, {0x1f45}}},
- { 0x1f50, {2, {0x03c5, 0x0313}}},
- { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}},
- { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}},
- { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}},
- { 0x1f59, {1, {0x1f51}}},
- { 0x1f5b, {1, {0x1f53}}},
- { 0x1f5d, {1, {0x1f55}}},
- { 0x1f5f, {1, {0x1f57}}},
- { 0x1f68, {1, {0x1f60}}},
- { 0x1f69, {1, {0x1f61}}},
- { 0x1f6a, {1, {0x1f62}}},
- { 0x1f6b, {1, {0x1f63}}},
- { 0x1f6c, {1, {0x1f64}}},
- { 0x1f6d, {1, {0x1f65}}},
- { 0x1f6e, {1, {0x1f66}}},
- { 0x1f6f, {1, {0x1f67}}},
- { 0x1f80, {2, {0x1f00, 0x03b9}}},
- { 0x1f81, {2, {0x1f01, 0x03b9}}},
- { 0x1f82, {2, {0x1f02, 0x03b9}}},
- { 0x1f83, {2, {0x1f03, 0x03b9}}},
- { 0x1f84, {2, {0x1f04, 0x03b9}}},
- { 0x1f85, {2, {0x1f05, 0x03b9}}},
- { 0x1f86, {2, {0x1f06, 0x03b9}}},
- { 0x1f87, {2, {0x1f07, 0x03b9}}},
- { 0x1f88, {2, {0x1f00, 0x03b9}}},
- { 0x1f89, {2, {0x1f01, 0x03b9}}},
- { 0x1f8a, {2, {0x1f02, 0x03b9}}},
- { 0x1f8b, {2, {0x1f03, 0x03b9}}},
- { 0x1f8c, {2, {0x1f04, 0x03b9}}},
- { 0x1f8d, {2, {0x1f05, 0x03b9}}},
- { 0x1f8e, {2, {0x1f06, 0x03b9}}},
- { 0x1f8f, {2, {0x1f07, 0x03b9}}},
- { 0x1f90, {2, {0x1f20, 0x03b9}}},
- { 0x1f91, {2, {0x1f21, 0x03b9}}},
- { 0x1f92, {2, {0x1f22, 0x03b9}}},
- { 0x1f93, {2, {0x1f23, 0x03b9}}},
- { 0x1f94, {2, {0x1f24, 0x03b9}}},
- { 0x1f95, {2, {0x1f25, 0x03b9}}},
- { 0x1f96, {2, {0x1f26, 0x03b9}}},
- { 0x1f97, {2, {0x1f27, 0x03b9}}},
- { 0x1f98, {2, {0x1f20, 0x03b9}}},
- { 0x1f99, {2, {0x1f21, 0x03b9}}},
- { 0x1f9a, {2, {0x1f22, 0x03b9}}},
- { 0x1f9b, {2, {0x1f23, 0x03b9}}},
- { 0x1f9c, {2, {0x1f24, 0x03b9}}},
- { 0x1f9d, {2, {0x1f25, 0x03b9}}},
- { 0x1f9e, {2, {0x1f26, 0x03b9}}},
- { 0x1f9f, {2, {0x1f27, 0x03b9}}},
- { 0x1fa0, {2, {0x1f60, 0x03b9}}},
- { 0x1fa1, {2, {0x1f61, 0x03b9}}},
- { 0x1fa2, {2, {0x1f62, 0x03b9}}},
- { 0x1fa3, {2, {0x1f63, 0x03b9}}},
- { 0x1fa4, {2, {0x1f64, 0x03b9}}},
- { 0x1fa5, {2, {0x1f65, 0x03b9}}},
- { 0x1fa6, {2, {0x1f66, 0x03b9}}},
- { 0x1fa7, {2, {0x1f67, 0x03b9}}},
- { 0x1fa8, {2, {0x1f60, 0x03b9}}},
- { 0x1fa9, {2, {0x1f61, 0x03b9}}},
- { 0x1faa, {2, {0x1f62, 0x03b9}}},
- { 0x1fab, {2, {0x1f63, 0x03b9}}},
- { 0x1fac, {2, {0x1f64, 0x03b9}}},
- { 0x1fad, {2, {0x1f65, 0x03b9}}},
- { 0x1fae, {2, {0x1f66, 0x03b9}}},
- { 0x1faf, {2, {0x1f67, 0x03b9}}},
- { 0x1fb2, {2, {0x1f70, 0x03b9}}},
- { 0x1fb3, {2, {0x03b1, 0x03b9}}},
- { 0x1fb4, {2, {0x03ac, 0x03b9}}},
- { 0x1fb6, {2, {0x03b1, 0x0342}}},
- { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}},
- { 0x1fb8, {1, {0x1fb0}}},
- { 0x1fb9, {1, {0x1fb1}}},
- { 0x1fba, {1, {0x1f70}}},
- { 0x1fbb, {1, {0x1f71}}},
- { 0x1fbc, {2, {0x03b1, 0x03b9}}},
- { 0x1fbe, {1, {0x03b9}}},
- { 0x1fc2, {2, {0x1f74, 0x03b9}}},
- { 0x1fc3, {2, {0x03b7, 0x03b9}}},
- { 0x1fc4, {2, {0x03ae, 0x03b9}}},
- { 0x1fc6, {2, {0x03b7, 0x0342}}},
- { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}},
- { 0x1fc8, {1, {0x1f72}}},
- { 0x1fc9, {1, {0x1f73}}},
- { 0x1fca, {1, {0x1f74}}},
- { 0x1fcb, {1, {0x1f75}}},
- { 0x1fcc, {2, {0x03b7, 0x03b9}}},
- { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}},
- { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}},
- { 0x1fd6, {2, {0x03b9, 0x0342}}},
- { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}},
- { 0x1fd8, {1, {0x1fd0}}},
- { 0x1fd9, {1, {0x1fd1}}},
- { 0x1fda, {1, {0x1f76}}},
- { 0x1fdb, {1, {0x1f77}}},
- { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}},
- { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}},
- { 0x1fe4, {2, {0x03c1, 0x0313}}},
- { 0x1fe6, {2, {0x03c5, 0x0342}}},
- { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}},
- { 0x1fe8, {1, {0x1fe0}}},
- { 0x1fe9, {1, {0x1fe1}}},
- { 0x1fea, {1, {0x1f7a}}},
- { 0x1feb, {1, {0x1f7b}}},
- { 0x1fec, {1, {0x1fe5}}},
- { 0x1ff2, {2, {0x1f7c, 0x03b9}}},
- { 0x1ff3, {2, {0x03c9, 0x03b9}}},
- { 0x1ff4, {2, {0x03ce, 0x03b9}}},
- { 0x1ff6, {2, {0x03c9, 0x0342}}},
- { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}},
- { 0x1ff8, {1, {0x1f78}}},
- { 0x1ff9, {1, {0x1f79}}},
- { 0x1ffa, {1, {0x1f7c}}},
- { 0x1ffb, {1, {0x1f7d}}},
- { 0x1ffc, {2, {0x03c9, 0x03b9}}},
- { 0x2126, {1, {0x03c9}}},
- { 0x212a, {1, {0x006b}}},
- { 0x212b, {1, {0x00e5}}},
- { 0x2160, {1, {0x2170}}},
- { 0x2161, {1, {0x2171}}},
- { 0x2162, {1, {0x2172}}},
- { 0x2163, {1, {0x2173}}},
- { 0x2164, {1, {0x2174}}},
- { 0x2165, {1, {0x2175}}},
- { 0x2166, {1, {0x2176}}},
- { 0x2167, {1, {0x2177}}},
- { 0x2168, {1, {0x2178}}},
- { 0x2169, {1, {0x2179}}},
- { 0x216a, {1, {0x217a}}},
- { 0x216b, {1, {0x217b}}},
- { 0x216c, {1, {0x217c}}},
- { 0x216d, {1, {0x217d}}},
- { 0x216e, {1, {0x217e}}},
- { 0x216f, {1, {0x217f}}},
- { 0x24b6, {1, {0x24d0}}},
- { 0x24b7, {1, {0x24d1}}},
- { 0x24b8, {1, {0x24d2}}},
- { 0x24b9, {1, {0x24d3}}},
- { 0x24ba, {1, {0x24d4}}},
- { 0x24bb, {1, {0x24d5}}},
- { 0x24bc, {1, {0x24d6}}},
- { 0x24bd, {1, {0x24d7}}},
- { 0x24be, {1, {0x24d8}}},
- { 0x24bf, {1, {0x24d9}}},
- { 0x24c0, {1, {0x24da}}},
- { 0x24c1, {1, {0x24db}}},
- { 0x24c2, {1, {0x24dc}}},
- { 0x24c3, {1, {0x24dd}}},
- { 0x24c4, {1, {0x24de}}},
- { 0x24c5, {1, {0x24df}}},
- { 0x24c6, {1, {0x24e0}}},
- { 0x24c7, {1, {0x24e1}}},
- { 0x24c8, {1, {0x24e2}}},
- { 0x24c9, {1, {0x24e3}}},
- { 0x24ca, {1, {0x24e4}}},
- { 0x24cb, {1, {0x24e5}}},
- { 0x24cc, {1, {0x24e6}}},
- { 0x24cd, {1, {0x24e7}}},
- { 0x24ce, {1, {0x24e8}}},
- { 0x24cf, {1, {0x24e9}}},
- { 0x2c00, {1, {0x2c30}}},
- { 0x2c01, {1, {0x2c31}}},
- { 0x2c02, {1, {0x2c32}}},
- { 0x2c03, {1, {0x2c33}}},
- { 0x2c04, {1, {0x2c34}}},
- { 0x2c05, {1, {0x2c35}}},
- { 0x2c06, {1, {0x2c36}}},
- { 0x2c07, {1, {0x2c37}}},
- { 0x2c08, {1, {0x2c38}}},
- { 0x2c09, {1, {0x2c39}}},
- { 0x2c0a, {1, {0x2c3a}}},
- { 0x2c0b, {1, {0x2c3b}}},
- { 0x2c0c, {1, {0x2c3c}}},
- { 0x2c0d, {1, {0x2c3d}}},
- { 0x2c0e, {1, {0x2c3e}}},
- { 0x2c0f, {1, {0x2c3f}}},
- { 0x2c10, {1, {0x2c40}}},
- { 0x2c11, {1, {0x2c41}}},
- { 0x2c12, {1, {0x2c42}}},
- { 0x2c13, {1, {0x2c43}}},
- { 0x2c14, {1, {0x2c44}}},
- { 0x2c15, {1, {0x2c45}}},
- { 0x2c16, {1, {0x2c46}}},
- { 0x2c17, {1, {0x2c47}}},
- { 0x2c18, {1, {0x2c48}}},
- { 0x2c19, {1, {0x2c49}}},
- { 0x2c1a, {1, {0x2c4a}}},
- { 0x2c1b, {1, {0x2c4b}}},
- { 0x2c1c, {1, {0x2c4c}}},
- { 0x2c1d, {1, {0x2c4d}}},
- { 0x2c1e, {1, {0x2c4e}}},
- { 0x2c1f, {1, {0x2c4f}}},
- { 0x2c20, {1, {0x2c50}}},
- { 0x2c21, {1, {0x2c51}}},
- { 0x2c22, {1, {0x2c52}}},
- { 0x2c23, {1, {0x2c53}}},
- { 0x2c24, {1, {0x2c54}}},
- { 0x2c25, {1, {0x2c55}}},
- { 0x2c26, {1, {0x2c56}}},
- { 0x2c27, {1, {0x2c57}}},
- { 0x2c28, {1, {0x2c58}}},
- { 0x2c29, {1, {0x2c59}}},
- { 0x2c2a, {1, {0x2c5a}}},
- { 0x2c2b, {1, {0x2c5b}}},
- { 0x2c2c, {1, {0x2c5c}}},
- { 0x2c2d, {1, {0x2c5d}}},
- { 0x2c2e, {1, {0x2c5e}}},
- { 0x2c80, {1, {0x2c81}}},
- { 0x2c82, {1, {0x2c83}}},
- { 0x2c84, {1, {0x2c85}}},
- { 0x2c86, {1, {0x2c87}}},
- { 0x2c88, {1, {0x2c89}}},
- { 0x2c8a, {1, {0x2c8b}}},
- { 0x2c8c, {1, {0x2c8d}}},
- { 0x2c8e, {1, {0x2c8f}}},
- { 0x2c90, {1, {0x2c91}}},
- { 0x2c92, {1, {0x2c93}}},
- { 0x2c94, {1, {0x2c95}}},
- { 0x2c96, {1, {0x2c97}}},
- { 0x2c98, {1, {0x2c99}}},
- { 0x2c9a, {1, {0x2c9b}}},
- { 0x2c9c, {1, {0x2c9d}}},
- { 0x2c9e, {1, {0x2c9f}}},
- { 0x2ca0, {1, {0x2ca1}}},
- { 0x2ca2, {1, {0x2ca3}}},
- { 0x2ca4, {1, {0x2ca5}}},
- { 0x2ca6, {1, {0x2ca7}}},
- { 0x2ca8, {1, {0x2ca9}}},
- { 0x2caa, {1, {0x2cab}}},
- { 0x2cac, {1, {0x2cad}}},
- { 0x2cae, {1, {0x2caf}}},
- { 0x2cb0, {1, {0x2cb1}}},
- { 0x2cb2, {1, {0x2cb3}}},
- { 0x2cb4, {1, {0x2cb5}}},
- { 0x2cb6, {1, {0x2cb7}}},
- { 0x2cb8, {1, {0x2cb9}}},
- { 0x2cba, {1, {0x2cbb}}},
- { 0x2cbc, {1, {0x2cbd}}},
- { 0x2cbe, {1, {0x2cbf}}},
- { 0x2cc0, {1, {0x2cc1}}},
- { 0x2cc2, {1, {0x2cc3}}},
- { 0x2cc4, {1, {0x2cc5}}},
- { 0x2cc6, {1, {0x2cc7}}},
- { 0x2cc8, {1, {0x2cc9}}},
- { 0x2cca, {1, {0x2ccb}}},
- { 0x2ccc, {1, {0x2ccd}}},
- { 0x2cce, {1, {0x2ccf}}},
- { 0x2cd0, {1, {0x2cd1}}},
- { 0x2cd2, {1, {0x2cd3}}},
- { 0x2cd4, {1, {0x2cd5}}},
- { 0x2cd6, {1, {0x2cd7}}},
- { 0x2cd8, {1, {0x2cd9}}},
- { 0x2cda, {1, {0x2cdb}}},
- { 0x2cdc, {1, {0x2cdd}}},
- { 0x2cde, {1, {0x2cdf}}},
- { 0x2ce0, {1, {0x2ce1}}},
- { 0x2ce2, {1, {0x2ce3}}},
- { 0xfb00, {2, {0x0066, 0x0066}}},
- { 0xfb01, {2, {0x0066, 0x0069}}},
- { 0xfb02, {2, {0x0066, 0x006c}}},
- { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}},
- { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}},
- { 0xfb05, {2, {0x0073, 0x0074}}},
- { 0xfb06, {2, {0x0073, 0x0074}}},
- { 0xfb13, {2, {0x0574, 0x0576}}},
- { 0xfb14, {2, {0x0574, 0x0565}}},
- { 0xfb15, {2, {0x0574, 0x056b}}},
- { 0xfb16, {2, {0x057e, 0x0576}}},
- { 0xfb17, {2, {0x0574, 0x056d}}},
- { 0xff21, {1, {0xff41}}},
- { 0xff22, {1, {0xff42}}},
- { 0xff23, {1, {0xff43}}},
- { 0xff24, {1, {0xff44}}},
- { 0xff25, {1, {0xff45}}},
- { 0xff26, {1, {0xff46}}},
- { 0xff27, {1, {0xff47}}},
- { 0xff28, {1, {0xff48}}},
- { 0xff29, {1, {0xff49}}},
- { 0xff2a, {1, {0xff4a}}},
- { 0xff2b, {1, {0xff4b}}},
- { 0xff2c, {1, {0xff4c}}},
- { 0xff2d, {1, {0xff4d}}},
- { 0xff2e, {1, {0xff4e}}},
- { 0xff2f, {1, {0xff4f}}},
- { 0xff30, {1, {0xff50}}},
- { 0xff31, {1, {0xff51}}},
- { 0xff32, {1, {0xff52}}},
- { 0xff33, {1, {0xff53}}},
- { 0xff34, {1, {0xff54}}},
- { 0xff35, {1, {0xff55}}},
- { 0xff36, {1, {0xff56}}},
- { 0xff37, {1, {0xff57}}},
- { 0xff38, {1, {0xff58}}},
- { 0xff39, {1, {0xff59}}},
- { 0xff3a, {1, {0xff5a}}},
- { 0x10400, {1, {0x10428}}},
- { 0x10401, {1, {0x10429}}},
- { 0x10402, {1, {0x1042a}}},
- { 0x10403, {1, {0x1042b}}},
- { 0x10404, {1, {0x1042c}}},
- { 0x10405, {1, {0x1042d}}},
- { 0x10406, {1, {0x1042e}}},
- { 0x10407, {1, {0x1042f}}},
- { 0x10408, {1, {0x10430}}},
- { 0x10409, {1, {0x10431}}},
- { 0x1040a, {1, {0x10432}}},
- { 0x1040b, {1, {0x10433}}},
- { 0x1040c, {1, {0x10434}}},
- { 0x1040d, {1, {0x10435}}},
- { 0x1040e, {1, {0x10436}}},
- { 0x1040f, {1, {0x10437}}},
- { 0x10410, {1, {0x10438}}},
- { 0x10411, {1, {0x10439}}},
- { 0x10412, {1, {0x1043a}}},
- { 0x10413, {1, {0x1043b}}},
- { 0x10414, {1, {0x1043c}}},
- { 0x10415, {1, {0x1043d}}},
- { 0x10416, {1, {0x1043e}}},
- { 0x10417, {1, {0x1043f}}},
- { 0x10418, {1, {0x10440}}},
- { 0x10419, {1, {0x10441}}},
- { 0x1041a, {1, {0x10442}}},
- { 0x1041b, {1, {0x10443}}},
- { 0x1041c, {1, {0x10444}}},
- { 0x1041d, {1, {0x10445}}},
- { 0x1041e, {1, {0x10446}}},
- { 0x1041f, {1, {0x10447}}},
- { 0x10420, {1, {0x10448}}},
- { 0x10421, {1, {0x10449}}},
- { 0x10422, {1, {0x1044a}}},
- { 0x10423, {1, {0x1044b}}},
- { 0x10424, {1, {0x1044c}}},
- { 0x10425, {1, {0x1044d}}},
- { 0x10426, {1, {0x1044e}}},
- { 0x10427, {1, {0x1044f}}}
-};
-
-static const CaseFold_11_Type CaseFold_Locale[] = {
- { 0x0049, {1, {0x0069}}},
- { 0x0130, {2, {0x0069, 0x0307}}}
-};
-
-static const CaseUnfold_11_Type CaseUnfold_11[] = {
- { 0x0061, {1, {0x0041 }}},
- { 0x0062, {1, {0x0042 }}},
- { 0x0063, {1, {0x0043 }}},
- { 0x0064, {1, {0x0044 }}},
- { 0x0065, {1, {0x0045 }}},
- { 0x0066, {1, {0x0046 }}},
- { 0x0067, {1, {0x0047 }}},
- { 0x0068, {1, {0x0048 }}},
- { 0x006a, {1, {0x004a }}},
- { 0x006b, {2, {0x212a, 0x004b }}},
- { 0x006c, {1, {0x004c }}},
- { 0x006d, {1, {0x004d }}},
- { 0x006e, {1, {0x004e }}},
- { 0x006f, {1, {0x004f }}},
- { 0x0070, {1, {0x0050 }}},
- { 0x0071, {1, {0x0051 }}},
- { 0x0072, {1, {0x0052 }}},
- { 0x0073, {2, {0x0053, 0x017f }}},
- { 0x0074, {1, {0x0054 }}},
- { 0x0075, {1, {0x0055 }}},
- { 0x0076, {1, {0x0056 }}},
- { 0x0077, {1, {0x0057 }}},
- { 0x0078, {1, {0x0058 }}},
- { 0x0079, {1, {0x0059 }}},
- { 0x007a, {1, {0x005a }}},
- { 0x00e0, {1, {0x00c0 }}},
- { 0x00e1, {1, {0x00c1 }}},
- { 0x00e2, {1, {0x00c2 }}},
- { 0x00e3, {1, {0x00c3 }}},
- { 0x00e4, {1, {0x00c4 }}},
- { 0x00e5, {2, {0x212b, 0x00c5 }}},
- { 0x00e6, {1, {0x00c6 }}},
- { 0x00e7, {1, {0x00c7 }}},
- { 0x00e8, {1, {0x00c8 }}},
- { 0x00e9, {1, {0x00c9 }}},
- { 0x00ea, {1, {0x00ca }}},
- { 0x00eb, {1, {0x00cb }}},
- { 0x00ec, {1, {0x00cc }}},
- { 0x00ed, {1, {0x00cd }}},
- { 0x00ee, {1, {0x00ce }}},
- { 0x00ef, {1, {0x00cf }}},
- { 0x00f0, {1, {0x00d0 }}},
- { 0x00f1, {1, {0x00d1 }}},
- { 0x00f2, {1, {0x00d2 }}},
- { 0x00f3, {1, {0x00d3 }}},
- { 0x00f4, {1, {0x00d4 }}},
- { 0x00f5, {1, {0x00d5 }}},
- { 0x00f6, {1, {0x00d6 }}},
- { 0x00f8, {1, {0x00d8 }}},
- { 0x00f9, {1, {0x00d9 }}},
- { 0x00fa, {1, {0x00da }}},
- { 0x00fb, {1, {0x00db }}},
- { 0x00fc, {1, {0x00dc }}},
- { 0x00fd, {1, {0x00dd }}},
- { 0x00fe, {1, {0x00de }}},
- { 0x00ff, {1, {0x0178 }}},
- { 0x0101, {1, {0x0100 }}},
- { 0x0103, {1, {0x0102 }}},
- { 0x0105, {1, {0x0104 }}},
- { 0x0107, {1, {0x0106 }}},
- { 0x0109, {1, {0x0108 }}},
- { 0x010b, {1, {0x010a }}},
- { 0x010d, {1, {0x010c }}},
- { 0x010f, {1, {0x010e }}},
- { 0x0111, {1, {0x0110 }}},
- { 0x0113, {1, {0x0112 }}},
- { 0x0115, {1, {0x0114 }}},
- { 0x0117, {1, {0x0116 }}},
- { 0x0119, {1, {0x0118 }}},
- { 0x011b, {1, {0x011a }}},
- { 0x011d, {1, {0x011c }}},
- { 0x011f, {1, {0x011e }}},
- { 0x0121, {1, {0x0120 }}},
- { 0x0123, {1, {0x0122 }}},
- { 0x0125, {1, {0x0124 }}},
- { 0x0127, {1, {0x0126 }}},
- { 0x0129, {1, {0x0128 }}},
- { 0x012b, {1, {0x012a }}},
- { 0x012d, {1, {0x012c }}},
- { 0x012f, {1, {0x012e }}},
- { 0x0133, {1, {0x0132 }}},
- { 0x0135, {1, {0x0134 }}},
- { 0x0137, {1, {0x0136 }}},
- { 0x013a, {1, {0x0139 }}},
- { 0x013c, {1, {0x013b }}},
- { 0x013e, {1, {0x013d }}},
- { 0x0140, {1, {0x013f }}},
- { 0x0142, {1, {0x0141 }}},
- { 0x0144, {1, {0x0143 }}},
- { 0x0146, {1, {0x0145 }}},
- { 0x0148, {1, {0x0147 }}},
- { 0x014b, {1, {0x014a }}},
- { 0x014d, {1, {0x014c }}},
- { 0x014f, {1, {0x014e }}},
- { 0x0151, {1, {0x0150 }}},
- { 0x0153, {1, {0x0152 }}},
- { 0x0155, {1, {0x0154 }}},
- { 0x0157, {1, {0x0156 }}},
- { 0x0159, {1, {0x0158 }}},
- { 0x015b, {1, {0x015a }}},
- { 0x015d, {1, {0x015c }}},
- { 0x015f, {1, {0x015e }}},
- { 0x0161, {1, {0x0160 }}},
- { 0x0163, {1, {0x0162 }}},
- { 0x0165, {1, {0x0164 }}},
- { 0x0167, {1, {0x0166 }}},
- { 0x0169, {1, {0x0168 }}},
- { 0x016b, {1, {0x016a }}},
- { 0x016d, {1, {0x016c }}},
- { 0x016f, {1, {0x016e }}},
- { 0x0171, {1, {0x0170 }}},
- { 0x0173, {1, {0x0172 }}},
- { 0x0175, {1, {0x0174 }}},
- { 0x0177, {1, {0x0176 }}},
- { 0x017a, {1, {0x0179 }}},
- { 0x017c, {1, {0x017b }}},
- { 0x017e, {1, {0x017d }}},
- { 0x0183, {1, {0x0182 }}},
- { 0x0185, {1, {0x0184 }}},
- { 0x0188, {1, {0x0187 }}},
- { 0x018c, {1, {0x018b }}},
- { 0x0192, {1, {0x0191 }}},
- { 0x0195, {1, {0x01f6 }}},
- { 0x0199, {1, {0x0198 }}},
- { 0x019a, {1, {0x023d }}},
- { 0x019e, {1, {0x0220 }}},
- { 0x01a1, {1, {0x01a0 }}},
- { 0x01a3, {1, {0x01a2 }}},
- { 0x01a5, {1, {0x01a4 }}},
- { 0x01a8, {1, {0x01a7 }}},
- { 0x01ad, {1, {0x01ac }}},
- { 0x01b0, {1, {0x01af }}},
- { 0x01b4, {1, {0x01b3 }}},
- { 0x01b6, {1, {0x01b5 }}},
- { 0x01b9, {1, {0x01b8 }}},
- { 0x01bd, {1, {0x01bc }}},
- { 0x01bf, {1, {0x01f7 }}},
- { 0x01c6, {2, {0x01c4, 0x01c5 }}},
- { 0x01c9, {2, {0x01c7, 0x01c8 }}},
- { 0x01cc, {2, {0x01ca, 0x01cb }}},
- { 0x01ce, {1, {0x01cd }}},
- { 0x01d0, {1, {0x01cf }}},
- { 0x01d2, {1, {0x01d1 }}},
- { 0x01d4, {1, {0x01d3 }}},
- { 0x01d6, {1, {0x01d5 }}},
- { 0x01d8, {1, {0x01d7 }}},
- { 0x01da, {1, {0x01d9 }}},
- { 0x01dc, {1, {0x01db }}},
- { 0x01dd, {1, {0x018e }}},
- { 0x01df, {1, {0x01de }}},
- { 0x01e1, {1, {0x01e0 }}},
- { 0x01e3, {1, {0x01e2 }}},
- { 0x01e5, {1, {0x01e4 }}},
- { 0x01e7, {1, {0x01e6 }}},
- { 0x01e9, {1, {0x01e8 }}},
- { 0x01eb, {1, {0x01ea }}},
- { 0x01ed, {1, {0x01ec }}},
- { 0x01ef, {1, {0x01ee }}},
- { 0x01f3, {2, {0x01f1, 0x01f2 }}},
- { 0x01f5, {1, {0x01f4 }}},
- { 0x01f9, {1, {0x01f8 }}},
- { 0x01fb, {1, {0x01fa }}},
- { 0x01fd, {1, {0x01fc }}},
- { 0x01ff, {1, {0x01fe }}},
- { 0x0201, {1, {0x0200 }}},
- { 0x0203, {1, {0x0202 }}},
- { 0x0205, {1, {0x0204 }}},
- { 0x0207, {1, {0x0206 }}},
- { 0x0209, {1, {0x0208 }}},
- { 0x020b, {1, {0x020a }}},
- { 0x020d, {1, {0x020c }}},
- { 0x020f, {1, {0x020e }}},
- { 0x0211, {1, {0x0210 }}},
- { 0x0213, {1, {0x0212 }}},
- { 0x0215, {1, {0x0214 }}},
- { 0x0217, {1, {0x0216 }}},
- { 0x0219, {1, {0x0218 }}},
- { 0x021b, {1, {0x021a }}},
- { 0x021d, {1, {0x021c }}},
- { 0x021f, {1, {0x021e }}},
- { 0x0223, {1, {0x0222 }}},
- { 0x0225, {1, {0x0224 }}},
- { 0x0227, {1, {0x0226 }}},
- { 0x0229, {1, {0x0228 }}},
- { 0x022b, {1, {0x022a }}},
- { 0x022d, {1, {0x022c }}},
- { 0x022f, {1, {0x022e }}},
- { 0x0231, {1, {0x0230 }}},
- { 0x0233, {1, {0x0232 }}},
- { 0x023c, {1, {0x023b }}},
- { 0x0253, {1, {0x0181 }}},
- { 0x0254, {1, {0x0186 }}},
- { 0x0256, {1, {0x0189 }}},
- { 0x0257, {1, {0x018a }}},
- { 0x0259, {1, {0x018f }}},
- { 0x025b, {1, {0x0190 }}},
- { 0x0260, {1, {0x0193 }}},
- { 0x0263, {1, {0x0194 }}},
- { 0x0268, {1, {0x0197 }}},
- { 0x0269, {1, {0x0196 }}},
- { 0x026f, {1, {0x019c }}},
- { 0x0272, {1, {0x019d }}},
- { 0x0275, {1, {0x019f }}},
- { 0x0280, {1, {0x01a6 }}},
- { 0x0283, {1, {0x01a9 }}},
- { 0x0288, {1, {0x01ae }}},
- { 0x028a, {1, {0x01b1 }}},
- { 0x028b, {1, {0x01b2 }}},
- { 0x0292, {1, {0x01b7 }}},
- { 0x0294, {1, {0x0241 }}},
- { 0x03ac, {1, {0x0386 }}},
- { 0x03ad, {1, {0x0388 }}},
- { 0x03ae, {1, {0x0389 }}},
- { 0x03af, {1, {0x038a }}},
- { 0x03b1, {1, {0x0391 }}},
- { 0x03b2, {2, {0x0392, 0x03d0 }}},
- { 0x03b3, {1, {0x0393 }}},
- { 0x03b4, {1, {0x0394 }}},
- { 0x03b5, {2, {0x03f5, 0x0395 }}},
- { 0x03b6, {1, {0x0396 }}},
- { 0x03b7, {1, {0x0397 }}},
- { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}},
- { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}},
- { 0x03ba, {2, {0x03f0, 0x039a }}},
- { 0x03bb, {1, {0x039b }}},
- { 0x03bc, {2, {0x00b5, 0x039c }}},
- { 0x03bd, {1, {0x039d }}},
- { 0x03be, {1, {0x039e }}},
- { 0x03bf, {1, {0x039f }}},
- { 0x03c0, {2, {0x03a0, 0x03d6 }}},
- { 0x03c1, {2, {0x03f1, 0x03a1 }}},
- { 0x03c3, {2, {0x03a3, 0x03c2 }}},
- { 0x03c4, {1, {0x03a4 }}},
- { 0x03c5, {1, {0x03a5 }}},
- { 0x03c6, {2, {0x03a6, 0x03d5 }}},
- { 0x03c7, {1, {0x03a7 }}},
- { 0x03c8, {1, {0x03a8 }}},
- { 0x03c9, {2, {0x03a9, 0x2126 }}},
- { 0x03ca, {1, {0x03aa }}},
- { 0x03cb, {1, {0x03ab }}},
- { 0x03cc, {1, {0x038c }}},
- { 0x03cd, {1, {0x038e }}},
- { 0x03ce, {1, {0x038f }}},
- { 0x03d9, {1, {0x03d8 }}},
- { 0x03db, {1, {0x03da }}},
- { 0x03dd, {1, {0x03dc }}},
- { 0x03df, {1, {0x03de }}},
- { 0x03e1, {1, {0x03e0 }}},
- { 0x03e3, {1, {0x03e2 }}},
- { 0x03e5, {1, {0x03e4 }}},
- { 0x03e7, {1, {0x03e6 }}},
- { 0x03e9, {1, {0x03e8 }}},
- { 0x03eb, {1, {0x03ea }}},
- { 0x03ed, {1, {0x03ec }}},
- { 0x03ef, {1, {0x03ee }}},
- { 0x03f2, {1, {0x03f9 }}},
- { 0x03f8, {1, {0x03f7 }}},
- { 0x03fb, {1, {0x03fa }}},
- { 0x0430, {1, {0x0410 }}},
- { 0x0431, {1, {0x0411 }}},
- { 0x0432, {1, {0x0412 }}},
- { 0x0433, {1, {0x0413 }}},
- { 0x0434, {1, {0x0414 }}},
- { 0x0435, {1, {0x0415 }}},
- { 0x0436, {1, {0x0416 }}},
- { 0x0437, {1, {0x0417 }}},
- { 0x0438, {1, {0x0418 }}},
- { 0x0439, {1, {0x0419 }}},
- { 0x043a, {1, {0x041a }}},
- { 0x043b, {1, {0x041b }}},
- { 0x043c, {1, {0x041c }}},
- { 0x043d, {1, {0x041d }}},
- { 0x043e, {1, {0x041e }}},
- { 0x043f, {1, {0x041f }}},
- { 0x0440, {1, {0x0420 }}},
- { 0x0441, {1, {0x0421 }}},
- { 0x0442, {1, {0x0422 }}},
- { 0x0443, {1, {0x0423 }}},
- { 0x0444, {1, {0x0424 }}},
- { 0x0445, {1, {0x0425 }}},
- { 0x0446, {1, {0x0426 }}},
- { 0x0447, {1, {0x0427 }}},
- { 0x0448, {1, {0x0428 }}},
- { 0x0449, {1, {0x0429 }}},
- { 0x044a, {1, {0x042a }}},
- { 0x044b, {1, {0x042b }}},
- { 0x044c, {1, {0x042c }}},
- { 0x044d, {1, {0x042d }}},
- { 0x044e, {1, {0x042e }}},
- { 0x044f, {1, {0x042f }}},
- { 0x0450, {1, {0x0400 }}},
- { 0x0451, {1, {0x0401 }}},
- { 0x0452, {1, {0x0402 }}},
- { 0x0453, {1, {0x0403 }}},
- { 0x0454, {1, {0x0404 }}},
- { 0x0455, {1, {0x0405 }}},
- { 0x0456, {1, {0x0406 }}},
- { 0x0457, {1, {0x0407 }}},
- { 0x0458, {1, {0x0408 }}},
- { 0x0459, {1, {0x0409 }}},
- { 0x045a, {1, {0x040a }}},
- { 0x045b, {1, {0x040b }}},
- { 0x045c, {1, {0x040c }}},
- { 0x045d, {1, {0x040d }}},
- { 0x045e, {1, {0x040e }}},
- { 0x045f, {1, {0x040f }}},
- { 0x0461, {1, {0x0460 }}},
- { 0x0463, {1, {0x0462 }}},
- { 0x0465, {1, {0x0464 }}},
- { 0x0467, {1, {0x0466 }}},
- { 0x0469, {1, {0x0468 }}},
- { 0x046b, {1, {0x046a }}},
- { 0x046d, {1, {0x046c }}},
- { 0x046f, {1, {0x046e }}},
- { 0x0471, {1, {0x0470 }}},
- { 0x0473, {1, {0x0472 }}},
- { 0x0475, {1, {0x0474 }}},
- { 0x0477, {1, {0x0476 }}},
- { 0x0479, {1, {0x0478 }}},
- { 0x047b, {1, {0x047a }}},
- { 0x047d, {1, {0x047c }}},
- { 0x047f, {1, {0x047e }}},
- { 0x0481, {1, {0x0480 }}},
- { 0x048b, {1, {0x048a }}},
- { 0x048d, {1, {0x048c }}},
- { 0x048f, {1, {0x048e }}},
- { 0x0491, {1, {0x0490 }}},
- { 0x0493, {1, {0x0492 }}},
- { 0x0495, {1, {0x0494 }}},
- { 0x0497, {1, {0x0496 }}},
- { 0x0499, {1, {0x0498 }}},
- { 0x049b, {1, {0x049a }}},
- { 0x049d, {1, {0x049c }}},
- { 0x049f, {1, {0x049e }}},
- { 0x04a1, {1, {0x04a0 }}},
- { 0x04a3, {1, {0x04a2 }}},
- { 0x04a5, {1, {0x04a4 }}},
- { 0x04a7, {1, {0x04a6 }}},
- { 0x04a9, {1, {0x04a8 }}},
- { 0x04ab, {1, {0x04aa }}},
- { 0x04ad, {1, {0x04ac }}},
- { 0x04af, {1, {0x04ae }}},
- { 0x04b1, {1, {0x04b0 }}},
- { 0x04b3, {1, {0x04b2 }}},
- { 0x04b5, {1, {0x04b4 }}},
- { 0x04b7, {1, {0x04b6 }}},
- { 0x04b9, {1, {0x04b8 }}},
- { 0x04bb, {1, {0x04ba }}},
- { 0x04bd, {1, {0x04bc }}},
- { 0x04bf, {1, {0x04be }}},
- { 0x04c2, {1, {0x04c1 }}},
- { 0x04c4, {1, {0x04c3 }}},
- { 0x04c6, {1, {0x04c5 }}},
- { 0x04c8, {1, {0x04c7 }}},
- { 0x04ca, {1, {0x04c9 }}},
- { 0x04cc, {1, {0x04cb }}},
- { 0x04ce, {1, {0x04cd }}},
- { 0x04d1, {1, {0x04d0 }}},
- { 0x04d3, {1, {0x04d2 }}},
- { 0x04d5, {1, {0x04d4 }}},
- { 0x04d7, {1, {0x04d6 }}},
- { 0x04d9, {1, {0x04d8 }}},
- { 0x04db, {1, {0x04da }}},
- { 0x04dd, {1, {0x04dc }}},
- { 0x04df, {1, {0x04de }}},
- { 0x04e1, {1, {0x04e0 }}},
- { 0x04e3, {1, {0x04e2 }}},
- { 0x04e5, {1, {0x04e4 }}},
- { 0x04e7, {1, {0x04e6 }}},
- { 0x04e9, {1, {0x04e8 }}},
- { 0x04eb, {1, {0x04ea }}},
- { 0x04ed, {1, {0x04ec }}},
- { 0x04ef, {1, {0x04ee }}},
- { 0x04f1, {1, {0x04f0 }}},
- { 0x04f3, {1, {0x04f2 }}},
- { 0x04f5, {1, {0x04f4 }}},
- { 0x04f7, {1, {0x04f6 }}},
- { 0x04f9, {1, {0x04f8 }}},
- { 0x0501, {1, {0x0500 }}},
- { 0x0503, {1, {0x0502 }}},
- { 0x0505, {1, {0x0504 }}},
- { 0x0507, {1, {0x0506 }}},
- { 0x0509, {1, {0x0508 }}},
- { 0x050b, {1, {0x050a }}},
- { 0x050d, {1, {0x050c }}},
- { 0x050f, {1, {0x050e }}},
- { 0x0561, {1, {0x0531 }}},
- { 0x0562, {1, {0x0532 }}},
- { 0x0563, {1, {0x0533 }}},
- { 0x0564, {1, {0x0534 }}},
- { 0x0565, {1, {0x0535 }}},
- { 0x0566, {1, {0x0536 }}},
- { 0x0567, {1, {0x0537 }}},
- { 0x0568, {1, {0x0538 }}},
- { 0x0569, {1, {0x0539 }}},
- { 0x056a, {1, {0x053a }}},
- { 0x056b, {1, {0x053b }}},
- { 0x056c, {1, {0x053c }}},
- { 0x056d, {1, {0x053d }}},
- { 0x056e, {1, {0x053e }}},
- { 0x056f, {1, {0x053f }}},
- { 0x0570, {1, {0x0540 }}},
- { 0x0571, {1, {0x0541 }}},
- { 0x0572, {1, {0x0542 }}},
- { 0x0573, {1, {0x0543 }}},
- { 0x0574, {1, {0x0544 }}},
- { 0x0575, {1, {0x0545 }}},
- { 0x0576, {1, {0x0546 }}},
- { 0x0577, {1, {0x0547 }}},
- { 0x0578, {1, {0x0548 }}},
- { 0x0579, {1, {0x0549 }}},
- { 0x057a, {1, {0x054a }}},
- { 0x057b, {1, {0x054b }}},
- { 0x057c, {1, {0x054c }}},
- { 0x057d, {1, {0x054d }}},
- { 0x057e, {1, {0x054e }}},
- { 0x057f, {1, {0x054f }}},
- { 0x0580, {1, {0x0550 }}},
- { 0x0581, {1, {0x0551 }}},
- { 0x0582, {1, {0x0552 }}},
- { 0x0583, {1, {0x0553 }}},
- { 0x0584, {1, {0x0554 }}},
- { 0x0585, {1, {0x0555 }}},
- { 0x0586, {1, {0x0556 }}},
- { 0x1e01, {1, {0x1e00 }}},
- { 0x1e03, {1, {0x1e02 }}},
- { 0x1e05, {1, {0x1e04 }}},
- { 0x1e07, {1, {0x1e06 }}},
- { 0x1e09, {1, {0x1e08 }}},
- { 0x1e0b, {1, {0x1e0a }}},
- { 0x1e0d, {1, {0x1e0c }}},
- { 0x1e0f, {1, {0x1e0e }}},
- { 0x1e11, {1, {0x1e10 }}},
- { 0x1e13, {1, {0x1e12 }}},
- { 0x1e15, {1, {0x1e14 }}},
- { 0x1e17, {1, {0x1e16 }}},
- { 0x1e19, {1, {0x1e18 }}},
- { 0x1e1b, {1, {0x1e1a }}},
- { 0x1e1d, {1, {0x1e1c }}},
- { 0x1e1f, {1, {0x1e1e }}},
- { 0x1e21, {1, {0x1e20 }}},
- { 0x1e23, {1, {0x1e22 }}},
- { 0x1e25, {1, {0x1e24 }}},
- { 0x1e27, {1, {0x1e26 }}},
- { 0x1e29, {1, {0x1e28 }}},
- { 0x1e2b, {1, {0x1e2a }}},
- { 0x1e2d, {1, {0x1e2c }}},
- { 0x1e2f, {1, {0x1e2e }}},
- { 0x1e31, {1, {0x1e30 }}},
- { 0x1e33, {1, {0x1e32 }}},
- { 0x1e35, {1, {0x1e34 }}},
- { 0x1e37, {1, {0x1e36 }}},
- { 0x1e39, {1, {0x1e38 }}},
- { 0x1e3b, {1, {0x1e3a }}},
- { 0x1e3d, {1, {0x1e3c }}},
- { 0x1e3f, {1, {0x1e3e }}},
- { 0x1e41, {1, {0x1e40 }}},
- { 0x1e43, {1, {0x1e42 }}},
- { 0x1e45, {1, {0x1e44 }}},
- { 0x1e47, {1, {0x1e46 }}},
- { 0x1e49, {1, {0x1e48 }}},
- { 0x1e4b, {1, {0x1e4a }}},
- { 0x1e4d, {1, {0x1e4c }}},
- { 0x1e4f, {1, {0x1e4e }}},
- { 0x1e51, {1, {0x1e50 }}},
- { 0x1e53, {1, {0x1e52 }}},
- { 0x1e55, {1, {0x1e54 }}},
- { 0x1e57, {1, {0x1e56 }}},
- { 0x1e59, {1, {0x1e58 }}},
- { 0x1e5b, {1, {0x1e5a }}},
- { 0x1e5d, {1, {0x1e5c }}},
- { 0x1e5f, {1, {0x1e5e }}},
- { 0x1e61, {2, {0x1e9b, 0x1e60 }}},
- { 0x1e63, {1, {0x1e62 }}},
- { 0x1e65, {1, {0x1e64 }}},
- { 0x1e67, {1, {0x1e66 }}},
- { 0x1e69, {1, {0x1e68 }}},
- { 0x1e6b, {1, {0x1e6a }}},
- { 0x1e6d, {1, {0x1e6c }}},
- { 0x1e6f, {1, {0x1e6e }}},
- { 0x1e71, {1, {0x1e70 }}},
- { 0x1e73, {1, {0x1e72 }}},
- { 0x1e75, {1, {0x1e74 }}},
- { 0x1e77, {1, {0x1e76 }}},
- { 0x1e79, {1, {0x1e78 }}},
- { 0x1e7b, {1, {0x1e7a }}},
- { 0x1e7d, {1, {0x1e7c }}},
- { 0x1e7f, {1, {0x1e7e }}},
- { 0x1e81, {1, {0x1e80 }}},
- { 0x1e83, {1, {0x1e82 }}},
- { 0x1e85, {1, {0x1e84 }}},
- { 0x1e87, {1, {0x1e86 }}},
- { 0x1e89, {1, {0x1e88 }}},
- { 0x1e8b, {1, {0x1e8a }}},
- { 0x1e8d, {1, {0x1e8c }}},
- { 0x1e8f, {1, {0x1e8e }}},
- { 0x1e91, {1, {0x1e90 }}},
- { 0x1e93, {1, {0x1e92 }}},
- { 0x1e95, {1, {0x1e94 }}},
- { 0x1ea1, {1, {0x1ea0 }}},
- { 0x1ea3, {1, {0x1ea2 }}},
- { 0x1ea5, {1, {0x1ea4 }}},
- { 0x1ea7, {1, {0x1ea6 }}},
- { 0x1ea9, {1, {0x1ea8 }}},
- { 0x1eab, {1, {0x1eaa }}},
- { 0x1ead, {1, {0x1eac }}},
- { 0x1eaf, {1, {0x1eae }}},
- { 0x1eb1, {1, {0x1eb0 }}},
- { 0x1eb3, {1, {0x1eb2 }}},
- { 0x1eb5, {1, {0x1eb4 }}},
- { 0x1eb7, {1, {0x1eb6 }}},
- { 0x1eb9, {1, {0x1eb8 }}},
- { 0x1ebb, {1, {0x1eba }}},
- { 0x1ebd, {1, {0x1ebc }}},
- { 0x1ebf, {1, {0x1ebe }}},
- { 0x1ec1, {1, {0x1ec0 }}},
- { 0x1ec3, {1, {0x1ec2 }}},
- { 0x1ec5, {1, {0x1ec4 }}},
- { 0x1ec7, {1, {0x1ec6 }}},
- { 0x1ec9, {1, {0x1ec8 }}},
- { 0x1ecb, {1, {0x1eca }}},
- { 0x1ecd, {1, {0x1ecc }}},
- { 0x1ecf, {1, {0x1ece }}},
- { 0x1ed1, {1, {0x1ed0 }}},
- { 0x1ed3, {1, {0x1ed2 }}},
- { 0x1ed5, {1, {0x1ed4 }}},
- { 0x1ed7, {1, {0x1ed6 }}},
- { 0x1ed9, {1, {0x1ed8 }}},
- { 0x1edb, {1, {0x1eda }}},
- { 0x1edd, {1, {0x1edc }}},
- { 0x1edf, {1, {0x1ede }}},
- { 0x1ee1, {1, {0x1ee0 }}},
- { 0x1ee3, {1, {0x1ee2 }}},
- { 0x1ee5, {1, {0x1ee4 }}},
- { 0x1ee7, {1, {0x1ee6 }}},
- { 0x1ee9, {1, {0x1ee8 }}},
- { 0x1eeb, {1, {0x1eea }}},
- { 0x1eed, {1, {0x1eec }}},
- { 0x1eef, {1, {0x1eee }}},
- { 0x1ef1, {1, {0x1ef0 }}},
- { 0x1ef3, {1, {0x1ef2 }}},
- { 0x1ef5, {1, {0x1ef4 }}},
- { 0x1ef7, {1, {0x1ef6 }}},
- { 0x1ef9, {1, {0x1ef8 }}},
- { 0x1f00, {1, {0x1f08 }}},
- { 0x1f01, {1, {0x1f09 }}},
- { 0x1f02, {1, {0x1f0a }}},
- { 0x1f03, {1, {0x1f0b }}},
- { 0x1f04, {1, {0x1f0c }}},
- { 0x1f05, {1, {0x1f0d }}},
- { 0x1f06, {1, {0x1f0e }}},
- { 0x1f07, {1, {0x1f0f }}},
- { 0x1f10, {1, {0x1f18 }}},
- { 0x1f11, {1, {0x1f19 }}},
- { 0x1f12, {1, {0x1f1a }}},
- { 0x1f13, {1, {0x1f1b }}},
- { 0x1f14, {1, {0x1f1c }}},
- { 0x1f15, {1, {0x1f1d }}},
- { 0x1f20, {1, {0x1f28 }}},
- { 0x1f21, {1, {0x1f29 }}},
- { 0x1f22, {1, {0x1f2a }}},
- { 0x1f23, {1, {0x1f2b }}},
- { 0x1f24, {1, {0x1f2c }}},
- { 0x1f25, {1, {0x1f2d }}},
- { 0x1f26, {1, {0x1f2e }}},
- { 0x1f27, {1, {0x1f2f }}},
- { 0x1f30, {1, {0x1f38 }}},
- { 0x1f31, {1, {0x1f39 }}},
- { 0x1f32, {1, {0x1f3a }}},
- { 0x1f33, {1, {0x1f3b }}},
- { 0x1f34, {1, {0x1f3c }}},
- { 0x1f35, {1, {0x1f3d }}},
- { 0x1f36, {1, {0x1f3e }}},
- { 0x1f37, {1, {0x1f3f }}},
- { 0x1f40, {1, {0x1f48 }}},
- { 0x1f41, {1, {0x1f49 }}},
- { 0x1f42, {1, {0x1f4a }}},
- { 0x1f43, {1, {0x1f4b }}},
- { 0x1f44, {1, {0x1f4c }}},
- { 0x1f45, {1, {0x1f4d }}},
- { 0x1f51, {1, {0x1f59 }}},
- { 0x1f53, {1, {0x1f5b }}},
- { 0x1f55, {1, {0x1f5d }}},
- { 0x1f57, {1, {0x1f5f }}},
- { 0x1f60, {1, {0x1f68 }}},
- { 0x1f61, {1, {0x1f69 }}},
- { 0x1f62, {1, {0x1f6a }}},
- { 0x1f63, {1, {0x1f6b }}},
- { 0x1f64, {1, {0x1f6c }}},
- { 0x1f65, {1, {0x1f6d }}},
- { 0x1f66, {1, {0x1f6e }}},
- { 0x1f67, {1, {0x1f6f }}},
- { 0x1f70, {1, {0x1fba }}},
- { 0x1f71, {1, {0x1fbb }}},
- { 0x1f72, {1, {0x1fc8 }}},
- { 0x1f73, {1, {0x1fc9 }}},
- { 0x1f74, {1, {0x1fca }}},
- { 0x1f75, {1, {0x1fcb }}},
- { 0x1f76, {1, {0x1fda }}},
- { 0x1f77, {1, {0x1fdb }}},
- { 0x1f78, {1, {0x1ff8 }}},
- { 0x1f79, {1, {0x1ff9 }}},
- { 0x1f7a, {1, {0x1fea }}},
- { 0x1f7b, {1, {0x1feb }}},
- { 0x1f7c, {1, {0x1ffa }}},
- { 0x1f7d, {1, {0x1ffb }}},
- { 0x1fb0, {1, {0x1fb8 }}},
- { 0x1fb1, {1, {0x1fb9 }}},
- { 0x1fd0, {1, {0x1fd8 }}},
- { 0x1fd1, {1, {0x1fd9 }}},
- { 0x1fe0, {1, {0x1fe8 }}},
- { 0x1fe1, {1, {0x1fe9 }}},
- { 0x1fe5, {1, {0x1fec }}},
- { 0x2170, {1, {0x2160 }}},
- { 0x2171, {1, {0x2161 }}},
- { 0x2172, {1, {0x2162 }}},
- { 0x2173, {1, {0x2163 }}},
- { 0x2174, {1, {0x2164 }}},
- { 0x2175, {1, {0x2165 }}},
- { 0x2176, {1, {0x2166 }}},
- { 0x2177, {1, {0x2167 }}},
- { 0x2178, {1, {0x2168 }}},
- { 0x2179, {1, {0x2169 }}},
- { 0x217a, {1, {0x216a }}},
- { 0x217b, {1, {0x216b }}},
- { 0x217c, {1, {0x216c }}},
- { 0x217d, {1, {0x216d }}},
- { 0x217e, {1, {0x216e }}},
- { 0x217f, {1, {0x216f }}},
- { 0x24d0, {1, {0x24b6 }}},
- { 0x24d1, {1, {0x24b7 }}},
- { 0x24d2, {1, {0x24b8 }}},
- { 0x24d3, {1, {0x24b9 }}},
- { 0x24d4, {1, {0x24ba }}},
- { 0x24d5, {1, {0x24bb }}},
- { 0x24d6, {1, {0x24bc }}},
- { 0x24d7, {1, {0x24bd }}},
- { 0x24d8, {1, {0x24be }}},
- { 0x24d9, {1, {0x24bf }}},
- { 0x24da, {1, {0x24c0 }}},
- { 0x24db, {1, {0x24c1 }}},
- { 0x24dc, {1, {0x24c2 }}},
- { 0x24dd, {1, {0x24c3 }}},
- { 0x24de, {1, {0x24c4 }}},
- { 0x24df, {1, {0x24c5 }}},
- { 0x24e0, {1, {0x24c6 }}},
- { 0x24e1, {1, {0x24c7 }}},
- { 0x24e2, {1, {0x24c8 }}},
- { 0x24e3, {1, {0x24c9 }}},
- { 0x24e4, {1, {0x24ca }}},
- { 0x24e5, {1, {0x24cb }}},
- { 0x24e6, {1, {0x24cc }}},
- { 0x24e7, {1, {0x24cd }}},
- { 0x24e8, {1, {0x24ce }}},
- { 0x24e9, {1, {0x24cf }}},
- { 0x2c30, {1, {0x2c00 }}},
- { 0x2c31, {1, {0x2c01 }}},
- { 0x2c32, {1, {0x2c02 }}},
- { 0x2c33, {1, {0x2c03 }}},
- { 0x2c34, {1, {0x2c04 }}},
- { 0x2c35, {1, {0x2c05 }}},
- { 0x2c36, {1, {0x2c06 }}},
- { 0x2c37, {1, {0x2c07 }}},
- { 0x2c38, {1, {0x2c08 }}},
- { 0x2c39, {1, {0x2c09 }}},
- { 0x2c3a, {1, {0x2c0a }}},
- { 0x2c3b, {1, {0x2c0b }}},
- { 0x2c3c, {1, {0x2c0c }}},
- { 0x2c3d, {1, {0x2c0d }}},
- { 0x2c3e, {1, {0x2c0e }}},
- { 0x2c3f, {1, {0x2c0f }}},
- { 0x2c40, {1, {0x2c10 }}},
- { 0x2c41, {1, {0x2c11 }}},
- { 0x2c42, {1, {0x2c12 }}},
- { 0x2c43, {1, {0x2c13 }}},
- { 0x2c44, {1, {0x2c14 }}},
- { 0x2c45, {1, {0x2c15 }}},
- { 0x2c46, {1, {0x2c16 }}},
- { 0x2c47, {1, {0x2c17 }}},
- { 0x2c48, {1, {0x2c18 }}},
- { 0x2c49, {1, {0x2c19 }}},
- { 0x2c4a, {1, {0x2c1a }}},
- { 0x2c4b, {1, {0x2c1b }}},
- { 0x2c4c, {1, {0x2c1c }}},
- { 0x2c4d, {1, {0x2c1d }}},
- { 0x2c4e, {1, {0x2c1e }}},
- { 0x2c4f, {1, {0x2c1f }}},
- { 0x2c50, {1, {0x2c20 }}},
- { 0x2c51, {1, {0x2c21 }}},
- { 0x2c52, {1, {0x2c22 }}},
- { 0x2c53, {1, {0x2c23 }}},
- { 0x2c54, {1, {0x2c24 }}},
- { 0x2c55, {1, {0x2c25 }}},
- { 0x2c56, {1, {0x2c26 }}},
- { 0x2c57, {1, {0x2c27 }}},
- { 0x2c58, {1, {0x2c28 }}},
- { 0x2c59, {1, {0x2c29 }}},
- { 0x2c5a, {1, {0x2c2a }}},
- { 0x2c5b, {1, {0x2c2b }}},
- { 0x2c5c, {1, {0x2c2c }}},
- { 0x2c5d, {1, {0x2c2d }}},
- { 0x2c5e, {1, {0x2c2e }}},
- { 0x2c81, {1, {0x2c80 }}},
- { 0x2c83, {1, {0x2c82 }}},
- { 0x2c85, {1, {0x2c84 }}},
- { 0x2c87, {1, {0x2c86 }}},
- { 0x2c89, {1, {0x2c88 }}},
- { 0x2c8b, {1, {0x2c8a }}},
- { 0x2c8d, {1, {0x2c8c }}},
- { 0x2c8f, {1, {0x2c8e }}},
- { 0x2c91, {1, {0x2c90 }}},
- { 0x2c93, {1, {0x2c92 }}},
- { 0x2c95, {1, {0x2c94 }}},
- { 0x2c97, {1, {0x2c96 }}},
- { 0x2c99, {1, {0x2c98 }}},
- { 0x2c9b, {1, {0x2c9a }}},
- { 0x2c9d, {1, {0x2c9c }}},
- { 0x2c9f, {1, {0x2c9e }}},
- { 0x2ca1, {1, {0x2ca0 }}},
- { 0x2ca3, {1, {0x2ca2 }}},
- { 0x2ca5, {1, {0x2ca4 }}},
- { 0x2ca7, {1, {0x2ca6 }}},
- { 0x2ca9, {1, {0x2ca8 }}},
- { 0x2cab, {1, {0x2caa }}},
- { 0x2cad, {1, {0x2cac }}},
- { 0x2caf, {1, {0x2cae }}},
- { 0x2cb1, {1, {0x2cb0 }}},
- { 0x2cb3, {1, {0x2cb2 }}},
- { 0x2cb5, {1, {0x2cb4 }}},
- { 0x2cb7, {1, {0x2cb6 }}},
- { 0x2cb9, {1, {0x2cb8 }}},
- { 0x2cbb, {1, {0x2cba }}},
- { 0x2cbd, {1, {0x2cbc }}},
- { 0x2cbf, {1, {0x2cbe }}},
- { 0x2cc1, {1, {0x2cc0 }}},
- { 0x2cc3, {1, {0x2cc2 }}},
- { 0x2cc5, {1, {0x2cc4 }}},
- { 0x2cc7, {1, {0x2cc6 }}},
- { 0x2cc9, {1, {0x2cc8 }}},
- { 0x2ccb, {1, {0x2cca }}},
- { 0x2ccd, {1, {0x2ccc }}},
- { 0x2ccf, {1, {0x2cce }}},
- { 0x2cd1, {1, {0x2cd0 }}},
- { 0x2cd3, {1, {0x2cd2 }}},
- { 0x2cd5, {1, {0x2cd4 }}},
- { 0x2cd7, {1, {0x2cd6 }}},
- { 0x2cd9, {1, {0x2cd8 }}},
- { 0x2cdb, {1, {0x2cda }}},
- { 0x2cdd, {1, {0x2cdc }}},
- { 0x2cdf, {1, {0x2cde }}},
- { 0x2ce1, {1, {0x2ce0 }}},
- { 0x2ce3, {1, {0x2ce2 }}},
- { 0x2d00, {1, {0x10a0 }}},
- { 0x2d01, {1, {0x10a1 }}},
- { 0x2d02, {1, {0x10a2 }}},
- { 0x2d03, {1, {0x10a3 }}},
- { 0x2d04, {1, {0x10a4 }}},
- { 0x2d05, {1, {0x10a5 }}},
- { 0x2d06, {1, {0x10a6 }}},
- { 0x2d07, {1, {0x10a7 }}},
- { 0x2d08, {1, {0x10a8 }}},
- { 0x2d09, {1, {0x10a9 }}},
- { 0x2d0a, {1, {0x10aa }}},
- { 0x2d0b, {1, {0x10ab }}},
- { 0x2d0c, {1, {0x10ac }}},
- { 0x2d0d, {1, {0x10ad }}},
- { 0x2d0e, {1, {0x10ae }}},
- { 0x2d0f, {1, {0x10af }}},
- { 0x2d10, {1, {0x10b0 }}},
- { 0x2d11, {1, {0x10b1 }}},
- { 0x2d12, {1, {0x10b2 }}},
- { 0x2d13, {1, {0x10b3 }}},
- { 0x2d14, {1, {0x10b4 }}},
- { 0x2d15, {1, {0x10b5 }}},
- { 0x2d16, {1, {0x10b6 }}},
- { 0x2d17, {1, {0x10b7 }}},
- { 0x2d18, {1, {0x10b8 }}},
- { 0x2d19, {1, {0x10b9 }}},
- { 0x2d1a, {1, {0x10ba }}},
- { 0x2d1b, {1, {0x10bb }}},
- { 0x2d1c, {1, {0x10bc }}},
- { 0x2d1d, {1, {0x10bd }}},
- { 0x2d1e, {1, {0x10be }}},
- { 0x2d1f, {1, {0x10bf }}},
- { 0x2d20, {1, {0x10c0 }}},
- { 0x2d21, {1, {0x10c1 }}},
- { 0x2d22, {1, {0x10c2 }}},
- { 0x2d23, {1, {0x10c3 }}},
- { 0x2d24, {1, {0x10c4 }}},
- { 0x2d25, {1, {0x10c5 }}},
- { 0xff41, {1, {0xff21 }}},
- { 0xff42, {1, {0xff22 }}},
- { 0xff43, {1, {0xff23 }}},
- { 0xff44, {1, {0xff24 }}},
- { 0xff45, {1, {0xff25 }}},
- { 0xff46, {1, {0xff26 }}},
- { 0xff47, {1, {0xff27 }}},
- { 0xff48, {1, {0xff28 }}},
- { 0xff49, {1, {0xff29 }}},
- { 0xff4a, {1, {0xff2a }}},
- { 0xff4b, {1, {0xff2b }}},
- { 0xff4c, {1, {0xff2c }}},
- { 0xff4d, {1, {0xff2d }}},
- { 0xff4e, {1, {0xff2e }}},
- { 0xff4f, {1, {0xff2f }}},
- { 0xff50, {1, {0xff30 }}},
- { 0xff51, {1, {0xff31 }}},
- { 0xff52, {1, {0xff32 }}},
- { 0xff53, {1, {0xff33 }}},
- { 0xff54, {1, {0xff34 }}},
- { 0xff55, {1, {0xff35 }}},
- { 0xff56, {1, {0xff36 }}},
- { 0xff57, {1, {0xff37 }}},
- { 0xff58, {1, {0xff38 }}},
- { 0xff59, {1, {0xff39 }}},
- { 0xff5a, {1, {0xff3a }}},
- { 0x10428, {1, {0x10400 }}},
- { 0x10429, {1, {0x10401 }}},
- { 0x1042a, {1, {0x10402 }}},
- { 0x1042b, {1, {0x10403 }}},
- { 0x1042c, {1, {0x10404 }}},
- { 0x1042d, {1, {0x10405 }}},
- { 0x1042e, {1, {0x10406 }}},
- { 0x1042f, {1, {0x10407 }}},
- { 0x10430, {1, {0x10408 }}},
- { 0x10431, {1, {0x10409 }}},
- { 0x10432, {1, {0x1040a }}},
- { 0x10433, {1, {0x1040b }}},
- { 0x10434, {1, {0x1040c }}},
- { 0x10435, {1, {0x1040d }}},
- { 0x10436, {1, {0x1040e }}},
- { 0x10437, {1, {0x1040f }}},
- { 0x10438, {1, {0x10410 }}},
- { 0x10439, {1, {0x10411 }}},
- { 0x1043a, {1, {0x10412 }}},
- { 0x1043b, {1, {0x10413 }}},
- { 0x1043c, {1, {0x10414 }}},
- { 0x1043d, {1, {0x10415 }}},
- { 0x1043e, {1, {0x10416 }}},
- { 0x1043f, {1, {0x10417 }}},
- { 0x10440, {1, {0x10418 }}},
- { 0x10441, {1, {0x10419 }}},
- { 0x10442, {1, {0x1041a }}},
- { 0x10443, {1, {0x1041b }}},
- { 0x10444, {1, {0x1041c }}},
- { 0x10445, {1, {0x1041d }}},
- { 0x10446, {1, {0x1041e }}},
- { 0x10447, {1, {0x1041f }}},
- { 0x10448, {1, {0x10420 }}},
- { 0x10449, {1, {0x10421 }}},
- { 0x1044a, {1, {0x10422 }}},
- { 0x1044b, {1, {0x10423 }}},
- { 0x1044c, {1, {0x10424 }}},
- { 0x1044d, {1, {0x10425 }}},
- { 0x1044e, {1, {0x10426 }}},
- { 0x1044f, {1, {0x10427 }}}
-};
-
-static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = {
- { 0x0069, {1, {0x0049 }}}
-};
-
-static const CaseUnfold_12_Type CaseUnfold_12[] = {
- { {0x0061, 0x02be}, {1, {0x1e9a }}},
- { {0x0066, 0x0066}, {1, {0xfb00 }}},
- { {0x0066, 0x0069}, {1, {0xfb01 }}},
- { {0x0066, 0x006c}, {1, {0xfb02 }}},
- { {0x0068, 0x0331}, {1, {0x1e96 }}},
- { {0x006a, 0x030c}, {1, {0x01f0 }}},
- { {0x0073, 0x0073}, {1, {0x00df }}},
- { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}},
- { {0x0074, 0x0308}, {1, {0x1e97 }}},
- { {0x0077, 0x030a}, {1, {0x1e98 }}},
- { {0x0079, 0x030a}, {1, {0x1e99 }}},
- { {0x02bc, 0x006e}, {1, {0x0149 }}},
- { {0x03ac, 0x03b9}, {1, {0x1fb4 }}},
- { {0x03ae, 0x03b9}, {1, {0x1fc4 }}},
- { {0x03b1, 0x0342}, {1, {0x1fb6 }}},
- { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}},
- { {0x03b7, 0x0342}, {1, {0x1fc6 }}},
- { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}},
- { {0x03b9, 0x0342}, {1, {0x1fd6 }}},
- { {0x03c1, 0x0313}, {1, {0x1fe4 }}},
- { {0x03c5, 0x0313}, {1, {0x1f50 }}},
- { {0x03c5, 0x0342}, {1, {0x1fe6 }}},
- { {0x03c9, 0x0342}, {1, {0x1ff6 }}},
- { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}},
- { {0x03ce, 0x03b9}, {1, {0x1ff4 }}},
- { {0x0565, 0x0582}, {1, {0x0587 }}},
- { {0x0574, 0x0565}, {1, {0xfb14 }}},
- { {0x0574, 0x056b}, {1, {0xfb15 }}},
- { {0x0574, 0x056d}, {1, {0xfb17 }}},
- { {0x0574, 0x0576}, {1, {0xfb13 }}},
- { {0x057e, 0x0576}, {1, {0xfb16 }}},
- { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}},
- { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}},
- { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}},
- { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}},
- { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}},
- { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}},
- { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}},
- { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}},
- { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}},
- { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}},
- { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}},
- { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}},
- { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}},
- { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}},
- { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}},
- { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}},
- { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}},
- { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}},
- { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}},
- { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}},
- { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}},
- { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}},
- { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}},
- { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}},
- { {0x1f70, 0x03b9}, {1, {0x1fb2 }}},
- { {0x1f74, 0x03b9}, {1, {0x1fc2 }}},
- { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}}
-};
-
-static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = {
- { {0x0069, 0x0307}, {1, {0x0130 }}}
-};
-
-static const CaseUnfold_13_Type CaseUnfold_13[] = {
- { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}},
- { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}},
- { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}},
- { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}},
- { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}},
- { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}},
- { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}},
- { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}},
- { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}},
- { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}},
- { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}},
- { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}},
- { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}},
- { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}}
-};
-
-#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
-#define CODE_RANGES_NUM numberof(CodeRanges)
-
-extern int
-onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
-{
- if (
-#ifdef USE_UNICODE_PROPERTIES
- ctype <= ONIGENC_MAX_STD_CTYPE &&
-#endif
- code < 256) {
- return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
- }
-
- if (ctype >= CODE_RANGES_NUM) {
- return ONIGERR_TYPE_BUG;
- }
-
- return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
-}
-
-
-extern int
-onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
-{
- if (ctype >= CODE_RANGES_NUM) {
- return ONIGERR_TYPE_BUG;
- }
-
- *ranges = CodeRanges[ctype];
-
- return 0;
-}
-
-extern int
-onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
- const OnigCodePoint* ranges[],
- struct OnigEncodingTypeST* enc ARG_UNUSED)
-{
- *sb_out = 0x00;
- return onigenc_unicode_ctype_code_range(ctype, ranges);
-}
-
-#include "st.h"
-
-#define PROPERTY_NAME_MAX_SIZE MAX_WORD_LENGTH
-
-extern int
-onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
-{
- int len;
- int ctype;
- UChar buf[PROPERTY_NAME_MAX_SIZE];
- UChar *p;
- OnigCodePoint code;
-
- p = name;
- len = 0;
- for (p = name; p < end; p += enclen(enc, p, end)) {
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (code == ' ' || code == '-' || code == '_')
- continue;
- if (code >= 0x80)
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
-
- buf[len++] = (UChar )TOLOWER((unsigned char)code);
- if (len >= PROPERTY_NAME_MAX_SIZE)
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
- }
-
- buf[len] = 0;
-
- if ((ctype = uniname2ctype(buf, len)) < 0) {
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
- }
-
- return ctype;
-}
-
-
-static int
-code2_cmp(OnigCodePoint* x, OnigCodePoint* y)
-{
- if (x[0] == y[0] && x[1] == y[1]) return 0;
- return 1;
-}
-
-static st_index_t
-code2_hash(OnigCodePoint* x)
-{
- return (st_index_t )(x[0] + x[1]);
-}
-
-static const struct st_hash_type type_code2_hash = {
- code2_cmp,
- code2_hash,
-};
-
-static int
-code3_cmp(OnigCodePoint* x, OnigCodePoint* y)
-{
- if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 0;
- return 1;
-}
-
-static st_index_t
-code3_hash(OnigCodePoint* x)
-{
- return (st_index_t )(x[0] + x[1] + x[2]);
-}
-
-static const struct st_hash_type type_code3_hash = {
- code3_cmp,
- code3_hash,
-};
-
-
-static st_table* FoldTable; /* fold-1, fold-2, fold-3 */
-static st_table* Unfold1Table;
-static st_table* Unfold2Table;
-static st_table* Unfold3Table;
-static int CaseFoldInited = 0;
-
-static int init_case_fold_table(void)
-{
- const CaseFold_11_Type *p;
- const CaseUnfold_11_Type *p1;
- const CaseUnfold_12_Type *p2;
- const CaseUnfold_13_Type *p3;
- int i;
-
- THREAD_ATOMIC_START;
-
- FoldTable = st_init_numtable_with_size(1200);
- if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY;
- for (i = 0; i < numberof(CaseFold); i++) {
- p = &CaseFold[i];
- st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
- }
- for (i = 0; i < numberof(CaseFold_Locale); i++) {
- p = &CaseFold_Locale[i];
- st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
- }
-
- Unfold1Table = st_init_numtable_with_size(1000);
- if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY;
-
- for (i = 0; i < numberof(CaseUnfold_11); i++) {
- p1 = &CaseUnfold_11[i];
- st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
- }
- for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
- p1 = &CaseUnfold_11_Locale[i];
- st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
- }
-
- Unfold2Table = st_init_table_with_size(&type_code2_hash, 200);
- if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY;
-
- for (i = 0; i < numberof(CaseUnfold_12); i++) {
- p2 = &CaseUnfold_12[i];
- st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
- }
- for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
- p2 = &CaseUnfold_12_Locale[i];
- st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
- }
-
- Unfold3Table = st_init_table_with_size(&type_code3_hash, 30);
- if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY;
-
- for (i = 0; i < numberof(CaseUnfold_13); i++) {
- p3 = &CaseUnfold_13[i];
- st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
- }
-
- CaseFoldInited = 1;
- THREAD_ATOMIC_END;
- return 0;
-}
-
-extern int
-onigenc_unicode_mbc_case_fold(OnigEncoding enc,
- OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
- UChar* fold)
-{
- CodePointList3 *to;
- OnigCodePoint code;
- int i, len, rlen;
- const UChar *p = *pp;
-
- if (CaseFoldInited == 0) init_case_fold_table();
-
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- len = enclen(enc, p, end);
- *pp += len;
-
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
- if (code == 0x0049) {
- return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
- }
- else if (code == 0x0130) {
- return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
- }
- }
-#endif
-
- if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
- if (to->n == 1) {
- return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
- }
- else
- {
- rlen = 0;
- for (i = 0; i < to->n; i++) {
- len = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
- fold += len;
- rlen += len;
- }
- return rlen;
- }
- }
-
- for (i = 0; i < len; i++) {
- *fold++ = *p++;
- }
- return len;
-}
-
-extern int
-onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
- OnigApplyAllCaseFoldFunc f, void* arg,
- OnigEncoding enc ARG_UNUSED)
-{
- const CaseUnfold_11_Type* p11;
- OnigCodePoint code;
- int i, j, k, r;
-
- /* if (CaseFoldInited == 0) init_case_fold_table(); */
-
- for (i = 0; i < numberof(CaseUnfold_11); i++) {
- p11 = &CaseUnfold_11[i];
- for (j = 0; j < p11->to.n; j++) {
- code = p11->from;
- r = (*f)(p11->to.code[j], &code, 1, arg);
- if (r != 0) return r;
-
- code = p11->to.code[j];
- r = (*f)(p11->from, &code, 1, arg);
- if (r != 0) return r;
-
- for (k = 0; k < j; k++) {
- r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg);
- if (r != 0) return r;
-
- r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg);
- if (r != 0) return r;
- }
- }
- }
-
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
- code = 0x0131;
- r = (*f)(0x0049, &code, 1, arg);
- if (r != 0) return r;
- code = 0x0049;
- r = (*f)(0x0131, &code, 1, arg);
- if (r != 0) return r;
-
- code = 0x0130;
- r = (*f)(0x0069, &code, 1, arg);
- if (r != 0) return r;
- code = 0x0069;
- r = (*f)(0x0130, &code, 1, arg);
- if (r != 0) return r;
- }
- else {
-#endif
- for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
- p11 = &CaseUnfold_11_Locale[i];
- for (j = 0; j < p11->to.n; j++) {
- code = p11->from;
- r = (*f)(p11->to.code[j], &code, 1, arg);
- if (r != 0) return r;
-
- code = p11->to.code[j];
- r = (*f)(p11->from, &code, 1, arg);
- if (r != 0) return r;
-
- for (k = 0; k < j; k++) {
- r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]),
- 1, arg);
- if (r != 0) return r;
-
- r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]),
- 1, arg);
- if (r != 0) return r;
- }
- }
- }
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- }
-#endif
-
- if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- for (i = 0; i < numberof(CaseUnfold_12); i++) {
- for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
- r = (*f)(CaseUnfold_12[i].to.code[j],
- (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
- if (r != 0) return r;
-
- for (k = 0; k < CaseUnfold_12[i].to.n; k++) {
- if (k == j) continue;
-
- r = (*f)(CaseUnfold_12[i].to.code[j],
- (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg);
- if (r != 0) return r;
- }
- }
- }
-
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
-#endif
- for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
- for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
- r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
- (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg);
- if (r != 0) return r;
-
- for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) {
- if (k == j) continue;
-
- r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
- (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]),
- 1, arg);
- if (r != 0) return r;
- }
- }
- }
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- }
-#endif
-
- for (i = 0; i < numberof(CaseUnfold_13); i++) {
- for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
- r = (*f)(CaseUnfold_13[i].to.code[j],
- (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
- if (r != 0) return r;
-
- for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
- if (k == j) continue;
-
- r = (*f)(CaseUnfold_13[i].to.code[j],
- (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg);
- if (r != 0) return r;
- }
- }
- }
- }
-
- return 0;
-}
-
-extern int
-onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
- OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[])
-{
- int n, i, j, k, len;
- OnigCodePoint code, codes[3];
- CodePointList3 *to, *z3;
- CodePointList2 *z2;
-
- if (CaseFoldInited == 0) init_case_fold_table();
-
- n = 0;
-
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- len = enclen(enc, p, end);
-
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
- if (code == 0x0049) {
- items[0].byte_len = len;
- items[0].code_len = 1;
- items[0].code[0] = 0x0131;
- return 1;
- }
- else if (code == 0x0130) {
- items[0].byte_len = len;
- items[0].code_len = 1;
- items[0].code[0] = 0x0069;
- return 1;
- }
- else if (code == 0x0131) {
- items[0].byte_len = len;
- items[0].code_len = 1;
- items[0].code[0] = 0x0049;
- return 1;
- }
- else if (code == 0x0069) {
- items[0].byte_len = len;
- items[0].code_len = 1;
- items[0].code[0] = 0x0130;
- return 1;
- }
- }
-#endif
-
- if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
- if (to->n == 1) {
- OnigCodePoint orig_code = code;
-
- items[0].byte_len = len;
- items[0].code_len = 1;
- items[0].code[0] = to->code[0];
- n++;
-
- code = to->code[0];
- if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
- for (i = 0; i < to->n; i++) {
- if (to->code[i] != orig_code) {
- items[n].byte_len = len;
- items[n].code_len = 1;
- items[n].code[0] = to->code[i];
- n++;
- }
- }
- }
- }
- else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- OnigCodePoint cs[3][4];
- int fn, ncs[3];
-
- for (fn = 0; fn < to->n; fn++) {
- cs[fn][0] = to->code[fn];
- if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0],
- (void* )&z3) != 0) {
- for (i = 0; i < z3->n; i++) {
- cs[fn][i+1] = z3->code[i];
- }
- ncs[fn] = z3->n + 1;
- }
- else
- ncs[fn] = 1;
- }
-
- if (fn == 2) {
- for (i = 0; i < ncs[0]; i++) {
- for (j = 0; j < ncs[1]; j++) {
- items[n].byte_len = len;
- items[n].code_len = 2;
- items[n].code[0] = cs[0][i];
- items[n].code[1] = cs[1][j];
- n++;
- }
- }
-
- if (onig_st_lookup(Unfold2Table, (st_data_t )to->code,
- (void* )&z2) != 0) {
- for (i = 0; i < z2->n; i++) {
- if (z2->code[i] == code) continue;
-
- items[n].byte_len = len;
- items[n].code_len = 1;
- items[n].code[0] = z2->code[i];
- n++;
- }
- }
- }
- else {
- for (i = 0; i < ncs[0]; i++) {
- for (j = 0; j < ncs[1]; j++) {
- for (k = 0; k < ncs[2]; k++) {
- items[n].byte_len = len;
- items[n].code_len = 3;
- items[n].code[0] = cs[0][i];
- items[n].code[1] = cs[1][j];
- items[n].code[2] = cs[2][k];
- n++;
- }
- }
- }
-
- if (onig_st_lookup(Unfold3Table, (st_data_t )to->code,
- (void* )&z2) != 0) {
- for (i = 0; i < z2->n; i++) {
- if (z2->code[i] == code) continue;
-
- items[n].byte_len = len;
- items[n].code_len = 1;
- items[n].code[0] = z2->code[i];
- n++;
- }
- }
- }
-
- /* multi char folded code is not head of another folded multi char */
- flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
- }
- }
- else {
- if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
- for (i = 0; i < to->n; i++) {
- items[n].byte_len = len;
- items[n].code_len = 1;
- items[n].code[0] = to->code[i];
- n++;
- }
- }
- }
-
-
- if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- p += len;
- if (p < end) {
- int clen;
-
- codes[0] = code;
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
- && to->n == 1) {
- codes[1] = to->code[0];
- }
- else
- codes[1] = code;
-
- clen = enclen(enc, p, end);
- len += clen;
- if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
- for (i = 0; i < z2->n; i++) {
- items[n].byte_len = len;
- items[n].code_len = 1;
- items[n].code[0] = z2->code[i];
- n++;
- }
- }
-
- p += clen;
- if (p < end) {
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
- && to->n == 1) {
- codes[2] = to->code[0];
- }
- else
- codes[2] = code;
-
- clen = enclen(enc, p, end);
- len += clen;
- if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
- (void* )&z2) != 0) {
- for (i = 0; i < z2->n; i++) {
- items[n].byte_len = len;
- items[n].code_len = 1;
- items[n].code[0] = z2->code[i];
- n++;
- }
- }
- }
- }
- }
-
- return n;
-}
-#endif //INCLUDE_ENCODING
diff --git a/src/us_ascii.c b/src/us_ascii.c
deleted file mode 100644
index b6e3f50cf..000000000
--- a/src/us_ascii.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include "regenc.h"
-
-static int
-us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
-{
- if (*p & 0x80)
- return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
- return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
-}
-
-OnigEncodingDefine(us_ascii, US_ASCII) = {
- us_ascii_mbc_enc_len,
- "US-ASCII",/* name */
- 1, /* max byte length */
- 1, /* min byte length */
- onigenc_is_mbc_newline_0x0a,
- onigenc_single_byte_mbc_to_code,
- onigenc_single_byte_code_to_mbclen,
- onigenc_single_byte_code_to_mbc,
- onigenc_ascii_mbc_case_fold,
- onigenc_ascii_apply_all_case_fold,
- onigenc_ascii_get_case_fold_codes_by_str,
- onigenc_minimum_property_name_to_ctype,
- onigenc_ascii_is_code_ctype,
- onigenc_not_support_get_ctype_code_range,
- onigenc_single_byte_left_adjust_char_head,
- onigenc_always_true_is_allowed_reverse_match
-};
-ENC_ALIAS("ASCII", "US-ASCII")
-ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII")
-ENC_ALIAS("646", "US-ASCII")
-#endif //INCLUDE_ENCODING
diff --git a/src/utf_8.c b/src/utf_8.c
deleted file mode 100644
index c444a2053..000000000
--- a/src/utf_8.c
+++ /dev/null
@@ -1,460 +0,0 @@
-/**********************************************************************
- utf_8.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include "regenc.h"
-
-#define USE_INVALID_CODE_SCHEME
-
-#ifdef USE_INVALID_CODE_SCHEME
-/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
-#define INVALID_CODE_FE 0xfffffffe
-#define INVALID_CODE_FF 0xffffffff
-#define VALID_CODE_LIMIT 0x7fffffff
-#endif
-
-#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-
-static const int EncLen_UTF8[] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-typedef enum {
- FAILURE = -2,
- ACCEPT,
- S0, S1, S2, S3,
- S4, S5, S6, S7
-} state_t;
-#define A ACCEPT
-#define F FAILURE
-static const signed char trans[][0x100] = {
- { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
- /* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S4 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S5 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S6 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
- { /* S7 0 1 2 3 4 5 6 7 8 9 a b c d e f */
- /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
- /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
- },
-};
-#undef A
-#undef F
-
-static int
-mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
-{
- int firstbyte = *p++;
- state_t s;
- s = trans[0][firstbyte];
- if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
- ONIGENC_CONSTRUCT_MBCLEN_INVALID();
-
- if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-1);
- s = trans[s][*p++];
- if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
- ONIGENC_CONSTRUCT_MBCLEN_INVALID();
-
- if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2);
- s = trans[s][*p++];
- if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
- ONIGENC_CONSTRUCT_MBCLEN_INVALID();
-
- if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3);
- s = trans[s][*p++];
- return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
- ONIGENC_CONSTRUCT_MBCLEN_INVALID();
-}
-
-static int
-is_mbc_newline(const UChar* p, const UChar* end, OnigEncoding enc)
-{
- if (p < end) {
- if (*p == 0x0a) return 1;
-
-#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-#ifndef USE_CRNL_AS_LINE_TERMINATOR
- if (*p == 0x0d) return 1;
-#endif
- if (p + 1 < end) {
- if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
- return 1;
- if (p + 2 < end) {
- if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
- && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
- return 1;
- }
- }
-#endif
- }
-
- return 0;
-}
-
-static OnigCodePoint
-mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
-{
- int c, len;
- OnigCodePoint n;
-
- len = enclen(enc, p, end);
- c = *p++;
- if (len > 1) {
- len--;
- n = c & ((1 << (6 - len)) - 1);
- while (len--) {
- c = *p++;
- n = (n << 6) | (c & ((1 << 6) - 1));
- }
- return n;
- }
- else {
-#ifdef USE_INVALID_CODE_SCHEME
- if (c > 0xfd) {
- return ((c == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
- }
-#endif
- return (OnigCodePoint )c;
- }
-}
-
-static int
-code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
-{
- if ((code & 0xffffff80) == 0) return 1;
- else if ((code & 0xfffff800) == 0) return 2;
- else if ((code & 0xffff0000) == 0) return 3;
- else if ((code & 0xffe00000) == 0) return 4;
- else if ((code & 0xfc000000) == 0) return 5;
- else if ((code & 0x80000000) == 0) return 6;
-#ifdef USE_INVALID_CODE_SCHEME
- else if (code == INVALID_CODE_FE) return 1;
- else if (code == INVALID_CODE_FF) return 1;
-#endif
- else
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
-}
-
-static int
-code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
-{
-#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
-#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
-
- if ((code & 0xffffff80) == 0) {
- *buf = (UChar )code;
- return 1;
- }
- else {
- UChar *p = buf;
-
- if ((code & 0xfffff800) == 0) {
- *p++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
- }
- else if ((code & 0xffff0000) == 0) {
- *p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
- *p++ = UTF8_TRAILS(code, 6);
- }
- else if ((code & 0xffe00000) == 0) {
- *p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
- *p++ = UTF8_TRAILS(code, 12);
- *p++ = UTF8_TRAILS(code, 6);
- }
- else if ((code & 0xfc000000) == 0) {
- *p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
- *p++ = UTF8_TRAILS(code, 18);
- *p++ = UTF8_TRAILS(code, 12);
- *p++ = UTF8_TRAILS(code, 6);
- }
- else if ((code & 0x80000000) == 0) {
- *p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
- *p++ = UTF8_TRAILS(code, 24);
- *p++ = UTF8_TRAILS(code, 18);
- *p++ = UTF8_TRAILS(code, 12);
- *p++ = UTF8_TRAILS(code, 6);
- }
-#ifdef USE_INVALID_CODE_SCHEME
- else if (code == INVALID_CODE_FE) {
- *p = 0xfe;
- return 1;
- }
- else if (code == INVALID_CODE_FF) {
- *p = 0xff;
- return 1;
- }
-#endif
- else {
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- }
-
- *p++ = UTF8_TRAIL0(code);
- return (int)(p - buf);
- }
-}
-
-static int
-mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
- const UChar* end, UChar* fold, OnigEncoding enc)
-{
- const UChar* p = *pp;
-
- if (ONIGENC_IS_MBC_ASCII(p)) {
-#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
- if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
- if (*p == 0x49) {
- *fold++ = 0xc4;
- *fold = 0xb1;
- (*pp)++;
- return 2;
- }
- }
-#endif
-
- *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- (*pp)++;
- return 1; /* return byte length of converted char to lower */
- }
- else {
- return onigenc_unicode_mbc_case_fold(enc, flag, pp, end, fold);
- }
-}
-
-
-static int
-get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
- const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
-{
- *sb_out = 0x80;
- return onigenc_unicode_ctype_code_range(ctype, ranges);
-}
-
-
-static UChar*
-left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED)
-{
- const UChar *p;
-
- if (s <= start) return (UChar* )s;
- p = s;
-
- while (!utf8_islead(*p) && p > start) p--;
- return (UChar* )p;
-}
-
-static int
-get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[],
- OnigEncoding enc)
-{
- return onigenc_unicode_get_case_fold_codes_by_str(enc, flag, p, end, items);
-}
-
-OnigEncodingDefine(utf_8, UTF_8) = {
- mbc_enc_len,
- "UTF-8", /* name */
- 6, /* max byte length */
- 1, /* min byte length */
- is_mbc_newline,
- mbc_to_code,
- code_to_mbclen,
- code_to_mbc,
- mbc_case_fold,
- onigenc_unicode_apply_all_case_fold,
- get_case_fold_codes_by_str,
- onigenc_unicode_property_name_to_ctype,
- onigenc_unicode_is_code_ctype,
- get_ctype_code_range,
- left_adjust_char_head,
- onigenc_always_true_is_allowed_reverse_match
-};
-ENC_ALIAS("CP65001", "UTF-8")
-
-/*
- * Name: UTF8-MAC
- * Link: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/BPFileSystem.html
- * Link: http://developer.apple.com/qa/qa2001/qa1235.html
- * Link: http://developer.apple.com/jp/qa/qa2001/qa1235.html
- * Link: http://www.gnu.org/software/emacs/NEWS.23.2
- */
-ENC_REPLICATE("UTF8-MAC", "UTF-8")
-ENC_ALIAS("UTF-8-MAC", "UTF8-MAC")
-ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */
-
-#endif //INCLUDE_ENCODING