summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--.gitignore18
-rw-r--r--Makefile120
-rw-r--r--README.md4
-rw-r--r--Todo.txt14
-rw-r--r--bin/.gitkeep0
-rw-r--r--doc/.gitkeep0
-rw-r--r--ext/.gitkeep0
-rw-r--r--include/mrbconf.h42
-rw-r--r--include/mruby.h623
-rw-r--r--include/mruby/array.h42
-rw-r--r--include/mruby/class.h71
-rw-r--r--include/mruby/hash.h59
-rw-r--r--include/mruby/numeric.h17
-rw-r--r--include/mruby/object.h46
-rw-r--r--include/mruby/proc.h44
-rw-r--r--include/mruby/range.h21
-rw-r--r--include/mruby/string.h133
-rw-r--r--include/mruby/struct.h16
-rw-r--r--lib/.gitkeep0
-rw-r--r--mrblib/Makefile62
-rw-r--r--mrblib/array.rb79
-rw-r--r--mrblib/compar.rb63
-rw-r--r--mrblib/enum.rb266
-rw-r--r--mrblib/error.rb9
-rw-r--r--mrblib/hash.rb58
-rw-r--r--mrblib/init_mrblib.c17
-rw-r--r--mrblib/kernel.rb45
-rw-r--r--mrblib/numeric.rb42
-rw-r--r--mrblib/print.rb20
-rw-r--r--mrblib/range.rb30
-rw-r--r--mrblib/string.rb93
-rw-r--r--mrblib/struct.rb30
-rw-r--r--src/Makefile89
-rw-r--r--src/array.c1458
-rw-r--r--src/ascii.c96
-rw-r--r--src/cdump.c197
-rw-r--r--src/cdump.h8
-rw-r--r--src/class.c1049
-rw-r--r--src/codegen.c2273
-rw-r--r--src/compar.c144
-rw-r--r--src/compile.h73
-rw-r--r--src/crc.c28
-rw-r--r--src/dump.c697
-rw-r--r--src/dump.h118
-rw-r--r--src/encoding.c1663
-rw-r--r--src/encoding.h360
-rw-r--r--src/enum.c1077
-rw-r--r--src/error.c479
-rw-r--r--src/error.h21
-rw-r--r--src/etc.c280
-rw-r--r--src/eval_intern.h217
-rw-r--r--src/ext/.gitkeep0
-rw-r--r--src/gc.c1146
-rw-r--r--src/gc.h26
-rw-r--r--src/hash.c1436
-rw-r--r--src/init.c105
-rw-r--r--src/init_ext.c10
-rw-r--r--src/irep.h23
-rw-r--r--src/kernel.c1530
-rw-r--r--src/keywords50
-rw-r--r--src/lex.def216
-rw-r--r--src/load.c642
-rw-r--r--src/mdata.h53
-rw-r--r--src/method.h103
-rw-r--r--src/minimain.c117
-rw-r--r--src/name2ctype.h17985
-rw-r--r--src/node.h125
-rw-r--r--src/numeric.c2018
-rw-r--r--src/object.c632
-rw-r--r--src/oniguruma.h771
-rw-r--r--src/opcode.h148
-rw-r--r--src/parse.y5435
-rw-r--r--src/pool.c152
-rw-r--r--src/pool.h19
-rw-r--r--src/print.c69
-rw-r--r--src/proc.c92
-rw-r--r--src/range.c499
-rw-r--r--src/re.c3306
-rw-r--r--src/re.h85
-rw-r--r--src/regcomp.c6286
-rw-r--r--src/regenc.c909
-rw-r--r--src/regenc.h203
-rw-r--r--src/regerror.c375
-rw-r--r--src/regex.h32
-rw-r--r--src/regexec.c3757
-rw-r--r--src/regint.h833
-rw-r--r--src/regparse.c5600
-rw-r--r--src/regparse.h354
-rw-r--r--src/ritehash.h203
-rw-r--r--src/sprintf.c1112
-rw-r--r--src/st.c1283
-rw-r--r--src/st.h139
-rw-r--r--src/state.c88
-rw-r--r--src/string.c5234
-rw-r--r--src/struct.c824
-rw-r--r--src/symbol.c255
-rw-r--r--src/transcode.c4366
-rw-r--r--src/transcode_data.h109
-rw-r--r--src/unicode.c2607
-rw-r--r--src/us_ascii.c34
-rw-r--r--src/utf_8.c460
-rw-r--r--src/variable.c453
-rw-r--r--src/variable.h42
-rw-r--r--src/version.c87
-rw-r--r--src/version.h32
-rw-r--r--src/vm.c1544
-rw-r--r--src/vm_core.h414
-rw-r--r--tools/mrbc/Makefile73
-rw-r--r--tools/mrbc/mrbc.c210
-rw-r--r--tools/mruby/Makefile89
-rw-r--r--tools/mruby/mruby.c143
111 files changed, 87350 insertions, 4 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..76fdc3066
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,18 @@
+# /
+*.bak
+*.dylib
+*.inc
+*.o
+*.orig
+*.rej
+*.sav
+*.swp
+*.d
+*~
+.DS_Store
+.ccmalloc
+.svn
+/.git
+cscope.out
+mruby.exe
+y.tab.c
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..fdf54c21d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,120 @@
+# makefile description.
+# basic build file for Rite-VM(mruby)
+# 11.Apr.2011 coded by Kenji Yoshimoto.
+# 17.Jan.2012 coded by Hiroshi Mimaki.
+
+# project-specific macros
+# extension of the executable-file is modifiable(.exe .out ...)
+TARGET := bin/mrubysample
+RITEVM := lib/ritevm
+MRUBY := tools/mruby/mruby
+ifeq ($(OS),Windows_NT)
+EXE := $(TARGET).exe
+LIB := $(RITEVM).lib
+MRB := $(MRUBY).exe
+else
+EXE := $(TARGET)
+LIB := $(RITEVM).a
+MRB := $(MRUBY)
+endif
+MSRC := src/minimain.c
+YSRC := src/parse.y
+YC := src/y.tab.c
+EXCEPT1 := $(YC) $(MSRC)
+OBJM := $(patsubst %.c,%.o,$(MSRC))
+OBJY := $(patsubst %.c,%.o,$(YC))
+OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard src/*.c)))
+#OBJ2 := $(patsubst %.c,%.o,$(wildcard ext/regex/*.c))
+#OBJ3 := $(patsubst %.c,%.o,$(wildcard ext/enc/*.c))
+OBJS := $(OBJ1) $(OBJ2) $(OBJ3)
+# mruby libraries
+EXTC := mrblib/mrblib.c
+EXTRB := $(wildcard mrblib/*.rb)
+EXT0 := $(patsubst %.c,%.o,src/$(EXTC))
+# ext libraries
+EXTS := $(EXT0)
+
+# libraries, includes
+LIBS = $(LIB) -lm
+INCLUDES = -I./src -I./include
+
+# library for iOS
+IOSLIB := $(RITEVM)-ios.a
+IOSSIMLIB := $(RITEVM)-iossim.a
+IOSDEVLIB := $(RITEVM)-iosdev.a
+IOSSIMCC := xcrun -sdk iphoneos llvm-gcc-4.2 -arch i386 -isysroot "/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator5.0.sdk/"
+IOSDEVCC := xcrun -sdk iphoneos llvm-gcc-4.2 -arch armv7 -isysroot "/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS5.0.sdk/"
+
+# compiler, linker (gcc)
+CC = gcc
+LL = gcc
+YACC = bison
+DEBUG_MODE = 1
+ifeq ($(DEBUG_MODE),1)
+CFLAGS = -g
+else
+CFLAGS = -O3
+endif
+ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS)
+MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)"
+
+##############################
+# generic build targets, rules
+
+.PHONY : all
+all : $(LIB) $(MRB) $(EXE)
+ @echo "make: built targets of `pwd`"
+
+##############################
+# make library for iOS
+.PHONY : ios
+ios : $(IOSLIB)
+
+$(IOSLIB) : $(IOSSIMLIB) $(IOSDEVLIB)
+ lipo -arch i386 $(IOSSIMLIB) -arch armv7 $(IOSDEVLIB) -create -output $(IOSLIB)
+
+$(IOSSIMLIB) :
+ $(MAKE) clean -C src $(MAKE_FLAGS)
+ $(MAKE) -C src $(MAKE_FLAGS) CC="$(IOSSIMCC)" LL="$(IOSSIMCC)"
+ cp $(LIB) $(IOSSIMLIB)
+
+$(IOSDEVLIB) :
+ $(MAKE) clean -C src $(MAKE_FLAGS)
+ $(MAKE) -C src $(MAKE_FLAGS) CC="$(IOSDEVCC)" LL="$(IOSDEVCC)"
+ cp $(LIB) $(IOSDEVLIB)
+
+# executable constructed using linker from object files
+$(EXE) : $(OBJM) $(LIB)
+ $(LL) -o $@ $(OBJM) $(LIBS)
+
+-include $(OBJS:.o=.d)
+
+# src compile
+$(LIB) : $(EXTS) $(OBJS) $(OBJY)
+ $(MAKE) -C src $(MAKE_FLAGS)
+
+# mruby interpreter compile
+$(MRB) : $(EXTS) $(OBJS) $(OBJY)
+ $(MAKE) -C tools/mruby $(MAKE_FLAGS)
+
+# objects compiled from source
+$(OBJS) :
+ $(MAKE) -C src $(MAKE_FLAGS) && $(MAKE) -C tools/mruby $(MAKE_FLAGS)
+
+# extension libraries compile
+$(EXTS) : $(EXTRB)
+ $(MAKE) -C mrblib $(MAKE_FLAGS)
+
+# test module compile
+$(OBJM) : $(MSRC)
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(MSRC) -o $(OBJM)
+
+# clean up
+.PHONY : clean
+clean :
+ $(MAKE) clean -C src $(MAKE_FLAGS)
+ $(MAKE) clean -C tools/mruby $(MAKE_FLAGS)
+ -rm -f $(EXE) $(OBJM)
+ -rm -f $(OBJM:.o=.d)
+ -rm -f $(IOSLIB) $(IOSSIMLIB) $(IOSDEVLIB)
+ @echo "make: removing targets, objects and depend files of `pwd`"
diff --git a/README.md b/README.md
deleted file mode 100644
index bc094bb1b..000000000
--- a/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-mruby
-=====
-
-Lightweight Ruby \ No newline at end of file
diff --git a/Todo.txt b/Todo.txt
new file mode 100644
index 000000000..a14358341
--- /dev/null
+++ b/Todo.txt
@@ -0,0 +1,14 @@
+やること(まだできてないこと) / not yet complete
+
+* ヒアドキュメント / here document
+* 特殊変数 ($1,$2..) / special variables
+* super in aliased methods
+* BEGIN/END (対応しないんだっけ?) / BEGIN/END (not going to be supported, was it?)
+* const_missing
+* respond_to_missing
+
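+改善すること(できているが直すこと) / to improve (works, but needs fixing)
+
+* Hash (サイズを減らす。khashを使うか、順序を保存するか) / Hash (reduce size; use khash, or preserve order?)
+* stringEx (encoding削除、CODERANGE削除、UTF-8 or ASCII以外削除) / stringEx (remove encoding, remove CODERANGE, remove everything except UTF-8 or ASCII)
+* 気づいたら書き加える / add more items as they come up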
diff --git a/bin/.gitkeep b/bin/.gitkeep
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/bin/.gitkeep
diff --git a/doc/.gitkeep b/doc/.gitkeep
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/doc/.gitkeep
diff --git a/ext/.gitkeep b/ext/.gitkeep
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/ext/.gitkeep
diff --git a/include/mrbconf.h b/include/mrbconf.h
new file mode 100644
index 000000000..d4802a5e7
--- /dev/null
+++ b/include/mrbconf.h
@@ -0,0 +1,42 @@
+#ifndef MRUBYCONF_H
+#define MRUBYCONF_H
+
+#include <stdint.h>
+typedef double mrb_float; /* floating-point type carried by mrb_value */
+typedef int32_t mrb_int; /* integer type carried by mrb_value (Fixnum payload) */
+typedef intptr_t mrb_sym; /* symbol id; a pointer-sized integer */
+
+#define readint(p,base) strtol((p),NULL,(base)) /* parse integer text; end pointer is discarded, so bad input is not detected */
+#define readfloat(p) strtod((p),NULL) /* parse float text; end pointer is discarded, so bad input is not detected */
+
+#undef INCLUDE_ENCODING /* do not use encoding classes (ASCII only) */
+#define INCLUDE_ENCODING /* use UTF-8 encoding classes; this line follows the #undef, so the feature ends up enabled */
+
+#undef INCLUDE_REGEXP /* do not use regular expression classes */
+#define INCLUDE_REGEXP /* use regular expression classes; this line follows the #undef, so the feature ends up enabled */
+
+#ifdef INCLUDE_REGEXP
+# define INCLUDE_ENCODING /* Regexp depends on Encoding */
+#endif
+
+#undef HAVE_UNISTD_H /* Windows setting */
+#define HAVE_UNISTD_H /* Linux setting; this line follows the #undef, so the macro ends up defined */
+
+#define SIZEOF_INT 4 /* NOTE(review): all SIZEOF_* values are hard-coded constants, not probed from the compiler */
+#define SIZEOF_SHORT 2
+#define SIZEOF_LONG 4 /* NOTE(review): 4 matches a 32-bit long; confirm before building for LP64 targets */
+#define SIZEOF_LONG_LONG 8
+#define SIZEOF___INT64 0
+#define SIZEOF_VOIDP 4 /* NOTE(review): 4 matches 32-bit pointers; confirm before building for 64-bit targets */
+#define SIZEOF_FLOAT 4
+#define SIZEOF_DOUBLE 8
+
+#ifndef FALSE
+# define FALSE 0
+#endif
+
+#ifndef TRUE
+# define TRUE 1
+#endif
+
+#endif /* MRUBYCONF_H */
diff --git a/include/mruby.h b/include/mruby.h
new file mode 100644
index 000000000..506158d3c
--- /dev/null
+++ b/include/mruby.h
@@ -0,0 +1,623 @@
+#ifndef MRUBY_H
+#define MRUBY_H
+
+#include <stdlib.h>
+#include "mrbconf.h"
+
+enum mrb_vtype { /* type tags stored in mrb_value.tt; the numeric order is relied on by range-check macros */
+ MRB_TT_FALSE = 0, /* 0: shared by false and nil (see mrb_nil_p); lower bound of IMMEDIATE_P */
+ MRB_TT_FREE, /* 1 */
+ MRB_TT_TRUE, /* 2 */
+ MRB_TT_FIXNUM, /* 3 */
+ MRB_TT_SYMBOL, /* 4 */
+ MRB_TT_UNDEF, /* 5 */
+ MRB_TT_FLOAT, /* 6: upper bound of IMMEDIATE_P */
+ MRB_TT_OBJECT, /* 7: first tag for which mrb_gc_mark_value passes the payload to mrb_gc_mark */
+ MRB_TT_CLASS, /* 8 */
+ MRB_TT_MODULE, /* 9 */
+ MRB_TT_ICLASS, /* 10 */
+ MRB_TT_SCLASS, /* 11 */
+ MRB_TT_PROC, /* 12 */
+ MRB_TT_ARRAY, /* 13 */
+ MRB_TT_HASH, /* 14 */
+ MRB_TT_STRING, /* 15 */
+ MRB_TT_RANGE, /* 16 */
+ MRB_TT_REGEX, /* 17 */
+ MRB_TT_STRUCT, /* 18 */
+ MRB_TT_EXCEPTION, /* 19 */
+ MRB_TT_MATCH, /* 20 */
+ MRB_TT_FILE, /* 21 */
+ MRB_TT_ENV, /* 22 */
+ MRB_TT_DATA, /* 23 */
+ MRB_TT_THREAD, /* 24 */
+ MRB_TT_THREADGRP, /* 25 */
+ MRB_TT_MAXDEFINE /* 26: one past the last tag above */
+};
+
+typedef struct mrb_value { /* tagged value: `tt` tells which member of `value` is meaningful */
+ union {
+ mrb_float f; /* set by mrb_float_value (MRB_TT_FLOAT) */
+ void *p; /* set by mrb_obj_value; also zeroed by mrb_nil_value */
+ mrb_int i; /* set by mrb_fixnum_value and by the true/false/undef constructors */
+ mrb_sym sym; /* set by mrb_symbol_value (MRB_TT_SYMBOL) */
+ } value;
+ enum mrb_vtype tt:8; /* type tag, stored in an 8-bit bit-field */
+} mrb_value;
+
+#define mrb_type(o) (o).tt /* type tag of a value */
+#define mrb_nil_p(o) ((o).tt == MRB_TT_FALSE && !(o).value.i) /* nil is the FALSE tag with a zero integer payload */
+#define mrb_test(o) ((o).tt != MRB_TT_FALSE) /* truthiness: only FALSE-tagged values (false and nil) fail */
+#define mrb_fixnum(o) (o).value.i /* integer payload; does not check the tag */
+#define mrb_float(o) (o).value.f /* float payload; does not check the tag */
+#define mrb_symbol(o) (o).value.sym /* symbol payload; does not check the tag */
+#define mrb_object(o) (o).value.p /* pointer payload; does not check the tag */
+#define FIXNUM_P(o) ((o).tt == MRB_TT_FIXNUM) /* true when the tag is FIXNUM */
+#define UNDEF_P(o) ((o).tt == MRB_TT_UNDEF) /* true when the tag is UNDEF */
+
+#include "mruby/object.h"
+
+#define IMMEDIATE_P(x) ((mrb_type(x) >= MRB_TT_FALSE) && (mrb_type(x) <= MRB_TT_FLOAT)) /* tag lies in FALSE..FLOAT; evaluates x twice */
+#define SPECIAL_CONST_P(x) IMMEDIATE_P(x) /* alias of IMMEDIATE_P */
+#define SYMBOL_P(o) (mrb_type(o) == MRB_TT_SYMBOL) /* true when the tag is SYMBOL */
+#define RTEST(o) mrb_test(o) /* alias of mrb_test */
+
+#define FL_ABLE(x) (!SPECIAL_CONST_P(x)) /* only values outside the immediate tag range have RBASIC flags */
+#define FL_TEST(x,f) (FL_ABLE(x)?(RBASIC(x)->flags&(f)):0) /* masked flags, or 0 for immediate values */
+#define FL_ANY(x,f) FL_TEST(x,f) /* non-zero when any bit of f is set */
+#define FL_ALL(x,f) (FL_TEST(x,f) == (f)) /* true when every bit of f is set */
+#define FL_SET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags |= (f);} while (0) /* set bits f; no-op for immediate values */
+#define FL_UNSET(x,f) do {if (FL_ABLE(x)) RBASIC(x)->flags &= ~(f);} while (0) /* clear bits f; no-op for immediate values */
+
+static inline mrb_int
+mrb_special_const_p(mrb_value obj)
+{
+ /* function form of SPECIAL_CONST_P: 1 when the tag is in the immediate range, else 0 */
+ return SPECIAL_CONST_P(obj) ? 1 : 0;
+}
+static inline mrb_value
+mrb_fixnum_value(mrb_int i)
+{
+ mrb_value boxed; /* result: FIXNUM tag with i as the integer payload */
+
+ boxed.value.i = i;
+ boxed.tt = MRB_TT_FIXNUM;
+ return boxed;
+}
+
+static inline mrb_value
+mrb_float_value(mrb_float f)
+{
+ mrb_value boxed; /* result: FLOAT tag with f as the floating-point payload */
+
+ boxed.value.f = f;
+ boxed.tt = MRB_TT_FLOAT;
+ return boxed;
+}
+
+static inline mrb_value
+mrb_symbol_value(mrb_sym i)
+{
+ mrb_value boxed; /* result: SYMBOL tag with i as the symbol id */
+
+ boxed.value.sym = i;
+ boxed.tt = MRB_TT_SYMBOL;
+ return boxed;
+}
+
+static inline mrb_value
+mrb_obj_value(void *p)
+{
+ mrb_value boxed; /* result: pointer payload tagged with the object's own type */
+ struct RBasic *header = p; /* p is read as a struct RBasic, so it must be non-NULL and start with one */
+
+ boxed.value.p = p;
+ boxed.tt = header->tt;
+ return boxed;
+}
+
+static inline mrb_value
+mrb_false_value(void) /* (void) makes this a real prototype; an empty () leaves the arguments unchecked in C */
+{
+ mrb_value v; /* result: the value `false` */
+
+ v.tt = MRB_TT_FALSE;
+ v.value.i = 1; /* non-zero payload: mrb_nil_p() treats a FALSE tag with zero payload as nil */
+ return v;
+}
+
+static inline mrb_value
+mrb_nil_value(void) /* (void) makes this a real prototype; an empty () leaves the arguments unchecked in C */
+{
+ mrb_value v; /* result: the value `nil` */
+
+ v.tt = MRB_TT_FALSE; /* nil shares the FALSE tag and is told apart by its zero payload */
+ v.value.p = 0; /* the pointer member is cleared; mrb_class() tests value.p to pick nil_class */
+ return v;
+}
+
+static inline mrb_value
+mrb_true_value(void) /* (void) makes this a real prototype; an empty () leaves the arguments unchecked in C */
+{
+ mrb_value v; /* result: the value `true` */
+
+ v.tt = MRB_TT_TRUE;
+ v.value.i = 1;
+ return v;
+}
+
+static inline mrb_value
+mrb_undef_value(void) /* (void) makes this a real prototype; an empty () leaves the arguments unchecked in C */
+{
+ mrb_value v; /* result: a value carrying the UNDEF tag (see UNDEF_P) */
+
+ v.tt = MRB_TT_UNDEF;
+ v.value.i = 0;
+ return v;
+}
+
+typedef int32_t mrb_code;
+
+struct mrb_state;
+
+typedef void* (*mrb_allocf) (struct mrb_state *mrb, void*, size_t);
+
+#define MRB_ARENA_SIZE 1024 //256 up kusuda 2011/04/30
+#define ruby_debug (mrb_nil_value())
+#define ruby_verbose (mrb_nil_value())
+
+typedef struct {
+ mrb_sym mid;
+ struct RProc *proc;
+ int stackidx;
+ int nregs;
+ int argc;
+ mrb_code *pc;
+ int acc;
+ struct RClass *target_class;
+ int ridx;
+ int eidx;
+ struct REnv *env;
+} mrb_callinfo;
+
+enum gc_state {
+ GC_STATE_NONE = 0,
+ GC_STATE_MARK,
+ GC_STATE_SWEEP
+};
+
+typedef struct mrb_state {
+ void *jmp;
+
+ mrb_allocf allocf;
+
+ mrb_value *stack;
+ mrb_value *stbase, *stend;
+
+ mrb_callinfo *ci;
+ mrb_callinfo *cibase, *ciend;
+
+ mrb_code **rescue;
+ int rsize;
+ struct RProc **ensure;
+ int esize;
+
+ struct RObject *exc;
+ struct kh_iv *globals;
+
+ struct mrb_irep **irep;
+ size_t irep_len, irep_capa;
+
+ struct RClass *object_class;
+ struct RClass *class_class;
+ struct RClass *module_class;
+ struct RClass *proc_class;
+ struct RClass *string_class;
+ struct RClass *array_class;
+ struct RClass *hash_class;
+ struct RClass *range_class;
+#ifdef INCLUDE_REGEXP
+ struct RClass *regex_class;
+ struct RClass *match_class;
+#endif
+#ifdef INCLUDE_ENCODING
+ struct RClass *encode_class;
+ struct RClass *converter_class;
+#endif
+
+ struct RClass *float_class;
+ struct RClass *fixnum_class;
+ struct RClass *true_class;
+ struct RClass *false_class;
+ struct RClass *nil_class;
+ struct RClass *symbol_class;
+
+ struct RClass *kernel_module;
+ struct heap_page *heaps;
+ struct heap_page *sweeps;
+ struct heap_page *free_heaps;
+ size_t live; /* count of live objects */
+ struct RBasic *arena[MRB_ARENA_SIZE];
+ int arena_idx;
+
+ enum gc_state gc_state; /* state of gc */
+ int current_white_part; /* make white object by white_part */
+ struct RBasic *gray_list; /* list of gray objects */
+ struct RBasic *variable_gray_list; /* list of objects to be traversed atomically */
+ size_t gc_live_after_mark;
+ size_t gc_threshold;
+ mrb_int gc_interval_ratio;
+ mrb_int gc_step_ratio;
+
+ mrb_sym symidx;
+ struct kh_n2s *name2sym; /* symbol table */
+ struct kh_s2n *sym2name; /* reverse symbol table */
+ struct RNode *local_svar;/* regexp */
+
+ struct RClass *eException_class;
+ struct RClass *eStandardError_class;
+ struct RClass *eRuntimeError_class;
+} mrb_state;
+
+typedef mrb_value (*mrb_func_t)(mrb_state *mrb, mrb_value);
+typedef mrb_value (*mrb_funcargv_t)(mrb_state *mrb, mrb_value, int argc, mrb_value* argv);
+struct RClass *mrb_define_class(mrb_state *, const char*, struct RClass*);
+struct RClass *mrb_define_module(mrb_state *, const char*);
+mrb_value mrb_singleton_class(mrb_state*, mrb_value);
+void mrb_include_module(mrb_state*, struct RClass*, struct RClass*);
+
+void mrb_define_method(mrb_state*, struct RClass*, const char*, mrb_func_t,int);
+void mrb_define_class_method(mrb_state *, struct RClass *, const char *, mrb_func_t, int);
+void mrb_define_singleton_method(mrb_state*, void*, const char*, mrb_func_t,int);
+void mrb_define_const(mrb_state*, struct RClass*, const char *name, mrb_value);
+mrb_value mrb_instance_new(mrb_state *mrb, mrb_value cv);
+struct RClass * mrb_class_new(mrb_state *mrb, struct RClass *super);
+struct RClass * mrb_module_new(mrb_state *mrb);
+struct RClass * mrb_class_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym name);
+struct RClass * mrb_class_get(mrb_state *mrb, char *name);
+struct RClass * mrb_class_obj_get(mrb_state *mrb, char *name);
+
+mrb_value mrb_obj_dup(mrb_state *mrb, mrb_value obj);
+mrb_value mrb_check_to_integer(mrb_state *mrb, mrb_value val, const char *method);
+int mrb_obj_respond_to(struct RClass* c, mrb_sym mid);
+struct RClass * mrb_define_class_under(mrb_state *mrb, struct RClass *outer, const char *name, struct RClass *super);
+struct RClass * mrb_define_module_under(mrb_state *mrb, struct RClass *outer, const char *name);
+
+/* required arguments */
+#define ARGS_REQ(n) (((n)&0x1f) << 19)
+/* optional arguments */
+#define ARGS_OPT(n) (((n)&0x1f) << 14)
+/* rest argument */
+#define ARGS_REST() (1 << 13)
+/* required arguments after rest */
+#define ARGS_POST(n) (((n)&0x1f) << 8)
+/* keyword arguments (n of keys, kdict) */
+#define ARGS_KEY(n1,n2) ((((n1)&0x1f) << 3) | ((n2)?(1<<2):0))
+/* block argument */
+#define ARGS_BLOCK() (1 << 1)
+
+/* accept any number of arguments */
+#define ARGS_ANY() ARGS_REST()
+/* accept no arguments */
+#define ARGS_NONE() 0
+
+int mrb_get_args(mrb_state *mrb, const char *format, ...);
+
+mrb_value mrb_funcall(mrb_state*, mrb_value, const char*, int,...);
+mrb_value mrb_funcall_argv(mrb_state*, mrb_value, const char*, int, mrb_value*);
+mrb_value mrb_funcall_with_block(mrb_state*, mrb_value, const char*, int, mrb_value*, struct RProc*);
+mrb_sym mrb_intern(mrb_state*,const char*);
+const char *mrb_sym2name(mrb_state*,mrb_sym);
+mrb_value mrb_str_format(mrb_state *, int, const mrb_value *, mrb_value);
+
+void *mrb_malloc(mrb_state*, size_t);
+void *mrb_calloc(mrb_state*, size_t, size_t);
+void *mrb_realloc(mrb_state*, void*, size_t);
+void *mrb_obj_alloc(mrb_state*, enum mrb_vtype, struct RClass*);
+void *mrb_free(mrb_state*, void*);
+
+mrb_value mrb_str_new_cstr(mrb_state*, const char*);
+
+mrb_state* mrb_open(void);
+mrb_state* mrb_open_allocf(mrb_allocf);
+void mrb_close(mrb_state*);
+int mrb_checkstack(mrb_state*,int);
+
+mrb_value mrb_run(mrb_state*, struct RProc*, mrb_value);
+
+mrb_value mrb_p(mrb_state*, mrb_value);
+int mrb_obj_id(mrb_value obj);
+mrb_sym mrb_to_id(mrb_state *mrb, mrb_value name);
+
+int mrb_obj_equal(mrb_state*, mrb_value, mrb_value);
+int mrb_equal(mrb_state *mrb, mrb_value obj1, mrb_value obj2);
+mrb_value mrb_Integer(mrb_state *mrb, mrb_value val);
+mrb_value mrb_Float(mrb_state *mrb, mrb_value val);
+mrb_value mrb_inspect(mrb_state *mrb, mrb_value obj);
+int mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2);
+
+void mrb_garbage_collect(mrb_state*);
+void mrb_incremental_gc(mrb_state *);
+int mrb_gc_arena_save(mrb_state*);
+void mrb_gc_arena_restore(mrb_state*,int);
+void mrb_gc_mark(mrb_state*,struct RBasic*);
+#define mrb_gc_mark_value(mrb,val) do {\
+ if ((val).tt >= MRB_TT_OBJECT) mrb_gc_mark((mrb), mrb_object(val));\
+} while (0) /* marks val only when its tag is OBJECT or later; no trailing ';' so the macro is one statement inside if/else; val is evaluated twice */
+void mrb_gc_mark_gv(mrb_state*);
+void mrb_gc_free_gv(mrb_state*);
+void mrb_gc_mark_iv(mrb_state*, struct RObject*);
+size_t mrb_gc_mark_iv_size(mrb_state*, struct RObject*);
+void mrb_gc_free_iv(mrb_state*, struct RObject*);
+void mrb_gc_mark_mt(mrb_state*, struct RClass*);
+size_t mrb_gc_mark_mt_size(mrb_state*, struct RClass*);
+void mrb_gc_free_mt(mrb_state*, struct RClass*);
+void mrb_gc_mark_ht(mrb_state*, struct RClass*);
+size_t mrb_gc_mark_ht_size(mrb_state*, struct RClass*);
+void mrb_gc_free_ht(mrb_state*, struct RClass*);
+void mrb_field_write_barrier(mrb_state *, struct RBasic*, struct RBasic*);
+#define mrb_field_write_barrier_value(mrb, obj, val) do{\
+ if ((val).tt >= MRB_TT_OBJECT) mrb_field_write_barrier((mrb), (obj), mrb_object(val));\
+} while (0) /* barrier only when val's tag is OBJECT or later; val is parenthesized; no trailing ';' so the macro is one statement inside if/else */
+void mrb_write_barrier(mrb_state *, struct RBasic*);
+
+#define MRUBY_VERSION "Rite"
+
+#if 0
+#define DEBUG(x) x
+#else
+#define DEBUG(x)
+#endif
+
+mrb_value mrb_check_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method);
+mrb_value mrb_any_to_s(mrb_state *mrb, mrb_value obj);
+const char * mrb_obj_classname(mrb_state *mrb, mrb_value obj);
+struct RClass* mrb_obj_class(mrb_state *mrb, mrb_value obj);
+mrb_value mrb_class_path(mrb_state *mrb, struct RClass *c);
+mrb_value mrb_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method);
+mrb_int mrb_obj_is_kind_of(mrb_state *mrb, mrb_value obj, struct RClass *c);
+mrb_value mrb_obj_inspect(mrb_state *mrb, mrb_value self);
+mrb_value mrb_obj_clone(mrb_state *mrb, mrb_value self);
+mrb_value mrb_check_funcall(mrb_state *mrb, mrb_value recv, mrb_sym mid, int argc, mrb_value *argv);
+
+/* need to include <ctype.h> to use these macros */
+#ifndef ISPRINT
+//#define ISASCII(c) isascii((int)(unsigned char)(c))
+#define ISASCII(c) 1
+#undef ISPRINT
+#define ISPRINT(c) (ISASCII(c) && isprint((int)(unsigned char)(c)))
+#define ISSPACE(c) (ISASCII(c) && isspace((int)(unsigned char)(c)))
+#define ISUPPER(c) (ISASCII(c) && isupper((int)(unsigned char)(c)))
+#define ISLOWER(c) (ISASCII(c) && islower((int)(unsigned char)(c)))
+#define ISALNUM(c) (ISASCII(c) && isalnum((int)(unsigned char)(c)))
+#define ISALPHA(c) (ISASCII(c) && isalpha((int)(unsigned char)(c)))
+#define ISDIGIT(c) (ISASCII(c) && isdigit((int)(unsigned char)(c)))
+#define ISXDIGIT(c) (ISASCII(c) && isxdigit((int)(unsigned char)(c)))
+#endif
+
+extern mrb_value mrb_rs;
+extern mrb_value mrb_default_rs;
+
+int mrb_block_given_p(void);
+void mrb_raise(mrb_state *mrb, struct RClass *c, const char *fmt, ...);
+void rb_raise(struct RClass *c, const char *fmt, ...);
+void mrb_warn(const char *fmt, ...);
+void mrb_warning(const char *fmt, ...);
+void mrb_bug(const char *fmt, ...);
+
+#define E_TYPE_ERROR (mrb_class_obj_get(mrb, "TypeError"))
+#define E_ARGUMENT_ERROR (mrb_class_obj_get(mrb, "ArgumentError"))
+#define E_INDEX_ERROR (mrb_class_obj_get(mrb, "IndexError"))
+#define E_RANGE_ERROR (mrb_class_obj_get(mrb, "RangeError"))
+#define E_NAME_ERROR (mrb_class_obj_get(mrb, "NameError"))
+#define E_NOMETHOD_ERROR (mrb_class_obj_get(mrb, "NoMethodError"))
+#define E_SCRIPT_ERROR (mrb_class_obj_get(mrb, "ScriptError"))
+#define E_SYNTAX_ERROR (mrb_class_obj_get(mrb, "SyntaxError"))
+#define E_LOAD_ERROR (mrb_class_obj_get(mrb, "LoadError"))
+#define E_SYSTEMCALL_ERROR (mrb_class_obj_get(mrb, "SystemCallError"))
+#define E_LOCALJUMP_ERROR (mrb_class_obj_get(mrb, "LocalJumpError"))
+#define E_REGEXP_ERROR (mrb_class_obj_get(mrb, "RegexpError"))
+#define E_ZERODIVISION_ERROR (mrb_class_obj_get(mrb, "ZeroDivisionError"))
+
+#define E_ENCODING_ERROR (mrb_class_obj_get(mrb, "EncodingError"))
+#define E_NOTIMP_ERROR (mrb_class_obj_get(mrb, "NotImplementedError"))
+#define E_FLOATDOMAIN_ERROR (mrb_class_obj_get(mrb, "FloatDomainError"))
+
+#define E_KEY_ERROR (mrb_class_obj_get(mrb, "KeyError"))
+
+#define SYM2ID(x) ((x).value.sym)
+
+#define CONST_ID_CACHE(mrb, result, str) \
+ { \
+ static mrb_sym mrb_intern_id_cache;\
+ if (!mrb_intern_id_cache) \
+ mrb_intern_id_cache = mrb_intern(mrb, str); \
+ result mrb_intern_id_cache; \
+ }
+#define CONST_ID(mrb, var, str) \
+ do CONST_ID_CACHE(mrb, var =, str) while (0)
+
+#define NUM2CHR_internal(x) (((mrb_type(x) == MRB_TT_STRING)&&(RSTRING_LEN(x)>=1))?\
+ RSTRING_PTR(x)[0]:(char)(mrb_fixnum_number(x)&0xff))
+#ifdef __GNUC__
+# define NUM2CHR(x) __extension__ ({mrb_value num2chr_x = (x); NUM2CHR_internal(num2chr_x);})
+#else
+static inline char
+NUM2CHR(mrb_value x)
+{
+ return NUM2CHR_internal(x);
+}
+#endif
+mrb_value mrb_io_gets(mrb_state *mrb, mrb_value);
+mrb_value mrb_io_getbyte(mrb_state *mrb, mrb_value);
+mrb_value mrb_io_ungetc(mrb_state *, mrb_value, mrb_value);
+mrb_value mrb_io_ungetbyte(mrb_state *mrb, mrb_value, mrb_value);
+mrb_value mrb_io_close(mrb_state *mrb, mrb_value);
+mrb_value mrb_io_flush(mrb_state *mrb, mrb_value);
+mrb_value mrb_io_eof(mrb_value);
+mrb_value mrb_io_binmode(mrb_state *mrb, mrb_value);
+mrb_value mrb_io_ascii8bit_binmode(mrb_value);
+mrb_value mrb_io_addstr(mrb_state *mrb, mrb_value, mrb_value);
+mrb_value mrb_io_printf(mrb_state *mrb, int, mrb_value*, mrb_value);
+mrb_value mrb_io_print(mrb_state *mrb, int, mrb_value*, mrb_value);
+mrb_value mrb_io_puts(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value);
+mrb_value mrb_io_fdopen(int, int, const char*);
+mrb_value mrb_io_get_io(mrb_state *mrb, mrb_value);
+mrb_value mrb_file_open(mrb_state *mrb, const char*, const char*);
+mrb_value mrb_file_open_str(mrb_value, const char*);
+mrb_value mrb_gets(mrb_state *mrb);
+
+mrb_value mrb_yield(mrb_state *mrb, mrb_value v, mrb_value blk);
+mrb_value mrb_yield_argv(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv);
+mrb_value mrb_yield_with_self(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv, mrb_value self);
+mrb_value mrb_class_new_instance(mrb_state *mrb, int, mrb_value*, struct RClass *);
+mrb_value mrb_class_new_instance_m(mrb_state *mrb, mrb_value klass);
+
+mrb_value mrb_exec_recursive(mrb_state *mrb, mrb_value(*)(mrb_state *, mrb_value, mrb_value, int),mrb_value,void *);
+
+#ifndef xmalloc
+#define xmalloc malloc
+#define xrealloc realloc
+#define xcalloc calloc
+#define xfree free
+#endif
+
+void mrb_gc(void);
+#define thread_debug if(0)printf
+
+#define RUBY_VM 1 /* YARV */
+#define HAVE_NATIVETHREAD
+int ruby_native_thread_p(void);
+
+#define RUBY_EVENT_NONE 0x0000
+#define RUBY_EVENT_LINE 0x0001
+#define RUBY_EVENT_CLASS 0x0002
+#define RUBY_EVENT_END 0x0004
+#define RUBY_EVENT_CALL 0x0008
+#define RUBY_EVENT_RETURN 0x0010
+#define RUBY_EVENT_C_CALL 0x0020
+#define RUBY_EVENT_C_RETURN 0x0040
+#define RUBY_EVENT_RAISE 0x0080
+#define RUBY_EVENT_ALL 0xffff
+#define RUBY_EVENT_VM 0x10000
+#define RUBY_EVENT_SWITCH 0x20000
+#define RUBY_EVENT_COVERAGE 0x40000
+
+typedef unsigned int mrb_event_flag_t;
+typedef void (*mrb_event_hook_func_t)(mrb_state *mrb, mrb_event_flag_t, mrb_value data, mrb_value, mrb_sym, mrb_value klass);
+
+typedef struct mrb_event_hook_struct {
+ mrb_event_flag_t flag;
+ mrb_event_hook_func_t func;
+ mrb_value data;
+ struct mrb_event_hook_struct *next;
+} mrb_event_hook_t;
+
+#define RB_EVENT_HOOKS_HAVE_CALLBACK_DATA 1
+void mrb_add_event_hook(mrb_state *mrb, mrb_event_hook_func_t func, mrb_event_flag_t events,
+ mrb_value data);
+int mrb_remove_event_hook(mrb_event_hook_func_t func);
+mrb_value mrb_to_int(mrb_state *mrb, mrb_value val);
+void mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t);
+
+//#define RUBY_SETJMP(env) ${setjmp_prefix}setjmp(env${setjmp_sigmask+,0})
+//#define RUBY_LONGJMP(env,val) ${setjmp_prefix}longjmp(env,val)
+//#define RUBY_JMP_BUF ${setjmp_sigmask+${setjmp_prefix}}jmp_buf
+#define RUBY_SETJMP(env) __builtin_setjmp(env)
+#define RUBY_LONGJMP(env,val) __builtin_longjmp(env,1)//(env,val)
+//#define RUBY_JMP_BUF ${setjmp_sigmask+${setjmp_prefix}}jmp_buf
+#define select(n, r, w, e, t) select_large_fdset(n, r, w, e, t)
+
+//int RUBY_SETJMP(mrb_jmpbuf_t env); /* add kusuda */
+#define ruby_setjmp(env) RUBY_SETJMP(env)
+#define ruby_longjmp(env,val) RUBY_LONGJMP(env,val)
+
+#if defined PRIdPTR && !defined PRI_VALUE_PREFIX
+#define PRIdVALUE PRIdPTR
+#define PRIiVALUE PRIiPTR
+#define PRIoVALUE PRIoPTR
+#define PRIuVALUE PRIuPTR
+#define PRIxVALUE PRIxPTR
+#define PRIXVALUE PRIXPTR
+#else
+#define PRIdVALUE PRI_VALUE_PREFIX"d"
+#define PRIiVALUE PRI_VALUE_PREFIX"i"
+#define PRIoVALUE PRI_VALUE_PREFIX"o"
+#define PRIuVALUE PRI_VALUE_PREFIX"u"
+#define PRIxVALUE PRI_VALUE_PREFIX"x"
+#define PRIXVALUE PRI_VALUE_PREFIX"X"
+#endif
+#ifndef PRI_VALUE_PREFIX
+# define PRI_VALUE_PREFIX ""
+#endif
+
+#if defined PRIdPTR
+# define PRI_PTRDIFF_PREFIX "t"
+#elif SIZEOF_PTRDIFF_T == SIZEOF_INT
+# define PRI_PTRDIFF_PREFIX
+#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG
+# define PRI_PTRDIFF_PREFIX "l"
+#elif SIZEOF_PTRDIFF_T == SIZEOF_LONG_LONG
+# define PRI_PTRDIFF_PREFIX "ll"
+#else
+# define PRI_PTRDIFF_PREFIX
+#endif
+#define PRIdPTRDIFF PRI_PTRDIFF_PREFIX"d"
+#define PRIiPTRDIFF PRI_PTRDIFF_PREFIX"i"
+#define PRIoPTRDIFF PRI_PTRDIFF_PREFIX"o"
+#define PRIuPTRDIFF PRI_PTRDIFF_PREFIX"u"
+#define PRIxPTRDIFF PRI_PTRDIFF_PREFIX"x"
+#define PRIXPTRDIFF PRI_PTRDIFF_PREFIX"X"
+
+#if defined PRIdPTR
+# define PRI_SIZE_PREFIX "z"
+#elif SIZEOF_SIZE_T == SIZEOF_INT
+# define PRI_SIZE_PREFIX
+#elif SIZEOF_SIZE_T == SIZEOF_LONG
+# define PRI_SIZE_PREFIX "l"
+#elif SIZEOF_SIZE_T == SIZEOF_LONG_LONG
+# define PRI_SIZE_PREFIX "ll"
+#endif
+#define PRIdSIZE PRI_SIZE_PREFIX"d"
+#define PRIiSIZE PRI_SIZE_PREFIX"i"
+#define PRIoSIZE PRI_SIZE_PREFIX"o"
+#define PRIuSIZE PRI_SIZE_PREFIX"u"
+#define PRIxSIZE PRI_SIZE_PREFIX"x"
+#define PRIXSIZE PRI_SIZE_PREFIX"X"
+#define PRIdPTRDIFF PRI_PTRDIFF_PREFIX"d"
+
+#define KHASH 0
+#define STHASH 1
+#define BASICHASH 2
+#define HASH_CLASS_METHOD BASICHASH
+
+typedef enum call_type {
+ CALL_PUBLIC,
+ CALL_FCALL,
+ CALL_VCALL,
+ CALL_TYPE_MAX
+} call_type;
+
+/* compar.c */
+void mrb_cmperr(mrb_state *mrb, mrb_value x, mrb_value y);
+int mrb_cmpint(mrb_state *mrb, mrb_value val, mrb_value a, mrb_value b);
+
+#define REALLOC_N(mrb,var,type,n) (var)=(type*)mrb_realloc(mrb, (char*)(var), sizeof(type)*(n))
+
+#ifndef ANYARGS
+# ifdef __cplusplus
+# define ANYARGS ...
+# else
+# define ANYARGS
+# endif
+#endif
+void st_foreach_safe(mrb_state *mrb, void *table, int (*func)(ANYARGS), void * a);
+void mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const char *name2);
+const char *mrb_class_name(mrb_state *mrb, struct RClass* klass);
+void mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value val);
+
+mrb_value mrb_block_proc(void);
+int mrb_sourceline(void);
+void ruby_default_signal(int sig);
+mrb_value mrb_attr_get(mrb_state *mrb, mrb_value obj, mrb_sym id);
+
+#endif /* MRUBY_H */
diff --git a/include/mruby/array.h b/include/mruby/array.h
new file mode 100644
index 000000000..6f382ef6a
--- /dev/null
+++ b/include/mruby/array.h
@@ -0,0 +1,42 @@
+#ifndef MRUBY_ARRAY_H
+#define MRUBY_ARRAY_H
+
+struct RArray {
+ MRUBY_OBJECT_HEADER;
+ size_t len;
+ size_t capa;
+ mrb_value *buf;
+};
+
+#define mrb_ary_ptr(v) ((struct RArray*)((v).value.p))
+#define mrb_ary_value(p) mrb_obj_value((void*)(p))
+#define RARRAY(v) ((struct RArray*)((v).value.p))
+
+#define RARRAY_LEN(a) (RARRAY(a)->len)
+#define RARRAY_PTR(a) (RARRAY(a)->buf)
+
+mrb_value mrb_ary_new_capa(mrb_state*, size_t);
+mrb_value mrb_ary_new(mrb_state *mrb);
+mrb_value mrb_ary_new_elts(mrb_state *mrb, long n, const mrb_value *elts);
+void mrb_ary_concat(mrb_state*, mrb_value, mrb_value);
+mrb_value mrb_ary_splat(mrb_state*, mrb_value);
+void mrb_ary_push(mrb_state*, mrb_value, mrb_value);
+mrb_value mrb_ary_pop(mrb_state *mrb, mrb_value ary);
+mrb_value mrb_ary_new_from_values(mrb_state *mrb, mrb_value *vals, size_t size);
+mrb_value mrb_ary_aget(mrb_state *mrb, mrb_value self);
+mrb_value mrb_ary_ref(mrb_state *mrb, mrb_value ary, mrb_int n);
+void mrb_ary_set(mrb_state *mrb, mrb_value ary, mrb_int n, mrb_value val);
+int mrb_ary_len(mrb_state *mrb, mrb_value ary);
+mrb_value mrb_ary_replace_m(mrb_state *mrb, mrb_value self);
+void mrb_ary_replace(mrb_state *mrb, struct RArray *a, mrb_value *argv, size_t len);
+mrb_value mrb_check_array_type(mrb_state *mrb, mrb_value self);
+mrb_value mrb_ary_unshift(mrb_state *mrb, mrb_value self, mrb_value item);
+mrb_value mrb_ary_new4(mrb_state *mrb, long n, const mrb_value *elts);
+mrb_value mrb_assoc_new(mrb_state *mrb, mrb_value car, mrb_value cdr);
+mrb_value mrb_ary_entry(mrb_value ary, long offset);
+void mrb_mem_clear(mrb_value *mem, long size);
+mrb_value mrb_ary_tmp_new(mrb_state *mrb, long capa);
+mrb_value mrb_ary_sort(mrb_state *mrb, mrb_value ary);
+mrb_value mrb_ary_shift(mrb_state *mrb, mrb_value self);
+
+#endif /* MRUBY_ARRAY_H */
diff --git a/include/mruby/class.h b/include/mruby/class.h
new file mode 100644
index 000000000..470f517c3
--- /dev/null
+++ b/include/mruby/class.h
@@ -0,0 +1,71 @@
+#ifndef MRUBY_CLASS_H
+#define MRUBY_CLASS_H
+
+/* Heap object for a class; the module definition functions below return the same type. */
+struct RClass {
+ MRUBY_OBJECT_HEADER;
+ struct kh_iv *iv;       /* read through RCLASS_IV_TBL */
+ struct kh_mt *mt;       /* read through RCLASS_M_TBL */
+ struct RClass *super;   /* read through RCLASS_SUPER */
+};
+
+/* Accessors; the argument is an mrb_value holding a class object. */
+#define mrb_class_ptr(v)  ((struct RClass*)((v).value.p))
+#define RCLASS_SUPER(v)   (mrb_class_ptr(v)->super)
+#define RCLASS_IV_TBL(v)  (mrb_class_ptr(v)->iv)
+#define RCLASS_M_TBL(v)   (mrb_class_ptr(v)->mt)
+
+/*
+ * Return the class of value v.
+ *
+ * Values tagged FALSE, TRUE, SYMBOL, FIXNUM or FLOAT map to the per-state
+ * class pointers stored in mrb; every other tag falls through to the class
+ * pointer kept in the object's header.
+ */
+static inline struct RClass*
+mrb_class(mrb_state *mrb, mrb_value v)
+{
+ switch (mrb_type(v)) {
+ case MRB_TT_FALSE:
+ /* nil and false share this tag; a non-null payload selects false */
+ if (v.value.p)
+ return mrb->false_class;
+ return mrb->nil_class;
+ case MRB_TT_TRUE:
+ return mrb->true_class;
+ case MRB_TT_SYMBOL:
+ return mrb->symbol_class;
+ case MRB_TT_FIXNUM:
+ return mrb->fixnum_class;
+ case MRB_TT_FLOAT:
+ return mrb->float_class;
+
+#ifdef INCLUDE_REGEXP
+ /* with INCLUDE_REGEXP these tags are handled by the default branch */
+// case MRB_TT_REGEX:
+// return mrb->regex_class;
+// case MRB_TT_MATCH:
+// return mrb->match_class;
+// case MRB_TT_DATA:
+// return mrb->encode_class;
+#else
+ /* without INCLUDE_REGEXP a regex or match value raises a TypeError */
+ case MRB_TT_REGEX:
+ case MRB_TT_MATCH:
+ mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: %s given",
+ mrb_obj_classname(mrb, v));
+ return mrb->nil_class; /* not reach */
+#endif
+ default:
+ return ((struct RBasic*)mrb_object(v))->c;
+ }
+}
+
+/*
+ * The low 8 bits of a class's flags hold the mrb_vtype of its instances.
+ * Every argument and each whole expansion is parenthesized so that compound
+ * expressions (e.g. `cond ? a : b`, `p + 1`) can be passed safely, and the
+ * tag is masked instead of cast to char so it can never sign-extend into
+ * the remaining flag bits.
+ */
+#define MRB_SET_INSTANCE_TT(c, tt) ((c)->flags = (((c)->flags & ~0xff) | ((tt) & 0xff)))
+#define MRB_INSTANCE_TT(c) ((enum mrb_vtype)((c)->flags & 0xff))
+
+struct RClass* mrb_define_class_id(mrb_state*, mrb_sym, struct RClass*);
+struct RClass* mrb_define_module_id(mrb_state*, mrb_sym);
+struct RClass *mrb_vm_define_class(mrb_state*, mrb_value, mrb_value, mrb_sym);
+struct RClass *mrb_vm_define_module(mrb_state*, mrb_value, mrb_sym);
+void mrb_define_method_vm(mrb_state*, struct RClass*, mrb_sym, mrb_value);
+void mrb_define_method_raw(mrb_state*, struct RClass*, mrb_sym, struct RProc *);
+
+struct RClass *mrb_class_outer_module(mrb_state*, struct RClass *);
+struct RProc *mrb_method_search_vm(mrb_state*, struct RClass**, mrb_sym);
+struct RProc *mrb_method_search(mrb_state*, struct RClass*, mrb_sym);
+
+int mrb_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym mid);
+void mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, int aspec);
+
+void mrb_obj_call_init(mrb_state *mrb, mrb_value obj, int argc, mrb_value *argv);
+
+#endif /* MRUBY_CLASS_H */
diff --git a/include/mruby/hash.h b/include/mruby/hash.h
new file mode 100644
index 000000000..b2c3d69c8
--- /dev/null
+++ b/include/mruby/hash.h
@@ -0,0 +1,59 @@
+#ifndef MRUBY_HASH_H
+#define MRUBY_HASH_H
+
+/* Heap object backing a hash. */
+struct RHash {
+ MRUBY_OBJECT_HEADER;
+ struct kh_ht *ht;     /* table read through RHASH_H_TBL */
+ mrb_value ifnone;     /* read through both RHASH_IFNONE and RHASH_PROCDEFAULT */
+};
+
+/*
+ * Constants and state for a 32-bit generator with a 624-word state vector.
+ * NOTE(review): the values look like the MT19937 (Mersenne Twister)
+ * parameters -- confirm against the implementation that uses them.
+ * NOTE(review): N and M are very generic macro names to export from a
+ * public header; any includer using those identifiers will be rewritten.
+ */
+#define N 624
+#define M 397
+#define MATRIX_A 0x9908b0dfU /* constant vector a */
+#define UMASK 0x80000000U /* most significant w-r bits */
+#define LMASK 0x7fffffffU /* least significant r bits */
+/* top bit of u combined with the low 31 bits of v */
+#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) )
+/* mixed word shifted right once, xor MATRIX_A when v is odd */
+#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1U ? MATRIX_A : 0U))
+enum {MT_MAX_STATE = N};
+
+/* Generator state: the state vector plus a cursor and a remaining-count field. */
+struct MT {
+ /* assume int is enough to store 32bits */
+ unsigned int state[N]; /* the array for the state vector */
+ unsigned int *next;
+ int left;
+};
+
+#define mrb_hash_end(h) st_hash_end(h)
+#define mrb_hash_uint(h, i) st_hash_uint(h, i)
+
+#define mrb_hash_ptr(v) ((struct RHash*)((v).value.p))
+#define mrb_hash_value(p) mrb_obj_value((void*)(p))
+
+mrb_value mrb_hash_new_capa(mrb_state*, size_t);
+mrb_value mrb_hash_new(mrb_state *mrb, int capa);
+
+void mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val);
+mrb_value mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key);
+mrb_value mrb_hash_getWithDef(mrb_state *mrb, mrb_value hash, mrb_value vkey, mrb_value def);
+mrb_value mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key);
+mrb_value mrb_hash(mrb_state *mrb, mrb_value obj);
+void ruby_setenv(mrb_state *mrb, const char *name, const char *value);
+
+/* RHASH_TBL allocates st_table if not available. */
+/*
+ * NOTE(review): RHASH_TBL(h) expands to a one-argument call, while
+ * mrb_hash_tbl below is declared with (mrb_state*, mrb_value); as written
+ * any use of RHASH_TBL would not match that prototype -- confirm and fix
+ * together with its callers.
+ */
+#define RHASH(obj) ((struct RHash*)((obj).value.p))
+#define RHASH_TBL(h) mrb_hash_tbl(h)
+#define RHASH_H_TBL(h) (RHASH(h)->ht)
+#define RHASH_SIZE(h) (RHASH_H_TBL(h)->size)
+#define RHASH_EMPTY_P(h) (RHASH_SIZE(h) == 0)
+/* both names below read the same ifnone field */
+#define RHASH_IFNONE(h) (RHASH(h)->ifnone)
+#define RHASH_PROCDEFAULT(h) (RHASH(h)->ifnone)
+struct kh_ht * mrb_hash_tbl(mrb_state *mrb, mrb_value hash);
+
+#define MRB_HASH_PROC_DEFAULT 256
+#define MRB_RHASH_PROCDEFAULT_P(h) (RHASH(h)->flags & MRB_HASH_PROC_DEFAULT)
+
+char * ruby_strdup(const char *str);
+void mrb_reset_random_seed(void);
+mrb_value mrb_obj_is_proc(mrb_value proc);
+
+#endif /* MRUBY_HASH_H */
diff --git a/include/mruby/numeric.h b/include/mruby/numeric.h
new file mode 100644
index 000000000..ee559fa1d
--- /dev/null
+++ b/include/mruby/numeric.h
@@ -0,0 +1,17 @@
+#ifndef MRUBY_NUMERIC_H
+#define MRUBY_NUMERIC_H
+
+#include <limits.h>
+
+/* Right shift with the shift count forced to int. */
+#define RSHIFT(x,y) ((x)>>(int)(y))
+/* Fixnum bounds: LONG_MAX and LONG_MIN each shifted right by one bit. */
+#define FIXNUM_MAX (LONG_MAX>>1)
+#define FIXNUM_MIN RSHIFT((long)LONG_MIN,1)
+/* Range predicates: f is below FIXNUM_MAX+1 / at or above FIXNUM_MIN / both. */
+#define POSFIXABLE(f) ((f) < FIXNUM_MAX+1)
+#define NEGFIXABLE(f) ((f) >= FIXNUM_MIN)
+#define FIXABLE(f) (POSFIXABLE(f) && NEGFIXABLE(f))
+
+mrb_value mrb_dbl2big(mrb_state *mrb, float d);
+void mrb_num_zerodiv(mrb_state *mrb);
+mrb_value mrb_fix2str(mrb_state *mrb, mrb_value x, int base);
+
+#endif /* MRUBY_NUMERIC_H */
diff --git a/include/mruby/object.h b/include/mruby/object.h
new file mode 100644
index 000000000..151e36b96
--- /dev/null
+++ b/include/mruby/object.h
@@ -0,0 +1,46 @@
+#ifndef MRUBY_OBJECT_H
+#define MRUBY_OBJECT_H
+
+/*
+ * Common header expanded at the start of each heap object struct.
+ *   tt     - object type tag
+ *   color  - GC color; unsigned so the 3-bit field can hold every MRB_GC_*
+ *            value (a signed 3-bit field cannot represent MRB_GC_BLACK == 4)
+ *   flags  - per-type flag bits
+ *   c      - class pointer (read by RBASIC_KLASS)
+ *   gcnext - link to another object header (NOTE(review): presumably the
+ *            GC's object list -- confirm in gc.c)
+ */
+#define MRUBY_OBJECT_HEADER \
+  enum mrb_vtype tt:8;\
+  unsigned int color:3;\
+  unsigned int flags:21;\
+  struct RClass *c;\
+  struct RBasic *gcnext;
+
+
+/* white: 011, black: 100, gray: 000 */
+/* Color values stored in the header's 3-bit color field. */
+#define MRB_GC_GRAY 0
+#define MRB_GC_WHITE_A 1
+#define MRB_GC_WHITE_B (1 << 1)
+#define MRB_GC_BLACK (1 << 2)
+#define MRB_GC_WHITES (MRB_GC_WHITE_A | MRB_GC_WHITE_B)
+#define MRB_GC_COLOR_MASK 7
+
+/* Setters: o is a pointer to an object header, s a state with current_white_part. */
+#define paint_gray(o) ((o)->color = MRB_GC_GRAY)
+#define paint_black(o) ((o)->color = MRB_GC_BLACK)
+#define paint_white(o) ((o)->color = MRB_GC_WHITES)
+#define paint_partial_white(s, o) ((o)->color = (s)->current_white_part)
+/* Predicates: is_white is true when either white bit is set. */
+#define is_gray(o) ((o)->color == MRB_GC_GRAY)
+#define is_white(o) ((o)->color & MRB_GC_WHITES)
+#define is_black(o) ((o)->color & MRB_GC_BLACK)
+/* dead: colored with the white that is not current, or already tagged MRB_TT_FREE */
+#define is_dead(s, o) (((o)->color & other_white_part(s) & MRB_GC_WHITES) || (o)->tt == MRB_TT_FREE)
+/* swap which of the two whites is the current one */
+#define flip_white_part(s) ((s)->current_white_part = other_white_part(s))
+#define other_white_part(s) ((s)->current_white_part ^ MRB_GC_WHITES)
+
+/* Header-only object layout; RBASIC() casts a value's pointer to this type. */
+struct RBasic {
+ MRUBY_OBJECT_HEADER;
+};
+
+/* Object header followed by a table pointer read through ROBJECT_IVPTR. */
+struct RObject {
+ MRUBY_OBJECT_HEADER;
+ struct kh_iv *iv;   /* may be NULL: ROBJECT_NUMIV reports 0 in that case */
+};
+
+/* Pointer views of an mrb_value's payload. */
+#define mrb_obj_ptr(v)    ((struct RObject*)((v).value.p))
+#define RBASIC(obj)       ((struct RBasic*)((obj).value.p))
+#define RBASIC_KLASS(v)   ((struct RClass *)(RBASIC(v)->c))
+#define ROBJECT(v)        mrb_obj_ptr(v)
+/* Table pointer and its entry count (0 when the table is absent). */
+#define ROBJECT_IVPTR(v)  (mrb_obj_ptr(v)->iv)
+#define ROBJECT_NUMIV(v)  (ROBJECT_IVPTR(v) ? ROBJECT_IVPTR(v)->size : 0)
+#endif /* MRUBY_OBJECT_H */
diff --git a/include/mruby/proc.h b/include/mruby/proc.h
new file mode 100644
index 000000000..eafe54cf9
--- /dev/null
+++ b/include/mruby/proc.h
@@ -0,0 +1,44 @@
+#ifndef MRUBY_PROC_H
+#define MRUBY_PROC_H
+
+#include "mruby.h"
+#include "irep.h"
+
+/* Environment object referenced from struct RProc's env field. */
+struct REnv {
+ MRUBY_OBJECT_HEADER;
+ mrb_value *stack;   /* NOTE(review): presumably the captured variable slots -- confirm in the VM */
+ mrb_sym mid;        /* NOTE(review): presumably the defining method's symbol -- confirm */
+ int cioff;          /* NOTE(review): presumably a call-info offset -- confirm */
+};
+
+/*
+ * Procedure object. body holds either an irep pointer or a C function
+ * pointer in the same storage; mrb_proc_new takes an irep and
+ * mrb_proc_new_cfunc takes a function.
+ * NOTE(review): the MRB_PROC_CFUNC flag presumably says which union member
+ * is live -- confirm in proc.c.
+ */
+struct RProc {
+ MRUBY_OBJECT_HEADER;
+ union {
+ mrb_irep *irep;
+ mrb_func_t func;
+ } body;
+ struct RClass *target_class;
+ struct REnv *env;
+};
+
+/*
+ * aspec access: extract one field from a packed argument specification.
+ * Five-bit counts are shifted down and masked with 0x1f; single-bit fields
+ * are returned unshifted, so test them for zero / non-zero only.
+ */
+#define ARGS_GETREQ(a)   (((a) >> 19) & 0x1f)  /* bits 19-23 */
+#define ARGS_GETOPT(a)   (((a) >> 14) & 0x1f)  /* bits 14-18 */
+#define ARGS_GETREST(a)  ((a) & (1<<13))       /* bit 13 */
+#define ARGS_GETPOST(a)  (((a) >> 8) & 0x1f)   /* bits 8-12 */
+#define ARGS_GETKEY(a)   (((a) >> 3) & 0x1f)   /* bits 3-7; unbalanced ')' removed */
+#define ARGS_GETKDICT(a) ((a) & (1<<2))        /* bit 2 */
+#define ARGS_GETBLOCK(a) ((a) & (1<<1))        /* bit 1 */
+
+#define MRB_PROC_CFUNC 128
+#define MRB_PROC_CFUNC_P(p) ((p)->flags & MRB_PROC_CFUNC)
+#define MRB_PROC_STRICT 256
+#define MRB_PROC_STRICT_P(p) ((p)->flags & MRB_PROC_STRICT)
+
+#define mrb_proc_ptr(v) ((struct RProc*)((v).value.p))
+
+struct RProc *mrb_proc_new(mrb_state*, mrb_irep*);
+struct RProc *mrb_proc_new_cfunc(mrb_state*, mrb_func_t);
+struct RProc *mrb_closure_new(mrb_state*, mrb_irep*);
+
+#endif /* MRUBY_PROC_H */
diff --git a/include/mruby/range.h b/include/mruby/range.h
new file mode 100644
index 000000000..d25690ab3
--- /dev/null
+++ b/include/mruby/range.h
@@ -0,0 +1,21 @@
+#ifndef MRUBY_RANGE_H
+#define MRUBY_RANGE_H
+
+/* Range object: the two end points live in a separately referenced edges record. */
+struct RRange {
+ MRUBY_OBJECT_HEADER;
+ struct mrb_range_edges {
+ mrb_value beg;   /* first end point */
+ mrb_value end;   /* second end point */
+ } *edges;
+ int excl;         /* NOTE(review): presumably non-zero when the end is excluded -- confirm in range.c */
+};
+
+#define mrb_range_ptr(v) ((struct RRange*)((v).value.p))
+#define mrb_range_value(p) mrb_obj_value((void*)(p))
+
+mrb_value mrb_range_new(mrb_state*, mrb_value, mrb_value, int);
+mrb_int mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_int err);
+int mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, struct RClass* c);
+struct RClass* mrb_class_real(struct RClass* cl);
+
+#endif /* MRUBY_RANGE_H */
diff --git a/include/mruby/string.h b/include/mruby/string.h
new file mode 100644
index 000000000..e889d8447
--- /dev/null
+++ b/include/mruby/string.h
@@ -0,0 +1,133 @@
+#ifndef MRUBY_STRING_H
+#define MRUBY_STRING_H
+
+#ifdef INCLUDE_ENCODING
+#include "encoding.h"
+#endif
+
+#ifndef RB_GC_GUARD
+#define RB_GC_GUARD(v) v
+#endif
+
+#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
+
+#define mrb_str_new4 mrb_str_new_frozen
+
+#define STR_BUF_MIN_SIZE 128
+//#define RSTRING_EMBED_LEN_MAX STR_BUF_MIN_SIZE
+
+extern const char ruby_digitmap[];
+
+/*
+ * String object: byte length, buffer pointer, and an aux slot that holds
+ * either a capacity or a reference to another value.
+ * NOTE(review): aux.shared is presumably the live member when the
+ * MRB_STR_SHARED flag is set (MRB_STR_NOCAPA aliases that flag) -- confirm
+ * in string.c.
+ */
+struct RString {
+ MRUBY_OBJECT_HEADER;
+ size_t len;          /* read through RSTRING_LEN */
+ union {
+ size_t capa;         /* read through RSTRING_CAPA */
+ mrb_value shared;    /* read through RSTRING_SHARED */
+ } aux;
+ char *buf;           /* read through RSTRING_PTR; RSTRING_END is buf + len */
+};
+
+extern struct SCOPE {
+ struct RBasic super;
+ mrb_sym *local_tbl;
+ mrb_value *local_vars;
+ int flags;
+} *ruby_scope;
+
+struct RVarmap {
+ struct RBasic super;
+ mrb_sym id;
+ mrb_value val;
+ struct RVarmap *next;
+};
+extern struct RVarmap *ruby_dyna_vars;
+
+//struct st_hash_type {
+// int (*compare)();
+// int (*hash)();
+//};
+
+#define mrb_str_ptr(s) ((struct RString*)((s).value.p))
+#define RSTRING(s) ((struct RString*)((s).value.p))
+#define RSTRING_PTR(s) (RSTRING(s)->buf)
+#define RSTRING_LEN(s) (RSTRING(s)->len)
+#define RSTRING_CAPA(s) (RSTRING(s)->aux.capa)
+#define RSTRING_SHARED(s) (RSTRING(s)->aux.shared)
+#define RSTRING_END(s) (RSTRING(s)->buf + RSTRING(s)->len)
+
+#define MRB_STR_SHARED 256
+#define MRB_STR_SHARED_P(s) (FL_ALL(s, MRB_STR_SHARED))
+#define MRB_STR_NOCAPA (MRB_STR_SHARED)
+#define MRB_STR_NOCAPA_P(s) (FL_ANY(s, MRB_STR_NOCAPA))
+#define MRB_STR_UNSET_NOCAPA(s) do {\
+ FL_UNSET(s, MRB_STR_NOCAPA);\
+} while (0)
+
+mrb_value mrb_str_literal(mrb_state*, mrb_value);
+void mrb_str_concat(mrb_state*, mrb_value, mrb_value);
+mrb_value mrb_obj_to_str(mrb_state*, mrb_value);
+mrb_value mrb_str_plus(mrb_state*, mrb_value, mrb_value);
+mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj);
+mrb_value mrb_str_new(mrb_state *mrb, const char *p, size_t len); /* mrb_str_new */
+mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len); /* mrb_str_resize */
+mrb_value mrb_string_value(mrb_state *mrb, mrb_value *ptr); /* StringValue */
+mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len);
+mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str);
+mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa);
+mrb_value mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len);
+mrb_value str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len);
+
+char * mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr);
+char * mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr);
+mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len);
+size_t mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos);
+mrb_value mrb_str_size(mrb_state *mrb, mrb_value self);
+long mrb_str_offset(mrb_state *mrb, mrb_value str, long pos);
+mrb_value mrb_str_new2(mrb_state *mrb, const char *p);
+mrb_value mrb_str_dup(mrb_state *mrb, mrb_value str); /* mrb_str_dup */
+mrb_value mrb_str_new_frozen(mrb_state *mrb, mrb_value orig);
+mrb_value mrb_lastline_get(mrb_state *mrb);
+mrb_value mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len);
+void mrb_lastline_set(mrb_value val);
+mrb_value mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr);
+void mrb_str_modify(mrb_state *mrb, mrb_value str);
+void mrb_str_set_len(mrb_state *mrb, mrb_value str, long len);
+mrb_value mrb_str_intern(mrb_state *mrb, mrb_value self);
+void mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2);
+mrb_value mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr);
+mrb_value mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, ...);
+mrb_value mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck);
+double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck);
+mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str);
+mrb_value mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len);
+mrb_value mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr);
+mrb_int mrb_str_hash(mrb_state *mrb, mrb_value str);
+int mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2);
+mrb_value str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str);
+mrb_value mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2);
+void mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var);
+int mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str);
+mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str);
+int mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2);
+mrb_value str_new4(mrb_state *mrb, enum mrb_vtype ttype, mrb_value str);
+mrb_value * mrb_svar(mrb_int cnt);
+mrb_value mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len);
+mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str);
+mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len);
+mrb_value mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2);
+size_t mrb_str_capacity(mrb_value str);
+
+#ifdef INCLUDE_ENCODING
+int sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc);
+mrb_value mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to);
+mrb_value mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts);
+mrb_value mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc);
+#else
+int mrb_symname_p(const char *name);
+#endif
+
+mrb_value mrb_tainted_str_new(mrb_state *mrb, const char *ptr, long len);
+int mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2);
+
+#endif /* MRUBY_STRING_H */
diff --git a/include/mruby/struct.h b/include/mruby/struct.h
new file mode 100644
index 000000000..b8244045d
--- /dev/null
+++ b/include/mruby/struct.h
@@ -0,0 +1,16 @@
+#ifndef MSTRUCT_H
+#define MSTRUCT_H
+
+/* Struct instance: embeds a full RBasic rather than expanding the header macro. */
+struct RStruct {
+ struct RBasic basic;
+ long len;          /* member count; RSTRUCT_LEN narrows it to int */
+ mrb_value *ptr;    /* member values, read through RSTRUCT_PTR */
+};
+#define RSTRUCT(st) ((struct RStruct*)((st).value.p))
+#define RSTRUCT_LEN(st) ((int)(RSTRUCT(st)->len))
+#define RSTRUCT_PTR(st) (RSTRUCT(st)->ptr)
+
+mrb_value mrb_yield_values(int n, ...);
+mrb_value mrb_mod_module_eval(mrb_state *mrb, int argc, mrb_value *argv, mrb_value mod);
+
+#endif //MSTRUCT_H
diff --git a/lib/.gitkeep b/lib/.gitkeep
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/lib/.gitkeep
diff --git a/mrblib/Makefile b/mrblib/Makefile
new file mode 100644
index 000000000..91dfe4c64
--- /dev/null
+++ b/mrblib/Makefile
@@ -0,0 +1,62 @@
+# makefile description.
+# basic build file for RiteVM library
+# 11.Oct.2011 coded by Hiroshi Mimaki.
+
+# project-specific macros
+# extension of the executable-file is modifiable(.exe .out ...)
+BASEDIR = .
+TARGET := mrblib
+MLIB := $(TARGET).o
+CLIB := $(TARGET).c
+DLIB := $(TARGET).ctmp
+RLIB := $(TARGET).rbtmp
+MRB1 := $(BASEDIR)/*.rb
+MRBS := $(MRB1)
+
+# C compiler (gcc)
+CC = gcc
+DEBUG_MODE = 1
+ifeq ($(DEBUG_MODE),1)
+CFLAGS = -g
+else
+CFLAGS = -O3
+endif
+INCLUDES = -I../src -I../include
+ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS)
+MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)"
+
+# mruby compiler
+ifeq ($(OS),Windows_NT)
+MRBC = ../bin/mrbc.exe
+else
+MRBC = ../bin/mrbc
+endif
+
+##############################
+# generic build targets, rules
+
+.PHONY : all
+all : $(MRBC) $(MLIB)
+ @echo "make: built targets of `pwd`"
+
+# Compile mrblib source
+$(MLIB) : $(CLIB)
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(CLIB) -o $(MLIB)
+
+# Compile C source from merged mruby source
+$(CLIB) : $(RLIB) $(MRBC)
+ $(MRBC) -Bmrblib_irep -o$(DLIB) $(RLIB); cat init_$(TARGET).c $(DLIB) > $@
+
+$(MRBC) : ../src/opcode.h ../src/codegen.c ../src/parse.y
+ $(MAKE) -C ../tools/mrbc $(MAKE_FLAGS)
+
+# merge mruby sources
+# Concatenate every prerequisite ($^). `$?` expands only to the sources that
+# are newer than the target, so an incremental rebuild would write a merged
+# file containing just the changed .rb files and drop the rest of the library.
+$(RLIB) : $(MRBS)
+	cat $^ > $@
+
+# clean up
+.PHONY : clean
+clean :
+ -rm -f $(MRBC) $(MLIB) $(CLIB) $(RLIB) $(DLIB)
+ @echo "make: removing targets, objects and depend files of `pwd`"
+
diff --git a/mrblib/array.rb b/mrblib/array.rb
new file mode 100644
index 000000000..a70832399
--- /dev/null
+++ b/mrblib/array.rb
@@ -0,0 +1,79 @@
+#
+# Array
+#
+class Array
+ # 15.2.12.5.10
+ def each(&block)
+ idx = 0
+ while(idx < length)
+ block.call(self[idx])
+ idx += 1
+ end
+ self
+ end
+
+ # 15.2.12.5.11
+ def each_index(&block)
+ idx = 0
+ while(idx < length)
+ block.call(idx)
+ idx += 1
+ end
+ self
+ end
+
+ # 15.2.12.5.7
+ def collect!(&block)
+ self.each_index{|idx|
+ self[idx] = block.call(self[idx])
+ }
+ self
+ end
+
+ # 15.2.12.5.20
+ # map!(&block)
+ alias map! collect!
+
+ # 15.2.12.5.15
+ def initialize(size=0, obj=nil, &block)
+ raise TypeError, "expected Integer for 1st argument" unless size.kind_of? Integer
+ raise ArgumentError, "negative array size" if size < 0
+
+ self.clear
+ if size > 0
+ self[size - 1] = nil # allocate
+
+ idx = 0
+ while(idx < size)
+ self[idx] = (block)? block.call(idx): obj
+ idx += 1
+ end
+ end
+
+ self
+ end
+
+ def delete(key, &block)
+ while i = self.index(key)
+ self.delete_at(i)
+ ret = key
+ end
+ if ret == nil && block
+ block.call
+ else
+ ret
+ end
+ end
+end
+
+# include modules
+module Enumerable; end
+module Comparable; end
+class Array
+ include Enumerable
+ include Comparable
+
+ def sort!(&block)
+ self.replace(self.sort(&block))
+ end
+end
diff --git a/mrblib/compar.rb b/mrblib/compar.rb
new file mode 100644
index 000000000..974ad5036
--- /dev/null
+++ b/mrblib/compar.rb
@@ -0,0 +1,63 @@
+### move to compar.c
+# module Comparable
+ # def == other
+ # cmp = self <=> other
+ # if cmp == 0
+ # true
+ # else
+ # false
+ # end
+ # end
+
+ # def < other
+ # cmp = self <=> other
+ # if cmp.nil?
+ # false
+ # elsif cmp < 0
+ # true
+ # else
+ # false
+ # end
+ # end
+
+ # def <= other
+ # cmp = self <=> other
+ # if cmp.nil?
+ # false
+ # elsif cmp <= 0
+ # true
+ # else
+ # false
+ # end
+ # end
+
+ # def > other
+ # cmp = self <=> other
+ # if cmp.nil?
+ # false
+ # elsif cmp > 0
+ # true
+ # else
+ # false
+ # end
+ # end
+
+ # def >= other
+ # cmp = self <=> other
+ # if cmp.nil?
+ # false
+ # elsif cmp >= 0
+ # true
+ # else
+ # false
+ # end
+ # end
+
+ # def between?(min,max)
+ # if self < min or self > max
+ # false
+ # else
+ # true
+ # end
+ # end
+# end
diff --git a/mrblib/enum.rb b/mrblib/enum.rb
new file mode 100644
index 000000000..b5a387f43
--- /dev/null
+++ b/mrblib/enum.rb
@@ -0,0 +1,266 @@
+#
+# Enumerable
+#
+module Enumerable
+ # 15.3.2.2.1
+ def all?(&block)
+ st = true
+ if block
+ self.each{|val|
+ unless block.call(val)
+ st = false
+ break
+ end
+ }
+ else
+ self.each{|val|
+ unless val
+ st = false
+ break
+ end
+ }
+ end
+ st
+ end
+
+ # 15.3.2.2.2
+ def any?(&block)
+ st = false
+ if block
+ self.each{|val|
+ if block.call(val)
+ st = true
+ break
+ end
+ }
+ else
+ self.each{|val|
+ if val
+ st = true
+ break
+ end
+ }
+ end
+ st
+ end
+
+ # 15.3.2.2.3
+ def collect(&block)
+ ary = []
+ self.each{|val|
+ ary.push(block.call(val))
+ }
+ ary
+ end
+
+ # 15.3.2.2.4
+ def detect(ifnone=nil, &block)
+ ret = ifnone
+ self.each{|val|
+ if block.call(val)
+ ret = val
+ break
+ end
+ }
+ ret
+ end
+
+ # 15.3.2.2.5
+ def each_with_index(&block)
+ i = 0
+ self.each{|val|
+ block.call(val, i)
+ i += 1
+ }
+ self
+ end
+
+ # 15.3.2.2.6
+ def entries
+ ary = []
+ self.each{|val|
+ ary.push val
+ }
+ ary
+ end
+
+ # 15.3.2.2.7
+ # find(ifnone=nil, &block)
+ alias find detect
+
+ # 15.3.2.2.8
+ def find_all(&block)
+ ary = []
+ self.each{|val|
+ ary.push(val) if block.call(val)
+ }
+ ary
+ end
+
+ # 15.3.2.2.9
+ def grep(pattern, &block)
+ ary = []
+ self.each{|val|
+ if pattern === val
+ ary.push((block)? block.call(val): val)
+ end
+ }
+ ary
+ end
+
+ # 15.3.2.2.10
+ def include?(obj)
+ st = false
+ self.each{|val|
+ if val == obj
+ st = true
+ break
+ end
+ }
+ st
+ end
+
+ # 15.3.2.2.11
+ def inject(*args, &block)
+ raise ArgumentError, "too many arguments" if args.size > 2
+ flag = true # 1st element?
+ result = nil
+ self.each{|val|
+ if flag
+ # 1st element
+ result = (args.empty?)? val: block.call(args[0], val)
+ flag = false
+ else
+ result = block.call(result, val)
+ end
+ }
+ result
+ end
+
+ # 15.3.2.2.12
+ # map(&block)
+ alias map collect
+
+ # 15.3.2.2.13
+ def max(&block)
+ flag = true # 1st element?
+ result = nil
+ self.each{|val|
+ if flag
+ # 1st element
+ result = val
+ flag = false
+ else
+ if block
+ result = val if block.call(val, result) > 0
+ else
+ result = val if (val <=> result) > 0
+ end
+ end
+ }
+ result
+ end
+
+ # 15.3.2.2.14
+ def min(&block)
+ flag = true # 1st element?
+ result = nil
+ self.each{|val|
+ if flag
+ # 1st element
+ result = val
+ flag = false
+ else
+ if block
+ result = val if block.call(val, result) < 0
+ else
+ result = val if (val <=> result) < 0
+ end
+ end
+ }
+ result
+ end
+
+ # 15.3.2.2.15
+ # member?(obj)
+ alias member? include?
+
+ # 15.3.2.2.16
+ def partition(&block)
+ ary_T = []
+ ary_F = []
+ self.each{|val|
+ if block.call(val)
+ ary_T.push(val)
+ else
+ ary_F.push(val)
+ end
+ }
+ [ary_T, ary_F]
+ end
+
+ # 15.3.2.2.17
+ def reject(&block)
+ ary = []
+ self.each{|val|
+ ary.push(val) unless block.call(val)
+ }
+ ary
+ end
+
+ # 15.3.2.2.18
+ # select(&block)
+ alias select find_all
+
+
+ # Does this OK? Please test it.
+ def __sort_sub__(sorted, work, src_ary, head, tail, &block)
+ if head == tail
+ sorted[head] = work[head] if src_ary == 1
+ return
+ end
+
+ # on current step, which is a src ary?
+ if src_ary == 0
+ src, dst = sorted, work
+ else
+ src, dst = work, sorted
+ end
+
+ key = src[head] # key value for dividing values
+ i, j = head, tail # position to store on the dst ary
+
+ (head + 1).upto(tail){|idx|
+ if ((block)? block.call(src[idx], key): (src[idx] <=> key)) > 0
+ # larger than key
+ dst[j] = src[idx]
+ j -= 1
+ else
+ dst[i] = src[idx]
+ i += 1
+ end
+ }
+
+ sorted[i] = key
+
+ # sort each sub-array
+ src_ary = (src_ary + 1) % 2 # exchange a src ary
+ __sort_sub__(sorted, work, src_ary, head, i - 1, &block) if i > head
+ __sort_sub__(sorted, work, src_ary, i + 1, tail, &block) if i < tail
+ end
+# private :__sort_sub__
+
+ # 15.3.2.2.19
+ def sort(&block)
+ ary = []
+ self.each{|val| ary.push(val)}
+ unless ary.empty?
+ __sort_sub__(ary, ::Array.new(ary.size), 0, 0, ary.size - 1, &block)
+ end
+ ary
+ end
+
+ # 15.3.2.2.20
+ # to_a
+ alias to_a entries
+end
diff --git a/mrblib/error.rb b/mrblib/error.rb
new file mode 100644
index 000000000..88da1825c
--- /dev/null
+++ b/mrblib/error.rb
@@ -0,0 +1,9 @@
+#
+# Exception
+#
+class Exception
+ # 15.2.22.4.1
+ def self.exception(*args, &block)
+ self.new(*args, &block)
+ end
+end
diff --git a/mrblib/hash.rb b/mrblib/hash.rb
new file mode 100644
index 000000000..7157684f8
--- /dev/null
+++ b/mrblib/hash.rb
@@ -0,0 +1,58 @@
+#
+# Hash
+#
+class Hash
+ # 15.2.13.4.8
+ def delete(key, &block)
+ if block && ! self.has_key?(key)
+ block.call(key)
+ else
+ self.__delete(key)
+ end
+ end
+
+ # 15.2.13.4.9
+ def each(&block)
+ self.keys.each{|k| block.call([k, self[k]])}
+ self
+ end
+
+ # 15.2.13.4.10
+ def each_key(&block)
+ self.keys.each{|k| block.call(k)}
+ self
+ end
+
+ # 15.2.13.4.11
+ def each_value(&block)
+ self.keys.each{|k| block.call(self[k])}
+ self
+ end
+
+ # 15.2.13.4.16
+ def initialize(*args, &block)
+ self.__init_core(block, *args)
+ end
+
+ # 15.2.13.4.22
+ def merge(other, &block)
+ h = {}
+ raise "can't convert argument into Hash" unless other.respond_to?(:to_hash)
+ other = other.to_hash
+ self.each_key{|k| h[k] = self[k]}
+ if block
+ other.each_key{|k|
+ h[k] = (self.has_key?(k))? block.call(k, self[k], other[k]): other[k]
+ }
+ else
+ other.each_key{|k| h[k] = other[k]}
+ end
+ h
+ end
+end
+
+# include modules
+module Enumerable; end
+class Hash
+ include Enumerable
+end
diff --git a/mrblib/init_mrblib.c b/mrblib/init_mrblib.c
new file mode 100644
index 000000000..c44d28f94
--- /dev/null
+++ b/mrblib/init_mrblib.c
@@ -0,0 +1,17 @@
+#include "mruby.h"
+#include "irep.h"
+#include "dump.h"
+#include "mruby/string.h"
+#include "mruby/proc.h"
+
+extern const char mrblib_irep[];
+
+/*
+ * Read the embedded mrblib_irep data into the state, wrap the irep at the
+ * returned index in a proc, and run it with the top-level self.
+ */
+void
+mrb_init_mrblib(mrb_state *mrb)
+{
+ /* NOTE(review): n indexes mrb->irep below without any check; if
+    mrb_read_irep can report failure through its return value (confirm in
+    load.c), that case needs handling before the lookup. */
+ int n = mrb_read_irep(mrb, mrblib_irep);
+
+ extern mrb_value mrb_top_self(mrb_state *mrb);
+ mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_top_self(mrb));
+}
+
diff --git a/mrblib/kernel.rb b/mrblib/kernel.rb
new file mode 100644
index 000000000..c09755d6c
--- /dev/null
+++ b/mrblib/kernel.rb
@@ -0,0 +1,45 @@
+#
+# Kernel
+#
+module Kernel
+ # 15.3.1.2.6
+ def self.lambda(&block)
+ ### *** TODO *** ###
+ block # dummy
+ end
+
+ # 15.3.1.2.8
+ def self.loop #(&block)
+ while(true)
+ yield
+ end
+ end
+
+ # 15.3.1.3.4
+ def __send__(symbol, *args, &block)
+ ### *** TODO *** ###
+ end
+
+ # 15.3.1.3.18
+ def instance_eval(string=nil, &block)
+ ### *** TODO *** ###
+ end
+
+ # 15.3.1.3.27
+ def lambda(&block)
+ ### *** TODO *** ###
+ block # dummy
+ end
+
+ # 15.3.1.3.29
+ def loop #(&block)
+ while(true)
+ yield
+ end
+ end
+
+ # 15.3.1.3.44
+ def send(symbol, *args, &block)
+ ### *** TODO *** ###
+ end
+end
diff --git a/mrblib/numeric.rb b/mrblib/numeric.rb
new file mode 100644
index 000000000..ee5bdcb56
--- /dev/null
+++ b/mrblib/numeric.rb
@@ -0,0 +1,42 @@
+#
+# Integer
+#
+class Integer
+ # 15.2.8.3.15
+ def downto(num, &block)
+ raise TypeError, "expected Integer" unless num.kind_of? Integer
+ i = self
+ while(i >= num)
+ block.call(i)
+ i -= 1
+ end
+ self
+ end
+
+ # 15.2.8.3.22
+ def times(&block)
+ i = 0
+ while(i < self)
+ block.call(i)
+ i += 1
+ end
+ self
+ end
+
+ # 15.2.8.3.27
+ def upto(num, &block)
+ raise TypeError, "expected Integer" unless num.kind_of? Integer
+ i = self
+ while(i <= num)
+ block.call(i)
+ i += 1
+ end
+ self
+ end
+end
+
+# include modules
+module Comparable; end
+class Numeric
+ include Comparable
+end
diff --git a/mrblib/print.rb b/mrblib/print.rb
new file mode 100644
index 000000000..cb1fad75d
--- /dev/null
+++ b/mrblib/print.rb
@@ -0,0 +1,20 @@
+module Kernel
+ def print(*args)
+ i = 0
+ len = args.size
+ while i < len
+ __printstr__ args[i].to_s
+ i += 1
+ end
+ end
+ def puts(*args)
+ i = 0
+ len = args.size
+ while i < len
+ __printstr__ args[i].to_s
+ __printstr__ "\n"
+ i += 1
+ end
+ __printstr__ "\n" if len == 0
+ end
+end
diff --git a/mrblib/range.rb b/mrblib/range.rb
new file mode 100644
index 000000000..79bc40ecd
--- /dev/null
+++ b/mrblib/range.rb
@@ -0,0 +1,30 @@
+#
+# Range
+#
+class Range
+ # 15.2.14.4.4
+ def each(&block)
+ val = self.first
+ unless val.respond_to? :succ
+ raise TypeError, "can't iterate"
+ end
+
+ last = self.last
+ return self if (val <=> last) > 0
+
+ while((val <=> last) < 0)
+ block.call(val)
+ val = val.succ
+ end
+
+ block.call(val) unless exclude_end?
+
+ self
+ end
+end
+
+# include modules
+module Enumerable; end
+class Range
+ include Enumerable
+end
diff --git a/mrblib/string.rb b/mrblib/string.rb
new file mode 100644
index 000000000..78f2bea9d
--- /dev/null
+++ b/mrblib/string.rb
@@ -0,0 +1,93 @@
+#
+# String
+#
+class String
+ # 15.2.10.5.15
+ def each_line(&block)
+ # expect that str.index accepts an Integer for 1st argument as a byte data
+ offset = 0
+ while(pos = self.index(0x0a, offset))
+ block.call(self[offset, pos + 1 - offset])
+ offset = pos + 1
+ end
+ block.call(self[offset, self.size - offset]) if self.size > offset
+ self
+ end
+
+ # 15.2.10.5.18
+ def gsub(*args, &block)
+ unless (args.size == 1 && block) || args.size == 2
+ raise ArgumentError, "wrong number of arguments"
+ end
+
+ ### *** TODO *** ###
+ end
+
+ # 15.2.10.5.19
+ def gsub!(*args, &block)
+ str = self.gsub(*args, &block)
+ if str != self
+ self.replace(str)
+ self
+ else
+ nil
+ end
+ end
+
+ # 15.2.10.5.32
+ def scan(reg, &block)
+ ### *** TODO *** ###
+ end
+
+ # 15.2.10.5.36
+ def sub(*args, &block)
+ unless (args.size == 1 && block) || args.size == 2
+ raise ArgumentError, "wrong number of arguments"
+ end
+
+ ### *** TODO *** ###
+ end
+
+ # 15.2.10.5.37
+ def sub!(*args, &block)
+ str = self.sub(*args, &block)
+ if str != self
+ self.replace(str)
+ self
+ else
+ nil
+ end
+ end
+
+ def each_char(&block)
+ pos = 0
+ while(pos < self.size)
+ block.call(self[pos])
+ pos += 1
+ end
+ self
+ end
+
+ def each_byte(&block)
+ bytes = self.unpack("C*")
+ pos = 0
+ while(pos < bytes.size)
+ block.call(bytes[pos])
+ pos += 1
+ end
+ self
+ end
+
+ def []=(pos, value)
+ b = self[0, pos]
+ a = self[pos+1..-1]
+ p [b, value, a].join('')
+ self.replace([b, value, a].join(''))
+ end
+end
+
+# include modules
+module Comparable; end
+class String
+ include Comparable
+end
diff --git a/mrblib/struct.rb b/mrblib/struct.rb
new file mode 100644
index 000000000..b11f59f2a
--- /dev/null
+++ b/mrblib/struct.rb
@@ -0,0 +1,30 @@
+#
+# Struct
+#
+class Struct
+ # 15.2.18.4.4
+ def each(&block)
+ self.class.members.each{|field|
+ block.call(self[field])
+ }
+ self
+ end
+
+ # 15.2.18.4.5
+ def each_pair(&block)
+ self.class.members.each{|field|
+ block.call(field.to_sym, self[field])
+ }
+ self
+ end
+
+ # 15.2.18.4.7
+ def select(&block)
+ ary = []
+ self.class.members.each{|field|
+ val = self[field]
+ ary.push(val) if block.call(val)
+ }
+ ary
+ end
+end
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 000000000..41a2c83a0
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,89 @@
+# makefile description.
+# basic build file for RiteVM library
+# 11.Apr.2011 coded by Kenji Yoshimoto.
+# 31.Aug.2011 coded by Hiroshi Mimaki.
+
+# project-specific macros
+# extension of the executable-file is modifiable(.exe .out ...)
+BASEDIR = .
+TARGET := ../lib/ritevm
+ifeq ($(OS),Windows_NT)
+LIB := $(TARGET).lib
+else
+LIB := $(TARGET).a
+endif
+YSRC := $(BASEDIR)/parse.y
+YC := $(BASEDIR)/y.tab.c
+EXCEPT1 := $(YC) $(BASEDIR)/minimain.c $(BASEDIR)/compile.c $(BASEDIR)/dump.c $(BASEDIR)/cdump.c
+OBJY := $(patsubst %.c,%.o,$(YC))
+OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard $(BASEDIR)/*.c)))
+#OBJ2 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/regex/*.c))
+#OBJ3 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/enc/*.c))
+OBJS := $(OBJ1) $(OBJ2) $(OBJ3)
+# mruby libraries
+EXTC := $(BASEDIR)/../mrblib/mrblib.c
+EXTRB := $(wildcard $(BASEDIR)/../mrblib/*.rb)
+EXTM := $(patsubst %.c,%.o,$(EXTC))
+# extend libraries
+#EXT1 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/socket/*.c))
+EXTS := $(EXT1)
+
+# libraries, includes
+INCLUDES = -I$(BASEDIR) -I$(BASEDIR)/../include
+#INCLUDES = -I$(RITEVM_ROOT)
+
+# compiler, linker (gcc)
+CC = gcc
+AR = ar
+YACC = bison
+
+DEBUG_MODE = 1
+ifeq ($(DEBUG_MODE),1)
+CFLAGS = -g
+else
+CFLAGS = -O3
+endif
+ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS)
+MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)"
+
+##############################
+# generic build targets, rules
+
+.PHONY : all
+all : $(EXTM) $(LIB)
+ @echo "make: built targets of `pwd`"
+
+# executable constructed using linker from object files
+$(LIB) : $(OBJS) $(OBJY) $(EXTM) $(EXTS)
+ $(AR) r $@ $(OBJS) $(OBJY) $(EXTM) $(EXTS)
+
+-include $(OBJS:.o=.d) $(OBJY:.o=.d)
+
+# objects compiled from source
+$(OBJS) : %.o : %.c
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@
+
+# mruby library compile
+$(EXTM) : $(EXTRB) $(OBJS) $(OBJY)
+ $(MAKE) -C ../mrblib $(MAKE_FLAGS)
+
+# extend libraries compile
+$(EXTS) : %.o : %.c
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@
+
+# parser compile
+$(OBJY) : $(YC)
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(YC) -o $(OBJY)
+
+# yacc compile
+$(YC) : $(YSRC)
+ $(YACC) -o $(YC) $(YSRC)
+
+# clean up
+.PHONY : clean #cleandep
+clean :
+ $(MAKE) clean -C ../mrblib $(MAKE_FLAGS)
+ -rm -f $(LIB) $(OBJS) $(OBJY) $(YC)
+ -rm -f $(OBJS:.o=.d) $(OBJY:.o=.d)
+ @echo "make: removing targets, objects and depend files of `pwd`"
+
diff --git a/src/array.c b/src/array.c
new file mode 100644
index 000000000..855a45aba
--- /dev/null
+++ b/src/array.c
@@ -0,0 +1,1458 @@
+#include "mruby.h"
+#include "mruby/array.h"
+#include <string.h>
+#include "mruby/string.h"
+#include "mdata.h"
+#include "mruby/class.h"
+
+#ifdef INCLUDE_REGEXP
+ #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
+#else
+ #define mrb_usascii_str_new2 mrb_str_new_cstr
+ #define mrb_usascii_str_new mrb_str_new
+#endif
+mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
+ mrb_value obj, mrb_value paired_obj, void* arg);
+
+//#define ARY_DEFAULT_LEN 16
+#define ARY_DEFAULT_LEN 4 /* smallest buffer capacity (in elements) this file allocates */
+#define ARY_SHRINK_RATIO 5 /* must be larger than 2 */
+#ifdef LONG_MAX
+# define ARY_MAX_SIZE (LONG_MAX / sizeof(mrb_value)) /* element-count ceiling: keeps the buffer's byte size within LONG_MAX */
+#endif
+
+/*
+ * Bounds-checked element read: yields ary[offset], or nil when the array
+ * is empty or offset lies outside 0..len-1.  Never raises.
+ */
+static inline mrb_value
+ary_elt(mrb_value ary, long offset)
+{
+  if (RARRAY_LEN(ary) != 0 && offset >= 0 && RARRAY_LEN(ary) > offset) {
+    return RARRAY_PTR(ary)[offset];
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * Allocate an empty Array whose buffer holds at least `capa` elements
+ * (never fewer than ARY_DEFAULT_LEN).
+ *
+ * Raises ArgumentError when the requested capacity cannot be represented.
+ * Returns the new array with len == 0.
+ */
+mrb_value
+mrb_ary_new_capa(mrb_state *mrb, size_t capa)
+{
+  struct RArray *a;
+
+  /* capa is unsigned: a negative count handed in by a signed caller arrives
+     here as a huge value, so test for byte-size overflow instead of "< 0". */
+  if (capa > (size_t)-1 / sizeof(mrb_value)) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative ary size (or size too big)");
+  }
+#ifdef LONG_MAX
+  if (capa > ARY_MAX_SIZE) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "ary size too big");
+  }
+#endif
+  if (capa < ARY_DEFAULT_LEN) {
+    capa = ARY_DEFAULT_LEN;
+  }
+
+  a = mrb_obj_alloc(mrb, MRB_TT_ARRAY, mrb->array_class);
+  a->buf = mrb_malloc(mrb, sizeof(mrb_value) * capa);
+  /* slots are zero-filled; with len == 0 none of them is an element yet */
+  memset(a->buf, 0, sizeof(mrb_value) * capa);
+  a->capa = capa;
+  a->len = 0;
+
+  return mrb_obj_value(a);
+}
+
+/* Create an empty Array; mrb_ary_new_capa rounds the capacity up to its minimum. */
+mrb_value
+mrb_ary_new(mrb_state *mrb)
+{
+  mrb_value empty = mrb_ary_new_capa(mrb, 0);
+
+  return empty;
+}
+
+/*
+ * Build a new Array holding a copy of the `size` values starting at `vals`.
+ */
+mrb_value
+mrb_ary_new_from_values(mrb_state *mrb, mrb_value *vals, size_t size)
+{
+  mrb_value result = mrb_ary_new_capa(mrb, size);
+  struct RArray *dst = mrb_ary_ptr(result);
+
+  memcpy(dst->buf, vals, size * sizeof(mrb_value));
+  dst->len = size;
+
+  return result;
+}
+
+/* Build the two-element Array [car, cdr]. */
+mrb_value
+mrb_assoc_new(mrb_state *mrb, mrb_value car, mrb_value cdr)
+{
+  mrb_value pair[2];
+
+  pair[1] = cdr;
+  pair[0] = car;
+  return mrb_ary_new_from_values(mrb, pair, 2);
+}
+
+/*
+ * Store nil into the `size` consecutive slots starting at `buf`.
+ * The counter is tested as size_t; the previous (int) cast could stop early
+ * for counts whose low bits are all zero.
+ */
+void
+ary_fill_with_nil(mrb_value *buf, size_t size)
+{
+  mrb_value nil = mrb_nil_value();
+  size_t i;
+
+  for (i = 0; i < size; i++) {
+    buf[i] = nil;
+  }
+}
+
+/*
+ * Grow a's buffer so that it can hold at least `len` elements.
+ * Capacity starts at ARY_DEFAULT_LEN and doubles until it is large enough;
+ * the buffer is reallocated only when the capacity actually increases.
+ * Raises ArgumentError when `len` exceeds ARY_MAX_SIZE (if that is defined).
+ */
+void
+mrb_ary_expand_capa(mrb_state *mrb, struct RArray *a, size_t len)
+{
+  size_t new_capa = a->capa;
+
+#ifdef LONG_MAX
+  if (len > ARY_MAX_SIZE) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "array size too big");
+  }
+#endif
+
+  while (new_capa < len) {
+    new_capa = (new_capa == 0) ? ARY_DEFAULT_LEN : new_capa * 2;
+  }
+
+#ifdef LONG_MAX
+  /* len <= new_capa <= ARY_MAX_SIZE */
+  if (new_capa > ARY_MAX_SIZE) {
+    new_capa = ARY_MAX_SIZE;
+  }
+#endif
+
+  if (new_capa <= a->capa) {
+    return;
+  }
+  a->capa = new_capa;
+  a->buf = mrb_realloc(mrb, a->buf, new_capa * sizeof(mrb_value));
+}
+
+/*
+ * Give memory back when the buffer is much larger than the contents.
+ * Nothing happens unless capa >= 2*ARY_DEFAULT_LEN and
+ * capa > len*ARY_SHRINK_RATIO; otherwise capa is halved repeatedly
+ * (never below ARY_DEFAULT_LEN) and the buffer is reallocated if the
+ * result is still larger than len and smaller than the old capacity.
+ */
+void
+mrb_ary_shrink_capa(mrb_state *mrb, struct RArray *a)
+{
+  size_t target = a->capa;
+
+  if (target < ARY_DEFAULT_LEN * 2 || target <= a->len * ARY_SHRINK_RATIO) {
+    return;
+  }
+
+  for (;;) {
+    target /= 2;
+    if (target < ARY_DEFAULT_LEN) {
+      target = ARY_DEFAULT_LEN;
+      break;
+    }
+    if (target <= a->len * ARY_SHRINK_RATIO) {
+      break;
+    }
+  }
+
+  if (target > a->len && target < a->capa) {
+    a->capa = target;
+    a->buf = mrb_realloc(mrb, a->buf, target * sizeof(mrb_value));
+  }
+}
+
+/* Array.[] : build a new Array from the call's argument list. */
+mrb_value
+mrb_ary_s_create(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  return mrb_ary_new_from_values(mrb, argv, (size_t)argc);
+}
+
+/*
+ * Append every element of `other` to `self` in place.
+ * Both values are used as arrays without a type check here.
+ */
+void
+mrb_ary_concat(mrb_state *mrb, mrb_value self, mrb_value other)
+{
+  struct RArray *dst = mrb_ary_ptr(self);
+  struct RArray *src = mrb_ary_ptr(other);
+  size_t total = dst->len + src->len;
+
+  if (total > dst->capa) {
+    mrb_ary_expand_capa(mrb, dst, total);
+  }
+  /* src->buf is read only after the possible reallocation above */
+  memcpy(dst->buf + dst->len, src->buf, src->len * sizeof(mrb_value));
+  mrb_write_barrier(mrb, (struct RBasic*)dst);
+  dst->len = total;
+}
+
+/*
+ * Array#concat : append the argument array to self and return self.
+ * Raises ArgumentError when the argument is not an Array.
+ */
+mrb_value
+mrb_ary_concat_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value arg;
+
+  mrb_get_args(mrb, "o", &arg);
+  if (mrb_type(arg) == MRB_TT_ARRAY) {
+    mrb_ary_concat(mrb, self, arg);
+    return self;
+  }
+  mrb_raise(mrb, E_ARGUMENT_ERROR, "expected Array");
+  mrb_ary_concat(mrb, self, arg);
+  return self;
+}
+
+/*
+ * Array#+ : return a new Array made of self's elements followed by the
+ * argument's elements.  Raises ArgumentError for a non-Array argument.
+ */
+mrb_value
+mrb_ary_plus(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *lhs = mrb_ary_ptr(self);
+  struct RArray *sum;
+  mrb_value rhs;
+  mrb_value result;
+
+  mrb_get_args(mrb, "o", &rhs);
+  if (mrb_type(rhs) != MRB_TT_ARRAY) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "expected Array");
+  }
+
+  result = mrb_ary_new_capa(mrb, lhs->len + RARRAY_LEN(rhs));
+  sum = mrb_ary_ptr(result);
+
+  /* left operand first, right operand directly behind it */
+  memcpy(sum->buf, lhs->buf, lhs->len * sizeof(mrb_value));
+  memcpy(sum->buf + lhs->len, RARRAY_PTR(rhs), RARRAY_LEN(rhs) * sizeof(mrb_value));
+  sum->len = lhs->len + RARRAY_LEN(rhs);
+
+  return result;
+}
+
+/*
+ * Element-wise <=> over the common prefix of ary1 and ary2.
+ * Returns the first comparison result that is not Fixnum 0, or undef when
+ * the whole prefix compares equal (or when `recur` is set), leaving the
+ * decision to the caller.
+ */
+static mrb_value
+recursive_cmp(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur)
+{
+  long idx, common;
+
+  if (recur) return mrb_undef_value(); /* Subtle! */
+
+  common = RARRAY_LEN(ary1);
+  if (common > RARRAY_LEN(ary2)) {
+    common = RARRAY_LEN(ary2);
+  }
+
+  idx = 0;
+  while (idx < common) {
+    mrb_value r = mrb_funcall(mrb, ary_elt(ary1, idx), "<=>", 1, ary_elt(ary2, idx));
+
+    if (!(mrb_type(r) == MRB_TT_FIXNUM && mrb_fixnum(r) == 0)) {
+      return r;
+    }
+    idx++;
+  }
+
+  return mrb_undef_value();
+}
+
+/*
+ *  call-seq:
+ *     ary <=> other_ary   ->  -1, 0, +1 or nil
+ *
+ *  Compares two arrays element by element with <=>.  The first unequal
+ *  element decides the result; when every compared element is equal the
+ *  array lengths decide.  Returns nil when other_ary is not an Array.
+ *
+ *     [ "a", "a", "c" ]    <=> [ "a", "b", "c" ]   #=> -1
+ *     [ 1, 2, 3, 4, 5, 6 ] <=> [ 1, 2 ]            #=> +1
+ */
+mrb_value
+mrb_ary_cmp(mrb_state *mrb, mrb_value ary1)
+{
+  mrb_value ary2;
+  mrb_value r;
+  struct RArray *a1, *a2;
+  long diff;
+
+  mrb_get_args(mrb, "o", &ary2);
+  if (mrb_type(ary2) != MRB_TT_ARRAY) return mrb_nil_value();
+
+  a1 = RARRAY(ary1);
+  a2 = RARRAY(ary2);
+  /* same buffer and same length: equal without looking at the elements */
+  if (a1->len == a2->len && a1->buf == a2->buf) return mrb_fixnum_value(0);
+
+  r = mrb_exec_recursive_paired(mrb, recursive_cmp, ary1, ary2, &ary2);
+  if (mrb_type(r) != MRB_TT_UNDEF) return r;
+
+  /* all compared elements were equal: the lengths decide */
+  diff = a1->len - a2->len;
+  if (diff == 0) {
+    return mrb_fixnum_value(0);
+  }
+  if (diff > 0) {
+    return mrb_fixnum_value(1);
+  }
+  return mrb_fixnum_value(-1);
+}
+
+/*
+ * Overwrite the contents of `a` with the `len` values starting at `argv`.
+ * The buffer is grown when needed and a->len becomes `len`.
+ *
+ * memmove is used because argv may be a's own buffer (an array replaced
+ * with itself); memcpy is undefined for overlapping regions.
+ */
+void
+mrb_ary_replace(mrb_state *mrb, struct RArray *a, mrb_value *argv, size_t len)
+{
+  if (a->capa < len) mrb_ary_expand_capa(mrb, a, len);
+  memmove(a->buf, argv, sizeof(mrb_value)*len);
+  mrb_write_barrier(mrb, (struct RBasic*)a);
+  a->len = len;
+}
+
+/*
+ * Array#replace : make self hold the same elements as the argument array
+ * and return self.
+ *
+ * Raises ArgumentError when the argument is not an Array; without this
+ * check RARRAY_PTR/RARRAY_LEN would be applied to an arbitrary value.
+ */
+mrb_value
+mrb_ary_replace_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  if (mrb_type(other) != MRB_TT_ARRAY) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "expected Array");
+  }
+  mrb_ary_replace(mrb, mrb_ary_ptr(self), RARRAY_PTR(other), RARRAY_LEN(other));
+
+  return self;
+}
+
+/*
+ * Array#* : return a new Array containing self's elements repeated
+ * `times` times.  Raises ArgumentError for a negative count; a count of
+ * zero yields an empty Array.
+ */
+mrb_value
+mrb_ary_times(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *src = mrb_ary_ptr(self);
+  struct RArray *dst;
+  mrb_value result;
+  mrb_value *out;
+  mrb_int times;
+
+  mrb_get_args(mrb, "i", &times);
+  if (times < 0) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
+  }
+  if (times == 0) return mrb_ary_new(mrb);
+
+  result = mrb_ary_new_capa(mrb, src->len * times);
+  dst = mrb_ary_ptr(result);
+
+  /* lay the copies down back to back, growing len one copy at a time */
+  for (out = dst->buf; times != 0; times--) {
+    memcpy(out, src->buf, (src->len) * sizeof(mrb_value));
+    out += src->len;
+    dst->len += src->len;
+  }
+
+  return result;
+}
+
+/*
+ * Reverse the elements of `a` in place.
+ *
+ * Arrays with fewer than two elements are returned untouched; this also
+ * avoids computing `buf + len - 1` for an empty array, which would point
+ * before the start of the buffer.
+ */
+static void
+ary_reverse(struct RArray *a)
+{
+  mrb_value *p1, *p2;
+
+  if (a->len < 2) return;
+
+  p1 = a->buf;
+  p2 = a->buf + a->len - 1;
+
+  /* swap from both ends until the cursors meet */
+  while(p1 < p2) {
+    mrb_value tmp = *p1;
+    *p1++ = *p2;
+    *p2-- = tmp;
+  }
+}
+
+/* Array#reverse! : reverse self in place and return self. */
+mrb_value
+mrb_ary_reverse_bang(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *target = mrb_ary_ptr(self);
+
+  if (target->len <= 1) {
+    return self;  /* nothing to swap */
+  }
+  ary_reverse(target);
+  return self;
+}
+
+/* Array#reverse : return a new Array with self's elements in reverse order. */
+mrb_value
+mrb_ary_reverse(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *src = mrb_ary_ptr(self);
+  mrb_value result = mrb_ary_new_capa(mrb, src->len);
+
+  if (src->len == 0) {
+    return result;
+  }
+  /* copy first, then flip the copy */
+  mrb_ary_replace(mrb, mrb_ary_ptr(result), src->buf, src->len);
+  ary_reverse(mrb_ary_ptr(result));
+  return result;
+}
+
+/*
+ * Create an Array with capacity n.  When n > 0 and elts is non-NULL the
+ * n values at elts are copied in and the length is set to n; otherwise
+ * the new array stays empty.
+ */
+mrb_value
+mrb_ary_new4(mrb_state *mrb, long n, const mrb_value *elts)
+{
+  mrb_value result = mrb_ary_new_capa(mrb, n);
+
+  if (n <= 0 || !elts) {
+    return result;
+  }
+  memcpy(RARRAY_PTR(result), elts, n * sizeof(mrb_value));
+  RARRAY_LEN(result) = n;
+
+  return result;
+}
+
+/* Alias of mrb_ary_new4: new Array from n values at elts. */
+mrb_value
+mrb_ary_new_elts(mrb_state *mrb, long n, const mrb_value *elts)
+{
+  mrb_value result = mrb_ary_new4(mrb, n, elts);
+
+  return result;
+}
+
+/* Append `elem` to the end of `ary`, growing the buffer when it is full. */
+void
+mrb_ary_push(mrb_state *mrb, mrb_value ary, mrb_value elem) /* mrb_ary_push */
+{
+  struct RArray *target = mrb_ary_ptr(ary);
+
+  if (target->len == target->capa) {
+    mrb_ary_expand_capa(mrb, target, target->len + 1);
+  }
+  target->buf[target->len] = elem;
+  target->len++;
+  mrb_write_barrier(mrb, (struct RBasic*)target);
+}
+
+/* Remove and return the last element of `ary`; nil when it is empty. */
+mrb_value
+mrb_ary_pop(mrb_state *mrb, mrb_value ary)
+{
+  struct RArray *target = mrb_ary_ptr(ary);
+
+  if (target->len != 0) {
+    target->len--;
+    return target->buf[target->len];
+  }
+  return mrb_nil_value();
+}
+
+/* Array#push / Array#<< : append every argument to self, return self. */
+mrb_value
+mrb_ary_push_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *argv;
+  int argc;
+  int i;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  for (i = 0; i != argc; i++) {
+    mrb_ary_push(mrb, self, argv[i]);
+  }
+
+  return self;
+}
+
+/* Array#pop : remove and return the last element, or nil when empty. */
+mrb_value
+mrb_ary_pop_m(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *target = mrb_ary_ptr(self);
+
+  if (target->len == 0) {
+    return mrb_nil_value();
+  }
+  return mrb_ary_pop(mrb, self);
+}
+
+/*
+ * Array#shift : remove and return the first element, moving the remaining
+ * elements one slot towards the front.  Returns nil for an empty array.
+ *
+ * The move is a single memmove; the former element-by-element loop tested
+ * its size_t counter through an (int) cast.
+ */
+mrb_value
+mrb_ary_shift(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *a = mrb_ary_ptr(self);
+  mrb_value val;
+
+  if (a->len == 0) return mrb_nil_value();
+
+  val = a->buf[0];
+  memmove(a->buf, a->buf + 1, sizeof(mrb_value)*(a->len - 1));
+  --a->len;
+
+  return val;
+}
+
+/*
+ * Insert `item` in front of all existing elements and return self.
+ *
+ *   self = [1,2,3]
+ *   self.unshift 0
+ *   p self #=> [0, 1, 2, 3]
+ */
+mrb_value
+mrb_ary_unshift(mrb_state *mrb, mrb_value self, mrb_value item)
+{
+  struct RArray *target = mrb_ary_ptr(self);
+  size_t needed = target->len + 1;
+
+  if (target->capa < needed) {
+    mrb_ary_expand_capa(mrb, target, needed);
+  }
+  /* slide everything one slot to the right, then fill slot 0 */
+  memmove(target->buf + 1, target->buf, target->len * sizeof(mrb_value));
+  target->buf[0] = item;
+  target->len += 1;
+  mrb_write_barrier(mrb, (struct RBasic*)target);
+
+  return self;
+}
+
+/*
+ * Array#unshift : insert all arguments, in order, in front of the existing
+ * elements and return self.  With no arguments self is returned unchanged.
+ */
+mrb_value
+mrb_ary_unshift_m(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *target = mrb_ary_ptr(self);
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc != 0) {
+    if (target->capa < target->len + argc) {
+      mrb_ary_expand_capa(mrb, target, target->len + argc);
+    }
+    /* open a gap of argc slots at the front, then copy the arguments in */
+    memmove(target->buf + argc, target->buf, target->len * sizeof(mrb_value));
+    memcpy(target->buf, argv, argc * sizeof(mrb_value));
+    target->len += argc;
+    mrb_write_barrier(mrb, (struct RBasic*)target);
+  }
+
+  return self;
+}
+
+/*
+ * Read element n of `ary`.  A negative n counts from the end.
+ * Returns nil when the resulting index is outside the array.
+ */
+mrb_value
+mrb_ary_ref(mrb_state *mrb, mrb_value ary, mrb_int n)
+{
+  struct RArray *src = mrb_ary_ptr(ary);
+
+  if (n < 0) {
+    n += src->len;
+  }
+  if (n >= 0 && src->len > (size_t)n) {
+    return src->buf[n];
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * Store `val` at index n of `ary`.
+ *
+ * A negative n counts from the end; if it still points before the first
+ * element an IndexError is raised.  Storing past the current end grows the
+ * array and fills the gap with nil.
+ */
+void
+mrb_ary_set(mrb_state *mrb, mrb_value ary, mrb_int n, mrb_value val) /* rb_ary_store */
+{
+  struct RArray *a = mrb_ary_ptr(ary);
+
+  /* range check */
+  if (n < 0) n += a->len;
+  if (n < 0) {
+    /* %ld needs a long: the unadorned expression has the unsigned type of a->len */
+    mrb_raise(mrb, E_INDEX_ERROR, "index %ld out of array", (long)n - (long)a->len);
+  }
+  if (a->len <= (size_t)n) {
+    if (a->capa <= (size_t)n) mrb_ary_expand_capa(mrb, a, n + 1);
+    ary_fill_with_nil(a->buf + a->len, n + 1 - a->len);
+    a->len = n + 1;
+  }
+
+  a->buf[n] = val;
+  mrb_write_barrier(mrb, (struct RBasic*)a);
+}
+
+/*
+ * Replace `len` elements of `ary`, starting at `head`, with `rpl`.
+ *
+ * head  negative values count from the end; IndexError if still negative.
+ * len   number of elements to remove; IndexError if negative.  A range
+ *       reaching past the end of the array is clamped to the end.
+ * rpl   an Array (its elements are inserted) or any other value (inserted
+ *       as a single element).
+ *
+ * A head beyond the current end pads the gap with nil.  Returns ary.
+ */
+mrb_value
+mrb_ary_splice(mrb_state *mrb, mrb_value ary, mrb_int head, mrb_int len, mrb_value rpl)
+{
+  struct RArray *a = mrb_ary_ptr(ary);
+  mrb_int alen = (mrb_int)a->len;
+  mrb_int tail;
+  size_t size;
+  mrb_value *argv;
+  int argc;
+
+  /* range check */
+  if (head < 0) head += alen;
+  if (head < 0) {
+    mrb_raise(mrb, E_INDEX_ERROR, "index is out of array");
+  }
+  if (len < 0) {
+    mrb_raise(mrb, E_INDEX_ERROR, "negative length");
+  }
+  /* clamp the removed range to the array; written so head + len cannot
+     overflow and so that `alen - tail` below is never negative */
+  if (len > alen - head) {
+    tail = alen;
+  }
+  else {
+    tail = head + len;
+  }
+
+  /* size check */
+  if (mrb_type(rpl) == MRB_TT_ARRAY) {
+    if (mrb_ary_ptr(rpl) == a) {
+      /* splicing an array into itself: work from a snapshot, because the
+         buffer is about to be reallocated and shifted */
+      rpl = mrb_ary_new_from_values(mrb, a->buf, a->len);
+    }
+    argc = RARRAY_LEN(rpl);
+    argv = RARRAY_PTR(rpl);
+  }
+  else {
+    argc = 1;
+    argv = &rpl;
+  }
+  size = (size_t)head + argc + (size_t)(alen - tail);
+
+  if (size > a->capa) mrb_ary_expand_capa(mrb, a, size);
+
+  if (head > alen) {
+    /* writing past the end: pad the gap */
+    ary_fill_with_nil(a->buf + alen, (size_t)(head - alen));
+  }
+  else if (tail < alen) {
+    /* keep the elements behind the removed range */
+    memmove(a->buf + head + argc, a->buf + tail, sizeof(mrb_value)*(alen - tail));
+  }
+
+  memcpy(a->buf + head, argv, sizeof(mrb_value)*argc);
+
+  a->len = size;
+  mrb_write_barrier(mrb, (struct RBasic*)a);
+
+  return ary;
+}
+
+/* Number of elements in `ary`, as an int. */
+int
+mrb_ary_alen(mrb_state *mrb, mrb_value ary)
+{
+  int count = RARRAY_LEN(ary);
+
+  return count;
+}
+
+/*
+ * Array#[] :
+ *   ary[index]       -> element or nil
+ *   ary[index, len]  -> new Array or nil
+ *
+ * A negative index counts from the end.  The two-argument form returns nil
+ * when index is outside 0..size or len is negative, an empty Array when
+ * index == size, and otherwise at most len elements starting at index.
+ * Raises TypeError when len is not a Fixnum and ArgumentError for more
+ * than two arguments.
+ */
+mrb_value
+mrb_ary_aget(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *a = mrb_ary_ptr(self);
+  mrb_int index, len;
+  mrb_value *rest;
+  int nrest;
+
+  mrb_get_args(mrb, "i*", &index, &rest, &nrest);
+
+  if (nrest == 0) {
+    return mrb_ary_ref(mrb, self, index);
+  }
+  if (nrest != 1) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+    return mrb_nil_value(); /* dummy to avoid warning : not reach here */
+  }
+
+  if (mrb_type(rest[0]) != MRB_TT_FIXNUM) {
+    mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum");
+  }
+  len = mrb_fixnum(rest[0]);
+
+  if (index < 0) index += a->len;
+  if (index < 0 || a->len < (size_t)index) return mrb_nil_value();
+  if (len < 0) return mrb_nil_value();
+  if (a->len == (size_t)index) return mrb_ary_new(mrb);
+
+  /* never read past the last element */
+  if ((size_t)len > a->len - index) len = a->len - index;
+  return mrb_ary_new_from_values(mrb, a->buf + index, len);
+}
+
+/*
+ * Array#[]= :
+ *   ary[index] = val            -> stores one element
+ *   ary[start, length] = val    -> splices val in place of a range
+ *
+ * Raises TypeError when an index/length argument is not a Fixnum and
+ * ArgumentError for any other argument count.  Returns self.
+ */
+mrb_value
+mrb_ary_aset(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  switch(argc) {
+  case 2:
+    if (FIXNUM_P(argv[0])) {
+      mrb_ary_set(mrb, self, mrb_fixnum(argv[0]), argv[1]);
+    }
+    else {
+      /* Should we support Range object for 1st arg ? */
+      mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum for 1st argument");
+    }
+    break;
+
+  case 3:
+    /* mrb_fixnum() on a non-Fixnum would read an unrelated value */
+    if (!FIXNUM_P(argv[0])) {
+      mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum for 1st argument");
+    }
+    if (!FIXNUM_P(argv[1])) {
+      mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum for 2nd argument");
+    }
+    mrb_ary_splice(mrb, self, mrb_fixnum(argv[0]), mrb_fixnum(argv[1]), argv[2]);
+    break;
+
+  default:
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+  }
+
+  return self;
+}
+
+/*
+ * Array#delete_at : remove the element at `index` and return it.
+ * A negative index counts from the end; nil is returned (and nothing is
+ * removed) when the index is outside the array.
+ *
+ * The trailing elements are closed up with one memmove; the former
+ * element-by-element loop tested its size_t counter through an (int) cast.
+ */
+mrb_value
+mrb_ary_delete_at(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *a = mrb_ary_ptr(self);
+  mrb_int index;
+  mrb_value val;
+
+  mrb_get_args(mrb, "i", &index);
+  if (index < 0) index += a->len;
+  if (index < 0 || a->len <= (size_t)index) return mrb_nil_value();
+
+  val = a->buf[index];
+
+  memmove(a->buf + index, a->buf + index + 1,
+          sizeof(mrb_value)*(a->len - (size_t)index - 1));
+  --a->len;
+
+  mrb_ary_shrink_capa(mrb, a);
+
+  return val;
+}
+
+/*
+ * Array#first :
+ *   ary.first      -> first element, or nil when empty
+ *   ary.first(n)   -> new Array with the first n elements (at most size)
+ *
+ * Raises ArgumentError for more than one argument or a negative n, and
+ * TypeError when n is not a Fixnum.  (Previously a negative or non-Fixnum
+ * n was converted to a huge unsigned count and silently clamped.)
+ */
+mrb_value
+mrb_ary_first(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *a = mrb_ary_ptr(self);
+  size_t size;
+  mrb_value *vals;
+  mrb_int n;
+  int len;
+
+  mrb_get_args(mrb, "*", &vals, &len);
+  if (len > 1) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+  }
+
+  if (len == 0) return (a->len > 0)? a->buf[0]: mrb_nil_value();
+
+  /* len == 1 */
+  if (!FIXNUM_P(*vals)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum");
+  }
+  n = mrb_fixnum(*vals);
+  if (n < 0) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative array size");
+  }
+  size = (size_t)n;
+  if (size > a->len) size = a->len;
+  return mrb_ary_new_from_values(mrb, a->buf, size);
+}
+
+/*
+ * Array#last :
+ *   ary.last      -> last element, or nil when empty
+ *   ary.last(n)   -> new Array with the last n elements (at most size)
+ *
+ * Raises ArgumentError for more than one argument or a negative n, and
+ * TypeError when n is not a Fixnum.  (Previously a negative or non-Fixnum
+ * n was converted to a huge unsigned count and silently clamped.)
+ */
+mrb_value
+mrb_ary_last(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *a = mrb_ary_ptr(self);
+  size_t size;
+  mrb_value *vals;
+  mrb_int n;
+  int len;
+
+  mrb_get_args(mrb, "*", &vals, &len);
+  if (len > 1) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+  }
+
+  if (len == 0) return (a->len > 0)? a->buf[a->len - 1]: mrb_nil_value();
+
+  /* len == 1 */
+  if (!FIXNUM_P(*vals)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "expected Fixnum");
+  }
+  n = mrb_fixnum(*vals);
+  if (n < 0) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative array size");
+  }
+  size = (size_t)n;
+  if (size > a->len) size = a->len;
+  return mrb_ary_new_from_values(mrb, a->buf + a->len - size, size);
+}
+
+/*
+ * Array#index : index of the first element for which mrb_equal(element,
+ * obj) holds, or nil when there is none.
+ */
+mrb_value
+mrb_ary_index_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value wanted;
+  long pos = 0;
+
+  mrb_get_args(mrb, "o", &wanted);
+  while (pos < RARRAY_LEN(self)) {
+    if (mrb_equal(mrb, RARRAY_PTR(self)[pos], wanted)) {
+      return mrb_fixnum_value(pos);
+    }
+    pos++;
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * Array#rindex : index of the last element for which mrb_equal(element,
+ * obj) holds, or nil when there is none.  Scans from the end.
+ */
+mrb_value
+mrb_ary_rindex_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value wanted;
+  long pos;
+
+  mrb_get_args(mrb, "o", &wanted);
+  pos = RARRAY_LEN(self) - 1;
+  while (pos >= 0) {
+    if (mrb_equal(mrb, RARRAY_PTR(self)[pos], wanted)) {
+      return mrb_fixnum_value(pos);
+    }
+    pos--;
+  }
+  return mrb_nil_value();
+}
+
+/* Splat conversion: currently hands the value back unchanged. */
+mrb_value
+mrb_ary_splat(mrb_state *mrb, mrb_value v)
+{
+  mrb_value same = v;
+
+  return same;
+}
+
+/* Number of elements in self, as a Fixnum. */
+static mrb_value
+mrb_ary_size(mrb_state *mrb, mrb_value self)
+{
+  return mrb_fixnum_value(mrb_ary_ptr(self)->len);
+}
+
+#if 0 /* --> implement with ruby code */
+mrb_value
+mrb_ary_each(mrb_state *mrb, mrb_value self)
+{
+ long i;
+
+ for (i = 0; i < RARRAY_LEN(self); i++) {
+ mrb_yield(RARRAY_PTR(self)[i]);
+ }
+
+ return self;
+}
+#endif
+
+#if 0 /* --> implement with ruby code */
+mrb_value
+mrb_ary_each_index(mrb_state *mrb, mrb_value self)
+{
+ long i;
+
+ for (i = 0; i < RARRAY_LEN(self); i++) {
+ mrb_yield(mrb_fixnum_value(i));
+ }
+
+ return self;
+}
+#endif
+
+#if 0 /* --> implement with ruby code */
+mrb_value
+mrb_ary_collect_bang(mrb_state *mrb, mrb_value self)
+{
+ long i;
+
+ for (i = 0; i < RARRAY_LEN(self); i++) {
+ RARRAY_PTR(self)[i] = mrb_yield(RARRAY_PTR(self)[i]);
+ }
+
+ return self;
+}
+#endif
+
+/* Array#clear : drop all elements, let the buffer shrink, return self. */
+mrb_value
+mrb_ary_clear(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *target = mrb_ary_ptr(self);
+
+  target->len = 0;
+  mrb_ary_shrink_capa(mrb, target);
+
+  return self;
+}
+
+/* Array#empty? : true when self has no elements, false otherwise. */
+mrb_value
+mrb_ary_empty_p(mrb_state *mrb, mrb_value self)
+{
+  if (mrb_ary_ptr(self)->len != 0) {
+    return mrb_false_value();
+  }
+  return mrb_true_value();
+}
+
+/* Try to convert `ary` to an Array via to_ary (see mrb_check_convert_type). */
+mrb_value
+mrb_check_array_type(mrb_state *mrb, mrb_value ary)
+{
+  mrb_value converted = mrb_check_convert_type(mrb, ary, MRB_TT_ARRAY, "Array", "to_ary");
+
+  return converted;
+}
+
+/*
+ * Element at `offset`, where a negative offset counts from the end.
+ * Out-of-range offsets yield nil (see ary_elt).
+ */
+mrb_value
+mrb_ary_entry(mrb_value ary, long offset)
+{
+  long pos = offset;
+
+  if (pos < 0) {
+    pos += RARRAY_LEN(ary);
+  }
+  return ary_elt(ary, pos);
+}
+
+/*
+ * Store nil into `size` consecutive slots starting at `mem`.
+ * A zero or negative size writes nothing; the previous `while (size--)`
+ * form never terminated sensibly for a negative count.
+ */
+void
+mrb_mem_clear(mrb_value *mem, long size)
+{
+  while (size-- > 0) {
+    *mem++ = mrb_nil_value();
+  }
+}
+
+/* Create a scratch Array with room for `capa` elements. */
+mrb_value
+mrb_ary_tmp_new(mrb_state *mrb, long capa)
+{
+  mrb_value scratch = mrb_ary_new_capa(mrb, capa);
+
+  return scratch;
+}
+
+#if 0
+/*
+ * call-seq:
+ * ary.sort! -> ary
+ * ary.sort! {| a,b | block } -> ary
+ *
+ * Sorts +self+. Comparisons for
+ * the sort will be done using the <code><=></code> operator or using
+ * an optional code block. The block implements a comparison between
+ * <i>a</i> and <i>b</i>, returning -1, 0, or +1. See also
+ * <code>Enumerable#sort_by</code>.
+ *
+ * a = [ "d", "a", "e", "c", "b" ]
+ * a.sort #=> ["a", "b", "c", "d", "e"]
+ * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"]
+ */
+
+mrb_value
+mrb_ary_sort_bang(mrb_value ary)
+{
+#if 0
+ mrb_ary_modify(ary);
+ //assert(!ARY_SHARED_P(ary));
+ if (RARRAY_LEN(ary) > 1) {
+ mrb_value tmp = ary_make_substitution(ary); /* only ary refers tmp */
+ struct ary_sort_data data;
+
+ RBASIC(tmp)->klass = 0;
+ data.ary = tmp;
+ data.opt_methods = 0;
+ data.opt_inited = 0;
+ ruby_qsort(RARRAY_PTR(tmp), RARRAY_LEN(tmp), sizeof(VALUE),
+ mrb_block_given_p()?sort_1:sort_2, &data);
+
+ if (ARY_EMBED_P(tmp)) {
+ assert(ARY_EMBED_P(tmp));
+ if (ARY_SHARED_P(ary)) { /* ary might be destructively operated in the given block */
+ mrb_ary_unshare(ary);
+ }
+ FL_SET_EMBED(ary);
+ MEMCPY(RARRAY_PTR(ary), ARY_EMBED_PTR(tmp), VALUE, ARY_EMBED_LEN(tmp));
+ ARY_SET_LEN(ary, ARY_EMBED_LEN(tmp));
+ }
+ else {
+ assert(!ARY_EMBED_P(tmp));
+ if (ARY_HEAP_PTR(ary) == ARY_HEAP_PTR(tmp)) {
+ assert(!ARY_EMBED_P(ary));
+ FL_UNSET_SHARED(ary);
+ ARY_SET_CAPA(ary, ARY_CAPA(tmp));
+ }
+ else {
+ assert(!ARY_SHARED_P(tmp));
+ if (ARY_EMBED_P(ary)) {
+ FL_UNSET_EMBED(ary);
+ }
+ else if (ARY_SHARED_P(ary)) {
+ /* ary might be destructively operated in the given block */
+ mrb_ary_unshare(ary);
+ }
+ else {
+ xfree(ARY_HEAP_PTR(ary));
+ }
+ ARY_SET_PTR(ary, RARRAY_PTR(tmp));
+ ARY_SET_HEAP_LEN(ary, RARRAY_LEN(tmp));
+ ARY_SET_CAPA(ary, ARY_CAPA(tmp));
+ }
+ /* tmp was lost ownership for the ptr */
+ FL_UNSET(tmp, FL_FREEZE);
+ FL_SET_EMBED(tmp);
+ ARY_SET_EMBED_LEN(tmp, 0);
+ FL_SET(tmp, FL_FREEZE);
+ }
+ /* tmp will be GC'ed. */
+ RBASIC(tmp)->c = mrb->array_class;
+ }
+#endif
+ return ary;
+}
+#endif
+
+/*
+ * Return a new Array holding a shallow copy of self's elements.
+ *
+ * The earlier body looped on an uninitialized counter (`times`, left over
+ * from mrb_ary_times), copying an unpredictable number of times past the
+ * end of the new buffer.  One copy is all that is needed.
+ */
+mrb_value
+mrb_ary_dup(mrb_state *mrb, mrb_value self)
+{
+  struct RArray *a1 = mrb_ary_ptr(self);
+  struct RArray *a2;
+  mrb_value ary;
+
+  ary = mrb_ary_new_capa(mrb, a1->len);
+  a2 = mrb_ary_ptr(ary);
+  memcpy(a2->buf, a1->buf, sizeof(mrb_value)*a1->len);
+  a2->len = a1->len;
+
+  return ary;
+}
+
+#if 0
+/*
+ * call-seq:
+ * ary.sort -> new_ary
+ * ary.sort {| a,b | block } -> new_ary
+ *
+ * Returns a new array created by sorting +self+. Comparisons for
+ * the sort will be done using the <code><=></code> operator or using
+ * an optional code block. The block implements a comparison between
+ * <i>a</i> and <i>b</i>, returning -1, 0, or +1. See also
+ * <code>Enumerable#sort_by</code>.
+ *
+ * a = [ "d", "a", "e", "c", "b" ]
+ * a.sort #=> ["a", "b", "c", "d", "e"]
+ * a.sort {|x,y| y <=> x } #=> ["e", "d", "c", "b", "a"]
+ */
+
+mrb_value
+mrb_ary_sort(mrb_state *mrb, mrb_value ary)
+{
+ ary = mrb_ary_dup(mrb, ary);
+ mrb_ary_sort_bang(ary);
+ return ary;
+}
+#endif
+
+/*
+ * Build the inspect string "[e1, e2, ...]" for `ary`.
+ *
+ * `list` holds the arrays currently being inspected further up the call
+ * chain; if `ary` is already in it the literal "[...]" is returned instead
+ * of recursing.  Nested arrays are handled by recursion, everything else
+ * by mrb_inspect.
+ */
+static mrb_value
+inspect_ary(mrb_state *mrb, mrb_value ary, mrb_value list)
+{
+  long k;
+  mrb_value piece, out;
+
+  /* check recursive */
+  for (k = 0; k < RARRAY_LEN(list); k++) {
+    if (mrb_obj_equal(mrb, ary, RARRAY_PTR(list)[k])) {
+      return mrb_str_new2(mrb, "[...]");
+    }
+  }
+
+  mrb_ary_push(mrb, list, ary);
+
+  out = mrb_str_buf_new(mrb, 64);
+  mrb_str_buf_cat(mrb, out, "[", strlen("["));
+
+  for (k = 0; k < RARRAY_LEN(ary); k++) {
+    int arena = mrb_gc_arena_save(mrb);
+
+    if (k > 0) {
+      mrb_str_buf_cat(mrb, out, ", ", strlen(", "));
+    }
+    if (mrb_type(RARRAY_PTR(ary)[k]) == MRB_TT_ARRAY) {
+      piece = inspect_ary(mrb, RARRAY_PTR(ary)[k], list);
+    }
+    else {
+      piece = mrb_inspect(mrb, RARRAY_PTR(ary)[k]);
+    }
+    mrb_str_buf_cat(mrb, out, RSTRING_PTR(piece), RSTRING_LEN(piece));
+    mrb_gc_arena_restore(mrb, arena);
+  }
+
+  mrb_str_buf_cat(mrb, out, "]", strlen("]"));
+  mrb_ary_pop(mrb, list);
+
+  return out;
+}
+
+#if 0
+static mrb_value
+inspect_ary_r(mrb_state *mrb, mrb_value ary, mrb_value dummy, int recur)
+{
+ //int tainted = OBJ_TAINTED(ary);
+ //int untrust = OBJ_UNTRUSTED(ary);
+ long i;
+ mrb_value s, arystr;
+ //if (recur) return mrb_tainted_str_new2("[...]");
+ arystr = mrb_str_buf_new(mrb, 128);
+ mrb_str_buf_cat(mrb, arystr, "[", strlen("[")); /* for capa */
+ //arystr = mrb_str_new_cstr(mrb, "[");//mrb_str_buf_new2("[");
+ for (i=0; i<RARRAY_LEN(ary); i++) {
+ s = mrb_inspect(mrb, RARRAY_PTR(ary)[i]);//mrb_inspect(RARRAY_PTR(ary)[i]);
+ //if (OBJ_TAINTED(s)) tainted = TRUE;
+ //if (OBJ_UNTRUSTED(s)) untrust = TRUE;
+ if (i > 0) mrb_str_buf_cat(mrb, arystr, ", ", strlen(", "));//mrb_str_buf_cat2(str, ", ");
+ mrb_str_buf_append(mrb, arystr, s);
+ }
+ mrb_str_buf_cat(mrb, arystr, "]", strlen("]"));// mrb_str_buf_cat2(str, "]");
+ //if (tainted) OBJ_TAINT(str);
+ //if (untrust) OBJ_UNTRUST(str);
+ return arystr;
+}
+#endif
+
+/* 15.2.12.5.31 (x) */
+/*
+ *  call-seq:
+ *     ary.to_s -> string
+ *     ary.inspect  -> string
+ *
+ *  Returns a printable representation of +self+; "[]" for an empty array.
+ */
+
+static mrb_value
+mrb_ary_inspect(mrb_state *mrb, mrb_value ary)
+{
+  if (RARRAY_LEN(ary) != 0) {
+    /* fresh, empty list of arrays already being inspected */
+    return inspect_ary(mrb, ary, mrb_ary_new(mrb));
+  }
+  return mrb_str_new2(mrb, "[]");
+}
+
+/*
+ * Join the elements of `ary` into one string, inserting `sep` between
+ * elements when it is not nil.
+ *
+ * `list` holds the arrays currently being joined further up the call
+ * chain; meeting `ary` in it raises ArgumentError.  Each element is
+ * appended as follows: Arrays (or values convertible with to_ary) are
+ * joined recursively, Strings (or values accepted by
+ * mrb_check_string_type) are appended as is, anything else goes through
+ * mrb_obj_as_string.
+ */
+static mrb_value
+join_ary(mrb_state *mrb, mrb_value ary, mrb_value sep, mrb_value list)
+{
+  long k;
+  mrb_value out, elem, conv;
+
+  /* check recursive */
+  for (k = 0; k < RARRAY_LEN(list); k++) {
+    if (mrb_obj_equal(mrb, ary, RARRAY_PTR(list)[k])) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "recursive array join");
+    }
+  }
+
+  mrb_ary_push(mrb, list, ary);
+
+  out = mrb_str_buf_new(mrb, 64);
+
+  for (k = 0; k < RARRAY_LEN(ary); k++) {
+    if (k > 0 && !mrb_nil_p(sep)) {
+      mrb_str_buf_cat(mrb, out, RSTRING_PTR(sep), RSTRING_LEN(sep));
+    }
+
+    elem = RARRAY_PTR(ary)[k];
+    if (mrb_type(elem) == MRB_TT_ARRAY) {
+      elem = join_ary(mrb, elem, sep, list);
+    }
+    else if (mrb_type(elem) != MRB_TT_STRING) {
+      /* string conversion is tried first, then array conversion */
+      conv = mrb_check_string_type(mrb, elem);
+      if (!mrb_nil_p(conv)) {
+        elem = conv;
+      }
+      else {
+        conv = mrb_check_convert_type(mrb, elem, MRB_TT_ARRAY, "Array", "to_ary");
+        if (!mrb_nil_p(conv)) {
+          elem = join_ary(mrb, conv, sep, list);
+        }
+        else {
+          elem = mrb_obj_as_string(mrb, elem);
+        }
+      }
+    }
+    mrb_str_buf_cat(mrb, out, RSTRING_PTR(elem), RSTRING_LEN(elem));
+  }
+
+  mrb_ary_pop(mrb, list);
+
+  return out;
+}
+
+/*
+ * Join `ary` into a string.  `sep` is first converted with
+ * mrb_obj_as_string and then placed between the elements.
+ */
+mrb_value
+mrb_ary_join(mrb_state *mrb, mrb_value ary, mrb_value sep)
+{
+  mrb_value sep_str = mrb_obj_as_string(mrb, sep);
+
+  /* start with an empty list of arrays being joined */
+  return join_ary(mrb, ary, sep_str, mrb_ary_new(mrb));
+}
+
+#if 0
+static void ary_join_1(mrb_state *mrb, mrb_value obj, mrb_value ary, mrb_value sep, long i, mrb_value result, mrb_value first);
+
+static mrb_value
+recursive_join(mrb_state *mrb, mrb_value obj, mrb_value args, int recur)
+{
+ mrb_value ary = mrb_ary_ref(mrb, args, 0);
+ mrb_value sep = mrb_ary_ref(mrb, args, 1);
+ mrb_value result = mrb_ary_ref(mrb, args, 2);
+ mrb_value first = mrb_ary_ref(mrb, args, 3);
+
+ if (recur) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "recursive array join");
+ }
+ else {
+ ary_join_1(mrb, obj, ary, sep, 0, result, first);
+ }
+ return mrb_nil_value();
+}
+
+static void
+ary_join_0(mrb_state *mrb, mrb_value ary, mrb_value sep, long max, mrb_value result)
+{
+ long i;
+ mrb_value val;
+
+ for (i=0; i<max; i++) {
+ val = RARRAY_PTR(ary)[i];
+ if (i > 0 && !mrb_nil_p(sep))
+ mrb_str_buf_append(mrb, result, sep);
+ mrb_str_buf_append(mrb, result, val);
+ //if (OBJ_TAINTED(val)) OBJ_TAINT(result);
+ //if (OBJ_UNTRUSTED(val)) OBJ_TAINT(result);
+ }
+}
+
+static void
+ary_join_1(mrb_state *mrb, mrb_value obj, mrb_value ary, mrb_value sep, long i, mrb_value result, mrb_value first)
+{
+ mrb_value val, tmp;
+
+ for (; i<RARRAY_LEN(ary); i++) {
+ if (i > 0 && !mrb_nil_p(sep)) {
+ mrb_str_buf_append(mrb, result, sep);
+ }
+
+ val = RARRAY_PTR(ary)[i];
+ switch (mrb_type(val)) {
+ case MRB_TT_STRING:
+ str_join:
+ mrb_str_buf_append(mrb, result, val);
+ break;
+ case MRB_TT_ARRAY:
+ obj = val;
+ ary_join:
+ if (mrb_obj_equal(mrb, val, ary)) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "recursive array join");
+ }
+ else {
+ //struct recursive_join_arg args;
+ mrb_value args = mrb_ary_new(mrb);
+
+ mrb_ary_set(mrb, args, 0, val);
+ mrb_ary_set(mrb, args, 1, sep);
+ mrb_ary_set(mrb, args, 2, result);
+ mrb_ary_set(mrb, args, 3, first);
+
+ mrb_exec_recursive(mrb, recursive_join, obj, &args);
+ }
+ break;
+ default:
+ tmp = mrb_check_string_type(mrb, val);
+ if (!mrb_nil_p(tmp)) {
+ val = tmp;
+ goto str_join;
+ }
+ tmp = mrb_check_convert_type(mrb, val, MRB_TT_ARRAY, "Array", "to_ary");
+ if (!mrb_nil_p(tmp)) {
+ obj = val;
+ val = tmp;
+ goto ary_join;
+ }
+ val = mrb_obj_as_string(mrb, val);
+ if (mrb_test(first)) {
+#ifdef INCLUDE_REGEXP /* include "encoding.h" */
+ mrb_enc_copy(mrb, result, val);
+#endif
+ first = mrb_false_value();
+ }
+ goto str_join;
+ }
+ }
+}
+
+mrb_value
+mrb_ary_join(mrb_state *mrb, mrb_value ary, mrb_value sep)
+{
+ long len = 1, i;
+ //int taint = FALSE;
+ //int untrust = FALSE;
+ mrb_value val, tmp, result;
+
+ if (RARRAY_LEN(ary) == 0) return mrb_str_new2(mrb, "");
+ //if (OBJ_TAINTED(ary) || OBJ_TAINTED(sep)) taint = TRUE;
+ //if (OBJ_UNTRUSTED(ary) || OBJ_UNTRUSTED(sep)) untrust = TRUE;
+
+ if (!mrb_nil_p(sep)) {
+ //StringValue(sep);
+ mrb_string_value(mrb, &sep);
+ len += RSTRING_LEN(sep) * (RARRAY_LEN(ary) - 1);
+ }
+
+ for (i=0; i<RARRAY_LEN(ary); i++) {
+ val = RARRAY_PTR(ary)[i];
+ tmp = mrb_check_string_type(mrb, val);
+
+ if (mrb_nil_p(tmp) || (!mrb_obj_equal(mrb, tmp, val))) {
+ mrb_value first;
+
+ result = mrb_str_buf_new(mrb, len + (RARRAY_LEN(ary)-i)*10);
+ //if (taint) OBJ_TAINT(result);
+ //if (untrust) OBJ_UNTRUST(result);
+ first = (i == 0)? mrb_true_value(): mrb_false_value();
+mrb_realloc(mrb, RSTRING(result)->buf, ++(RSTRING(result)->capa));
+ ary_join_0(mrb, ary, sep, i, result);
+mrb_realloc(mrb, RSTRING(result)->buf, ++(RSTRING(result)->capa));
+ ary_join_1(mrb, ary, ary, sep, i, result, first);
+mrb_realloc(mrb, RSTRING(result)->buf, ++(RSTRING(result)->capa));
+ return result;
+ }
+
+ len += RSTRING_LEN(tmp);
+ }
+
+ result = mrb_str_buf_new(mrb, len);
+ //if (taint) OBJ_TAINT(result);
+ //if (untrust) OBJ_UNTRUST(result);
+ ary_join_0(mrb, ary, sep, RARRAY_LEN(ary), result);
+
+ return result;
+}
+#endif
+
+/*
+ *  call-seq:
+ *     ary.join(sep=nil)    -> str
+ *
+ *  Builds a string from the string form of every element, with
+ *  <i>sep</i> between neighbours.  More than one argument raises
+ *  ArgumentError.
+ *
+ *     [ "a", "b", "c" ].join        #=> "abc"
+ *     [ "a", "b", "c" ].join("-")   #=> "a-b-c"
+ */
+
+static mrb_value
+mrb_ary_join_m(mrb_state *mrb, mrb_value ary)
+{
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  if (argc == 0) {
+    return mrb_ary_join(mrb, ary, mrb_nil_value());
+  }
+  if (argc == 1) {
+    return mrb_ary_join(mrb, ary, argv[0]);
+  }
+
+  mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+  return mrb_nil_value(); /* dummy */
+}
+
+/*
+ * Element-wise == over ary1's index range.  Returns false at the first
+ * pair for which mrb_equal fails, true otherwise (and immediately when
+ * `recur` is set).
+ */
+static mrb_value
+recursive_equal(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur)
+{
+  long k = 0;
+
+  if (recur) return mrb_true_value(); /* Subtle! */
+  while (k < RARRAY_LEN(ary1)) {
+    if (!mrb_equal(mrb, ary_elt(ary1, k), ary_elt(ary2, k))) {
+      return mrb_false_value();
+    }
+    k++;
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.12.5.33 (x) */
+/*
+ *  call-seq:
+ *     ary == other_ary   ->   bool
+ *
+ *  Two arrays are equal when they have the same number of elements and
+ *  every element is == to the element at the same position in the other
+ *  array.  A non-Array operand that responds to to_ary is asked to do the
+ *  comparison itself (other == ary); any other operand is unequal.
+ *
+ *     [ "a", "c" ]    == [ "a", "c", 7 ]     #=> false
+ *     [ "a", "c", 7 ] == [ "a", "c", 7 ]     #=> true
+ *     [ "a", "c", 7 ] == [ "a", "d", "f" ]   #=> false
+ *
+ */
+
+static mrb_value
+mrb_ary_equal(mrb_state *mrb, mrb_value ary1)
+{
+  mrb_value ary2;
+
+  mrb_get_args(mrb, "o", &ary2);
+  if (mrb_obj_equal(mrb, ary1,ary2)) return mrb_true_value();
+
+  if (mrb_type(ary2) != MRB_TT_ARRAY) {
+    if (!mrb_respond_to(mrb, ary2, mrb_intern(mrb, "to_ary"))) {
+      return mrb_false_value();
+    }
+    /* let the array-like operand decide */
+    return mrb_equal(mrb, ary2, ary1) ? mrb_true_value() : mrb_false_value();
+  }
+
+  if (RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) return mrb_false_value();
+  return mrb_exec_recursive_paired(mrb, recursive_equal, ary1, ary2, &ary2);
+}
+
+/*
+ * Element-wise eql? over ary1's index range.  Returns false at the first
+ * pair for which mrb_eql fails, true otherwise (and immediately when
+ * `recur` is set).
+ */
+static mrb_value
+recursive_eql(mrb_state *mrb, mrb_value ary1, mrb_value ary2, int recur)
+{
+  long k = 0;
+
+  if (recur) return mrb_true_value(); /* Subtle! */
+  while (k < RARRAY_LEN(ary1)) {
+    if (!mrb_eql(mrb, ary_elt(ary1, k), ary_elt(ary2, k))) {
+      return mrb_false_value();
+    }
+    k++;
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.12.5.34 (x) */
+/*
+ *  call-seq:
+ *     ary.eql?(other)  -> true or false
+ *
+ *  True when +self+ and _other_ are the same object, or when _other_ is
+ *  an Array of the same length whose elements are all eql? to the
+ *  corresponding elements of +self+.
+ */
+
+static mrb_value
+mrb_ary_eql(mrb_state *mrb, mrb_value ary1)
+{
+  mrb_value ary2;
+
+  mrb_get_args(mrb, "o", &ary2);
+  if (mrb_obj_equal(mrb, ary1,ary2)) {
+    return mrb_true_value();
+  }
+  if (mrb_type(ary2) != MRB_TT_ARRAY || RARRAY_LEN(ary1) != RARRAY_LEN(ary2)) {
+    return mrb_false_value();
+  }
+  return mrb_exec_recursive_paired(mrb, recursive_eql, ary1, ary2, &ary2);
+}
+
+#if 0
+/*
+ * Disabled (compiled out by the enclosing #if 0): hash-code support for Array.
+ *
+ * recursive_hash seeds a running hash with the array length and then either
+ * mixes in the hash of mrb_cArray (when `recur` is set) or the hash of every
+ * element, and returns the finished value through LONG2FIX.
+ * NOTE(review): unlike the other callbacks in this part of the file it takes
+ * no mrb_state* argument, and mrb_hash() is called without one -- presumably
+ * not yet ported to this API; confirm before enabling.
+ */
+static mrb_value
+recursive_hash(mrb_value ary, mrb_value dummy, int recur)
+{
+ long i;
+ st_index_t h;
+ mrb_value n;
+
+ h = mrb_hash_start(RARRAY_LEN(ary));
+ if (recur) {
+ h = mrb_hash_uint(h, NUM2LONG(mrb_hash(mrb_cArray)));
+ }
+ else {
+ for (i=0; i<RARRAY_LEN(ary); i++) {
+ n = mrb_hash(RARRAY_PTR(ary)[i]);
+ h = mrb_hash_uint(h, NUM2LONG(n));
+ }
+ }
+ h = mrb_hash_end(h);
+ return LONG2FIX(h);
+}
+
+/* 15.2.12.5.35 (x) */
+/*
+ * call-seq:
+ * ary.hash -> fixnum
+ *
+ * Compute a hash-code for this array. Two arrays with the same content
+ * will have the same hash code (and will compare using <code>eql?</code>).
+ */
+
+/* Method body for Array#hash: runs recursive_hash through mrb_exec_recursive_outer. */
+static mrb_value
+mrb_ary_hash(mrb_state *mrb, mrb_value ary)
+{
+ return mrb_exec_recursive_outer(mrb, recursive_hash, ary, mrb_fixnum_value(0));
+}
+#endif
+
+/*
+ * Creates the Array class and registers its C-implemented methods.
+ *
+ * The class is defined as "Array" with Object as superclass, stored in
+ * mrb->array_class, tagged so that its instances use MRB_TT_ARRAY, and gets
+ * Enumerable mixed in (Enumerable is looked up with mrb_class_get, so it
+ * presumably has to exist before this runs -- confirm init order).
+ *
+ * The trailing numbers on each line look like specification section
+ * references (TODO confirm which document).  Registrations inside "#if 0"
+ * are not compiled; their markers say they are implemented in Ruby code.
+ */
+void
+mrb_init_array(mrb_state *mrb)
+{
+ struct RClass *a;
+
+ a = mrb->array_class = mrb_define_class(mrb, "Array", mrb->object_class);
+ MRB_SET_INSTANCE_TT(a, MRB_TT_ARRAY);
+ mrb_include_module(mrb, a, mrb_class_get(mrb, "Enumerable"));
+
+ mrb_define_class_method(mrb, a, "[]", mrb_ary_s_create, ARGS_ANY()); /* 15.2.12.4.1 */
+
+ mrb_define_method(mrb, a, "*", mrb_ary_times, ARGS_REQ(1)); /* 15.2.12.5.1 */
+ mrb_define_method(mrb, a, "+", mrb_ary_plus, ARGS_REQ(1)); /* 15.2.12.5.2 */
+ mrb_define_method(mrb, a, "<<", mrb_ary_push_m, ARGS_REQ(1)); /* 15.2.12.5.3 */
+ mrb_define_method(mrb, a, "[]", mrb_ary_aget, ARGS_ANY()); /* 15.2.12.5.4 */
+ mrb_define_method(mrb, a, "[]=", mrb_ary_aset, ARGS_ANY()); /* 15.2.12.5.5 */
+ mrb_define_method(mrb, a, "clear", mrb_ary_clear, ARGS_NONE()); /* 15.2.12.5.6 */
+#if 0 /* --> implement with ruby code */
+ mrb_define_method(mrb, a, "collect!", mrb_ary_collect_bang, ARGS_NONE()); /* 15.2.12.5.7 */
+#endif
+ mrb_define_method(mrb, a, "concat", mrb_ary_concat_m, ARGS_REQ(1)); /* 15.2.12.5.8 */
+ mrb_define_method(mrb, a, "delete_at", mrb_ary_delete_at, ARGS_REQ(1)); /* 15.2.12.5.9 */
+#if 0 /* --> implement with ruby code */
+ mrb_define_method(mrb, a, "each", mrb_ary_each, ARGS_NONE()); /* 15.2.12.5.10 */
+#endif
+#if 0 /* --> implement with ruby code */
+ mrb_define_method(mrb, a, "each_index", mrb_ary_each_index, ARGS_NONE()); /* 15.2.12.5.11 */
+#endif
+ mrb_define_method(mrb, a, "empty?", mrb_ary_empty_p, ARGS_NONE()); /* 15.2.12.5.12 */
+ mrb_define_method(mrb, a, "first", mrb_ary_first, ARGS_ANY()); /* 15.2.12.5.13 */
+ mrb_define_method(mrb, a, "index", mrb_ary_index_m, ARGS_REQ(1)); /* 15.2.12.5.14 */
+#if 0 /* --> implement with ruby code */
+ mrb_define_method(mrb, a, "initialize", mrb_ary_initialize, ARGS_ANY()); /* 15.2.12.5.15 */
+#endif
+ /* initialize_copy and replace share one implementation (mrb_ary_replace_m). */
+ mrb_define_method(mrb, a, "initialize_copy", mrb_ary_replace_m, ARGS_REQ(1)); /* 15.2.12.5.16 */
+ mrb_define_method(mrb, a, "join", mrb_ary_join_m, ARGS_ANY()); /* 15.2.12.5.17 */
+ mrb_define_method(mrb, a, "last", mrb_ary_last, ARGS_ANY()); /* 15.2.12.5.18 */
+ /* length and size share one implementation (mrb_ary_size). */
+ mrb_define_method(mrb, a, "length", mrb_ary_size, ARGS_NONE()); /* 15.2.12.5.19 */
+#if 0 /* --> implement with ruby code */
+ mrb_define_method(mrb, a, "map!", mrb_ary_collect_bang, ARGS_NONE()); /* 15.2.12.5.20 */
+#endif
+ mrb_define_method(mrb, a, "pop", mrb_ary_pop_m, ARGS_NONE()); /* 15.2.12.5.21 */
+ /* "push" and "<<" share mrb_ary_push_m but are declared with different arities. */
+ mrb_define_method(mrb, a, "push", mrb_ary_push_m, ARGS_ANY()); /* 15.2.12.5.22 */
+ mrb_define_method(mrb, a, "replace", mrb_ary_replace_m, ARGS_REQ(1)); /* 15.2.12.5.23 */
+ mrb_define_method(mrb, a, "reverse", mrb_ary_reverse, ARGS_NONE()); /* 15.2.12.5.24 */
+ mrb_define_method(mrb, a, "reverse!", mrb_ary_reverse_bang, ARGS_NONE()); /* 15.2.12.5.25 */
+ mrb_define_method(mrb, a, "rindex", mrb_ary_rindex_m, ARGS_REQ(1)); /* 15.2.12.5.26 */
+ mrb_define_method(mrb, a, "shift", mrb_ary_shift, ARGS_NONE()); /* 15.2.12.5.27 */
+ mrb_define_method(mrb, a, "size", mrb_ary_size, ARGS_NONE()); /* 15.2.12.5.28 */
+ /* slice and [] share one implementation (mrb_ary_aget). */
+ mrb_define_method(mrb, a, "slice", mrb_ary_aget, ARGS_ANY()); /* 15.2.12.5.29 */
+ mrb_define_method(mrb, a, "unshift", mrb_ary_unshift_m, ARGS_ANY()); /* 15.2.12.5.30 */
+
+ mrb_define_method(mrb, a, "inspect", mrb_ary_inspect, ARGS_NONE()); /* 15.2.12.5.31 (x) */
+ mrb_define_alias(mrb, a, "to_s", "inspect"); /* 15.2.12.5.32 (x) */
+ mrb_define_method(mrb, a, "==", mrb_ary_equal, ARGS_REQ(1)); /* 15.2.12.5.33 (x) */
+ mrb_define_method(mrb, a, "eql?", mrb_ary_eql, ARGS_REQ(1)); /* 15.2.12.5.34 (x) */
+ /* Array#hash stays unregistered: its implementation is compiled out above. */
+ //mrb_define_method(mrb, a, "hash", mrb_ary_hash, ARGS_NONE()); /* 15.2.12.5.35 (x) */
+ mrb_define_method(mrb, a, "<=>", mrb_ary_cmp, ARGS_REQ(1)); /* 15.2.12.5.36 (x) */
+}
diff --git a/src/ascii.c b/src/ascii.c
new file mode 100644
index 000000000..91bd54073
--- /dev/null
+++ b/src/ascii.c
@@ -0,0 +1,96 @@
+/**********************************************************************
+ ascii.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include "regenc.h"
+
+/*
+ * Encoding descriptor for "ASCII-8BIT": a single-byte encoding (minimum and
+ * maximum byte length are both 1).  The other entries are the onigenc_*
+ * handlers plugged into the descriptor; what each slot means is fixed by
+ * OnigEncodingDefine / regenc.h, which are not visible here.
+ */
+OnigEncodingDefine(ascii, ASCII) = {
+ onigenc_single_byte_mbc_enc_len,
+ "ASCII-8BIT",/* name */
+ 1, /* max byte length */
+ 1, /* min byte length */
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_case_fold,
+ onigenc_ascii_apply_all_case_fold,
+ onigenc_ascii_get_case_fold_codes_by_str,
+ onigenc_minimum_property_name_to_ctype,
+ onigenc_ascii_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
+/*
+ * Further encoding names layered on top of ASCII-8BIT.
+ * NOTE(review): ENC_ALIAS and ENC_REPLICATE are defined elsewhere.
+ * Presumably ENC_ALIAS adds a second name for an existing encoding and
+ * ENC_REPLICATE creates a distinct encoding that reuses the behaviour of the
+ * named one -- confirm.  CP852 and CP855 use ENC_REPLICATE where the other
+ * CPxxx names use ENC_ALIAS; verify that this is intended.
+ */
+ENC_ALIAS("BINARY", "ASCII-8BIT")
+ENC_REPLICATE("IBM437", "ASCII-8BIT")
+ENC_ALIAS("CP437", "IBM437")
+ENC_REPLICATE("IBM737", "ASCII-8BIT")
+ENC_ALIAS("CP737", "IBM737")
+ENC_REPLICATE("IBM775", "ASCII-8BIT")
+ENC_ALIAS("CP775", "IBM775")
+ENC_REPLICATE("CP850", "ASCII-8BIT")
+ENC_ALIAS("IBM850", "CP850")
+ENC_REPLICATE("IBM852", "ASCII-8BIT")
+ENC_REPLICATE("CP852", "IBM852")
+ENC_REPLICATE("IBM855", "ASCII-8BIT")
+ENC_REPLICATE("CP855", "IBM855")
+ENC_REPLICATE("IBM857", "ASCII-8BIT")
+ENC_ALIAS("CP857", "IBM857")
+ENC_REPLICATE("IBM860", "ASCII-8BIT")
+ENC_ALIAS("CP860", "IBM860")
+ENC_REPLICATE("IBM861", "ASCII-8BIT")
+ENC_ALIAS("CP861", "IBM861")
+ENC_REPLICATE("IBM862", "ASCII-8BIT")
+ENC_ALIAS("CP862", "IBM862")
+ENC_REPLICATE("IBM863", "ASCII-8BIT")
+ENC_ALIAS("CP863", "IBM863")
+ENC_REPLICATE("IBM864", "ASCII-8BIT")
+ENC_ALIAS("CP864", "IBM864")
+ENC_REPLICATE("IBM865", "ASCII-8BIT")
+ENC_ALIAS("CP865", "IBM865")
+ENC_REPLICATE("IBM866", "ASCII-8BIT")
+ENC_ALIAS("CP866", "IBM866")
+ENC_REPLICATE("IBM869", "ASCII-8BIT")
+ENC_ALIAS("CP869", "IBM869")
+ENC_REPLICATE("Windows-1258", "ASCII-8BIT")
+ENC_ALIAS("CP1258", "Windows-1258")
+ENC_REPLICATE("GB1988", "ASCII-8BIT")
+ENC_REPLICATE("macCentEuro", "ASCII-8BIT")
+ENC_REPLICATE("macCroatian", "ASCII-8BIT")
+ENC_REPLICATE("macCyrillic", "ASCII-8BIT")
+ENC_REPLICATE("macGreek", "ASCII-8BIT")
+ENC_REPLICATE("macIceland", "ASCII-8BIT")
+ENC_REPLICATE("macRoman", "ASCII-8BIT")
+ENC_REPLICATE("macRomania", "ASCII-8BIT")
+ENC_REPLICATE("macThai", "ASCII-8BIT")
+ENC_REPLICATE("macTurkish", "ASCII-8BIT")
+ENC_REPLICATE("macUkraine", "ASCII-8BIT")
+#endif //INCLUDE_ENCODING
diff --git a/src/cdump.c b/src/cdump.c
new file mode 100644
index 000000000..74365b090
--- /dev/null
+++ b/src/cdump.c
@@ -0,0 +1,197 @@
+#include "cdump.h"
+
+#include <string.h>
+
+#include "irep.h"
+#include "mruby/string.h"
+#include "re.h"
+
+/* Initial size, in bytes, of the scratch buffer used when escaping string literals. */
+#define MRB_CDUMP_LINE_LEN 128
+
+/*
+ * Output helpers.  Both write one line of generated C source, followed by a
+ * newline, to a FILE* that must be in scope under the name `f`.
+ *   SOURCE_CODE  - printf-style.  `fmt` must be a string literal ("\n" is
+ *                  pasted onto it) and at least one argument must follow it,
+ *                  because __VA_ARGS__ is used without a fallback.
+ *   SOURCE_CODE0 - fixed string, no formatting.
+ */
+#define SOURCE_CODE(fmt, ...) fprintf(f, fmt"\n", __VA_ARGS__)
+#define SOURCE_CODE0(str) do {fputs(str, f); putc('\n', f);} while (0)
+
+/*
+ * Writes the instruction sequence of irep number `irep_no` to `f` as a C
+ * array named iseq_<irep_no>, one "0x%08x" formatted word per line.
+ * Nothing is written when the irep holds no instructions.
+ *
+ * Returns 0 on success and -1 when the irep slot is empty.
+ */
+static int
+make_cdump_isec(mrb_state *mrb, int irep_no, FILE *f)
+{
+  mrb_irep *irep = mrb->irep[irep_no];
+  int pos;
+
+  if (irep == 0) {
+    return -1;
+  }
+  if (irep->ilen <= 0) {
+    return 0;
+  }
+
+  /* dump iseq array */
+  SOURCE_CODE("static mrb_code iseq_%d[] = {", irep_no);
+  pos = 0;
+  while (pos < irep->ilen) {
+    SOURCE_CODE(" 0x%08x,", irep->iseq[pos]);
+    pos++;
+  }
+  SOURCE_CODE0("};");
+  SOURCE_CODE0("");
+
+  return 0;
+}
+
+/*
+ * Returns the character that follows the backslash in the C escape sequence
+ * for `c`, or 0 when `c` is copied into the generated literal unchanged.
+ */
+static char
+cdump_escape_letter(char c)
+{
+  switch (c) {
+  case 0x07: return 'a';   /* BEL */
+  case 0x08: return 'b';   /* BS  */
+  case 0x09: return 't';   /* HT  */
+  case 0x0A: return 'n';   /* LF  */
+  case 0x0B: return 'v';   /* VT  */
+  case 0x0C: return 'f';   /* FF  */
+  case 0x0D: return 'r';   /* CR  */
+  case 0x22: return '"';   /* "   */
+  case 0x27: return '\'';  /* '   */
+  case 0x3F: return '?';   /* ?   */
+  case 0x5C: return '\\';  /* \   */
+  default:   return 0;
+  }
+}
+
+/*
+ * Returns the number of bytes str_to_format() writes for `str`: two for
+ * every character that needs a backslash escape, one for every other
+ * character.  The count does not include a terminating NUL.
+ *
+ * Each source character is consumed exactly once.  (Advancing by two after
+ * an escaped character dropped the following byte and could step past the
+ * end of the string.)
+ */
+static size_t
+str_format_len(mrb_value str)
+{
+  size_t dump_len = 0;
+  char *src;
+
+  for (src = RSTRING_PTR(str); src < RSTRING_END(str); src++) {
+    dump_len += cdump_escape_letter(*src) ? 2 : 1;
+  }
+
+  return dump_len;
+}
+
+/*
+ * Copies `str` into `buf`, replacing the characters handled by
+ * cdump_escape_letter() with their two-character C escape sequences.
+ *
+ * `buf` must have room for str_format_len(str) bytes.  No terminating NUL is
+ * written; the caller is expected to supply a zero-filled buffer with one
+ * extra byte when it needs a C string.
+ *
+ * Returns `buf`.
+ */
+static char*
+str_to_format(mrb_value str, char *buf)
+{
+  char *src, *dst;
+
+  dst = buf;
+  for (src = RSTRING_PTR(str); src < RSTRING_END(str); src++) {
+    char letter = cdump_escape_letter(*src);
+
+    if (letter) {
+      *dst++ = '\\';
+      *dst++ = letter;
+    }
+    else {
+      *dst++ = *src;
+    }
+  }
+
+  return buf;
+}
+
+/*
+ * Writes to `f` the C statements that rebuild irep number `irep_no` at run
+ * time: the mrb_irep header fields, the symbol table and the literal pool
+ * (float and string entries only; other pool types are skipped).
+ *
+ * String literals are escaped through a scratch buffer that starts at
+ * MRB_CDUMP_LINE_LEN bytes and grows on demand.  The buffer is released on
+ * every exit path.
+ *
+ * Returns MRB_CDUMP_OK on success, -1 when the irep slot is empty and
+ * MRB_CDUMP_GENERAL_FAILURE when the scratch buffer cannot be (re)allocated.
+ */
+int
+make_cdump_irep(mrb_state *mrb, int irep_no, FILE *f)
+{
+  mrb_irep *irep = mrb->irep[irep_no];
+  int n;
+  char *buf = 0;
+  size_t buf_len, str_len;
+
+  if (irep == 0)
+    return -1;
+
+  buf_len = MRB_CDUMP_LINE_LEN;
+  if ((buf = mrb_malloc(mrb, buf_len)) == 0 ) {
+    return MRB_CDUMP_GENERAL_FAILURE;
+  }
+
+  SOURCE_CODE0 (" irep = mrb->irep[idx] = mrb_malloc(mrb, sizeof(mrb_irep));");
+  SOURCE_CODE0 (" irep->idx = idx++;");
+  SOURCE_CODE (" irep->flags = %d | MRB_ISEQ_NOFREE;", irep->flags);
+  SOURCE_CODE (" irep->nlocals = %d;", irep->nlocals);
+  SOURCE_CODE (" irep->nregs = %d;", irep->nregs);
+  SOURCE_CODE (" irep->ilen = %d;", irep->ilen);
+  SOURCE_CODE (" irep->iseq = iseq_%d;", irep_no);
+
+  SOURCE_CODE (" irep->slen = %d;", irep->slen);
+  if (irep->slen > 0) {
+    SOURCE_CODE (" irep->syms = mrb_malloc(mrb, sizeof(mrb_sym)*%d);", irep->slen);
+    for (n=0; n<irep->slen; n++) {
+      /* Only non-zero symbols are emitted. */
+      if (irep->syms[n]) {
+        SOURCE_CODE (" irep->syms[%d] = mrb_intern(mrb, \"%s\");", n, mrb_sym2name(mrb, irep->syms[n]));
+      }
+    }
+  }
+  else {
+    SOURCE_CODE0 (" irep->syms = NULL;");
+  }
+
+  SOURCE_CODE (" irep->plen = %d;", irep->plen);
+  if (irep->plen > 0) {
+    SOURCE_CODE (" irep->pool = mrb_malloc(mrb, sizeof(mrb_value)*%d);", irep->plen);
+    for (n=0; n<irep->plen; n++) {
+      switch (irep->pool[n].tt) {
+      case MRB_TT_FLOAT:
+        SOURCE_CODE(" irep->pool[%d] = mrb_float_value(%.16e);", n, irep->pool[n].value.f);
+        break;
+      case MRB_TT_STRING:
+        /* +1 keeps a zero byte after the escaped text (buffer is zero-filled). */
+        str_len = str_format_len(irep->pool[n]) + 1;
+        if ( str_len > buf_len ) {
+          /* Grow through a temporary so the old block is not lost on failure. */
+          char *grown = mrb_realloc(mrb, buf, str_len);
+
+          if (grown == 0) {
+            mrb_free(mrb, buf);
+            return MRB_CDUMP_GENERAL_FAILURE;
+          }
+          buf = grown;
+          buf_len = str_len;
+        }
+        memset(buf, 0, buf_len);
+        /* The length is cast because it is printed with %d. */
+        SOURCE_CODE(" irep->pool[%d] = mrb_str_new(mrb, \"%s\", %d);", n, str_to_format(irep->pool[n], buf), (int)RSTRING_LEN(irep->pool[n]));
+        break;
+      /* TODO MRB_TT_REGEX */
+      default:
+        break;
+      }
+    }
+  }
+  else {
+    SOURCE_CODE0 (" irep->pool = NULL;");
+  }
+  SOURCE_CODE0("");
+
+  mrb_free(mrb, buf);
+  return MRB_CDUMP_OK;
+}
+
+/*
+ * Writes a C source file to `f` that recreates the ireps numbered
+ * n .. mrb->irep_len-1 and runs the first of them.
+ *
+ * Output layout: the #include lines, one iseq_<no> array per irep (via
+ * make_cdump_isec), then a function named `initname` that calls
+ * mrb_add_irep, rebuilds every irep (via make_cdump_irep), updates
+ * mrb->irep_len and finally runs mrb->irep[n] with mrb_top_self as receiver.
+ *
+ * Returns 0 on success.  Returns -1 when an argument is missing or out of
+ * range, or when one of the helpers reports a failure; in the latter case
+ * `f` already contains partial output.
+ */
+int
+mrb_cdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname)
+{
+ int irep_no, irep_num;
+
+ if (mrb == 0 || n < 0 || n >= mrb->irep_len || f == 0 || initname == 0)
+ return -1;
+
+ /* Number of ireps that will be dumped. */
+ irep_num = mrb->irep_len - n;
+
+ SOURCE_CODE0("#include \"mruby.h\"");
+ SOURCE_CODE0("#include \"irep.h\"");
+ SOURCE_CODE0("#include \"mruby/string.h\"");
+ SOURCE_CODE0("#include \"mruby/proc.h\"");
+ SOURCE_CODE0("");
+
+ /* File-scope instruction arrays come first so the init function can refer to them. */
+ for (irep_no=n; irep_no<mrb->irep_len; irep_no++) {
+ if (make_cdump_isec(mrb, irep_no, f) != 0)
+ return -1;
+ }
+
+ SOURCE_CODE0("void");
+ SOURCE_CODE ("%s(mrb_state *mrb)", initname);
+ SOURCE_CODE0("{");
+ SOURCE_CODE0(" int n = mrb->irep_len;");
+ SOURCE_CODE0(" int idx = n;");
+ SOURCE_CODE0(" mrb_irep *irep;");
+ SOURCE_CODE0("");
+ SOURCE_CODE (" mrb_add_irep(mrb, idx+%d);", irep_num);
+ SOURCE_CODE0("");
+ for (irep_no=n; irep_no<mrb->irep_len; irep_no++) {
+ if (make_cdump_irep(mrb, irep_no, f) != 0)
+ return -1;
+ }
+
+ SOURCE_CODE0(" mrb->irep_len = idx;");
+ SOURCE_CODE0("");
+ SOURCE_CODE0(" extern mrb_value mrb_top_self(mrb_state *mrb);");
+ SOURCE_CODE0(" mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_top_self(mrb));");
+ SOURCE_CODE0("}");
+
+ return 0;
+}
diff --git a/src/cdump.h b/src/cdump.h
new file mode 100644
index 000000000..82fc442a6
--- /dev/null
+++ b/src/cdump.h
@@ -0,0 +1,8 @@
+#ifndef MRUBY_CDUMP_H
+#define MRUBY_CDUMP_H
+
+#include "mruby.h"
+#include <stdio.h>
+
+/*
+ * Writes C source to `f` that recreates the ireps numbered n and above and
+ * runs the first of them from a function named `initname`.
+ * Returns 0 on success and a negative value on failure.
+ */
+int mrb_cdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname);
+
+/* error code */
+#define MRB_CDUMP_OK 0
+#define MRB_CDUMP_GENERAL_FAILURE (-1)
+
+#endif /* MRUBY_CDUMP_H */
diff --git a/src/class.c b/src/class.c
new file mode 100644
index 000000000..aa2485cb1
--- /dev/null
+++ b/src/class.c
@@ -0,0 +1,1049 @@
+#include "mruby.h"
+#include <stdarg.h>
+#include <stdio.h>
+#include "mruby/class.h"
+#include "mruby/proc.h"
+#include "mruby/string.h"
+#include "mruby/numeric.h"
+#include "variable.h"
+#include "mruby/array.h"
+#include "error.h"
+
+#include "ritehash.h"
+
+#ifdef INCLUDE_REGEXP
+ #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
+#else
+ #define mrb_usascii_str_new2 mrb_str_new_cstr
+#endif
+
+KHASH_MAP_INIT_INT(mt, struct RProc*);
+KHASH_MAP_INIT_INT(iv, mrb_value);
+
+/*
+ * Linked record (each entry points at a previous one through `prev`) that
+ * carries a symbol, two class pointers and a path value.
+ * NOTE(review): not referenced in the part of the file reviewed here; the
+ * field names suggest it is scratch state for a class-path search -- confirm
+ * against its users before relying on that.
+ */
+typedef struct fc_result {
+ mrb_sym name;
+ struct RClass * klass;
+ mrb_value path;
+ struct RClass * track;
+ struct fc_result *prev;
+} fcresult_t;
+
+struct RClass* mrb_class_real(struct RClass* cl);
+int kiv_lookup(khash_t(iv) *table, mrb_sym key, mrb_value *value);
+extern struct kh_iv *mrb_class_tbl;
+
+/*
+ * GC helper: paints black every non-NULL proc stored in the method table of
+ * `c`.  A class without a method table is left alone.
+ */
+void
+mrb_gc_mark_mt(mrb_state *mrb, struct RClass *c)
+{
+  khash_t(mt) *tbl = c->mt;
+  khiter_t it;
+
+  if (!tbl) return;
+  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
+    struct RProc *proc;
+
+    if (!kh_exist(tbl, it)) continue;
+    proc = kh_value(tbl, it);
+    if (proc) {
+      paint_black(proc);
+    }
+  }
+}
+
+/*
+ * Returns the number of entries in the method table of `c`, or 0 when the
+ * class has no method table.
+ */
+size_t
+mrb_gc_mark_mt_size(mrb_state *mrb, struct RClass *c)
+{
+  khash_t(mt) *tbl = c->mt;
+
+  if (tbl) {
+    return kh_size(tbl);
+  }
+  return 0;
+}
+
+/*
+ * GC helper: destroys the method table of `c`.
+ * NOTE(review): c->mt is passed on without a NULL check while other code in
+ * this file allows c->mt to be 0 -- confirm kh_destroy accepts a null table.
+ */
+void
+mrb_gc_free_mt(mrb_state *mrb, struct RClass *c)
+{
+ kh_destroy(mt, c->mt);
+}
+
+/*
+ * Records `name` as the name of class `c` by storing it, as a symbol value,
+ * in the "__classid__" instance variable of the class.
+ */
+void
+mrb_name_class(mrb_state *mrb, struct RClass *c, mrb_sym name)
+{
+  struct RObject *target = (struct RObject*)c;
+  mrb_sym key = mrb_intern(mrb, "__classid__");
+
+  mrb_obj_iv_set(mrb, target, key, mrb_symbol_value(name));
+}
+
+/*
+ * Returns the symbol under which class `c` is known.
+ *
+ * The "__classid__" instance variable is used when set.  Otherwise the
+ * variable table of `outer` (Object when `outer` is NULL) is scanned for an
+ * entry whose value is `c`, and that entry's key is returned.
+ *
+ * Returns 0 when no name can be found; mrb_class_path() tests for that
+ * value.  (Previously an unsuccessful scan ended in SYM2ID() applied to a
+ * nil value, and a missing variable table was dereferenced.)
+ */
+static mrb_sym
+class_sym(mrb_state *mrb, struct RClass *c, struct RClass *outer)
+{
+  mrb_value name;
+  khash_t(iv)* h;
+  khiter_t k;
+
+  name = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classid__"));
+  if (!mrb_nil_p(name)) {
+    return SYM2ID(name);
+  }
+
+  if (!outer) outer = mrb->object_class;
+  h = outer->iv;
+  if (!h) {
+    return 0;                   /* nothing to scan */
+  }
+  for (k = kh_begin(h); k != kh_end(h); k++) {
+    mrb_value v;
+
+    if (!kh_exist(h,k)) continue;
+    v = kh_value(h,k);
+    if (mrb_type(v) == c->tt && mrb_class_ptr(v) == c) {
+      return kh_key(h,k);
+    }
+  }
+  return 0;                     /* anonymous: no entry refers to c */
+}
+
+/*
+ * Gives `c` a singleton class (metaclass) unless its class pointer already
+ * refers to one.
+ *
+ * The new singleton class starts without a method table.  Its superclass is
+ * Class when `c` has no superclass, otherwise the class pointer of c's
+ * superclass (c->super->c).  Write barriers are issued for both new
+ * references (c -> sc and sc -> sc->super).
+ */
+static void
+make_metaclass(mrb_state *mrb, struct RClass *c)
+{
+ struct RClass *sc;
+
+ /* Already has a singleton class: nothing to do. */
+ if (c->c->tt == MRB_TT_SCLASS) {
+ return;
+ }
+ sc = mrb_obj_alloc(mrb, MRB_TT_SCLASS, mrb->class_class);
+ sc->mt = 0;
+ if (!c->super) {
+ sc->super = mrb->class_class;
+ }
+ else {
+ sc->super = c->super->c;
+ }
+ c->c = sc;
+ mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)sc);
+ mrb_field_write_barrier(mrb, (struct RBasic*)sc, (struct RBasic*)sc->super);
+}
+
+/*
+ * Creates a new module, stores it in Object's variable table under `name`
+ * and records `name` as the module's name.
+ *
+ * mrb_module_new() already gives the module a method table, so none is
+ * created here (a second kh_init would overwrite, and thereby leak, the
+ * first table).
+ *
+ * Returns the new module.
+ */
+struct RClass*
+mrb_define_module_id(mrb_state *mrb, mrb_sym name)
+{
+  struct RClass *m = mrb_module_new(mrb);
+
+  mrb_obj_iv_set(mrb, (struct RObject*)mrb->object_class,
+                 name, mrb_obj_value(m));
+  mrb_name_class(mrb, m, name);
+
+  return m;
+}
+
+/*
+ * C-string front end of mrb_define_module_id(): interns `name` and defines
+ * the module under that symbol.
+ */
+struct RClass*
+mrb_define_module(mrb_state *mrb, const char *name)
+{
+  mrb_sym id = mrb_intern(mrb, name);
+
+  return mrb_define_module_id(mrb, id);
+}
+
+/*
+ * Attaches class/module `c` to the namespace `outer` under the name `id`:
+ * names the class, sets the constant `id` in `outer`, and remembers `outer`
+ * in the "__outer__" instance variable of `c`.
+ */
+static void
+setup_class(mrb_state *mrb, mrb_value outer, struct RClass *c, mrb_sym id)
+{
+  struct RObject *self = (struct RObject*)c;
+
+  mrb_name_class(mrb, c, id);
+  mrb_const_set(mrb, outer, id, mrb_obj_value(c));
+  mrb_obj_iv_set(mrb, self, mrb_intern(mrb, "__outer__"), outer);
+}
+
+/*
+ * Returns the namespace recorded in the "__outer__" instance variable of
+ * `c`, or 0 when that variable is nil.
+ */
+struct RClass*
+mrb_class_outer_module(mrb_state *mrb, struct RClass *c)
+{
+  mrb_sym key = mrb_intern(mrb, "__outer__");
+  mrb_value outer = mrb_obj_iv_get(mrb, (struct RObject*)c, key);
+
+  if (!mrb_nil_p(outer)) {
+    return mrb_class_ptr(outer);
+  }
+  return 0;
+}
+
+/*
+ * Returns the module bound to constant `id` in `outer`, creating and
+ * registering a new module when the constant is not defined yet.  An
+ * existing constant is returned as it is, without checking its type.
+ */
+struct RClass*
+mrb_vm_define_module(mrb_state *mrb, mrb_value outer, mrb_sym id)
+{
+  struct RClass *mod;
+
+  if (mrb_const_defined(mrb, outer, id)) {
+    return mrb_class_ptr(mrb_const_get(mrb, outer, id));
+  }
+
+  mod = mrb_module_new(mrb);
+  setup_class(mrb, outer, mod, id);
+  return mod;
+}
+
+/*
+ * Creates a new class deriving from `super`, stores it in Object's variable
+ * table under `name` and records `name` as the class name.
+ * Returns the new class.
+ */
+struct RClass*
+mrb_define_class_id(mrb_state *mrb, mrb_sym name, struct RClass *super)
+{
+  struct RClass *klass = mrb_class_new(mrb, super);
+  struct RObject *top = (struct RObject*)mrb->object_class;
+
+  mrb_obj_iv_set(mrb, top, name, mrb_obj_value(klass));
+  mrb_name_class(mrb, klass, name);
+
+  return klass;
+}
+
+/*
+ * C-string front end of mrb_define_class_id(): interns `name` and defines
+ * the class under that symbol.
+ */
+struct RClass*
+mrb_define_class(mrb_state *mrb, const char *name, struct RClass *super)
+{
+  return mrb_define_class_id(mrb, mrb_intern(mrb, name), super);
+}
+
+/*
+ * Returns the class bound to constant `id` in `outer`, or creates one.
+ *
+ * An existing constant is reused when `super` is nil, or when the constant
+ * is a class whose direct superclass is `super`.  In every other case a new
+ * class (deriving from `super`, or from the default when `super` is nil) is
+ * created and registered under `id`.
+ */
+struct RClass*
+mrb_vm_define_class(mrb_state *mrb, mrb_value outer, mrb_value super, mrb_sym id)
+{
+  struct RClass *c = 0;
+  struct RClass *parent = 0;
+
+  if (mrb_const_defined(mrb, outer, id)) {
+    struct RClass *found = mrb_class_ptr(mrb_const_get(mrb, outer, id));
+    int mismatch = !mrb_nil_p(super) &&
+                   (found->tt != MRB_TT_CLASS || found->super != mrb_class_ptr(super));
+
+    if (!mismatch) {
+      c = found;
+    }
+  }
+  if (c) {
+    return c;
+  }
+
+  if (!mrb_nil_p(super)) {
+    parent = mrb_class_ptr(super);
+  }
+  c = mrb_class_new(mrb, parent);
+  setup_class(mrb, outer, c, id);
+
+  return c;
+}
+
+/*!
+ * Defines a class under the namespace of \a outer.
+ * \param outer  a class which contains the new class.
+ * \param name   name of the new class
+ * \param super  a class from which the new class will derive.
+ *               NULL means \c Object class.
+ * \return the created class
+ * \throw TypeError if the constant name \a name is already taken but
+ *                  the constant is not a \c Class.
+ * \throw NameError if the class is already defined but the class can not
+ *                  be reopened because its superclass is not \a super.
+ * \post the constant named \a name in \a outer refers to the returned class.
+ *
+ * \note if a class named \a name is already defined and its superclass is
+ *       \a super, the function just returns the defined class.
+ */
+struct RClass *
+mrb_define_class_under(mrb_state *mrb, struct RClass *outer, const char *name, struct RClass *super)
+{
+  struct RClass * c;
+  mrb_sym id = mrb_intern(mrb, name);
+
+  if (mrb_const_defined_at(mrb, outer, id)) {
+    /* A NULL super stands for Object, so compare against Object when reopening. */
+    struct RClass *expected = super ? super : mrb->object_class;
+
+    c = mrb_class_from_sym(mrb, outer, id);
+    if (c->tt != MRB_TT_CLASS) {
+      mrb_raise(mrb, E_TYPE_ERROR, "%s is not a class", mrb_sym2name(mrb, id));
+    }
+    if (mrb_class_real(c->super) != expected) {
+      mrb_name_error(mrb, id, "%s is already defined", mrb_sym2name(mrb, id));
+    }
+    return c;
+  }
+  if (!super) {
+    mrb_warn("no super class for `%s::%s', Object assumed",
+             mrb_obj_classname(mrb, mrb_obj_value(outer)), mrb_sym2name(mrb, id));
+  }
+  c = mrb_class_new(mrb, super);
+  /* setup_class() names the class and sets the constant in outer. */
+  setup_class(mrb, mrb_obj_value(outer), c, id);
+
+  return c;
+}
+
+/*
+ * Defines a module named `name` inside the namespace `outer`.
+ *
+ * When the constant already exists in `outer` it must be a module, which is
+ * then returned unchanged; otherwise TypeError is raised.  A new module is
+ * created and registered when the constant is not defined yet.
+ */
+struct RClass *
+mrb_define_module_under(mrb_state *mrb, struct RClass *outer, const char *name)
+{
+  struct RClass * c;
+  mrb_sym id = mrb_intern(mrb, name);
+
+  if (mrb_const_defined_at(mrb, outer, id)) {
+    c = mrb_class_from_sym(mrb, outer, id);
+    if (c->tt != MRB_TT_MODULE) {
+      mrb_raise(mrb, E_TYPE_ERROR, "%s is not a module", mrb_sym2name(mrb, id));
+    }
+    return c;
+  }
+  c = mrb_module_new(mrb);
+  /* setup_class() names the module and sets the constant in outer. */
+  setup_class(mrb, mrb_obj_value(outer), c, id);
+
+  return c;
+}
+
+/*
+ * Stores proc `p` as the implementation of method `mid` in the method table
+ * of `c`, creating the table first when the class has none.  An existing
+ * entry for `mid` is overwritten.
+ */
+void
+mrb_define_method_raw(mrb_state *mrb, struct RClass *c, mrb_sym mid, struct RProc *p)
+{
+  khash_t(mt) *tbl = c->mt;
+  khiter_t slot;
+  int ret;
+
+  if (!tbl) {
+    tbl = kh_init(mt, mrb);
+    c->mt = tbl;
+  }
+  slot = kh_put(mt, tbl, mid, &ret);
+  kh_value(tbl, slot) = p;
+}
+
+/*
+ * Defines method `mid` on class `c`, implemented by the C function `func`.
+ * The function is wrapped in a new proc whose target class is `c`.
+ * `aspec` is accepted but not used here.
+ */
+void
+mrb_define_method_id(mrb_state *mrb, struct RClass *c, mrb_sym mid, mrb_func_t func, int aspec)
+{
+  struct RProc *proc = mrb_proc_new_cfunc(mrb, func);
+
+  proc->target_class = c;
+  mrb_define_method_raw(mrb, c, mid, proc);
+}
+
+/*
+ * C-string front end of mrb_define_method_id(): interns `name` and defines
+ * the method under that symbol.
+ *
+ * The call is a plain statement: a `return` with an expression is not
+ * allowed in a function returning void.
+ */
+void
+mrb_define_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, int apec)
+{
+  mrb_define_method_id(mrb, c, mrb_intern(mrb, name), func, apec);
+}
+
+/*
+ * Defines method `name` on class `c` from a proc value `body`.
+ *
+ * Delegates to mrb_define_method_raw() so that the method-table handling
+ * (lazy table creation, overwrite of an existing entry) lives in one place.
+ */
+void
+mrb_define_method_vm(mrb_state *mrb, struct RClass *c, mrb_sym name, mrb_value body)
+{
+  mrb_define_method_raw(mrb, c, name, mrb_proc_ptr(body));
+}
+
+/*
+ * Fetches the arguments of the current method call according to `format`.
+ *
+ * Arguments are read from mrb->stack[1] onwards; when the call info reports
+ * a negative argc they are taken from the array found in mrb->stack[1].
+ * Each format character consumes the listed output pointer(s):
+ *
+ *   o   mrb_value*          next argument, nil when missing
+ *   i   mrb_int*            next argument as fixnum, 0 when missing
+ *                           (no type check is done)
+ *   f   mrb_float*          next argument (float or fixnum), 0 when missing;
+ *                           left untouched for any other type
+ *   s   char**, size_t*     buffer and length of the next argument, which is
+ *                           treated as a string; "" and 0 when missing
+ *   a   mrb_value*, int*    copies all remaining arguments; ends parsing
+ *   b   struct RProc**      block as proc pointer, 0 when the block is nil
+ *   &   mrb_value*          block as value
+ *   *   mrb_value**, int*   pointer to and count of the remaining
+ *                           arguments; ends parsing
+ *
+ * Always returns 0.
+ */
+int
+mrb_get_args(mrb_state *mrb, const char *format, ...)
+{
+  char c;
+  int i=0;
+  mrb_value *sp = mrb->stack + 1;
+  va_list ap;
+  int argc = mrb->ci->argc;
+  int *argcp;
+
+  va_start(ap, format);
+  if (argc < 0) {
+    /* Arguments were packed into an array. */
+    struct RArray *a = mrb_ary_ptr(mrb->stack[1]);
+
+    argc = a->len;
+    sp = a->buf;
+  }
+  while ((c = *format++)) {
+    switch (c) {
+    case 'o':
+      {
+        mrb_value *p;
+        p = va_arg(ap, mrb_value*);
+        *p = (argc > i) ? *sp : mrb_nil_value();
+        i++; sp++;
+      }
+      break;
+    case 'i':
+      {
+        mrb_int *p;
+
+        p = va_arg(ap, mrb_int*);
+        *p = (argc > i) ? mrb_fixnum(*sp) : 0;
+        i++; sp++;
+      }
+      break;
+    case 'f':
+      {
+        mrb_float *p;
+
+        p = va_arg(ap, mrb_float*);
+        /* Look at *sp only when the argument was really supplied. */
+        if (argc > i) {
+          switch (sp->tt) {
+          case MRB_TT_FLOAT:
+            *p = mrb_float(*sp);
+            break;
+          case MRB_TT_FIXNUM:
+            *p = (mrb_float)mrb_fixnum(*sp);
+            break;
+          default:
+            // error
+            break;
+          }
+        }
+        else {
+          *p = 0;
+        }
+        i++; sp++;
+      }
+      break;
+    case 's':
+      {
+        char **ps;
+        size_t *pl;
+
+        /* Fetch both output pointers first so they are valid in either branch. */
+        ps = va_arg(ap, char**);
+        pl = va_arg(ap, size_t*);
+        if (argc > i) {
+          struct RString *s = mrb_str_ptr(*sp);
+
+          *ps = s->buf;
+          *pl = s->len;
+        }
+        else {
+          *ps = "";
+          *pl = 0;
+        }
+        i++; sp++;
+      }
+      break;
+    case 'a':
+      {
+        mrb_value *var;
+        var = va_arg(ap, mrb_value*);
+        if (argc > i) {
+          if (var) {
+            memcpy(var, sp, sizeof(mrb_value)*(argc-i));
+          }
+          //i = mrb->argc;
+        }
+        else {
+          if (var) *var = mrb_ary_new(mrb);
+        }
+        argcp = va_arg(ap, int*);
+        *argcp = argc-i;
+        goto last_var;
+      }
+      break;
+    case 'b':
+      {
+        struct RProc **p;
+        mrb_value *bp = mrb->stack + 1;
+
+        p = va_arg(ap, struct RProc**);
+        /* NOTE(review): with a negative ci->argc this reads stack[1] -- confirm where the block lives in that case. */
+        if (mrb->ci->argc > 0) {
+          bp += mrb->ci->argc;
+        }
+        if (mrb_nil_p(*bp)) *p = 0;
+        else *p = mrb_proc_ptr(*bp);
+      }
+      break;
+    case '&':
+      {
+        mrb_value *p, *bp = mrb->stack + 1;
+
+        p = va_arg(ap, mrb_value*);
+        if (mrb->ci->argc > 0) {
+          bp += mrb->ci->argc;
+        }
+        *p = *bp;
+      }
+      break;
+    case '*':
+      {
+        mrb_value **var;
+        var = va_arg(ap, mrb_value**);
+        argcp = va_arg(ap, int*);
+        if (argc > i) {
+          *argcp = argc-i;
+          if (*argcp > 0) {
+            if (var) {
+              *var = sp;
+            }
+            i += *argcp;
+          }
+        }
+        else {
+          *argcp = 0;
+          /* var may be NULL, exactly as in the branch above. */
+          if (var) {
+            *var = NULL;
+          }
+        }
+        goto last_var;
+      }
+      break;
+    }
+  }
+last_var:
+  va_end(ap);
+  return 0;
+}
+
+/*
+ * Allocates a bare class object deriving from `super` (Object when `super`
+ * is NULL) and gives it an empty method table.  No metaclass is created.
+ *
+ * The write barrier is issued for the reference that is actually stored
+ * (c->super), and skipped when there is none, instead of being handed the
+ * possibly-NULL `super` argument.
+ */
+static struct RClass*
+boot_defclass(mrb_state *mrb, struct RClass *super)
+{
+  struct RClass *c;
+
+  c = mrb_obj_alloc(mrb, MRB_TT_CLASS, mrb->class_class);
+  c->super = super ? super : mrb->object_class;
+  if (c->super) {
+    mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)c->super);
+  }
+  c->mt = kh_init(mt, mrb);
+  return c;
+}
+
+/*
+ * Includes module `m` into class `c`.
+ *
+ * An include-class object (MRB_TT_ICLASS) is allocated that shares the
+ * method table and variable table of `m` and is spliced between `c` and its
+ * former superclass.
+ * NOTE(review): nothing here checks whether `m` is already in the chain, so
+ * a repeated include appears to add another entry -- confirm.
+ */
+void
+mrb_include_module(mrb_state *mrb, struct RClass *c, struct RClass *m)
+{
+ struct RClass *ic;
+
+ ic = mrb_obj_alloc(mrb, MRB_TT_ICLASS, mrb->class_class);
+ ic->c = m;
+ /* Tables are shared with the module, not copied. */
+ ic->mt = m->mt;
+ ic->iv = m->iv;
+ ic->super = c->super;
+ c->super = ic;
+ mrb_field_write_barrier(mrb, (struct RBasic*)c, (struct RBasic*)ic);
+}
+
+/*
+ * Method body that includes the module given as first argument into the
+ * receiver and returns that module.  The argument is used as a module
+ * without a type check.
+ */
+static mrb_value
+mrb_mod_include(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value mod;
+  struct RClass *target;
+  struct RClass *module;
+
+  mrb_get_args(mrb, "o", &mod);
+  target = mrb_class_ptr(klass);
+  module = mrb_class_ptr(mod);
+  mrb_include_module(mrb, target, module);
+  return mod;
+}
+
+/*
+ * Returns `c` itself when it already is a singleton class; otherwise
+ * allocates a new singleton class without a method table whose superclass
+ * is `c`, and returns that.
+ */
+static struct RClass *
+mrb_singleton_class_ptr(mrb_state *mrb, struct RClass *c)
+{
+  struct RClass *sc;
+
+  if (c->tt != MRB_TT_SCLASS) {
+    sc = mrb_obj_alloc(mrb, MRB_TT_SCLASS, mrb->class_class);
+    sc->mt = 0;
+    sc->super = c;
+    mrb_field_write_barrier(mrb, (struct RBasic*)sc, (struct RBasic*)c);
+    return sc;
+  }
+  return c;
+}
+
+/*
+ * Returns the singleton class of `v` as a value, installing one as the
+ * object's class when it does not have one yet.
+ *
+ * Values of type FALSE, TRUE, SYMBOL, FIXNUM and FLOAT have no singleton
+ * class; nil is returned for them (should raise TypeError).
+ */
+mrb_value
+mrb_singleton_class(mrb_state *mrb, mrb_value v)
+{
+  struct RBasic *obj;
+  int kind = mrb_type(v);
+
+  if (kind == MRB_TT_FALSE  ||
+      kind == MRB_TT_TRUE   ||
+      kind == MRB_TT_SYMBOL ||
+      kind == MRB_TT_FIXNUM ||
+      kind == MRB_TT_FLOAT) {
+    return mrb_nil_value();     /* should raise TypeError */
+  }
+
+  obj = (struct RBasic*)mrb_object(v);
+  obj->c = mrb_singleton_class_ptr(mrb, obj->c);
+  return mrb_obj_value(obj->c);
+}
+
+/*
+ * Defines a class-level method: the method `name`, implemented by `func`,
+ * is added to the class pointer of `c` (c->c) rather than to `c` itself.
+ *
+ * The call is a plain statement: a `return` with an expression is not
+ * allowed in a function returning void.
+ */
+void
+mrb_define_class_method(mrb_state *mrb, struct RClass *c, const char *name, mrb_func_t func, int aspec)
+{
+  mrb_define_method_id(mrb, c->c, mrb_intern(mrb, name), func, aspec);
+}
+
+/*
+ * Looks up method `mid` starting at class *cp and following the superclass
+ * chain.
+ *
+ * On success the proc is returned and *cp is set to the class in which it
+ * was found.  0 is returned, and *cp left unchanged, when no class in the
+ * chain has an entry for `mid` or when the first entry found holds a NULL
+ * proc.
+ */
+struct RProc*
+mrb_method_search_vm(mrb_state *mrb, struct RClass **cp, mrb_sym mid)
+{
+  struct RClass *klass;
+
+  for (klass = *cp; klass; klass = klass->super) {
+    khash_t(mt) *tbl = klass->mt;
+    khiter_t it;
+    struct RProc *found;
+
+    if (!tbl) continue;
+    it = kh_get(mt, tbl, mid);
+    if (it == kh_end(tbl)) continue;
+
+    found = kh_value(tbl, it);
+    if (!found) {
+      return 0;                 /* entry without a proc ends the search */
+    }
+    *cp = klass;
+    return found;
+  }
+  return 0;                     /* no method */
+}
+
+/*
+ * Looks up method `mid` for class `c` and raises NoMethodError when it
+ * cannot be found.  Returns the proc implementing the method.
+ */
+struct RProc*
+mrb_method_search(mrb_state *mrb, struct RClass* c, mrb_sym mid)
+{
+  struct RProc *found = mrb_method_search_vm(mrb, &c, mid);
+
+  if (found) {
+    return found;
+  }
+  mrb_raise(mrb, E_NOMETHOD_ERROR, "no method named %s\n", mrb_sym2name(mrb, mid));
+  return found;
+}
+
+/*
+ * Calls method `name` on `self` with `argc` arguments, each passed as an
+ * mrb_value in the variable argument list, and returns the result of
+ * mrb_funcall_argv().
+ *
+ * The arguments are collected in a fixed array of 16 slots; ArgumentError
+ * is raised when `argc` is negative or larger than that, instead of writing
+ * past the end of the array.
+ */
+mrb_value
+mrb_funcall(mrb_state *mrb, mrb_value self, const char *name, int argc,...)
+{
+  enum { FUNCALL_ARGC_MAX = 16 };
+  mrb_value args[FUNCALL_ARGC_MAX];
+  va_list ap;
+  int i;
+
+  if (argc < 0 || argc > FUNCALL_ARGC_MAX) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments for mrb_funcall (%d, limit %d)",
+              argc, (int)FUNCALL_ARGC_MAX);
+    /* not reached */
+    return mrb_nil_value();
+  }
+  if (argc == 0) {
+    /* Kept from the original: the first slots are set to nil. */
+    for (i=0; i<5; i++) {
+      args[i] = mrb_nil_value();
+    }
+  }
+  else {
+    va_start(ap, argc);
+    for (i=0; i<argc; i++) {
+      args[i] = va_arg(ap, mrb_value);
+    }
+    va_end(ap);
+  }
+  return mrb_funcall_argv(mrb, self, name, argc, args);
+}
+
+
+/*
+ * Invokes the "initialize" method of `obj` with the given arguments.
+ * The value returned by initialize is discarded.
+ */
+void
+mrb_obj_call_init(mrb_state *mrb, mrb_value obj, int argc, mrb_value *argv)
+{
+ mrb_funcall_argv(mrb, obj, "initialize", argc, argv);
+}
+
+/*
+ * call-seq:
+ * class.new(args, ...) -> obj
+ *
+ * Calls <code>allocate</code> to create a new object of
+ * <i>class</i>'s class, then invokes that object's
+ * <code>initialize</code> method, passing it <i>args</i>.
+ * This is the method that ends up getting called whenever
+ * an object is constructed using .new.
+ *
+ */
+/*
+ * C entry point: allocates an object whose class is `klass`, runs its
+ * "initialize" with argc/argv and returns it.
+ * NOTE(review): the object is allocated with klass's own type tag
+ * (klass->tt) and handled as a struct RClass whose `super` is set to klass,
+ * whereas mrb_instance_new in this file allocates with MRB_INSTANCE_TT --
+ * confirm which behaviour is intended.
+ */
+mrb_value
+mrb_class_new_instance(mrb_state *mrb, int argc, mrb_value *argv, struct RClass * klass)
+{
+ mrb_value obj;
+ struct RClass * c = mrb_obj_alloc(mrb, klass->tt, klass);
+ c->super = klass;
+ obj = mrb_obj_value(c);
+ mrb_obj_call_init(mrb, obj, argc, argv);
+ return obj;
+}
+
+/*
+ * Method-body variant of mrb_class_new_instance(): takes the arguments and
+ * the block of the current call, allocates an object whose class is the
+ * receiver, runs its "initialize" with those arguments and that block, and
+ * returns the object.
+ * NOTE(review): as in mrb_class_new_instance, the object is allocated with
+ * the receiver's own type tag (k->tt) and its `super` is set to the
+ * receiver -- confirm this is intended.
+ */
+mrb_value
+mrb_class_new_instance_m(mrb_state *mrb, mrb_value klass)
+{
+ mrb_value *argv;
+ struct RProc *b;
+ struct RClass *k = mrb_class_ptr(klass);
+ struct RClass *c;
+ int argc;
+ mrb_value obj;
+
+ /* "b*": block first, then all positional arguments. */
+ mrb_get_args(mrb, "b*", &b, &argv, &argc);
+ c = mrb_obj_alloc(mrb, k->tt, k);
+ c->super = k;
+ obj = mrb_obj_value(c);
+ mrb_funcall_with_block(mrb, obj, "initialize", argc, argv, b);
+
+ return obj;
+}
+
+/*
+ * Method body that creates an instance of the receiver class `cv`.
+ *
+ * The object is allocated with the instance type recorded for the class
+ * (MRB_TT_OBJECT when none is recorded), then "initialize" is called on it
+ * with the arguments and block of the current call.  Returns the new object.
+ */
+mrb_value
+mrb_instance_new(mrb_state *mrb, mrb_value cv)
+{
+  struct RClass *klass = mrb_class_ptr(cv);
+  enum mrb_vtype tt = MRB_INSTANCE_TT(klass);
+  struct RObject *inst;
+  struct RProc *blk;
+  mrb_value *argv;
+  mrb_value obj;
+  int argc;
+
+  if (tt == 0) {
+    tt = MRB_TT_OBJECT;
+  }
+  inst = mrb_obj_alloc(mrb, tt, klass);
+  obj = mrb_obj_value(inst);
+
+  mrb_get_args(mrb, "b*", &blk, &argv, &argc);
+  mrb_funcall_with_block(mrb, obj, "initialize", argc, argv, blk);
+
+  return obj;
+}
+
+/* Method body that ignores its receiver and arguments and returns nil. */
+static mrb_value
+mrb_bob_init(mrb_state *mrb, mrb_value cv)
+{
+ return mrb_nil_value();
+}
+
+/*
+ * Logical negation: returns false when the receiver is truthy according to
+ * mrb_test(), true otherwise.
+ */
+static mrb_value
+mrb_bob_not(mrb_state *mrb, mrb_value cv)
+{
+  return mrb_test(cv) ? mrb_false_value() : mrb_true_value();
+}
+
+/* 15.3.1.3.30 */
+/*
+ *  call-seq:
+ *     obj.method_missing(symbol [, *args] )   -> result
+ *
+ *  Invoked when <i>obj</i> is sent a message it cannot handle.
+ *  <i>symbol</i> names the method that was called and <i>args</i> are the
+ *  arguments that were passed to it.  This default implementation raises an
+ *  error; a class may override the method to provide more dynamic
+ *  behaviour, calling <i>super</i> for the names it does not want to handle
+ *  so that ancestors can pick them up.
+ *
+ *  The example below creates a class <code>Roman</code> that responds to
+ *  methods whose names are roman numerals and returns the corresponding
+ *  integer values.
+ *
+ *     class Roman
+ *       def romanToInt(str)
+ *         # ...
+ *       end
+ *       def method_missing(methId)
+ *         str = methId.id2name
+ *         romanToInt(str)
+ *       end
+ *     end
+ *
+ *     r = Roman.new
+ *     r.iv      #=> 4
+ *     r.xxiii   #=> 23
+ *     r.mm      #=> 2000
+ */
+static mrb_value
+mrb_bob_missing(mrb_state *mrb, mrb_value mod)
+{
+  mrb_value name;
+  mrb_value *rest;
+  int rest_len;
+
+  mrb_get_args(mrb, "o*", &name, &rest, &rest_len);
+  /* The first argument has to be the name of the missing method. */
+  if (!SYMBOL_P(name)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "name should be a symbol");
+  }
+  mrb_raise(mrb, E_NOMETHOD_ERROR, "no method named %s",
+            mrb_sym2name(mrb, mrb_symbol(name)));
+  /* not reached */
+  return mrb_nil_value();
+}
+
+/*
+ * Tells whether instances of class `c` respond to method `mid`.
+ *
+ * The superclass chain is searched for the first method-table entry for
+ * `mid`.  Returns 1 when that entry holds a proc and 0 when no entry exists
+ * or the entry holds a NULL proc, matching mrb_method_search_vm(), which
+ * treats such an entry as "no method".
+ */
+int
+mrb_obj_respond_to(struct RClass* c, mrb_sym mid)
+{
+  khiter_t k;
+
+  while (c) {
+    khash_t(mt) *h = c->mt;
+
+    if (h) {
+      k = kh_get(mt, h, mid);
+      if (k != kh_end(h)) {
+        if (kh_value(h, k)) {
+          return 1;             /* exist method */
+        }
+        return 0;               /* entry without a proc */
+      }
+    }
+    c = c->super;
+  }
+  return 0;                     /* no method */
+}
+
+/*
+ * Tells whether `obj` responds to method `mid`, judged by the method tables
+ * of its class and that class's ancestors.
+ */
+int
+mrb_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym mid)
+{
+  struct RClass *klass = mrb_class(mrb, obj);
+
+  return mrb_obj_respond_to(klass, mid);
+}
+
+/*
+ * Returns the name of class `c` as a string value, qualified with the
+ * names of its enclosing namespaces ("Outer::Inner").
+ *
+ * The result is cached in the "__classpath__" instance variable of the
+ * class and reused on later calls.  nil is returned, and nothing cached,
+ * when the class has no name and is not nested in a namespace other than
+ * Object.
+ */
+mrb_value
+mrb_class_path(mrb_state *mrb, struct RClass *c)
+{
+ mrb_value path;
+
+ path = mrb_obj_iv_get(mrb, (struct RObject*)c, mrb_intern(mrb, "__classpath__"));
+ if (mrb_nil_p(path)) {
+ struct RClass *outer = mrb_class_outer_module(mrb, c);
+ mrb_sym sym = class_sym(mrb, c, outer);
+ if (outer && outer != mrb->object_class) {
+ /* Nested: prefix with the path of the enclosing namespace. */
+ /* NOTE(review): sym is not checked against 0 and base is not checked for nil in this branch. */
+ mrb_value base = mrb_class_path(mrb, outer);
+ path = mrb_str_plus(mrb, base, mrb_str_new_cstr(mrb, "::"));
+ mrb_str_concat(mrb, path, mrb_str_new_cstr(mrb, mrb_sym2name(mrb, sym)));
+ }
+ else if (sym == 0) {
+ return mrb_nil_value();
+ }
+ else {
+ path = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, sym));
+ }
+ mrb_obj_iv_set(mrb, (struct RObject*)c, mrb_intern(mrb, "__classpath__"), path);
+ }
+ return path;
+}
+
+/*
+ * Skips singleton classes and include-classes: follows the superclass chain
+ * from `cl` until a class of another type is reached and returns it.
+ *
+ * Returns NULL when `cl` is NULL or when the chain ends before such a class
+ * is found, instead of dereferencing a null pointer.
+ */
+struct RClass *
+mrb_class_real(struct RClass* cl)
+{
+  while (cl && ((cl->tt == MRB_TT_SCLASS) || (cl->tt == MRB_TT_ICLASS))) {
+    cl = cl->super;
+  }
+  return cl;
+}
+
+/*
+ * Returns the class path of `c` as a C string, or 0 when the class has no
+ * path.  The pointer refers to the buffer of the path string object.
+ */
+const char*
+mrb_class_name(mrb_state *mrb, struct RClass* c)
+{
+  mrb_value path = mrb_class_path(mrb, c);
+
+  if (!mrb_nil_p(path)) {
+    return mrb_str_ptr(path)->buf;
+  }
+  return 0;
+}
+
+/*
+ * Returns the name of the class of `obj` as a C string, or 0 when that
+ * class has no name.
+ */
+const char*
+mrb_obj_classname(mrb_state *mrb, mrb_value obj)
+{
+  struct RClass *klass = mrb_class(mrb, obj);
+
+  return mrb_class_name(mrb, klass);
+}
+
+/*!
+ * Ensures a class can be derived from super.
+ *
+ * \param super a reference to an object.
+ * \exception TypeError if \a super is not a Class or \a super is a singleton class.
+ * \exception TypeError if \a super is the Class class.
+ *
+ * The singleton-class test comes first: placed after the generic
+ * "not a Class" test it could never be reached, because a singleton class
+ * already fails that test.
+ */
+void
+mrb_check_inheritable(mrb_state *mrb, struct RClass *super)
+{
+  if (super->tt == MRB_TT_SCLASS) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't make subclass of singleton class");
+  }
+  if (super->tt != MRB_TT_CLASS) {
+    mrb_raise(mrb, E_TYPE_ERROR, "superclass must be a Class (%s given)",
+              mrb_obj_classname(mrb, mrb_obj_value(super)));
+  }
+  if (super == mrb->class_class) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't make subclass of Class");
+  }
+}
+
+/*!
+ * Creates a new class.
+ * \param super the class to derive from; may be null, in which case no
+ *              inheritability check is performed.
+ * \exception TypeError \a super is not inheritable.
+ * \exception TypeError \a super is the Class class.
+ */
+struct RClass *
+mrb_class_new(mrb_state *mrb, struct RClass *super)
+{
+  struct RClass *klass;
+
+  if (super) {
+    mrb_check_inheritable(mrb, super);
+  }
+  klass = boot_defclass(mrb, super);
+  make_metaclass(mrb, klass);
+
+  return klass;
+}
+
+/*!
+ * Creates a new module: allocates an MRB_TT_MODULE object whose class is
+ * the Module class, gives it an empty method table and a metaclass.
+ */
+struct RClass *
+mrb_module_new(mrb_state *mrb)
+{
+  struct RClass *mod;
+
+  mod = mrb_obj_alloc(mrb, MRB_TT_MODULE, mrb->module_class);
+  mod->mt = kh_init(mt, mrb);
+  make_metaclass(mrb, mod);
+
+  return mod;
+}
+
+/*
+ *  call-seq:
+ *     obj.class    => class
+ *
+ *  Returns the class of <i>obj</i>. In Ruby this method must be called
+ *  with an explicit receiver because <code>class</code> is also a
+ *  reserved word.
+ *
+ *     1.class      #=> Fixnum
+ *     self.class   #=> Object
+ *
+ *  Implementation: takes mrb_class() of obj and passes it through
+ *  mrb_class_real().
+ */
+
+struct RClass*
+mrb_obj_class(mrb_state *mrb, mrb_value obj)
+{
+  struct RClass *klass = mrb_class(mrb, obj);
+
+  return mrb_class_real(klass);
+}
+
+/* Makes `a` an alias of `b` in class `c`: the proc found by searching `c`
+ * for `b` is registered again under the name `a`. */
+void
+mrb_alias_method(mrb_state *mrb, struct RClass *c, mrb_sym a, mrb_sym b)
+{
+  struct RProc *target;
+
+  target = mrb_method_search(mrb, c, b);
+  mrb_define_method_vm(mrb, c, a, mrb_obj_value(target));
+}
+
+/*!
+ * Defines an alias of a method, taking both names as C strings.
+ * \param klass  the class which the original method belongs to
+ * \param name1  a new name for the method
+ * \param name2  the original name of the method
+ */
+void
+mrb_define_alias(mrb_state *mrb, struct RClass *klass, const char *name1, const char *name2)
+{
+  mrb_sym alias_id = mrb_intern(mrb, name1);
+  mrb_sym orig_id = mrb_intern(mrb, name2);
+
+  mrb_alias_method(mrb, klass, alias_id, orig_id);
+}
+
+/*
+ *  call-seq:
+ *     mod.to_s   -> string
+ *
+ *  Return a string representing this module or class. For basic
+ *  classes and modules, this is the name. For singletons, we
+ *  show information on the thing we're attached to as well.
+ *
+ *  Unnamed classes and modules are rendered as "#<Class:ADDR>" and
+ *  "#<Module:ADDR>". Any other unnamed value yields an empty string
+ *  instead of reading an uninitialized buffer.
+ */
+
+static mrb_value
+mrb_mod_to_s(mrb_state *mrb, mrb_value klass)
+{
+  if (mrb_type(klass) == MRB_TT_SCLASS) {
+    mrb_value s = mrb_usascii_str_new2(mrb, "#<");
+    mrb_value v = mrb_iv_get(mrb, klass, mrb_intern(mrb, "__attached__"));
+
+    mrb_str_cat2(mrb, s, "Class:");
+    switch (mrb_type(v)) {
+      case MRB_TT_CLASS:
+      case MRB_TT_MODULE:
+        mrb_str_append(mrb, s, mrb_inspect(mrb, v));
+        break;
+      default:
+        mrb_str_append(mrb, s, mrb_any_to_s(mrb, v));
+        break;
+    }
+    mrb_str_cat2(mrb, s, ">");
+
+    return s;
+  }
+  else {
+    struct RClass *c = mrb_class_ptr(klass);
+    const char *cn = mrb_class_name(mrb, c);
+
+    if (!cn) {
+      char buf[256];
+
+      switch (mrb_type(klass)) {
+        case MRB_TT_CLASS:
+          /* %p requires a void pointer argument */
+          snprintf(buf, sizeof(buf), "#<Class:%p>", (void*)c);
+          break;
+        case MRB_TT_MODULE:
+          snprintf(buf, sizeof(buf), "#<Module:%p>", (void*)c);
+          break;
+        default:
+          /* keep buf a valid C string for any other type tag */
+          buf[0] = '\0';
+          break;
+      }
+      return mrb_str_dup(mrb, mrb_str_new_cstr(mrb, buf));
+    }
+    else {
+      return mrb_str_dup(mrb, mrb_str_new_cstr(mrb, cn));
+    }
+  }
+}
+
+/* Method body registered as Module#alias_method: reads two arguments
+ * (new name, old name), treats both as symbols and aliases the method in
+ * the receiver. Always returns nil. */
+mrb_value
+mrb_mod_alias(mrb_state *mrb, mrb_value mod)
+{
+  mrb_value new_name, old_name;
+  struct RClass *target = mrb_class_ptr(mod);
+
+  mrb_get_args(mrb, "oo", &new_name, &old_name);
+  mrb_alias_method(mrb, target, mrb_symbol(new_name), mrb_symbol(old_name));
+  return mrb_nil_value();
+}
+
+
+/* Registers, under name `a` in class `c`, a value tagged MRB_TT_PROC
+ * whose pointer is null. */
+void
+mrb_undef_method(mrb_state *mrb, struct RClass *c, mrb_sym a)
+{
+  mrb_value null_proc;
+
+  null_proc.tt = MRB_TT_PROC;
+  null_proc.value.p = 0;
+  mrb_define_method_vm(mrb, c, a, null_proc);
+}
+
+/* Method body registered as Module#undef_method: calls mrb_undef_method()
+ * on the receiver for every argument, each taken as a symbol, in order.
+ * Always returns nil. */
+mrb_value
+mrb_mod_undef(mrb_state *mrb, mrb_value mod)
+{
+  struct RClass *target = mrb_class_ptr(mod);
+  mrb_value *argv;
+  int argc;
+  int i;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  for (i = 0; i < argc; i++) {
+    mrb_undef_method(mrb, target, mrb_symbol(argv[i]));
+  }
+  return mrb_nil_value();
+}
+
+
+/* Method body registered as Module#===: true when the single argument is
+ * a kind of the receiver according to mrb_obj_is_kind_of(), else false. */
+static mrb_value
+mrb_mod_eqq(mrb_state *mrb, mrb_value mod)
+{
+  mrb_value obj;
+
+  mrb_get_args(mrb, "o", &obj);
+  if (mrb_obj_is_kind_of(mrb, obj, mrb_class_ptr(mod))) {
+    return mrb_true_value();
+  }
+  return mrb_false_value();
+}
+
+/*
+ * Bootstraps the core class hierarchy (BasicObject, Object, Module, Class),
+ * records Object/Module/Class in the interpreter state, names the four
+ * classes, and installs the basic methods defined in this file.
+ */
+void
+mrb_init_class(mrb_state *mrb)
+{
+ struct RClass *bob; /* BasicObject */
+ struct RClass *obj; /* Object */
+ struct RClass *mod; /* Module */
+ struct RClass *cls; /* Class */
+ //struct RClass *krn; /* Kernel */
+
+ /* boot class hierarchy */
+ bob = boot_defclass(mrb, 0);
+ obj = boot_defclass(mrb, bob); mrb->object_class = obj;
+ mod = boot_defclass(mrb, obj); mrb->module_class = mod;/* obj -> mod */
+ cls = boot_defclass(mrb, mod); mrb->class_class = cls; /* obj -> cls */
+ /* fix-up loose ends */
+ /* all four were created before Class existed, so set their class now */
+ bob->c = obj->c = mod->c = cls->c = cls;
+ make_metaclass(mrb, bob);
+ make_metaclass(mrb, obj);
+ make_metaclass(mrb, mod);
+ make_metaclass(mrb, cls);
+
+ /* name basic classes */
+ /* each class becomes a constant defined on Object */
+ mrb_define_const(mrb, obj, "BasicObject", mrb_obj_value(bob));
+ mrb_define_const(mrb, obj, "Object", mrb_obj_value(obj));
+ mrb_define_const(mrb, obj, "Module", mrb_obj_value(mod));
+ mrb_define_const(mrb, obj, "Class", mrb_obj_value(cls));
+
+ /* name each class */
+ mrb_name_class(mrb, bob, mrb_intern(mrb, "BasicObject"));
+ mrb_name_class(mrb, obj, mrb_intern(mrb, "Object"));
+ mrb_name_class(mrb, mod, mrb_intern(mrb, "Module"));
+ mrb_name_class(mrb, cls, mrb_intern(mrb, "Class"));
+
+ MRB_SET_INSTANCE_TT(mod, MRB_TT_MODULE);
+ MRB_SET_INSTANCE_TT(cls, MRB_TT_CLASS);
+ mrb_define_method(mrb, bob, "initialize", mrb_bob_init, ARGS_NONE());
+ mrb_define_method(mrb, bob, "!", mrb_bob_not, ARGS_NONE());
+ mrb_define_method(mrb, bob, "method_missing", mrb_bob_missing, ARGS_ANY()); /* 15.3.1.3.30 */
+ mrb_define_method(mrb, cls, "new", mrb_instance_new, ARGS_ANY());
+ mrb_define_method(mrb, mod, "include", mrb_mod_include, ARGS_REQ(1));
+
+ mrb_define_method(mrb, mod, "to_s", mrb_mod_to_s, ARGS_NONE());
+ mrb_define_method(mrb, mod, "alias_method", mrb_mod_alias, ARGS_ANY());
+ mrb_define_method(mrb, mod, "undef_method", mrb_mod_undef, ARGS_ANY());
+
+ mrb_define_method(mrb, mod, "===", mrb_mod_eqq, ARGS_REQ(1));
+}
diff --git a/src/codegen.c b/src/codegen.c
new file mode 100644
index 000000000..c44e619e9
--- /dev/null
+++ b/src/codegen.c
@@ -0,0 +1,2273 @@
+#undef CODEGEN_TEST
+#define CODEGEN_DUMP
+
+#include "mruby.h"
+#include "irep.h"
+#include "compile.h"
+#include "mruby/proc.h"
+#include "opcode.h"
+#include "mruby/string.h"
+#include <string.h>
+#include <stdlib.h>
+
+typedef mrb_ast_node node;
+typedef struct mrb_parser_state parser_state;
+
+/*
+ * One entry of the per-scope stack of enclosing loop-like constructs,
+ * consulted when generating break/next/redo/retry.
+ */
+struct loopinfo {
+ enum looptype {
+ LOOP_NORMAL, /* while / until */
+ LOOP_BLOCK, /* lambda or block body */
+ LOOP_FOR, /* body of a for loop */
+ LOOP_BEGIN, /* protected body of a rescue construct */
+ LOOP_RESCUE, /* rescue construct once its protected body has been generated */
+ } type;
+ /* pc1: label at the start of the loop (jump target of `next` in a normal loop);
+ pc2: label of the loop's conditional jump (jump target of `redo`);
+ pc3, acc: not used in the visible part of this file */
+ int pc1, pc2, pc3, acc;
+ int ensure_level; /* compared with the scope's ensure_level to decide how many EPOPs to emit */
+ struct loopinfo *prev; /* enclosing entry */
+};
+
+/*
+ * Code generation state for one scope (instruction sequence under
+ * construction), linked to its enclosing scope through `prev`.
+ */
+typedef struct scope {
+ mrb_state *mrb;
+ mrb_pool *mpool; /* pool used by codegen_palloc(); closed by codegen_error() */
+ jmp_buf jmp; /* longjmp target used by codegen_error() on the outermost scope */
+
+ struct scope *prev; /* enclosing scope, or null for the outermost one */
+
+ node *lv; /* list of local variable names, searched by lv_idx() */
+
+ int sp; /* current register stack pointer (see push()/pop()) */
+ int pc; /* index of the next instruction to be written to iseq */
+ int lastlabel; /* pc of the most recent label; peephole rewriting is skipped there */
+ int ainfo; /* argument info set by lambda_body(); tested for < 0 by super/yield */
+
+ struct loopinfo *loop; /* innermost enclosing loop-like construct */
+ int ensure_level; /* number of ensure clauses currently open */
+
+ mrb_code *iseq; /* instruction buffer */
+ int icapa; /* capacity of iseq, doubled by genop() when full */
+
+ mrb_value *pool; /* literal pool */
+ int plen; /* number of literals in pool */
+ int pcapa; /* capacity of pool, doubled by new_lit() when full */
+
+ mrb_sym *syms; /* symbol table */
+ int slen; /* number of slots in use in syms */
+
+ int nlocals; /* registers below this index are not retargeted by genop_peep() */
+ int nregs; /* highest value sp has reached (see nregs_update) */
+
+ int idx;
+} codegen_scope;
+
+static codegen_scope* scope_new(mrb_state *mrb, codegen_scope *prev, node *lv);
+static void scope_finish(codegen_scope *s, int idx);
+static struct loopinfo *loop_push(codegen_scope *s, enum looptype t);
+static void loop_break(codegen_scope *s, node *tree);
+static void loop_pop(codegen_scope *s, int val);
+
+static void gen_assignment(codegen_scope *s, node *node, int sp, int val);
+static void gen_vmassignment(codegen_scope *s, node *tree, int rhs, int val);
+
+static void codegen(codegen_scope *s, node *tree, int val);
+
+/*
+ * Aborts code generation: closes the memory pool of `s` and of every
+ * enclosing scope, prints "codegen error: <message>" to stderr and
+ * longjmps to the outermost scope's `jmp` with value 1.
+ * Does nothing when `s` is null; otherwise it does not return.
+ */
+static void
+codegen_error(codegen_scope *s, const char *message)
+{
+  if (!s) return;
+  while (s->prev) {
+    /* Read the parent link BEFORE closing the pool: if the scope object
+     * itself lives in s->mpool, touching s afterwards would be a
+     * use-after-free. NOTE(review): scope_new() is not visible here, so
+     * whether scopes are pool-allocated should be confirmed; reading the
+     * link first is harmless either way. */
+    codegen_scope *parent = s->prev;
+
+    mrb_pool_close(s->mpool);
+    s = parent;
+  }
+  mrb_pool_close(s->mpool);
+  fprintf(stderr, "codegen error: %s\n", message);
+  /* NOTE(review): s->jmp is still read after the outermost pool is closed;
+   * confirm the outermost scope outlives its pool. */
+  longjmp(s->jmp, 1);
+}
+
+/* Allocates `len` bytes from the scope's pool; a failed allocation is
+ * reported through codegen_error(). */
+static void*
+codegen_palloc(codegen_scope *s, size_t len)
+{
+  void *mem = mrb_pool_alloc(s->mpool, len);
+
+  if (mem == 0) {
+    codegen_error(s, "pool memory allocation");
+  }
+  return mem;
+}
+
+/* mrb_malloc() wrapper; a failed allocation is reported through
+ * codegen_error(). */
+void*
+codegen_malloc(codegen_scope *s, size_t len)
+{
+  void *mem = mrb_malloc(s->mrb, len);
+
+  if (mem == 0) {
+    codegen_error(s, "mrb_malloc");
+  }
+  return mem;
+}
+
+/* mrb_realloc() wrapper; a null result for a non-zero `len` is reported
+ * through codegen_error(). */
+void*
+codegen_realloc(codegen_scope *s, void *p, size_t len)
+{
+  void *grown = mrb_realloc(s->mrb, p, len);
+
+  if (len > 0 && grown == 0) {
+    codegen_error(s, "mrb_realloc");
+  }
+  return grown;
+}
+
+/* Marks the current pc as a label (recorded in s->lastlabel) and
+ * returns it. */
+static int
+new_label(codegen_scope *s)
+{
+  int here = s->pc;
+
+  s->lastlabel = here;
+  return here;
+}
+
+/* Appends instruction `i` to the scope's instruction buffer, doubling the
+ * buffer capacity first when it is full. */
+static inline void
+genop(codegen_scope *s, mrb_code i)
+{
+  int at = s->pc;
+
+  if (at == s->icapa) {
+    s->icapa *= 2;
+    s->iseq = codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->icapa);
+  }
+  s->iseq[at] = i;
+  s->pc = at + 1;
+}
+
+/*
+ * Emits instruction `i`, first trying to merge it with the previously
+ * emitted instruction (peephole optimization). Merging is attempted only
+ * when `val` is zero, at least one instruction has been emitted, and the
+ * current pc is not a label. When a merge applies, the previous
+ * instruction is rewritten in place (or `i` is dropped) and nothing is
+ * appended; otherwise `i` is appended with genop().
+ */
+static void
+genop_peep(codegen_scope *s, mrb_code i, int val)
+{
+ // peephole optimization
+ if (!val && s->lastlabel != s->pc && s->pc > 0) {
+ mrb_code i0 = s->iseq[s->pc-1];
+ int c1 = GET_OPCODE(i);
+ int c0 = GET_OPCODE(i0);
+
+ switch (c1) {
+ case OP_MOVE:
+ // MOVE after an instruction that wrote the source register into a
+ // non-local register: retarget that instruction to the MOVE destination
+ switch (c0) {
+ case OP_MOVE:
+ if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i) == GETARG_B(i0) && GETARG_A(i) >= s->nlocals) {
+ // skip swapping OP_MOVE
+ return;
+ }
+ break;
+ case OP_LOADI:
+ if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) {
+ s->iseq[s->pc-1] = MKOP_AsBx(OP_LOADI, GETARG_A(i), GETARG_sBx(i0));
+ return;
+ }
+ break;
+ case OP_ARRAY:
+ case OP_HASH:
+ case OP_RANGE:
+ case OP_AREF:
+ case OP_GETUPVAR:
+ if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) {
+ s->iseq[s->pc-1] = MKOP_ABC(c0, GETARG_A(i), GETARG_B(i0), GETARG_C(i0));
+ return;
+ }
+ break;
+ case OP_LOADSYM:
+ case OP_GETGLOBAL:
+ case OP_GETIV:
+ case OP_GETCV:
+ case OP_GETCONST:
+ case OP_GETSPECIAL:
+ case OP_LOADL:
+ case OP_STRING:
+ case OP_GETMCNST:
+ if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) {
+ s->iseq[s->pc-1] = MKOP_ABx(c0, GETARG_A(i), GETARG_Bx(i0));
+ return;
+ }
+ break;
+ case OP_SCLASS:
+ if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) {
+ s->iseq[s->pc-1] = MKOP_AB(c0, GETARG_A(i), GETARG_B(i0));
+ return;
+ }
+ break;
+ case OP_LOADNIL:
+ case OP_LOADSELF:
+ case OP_LOADT:
+ case OP_LOADF:
+ case OP_OCLASS:
+ if (GETARG_B(i) == GETARG_A(i0) && GETARG_A(i0) >= s->nlocals) {
+ s->iseq[s->pc-1] = MKOP_A(c0, GETARG_A(i));
+ return;
+ }
+ break;
+ }
+ break;
+ case OP_SETIV:
+ case OP_SETCV:
+ case OP_SETCONST:
+ case OP_SETMCNST:
+ // store right after a MOVE into the stored register: store the
+ // MOVE's source register directly and drop the MOVE
+ switch (c0) {
+ case OP_MOVE:
+ if (GETARG_A(i) == GETARG_A(i0)) {
+ s->iseq[s->pc-1] = MKOP_ABx(c1, GETARG_B(i0), GETARG_Bx(i));
+ return;
+ }
+ break;
+ }
+ break;
+ case OP_SETUPVAR:
+ switch (c0) {
+ case OP_MOVE:
+ if (GETARG_A(i) == GETARG_A(i0)) {
+ s->iseq[s->pc-1] = MKOP_ABC(c1, GETARG_B(i0), GETARG_B(i), GETARG_C(i));
+ return;
+ }
+ break;
+ }
+ break;
+ case OP_EPOP:
+ // consecutive EPOPs are folded into one with the summed count
+ if (c0 == OP_EPOP) {
+ s->iseq[s->pc-1] = MKOP_A(OP_EPOP, GETARG_A(i0)+GETARG_A(i));
+ return;
+ }
+ break;
+ case OP_POPERR:
+ // consecutive POPERRs are folded into one with the summed count
+ if (c0 == OP_POPERR) {
+ s->iseq[s->pc-1] = MKOP_A(OP_POPERR, GETARG_A(i0)+GETARG_A(i));
+ return;
+ }
+ break;
+ }
+ }
+ genop(s, i);
+}
+
+/* Terminates the process with exit status 1; the scope argument is not
+ * used. */
+static void
+scope_error(codegen_scope *s)
+{
+  (void)s;
+  exit(1);
+}
+
+/*
+ * Patches the jump instruction at `pc` so that its sBx operand is the
+ * distance from `pc` to the current pc, and marks the current pc as a
+ * label. The instruction must be OP_JMP, OP_JMPIF, OP_JMPNOT or OP_ONERR;
+ * anything else is reported as a bug and ends in scope_error().
+ */
+static inline void
+dispatch(codegen_scope *s, int pc)
+{
+  mrb_code insn = s->iseq[pc];
+  int op = GET_OPCODE(insn);
+  int offset = s->pc - pc;
+
+  s->lastlabel = s->pc;
+  if (op != OP_JMP && op != OP_JMPIF && op != OP_JMPNOT && op != OP_ONERR) {
+    fprintf(stderr, "bug: dispatch on non JMP op\n");
+    scope_error(s);
+  }
+  s->iseq[pc] = MKOP_AsBx(op, GETARG_A(insn), offset);
+}
+
+/*
+ * Patches a chain of pending jumps to land on the current pc. Each
+ * unpatched jump holds, in its sBx operand, the position of the next jump
+ * in the chain; 0 ends the chain. A `pc` of 0 means an empty chain.
+ */
+static void
+dispatch_linked(codegen_scope *s, int pc)
+{
+  while (pc) {
+    /* fetch the link before dispatch() overwrites the operand */
+    int next = GETARG_sBx(s->iseq[pc]);
+
+    dispatch(s, pc);
+    pc = next;
+  }
+}
+
+#define nregs_update do {if (s->sp > s->nregs) s->nregs = s->sp;} while (0)
+/* Reserves one more register: reports "too complex expression" when sp
+ * already exceeds 511, then increments sp and raises nregs to the new sp
+ * if needed. */
+static void
+push_(codegen_scope *s)
+{
+  if (s->sp > 511) {
+    codegen_error(s, "too complex expression");
+  }
+  s->sp += 1;
+  if (s->sp > s->nregs) {
+    s->nregs = s->sp;
+  }
+}
+#if 0
+/* Multi-register variant of push_(): reserves n registers at once.
+ * Currently compiled out by the surrounding #if 0. */
+static void
+push_n_(codegen_scope *s, int n)
+{
+ if (s->sp + n > 511) {
+ codegen_error(s, "too complex expression");
+ }
+ s->sp += n;
+ nregs_update;
+}
+#endif
+
+#define push() push_(s)
+#define push_n(n) push_n_(s, n)
+#define pop() (s->sp--)
+#define pop_n(n) (s->sp-=(n))
+#define cursp() (s->sp)
+
+/* Returns the literal-pool index of `val`, appending it when no existing
+ * entry is bytewise identical. The pool capacity is doubled when full. */
+static inline int
+new_lit(codegen_scope *s, mrb_value val)
+{
+  int idx;
+
+  for (idx = 0; idx < s->plen; idx++) {
+    if (!memcmp(&s->pool[idx], &val, sizeof(mrb_value))) {
+      return idx;
+    }
+  }
+  if (s->plen == s->pcapa) {
+    s->pcapa *= 2;
+    s->pool = codegen_realloc(s, s->pool, sizeof(mrb_value)*s->pcapa);
+  }
+  idx = s->plen;
+  s->pool[idx] = val;
+  s->plen = idx + 1;
+  return idx;
+}
+
+/*
+ * Returns the index (0..255) of `sym` in the scope's symbol table,
+ * restricted to the first 256 slots. An existing entry is reused;
+ * otherwise the symbol is stored in the first empty (zero) slot, or
+ * appended when fewer than 256 slots are in use. Reports "too many
+ * symbols (max 256)" through codegen_error() when all 256 slots hold
+ * other symbols.
+ */
+static inline int
+new_msym(codegen_scope *s, mrb_sym sym)
+{
+  int i, len;
+
+  len = s->slen;
+  /* Scan all 256 usable slots. The previous cap of 255 left slot 255
+   * unexamined, so it was overwritten blindly and the overflow check
+   * below could never trigger. */
+  if (len > 256) len = 256;
+  for (i=0; i<len; i++) {
+    if (s->syms[i] == sym) return i;
+    if (s->syms[i] == 0) break;   /* free slot left by new_sym() padding */
+  }
+  if (i == 256) {
+    codegen_error(s, "too many symbols (max 256)");
+  }
+  s->syms[i] = sym;
+  if (i == s->slen) s->slen++;
+  return i;
+}
+
+/*
+ * Returns the index of `sym` in the scope's symbol table, appending it
+ * when absent.
+ *
+ * When an append happens while slen is in 126..255, the table is first
+ * reallocated to 65536 entries, slots slen..255 are zero-filled, and slen
+ * jumps to 256, so the new symbol lands at index 256 or above.
+ * NOTE(review): presumably this keeps the remaining low slots free for
+ * new_msym(), which only uses the first 256 indices — confirm.
+ * NOTE(review): no upper bound on slen is checked here.
+ */
+static inline int
+new_sym(codegen_scope *s, mrb_sym sym)
+{
+ int i;
+
+ for (i=0; i<s->slen; i++) {
+ if (s->syms[i] == sym) return i;
+ }
+ if (s->slen > 125 && s->slen < 256) {
+ s->syms = codegen_realloc(s, s->syms, sizeof(mrb_sym)*65536);
+ memset(s->syms+s->slen, 0, sizeof(mrb_sym)*(256-s->slen));
+ s->slen = 256;
+ }
+ s->syms[s->slen] = sym;
+ return s->slen++;
+}
+
+/* Counts the cells of the cdr-linked list starting at `tree`
+ * (0 for a null list). */
+static int
+node_len(node *tree)
+{
+  int count;
+
+  for (count = 0; tree; tree = tree->cdr) {
+    count++;
+  }
+  return count;
+}
+
+#define lv_name(lv) ((mrb_sym)(lv)->car)
+/* Returns the 1-based position of local variable `id` in the scope's
+ * local variable list, or 0 when the scope has no such variable. */
+static int
+lv_idx(codegen_scope *s, mrb_sym id)
+{
+  node *cur;
+  int pos = 1;
+
+  for (cur = s->lv; cur; cur = cur->cdr, pos++) {
+    if (lv_name(cur) == id) {
+      return pos;
+    }
+  }
+  return 0;
+}
+
+#define NOVAL 0
+#define VAL 1
+
+/*
+ * Generates code for a `for` loop: evaluates the receiver expression,
+ * compiles the loop body into a new child scope as a block taking one
+ * argument, then emits OP_LAMBDA for that block followed by a send of
+ * "each" to the receiver.
+ */
+static void
+for_body(codegen_scope *s, node *tree)
+{
+ codegen_scope *prev = s;
+ int idx, base = s->idx;
+ struct loopinfo *lp;
+ node *n2;
+ mrb_code c;
+
+ // generate receiver
+ codegen(s, tree->cdr->car, VAL);
+ // generate loop-block
+ s = scope_new(s->mrb, s, tree->car);
+ idx = s->idx;
+
+ lp = loop_push(s, LOOP_FOR);
+ lp->pc1 = new_label(s);
+
+ // generate loop variable
+ n2 = tree->car;
+ // both branches emit the same OP_ENTER; they differ only in how the
+ // block argument (register 1) is assigned to the loop variable(s)
+ if (n2->car && !n2->car->cdr && !n2->cdr) {
+ genop(s, MKOP_Ax(OP_ENTER, 1<<18));
+ gen_assignment(s, n2->car->car, 1, NOVAL);
+ }
+ else {
+ genop(s, MKOP_Ax(OP_ENTER, 1<<18));
+ gen_vmassignment(s, n2, 1, VAL);
+ }
+ codegen(s, tree->cdr->cdr->car, VAL);
+ pop();
+ c = s->iseq[s->pc-1];
+ // add a trailing normal return unless the body already ends with one
+ // that is not a jump target
+ if (GET_OPCODE(c) != OP_RETURN || GETARG_B(c) != OP_R_NORMAL || s->pc == s->lastlabel) {
+ genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
+ }
+ loop_pop(s, NOVAL);
+ scope_finish(s, idx);
+ // back in the enclosing scope
+ s = prev;
+ genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx - base, OP_L_BLOCK));
+ pop();
+ idx = new_msym(s, mrb_intern(s->mrb, "each"));
+ genop(s, MKOP_ABC(OP_SEND, cursp(), idx, 0));
+}
+
+/*
+ * Compiles a lambda/block/method body into a new child scope.
+ *
+ * tree->car is passed to scope_new() as the local variable list; the rest
+ * of `tree` holds the argument specification (may be null) and the body.
+ * When `blk` is nonzero a LOOP_BLOCK entry is pushed for the duration of
+ * the body. Returns the new scope's index relative to the enclosing
+ * scope's index (idx - base).
+ */
+static int
+lambda_body(codegen_scope *s, node *tree, int blk)
+{
+ int idx, base = s->idx;
+ mrb_code c;
+
+ s = scope_new(s->mrb, s, tree->car);
+ idx = s->idx;
+
+ if (blk) {
+ struct loopinfo *lp = loop_push(s, LOOP_BLOCK);
+ lp->pc1 = new_label(s);
+ }
+ tree = tree->cdr;
+ if (tree->car) {
+ /* counts: mandatory, optional, rest, post-mandatory, keyword, keyword-dict, block */
+ int ma, oa, ra, pa, ka, kd, ba, a;
+ int pos, i;
+ node *n, *opt;
+
+ ma = node_len(tree->car->car);
+ n = tree->car->car;
+ /* NOTE(review): this loop only walks the list and has no effect */
+ while (n) {
+ n = n->cdr;
+ }
+ oa = node_len(tree->car->cdr->car);
+ ra = tree->car->cdr->cdr->car ? 1 : 0;
+ pa = node_len(tree->car->cdr->cdr->cdr->car);
+ ka = kd = 0;
+ ba = tree->car->cdr->cdr->cdr->cdr ? 1 : 0;
+
+ /* pack the counts into the OP_ENTER operand */
+ a = ((ma & 0x1f) << 18)
+ | ((oa & 0x1f) << 13)
+ | ((ra & 1) << 12)
+ | ((pa & 0x1f) << 7)
+ | ((ka & 0x1f) << 2)
+ | ((kd & 1)<< 1)
+ | (ba & 1);
+ s->ainfo = (((ma+oa) & 0x3f) << 6) /* (12bits = 6:1:5) */
+ | ((ra & 1) << 5)
+ | (pa & 0x1f);
+ genop(s, MKOP_Ax(OP_ENTER, a));
+ /* one placeholder jump per optional argument, plus one more when there
+ are any; they are patched below by dispatch() */
+ pos = new_label(s);
+ for (i=0; i<oa; i++) {
+ new_label(s);
+ genop(s, MKOP_Ax(OP_JMP, 0));
+ }
+ if (oa > 0) {
+ genop(s, MKOP_Ax(OP_JMP, 0));
+ }
+ opt = tree->car->cdr->car;
+ i = 0;
+ /* default-value code for each optional argument, in order */
+ while (opt) {
+ int idx;
+
+ dispatch(s, pos+i);
+ codegen(s, opt->car->cdr, VAL);
+ idx = lv_idx(s, (mrb_sym)opt->car->car);
+ pop();
+ genop_peep(s, MKOP_AB(OP_MOVE, idx, cursp()), NOVAL);
+ i++;
+ opt = opt->cdr;
+ }
+ if (oa > 0) {
+ dispatch(s, pos+i);
+ }
+ }
+ codegen(s, tree->cdr->car, VAL);
+ pop();
+ c = s->iseq[s->pc-1];
+ /* add a trailing normal return unless the body already ends with one
+ that is not a jump target */
+ if (GET_OPCODE(c) != OP_RETURN || GETARG_B(c) != OP_R_NORMAL || s->pc == s->lastlabel) {
+ genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
+ }
+ if (blk) {
+ loop_pop(s, NOVAL);
+ }
+ scope_finish(s, idx);
+
+ return idx - base;
+}
+
+/*
+ * Compiles tree->cdr into a new child scope whose local variable list is
+ * tree->car. When the enclosing scope has no instruction buffer the body
+ * is generated without a value and terminated by OP_STOP; otherwise it is
+ * generated for its value and terminated by a normal OP_RETURN.
+ * Returns the new scope's index relative to the enclosing scope's index.
+ */
+static int
+scope_body(codegen_scope *s, node *tree)
+{
+ codegen_scope *scope = scope_new(s->mrb, s, tree->car);
+ int idx = scope->idx;
+
+ if (!s->iseq) {
+ codegen(scope, tree->cdr, NOVAL);
+ genop(scope, MKOP_A(OP_STOP, 0));
+ }
+ else {
+ codegen(scope, tree->cdr, VAL);
+ /* NOTE(review): cursp() expands to s->sp, i.e. the ENCLOSING scope's
+ stack pointer, not scope->sp — confirm this is intended */
+ genop(scope, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
+ }
+ scope_finish(scope, idx);
+
+ return idx - s->idx;
+}
+
+/* Returns 0 when any element of list `t` is a NODE_SPLAT node,
+ * 1 otherwise (including for an empty list). */
+static int
+nosplat(node *t)
+{
+  node *elem;
+
+  for (elem = t; elem; elem = elem->cdr) {
+    if ((intptr_t)elem->car->car == NODE_SPLAT) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/*
+ * Returns the symbol for the attribute writer of `a`, i.e. the name of
+ * `a` with '=' appended (e.g. "foo" -> "foo=").
+ * The temporary name buffer is taken from the scope's pool.
+ */
+static mrb_sym
+attrsym(codegen_scope *s, mrb_sym a)
+{
+  const char *name = mrb_sym2name(s->mrb, a);
+  size_t len = strlen(name);
+  char *name2;
+
+  /* len bytes of name + '=' + terminating NUL; allocating only len+1
+   * made the final store write one byte past the buffer */
+  name2 = codegen_palloc(s, len+2);
+  memcpy(name2, name, len);
+  name2[len] = '=';
+  name2[len+1] = '\0';
+
+  return mrb_intern(s->mrb, name2);
+}
+
+/*
+ * Generates code for a list of value expressions.
+ *
+ * Without any splat, each value is pushed in its own register and the
+ * number of values is returned. On the first splat element, the values
+ * generated so far are collected into an array and all remaining elements
+ * are concatenated (splat) or pushed (ordinary) onto it; -1 is returned
+ * in that case.
+ */
+static int
+gen_values(codegen_scope *s, node *t)
+{
+ int n = 0;
+
+ while (t) {
+ if ((intptr_t)t->car->car == NODE_SPLAT) { // splat mode
+ // gather the n values generated so far into one array
+ pop_n(n);
+ genop(s, MKOP_ABC(OP_ARRAY, cursp(), cursp(), n));
+ push();
+ codegen(s, t->car, VAL);
+ pop(); pop();
+ genop(s, MKOP_AB(OP_ARYCAT, cursp(), cursp()+1));
+ t = t->cdr;
+ // every remaining element is appended to that array
+ while (t) {
+ push();
+ codegen(s, t->car, VAL);
+ pop(); pop();
+ if ((intptr_t)t->car->car == NODE_SPLAT) {
+ genop(s, MKOP_AB(OP_ARYCAT, cursp(), cursp()+1));
+ }
+ else {
+ genop(s, MKOP_AB(OP_ARYPUSH, cursp(), cursp()+1));
+ }
+ t = t->cdr;
+ }
+ return -1;
+ }
+ // normal (no splat) mode
+ codegen(s, t->car, VAL);
+ n++;
+ t = t->cdr;
+ }
+ return n;
+}
+
+#define CALL_MAXARGS 127
+
+/*
+ * Generates a method call.
+ *
+ * tree: call node contents (receiver, method name, arguments)
+ * name: method symbol to use instead of the one in the tree, or 0
+ * sp:   when nonzero, a register whose value is passed as an additional
+ *       last argument (used for attribute assignment)
+ * val:  when nonzero, the result is left pushed on the register stack
+ */
+static void
+gen_call(codegen_scope *s, node *tree, mrb_sym name, int sp, int val)
+{
+ mrb_sym sym = name ? name : (mrb_sym)tree->cdr->car;
+ int idx;
+ int n = 0, noop = 0, sendv = 0;
+
+ codegen(s, tree->car, VAL); /* receiver */
+ idx = new_msym(s, sym);
+ tree = tree->cdr->cdr->car;
+ if (tree) {
+ n = gen_values(s, tree->car);
+ if (n < 0) {
+ /* arguments contained a splat: they were gathered into one array */
+ n = noop = sendv = 1;
+ push();
+ }
+ }
+ if (sp) {
+ if (sendv) {
+ pop();
+ genop(s, MKOP_AB(OP_ARYPUSH, cursp(), sp));
+ push();
+ }
+ else {
+ genop(s, MKOP_AB(OP_MOVE, cursp(), sp));
+ push();
+ n++;
+ }
+ }
+ if (tree && tree->cdr) {
+ /* explicit block argument */
+ noop = 1;
+ codegen(s, tree->cdr, VAL);
+ pop();
+ }
+ else {
+ /* no block: the block slot is nil */
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ }
+ pop_n(n+1);
+ {
+ const char *name = mrb_sym2name(s->mrb, sym);
+
+ /* dedicated opcodes for + - < <= > >= when there is neither a splat
+ nor a block; everything else is a plain OP_SEND */
+ if (!noop && name[0] == '+' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_ADD, cursp(), idx, n));
+ }
+ else if (!noop && name[0] == '-' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_SUB, cursp(), idx, n));
+ }
+ else if (!noop && name[0] == '<' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_LT, cursp(), idx, n));
+ }
+ else if (!noop && name[0] == '<' && strlen(name) == 2 && name[1] == '=') {
+ genop(s, MKOP_ABC(OP_LE, cursp(), idx, n));
+ }
+ else if (!noop && name[0] == '>' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_GT, cursp(), idx, n));
+ }
+ else if (!noop && name[0] == '>' && strlen(name) == 2 && name[1] == '=') {
+ genop(s, MKOP_ABC(OP_GE, cursp(), idx, n));
+ }
+ else {
+ /* CALL_MAXARGS as the count marks "arguments are in one array" */
+ if (sendv) n = CALL_MAXARGS;
+ genop(s, MKOP_ABC(OP_SEND, cursp(), idx, n));
+ }
+ }
+ if (val) {
+ push();
+ }
+}
+
+/*
+ * Generates code that stores the value in register `sp` into the
+ * assignment target described by `node` (global, local or captured local,
+ * instance/class variable, constant, scoped constant, or attribute call).
+ * When `val` is nonzero one register is pushed afterwards (for attribute
+ * calls the push is done by gen_call()).
+ */
+static void
+gen_assignment(codegen_scope *s, node *node, int sp, int val)
+{
+ int idx;
+ int type = (intptr_t)node->car;
+
+ node = node->cdr;
+ switch ((intptr_t)type) {
+ case NODE_GVAR:
+ idx = new_sym(s, (mrb_sym)node);
+ genop_peep(s, MKOP_ABx(OP_SETGLOBAL, sp, idx), val);
+ break;
+ case NODE_LVAR:
+ idx = lv_idx(s, (mrb_sym)node);
+ if (idx > 0) {
+ /* local of this scope: a register move, omitted when already in place */
+ if (idx != sp) {
+ genop_peep(s, MKOP_AB(OP_MOVE, idx, sp), val);
+ }
+ break;
+ }
+ else { /* upvar */
+ int lv = 0;
+ codegen_scope *up = s->prev;
+
+ /* search enclosing scopes; lv counts how many levels up */
+ while (up) {
+ idx = lv_idx(up, (mrb_sym)node);
+ if (idx > 0) {
+ genop_peep(s, MKOP_ABC(OP_SETUPVAR, sp, idx, lv), val);
+ break;
+ }
+ lv++;
+ up = up->prev;
+ }
+ // assert(up!=0);
+ }
+ break;
+ case NODE_IVAR:
+ idx = new_sym(s, (mrb_sym)node);
+ genop_peep(s, MKOP_ABx(OP_SETIV, sp, idx), val);
+ break;
+ case NODE_CVAR:
+ idx = new_sym(s, (mrb_sym)node);
+ genop_peep(s, MKOP_ABx(OP_SETCV, sp, idx), val);
+ break;
+ case NODE_CONST:
+ idx = new_sym(s, (mrb_sym)node);
+ genop_peep(s, MKOP_ABx(OP_SETCONST, sp, idx), val);
+ break;
+ case NODE_COLON2:
+ idx = new_sym(s, (mrb_sym)node->cdr);
+ genop_peep(s, MKOP_AB(OP_MOVE, cursp(), sp), NOVAL);
+ push();
+ codegen(s, node->car, VAL);
+ pop_n(2);
+ genop_peep(s, MKOP_ABx(OP_SETMCNST, cursp(), idx), val);
+ break;
+
+ case NODE_CALL:
+ /* attribute assignment: call "<name>=" with sp as the extra argument */
+ push();
+ gen_call(s, node, attrsym(s, (mrb_sym)node->cdr->car), sp, val);
+ val = NOVAL; /* push should have done in gen_call() */
+ break;
+
+ default:
+ printf("unknown lhs %d\n", type);
+ break;
+ }
+ if (val) push();
+}
+
+/*
+ * Generates a multiple assignment whose right-hand side is an array held
+ * in register `rhs`.
+ *
+ * tree->car lists the leading targets; each gets element n of the array
+ * through OP_AREF. tree->cdr, when present, holds the rest target and the
+ * trailing targets, which are filled after an OP_APOST instruction.
+ */
+static void
+gen_vmassignment(codegen_scope *s, node *tree, int rhs, int val)
+{
+ int n = 0, post = 0;
+ node *t, *p;
+
+ if (tree->car) { /* pre */
+ t = tree->car;
+ n = 0;
+ while (t) {
+ genop(s, MKOP_ABC(OP_AREF, cursp(), rhs, n));
+ gen_assignment(s, t->car, cursp(), NOVAL);
+ n++;
+ t = t->cdr;
+ }
+ }
+ t = tree->cdr;
+ if (t) {
+ if (t->cdr) { /* post count */
+ p = t->cdr->car;
+ while (p) {
+ post++;
+ p = p->cdr;
+ }
+ }
+ if (val) {
+ genop(s, MKOP_AB(OP_MOVE, cursp(), rhs));
+ push();
+ }
+ pop();
+ genop(s, MKOP_ABC(OP_APOST, cursp(), n, post));
+ /* after OP_APOST: rest target reads cursp(), trailing targets read cursp()+1.. */
+ n = 1;
+ if (t->car) { /* rest */
+ gen_assignment(s, t->car, cursp(), NOVAL);
+ }
+ if (t->cdr && t->cdr->car) {
+ t = t->cdr->car;
+ while (t) {
+ gen_assignment(s, t->car, cursp()+n, NOVAL);
+ t = t->cdr;
+ n++;
+ }
+ }
+ }
+}
+
+/* Emits an OP_ERR instruction whose operand is the literal-pool index of
+ * a string holding `msg`. */
+static void
+raise_error(codegen_scope *s, const char *msg)
+{
+  mrb_value text = mrb_str_new_cstr(s->mrb, msg);
+  int lit = new_lit(s, text);
+
+  genop(s, MKOP_ABx(OP_ERR, 0, lit));
+}
+
+static void
+codegen(codegen_scope *s, node *tree, int val)
+{
+ int nt;
+
+ if (!tree) return;
+ nt = (intptr_t)tree->car;
+ tree = tree->cdr;
+ switch (nt) {
+ case NODE_BEGIN:
+ while (tree) {
+ codegen(s, tree->car, tree->cdr ? NOVAL : val);
+ tree = tree->cdr;
+ }
+ break;
+
+ case NODE_RESCUE:
+ {
+ int onerr, noexc, exend, pos1, pos2, tmp;
+ struct loopinfo *lp;
+
+ onerr = new_label(s);
+ genop(s, MKOP_Bx(OP_ONERR, 0));
+ lp = loop_push(s, LOOP_BEGIN);
+ lp->pc1 = onerr;
+ if (tree->car) {
+ codegen(s, tree->car, val);
+ }
+ lp->type = LOOP_RESCUE;
+ noexc = new_label(s);
+ genop(s, MKOP_Bx(OP_JMP, 0));
+ dispatch(s, onerr);
+ tree = tree->cdr;
+ exend = 0;
+ pos1 = 0;
+ if (tree->car) {
+ node *n2 = tree->car;
+ int exc = cursp();
+
+ genop(s, MKOP_A(OP_RESCUE, exc));
+ push();
+ while (n2) {
+ node *n3 = n2->car;
+
+ if (pos1) dispatch(s, pos1);
+ if (n3->car) {
+ node *n4 = n3->car;
+
+ pos2 = 0;
+ while (n4) {
+ codegen(s, n4->car, VAL);
+ genop(s, MKOP_AB(OP_MOVE, cursp(), exc));
+ push();
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop(); pop();
+ genop(s, MKOP_ABC(OP_SEND, cursp(), new_msym(s, mrb_intern(s->mrb, "===")), 1));
+ tmp = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMPIF, cursp(), pos2));
+ pos2 = tmp;
+ n4 = n4->cdr;
+ }
+ pos1 = new_label(s);
+ genop(s, MKOP_Bx(OP_JMP, 0));
+ dispatch_linked(s, pos2);
+ }
+ pop();
+ if (n3->cdr->car) {
+ gen_assignment(s, n3->cdr->car, exc, NOVAL);
+ }
+ if (n3->cdr->cdr->car) {
+ codegen(s, n3->cdr->cdr->car, val);
+ }
+ tmp = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMP, cursp(), exend));
+ exend = tmp;
+ n2 = n2->cdr;
+ push();
+ }
+ if (pos1) {
+ dispatch(s, pos1);
+ genop(s, MKOP_A(OP_RAISE, exc));
+ }
+ }
+ tree = tree->cdr;
+ dispatch(s, noexc);
+ genop(s, MKOP_A(OP_POPERR, 1));
+ if (tree->car) {
+ codegen(s, tree->car, val);
+ }
+ dispatch_linked(s, exend);
+ loop_pop(s, NOVAL);
+ }
+ break;
+
+ case NODE_ENSURE:
+ {
+ int idx;
+ int epush = s->pc;
+
+ genop(s, MKOP_Bx(OP_EPUSH, 0));
+ s->ensure_level++;
+ codegen(s, tree->car, val);
+ idx = scope_body(s, tree->cdr);
+ s->iseq[epush] = MKOP_Bx(OP_EPUSH, idx);
+ s->ensure_level--;
+ genop_peep(s, MKOP_A(OP_EPOP, 1), NOVAL);
+ }
+ break;
+
+ case NODE_LAMBDA:
+ {
+ int idx = lambda_body(s, tree, 1);
+
+ genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_LAMBDA));
+ push();
+ }
+ break;
+
+ case NODE_BLOCK:
+ {
+ int idx = lambda_body(s, tree, 1);
+
+ genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_BLOCK));
+ push();
+ }
+ break;
+
+ case NODE_IF:
+ {
+ int pos1, pos2;
+ node *e = tree->cdr->cdr->car;
+
+ codegen(s, tree->car, VAL);
+ pop();
+ pos1 = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0));
+
+ codegen(s, tree->cdr->car, val);
+ if (e) {
+ if (val) pop();
+ pos2 = new_label(s);
+ genop(s, MKOP_sBx(OP_JMP, 0));
+ dispatch(s, pos1);
+ codegen(s, e, val);
+ dispatch(s, pos2);
+ }
+ else {
+ if (val) {
+ pop();
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ push();
+ }
+ dispatch(s, pos1);
+ }
+ }
+ break;
+
+ case NODE_AND:
+ {
+ int pos;
+
+ codegen(s, tree->car, VAL);
+ pos = new_label(s);
+ pop();
+ genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0));
+ codegen(s, tree->cdr, val);
+ dispatch(s, pos);
+ }
+ break;
+
+ case NODE_OR:
+ {
+ int pos;
+
+ codegen(s, tree->car, VAL);
+ pos = new_label(s);
+ pop();
+ genop(s, MKOP_AsBx(OP_JMPIF, cursp(), 0));
+ codegen(s, tree->cdr, val);
+ dispatch(s, pos);
+ }
+ break;
+
+ case NODE_WHILE:
+ {
+ struct loopinfo *lp = loop_push(s, LOOP_NORMAL);
+
+ lp->pc1 = new_label(s);
+ codegen(s, tree->car, VAL);
+ pop();
+ lp->pc2 = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMPNOT, cursp(), 0));
+ codegen(s, tree->cdr, NOVAL);
+ genop(s, MKOP_sBx(OP_JMP, lp->pc1 - s->pc));
+ dispatch(s, lp->pc2);
+ loop_pop(s, val);
+ }
+ break;
+
+ case NODE_UNTIL:
+ {
+ struct loopinfo *lp = loop_push(s, LOOP_NORMAL);
+
+ lp->pc1 = new_label(s);
+ codegen(s, tree->car, VAL);
+ pop();
+ lp->pc2 = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMPIF, cursp(), 0));
+ codegen(s, tree->cdr, NOVAL);
+ genop(s, MKOP_sBx(OP_JMP, lp->pc1 - s->pc));
+ dispatch(s, lp->pc2);
+ loop_pop(s, val);
+ }
+ break;
+
+ case NODE_FOR:
+ for_body(s, tree);
+ if (val) push();
+ break;
+
+ case NODE_CASE:
+ {
+ int head = 0;
+ int pos1, pos2, pos3, tmp;
+ node *n;
+
+ pos3 = 0;
+ if (tree->car) {
+ head = cursp();
+ codegen(s, tree->car, VAL);
+ }
+ tree = tree->cdr;
+ while (tree) {
+ n = tree->car->car;
+ pos1 = pos2 = 0;
+ while (n) {
+ codegen(s, n->car, VAL);
+ if (head) {
+ genop(s, MKOP_AB(OP_MOVE, cursp(), head));
+ push();
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop(); pop();
+ genop(s, MKOP_ABC(OP_SEND, cursp(), new_msym(s, mrb_intern(s->mrb, "===")), 1));
+ }
+ tmp = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMPIF, cursp(), pos2));
+ pos2 = tmp;
+ n = n->cdr;
+ }
+ if (tree->car->car) {
+ pos1 = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMP, cursp(), 0));
+ dispatch_linked(s, pos2);
+ }
+ pop(); pop();
+ codegen(s, tree->car->cdr, val);
+ tmp = new_label(s);
+ genop(s, MKOP_AsBx(OP_JMP, cursp(), pos3));
+ pos3 = tmp;
+ if (pos1) dispatch(s, pos1);
+ tree = tree->cdr;
+ push(); push();
+ }
+ pop();
+ if (pos3) dispatch_linked(s, pos3);
+ if (val) push();
+ }
+ break;
+
+ case NODE_SCOPE:
+ scope_body(s, tree);
+ break;
+
+ case NODE_FCALL:
+ case NODE_CALL:
+ gen_call(s, tree, 0, 0, val);
+ break;
+
+ case NODE_DOT2:
+ codegen(s, tree->car, VAL);
+ codegen(s, tree->cdr, VAL);
+ pop(); pop();
+ if (val) {
+ genop(s, MKOP_ABC(OP_RANGE, cursp(), cursp(), 0));
+ push();
+ }
+ break;
+
+ case NODE_DOT3:
+ codegen(s, tree->car, VAL);
+ codegen(s, tree->cdr, VAL);
+ pop(); pop();
+ if (val) {
+ genop(s, MKOP_ABC(OP_RANGE, cursp(), cursp(), 1));
+ push();
+ }
+ break;
+
+ case NODE_COLON2:
+ {
+ int sym = new_sym(s, (mrb_sym)tree->cdr);
+
+ codegen(s, tree->car, VAL);
+ pop();
+ genop(s, MKOP_ABx(OP_GETMCNST, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_COLON3:
+ {
+ int sym = new_sym(s, (mrb_sym)tree);
+
+ genop(s, MKOP_A(OP_OCLASS, cursp()));
+ genop(s, MKOP_ABx(OP_GETMCNST, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_ARRAY:
+ {
+ int n;
+
+ n = gen_values(s, tree);
+ if (n >= 0) {
+ pop_n(n);
+ if (val) {
+ genop(s, MKOP_ABC(OP_ARRAY, cursp(), cursp(), n));
+ push();
+ }
+ }
+ else if (val) {
+ push();
+ }
+ }
+ break;
+
+ case NODE_HASH:
+ {
+ int len = 0;
+
+ while (tree) {
+ codegen(s, tree->car->car, VAL);
+ codegen(s, tree->car->cdr, VAL);
+ len++;
+ tree = tree->cdr;
+ }
+ pop_n(len*2);
+ if (val) {
+ genop(s, MKOP_ABC(OP_HASH, cursp(), cursp(), len));
+ push();
+ }
+ }
+ break;
+
+ case NODE_SPLAT:
+ codegen(s, tree, VAL);
+ break;
+
+ case NODE_ASGN:
+ codegen(s, tree->cdr, VAL);
+ pop();
+ gen_assignment(s, tree->car, cursp(), val);
+ break;
+
+ case NODE_MASGN:
+ {
+ int len = 0, n = 0, post = 0;
+ node *t = tree->cdr, *p;
+ int rhs = cursp();
+
+ if ((intptr_t)t->car == NODE_ARRAY && nosplat(t->cdr)) {
+ // fixed rhs
+ t = t->cdr;
+ while (t) {
+ codegen(s, t->car, VAL);
+ len++;
+ t = t->cdr;
+ }
+ tree = tree->car;
+ if (tree->car) { /* pre */
+ t = tree->car;
+ n = 0;
+ while (t) {
+ gen_assignment(s, t->car, rhs+n, NOVAL);
+ n++;
+ t = t->cdr;
+ }
+ }
+ t = tree->cdr;
+ if (t) {
+ if (t->cdr) { /* post count */
+ p = t->cdr->car;
+ while (p) {
+ post++;
+ p = p->cdr;
+ }
+ }
+ if (t->car) { /* rest (len - pre - post) */
+ int rn = len - post - n;
+
+ genop(s, MKOP_ABC(OP_ARRAY, cursp(), rhs+n, rn));
+ gen_assignment(s, t->car, cursp(), NOVAL);
+ n += rn;
+ }
+ if (t->cdr && t->cdr->car) {
+ t = t->cdr->car;
+ while (n<len) {
+ gen_assignment(s, t->car, rhs+n, NOVAL);
+ t = t->cdr;
+ n++;
+ }
+ }
+ }
+ pop_n(len);
+ if (val) {
+ genop(s, MKOP_ABC(OP_ARRAY, rhs, rhs, len));
+ push();
+ }
+ }
+ else {
+ // variable rhs
+ codegen(s, t, VAL);
+ gen_vmassignment(s, tree->car, rhs, val);
+ if (!val) pop();
+ }
+ }
+ break;
+
+ case NODE_OP_ASGN:
+ codegen(s, tree->car, VAL);
+ codegen(s, tree->cdr->cdr->car, VAL);
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop(); pop();
+ {
+ mrb_sym sym = (mrb_sym)tree->cdr->car;
+ const char *name = mrb_sym2name(s->mrb, sym);
+ int idx = new_msym(s, sym);
+
+ if (name[0] == '+' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_ADD, cursp(), idx, 2));
+ }
+ else if (name[0] == '-' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_SUB, cursp(), idx, 2));
+ }
+ else if (name[0] == '<' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_LT, cursp(), idx, 2));
+ }
+ else if (name[0] == '<' && strlen(name) == 2 && name[1] == '=') {
+ genop(s, MKOP_ABC(OP_LE, cursp(), idx, 2));
+ }
+ else if (name[0] == '>' && strlen(name) == 1) {
+ genop(s, MKOP_ABC(OP_GT, cursp(), idx, 2));
+ }
+ else if (name[0] == '>' && strlen(name) == 2 && name[1] == '=') {
+ genop(s, MKOP_ABC(OP_GE, cursp(), idx, 2));
+ }
+ else {
+ genop(s, MKOP_ABC(OP_SEND, cursp(), idx, 2));
+ }
+ }
+ gen_assignment(s, tree->car, cursp(), val);
+ break;
+
+ case NODE_SUPER:
+ {
+ int n = 0;
+
+ push();
+ if (tree) {
+ node *args = tree->car;
+ while (args) {
+ codegen(s, args->car, VAL);
+ n++;
+ args = args->cdr;
+ }
+ }
+ if (tree && tree->cdr) {
+ codegen(s, tree->cdr, VAL);
+ pop();
+ }
+ else {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ }
+ pop_n(n+1);
+ genop(s, MKOP_ABC(OP_SUPER, cursp(), 0, n));
+ if (val) push();
+ }
+ break;
+
+ case NODE_ZSUPER:
+ {
+ codegen_scope *s2 = s;
+ int lv = 0, ainfo = 0;
+
+ while (s2->ainfo < 0) {
+ lv++;
+ s2 = s2->prev;
+ if (!s2) break;
+ }
+ if (s2) ainfo = s2->ainfo;
+ push();
+ genop(s, MKOP_ABx(OP_ARGARY, cursp(), (ainfo<<4)|(lv & 0xf)));
+ pop();
+ genop(s, MKOP_ABC(OP_SUPER, cursp(), 0, CALL_MAXARGS));
+ if (val) push();
+ }
+ break;
+
+ case NODE_RETURN:
+ codegen(s, tree, VAL);
+ pop();
+ if (s->loop && s->loop->type != LOOP_NORMAL) {
+ genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_RETURN));
+ }
+ else {
+ genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
+ }
+ break;
+
+ case NODE_YIELD:
+ {
+ codegen_scope *s2 = s;
+ int lv = 0, ainfo = 0;
+ int n = 0, sendv = 0;
+
+ while (s2->ainfo < 0) {
+ lv++;
+ s2 = s2->prev;
+ if (!s2) break;
+ }
+ if (s2) ainfo = s2->ainfo;
+ genop(s, MKOP_ABx(OP_BLKPUSH, cursp(), (ainfo<<4)|(lv & 0xf)));
+ push();
+ if (tree) {
+ n = gen_values(s, tree);
+ if (n < 0) {
+ n = sendv = 1;
+ push();
+ }
+ }
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop_n(n+1);
+ if (sendv) n = CALL_MAXARGS;
+ genop(s, MKOP_ABC(OP_SEND, cursp(), new_msym(s, mrb_intern(s->mrb, "call")), n));
+ if (val) push();
+ }
+ break;
+
+ case NODE_BREAK:
+ loop_break(s, tree);
+ if (val) push();
+ break;
+
+ case NODE_NEXT:
+ if (!s->loop) {
+ raise_error(s, "unexpected next");
+ }
+ else if (s->loop->type == LOOP_NORMAL) {
+ if (s->ensure_level > s->loop->ensure_level) {
+ genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - s->loop->ensure_level), NOVAL);
+ }
+ codegen(s, tree, NOVAL);
+ genop(s, MKOP_sBx(OP_JMP, s->loop->pc1 - s->pc));
+ }
+ else {
+ codegen(s, tree, VAL);
+ pop();
+ genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_NORMAL));
+ }
+ if (val) push();
+ break;
+
+ case NODE_REDO:
+ if (!s->loop) {
+ raise_error(s, "unexpected redo");
+ }
+ else {
+ if (s->ensure_level > s->loop->ensure_level) {
+ genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - s->loop->ensure_level), NOVAL);
+ }
+ genop(s, MKOP_sBx(OP_JMP, s->loop->pc2 - s->pc));
+ }
+ break;
+
+ case NODE_RETRY:
+ {
+ const char *msg = "unexpected retry";
+
+ if (!s->loop) {
+ raise_error(s, msg);
+ }
+ else {
+ struct loopinfo *lp = s->loop;
+ int n = 0;
+
+ while (lp && lp->type != LOOP_RESCUE) {
+ if (lp->type == LOOP_BEGIN) {
+ n++;
+ }
+ lp = lp->prev;
+ }
+ if (!lp) {
+ raise_error(s, msg);
+ }
+ else {
+ if (n > 0) {
+ while (n--) {
+ genop_peep(s, MKOP_A(OP_POPERR, 1), NOVAL);
+ }
+ }
+ if (s->ensure_level > lp->ensure_level) {
+ genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - lp->ensure_level), NOVAL);
+ }
+ genop(s, MKOP_sBx(OP_JMP, lp->pc1 - s->pc));
+ }
+ }
+ }
+ break;
+
+ case NODE_LVAR:
+ if (val) {
+ int idx = lv_idx(s, (mrb_sym)tree);
+
+ if (idx > 0) {
+ genop(s, MKOP_AB(OP_MOVE, cursp(), idx));
+ }
+ else {
+ int lv = 0;
+ codegen_scope *up = s->prev;
+
+ while (up) {
+ idx = lv_idx(up, (mrb_sym)tree);
+ if (idx > 0) {
+ genop(s, MKOP_ABC(OP_GETUPVAR, cursp(), idx, lv));
+ break;
+ }
+ lv++;
+ up = up->prev;
+ }
+ }
+ push();
+ }
+ break;
+
+ case NODE_GVAR:
+ {
+ int sym = new_sym(s, (mrb_sym)tree);
+
+ genop(s, MKOP_ABx(OP_GETGLOBAL, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_IVAR:
+ {
+ int sym = new_sym(s, (mrb_sym)tree);
+
+ genop(s, MKOP_ABx(OP_GETIV, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_CVAR:
+ {
+ int sym = new_sym(s, (mrb_sym)tree);
+
+ genop(s, MKOP_ABx(OP_GETCV, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_CONST:
+ {
+ int sym = new_sym(s, (mrb_sym)tree);
+
+ genop(s, MKOP_ABx(OP_GETCONST, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_DEFINED:
+ codegen(s, tree, VAL);
+ break;
+
+ case NODE_BACK_REF:
+ codegen(s, tree, VAL);
+ break;
+
+ case NODE_NTH_REF:
+ codegen(s, tree, VAL);
+ break;
+
+ case NODE_ARG:
+ // should not happen
+ break;
+
+ case NODE_BLOCK_ARG:
+ codegen(s, tree, VAL);
+ break;
+
+ case NODE_INT:
+ if (val) {
+ char *p = (char*)tree->car;
+ int base = (intptr_t)tree->cdr->car;
+ int i = readint(p, base);
+ mrb_code co;
+
+ if (i < MAXARG_sBx && i > -MAXARG_sBx) {
+ co = MKOP_AsBx(OP_LOADI, cursp(), i);
+ }
+ else {
+ int off = new_lit(s, mrb_fixnum_value(i));
+ co = MKOP_ABx(OP_LOADL, cursp(), off);
+ }
+ genop(s, co);
+ push();
+ }
+ break;
+
+ case NODE_FLOAT:
+ if (val) {
+ char *p = (char*)tree;
+ mrb_float f = readfloat(p);
+ int off = new_lit(s, mrb_float_value(f));
+
+ genop(s, MKOP_ABx(OP_LOADL, cursp(), off));
+ push();
+ }
+ break;
+
+ case NODE_NEGATE:
+ {
+ nt = (intptr_t)tree->car;
+ tree = tree->cdr;
+ switch (nt) {
+ case NODE_FLOAT:
+ {
+ char *p = (char*)tree;
+ mrb_float f = readfloat(p);
+ int off = new_lit(s, mrb_float_value(-f));
+
+ genop(s, MKOP_ABx(OP_LOADL, cursp(), off));
+ push();
+ }
+ break;
+
+ case NODE_INT:
+ {
+ char *p = (char*)tree->car;
+ int base = (intptr_t)tree->cdr->car;
+ int i = readint(p, base);
+ mrb_code co;
+
+ i = -i;
+ if (i < MAXARG_sBx && i > -MAXARG_sBx) {
+ co = MKOP_AsBx(OP_LOADI, cursp(), i);
+ }
+ else {
+ int off = new_lit(s, mrb_fixnum_value(i));
+ co = MKOP_ABx(OP_LOADL, cursp(), off);
+ }
+ genop(s, co);
+ push();
+ }
+ break;
+
+ default:
+ {
+ int sym = new_msym(s, mrb_intern(s->mrb, "-"));
+
+ genop(s, MKOP_ABx(OP_LOADI, cursp(), 0));
+ push();
+ codegen(s, tree, VAL);
+ pop(); pop();
+ genop(s, MKOP_ABC(OP_SUB, cursp(), sym, 2));
+ }
+ break;
+ }
+ }
+ break;
+
+ case NODE_STR:
+ if (val) {
+ char *p = (char*)tree->car;
+ size_t len = (intptr_t)tree->cdr;
+ int off = new_lit(s, mrb_str_new(s->mrb, p, len));
+
+ genop(s, MKOP_ABx(OP_STRING, cursp(), off));
+ push();
+ }
+ break;
+
+ case NODE_DSTR:
+ if (val) {
+ node *n = tree;
+
+ codegen(s, n->car, VAL);
+ n = n->cdr;
+ while (n) {
+ codegen(s, n->car, VAL);
+ pop(); pop();
+ genop(s, MKOP_AB(OP_STRCAT, cursp(), cursp()+1));
+ push();
+ n = n->cdr;
+ }
+ }
+ else {
+ node *n = tree;
+
+ while (n) {
+ if ((intptr_t)n->car->car != NODE_STR) {
+ codegen(s, n->car, NOVAL);
+ }
+ n = n->cdr;
+ }
+ }
+ break;
+
+ case NODE_SYM:
+ if (val) {
+ int sym = new_sym(s, (mrb_sym)tree);
+
+ genop(s, MKOP_ABx(OP_LOADSYM, cursp(), sym));
+ push();
+ }
+ break;
+
+ case NODE_SELF:
+ if (val) {
+ genop(s, MKOP_A(OP_LOADSELF, cursp()));
+ push();
+ }
+ break;
+
+ case NODE_NIL:
+ if (val) {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ push();
+ }
+ break;
+
+ case NODE_TRUE:
+ if (val) {
+ genop(s, MKOP_A(OP_LOADT, cursp()));
+ push();
+ }
+ break;
+
+ case NODE_FALSE:
+ if (val) {
+ genop(s, MKOP_A(OP_LOADF, cursp()));
+ push();
+ }
+ break;
+
+ case NODE_ALIAS:
+ {
+ int a = new_msym(s, (mrb_sym)tree->car);
+ int b = new_msym(s, (mrb_sym)tree->cdr);
+ int c = new_msym(s, mrb_intern(s->mrb, "alias_method"));
+
+ genop(s, MKOP_A(OP_TCLASS, cursp()));
+ push();
+ genop(s, MKOP_ABx(OP_LOADSYM, cursp(), a));
+ push();
+ genop(s, MKOP_ABx(OP_LOADSYM, cursp(), b));
+ push();
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop_n(3);
+ genop(s, MKOP_ABC(OP_SEND, cursp(), c, 2));
+ if (val) {
+ push();
+ }
+ }
+ break;
+
+ case NODE_UNDEF:
+ {
+ int sym = new_msym(s, (mrb_sym)tree);
+ int undef = new_msym(s, mrb_intern(s->mrb, "undef_method"));
+
+ genop(s, MKOP_A(OP_TCLASS, cursp()));
+ push();
+ genop(s, MKOP_ABx(OP_LOADSYM, cursp(), sym));
+ push();
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ pop_n(2);
+ genop(s, MKOP_ABC(OP_SEND, cursp(), undef, 2));
+ if (val) {
+ push();
+ }
+ }
+ break;
+
+ case NODE_CLASS:
+ {
+ int idx;
+
+ if (tree->car->car == (node*)0) {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ push();
+ }
+ else if (tree->car->car == (node*)1) {
+ genop(s, MKOP_A(OP_OCLASS, cursp()));
+ push();
+ }
+ else {
+ codegen(s, tree->car->car, VAL);
+ }
+ if (tree->cdr->car) {
+ codegen(s, tree->cdr->car, VAL);
+ }
+ else {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ push();
+ }
+ pop(); pop();
+ idx = new_msym(s, (mrb_sym)tree->car->cdr);
+ genop(s, MKOP_AB(OP_CLASS, cursp(), idx));
+ idx = scope_body(s, tree->cdr->cdr->car);
+ genop(s, MKOP_ABx(OP_EXEC, cursp(), idx));
+ if (val) {
+ push();
+ }
+ }
+ break;
+
+ case NODE_MODULE:
+ {
+ int idx;
+
+ if (tree->car->car == (node*)0) {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ push();
+ }
+ else if (tree->car->car == (node*)1) {
+ genop(s, MKOP_A(OP_OCLASS, cursp()));
+ push();
+ }
+ else {
+ codegen(s, tree->car->car, VAL);
+ }
+ pop();
+ idx = new_msym(s, (mrb_sym)tree->car->cdr);
+ genop(s, MKOP_AB(OP_MODULE, cursp(), idx));
+ idx = scope_body(s, tree->cdr->car);
+ genop(s, MKOP_ABx(OP_EXEC, cursp(), idx));
+ if (val) {
+ push();
+ }
+ }
+ break;
+
+ case NODE_SCLASS:
+ {
+ int idx;
+
+ codegen(s, tree->car, VAL);
+ pop();
+ genop(s, MKOP_AB(OP_SCLASS, cursp(), cursp()));
+ idx = scope_body(s, tree->cdr->car);
+ genop(s, MKOP_ABx(OP_EXEC, cursp(), idx));
+ if (val) {
+ push();
+ }
+ }
+ break;
+
+ case NODE_DEF:
+ {
+ int sym = new_msym(s, (mrb_sym)tree->car);
+ int idx = lambda_body(s, tree->cdr, 0);
+
+ genop(s, MKOP_A(OP_TCLASS, cursp()));
+ push();
+ genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_METHOD));
+ pop();
+ genop(s, MKOP_AB(OP_METHOD, cursp(), sym));
+ if (val) {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ }
+ }
+ break;
+
+ case NODE_SDEF:
+ {
+ node *recv = tree->car;
+ int sym = new_msym(s, (mrb_sym)tree->cdr->car);
+ int idx = lambda_body(s, tree->cdr->cdr, 0);
+
+ codegen(s, recv, VAL);
+ pop();
+ genop(s, MKOP_AB(OP_SCLASS, cursp(), cursp()));
+ push();
+ genop(s, MKOP_Abc(OP_LAMBDA, cursp(), idx, OP_L_METHOD));
+ pop();
+ genop(s, MKOP_AB(OP_METHOD, cursp(), sym));
+ if (val) {
+ genop(s, MKOP_A(OP_LOADNIL, cursp()));
+ }
+ }
+ break;
+
+ case NODE_POSTEXE:
+ codegen(s, tree, NOVAL);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*
+ * Create a code generation scope.
+ *
+ * Every scope owns a private memory pool (p->mpool); the scope structure
+ * itself is allocated from that pool.  When `prev` is NULL only the bare,
+ * zero-filled scope is returned (no instruction, literal or symbol buffers).
+ * Otherwise the scope is linked to `prev`, its buffers are allocated and the
+ * next free irep index (mrb->irep_len) is reserved for it.
+ *
+ *   mrb  - interpreter state
+ *   prev - enclosing scope, or NULL
+ *   lv   - list of local variables; sp starts at node_len(lv)+2
+ *
+ * Returns the new scope, or 0 if the pool or the scope could not be allocated.
+ */
+static codegen_scope*
+scope_new(mrb_state *mrb, codegen_scope *prev, node *lv)
+{
+  mrb_pool *pool = mrb_pool_open(mrb);
+  codegen_scope *p;
+
+  if (!pool) return 0;
+  p = mrb_pool_alloc(pool, sizeof(codegen_scope));
+  if (!p) {
+    /* the pool was opened only for this scope; release it instead of leaking it */
+    mrb_pool_close(pool);
+    return 0;
+  }
+
+  memset(p, 0, sizeof(codegen_scope));
+  p->mrb = mrb;
+  p->mpool = pool;
+  if (!prev) return p;
+  p->prev = prev;
+  p->ainfo = -1;
+
+  p->mrb = prev->mrb;
+  p->icapa = 1024;
+  p->iseq = mrb_malloc(mrb, sizeof(mrb_code)*p->icapa);
+
+  p->pcapa = 32;
+  p->pool = mrb_malloc(mrb, sizeof(mrb_value)*p->pcapa);
+
+  p->syms = mrb_malloc(mrb, sizeof(mrb_sym)*256);
+
+  p->lv = lv;
+  p->sp += node_len(lv)+2;
+  p->nlocals = p->sp;
+
+  p->idx = mrb->irep_len++;
+
+  return p;
+}
+
+/*
+ * Turn a finished scope into the irep stored at mrb->irep[idx].
+ *
+ * The instruction, literal and symbol buffers are shrunk to their used size
+ * and handed over to the new irep; afterwards the scope's memory pool is
+ * closed, so `s` must not be used again by the caller.
+ */
+static void
+scope_finish(codegen_scope *s, int idx)
+{
+  mrb_state *mrb = s->mrb;
+  mrb_irep *irep;
+
+  mrb_add_irep(mrb, idx);
+  irep = mrb->irep[idx] = mrb_malloc(mrb, sizeof(mrb_irep));
+
+  irep->idx = idx;
+  irep->flags = 0;
+
+  /* give every buffer a defined "empty" state first, so a scope that lacks
+     one of them does not leave garbage in the irep */
+  irep->iseq = 0;
+  irep->ilen = 0;
+  irep->pool = 0;
+  irep->plen = 0;
+  irep->syms = 0;
+  irep->slen = 0;
+
+  if (s->iseq) {
+    irep->iseq = codegen_realloc(s, s->iseq, sizeof(mrb_code)*s->pc);
+    irep->ilen = s->pc;
+  }
+  if (s->pool) {
+    irep->pool = codegen_realloc(s, s->pool, sizeof(mrb_value)*s->plen);
+    irep->plen = s->plen;
+  }
+  if (s->syms) {
+    irep->syms = codegen_realloc(s, s->syms, sizeof(mrb_sym)*s->slen);
+    irep->slen = s->slen;
+  }
+
+  irep->nlocals = s->nlocals;
+  irep->nregs = s->nregs;
+
+  mrb_pool_close(s->mpool);
+}
+
+/*
+ * Open a new loop context of type `t` and make it the innermost one of
+ * scope `s`.  The record remembers the current ensure nesting level and
+ * the current stack position (used as the loop's result register); its
+ * three pc slots start out as 0.  Returns the new record.
+ */
+static struct loopinfo*
+loop_push(codegen_scope *s, enum looptype t)
+{
+  struct loopinfo *info = codegen_palloc(s, sizeof(struct loopinfo));
+
+  info->type = t;
+  info->pc1 = 0;
+  info->pc2 = 0;
+  info->pc3 = 0;
+  info->ensure_level = s->ensure_level;
+  info->acc = cursp();
+
+  /* link in front of the previously innermost loop */
+  info->prev = s->loop;
+  s->loop = info;
+
+  return info;
+}
+
+/*
+ * Generate code for `break`, optionally with a value expression `tree`.
+ *
+ * Outside of any loop context this reports "unexpected break".  Otherwise
+ * one OP_POPERR is emitted for every LOOP_BEGIN context that is left.  For a
+ * LOOP_NORMAL loop the value (if any) is moved into the loop's result
+ * register and a jump is chained into the loop's pc3 list, which loop_pop()
+ * later resolves.  For any other loop type an OP_RETURN with OP_R_BREAK is
+ * emitted instead.
+ */
+static void
+loop_break(codegen_scope *s, node *tree)
+{
+  if (!s->loop) {
+    codegen(s, tree, NOVAL);
+    raise_error(s, "unexpected break");
+  }
+  else {
+    struct loopinfo *loop;
+
+    if (tree) {
+      /* evaluate the break value; it stays in the register at cursp() */
+      codegen(s, tree, VAL);
+      pop();
+    }
+
+    loop = s->loop;
+    while (loop->type == LOOP_BEGIN) {
+      genop_peep(s, MKOP_A(OP_POPERR, 1), NOVAL);
+      loop = loop->prev;
+    }
+    if (loop->type == LOOP_NORMAL) {
+      int tmp;
+
+      if (s->ensure_level > s->loop->ensure_level) {
+        genop_peep(s, MKOP_A(OP_EPOP, s->ensure_level - s->loop->ensure_level), NOVAL);
+      }
+      if (tree) {
+        genop_peep(s, MKOP_AB(OP_MOVE, loop->acc, cursp()), NOVAL);
+      }
+      /* thread this jump onto the list of pending break jumps */
+      tmp = new_label(s);
+      genop(s, MKOP_sBx(OP_JMP, loop->pc3));
+      loop->pc3 = tmp;
+    }
+    else {
+      genop(s, MKOP_AB(OP_RETURN, cursp(), OP_R_BREAK));
+    }
+  }
+}
+
+/*
+ * Close the innermost loop context of `s`.
+ *
+ * When a value is wanted, nil is loaded into the current register as the
+ * loop result and the register is kept.  The jumps chained on the loop's
+ * pc3 list are resolved via dispatch_linked() before the context is
+ * unlinked.
+ */
+static void
+loop_pop(codegen_scope *s, int val)
+{
+  struct loopinfo *finished = s->loop;
+
+  if (val) {
+    genop(s, MKOP_A(OP_LOADNIL, cursp()));
+  }
+  dispatch_linked(s, finished->pc3);
+  s->loop = finished->prev;
+  if (val) {
+    push();
+  }
+}
+/*
+ * Print a human-readable listing of irep number `n` to stdout.
+ *
+ * A header line with the register/local/literal/symbol counts is followed by
+ * one line per instruction: the instruction index, the opcode name and its
+ * decoded operands.  Symbol operands are printed by name, jump operands as
+ * absolute instruction indices, and irep operands as `n` plus the encoded
+ * value.  Opcodes without a dedicated case are printed as "OP_unknown" with
+ * their raw A/B/C operands.  Nothing is printed when mrb->irep[n] is empty.
+ */
+static void
+codedump(mrb_state *mrb, int n)
+{
+ mrb_irep *irep = mrb->irep[n];
+ int i;
+ mrb_code c;
+
+ if (!irep) return;
+ printf("irep %d nregs=%d nlocals=%d pools=%d syms=%d\n", n,
+ irep->nregs, irep->nlocals, irep->plen, irep->slen);
+ for (i=0; i<irep->ilen; i++) {
+ printf("%03d ", i);
+ c = irep->iseq[i];
+ switch (GET_OPCODE(c)) {
+ case OP_NOP:
+ printf("OP_NOP\n");
+ break;
+ case OP_MOVE:
+ printf("OP_MOVE\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c));
+ break;
+ case OP_LOADL:
+ printf("OP_LOADL\tR%d\tL(%d)\n", GETARG_A(c), GETARG_Bx(c));
+ break;
+ case OP_LOADI:
+ printf("OP_LOADI\tR%d\t%d\n", GETARG_A(c), GETARG_sBx(c));
+ break;
+ case OP_LOADSYM:
+ printf("OP_LOADSYM\tR%d\t'%s'\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]));
+ break;
+ case OP_LOADNIL:
+ printf("OP_LOADNIL\tR%d\n", GETARG_A(c));
+ break;
+ case OP_LOADSELF:
+ printf("OP_LOADSELF\tR%d\n", GETARG_A(c));
+ break;
+ case OP_LOADT:
+ printf("OP_LOADT\tR%d\n", GETARG_A(c));
+ break;
+ case OP_LOADF:
+ printf("OP_LOADF\tR%d\n", GETARG_A(c));
+ break;
+ case OP_GETGLOBAL:
+ printf("OP_GETGLOBAL\tR%d\t'%s'\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]));
+ break;
+ case OP_SETGLOBAL:
+ printf("OP_SETGLOBAL\t'%s'\tR%d\n",
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]),
+ GETARG_A(c));
+ break;
+ case OP_GETCONST:
+ printf("OP_GETCONST\tR%d\t'%s'\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]));
+ break;
+ case OP_SETCONST:
+ printf("OP_SETCONST\t'%s'\tR%d\n",
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]),
+ GETARG_A(c));
+ break;
+ case OP_GETMCNST:
+ printf("OP_GETMCNST\tR%d\tR%d::%s\n", GETARG_A(c), GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]));
+ break;
+ case OP_SETMCNST:
+ printf("OP_SETMCNST\tR%d::%s\tR%d\n", GETARG_A(c)+1,
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]),
+ GETARG_A(c));
+ break;
+ case OP_GETIV:
+ printf("OP_GETIV\tR%d\t%s\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]));
+ break;
+ case OP_SETIV:
+ printf("OP_SETIV\t%s\tR%d\n",
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]),
+ GETARG_A(c));
+ break;
+ case OP_GETUPVAR:
+ printf("OP_GETUPVAR\tR%d\t%d\t%d\n",
+ GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ case OP_SETUPVAR:
+ printf("OP_SETUPVAR\tR%d\t%d\t%d\n",
+ GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ case OP_GETCV:
+ printf("OP_GETCV\tR%d\t%s\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]));
+ break;
+ case OP_SETCV:
+ printf("OP_SETCV\t%s\tR%d\n",
+ mrb_sym2name(mrb, irep->syms[GETARG_Bx(c)]),
+ GETARG_A(c));
+ break;
+ case OP_JMP:
+ printf("OP_JMP\t\t%03d\n", i+GETARG_sBx(c)); /* offset is added to this instruction's index to show the target */
+ break;
+ case OP_JMPIF:
+ printf("OP_JMPIF\tR%d\t%03d\n", GETARG_A(c), i+GETARG_sBx(c));
+ break;
+ case OP_JMPNOT:
+ printf("OP_JMPNOT\tR%d\t%03d\n", GETARG_A(c), i+GETARG_sBx(c));
+ break;
+ case OP_SEND:
+ printf("OP_SEND\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+ case OP_SUPER:
+ printf("OP_SUPER\tR%d\t%d\n", GETARG_A(c),
+ GETARG_C(c));
+ break;
+ case OP_ARGARY: /* Bx is split into four bit fields: 6, 1, 5 and 4 bits wide */
+ printf("OP_ARGARY\tR%d\t%d:%d:%d:%d\n", GETARG_A(c),
+ (GETARG_Bx(c)>>10)&0x3f,
+ (GETARG_Bx(c)>>9)&0x1,
+ (GETARG_Bx(c)>>4)&0x1f,
+ (GETARG_Bx(c)>>0)&0xf);
+ break;
+
+ case OP_ENTER: /* Ax is split into seven bit fields: 5, 5, 1, 5, 5, 1 and 1 bits wide */
+ printf("OP_ENTER\t%d:%d:%d:%d:%d:%d:%d\n",
+ (GETARG_Ax(c)>>18)&0x1f,
+ (GETARG_Ax(c)>>13)&0x1f,
+ (GETARG_Ax(c)>>12)&0x1,
+ (GETARG_Ax(c)>>7)&0x1f,
+ (GETARG_Ax(c)>>2)&0x1f,
+ (GETARG_Ax(c)>>1)&0x1,
+ GETARG_Ax(c) & 0x1);
+ break;
+ case OP_RETURN:
+ printf("OP_RETURN\tR%d", GETARG_A(c));
+ switch (GETARG_B(c)) { /* B selects the kind of return; it is printed as a suffix */
+ case OP_R_NORMAL:
+ printf("\n"); break;
+ case OP_R_RETURN:
+ printf("\treturn\n"); break;
+ case OP_R_BREAK:
+ printf("\tbreak\n"); break;
+ default:
+ printf("\tbroken\n"); break;
+ break; /* unreachable: the default case above already breaks */
+ }
+ break;
+ case OP_BLKPUSH: /* same Bx bit layout as printed for OP_ARGARY */
+ printf("OP_BLKPUSH\tR%d\t%d:%d:%d:%d\n", GETARG_A(c),
+ (GETARG_Bx(c)>>10)&0x3f,
+ (GETARG_Bx(c)>>9)&0x1,
+ (GETARG_Bx(c)>>4)&0x1f,
+ (GETARG_Bx(c)>>0)&0xf);
+ break;
+
+ case OP_LAMBDA:
+ printf("OP_LAMBDA\tR%d\tI(%d)\t%d\n", GETARG_A(c), n+GETARG_b(c), GETARG_c(c)); /* irep operand is shown as n plus the encoded value */
+ break;
+ case OP_RANGE:
+ printf("OP_RANGE\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ case OP_METHOD:
+ printf("OP_METHOD\tR%d\t'%s'\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]));
+ break;
+
+ case OP_ADD:
+ printf("OP_ADD\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+ case OP_SUB:
+ printf("OP_SUB\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+ case OP_LT:
+ printf("OP_LT\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+ case OP_LE:
+ printf("OP_LE\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+ case OP_GT:
+ printf("OP_GT\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+ case OP_GE:
+ printf("OP_GE\tR%d\t'%s'\t%d\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]),
+ GETARG_C(c));
+ break;
+
+ case OP_STOP:
+ printf("OP_STOP\n");
+ break;
+
+ case OP_ARRAY:
+ printf("OP_ARRAY\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ case OP_ARYCAT:
+ printf("OP_ARYCAT\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c));
+ break;
+ case OP_ARYPUSH:
+ printf("OP_ARYPUSH\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c));
+ break;
+ case OP_AREF:
+ printf("OP_AREF\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ case OP_APOST:
+ printf("OP_APOST\tR%d\t%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ case OP_STRING:
+ printf("OP_STRING\tR%d\t'%s'\n", GETARG_A(c), RSTRING_PTR(irep->pool[GETARG_Bx(c)])); /* literal is printed with %s, so output stops at the first NUL byte */
+ break;
+ case OP_STRCAT:
+ printf("OP_STRCAT\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c));
+ break;
+ case OP_HASH:
+ printf("OP_HASH\tR%d\tR%d\t%d\n", GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+
+ case OP_OCLASS:
+ printf("OP_OCLASS\tR%d\n", GETARG_A(c));
+ break;
+ case OP_CLASS:
+ printf("OP_CLASS\tR%d\t'%s'\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]));
+ break;
+ case OP_MODULE:
+ printf("OP_MODULE\tR%d\t'%s'\n", GETARG_A(c),
+ mrb_sym2name(mrb, irep->syms[GETARG_B(c)]));
+ break;
+ case OP_EXEC:
+ printf("OP_EXEC\tR%d\tI(%d)\n", GETARG_A(c), n+GETARG_Bx(c));
+ break;
+ case OP_SCLASS:
+ printf("OP_SCLASS\tR%d\tR%d\n", GETARG_A(c), GETARG_B(c));
+ break;
+ case OP_TCLASS:
+ printf("OP_TCLASS\tR%d\n", GETARG_A(c));
+ break;
+ case OP_ERR:
+ printf("OP_ERR\t:L(%d)\n", GETARG_Bx(c));
+ break;
+ case OP_EPUSH:
+ printf("OP_EPUSH\t:I(%d)\n", n+GETARG_Bx(c));
+ break;
+ case OP_ONERR:
+ printf("OP_ONERR\t%03d\n", i+GETARG_sBx(c));
+ break;
+ case OP_RESCUE:
+ printf("OP_RESCUE\tR%d\n", GETARG_A(c));
+ break;
+ case OP_RAISE:
+ printf("OP_RAISE\tR%d\n", GETARG_A(c));
+ break;
+ case OP_POPERR:
+ printf("OP_POPERR\t%d\n", GETARG_A(c));
+ break;
+ case OP_EPOP:
+ printf("OP_EPOP\t%d\n", GETARG_A(c));
+ break;
+
+ default:
+ printf("OP_unknown %d\t%d\t%d\t%d\n", GET_OPCODE(c),
+ GETARG_A(c), GETARG_B(c), GETARG_C(c));
+ break;
+ }
+ }
+ printf("\n");
+}
+
+/*
+ * Dump every irep from index `start` up to (but not including)
+ * mrb->irep_len, in ascending order, using codedump().
+ */
+void
+codedump_all(mrb_state *mrb, int start)
+{
+  int n = start;
+
+  while (n < mrb->irep_len) {
+    codedump(mrb, n);
+    n++;
+  }
+}
+
+/*
+ * Run the code generator over a complete syntax tree.
+ *
+ * A root scope (no parent, no code buffers) is created to anchor the
+ * generation and to hold the setjmp() target stored in scope->jmp.
+ *
+ * Returns 0 on success, -1 if the root scope cannot be created or if
+ * control comes back through scope->jmp.
+ */
+static int
+codegen_start(mrb_state *mrb, node *tree)
+{
+  codegen_scope *scope = scope_new(mrb, 0, 0);
+
+  if (!scope) {
+    return -1;
+  }
+  scope->mrb = mrb;
+
+  if (setjmp(scope->jmp) != 0) {
+    return -1;
+  }
+  // prepare irep
+  codegen(scope, tree, NOVAL);
+  /* the root scope lives in its own pool and is not needed once generation
+     is complete; close the pool so it is not leaked on every compilation */
+  mrb_pool_close(scope->mpool);
+  return 0;
+}
+
+/*
+ * Generate ireps for `tree`.
+ *
+ * Returns the value mrb->irep_len had before generation started (the index
+ * of the first irep produced by this call), or the negative result of
+ * codegen_start() on failure.
+ */
+int
+mrb_generate_code(mrb_state *mrb, node *tree)
+{
+  const int first = mrb->irep_len;
+  const int status = codegen_start(mrb, tree);
+
+  return (status < 0) ? status : first;
+}
+
+#ifdef CODEGEN_TEST
+/*
+ * Stand-alone smoke test for the code generator (built only with
+ * -DCODEGEN_TEST): compiles a small script, optionally dumps the generated
+ * code (-DCODEGEN_DUMP) and runs it.
+ *
+ * Exit status is 0 on success and 1 if the interpreter could not be opened
+ * or the script failed to compile.
+ */
+int
+main(void)
+{
+  mrb_state *mrb = mrb_open();
+  int n;
+
+  if (!mrb) {
+    return 1;
+  }
+
+#if 1
+  n = mrb_compile_string(mrb, "p(__FILE__)\np(__LINE__)");
+#else
+  n = mrb_compile_string(mrb, "\
+def fib(n)\n\
+  if n<2\n\
+    n\n\
+  else\n\
+    fib(n-2)+fib(n-1)\n\
+  end\n\
+end\n\
+p(fib(30), \"\\n\")\n\
+");
+#endif
+  printf("ret: %d\n", n);
+  if (n < 0) {
+    /* compilation failed: there is no irep to dump or run */
+    return 1;
+  }
+#ifdef CODEGEN_DUMP
+  codedump_all(mrb, n);
+#endif
+  /* n is the index of the first irep generated for the script; irep[0] is
+     only the right one when nothing was compiled before it */
+  mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_nil_value());
+
+  return 0;
+}
+#endif
diff --git a/src/compar.c b/src/compar.c
new file mode 100644
index 000000000..d66525c15
--- /dev/null
+++ b/src/compar.c
@@ -0,0 +1,144 @@
+#include "mruby.h"
+#include "mruby/string.h"
+#include "mruby/numeric.h"
+
+/*
+ * Raise an ArgumentError saying that `x` could not be compared with `y`.
+ * `x` is described by its class name; `y` by its inspect string when it is
+ * a special constant and by its class name otherwise.
+ */
+void
+mrb_cmperr(mrb_state *mrb, mrb_value x, mrb_value y)
+{
+  const char *rhs_desc;
+
+  if (!SPECIAL_CONST_P(y)) {
+    rhs_desc = mrb_obj_classname(mrb, y);
+  }
+  else {
+    y = mrb_inspect(mrb, y);
+    rhs_desc = mrb_string_value_ptr(mrb, y);
+  }
+  mrb_raise(mrb, E_ARGUMENT_ERROR, "comparison of %s with %s failed",
+            mrb_obj_classname(mrb, x), rhs_desc);
+}
+
+/*
+ * Reduce the result `val` of a <=> call to -1, 0 or 1.
+ *
+ * A nil result is reported through mrb_cmperr() with the operands `a` and
+ * `b`.  Fixnums are inspected directly; any other value is asked whether it
+ * is "> 0" and then whether it is "< 0".
+ */
+int
+mrb_cmpint(mrb_state *mrb, mrb_value val, mrb_value a, mrb_value b)
+{
+  if (mrb_nil_p(val)) {
+    mrb_cmperr(mrb, a, b);
+  }
+  if (FIXNUM_P(val)) {
+    long n = mrb_fixnum(val);
+
+    return (n > 0) ? 1 : ((n < 0) ? -1 : 0);
+  }
+  if (mrb_test(mrb_funcall(mrb, val, ">", 1, mrb_fixnum_value(0)))) {
+    return 1;
+  }
+  if (mrb_test(mrb_funcall(mrb, val, "<", 1, mrb_fixnum_value(0)))) {
+    return -1;
+  }
+  return 0;
+}
+
+/*
+ * Comparable#== : true when receiver and argument are the same object, or
+ * when `x <=> y` evaluates to zero.
+ */
+static mrb_value
+cmp_equal(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other, order;
+
+  /* *** TEMPORARY IMPLEMENTATION *** */
+
+  mrb_get_args(mrb, "o", &other);
+  if (mrb_obj_equal(mrb, x, other)) {
+    return mrb_true_value();
+  }
+
+  order = mrb_funcall(mrb, x, "<=>", 1, other);
+
+  return (mrb_cmpint(mrb, order, x, other) == 0) ? mrb_true_value()
+                                                 : mrb_false_value();
+}
+
+#include <stdio.h>
+/* True when `x <=> y` is positive. */
+static mrb_value
+cmp_gt(mrb_state *mrb, mrb_value x, mrb_value y)
+{
+  mrb_value order = mrb_funcall(mrb, x, "<=>", 1, y);
+
+  return (mrb_cmpint(mrb, order, x, y) > 0) ? mrb_true_value()
+                                            : mrb_false_value();
+}
+
+/* Comparable#> : fetches the single argument and defers to cmp_gt(). */
+static mrb_value
+cmp_gt_m(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  return cmp_gt(mrb, x, other);
+}
+
+/* Comparable#>= : true when `x <=> y` is zero or positive. */
+static mrb_value
+cmp_ge_m(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other, order;
+
+  mrb_get_args(mrb, "o", &other);
+  order = mrb_funcall(mrb, x, "<=>", 1, other);
+
+  return (mrb_cmpint(mrb, order, x, other) >= 0) ? mrb_true_value()
+                                                 : mrb_false_value();
+}
+
+/* True when `x <=> y` is negative. */
+static mrb_value
+cmp_lt(mrb_state *mrb, mrb_value x, mrb_value y)
+{
+  mrb_value order = mrb_funcall(mrb, x, "<=>", 1, y);
+
+  return (mrb_cmpint(mrb, order, x, y) < 0) ? mrb_true_value()
+                                            : mrb_false_value();
+}
+
+/* Comparable#< : fetches the single argument and defers to cmp_lt(). */
+static mrb_value
+cmp_lt_m(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  return cmp_lt(mrb, x, other);
+}
+
+/* Comparable#<= : true when `x <=> y` is zero or negative. */
+static mrb_value
+cmp_le_m(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other, order;
+
+  mrb_get_args(mrb, "o", &other);
+  order = mrb_funcall(mrb, x, "<=>", 1, other);
+
+  return (mrb_cmpint(mrb, order, x, other) <= 0) ? mrb_true_value()
+                                                 : mrb_false_value();
+}
+
+/*
+ * Comparable#between?(min, max): false when the receiver is less than `min`
+ * or greater than `max`, true otherwise.  The lower bound is checked first;
+ * the upper bound is only compared when the lower-bound check passes.
+ */
+static mrb_value
+cmp_between(mrb_state *mrb, mrb_value x)
+{
+  mrb_value lo, hi;
+
+  mrb_get_args(mrb, "oo", &lo, &hi);
+
+  if (mrb_test(cmp_lt(mrb, x, lo)) || mrb_test(cmp_gt(mrb, x, hi))) {
+    return mrb_false_value();
+  }
+  return mrb_true_value();
+}
+
+/*
+ * Define the Comparable module and register its six methods.  The trailing
+ * numbers are the section references carried over from the original
+ * comments.
+ */
+void
+mrb_init_comparable(mrb_state *mrb)
+{
+  struct RClass *comparable = mrb_define_module(mrb, "Comparable");
+
+  mrb_define_method(mrb, comparable, "<",        cmp_lt_m,    ARGS_REQ(1)); /* 15.3.3.2.1 */
+  mrb_define_method(mrb, comparable, "<=",       cmp_le_m,    ARGS_REQ(1)); /* 15.3.3.2.2 */
+  mrb_define_method(mrb, comparable, "==",       cmp_equal,   ARGS_REQ(1)); /* 15.3.3.2.3 */
+  mrb_define_method(mrb, comparable, ">",        cmp_gt_m,    ARGS_REQ(1)); /* 15.3.3.2.4 */
+  mrb_define_method(mrb, comparable, ">=",       cmp_ge_m,    ARGS_REQ(1)); /* 15.3.3.2.5 */
+  mrb_define_method(mrb, comparable, "between?", cmp_between, ARGS_REQ(2)); /* 15.3.3.2.6 */
+}
diff --git a/src/compile.h b/src/compile.h
new file mode 100644
index 000000000..f0e6b1874
--- /dev/null
+++ b/src/compile.h
@@ -0,0 +1,73 @@
+#include "mruby.h"
+#include <stdio.h>
+#include <setjmp.h>
+
+/* Syntax tree cell: a pair of links named after the Lisp car/cdr. */
+typedef struct mrb_ast_node {
+  struct mrb_ast_node *car;
+  struct mrb_ast_node *cdr;
+} mrb_ast_node;
+
+#include "node.h"
+#include "pool.h"
+#include <stdio.h>
+
+/* Lexer states; the enumerator order (and therefore the values) is fixed. */
+enum mrb_lex_state_enum {
+  EXPR_BEG = 0,   /* newline is ignored; +/- is a sign */
+  EXPR_END,       /* newline is significant; +/- is an operator */
+  EXPR_ENDARG,    /* as EXPR_END, and unbound braces */
+  EXPR_ENDFN,     /* as EXPR_END, and unbound braces */
+  EXPR_ARG,       /* newline is significant; +/- is an operator */
+  EXPR_CMDARG,    /* newline is significant; +/- is an operator */
+  EXPR_MID,       /* newline is significant; +/- is an operator */
+  EXPR_FNAME,     /* newline is ignored; no reserved words */
+  EXPR_DOT,       /* right after `.' or `::'; no reserved words */
+  EXPR_CLASS,     /* immediately after `class'; no here document */
+  EXPR_VALUE,     /* like EXPR_BEG, but a label is disallowed */
+  EXPR_MAX_STATE  /* number of states above */
+};
+
+/*
+ * Working state shared by the lexer and the parser.
+ * NOTE(review): the per-field remarks below are inferred from the field
+ * names and types only - confirm against parse.y before relying on them.
+ */
+struct mrb_parser_state {
+  mrb_state *mrb;
+  struct mrb_pool *pool;
+  mrb_ast_node *cells;
+  char *s;                       /* presumably the current position in a string source */
+  char *send;                    /* presumably the end of that string */
+  FILE *f;                       /* presumably the input stream when parsing a file */
+  int lineno;
+  int column;
+  const char *filename;
+
+  enum mrb_lex_state_enum lstate;
+  int sterm;
+
+  unsigned int cond_stack;
+  unsigned int cmdarg_stack;
+  int paren_nest;
+  int lpar_beg;
+
+  mrb_ast_node *pb;
+  char buf[1024];
+  int bidx;                      /* presumably the fill level of buf */
+
+  mrb_ast_node *heredoc;
+
+  int in_def;
+  int in_single;
+  int cmd_start;
+  mrb_ast_node *locals;
+
+  void *ylval;
+
+  int nerr;
+  mrb_ast_node *tree;
+  mrb_ast_node *begin_tree;
+
+  jmp_buf jmp;
+};
+/*
+ * Entry points of the parser and the code generator.
+ *
+ * The mrb_parse_* functions return a parser state; the mrb_compile_*
+ * functions return an int.  NOTE(review): presumably the parse functions
+ * only build the syntax tree while the compile functions also generate
+ * code - confirm against parse.y.
+ */
+struct mrb_parser_state* mrb_parse_file(mrb_state*,FILE*);
+struct mrb_parser_state* mrb_parse_string(mrb_state*,char*);
+struct mrb_parser_state* mrb_parse_nstring(mrb_state*,char*,size_t);
+int mrb_generate_code(mrb_state*, mrb_ast_node*); /* codegen.c: index of the first irep generated, or a negative value on failure */
+
+int mrb_compile_file(mrb_state*,FILE*);
+int mrb_compile_string(mrb_state*,char*);
+int mrb_compile_nstring(mrb_state*,char*,size_t);
+
+const char *mrb_parser_filename(struct mrb_parser_state *p, const char *s);
+int mrb_parser_lineno(struct mrb_parser_state *p, int n);
diff --git a/src/crc.c b/src/crc.c
new file mode 100644
index 000000000..513622a09
--- /dev/null
+++ b/src/crc.c
@@ -0,0 +1,28 @@
+#include <limits.h>
+#include <stdint.h>
+// Calculate CRC (CRC-16-CCITT)
+//
+// 0000_0000_0000_0000_0000_0000_0000_0000
+// ^|------- CRC -------|- work --|
+// carry
+#define CRC_16_CCITT 0x11021ul //x^16+x^12+x^5+1
+#define CRC_XOR_PATTERN (CRC_16_CCITT << 8)
+#define CRC_CARRY_BIT (1 << 24)
+
+/*
+ * Compute a 16-bit CRC with the CCITT polynomial over `nbytes` bytes at
+ * `src`.
+ *
+ * A 32-bit work register is used: each input byte is OR-ed into the low
+ * eight bits and the register is shifted left one bit at a time; whenever a
+ * bit reaches position 24 the (shifted) polynomial is XOR-ed in.  The CRC is
+ * the 16 bits above the low byte.  A count of zero (or less) yields 0.
+ */
+uint16_t
+calc_crc_16_ccitt(unsigned char *src, int nbytes)
+{
+  uint32_t work = 0ul;
+  const unsigned char *cur = src;
+  int remaining;
+
+  for (remaining = nbytes; remaining > 0; remaining--) {
+    int bit = CHAR_BIT;
+
+    work |= *cur;
+    cur++;
+    while (bit-- > 0) {
+      work <<= 1;
+      if ((work & CRC_CARRY_BIT) != 0) {
+        work ^= CRC_XOR_PATTERN;
+      }
+    }
+  }
+  return (uint16_t)(work >> 8);
+}
diff --git a/src/dump.c b/src/dump.c
new file mode 100644
index 000000000..ba2a9ab7a
--- /dev/null
+++ b/src/dump.c
@@ -0,0 +1,697 @@
+#include <string.h>
+#include "dump.h"
+
+#include "mruby/string.h"
+#ifdef INCLUDE_REGEXP
+#include "re.h"
+#endif
+#include "irep.h"
+
+/*
+ * Template for the in-memory (binary) Rite header.  It is built by
+ * concatenating the RITE_* string macros; the size, irep-count and
+ * start-index placeholders are overwritten by write_rite_header().
+ */
+static const unsigned char def_rite_binary_header[] =
+ RITE_FILE_IDENFIFIER
+ RITE_FILE_FORMAT_VER
+ RITE_VM_VER
+ RITE_COMPILER_TYPE
+ RITE_COMPILER_VER
+ "0000" //Binary data size
+ "00" //Number of ireps
+ "00" //Start index
+ RITE_RESERVED
+;
+
+/*
+ * Template for the hex-text file header.  The numeric placeholders are
+ * twice as wide as in the binary template, and a trailing CRC field is
+ * present; dump_rite_header() fills them in.
+ */
+static const unsigned char def_rite_file_header[] =
+ RITE_FILE_IDENFIFIER
+ RITE_FILE_FORMAT_VER
+ RITE_VM_VER
+ RITE_COMPILER_TYPE
+ RITE_COMPILER_VER
+ "00000000" //Binary data size
+ "0000" //Number of ireps
+ "0000" //Start index
+ RITE_RESERVED
+ "0000" //CRC
+;
+
+/* Maps a nibble value (0..15) to its upper-case hexadecimal digit. */
+const char bin2hex[] = {
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+/*
+ * Number of output bytes needed for `size` raw bytes in the given dump
+ * type: unchanged for DUMP_TYPE_BIN, multiplied by RITE_FILE_HEX_SIZE
+ * otherwise.  Arguments are parenthesized so that expressions such as
+ * DUMP_SIZE(a + b, type) scale the whole sum, not just `b`.
+ */
+#define DUMP_SIZE(size, type) (((type) == DUMP_TYPE_BIN) ? (size) : (size) * RITE_FILE_HEX_SIZE)
+
+/*
+ * Sections of one irep record, in the order write_irep_record() emits
+ * them.  DUMP_SECTION_NUM is the section count, used as the loop bound.
+ */
+enum {
+ DUMP_IREP_HEADER = 0,
+ DUMP_ISEQ_BLOCK,
+ DUMP_POOL_BLOCK,
+ DUMP_SYMS_BLOCK,
+ DUMP_SECTION_NUM,
+};
+
+/* Defined in crc.c. */
+uint16_t calc_crc_16_ccitt(unsigned char*,int);
+/* Primitive encoders: the trailing int is the dump type (DUMP_TYPE_*). */
+static inline int uint8_dump(uint8_t,char*,int);
+static inline int uint16_dump(uint16_t,char*,int);
+static inline int uint32_dump(uint32_t,char*,int);
+static char* str_dump(char*,char*,uint16_t,int);
+static uint16_t str_dump_len(char*,uint16_t, int);
+/* Size calculators: one per record section, plus the whole record. */
+static uint32_t get_irep_header_size(mrb_state*,mrb_irep*,int);
+static uint32_t get_iseq_block_size(mrb_state*,mrb_irep*,int);
+static uint32_t get_pool_block_size(mrb_state*,mrb_irep*,int);
+static uint32_t get_syms_block_size(mrb_state*,mrb_irep*,int);
+static uint32_t get_irep_record_size(mrb_state*,int,int);
+/* Section writers: each returns the number of bytes written. */
+static int write_irep_header(mrb_state*,mrb_irep*,char*,int);
+static int write_iseq_block(mrb_state*,mrb_irep*,char*,int);
+static int write_pool_block(mrb_state*,mrb_irep*,char*,int);
+static int write_syms_block(mrb_state*,mrb_irep*,char*,int);
+static int calc_crc_section(mrb_state*,mrb_irep*,uint16_t*,int);
+/* Header and record emitters: write_* target memory, dump_* target a FILE. */
+static int write_rite_header(mrb_state*,int,char*,uint32_t);
+static int dump_rite_header(mrb_state*,int,FILE*,uint32_t);
+static int write_irep_record(mrb_state*,int,char*,uint32_t*,int);
+static int dump_irep_record(mrb_state*,int,FILE*,uint32_t*);
+static int mrb_write_irep(mrb_state*,int,char*);
+
+
+/*
+ * Emit one byte at hex: verbatim for DUMP_TYPE_BIN, otherwise as two
+ * upper-case hex digits.  Returns the number of bytes written.
+ */
+static inline int
+uint8_dump(unsigned char bin, char *hex, int type)
+{
+  if (type != DUMP_TYPE_BIN) {
+    hex[0] = bin2hex[(bin >> 4) & 0x0f];   /* high nibble first */
+    hex[1] = bin2hex[bin & 0x0f];
+  }
+  else {
+    hex[0] = bin;
+  }
+  return DUMP_SIZE(sizeof(char), type);
+}
+
+/*
+ * Emit a 16-bit value at hex: via uint16_to_bin() for DUMP_TYPE_BIN,
+ * otherwise as four hex digits, most significant nibble first.
+ * Returns the number of bytes written.
+ */
+static inline int
+uint16_dump(uint16_t bin, char *hex, int type)
+{
+  int shift;
+
+  if (type == DUMP_TYPE_BIN)
+    return (uint16_to_bin(bin, hex));
+
+  for (shift = 12; shift >= 0; shift -= 4)
+    *hex++ = bin2hex[(bin >> shift) & 0x0f];
+
+  return DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type);
+}
+
+/*
+ * Emit a 32-bit value at hex: via uint32_to_bin() for DUMP_TYPE_BIN,
+ * otherwise as eight hex digits, most significant nibble first.
+ * Returns the number of bytes written.
+ */
+static inline int
+uint32_dump(uint32_t bin, char *hex, int type)
+{
+  int shift;
+
+  if (type == DUMP_TYPE_BIN)
+    return (uint32_to_bin(bin, hex));
+
+  for (shift = 28; shift >= 0; shift -= 4)
+    *hex++ = bin2hex[(bin >> shift) & 0x0f];
+
+  return DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, type);
+}
+
+/*
+ * Copy len bytes of str into hex and return hex.
+ *
+ * DUMP_TYPE_BIN copies the bytes unchanged.  Any other type replaces
+ * the control characters BEL, BS, HT, LF, VT, FF and CR by a two
+ * character backslash escape (\a \b \t \n \v \f \r); every other byte,
+ * including quotes and the backslash itself, is copied as is.  The
+ * caller must size hex using str_dump_len().
+ */
+static char*
+str_dump(char *str, char *hex, uint16_t len, int type)
+{
+  char *out = hex;
+  uint16_t i;
+
+  if (type == DUMP_TYPE_BIN) {
+    memcpy(hex, str, len);
+    return hex;
+  }
+
+  for (i = 0; i < len; i++) {
+    char esc;
+
+    switch (str[i]) {
+    case 0x07: esc = 'a'; break;   /* BEL */
+    case 0x08: esc = 'b'; break;   /* BS  */
+    case 0x09: esc = 't'; break;   /* HT  */
+    case 0x0A: esc = 'n'; break;   /* LF  */
+    case 0x0B: esc = 'v'; break;   /* VT  */
+    case 0x0C: esc = 'f'; break;   /* FF  */
+    case 0x0D: esc = 'r'; break;   /* CR  */
+    default:   esc = 0;   break;   /* copied unchanged */
+    }
+
+    if (esc != 0) {
+      *out++ = '\\';
+      *out++ = esc;
+    }
+    else {
+      *out++ = str[i];
+    }
+  }
+
+  return hex;
+}
+
+/*
+ * Number of bytes str_dump() produces for the first len bytes of str.
+ *
+ * DUMP_TYPE_BIN: exactly len.  Otherwise the bytes 0x07..0x0D (BEL, BS,
+ * HT, LF, VT, FF, CR) count as two output characters each and every
+ * other byte counts as one.  The sum is accumulated in 16 bits.
+ */
+static uint16_t
+str_dump_len(char *str, uint16_t len, int type)
+{
+  uint16_t total = 0;
+  uint16_t i;
+
+  if (type == DUMP_TYPE_BIN)
+    return len;
+
+  for (i = 0; i < len; i++) {
+    char c = str[i];
+
+    if (c >= 0x07 && c <= 0x0D)
+      total += 2;   /* escaped as backslash + letter */
+    else
+      total++;
+  }
+
+  return total;
+}
+
+/*
+ * Size of the irep header section for the given dump type: two raw
+ * characters plus four short-sized fields.  mrb and irep are unused.
+ */
+static uint32_t
+get_irep_header_size(mrb_state *mrb, mrb_irep *irep, int type)
+{
+  uint32_t raw_chars = sizeof(char) * 2;
+  uint32_t shorts = DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type) * 4;
+
+  return raw_chars + shorts;
+}
+
+/*
+ * Size of the instruction-sequence section for the given dump type:
+ * the ilen field, irep->ilen long-sized opcodes, and the trailing CRC.
+ */
+static uint32_t
+get_iseq_block_size(mrb_state *mrb, mrb_irep *irep, int type)
+{
+  uint32_t raw = MRB_DUMP_SIZE_OF_LONG;          /* ilen */
+
+  raw += irep->ilen * MRB_DUMP_SIZE_OF_LONG;     /* iseq(n) */
+  raw += MRB_DUMP_SIZE_OF_SHORT;                 /* crc */
+
+  /* scale through a plain variable: DUMP_SIZE multiplies its argument */
+  return DUMP_SIZE(raw, type);
+}
+
+/*
+ * Size of the literal-pool section for the given dump type.
+ *
+ * The fixed part (plen, one type byte and one length field per entry,
+ * and the CRC) is scaled by DUMP_SIZE.  The variable part adds, per
+ * entry, the length of its textual form: "%d" for fixnums, "%.16e" for
+ * floats, and str_dump_len() for strings (and regexps when
+ * INCLUDE_REGEXP is defined).  Other types contribute no payload.
+ */
+static uint32_t
+get_pool_block_size(mrb_state *mrb, mrb_irep *irep, int type)
+{
+ uint32_t size = 0;
+ int pool_no;
+ mrb_value str;
+ char buf[32];
+
+ size += MRB_DUMP_SIZE_OF_LONG; /* plen */
+ size += irep->plen * sizeof(char); /* tt(n) */
+ size += irep->plen * MRB_DUMP_SIZE_OF_SHORT; /* len(n) */
+ size += MRB_DUMP_SIZE_OF_SHORT; /* crc */
+ size = DUMP_SIZE(size, type);
+
+ for (pool_no = 0; pool_no < irep->plen; pool_no++) {
+ uint16_t nlen =0;
+
+ switch (irep->pool[pool_no].tt) {
+ case MRB_TT_FIXNUM:
+ /* formatted only to measure its length */
+ sprintf( buf, "%d", irep->pool[pool_no].value.i);
+ size += strlen(buf);
+ break;
+ case MRB_TT_FLOAT:
+ sprintf( buf, "%.16e", irep->pool[pool_no].value.f);
+ size += strlen(buf);
+ break;
+ case MRB_TT_STRING:
+ str = mrb_string_value( mrb, &irep->pool[pool_no]);
+ nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
+ size += nlen;
+ break;
+#ifdef INCLUDE_REGEXP
+ case MRB_TT_REGEX:
+ str = mrb_reg_to_s(mrb, irep->pool[pool_no]);
+ nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
+ size += nlen;
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+
+ return size;
+}
+
+/*
+ * Size of the symbol section for the given dump type: the slen field
+ * and CRC, plus for every symbol a short-sized length field and, when
+ * the symbol is non-zero, the dumped length of its name.
+ */
+static uint32_t
+get_syms_block_size(mrb_state *mrb, mrb_irep *irep, int type)
+{
+  uint32_t fixed = MRB_DUMP_SIZE_OF_LONG + MRB_DUMP_SIZE_OF_SHORT;  /* slen + crc */
+  uint32_t total = DUMP_SIZE(fixed, type);
+  int i;
+
+  for (i = 0; i < irep->slen; i++) {
+    const char *sym_name;
+
+    total += DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type);   /* snl(n) */
+    if (irep->syms[i] == 0)
+      continue;                                         /* null symbol: no name bytes */
+
+    sym_name = mrb_sym2name(mrb, irep->syms[i]);
+    total += str_dump_len((char*)sym_name, strlen(sym_name), type);  /* sn(n) */
+  }
+
+  return total;
+}
+
+/*
+ * Total size of the record for mrb->irep[irep_no] in the given dump
+ * type: the leading rlen field followed by the header, iseq, pool and
+ * symbol sections.
+ */
+static uint32_t
+get_irep_record_size(mrb_state *mrb, int irep_no, int type)
+{
+  mrb_irep *irep = mrb->irep[irep_no];
+  uint32_t total = DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, type);   /* rlen */
+
+  total += get_irep_header_size(mrb, irep, type);
+  total += get_iseq_block_size(mrb, irep, type);
+  total += get_pool_block_size(mrb, irep, type);
+  total += get_syms_block_size(mrb, irep, type);
+
+  return total;
+}
+
+/*
+ * Write the irep header section at buf and return the number of bytes
+ * written.  The two identifier characters are stored raw in every dump
+ * type; the three 16-bit fields go through uint16_dump().
+ */
+static int
+write_irep_header(mrb_state *mrb, mrb_irep *irep, char *buf, int type)
+{
+ char *buf_top = buf;
+
+ *buf++ = RITE_IREP_IDENFIFIER; /* record identifier */
+ *buf++ = RITE_IREP_TYPE_CLASS; /* class or module */
+ buf += uint16_dump((uint16_t)irep->nlocals, buf, type); /* number of local variable */
+ buf += uint16_dump((uint16_t)irep->nregs, buf, type); /* number of register variable */
+ buf += uint16_dump(DUMP_SIZE(MRB_DUMP_SIZE_OF_SHORT, type)/* crc */, buf, type); /* offset of iseq block */
+
+ return (int)(buf - buf_top);
+}
+
+/*
+ * Write the instruction-sequence section at buf: the opcode count
+ * followed by each opcode as a 32-bit value.  Returns the number of
+ * bytes written (the CRC is appended by the caller).
+ */
+static int
+write_iseq_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type)
+{
+  char *cursor = buf;
+  int i;
+
+  cursor += uint32_dump((uint32_t)irep->ilen, cursor, type);      /* number of opcode */
+  for (i = 0; i < irep->ilen; i++)
+    cursor += uint32_dump((uint32_t)irep->iseq[i], cursor, type); /* opcode */
+
+  return (int)(cursor - buf);
+}
+
+/*
+ * Write the literal-pool section at buf and return the number of bytes
+ * written (the CRC is appended by the caller).
+ *
+ * Layout: plen, then for each pool entry a type byte, a 16-bit payload
+ * length and the payload text ("%d" for fixnums, "%.16e" for floats,
+ * str_dump() output for strings and, with INCLUDE_REGEXP, regexps).
+ * Entries of any other type are written with a length of 0.
+ *
+ * A scratch buffer holds each payload.  It is grown through a temporary
+ * pointer so the existing allocation is still released if mrb_realloc()
+ * reports failure, and its size is tracked as size_t so a maximal
+ * 16-bit payload length cannot wrap the capacity to 0.
+ *
+ * On allocation failure the function stops early and returns the
+ * number of bytes written so far.
+ */
+static int
+write_pool_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type)
+{
+  int pool_no;
+  mrb_value str;
+  char *buf_top = buf;
+  char *char_buf;
+  size_t buf_size = MRB_DUMP_DEFAULT_STR_LEN;
+
+  if ((char_buf = mrb_malloc(mrb, buf_size)) == 0)
+    goto error_exit;
+
+  buf += uint32_dump((uint32_t)irep->plen, buf, type); /* number of pool */
+
+  for (pool_no = 0; pool_no < irep->plen; pool_no++) {
+    uint16_t nlen = 0;
+
+    buf += uint8_dump(irep->pool[pool_no].tt, buf, type); /* data type */
+    /* zero-fill so the payload is NUL-terminated for strlen() below */
+    memset(char_buf, 0, buf_size);
+
+    switch (irep->pool[pool_no].tt) {
+    case MRB_TT_FIXNUM:
+      sprintf(char_buf, "%d", irep->pool[pool_no].value.i);
+      break;
+
+    case MRB_TT_FLOAT:
+      sprintf(char_buf, "%.16e", irep->pool[pool_no].value.f);
+      break;
+
+    case MRB_TT_STRING:
+      str = mrb_string_value( mrb, &irep->pool[pool_no]);
+      nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
+      if ((size_t)nlen + 1 > buf_size) {
+        size_t new_size = (size_t)nlen + 1;
+        char *grown = mrb_realloc(mrb, char_buf, new_size);
+
+        if (grown == 0)
+          goto error_exit;        /* char_buf is still owned and freed below */
+        char_buf = grown;
+        buf_size = new_size;
+        memset(char_buf, 0, buf_size);
+      }
+      str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type);
+      break;
+
+#ifdef INCLUDE_REGEXP
+    case MRB_TT_REGEX:
+      str = mrb_reg_to_s(mrb, irep->pool[pool_no]);
+      nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
+      if ((size_t)nlen + 1 > buf_size) {
+        size_t new_size = (size_t)nlen + 1;
+        char *grown = mrb_realloc(mrb, char_buf, new_size);
+
+        if (grown == 0)
+          goto error_exit;        /* char_buf is still owned and freed below */
+        char_buf = grown;
+        buf_size = new_size;
+        memset(char_buf, 0, buf_size);
+      }
+      str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type);
+      break;
+#endif
+
+    default:
+      buf += uint16_dump(0, buf, type); /* data length = 0 */
+      continue;
+    }
+
+    buf += uint16_dump((uint16_t)strlen(char_buf), buf, type); /* data length */
+
+    memcpy(buf, char_buf, strlen(char_buf));
+    buf += strlen(char_buf);
+  }
+
+error_exit:
+  if (char_buf)
+    mrb_free(mrb, char_buf);
+  return (int)(buf - buf_top);
+}
+
+/*
+ * Write the symbol section at buf and return the number of bytes
+ * written (the CRC is appended by the caller).
+ *
+ * Layout: slen, then for each symbol a 16-bit name length followed by
+ * the name as produced by str_dump().  A zero symbol is written as the
+ * length MRB_DUMP_NULL_SYM_LEN with no name bytes.
+ *
+ * The scratch buffer is grown through a temporary pointer so the
+ * existing allocation is still released if mrb_realloc() reports
+ * failure, and its size is tracked as size_t so a maximal 16-bit name
+ * length cannot wrap the capacity to 0.
+ *
+ * On allocation failure the function stops early and returns the
+ * number of bytes written so far.
+ */
+static int
+write_syms_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type)
+{
+  int sym_no;
+  char *buf_top = buf;
+  char *char_buf;
+  size_t buf_size = MRB_DUMP_DEFAULT_STR_LEN;
+
+  if ((char_buf = mrb_malloc(mrb, buf_size)) == 0)
+    goto error_exit;
+
+  buf += uint32_dump((uint32_t)irep->slen, buf, type); /* number of symbol */
+
+  for (sym_no = 0; sym_no < irep->slen; sym_no++) {
+    const char * name;
+    uint16_t nlen = 0;
+
+    if (irep->syms[sym_no] != 0) {
+      name = mrb_sym2name(mrb, irep->syms[sym_no]);
+      nlen = str_dump_len((char*)name, strlen(name), type);
+      if ((size_t)nlen + 1 > buf_size) {
+        size_t new_size = (size_t)nlen + 1;
+        char *grown = mrb_realloc(mrb, char_buf, new_size);
+
+        if (grown == 0)
+          goto error_exit;        /* char_buf is still owned and freed below */
+        char_buf = grown;
+        buf_size = new_size;
+      }
+      memset(char_buf, 0, buf_size);
+      str_dump((char*)name, char_buf, strlen(name), type);
+
+      buf += uint16_dump(nlen, buf, type); /* length of symbol name */
+      memcpy(buf, char_buf, nlen); /* symbol name */
+      buf += nlen;
+    }
+    else {
+      buf += uint16_dump(MRB_DUMP_NULL_SYM_LEN, buf, type); /* length of symbol name */
+    }
+  }
+
+error_exit:
+  if (char_buf)
+    mrb_free(mrb, char_buf);
+  return (int)(buf - buf_top);
+}
+
+/*
+ * Compute the CRC of one record section and store it in *crc.
+ *
+ * The section is always serialized in DUMP_TYPE_BIN form into a
+ * temporary buffer, regardless of the format the caller is emitting,
+ * and the CRC is taken over the bytes actually written.
+ *
+ * Returns MRB_DUMP_OK, or MRB_DUMP_GENERAL_FAILURE for an unknown
+ * section or a failed allocation.
+ */
+static int
+calc_crc_section(mrb_state *mrb, mrb_irep *irep, uint16_t *crc, int section)
+{
+ char *buf, *buf_top;
+ uint32_t buf_size;
+ int type = DUMP_TYPE_BIN;
+
+ /* size the scratch buffer for the requested section */
+ switch (section) {
+ case DUMP_IREP_HEADER: buf_size = get_irep_header_size(mrb, irep, type); break;
+ case DUMP_ISEQ_BLOCK: buf_size = get_iseq_block_size(mrb, irep, type); break;
+ case DUMP_POOL_BLOCK: buf_size = get_pool_block_size(mrb, irep, type); break;
+ case DUMP_SYMS_BLOCK: buf_size = get_syms_block_size(mrb, irep, type); break;
+ default: return MRB_DUMP_GENERAL_FAILURE;
+ }
+
+ if ((buf = mrb_malloc(mrb, buf_size)) == 0)
+ return MRB_DUMP_GENERAL_FAILURE;
+
+ buf_top = buf;
+ memset(buf, 0, buf_size);
+
+ /* serialize the section; buf advances by the bytes written */
+ switch (section) {
+ case DUMP_IREP_HEADER: buf += write_irep_header(mrb, irep, buf, type); break;
+ case DUMP_ISEQ_BLOCK: buf += write_iseq_block(mrb, irep, buf, type); break;
+ case DUMP_POOL_BLOCK: buf += write_pool_block(mrb, irep, buf, type); break;
+ case DUMP_SYMS_BLOCK: buf += write_syms_block(mrb, irep, buf, type); break;
+ default: break;
+ }
+
+ *crc = calc_crc_16_ccitt((unsigned char *)buf_top, (int)(buf - buf_top));
+
+ mrb_free(mrb, buf_top);
+
+ return MRB_DUMP_OK;
+}
+
+/*
+ * Fill in the binary Rite header at bin.
+ *
+ * Copies the default header template, overwrites the data-size (rbds),
+ * irep-count and start-index fields, then stores the CRC of the header
+ * in the two bytes immediately after it.  Always returns MRB_DUMP_OK.
+ */
+static int
+write_rite_header(mrb_state *mrb, int top, char* bin, uint32_t rbds)
+{
+ rite_binary_header *binary_header;
+ uint16_t crc;
+ int type = DUMP_TYPE_BIN;
+
+ binary_header = (rite_binary_header *)bin;
+
+ memcpy( binary_header, def_rite_binary_header, sizeof(*binary_header));
+
+ uint32_dump(rbds, (char *)binary_header->rbds, type);
+ uint16_dump((uint16_t)mrb->irep_len, (char *)binary_header->nirep, type);
+ uint16_dump((uint16_t)top, (char *)binary_header->sirep, type);
+
+ /* the CRC covers the header struct only and is written right behind it */
+ crc = calc_crc_16_ccitt((unsigned char *)binary_header, sizeof(*binary_header));
+ bin += sizeof(*binary_header);
+ uint16_dump(crc, bin, type);
+
+ return MRB_DUMP_OK;
+}
+
+/*
+ * Rewind fp and write the hex-text file header.
+ *
+ * The CRC is computed over the binary form of the header (built in a
+ * local rite_binary_header), while the header written to the file is
+ * the hex form with that CRC stored in its hcrc field.
+ *
+ * Returns MRB_DUMP_OK, MRB_DUMP_GENERAL_FAILURE if the seek fails, or
+ * MRB_DUMP_WRITE_FAULT if the write fails.
+ */
+static int
+dump_rite_header(mrb_state *mrb, int top, FILE* fp, uint32_t rbds)
+{
+ rite_binary_header binary_header;
+ rite_file_header file_header;
+ uint16_t crc;
+ int type;
+
+ if (fseek(fp, 0, SEEK_SET) != 0)
+ return MRB_DUMP_GENERAL_FAILURE;
+
+ /* calc crc */
+ memcpy( &binary_header, def_rite_binary_header, sizeof(binary_header));
+
+ type = DUMP_TYPE_BIN;
+ uint32_dump(rbds, (char *)&binary_header.rbds, type);
+ uint16_dump((uint16_t)mrb->irep_len, (char *)&binary_header.nirep, type);
+ uint16_dump((uint16_t)top, (char *)&binary_header.sirep, type);
+
+ crc = calc_crc_16_ccitt((unsigned char *)&binary_header, sizeof(binary_header));
+
+ /* dump rbc header */
+ memcpy( &file_header, def_rite_file_header, sizeof(file_header));
+
+ type = DUMP_TYPE_HEX;
+ uint32_dump(rbds, (char *)&file_header.rbds, type);
+ uint16_dump((uint16_t)mrb->irep_len, (char *)&file_header.nirep, type);
+ uint16_dump((uint16_t)top, (char *)&file_header.sirep, type);
+ uint16_dump(crc, (char *)&file_header.hcrc, type);
+
+ if (fwrite(&file_header, sizeof(file_header), 1, fp) != 1)
+ return MRB_DUMP_WRITE_FAULT;
+
+ return MRB_DUMP_OK;
+}
+
+/*
+ * Serialize the record for mrb->irep[irep_no] into bin in the given
+ * dump type.
+ *
+ * The record is the rlen field followed by each section (header, iseq,
+ * pool, symbols), each immediately followed by its CRC.  *rlen receives
+ * the record size excluding the rlen field itself.  The caller must
+ * provide at least get_irep_record_size() bytes at bin.
+ *
+ * Returns MRB_DUMP_OK, MRB_DUMP_INVALID_IREP for a null irep,
+ * MRB_DUMP_GENERAL_FAILURE for a zero record size, or the error from
+ * calc_crc_section().
+ */
+static int
+write_irep_record(mrb_state *mrb, int irep_no, char* bin, uint32_t *rlen, int type)
+{
+ uint32_t irep_record_size;
+ mrb_irep *irep = mrb->irep[irep_no];
+ int section;
+
+ if (irep == 0)
+ return MRB_DUMP_INVALID_IREP;
+
+ /* buf alloc */
+ irep_record_size = get_irep_record_size(mrb, irep_no, type);
+ if (irep_record_size == 0)
+ return MRB_DUMP_GENERAL_FAILURE;
+
+ memset( bin, 0, irep_record_size);
+
+ /* rlen */
+ *rlen = irep_record_size - DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, type);
+
+ bin += uint32_dump(*rlen, bin, type);
+
+ for (section = 0; section < DUMP_SECTION_NUM; section++) {
+ int rc;
+ uint16_t crc;
+
+ switch (section) {
+ case DUMP_IREP_HEADER: bin += write_irep_header(mrb, irep, bin, type); break;
+ case DUMP_ISEQ_BLOCK: bin += write_iseq_block(mrb, irep, bin, type); break;
+ case DUMP_POOL_BLOCK: bin += write_pool_block(mrb, irep, bin, type); break;
+ case DUMP_SYMS_BLOCK: bin += write_syms_block(mrb, irep, bin, type); break;
+ default: break;
+ }
+
+ /* the CRC is computed over a separate binary serialization of the section */
+ if ((rc = calc_crc_section(mrb, irep, &crc, section)) != 0)
+ return rc;
+
+ bin += uint16_dump(crc, bin, type); /* crc */
+ }
+
+ return MRB_DUMP_OK;
+}
+
+/*
+ * Serialize the record for mrb->irep[irep_no] in hex form and write it
+ * to fp.  *rlen receives the record length reported by
+ * write_irep_record().
+ *
+ * Returns MRB_DUMP_OK, MRB_DUMP_INVALID_IREP for a null irep,
+ * MRB_DUMP_GENERAL_FAILURE for a zero size or failed allocation,
+ * MRB_DUMP_WRITE_FAULT if fwrite() fails, or the error from
+ * write_irep_record().
+ */
+static int
+dump_irep_record(mrb_state *mrb, int irep_no, FILE* fp, uint32_t *rlen)
+{
+  mrb_irep *irep = mrb->irep[irep_no];
+  uint32_t record_size;
+  char *record;
+  int result;
+
+  if (irep == 0)
+    return MRB_DUMP_INVALID_IREP;
+
+  record_size = get_irep_record_size(mrb, irep_no, DUMP_TYPE_HEX);
+  if (record_size == 0)
+    return MRB_DUMP_GENERAL_FAILURE;
+
+  record = mrb_malloc(mrb, record_size);
+  if (record == 0)
+    return MRB_DUMP_GENERAL_FAILURE;
+  memset(record, 0, record_size);
+
+  result = write_irep_record(mrb, irep_no, record, rlen, DUMP_TYPE_HEX);
+  if (result == MRB_DUMP_OK && fwrite(record, record_size, 1, fp) != 1)
+    result = MRB_DUMP_WRITE_FAULT;
+
+  /* the scratch buffer is released on every path past the allocation */
+  mrb_free(mrb, record);
+  return result;
+}
+
+/*
+ * Serialize ireps top..irep_len-1 in binary form into bin.
+ *
+ * Space for the header and its CRC is skipped first, the records are
+ * written back to back, a 32-bit zero marks the end, and finally the
+ * header is filled in with the total number of bytes produced.
+ *
+ * Returns MRB_DUMP_INVALID_ARGUMENT for bad arguments, the first error
+ * from write_irep_record(), or the result of write_rite_header().
+ */
+static int
+mrb_write_irep(mrb_state *mrb, int top, char *bin)
+{
+  char *cursor;
+  uint32_t rlen = 0;  /* size of irep record */
+  int irep_no;
+
+  if (mrb == 0 || top < 0 || top >= mrb->irep_len || bin == 0)
+    return MRB_DUMP_INVALID_ARGUMENT;
+
+  cursor = bin + sizeof(rite_binary_header) + MRB_DUMP_SIZE_OF_SHORT/* crc */;
+
+  for (irep_no = top; irep_no < mrb->irep_len; irep_no++) {
+    int rc = write_irep_record(mrb, irep_no, cursor, &rlen, DUMP_TYPE_BIN);
+
+    if (rc != 0)
+      return rc;
+    /* rlen excludes the rlen field itself, so add it back */
+    cursor += (rlen + DUMP_SIZE(MRB_DUMP_SIZE_OF_LONG, DUMP_TYPE_BIN));
+  }
+
+  cursor += uint32_dump(0, cursor, DUMP_TYPE_BIN); /* end of file */
+
+  return write_rite_header(mrb, top, bin, (cursor - bin)); //TODO: Remove top(SIREP)
+}
+
+/*
+ * Dump ireps top..irep_len-1 to fp in hex-text form.
+ *
+ * A placeholder header is written first, then every record, then the
+ * end marker; dump_rite_header() finally seeks back to the start and
+ * writes the real header.  fp must therefore be seekable.
+ *
+ * Returns MRB_DUMP_OK, MRB_DUMP_INVALID_ARGUMENT, MRB_DUMP_WRITE_FAULT,
+ * or an error from dump_irep_record() / dump_rite_header().
+ */
+int
+mrb_dump_irep(mrb_state *mrb, int top, FILE* fp)
+{
+ int rc;
+ uint32_t rbds=0; /* size of Rite Binary Data */
+ uint32_t rlen=0; /* size of irep record */
+ int irep_no;
+
+ if (mrb == 0 || top < 0 || top >= mrb->irep_len || fp == 0)
+ return MRB_DUMP_INVALID_ARGUMENT;
+
+ if (fwrite(&def_rite_file_header, sizeof(rite_file_header), 1, fp) != 1) /* dummy write */
+ return MRB_DUMP_WRITE_FAULT;
+
+ for (irep_no=top; irep_no<mrb->irep_len; irep_no++) {
+ if ((rc = dump_irep_record(mrb, irep_no, fp, &rlen)) != 0)
+ return rc;
+
+ /* accumulates the per-record rlen values only */
+ rbds += rlen;
+ }
+
+ if (fwrite("00000000"/* end of file */, 8, 1, fp) != 1)
+ return MRB_DUMP_WRITE_FAULT;
+
+ rc = dump_rite_header(mrb, top, fp, rbds); //TODO: Remove top(SIREP)
+
+ return rc;
+}
+
+/*
+ * Emit ireps n..irep_len-1 as C source on f: a `const char` array named
+ * initname whose initializer is the binary dump, 16 bytes per line.
+ *
+ * Returns -1 for invalid arguments, MRB_DUMP_GENERAL_FAILURE if the
+ * image buffer cannot be allocated, otherwise the result of
+ * mrb_write_irep().  Nothing is printed unless that result is
+ * MRB_DUMP_OK.
+ */
+int
+mrb_bdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname)
+{
+  char *image;
+  int image_size = 0;
+  int irep_no;
+  int pos;
+  int rc;
+
+  if (mrb == 0 || n < 0 || n >= mrb->irep_len || f == 0 || initname == 0)
+    return -1;
+
+  /* header + header crc, every record, then the end-of-file marker */
+  image_size = sizeof(rite_binary_header) + MRB_DUMP_SIZE_OF_SHORT/* crc */;
+  for (irep_no = n; irep_no < mrb->irep_len; irep_no++)
+    image_size += get_irep_record_size(mrb, irep_no, DUMP_TYPE_BIN);
+  image_size += MRB_DUMP_SIZE_OF_LONG; /* end of file */
+
+  image = mrb_malloc(mrb, image_size);
+  if (image == 0)
+    return MRB_DUMP_GENERAL_FAILURE;
+
+  rc = mrb_write_irep(mrb, n, image);
+
+  if (rc == MRB_DUMP_OK) {
+    fprintf(f, "const char %s[] = {", initname);
+    for (pos = 0; pos < image_size; pos++) {
+      if (pos % 16 == 0)
+        fputs("\n", f);
+      fprintf(f, "0x%02x,", (unsigned char)image[pos]);
+    }
+    fputs("\n};\n", f);
+  }
+
+  mrb_free(mrb, image);
+
+  return rc;
+}
diff --git a/src/dump.h b/src/dump.h
new file mode 100644
index 000000000..410dfcd71
--- /dev/null
+++ b/src/dump.h
@@ -0,0 +1,118 @@
+#include "mruby.h"
+#include <stdio.h>
+#include <stdint.h>
+
+/*
+ * Public dump/load API.  Functions return one of the MRB_DUMP_* codes
+ * defined in this header where the implementation is visible
+ * (mrb_dump_irep, mrb_bdump_irep).
+ * NOTE(review): the loaders are defined elsewhere; confirm their return
+ * conventions there.
+ */
+int mrb_dump_irep(mrb_state*,int,FILE*);
+int mrb_load_irep(mrb_state*,FILE*);
+int mrb_load_irep_offset(mrb_state*,FILE*,long);
+int mrb_read_irep(mrb_state*,char*);
+
+/* Writes the binary dump as a C array definition named initname. */
+int mrb_bdump_irep(mrb_state *mrb, int n, FILE *f,const char *initname);
+
+/* dump type */
+#define DUMP_TYPE_CODE 0
+#define DUMP_TYPE_BIN 1
+#define DUMP_TYPE_HEX 2
+
+/* dump/load error code */
+#define MRB_DUMP_OK 0
+#define MRB_DUMP_GENERAL_FAILURE -1
+#define MRB_DUMP_WRITE_FAULT -2
+#define MRB_DUMP_READ_FAULT -3
+#define MRB_DUMP_CRC_ERROR -4
+#define MRB_DUMP_INVALID_FILE_HEADER -5
+#define MRB_DUMP_INVALID_IREP -6
+#define MRB_DUMP_INVALID_ARGUMENT -7
+
+/* size of long/int/short value on dump/load */
+#define MRB_DUMP_SIZE_OF_LONG 4
+#define MRB_DUMP_SIZE_OF_INT 4
+#define MRB_DUMP_SIZE_OF_SHORT 2
+
+/* null symbol length */
+#define MRB_DUMP_NULL_SYM_LEN 0xFFFF
+
+/* Use HEX format string */
+#define RITE_FILE_IS_HEX
+
+#ifdef RITE_FILE_IS_HEX
+#define RITE_FILE_HEX_SIZE 2
+#else
+#define RITE_FILE_HEX_SIZE 1
+#endif
+
+/* Rite Binary File header */
+#define RITE_FILE_IDENFIFIER "RITE"
+#define RITE_FILE_FORMAT_VER "00090000"
+#define RITE_VM_VER "00090000"
+#define RITE_COMPILER_TYPE "MATZ "
+#define RITE_COMPILER_VER "00090000"
+#define RITE_RESERVED " "
+
+/* irep header */
+#define RITE_IREP_IDENFIFIER 'S'
+#define RITE_IREP_TYPE_CLASS 'C'
+#define RITE_IREP_TYPE_MODULE 'M'
+
+#define MRB_DUMP_DEFAULT_STR_LEN 128
+
+//Rite Binary file_header
+/*
+ * Layout of the binary Rite header.  All members are unsigned char
+ * arrays, so the struct has no padding and can be overlaid on a byte
+ * buffer; numeric fields hold big-endian values written by
+ * uint16_to_bin()/uint32_to_bin().
+ */
+typedef struct _rite_binary_header {
+ unsigned char rbfi[4]; //Rite Binary File Identify
+ unsigned char rbfv[8]; //Rite Binary File Format Version
+ unsigned char risv[8]; //Rite Instruction Specification Version
+ unsigned char rct[8]; //Rite Compiler Type
+ unsigned char rcv[8]; //Rite Compiler Version
+ unsigned char rbds[4]; //Rite Binary Data Size
+ unsigned char nirep[2]; //Number of ireps
+ unsigned char sirep[2]; //Start index
+ unsigned char rsv[8]; //Reserved
+} rite_binary_header;
+
+// Rite File file_header
+/*
+ * Layout of the hex-text file header.  Compared with
+ * rite_binary_header, the numeric fields (rbds, nirep, sirep) are twice
+ * as wide because each byte is stored as two hex digits, and a header
+ * CRC field is appended.
+ */
+typedef struct _rite_file_header {
+ unsigned char rbfi[4]; //Rite Binary File Identify
+ unsigned char rbfv[8]; //Rite Binary File Format Version
+ unsigned char risv[8]; //Rite Instruction Specification Version
+ unsigned char rct[8]; //Rite Compiler Type
+ unsigned char rcv[8]; //Rite Compiler Version
+ unsigned char rbds[8]; //Rite Binary Data Size
+ unsigned char nirep[4]; //Number of ireps
+ unsigned char sirep[4]; //Start index
+ unsigned char rsv[8]; //Reserved
+ unsigned char hcrc[4]; //HCRC
+} rite_file_header;
+
+/*
+ * Store s at bin as two bytes, most significant first.
+ * Returns the number of bytes written.
+ */
+static inline int
+uint16_to_bin(uint16_t s, char *bin)
+{
+  bin[0] = (s >> 8) & 0xff;
+  bin[1] = s & 0xff;
+  return (MRB_DUMP_SIZE_OF_SHORT);
+}
+
+/*
+ * Store l at bin as four bytes, most significant first.
+ * Returns the number of bytes written.
+ */
+static inline int
+uint32_to_bin(uint32_t l, char *bin)
+{
+  int shift;
+
+  for (shift = 24; shift >= 0; shift -= 8)
+    *bin++ = (l >> shift) & 0xff;
+  return (MRB_DUMP_SIZE_OF_LONG);
+}
+
+/* Read four bytes at bin as a big-endian 32-bit value. */
+static inline uint32_t
+bin_to_uint32(unsigned char bin[])
+{
+  uint32_t value = 0;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    value = (value << 8) | (uint32_t)bin[i];
+  return value;
+}
+
+/* Read two bytes at bin as a big-endian 16-bit value. */
+static inline uint16_t
+bin_to_uint16(unsigned char bin[])
+{
+  uint16_t high = bin[0];
+  uint16_t low = bin[1];
+
+  return high << 8 | low;
+}
diff --git a/src/encoding.c b/src/encoding.c
new file mode 100644
index 000000000..db9a36425
--- /dev/null
+++ b/src/encoding.c
@@ -0,0 +1,1663 @@
+/**********************************************************************
+
+ encoding.c -
+
+ $Author: naruse $
+ created at: Thu May 24 17:23:27 JST 2007
+
+ Copyright (C) 2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include <ctype.h>
+#ifndef NO_LOCALE_CHARMAP
+#ifdef __CYGWIN__
+#include <windows.h>
+#endif
+#ifdef HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+#endif
+
+#define USE_UPPER_CASE_TABLE
+
+#include <ctype.h>
+#include <stdio.h>
+#include "regenc.h"
+#include "regint.h"
+#include "encoding.h"
+#include "st.h"
+#include <string.h>
+#include "mruby/numeric.h"
+#include "mruby/string.h"
+#include "mruby/array.h"
+#include "variable.h"
+#include "mruby/hash.h"
+
+#define pprintf printf
+#define mrb_warning printf
+#define mrb_bug printf
+#ifndef INT_MAX
+#define INT_MAX 2147483647
+#endif
+#define mrb_isascii(c) ((unsigned long)(c) < 128)
+#define OBJ_FREEZE(a)
+static mrb_sym id_encoding;
+//mrb_value mrb_cEncoding;
+static mrb_value mrb_encoding_list;
+
+/*
+ * One slot of the encoding table: the registered name, the encoding
+ * record owned by the slot, and the encoding it was replicated from
+ * (set by set_base_encoding()).
+ */
+struct mrb_encoding_entry {
+ const char *name;
+ mrb_encoding *enc;
+ mrb_encoding *base;
+};
+
+/*
+ * Process-wide encoding registry.  `list` has `size` allocated slots of
+ * which `count` are in use; `names` maps encoding names
+ * (case-insensitively, see st_init_strcasetable in mrb_enc_init) to
+ * table indexes.
+ */
+static struct {
+ struct mrb_encoding_entry *list;
+ int count;
+ int size;
+ st_table *names;
+} enc_table;
+
+void mrb_enc_init(mrb_state *mrb);
+
+/*
+ * Fixed table indexes of the encodings registered by mrb_enc_init().
+ * ENCINDEX_BUILTIN_MAX is the number of built-in encodings.
+ */
+enum {
+ ENCINDEX_ASCII,
+ ENCINDEX_UTF_8,
+ ENCINDEX_US_ASCII,
+ ENCINDEX_BUILTIN_MAX
+};
+#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
+#define ENCODING_NAMELEN_MAX 63
+#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
+#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2))
+
+//#define BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & T_MASK)
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef OTHER
+#define OTHER 2
+#endif
+
+#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
+
+/* Memory-size callback for encoding objects: always reports 0. */
+static size_t
+enc_memsize(mrb_state *mrb, const void *p)
+{
+ return 0;
+}
+
+/*
+ * Data-type tag for wrapped encoding objects; is_data_encoding()
+ * compares an object's type pointer against its address.
+ */
+static const struct mrb_data_type encoding_data_type = {
+ "encoding", 0,
+};
+#define is_data_encoding(obj) (DATA_TYPE(obj) == &encoding_data_type)
+
+// RUBY_IMMEDIATE_MASK = 0x03,
+//#define IMMEDIATE_MASK RUBY_IMMEDIATE_MASK
+//#define IMMEDIATE_P(x) ((VALUE)(x) & IMMEDIATE_MASK)
+//#define SPECIAL_CONST_P(x) (IMMEDIATE_P(x) || !RTEST(x))
+
+/*
+ * Wrap an mrb_encoding pointer in a data object of mrb->encode_class
+ * tagged with encoding_data_type.
+ */
+static mrb_value
+enc_new(mrb_state *mrb, mrb_encoding *encoding)
+{
+ return mrb_obj_value(Data_Wrap_Struct(mrb, mrb->encode_class, &encoding_data_type, encoding));
+}
+
+#define enc_autoload_p(enc) (!mrb_enc_mbmaxlen(enc))
+
+#define UNSPECIFIED_ENCODING INT_MAX
+
+
+/*
+ * Return the Encoding object stored at idx in mrb_encoding_list.
+ *
+ * Note that mrb_bug is #defined to printf in this file, so a missing
+ * list or entry is only reported; execution continues and the (nil)
+ * entry is returned.
+ */
+static mrb_value
+mrb_enc_from_encoding_index(mrb_state *mrb, int idx)
+{
+ mrb_value list, enc;
+
+ if (mrb_nil_p(list = mrb_encoding_list)) {
+ mrb_bug("mrb_enc_from_encoding_index(%d): no mrb_encoding_list", idx);
+ }
+ enc = mrb_ary_ref(mrb, list, idx);//mrb_ary_entry(list, idx);
+ if (mrb_nil_p(enc)) {
+ mrb_bug("mrb_enc_from_encoding_index(%d): not created yet", idx);
+ }
+ return enc;
+}
+
+/*
+ * Return the Encoding object for an mrb_encoding record, or nil when
+ * encoding is a null pointer.
+ */
+mrb_value
+mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *encoding)
+{
+  if (encoding == 0)
+    return mrb_nil_value();
+
+  return mrb_enc_from_encoding_index(mrb, ENC_TO_ENCINDEX(encoding));
+}
+
+static int enc_autoload(mrb_state *mrb, mrb_encoding *enc);
+/*
+ * Return the table index of enc, or -1 if enc is not the record
+ * registered at its own index.  An encoding flagged by
+ * enc_autoload_p() is loaded first and the index reported by
+ * enc_autoload() is returned instead.
+ */
+static int
+check_encoding(mrb_state *mrb, mrb_encoding *enc)
+{
+  int idx = mrb_enc_to_index(enc);
+
+  if (mrb_enc_from_index(mrb, idx) != enc)
+    return -1;
+
+  return enc_autoload_p(enc) ? enc_autoload(mrb, enc) : idx;
+}
+
+/*
+ * Return the encoding index of obj if it is a wrapped encoding object,
+ * otherwise -1.
+ */
+static int
+enc_check_encoding(mrb_state *mrb, mrb_value obj)
+{
+  if (!SPECIAL_CONST_P(obj) && is_data_encoding(obj))
+    return check_encoding(mrb, RDATA(obj)->data);
+
+  return -1;
+}
+
+/*
+ * Return the encoding index of enc; raises TypeError through
+ * mrb_raise() when enc is not an Encoding object.
+ */
+static int
+must_encoding(mrb_state *mrb, mrb_value enc)
+{
+  int idx = enc_check_encoding(mrb, enc);
+
+  if (idx >= 0)
+    return idx;
+
+  mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected Encoding)",
+            mrb_obj_classname(mrb, enc));
+  return idx;
+}
+
+/*
+ * Resolve enc to an encoding index without raising.
+ *
+ * An Encoding object yields its own index.  Otherwise enc must convert
+ * to a string whose encoding is ASCII compatible; that string is then
+ * looked up by name.  Returns -1 when enc is neither, and otherwise
+ * whatever mrb_enc_find_index() returns.
+ */
+int
+mrb_to_encoding_index(mrb_state *mrb, mrb_value enc)
+{
+ int idx;
+
+ idx = enc_check_encoding(mrb, enc);
+ if (idx >= 0) {
+ return idx;
+ }
+ else if (mrb_nil_p(enc = mrb_check_string_type(mrb, enc))) {
+ return -1;
+ }
+ if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
+ return -1;
+ }
+ //return mrb_enc_find_index(StringValueCStr(enc));
+ return mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
+
+}
+
+/*
+ * Look up an encoding by name.  enc is converted to a string; an
+ * ArgumentError is raised when the name's own encoding is not ASCII
+ * compatible or when no encoding of that name is found.
+ */
+static mrb_encoding *
+to_encoding(mrb_state *mrb, mrb_value enc)
+{
+ int idx;
+
+ //StringValue(enc);
+ mrb_string_value(mrb, &enc);
+
+ if (!mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, enc))) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid name encoding (non ASCII)");
+ }
+ //idx = mrb_enc_find_index(StringValueCStr(enc));
+ idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &enc));
+ if (idx < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %s", RSTRING_PTR(enc));
+ }
+ return mrb_enc_from_index(mrb, idx);
+}
+
+/*
+ * Convert enc to an mrb_encoding record: an Encoding object yields its
+ * wrapped record directly, anything else is resolved by name through
+ * to_encoding().
+ */
+mrb_encoding *
+mrb_to_encoding(mrb_state *mrb, mrb_value enc)
+{
+  if (enc_check_encoding(mrb, enc) < 0)
+    return to_encoding(mrb, enc);
+
+  return RDATA(enc)->data;
+}
+
+/*
+ * Ensure the encoding table has room for at least newsize entries.
+ * Capacity grows to the next multiple of 8 and the new slots are
+ * zero-filled.  Returns newsize, or -1 if the reallocation fails (the
+ * existing table is left untouched in that case).
+ */
+static int
+enc_table_expand(int newsize)
+{
+  struct mrb_encoding_entry *grown;
+  int capacity;
+
+  if (newsize <= enc_table.size)
+    return newsize;
+
+  capacity = (newsize + 7) / 8 * 8;
+  grown = realloc(enc_table.list, sizeof(*enc_table.list) * capacity);
+  if (grown == 0)
+    return -1;
+
+  /* clear only the slots that were just added */
+  memset(grown + enc_table.size, 0, sizeof(*grown) * (capacity - enc_table.size));
+  enc_table.list = grown;
+  enc_table.size = capacity;
+  return newsize;
+}
+
+/*
+ * Register `encoding` under `name` in table slot `index`.
+ *
+ * The slot must already exist (see enc_table_expand).  If the slot is
+ * unnamed it takes a private copy of name; if it is already named, the
+ * names must match case-insensitively or -1 is returned.  The slot's
+ * encoding record is allocated on first use and then either copied from
+ * `encoding` or zero-filled when `encoding` is null.  Finally the name
+ * is entered in the name table and, if the Ruby-level encoding list
+ * exists and has no object at `index`, one is created.
+ *
+ * Returns index on success, -1 for an invalid or conflicting name.
+ */
+static int
+enc_register_at(mrb_state *mrb, int index, const char *name, mrb_encoding *encoding)
+{
+ struct mrb_encoding_entry *ent = &enc_table.list[index];
+ mrb_value list;
+ mrb_value ref_ary;
+
+ if (!valid_encoding_name_p(name)) return -1;
+ if (!ent->name) {
+ /* NOTE(review): the strdup result is not checked for failure */
+ ent->name = name = strdup(name);
+ }
+ else if (STRCASECMP(name, ent->name)) {
+ return -1;
+ }
+ if (!ent->enc) {
+ ent->enc = xmalloc(sizeof(mrb_encoding));
+ }
+ if (encoding) {
+ *ent->enc = *encoding;
+ }
+ else {
+ memset(ent->enc, 0, sizeof(*ent->enc));
+ }
+ /* from here on `encoding` refers to the table-owned copy */
+ encoding = ent->enc;
+ encoding->name = name;
+ encoding->ruby_encoding_index = index;
+ st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
+ list = mrb_encoding_list;
+ //if (list && mrb_nil_p((mrb_ary_ref(mrb, list, index)))) {
+ if (list.tt) {
+ ref_ary = mrb_ary_ref(mrb, list, index);
+ if mrb_nil_p(ref_ary) {
+ /* initialize encoding data */
+ mrb_ary_set(mrb, list, index, enc_new(mrb, encoding));//rb_ary_store(list, index, enc_new(encoding));
+ }
+ }
+ return index;
+}
+
+
+/*
+ * Append a new encoding to the table: grow the table by one slot, bump
+ * the entry count, and register the encoding in the new last slot.
+ * Returns the new index, or -1 if the table cannot grow or
+ * enc_register_at() rejects the name.
+ */
+static int
+enc_register(mrb_state *mrb, const char *name, mrb_encoding *encoding)
+{
+  int new_count = enc_table_expand(enc_table.count + 1);
+
+  if (new_count < 0)
+    return -1;
+
+  enc_table.count = new_count;
+  return enc_register_at(mrb, new_count - 1, name, encoding);
+}
+
+static void set_encoding_const(mrb_state *mrb, const char *, mrb_encoding *);
+int mrb_enc_registered(const char *name);
+
+/* Raise ArgumentError if an encoding called name is already registered. */
+static void
+enc_check_duplication(mrb_state *mrb, const char *name)
+{
+  if (mrb_enc_registered(name) < 0)
+    return;
+
+  mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is already registered", name);
+}
+/*
+ * Record base as the base encoding of table slot index and propagate
+ * base's dummy flag to the slot's encoding.  Returns the slot's
+ * encoding.  index must be a valid slot.
+ */
+static mrb_encoding*
+set_base_encoding(int index, mrb_encoding *base)
+{
+  struct mrb_encoding_entry *slot = &enc_table.list[index];
+  mrb_encoding *enc = slot->enc;
+
+  slot->base = base;
+  if (mrb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
+  return enc;
+}
+
+/*
+ * Register a copy of `encoding` under the new name `name`.
+ *
+ * Raises ArgumentError (via enc_check_duplication) if the name is
+ * already taken.  Returns the index of the new encoding, or a negative
+ * value if registration fails; in that case no base encoding or
+ * constant is set, because a negative index must not be used to
+ * address the encoding table.
+ */
+int
+mrb_enc_replicate(mrb_state *mrb, const char *name, mrb_encoding *encoding)
+{
+  int idx;
+
+  enc_check_duplication(mrb, name);
+  idx = enc_register(mrb, name, encoding);
+  if (idx < 0)
+    return idx;
+
+  set_base_encoding(idx, encoding);
+  set_encoding_const(mrb, name, mrb_enc_from_index(mrb, idx));
+  return idx;
+}
+
+/* 15.2.40.2.17 */
+/*
+ * call-seq:
+ * enc.replicate(name) -> encoding
+ *
+ * Returns a replicated encoding of _enc_ whose name is _name_.
+ * The new encoding should have the same byte structure of _enc_.
+ * If _name_ is used by another encoding, raise ArgumentError.
+ *
+ */
+static mrb_value
+enc_replicate(mrb_state *mrb, mrb_value encoding)
+{
+ mrb_value name;
+ /* one required argument: the new name */
+ mrb_get_args(mrb, "o", &name);
+ /* the index returned by mrb_enc_replicate is mapped back to its Encoding object */
+ return mrb_enc_from_encoding_index(mrb,
+ //mrb_enc_replicate(mrb, StringValueCStr(name),
+ mrb_enc_replicate(mrb, mrb_string_value_cstr(mrb, &name),
+ mrb_to_encoding(mrb, encoding)));
+}
+/*
+ * Register origenc under name, either in a fresh slot (idx < 0) or in
+ * the existing slot idx.  On success the base encoding and the
+ * encoding constant are set up.  Returns the slot index, or a negative
+ * value if registration fails.
+ */
+static int
+enc_replicate_with_index(mrb_state *mrb, const char *name, mrb_encoding *origenc, int idx)
+{
+  int slot = (idx < 0) ? enc_register(mrb, name, origenc)
+                       : enc_register_at(mrb, idx, name, origenc);
+
+  if (slot < 0)
+    return slot;
+
+  set_base_encoding(slot, origenc);
+  set_encoding_const(mrb, name, mrb_enc_from_index(mrb, slot));
+  return slot;
+}
+/*
+ * Declare that the encoding `name` is a replica of the encoding
+ * `orig`.  If orig is not registered yet, a placeholder entry is
+ * registered for it first.  Returns the index of name, or a negative
+ * value on failure.
+ */
+int
+mrb_encdb_replicate(mrb_state *mrb, const char *name, const char *orig)
+{
+  int orig_slot = mrb_enc_registered(orig);
+  int name_slot = mrb_enc_registered(name);
+  mrb_encoding *orig_enc;
+
+  if (orig_slot < 0)
+    orig_slot = enc_register(mrb, orig, 0);   /* placeholder, no encoding data yet */
+
+  orig_enc = mrb_enc_from_index(mrb, orig_slot);
+  return enc_replicate_with_index(mrb, name, orig_enc, name_slot);
+}
+/*
+ * Define a new dummy encoding called name, replicated from the
+ * ASCII-8BIT encoding.  Returns its index, or a negative value if the
+ * replication fails; the encoding table is not touched in that case.
+ */
+int
+mrb_define_dummy_encoding(mrb_state *mrb, const char *name)
+{
+  int index = mrb_enc_replicate(mrb, name, mrb_ascii8bit_encoding(mrb));
+  mrb_encoding *enc;
+
+  if (index < 0)
+    return index;   /* a negative index must not address enc_table.list */
+
+  enc = enc_table.list[index].enc;
+  ENC_SET_DUMMY(enc);
+  return index;
+}
+
+/*
+ * Mark the encoding called name as a dummy encoding based on
+ * ASCII-8BIT, registering it first if necessary.  Returns its index,
+ * or a negative value if registration fails; the encoding table is not
+ * touched in that case.
+ */
+int
+mrb_encdb_dummy(mrb_state *mrb, const char *name)
+{
+  int index = enc_replicate_with_index(mrb, name, mrb_ascii8bit_encoding(mrb),
+                                       mrb_enc_registered(name));
+  mrb_encoding *enc;
+
+  if (index < 0)
+    return index;   /* a negative index must not address enc_table.list */
+
+  enc = enc_table.list[index].enc;
+  ENC_SET_DUMMY(enc);
+  return index;
+}
+
+/* 15.2.40.2.13 */
+/*
+ * call-seq:
+ * enc.dummy? -> true or false
+ *
+ * Returns true for dummy encodings.
+ * A dummy encoding is an encoding for which character handling is not properly
+ * implemented.
+ * It is used for stateful encodings.
+ *
+ * Encoding::ISO_2022_JP.dummy? #=> true
+ * Encoding::UTF_8.dummy? #=> false
+ *
+ */
+static mrb_value
+enc_dummy_p(mrb_state *mrb, mrb_value enc)
+{
+  mrb_encoding *e = enc_table.list[must_encoding(mrb, enc)].enc;
+
+  if (ENC_DUMMY_P(e)) return mrb_true_value();
+  return mrb_false_value();
+}
+
+/* 15.2.40.2.12 */
+/*
+ * call-seq:
+ * enc.ascii_compatible? -> true or false
+ *
+ * Returns whether ASCII-compatible or not.
+ *
+ * Encoding::UTF_8.ascii_compatible? #=> true
+ * Encoding::UTF_16BE.ascii_compatible? #=> false
+ *
+ */
+static mrb_value
+enc_ascii_compatible_p(mrb_state *mrb, mrb_value enc)
+{
+  mrb_encoding *e = enc_table.list[must_encoding(mrb, enc)].enc;
+
+  if (mrb_enc_asciicompat(mrb, e)) return mrb_true_value();
+  return mrb_false_value();
+}
+
+/*
+ * Maps a heap copy of `alias` to encoding index `idx` in the name table.
+ *
+ * Returns the duplicated name, or NULL when the copy could not be
+ * allocated (nothing is inserted in that case, so the table never gets a
+ * NULL key).
+ */
+static const char *
+enc_alias_internal(const char *alias, int idx)
+{
+  char *copy = strdup(alias);
+
+  if (!copy) return NULL;
+  st_insert(enc_table.names, (st_data_t)copy, (st_data_t)idx);
+  return copy;
+}
+
+/*
+ * Returns 1 when the encoding name starts with "UTF" and is not UTF-7, otherwise 0.
+ */
+/*
+ * Tests the encoding's name: true when it begins with "UTF" and its fifth
+ * character is not '7'.  The fourth character is checked for the string
+ * terminator first, so a name that is exactly "UTF" is rejected instead of
+ * being read one byte past its end.
+ */
+int
+mrb_enc_unicode_p(mrb_encoding *enc)
+{
+  const char *name = mrb_enc_name(enc);
+
+  if (name[0] != 'U' || name[1] != 'T' || name[2] != 'F') return 0;
+  if (name[3] == '\0') return 0;  /* name[4] would be out of bounds */
+  return name[4] != '7';
+}
+
+extern mrb_encoding OnigEncodingUTF_8;
+extern mrb_encoding OnigEncodingUS_ASCII;
+
+/*
+ * Sizes the encoding table, creates the case-insensitive name table when
+ * it does not exist yet, and registers the three built-in encodings at
+ * their fixed indexes.
+ */
+void
+mrb_enc_init(mrb_state *mrb)
+{
+  enc_table_expand(ENCODING_COUNT + 1);
+  if (enc_table.names == NULL) {
+    enc_table.names = st_init_strcasetable();
+  }
+  enc_register_at(mrb, ENCINDEX_ASCII, mrb_enc_name(&OnigEncodingASCII), &OnigEncodingASCII);
+  enc_register_at(mrb, ENCINDEX_UTF_8, mrb_enc_name(&OnigEncodingUTF_8), &OnigEncodingUTF_8);
+  enc_register_at(mrb, ENCINDEX_US_ASCII, mrb_enc_name(&OnigEncodingUS_ASCII), &OnigEncodingUS_ASCII);
+  enc_table.count = ENCINDEX_BUILTIN_MAX;
+}
+
+/*
+ * Looks up the encoding stored at `index`, initialising the table on first
+ * use.  Returns 0 for an index outside [0, enc_table.count).
+ */
+mrb_encoding *
+mrb_enc_from_index(mrb_state *mrb, int index)
+{
+  if (enc_table.list == NULL) mrb_enc_init(mrb);
+
+  if (index >= 0 && index < enc_table.count) {
+    return enc_table.list[index].enc;
+  }
+  return 0;
+}
+
+/*
+ * Returns the index recorded for `name` in the name table, or -1 when
+ * `name` is NULL, the encoding table is not initialised, or the name is
+ * unknown.
+ */
+int
+mrb_enc_registered(const char *name)
+{
+  st_data_t found = 0;
+
+  if (name == NULL || enc_table.list == NULL) return -1;
+  return st_lookup(enc_table.names, (st_data_t)name, &found) ? (int)found : -1;
+}
+
+/* Stub: ignores both arguments and always returns nil. */
+mrb_value
+mrb_require_safe(mrb_value fname, int safe)
+{
+  return mrb_nil_value();
+}
+/*
+ * Resolves `name` to a usable encoding index.
+ *
+ * The dynamic loading step (requiring "enc/<name>.so") is disabled in this
+ * port, so this only consults the table.  The previous body still
+ * dereferenced, rewrote and froze the `enclib` string even though its
+ * initialiser was commented out, i.e. it operated on an uninitialised
+ * value; that dead preparation code is removed.
+ *
+ * Returns the index of `name`, or -1 when the name is not registered or
+ * the registered entry is still flagged as needing autoload.
+ */
+static int
+load_encoding(const char *name)
+{
+  int idx = mrb_enc_registered(name);
+
+  if (idx < 0) return -1;
+  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
+  return idx;
+}
+
+/*
+ * Completes the registration of `enc`.  Without a base encoding it
+ * delegates to load_encoding().  With one, the base must be present in the
+ * table (and is itself autoloaded first when needed); `enc` is then
+ * registered in its own slot on top of the base.  Returns the slot index
+ * or -1 on failure.
+ */
+static int
+enc_autoload(mrb_state *mrb, mrb_encoding *enc)
+{
+  mrb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
+  int pos;
+
+  if (!base) {
+    return load_encoding(mrb_enc_name(enc));
+  }
+  /* the base has to be one of the table entries */
+  for (pos = 0; ; ++pos) {
+    if (pos >= enc_table.count) return -1;
+    if (enc_table.list[pos].enc == base) break;
+  }
+  if (enc_autoload_p(base) && enc_autoload(mrb, base) < 0) return -1;
+  pos = ENC_TO_ENCINDEX(enc);
+  enc_register_at(mrb, pos, mrb_enc_name(enc), base);
+  return pos;
+}
+
+/*
+ * Maps an encoding name to its table index.
+ *
+ * Unregistered names go through load_encoding().  A registered name whose
+ * slot holds no encoding raises ArgumentError unless the index is
+ * UNSPECIFIED_ENCODING.  An entry that still needs autoloading is loaded;
+ * when that fails a message is printed and 0 is returned.
+ */
+int
+mrb_enc_find_index(mrb_state *mrb, const char *name)
+{
+  int i = mrb_enc_registered(name);
+  mrb_encoding *enc;
+
+  if (i < 0) return load_encoding(name);
+
+  enc = mrb_enc_from_index(mrb, i);
+  if (!enc) {
+    if (i != UNSPECIFIED_ENCODING) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "encoding %s is not registered", name);
+    }
+    return i;
+  }
+  if (enc_autoload_p(enc) && enc_autoload(mrb, enc) < 0) {
+    printf("failed to load encoding (%s); use ASCII-8BIT instead", name);
+    return 0;
+  }
+  return i;
+}
+
+/* Finds an encoding by name; a negative lookup result falls back to index 0. */
+mrb_encoding *
+mrb_enc_find(mrb_state *mrb, const char *name)
+{
+  int idx = mrb_enc_find_index(mrb, name);
+
+  return mrb_enc_from_index(mrb, idx < 0 ? 0 : idx);
+}
+
+/*
+ * Tells whether `obj` can carry an encoding: symbols among the special
+ * constants, strings, regexes, files, and data objects that wrap an
+ * encoding.
+ */
+static inline int
+enc_capable(mrb_value obj)
+{
+  if (SPECIAL_CONST_P(obj)) return (mrb_type(obj) == MRB_TT_SYMBOL);
+
+  if (mrb_type(obj) == MRB_TT_STRING ||
+      mrb_type(obj) == MRB_TT_REGEX ||
+      mrb_type(obj) == MRB_TT_FILE) {
+    return TRUE;
+  }
+  if (mrb_type(obj) == MRB_TT_DATA && is_data_encoding(obj)) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+ * Interns the name "encoding", stores the result in id_encoding (declared
+ * outside this function) and returns it.  The intern call is repeated on
+ * every invocation.
+ */
+mrb_sym
+mrb_id_encoding(mrb_state *mrb)
+{
+ //CONST_ID(id_encoding, "encoding");
+ id_encoding = mrb_intern(mrb, "encoding");
+ return id_encoding;
+}
+
+/*
+ * Returns the encoding index attached to `obj`, or -1 when none can be
+ * determined.
+ *
+ * - Special constants other than symbols yield -1; a symbol is replaced by
+ *   a string built from its name.
+ * - Strings, regexes and any type not listed explicitly read the index
+ *   stored inline in the object flags; the value ENCODING_INLINE_MAX means
+ *   the real index lives in the "encoding" instance variable.
+ * - Files are asked for their internal, then external, encoding and the
+ *   answer is handled like a data object.
+ * - Data objects wrapping an encoding are resolved through
+ *   enc_check_encoding(); other data objects use the default path.
+ */
+int
+mrb_enc_get_index(mrb_state *mrb, mrb_value obj)
+{
+  int i = -1;
+  mrb_value tmp;
+
+  if (SPECIAL_CONST_P(obj)) {
+    if (mrb_type(obj) != MRB_TT_SYMBOL) return -1;
+    obj = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, SYM2ID(obj)));
+  }
+  switch (mrb_type(obj)) {
+  as_default:
+  default:
+  case MRB_TT_STRING:
+  case MRB_TT_REGEX:
+    i = (int)ENCODING_GET_INLINED(obj);
+    if (i == ENCODING_INLINE_MAX) {
+      /* index did not fit inline; it is kept in an instance variable */
+      mrb_value iv;
+
+      iv = mrb_iv_get(mrb, obj, mrb_id_encoding(mrb));
+      i = mrb_fixnum(iv);
+    }
+    break;
+
+  case MRB_TT_FILE:
+    tmp = mrb_funcall(mrb, obj, "internal_encoding", 0, 0);
+    if (mrb_nil_p(tmp)) obj = mrb_funcall(mrb, obj, "external_encoding", 0, 0);
+    else obj = tmp;
+    if (mrb_nil_p(obj)) break;
+    /* fall through: obj now holds the encoding reported by the file */
+  case MRB_TT_DATA:
+    if (is_data_encoding(obj)) {
+      i = enc_check_encoding(mrb, obj);
+    }
+    else {
+      goto as_default;
+    }
+    break;
+  }
+  return i;
+}
+
+/*
+ * Stores encoding index `idx` on `obj`.  Indexes below ENCODING_INLINE_MAX
+ * go into the object flags; larger ones set the inline field to
+ * ENCODING_INLINE_MAX and keep the real value in the "encoding" instance
+ * variable.
+ */
+void
+mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int idx)
+{
+  if (idx >= ENCODING_INLINE_MAX) {
+    ENCODING_SET_INLINED(obj, ENCODING_INLINE_MAX);
+    mrb_iv_set(mrb, obj, mrb_id_encoding(mrb), mrb_fixnum_value(idx));
+  }
+  else {
+    ENCODING_SET_INLINED(obj, idx);
+  }
+}
+
+/*
+ * Attaches encoding index `idx` to `obj` and returns `obj`.  Nothing is
+ * done when the index is already in place.  Special constants raise
+ * ArgumentError.  The cached code range is cleared unless the object is
+ * ASCII-only and the new encoding is ASCII-compatible.
+ */
+mrb_value
+mrb_enc_associate_index(mrb_state *mrb, mrb_value obj, int idx)
+{
+  if (mrb_enc_get_index(mrb, obj) != idx) {
+    if (SPECIAL_CONST_P(obj)) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "cannot set encoding");
+    }
+    if (!(ENC_CODERANGE_ASCIIONLY(obj) &&
+          mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, idx)))) {
+      ENC_CODERANGE_CLEAR(obj);
+    }
+    mrb_enc_set_index(mrb, obj, idx);
+  }
+  return obj;
+}
+
+/* Attaches `enc` to `obj` by way of the encoding's table index. */
+mrb_value
+mrb_enc_associate(mrb_state *mrb, mrb_value obj, mrb_encoding *enc)
+{
+  int idx = mrb_enc_to_index(enc);
+
+  return mrb_enc_associate_index(mrb, obj, idx);
+}
+
+/* Returns the encoding attached to `obj` (whatever mrb_enc_from_index yields for its index). */
+mrb_encoding*
+mrb_enc_get(mrb_state *mrb, mrb_value obj)
+{
+  int idx = mrb_enc_get_index(mrb, obj);
+
+  return mrb_enc_from_index(mrb, idx);
+}
+
+/*
+ * Returns the encoding shared by both objects, raising EncodingError with
+ * the two encoding names when mrb_enc_compatible() finds none.
+ */
+mrb_encoding*
+mrb_enc_check(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  mrb_encoding *enc = mrb_enc_compatible(mrb, str1, str2);
+
+  if (enc) return enc;
+
+  mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
+            mrb_enc_name(mrb_enc_get(mrb, str1)),
+            mrb_enc_name(mrb_enc_get(mrb, str2)));
+  return enc;
+}
+
+/*
+ * Decides which encoding, if any, the two objects can share.
+ *
+ * Returns 0 when either object yields a negative encoding index or when
+ * none of the rules below applies.  Otherwise:
+ *  - identical indexes: that encoding;
+ *  - str2 is an empty String: the encoding of str1;
+ *  - str1 is an empty String: the encoding of str2;
+ *  - both encodings must then be ASCII-compatible, and the result is
+ *    picked from the code ranges of the String operands.
+ */
+mrb_encoding*
+mrb_enc_compatible(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+ int idx1, idx2;
+ mrb_encoding *enc1, *enc2;
+
+ idx1 = mrb_enc_get_index(mrb, str1);
+ idx2 = mrb_enc_get_index(mrb, str2);
+
+ if (idx1 < 0 || idx2 < 0)
+ return 0;
+
+ if (idx1 == idx2) {
+ return mrb_enc_from_index(mrb, idx1);
+ }
+ enc1 = mrb_enc_from_index(mrb, idx1);
+ enc2 = mrb_enc_from_index(mrb, idx2);
+
+ if (mrb_type(str2) == MRB_TT_STRING && RSTRING_LEN(str2) == 0)
+ //return (idx1 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc2)) ? enc2 : enc1;
+ return enc1;
+ if (mrb_type(str1) == MRB_TT_STRING && RSTRING_LEN(str1) == 0)
+ //return (idx2 == ENCINDEX_US_ASCII && mrb_enc_asciicompat(mrb, enc1)) ? enc1 : enc2;
+ return enc2;
+ if (!mrb_enc_asciicompat(mrb, enc1) || !mrb_enc_asciicompat(mrb, enc2)) {
+ return 0;
+ }
+
+ /* objects whose encoding is the same of contents */
+ //if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ != MRB_TT_STRING && idx2 == ENCINDEX_US_ASCII)
+ //return enc1;
+ //if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING && idx1 == ENCINDEX_US_ASCII)
+ //return enc2;
+
+ /* make str1 the String operand when only one of the two is a String */
+ /* NOTE(review): str1/str2 and idx1/idx2 are exchanged here but enc1/enc2
+  * are not, so the returns below still refer to the pre-swap encodings;
+  * confirm this is intended. */
+ if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ != MRB_TT_STRING) {
+ mrb_value tmp = str1;
+ int idx0 = idx1;
+ str1 = str2;
+ str2 = tmp;
+ idx1 = idx2;
+ idx2 = idx0;
+ }
+ if (mrb_type(str1)/*BUILTIN_TYPE(str1)*/ == MRB_TT_STRING) {
+ int cr1, cr2;
+
+ cr1 = mrb_enc_str_coderange(mrb, str1);
+ if (mrb_type(str2)/*BUILTIN_TYPE(str2)*/ == MRB_TT_STRING) {
+ cr2 = mrb_enc_str_coderange(mrb, str2);
+ if (cr1 != cr2) {
+ /* may need to handle ENC_CODERANGE_BROKEN */
+ if (cr1 == ENC_CODERANGE_7BIT) return enc2;
+ if (cr2 == ENC_CODERANGE_7BIT) return enc1;
+ }
+ if (cr2 == ENC_CODERANGE_7BIT) {
+ if (idx1 == ENCINDEX_ASCII) return enc2;
+ return enc1;
+ }
+ }
+ if (cr1 == ENC_CODERANGE_7BIT)
+ return enc2;
+ }
+ return 0; /* no rule matched */
+}
+
+/* Gives obj1 the encoding index currently attached to obj2. */
+void
+mrb_enc_copy(mrb_state *mrb, mrb_value obj1, mrb_value obj2)
+{
+  int src_idx = mrb_enc_get_index(mrb, obj2);
+
+  mrb_enc_associate_index(mrb, obj1, src_idx);
+}
+
+
+/*
+ * call-seq:
+ * obj.encoding -> encoding
+ *
+ * Returns the Encoding object that represents the encoding of obj.
+ */
+
+mrb_value
+mrb_obj_encoding(mrb_state *mrb, mrb_value obj)
+{
+  mrb_encoding *found = mrb_enc_get(mrb, obj);
+
+  if (found == 0) {
+    /* no encoding attached to the object */
+    mrb_raise(mrb, E_TYPE_ERROR, "unknown encoding");
+  }
+  return mrb_enc_from_encoding(mrb, found);
+}
+
+/* Length of the character at `p` as reported by ONIGENC_MBC_ENC_LEN. */
+int
+mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc)
+{
+  UChar *head = (UChar*)p;
+  UChar *tail = (UChar*)e;
+
+  return ONIGENC_MBC_ENC_LEN(enc, head, tail);
+}
+
+/*
+ * Byte length of the character starting at `p`.  When no complete
+ * character is found within [p, e), falls back to the encoding's minimum
+ * character length, capped at the bytes remaining.
+ */
+int
+mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc)
+{
+  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
+
+  if (!MBCLEN_CHARFOUND_P(n) || MBCLEN_CHARFOUND_LEN(n) > e-p) {
+    int min = mrb_enc_mbminlen(enc);
+    return min <= e-p ? min : (int)(e-p);
+  }
+  return MBCLEN_CHARFOUND_LEN(n);
+}
+
+/*
+ * Precise length result for the character at `p`.  An empty range, or a
+ * character that would extend past `e`, is reported as "need more bytes".
+ */
+int
+mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc)
+{
+  if (p < e) {
+    int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
+
+    if (e-p < n)
+      return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
+    return n;
+  }
+  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
+}
+
+/*
+ * Reads one ASCII character at `p`.  Returns its code and, when `len` is
+ * non-null, stores the number of bytes it occupies.  Returns -1 for an
+ * empty range, an incomplete or invalid character, or a non-ASCII one.
+ */
+int
+mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc)
+{
+  unsigned int code, clen;
+
+  if (e <= p)
+    return -1;
+
+  if (!mrb_enc_asciicompat(mrb, enc)) {
+    /* decode a full character, then test it */
+    clen = mrb_enc_precise_mbclen(p, e, enc);
+    if (!MBCLEN_CHARFOUND_P(clen))
+      return -1;
+    code = mrb_enc_mbc_to_codepoint(p, e, enc);
+    if (!mrb_enc_isascii(code, enc))
+      return -1;
+    if (len) *len = clen;
+    return code;
+  }
+
+  /* ASCII-compatible: a single byte is enough */
+  code = (unsigned char)*p;
+  if (!ISASCII(code))
+    return -1;
+  if (len) *len = 1;
+  return code;
+}
+
+/*
+ * Decodes the code point at `p`; when `len_p` is non-null it receives the
+ * character's byte length.  Raises ArgumentError for an empty range or an
+ * invalid byte sequence.
+ */
+unsigned int
+mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len_p, mrb_encoding *enc)
+{
+  int r;
+
+  if (e <= p)
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
+  r = mrb_enc_precise_mbclen(p, e, enc);
+  if (!MBCLEN_CHARFOUND_P(r)) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(enc));
+    return 0;
+  }
+  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
+  return mrb_enc_mbc_to_codepoint(p, e, enc);
+}
+
+#undef mrb_enc_codepoint
+/* Function form of mrb_enc_codepoint: decodes the code point without reporting its length. */
+unsigned int
+mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc)
+{
+  unsigned int code = mrb_enc_codepoint_len(mrb, p, e, 0, enc);
+
+  return code;
+}
+
+/* Byte length needed to encode code point `c`; raises ArgumentError when the encoding reports 0. */
+int
+mrb_enc_codelen(mrb_state *mrb, int c, mrb_encoding *enc)
+{
+  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
+
+  if (n != 0) return n;
+
+  mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid codepoint 0x%x in %s", c, mrb_enc_name(enc));
+  return n;
+}
+
+/* Upper-cases ASCII codes only; `enc` is not consulted. */
+int
+mrb_enc_toupper(int c, mrb_encoding *enc)
+{
+  if (ONIGENC_IS_ASCII_CODE(c)) {
+    return ONIGENC_ASCII_CODE_TO_UPPER_CASE(c);
+  }
+  return c;
+}
+
+/* Lower-cases ASCII codes only; `enc` is not consulted. */
+int
+mrb_enc_tolower(int c, mrb_encoding *enc)
+{
+  if (ONIGENC_IS_ASCII_CODE(c)) {
+    return ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
+  }
+  return c;
+}
+
+/* 15.2.40.2.14 */
+/*
+ * call-seq:
+ * enc.inspect -> string
+ *
+ * Returns a string which represents the encoding for programmers.
+ *
+ * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
+ * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
+ */
+/*
+ * Builds "#<ClassName:ENCODING-NAME>" (with " (dummy)" appended for dummy
+ * encodings) and returns it as a US-ASCII, 7-bit string.
+ *
+ * The text is formatted with snprintf so a long class or encoding name
+ * cannot write past the 256-byte stack buffer; output that does not fit is
+ * truncated to the buffer size.
+ */
+static mrb_value
+enc_inspect(mrb_state *mrb, mrb_value self)
+{
+  mrb_encoding *enc = (mrb_encoding*)(DATA_PTR(self));
+  mrb_value str;
+  char buf[256];
+  int n;
+
+  n = snprintf(buf, sizeof(buf), "#<%s:%s%s>", mrb_obj_classname(mrb, self),
+               mrb_enc_name(enc),
+               (mrb_enc_dummy_p(enc) ? " (dummy)" : ""));
+  if (n < 0) {
+    n = 0;                       /* formatting error: return an empty string */
+  }
+  else if ((size_t)n >= sizeof(buf)) {
+    n = (int)sizeof(buf) - 1;    /* output was truncated */
+  }
+  str = mrb_str_new(mrb, buf, n);
+  ENCODING_CODERANGE_SET(mrb, str, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
+  return str;
+}
+
+/* 15.2.40.2.15 */
+/* 15.2.40.2.18 */
+/*
+ * call-seq:
+ * enc.name -> string
+ *
+ * Returns the name of the encoding.
+ *
+ * Encoding::UTF_8.name #=> "UTF-8"
+ */
+static mrb_value
+enc_name(mrb_state *mrb, mrb_value self)
+{
+  mrb_encoding *enc = (mrb_encoding*)DATA_PTR(self);
+
+  return mrb_usascii_str_new2(mrb, mrb_enc_name(enc));
+}
+
+/*
+ * Name-table iterator for enc_names: args[0] holds the wanted index as a
+ * fixnum, args[1] the array that collects every name mapped to it.
+ */
+static int
+enc_names_i(mrb_state *mrb, st_data_t name, st_data_t idx, st_data_t args)
+{
+  mrb_value *ctx = (mrb_value *)args;
+  int wanted = mrb_fixnum(ctx[0]);
+
+  if ((int)idx != wanted) return ST_CONTINUE;
+
+  mrb_ary_push(mrb, ctx[1], mrb_usascii_str_new2(mrb, (char *)name));
+  return ST_CONTINUE;
+}
+
+/* 15.2.40.2.16 */
+/*
+ * call-seq:
+ * enc.names -> array
+ *
+ * Returns the list of name and aliases of the encoding.
+ *
+ * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
+ */
+static mrb_value
+enc_names(mrb_state *mrb, mrb_value self)
+{
+  mrb_value ctx[2];
+
+  /* ctx[0]: index to match, ctx[1]: result array filled by enc_names_i */
+  ctx[0] = mrb_fixnum_value(mrb_to_encoding_index(mrb, self));
+  ctx[1] = mrb_ary_new_capa(mrb, 0);
+  st_foreachNew(mrb, enc_table.names, enc_names_i, ctx);
+  return ctx[1];
+}
+
+/* 15.2.40.2.8 */
+/*
+ * call-seq:
+ * Encoding.list -> [enc1, enc2, ...]
+ *
+ * Returns the list of loaded encodings.
+ *
+ * Encoding.list
+ * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
+ * #<Encoding:ISO-2022-JP (dummy)>]
+ *
+ * Encoding.find("US-ASCII")
+ * #=> #<Encoding:US-ASCII>
+ *
+ * Encoding.list
+ * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
+ * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
+ *
+ */
+static mrb_value
+enc_list(mrb_state *mrb, mrb_value klass)
+{
+  struct RArray *master = (struct RArray *)mrb_encoding_list.value.p;
+  mrb_value copy = mrb_ary_new_capa(mrb, 0);
+
+  /* hand out a fresh array filled from the first enc_table.count entries */
+  mrb_ary_replace(mrb, mrb_ary_ptr(copy), master->buf, enc_table.count);
+  return copy;
+}
+
+/* 15.2.40.2.7 */
+/*
+ * call-seq:
+ * Encoding.find(string) -> enc
+ * Encoding.find(symbol) -> enc
+ *
+ * Search the encoding with specified <i>name</i>.
+ * <i>name</i> should be a string or symbol.
+ *
+ * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
+ * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
+ *
+ * Names which this method accepts are encoding names and aliases,
+ * including the following special aliases:
+ *
+ * "external":: default external encoding
+ * "internal":: default internal encoding
+ * "locale":: locale encoding
+ * "filesystem":: filesystem encoding
+ *
+ * An ArgumentError is raised when there is no encoding named <i>name</i>.
+ * Only <code>Encoding.find("internal")</code> however returns nil
+ * when no encoding named "internal", in other words, when Ruby has no
+ * default internal encoding.
+ */
+static mrb_value
+enc_find(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value wanted;
+  mrb_encoding *found;
+
+  mrb_get_args(mrb, "o", &wanted);
+  found = to_encoding(mrb, wanted);
+  return mrb_enc_from_encoding(mrb, found);
+}
+
+/* 15.2.40.2.2 */
+/*
+ * call-seq:
+ * Encoding.compatible?(str1, str2) -> enc or nil
+ *
+ * Checks the compatibility of two strings.
+ * If they are compatible, meaning they can be concatenated,
+ * returns an encoding which the concatenated string will be.
+ * If they are not compatible, nil is returned.
+ *
+ * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
+ * #=> #<Encoding:ISO-8859-1>
+ *
+ * Encoding.compatible?(
+ * "\xa1".force_encoding("iso-8859-1"),
+ * "\xa1\xa1".force_encoding("euc-jp"))
+ * #=> nil
+ *
+ */
+static mrb_value
+enc_compatible_p(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value a, b;
+  mrb_encoding *shared;
+
+  mrb_get_args(mrb, "oo", &a, &b);
+  /* objects that cannot carry an encoding are never compatible */
+  if (!enc_capable(a) || !enc_capable(b)) return mrb_nil_value();
+
+  shared = mrb_enc_compatible(mrb, a, b);
+  return shared ? mrb_enc_from_encoding(mrb, shared) : mrb_nil_value();
+}
+
+/* 15.2.40.2.19 */
+/* :nodoc: */
+/* Dumps an encoding as its name, exactly as enc_name does. */
+static mrb_value
+enc_dump(mrb_state *mrb, mrb_value self)
+{
+  return mrb_usascii_str_new2(mrb, mrb_enc_name((mrb_encoding*)DATA_PTR(self)));
+}
+
+/* 15.2.40.2.11 */
+/* :nodoc: */
+/* Loads an encoding from its dumped name: the same lookup enc_find performs on the argument. */
+static mrb_value
+enc_load(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value dumped;
+
+  mrb_get_args(mrb, "o", &dumped);
+  return mrb_enc_from_encoding(mrb, to_encoding(mrb, dumped));
+}
+
+/* Returns the encoding stored at ENCINDEX_ASCII, initialising the table on first use. */
+mrb_encoding *
+mrb_ascii8bit_encoding(mrb_state *mrb)
+{
+  if (enc_table.list == NULL) mrb_enc_init(mrb);
+
+  return enc_table.list[ENCINDEX_ASCII].enc;
+}
+
+/* Returns ENCINDEX_ASCII, the table slot that mrb_ascii8bit_encoding() reads. */
+int
+mrb_ascii8bit_encindex(void)
+{
+ return ENCINDEX_ASCII;
+}
+
+/* Returns the encoding stored at ENCINDEX_UTF_8, initialising the table on first use. */
+mrb_encoding *
+mrb_utf8_encoding(mrb_state *mrb)
+{
+  if (enc_table.list == NULL) mrb_enc_init(mrb);
+
+  return enc_table.list[ENCINDEX_UTF_8].enc;
+}
+
+/* Returns ENCINDEX_UTF_8, the table slot that mrb_utf8_encoding() reads. */
+int
+mrb_utf8_encindex(void)
+{
+ return ENCINDEX_UTF_8;
+}
+
+/* Returns the encoding stored at ENCINDEX_US_ASCII, initialising the table on first use. */
+mrb_encoding *
+mrb_usascii_encoding(mrb_state *mrb)
+{
+  if (enc_table.list == NULL) mrb_enc_init(mrb);
+
+  return enc_table.list[ENCINDEX_US_ASCII].enc;
+}
+
+/* Returns ENCINDEX_US_ASCII, the table slot that mrb_usascii_encoding() reads. */
+int
+mrb_usascii_encindex(void)
+{
+ return ENCINDEX_US_ASCII;
+}
+
+/*
+ * Resolves the locale charmap to an encoding index: US-ASCII when no
+ * charmap is available, ASCII-8BIT when the charmap names an unknown
+ * encoding.  Registers the "locale" alias the first time through.
+ */
+int
+mrb_locale_encindex(mrb_state *mrb)
+{
+  mrb_value charmap = mrb_locale_charmap(mrb, mrb_obj_value(mrb->encode_class));
+  int idx;
+
+  if (mrb_nil_p(charmap)) {
+    idx = mrb_usascii_encindex();
+  }
+  else {
+    idx = mrb_enc_find_index(mrb, mrb_string_value_cstr(mrb, &charmap));
+    if (idx < 0) idx = mrb_ascii8bit_encindex();
+  }
+
+  if (mrb_enc_registered("locale") < 0) {
+    enc_alias_internal("locale", idx);
+  }
+  return idx;
+}
+
+/* Returns the encoding selected by mrb_locale_encindex(). */
+mrb_encoding *
+mrb_locale_encoding(mrb_state *mrb)
+{
+  int idx = mrb_locale_encindex(mrb);
+
+  return mrb_enc_from_index(mrb, idx);
+}
+
+/*
+ * Chooses the filesystem encoding and registers it under the alias
+ * "filesystem".  Returns the chosen index.
+ *
+ * On Windows/Cygwin the code-page query is disabled in this port (the
+ * snprintf that built "CP<n>" is commented out), so the lookup used to
+ * read an uninitialised buffer.  That branch now goes straight to
+ * ASCII-8BIT, the fallback the lookup path used for unknown names.  Every
+ * other configuration uses the default external encoding.
+ */
+static int
+enc_set_filesystem_encoding(mrb_state *mrb)
+{
+  int idx;
+#if defined NO_LOCALE_CHARMAP
+  idx = mrb_enc_to_index(mrb_default_external_encoding(mrb));
+#elif defined _WIN32 || defined __CYGWIN__
+  /* TODO: restore "CP%d" from AreFileApisANSI() ? GetACP() : GetOEMCP() */
+  idx = mrb_ascii8bit_encindex();
+#else
+  idx = mrb_enc_to_index(mrb_default_external_encoding(mrb));
+#endif
+
+  enc_alias_internal("filesystem", idx);
+  return idx;
+}
+
+/* Index registered under the "filesystem" alias, or ASCII-8BIT when the alias is missing. */
+int
+mrb_filesystem_encindex(void)
+{
+  int idx = mrb_enc_registered("filesystem");
+
+  return (idx < 0) ? mrb_ascii8bit_encindex() : idx;
+}
+
+/* Returns the encoding selected by mrb_filesystem_encindex(). */
+mrb_encoding *
+mrb_filesystem_encoding(mrb_state *mrb)
+{
+  int idx = mrb_filesystem_encindex();
+
+  return mrb_enc_from_index(mrb, idx);
+}
+
+/*
+ * A default-encoding setting: the table index plus an encoding pointer
+ * that the accessor functions fill in on demand from that index.
+ */
+struct default_encoding {
+ int index; /* -2 => not yet set, -1 => nil */
+ mrb_encoding *enc;
+};
+
+/* NOTE(review): {0} starts with index 0, not the "not yet set" value -2
+ * used for default_internal; confirm this is intended. */
+static struct default_encoding default_external = {0};
+
+/*
+ * Updates the default-encoding record `def` and the alias `name` in the
+ * name table.  A nil `encoding` clears the setting and maps the alias to
+ * UNSPECIFIED_ENCODING.  Changing the external default also refreshes the
+ * filesystem encoding.  Returns TRUE when a value had been set before.
+ */
+static int
+enc_set_default_encoding(mrb_state *mrb, struct default_encoding *def, mrb_value encoding, const char *name)
+{
+  /* anything other than -2 means a value was already in place */
+  int overridden = (def->index != -2) ? TRUE : FALSE;
+
+  if (!mrb_nil_p(encoding)) {
+    def->index = mrb_enc_to_index(mrb_to_encoding(mrb, encoding));
+    def->enc = 0;
+    enc_alias_internal(name, def->index);
+  }
+  else {
+    def->index = -1;
+    def->enc = 0;
+    st_insert(enc_table.names, (st_data_t)strdup(name),
+              (st_data_t)UNSPECIFIED_ENCODING);
+  }
+
+  if (def == &default_external) {
+    enc_set_filesystem_encoding(mrb);
+  }
+  return overridden;
+}
+
+/*
+ * Returns the default external encoding, caching the pointer once it has
+ * been resolved from the stored index.  A negative stored index falls back
+ * to the locale encoding.
+ */
+mrb_encoding *
+mrb_default_external_encoding(mrb_state *mrb)
+{
+  if (!default_external.enc) {
+    if (default_external.index < 0) {
+      return mrb_locale_encoding(mrb);
+    }
+    default_external.enc = mrb_enc_from_index(mrb, default_external.index);
+  }
+  return default_external.enc;
+}
+
+/* Wraps the default external encoding as a Ruby-level value. */
+mrb_value
+mrb_enc_default_external(mrb_state *mrb)
+{
+  mrb_encoding *ext = mrb_default_external_encoding(mrb);
+
+  return mrb_enc_from_encoding(mrb, ext);
+}
+
+/* 15.2.40.2.3 */
+/*
+ * call-seq:
+ * Encoding.default_external -> enc
+ *
+ * Returns default external encoding.
+ *
+ * It is initialized by the locale or -E option.
+ */
+static mrb_value
+get_default_external(mrb_state *mrb, mrb_value klass)
+{
+  /* same result as mrb_enc_default_external() */
+  return mrb_enc_from_encoding(mrb, mrb_default_external_encoding(mrb));
+}
+
+/* Sets the default external encoding; nil is rejected with ArgumentError. */
+void
+mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding)
+{
+  if (mrb_nil_p(encoding))
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "default external can not be nil");
+
+  enc_set_default_encoding(mrb, &default_external, encoding, "external");
+}
+
+/* 15.2.40.2.4 */
+/*
+ * call-seq:
+ * Encoding.default_external = enc
+ *
+ * Sets default external encoding.
+ */
+static mrb_value
+set_default_external(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value new_default;
+
+  mrb_get_args(mrb, "o", &new_default);
+  mrb_warning("setting Encoding.default_external");
+  mrb_enc_set_default_external(mrb, new_default);
+  /* the assigned value is returned */
+  return new_default;
+}
+
+/* starts out as "not yet set" (index -2) */
+static struct default_encoding default_internal = {-2};
+
+/*
+ * Returns the default internal encoding, resolving and caching it from the
+ * stored index when that index is non-negative.  May return NULL.
+ */
+mrb_encoding *
+mrb_default_internal_encoding(mrb_state *mrb)
+{
+  if (default_internal.enc) return default_internal.enc;
+
+  if (default_internal.index >= 0) {
+    default_internal.enc = mrb_enc_from_index(mrb, default_internal.index);
+  }
+  return default_internal.enc; /* can be NULL */
+}
+
+/* Wraps the default internal encoding (possibly NULL) as a Ruby-level value. */
+mrb_value
+mrb_enc_default_internal(mrb_state *mrb)
+{
+  /* Note: these functions cope with default_internal not being set */
+  mrb_encoding *internal = mrb_default_internal_encoding(mrb);
+
+  return mrb_enc_from_encoding(mrb, internal);
+}
+
+/* 15.2.40.2.5 */
+/*
+ * call-seq:
+ * Encoding.default_internal -> enc
+ *
+ * Returns default internal encoding.
+ *
+ * It is initialized by the source internal_encoding or -E option.
+ */
+static mrb_value
+get_default_internal(mrb_state *mrb, mrb_value klass)
+{
+  /* same result as mrb_enc_default_internal() */
+  return mrb_enc_from_encoding(mrb, mrb_default_internal_encoding(mrb));
+}
+
+/* Sets the default internal encoding; a nil value is passed through to clear it. */
+void
+mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding)
+{
+  struct default_encoding *target = &default_internal;
+
+  enc_set_default_encoding(mrb, target, encoding, "internal");
+}
+
+/* 15.2.40.2.6 */
+/*
+ * call-seq:
+ * Encoding.default_internal = enc or nil
+ *
+ * Sets default internal encoding.
+ * Or removes default internal encoding when passed nil.
+ */
+static mrb_value
+set_default_internal(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value new_default;
+
+  mrb_get_args(mrb, "o", &new_default);
+  mrb_warning("setting Encoding.default_internal");
+  mrb_enc_set_default_internal(mrb, new_default);
+  /* the assigned value is returned */
+  return new_default;
+}
+
+/* Helpers for nl_langinfo_codeset() below. */
+/* digit(x): true for the ASCII characters '0'..'9'. */
+#define digit(x) ((x) >= '0' && (x) <= '9')
+/* strstart(s, n): true when s begins with n, compared case-insensitively. */
+#define strstart(s, n) (strncasecmp(s, n, strlen(n)) == 0)
+#define C_CODESET "US-ASCII" /* Return this as the encoding of the
+ * C/POSIX locale. Could as well one day
+ * become "UTF-8". */
+/* Codeset returned for locales that start with "ja". */
+#if defined _WIN32 || defined __CYGWIN__
+#define JA_CODESET "Windows-31J"
+#else
+#define JA_CODESET "EUC-JP"
+#endif
+
+/* Scratch buffer in which nl_langinfo_codeset() builds "ISO-8859-<n>". */
+static char buf[16];
+
+/*
+ * Guesses the character set name from the locale environment variables.
+ *
+ * The first non-empty value among LC_ALL, LC_CTYPE and LANG is examined:
+ * "C"/"POSIX" map to C_CODESET, then the part after the first '.' (or the
+ * whole value when there is no '.') is matched against known encoding name
+ * fragments, and finally the language/country prefix is used.
+ *
+ * Returns a string literal, or a pointer to the file-level static `buf`
+ * for "ISO-8859-<n>" (overwritten by the next such call), or NULL when no
+ * variable is set or no rule matches.
+ */
+const char *
+nl_langinfo_codeset(void)
+{
+  const char *l, *p;
+  int n;
+
+  if (((l = getenv("LC_ALL")) && *l) ||
+      ((l = getenv("LC_CTYPE")) && *l) ||
+      ((l = getenv("LANG")) && *l)) {
+    /* check standardized locales */
+    if (!strcmp(l, "C") || !strcmp(l, "POSIX"))
+      return C_CODESET;
+    /* check for encoding name fragment */
+    p = strchr(l, '.');
+    /* step past the '.', or use the whole value; avoids incrementing a
+     * null pointer when no '.' is present */
+    p = p ? p + 1 : l;
+    if (strstart(p, "UTF"))
+      return "UTF-8";
+    if ((n = 5, strstart(p, "8859-")) || (n = 9, strstart(p, "ISO-8859-"))) {
+      if (digit(p[n])) {
+        p += n;
+        /* the two trailing NULs leave room for a one- or two-digit part */
+        memcpy(buf, "ISO-8859-\0\0", 12);
+        buf[9] = *p++;
+        if (digit(*p)) buf[10] = *p++;
+        return buf;
+      }
+    }
+    if (strstart(p, "KOI8-R")) return "KOI8-R";
+    if (strstart(p, "KOI8-U")) return "KOI8-U";
+    if (strstart(p, "620")) return "TIS-620";
+    if (strstart(p, "2312")) return "GB2312";
+    if (strstart(p, "HKSCS")) return "Big5HKSCS"; /* no MIME charset */
+    if (strstart(p, "BIG5")) return "Big5";
+    if (strstart(p, "GBK")) return "GBK"; /* no MIME charset */
+    if (strstart(p, "18030")) return "GB18030"; /* no MIME charset */
+    if (strstart(p, "Shift_JIS") || strstart(p, "SJIS")) return "Windows-31J";
+    /* check for conclusive modifier */
+    if (strstart(p, "euro")) return "ISO-8859-15";
+    /* check for language (and perhaps country) codes */
+    if (strstart(l, "zh_TW")) return "Big5";
+    if (strstart(l, "zh_HK")) return "Big5HKSCS"; /* no MIME charset */
+    if (strstart(l, "zh")) return "GB2312";
+    if (strstart(l, "ja")) return JA_CODESET;
+    if (strstart(l, "ko")) return "EUC-KR";
+    if (strstart(l, "ru")) return "KOI8-R";
+    if (strstart(l, "uk")) return "KOI8-U";
+    if (strstart(l, "pl") || strstart(l, "hr") ||
+        strstart(l, "hu") || strstart(l, "cs") ||
+        strstart(l, "sk") || strstart(l, "sl")) return "ISO-8859-2";
+    if (strstart(l, "eo") || strstart(l, "mt")) return "ISO-8859-3";
+    if (strstart(l, "el")) return "ISO-8859-7";
+    if (strstart(l, "he")) return "ISO-8859-8";
+    if (strstart(l, "tr")) return "ISO-8859-9";
+    if (strstart(l, "th")) return "TIS-620"; /* or ISO-8859-11 */
+    if (strstart(l, "lt")) return "ISO-8859-13";
+    if (strstart(l, "cy")) return "ISO-8859-14";
+    if (strstart(l, "ro")) return "ISO-8859-2"; /* or ISO-8859-16 */
+    if (strstart(l, "am") || strstart(l, "vi")) return "UTF-8";
+    /* Send me further rules if you like, but don't forget that we are
+     * *only* interested in locale naming conventions on platforms
+     * that do not already provide an nl_langinfo(CODESET) implementation. */
+  }
+  return NULL;
+}
+
+/* 15.2.40.2.9 */
+/*
+ * call-seq:
+ * Encoding.locale_charmap -> string
+ *
+ * Returns the locale charmap name.
+ *
+ * Debian GNU/Linux
+ * LANG=C
+ * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
+ * LANG=ja_JP.EUC-JP
+ * Encoding.locale_charmap #=> "EUC-JP"
+ *
+ * SunOS 5
+ * LANG=C
+ * Encoding.locale_charmap #=> "646"
+ * LANG=ja
+ * Encoding.locale_charmap #=> "eucJP"
+ *
+ * The result is highly platform dependent.
+ * So Encoding.find(Encoding.locale_charmap) may cause an error.
+ * If you need some encoding object even for unknown locale,
+ * Encoding.find("locale") can be used.
+ *
+ */
+/*
+ * Returns the locale charmap name as a US-ASCII string, or nil when it
+ * cannot be determined.
+ *
+ * - NO_LOCALE_CHARMAP: always "ASCII-8BIT".
+ * - Windows/Cygwin: the guess made by nl_langinfo_codeset().  The
+ *   console-code-page fallback is disabled in this port (its snprintf is
+ *   commented out), so a failed guess now yields nil instead of a string
+ *   built from an uninitialised buffer.
+ * - HAVE_LANGINFO_H: nl_langinfo(CODESET).
+ * - otherwise: nil.
+ */
+mrb_value
+mrb_locale_charmap(mrb_state *mrb, mrb_value klass)
+{
+#if defined NO_LOCALE_CHARMAP
+  return mrb_usascii_str_new2(mrb, "ASCII-8BIT");
+#elif defined _WIN32 || defined __CYGWIN__
+  const char *nl_langinfo_codeset(void);
+  const char *codeset = nl_langinfo_codeset();
+
+  if (!codeset) {
+    /* TODO: restore the "CP%d" name built from GetConsoleCP() */
+    return mrb_nil_value();
+  }
+  return mrb_usascii_str_new2(mrb, codeset);
+#elif defined HAVE_LANGINFO_H
+  char *codeset;
+  codeset = nl_langinfo(CODESET);
+  return mrb_usascii_str_new2(mrb, codeset);
+#else
+  return mrb_nil_value();
+#endif
+}
+/*
+ * Defines constant(s) on the Encoding class for the encoding `enc`, derived
+ * from `name`.
+ *
+ * Names starting with a digit are ignored.  A name that starts with an
+ * upper-case letter and consists only of alphanumerics and '_' is defined
+ * as is.  Otherwise, or when the name contains lower-case letters, a copy
+ * of the name is rewritten (non-alphanumerics become '_', letters are
+ * upper-cased) and the rewritten spelling is defined as well.  Names
+ * longer than ENCODING_NAMELEN_MAX are skipped.
+ */
+static void
+set_encoding_const(mrb_state *mrb, const char *name, mrb_encoding *enc)
+{
+ mrb_value encoding = mrb_enc_from_encoding(mrb, enc);
+ char *s = (char *)name;
+ int haslower = 0, hasupper = 0, valid = 0;
+
+ if (ISDIGIT(*s)) return;
+ if (ISUPPER(*s)) {
+ hasupper = 1;
+ /* scan the leading run of identifier characters */
+ while (*++s && (ISALNUM(*s) || *s == '_')) {
+ if (ISLOWER(*s)) haslower = 1;
+ }
+ }
+ if (!*s) { /* the scan above consumed the whole name */
+ if (s - name > ENCODING_NAMELEN_MAX) return;
+ valid = 1;
+ //mrb_define_const(mrb_cEncoding, name, encoding);
+ mrb_define_const(mrb, mrb->encode_class, name, encoding);
+ }
+ if (!valid || haslower) {
+ size_t len = s - name;
+ if (len > ENCODING_NAMELEN_MAX) return;
+ if (!haslower || !hasupper) {
+ /* keep scanning until both cases have been seen or the name ends */
+ do {
+ if (ISLOWER(*s)) haslower = 1;
+ if (ISUPPER(*s)) hasupper = 1;
+ } while (*++s && (!haslower || !hasupper));
+ len = s - name;
+ }
+ len += strlen(s);
+ if (len++ > ENCODING_NAMELEN_MAX) return; /* len now counts the terminator */
+ //MEMCPY(s = ALLOCA_N(char, len), name, char, len);
+ /* NOTE(review): this buffer is not released in this function; whether
+  * mrb_define_const keeps the pointer or copies the name is not visible
+  * here -- confirm ownership. */
+ memcpy(s = mrb_malloc(mrb, len), name, len);
+ name = s;
+ if (!valid) {
+ if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
+ for (; *s; ++s) {
+ if (!ISALNUM(*s)) *s = '_';
+ }
+ if (hasupper) {
+ mrb_define_const(mrb, mrb->encode_class, name, encoding);
+ }
+ }
+ if (haslower) {
+ /* all-upper-case spelling */
+ for (s = (char *)name; *s; ++s) {
+ if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
+ }
+ mrb_define_const(mrb, mrb->encode_class, name, encoding);
+ }
+ }
+}
+/* Name-table iterator: appends each registered name to the array that `arg` points to. */
+static int
+mrb_enc_name_list_i(mrb_state *mrb, st_data_t name, st_data_t idx, mrb_value *arg)
+{
+  mrb_value entry = mrb_usascii_str_new2(mrb, (char *)name);
+
+  mrb_ary_push(mrb, *arg, entry);
+  return ST_CONTINUE;
+}
+
+/* 15.2.40.2.10 */
+/*
+ * call-seq:
+ * Encoding.name_list -> ["enc1", "enc2", ...]
+ *
+ * Returns the list of available encoding names.
+ *
+ * Encoding.name_list
+ * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
+ * "ISO-8859-1", "Shift_JIS", "EUC-JP",
+ * "Windows-31J",
+ * "BINARY", "CP932", "eucJP"]
+ *
+ */
+
+static mrb_value
+mrb_enc_name_list(mrb_state *mrb, mrb_value klass)
+{
+  /* one slot per entry of the name table */
+  mrb_value names = mrb_ary_new_capa(mrb, enc_table.names->num_entries);
+
+  st_foreachNew(mrb, enc_table.names, mrb_enc_name_list_i, &names);
+  return names;
+}
+
+/*
+ * Name-table iterator for Encoding.aliases.  `arg` points at two values:
+ * [0] the result hash (alias => original name) and [1] an array caching
+ * the original-name string per encoding index.  Entries whose name equals
+ * the encoding's own name, or whose index has no encoding, are skipped.
+ */
+static int
+mrb_enc_aliases_enc_i(mrb_state *mrb, st_data_t name, st_data_t orig, st_data_t arg)
+{
+  mrb_value *ctx = (mrb_value *)arg;
+  mrb_value result = ctx[0], cache = ctx[1];
+  int idx = (int)orig;
+  mrb_value alias_key;
+  mrb_value canonical = mrb_ary_ref(mrb, cache, idx);
+
+  if (mrb_nil_p(canonical)) {
+    mrb_encoding *enc = mrb_enc_from_index(mrb, idx);
+
+    if (!enc) return ST_CONTINUE;
+    if (STRCASECMP((char*)name, mrb_enc_name(enc)) == 0) {
+      return ST_CONTINUE;
+    }
+    canonical = mrb_usascii_str_new2(mrb, mrb_enc_name(enc));
+    OBJ_FREEZE(canonical);
+    mrb_ary_set(mrb, cache, idx, canonical);
+  }
+  alias_key = mrb_usascii_str_new2(mrb, (char *)name);
+  OBJ_FREEZE(alias_key);
+  mrb_hash_set(mrb, result, alias_key, canonical);
+  return ST_CONTINUE;
+}
+
+/* 15.2.40.2.1 */
+/*
+ * call-seq:
+ * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
+ *
+ * Returns the hash of available encoding alias and original encoding name.
+ *
+ * Encoding.aliases
+ * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII",
+ * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
+ *
+ */
+
+static mrb_value
+mrb_enc_aliases(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value ctx[2];
+
+  ctx[0] = mrb_hash_new_capa(mrb, 0);  /* result: alias => original name */
+  ctx[1] = mrb_ary_new(mrb);           /* per-index cache of name strings */
+  st_foreachNew(mrb, enc_table.names, mrb_enc_aliases_enc_i, ctx);
+  return ctx[0];
+}
+
+/*
+ * Creates the Encoding class, registers its class and instance methods,
+ * makes sure the encoding table is initialised, and fills
+ * mrb_encoding_list with one Encoding object per table entry.
+ */
+void
+mrb_init_encoding(mrb_state *mrb)
+{
+/* NOTE(review): this redefinition stays in effect for the rest of the
+ * translation unit and takes a single argument; nothing in this function
+ * calls mrb_intern -- confirm it is still needed. */
+#undef mrb_intern
+#define mrb_intern(str) mrb_intern_const(str)
+ mrb_value list;
+ int i;
+ struct RClass *s;
+
+ s = mrb->encode_class = mrb_define_class(mrb, "Encoding", mrb->object_class);
+ //mrb_undef_alloc_func(mrb_cEncoding);
+ //mrb_undef_method(CLASS_OF(mrb_cEncoding), "new");
+ mrb_define_class_method(mrb, s, "aliases", mrb_enc_aliases, ARGS_NONE()); /* 15.2.40.2.1 */
+ mrb_define_class_method(mrb, s, "compatible?", enc_compatible_p, ARGS_REQ(2)); /* 15.2.40.2.2 */
+ mrb_define_class_method(mrb, s, "default_external", get_default_external, ARGS_NONE()); /* 15.2.40.2.3 */
+ mrb_define_class_method(mrb, s, "default_external=", set_default_external, ARGS_REQ(1)); /* 15.2.40.2.4 */
+ mrb_define_class_method(mrb, s, "default_internal", get_default_internal, ARGS_NONE()); /* 15.2.40.2.5 */
+ mrb_define_class_method(mrb, s, "default_internal=", set_default_internal, ARGS_REQ(1)); /* 15.2.40.2.6 */
+ mrb_define_class_method(mrb, s, "find", enc_find, ARGS_REQ(1)); /* 15.2.40.2.7 */
+ mrb_define_class_method(mrb, s, "list", enc_list, ARGS_NONE()); /* 15.2.40.2.8 */
+ mrb_define_class_method(mrb, s, "locale_charmap", mrb_locale_charmap, ARGS_NONE()); /* 15.2.40.2.9 */
+ mrb_define_class_method(mrb, s, "name_list", mrb_enc_name_list, ARGS_NONE()); /* 15.2.40.2.10 */
+ mrb_define_class_method(mrb, s, "_load", enc_load, ARGS_REQ(1)); /* 15.2.40.2.11 */
+ mrb_define_method(mrb, s, "ascii_compatible?", enc_ascii_compatible_p, ARGS_NONE()); /* 15.2.40.2.12 */
+ mrb_define_method(mrb, s, "dummy?", enc_dummy_p, ARGS_NONE()); /* 15.2.40.2.13 */
+ mrb_define_method(mrb, s, "inspect", enc_inspect, ARGS_NONE()); /* 15.2.40.2.14 */
+ mrb_define_method(mrb, s, "name", enc_name, ARGS_NONE()); /* 15.2.40.2.15 */
+ mrb_define_method(mrb, s, "names", enc_names, ARGS_NONE()); /* 15.2.40.2.16 */
+ mrb_define_method(mrb, s, "replicate", enc_replicate, ARGS_REQ(1)); /* 15.2.40.2.17 */
+ mrb_define_method(mrb, s, "to_s", enc_name, ARGS_NONE()); /* 15.2.40.2.18 */
+ mrb_define_method(mrb, s, "_dump", enc_dump, ARGS_ANY()); /* 15.2.40.2.19 */
+
+/* add kusuda --> */
+ if (!enc_table.list) {
+ mrb_enc_init(mrb);
+ }
+/* add kusuda --< */
+ list = mrb_ary_new_capa(mrb, enc_table.count);//mrb_ary_new2(enc_table.count);
+ RBASIC(list)->c = 0; /* clear the array's class pointer */
+ mrb_encoding_list = list;
+ /* NOTE(review): the GC registration below is commented out; confirm the
+  * list is kept alive by other means. */
+ //mrb_gc_register_mark_object(list);
+
+ /* one Encoding object per registered table entry */
+ for (i = 0; i < enc_table.count; ++i) {
+ mrb_ary_push(mrb, list, enc_new(mrb, enc_table.list[i].enc));
+ }
+}
+
+/* locale insensitive functions */
+
+/* ctype_test(c, ctype): true when c is an ASCII code and the ASCII
+ * character-type table classifies it as `ctype`. */
+#define ctype_test(c, ctype) \
+ (mrb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), ctype))
+
+/* Character classification limited to ASCII: each function tests c against
+ * one ONIGENC_CTYPE_* class via ctype_test(). */
+int mrb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
+int mrb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
+int mrb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
+int mrb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
+int mrb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
+int mrb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
+int mrb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
+int mrb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
+int mrb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
+int mrb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
+int mrb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
+int mrb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
+
+/* Lower-cases c when it is an ASCII code; other values are returned unchanged. */
+int
+mrb_tolower(int c)
+{
+  if (mrb_isascii(c)) {
+    return ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
+  }
+  return c;
+}
+
+/* Upper-cases c when it is an ASCII code; other values are returned unchanged. */
+int
+mrb_toupper(int c)
+{
+  if (mrb_isascii(c)) {
+    return ONIGENC_ASCII_CODE_TO_UPPER_CASE(c);
+  }
+  return c;
+}
+#endif //INCLUDE_ENCODING
diff --git a/src/encoding.h b/src/encoding.h
new file mode 100644
index 000000000..784d67f44
--- /dev/null
+++ b/src/encoding.h
@@ -0,0 +1,360 @@
+/**********************************************************************
+
+ encoding.h -
+
+ $Author: matz $
+ created at: Thu May 24 11:49:41 JST 2007
+
+ Copyright (C) 2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#ifndef RUBY_ENCODING_H
+#define RUBY_ENCODING_H 1
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <stdarg.h>
+#include "oniguruma.h"
+#include "mdata.h"
+/*
+ * Case conversion helpers. TOUPPER/TOLOWER cast their argument to
+ * unsigned char before the call, so a negative plain-char value is never
+ * handed to mrb_toupper/mrb_tolower as a negative int.
+ */
+int mrb_tolower(int c);
+int mrb_toupper(int c);
+#define TOUPPER(c) mrb_toupper((unsigned char)(c))
+#define TOLOWER(c) mrb_tolower((unsigned char)(c))
+/* Bit position of the first user flag; FL_USERn is bit (FL_USHIFT + n). */
+#define FL_USHIFT 12
+/* Single-bit masks FL_USER0..FL_USER9, i.e. bits 12..21. */
+#define FL_USER0 (((int)1)<<(FL_USHIFT+0))
+#define FL_USER1 (((int)1)<<(FL_USHIFT+1))
+#define FL_USER2 (((int)1)<<(FL_USHIFT+2))
+#define FL_USER3 (((int)1)<<(FL_USHIFT+3))
+#define FL_USER4 (((int)1)<<(FL_USHIFT+4))
+#define FL_USER5 (((int)1)<<(FL_USHIFT+5))
+#define FL_USER6 (((int)1)<<(FL_USHIFT+6))
+#define FL_USER7 (((int)1)<<(FL_USHIFT+7))
+#define FL_USER8 (((int)1)<<(FL_USHIFT+8))
+#define FL_USER9 (((int)1)<<(FL_USHIFT+9))
+/*
+ * Inline encoding index: a 10-bit field (mask 0x3FF) kept in the object's
+ * flags, shifted left by ENCODING_SHIFT (bits 10..19 with the value below).
+ */
+#define ENCODING_INLINE_MAX 1023
+/* 1023 = 0x03FF */
+/*#define ENCODING_SHIFT (FL_USHIFT+10)*/
+#define ENCODING_SHIFT (10)
+#define ENCODING_MASK (((unsigned int)ENCODING_INLINE_MAX)<<ENCODING_SHIFT)
+/*
+ * ENCODING_SET_INLINED clears the field and ORs the index in; it expands
+ * `obj` twice. ENCODING_SET evaluates `obj` and `i` once each into locals,
+ * stores the index inline when it is below ENCODING_INLINE_MAX and otherwise
+ * delegates to mrb_enc_set_index().
+ */
+#define ENCODING_SET_INLINED(obj,i) do {\
+ RBASIC(obj)->flags &= ~ENCODING_MASK;\
+ RBASIC(obj)->flags |= (unsigned int)(i) << ENCODING_SHIFT;\
+} while (0)
+#define ENCODING_SET(mrb, obj,i) do {\
+ mrb_value mrb_encoding_set_obj = (obj); \
+ int encoding_set_enc_index = (i); \
+ if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
+ ENCODING_SET_INLINED(mrb_encoding_set_obj, encoding_set_enc_index); \
+ else \
+ mrb_enc_set_index(mrb, mrb_encoding_set_obj, encoding_set_enc_index); \
+} while (0)
+/*
+ * Readers. A stored value equal to ENCODING_INLINE_MAX means "not inline":
+ * ENCODING_GET then falls back to mrb_enc_get_index(). ENCODING_GET expands
+ * `obj` up to three times.
+ * NOTE(review): the setters go through RBASIC(obj)->flags while the getters
+ * go through RSTRING(obj)->flags -- confirm both name the same field.
+ */
+#define ENCODING_GET_INLINED(obj) (unsigned int)((RSTRING(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
+#define ENCODING_GET(mrb, obj) \
+ (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
+ ENCODING_GET_INLINED(obj) : \
+ mrb_enc_get_index(mrb, obj))
+/* True when the inline index is 0. */
+#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
+/* Name-length limit; its users are not visible in this header. */
+#define ENCODING_MAXNAMELEN 42
+/*
+ * Code range: a two-bit state held in FL_USER8|FL_USER9 of RSTRING(obj)->flags.
+ * UNKNOWN = no bit, 7BIT = FL_USER8, VALID = FL_USER9, BROKEN = both bits.
+ * Without INCLUDE_ENCODING, ENC_CODERANGE_SET (and therefore
+ * ENC_CODERANGE_CLEAR) expands to nothing.
+ */
+#define ENC_CODERANGE_MASK ((int)(FL_USER8|FL_USER9))
+#define ENC_CODERANGE_UNKNOWN 0
+#define ENC_CODERANGE_7BIT ((int)FL_USER8)
+#define ENC_CODERANGE_VALID ((int)FL_USER9)
+#define ENC_CODERANGE_BROKEN ((int)(FL_USER8|FL_USER9))
+#define ENC_CODERANGE(obj) ((int)(RSTRING(obj)->flags & ENC_CODERANGE_MASK))
+#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
+#ifdef INCLUDE_ENCODING
+#define ENC_CODERANGE_SET(obj,cr) (RSTRING(obj)->flags = \
+ (RSTRING(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
+#else
+#define ENC_CODERANGE_SET(obj,cr)
+#endif //INCLUDE_ENCODING
+#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
+
+/*
+ * Combine two code ranges (assumed ASCII compatibility):
+ *   7BIT  with b -> b
+ *   VALID with b -> VALID when b is 7BIT, otherwise b
+ *   anything else -> UNKNOWN
+ * Every use of a and b is parenthesized so that compound argument
+ * expressions (e.g. `x | y`, `p ? q : r`) keep their meaning. The arguments
+ * may still be evaluated more than once.
+ */
+#define ENC_CODERANGE_AND(a, b) \
+  ((a) == ENC_CODERANGE_7BIT ? (b) : \
+   (a) == ENC_CODERANGE_VALID ? ((b) == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : (b)) : \
+   ENC_CODERANGE_UNKNOWN)
+/* Set both the encoding index and the code range of obj; obj is evaluated once into a local. */
+#define ENCODING_CODERANGE_SET(mrb, obj, encindex, cr) \
+ do { \
+ mrb_value mrb_encoding_coderange_obj = (obj); \
+ ENCODING_SET(mrb, mrb_encoding_coderange_obj, (encindex)); \
+ ENC_CODERANGE_SET(mrb_encoding_coderange_obj, (cr)); \
+ } while (0)
+
+typedef OnigEncodingType mrb_encoding;
+
+mrb_encoding* mrb_enc_get(mrb_state *mrb, mrb_value obj);
+/* mrb_encoding * -> name */
+#define mrb_enc_name(enc) (enc)->name
+int mrb_enc_get_index(mrb_state *mrb, mrb_value obj);
+
+int mrb_enc_replicate(mrb_state *, const char *, mrb_encoding *);
+int mrb_define_dummy_encoding(mrb_state *mrb, const char *);
+#define mrb_enc_to_index(enc) ((enc) ? ENC_TO_ENCINDEX(enc) : 0)
+void mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int encindex);
+int mrb_enc_find_index(mrb_state *mrb, const char *name);
+int mrb_to_encoding_index(mrb_state *mrb, mrb_value);
+mrb_encoding* mrb_to_encoding(mrb_state *mrb, mrb_value);
+mrb_encoding* mrb_enc_get(mrb_state *, mrb_value);
+mrb_encoding* mrb_enc_compatible(mrb_state *, mrb_value, mrb_value);
+mrb_encoding* mrb_enc_check(mrb_state *, mrb_value, mrb_value);
+mrb_value mrb_enc_associate_index(mrb_state *mrb, mrb_value, int);
+#ifdef INCLUDE_ENCODING
+mrb_value mrb_enc_associate(mrb_state *mrb, mrb_value, mrb_encoding*);
+#else
+#define mrb_enc_associate(mrb,value,enc)
+#endif //INCLUDE_ENCODING
+void mrb_enc_copy(mrb_state *mrb, mrb_value dst, mrb_value src);
+
+mrb_value mrb_enc_reg_new(const char*, long, mrb_encoding*, int);
+//PRINTF_ARGS(mrb_value rb_enc_sprintf(mrb_encoding *, const char*, ...), 2, 3);
+mrb_value mrb_enc_vsprintf(mrb_encoding *, const char*, va_list);
+long mrb_enc_strlen(const char*, const char*, mrb_encoding*);
+char* mrb_enc_nth(mrb_state *, const char*, const char*, long, mrb_encoding*);
+mrb_value mrb_obj_encoding(mrb_state *, mrb_value);
+mrb_value mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *enc);
+mrb_value rb_enc_uint_chr(mrb_state *mrb, unsigned int code, mrb_encoding *enc);
+
+mrb_value mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *);
+mrb_value mrb_str_export_to_enc(mrb_value, mrb_encoding *);
+
+/* index -> mrb_encoding */
+mrb_encoding* mrb_enc_from_index(mrb_state *mrb, int idx);
+
+/* name -> mrb_encoding */
+mrb_encoding * mrb_enc_find(mrb_state *mrb, const char *name);
+
+/* mrb_encoding * -> name */
+#define mrb_enc_name(enc) (enc)->name
+
+/* mrb_encoding * -> minlen/maxlen */
+#define mrb_enc_mbminlen(enc) (enc)->min_enc_len
+#define mrb_enc_mbmaxlen(enc) (enc)->max_enc_len
+
+/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
+int mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc);
+
+/* -> mbclen (only for valid encoding) */
+int mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc);
+
+/* -> chlen, invalid or needmore */
+int mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc);
+#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret)
+#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
+#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret)
+#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret)
+#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
+
+/* -> 0x00..0x7f, -1 */
+int mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc);
+
+
+/* -> code (and len) or raise exception */
+unsigned int mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc);
+
+/* prototype for obsolete function */
+unsigned int mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc);
+/* overriding macro */
+#define mrb_enc_codepoint(mrb,p,e,enc) mrb_enc_codepoint_len((mrb),(p),(e),0,(enc))
+#define mrb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e))
+
+/* -> codelen>0 or raise exception */
+#ifdef INCLUDE_ENCODING
+int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc);
+#else
+#define mrb_enc_codelen(mrb,code,enc) 1
+#endif //INCLUDE_ENCODING
+
+/*
+ * code,ptr,encoding -> write buf
+ * With INCLUDE_ENCODING this forwards to ONIGENC_CODE_TO_MBC. Without it the
+ * code is stored as a single char; that fallback is wrapped in parentheses
+ * so the assignment stays intact when the macro is used inside a larger
+ * expression (comparison, arithmetic, ...). `enc` is ignored by the fallback.
+ */
+#ifdef INCLUDE_ENCODING
+#define mrb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC(enc,c,(UChar*)(buf))
+#else
+#define mrb_enc_mbcput(c,buf,enc) (*(buf) = (char)(c))
+#endif //INCLUDE_ENCODING
+
+/* start, ptr, end, encoding -> prev_char (cast wrapper around onigenc_get_prev_char_head) */
+#define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
+/* start, ptr, end, encoding -> left-/right-adjusted char head (cast wrappers
+ * around the onigenc_get_{left,right}_adjust_char_head helpers).
+ * The expansions are casts without outer parentheses. */
+#define mrb_enc_left_char_head(s,p,e,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
+#define mrb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
+
+/* ptr, ptr, encoding -> newline_or_not */
+#define mrb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
+/*
+ * Code point classification, forwarded to the matching ONIGENC_IS_CODE_*
+ * macro. mrb_enc_isascii accepts `enc` for symmetry but does not use it.
+ */
+#define mrb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
+#define mrb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
+#define mrb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
+#define mrb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
+#define mrb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c)
+#define mrb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT(enc,c)
+#define mrb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c)
+#define mrb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
+#define mrb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
+#define mrb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
+/*
+ * ASCII compatible = minimum character length of 1 and not a dummy encoding.
+ * The `mrb` argument is unused; `enc` is expanded twice.
+ */
+#define mrb_enc_asciicompat(mrb, enc) (mrb_enc_mbminlen(enc)==1 && !mrb_enc_dummy_p(enc))
+
+int mrb_enc_casefold(char *to, const char *p, const char *e, mrb_encoding *enc);
+int mrb_enc_toupper(int c, mrb_encoding *enc);
+int mrb_enc_tolower(int c, mrb_encoding *enc);
+//ID mrb_intern3(const char*, long, mrb_encoding*);
+//ID mrb_interned_id_p(const char *, long, mrb_encoding *);
+int mrb_enc_symname_p(const char*, mrb_encoding*);
+int mrb_enc_symname2_p(const char*, long, mrb_encoding*);
+int mrb_enc_str_coderange(mrb_state *mrb, mrb_value);
+long mrb_str_coderange_scan_restartable(const char*, const char*, mrb_encoding*, int*);
+int mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value);
+#define mrb_enc_str_asciicompat_p(mrb, str) mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, str))
+mrb_value mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *enc);
+int mrb_enc_unicode_p(mrb_encoding *enc);
+mrb_encoding *mrb_ascii8bit_encoding(mrb_state *mrb);
+mrb_encoding *mrb_utf8_encoding(mrb_state *mrb);
+mrb_encoding *mrb_usascii_encoding(mrb_state *mrb);
+mrb_encoding *mrb_locale_encoding(mrb_state *mrb);
+mrb_encoding *mrb_filesystem_encoding(mrb_state *mrb);
+mrb_encoding *mrb_default_external_encoding(mrb_state *mrb);
+mrb_encoding *mrb_default_internal_encoding(mrb_state *mrb);
+int mrb_ascii8bit_encindex(void);
+int mrb_utf8_encindex(void);
+int mrb_usascii_encindex(void);
+int mrb_locale_encindex(mrb_state *mrb);
+int mrb_filesystem_encindex(void);
+mrb_value mrb_enc_default_external(mrb_state *mrb);
+mrb_value mrb_enc_default_internal(mrb_state *mrb);
+void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding);
+void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding);
+mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass);
+#ifdef INCLUDE_ENCODING
+int mrb_memsearch(mrb_state *mrb, const void*,int,const void*,int,mrb_encoding*);
+#endif //INCLUDE_ENCODING
+mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr);
+int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p);
+/*
+ * Layout of mrb_encoding::ruby_encoding_index: the low 24 bits
+ * (ENC_INDEX_MASK) hold the encoding index, bit 24 (ENC_DUMMY_FLAG) marks a
+ * dummy encoding.
+ */
+#define ENC_DUMMY_FLAG (1<<24)
+#define ENC_INDEX_MASK (~(~0U<<24))
+/* Extract the index part as an int. */
+#define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
+/* Test / set the dummy bit; ENC_DUMMY_P yields the raw masked bit, not 0/1. */
+#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
+#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
+
+/* Normalizes ENC_DUMMY_P(enc) to exactly 1 (dummy bit set) or 0 (clear). */
+static inline int
+mrb_enc_dummy_p(mrb_encoding *enc)
+{
+  if (ENC_DUMMY_P(enc)) {
+    return 1;
+  }
+  return 0;
+}
+
+/* econv stuff (encoding conversion API) */
+/* Result codes; this is the return type of mrb_econv_convert() declared below. */
+typedef enum {
+ econv_invalid_byte_sequence,
+ econv_undefined_conversion,
+ econv_destination_buffer_full,
+ econv_source_buffer_empty,
+ econv_finished,
+ econv_after_output,
+ econv_incomplete_input
+} mrb_econv_result_t;
+/* Converter handle; only declared (incomplete type) in this header. */
+typedef struct mrb_econv_t mrb_econv_t;
+
+mrb_value mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts);
+int mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding);
+
+int mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *ecopts);
+
+mrb_econv_t *mrb_econv_open(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags);
+mrb_econv_t *mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value ecopts);
+
+mrb_econv_result_t mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec,
+ const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
+ unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
+ int flags);
+void mrb_econv_close(mrb_econv_t *ec);
+
+/* result: 0:success -1:failure */
+int mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
+
+/* result: 0:success -1:failure */
+int mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name);
+int mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name);
+
+mrb_value mrb_econv_open_exc(mrb_state *mrb, const char *senc, const char *denc, int ecflags);
+
+/* result: 0:success -1:failure */
+int mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec,
+ const unsigned char *str, size_t len, const char *str_encoding);
+
+/* encoding that mrb_econv_insert_output doesn't need conversion */
+const char *mrb_econv_encoding_to_insert_output(mrb_econv_t *ec);
+
+/* raise an error if the last mrb_econv_convert is error */
+void mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec);
+
+/* returns an exception object or nil */
+mrb_value mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec);
+
+int mrb_econv_putbackable(mrb_econv_t *ec);
+void mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n);
+
+/* returns the corresponding ASCII compatible encoding for encname,
+ * or NULL if encname is not an ASCII-incompatible encoding. */
+const char *mrb_econv_asciicompat_encoding(const char *encname);
+
+mrb_value mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags);
+mrb_value mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags);
+mrb_value mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags);
+mrb_value mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, mrb_value dst, int flags);
+
+void mrb_econv_binmode(mrb_econv_t *ec);
+
+/* flags for mrb_econv_open */
+/* Bits 0..7: error handling, split into the INVALID and UNDEF nibbles below. */
+#define ECONV_ERROR_HANDLER_MASK 0x000000ff
+/* Bits 0..3: handling selected by the *_INVALID_* values. */
+#define ECONV_INVALID_MASK 0x0000000f
+#define ECONV_INVALID_REPLACE 0x00000002
+/* Bits 4..7: handling selected by the *_UNDEF_* values. */
+#define ECONV_UNDEF_MASK 0x000000f0
+#define ECONV_UNDEF_REPLACE 0x00000020
+#define ECONV_UNDEF_HEX_CHARREF 0x00000030
+/* Bits 8..15: decorator selection. */
+#define ECONV_DECORATOR_MASK 0x0000ff00
+
+#define ECONV_UNIVERSAL_NEWLINE_DECORATOR 0x00000100
+#define ECONV_CRLF_NEWLINE_DECORATOR 0x00001000
+#define ECONV_CR_NEWLINE_DECORATOR 0x00002000
+#define ECONV_XML_TEXT_DECORATOR 0x00004000
+#define ECONV_XML_ATTR_CONTENT_DECORATOR 0x00008000
+/* Bits 20..23: stateful decorators. */
+#define ECONV_STATEFUL_DECORATOR_MASK 0x00f00000
+#define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000
+
+/* end of flags for mrb_econv_open */
+
+/* flags for mrb_econv_convert (bits 16 and 17, outside every mask above) */
+#define ECONV_PARTIAL_INPUT 0x00010000
+#define ECONV_AFTER_OUTPUT 0x00020000
+/* end of flags for mrb_econv_convert */
+
+int mrb_isspace(int c);
+
+#if defined(__cplusplus)
+} /* extern "C" { */
+#endif
+
+#endif /* RUBY_ENCODING_H */
diff --git a/src/enum.c b/src/enum.c
new file mode 100644
index 000000000..c9bb43139
--- /dev/null
+++ b/src/enum.c
@@ -0,0 +1,1077 @@
+/**********************************************************************
+
+ enum.c -
+
+ $Author: yugui $
+ created at: Fri Oct 1 15:15:19 JST 1993
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "mruby.h"
+
+#if 0
+
+#include "mruby/struct.h"
+#include "mruby/array.h"
+/*
+ * Compatibility shim: invokes method `mid` on `recv` by name through
+ * mrb_funcall(). The `scope` and `self` parameters are accepted but unused.
+ * NOTE(review): argv is forwarded as a single pointer argument, whereas other
+ * mrb_funcall call sites in this file pass mrb_value arguments directly --
+ * confirm this form is supported.
+ */
+static inline mrb_value
+mrb_call0(mrb_state *mrb, mrb_value recv, mrb_sym mid, int argc, const mrb_value *argv,
+ call_type scope, mrb_value self)
+{
+ return mrb_funcall(mrb, recv, mrb_sym2name(mrb, mid), argc, argv);
+}
+static inline mrb_value /* forwards to mrb_call0 with fixnum 0 standing in for Qundef as `self` */
+mrb_call(mrb_state *mrb, mrb_value recv, mrb_sym mid, int argc, const mrb_value *argv, call_type scope)
+{
+ return mrb_call0(mrb, recv, mid, argc, argv, scope, mrb_fixnum_value(0)/*Qundef*/);
+}
+/* File-level state: the Enumerable module value and method-name symbols.
+ * Their initialization is not visible in the reviewed part of this file. */
+mrb_value rb_mEnumerable;
+static mrb_sym id_each, id_eqq, id_cmp, id_next, id_size;
+/* Bundles one pending method call so it can travel through a void* (see iterate_method). */
+struct iter_method_arg {
+ mrb_value obj; /* receiver */
+ mrb_sym mid; /* method to invoke */
+ int argc; /* number of entries in argv */
+ mrb_value *argv; /* arguments, may be 0 when argc is 0 */
+};
+
+/*
+ * Iterator thunk handed to mrb_iterate(): `obj` is a struct iter_method_arg
+ * describing the call, which is performed through mrb_call() with CALL_FCALL.
+ */
+static mrb_value
+iterate_method(mrb_state *mrb, void *obj)
+{
+  const struct iter_method_arg *call = obj;
+
+  return mrb_call(mrb, call->obj, call->mid, call->argc, call->argv, CALL_FCALL);
+}
+/* ANYARGS: parameter list for "any arguments" callbacks -- `...` under C++, empty (unprototyped) under C. */
+#ifndef ANYARGS
+# ifdef __cplusplus
+# define ANYARGS ...
+# else
+# define ANYARGS
+# endif
+#endif
+/*
+ * Placeholder iteration driver. As written it calls bl_proc(data2) once,
+ * then it_proc(mrb, data1) once, and returns the result of it_proc; the
+ * result of bl_proc and the initial nil are overwritten.
+ * NOTE(review): bl_proc is invoked with data2 only, while the callbacks in
+ * this file take (mrb, i, memo, argc, argv) -- presumably unfinished porting.
+ */
+mrb_value
+mrb_iterate(mrb_state *mrb,
+ mrb_value (* it_proc) (mrb_state *, void*), void *data1,
+ mrb_value (* bl_proc) (ANYARGS),
+ void *data2)
+{
+ mrb_value retval = mrb_nil_value();
+ retval = (*bl_proc) (data2);
+ retval = (*it_proc) (mrb, data1);
+ return retval;
+}
+
+/*
+ * Calls `mid` on `obj` with argc/argv, using bl_proc (with data2) as the
+ * block. The call is packaged into an iter_method_arg and driven through
+ * mrb_iterate() with iterate_method as the iterator.
+ */
+mrb_value
+mrb_block_call(mrb_state *mrb, mrb_value obj, mrb_sym mid, int argc, mrb_value * argv,
+               mrb_value (*bl_proc) (ANYARGS),
+               void *data2)
+{
+  struct iter_method_arg call = {
+    .obj = obj,
+    .mid = mid,
+    .argc = argc,
+    .argv = argv
+  };
+
+  return mrb_iterate(mrb, iterate_method, &call, bl_proc, data2);
+}
+
+/*
+ * Collapses yielded values into one value: nil for no values, the value
+ * itself for exactly one, otherwise a new array built from argv.
+ */
+static mrb_value
+enum_values_pack(mrb_state *mrb, int argc, mrb_value *argv)
+{
+  switch (argc) {
+  case 0:
+    return mrb_nil_value();
+  case 1:
+    return argv[0];
+  default:
+    return mrb_ary_new4(mrb, argc, argv);
+  }
+}
+/* Overwrites the local `i` with the packed form of argc/argv; all three names must exist at the expansion site. */
+#define ENUM_WANT_SVALUE(mrb) do { \
+ i = enum_values_pack(mrb, argc, argv); \
+} while (0)
+/* enum_yield aliases mrb_yield_values2, which is currently a stub: it ignores its arguments and returns nil. */
+#define enum_yield mrb_yield_values2
+mrb_value
+mrb_yield_values2(int argc, const mrb_value *argv)
+{
+ //return mrb_yield_0(argc, argv);
+ return mrb_nil_value(); /* dummy */
+}
+/*
+ * Per-element callback for enum_grep without a block: packs the yielded
+ * values into i and appends i to arg[1] when arg[0] === i is truthy.
+ * NOTE(review): arg points at the by-value parameter `args` (a single
+ * mrb_value), so arg[1] reads past that object; enum_grep supplies a
+ * two-element array as data2 -- confirm how it is meant to arrive here.
+ */
+static mrb_value
+grep_i(mrb_state *mrb, mrb_value i, mrb_value args, int argc, mrb_value *argv)
+{
+ mrb_value *arg = &args;
+ ENUM_WANT_SVALUE(mrb);
+
+ if (RTEST(mrb_funcall(mrb, arg[0], "===", 1, i))) {
+ mrb_ary_push(mrb, arg[1], i);
+ }
+ return mrb_nil_value();
+}
+/*
+ * Per-element callback for enum_grep with a block: like grep_i, but the
+ * value appended to arg[1] is the result of mrb_yield(i).
+ * NOTE(review): same arg[1] concern as grep_i -- arg aliases the single
+ * by-value parameter `args`.
+ */
+static mrb_value
+grep_iter_i(mrb_state *mrb, mrb_value i, mrb_value args, int argc, mrb_value *argv)
+{
+ mrb_value *arg = &args;
+ ENUM_WANT_SVALUE(mrb);
+
+ if (RTEST(mrb_funcall(mrb, arg[0], "===", 1, i))) {
+ mrb_ary_push(mrb, arg[1], mrb_yield(i));
+ }
+ return mrb_nil_value();
+}
+
+/* 15.3.2.2.9 */
+/*
+ * call-seq:
+ * enum.grep(pattern) -> array
+ * enum.grep(pattern) {| obj | block } -> array
+ *
+ * Returns an array of every element in <i>enum</i> for which
+ * <code>Pattern === element</code>. If the optional <em>block</em> is
+ * supplied, each matching element is passed to it, and the block's
+ * result is stored in the output array.
+ *
+ * (1..100).grep 38..44 #=> [38, 39, 40, 41, 42, 43, 44]
+ * c = IO.constants
+ * c.grep(/SEEK/) #=> [:SEEK_SET, :SEEK_CUR, :SEEK_END]
+ * res = c.grep(/SEEK/) {|v| IO.const_get(v) }
+ * res #=> [0, 1, 2]
+ *
+ */
+
+static mrb_value
+enum_grep(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value matches = mrb_ary_new(mrb);
+  /* state[0] = pattern, state[1] = result array; shared with the callback */
+  mrb_value state[2];
+  mrb_value (*visit)(mrb_state *, mrb_value, mrb_value, int, mrb_value *);
+
+  mrb_get_args(mrb, "o", &state[0]);
+  state[1] = matches;
+
+  /* with a block, store the block's result instead of the element */
+  visit = mrb_block_given_p() ? grep_iter_i : grep_i;
+  mrb_block_call(mrb, obj, id_each, 0, 0, visit, state);
+
+  return matches;
+}
+
+/*
+ * call-seq:
+ * enum.count -> int
+ * enum.count(item) -> int
+ * enum.count {| obj | block } -> int
+ *
+ * Returns the number of items in <i>enum</i>, where #size is called
+ * if it responds to it, otherwise the items are counted through
+ * enumeration. If an argument is given, counts the number of items
+ * in <i>enum</i> that are equal to <i>item</i>. If a block is
+ * given, counts the number of elements yielding a true value.
+ *
+ * ary = [1, 2, 4, 2]
+ * ary.count #=> 4
+ * ary.count(2) #=> 2
+ * ary.count{|x|x%2==0} #=> 3
+ *
+ */
+/* No-op placeholder: the vm_iter_break call is commented out, so callers simply continue after calling this. */
+void
+mrb_iter_break(void)
+{
+ //vm_iter_break(GET_THREAD()); /* dummy */
+}
+
+/*
+ * Per-element callback for enum_find: when the block result for the packed
+ * element is truthy, stores the element in *memo and requests a break.
+ * Always returns nil.
+ */
+static mrb_value
+find_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv)
+{
+  ENUM_WANT_SVALUE(mrb);
+
+  if (!RTEST(mrb_yield(i))) {
+    return mrb_nil_value();
+  }
+
+  *memo = i;
+  mrb_iter_break();
+  return mrb_nil_value();
+}
+
+/* 15.3.2.2.4 */
+/* 15.3.2.2.7 */
+/*
+ * call-seq:
+ * enum.detect(ifnone = nil) {| obj | block } -> obj or nil
+ * enum.find(ifnone = nil) {| obj | block } -> obj or nil
+ * enum.detect(ifnone = nil) -> an_enumerator
+ * enum.find(ifnone = nil) -> an_enumerator
+ *
+ * Passes each entry in <i>enum</i> to <em>block</em>. Returns the
+ * first for which <em>block</em> is not false. If no
+ * object matches, calls <i>ifnone</i> and returns its result when it
+ * is specified, or returns <code>nil</code> otherwise.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * (1..10).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> nil
+ * (1..100).detect {|i| i % 5 == 0 and i % 7 == 0 } #=> 35
+ *
+ */
+
+/*
+ * Core of Enumerable#find / #detect.
+ *
+ * argc/argv: optional single argument `ifnone` (a callable).
+ * obj:       the receiver whose `each` is iterated.
+ *
+ * Returns the element recorded by find_i, otherwise the result of
+ * ifnone.call when ifnone was given and is not nil, otherwise nil.
+ * The enumerator-returning form (no block) is not implemented.
+ */
+static mrb_value
+enum_find(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj)
+{
+  mrb_value memo;
+  mrb_value if_none;
+
+  /* MRB_TT_FREE marks "nothing found yet"; find_i overwrites memo on a hit */
+  memo.tt = MRB_TT_FREE;
+  /* ifnone is optional: only touch argv[0] when an argument was passed */
+  if_none = (argc > 0) ? argv[0] : mrb_nil_value();
+
+  mrb_block_call(mrb, obj, id_each, 0, 0, find_i, &memo);
+  if (memo.tt != MRB_TT_FREE) {
+    return memo;
+  }
+  if (!mrb_nil_p(if_none)) {
+    return mrb_funcall(mrb, if_none, "call", 0, 0);
+  }
+  return mrb_nil_value();
+}
+/* Method entry point: collects all call arguments with the "*" spec and forwards them to enum_find(). */
+static mrb_value
+enum_find_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_value *argv;
+ int argc;
+
+ mrb_get_args(mrb, "*", &argv, &argc);
+ return enum_find(mrb, argc, argv, self);
+}
+
+/*
+ * call-seq:
+ * enum.find_index(value) -> int or nil
+ * enum.find_index {| obj | block } -> int or nil
+ * enum.find_index -> an_enumerator
+ *
+ * Compares each entry in <i>enum</i> with <em>value</em> or passes
+ * to <em>block</em>. Returns the index for the first for which the
+ * evaluated value is non-false. If no object matches, returns
+ * <code>nil</code>
+ *
+ * If neither block nor argument is given, an enumerator is returned instead.
+ *
+ * (1..10).find_index {|i| i % 5 == 0 and i % 7 == 0 } #=> nil
+ * (1..100).find_index {|i| i % 5 == 0 and i % 7 == 0 } #=> 34
+ * (1..100).find_index(50) #=> 49
+ *
+ */
+
+/*
+ * Per-element callback for enum_find_all: appends the packed element to
+ * `ary` when the block result is truthy. Always returns nil.
+ */
+static mrb_value
+find_all_i(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv)
+{
+  ENUM_WANT_SVALUE(mrb);
+
+  if (!RTEST(mrb_yield(i))) {
+    return mrb_nil_value();
+  }
+  mrb_ary_push(mrb, ary, i);
+  return mrb_nil_value();
+}
+
+/* 15.3.2.2.8 */
+/* 15.3.2.2.18 */
+/*
+ * call-seq:
+ * enum.find_all {| obj | block } -> array
+ * enum.select {| obj | block } -> array
+ * enum.find_all -> an_enumerator
+ * enum.select -> an_enumerator
+ *
+ * Returns an array containing all elements of <i>enum</i> for which
+ * <em>block</em> is not <code>false</code> (see also
+ * <code>Enumerable#reject</code>).
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ *
+ * (1..10).find_all {|i| i % 3 == 0 } #=> [3, 6, 9]
+ *
+ */
+
+static mrb_value
+enum_find_all(mrb_state *mrb, mrb_value obj)
+{
+  /* the enumerator form (no block) is not implemented */
+  mrb_value selected = mrb_ary_new(mrb);
+
+  /* NOTE(review): &selected is passed as data2 while find_all_i takes the
+   * array by value -- confirm how the two are meant to connect */
+  mrb_block_call(mrb, obj, id_each, 0, 0, find_all_i, &selected);
+  return selected;
+}
+
+/*
+ * Per-element callback for enum_reject: appends the packed element to `ary`
+ * when the block result is falsy. Always returns nil.
+ */
+static mrb_value
+reject_i(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv)
+{
+  ENUM_WANT_SVALUE(mrb);
+
+  if (RTEST(mrb_yield(i))) {
+    return mrb_nil_value();
+  }
+  mrb_ary_push(mrb, ary, i);
+  return mrb_nil_value();
+}
+
+/* 15.3.2.2.17 */
+/*
+ * call-seq:
+ * enum.reject {| obj | block } -> array
+ * enum.reject -> an_enumerator
+ *
+ * Returns an array for all elements of <i>enum</i> for which
+ * <em>block</em> is false (see also <code>Enumerable#find_all</code>).
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * (1..10).reject {|i| i % 3 == 0 } #=> [1, 2, 4, 5, 7, 8, 10]
+ *
+ */
+
+static mrb_value
+enum_reject(mrb_state *mrb, mrb_value obj)
+{
+  /* the enumerator form (no block) is not implemented */
+  mrb_value kept = mrb_ary_new(mrb);
+
+  /* NOTE(review): &kept is passed as data2 while reject_i takes the array
+   * by value -- confirm how the two are meant to connect */
+  mrb_block_call(mrb, obj, id_each, 0, 0, reject_i, &kept);
+  return kept;
+}
+/* Per-element callback for enum_collect: appends enum_yield(argc, argv) to ary
+ * (enum_yield is mrb_yield_values2, which currently always returns nil). */
+static mrb_value
+collect_i(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv)
+{
+ mrb_ary_push(mrb, ary, enum_yield(argc, argv));
+
+ return mrb_nil_value();
+}
+/* Per-element callback for enum_to_a: appends the packed form of argc/argv to ary; `i` is unused. */
+static mrb_value
+collect_all(mrb_state *mrb, mrb_value i, mrb_value ary, int argc, mrb_value *argv)
+{
+ //mrb_thread_check_ints(); /* dummy */
+ mrb_ary_push(mrb, ary, enum_values_pack(mrb, argc, argv));
+
+ return mrb_nil_value();
+}
+
+/* 15.3.2.2.3 */
+/* 15.3.2.2.12 */
+/*
+ * call-seq:
+ * enum.collect {| obj | block } -> array
+ * enum.map {| obj | block } -> array
+ * enum.collect -> an_enumerator
+ * enum.map -> an_enumerator
+ *
+ * Returns a new array with the results of running <em>block</em> once
+ * for every element in <i>enum</i>.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * (1..4).collect {|i| i*i } #=> [1, 4, 9, 16]
+ * (1..4).collect { "cat" } #=> ["cat", "cat", "cat", "cat"]
+ *
+ */
+
+static mrb_value
+enum_collect(mrb_state *mrb, mrb_value obj)
+{
+  /* the enumerator form (no block) is not implemented */
+  mrb_value mapped = mrb_ary_new(mrb);
+
+  /* NOTE(review): &mapped is passed as data2 while collect_i takes the
+   * array by value -- confirm how the two are meant to connect */
+  mrb_block_call(mrb, obj, id_each, 0, 0, collect_i, &mapped);
+  return mapped;
+}
+
+/* 15.3.2.2.6 */
+/* 15.3.2.2.20 */
+/*
+ * call-seq:
+ * enum.to_a -> array
+ * enum.entries -> array
+ *
+ * Returns an array containing the items in <i>enum</i>.
+ *
+ * (1..7).to_a #=> [1, 2, 3, 4, 5, 6, 7]
+ * { 'a'=>1, 'b'=>2, 'c'=>3 }.to_a #=> [["a", 1], ["b", 2], ["c", 3]]
+ */
+static mrb_value
+enum_to_a(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj)
+{
+  mrb_value items;
+
+  /* argc/argv are forwarded to the receiver's each */
+  items = mrb_ary_new(mrb);
+  mrb_block_call(mrb, obj, id_each, argc, argv, collect_all, &items);
+
+  return items;
+}
+/* Method entry point: collects all call arguments with the "*" spec and forwards them to enum_to_a(). */
+static mrb_value
+enum_to_a_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_value *argv;
+ int argc;
+
+ mrb_get_args(mrb, "*", &argv, &argc);
+ return enum_to_a(mrb, argc, argv, self);
+}
+/*
+ * Per-element callback for enum_inject (block form): the first element seeds
+ * memo[0] while it still carries the MRB_TT_FREE sentinel; afterwards memo[0]
+ * becomes the result of yielding (memo[0], i).
+ * NOTE(review): memo aliases the by-value parameter `p`, so these writes
+ * land in a local copy -- confirm how the accumulator reaches the caller.
+ */
+static mrb_value
+inject_i(mrb_state *mrb, mrb_value i, mrb_value p, int argc, mrb_value *argv)
+{
+ mrb_value *memo = &p;
+
+ ENUM_WANT_SVALUE(mrb);
+
+ if (memo[0].tt == MRB_TT_FREE) {
+ memo[0] = i;
+ }
+ else {
+ memo[0] = mrb_yield_values(2, memo[0], i);
+ }
+ return mrb_nil_value();
+}
+/*
+ * Per-element callback for enum_inject (symbol form): like inject_i, but
+ * combines by calling the method named by the symbol in memo[1] on memo[0]
+ * with i as the argument.
+ * NOTE(review): memo aliases the by-value parameter `p` (one mrb_value), so
+ * memo[1] reads past it and writes stay local -- confirm the intended wiring.
+ */
+static mrb_value
+inject_op_i(mrb_state *mrb, mrb_value i, mrb_value p, int argc, mrb_value *argv)
+{
+ mrb_value *memo = &p;
+
+ ENUM_WANT_SVALUE(mrb);
+
+ if (memo[0].tt == MRB_TT_FREE) {
+ memo[0] = i;
+ }
+ else {
+ memo[0] = mrb_funcall(mrb, memo[0], mrb_sym2name(mrb, SYM2ID(memo[1])), 1, i);
+ }
+ return mrb_nil_value();
+}
+
+/* 15.3.2.2.11 */
+/*
+ * call-seq:
+ * enum.inject(initial, sym) -> obj
+ * enum.inject(sym) -> obj
+ * enum.inject(initial) {| memo, obj | block } -> obj
+ * enum.inject {| memo, obj | block } -> obj
+ *
+ * enum.reduce(initial, sym) -> obj
+ * enum.reduce(sym) -> obj
+ * enum.reduce(initial) {| memo, obj | block } -> obj
+ * enum.reduce {| memo, obj | block } -> obj
+ *
+ * Combines all elements of <i>enum</i> by applying a binary
+ * operation, specified by a block or a symbol that names a
+ * method or operator.
+ *
+ * If you specify a block, then for each element in <i>enum</i>
+ * the block is passed an accumulator value (<i>memo</i>) and the element.
+ * If you specify a symbol instead, then each element in the collection
+ * will be passed to the named method of <i>memo</i>.
+ * In either case, the result becomes the new value for <i>memo</i>.
+ * At the end of the iteration, the final value of <i>memo</i> is the
+ * return value of the method.
+ *
+ * If you do not explicitly specify an <i>initial</i> value for <i>memo</i>,
+ * then the first element of the collection is used as the initial value
+ * of <i>memo</i>.
+ *
+ * Examples:
+ *
+ * # Sum some numbers
+ * (5..10).reduce(:+) #=> 45
+ * # Same using a block and inject
+ * (5..10).inject {|sum, n| sum + n } #=> 45
+ * # Multiply some numbers
+ * (5..10).reduce(1, :*) #=> 151200
+ * # Same using a block
+ * (5..10).inject(1) {|product, n| product * n } #=> 151200
+ * # find the longest word
+ * longest = %w{ cat sheep bear }.inject do |memo,word|
+ * memo.length > word.length ? memo : word
+ * end
+ * longest #=> "sheep"
+ *
+ */
+/*
+ * Core of Enumerable#inject / #reduce.
+ *
+ * memo[0] is the accumulator (MRB_TT_FREE tag = "no initial value, seed
+ * from the first element"); memo[1] is the operator symbol for the symbol
+ * forms. Argument handling:
+ *   inject               -> block form, no initial value
+ *   inject(init) { }     -> block form, initial value argv[0]
+ *   inject(sym)          -> symbol form, no initial value
+ *   inject(init, sym)    -> symbol form, initial value argv[0]
+ * Returns the final accumulator, or nil if it was never set.
+ */
+static mrb_value
+enum_inject(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj)
+{
+  mrb_value memo[2];
+  mrb_value (*iter)(mrb_state *mrb, mrb_value, mrb_value, int, mrb_value*) = inject_i;
+
+  /* start from a fully defined state so no path reads an unset memo */
+  memo[0].tt = MRB_TT_FREE;
+  memo[1] = mrb_nil_value();
+
+  switch (argc) {
+  case 0:
+    break;
+  case 1:
+    if (mrb_block_given_p()) {
+      /* inject(init) { ... }: the single argument is the initial value */
+      memo[0] = argv[0];
+      break;
+    }
+    memo[1] = mrb_symbol_value(mrb_to_id(mrb, argv[0]));
+    iter = inject_op_i;
+    break;
+  case 2:
+    if (mrb_block_given_p()) {
+      mrb_warning("given block not used");
+    }
+    memo[0] = argv[0];
+    memo[1] = mrb_symbol_value(mrb_to_id(mrb, argv[1]));
+    iter = inject_op_i;
+    break;
+  default:
+    /* NOTE(review): arity is not validated here; extra arguments are
+     * ignored and the call behaves like the no-argument form. */
+    break;
+  }
+  mrb_block_call(mrb, obj, id_each, 0, 0, iter, memo);
+  if (memo[0].tt == MRB_TT_FREE) return mrb_nil_value();
+  return memo[0];
+}
+/* Method entry point: collects all call arguments with the "*" spec and forwards them to enum_inject(). */
+static mrb_value
+enum_inject_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_value *argv;
+ int argc;
+
+ mrb_get_args(mrb, "*", &argv, &argc);
+ return enum_inject(mrb, argc, argv, self);
+}
+
+/*
+ * Per-element callback for enum_partition: the packed element goes to
+ * ary[0] when the block result is truthy, to ary[1] otherwise.
+ * Always returns nil.
+ */
+static mrb_value
+partition_i(mrb_state *mrb, mrb_value i, mrb_value *ary, int argc, mrb_value *argv)
+{
+  int bucket;
+
+  ENUM_WANT_SVALUE(mrb);
+
+  bucket = RTEST(mrb_yield(i)) ? 0 : 1;
+  mrb_ary_push(mrb, ary[bucket], i);
+  return mrb_nil_value();
+}
+
+/* 15.3.2.2.16 */
+/*
+ * call-seq:
+ * enum.partition {| obj | block } -> [ true_array, false_array ]
+ * enum.partition -> an_enumerator
+ *
+ * Returns two arrays, the first containing the elements of
+ * <i>enum</i> for which the block evaluates to true, the second
+ * containing the rest.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * (1..6).partition {|i| (i&1).zero?} #=> [[2, 4, 6], [1, 3, 5]]
+ *
+ */
+
+static mrb_value
+enum_partition(mrb_state *mrb, mrb_value obj)
+{
+  /* parts[0]: elements the block accepted, parts[1]: the rest */
+  mrb_value parts[2];
+
+  /* the enumerator form (no block) is not implemented */
+  parts[0] = mrb_ary_new(mrb);
+  parts[1] = mrb_ary_new(mrb);
+
+  mrb_block_call(mrb, obj, id_each, 0, 0, partition_i, parts);
+  return mrb_assoc_new(mrb, parts[0], parts[1]);
+}
+
+/* 15.3.2.2.19 */
+/*
+ * call-seq:
+ * enum.sort -> array
+ * enum.sort {| a, b | block } -> array
+ *
+ * Returns an array containing the items in <i>enum</i> sorted,
+ * either according to their own <code><=></code> method, or by using
+ * the results of the supplied block. The block should return -1, 0, or
+ * +1 depending on the comparison between <i>a</i> and <i>b</i>. As of
+ * Ruby 1.8, the method <code>Enumerable#sort_by</code> implements a
+ * built-in Schwartzian Transform, useful when key computation or
+ * comparison is expensive.
+ *
+ * %w(rhea kea flea).sort #=> ["flea", "kea", "rhea"]
+ * (1..10).sort {|a,b| b <=> a} #=> [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]
+ */
+
+static mrb_value
+enum_sort(mrb_state *mrb, mrb_value obj)
+{
+  /* materialize the elements first, then hand the array to mrb_ary_sort */
+  mrb_value items = enum_to_a(mrb, 0, 0, obj);
+
+  return mrb_ary_sort(mrb, items);
+}
+
+/*
+ * call-seq:
+ * enum.sort_by {| obj | block } -> array
+ * enum.sort_by -> an_enumerator
+ *
+ * Sorts <i>enum</i> using a set of keys generated by mapping the
+ * values in <i>enum</i> through the given block.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * %w{ apple pear fig }.sort_by {|word| word.length}
+ * #=> ["fig", "pear", "apple"]
+ *
+ * The current implementation of <code>sort_by</code> generates an
+ * array of tuples containing the original collection element and the
+ * mapped value. This makes <code>sort_by</code> fairly expensive when
+ * the keysets are simple
+ *
+ * require 'benchmark'
+ *
+ * a = (1..100000).map {rand(100000)}
+ *
+ * Benchmark.bm(10) do |b|
+ * b.report("Sort") { a.sort }
+ * b.report("Sort by") { a.sort_by {|a| a} }
+ * end
+ *
+ * <em>produces:</em>
+ *
+ * user system total real
+ * Sort 0.180000 0.000000 0.180000 ( 0.175469)
+ * Sort by 1.980000 0.040000 2.020000 ( 2.013586)
+ *
+ * However, consider the case where comparing the keys is a non-trivial
+ * operation. The following code sorts some files on modification time
+ * using the basic <code>sort</code> method.
+ *
+ * files = Dir["*"]
+ * sorted = files.sort {|a,b| File.new(a).mtime <=> File.new(b).mtime}
+ * sorted #=> ["mon", "tues", "wed", "thurs"]
+ *
+ * This sort is inefficient: it generates two new <code>File</code>
+ * objects during every comparison. A slightly better technique is to
+ * use the <code>Kernel#test</code> method to generate the modification
+ * times directly.
+ *
+ * files = Dir["*"]
+ * sorted = files.sort { |a,b|
+ * test(?M, a) <=> test(?M, b)
+ * }
+ * sorted #=> ["mon", "tues", "wed", "thurs"]
+ *
+ * This still generates many unnecessary <code>Time</code> objects. A
+ * more efficient technique is to cache the sort keys (modification
+ * times in this case) before the sort. Perl users often call this
+ * approach a Schwartzian Transform, after Randal Schwartz. We
+ * construct a temporary array, where each element is an array
+ * containing our sort key along with the filename. We sort this array,
+ * and then extract the filename from the result.
+ *
+ * sorted = Dir["*"].collect { |f|
+ * [test(?M, f), f]
+ * }.sort.collect { |f| f[1] }
+ * sorted #=> ["mon", "tues", "wed", "thurs"]
+ *
+ * This is exactly what <code>sort_by</code> does internally.
+ *
+ * sorted = Dir["*"].sort_by {|f| test(?M, f)}
+ * sorted #=> ["mon", "tues", "wed", "thurs"]
+ */
+
+#define ENUMFUNC(name) (mrb_block_given_p() ? name##_iter_i : name##_i) /* callback choice: the yielding variant when a block was given */
+/* DEFINE_ENUMFUNCS(mrb, name) emits name##_i and name##_iter_i, both forwarding to enum_##name##_func; the braces after the macro use supply that function's body. */
+#define DEFINE_ENUMFUNCS(mrb, name) \
+static mrb_value enum_##name##_func(mrb_value result, mrb_value *memo); \
+\
+static mrb_value \
+name##_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) \
+{ \
+  return enum_##name##_func(enum_values_pack(mrb, argc, argv), memo); \
+} \
+\
+static mrb_value \
+name##_iter_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv) \
+{ \
+  return enum_##name##_func(enum_yield(argc, argv), memo); \
+} \
+\
+static mrb_value \
+enum_##name##_func(mrb_value result, mrb_value *memo)
+
+DEFINE_ENUMFUNCS(mrb, all)
+{
+ if (!RTEST(result)) { /* first falsy value settles the answer */
+ *memo = mrb_false_value();
+ mrb_iter_break(); /* presumably stops the surrounding iteration -- defined outside this chunk, confirm */
+ }
+ return mrb_nil_value();
+}
+
+/* 15.3.2.2.1 */
+/*
+ * call-seq:
+ * enum.all? [{|obj| block } ] -> true or false
+ *
+ * Passes each element of the collection to the given block. The method
+ * returns <code>true</code> if the block never returns
+ * <code>false</code> or <code>nil</code>. If the block is not given,
+ * Ruby adds an implicit block of <code>{|obj| obj}</code> (that is
+ * <code>all?</code> will return <code>true</code> only if none of the
+ * collection members are <code>false</code> or <code>nil</code>.)
+ *
+ * %w{ant bear cat}.all? {|word| word.length >= 3} #=> true
+ * %w{ant bear cat}.all? {|word| word.length >= 4} #=> false
+ * [ nil, true, 99 ].all? #=> false
+ *
+ */
+
+static mrb_value
+enum_all(mrb_state *mrb, mrb_value obj)
+{
+ mrb_value result = mrb_true_value(); /* stays true unless the callback overwrites it */
+
+ mrb_block_call(mrb, obj, id_each, 0, 0, ENUMFUNC(all), &result);
+ return result;
+}
+
+DEFINE_ENUMFUNCS(mrb, any)
+{
+ if (RTEST(result)) { /* first truthy value settles the answer */
+ *memo = mrb_true_value();
+ mrb_iter_break(); /* presumably stops the surrounding iteration -- defined outside this chunk, confirm */
+ }
+ return mrb_nil_value();
+}
+
+/* 15.3.2.2.2 */
+/*
+ * call-seq:
+ * enum.any? [{|obj| block } ] -> true or false
+ *
+ * Passes each element of the collection to the given block. The method
+ * returns <code>true</code> if the block ever returns a value other
+ * than <code>false</code> or <code>nil</code>. If the block is not
+ * given, Ruby adds an implicit block of <code>{|obj| obj}</code> (that
+ * is <code>any?</code> will return <code>true</code> if at least one
+ * of the collection members is not <code>false</code> or
+ * <code>nil</code>.)
+ *
+ * %w{ant bear cat}.any? {|word| word.length >= 3} #=> true
+ * %w{ant bear cat}.any? {|word| word.length >= 4} #=> true
+ * [ nil, true, 99 ].any? #=> true
+ *
+ */
+
+static mrb_value
+enum_any(mrb_state *mrb, mrb_value obj)
+{
+ mrb_value result = mrb_false_value(); /* stays false unless the callback overwrites it */
+
+ mrb_block_call(mrb, obj, id_each, 0, 0, ENUMFUNC(any), &result);
+ return result;
+}
+
+static mrb_value
+min_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv)
+{
+  /* each-callback used by enum_min without a block: *memo holds the smallest
+   * element so far, or the MRB_TT_FREE sentinel before the first one. */
+  ENUM_WANT_SVALUE(mrb);
+
+  if (memo->tt != MRB_TT_FREE) {
+    mrb_value order = mrb_funcall(mrb, i, "<=>", 1, *memo);
+
+    if (mrb_cmpint(mrb, order, i, *memo) >= 0) {
+      return mrb_nil_value();   /* the current minimum stands */
+    }
+  }
+
+  *memo = i;
+  return mrb_nil_value();
+}
+
+static mrb_value
+min_ii(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv)
+{
+  /* each-callback used by enum_min with a block: the block's result decides
+   * the order; *memo starts out as the MRB_TT_FREE sentinel. */
+  ENUM_WANT_SVALUE(mrb);
+
+  if (memo->tt != MRB_TT_FREE) {
+    mrb_value order = mrb_yield_values(2, i, *memo);
+
+    if (mrb_cmpint(mrb, order, i, *memo) >= 0) {
+      return mrb_nil_value();   /* the current minimum stands */
+    }
+  }
+
+  *memo = i;
+  return mrb_nil_value();
+}
+
+/* 15.3.2.2.14 */
+/*
+ * call-seq:
+ * enum.min -> obj
+ * enum.min {| a,b | block } -> obj
+ *
+ * Returns the object in <i>enum</i> with the minimum value. The
+ * first form assumes all objects implement <code>Comparable</code>;
+ * the second uses the block to return <em>a <=> b</em>.
+ *
+ * a = %w(albatross dog horse)
+ * a.min #=> "albatross"
+ * a.min {|a,b| a.length <=> b.length } #=> "dog"
+ */
+
+static mrb_value
+enum_min(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value smallest;
+
+  /* MRB_TT_FREE is the "nothing seen yet" sentinel the callbacks test for. */
+  smallest.tt = MRB_TT_FREE;
+
+  /* min_ii asks the block for the ordering; min_i calls <=> itself. */
+  mrb_block_call(mrb, obj, id_each, 0, 0,
+                 mrb_block_given_p() ? min_ii : min_i, &smallest);
+
+  /* Sentinel still in place: no element was stored. */
+  return (smallest.tt == MRB_TT_FREE) ? mrb_nil_value() : smallest;
+}
+
+static mrb_value
+max_i(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv)
+{
+  /* each-callback used by enum_max without a block: *memo holds the largest
+   * element so far, or the MRB_TT_FREE sentinel before the first one. */
+  ENUM_WANT_SVALUE(mrb);
+
+  if (memo->tt != MRB_TT_FREE) {
+    mrb_value order = mrb_funcall(mrb, i, "<=>", 1, *memo);
+
+    if (mrb_cmpint(mrb, order, i, *memo) <= 0) {
+      return mrb_nil_value();   /* the current maximum stands */
+    }
+  }
+
+  *memo = i;
+  return mrb_nil_value();
+}
+
+static mrb_value
+max_ii(mrb_state *mrb, mrb_value i, mrb_value *memo, int argc, mrb_value *argv)
+{
+  /* each-callback used by enum_max with a block: the block's result decides
+   * the order; *memo starts out as the MRB_TT_FREE sentinel. */
+  ENUM_WANT_SVALUE(mrb);
+
+  if (memo->tt != MRB_TT_FREE) {
+    mrb_value order = mrb_yield_values(2, i, *memo);
+
+    if (mrb_cmpint(mrb, order, i, *memo) <= 0) {
+      return mrb_nil_value();   /* the current maximum stands */
+    }
+  }
+
+  *memo = i;
+  return mrb_nil_value();
+}
+
+/* 15.3.2.2.13 */
+/*
+ * call-seq:
+ * enum.max -> obj
+ * enum.max {|a,b| block } -> obj
+ *
+ * Returns the object in _enum_ with the maximum value. The
+ * first form assumes all objects implement <code>Comparable</code>;
+ * the second uses the block to return <em>a <=> b</em>.
+ *
+ * a = %w(albatross dog horse)
+ * a.max #=> "horse"
+ * a.max {|a,b| a.length <=> b.length } #=> "albatross"
+ */
+
+static mrb_value
+enum_max(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value largest;
+
+  /* MRB_TT_FREE is the "nothing seen yet" sentinel the callbacks test for. */
+  largest.tt = MRB_TT_FREE;
+
+  /* max_ii asks the block for the ordering; max_i calls <=> itself. */
+  mrb_block_call(mrb, obj, id_each, 0, 0,
+                 mrb_block_given_p() ? max_ii : max_i, &largest);
+
+  /* Sentinel still in place: no element was stored. */
+  return (largest.tt == MRB_TT_FREE) ? mrb_nil_value() : largest;
+}
+
+static mrb_value
+member_i(mrb_state *mrb, mrb_value iter, mrb_value *memo, int argc, mrb_value *argv)
+{
+ if (mrb_equal(mrb, enum_values_pack(mrb, argc, argv), memo[0])) { /* memo[0]: the value being searched for */
+ memo[1] = mrb_true_value(); /* memo[1]: result slot that enum_member returns */
+ mrb_iter_break();
+ }
+ return mrb_nil_value();
+}
+
+/* 15.3.2.2.10 */
+/* 15.3.2.2.15 */
+/*
+ * call-seq:
+ * enum.include?(obj) -> true or false
+ * enum.member?(obj) -> true or false
+ *
+ * Returns <code>true</code> if any member of <i>enum</i> equals
+ * <i>obj</i>. Equality is tested using <code>==</code>.
+ *
+ * IO.constants.include? :SEEK_SET #=> true
+ * IO.constants.include? :SEEK_NO_FURTHER #=> false
+ *
+ */
+
+static mrb_value
+enum_member(mrb_state *mrb, mrb_value obj)
+{
+  /* state[0]: value searched for; state[1]: answer, set to true by member_i on a match */
+  mrb_value state[2];
+
+  mrb_get_args(mrb, "o", &state[0]);
+  state[1] = mrb_false_value();
+
+  mrb_block_call(mrb, obj, id_each, 0, 0, member_i, state);
+
+  return state[1];
+}
+
+static mrb_value
+each_with_index_i(mrb_state *mrb, mrb_value i, long *memo, int argc, void *argv)
+{
+ long n = (*memo)++; /* take the current index, then advance the caller's counter */
+
+ return mrb_yield_values(2, enum_values_pack(mrb, argc, argv), mrb_fixnum_value(n));
+}
+
+/* 15.3.2.2.5 */
+/*
+ * call-seq:
+ * enum.each_with_index(*args) {|obj, i| block } -> enum
+ * enum.each_with_index(*args) -> an_enumerator
+ *
+ * Calls <em>block</em> with two arguments, the item and its index,
+ * for each item in <i>enum</i>. Given arguments are passed through
+ * to #each().
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * hash = Hash.new
+ * %w(cat dog wombat).each_with_index {|item, index|
+ * hash[item] = index
+ * }
+ * hash #=> {"cat"=>0, "dog"=>1, "wombat"=>2}
+ *
+ */
+
+static mrb_value
+enum_each_with_index(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj)
+{
+  long counter = 0;   /* running index handed to each_with_index_i */
+
+  /* RETURN_ENUMERATOR is disabled, so no enumerator is returned here. */
+  //RETURN_ENUMERATOR(obj, argc, argv);
+
+  mrb_block_call(mrb, obj, id_each, argc, argv, each_with_index_i, &counter);
+  return obj;
+}
+
+static mrb_value
+enum_each_with_index_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_value *argv;
+ int argc;
+ /* Collect every call argument and forward them to enum_each_with_index. */
+ mrb_get_args(mrb, "*", &argv, &argc);
+ return enum_each_with_index(mrb, argc, argv, self);
+}
+
+/*
+ * call-seq:
+ * enum.reverse_each(*args) {|item| block } -> enum
+ * enum.reverse_each(*args) -> an_enumerator
+ *
+ * Builds a temporary array and traverses that array in reverse order.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ */
+
+#endif
+
+/*
+ * The <code>Enumerable</code> mixin provides collection classes with
+ * several traversal and searching methods, and with the ability to
+ * sort. The class must provide a method <code>each</code>, which
+ * yields successive members of the collection. If
+ * <code>Enumerable#max</code>, <code>#min</code>, or
+ * <code>#sort</code> is used, the objects in the collection must also
+ * implement a meaningful <code><=></code> operator, as these methods
+ * rely on an ordering between members of the collection.
+ */
+
+void
+mrb_init_enumerable(mrb_state *mrb)
+{
+ struct RClass *cenum;
+//#undef mrb_intern
+//#define mrb_intern(str) mrb_intern_const(str)
+ /* Only the module itself is created; every registration below sits inside #if 0. */
+ cenum = mrb_define_module(mrb, "Enumerable");
+ /* NOTE(review): the disabled lines use mrb_define_class_method; confirm that is the intended call before re-enabling them. */
+#if 0
+ //mrb_define_class_method(mrb, cenum, "all?", enum_all, ARGS_NONE()); /* 15.3.2.2.1 */
+ //mrb_define_class_method(mrb, cenum, "any?", enum_any, ARGS_NONE()); /* 15.3.2.2.2 */
+ //mrb_define_class_method(mrb, cenum, "collect", enum_collect, ARGS_NONE()); /* 15.3.2.2.3 */
+ //mrb_define_class_method(mrb, cenum, "detect", enum_find_m, ARGS_ANY()); /* 15.3.2.2.4 */
+ //mrb_define_class_method(mrb, cenum, "each_with_index", enum_each_with_index_m, ARGS_ANY()); /* 15.3.2.2.5 */
+ mrb_define_class_method(mrb, cenum, "entries", enum_to_a_m, ARGS_ANY()); /* 15.3.2.2.6 */
+ //mrb_define_class_method(mrb, cenum, "find", enum_find_m, ARGS_ANY()); /* 15.3.2.2.7 */
+ //mrb_define_class_method(mrb, cenum, "find_all", enum_find_all, ARGS_NONE()); /* 15.3.2.2.8 */
+ //mrb_define_class_method(mrb, cenum, "grep", enum_grep, ARGS_REQ(1)); /* 15.3.2.2.9 */
+ mrb_define_class_method(mrb, cenum, "include?", enum_member, ARGS_REQ(1)); /* 15.3.2.2.10 */
+ //mrb_define_class_method(mrb, cenum, "inject", enum_inject_m, ARGS_ANY()); /* 15.3.2.2.11 */
+ //mrb_define_class_method(mrb, cenum, "map", enum_collect, ARGS_NONE()); /* 15.3.2.2.12 */
+ //mrb_define_class_method(mrb, cenum, "max", enum_max, ARGS_NONE()); /* 15.3.2.2.13 */
+ //mrb_define_class_method(mrb, cenum, "min", enum_min, ARGS_NONE()); /* 15.3.2.2.14 */
+ mrb_define_class_method(mrb, cenum, "member?", enum_member, ARGS_REQ(1)); /* 15.3.2.2.15 */
+ //mrb_define_class_method(mrb, cenum, "partition", enum_partition, ARGS_NONE()); /* 15.3.2.2.16 */
+ //mrb_define_class_method(mrb, cenum, "reject", enum_reject, ARGS_NONE()); /* 15.3.2.2.17 */
+ //mrb_define_class_method(mrb, cenum, "select", enum_find_all, ARGS_NONE()); /* 15.3.2.2.18 */
+ //mrb_define_class_method(mrb, cenum, "sort", enum_sort, ARGS_NONE()); /* 15.3.2.2.19 */
+ mrb_define_class_method(mrb, cenum, "to_a", enum_to_a_m, ARGS_ANY()); /* 15.3.2.2.20 */
+ id_eqq = mrb_intern(mrb, "===");
+ id_each = mrb_intern(mrb, "each");
+ id_cmp = mrb_intern(mrb, "<=>");
+ id_next = mrb_intern(mrb, "next");
+ id_size = mrb_intern(mrb, "size");
+#endif
+}
+
diff --git a/src/error.c b/src/error.c
new file mode 100644
index 000000000..9dbfc972f
--- /dev/null
+++ b/src/error.c
@@ -0,0 +1,479 @@
+#include "mruby.h"
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+#include <setjmp.h>
+#include "error.h"
+#include "opcode.h"
+#include "irep.h"
+#include "mruby/proc.h"
+#include "mruby/numeric.h"
+#include "variable.h"
+#include "mruby/string.h"
+#include "eval_intern.h"
+#include "mruby/class.h"
+
+#define warn_printf printf
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+mrb_value
+mrb_exc_new(mrb_state *mrb, struct RClass *c, const char *ptr, long len)
+{
+ return mrb_funcall(mrb, mrb_obj_value(c), "new", 1, mrb_str_new(mrb, ptr, len)); /* calls c.new with the string mrb_str_new makes from ptr/len */
+}
+
+mrb_value
+mrb_exc_new3(mrb_state *mrb, struct RClass* c, mrb_value str)
+{
+ /* mrb_string_value stands in for the earlier StringValue(str) call */
+ mrb_string_value(mrb, &str);
+ return mrb_funcall(mrb, mrb_obj_value(c), "new", 1, str); /* calls c.new(str) */
+}
+
+//mrb_value make_exception(mrb_state *mrb, int argc, mrb_value *argv, int isstr);
+/*
+ * call-seq:
+ * Exception.new(msg = nil) -> exception
+ *
+ * Construct a new Exception object, optionally passing in
+ * a message.
+ */
+
+static mrb_value
+exc_initialize(mrb_state *mrb, mrb_value exc)
+{
+ mrb_value mesg;
+ /* NOTE(review): "o" asks for one argument while the call-seq above documents msg = nil; confirm how mrb_get_args treats a missing argument. */
+ mrb_get_args(mrb, "o", &mesg);
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "mesg"), mesg); /* keep the message in the "mesg" instance variable */
+
+ return exc;
+}
+
+/*
+ * Document-method: exception
+ *
+ * call-seq:
+ * exc.exception(string) -> an_exception or exc
+ *
+ * With no argument, or if the argument is the same as the receiver,
+ * return the receiver. Otherwise, create a new
+ * exception object of the same class as the receiver, but with a
+ * message equal to <code>string.to_str</code>.
+ *
+ */
+
+static mrb_value
+exc_exception(mrb_state *mrb, mrb_value self)
+{
+ mrb_value exc;
+ mrb_value *argv;
+ int argc;
+
+ mrb_get_args(mrb, "*", &argv, &argc);
+ /* Without an argument, or when handed itself, the receiver is returned unchanged. */
+ if (argc == 0) return self;
+ if (argc == 1 && mrb_obj_equal(mrb, self, argv[0])) return self;
+ exc = mrb_obj_clone(mrb, self);
+ exc_initialize(mrb, exc); /* exc_initialize reads the message through mrb_get_args, not from argv */
+
+ return exc;
+}
+
+/*
+ * call-seq:
+ * exception.to_s -> string
+ *
+ * Returns exception's message (or the name of the exception if
+ * no message is set).
+ */
+
+static mrb_value
+exc_to_s(mrb_state *mrb, mrb_value exc)
+{
+  mrb_value text = mrb_attr_get(mrb, exc, mrb_intern(mrb, "mesg"));
+
+  /* No stored message: answer with the class name instead. */
+  return mrb_nil_p(text) ? mrb_str_new2(mrb, mrb_obj_classname(mrb, exc)) : text;
+}
+
+/*
+ * call-seq:
+ * exception.message -> string
+ *
+ * Returns the result of invoking <code>exception.to_s</code>.
+ * Normally this returns the exception's message or name. By
+ * supplying a to_str method, exceptions are agreeing to
+ * be used where Strings are expected.
+ */
+
+static mrb_value
+exc_message(mrb_state *mrb, mrb_value exc)
+{
+ return mrb_funcall(mrb, exc, "to_s", 0); /* dispatches to to_s by name, so an overriding to_s is honoured */
+}
+
+/*
+ * call-seq:
+ * exception.inspect -> string
+ *
+ * Return this exception's class name and message
+ */
+
+static mrb_value
+exc_inspect(mrb_state *mrb, mrb_value exc)
+{
+  mrb_value name = mrb_str_new2(mrb, mrb_obj_classname(mrb, exc));
+  mrb_value text = mrb_obj_as_string(mrb, exc);
+  mrb_value out;
+
+  /* An empty message leaves nothing to wrap: the bare class name is returned. */
+  if (RSTRING_LEN(text) == 0) {
+    return name;
+  }
+  /* Assemble "#<ClassName: message>". */
+  out = mrb_str_new2(mrb, "#<");
+  mrb_str_append(mrb, out, name);
+  mrb_str_cat2(mrb, out, ": ");
+  mrb_str_append(mrb, out, text);
+  mrb_str_cat2(mrb, out, ">");
+  return out;
+}
+
+
+static mrb_value
+exc_equal(mrb_state *mrb, mrb_value exc)
+{
+  mrb_value other;
+  mrb_value other_mesg;
+  mrb_sym id_mesg = mrb_intern(mrb, "mesg");
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Identity settles it immediately. */
+  if (mrb_obj_equal(mrb, exc, other)) return mrb_true_value();
+
+  if (mrb_obj_class(mrb, exc) == mrb_obj_class(mrb, other)) {
+    /* Same class: read the stored message directly. */
+    other_mesg = mrb_attr_get(mrb, other, id_mesg);
+  }
+  else if (mrb_respond_to(mrb, other, mrb_intern(mrb, "message"))) {
+    /* Different class: compare against whatever its message method returns. */
+    other_mesg = mrb_funcall(mrb, other, "message", 0);
+  }
+  else {
+    return mrb_false_value();
+  }
+
+  return mrb_equal(mrb, mrb_attr_get(mrb, exc, id_mesg), other_mesg) ? mrb_true_value() : mrb_false_value();
+}
+
+void
+mrb_exc_raise(mrb_state *mrb, mrb_value exc)
+{
+ mrb->exc = mrb_object(exc); /* record the exception on the interpreter state */
+ longjmp(*(jmp_buf*)mrb->jmp, 1); /* does not return; NOTE(review): assumes mrb->jmp points at a live jmp_buf -- confirm */
+}
+
+void
+mrb_raise_va(mrb_state *mrb, struct RClass *c, const char *fmt, va_list args)
+{
+  char text[256];
+  /* Format into a fixed buffer, then raise a new instance of c carrying it. */
+  vsnprintf(text, sizeof(text), fmt, args);
+  mrb_exc_raise(mrb, mrb_exc_new(mrb, c, text, strlen(text)));
+}
+
+void
+mrb_raise(mrb_state *mrb, struct RClass *c, const char *fmt, ...)
+{
+  va_list args;
+  char buf[256];
+  /* Format once and release the va_list before raising: mrb_exc_raise leaves via longjmp. */
+  va_start(args, fmt);
+  vsnprintf(buf, sizeof(buf), fmt, args);
+  va_end(args);
+  mrb_exc_raise(mrb, mrb_exc_new(mrb, c, buf, strlen(buf)));
+}
+
+void
+mrb_name_error(mrb_state *mrb, mrb_sym id, const char *fmt, ...)
+{
+  mrb_value err, ctor_args[2];
+  va_list ap;
+  char text[256];
+
+  /* ctor_args[0]: the formatted message (cut off at the buffer size). */
+  va_start(ap, fmt);
+  vsnprintf(text, sizeof(text), fmt, ap);
+  ctor_args[0] = mrb_str_new(mrb, text, strlen(text));
+  va_end(ap);
+  /* ctor_args[1]: the name behind id, as a string. */
+  ctor_args[1] = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));
+  err = mrb_class_new_instance(mrb, 2, ctor_args, E_NAME_ERROR);
+  mrb_exc_raise(mrb, err);
+}
+mrb_value
+mrb_sprintf(mrb_state *mrb, const char *fmt, ...)
+{
+  char text[256];
+  va_list ap;
+  /* Output that does not fit the buffer is cut off by vsnprintf. */
+  va_start(ap, fmt);
+  vsnprintf(text, sizeof(text), fmt, ap);
+  va_end(ap);
+  return mrb_str_new(mrb, text, strlen(text));
+}
+
+void
+mrb_warn(const char *fmt, ...)
+{
+  va_list args;
+
+  /* Print the prefix, then let vprintf consume the va_list (printf cannot take one). */
+  va_start(args, fmt);
+  fputs("warning: ", stdout);
+  vprintf(fmt, args);
+  va_end(args);
+}
+
+
+void
+mrb_warning(const char *fmt, ...)
+{
+  va_list args;
+
+  /* Print the prefix, then let vprintf consume the va_list (printf cannot take one). */
+  va_start(args, fmt);
+  fputs("warning: ", stdout);
+  vprintf(fmt, args);
+  va_end(args);
+}
+
+void
+mrb_bug(const char *fmt, ...)
+{
+  va_list args;
+
+  /* Print the prefix, then let vprintf consume the va_list (printf cannot take one). */
+  va_start(args, fmt);
+  fputs("bug: ", stdout);
+  vprintf(fmt, args);
+  va_end(args);
+}
+
+static const char *
+mrb_strerrno(int err)
+{
+#define defined_error(name, num) if (err == num) return name;
+#define undefined_error(name)
+//#include "known_errors.inc"
+#undef defined_error
+#undef undefined_error
+ return NULL; /* the table include above is commented out, so no errno ever gets a name */
+}
+
+void
+mrb_bug_errno(const char *mesg, int errno_arg)
+{
+  const char *symbol;
+  /* Report mesg through mrb_bug together with the description of errno_arg. */
+  if (errno_arg == 0) {
+    mrb_bug("%s: errno == 0 (NOERROR)", mesg);
+    return;
+  }
+  symbol = mrb_strerrno(errno_arg);   /* symbolic errno name, or NULL when none is known */
+  if (symbol) mrb_bug("%s: %s (%s)", mesg, strerror(errno_arg), symbol);
+  else mrb_bug("%s: %s (%d)", mesg, strerror(errno_arg), errno_arg);
+}
+
+int
+sysexit_status(mrb_state *mrb, mrb_value err)
+{
+ mrb_value st = mrb_iv_get(mrb, err, mrb_intern(mrb, "status")); /* read the "status" instance variable of err */
+ return mrb_fixnum(st); /* NOTE(review): st is not checked to be a fixnum before conversion -- confirm callers guarantee it */
+}
+
+void
+error_pos(void)
+{
+#if 0 /* the whole body is compiled out, so this function currently does nothing */
+ const char *sourcefile = mrb_sourcefile();
+ int sourceline = mrb_sourceline();
+
+ if (sourcefile) {
+ if (sourceline == 0) {
+ warn_printf("%s", sourcefile);
+ }
+ else if (mrb_frame_callee()) {
+ warn_printf("%s:%d:in `%s'", sourcefile, sourceline,
+ mrb_sym2name(mrb, mrb_frame_callee()));
+ }
+ else {
+ warn_printf("%s:%d", sourcefile, sourceline);
+ }
+ }
+#endif
+}
+
+static void
+set_backtrace(mrb_state *mrb, mrb_value info, mrb_value bt)
+{
+ mrb_funcall(mrb, info, "set_backtrace", 1, bt); /* calls info.set_backtrace(bt); the result is discarded */
+}
+
+mrb_value
+make_exception(mrb_state *mrb, int argc, mrb_value *argv, int isstr)
+{
+  mrb_value mesg;
+  int n;
+
+  /* Build the exception for raise-style arguments. argc 0 yields nil. With one
+   * argument, a String (when isstr is set) becomes a RuntimeError message and
+   * anything else is asked for argv[0].exception. With two or three arguments
+   * argv[0].exception(argv[1]) is called; argv[2], if present, is the backtrace. */
+  mesg = mrb_nil_value();
+  switch (argc) {
+  case 0:
+    break;
+  case 1:
+    if (mrb_nil_p(argv[0]))
+      break;
+    if (isstr) {
+      mesg = mrb_check_string_type(mrb, argv[0]);
+      if (!mrb_nil_p(mesg)) {
+        mesg = mrb_exc_new3(mrb, mrb->eRuntimeError_class, mesg);
+        break;
+      }
+    }
+    n = 0;
+    goto exception_call;
+
+  case 2:
+  case 3:
+    n = 1;
+exception_call:
+    /* n is the number of arguments handed to "exception" (0 or 1).
+     * mrb_funcall takes its arguments as individual mrb_value varargs,
+     * so argv[1] is passed by value; passing the pointer argv+1 would be
+     * read back as an mrb_value and yield garbage. */
+    if (mrb_respond_to(mrb, argv[0], mrb_intern(mrb, "exception"))) {
+      mesg = n ? mrb_funcall(mrb, argv[0], "exception", 1, argv[1]) : mrb_funcall(mrb, argv[0], "exception", 0);
+    }
+    else {
+      /* undef */
+      mrb_raise(mrb, E_TYPE_ERROR, "exception class/object expected");
+    }
+
+    break;
+  default:
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 0..3)", argc);
+    break;
+  }
+  if (argc > 0) {
+    if (!mrb_obj_is_kind_of(mrb, mesg, mrb->eException_class))
+      mrb_raise(mrb, E_TYPE_ERROR, "exception object expected");
+    if (argc > 2)
+      set_backtrace(mrb, mesg, argv[2]);
+  }
+
+  return mesg;
+}
+
+mrb_value
+mrb_make_exception(mrb_state *mrb, int argc, mrb_value *argv)
+{
+ return make_exception(mrb, argc, argv, TRUE); /* isstr = TRUE: a lone string argument is accepted as a message */
+}
+
+void
+mrb_sys_fail(mrb_state *mrb, const char *mesg)
+{
+ mrb_raise(mrb, mrb->eRuntimeError_class, "%s", mesg); /* raises RuntimeError with mesg; errno is not consulted here */
+}
+
+static mrb_value
+mrb_exc_c_exception(mrb_state *mrb, mrb_value exc)
+{
+ mrb_value *argv;
+ int argc;
+ /* Forwards all call arguments to mrb_make_exception; exc itself is unused. */
+ mrb_get_args(mrb, "*", &argv, &argc);
+ return mrb_make_exception(mrb, argc, argv);
+}
+
+static mrb_value
+mrb_exc_exception(mrb_state *mrb, mrb_value exc)
+{
+ mrb_value *argv;
+ int argc;
+ mrb_value exclass;
+ /* NOTE(review): every other mrb_funcall call in this file passes a C-string name and mrb_value arguments; the call below passes a symbol and an mrb_value* -- check against mrb_funcall's prototype before using this function. */
+ mrb_get_args(mrb, "*", &argv, &argc);
+ if (argc == 0) return exc;
+ exclass = mrb_obj_value(mrb_class(mrb, exc));
+ return mrb_funcall(mrb, exclass, mrb_intern(mrb, "exception"), argc, argv);
+}
+
+void
+mrb_init_exception(mrb_state *mrb)
+{
+ struct RClass *e;
+ struct RClass *eTypeError_class;
+ struct RClass *eArgumentError_class;
+ struct RClass *eIndexError_class;
+ struct RClass *eRangeError_class;
+ struct RClass *eNameError_class;
+ struct RClass *eNoMethodError_class;
+ struct RClass *eScriptError_class;
+ struct RClass *eSyntaxError_class;
+ struct RClass *eLoadError_class;
+ struct RClass *eSystemCallError_class;
+ struct RClass *eLocalJumpError_class;
+ struct RClass *eRegexpError_class;
+ struct RClass *eZeroDivisionError_class;
+ struct RClass *eEncodingError_class;
+ struct RClass *eNotImpError_class;
+ struct RClass *eFloatDomainError_class;
+ struct RClass *eKeyError_class;
+ /* Only e, eNameError_class, eScriptError_class, eRangeError_class and eIndexError_class are read again below; the other locals just receive the new class. */
+ mrb->eException_class = e = mrb_define_class(mrb, "Exception", mrb->object_class); /* 15.2.22 */
+ mrb_define_class_method(mrb, e, "exception", mrb_instance_new, ARGS_ANY());
+ mrb_define_method(mrb, e, "exception", exc_exception, ARGS_ANY());
+ mrb_define_method(mrb, e, "initialize", exc_initialize, ARGS_ANY());
+ mrb_define_method(mrb, e, "==", exc_equal, ARGS_REQ(1));
+ mrb_define_method(mrb, e, "to_s", exc_to_s, ARGS_NONE());
+ mrb_define_method(mrb, e, "message", exc_message, ARGS_NONE());
+ mrb_define_method(mrb, e, "inspect", exc_inspect, ARGS_NONE());
+ /* Exception, StandardError and RuntimeError are stored on mrb; the remaining classes are only defined by name. */
+ mrb->eStandardError_class = mrb_define_class(mrb, "StandardError", mrb->eException_class); /* 15.2.23 */
+ mrb->eRuntimeError_class = mrb_define_class(mrb, "RuntimeError", mrb->eStandardError_class); /* 15.2.28 */
+
+ eTypeError_class = mrb_define_class(mrb, "TypeError", mrb->eStandardError_class); /* 15.2.29 */
+ eArgumentError_class = mrb_define_class(mrb, "ArgumentError", mrb->eStandardError_class); /* 15.2.24 */
+ eIndexError_class = mrb_define_class(mrb, "IndexError", mrb->eStandardError_class); /* 15.2.33 */
+ eRangeError_class = mrb_define_class(mrb, "RangeError", mrb->eStandardError_class); /* 15.2.26 */
+ eNameError_class = mrb_define_class(mrb, "NameError", mrb->eStandardError_class); /* 15.2.31 */
+
+ eNoMethodError_class = mrb_define_class(mrb, "NoMethodError", eNameError_class); /* 15.2.32 */
+ eScriptError_class = mrb_define_class(mrb, "ScriptError", mrb->eException_class); /* 15.2.37 */
+ eSyntaxError_class = mrb_define_class(mrb, "SyntaxError", eScriptError_class); /* 15.2.38 */
+ eLoadError_class = mrb_define_class(mrb, "LoadError", eScriptError_class); /* 15.2.39 */
+ eSystemCallError_class = mrb_define_class(mrb, "SystemCallError", mrb->eStandardError_class); /* 15.2.36 */
+ eLocalJumpError_class = mrb_define_class(mrb, "LocalJumpError", mrb->eStandardError_class); /* 15.2.25 */
+ eRegexpError_class = mrb_define_class(mrb, "RegexpError", mrb->eStandardError_class); /* 15.2.27 */
+ eZeroDivisionError_class = mrb_define_class(mrb, "ZeroDivisionError", mrb->eStandardError_class); /* 15.2.30 */
+
+ eEncodingError_class = mrb_define_class(mrb, "EncodingError", mrb->eStandardError_class);
+ eNotImpError_class = mrb_define_class(mrb, "NotImplementedError", eScriptError_class);
+
+ eFloatDomainError_class = mrb_define_class(mrb, "FloatDomainError", eRangeError_class);
+ eKeyError_class = mrb_define_class(mrb, "KeyError", eIndexError_class);
+}
diff --git a/src/error.h b/src/error.h
new file mode 100644
index 000000000..5b1873b1d
--- /dev/null
+++ b/src/error.h
@@ -0,0 +1,21 @@
+#ifndef MRUBY_ERROR_H
+#define MRUBY_ERROR_H
+/* Exception objects carry nothing beyond the common object header. */
+struct RException {
+ MRUBY_OBJECT_HEADER;
+};
+/* Error helpers; all but mrb_exc_print are defined in error.c. */
+void mrb_sys_fail(mrb_state *mrb, const char *mesg);
+void mrb_exc_raise(mrb_state *mrb, mrb_value mesg);
+void mrb_bug_errno(const char*, int);
+int sysexit_status(mrb_state *mrb, mrb_value err);
+void error_pos(void);
+mrb_value mrb_exc_new3(mrb_state *mrb, struct RClass* c, mrb_value str);
+mrb_value make_exception(mrb_state *mrb, int argc, mrb_value *argv, int isstr);
+mrb_value mrb_exc_new(mrb_state *mrb, struct RClass *c, const char *ptr, long len);
+mrb_value mrb_make_exception(mrb_state *mrb, int argc, mrb_value *argv);
+mrb_value mrb_sprintf(mrb_state *mrb, const char *fmt, ...);
+void mrb_name_error(mrb_state *mrb, mrb_sym id, const char *fmt, ...);
+void mrb_exc_print(mrb_state *mrb, struct RObject *exc); /* no definition in error.c */
+
+#endif /* MRUBY_ERROR_H */
diff --git a/src/etc.c b/src/etc.c
new file mode 100644
index 000000000..8c98700a3
--- /dev/null
+++ b/src/etc.c
@@ -0,0 +1,280 @@
+#include "mruby.h"
+#include "mdata.h"
+#include "mruby/string.h"
+#include "error.h"
+#include "mruby/numeric.h"
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+void
+ruby_xfree(void *x)
+{ /* currently a no-op: the release call below is commented out */
+ //if (x)
+ // vm_xfree(&mrb_objspace, x);
+}
+
+struct RData*
+mrb_data_object_alloc(mrb_state *mrb, struct RClass *klass, void *ptr, const struct mrb_data_type *type)
+{
+ struct RData *data;
+ /* Allocate an MRB_TT_DATA object of class klass, then attach the payload pointer and its type descriptor. */
+ data = mrb_obj_alloc(mrb, MRB_TT_DATA, klass);
+ data->data = ptr;
+ data->type = type;
+
+ return data;
+}
+
+void *
+mrb_check_datatype(mrb_state *mrb, mrb_value obj, const struct mrb_data_type *type)
+{
+  /* Anything that is not a heap DATA object is handed to mrb_check_type. */
+  if (SPECIAL_CONST_P(obj) || (mrb_type(obj) != MRB_TT_DATA)) {
+    mrb_check_type(mrb, obj, MRB_TT_DATA);
+  }
+  /* Matching descriptor: hand back the wrapped pointer. */
+  if (DATA_TYPE(obj) == type) {
+    return DATA_PTR(obj);
+  }
+  mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected %s)", DATA_TYPE(obj)->struct_name, type->struct_name);
+  return DATA_PTR(obj);
+}
+
+mrb_value
+mrb_lastline_get(mrb_state *mrb)
+{
+  //mrb_value *var = mrb_svar(0);
+  //if (var) {
+  // return *var;
+  //}
+  //return mrb_nil_value();
+  mrb_value *rest;
+  int count;
+
+  /* Stand-in for the disabled svar lookup above: the first argument fetched
+   * by mrb_get_args, when there is one, is returned; otherwise nil. */
+  mrb_get_args(mrb, "*", &rest, &count);
+  if (count >= 1) {
+    return rest[0];
+  }
+  return mrb_nil_value();
+}
+
+mrb_value
+mrb_rescue2(mrb_state *mrb, mrb_value (* b_proc) (ANYARGS), mrb_value *data1,
+ mrb_value (* r_proc) (ANYARGS), mrb_value *data2, ...)
+{
+ mrb_value result = (*b_proc) (mrb, data1); /* r_proc, data2 and the variadic tail are ignored: no rescue is performed here */
+ return result;
+}
+
+mrb_value
+mrb_rescue(mrb_state *mrb, mrb_value (* b_proc)(ANYARGS), mrb_value *data1,
+ mrb_value (* r_proc)(ANYARGS), mrb_value *data2)
+{
+ return mrb_rescue2(mrb, b_proc, data1, r_proc, data2, mrb->eStandardError_class, /* StandardError as the class list, closed by a fixnum 0 */
+ mrb_fixnum_value(0));
+}
+/* ------------------------------------------------ */
+/*
+ * Calls func(obj, arg, recursive), where recursive is non-zero if the
+ * current method is called recursively on obj
+ */
+
+mrb_value
+mrb_exec_recursive(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int), mrb_value obj, void *arg)
+{
+ // return mrb_exec_recursive(mrb, io_puts_ary, line, &out);
+ return func(mrb, obj, *(mrb_value*)arg, 0); /* arg is read as an mrb_value; recursive is always 0, so no recursion tracking happens here */
+}
+
+/*
+ * Calls func(obj, arg, recursive), where recursive is non-zero if the
+ * current method is called recursively on the ordered pair <obj, paired_obj>
+ */
+
+mrb_value
+mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
+ mrb_value obj, mrb_value paired_obj, void* arg)
+{
+ // return mrb_exec_recursive_paired(mrb, recursive_eql, hash1, hash2, mrb_fixnum_value((int)&data));
+ return func(mrb, obj, paired_obj, 0); /* arg is unused and recursive is always 0: no recursion tracking happens here */
+}
+
+mrb_sym
+mrb_to_id(mrb_state *mrb, mrb_value name)
+{
+  mrb_value tmp;
+
+  /* Symbols convert directly. */
+  if (mrb_type(name) == MRB_TT_SYMBOL) {
+    return SYM2ID(name);
+  }
+  if (mrb_type(name) != MRB_TT_STRING) {
+    /* Neither symbol nor string: try a string conversion before giving up. */
+    tmp = mrb_check_string_type(mrb, name);
+    if (mrb_nil_p(tmp)) {
+      tmp = mrb_inspect(mrb, name);
+      mrb_raise(mrb, E_TYPE_ERROR, "%s is not a symbol",
+                RSTRING_PTR(tmp));
+    }
+    name = tmp;
+  }
+
+  /* Intern the string and return the id of the resulting symbol. */
+  name = mrb_str_intern(mrb, name);
+  return SYM2ID(name);
+}
+
+/*
+ * call-seq:
+ * proc { |...| block } -> a_proc
+ *
+ * Equivalent to <code>Proc.new</code>.
+ */
+
+mrb_value
+mrb_block_proc(void)
+{
+ return mrb_nil_value(); /* stub: always nil; the proc_new(mrb_cProc, FALSE) call is not wired up */
+}
+
+/*
+ * Document-method: __id__
+ * Document-method: object_id
+ *
+ * call-seq:
+ * obj.__id__ -> fixnum
+ * obj.object_id -> fixnum
+ *
+ * Returns an integer identifier for <i>obj</i>. The same number will
+ * be returned on all calls to <code>id</code> for a given object, and
+ * no two active objects will share an id.
+ * <code>Object#object_id</code> is a different concept from the
+ * <code>:name</code> notation, which returns the symbol id of
+ * <code>name</code>. Replaces the deprecated <code>Object#id</code>.
+ */
+
+/*
+ * call-seq:
+ * obj.hash -> fixnum
+ *
+ * Generates a <code>Fixnum</code> hash value for this object. This
+ * function must have the property that <code>a.eql?(b)</code> implies
+ * <code>a.hash == b.hash</code>. The hash value is used by class
+ * <code>Hash</code>. Any hash value that exceeds the capacity of a
+ * <code>Fixnum</code> will be truncated before being used.
+ */
+
+int
+mrb_obj_id(mrb_value obj)
+{
+ /*
+ * 32-bit mrb_value space
+ * MSB ------------------------ LSB
+ * false 00000000000000000000000000000000
+ * true 00000000000000000000000000000010
+ * nil 00000000000000000000000000000100
+ * undef 00000000000000000000000000000110
+ * symbol ssssssssssssssssssssssss00001110
+ * object oooooooooooooooooooooooooooooo00 = 0 (mod sizeof(RVALUE))
+ * fixnum fffffffffffffffffffffffffffffff1
+ *
+ * object_id space
+ * LSB
+ * false 00000000000000000000000000000000
+ * true 00000000000000000000000000000010
+ * nil 00000000000000000000000000000100
+ * undef 00000000000000000000000000000110
+ * symbol 000SSSSSSSSSSSSSSSSSSSSSSSSSSS0 S...S % A = 4 (S...S = s...s * A + 4)
+ * object oooooooooooooooooooooooooooooo0 o...o % A = 0
+ * fixnum fffffffffffffffffffffffffffffff1 bignum if required
+ *
+ * where A = sizeof(RVALUE)/4
+ *
+ * sizeof(RVALUE) is
+ * 20 if 32-bit, double is 4-byte aligned
+ * 24 if 32-bit, double is 8-byte aligned
+ * 40 if 64-bit
+ */
+ /*
+ * 128-bit mrb_value space
+ * MSB -------- LSB
+ * x86 [0,1] [2,3] [4,5] [6,7] [8,9] [A,B] [C,D] [E,F]
+ * 7 6 5 4 3 2 1 0
+ * 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF 0123456789ABCDEF
+ * FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210 FEDCBA9876543210
+ * false 0000000000000000 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000001 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * true 0000000000000001 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000010 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * nil 0000000000000001 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000001 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * undef 0000000000000000 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000101 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * symbol ssssssssssssssss ssssssssssssssss xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000100 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * object oooooooooooooooo oooooooooooooo00 = 0 (mod sizeof(RVALUE))
+ (1)fixnum 0000000000000001 0000000000000000 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxx00000011 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * float 0000000000000001 0000000000000000 0000000000000000 0000000000000000 xxxxxxxx00000011 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ * <-- mrb_float --> xxxxxxxx00001101 xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx
+ *
+ * object_id space
+ * LSB
+ * false 0000000000000000 0000000000000000
+ * true 0000000000000000 0000000000000010
+ * nil 0000000000000000 0000000000000100
+ * undef 0000000000000000 0000000000000110
+ * symbol 000SSSSSSSSSSSS SSSSSSSSSSSSSSS0 S...S % A = 4 (S...S = s...s * A + 4)
+ * object ooooooooooooooo ooooooooooooooo0 o...o % A = 0
+ * fixnum ffffffffffffffff fffffffffffffff1 bignum if required
+ *
+ * where A = sizeof(RVALUE)/4
+ *
+ * sizeof(RVALUE) is
+ * 20 if 32-bit, double is 4-byte aligned
+ * 24 if 32-bit, double is 8-byte aligned
+ * 40 if 64-bit
+ */
+ /* tt:0_27 -- the id is chosen per value type below */
+ switch (mrb_type(obj)) {
+ case MRB_TT_FREE:
+ return 0; /* not defined */
+ case MRB_TT_FALSE:
+ if (mrb_nil_p(obj))
+ return 4;
+ return 0;
+ case MRB_TT_TRUE:
+ return 2;
+ case MRB_TT_FIXNUM:
+ return mrb_fixnum(obj)*2+1; /* odd number */
+ case MRB_TT_SYMBOL:
+ return SYM2ID(obj) * 2;
+ case MRB_TT_UNDEF:
+ return 0; /* not defined */
+ case MRB_TT_FLOAT:
+ return (int)mrb_float(obj)*2; /* even number; NOTE(review): the cast applies before the multiply, so floats sharing an integer part share an id and can equal a symbol's id -- confirm intent */
+ case MRB_TT_OBJECT:
+ case MRB_TT_CLASS:
+ case MRB_TT_MODULE:
+ case MRB_TT_ICLASS:
+ case MRB_TT_SCLASS:
+ case MRB_TT_PROC:
+ case MRB_TT_ARRAY:
+ case MRB_TT_HASH:
+ case MRB_TT_STRING:
+ case MRB_TT_RANGE:
+ case MRB_TT_REGEX:
+ case MRB_TT_STRUCT:
+ case MRB_TT_EXCEPTION:
+ case MRB_TT_MATCH:
+ case MRB_TT_FILE:
+ case MRB_TT_DATA:
+ case MRB_TT_THREAD:
+ case MRB_TT_THREADGRP:
+ default:
+ return mrb_fixnum(obj); /* even number; NOTE(review): reads the fixnum field of a non-fixnum value -- confirm what that field holds for these types */
+ }
+}
+
diff --git a/src/eval_intern.h b/src/eval_intern.h
new file mode 100644
index 000000000..c3e8cdf50
--- /dev/null
+++ b/src/eval_intern.h
@@ -0,0 +1,217 @@
+#ifndef RUBY_EVAL_INTERN_H
+#define RUBY_EVAL_INTERN_H
+
+//#include "ruby/ruby.h"
+#include "mruby.h"
+#define HAVE_STRING_H
+//#include "vm_core.h"
+#include "node.h"
+
+/* other frame flag */
+#define VM_FRAME_FLAG_PASSED 0x0100
+/*
+ * Set (th)->passed_block from the current control frame's lfp[0] (cast to
+ * a block pointer and passed through GC_GUARDED_PTR_REF), then set
+ * VM_FRAME_FLAG_PASSED in that frame's flag word.
+ */
+#define PASS_PASSED_BLOCK_TH(th) do { \
+ (th)->passed_block = GC_GUARDED_PTR_REF((mrb_block_t *)(th)->cfp->lfp[0]); \
+ (th)->cfp->flag |= VM_FRAME_FLAG_PASSED; \
+} while (0)
+
+/* Apply PASS_PASSED_BLOCK_TH to the thread returned by GET_THREAD(). */
+#define PASS_PASSED_BLOCK() do { \
+ mrb_thread_t * const __th__ = GET_THREAD(); \
+ PASS_PASSED_BLOCK_TH(__th__); \
+} while (0)
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS 0
+#endif
+#ifndef EXIT_FAILURE
+#define EXIT_FAILURE 1
+#endif
+
+#include <stdio.h>
+#include <setjmp.h>
+
+#ifdef __APPLE__
+#include <crt_externs.h>
+#endif
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+# ifndef atarist
+# ifndef alloca
+# define alloca __builtin_alloca
+# endif
+# endif /* atarist */
+#else
+# ifdef HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+#pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+void *alloca();
+# endif
+# endif /* AIX */
+# endif /* HAVE_ALLOCA_H */
+#endif /* __GNUC__ */
+
+#ifndef HAVE_STRING_H
+/* Fallback prototype used when <string.h> is unavailable.  The character
+ * to search for is passed as `int`, matching the standard declaration. */
+char *strrchr(const char *, int);
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_NET_SOCKET_H
+#include <net/socket.h>
+#endif
+
+
+
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+
+/*
+ Solaris sys/select.h switches select to select_large_fdset to support larger
+ file descriptors if FD_SETSIZE is larger than 1024 on 32bit environment.
+ But Ruby doesn't change FD_SETSIZE because fd_set is allocated dynamically.
+ So following definition is required to use select_large_fdset.
+*/
+#ifdef HAVE_SELECT_LARGE_FDSET
+#define select(n, r, w, e, t) select_large_fdset(n, r, w, e, t)
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#include <sys/stat.h>
+
+/*
+ * Execute `stmt` only when ruby_setjmp() on (th)->root_jmpbuf returns 0;
+ * `stmt` is skipped when ruby_setjmp() returns a non-zero value.
+ * The `if` is the body of the do/while(0), so the macro expands to a
+ * single statement.
+ */
+#define SAVE_ROOT_JMPBUF(th, stmt) do \
+ if (ruby_setjmp((th)->root_jmpbuf) == 0) { \
+ stmt; \
+ } while (0)
+
+/*
+ * TH_PUSH_TAG opens a `do {` block that TH_POP_TAG closes, so the two
+ * must be used as a pair within one function.  It declares the locals
+ * `_th` and `_tag`, clears _tag.tag and pushes _tag on the front of the
+ * thread's tag chain.
+ */
+#define TH_PUSH_TAG(th) do { \
+ mrb_thread_t * const _th = th; \
+ struct mrb_vm_tag _tag; \
+ _tag.tag = 0; \
+ _tag.prev = _th->tag; \
+ _th->tag = &_tag;
+
+/* Restore the previous tag and close the block opened by TH_PUSH_TAG. */
+#define TH_POP_TAG() \
+ _th->tag = _tag.prev; \
+} while (0)
+
+/* Restore the previous tag without closing the TH_PUSH_TAG block. */
+#define TH_POP_TAG2() \
+ _th->tag = _tag.prev
+
+/* Variants that operate on the thread returned by GET_THREAD(). */
+#define PUSH_TAG() TH_PUSH_TAG(GET_THREAD())
+#define POP_TAG() TH_POP_TAG()
+
+/* Call ruby_setjmp() on the buffer of the tag currently installed in _th
+ * (the local declared by TH_PUSH_TAG). */
+#define TH_EXEC_TAG() ruby_setjmp(_th->tag->buf)
+
+#define EXEC_TAG() \
+ TH_EXEC_TAG()
+
+/*
+ * Call ruby_longjmp() on the buffer of the tag currently installed in
+ * thread `th`, passing `st` as the jump value.  Both arguments are
+ * parenthesized so that expression arguments expand correctly.
+ */
+#define TH_JUMP_TAG(th, st) do { \
+  ruby_longjmp((th)->tag->buf,(st)); \
+} while (0)
+
+//#define JUMP_TAG(st) TH_JUMP_TAG(GET_THREAD(), st)
+
+/*
+ * Tag type codes.  The named tags take the values 0x1..0x8;
+ * RUBY_TAG_MASK (0xf) covers the low four bits.
+ */
+enum ruby_tag_type {
+ RUBY_TAG_RETURN = 0x1,
+ RUBY_TAG_BREAK = 0x2,
+ RUBY_TAG_NEXT = 0x3,
+ RUBY_TAG_RETRY = 0x4,
+ RUBY_TAG_REDO = 0x5,
+ RUBY_TAG_RAISE = 0x6,
+ RUBY_TAG_THROW = 0x7,
+ RUBY_TAG_FATAL = 0x8,
+ RUBY_TAG_MASK = 0xf
+};
+/* Unprefixed aliases for the enumerators above. */
+#define TAG_RETURN RUBY_TAG_RETURN
+#define TAG_BREAK RUBY_TAG_BREAK
+#define TAG_NEXT RUBY_TAG_NEXT
+#define TAG_RETRY RUBY_TAG_RETRY
+#define TAG_REDO RUBY_TAG_REDO
+#define TAG_RAISE RUBY_TAG_RAISE
+#define TAG_THROW RUBY_TAG_THROW
+#define TAG_FATAL RUBY_TAG_FATAL
+#define TAG_MASK RUBY_TAG_MASK
+
+#define NEW_THROW_OBJECT(val, pt, st) \
+ ((mrb_value)mrb_node_newnode(NODE_LIT, (mrb_value)(val), (mrb_value)(pt), (mrb_value)(st)))
+//#define SET_THROWOBJ_CATCH_POINT(obj, val)
+// (RNODE((obj))->u2.value = (val))
+//#define SET_THROWOBJ_STATE(obj, val)
+// (RNODE((obj))->u3.value = (val))
+
+#define GET_THROWOBJ_VAL(obj) ((mrb_value)RNODE((obj))->u1.value)
+#define GET_THROWOBJ_CATCH_POINT(obj) ((mrb_value*)RNODE((obj))->u2.value)
+#define GET_THROWOBJ_STATE(obj) ((int)RNODE((obj))->u3.value)
+
+#define SCOPE_TEST(f) (mrb_vm_cref()->nd_visi & (f))
+#define SCOPE_CHECK(f) (mrb_vm_cref()->nd_visi == (f))
+#define SCOPE_SET(f) (mrb_vm_cref()->nd_visi = (f))
+
+#define sysstack_error mrb_fixnum_value(0)
+
+/*
+ * Raise sysstack_error through mrb_exc_raise() when the address `margin`
+ * values above (cfp)->sp, plus sizeof(mrb_control_frame_t), reaches or
+ * passes `cfp` itself.  Every use of a macro argument is parenthesized so
+ * that expression arguments expand correctly.
+ */
+#define CHECK_STACK_OVERFLOW(mrb, cfp, margin) do \
+  if ((mrb_value *)((char *)(((mrb_value *)(cfp)->sp) + (margin)) + sizeof(mrb_control_frame_t)) >= ((mrb_value *)(cfp))) { \
+    mrb_exc_raise((mrb), sysstack_error); \
+  } \
+while (0)
+
+void mrb_thread_cleanup(void);
+void mrb_thread_wait_other_threads(void);
+
+/* Bit flags stored in a thread's raised_flag field. */
+enum {
+ RAISED_EXCEPTION = 1,
+ RAISED_STACKOVERFLOW = 2,
+ RAISED_NOMEMORY = 4
+};
+//int rb_threadptr_set_raised(mrb_thread_t *th);
+//int rb_threadptr_reset_raised(mrb_thread_t *th);
+/* set / clear / test the bits `f` in (th)->raised_flag; _clear resets all bits */
+#define mrb_thread_raised_set(th, f) ((th)->raised_flag |= (f))
+#define mrb_thread_raised_reset(th, f) ((th)->raised_flag &= ~(f))
+#define mrb_thread_raised_p(th, f) (((th)->raised_flag & (f)) != 0)
+#define mrb_thread_raised_clear(th) ((th)->raised_flag = 0)
+
+//mrb_value mrb_f_eval(int argc, mrb_value *argv, mrb_value self);
+//mrb_value mrb_make_exception(int argc, mrb_value *argv);
+#ifndef NORETURN
+# define NORETURN(x) x
+#endif
+#ifndef DEPRECATED
+# define DEPRECATED(x) x
+#endif
+
+NORETURN(void mrb_fiber_start(void));
+
+NORETURN(void rb_print_undef(mrb_value, mrb_sym, int));
+NORETURN(void rb_vm_localjump_error(const char *,mrb_value, int));
+NORETURN(void rb_vm_jump_tag_but_local_jump(int, mrb_value));
+//NORETURN(void mrb_raise_method_missing(mrb_thread_t *th, int argc, mrb_value *argv,
+// mrb_value obj, int call_status));
+
+mrb_value mrb_vm_make_jump_tag_but_local_jump(int state, mrb_value val);
+NODE *mrb_vm_cref(void);
+//mrb_value rb_vm_call_cfunc(mrb_value recv, mrb_value (*func)(mrb_value), mrb_value arg, const mrb_block_t *blockptr, mrb_value filename, mrb_value filepath);
+void mrb_vm_set_progname(mrb_value filename);
+void mrb_thread_terminate_all(mrb_state *mrb);
+//mrb_value mrb_vm_top_self();
+mrb_value mrb_vm_cbase(void);
+//int mrb_vm_get_sourceline(const mrb_control_frame_t *);
+void mrb_trap_restore_mask(void);
+
+#endif /* RUBY_EVAL_INTERN_H */
diff --git a/src/ext/.gitkeep b/src/ext/.gitkeep
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/src/ext/.gitkeep
diff --git a/src/gc.c b/src/gc.c
new file mode 100644
index 000000000..e4b1f82ba
--- /dev/null
+++ b/src/gc.c
@@ -0,0 +1,1146 @@
+#include "mruby.h"
+#include "mruby/object.h"
+#include "mruby/class.h"
+#include "mruby/array.h"
+#include "mruby/string.h"
+#include "mruby/hash.h"
+#include "mruby/range.h"
+#include "ritehash.h"
+#include <string.h>
+#include <stdio.h>
+#include "mruby/struct.h"
+#include "mruby/proc.h"
+#include "mdata.h"
+#include "mruby/numeric.h"
+
+/*
+ = Tri-color Incremental Garbage Collection
+
+ RiteVM's GC is Tri-color Incremental GC with Mark & Sweep.
+ Algorithm details are omitted.
+ Instead, the implementation-specific parts are described below.
+
+ == Object's Color
+
+ Each object is painted in one of three colors.
+
+ * White - Unmarked.
+ * Gray - Marked, But the child objects are unmarked.
+ * Black - Marked, the child objects are also marked.
+
+ == Two White Parts
+
+ White comes in two different parts, A and B.
+ In the sweep phase, the sweep target white is either A or B.
+ The sweep target white is switched just before the sweep phase.
+ e.g. A -> B -> A -> B ...
+
+ All objects are painted white when allocated.
+ This white is the one that is not the current sweep target.
+ For example, if the sweep target white is A, new objects are painted B.
+ So objects allocated during a sweep phase become targets of the next sweep phase.
+ Therefore, these objects are not released accidentally in the current sweep phase.
+
+ == Execution Timing
+
+ GC Execution Time and Each step interval are decided by live objects count.
+ List of Adjustment API:
+
+ * gc_interval_ratio_set
+ * gc_step_ratio_set
+
+ For details, see the comments for each function.
+
+ = Write Barrier
+
+ RiteVM implementers and C extension library writers must issue a write
+ barrier when storing a pointer to an object into another object's field.
+ There are two different write barriers:
+
+ * mrb_field_write_barrier
+ * mrb_write_barrier
+
+ For details, see the comments for each function.
+
+*/
+
+#ifdef INCLUDE_REGEXP
+#include "re.h"
+#endif
+
+#include "gc.h"
+
+#ifdef GC_PROFILE
+#include <sys/time.h>
+
+static double program_invoke_time = 0;
+static double gc_time = 0;
+static double gc_total_time = 0;
+
+/* Return the current time from gettimeofday() as seconds, with the
+ * microsecond part as the fraction. */
+static double
+gettimeofday_time(void)
+{
+  struct timeval now;
+  double micros;
+
+  gettimeofday(&now, NULL);
+  micros = now.tv_usec * 1e-6;
+  return now.tv_sec + micros;
+}
+
+/* Print to stderr the time elapsed since program_invoke_time. */
+#define GC_INVOKE_TIME_REPORT do {\
+ fprintf(stderr, "gc_invoke: %19.3f\n", gettimeofday_time() - program_invoke_time);\
+} while(0)
+
+/* Record the current time in gc_time as the start of a measurement. */
+#define GC_TIME_START do {\
+ gc_time = gettimeofday_time();\
+} while(0)
+
+/*
+ * Turn gc_time into the elapsed time since GC_TIME_START, add it to
+ * gc_total_time and print both together with mrb->gc_state.
+ * Uses the variable `mrb` from the scope where the macro is expanded.
+ */
+#define GC_TIME_STOP_AND_REPORT do {\
+ gc_time = gettimeofday_time() - gc_time;\
+ gc_total_time += gc_time;\
+ fprintf(stderr, "gc_state: %d\n", mrb->gc_state);\
+ fprintf(stderr, "gc_time: %30.20f\n", gc_time);\
+ fprintf(stderr, "gc_total_time: %30.20f\n\n", gc_total_time);\
+} while(0)
+#else
+#define GC_INVOKE_TIME_REPORT
+#define GC_TIME_START
+#define GC_TIME_STOP_AND_REPORT
+#endif
+
+#ifdef GC_DEBUG
+#include <assert.h>
+#define gc_assert(expect) assert(expect)
+#else
+#define gc_assert(expect) ((void)0)
+#endif
+
+#define GC_STEP_SIZE 1024
+
+
+/* Resize block `p` to `len` bytes by delegating to the allocator
+ * function stored in the state; returns whatever that function returns. */
+void*
+mrb_realloc(mrb_state *mrb, void *p, size_t len)
+{
+  void *resized = mrb->allocf(mrb, p, len);
+
+  return resized;
+}
+
+/* Allocate `len` bytes by calling the state's allocator function with a
+ * null block pointer; returns whatever that function returns. */
+void*
+mrb_malloc(mrb_state *mrb, size_t len)
+{
+  void *fresh = mrb->allocf(mrb, 0, len);
+
+  return fresh;
+}
+
+/*
+ * Allocate a zero-filled array of `nelem` elements of `len` bytes each
+ * through the state's allocator function.
+ *
+ * Returns NULL when nelem*len does not fit in size_t or when the
+ * allocator returns NULL; memory is cleared only after a successful
+ * allocation.
+ */
+void*
+mrb_calloc(mrb_state *mrb, size_t nelem, size_t len)
+{
+  size_t total;
+  void *p;
+
+  /* refuse sizes whose product would wrap around */
+  if (len != 0 && nelem > (size_t)-1 / len) {
+    return NULL;
+  }
+  total = nelem * len;
+
+  p = (mrb->allocf)(mrb, 0, total);
+  if (p != NULL) {
+    memset(p, 0, total);
+  }
+  return p;
+}
+
+/* Release block `p` by calling the state's allocator function with a
+ * length of 0; the allocator's return value is passed back. */
+void*
+mrb_free(mrb_state *mrb, void *p)
+{
+  void *result = mrb->allocf(mrb, p, 0);
+
+  return result;
+}
+
+#define HEAP_PAGE_SIZE 1024
+
+/*
+ * One page of object slots.  A page is linked into two doubly linked
+ * lists: the list of all pages (prev/next, headed by mrb->heaps) and the
+ * list of pages that still have free slots (free_prev/free_next, headed
+ * by mrb->free_heaps).
+ */
+struct heap_page {
+ struct RBasic *freelist; /* head of this page's chain of free slots */
+ struct heap_page *prev;
+ struct heap_page *next;
+ struct heap_page *free_next;
+ struct heap_page *free_prev;
+ RVALUE objects[HEAP_PAGE_SIZE]; /* the slots themselves */
+};
+
+/* Push `page` on the front of the list of all heap pages (mrb->heaps).
+ * page->prev is not written here. */
+static void
+link_heap_page(mrb_state *mrb, struct heap_page *page)
+{
+  struct heap_page *old_head = mrb->heaps;
+
+  page->next = old_head;
+  if (old_head != NULL) {
+    old_head->prev = page;
+  }
+  mrb->heaps = page;
+}
+
+/* Remove `page` from the list of all heap pages, repairing its
+ * neighbours and the list head, and clear the page's own links. */
+static void
+unlink_heap_page(mrb_state *mrb, struct heap_page *page)
+{
+  struct heap_page *before = page->prev;
+  struct heap_page *after = page->next;
+
+  if (before != NULL) {
+    before->next = after;
+  }
+  if (after != NULL) {
+    after->prev = before;
+  }
+  if (mrb->heaps == page) {
+    mrb->heaps = after;
+  }
+  page->next = NULL;
+  page->prev = NULL;
+}
+
+/* Push `page` on the front of the list of pages with free slots
+ * (mrb->free_heaps).  page->free_prev is not written here. */
+static void
+link_free_heap_page(mrb_state *mrb, struct heap_page *page)
+{
+  struct heap_page *old_head = mrb->free_heaps;
+
+  page->free_next = old_head;
+  if (old_head != NULL) {
+    old_head->free_prev = page;
+  }
+  mrb->free_heaps = page;
+}
+
+/* Remove `page` from the list of pages with free slots, repairing its
+ * neighbours and the list head, and clear the page's free-list links. */
+static void
+unlink_free_heap_page(mrb_state *mrb, struct heap_page *page)
+{
+  struct heap_page *before = page->free_prev;
+  struct heap_page *after = page->free_next;
+
+  if (before != NULL) {
+    before->free_next = after;
+  }
+  if (after != NULL) {
+    after->free_prev = before;
+  }
+  if (mrb->free_heaps == page) {
+    mrb->free_heaps = after;
+  }
+  page->free_next = NULL;
+  page->free_prev = NULL;
+}
+
+/*
+ * Allocate one zeroed heap page, thread all of its slots into the page's
+ * free list (each slot tagged MRB_TT_FREE), and link the page into both
+ * the list of all pages and the list of pages with free slots.
+ */
+static void
+add_heap(mrb_state *mrb)
+{
+  struct heap_page *page = mrb_malloc(mrb, sizeof(struct heap_page));
+  struct RBasic *head = NULL;
+  int i;
+
+  memset(page, 0, sizeof(struct heap_page));
+
+  /* slots are pushed in index order, so the last slot ends up first */
+  for (i = 0; i < HEAP_PAGE_SIZE; i++) {
+    RVALUE *slot = &page->objects[i];
+
+    slot->as.free.tt = MRB_TT_FREE;
+    slot->as.free.next = head;
+    head = &slot->as.basic;
+  }
+  page->freelist = head;
+
+  link_heap_page(mrb, page);
+  link_free_heap_page(mrb, page);
+}
+
+#define DEFAULT_GC_INTERVAL_RATIO 200
+#define DEFAULT_GC_STEP_RATIO 200
+
+/* Set up the object heap of `mrb`: empty page lists, one initial page,
+ * and the default GC interval and step ratios. */
+void
+mrb_init_heap(mrb_state *mrb)
+{
+  mrb->gc_interval_ratio = DEFAULT_GC_INTERVAL_RATIO;
+  mrb->gc_step_ratio = DEFAULT_GC_STEP_RATIO;
+
+  mrb->heaps = 0;
+  mrb->free_heaps = 0;
+  add_heap(mrb);
+
+#ifdef GC_PROFILE
+  program_invoke_time = gettimeofday_time();
+#endif
+}
+
+/*
+ * Allocate a new object slot of type `ttype` and class `cls`.
+ *
+ * An incremental GC step is run first when the live count exceeds the
+ * threshold, and a new heap page is added when no page has a free slot.
+ * The new object is zero-filled, painted with the current white and
+ * registered in the arena so that it survives until the caller stores it
+ * somewhere reachable.
+ *
+ * Raises E_TYPE_ERROR ("arena overflow error") when the arena is full.
+ * The check happens before any slot is taken or any arena entry is
+ * written, so the arena is never indexed out of bounds.  Before raising,
+ * the arena index is moved back a few entries so that the allocations
+ * needed to build the exception itself have room.
+ */
+void*
+mrb_obj_alloc(mrb_state *mrb, enum mrb_vtype ttype, struct RClass *cls)
+{
+  struct RBasic *p;
+
+  if (mrb->arena_idx >= MRB_ARENA_SIZE) {
+    /* arena overflow error */
+    mrb->arena_idx = MRB_ARENA_SIZE - 4; /* force room in arena */
+    mrb_raise(mrb, E_TYPE_ERROR, "arena overflow error");
+  }
+
+  if (mrb->gc_threshold < mrb->live) {
+    mrb_incremental_gc(mrb);
+  }
+  if (mrb->free_heaps == NULL) {
+    add_heap(mrb);
+  }
+
+  /* pop the first free slot of the first page that has one */
+  p = mrb->free_heaps->freelist;
+  mrb->free_heaps->freelist = ((struct free_obj*)p)->next;
+  if (mrb->free_heaps->freelist == NULL) {
+    unlink_free_heap_page(mrb, mrb->free_heaps);
+  }
+
+  mrb->live++;
+  mrb->arena[mrb->arena_idx++] = p;
+  memset(p, 0, sizeof(RVALUE));
+  p->tt = ttype;
+  p->c = cls;
+  paint_partial_white(mrb, p);
+  return (void*)p;
+}
+
+/* Paint `obj` gray and push it on the front of mrb->gray_list. */
+static inline void
+add_gray_list(mrb_state *mrb, struct RBasic *obj)
+{
+  struct RBasic *rest = mrb->gray_list;
+
+  paint_gray(obj);
+  obj->gcnext = rest;
+  mrb->gray_list = obj;
+}
+
+/*
+ * Blacken one gray object: paint it black, replace the head of
+ * mrb->gray_list with obj->gcnext, then mark the object's class and the
+ * objects it references according to its type.
+ */
+static void
+gc_mark_children(mrb_state *mrb, struct RBasic *obj)
+{
+  gc_assert(is_gray(obj));
+  paint_black(obj);
+  mrb->gray_list = obj->gcnext;
+  mrb_gc_mark(mrb, (struct RBasic*)obj->c);
+  switch (obj->tt) {
+  case MRB_TT_ICLASS:
+    mrb_gc_mark(mrb, (struct RBasic*)((struct RClass*)obj)->super);
+    break;
+
+  case MRB_TT_CLASS:
+  case MRB_TT_SCLASS:
+  case MRB_TT_MODULE:
+    {
+      struct RClass *c = (struct RClass*)obj;
+
+      mrb_gc_mark_iv(mrb, (struct RObject*)obj);
+      mrb_gc_mark_mt(mrb, c);
+      mrb_gc_mark(mrb, (struct RBasic*)c->super);
+    }
+    break;
+
+  case MRB_TT_OBJECT:
+    mrb_gc_mark_iv(mrb, (struct RObject*)obj);
+    break;
+
+  case MRB_TT_PROC:
+    {
+      struct RProc *p = (struct RProc*)obj;
+
+      mrb_gc_mark(mrb, (struct RBasic*)p->env);
+      mrb_gc_mark(mrb, (struct RBasic*)p->target_class);
+    }
+    break;
+
+  case MRB_TT_ENV:
+    {
+      struct REnv *e = (struct REnv *)obj;
+
+      /* only envs with a negative cioff have their stack scanned here;
+         the number of slots is kept in the flags field */
+      if (e->cioff < 0) {
+        int i, len;
+
+        len = (int)e->flags;
+        for (i=0; i<len; i++) {
+          mrb_gc_mark_value(mrb, e->stack[i]);
+        }
+      }
+    }
+    break;
+
+  case MRB_TT_ARRAY:
+    {
+      struct RArray *a = (struct RArray*)obj;
+      size_t i, e;
+
+      for (i=0,e=a->len; i<e; i++) {
+        mrb_gc_mark_value(mrb, a->buf[i]);
+      }
+    }
+    break;
+
+  case MRB_TT_HASH:
+    mrb_gc_mark_ht(mrb, (struct RClass*)obj);
+    break;
+
+  case MRB_TT_STRING:
+    {
+      struct RString *s = (struct RString*)obj;
+
+      if (s->flags & MRB_STR_SHARED) {
+        mrb_gc_mark_value(mrb, s->aux.shared);
+      }
+    }
+    break;
+
+  case MRB_TT_RANGE:
+    {
+      struct RRange *r = (struct RRange*)obj;
+
+      mrb_gc_mark_value(mrb, r->edges->beg);
+      mrb_gc_mark_value(mrb, r->edges->end);
+    }
+    break;
+
+  case MRB_TT_REGEX:
+  case MRB_TT_STRUCT:
+  case MRB_TT_EXCEPTION:
+    break;
+
+  default:
+    /* other types have no children marked here */
+    break;
+  }
+}
+
+/* Queue `obj` for marking: a non-null white object is painted gray and
+ * pushed on the gray list; anything else is left untouched. */
+void
+mrb_gc_mark(mrb_state *mrb, struct RBasic *obj)
+{
+  if (obj != 0 && is_white(obj)) {
+    gc_assert(!is_dead(mrb, obj));
+    add_gray_list(mrb, obj);
+  }
+}
+
+/*
+ * Release the resources owned by `obj` according to its type and tag the
+ * slot MRB_TT_FREE.  Immediate types return early without touching the
+ * slot.
+ */
+static void
+obj_free(mrb_state *mrb, struct RBasic *obj)
+{
+  DEBUG(printf("obj_free(%p,tt=%d)\n",(void*)obj,obj->tt));
+  switch (obj->tt) {
+    /* immediate - no mark */
+  case MRB_TT_TRUE:
+  case MRB_TT_FIXNUM:
+  case MRB_TT_SYMBOL:
+  case MRB_TT_FLOAT:
+    /* cannot happen */
+    return;
+
+  case MRB_TT_OBJECT:
+    mrb_gc_free_iv(mrb, (struct RObject*)obj);
+    break;
+  case MRB_TT_CLASS:
+  case MRB_TT_MODULE:
+  case MRB_TT_SCLASS:
+    mrb_gc_free_mt(mrb, (struct RClass*)obj);
+    mrb_gc_free_iv(mrb, (struct RObject*)obj);
+    break;
+  case MRB_TT_ENV:
+    {
+      struct REnv *e = (struct REnv *)obj;
+
+      /* free the env's own stack (the one gc_mark_children scans under
+         the same cioff < 0 condition), not the VM's current stack */
+      if (e->cioff < 0) {
+        mrb_free(mrb, e->stack);
+        e->stack = 0;
+      }
+    }
+    break;
+  case MRB_TT_PROC:
+  case MRB_TT_ICLASS:
+    break;
+  case MRB_TT_ARRAY:
+    mrb_free(mrb, ((struct RArray*)obj)->buf);
+    break;
+  case MRB_TT_HASH:
+    mrb_gc_free_ht(mrb, (struct RClass*)obj);
+    break;
+  case MRB_TT_STRING:
+    /* a shared string does not own its buffer */
+    if (!(obj->flags & MRB_STR_SHARED))
+      mrb_free(mrb, ((struct RString*)obj)->buf);
+    break;
+  case MRB_TT_RANGE:
+    mrb_free(mrb, ((struct RRange*)obj)->edges);
+    break;
+  case MRB_TT_REGEX:
+  case MRB_TT_STRUCT:
+  case MRB_TT_EXCEPTION:
+    break;
+  default:
+    /* nothing to release for other types */
+    break;
+  }
+  obj->tt = MRB_TT_FREE;
+}
+
+/*
+ * Start a mark cycle: reset both gray lists and mark every root --
+ * global variables, the arena, the Object class, the VM stack, the ensure
+ * stack, the envs of all call frames and the literal pools of all ireps.
+ */
+static void
+root_scan_phase(mrb_state *mrb)
+{
+  int idx, pool_idx, count;
+  mrb_callinfo *frame;
+
+  mrb->gray_list = 0;
+  mrb->variable_gray_list = 0;
+
+  mrb_gc_mark_gv(mrb);
+
+  /* mark arena */
+  count = mrb->arena_idx;
+  for (idx = 0; idx < count; idx++) {
+    mrb_gc_mark(mrb, mrb->arena[idx]);
+  }
+  mrb_gc_mark(mrb, (struct RBasic*)mrb->object_class);
+
+  /* mark stack: everything below the current frame plus its registers */
+  count = mrb->stack - mrb->stbase;
+  if (mrb->ci) {
+    count += mrb->ci->nregs;
+  }
+  for (idx = 0; idx < count; idx++) {
+    mrb_gc_mark_value(mrb, mrb->stbase[idx]);
+  }
+
+  /* mark ensure stack */
+  count = 0;
+  if (mrb->ci) {
+    count = mrb->ci->eidx;
+  }
+  for (idx = 0; idx < count; idx++) {
+    mrb_gc_mark(mrb, (struct RBasic*)mrb->ensure[idx]);
+  }
+
+  /* mark closure */
+  for (frame = mrb->cibase; frame <= mrb->ci; frame++) {
+    if (frame) {
+      mrb_gc_mark( mrb, (struct RBasic*)frame->env);
+    }
+  }
+
+  /* mark irep pool */
+  for (idx = 0; idx < mrb->irep_len; idx++) {
+    mrb_irep *irep = mrb->irep[idx];
+
+    if (irep) {
+      for (pool_idx = 0; pool_idx < irep->plen; pool_idx++) {
+        mrb_gc_mark_value(mrb, irep->pool[pool_idx]);
+      }
+    }
+  }
+}
+
+/*
+ * Blacken `obj` via gc_mark_children() and return a child count for it,
+ * which the incremental marker adds to its work total.
+ */
+static size_t
+gc_gray_mark(mrb_state *mrb, struct RBasic *obj)
+{
+  size_t count = 0;
+
+  gc_mark_children(mrb, obj);
+
+  switch (obj->tt) {
+  case MRB_TT_ICLASS:
+    count++;
+    break;
+
+  case MRB_TT_CLASS:
+  case MRB_TT_SCLASS:
+  case MRB_TT_MODULE:
+    count += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj);
+    count += mrb_gc_mark_mt_size(mrb, (struct RClass*)obj);
+    count++;
+    break;
+
+  case MRB_TT_OBJECT:
+    count += mrb_gc_mark_iv_size(mrb, (struct RObject*)obj);
+    break;
+
+  case MRB_TT_ENV:
+    count += (int)obj->flags;
+    break;
+
+  case MRB_TT_ARRAY:
+    count += ((struct RArray*)obj)->len;
+    break;
+
+  case MRB_TT_HASH:
+    count += mrb_gc_mark_ht_size(mrb, (struct RClass*)obj);
+    break;
+
+  case MRB_TT_PROC:
+  case MRB_TT_RANGE:
+    count += 2;
+    break;
+
+  case MRB_TT_STRING:
+  case MRB_TT_REGEX:
+  case MRB_TT_STRUCT:
+  case MRB_TT_EXCEPTION:
+    /* counted as having no children */
+    break;
+  }
+  return count;
+}
+
+/* Blacken gray objects until the gray list is empty or the accumulated
+ * child count reaches `limit`; returns that accumulated count. */
+static size_t
+incremental_marking_phase(mrb_state *mrb, size_t limit)
+{
+  size_t done;
+
+  for (done = 0; mrb->gray_list && done < limit; ) {
+    done += gc_gray_mark(mrb, mrb->gray_list);
+  }
+  return done;
+}
+
+/*
+ * Finish marking in one go: drain the regular gray list, then move the
+ * objects queued on variable_gray_list onto the gray list and drain it
+ * again.
+ */
+static void
+final_marking_phase(mrb_state *mrb)
+{
+  int pass;
+
+  for (pass = 0; pass < 2; pass++) {
+    if (pass == 1) {
+      /* second pass: objects re-grayed by mrb_write_barrier() */
+      mrb->gray_list = mrb->variable_gray_list;
+      mrb->variable_gray_list = 0;
+    }
+    while (mrb->gray_list) {
+      gc_mark_children(mrb, mrb->gray_list);
+    }
+    gc_assert(mrb->gray_list == NULL);
+  }
+}
+
+/* Enter the sweep state: start sweeping at the first heap page and
+ * remember the live count as it was when marking ended. */
+static void
+prepare_incremental_sweep(mrb_state *mrb)
+{
+  mrb->gc_live_after_mark = mrb->live;
+  mrb->sweeps = mrb->heaps;
+  mrb->gc_state = GC_STATE_SWEEP;
+}
+
+/*
+ * Sweep heap pages, starting at mrb->sweeps, until at least `limit` slots
+ * have been visited or the page list ends.  Dead objects are freed and
+ * their slots pushed on the page's free list; surviving objects are
+ * repainted white for the next cycle.  Returns the number of slots
+ * visited (a multiple of HEAP_PAGE_SIZE) and leaves mrb->sweeps at the
+ * page where the next call resumes.
+ */
+static size_t
+incremental_sweep_phase(mrb_state *mrb, size_t limit)
+{
+ struct heap_page *page = mrb->sweeps;
+ size_t tried_sweep = 0;
+
+ while (page && (tried_sweep < limit)) {
+ RVALUE *p = page->objects;
+ RVALUE *e = p + HEAP_PAGE_SIZE;
+ size_t freed = 0;
+ int dead_slot = 1; /* stays 1 while no surviving object was seen */
+ int full = (page->freelist == NULL); /* page had no free slot before this pass */
+
+ while (p<e) {
+ if (is_dead(mrb, &p->as.basic)) {
+ /* slots already tagged MRB_TT_FREE are skipped */
+ if (p->as.basic.tt != MRB_TT_FREE) {
+ obj_free(mrb, &p->as.basic);
+ p->as.free.next = page->freelist;
+ page->freelist = (struct RBasic*)p;
+ freed++;
+ }
+ }
+ else {
+ paint_partial_white(mrb, &p->as.basic); /* next gc target */
+ dead_slot = 0;
+ }
+ p++;
+ }
+
+ /* free dead slot */
+ /* the page is released when no object survived and fewer than
+ HEAP_PAGE_SIZE slots were freed in this pass */
+ if (dead_slot && freed < HEAP_PAGE_SIZE) {
+ struct heap_page *next = page->next;
+
+ unlink_heap_page(mrb, page);
+ unlink_free_heap_page(mrb, page);
+ mrb_free(mrb, page);
+ page = next;
+ }
+ else {
+ /* a previously full page that gained free slots rejoins the free-page list */
+ if (full && freed > 0) {
+ link_free_heap_page(mrb, page);
+ }
+ page = page->next;
+ }
+ tried_sweep += HEAP_PAGE_SIZE;
+ mrb->live -= freed;
+ mrb->gc_live_after_mark -= freed;
+ }
+ mrb->sweeps = page;
+ return tried_sweep;
+}
+
+/*
+ * Run one step of the collector's state machine (NONE -> MARK -> SWEEP
+ * -> NONE) and return the amount of work done in that step.  `limit`
+ * bounds the work of a mark or sweep step.
+ */
+static size_t
+incremental_gc(mrb_state *mrb, size_t limit)
+{
+  if (mrb->gc_state == GC_STATE_NONE) {
+    /* begin a new cycle */
+    root_scan_phase(mrb);
+    mrb->gc_state = GC_STATE_MARK;
+    flip_white_part(mrb);
+    return 0;
+  }
+
+  if (mrb->gc_state == GC_STATE_MARK) {
+    if (mrb->gray_list) {
+      return incremental_marking_phase(mrb, limit);
+    }
+    /* nothing gray left: finish marking and switch to sweeping */
+    final_marking_phase(mrb);
+    prepare_incremental_sweep(mrb);
+    return 0;
+  }
+
+  if (mrb->gc_state == GC_STATE_SWEEP) {
+    size_t swept = incremental_sweep_phase(mrb, limit);
+
+    /* a sweep step that visited nothing ends the cycle */
+    if (swept == 0) {
+      mrb->gc_state = GC_STATE_NONE;
+    }
+    return swept;
+  }
+
+  /* unknown state */
+  gc_assert(0);
+  return 0;
+}
+
+/*
+ * Run collector steps until the step budget (derived from GC_STEP_SIZE
+ * and gc_step_ratio) is used up or a cycle completes, then set the live
+ * count at which the next call is triggered.
+ */
+void
+mrb_incremental_gc(mrb_state *mrb)
+{
+  size_t budget = 0, done = 0;
+
+  GC_INVOKE_TIME_REPORT;
+  GC_TIME_START;
+
+  budget = (GC_STEP_SIZE/100) * mrb->gc_step_ratio;
+  for (; done < budget; ) {
+    done += incremental_gc(mrb, budget);
+    if (mrb->gc_state == GC_STATE_NONE)
+      break;
+  }
+
+  if (mrb->gc_state != GC_STATE_NONE) {
+    /* cycle still running: come back after GC_STEP_SIZE more allocations */
+    mrb->gc_threshold = mrb->live + GC_STEP_SIZE;
+  }
+  else {
+    /* cycle finished: scale the threshold from the surviving object count */
+    gc_assert(mrb->live >= mrb->gc_live_after_mark);
+    mrb->gc_threshold = (mrb->gc_live_after_mark/100) * mrb->gc_interval_ratio;
+    if (mrb->gc_threshold < GC_STEP_SIZE) {
+      mrb->gc_threshold = GC_STEP_SIZE;
+    }
+  }
+
+
+  GC_TIME_STOP_AND_REPORT;
+}
+
+/*
+ * Run a complete collection.  A sweep that is already in progress is
+ * finished first; then steps are run without a work limit until the
+ * collector is back in GC_STATE_NONE, and the threshold is recomputed.
+ */
+void
+mrb_garbage_collect(mrb_state *mrb)
+{
+  size_t unlimited = ~0;
+
+  GC_INVOKE_TIME_REPORT;
+  GC_TIME_START;
+
+  if (mrb->gc_state == GC_STATE_SWEEP) {
+    /* finish sweep phase */
+    do {
+      incremental_gc(mrb, unlimited);
+    } while (mrb->gc_state != GC_STATE_NONE);
+  }
+
+  for (;;) {
+    incremental_gc(mrb, unlimited);
+    if (mrb->gc_state == GC_STATE_NONE)
+      break;
+  }
+
+  mrb->gc_threshold = (mrb->gc_live_after_mark/100) * mrb->gc_interval_ratio;
+
+  GC_TIME_STOP_AND_REPORT;
+}
+
+/* Return the current arena index so it can later be handed to
+ * mrb_gc_arena_restore(). */
+int
+mrb_gc_arena_save(mrb_state *mrb)
+{
+  int saved = mrb->arena_idx;
+
+  return saved;
+}
+
+/* Reset the arena index to `idx`, a value previously obtained from
+ * mrb_gc_arena_save(). */
+void
+mrb_gc_arena_restore(mrb_state *mrb, int idx)
+{
+  mrb->arena_idx = idx;
+  return;
+}
+
+/*
+ * Field write barrier
+ *   Applies when a black `obj` is made to reference a white `value`.
+ *   During the mark state `value` is painted gray and queued; during the
+ *   sweep state `obj` is repainted with the current white instead.
+ *   Any other colour combination is left untouched.
+ */
+
+void
+mrb_field_write_barrier(mrb_state *mrb, struct RBasic *obj, struct RBasic *value)
+{
+  if (is_black(obj) && is_white(value)) {
+    gc_assert(!is_dead(mrb, value) && !is_dead(mrb, obj));
+    gc_assert(mrb->gc_state != GC_STATE_NONE);
+
+    if (mrb->gc_state != GC_STATE_MARK) {
+      gc_assert(mrb->gc_state == GC_STATE_SWEEP);
+      paint_partial_white(mrb, obj); /* for never write barriers */
+    }
+    else {
+      add_gray_list(mrb, value);
+    }
+  }
+}
+
+/*
+ * Write barrier
+ *   Paint obj(Black) to obj(Gray).
+ *
+ *   The object that is painted gray is put on variable_gray_list and is
+ *   traversed atomically in the final mark phase.  Use this barrier for
+ *   spots that are written frequently, e.g. setting an element of an Array.
+ */
+
+void
+mrb_write_barrier(mrb_state *mrb, struct RBasic *obj)
+{
+  struct RBasic *rest;
+
+  if (!is_black(obj)) return;
+
+  gc_assert(!is_dead(mrb, obj));
+  gc_assert(mrb->gc_state != GC_STATE_NONE);
+
+  rest = mrb->variable_gray_list;
+  paint_gray(obj);
+  obj->gcnext = rest;
+  mrb->variable_gray_list = obj;
+}
+
+/*
+ * call-seq:
+ * GC.start -> nil
+ *
+ * Initiates full garbage collection.
+ *
+ */
+
+/* GC.start: run a full collection; the receiver is ignored and nil is
+ * returned. */
+static mrb_value
+gc_start(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value result;
+
+  mrb_garbage_collect(mrb);
+  result = mrb_nil_value();
+  return result;
+}
+
+/*
+ * call-seq:
+ * GC.interval_ratio -> fixnum
+ *
+ * Returns ratio of GC interval. Default value is 200(%).
+ *
+ */
+
+/* GC.interval_ratio: return the state's gc_interval_ratio as a fixnum. */
+static mrb_value
+gc_interval_ratio_get(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value ratio = mrb_fixnum_value(mrb->gc_interval_ratio);
+
+  return ratio;
+}
+
+/*
+ * call-seq:
+ * GC.interval_ratio = fixnum -> nil
+ *
+ * Updates ratio of GC interval. Default value is 200(%).
+ * If you set 100(%), the next GC starts as soon as all steps of the previous GC have ended.
+ *
+ */
+
+/* GC.interval_ratio=: convert the single argument with mrb_to_int() and
+ * store it as the state's gc_interval_ratio; returns nil. */
+static mrb_value
+gc_interval_ratio_set(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value arg;
+  mrb_value as_int;
+
+  mrb_get_args(mrb, "o", &arg);
+  as_int = mrb_to_int(mrb, arg);
+  mrb->gc_interval_ratio = mrb_fixnum(as_int);
+  return mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * GC.step_ratio -> fixnum
+ *
+ * Returns step span ratio of Incremental GC. Default value is 200(%).
+ *
+ */
+
+/* GC.step_ratio: return the state's gc_step_ratio as a fixnum. */
+static mrb_value
+gc_step_ratio_get(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value ratio = mrb_fixnum_value(mrb->gc_step_ratio);
+
+  return ratio;
+}
+
+/*
+ * call-seq:
+ * GC.step_ratio = fixnum -> nil
+ *
+ * Updates step span ratio of Incremental GC. Default value is 200(%).
+ * A larger ratio makes each step of the incremental GC longer.
+ *
+ */
+
+/* GC.step_ratio=: convert the single argument with mrb_to_int() and
+ * store it as the state's gc_step_ratio; returns nil. */
+static mrb_value
+gc_step_ratio_set(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value arg;
+  mrb_value as_int;
+
+  mrb_get_args(mrb, "o", &arg);
+  as_int = mrb_to_int(mrb, arg);
+  mrb->gc_step_ratio = mrb_fixnum(as_int);
+  return mrb_nil_value();
+}
+
+/* Define the GC module and its class methods: start, interval_ratio,
+ * interval_ratio=, step_ratio and step_ratio=. */
+void
+mrb_init_gc(mrb_state *mrb)
+{
+  struct RClass *gc_module = mrb_define_module(mrb, "GC");
+
+  /* full collection */
+  mrb_define_class_method(mrb, gc_module, "start", gc_start, ARGS_NONE());
+
+  /* tuning accessors */
+  mrb_define_class_method(mrb, gc_module, "interval_ratio", gc_interval_ratio_get, ARGS_NONE());
+  mrb_define_class_method(mrb, gc_module, "interval_ratio=", gc_interval_ratio_set, ARGS_REQ(1));
+  mrb_define_class_method(mrb, gc_module, "step_ratio", gc_step_ratio_get, ARGS_NONE());
+  mrb_define_class_method(mrb, gc_module, "step_ratio=", gc_step_ratio_set, ARGS_REQ(1));
+}
+
+#ifdef GC_TEST
+#ifdef GC_DEBUG
+/*
+ * Exercise mrb_field_write_barrier() (and the _value variant) for each
+ * colour/state combination: black->white in the mark and sweep states,
+ * and the two combinations where the barrier must do nothing.
+ */
+void
+test_mrb_field_write_barrier(void)
+{
+  mrb_state *mrb = mrb_open();
+  struct RBasic *obj, *value;
+
+  puts("test_mrb_field_write_barrier");
+  obj = RBASIC(mrb_ary_new(mrb));
+  value = RBASIC(mrb_str_new_cstr(mrb, "value"));
+  paint_black(obj);
+  paint_partial_white(mrb,value);
+
+
+  puts(" in GC_STATE_MARK");
+  mrb->gc_state = GC_STATE_MARK;
+  mrb_field_write_barrier(mrb, obj, value);
+
+  gc_assert(is_gray(value));
+
+
+  puts(" in GC_STATE_SWEEP");
+  paint_partial_white(mrb,value);
+  mrb->gc_state = GC_STATE_SWEEP;
+  mrb_field_write_barrier(mrb, obj, value);
+
+  /* in the sweep state obj is repainted white and value stays white */
+  gc_assert(obj->color & mrb->current_white_part);
+  gc_assert(value->color & mrb->current_white_part);
+
+
+  puts(" fail with black");
+  mrb->gc_state = GC_STATE_MARK;
+  paint_white(obj);
+  paint_partial_white(mrb,value);
+  mrb_field_write_barrier(mrb, obj, value);
+
+  gc_assert(obj->color & mrb->current_white_part);
+
+
+  puts(" fail with gray");
+  mrb->gc_state = GC_STATE_MARK;
+  paint_black(obj);
+  paint_gray(value);
+  mrb_field_write_barrier(mrb, obj, value);
+
+  gc_assert(is_gray(value));
+
+
+  {
+    mrb_value value;
+
+    puts("test_mrb_field_write_barrier_value");
+    obj = RBASIC(mrb_ary_new(mrb));
+    value = mrb_str_new_cstr(mrb, "value");
+    paint_black(obj);
+    paint_partial_white(mrb, RBASIC(value));
+
+    mrb->gc_state = GC_STATE_MARK;
+    mrb_field_write_barrier_value(mrb, obj, value);
+
+    gc_assert(is_gray(RBASIC(value)));
+  }
+
+  mrb_close(mrb);
+}
+
+/* Check that mrb_write_barrier() re-grays a black object and queues it
+ * on variable_gray_list, and that a gray object stays gray. */
+void
+test_mrb_write_barrier(void)
+{
+  mrb_state *mrb = mrb_open();
+  struct RBasic *ary;
+
+  puts("test_mrb_write_barrier");
+  ary = RBASIC(mrb_ary_new(mrb));
+  paint_black(ary);
+
+  /* black object in the mark state */
+  puts(" in GC_STATE_MARK");
+  mrb->gc_state = GC_STATE_MARK;
+  mrb_write_barrier(mrb, ary);
+  gc_assert(is_gray(ary));
+  gc_assert(mrb->variable_gray_list == ary);
+
+  /* already gray object */
+  puts(" fail with gray");
+  paint_gray(ary);
+  mrb_write_barrier(mrb, ary);
+  gc_assert(is_gray(ary));
+
+  mrb_close(mrb);
+}
+
+/* Check that add_gray_list() paints objects gray and pushes each one on
+ * the front of mrb->gray_list. */
+void
+test_add_gray_list(void)
+{
+  mrb_state *mrb = mrb_open();
+  struct RBasic *first, *second;
+
+  puts("test_add_gray_list");
+  gc_assert(mrb->gray_list == NULL);
+
+  first = RBASIC(mrb_str_new_cstr(mrb, "test"));
+  add_gray_list(mrb, first);
+  gc_assert(mrb->gray_list == first);
+  gc_assert(is_gray(first));
+
+  /* the newer object becomes the head, the older one follows it */
+  second = RBASIC(mrb_str_new_cstr(mrb, "test"));
+  add_gray_list(mrb, second);
+  gc_assert(mrb->gray_list == second);
+  gc_assert(mrb->gray_list->gcnext == first);
+  gc_assert(is_gray(second));
+
+  mrb_close(mrb);
+}
+
+/*
+ * Check gc_gray_mark() on a class and on an array: the object itself must
+ * end up black, and the returned child count must match what was marked.
+ */
+void
+test_gc_gray_mark(void)
+{
+ mrb_state *mrb = mrb_open();
+ mrb_value obj_v, value_v;
+ struct RBasic *obj;
+ size_t gray_num = 0;
+
+ puts("test_gc_gray_mark");
+
+ puts(" in MRB_TT_CLASS");
+ obj = (struct RBasic *)mrb->object_class;
+ paint_gray(obj);
+ gray_num = gc_gray_mark(mrb, obj);
+ gc_assert(is_black(obj));
+ gc_assert(gray_num > 1);
+
+ puts(" in MRB_TT_ARRAY");
+ obj_v = mrb_ary_new(mrb);
+ value_v = mrb_str_new_cstr(mrb, "test");
+ paint_gray(RBASIC(obj_v));
+ paint_partial_white(mrb, RBASIC(value_v));
+ mrb_ary_push(mrb, obj_v, value_v);
+ gray_num = gc_gray_mark(mrb, RBASIC(obj_v));
+ /* the array turns black, its single white element turns gray */
+ gc_assert(is_black(RBASIC(obj_v)));
+ gc_assert(is_gray(RBASIC(value_v)));
+ gc_assert(gray_num == 1);
+
+ mrb_close(mrb);
+}
+
+/*
+ * Step the collector by hand through one full cycle and check the state
+ * after every step, then compare mrb->live with the number of black
+ * objects counted after marking and with the number of free slots on
+ * the first page.
+ */
+void
+test_incremental_gc(void)
+{
+ mrb_state *mrb = mrb_open();
+ size_t max = ~0, live = 0, total = 0, freed = 0;
+ RVALUE *free;
+ struct heap_page *page;
+
+ puts("test_incremental_gc");
+
+ /* start from a finished cycle */
+ mrb_garbage_collect(mrb);
+
+ gc_assert(mrb->gc_state == GC_STATE_NONE);
+ incremental_gc(mrb, max);
+ gc_assert(mrb->gc_state == GC_STATE_MARK);
+
+ incremental_gc(mrb, max);
+ gc_assert(mrb->gc_state == GC_STATE_MARK);
+
+ incremental_gc(mrb, max);
+ gc_assert(mrb->gc_state == GC_STATE_SWEEP);
+
+ /* count black objects and total slots; print any live gray object */
+ page = mrb->heaps;
+ while (page) {
+ RVALUE *p = page->objects;
+ RVALUE *e = p + HEAP_PAGE_SIZE;
+ while (p<e) {
+ if (is_black(&p->as.basic)) {
+ live++;
+ }
+ if (is_gray(&p->as.basic) && !is_dead(mrb, &p->as.basic)) {
+ printf("%p\n", &p->as.basic);
+ }
+ p++;
+ }
+ page = page->next;
+ total += HEAP_PAGE_SIZE;
+ }
+
+ gc_assert(mrb->gray_list == NULL);
+
+ incremental_gc(mrb, max);
+ gc_assert(mrb->gc_state == GC_STATE_SWEEP);
+
+ incremental_gc(mrb, max);
+ gc_assert(mrb->gc_state == GC_STATE_NONE);
+
+ /* count the free slots of the first heap page only */
+ free = (RVALUE *)mrb->heaps->freelist;
+ while (free) {
+ freed++;
+ free = (RVALUE *)free->as.free.next;
+ }
+
+ gc_assert(mrb->live == live);
+ gc_assert(mrb->live == total-freed);
+
+ mrb_close(mrb);
+}
+
+/* Add a second heap page, sweep with a limit of three pages' worth of
+ * slots, and check that a single page remains in both page lists. */
+void
+test_incremental_sweep_phase(void)
+{
+  mrb_state *mrb = mrb_open();
+  size_t sweep_limit = HEAP_PAGE_SIZE*3;
+
+  puts("test_incremental_sweep_phase");
+
+  add_heap(mrb);
+  mrb->sweeps = mrb->heaps;
+
+  /* before: exactly two pages in each list */
+  gc_assert(mrb->heaps->next->next == NULL);
+  gc_assert(mrb->free_heaps->next->next == NULL);
+
+  incremental_sweep_phase(mrb, sweep_limit);
+
+  /* after: one page, shared by both lists */
+  gc_assert(mrb->heaps->next == NULL);
+  gc_assert(mrb->heaps == mrb->free_heaps);
+
+  mrb_close(mrb);
+}
+
+/*
+ * Call the GC module functions directly: run GC.start, check the default
+ * ratios (200), and check that both setters store the argument (300)
+ * supplied through mrb->argv / mrb->argc.
+ */
+void
+test_gc_api(void)
+{
+  mrb_state *mrb = mrb_open();
+  mrb_value res;
+
+  mrb_value argv[1];
+
+  puts("test_gc_api");
+
+  gc_start(mrb, mrb_nil_value());
+
+  res = gc_interval_ratio_get(mrb, mrb_nil_value());
+  gc_assert(mrb_fixnum(res) == 200);
+
+  /* pass the array itself (pointer to its first element), not &argv */
+  argv[0] = mrb_fixnum_value(300);
+  mrb->argv = argv;
+  mrb->argc = 1;
+
+  gc_interval_ratio_set(mrb, mrb_nil_value());
+  res = gc_interval_ratio_get(mrb, mrb_nil_value());
+  gc_assert(mrb_fixnum(res) == 300);
+
+  res = gc_step_ratio_get(mrb, mrb_nil_value());
+  gc_assert(mrb_fixnum(res) == 200);
+
+  /* the same argument vector is reused for the second setter */
+  gc_step_ratio_set(mrb, mrb_nil_value());
+  res = gc_step_ratio_get(mrb, mrb_nil_value());
+  gc_assert(mrb_fixnum(res) == 300);
+
+  mrb_close(mrb);
+}
+
+/* Allocate 1000 arrays of 1000 strings each, all reachable from one
+ * outer array, restoring the arena after every inner array. */
+static void
+test_many_object_benchmark(void)
+{
+  mrb_state *mrb = mrb_open();
+  size_t outer = 0, inner = 0;
+  mrb_value ary = mrb_ary_new(mrb);
+  int save_point = mrb_gc_arena_save(mrb);
+
+  puts("test_many_object_benchmark");
+
+  outer = 0;
+  while (outer < 1000) {
+    mrb_value cary = mrb_ary_new(mrb);
+
+    mrb_ary_push(mrb, ary, cary);
+    inner = 0;
+    while (inner < 1000) {
+      mrb_ary_push(mrb, cary, mrb_str_new_cstr(mrb, "t"));
+      inner++;
+    }
+    /* drop the arena entries created in this round */
+    mrb_gc_arena_restore(mrb, save_point);
+    outer++;
+  }
+
+  mrb_close(mrb);
+}
+
+/* Test driver: run every GC test in a fixed order and exit with 0. */
+int
+main(void)
+{
+  static void (* const tests[])(void) = {
+    test_mrb_field_write_barrier,
+    test_mrb_write_barrier,
+    test_add_gray_list,
+    test_gc_gray_mark,
+    test_incremental_gc,
+    test_incremental_sweep_phase,
+    test_gc_api,
+    test_many_object_benchmark
+  };
+  size_t i;
+
+  for (i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {
+    tests[i]();
+  }
+  return 0;
+}
+#endif
+#endif
diff --git a/src/gc.h b/src/gc.h
new file mode 100644
index 000000000..139580533
--- /dev/null
+++ b/src/gc.h
@@ -0,0 +1,26 @@
+#ifndef MRUBY_GC_H
+#define MRUBY_GC_H
+
+/*
+ * One heap slot.  The union overlays every object representation that
+ * can live in a slot, so all slots have the same size; `free` is the
+ * representation of an unused slot, chained through `next`.
+ */
+typedef struct {
+ union {
+ struct free_obj {
+ MRUBY_OBJECT_HEADER;
+ struct RBasic *next; /* next free slot */
+ } free;
+ struct RBasic basic;
+ struct RObject object;
+ struct RClass klass;
+ struct RString string;
+ struct RArray array;
+ struct RHash hash;
+ struct RRange range;
+ struct RStruct structdata;
+ struct RProc procdata;
+#ifdef INCLUDE_REGEXP
+ struct RMatch match;
+ struct RRegexp regexp;
+#endif
+ } as;
+} RVALUE;
+
+#endif /* MRUBY_GC_H */
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 000000000..3f336f425
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,1436 @@
+/**********************************************************************
+
+ hash.c -
+
+ $Author: yugui $
+ created at: Mon Nov 22 18:51:18 JST 1993
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+ Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan
+
+**********************************************************************/
+
+#include "mruby.h"
+#include "mruby/hash.h"
+#include "ritehash.h"
+#include "mruby/class.h"
+#include "mruby/array.h"
+#include "error.h"
+#include "mruby/string.h"
+#include "mruby/numeric.h"
+#include "mruby/struct.h"
+#include "st.h"
+#include <errno.h>
+#include <string.h>
+
+
+#ifdef __APPLE__
+#include <crt_externs.h>
+#endif
+
+#include <stdio.h>
+
+/* Hash callback passed to KHASH_INIT for the `ht` table type.
+ * Builds a temporary string made of one byte holding mrb_type(key) followed
+ * by the text returned by mrb_inspect(key), then hashes that string's buffer
+ * with kh_str_hash_func. */
+static khint_t
+mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key)
+{
+ char type = mrb_type(key);
+ mrb_value s1 = mrb_str_new(mrb, &type, 1);
+ mrb_value s2 = mrb_inspect(mrb, key);
+ s1 = mrb_str_cat(mrb, s1, RSTRING_PTR(s2), RSTRING_LEN(s2));
+ /* NOTE(review): only the buffer pointer is passed, not its length. If
+ * kh_str_hash_func reads a NUL-terminated C string, a type byte of 0 or an
+ * embedded NUL would cut the hashed data short -- confirm. */
+ return kh_str_hash_func(mrb, RSTRING_PTR(s1));
+}
+
+/* Key-equality callback passed to KHASH_INIT for the `ht` table type:
+ * forwards both keys to mrb_equal() and returns its result. */
+static khint_t
+mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  khint_t same = mrb_equal(mrb, a, b);
+
+  return same;
+}
+KHASH_INIT(ht, mrb_value, mrb_value, 1, mrb_hash_ht_hash_func, mrb_hash_ht_hash_equal);
+
+mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
+ mrb_value obj, mrb_value paired_obj, void* arg);
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+static void mrb_hash_modify(mrb_state *mrb, mrb_value hash);
+
+/* Returns the object to store as a table key: a duplicate (mrb_str_dup)
+ * when key is a String, the key itself for every other type. */
+static inline mrb_value
+mrb_hash_ht_key(mrb_state *mrb, mrb_value key)
+{
+  if (mrb_type(key) != MRB_TT_STRING) {
+    return key;
+  }
+  return mrb_str_dup(mrb, key);
+}
+
+#define KEY(key) mrb_hash_ht_key(mrb, key)
+
+/* GC mark hook for a Hash object (passed in as struct RClass* and cast to
+ * struct RHash*): marks the default value, then every key and value stored
+ * in the table, if a table exists. */
+void
+mrb_gc_mark_ht(mrb_state *mrb, struct RClass *c)
+{
+  khiter_t it;
+  khash_t(ht) *tbl = ((struct RHash*)c)->ht;
+
+  mrb_gc_mark_value(mrb, ((struct RHash*)c)->ifnone);
+  if (!tbl) {
+    return;
+  }
+
+  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
+    if (!kh_exist(tbl, it)) continue;
+
+    mrb_gc_mark_value(mrb, kh_key(tbl, it));
+    mrb_gc_mark_value(mrb, kh_value(tbl, it));
+  }
+}
+
+/* Returns the number of values mrb_gc_mark_ht() visits for this Hash:
+ * one for the default value plus two (key and value) per table entry. */
+size_t
+mrb_gc_mark_ht_size(mrb_state *mrb, struct RClass *c)
+{
+  khash_t(ht) *tbl = ((struct RHash*)c)->ht;
+  size_t count = 0;
+
+  /* the default value (ifnone) */
+  count++;
+
+  /* key + value for each stored entry */
+  if (tbl) {
+    count += kh_size(tbl)*2;
+  }
+
+  return count;
+}
+
+/* GC free hook for a Hash object (passed in as struct RClass* and cast to
+ * struct RHash*): releases the hash table, if one was ever created. */
+void
+mrb_gc_free_ht(mrb_state *mrb, struct RClass *c)
+{
+  khash_t(ht) *h = ((struct RHash*)c)->ht;
+
+  /* mrb_gc_mark_ht() treats a NULL table as a valid state, so do the same
+   * here instead of relying on kh_destroy() to tolerate NULL. */
+  if (!h) return;
+  kh_destroy(ht, h);
+}
+
+
+/* Allocates a new Hash object of class mrb->hash_class. Its table is
+ * created with kh_init and then passed to kh_resize with capa; the default
+ * value (ifnone) starts out as nil. Returns the new object as an mrb_value. */
+mrb_value
+mrb_hash_new_capa(mrb_state *mrb, size_t capa)
+{
+ struct RHash *h;
+
+ h = mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
+ h->ht = kh_init(ht, mrb);
+ kh_resize(ht, h->ht, capa);
+ h->ifnone = mrb_nil_value();
+ return mrb_obj_value(h);
+}
+
+/* Convenience wrapper around mrb_hash_new_capa() taking the capacity as
+ * an int. */
+mrb_value
+mrb_hash_new(mrb_state *mrb, int capa)
+{
+  mrb_value hash = mrb_hash_new_capa(mrb, capa);
+
+  return hash;
+}
+
+/* Looks key up in hash (mrb_hash_aref / mrb_hash_lookup).
+ * On a hit returns the stored value. On a miss returns the result of
+ * calling the default proc with (hash, key) when the hash has one,
+ * otherwise the plain default value. */
+mrb_value
+mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t pos;
+
+  if (tbl) {
+    pos = kh_get(ht, tbl, key);
+    if (pos != kh_end(tbl)) {
+      return kh_value(tbl, pos);
+    }
+  }
+
+  /* miss: fall back to the default */
+  if (!MRB_RHASH_PROCDEFAULT_P(hash)) {
+    return RHASH_IFNONE(hash);
+  }
+  return mrb_funcall(mrb, RHASH_PROCDEFAULT(hash), "call", 2, hash, key);
+}
+
+/* Looks vkey up in hash (mrb_hash_lookup2).
+ * Returns the stored value on a hit and def on a miss; the hash's own
+ * default value / default proc is not consulted. */
+mrb_value
+mrb_hash_getWithDef(mrb_state *mrb, mrb_value hash, mrb_value vkey, mrb_value def)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t pos;
+
+  if (!tbl) {
+    return def;
+  }
+
+  pos = kh_get(ht, tbl, vkey);
+  if (pos == kh_end(tbl)) {
+    /* not found */
+    return def;
+  }
+  return kh_value(tbl, pos);
+}
+
+/* Stores val under key in hash (mrb_hash_aset).
+ * An existing entry has its value overwritten; otherwise a new entry is
+ * inserted, using KEY(key) as the stored key. The write barrier is applied
+ * to the hash object after the store. */
+void
+mrb_hash_set(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value val)
+{
+  khash_t(ht) *tbl;
+  khiter_t slot;
+  int ret;
+
+  /* mrb_hash_modify() creates the table when the hash has none yet. */
+  mrb_hash_modify(mrb, hash);
+  tbl = RHASH_H_TBL(hash);
+
+  slot = kh_get(ht, tbl, key);
+  if (slot == kh_end(tbl)) {
+    /* no such key yet: add an entry */
+    slot = kh_put(ht, tbl, KEY(key), &ret);
+  }
+
+  kh_value(tbl, slot) = val;
+  mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash));
+}
+
+/* Currently a no-op: returns hash unchanged. The freeze call is commented
+ * out below. */
+mrb_value
+mrb_hash_freeze(mrb_value hash)
+{
+ //return mrb_obj_freeze(hash);
+ return (hash);
+}
+
+/*
+ * Returns the hash code of obj as a Fixnum value.
+ *
+ * Sends the zero-argument `hash` message to obj. A result that is not
+ * already a Fixnum is converted with mrb_to_int() until it is one.
+ */
+mrb_value
+mrb_hash(mrb_state *mrb, mrb_value obj)
+{
+  /* Ruby's per-object hash-code method is the lowercase `hash`; the
+   * capitalised name `Hash` is the class / conversion function and takes an
+   * argument, so it cannot be the zero-argument call intended here. */
+  mrb_value hval = mrb_funcall(mrb, obj, "hash", 0);
+
+  while (mrb_type(hval) != MRB_TT_FIXNUM) {
+    hval = mrb_to_int(mrb, hval);
+  }
+  return hval;
+}
+
+/* Class-level constructor: creates an empty hash (capacity 0) and runs
+ * the object's initializer on it with all arguments that were passed.
+ * klass is not used. */
+static mrb_value
+hash_s_new(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value *args;
+  int nargs;
+  mrb_value obj;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+  obj = mrb_hash_new_capa(mrb, 0);
+  mrb_obj_call_init(mrb, obj, nargs, args);
+  return obj;
+}
+
+/*
+ * Returns a new Hash holding the same key/value pairs as hash.
+ *
+ * Keys go through KEY(), so String keys are duplicated; values are shared
+ * with the source hash.
+ *
+ * NOTE(review): the source's default value / default proc is not carried
+ * over; the copy always starts with a nil default. Confirm whether callers
+ * expect the default to be preserved.
+ */
+mrb_value
+mrb_hash_dup(mrb_state *mrb, mrb_value hash)
+{
+  struct RHash* ret;
+  khash_t(ht) *h, *ret_h;
+  khiter_t k, ret_k;
+  int r;
+
+  ret = mrb_obj_alloc(mrb, MRB_TT_HASH, mrb->hash_class);
+  ret->ht = kh_init(ht, mrb);
+  /* mrb_gc_mark_ht() marks ifnone unconditionally, so give it a defined
+   * value just as mrb_hash_new_capa() does. */
+  ret->ifnone = mrb_nil_value();
+
+  if (!RHASH_EMPTY_P(hash)) {
+    h = RHASH_H_TBL(hash);
+    ret_h = ret->ht;
+
+    for (k = kh_begin(h); k != kh_end(h); k++) {
+      if (kh_exist(h,k)) {
+        ret_k = kh_put(ht, ret_h, KEY(kh_key(h,k)), &r);
+        kh_val(ret_h, ret_k) = kh_val(h,k);
+      }
+    }
+  }
+
+  return mrb_obj_value(ret);
+}
+
+/* Placeholder for a frozen-hash check. The check itself is commented out,
+ * so this function currently does nothing. */
+static void
+mrb_hash_modify_check(mrb_state *mrb, mrb_value hash)
+{
+ //if (OBJ_FROZEN(hash)) mrb_error_frozen("hash");
+}
+
+/*
+ * Returns the hash table of hash, creating an empty one first when the
+ * hash does not have a table yet. The returned pointer is never the stale
+ * pre-creation value.
+ */
+khash_t(ht) *
+mrb_hash_tbl(mrb_state *mrb, mrb_value hash)
+{
+  if (!RHASH_H_TBL(hash)) {
+    RHASH_H_TBL(hash) = kh_init(ht, mrb);
+  }
+  /* Re-read after the possible kh_init(): a pointer captured before the
+   * check would still be NULL for a freshly created table. */
+  return RHASH_H_TBL(hash);
+}
+
+/* Prepares hash for modification: calls mrb_hash_tbl(), which creates the
+ * table when the hash has none. The frozen check is commented out. */
+static void
+mrb_hash_modify(mrb_state *mrb, mrb_value hash)
+{
+ //mrb_hash_modify_check(mrb, hash);
+ mrb_hash_tbl(mrb, hash);
+}
+
+/* 15.2.13.4.16 */
+/*
+ * call-seq:
+ * Hash.new -> new_hash
+ * Hash.new(obj) -> new_hash
+ * Hash.new {|hash, key| block } -> new_hash
+ *
+ * Returns a new, empty hash. If this hash is subsequently accessed by
+ * a key that doesn't correspond to a hash entry, the value returned
+ * depends on the style of <code>new</code> used to create the hash. In
+ * the first form, the access returns <code>nil</code>. If
+ * <i>obj</i> is specified, this single object will be used for
+ * all <em>default values</em>. If a block is specified, it will be
+ * called with the hash object and the key, and should return the
+ * default value. It is the block's responsibility to store the value
+ * in the hash if required.
+ *
+ * h = Hash.new("Go Fish")
+ * h["a"] = 100
+ * h["b"] = 200
+ * h["a"] #=> 100
+ * h["c"] #=> "Go Fish"
+ * # The following alters the single default object
+ * h["c"].upcase! #=> "GO FISH"
+ * h["d"] #=> "GO FISH"
+ * h.keys #=> ["a", "b"]
+ *
+ * # While this creates a new default object each time
+ * h = Hash.new { |hash, key| hash[key] = "Go Fish: #{key}" }
+ * h["c"] #=> "Go Fish: c"
+ * h["c"].upcase! #=> "GO FISH: C"
+ * h["d"] #=> "Go Fish: d"
+ * h.keys #=> ["c", "d"]
+ *
+ */
+
+/* Initializer body for Hash: sets either a plain default value or a
+ * default proc on hash, depending on the arguments, and returns hash. */
+static mrb_value
+mrb_hash_init_core(mrb_state *mrb, mrb_value hash)
+{
+ mrb_value block;
+ mrb_value *argv;
+ int argc;
+
+ /* NOTE(review): the first value fetched with the "o" specifier is treated
+ * as the block below. Whether "o" can ever deliver the block (rather than
+ * the first positional argument) depends on mrb_get_args -- confirm. */
+ mrb_get_args(mrb, "o*", &block, &argv, &argc);
+
+ mrb_hash_modify(mrb, hash);
+
+ if (mrb_nil_p(block)) {
+ /* No block: at most one extra argument, which becomes the default value. */
+ if (argc > 0) {
+ if (argc != 1) mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+ RHASH_IFNONE(hash) = argv[0];
+ }
+ else {
+ RHASH_IFNONE(hash) = mrb_nil_value();
+ }
+ }
+ else {
+ /* Block given: extra arguments are rejected and the block is stored as
+ * the default proc. */
+ if (argc > 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+ }
+ RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT;
+ RHASH_PROCDEFAULT(hash) = block;
+ }
+
+ /* NOTE(review): unlike mrb_hash_set(), no mrb_write_barrier() follows the
+ * stores above -- confirm this is safe for the incremental GC. */
+ return hash;
+}
+
+/*
+ * call-seq:
+ * Hash[ key, value, ... ] -> new_hash
+ * Hash[ [ [key, value], ... ] ] -> new_hash
+ * Hash[ object ] -> new_hash
+ *
+ * Creates a new hash populated with the given objects. Equivalent to
+ * the literal <code>{ <i>key</i> => <i>value</i>, ... }</code>. In the first
+ * form, keys and values occur in pairs, so there must be an even number of arguments.
+ * The second and third form take a single argument which is either
+ * an array of key-value pairs or an object convertible to a hash.
+ *
+ * Hash["a", 100, "b", 200] #=> {"a"=>100, "b"=>200}
+ * Hash[ [ ["a", 100], ["b", 200] ] ] #=> {"a"=>100, "b"=>200}
+ * Hash["a" => 100, "b" => 200] #=> {"a"=>100, "b"=>200}
+ */
+
+/* Converts the argument to a Hash through mrb_convert_type(), using the
+ * "to_hash" conversion method. */
+static mrb_value
+to_hash(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value converted = mrb_convert_type(mrb, hash, MRB_TT_HASH, "Hash", "to_hash");
+
+  return converted;
+}
+
+/*
+ * call-seq:
+ * Hash.try_convert(obj) -> hash or nil
+ *
+ * Try to convert <i>obj</i> into a hash, using to_hash method.
+ * Returns converted hash or nil if <i>obj</i> cannot be converted
+ * for any reason.
+ *
+ * Hash.try_convert({1=>2}) # => {1=>2}
+ * Hash.try_convert("1=>2") # => nil
+ */
+
+/* 15.2.13.4.2 */
+/*
+ * call-seq:
+ * hsh[key] -> value
+ *
+ * Element Reference---Retrieves the <i>value</i> object corresponding
+ * to the <i>key</i> object. If not found, returns the default value (see
+ * <code>Hash::new</code> for details).
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h["a"] #=> 100
+ * h["c"] #=> nil
+ *
+ */
+/* Method body for Hash#[]: reads one argument (the key) and returns
+ * mrb_hash_get(self, key). */
+mrb_value
+mrb_hash_aget(mrb_state *mrb, mrb_value self)
+{
+  mrb_value lookup_key;
+  mrb_value found;
+
+  mrb_get_args(mrb, "o", &lookup_key);
+  found = mrb_hash_get(mrb, self, lookup_key);
+  return found;
+}
+
+/* Alias for mrb_hash_getWithDef(): returns the value stored under key, or
+ * def when the key is absent. */
+mrb_value
+mrb_hash_lookup2(mrb_state *mrb, mrb_value hash, mrb_value key, mrb_value def)
+{
+  mrb_value found = mrb_hash_getWithDef(mrb, hash, key, def);
+
+  return found;
+}
+
+/* Returns the value stored under key, or nil when the key is absent. */
+mrb_value
+mrb_hash_lookup(mrb_state *mrb, mrb_value hash, mrb_value key)
+{
+  mrb_value fallback = mrb_nil_value();
+
+  return mrb_hash_lookup2(mrb, hash, key, fallback);
+}
+
+/*
+ * call-seq:
+ * hsh.fetch(key [, default] ) -> obj
+ * hsh.fetch(key) {| key | block } -> obj
+ *
+ * Returns a value from the hash for the given key. If the key can't be
+ * found, there are several options: With no other arguments, it will
+ * raise a <code>KeyError</code> exception; if <i>default</i> is
+ * given, then that will be returned; if the optional code block is
+ * specified, then that will be run and its result returned.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.fetch("a") #=> 100
+ * h.fetch("z", "go fish") #=> "go fish"
+ * h.fetch("z") { |el| "go fish, #{el}"} #=> "go fish, z"
+ *
+ * The following example shows that an exception is raised if the key
+ * is not found and a default value is not supplied.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.fetch("z")
+ *
+ * <em>produces:</em>
+ *
+ * prog.rb:2:in `fetch': key not found (KeyError)
+ * from prog.rb:2
+ *
+ */
+
+/* 15.2.13.4.5 */
+/*
+ * call-seq:
+ * hsh.default(key=nil) -> obj
+ *
+ * Returns the default value, the value that would be returned by
+ * <i>hsh</i>[<i>key</i>] if <i>key</i> did not exist in <i>hsh</i>.
+ * See also <code>Hash::new</code> and <code>Hash#default=</code>.
+ *
+ * h = Hash.new #=> {}
+ * h.default #=> nil
+ * h.default(2) #=> nil
+ *
+ * h = Hash.new("cat") #=> {}
+ * h.default #=> "cat"
+ * h.default(2) #=> "cat"
+ *
+ * h = Hash.new {|h,k| h[k] = k.to_i*10} #=> {}
+ * h.default #=> nil
+ * h.default(2) #=> 20
+ */
+
+/* Method body for Hash#default.
+ * Without a default proc: returns the plain default value.
+ * With a default proc: returns nil when called without arguments,
+ * otherwise the result of calling the proc with (hash, first argument). */
+static mrb_value
+mrb_hash_default(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value *args;
+  int nargs;
+  mrb_value key;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+
+  if (!MRB_RHASH_PROCDEFAULT_P(hash)) {
+    return RHASH_IFNONE(hash);
+  }
+  if (nargs == 0) {
+    return mrb_nil_value();
+  }
+  key = args[0];
+  return mrb_funcall(mrb, RHASH_PROCDEFAULT(hash), "call", 2, hash, key);
+}
+
+/* 15.2.13.4.6 */
+/*
+ * call-seq:
+ * hsh.default = obj -> obj
+ *
+ * Sets the default value, the value returned for a key that does not
+ * exist in the hash. It is not possible to set the default to a
+ * <code>Proc</code> that will be executed on each key lookup.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.default = "Go fish"
+ * h["a"] #=> 100
+ * h["z"] #=> "Go fish"
+ * # This doesn't do what you might hope...
+ * h.default = proc do |hash, key|
+ * hash[key] = key + key
+ * end
+ * h[2] #=> #<Proc:0x401b3948@-:6>
+ * h["cat"] #=> #<Proc:0x401b3948@-:6>
+ */
+
+/* Method body for Hash#default=: stores the argument as the plain default
+ * value, clears the default-proc flag, and returns the argument. */
+static mrb_value
+mrb_hash_set_default(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value ifnone;
+  mrb_get_args(mrb, "o", &ifnone);
+
+  mrb_hash_modify(mrb, hash);
+  RHASH_IFNONE(hash) = ifnone;
+  RHASH(hash)->flags &= ~(MRB_HASH_PROC_DEFAULT);
+  /* A new reference was stored in the hash object; notify the GC the same
+   * way mrb_hash_set() does after storing a value. */
+  mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash));
+
+  return ifnone;
+}
+
+/* 15.2.13.4.7 */
+/*
+ * call-seq:
+ * hsh.default_proc -> anObject
+ *
+ * If <code>Hash::new</code> was invoked with a block, return that
+ * block, otherwise return <code>nil</code>.
+ *
+ * h = Hash.new {|h,k| h[k] = k*k } #=> {}
+ * p = h.default_proc #=> #<Proc:0x401b3d08@-:1>
+ * a = [] #=> []
+ * p.call(a, 2)
+ * a #=> [nil, nil, 4]
+ */
+
+
+/* Method body for Hash#default_proc: returns the default proc when the
+ * hash has one, nil otherwise. */
+static mrb_value
+mrb_hash_default_proc(mrb_state *mrb, mrb_value hash)
+{
+  if (!MRB_RHASH_PROCDEFAULT_P(hash)) {
+    return mrb_nil_value();
+  }
+  return RHASH_PROCDEFAULT(hash);
+}
+
+/*
+ * call-seq:
+ * hsh.default_proc = proc_obj -> proc_obj
+ *
+ * Sets the default proc to be executed on each key lookup.
+ *
+ * h.default_proc = proc do |hash, key|
+ * hash[key] = key + key
+ * end
+ * h[2] #=> 4
+ * h["cat"] #=> "catcat"
+ */
+
+/* Removes the entry stored under key and returns its value.
+ * Returns nil when the hash has no table or the key is not present. */
+mrb_value
+mrb_hash_delete_key(mrb_state *mrb, mrb_value hash, mrb_value key)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t pos;
+  mrb_value removed;
+
+  if (!tbl) {
+    return mrb_nil_value();
+  }
+
+  pos = kh_get(ht, tbl, key);
+  if (pos == kh_end(tbl)) {
+    /* not found */
+    return mrb_nil_value();
+  }
+
+  /* Read the value before the slot is released. */
+  removed = kh_value(tbl, pos);
+  kh_del(ht, tbl, pos);
+  return removed;
+}
+
+/* 15.2.13.4.8 */
+/*
+ * call-seq:
+ * hsh.delete(key) -> value
+ * hsh.delete(key) {| key | block } -> value
+ *
+ * Deletes and returns a key-value pair from <i>hsh</i> whose key is
+ * equal to <i>key</i>. If the key is not found, returns the
+ * <em>default value</em>. If the optional code block is given and the
+ * key is not found, pass in the key and return the result of
+ * <i>block</i>.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.delete("a") #=> 100
+ * h.delete("z") #=> nil
+ * h.delete("z") { |el| "#{el} not found" } #=> "z not found"
+ *
+ */
+/* Method body behind Hash#delete: reads one argument (the key) and returns
+ * mrb_hash_delete_key(self, key). */
+mrb_value
+mrb_hash_delete(mrb_state *mrb, mrb_value self)
+{
+  mrb_value target;
+  mrb_value removed;
+
+  mrb_get_args(mrb, "o", &target);
+  removed = mrb_hash_delete_key(mrb, self, target);
+  return removed;
+}
+/* Holder for one key/value pair. NOTE(review): no use of this struct is
+ * visible in the surrounding code -- confirm whether it is still needed. */
+struct shift_var {
+ mrb_value key;
+ mrb_value val;
+};
+
+
+/* 15.2.13.4.24 */
+/*
+ * call-seq:
+ * hsh.shift -> anArray or obj
+ *
+ * Removes a key-value pair from <i>hsh</i> and returns it as the
+ * two-item array <code>[</code> <i>key, value</i> <code>]</code>, or
+ * the hash's default value if the hash is empty.
+ *
+ * h = { 1 => "a", 2 => "b", 3 => "c" }
+ * h.shift #=> [1, "a"]
+ * h #=> {2=>"b", 3=>"c"}
+ */
+
+/*
+ * Method body for Hash#shift.
+ *
+ * Removes the first occupied entry found while scanning the table and
+ * returns it as a two-element array [key, value]. When the hash is empty,
+ * returns the result of the default proc called with (hash, nil) if one is
+ * set, otherwise the plain default value.
+ */
+static mrb_value
+mrb_hash_shift(mrb_state *mrb, mrb_value hash)
+{
+  khash_t(ht) *h;
+  khiter_t k;
+
+  mrb_hash_modify(mrb, hash);
+  /* Read the table only after mrb_hash_modify(), which may have created it. */
+  h = RHASH_H_TBL(hash);
+
+  if (h && kh_size(h) > 0) {
+    for (k = kh_begin(h); k != kh_end(h); k++) {
+      mrb_value pair;
+
+      if (!kh_exist(h,k)) continue;
+
+      /* Build the [key, value] pair while the entry is still in the table,
+       * so key and value remain referenced by the hash during the array
+       * allocation; only then drop the entry. */
+      pair = mrb_assoc_new(mrb, kh_key(h,k), kh_value(h,k));
+      kh_del(ht, h, k);
+      return pair;
+    }
+  }
+
+  if (MRB_RHASH_PROCDEFAULT_P(hash)) {
+    return mrb_funcall(mrb, RHASH_PROCDEFAULT(hash), "call", 2, hash, mrb_nil_value());
+  }
+  else {
+    return RHASH_IFNONE(hash);
+  }
+}
+
+/*
+ * call-seq:
+ * hsh.delete_if {| key, value | block } -> hsh
+ * hsh.delete_if -> an_enumerator
+ *
+ * Deletes every key-value pair from <i>hsh</i> for which <i>block</i>
+ * evaluates to <code>true</code>.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * h = { "a" => 100, "b" => 200, "c" => 300 }
+ * h.delete_if {|key, value| key >= "b" } #=> {"a"=>100}
+ *
+ */
+
+/*
+ * call-seq:
+ * hsh.reject! {| key, value | block } -> hsh or nil
+ * hsh.reject! -> an_enumerator
+ *
+ * Equivalent to <code>Hash#delete_if</code>, but returns
+ * <code>nil</code> if no changes were made.
+ */
+
+/*
+ * call-seq:
+ * hsh.reject {| key, value | block } -> a_hash
+ *
+ * Same as <code>Hash#delete_if</code>, but works on (and returns) a
+ * copy of the <i>hsh</i>. Equivalent to
+ * <code><i>hsh</i>.dup.delete_if</code>.
+ *
+ */
+
+/*
+ * call-seq:
+ * hsh.values_at(key, ...) -> array
+ *
+ * Return an array containing the values associated with the given keys.
+ * Also see <code>Hash.select</code>.
+ *
+ * h = { "cat" => "feline", "dog" => "canine", "cow" => "bovine" }
+ * h.values_at("cow", "cat") #=> ["bovine", "feline"]
+ */
+
+/* Collects, for each of the argc entries of argv, the result of
+ * mrb_hash_get(hash, argv[i]) into a new array and returns that array. */
+mrb_value
+mrb_hash_values_at(mrb_state *mrb, int argc, mrb_value *argv, mrb_value hash)
+{
+ mrb_value result = mrb_ary_new_capa(mrb, argc);//mrb_ary_new2(argc);
+ long i;
+
+ for (i=0; i<argc; i++) {
+ /* KEY() is applied to the looked-up value, so String values are pushed as
+ * duplicates (see mrb_hash_ht_key). A missing key yields whatever
+ * mrb_hash_get() returns for a miss. */
+ mrb_ary_push(mrb, result, KEY(mrb_hash_get(mrb, hash, argv[i])));
+ }
+ return result;
+}
+
+/*
+ * call-seq:
+ * hsh.select {|key, value| block} -> a_hash
+ * hsh.select -> an_enumerator
+ *
+ * Returns a new hash consisting of entries for which the block returns true.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * h = { "a" => 100, "b" => 200, "c" => 300 }
+ * h.select {|k,v| k > "a"} #=> {"b" => 200, "c" => 300}
+ * h.select {|k,v| v < 200} #=> {"a" => 100}
+ */
+
+/*
+ * call-seq:
+ * hsh.select! {| key, value | block } -> hsh or nil
+ * hsh.select! -> an_enumerator
+ *
+ * Equivalent to <code>Hash#keep_if</code>, but returns
+ * <code>nil</code> if no changes were made.
+ */
+
+/*
+ * call-seq:
+ * hsh.keep_if {| key, value | block } -> hsh
+ * hsh.keep_if -> an_enumerator
+ *
+ * Deletes every key-value pair from <i>hsh</i> for which <i>block</i>
+ * evaluates to false.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ */
+
+/* 15.2.13.4.4 */
+/*
+ * call-seq:
+ * hsh.clear -> hsh
+ *
+ * Removes all key-value pairs from <i>hsh</i>.
+ *
+ * h = { "a" => 100, "b" => 200 } #=> {"a"=>100, "b"=>200}
+ * h.clear #=> {}
+ *
+ */
+
+/* Method body for Hash#clear: removes every entry from the table (when the
+ * hash has one) and returns the hash itself. */
+static mrb_value
+mrb_hash_clear(mrb_state *mrb, mrb_value hash)
+{
+  khash_t(ht) *h = RHASH_H_TBL(hash);
+
+  /* Other accessors in this file treat a missing table as an empty hash;
+   * there is nothing to clear in that case. */
+  if (h) kh_clear(ht, h);
+  return hash;
+}
+
+/* 15.2.13.4.3 */
+/* 15.2.13.4.26 */
+/*
+ * call-seq:
+ * hsh[key] = value -> value
+ * hsh.store(key, value) -> value
+ *
+ * Element Assignment---Associates the value given by
+ * <i>value</i> with the key given by <i>key</i>.
+ * <i>key</i> should not have its value changed while it is in
+ * use as a key (a <code>String</code> passed as a key will be
+ * duplicated and frozen).
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h["a"] = 9
+ * h["c"] = 4
+ * h #=> {"a"=>9, "b"=>200, "c"=>4}
+ *
+ */
+/* Method body for Hash#[]= / Hash#store: reads key and value, stores the
+ * pair with mrb_hash_set(), and returns the value. */
+mrb_value
+mrb_hash_aset(mrb_state *mrb, mrb_value self)
+{
+  mrb_value new_key;
+  mrb_value new_val;
+
+  mrb_get_args(mrb, "oo", &new_key, &new_val);
+  mrb_hash_set(mrb, self, new_key, new_val);
+
+  return new_val;
+}
+
+/* 15.2.13.4.17 */
+/* 15.2.13.4.23 */
+/*
+ * call-seq:
+ * hsh.replace(other_hash) -> hsh
+ *
+ * Replaces the contents of <i>hsh</i> with the contents of
+ * <i>other_hash</i>.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.replace({ "c" => 300, "d" => 400 }) #=> {"c"=>300, "d"=>400}
+ *
+ */
+
+/*
+ * Method body for Hash#replace.
+ *
+ * Converts the argument with to_hash(), empties the receiver, copies every
+ * entry of the argument into it, and takes over the argument's default
+ * (plain value or proc). Returns the receiver. Replacing a hash with itself
+ * is a no-op.
+ */
+static mrb_value
+mrb_hash_replace(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value hash2;
+  khash_t(ht) *h2;
+  khiter_t k;
+
+  mrb_get_args(mrb, "o", &hash2);
+
+  mrb_hash_modify_check(mrb, hash);
+  hash2 = to_hash(mrb, hash2);
+  if (mrb_obj_equal(mrb, hash, hash2)) return hash;
+  mrb_hash_clear(mrb, hash);
+
+  h2 = RHASH_H_TBL(hash2);
+  if (h2) {
+    for (k = kh_begin(h2); k != kh_end(h2); k++) {
+      if (kh_exist(h2, k))
+        mrb_hash_set(mrb, hash, kh_key(h2, k), kh_value(h2, k));
+    }
+  }
+
+  if (MRB_RHASH_PROCDEFAULT_P(hash2)) {
+    RHASH(hash)->flags |= MRB_HASH_PROC_DEFAULT;
+    RHASH_PROCDEFAULT(hash) = RHASH_PROCDEFAULT(hash2);
+  }
+  else {
+    /* The receiver may have had a default proc before; without clearing
+     * the flag the plain default copied below would be invoked as a proc. */
+    RHASH(hash)->flags &= ~(MRB_HASH_PROC_DEFAULT);
+    RHASH_IFNONE(hash) = RHASH_IFNONE(hash2);
+  }
+  /* The default was stored directly into the hash object, so notify the GC
+   * as mrb_hash_set() does for entry stores. */
+  mrb_write_barrier(mrb, (struct RBasic*)RHASH(hash));
+  return hash;
+}
+
+/* 15.2.13.4.20 */
+/* 15.2.13.4.25 */
+/*
+ * call-seq:
+ * hsh.length -> fixnum
+ * hsh.size -> fixnum
+ *
+ * Returns the number of key-value pairs in the hash.
+ *
+ * h = { "d" => 100, "a" => 200, "v" => 300, "e" => 400 }
+ * h.length #=> 4
+ * h.delete("a") #=> 200
+ * h.length #=> 3
+ */
+/* Method body for Hash#length / Hash#size: returns the table's entry count
+ * as a Fixnum, or 0 when the hash has no table. */
+static mrb_value
+mrb_hash_size_m(mrb_state *mrb, mrb_value self)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(self);
+
+  if (tbl) {
+    return mrb_fixnum_value(kh_size(tbl));
+  }
+  return mrb_fixnum_value(0);
+}
+
+/* 15.2.13.4.12 */
+/*
+ * call-seq:
+ * hsh.empty? -> true or false
+ *
+ * Returns <code>true</code> if <i>hsh</i> contains no key-value pairs.
+ *
+ * {}.empty? #=> true
+ *
+ */
+/* Method body for Hash#empty?: returns false as soon as one occupied slot
+ * is found in the table, true when there is no table or no occupied slot. */
+static mrb_value
+mrb_hash_empty_p(mrb_state *mrb, mrb_value self)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(self);
+  khiter_t it;
+
+  if (!tbl) {
+    return mrb_true_value();
+  }
+
+  it = kh_begin(tbl);
+  while (it != kh_end(tbl)) {
+    if (kh_exist(tbl, it)) {
+      return mrb_false_value();
+    }
+    it++;
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.13.4.11 */
+/*
+ * call-seq:
+ * hsh.each_value {| value | block } -> hsh
+ * hsh.each_value -> an_enumerator
+ *
+ * Calls <i>block</i> once for each key in <i>hsh</i>, passing the
+ * value as a parameter.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.each_value {|value| puts value }
+ *
+ * <em>produces:</em>
+ *
+ * 100
+ * 200
+ */
+
+/* 15.2.13.4.10 */
+/*
+ * call-seq:
+ * hsh.each_key {| key | block } -> hsh
+ * hsh.each_key -> an_enumerator
+ *
+ * Calls <i>block</i> once for each key in <i>hsh</i>, passing the key
+ * as a parameter.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.each_key {|key| puts key }
+ *
+ * <em>produces:</em>
+ *
+ * a
+ * b
+ */
+
+/* 15.2.13.4.9 */
+/*
+ * call-seq:
+ * hsh.each {| key, value | block } -> hsh
+ * hsh.each_pair {| key, value | block } -> hsh
+ * hsh.each -> an_enumerator
+ * hsh.each_pair -> an_enumerator
+ *
+ * Calls <i>block</i> once for each key in <i>hsh</i>, passing the key-value
+ * pair as parameters.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.each {|key, value| puts "#{key} is #{value}" }
+ *
+ * <em>produces:</em>
+ *
+ * a is 100
+ * b is 200
+ *
+ */
+
+/* Builds the "{key=>value, ...}" text for hash from mrb_inspect() of each
+ * key and value. When recur is non-zero, returns "{...}" without looking
+ * at the entries. */
+static mrb_value
+inspect_hash(mrb_state *mrb, mrb_value hash, int recur)
+{
+ mrb_value str, str2;
+ khash_t(ht) *h = RHASH_H_TBL(hash);
+ khiter_t k;
+
+ if (recur) return mrb_str_new2(mrb, "{...}");
+
+ str = mrb_str_new2(mrb, "{");
+ if (h && kh_size(h) > 0) {
+ for (k = kh_begin(h); k != kh_end(h); k++) {
+ int ai;
+
+ if (!kh_exist(h,k)) continue;
+
+ /* Remember the GC arena index; it is restored at the end of this
+ * iteration. */
+ ai = mrb_gc_arena_save(mrb);
+
+ /* str is longer than the opening "{" once an entry has been written, so
+ * this adds the separator before every entry but the first. */
+ if (RSTRING_LEN(str) > 1) mrb_str_cat2(mrb, str, ", ");
+
+ str2 = mrb_inspect(mrb, kh_key(h,k));
+ mrb_str_append(mrb, str, str2);
+ mrb_str_buf_cat(mrb, str, "=>", strlen("=>"));
+ str2 = mrb_inspect(mrb, kh_value(h,k));
+ mrb_str_append(mrb, str, str2);
+
+ mrb_gc_arena_restore(mrb, ai);
+ }
+ }
+ mrb_str_buf_cat(mrb, str, "}", strlen("}"));//mrb_str_buf_cat2(str, "}");
+
+ return str;
+}
+
+/* 15.2.13.4.30 (x)*/
+/*
+ * call-seq:
+ * hsh.to_s -> string
+ * hsh.inspect -> string
+ *
+ * Return the contents of this hash as a string.
+ *
+ * h = { "c" => 300, "a" => 100, "d" => 400, "c" => 300 }
+ * h.to_s #=> "{\"c\"=>300, \"a\"=>100, \"d\"=>400}"
+ */
+
+/* Method body for Hash#inspect / Hash#to_s: "{}" for an empty hash,
+ * otherwise the text built by inspect_hash(). */
+static mrb_value
+mrb_hash_inspect(mrb_state *mrb, mrb_value hash)
+{
+  if (!RHASH_EMPTY_P(hash)) {
+    return inspect_hash(mrb, hash, 0);
+  }
+  return mrb_str_new2(mrb, "{}");
+}
+
+/* 15.2.13.4.29 (x)*/
+/*
+ * call-seq:
+ * hsh.to_hash => hsh
+ *
+ * Returns +self+.
+ */
+
+/* Method body for Hash#to_hash: returns the receiver unchanged. */
+static mrb_value
+mrb_hash_to_hash(mrb_state *mrb, mrb_value hash)
+{
+ return hash;
+}
+
+/* 15.2.13.4.19 */
+/*
+ * call-seq:
+ * hsh.keys -> array
+ *
+ * Returns a new array populated with the keys from this hash. See also
+ * <code>Hash#values</code>.
+ *
+ * h = { "a" => 100, "b" => 200, "c" => 300, "d" => 400 }
+ * h.keys #=> ["a", "b", "c", "d"]
+ *
+ */
+
+/* Method body for Hash#keys: returns a new array with one element per
+ * stored key. Keys that are not special constants are pushed as copies
+ * made with mrb_obj_dup(). */
+static mrb_value
+mrb_hash_keys(mrb_state *mrb, mrb_value hash)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t it;
+  mrb_value keys = mrb_ary_new(mrb);
+
+  if (!tbl) {
+    return keys;
+  }
+
+  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
+    mrb_value key;
+
+    if (!kh_exist(tbl, it)) continue;
+
+    key = kh_key(tbl, it);
+    if (!mrb_special_const_p(key)) {
+      key = mrb_obj_dup(mrb, key);
+    }
+    mrb_ary_push(mrb, keys, key);
+  }
+  return keys;
+}
+
+/* 15.2.13.4.28 */
+/*
+ * call-seq:
+ * hsh.values -> array
+ *
+ * Returns a new array populated with the values from <i>hsh</i>. See
+ * also <code>Hash#keys</code>.
+ *
+ * h = { "a" => 100, "b" => 200, "c" => 300 }
+ * h.values #=> [100, 200, 300]
+ *
+ */
+
+/* Method body for Hash#values: returns a new array with one element per
+ * stored value. Values that are not special constants are pushed as copies
+ * made with mrb_obj_dup(). */
+static mrb_value
+mrb_hash_values(mrb_state *mrb, mrb_value hash)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t it;
+  mrb_value values = mrb_ary_new(mrb);
+
+  if (!tbl) {
+    return values;
+  }
+
+  for (it = kh_begin(tbl); it != kh_end(tbl); it++) {
+    mrb_value val;
+
+    if (!kh_exist(tbl, it)) continue;
+
+    val = kh_value(tbl, it);
+    if (!mrb_special_const_p(val)) {
+      val = mrb_obj_dup(mrb, val);
+    }
+    mrb_ary_push(mrb, values, val);
+  }
+  return values;
+}
+
+/* Returns true when key is present in hash's table, false when it is not
+ * or when the hash has no table. */
+static mrb_value
+mrb_hash_has_keyWithKey(mrb_state *mrb, mrb_value hash, mrb_value key)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t pos;
+
+  if (!tbl) {
+    return mrb_false_value();
+  }
+
+  pos = kh_get(ht, tbl, key);
+  if (pos == kh_end(tbl)) {
+    return mrb_false_value();
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.13.4.13 */
+/* 15.2.13.4.15 */
+/* 15.2.13.4.18 */
+/* 15.2.13.4.21 */
+/*
+ * call-seq:
+ * hsh.has_key?(key) -> true or false
+ * hsh.include?(key) -> true or false
+ * hsh.key?(key) -> true or false
+ * hsh.member?(key) -> true or false
+ *
+ * Returns <code>true</code> if the given key is present in <i>hsh</i>.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.has_key?("a") #=> true
+ * h.has_key?("z") #=> false
+ *
+ */
+
+/* Method body for Hash#has_key? and its aliases: reads one argument and
+ * reports whether it is a key of the receiver. */
+static mrb_value
+mrb_hash_has_key(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value wanted;
+  mrb_value present;
+
+  mrb_get_args(mrb, "o", &wanted);
+  present = mrb_hash_has_keyWithKey(mrb, hash, wanted);
+  return present;
+}
+
+/* Returns true when some stored value compares equal (mrb_equal) to value,
+ * false otherwise or when the hash has no table. */
+static mrb_value
+mrb_hash_has_valueWithvalue(mrb_state *mrb, mrb_value hash, mrb_value value)
+{
+  khash_t(ht) *tbl = RHASH_H_TBL(hash);
+  khiter_t it;
+
+  if (!tbl) {
+    return mrb_false_value();
+  }
+
+  it = kh_begin(tbl);
+  while (it != kh_end(tbl)) {
+    if (kh_exist(tbl, it) && mrb_equal(mrb, kh_value(tbl,it), value)) {
+      return mrb_true_value();
+    }
+    it++;
+  }
+
+  return mrb_false_value();
+}
+
+/* 15.2.13.4.14 */
+/* 15.2.13.4.27 */
+/*
+ * call-seq:
+ * hsh.has_value?(value) -> true or false
+ * hsh.value?(value) -> true or false
+ *
+ * Returns <code>true</code> if the given value is present for some key
+ * in <i>hsh</i>.
+ *
+ * h = { "a" => 100, "b" => 200 }
+ * h.has_value?(100) #=> true
+ * h.has_value?(999) #=> false
+ */
+
+/* Method body for Hash#has_value? / Hash#value?: reads one argument and
+ * reports whether it occurs among the receiver's values. */
+static mrb_value
+mrb_hash_has_value(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value wanted;
+  mrb_value present;
+
+  mrb_get_args(mrb, "o", &wanted);
+  present = mrb_hash_has_valueWithvalue(mrb, hash, wanted);
+  return present;
+}
+
+/* Callback for mrb_exec_recursive_paired(): returns true when every entry
+ * of hash has a counterpart in dt under the same key whose value compares
+ * equal (mrb_equal), false at the first entry that does not. recur is not
+ * used. */
+static mrb_value
+recursive_eql(mrb_state *mrb, mrb_value hash, mrb_value dt, int recur)
+{
+  khash_t(ht) *lhs = RHASH_H_TBL(hash);
+  khash_t(ht) *rhs = RHASH_H_TBL(dt);
+  khiter_t li, ri;
+  mrb_value lkey;
+
+  for (li = kh_begin(lhs); li != kh_end(lhs); li++) {
+    if (!kh_exist(lhs, li)) continue;
+
+    lkey = kh_key(lhs,li);
+    ri = kh_get(ht, rhs, lkey);
+    if (ri == kh_end(rhs)) {
+      /* key missing on the other side */
+      return mrb_false_value();
+    }
+    if (!mrb_equal(mrb, kh_value(lhs,li), kh_value(rhs,ri))) {
+      return mrb_false_value();
+    }
+  }
+  return mrb_true_value();
+}
+
+/*
+ * Shared implementation of Hash#== (eql == FALSE) and Hash#eql?
+ * (eql == TRUE). Always returns true or false.
+ *
+ * - Identical objects are equal.
+ * - A non-Hash hash2 that does not respond to to_hash is unequal; one that
+ *   does is asked to compare itself against hash1 (with mrb_eql or
+ *   mrb_equal, depending on eql).
+ * - Hashes of different size are unequal; if either has no table they are
+ *   considered equal at this point.
+ * - Otherwise the entries are compared by recursive_eql().
+ *
+ * NOTE(review): eql is not forwarded to recursive_eql(), which always
+ * compares values with mrb_equal() -- confirm this is intended for eql?.
+ */
+static mrb_value
+hash_equal(mrb_state *mrb, mrb_value hash1, mrb_value hash2, int eql)
+{
+  if (mrb_obj_equal(mrb, hash1, hash2)) return mrb_true_value();
+  if (mrb_type(hash2) != MRB_TT_HASH) {
+    if (!mrb_respond_to(mrb, hash2, mrb_intern(mrb, "to_hash"))) {
+      return mrb_false_value();
+    }
+    /* Convert the C truth value to true/false. Wrapping it in a Fixnum
+     * would make even a failed comparison (0) truthy in Ruby. */
+    if (eql)
+      return mrb_eql(mrb, hash2, hash1) ? mrb_true_value() : mrb_false_value();
+    else
+      return mrb_equal(mrb, hash2, hash1) ? mrb_true_value() : mrb_false_value();
+  }
+  if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return mrb_false_value();
+  if (!RHASH(hash1)->ht || !RHASH(hash2)->ht) return mrb_true_value();
+
+  return mrb_exec_recursive_paired(mrb, recursive_eql, hash1, hash2, (void*)0);
+}
+
+/* 15.2.13.4.1 */
+/*
+ * call-seq:
+ * hsh == other_hash -> true or false
+ *
+ * Equality---Two hashes are equal if they each contain the same number
+ * of keys and if each key-value pair is equal to (according to
+ * <code>Object#==</code>) the corresponding elements in the other
+ * hash.
+ *
+ * h1 = { "a" => 1, "c" => 2 }
+ * h2 = { 7 => 35, "c" => 2, "a" => 1 }
+ * h3 = { "a" => 1, "c" => 2, 7 => 35 }
+ * h4 = { "a" => 1, "d" => 2, "f" => 35 }
+ * h1 == h2 #=> false
+ * h2 == h3 #=> true
+ * h3 == h4 #=> false
+ *
+ */
+
+/* Method body for Hash#==: reads the other object and compares it with
+ * the receiver through hash_equal() in non-eql mode. */
+static mrb_value
+mrb_hash_equal(mrb_state *mrb, mrb_value hash1)
+{
+  mrb_value other;
+  mrb_value verdict;
+
+  mrb_get_args(mrb, "o", &other);
+  verdict = hash_equal(mrb, hash1, other, FALSE);
+  return verdict;
+}
+
+/* 15.2.13.4.32 (x)*/
+/*
+ * call-seq:
+ * hash.eql?(other) -> true or false
+ *
+ * Returns <code>true</code> if <i>hash</i> and <i>other</i> are
+ * both hashes with the same content.
+ */
+
+/* Method body for Hash#eql?: reads the other object and compares it with
+ * the receiver through hash_equal() in eql mode. */
+static mrb_value
+mrb_hash_eql(mrb_state *mrb, mrb_value hash1)
+{
+  mrb_value other;
+  mrb_value verdict;
+
+  mrb_get_args(mrb, "o", &other);
+  verdict = hash_equal(mrb, hash1, other, TRUE);
+  return verdict;
+}
+
+/*
+ * call-seq:
+ * hsh.merge!(other_hash) -> hsh
+ * hsh.update(other_hash) -> hsh
+ * hsh.merge!(other_hash){|key, oldval, newval| block} -> hsh
+ * hsh.update(other_hash){|key, oldval, newval| block} -> hsh
+ *
+ * Adds the contents of <i>other_hash</i> to <i>hsh</i>. If no
+ * block is specified, entries with duplicate keys are overwritten
+ * with the values from <i>other_hash</i>, otherwise the value
+ * of each duplicate key is determined by calling the block with
+ * the key, its value in <i>hsh</i> and its value in <i>other_hash</i>.
+ *
+ * h1 = { "a" => 100, "b" => 200 }
+ * h2 = { "b" => 254, "c" => 300 }
+ * h1.merge!(h2) #=> {"a"=>100, "b"=>254, "c"=>300}
+ *
+ * h1 = { "a" => 100, "b" => 200 }
+ * h2 = { "b" => 254, "c" => 300 }
+ * h1.merge!(h2) { |key, v1, v2| v1 }
+ * #=> {"a"=>100, "b"=>200, "c"=>300}
+ */
+
+/* 15.2.13.4.22 */
+/*
+ * call-seq:
+ * hsh.merge(other_hash) -> new_hash
+ * hsh.merge(other_hash){|key, oldval, newval| block} -> new_hash
+ *
+ * Returns a new hash containing the contents of <i>other_hash</i> and
+ * the contents of <i>hsh</i>. If no block is specified, the value for
+ * entries with duplicate keys will be that of <i>other_hash</i>. Otherwise
+ * the value for each duplicate key is determined by calling the block
+ * with the key, its value in <i>hsh</i> and its value in <i>other_hash</i>.
+ *
+ * h1 = { "a" => 100, "b" => 200 }
+ * h2 = { "b" => 254, "c" => 300 }
+ * h1.merge(h2) #=> {"a"=>100, "b"=>254, "c"=>300}
+ * h1.merge(h2){|key, oldval, newval| newval - oldval}
+ * #=> {"a"=>100, "b"=>54, "c"=>300}
+ * h1 #=> {"a"=>100, "b"=>200}
+ *
+ */
+
+/*
+ * call-seq:
+ * hash.assoc(obj) -> an_array or nil
+ *
+ * Searches through the hash comparing _obj_ with the key using <code>==</code>.
+ * Returns the key-value pair (two elements array) or +nil+
+ * if no match is found. See <code>Array#assoc</code>.
+ *
+ * h = {"colors" => ["red", "blue", "green"],
+ * "letters" => ["a", "b", "c" ]}
+ * h.assoc("letters") #=> ["letters", ["a", "b", "c"]]
+ * h.assoc("foo") #=> nil
+ */
+
+/* Method body for Hash#assoc: returns the two-element array [key, value]
+ * when the argument is a key of the receiver, nil otherwise. A nil
+ * argument raises ArgumentError. */
+mrb_value
+mrb_hash_assoc(mrb_state *mrb, mrb_value hash)
+{
+  mrb_value key, value, has_key;
+
+  mrb_get_args(mrb, "o", &key);
+
+  if (mrb_nil_p(key)) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+  }
+
+  has_key = mrb_hash_has_keyWithKey(mrb, hash, key);
+  if (!mrb_test(has_key)) {
+    return mrb_nil_value();
+  }
+
+  value = mrb_hash_get(mrb, hash, key);
+  return mrb_assoc_new(mrb, key, value);
+}
+
+/*
+ * call-seq:
+ * hash.rassoc(obj) -> an_array or nil
+ *
+ * Searches through the hash comparing _obj_ with the value using <code>==</code>.
+ * Returns the first key-value pair (two-element array) that matches. See
+ * also <code>Array#rassoc</code>.
+ *
+ * a = {1=> "one", 2 => "two", 3 => "three", "ii" => "two"}
+ * a.rassoc("two") #=> [2, "two"]
+ * a.rassoc("four") #=> nil
+ */
+
+/*
+ * Method body for Hash#rassoc.
+ *
+ * Scans the table for the first entry whose value compares equal
+ * (mrb_equal) to the argument and returns it as the two-element array
+ * [key, value]. Returns nil when no value matches or the hash has no
+ * table. A nil argument raises ArgumentError, as in Hash#assoc.
+ */
+mrb_value
+mrb_hash_rassoc(mrb_state *mrb, mrb_value hash)
+{
+  khash_t(ht) *h = RHASH_H_TBL(hash);
+  khiter_t k;
+  mrb_value value;
+
+  mrb_get_args(mrb, "o", &value);
+
+  if (mrb_nil_p(value))
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+
+  if (h) {
+    for (k = kh_begin(h); k != kh_end(h); k++) {
+      if (!kh_exist(h, k)) continue;
+
+      /* Match against the stored value, not the key. */
+      if (mrb_equal(mrb, kh_value(h,k), value)) {
+        return mrb_assoc_new(mrb, kh_key(h,k), kh_value(h,k));
+      }
+    }
+  }
+
+  return mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * hash.flatten -> an_array
+ * hash.flatten(level) -> an_array
+ *
+ * Returns a new array that is a one-dimensional flattening of this
+ * hash. That is, for every key or value that is an array, extract
+ * its elements into the new array. Unlike Array#flatten, this
+ * method does not flatten recursively by default. The optional
+ * <i>level</i> argument determines the level of recursion to flatten.
+ *
+ * a = {1=> "one", 2 => [2,"two"], 3 => "three"}
+ * a.flatten # => [1, "one", 2, [2, "two"], 3, "three"]
+ * a.flatten(2) # => [1, "one", 2, 2, "two", 3, "three"]
+ */
+
+/*
+ * A <code>Hash</code> is a collection of key-value pairs. It is
+ * similar to an <code>Array</code>, except that indexing is done via
+ * arbitrary keys of any object type, not an integer index. Hashes enumerate
+ * their values in the order that the corresponding keys were inserted.
+ *
+ * Hashes have a <em>default value</em> that is returned when accessing
+ * keys that do not exist in the hash. By default, that value is
+ * <code>nil</code>.
+ *
+ */
+
+/*
+ * Creates the Hash class as a subclass of Object, stores it in
+ * mrb->hash_class, marks its instances as MRB_TT_HASH, includes Enumerable,
+ * and registers the C-implemented instance methods.
+ *
+ * The trailing numeric comments are specification section numbers
+ * (presumably the ISO Ruby draft -- confirm). Entries commented out with
+ * "move to mrblib\hash.rb" are not registered here. "__delete" and
+ * "__init_core" are registered as the "core of" the numbered sections.
+ */
+void
+mrb_init_hash(mrb_state *mrb)
+{
+ struct RClass *h;
+
+ h = mrb->hash_class = mrb_define_class(mrb, "Hash", mrb->object_class);
+ MRB_SET_INSTANCE_TT(h, MRB_TT_HASH);
+
+ //mrb_define_class_method(mrb, h, "new", hash_s_new, ARGS_ANY());
+ mrb_include_module(mrb, h, mrb_class_get(mrb, "Enumerable"));
+ mrb_define_method(mrb, h, "==", mrb_hash_equal, ARGS_REQ(1)); /* 15.2.13.4.1 */
+ mrb_define_method(mrb, h, "[]", mrb_hash_aget, ARGS_REQ(1)); /* 15.2.13.4.2 */
+ mrb_define_method(mrb, h, "[]=", mrb_hash_aset, ARGS_REQ(2)); /* 15.2.13.4.3 */
+ mrb_define_method(mrb, h, "clear", mrb_hash_clear, ARGS_NONE()); /* 15.2.13.4.4 */
+ mrb_define_method(mrb, h, "default", mrb_hash_default, ARGS_ANY()); /* 15.2.13.4.5 */
+ mrb_define_method(mrb, h, "default=", mrb_hash_set_default, ARGS_REQ(1)); /* 15.2.13.4.6 */
+ mrb_define_method(mrb, h, "default_proc", mrb_hash_default_proc,ARGS_NONE()); /* 15.2.13.4.7 */
+ mrb_define_method(mrb, h, "__delete", mrb_hash_delete, ARGS_REQ(1)); /* core of 15.2.13.4.8 */
+//mrb_define_method(mrb, h, "each", mrb_hash_each_pair, ARGS_NONE()); /* 15.2.13.4.9 */ /* move to mrblib\hash.rb */
+//mrb_define_method(mrb, h, "each_key", mrb_hash_each_key, ARGS_NONE()); /* 15.2.13.4.10 */ /* move to mrblib\hash.rb */
+//mrb_define_method(mrb, h, "each_value", mrb_hash_each_value, ARGS_NONE()); /* 15.2.13.4.11 */ /* move to mrblib\hash.rb */
+ mrb_define_method(mrb, h, "empty?", mrb_hash_empty_p, ARGS_NONE()); /* 15.2.13.4.12 */
+ mrb_define_method(mrb, h, "has_key?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.13 */
+ mrb_define_method(mrb, h, "has_value?", mrb_hash_has_value, ARGS_REQ(1)); /* 15.2.13.4.14 */
+ mrb_define_method(mrb, h, "include?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.15 */
+ mrb_define_method(mrb, h, "__init_core", mrb_hash_init_core, ARGS_ANY()); /* core of 15.2.13.4.16 */
+ mrb_define_method(mrb, h, "initialize_copy", mrb_hash_replace, ARGS_REQ(1)); /* 15.2.13.4.17 */
+ mrb_define_method(mrb, h, "key?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.18 */
+ mrb_define_method(mrb, h, "keys", mrb_hash_keys, ARGS_NONE()); /* 15.2.13.4.19 */
+ mrb_define_method(mrb, h, "length", mrb_hash_size_m, ARGS_NONE()); /* 15.2.13.4.20 */
+ mrb_define_method(mrb, h, "member?", mrb_hash_has_key, ARGS_REQ(1)); /* 15.2.13.4.21 */
+//mrb_define_method(mrb, h, "merge", mrb_hash_merge, ARGS_REQ(1)); /* 15.2.13.4.22 */ /* move to mrblib\hash.rb */
+ mrb_define_method(mrb, h, "replace", mrb_hash_replace, ARGS_REQ(1)); /* 15.2.13.4.23 */
+ mrb_define_method(mrb, h, "shift", mrb_hash_shift, ARGS_NONE()); /* 15.2.13.4.24 */
+ mrb_define_method(mrb, h, "size", mrb_hash_size_m, ARGS_NONE()); /* 15.2.13.4.25 */
+ mrb_define_method(mrb, h, "store", mrb_hash_aset, ARGS_REQ(2)); /* 15.2.13.4.26 */
+ mrb_define_method(mrb, h, "value?", mrb_hash_has_value, ARGS_REQ(1)); /* 15.2.13.4.27 */
+ mrb_define_method(mrb, h, "values", mrb_hash_values, ARGS_NONE()); /* 15.2.13.4.28 */
+
+ mrb_define_method(mrb, h, "to_hash", mrb_hash_to_hash, ARGS_NONE()); /* 15.2.13.4.29 (x)*/
+ mrb_define_method(mrb, h, "inspect", mrb_hash_inspect, ARGS_NONE()); /* 15.2.13.4.30 (x)*/
+ mrb_define_alias(mrb, h, "to_s", "inspect"); /* 15.2.13.4.31 (x)*/
+ mrb_define_method(mrb, h, "eql?", mrb_hash_eql, ARGS_REQ(1)); /* 15.2.13.4.32 (x)*/
+}
diff --git a/src/init.c b/src/init.c
new file mode 100644
index 000000000..ce039ce30
--- /dev/null
+++ b/src/init.c
@@ -0,0 +1,105 @@
+#include "mruby.h"
+
+void mrb_init_class(mrb_state*);
+void mrb_init_symtbl(mrb_state*);
+void mrb_init_symbols(mrb_state*);
+void mrb_init_object(mrb_state*);
+void mrb_init_kernel(mrb_state*);
+void mrb_init_enumerable(mrb_state*);
+void mrb_init_comparable(mrb_state*);
+void mrb_init_array(mrb_state*);
+void mrb_init_hash(mrb_state*);
+void mrb_init_numeric(mrb_state*);
+void mrb_init_proc(mrb_state*);
+void mrb_init_range(mrb_state*);
+void mrb_init_string(mrb_state*);
+void mrb_init_regexp(mrb_state*);
+void mrb_init_encoding(mrb_state*);
+void mrb_init_exception(mrb_state*);
+void mrb_init_time(mrb_state *);
+void mrb_init_io(mrb_state *);
+void mrb_init_file(mrb_state *);
+void mrb_init_thread(mrb_state *);
+void mrb_init_struct(mrb_state *);
+void mrb_init_gc(mrb_state *);
+void Init_var_tables(mrb_state *mrb);
+void Init_version(mrb_state *mrb);
+void mrb_init_print(mrb_state *mrb);
+void mrb_init_mrblib(mrb_state *mrb);
+
+#define MANDEL
+#ifdef MANDEL
+#include <stdio.h>
+#include <math.h>
+/* Kernel#pow(base, exponent): raises base to exponent using pow(3). */
+static mrb_value
+mpow(mrb_state *mrb, mrb_value obj)
+{
+  mrb_float base, exponent;
+  mrb_float result;
+
+  mrb_get_args(mrb, "ff", &base, &exponent);
+  result = pow(base, exponent);
+
+  return mrb_float_value(result);
+}
+
+/* Kernel#sqrt(value): square root of the single float argument via sqrt(3). */
+static mrb_value
+msqrt(mrb_state *mrb, mrb_value obj)
+{
+  mrb_float value;
+  mrb_float root;
+
+  mrb_get_args(mrb, "f", &value);
+  root = sqrt(value);
+
+  return mrb_float_value(root);
+}
+
+/* Kernel#putc(code): writes one character to stdout and returns nil. */
+static mrb_value
+mputc(mrb_state *mrb, mrb_value obj)
+{
+  int ch;
+
+  mrb_get_args(mrb, "i", &ch);
+  fputc(ch, stdout);
+
+  return mrb_nil_value();
+}
+#endif
+
+/*
+ * Runs the per-subsystem initialisers in a fixed order: symbol table first,
+ * then class/object/kernel and the Comparable/Enumerable modules, then the
+ * builtin classes. Regexp and encoding are set up only when INCLUDE_REGEXP
+ * is defined. When MANDEL is defined, the helper methods pow, sqrt and putc
+ * are added to Kernel. mrblib is loaded last.
+ *
+ * NOTE(review): the order matters to at least some callees (see the
+ * "after init_string" remark below); do not reorder without checking.
+ */
+void
+mrb_init_core(mrb_state *mrb)
+{
+ mrb_init_symtbl(mrb);
+
+ mrb_init_class(mrb);
+ mrb_init_object(mrb);
+ mrb_init_kernel(mrb);
+ mrb_init_comparable(mrb);
+ mrb_init_enumerable(mrb);
+
+ mrb_init_symbols(mrb);
+ mrb_init_proc(mrb);
+ mrb_init_string(mrb);
+ Init_version(mrb); /* after init_string */
+ mrb_init_array(mrb);
+ mrb_init_hash(mrb);
+ mrb_init_numeric(mrb);
+ mrb_init_range(mrb);
+ mrb_init_struct(mrb);
+ mrb_init_gc(mrb);
+#ifdef INCLUDE_REGEXP
+ mrb_init_regexp(mrb);
+ mrb_init_encoding(mrb);
+#endif
+ mrb_init_exception(mrb);
+ mrb_init_print(mrb);
+
+#ifdef MANDEL
+ /* Demo helpers defined above in this file. */
+ mrb_define_method(mrb, mrb->kernel_module, "pow", mpow, ARGS_REQ(2));
+ mrb_define_method(mrb, mrb->kernel_module, "sqrt", msqrt, ARGS_REQ(1));
+ mrb_define_method(mrb, mrb->kernel_module, "putc", mputc, ARGS_REQ(1));
+#endif
+
+ mrb_init_mrblib(mrb);
+
+ /* NOTE(review): presumably resets the GC arena to its initial index so
+ * objects created during start-up are not pinned -- confirm in gc.c. */
+ mrb_gc_arena_restore(mrb, 0);
+}
diff --git a/src/init_ext.c b/src/init_ext.c
new file mode 100644
index 000000000..cb094f171
--- /dev/null
+++ b/src/init_ext.c
@@ -0,0 +1,10 @@
+#include "mruby.h"
+
+/* Initialises optional extensions; only the socket extension is wired in,
+ * and only when INCLUDE_SOCKET is defined at build time. */
+void
+mrb_init_ext(mrb_state *mrb)
+{
+#if defined(INCLUDE_SOCKET)
+  extern void mrb_init_socket(mrb_state *mrb);
+
+  mrb_init_socket(mrb);
+#endif
+}
diff --git a/src/irep.h b/src/irep.h
new file mode 100644
index 000000000..5ec6cc6f1
--- /dev/null
+++ b/src/irep.h
@@ -0,0 +1,23 @@
+#ifndef MRUBY_IREP_H
+#define MRUBY_IREP_H
+
+/*
+ * One "irep" record. Field meanings below are inferred from the names only.
+ * NOTE(review): confirm against the code generator and loader.
+ */
+typedef struct mrb_irep {
+ int idx; /* presumably this record's index in the irep table */
+
+ int flags; /* see MRB_IREP_NOFREE / MRB_ISEQ_NOFREE defined after this struct */
+ int nlocals; /* presumably number of local variables */
+ int nregs; /* presumably number of registers */
+
+ mrb_code *iseq; /* presumably the instruction sequence (length ilen) */
+ mrb_value *pool; /* presumably the literal pool (length plen) */
+ int *syms; /* presumably the symbol list (length slen) */
+
+ int ilen, plen, slen;
+} mrb_irep;
+
+#define MRB_IREP_NOFREE 3
+#define MRB_ISEQ_NOFREE 1
+
+void mrb_add_irep(mrb_state *mrb, int n);
+
+#endif /* MRUBY_IREP_H */
diff --git a/src/kernel.c b/src/kernel.c
new file mode 100644
index 000000000..e5b2cab04
--- /dev/null
+++ b/src/kernel.c
@@ -0,0 +1,1530 @@
+#include "mruby.h"
+#include "mruby/string.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mruby/proc.h"
+
+#include "mruby/range.h"
+#include "mruby/array.h"
+#include "mruby/hash.h"
+#include "mruby/class.h"
+#include "mruby/struct.h"
+#include "variable.h"
+#include "ritehash.h"
+#include "error.h"
+#include "method.h"
+#include "mdata.h"
+
+#ifdef INCLUDE_REGEXP
+#include "re.h"
+#include "regint.h"
+#endif
+
+KHASH_MAP_INIT_INT(mt, struct RProc*);
+KHASH_MAP_INIT_INT(iv, mrb_value);
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+static mrb_value tst_setconst(mrb_state *mrb, mrb_value obj);
+int kiv_lookup(khash_t(iv) *table, mrb_sym key, mrb_value *value);
+
+/* Bundle handed to obj_ivar_i by mrb_ivar_foreach: the object being walked,
+ * the caller's per-variable callback, and the caller's opaque argument. */
+struct obj_ivar_tag {
+ mrb_value obj;
+ int (*func)(mrb_sym key, mrb_value val, void * arg);
+ void * arg;
+};
+
+/* Per-entry callback: forwards (key, value) of slot `index` to the user
+ * callback stored in `arg`, skipping out-of-range and free slots. */
+static int
+obj_ivar_i(mrb_sym key, int index, struct obj_ivar_tag *arg)
+{
+  enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
+  mrb_value slot;
+
+  /* Index beyond the object's ivar count: nothing to report. */
+  if (!((long)index < ROBJECT_NUMIV(arg->obj))) {
+    return ST_CONTINUE;
+  }
+
+  slot = ROBJECT_IVPTR(arg->obj)->vals[(long)index];
+  if (slot.tt == MRB_TT_FREE) {
+    return ST_CONTINUE;
+  }
+
+  return (arg->func)((mrb_sym)key, slot, arg->arg);
+}
+
+/*
+ * Calls `func` for the instance variables of `obj`, passing `arg` through.
+ * The traversal depends on the type of `obj`:
+ * - MRB_TT_OBJECT: walks the table via obj_ivar_i, which forwards to `func`.
+ * - MRB_TT_CLASS / MRB_TT_MODULE: walks the table with `func` directly.
+ * - anything else: only special constants are considered (see below).
+ * Nothing is done when the relevant table pointer is null.
+ */
+void
+mrb_ivar_foreach(mrb_state *mrb, mrb_value obj, int (*func)(ANYARGS), void* arg)
+{
+ struct obj_ivar_tag data;
+ switch (mrb_type(obj)) {
+ case MRB_TT_OBJECT:
+ //obj_ivar_each(mrb, obj, func, arg);
+ if (RCLASS_IV_TBL(obj)) {
+ data.obj = obj;
+ /* func is declared with ANYARGS; cast it to the signature obj_ivar_i calls. */
+ data.func = (int (*)(mrb_sym key, mrb_value val, void * arg))func;
+ data.arg = arg;
+ st_foreach_safe(mrb, RCLASS_IV_TBL(obj), obj_ivar_i, (void *)&data);
+ }
+ break;
+ case MRB_TT_CLASS:
+ case MRB_TT_MODULE:
+ if (RCLASS_IV_TBL(obj)) {
+ st_foreach_safe(mrb, RCLASS_IV_TBL(obj), func, arg);
+ }
+ break;
+ default:
+ if (!ROBJECT_IVPTR(obj)/*generic_iv_tbl*/) break;
+ if (/*FL_TEST(obj, FL_EXIVAR) ||*/ mrb_special_const_p(obj)) {
+ /* NOTE(review): tbl is a null pointer and is passed by value to
+ * kiv_lookup, so it cannot receive a result here; it is then passed
+ * on (still null) to st_foreach_safe. Looks unfinished -- confirm. */
+ mrb_value *tbl=0;
+ if (kiv_lookup(ROBJECT_IVPTR(obj)/*generic_iv_tbl*/, SYM2ID(obj), tbl)) {
+ st_foreach_safe(mrb, (void *)tbl, func, arg);
+ }
+ }
+ break;
+ }
+}
+
+/* Appends one "name=inspected-value" item to the inspect string `str`.
+ * A leading '-' in `str` marks that no item has been written yet. */
+static int
+inspect_i(mrb_state *mrb, mrb_sym id, mrb_value value, mrb_value str)
+{
+  enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
+
+  if (RSTRING_PTR(str)[0] != '-') {
+    /* Not the first item: separate from the previous one. */
+    mrb_str_cat2(mrb, str, ", ");
+  }
+  else {
+    /* First item: clear the marker and separate from the header. */
+    RSTRING_PTR(str)[0] = '#';
+    mrb_str_cat2(mrb, str, " ");
+  }
+
+  mrb_str_cat2(mrb, str, mrb_sym2name(mrb, id));
+  mrb_str_cat2(mrb, str, "=");
+  mrb_str_append(mrb, str, mrb_inspect(mrb, value));
+
+  return ST_CONTINUE;
+}
+
+/* Completes the inspect string started by the caller: appends either the
+ * instance variables or " ..." (when `recur` is set), closes it with ">",
+ * and forces the first character to '#'. */
+static mrb_value
+inspect_obj(mrb_state *mrb, mrb_value obj, mrb_value str, int recur)
+{
+  if (!recur) {
+    mrb_ivar_foreach(mrb, obj, inspect_i, &str);
+  }
+  else {
+    mrb_str_cat2(mrb, str, " ...");
+  }
+
+  mrb_str_cat2(mrb, str, ">");
+  RSTRING_PTR(str)[0] = '#';
+
+  return str;
+}
+
+/* Returns 1 when the to_s method found for obj's class is the C function
+ * mrb_any_to_s, 0 otherwise. */
+int
+mrb_obj_basic_to_s_p(mrb_state *mrb, mrb_value obj)
+{
+  struct RProc *to_s;
+
+  to_s = mrb_method_search(mrb, mrb_class(mrb, obj), mrb_intern(mrb, "to_s"));
+  if (!to_s) {
+    return 0;
+  }
+  if (!MRB_PROC_CFUNC_P(to_s)) {
+    return 0;
+  }
+  return (to_s->body.func == mrb_any_to_s) ? 1 : 0;
+}
+
+/* 15.3.1.3.17 */
+/*
+ * call-seq:
+ * obj.inspect -> string
+ *
+ * Returns a string containing a human-readable representation of
+ * <i>obj</i>. If not overridden and <i>obj</i> has no instance variables,
+ * uses the <code>to_s</code> method to generate the string.
+ *
+ * [ 1, 2, 3..4, 'five' ].inspect #=> "[1, 2, 3..4, \"five\"]"
+ * Time.new.inspect #=> "2008-03-08 19:43:39 +0900"
+ */
+mrb_value
+mrb_obj_inspect(mrb_state *mrb, mrb_value obj)
+{
+  if ((mrb_type(obj) == MRB_TT_OBJECT) && mrb_obj_basic_to_s_p(mrb, obj)) {
+    mrb_value *slots = (mrb_value *)ROBJECT_IVPTR(obj);
+    long count = ROBJECT_NUMIV(obj);
+    long idx = 0;
+
+    /* Advance to the first slot that is in use, if any. */
+    while (idx < count && slots[idx].tt == MRB_TT_FREE) {
+      idx++;
+    }
+
+    if (idx < count) {
+      /* At least one instance variable: build "#<Class:addr ivars...>".
+       * The leading '-' is a marker that inspect_i/inspect_obj turn into '#'.
+       * NOTE(review): %p receives the address of the by-value parameter
+       * `obj`, not of the underlying object -- confirm this is intended. */
+      const char *c = mrb_obj_classname(mrb, obj);
+      mrb_value str = mrb_sprintf(mrb, "-<%s:%p", c, (void*)&obj);
+
+      return inspect_obj(mrb, obj, str, 0);
+    }
+    return mrb_any_to_s(mrb, obj);
+  }
+
+  if (mrb_nil_p(obj)) {
+    return mrb_str_new_cstr(mrb, "nil");
+  }
+
+  /* Everything else defers to the object's own to_s. */
+  return mrb_funcall(mrb, obj, "to_s", 0, 0);
+}
+
+/* 15.3.1.3.1 */
+/* 15.3.1.3.10 */
+/* 15.3.1.3.11 */
+/*
+ * call-seq:
+ * obj == other -> true or false
+ * obj.equal?(other) -> true or false
+ * obj.eql?(other) -> true or false
+ *
+ * Equality---At the <code>Object</code> level, <code>==</code> returns
+ * <code>true</code> only if <i>obj</i> and <i>other</i> are the
+ * same object. Typically, this method is overridden in descendant
+ * classes to provide class-specific meaning.
+ *
+ * Unlike <code>==</code>, the <code>equal?</code> method should never be
+ * overridden by subclasses: it is used to determine object identity
+ * (that is, <code>a.equal?(b)</code> iff <code>a</code> is the same
+ * object as <code>b</code>).
+ *
+ * The <code>eql?</code> method returns <code>true</code> if
+ * <i>obj</i> and <i>other</i> have the same value. Used by
+ * <code>Hash</code> to test members for equality. For objects of
+ * class <code>Object</code>, <code>eql?</code> is synonymous with
+ * <code>==</code>. Subclasses normally continue this tradition, but
+ * there are exceptions. <code>Numeric</code> types, for example,
+ * perform type conversion across <code>==</code>, but not across
+ * <code>eql?</code>, so:
+ *
+ * 1 == 1.0 #=> true
+ * 1.eql? 1.0 #=> false
+ */
+static mrb_value
+mrb_obj_equal_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Wrap the C-level result of mrb_obj_equal as a Ruby boolean. */
+  return mrb_obj_equal(mrb, self, other) ? mrb_true_value()
+                                         : mrb_false_value();
+}
+
+/* Negation of mrb_obj_equal_m: true when mrb_obj_equal reports a mismatch. */
+static mrb_value
+mrb_obj_not_equal_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  return mrb_obj_equal(mrb, self, other) ? mrb_false_value()
+                                         : mrb_true_value();
+}
+
+/* 15.3.1.3.2 */
+/*
+ * call-seq:
+ * obj === other -> true or false
+ *
+ * Case Equality---For class <code>Object</code>, effectively the same
+ * as calling <code>#==</code>, but typically overridden by descendants
+ * to provide meaningful semantics in <code>case</code> statements.
+ */
+static mrb_value
+mrb_equal_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Wrap the C-level result of mrb_equal as a Ruby boolean. */
+  return mrb_equal(mrb, self, other) ? mrb_true_value()
+                                     : mrb_false_value();
+}
+
+/* 15.3.1.3.3 */
+/* 15.3.1.3.33 */
+/*
+ * call-seq:
+ * obj.hash -> fixnum
+ *
+ * Generates a <code>Fixnum</code> hash value for this object. This
+ * function must have the property that <code>a.eql?(b)</code> implies
+ * <code>a.hash == b.hash</code>. The hash value is used by class
+ * <code>Hash</code>. Any hash value that exceeds the capacity of a
+ * <code>Fixnum</code> will be truncated before being used.
+ */
+static mrb_value
+mrb_obj_id_m(mrb_state *mrb, mrb_value self)
+{
+  /* The value of mrb_obj_id(self), boxed as a Fixnum. */
+  mrb_value id = mrb_fixnum_value(mrb_obj_id(self));
+
+  return id;
+}
+
+/* Placeholder: ignores all of its arguments and returns nil. */
+mrb_value
+send_internal(int argc, mrb_value *argv, mrb_value recv, enum call_type ctype)
+{
+  mrb_value result = mrb_nil_value(); /* dummy */
+
+  return result;
+}
+
+/* Forwards to send_internal with the CALL_FCALL call type. */
+mrb_value
+mrb_f_send(int argc, mrb_value *argv, mrb_value recv)
+{
+  mrb_value result = send_internal(argc, argv, recv, CALL_FCALL);
+
+  return result;
+}
+
+/* 15.3.1.3.4 */
+/* 15.3.1.3.44 */
+/*
+ * call-seq:
+ * obj.send(symbol [, args...]) -> obj
+ * obj.__send__(symbol [, args...]) -> obj
+ *
+ * Invokes the method identified by _symbol_, passing it any
+ * arguments specified. You can use <code>__send__</code> if the name
+ * +send+ clashes with an existing method in _obj_.
+ *
+ * class Klass
+ * def hello(*args)
+ * "Hello " + args.join(' ')
+ * end
+ * end
+ * k = Klass.new
+ * k.send :hello, "gentle", "readers" #=> "Hello gentle readers"
+ */
+static mrb_value
+mrb_f_send_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *args;
+  int nargs;
+
+  /* Collect every argument and hand them to mrb_f_send unchanged. */
+  mrb_get_args(mrb, "*", &args, &nargs);
+
+  return mrb_f_send(nargs, args, self);
+}
+
+/* 15.3.1.2.1 */
+/* 15.3.1.3.5 */
+/*
+ * call-seq:
+ * spawn([env,] command... [,options]) -> pid
+ * Process.spawn([env,] command... [,options]) -> pid
+ *
+ * spawn executes the specified command and returns its pid.
+ */
+static mrb_value
+mrb_f_spawn_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *args;
+  int nargs;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+
+  /* NOTE(review): delegates to mrb_f_send rather than spawning anything. */
+  return mrb_f_send(nargs, args, self);
+}
+
+/* Placeholder: always reports that no block was given. */
+static mrb_value
+mrb_f_block_given_p(void)
+{
+  mrb_value answer = mrb_false_value(); /* dummy */
+
+  return answer;
+}
+
+/* 15.3.1.2.2 */
+/* 15.3.1.2.5 */
+/* 15.3.1.3.6 */
+/* 15.3.1.3.25 */
+/*
+ * call-seq:
+ * block_given? -> true or false
+ * iterator? -> true or false
+ *
+ * Returns <code>true</code> if <code>yield</code> would execute a
+ * block in the current context. The <code>iterator?</code> form
+ * is mildly deprecated.
+ *
+ * def try
+ * if block_given?
+ * yield
+ * else
+ * "no block"
+ * end
+ * end
+ * try #=> "no block"
+ * try { "hello" } #=> "hello"
+ * try do "hello" end #=> "hello"
+ */
+static mrb_value
+mrb_f_block_given_p_m(mrb_state *mrb, mrb_value self)
+{
+  /* Method-table adapter around the argument-less helper. */
+  mrb_value answer = mrb_f_block_given_p();
+
+  return answer;
+}
+
+/* 15.3.1.3.7 */
+/*
+ * call-seq:
+ * obj.class -> class
+ *
+ * Returns the class of <i>obj</i>. This method must always be
+ * called with an explicit receiver, as <code>class</code> is also a
+ * reserved word in Ruby.
+ *
+ * 1.class #=> Fixnum
+ * self.class #=> Object
+ */
+static mrb_value
+mrb_obj_class_m(mrb_state *mrb, mrb_value self)
+{
+  /* Box the class returned by mrb_obj_class as a value. */
+  mrb_value klass = mrb_obj_value(mrb_obj_class(mrb, self));
+
+  return klass;
+}
+
+/*
+ * Returns the class to attach to a clone of `obj`. When obj's class is not
+ * a singleton class it is returned as is; otherwise a fresh singleton class
+ * is allocated with the same superclass, the SAME iv table pointer (shared,
+ * not copied) and a new, empty method table.
+ */
+struct RClass*
+mrb_singleton_class_clone(mrb_state *mrb, mrb_value obj)
+{
+  struct RClass *orig = RBASIC(obj)->c;
+  struct RClass *copy;
+
+  if (orig->tt != MRB_TT_SCLASS) {
+    return orig;
+  }
+
+  /* copy singleton(unnamed) class */
+  copy = mrb_obj_alloc(mrb, orig->tt, mrb->class_class);
+
+  switch (mrb_type(obj)) {
+  case MRB_TT_CLASS:
+  case MRB_TT_SCLASS:
+    /* Class-like receiver: the copy is its own class. */
+    copy->c = copy;
+    break;
+  default:
+    copy->c = mrb_singleton_class_clone(mrb, mrb_obj_value(orig));
+    break;
+  }
+
+  copy->super = orig->super;
+  if (orig->iv) {
+    copy->iv = orig->iv;
+  }
+  copy->mt = kh_init(mt, mrb);
+  copy->tt = MRB_TT_SCLASS;
+
+  return copy;
+}
+
+/*
+ * Prepares `dest` as a copy of `obj`: for objects, classes and modules the
+ * destination's iv pointer ends up equal to the source's (the table is
+ * shared, not duplicated), then "initialize_copy" is invoked on `dest`.
+ */
+static void
+init_copy(mrb_state *mrb, mrb_value dest, mrb_value obj)
+{
+  const int tt = mrb_type(obj);
+
+  if (tt == MRB_TT_OBJECT || tt == MRB_TT_CLASS || tt == MRB_TT_MODULE) {
+    /* Net effect of "clear if set, then take the source's table if it has
+     * one": dest always ends up with the source's pointer (possibly null). */
+    ROBJECT(dest)->iv = ROBJECT(obj)->iv;
+  }
+
+  mrb_funcall(mrb, dest, "initialize_copy", 1, obj);
+}
+
+/* 15.3.1.3.8 */
+/*
+ * call-seq:
+ * obj.clone -> an_object
+ *
+ * Produces a shallow copy of <i>obj</i>---the instance variables of
+ * <i>obj</i> are copied, but not the objects they reference. Copies
+ * the frozen and tainted state of <i>obj</i>. See also the discussion
+ * under <code>Object#dup</code>.
+ *
+ * class Klass
+ * attr_accessor :str
+ * end
+ * s1 = Klass.new #=> #<Klass:0x401b3a38>
+ * s1.str = "Hello" #=> "Hello"
+ * s2 = s1.clone #=> #<Klass:0x401b3998 @str="Hello">
+ * s2.str[1,4] = "i" #=> "i"
+ * s1.inspect #=> "#<Klass:0x401b3a38 @str=\"Hi\">"
+ * s2.inspect #=> "#<Klass:0x401b3998 @str=\"Hi\">"
+ *
+ * This method may have class-specific behavior. If so, that
+ * behavior will be documented under the #+initialize_copy+ method of
+ * the class.
+ *
+ * Objects of some classes (TrueClass, FalseClass, NilClass, Symbol, Fixnum,
+ * Float) cannot be cloned.
+ */
+mrb_value
+mrb_obj_clone(mrb_state *mrb, mrb_value self)
+{
+  struct RObject *copy;
+  mrb_value result;
+
+  /* Special constants have no heap object to duplicate. */
+  if (mrb_special_const_p(self)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't clone %s", mrb_obj_classname(mrb, self));
+  }
+
+  copy = mrb_obj_alloc(mrb, self.tt, mrb_obj_class(mrb, self));
+  /* Unlike dup, the clone also gets a copy of the singleton class. */
+  copy->c = mrb_singleton_class_clone(mrb, self);
+
+  result = mrb_obj_value(copy);
+  init_copy(mrb, result, self);
+
+  return mrb_obj_value(copy);
+}
+
+/* 15.3.1.3.9 */
+/*
+ * call-seq:
+ * obj.dup -> an_object
+ *
+ * Produces a shallow copy of <i>obj</i>---the instance variables of
+ * <i>obj</i> are copied, but not the objects they reference.
+ * <code>dup</code> copies the tainted state of <i>obj</i>. See also
+ * the discussion under <code>Object#clone</code>. In general,
+ * <code>clone</code> and <code>dup</code> may have different semantics
+ * in descendant classes. While <code>clone</code> is used to duplicate
+ * an object, including its internal state, <code>dup</code> typically
+ * uses the class of the descendant object to create the new instance.
+ *
+ * This method may have class-specific behavior. If so, that
+ * behavior will be documented under the #+initialize_copy+ method of
+ * the class.
+ */
+
+mrb_value
+mrb_obj_dup(mrb_state *mrb, mrb_value obj)
+{
+  struct RBasic *fresh;
+  mrb_value copy;
+
+  /* Special constants have no heap object to duplicate. */
+  if (mrb_special_const_p(obj)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't dup %s", mrb_obj_classname(mrb, obj));
+  }
+
+  /* Allocate an object of the same type and class, then let the class's
+   * initialize_copy fill it in from the original. */
+  fresh = mrb_obj_alloc(mrb, mrb_type(obj), mrb_obj_class(mrb, obj));
+  copy = mrb_obj_value(fresh);
+  mrb_funcall(mrb, copy, "initialize_copy", 1, obj);
+
+  return copy;
+}
+
+/* 15.3.1.2.3 */
+/* 15.3.1.3.12 */
+/*
+ * call-seq:
+ * eval(string [, binding [, filename [,lineno]]]) -> obj
+ *
+ * Evaluates the Ruby expression(s) in <em>string</em>. If
+ * <em>binding</em> is given, which must be a <code>Binding</code>
+ * object, the evaluation is performed in its context. If the
+ * optional <em>filename</em> and <em>lineno</em> parameters are
+ * present, they will be used when reporting syntax errors.
+ *
+ * def getBinding(str)
+ * return binding
+ * end
+ * str = "hello"
+ * eval "str + ' Fred'" #=> "hello Fred"
+ * eval "str + ' Fred'", getBinding("bye") #=> "bye Fred"
+ */
+mrb_value
+mrb_f_eval(int argc, mrb_value *argv, mrb_value self)
+{
+  /* Placeholder: nothing is evaluated; always yields false. */
+  mrb_value result = mrb_false_value(); /* dummy */
+
+  return result;
+}
+/* Method-table adapter: gathers all arguments and forwards to mrb_f_eval. */
+mrb_value
+mrb_f_eval_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *args;
+  int nargs;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+
+  return mrb_f_eval(nargs, args, self);
+}
+
+/*
+ * Extends `obj` with each module in argv. All arguments are type-checked
+ * first; the modules are then applied from last to first, calling
+ * "extend_object" followed by "extended" on each one.
+ */
+static mrb_value
+mrb_obj_extend(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj)
+{
+  int idx;
+
+  if (argc == 0) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (at least 1)");
+  }
+
+  /* Validate everything before touching obj. */
+  for (idx = 0; idx < argc; idx++) {
+    mrb_check_type(mrb, argv[idx], MRB_TT_MODULE);
+  }
+
+  for (idx = argc - 1; idx >= 0; idx--) {
+    mrb_funcall(mrb, argv[idx], "extend_object", 1, obj);
+    mrb_funcall(mrb, argv[idx], "extended", 1, obj);
+  }
+
+  return obj;
+}
+
+/* 15.3.1.3.13 */
+/*
+ * call-seq:
+ * obj.extend(module, ...) -> obj
+ *
+ * Adds to _obj_ the instance methods from each module given as a
+ * parameter.
+ *
+ * module Mod
+ * def hello
+ * "Hello from Mod.\n"
+ * end
+ * end
+ *
+ * class Klass
+ * def hello
+ * "Hello from Klass.\n"
+ * end
+ * end
+ *
+ * k = Klass.new
+ * k.hello #=> "Hello from Klass.\n"
+ * k.extend(Mod) #=> #<Klass:0x401b3bc8>
+ * k.hello #=> "Hello from Mod.\n"
+ */
+mrb_value
+mrb_obj_extend_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value *mods;
+  int nmods;
+
+  /* Every argument is passed on as a module to extend self with. */
+  mrb_get_args(mrb, "*", &mods, &nmods);
+
+  return mrb_obj_extend(mrb, nmods, mods, self);
+}
+
+/* 15.3.1.2.4 */
+/* 15.3.1.3.14 */
+/*
+ * call-seq:
+ * global_variables -> array
+ *
+ * Returns an array of the names of global variables.
+ *
+ * global_variables.grep /std/ #=> [:$stdin, :$stdout, :$stderr]
+ */
+//mrb_value
+//mrb_f_global_variables(mrb_state *mrb, mrb_value self)
+
+/* 15.3.1.3.15 */
+mrb_value
+mrb_obj_hash(mrb_state *mrb, mrb_value self)
+{
+  /* The default hash is the value of mrb_obj_id(self), boxed as a Fixnum. */
+  mrb_value h = mrb_fixnum_value(mrb_obj_id(self));
+
+  return h;
+}
+
+/* 15.3.1.3.16 */
+/* Default initialize_copy: accepts self or any object of the same type and
+ * class, raising a TypeError otherwise; copies nothing itself. */
+mrb_value
+mrb_obj_init_copy(mrb_state *mrb, mrb_value self)
+{
+  mrb_value orig;
+
+  mrb_get_args(mrb, "o", &orig);
+
+  if (mrb_obj_equal(mrb, self, orig)) {
+    return self;
+  }
+  if ((mrb_type(self) == mrb_type(orig)) &&
+      (mrb_obj_class(mrb, self) == mrb_obj_class(mrb, orig))) {
+    return self;
+  }
+
+  mrb_raise(mrb, E_TYPE_ERROR, "initialize_copy should take same class object");
+  return self;
+}
+
+/* 15.3.1.3.18 */
+/*
+ * call-seq:
+ * obj.instance_eval(string [, filename [, lineno]] ) -> obj
+ * obj.instance_eval {| | block } -> obj
+ *
+ * Evaluates a string containing Ruby source code, or the given block,
+ * within the context of the receiver (_obj_). In order to set the
+ * context, the variable +self+ is set to _obj_ while
+ * the code is executing, giving the code access to _obj_'s
+ * instance variables. In the version of <code>instance_eval</code>
+ * that takes a +String+, the optional second and third
+ * parameters supply a filename and starting line number that are used
+ * when reporting compilation errors.
+ *
+ * class KlassWithSecret
+ * def initialize
+ * @secret = 99
+ * end
+ * end
+ * k = KlassWithSecret.new
+ * k.instance_eval { @secret } #=> 99
+ */
+mrb_value
+mrb_obj_instance_eval(mrb_state *mrb, mrb_value self)
+{
+  mrb_value b;
+
+  /* Only the block form is handled here: fetch the block and run it with
+   * no arguments, with the receiver passed as self. */
+  mrb_get_args(mrb, "&", &b);
+  return mrb_yield_with_self(mrb, b, 0, 0, self);
+}
+
+/* 15.3.1.3.19 */
+/*
+ * call-seq:
+ * obj.instance_of?(class) -> true or false
+ *
+ * Returns <code>true</code> if <i>obj</i> is an instance of the given
+ * class. See also <code>Object#kind_of?</code>.
+ */
+mrb_value
+rb_obj_is_instance_of(mrb_state *mrb, mrb_value self)
+{
+  mrb_value klass;
+
+  mrb_get_args(mrb, "o", &klass);
+
+  /* Wrap the C-level predicate result as a Ruby boolean. */
+  return mrb_obj_is_instance_of(mrb, self, mrb_class_ptr(klass))
+           ? mrb_true_value()
+           : mrb_false_value();
+}
+
+/* 15.3.1.3.20 */
+/*
+ * call-seq:
+ * obj.instance_variable_defined?(symbol) -> true or false
+ *
+ * Returns <code>true</code> if the given instance variable is
+ * defined in <i>obj</i>.
+ *
+ * class Fred
+ * def initialize(p1, p2)
+ * @a, @b = p1, p2
+ * end
+ * end
+ * fred = Fred.new('cat', 99)
+ * fred.instance_variable_defined?(:@a) #=> true
+ * fred.instance_variable_defined?("@b") #=> true
+ * fred.instance_variable_defined?("@c") #=> false
+ */
+mrb_value
+mrb_obj_ivar_defined(mrb_state *mrb, mrb_value self)
+{
+  mrb_value arg;
+  mrb_sym mid;
+  khiter_t k;
+  kh_iv_t *h = RCLASS_IV_TBL(self);
+
+  mrb_get_args(mrb, "o", &arg);
+  /* Convert first so an invalid name argument is still reported. */
+  mid = mrb_to_id(mrb, arg);
+
+  /* The iv table pointer can be null (other code in this file checks it
+   * before use); with no table, no variable is defined. */
+  if (!h) {
+    return mrb_false_value();
+  }
+
+  k = kh_get(iv, h, mid);
+  if (k != kh_end(h)) {
+    return mrb_true_value();
+  }
+  else {
+    return mrb_false_value();
+  }
+}
+
+/* 15.3.1.3.21 */
+/*
+ * call-seq:
+ * obj.instance_variable_get(symbol) -> obj
+ *
+ * Returns the value of the given instance variable, or nil if the
+ * instance variable is not set. The <code>@</code> part of the
+ * variable name should be included for regular instance
+ * variables. Throws a <code>NameError</code> exception if the
+ * supplied symbol is not valid as an instance variable name.
+ *
+ * class Fred
+ * def initialize(p1, p2)
+ * @a, @b = p1, p2
+ * end
+ * end
+ * fred = Fred.new('cat', 99)
+ * fred.instance_variable_get(:@a) #=> "cat"
+ * fred.instance_variable_get("@b") #=> 99
+ */
+mrb_value
+mrb_obj_ivar_get(mrb_state *mrb, mrb_value self)
+{
+  mrb_value name;
+  mrb_sym id;
+
+  mrb_get_args(mrb, "o", &name);
+  id = mrb_to_id(mrb, name);
+
+  /* The name is not validated as an instance-variable name here. */
+  return mrb_iv_get(mrb, self, id);
+}
+
+/* 15.3.1.3.22 */
+/*
+ * call-seq:
+ * obj.instance_variable_set(symbol, obj) -> obj
+ *
+ * Sets the instance variable names by <i>symbol</i> to
+ * <i>object</i>, thereby frustrating the efforts of the class's
+ * author to attempt to provide proper encapsulation. The variable
+ * did not have to exist prior to this call.
+ *
+ * class Fred
+ * def initialize(p1, p2)
+ * @a, @b = p1, p2
+ * end
+ * end
+ * fred = Fred.new('cat', 99)
+ * fred.instance_variable_set(:@a, 'dog') #=> "dog"
+ * fred.instance_variable_set(:@c, 'cat') #=> "cat"
+ * fred.inspect #=> "#<Fred:0x401b3da8 @a=\"dog\", @b=99, @c=\"cat\">"
+ */
+mrb_value
+mrb_obj_ivar_set(mrb_state *mrb, mrb_value self)
+{
+  mrb_value name, new_val;
+  mrb_sym id;
+
+  mrb_get_args(mrb, "oo", &name, &new_val);
+  id = mrb_to_id(mrb, name);
+
+  /* The name is not validated as an instance-variable name here. */
+  mrb_iv_set(mrb, self, id, new_val);
+
+  return new_val;
+}
+
+/* 15.3.1.3.23 */
+/*
+ * call-seq:
+ * obj.instance_variables -> array
+ *
+ * Returns an array of instance variable names for the receiver. Note
+ * that simply defining an accessor does not create the corresponding
+ * instance variable.
+ *
+ * class Fred
+ * attr_accessor :a1
+ * def initialize
+ * @iv = 3
+ * end
+ * end
+ * Fred.new.instance_variables #=> [:@iv]
+ */
+mrb_value
+mrb_obj_instance_variables(mrb_state *mrb, mrb_value self)
+{
+  mrb_value ary;
+  kh_iv_t *h = RCLASS_IV_TBL(self);
+  int i;
+  const char* p;
+
+  ary = mrb_ary_new(mrb);
+
+  /* The iv table pointer can be null (other code in this file checks it
+   * before use); with no table the result is simply empty. */
+  if (!h) {
+    return ary;
+  }
+
+  for (i=0;i<kh_end(h);i++) {
+    if (kh_exist(h, i)) {
+      p = mrb_sym2name(mrb, kh_key(h,i));
+      /* Only names starting with '@' whose value is not undef are listed.
+       * NOTE(review): names are pushed as strings, while the documentation
+       * above shows symbols -- confirm which is intended. */
+      if (*p == '@') {
+        if (mrb_type(kh_value(h, i)) != MRB_TT_UNDEF)
+          mrb_ary_push(mrb, ary, mrb_str_new_cstr(mrb, p));
+      }
+    }
+  }
+  return ary;
+}
+
+/* 15.3.1.3.24 */
+/* 15.3.1.3.26 */
+/*
+ * call-seq:
+ * obj.is_a?(class) -> true or false
+ * obj.kind_of?(class) -> true or false
+ *
+ * Returns <code>true</code> if <i>class</i> is the class of
+ * <i>obj</i>, or if <i>class</i> is one of the superclasses of
+ * <i>obj</i> or modules included in <i>obj</i>.
+ *
+ * module M; end
+ * class A
+ * include M
+ * end
+ * class B < A; end
+ * class C < B; end
+ * b = B.new
+ * b.instance_of? A #=> false
+ * b.instance_of? B #=> true
+ * b.instance_of? C #=> false
+ * b.instance_of? M #=> false
+ * b.kind_of? A #=> true
+ * b.kind_of? B #=> true
+ * b.kind_of? C #=> false
+ * b.kind_of? M #=> true
+ */
+mrb_value
+mrb_obj_is_kind_of_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value klass;
+
+  mrb_get_args(mrb, "o", &klass);
+
+  /* Wrap the C-level predicate result as a Ruby boolean. */
+  return mrb_obj_is_kind_of(mrb, self, mrb_class_ptr(klass))
+           ? mrb_true_value()
+           : mrb_false_value();
+}
+
+/* 15.3.1.2.6 */
+/* 15.3.1.3.27 */
+/*
+ * call-seq:
+ * lambda { |...| block } -> a_proc
+ *
+ * Equivalent to <code>Proc.new</code>, except the resulting Proc objects
+ * check the number of parameters passed when called.
+ */
+mrb_value
+proc_lambda(mrb_state *mrb, mrb_value self)
+{
+  /* Placeholder: no Proc is created; always yields nil. */
+  mrb_value result = mrb_nil_value(); /* dummy */
+
+  return result;
+}
+
+/* 15.3.1.2.7 */
+/* 15.3.1.3.28 */
+/*
+ * call-seq:
+ * local_variables -> array
+ *
+ * Returns the names of the current local variables.
+ *
+ * fred = 1
+ * for i in 1..10
+ * # ...
+ * end
+ * local_variables #=> [:fred, :i]
+ */
+mrb_value
+mrb_f_local_variables(mrb_state *mrb, mrb_value self)
+{
+  /* Placeholder: always returns a new, empty array. */
+  mrb_value names = mrb_ary_new(mrb);
+
+  return names; /* dummy */
+}
+
+/* 15.3.1.2.8 */
+/* 15.3.1.3.29 */
+/*
+ * call-seq:
+ * loop { block }
+ * loop -> an_enumerator
+ *
+ * Repeatedly executes the block.
+ *
+ * If no block is given, an enumerator is returned instead.
+ *
+ * loop do
+ * print "Input: "
+ * line = gets
+ * break if !line or line =~ /^qQ/
+ * # ...
+ * end
+ *
+ * StopIteration raised in the block breaks the loop.
+ */
+mrb_value
+mrb_f_loop(mrb_state *mrb, mrb_value self)
+{
+ return mrb_nil_value(); /* dummy: no block is executed here; nil is returned unconditionally */
+}
+
+/*
+ * Appends the name of every method stored directly in klass's method
+ * table to ary, as symbol values.  Superclasses are not visited; callers
+ * walk the class chain themselves.
+ *
+ * A class whose method table pointer is NULL contributes nothing.
+ */
+static void
+method_entry_loop(mrb_state *mrb, struct RClass* klass, mrb_value ary)
+{
+  khash_t(mt) *h = klass->mt;
+  khiter_t i;   /* same iterator type khash uses for bucket positions */
+
+  /* kh_end() dereferences the table, so bail out before touching a missing one */
+  if (!h) return;
+
+  for (i = 0; i < kh_end(h); i++) {
+    /* skip empty and deleted buckets */
+    if (kh_exist(h, i)) {
+      mrb_ary_push(mrb, ary, mrb_symbol_value(kh_key(h, i)));
+    }
+  }
+}
+
+/*
+ * Collects method names into a new array by walking the class chain that
+ * starts at klass.
+ *
+ *   argc/argv - optional arguments of the calling method; when argc is 0
+ *               ancestors are included, otherwise the truthiness of
+ *               argv[0] decides.
+ *   klass     - first class whose method table is listed.
+ *   obj       - not used by this function.
+ *
+ * Singleton and include classes never terminate the walk.  The first class
+ * of any other type terminates it unless ancestors were requested.  The
+ * walk also stops at a NULL superclass or at a class that is its own
+ * superclass.
+ */
+static mrb_value
+class_instance_method_list(mrb_state *mrb, int argc, mrb_value *argv, struct RClass* klass, int obj)
+{
+  mrb_value ary;
+  int recur;
+  struct RClass* oldklass;
+
+  /* use the argument vector we were handed, as mrb_obj_singleton_methods does */
+  recur = (argc == 0) ? TRUE : mrb_test(argv[0]);
+
+  ary = mrb_ary_new(mrb);
+  oldklass = 0;
+  while (klass && (klass != oldklass)) {
+    method_entry_loop(mrb, klass, ary);
+    if ((klass->tt != MRB_TT_ICLASS) &&
+        (klass->tt != MRB_TT_SCLASS) &&
+        !recur) {
+      break;
+    }
+    oldklass = klass;
+    klass = klass->super;
+  }
+
+  return ary;
+}
+
+/*
+ * Builds a new array of method names (symbols) taken from the singleton
+ * side of obj's class chain.
+ *
+ * The method table of obj's class is listed only when that class is a
+ * singleton class.  When the optional first argument is omitted or truthy,
+ * the walk then continues through any directly following singleton or
+ * include classes.
+ */
+mrb_value
+mrb_obj_singleton_methods(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj)
+{
+  mrb_value all = (argc == 0) ? mrb_true_value() : argv[0];
+  struct RClass *c = mrb_class(mrb, obj);
+  mrb_value names = mrb_ary_new(mrb);
+
+  if (c && (c->tt == MRB_TT_SCLASS)) {
+    method_entry_loop(mrb, c, names);
+    c = c->super;
+  }
+  if (!RTEST(all)) return names;
+
+  for (; c && ((c->tt == MRB_TT_SCLASS) || (c->tt == MRB_TT_ICLASS)); c = c->super) {
+    method_entry_loop(mrb, c, names);
+  }
+  return names;
+}
+
+/*
+ * Lists method names for obj.
+ *
+ * With no argument, or a truthy first argument, returns the names found by
+ * walking obj's class chain.  With a falsy first argument, returns only the
+ * names reported by mrb_obj_singleton_methods().
+ *
+ * flag is part of the signature but is not consulted: no visibility
+ * filtering is performed here.
+ */
+mrb_value
+mrb_obj_methods(mrb_state *mrb, int argc, mrb_value *argv, mrb_value obj, mrb_method_flag_t flag)
+{
+  if (argc == 0 || mrb_test(argv[0])) {
+    /* argc is passed as 0 so the callee applies its "include ancestors" default */
+    return class_instance_method_list(mrb, 0, argv, mrb_class(mrb, obj), 0);
+  }
+  return mrb_obj_singleton_methods(mrb, argc, argv, obj);
+}
+/* 15.3.1.3.31 */
+/*
+ * call-seq:
+ * obj.methods -> array
+ *
+ * Returns a list of the names of methods publicly accessible in
+ * <i>obj</i>. This will include all the methods accessible in
+ * <i>obj</i>'s ancestors.
+ *
+ * class Klass
+ * def kMethod()
+ * end
+ * end
+ * k = Klass.new
+ * k.methods[0..9] #=> [:kMethod, :freeze, :nil?, :is_a?,
+ * # :class, :instance_variable_set,
+ * # :methods, :extend, :__send__, :instance_eval]
+ * k.methods.length #=> 42
+ */
+mrb_value
+mrb_obj_methods_m(mrb_state *mrb, mrb_value self)
+{
+  int nargs;
+  mrb_value *args;
+
+  /* hand every argument through unchanged; flag 0 stands for "everything but private" */
+  mrb_get_args(mrb, "*", &args, &nargs);
+  return mrb_obj_methods(mrb, nargs, args, self, 0);
+}
+
+/* 15.3.1.3.32 */
+/*
+ * call-seq:
+ * nil.nil? -> true
+ * <anything_else>.nil? -> false
+ *
+ * Only the object <i>nil</i> responds <code>true</code> to <code>nil?</code>.
+ */
+mrb_value
+mrb_false(mrb_state *mrb, mrb_value self)
+{
+ return mrb_false_value(); /* unconditional: self is never inspected, so every receiver reaching this function gets false */
+}
+
+/* 15.3.1.2.10 */
+/* 15.3.1.3.35 */
+/*
+ * call-seq:
+ * print(obj, ...) -> nil
+ *
+ * Prints each object in turn to <code>$stdout</code>. If the output
+ * field separator (<code>$,</code>) is not +nil+, its
+ * contents will appear between each field. If the output record
+ * separator (<code>$\\</code>) is not +nil+, it will be
+ * appended to the output. If no arguments are given, prints
+ * <code>$_</code>. Objects that aren't strings will be converted by
+ * calling their <code>to_s</code> method.
+ *
+ * print "cat", [1,2,3], 99, "\n"
+ * $, = ", "
+ * $\ = "\n"
+ * print "cat", [1,2,3], 99
+ *
+ * <em>produces:</em>
+ *
+ * cat12399
+ * cat, 1, 2, 3, 99
+ */
+
+/* 15.3.1.3.36 */
+/*
+ * call-seq:
+ * obj.private_methods(all=true) -> array
+ *
+ * Returns the list of private methods accessible to <i>obj</i>. If
+ * the <i>all</i> parameter is set to <code>false</code>, only those methods
+ * in the receiver will be listed.
+ */
+mrb_value
+mrb_obj_private_methods(mrb_state *mrb, mrb_value self)
+{
+  int nargs;
+  mrb_value *args;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+  /* NOEX_PRIVATE is only forwarded; the original note says the private
+     attribute is not defined yet -- TODO confirm whether it filters anything */
+  return mrb_obj_methods(mrb, nargs, args, self, NOEX_PRIVATE);
+}
+
+/* 15.3.1.3.37 */
+/*
+ * call-seq:
+ * obj.protected_methods(all=true) -> array
+ *
+ * Returns the list of protected methods accessible to <i>obj</i>. If
+ * the <i>all</i> parameter is set to <code>false</code>, only those methods
+ * in the receiver will be listed.
+ */
+mrb_value
+mrb_obj_protected_methods(mrb_state *mrb, mrb_value self)
+{
+  int nargs;
+  mrb_value *args;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+  /* NOEX_PROTECTED is only forwarded; the original note says the protected
+     attribute is not defined yet -- TODO confirm whether it filters anything */
+  return mrb_obj_methods(mrb, nargs, args, self, NOEX_PROTECTED);
+}
+
+/* 15.3.1.3.38 */
+/*
+ * call-seq:
+ * obj.public_methods(all=true) -> array
+ *
+ * Returns the list of public methods accessible to <i>obj</i>. If
+ * the <i>all</i> parameter is set to <code>false</code>, only those methods
+ * in the receiver will be listed.
+ */
+mrb_value
+mrb_obj_public_methods(mrb_state *mrb, mrb_value self)
+{
+  int nargs;
+  mrb_value *args;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+  /* NOEX_PUBLIC is only forwarded; the original note says the public
+     attribute is not defined yet -- TODO confirm whether it filters anything */
+  return mrb_obj_methods(mrb, nargs, args, self, NOEX_PUBLIC);
+}
+
+/* 15.3.1.2.11 */
+/* 15.3.1.3.39 */
+/*
+ * call-seq:
+ * puts(obj, ...) -> nil
+ *
+ * Equivalent to
+ *
+ * $stdout.puts(obj, ...)
+ */
+
+static mrb_value
+get_errinfo(mrb_state *mrb)
+{
+ //return get_thread_errinfo(GET_THREAD());
+ return mrb_str_new_cstr(mrb, "error!!"); /* dummy: always a new string with fixed text, never nil */
+}
+
+/* 15.3.1.2.12 */
+/* 15.3.1.3.40 */
+/*
+ * call-seq:
+ * raise
+ * raise(string)
+ * raise(exception [, string [, array]])
+ * fail
+ * fail(string)
+ * fail(exception [, string [, array]])
+ *
+ * With no arguments, raises the exception in <code>$!</code> or raises
+ * a <code>RuntimeError</code> if <code>$!</code> is +nil+.
+ * With a single +String+ argument, raises a
+ * +RuntimeError+ with the string as a message. Otherwise,
+ * the first parameter should be the name of an +Exception+
+ * class (or an object that returns an +Exception+ object when sent
+ * an +exception+ message). The optional second parameter sets the
+ * message associated with the exception, and the third parameter is an
+ * array of callback information. Exceptions are caught by the
+ * +rescue+ clause of <code>begin...end</code> blocks.
+ *
+ * raise "Failed to create socket"
+ * raise ArgumentError, "No parameters", caller
+ */
+mrb_value
+mrb_f_raise(mrb_state *mrb, mrb_value self)
+{
+  mrb_value err;
+  mrb_value fallback[1];   /* private one-slot argument vector for the no-argument form */
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc == 0) {
+    /* bare `raise`: fall back to the current error information, if any */
+    err = get_errinfo(mrb);
+    if (!mrb_nil_p(err)) {
+      /* argv holds argc elements, so with argc == 0 there is no argv[0] to
+         store into; build the single-argument vector in local storage */
+      fallback[0] = err;
+      argv = fallback;
+      argc = 1;
+    }
+  }
+  mrb_exc_raise(mrb, mrb_make_exception(mrb, argc, argv));
+  return mrb_nil_value(); /* not reached */
+}
+
+/* 15.3.1.3.41 */
+/*
+ * call-seq:
+ * obj.remove_instance_variable(symbol) -> obj
+ *
+ * Removes the named instance variable from <i>obj</i>, returning that
+ * variable's value.
+ *
+ * class Dummy
+ * attr_reader :var
+ * def initialize
+ * @var = 99
+ * end
+ * def remove
+ * remove_instance_variable(:@var)
+ * end
+ * end
+ * d = Dummy.new
+ * d.var #=> 99
+ * d.remove #=> 99
+ * d.var #=> nil
+ */
+mrb_value
+mrb_obj_remove_instance_variable(mrb_state *mrb, mrb_value self)
+{
+  mrb_value arg;
+  mrb_sym id;
+  mrb_value undef = mrb_undef_value();
+
+  mrb_get_args(mrb, "o", &arg);
+  id = mrb_to_id(mrb, arg);
+
+  /* only plain objects, classes and modules are searched for the variable */
+  switch (mrb_type(self)) {
+  case MRB_TT_OBJECT:
+  case MRB_TT_CLASS:
+  case MRB_TT_MODULE:
+    {
+      khash_t(iv) *tbl = mrb_obj_ptr(self)->iv;
+      khiter_t pos;
+
+      if (!tbl) break;
+      pos = kh_get(iv, tbl, id);
+      if (pos == kh_end(tbl)) break;
+      if (!mrb_obj_equal(mrb, kh_value(tbl, pos), undef)) {
+        mrb_value removed = kh_value(tbl, pos);
+
+        /* the slot is kept and overwritten with the undef marker */
+        kh_value(tbl, pos) = undef;
+        return removed;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+
+  /* no table, no entry, or an entry already holding undef */
+  mrb_name_error(mrb, id, "instance variable %s not defined", mrb_sym2name(mrb, id));
+  return mrb_nil_value(); /* not reached */
+}
+
+/* 15.3.1.2.13 */
+/* 15.3.1.3.42 */
+/*
+ * call-seq:
+ * require(string) -> true or false
+ *
+ * Ruby tries to load the library named _string_, returning
+ * +true+ if successful. If the filename does not resolve to
+ * an absolute path, it will be searched for in the directories listed
+ * in <code>$:</code>. If the file has the extension ``.rb'', it is
+ * loaded as a source file; if the extension is ``.so'', ``.o'', or
+ * ``.dll'', or whatever the default shared library extension is on
+ * the current platform, Ruby loads the shared library as a Ruby
+ * extension. Otherwise, Ruby tries adding ``.rb'', ``.so'', and so on
+ * to the name. The name of the loaded feature is added to the array in
+ * <code>$"</code>. A feature will not be loaded if its name already
+ * appears in <code>$"</code>. The file name is converted to an absolute
+ * path, so ``<code>require 'a'; require './a'</code>'' will not load
+ * <code>a.rb</code> twice.
+ *
+ * require "my-library.rb"
+ * require "db-driver"
+ */
+mrb_value
+mrb_f_require(mrb_state *mrb, mrb_value self)
+{
+ mrb_value fname;
+
+ mrb_get_args(mrb, "o", &fname); /* the argument is fetched but not used afterwards */
+ return mrb_nil_value(); /* dummy: nothing is loaded here; nil is returned unconditionally */
+}
+
+
+static inline int
+basic_obj_respond_to(mrb_state *mrb, mrb_value obj, mrb_sym id, int pub)
+{
+ return mrb_respond_to(mrb, obj, id); /* plain forwarder; pub is not consulted, so no visibility distinction is made here */
+ //return TRUE;
+}
+/* 15.3.1.3.43 */
+/*
+ * call-seq:
+ * obj.respond_to?(symbol, include_private=false) -> true or false
+ *
+ * Returns +true+ if _obj_ responds to the given
+ * method. Private methods are included in the search only if the
+ * optional second parameter evaluates to +true+.
+ *
+ * If the method is not implemented,
+ * as Process.fork on Windows, File.lchmod on GNU/Linux, etc.,
+ * false is returned.
+ *
+ * If the method is not defined, <code>respond_to_missing?</code>
+ * method is called and the result is returned.
+ */
+mrb_value
+obj_respond_to(mrb_state *mrb, mrb_value self)
+{
+  int nargs;
+  mrb_value *args;
+  mrb_value name, include_private;
+  mrb_sym id;
+
+  mrb_get_args(mrb, "*", &args, &nargs);
+  /* NOTE(review): args[0] is read without checking nargs >= 1 -- confirm a
+     call with no arguments cannot reach this point */
+  name = args[0];
+  include_private = (nargs > 1) ? args[1] : mrb_nil_value();
+  id = mrb_to_id(mrb, name);
+
+  /* the helper receives "public only" as the negation of the optional flag */
+  return basic_obj_respond_to(mrb, self, id, !RTEST(include_private))
+    ? mrb_true_value()
+    : mrb_false_value();
+}
+
+/* 15.3.1.3.45 */
+/*
+ * call-seq:
+ * obj.singleton_methods(all=true) -> array
+ *
+ * Returns an array of the names of singleton methods for <i>obj</i>.
+ * If the optional <i>all</i> parameter is true, the list will include
+ * methods in modules included in <i>obj</i>.
+ * Only public and protected singleton methods are returned.
+ *
+ * module Other
+ * def three() end
+ * end
+ *
+ * class Single
+ * def Single.four() end
+ * end
+ *
+ * a = Single.new
+ *
+ * def a.one()
+ * end
+ *
+ * class << a
+ * include Other
+ * def two()
+ * end
+ * end
+ *
+ * Single.singleton_methods #=> [:four]
+ * a.singleton_methods(false) #=> [:two, :one]
+ * a.singleton_methods #=> [:two, :one, :three]
+ */
+mrb_value
+mrb_obj_singleton_methods_m(mrb_state *mrb, mrb_value self)
+{
+ mrb_value *argv;
+ int argc;
+
+ mrb_get_args(mrb, "*", &argv, &argc);
+ return mrb_obj_singleton_methods(mrb, argc, argv, self);
+}
+
+mrb_value mrb_f_sprintf(mrb_state *mrb, mrb_value obj); /* in sprintf.c */
+
+void
+mrb_init_kernel(mrb_state *mrb)
+{
+ struct RClass *krn;
+
+ krn = mrb->kernel_module = mrb_define_module(mrb, "Kernel"); /* define Kernel and store it on the interpreter state */
+ mrb_define_class_method(mrb, krn, "'", mrb_f_spawn_m, ARGS_ANY()); /* 15.3.1.2.1 */ /* NOTE(review): registered under the name "'" -- confirm this spelling is intended */
+ mrb_define_class_method(mrb, krn, "block_given?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.2.2 */
+ mrb_define_class_method(mrb, krn, "eval", mrb_f_eval_m, ARGS_ANY()); /* 15.3.1.2.3 */
+ mrb_define_class_method(mrb, krn, "global_variables", mrb_f_global_variables, ARGS_NONE()); /* 15.3.1.2.4 */
+ mrb_define_class_method(mrb, krn, "iterator?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.2.5 */
+ mrb_define_class_method(mrb, krn, "lambda", proc_lambda, ARGS_NONE()); /* 15.3.1.2.6 */
+ mrb_define_class_method(mrb, krn, "local_variables", mrb_f_local_variables, ARGS_NONE()); /* 15.3.1.2.7 */
+ mrb_define_class_method(mrb, krn, "loop", mrb_f_loop, ARGS_NONE()); /* 15.3.1.2.8 */
+; /* 15.3.1.2.11: empty statement only; no method is registered on this line */
+ mrb_define_class_method(mrb, krn, "raise", mrb_f_raise, ARGS_ANY()); /* 15.3.1.2.12 */
+ mrb_define_class_method(mrb, krn, "require", mrb_f_require, ARGS_REQ(1)); /* 15.3.1.2.13 */
+
+ mrb_define_method(mrb, krn, "singleton_class", mrb_singleton_class, ARGS_NONE());
+
+ mrb_define_method(mrb, krn, "==", mrb_obj_equal_m, ARGS_REQ(1)); /* 15.3.1.3.1 */
+ mrb_define_method(mrb, krn, "!=", mrb_obj_not_equal_m, ARGS_REQ(1));
+ mrb_define_method(mrb, krn, "===", mrb_equal_m, ARGS_REQ(1)); /* 15.3.1.3.2 */
+ mrb_define_method(mrb, krn, "__id__", mrb_obj_id_m, ARGS_NONE()); /* 15.3.1.3.3 */
+ mrb_define_method(mrb, krn, "__send__", mrb_f_send_m, ARGS_ANY()); /* 15.3.1.3.4 */
+ mrb_define_method(mrb, krn, "'", mrb_f_spawn_m, ARGS_ANY()); /* 15.3.1.3.5 *//* "spawn"->"'" */
+ mrb_define_method(mrb, krn, "block_given?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.3.6 */
+ mrb_define_method(mrb, krn, "class", mrb_obj_class_m, ARGS_NONE()); /* 15.3.1.3.7 */
+ mrb_define_method(mrb, krn, "clone", mrb_obj_clone, ARGS_NONE()); /* 15.3.1.3.8 */
+ mrb_define_method(mrb, krn, "dup", mrb_obj_dup, ARGS_NONE()); /* 15.3.1.3.9 */
+ mrb_define_method(mrb, krn, "eql?", mrb_obj_equal_m, ARGS_REQ(1)); /* 15.3.1.3.10 */
+ mrb_define_method(mrb, krn, "equal?", mrb_obj_equal_m, ARGS_REQ(1)); /* 15.3.1.3.11 */
+ mrb_define_method(mrb, krn, "eval", mrb_f_eval_m, ARGS_ANY()); /* 15.3.1.3.12 */
+ mrb_define_method(mrb, krn, "extend", mrb_obj_extend_m, ARGS_ANY()); /* 15.3.1.3.13 */
+ mrb_define_method(mrb, krn, "global_variables", mrb_f_global_variables, ARGS_NONE()); /* 15.3.1.3.14 */
+ mrb_define_method(mrb, krn, "hash", mrb_obj_hash, ARGS_NONE()); /* 15.3.1.3.15 */
+ mrb_define_method(mrb, krn, "initialize_copy", mrb_obj_init_copy, ARGS_REQ(1)); /* 15.3.1.3.16 */
+ mrb_define_method(mrb, krn, "inspect", mrb_obj_inspect, ARGS_NONE()); /* 15.3.1.3.17 */
+ mrb_define_method(mrb, krn, "instance_eval", mrb_obj_instance_eval, ARGS_ANY()); /* 15.3.1.3.18 */
+ mrb_define_method(mrb, krn, "instance_of?", rb_obj_is_instance_of, ARGS_REQ(1)); /* 15.3.1.3.19 */
+ mrb_define_method(mrb, krn, "instance_variable_defined?", mrb_obj_ivar_defined, ARGS_REQ(1)); /* 15.3.1.3.20 */
+ mrb_define_method(mrb, krn, "instance_variable_get", mrb_obj_ivar_get, ARGS_REQ(1)); /* 15.3.1.3.21 */
+ mrb_define_method(mrb, krn, "instance_variable_set", mrb_obj_ivar_set, ARGS_REQ(2)); /* 15.3.1.3.22 */
+ mrb_define_method(mrb, krn, "instance_variables", mrb_obj_instance_variables, ARGS_NONE()); /* 15.3.1.3.23 */
+ mrb_define_method(mrb, krn, "is_a?", mrb_obj_is_kind_of_m, ARGS_REQ(1)); /* 15.3.1.3.24 */
+ mrb_define_method(mrb, krn, "iterator?", mrb_f_block_given_p_m, ARGS_NONE()); /* 15.3.1.3.25 */
+ mrb_define_method(mrb, krn, "kind_of?", mrb_obj_is_kind_of_m, ARGS_REQ(1)); /* 15.3.1.3.26 */
+ mrb_define_method(mrb, krn, "lambda", proc_lambda, ARGS_NONE()); /* 15.3.1.3.27 */
+ mrb_define_method(mrb, krn, "local_variables", mrb_f_local_variables, ARGS_NONE()); /* 15.3.1.3.28 */
+ mrb_define_method(mrb, krn, "loop", mrb_f_loop, ARGS_NONE()); /* 15.3.1.3.29 */
+ mrb_define_method(mrb, krn, "methods", mrb_obj_methods_m, ARGS_ANY()); /* 15.3.1.3.31 */
+ mrb_define_method(mrb, krn, "nil?", mrb_false, ARGS_NONE()); /* 15.3.1.3.32 */
+ mrb_define_method(mrb, krn, "object_id", mrb_obj_id_m, ARGS_NONE()); /* 15.3.1.3.33 */
+ mrb_define_method(mrb, krn, "private_methods", mrb_obj_private_methods, ARGS_ANY()); /* 15.3.1.3.36 */
+ mrb_define_method(mrb, krn, "protected_methods", mrb_obj_protected_methods, ARGS_ANY()); /* 15.3.1.3.37 */
+ mrb_define_method(mrb, krn, "public_methods", mrb_obj_public_methods, ARGS_ANY()); /* 15.3.1.3.38 */
+ mrb_define_method(mrb, krn, "raise", mrb_f_raise, ARGS_ANY()); /* 15.3.1.3.40 */
+ mrb_define_method(mrb, krn, "remove_instance_variable", mrb_obj_remove_instance_variable,ARGS_REQ(1)); /* 15.3.1.3.41 */
+ mrb_define_method(mrb, krn, "require", mrb_f_require, ARGS_REQ(1)); /* 15.3.1.3.42 */
+ mrb_define_method(mrb, krn, "respond_to?", obj_respond_to, ARGS_ANY()); /* 15.3.1.3.43 */
+ mrb_define_method(mrb, krn, "send", mrb_f_send_m, ARGS_ANY()); /* 15.3.1.3.44 */
+ mrb_define_method(mrb, krn, "singleton_methods", mrb_obj_singleton_methods_m, ARGS_ANY()); /* 15.3.1.3.45 */
+ mrb_define_method(mrb, krn, "to_s", mrb_any_to_s, ARGS_NONE()); /* 15.3.1.3.46 */
+
+ mrb_define_method(mrb, krn, "sprintf", mrb_f_sprintf, ARGS_ANY()); /* in sprintf.c */
+ mrb_define_method(mrb, krn, "format", mrb_f_sprintf, ARGS_ANY()); /* in sprintf.c */
+
+ mrb_include_module(mrb, mrb->object_class, mrb->kernel_module); /* include the finished Kernel module into the object class */
+}
diff --git a/src/keywords b/src/keywords
new file mode 100644
index 000000000..be5324875
--- /dev/null
+++ b/src/keywords
@@ -0,0 +1,50 @@
+%{
+struct kwtable {const char *name; int id[2]; enum mrb_lex_state_enum state;};
+const struct kwtable *mrb_reserved_word(const char *, unsigned int);
+static const struct kwtable *reserved_word(const char *, unsigned int);
+#define mrb_reserved_word(str, len) reserved_word(str, len)
+%}
+
+struct kwtable;
+%%
+__ENCODING__, {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END
+__LINE__, {keyword__LINE__, keyword__LINE__}, EXPR_END
+__FILE__, {keyword__FILE__, keyword__FILE__}, EXPR_END
+BEGIN, {keyword_BEGIN, keyword_BEGIN}, EXPR_END
+END, {keyword_END, keyword_END}, EXPR_END
+alias, {keyword_alias, keyword_alias}, EXPR_FNAME
+and, {keyword_and, keyword_and}, EXPR_VALUE
+begin, {keyword_begin, keyword_begin}, EXPR_BEG
+break, {keyword_break, keyword_break}, EXPR_MID
+case, {keyword_case, keyword_case}, EXPR_VALUE
+class, {keyword_class, keyword_class}, EXPR_CLASS
+def, {keyword_def, keyword_def}, EXPR_FNAME
+do, {keyword_do, keyword_do}, EXPR_BEG
+else, {keyword_else, keyword_else}, EXPR_BEG
+elsif, {keyword_elsif, keyword_elsif}, EXPR_VALUE
+end, {keyword_end, keyword_end}, EXPR_END
+ensure, {keyword_ensure, keyword_ensure}, EXPR_BEG
+false, {keyword_false, keyword_false}, EXPR_END
+for, {keyword_for, keyword_for}, EXPR_VALUE
+if, {keyword_if, modifier_if}, EXPR_VALUE
+in, {keyword_in, keyword_in}, EXPR_VALUE
+module, {keyword_module, keyword_module}, EXPR_VALUE
+next, {keyword_next, keyword_next}, EXPR_MID
+nil, {keyword_nil, keyword_nil}, EXPR_END
+not, {keyword_not, keyword_not}, EXPR_ARG
+or, {keyword_or, keyword_or}, EXPR_VALUE
+redo, {keyword_redo, keyword_redo}, EXPR_END
+rescue, {keyword_rescue, modifier_rescue}, EXPR_MID
+retry, {keyword_retry, keyword_retry}, EXPR_END
+return, {keyword_return, keyword_return}, EXPR_MID
+self, {keyword_self, keyword_self}, EXPR_END
+super, {keyword_super, keyword_super}, EXPR_ARG
+then, {keyword_then, keyword_then}, EXPR_BEG
+true, {keyword_true, keyword_true}, EXPR_END
+undef, {keyword_undef, keyword_undef}, EXPR_FNAME
+unless, {keyword_unless, modifier_unless}, EXPR_VALUE
+until, {keyword_until, modifier_until}, EXPR_VALUE
+when, {keyword_when, keyword_when}, EXPR_VALUE
+while, {keyword_while, modifier_while}, EXPR_VALUE
+yield, {keyword_yield, keyword_yield}, EXPR_ARG
+%%
diff --git a/src/lex.def b/src/lex.def
new file mode 100644
index 000000000..9e3938b6a
--- /dev/null
+++ b/src/lex.def
@@ -0,0 +1,216 @@
+/* C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -C -p -j1 -i 1 -g -o -t -N mrb_reserved_word -k'1,3,$' keywords */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+#line 1 "keywords"
+
+struct kwtable {const char *name; int id[2]; enum mrb_lex_state_enum state;}; /* one reserved word: its spelling, a pair of token ids (the second differs only for words with a modifier_* form), and a lexer state */
+const struct kwtable *mrb_reserved_word(const char *, unsigned int);
+static const struct kwtable *reserved_word(const char *, unsigned int);
+#define mrb_reserved_word(str, len) reserved_word(str, len) /* from here on the public name expands to the file-local one */
+#line 8 "keywords"
+struct kwtable;
+
+#define TOTAL_KEYWORDS 40 /* number of entries in the keywords list */
+#define MIN_WORD_LENGTH 2 /* shortest keywords, e.g. "do" */
+#define MAX_WORD_LENGTH 12 /* longest keyword, "__ENCODING__" */
+#define MIN_HASH_VALUE 8 /* lowest occupied wordlist slot */
+#define MAX_HASH_VALUE 50 /* highest wordlist slot */
+/* maximum key range = 43, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (str, len) /* gperf-generated hash: len plus per-character weights of the 1st, 3rd (when len is not 1 or 2) and last characters */
+ register const char *str;
+ register unsigned int len;
+{
+ static const unsigned char asso_values[] = /* weight per byte value; 51 (one past MAX_HASH_VALUE) marks bytes given no smaller weight */
+ {
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 14, 51, 16, 8,
+ 11, 13, 51, 51, 51, 51, 10, 51, 13, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 11, 51, 13, 1, 26,
+ 4, 1, 8, 28, 51, 23, 51, 1, 1, 27,
+ 5, 19, 21, 51, 8, 3, 3, 11, 51, 21,
+ 24, 16, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51, 51, 51, 51, 51,
+ 51, 51, 51, 51, 51, 51
+ };
+ register int hval = len; /* the word length itself is part of the hash */
+
+ switch (hval)
+ {
+ default: /* any length other than 1 or 2: add the third character's weight first */
+ hval += asso_values[(unsigned char)str[2]];
+ /*FALLTHROUGH*/
+ case 2:
+ case 1:
+ hval += asso_values[(unsigned char)str[0]]; /* first character's weight */
+ break;
+ }
+ return hval + asso_values[(unsigned char)str[len - 1]]; /* last character's weight; reads str[len - 1], so len is presumably never 0 here -- confirm against callers */
+}
+
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const struct kwtable *
+mrb_reserved_word (str, len) /* the mrb_reserved_word macro defined earlier in this file rewrites this name to reserved_word */
+ register const char *str;
+ register unsigned int len;
+{
+ static const struct kwtable wordlist[] = /* indexed by hash(); slots holding "" are unused */
+ {
+ {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 18 "keywords"
+ {"break", {keyword_break, keyword_break}, EXPR_MID},
+#line 23 "keywords"
+ {"else", {keyword_else, keyword_else}, EXPR_BEG},
+#line 33 "keywords"
+ {"nil", {keyword_nil, keyword_nil}, EXPR_END},
+#line 26 "keywords"
+ {"ensure", {keyword_ensure, keyword_ensure}, EXPR_BEG},
+#line 25 "keywords"
+ {"end", {keyword_end, keyword_end}, EXPR_END},
+#line 42 "keywords"
+ {"then", {keyword_then, keyword_then}, EXPR_BEG},
+#line 34 "keywords"
+ {"not", {keyword_not, keyword_not}, EXPR_ARG},
+#line 27 "keywords"
+ {"false", {keyword_false, keyword_false}, EXPR_END},
+#line 40 "keywords"
+ {"self", {keyword_self, keyword_self}, EXPR_END},
+#line 24 "keywords"
+ {"elsif", {keyword_elsif, keyword_elsif}, EXPR_VALUE},
+#line 37 "keywords"
+ {"rescue", {keyword_rescue, modifier_rescue}, EXPR_MID},
+#line 43 "keywords"
+ {"true", {keyword_true, keyword_true}, EXPR_END},
+#line 46 "keywords"
+ {"until", {keyword_until, modifier_until}, EXPR_VALUE},
+#line 45 "keywords"
+ {"unless", {keyword_unless, modifier_unless}, EXPR_VALUE},
+#line 39 "keywords"
+ {"return", {keyword_return, keyword_return}, EXPR_MID},
+#line 21 "keywords"
+ {"def", {keyword_def, keyword_def}, EXPR_FNAME},
+#line 16 "keywords"
+ {"and", {keyword_and, keyword_and}, EXPR_VALUE},
+#line 22 "keywords"
+ {"do", {keyword_do, keyword_do}, EXPR_BEG},
+#line 49 "keywords"
+ {"yield", {keyword_yield, keyword_yield}, EXPR_ARG},
+#line 28 "keywords"
+ {"for", {keyword_for, keyword_for}, EXPR_VALUE},
+#line 44 "keywords"
+ {"undef", {keyword_undef, keyword_undef}, EXPR_FNAME},
+#line 35 "keywords"
+ {"or", {keyword_or, keyword_or}, EXPR_VALUE},
+#line 30 "keywords"
+ {"in", {keyword_in, keyword_in}, EXPR_VALUE},
+#line 47 "keywords"
+ {"when", {keyword_when, keyword_when}, EXPR_VALUE},
+#line 38 "keywords"
+ {"retry", {keyword_retry, keyword_retry}, EXPR_END},
+#line 29 "keywords"
+ {"if", {keyword_if, modifier_if}, EXPR_VALUE},
+#line 19 "keywords"
+ {"case", {keyword_case, keyword_case}, EXPR_VALUE},
+#line 36 "keywords"
+ {"redo", {keyword_redo, keyword_redo}, EXPR_END},
+#line 32 "keywords"
+ {"next", {keyword_next, keyword_next}, EXPR_MID},
+#line 41 "keywords"
+ {"super", {keyword_super, keyword_super}, EXPR_ARG},
+#line 31 "keywords"
+ {"module", {keyword_module, keyword_module}, EXPR_VALUE},
+#line 17 "keywords"
+ {"begin", {keyword_begin, keyword_begin}, EXPR_BEG},
+#line 11 "keywords"
+ {"__LINE__", {keyword__LINE__, keyword__LINE__}, EXPR_END},
+#line 12 "keywords"
+ {"__FILE__", {keyword__FILE__, keyword__FILE__}, EXPR_END},
+#line 10 "keywords"
+ {"__ENCODING__", {keyword__ENCODING__, keyword__ENCODING__}, EXPR_END},
+#line 14 "keywords"
+ {"END", {keyword_END, keyword_END}, EXPR_END},
+#line 15 "keywords"
+ {"alias", {keyword_alias, keyword_alias}, EXPR_FNAME},
+#line 13 "keywords"
+ {"BEGIN", {keyword_BEGIN, keyword_BEGIN}, EXPR_END},
+ {""},
+#line 20 "keywords"
+ {"class", {keyword_class, keyword_class}, EXPR_CLASS},
+ {""}, {""},
+#line 48 "keywords"
+ {"while", {keyword_while, modifier_while}, EXPR_VALUE}
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) /* lengths outside the keyword range cannot match */
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0) /* key must be a valid wordlist index */
+ {
+ register const char *s = wordlist[key].name;
+
+ if (*str == *s && !strcmp (str + 1, s + 1)) /* first character, then the rest up to the NUL; presumably str is NUL-terminated at len -- confirm against callers */
+ return &wordlist[key];
+ }
+ }
+ return 0; /* not a reserved word */
+}
+#line 50 "keywords"
+
diff --git a/src/load.c b/src/load.c
new file mode 100644
index 000000000..848cf8f9a
--- /dev/null
+++ b/src/load.c
@@ -0,0 +1,642 @@
+#include <string.h>
+#include "dump.h"
+
+#include "mruby/string.h"
+#ifdef INCLUDE_REGEXP
+#include "re.h"
+#endif
+#include "irep.h"
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+/*
+ * Buffered reader state used while loading a Rite file from a FILE*.
+ * rite_fgetcSub() refills buf with fread() and hands out one byte at a time.
+ */
+typedef struct _RiteFILE
+{
+ FILE* fp; /* stream that buf is filled from */
+ unsigned char buf[256]; /* most recently read chunk */
+ int cnt; /* index of the next unread byte in buf */
+ int readlen; /* number of valid bytes in buf (result of the last fread) */
+} RiteFILE;
+
+/*
+ * Lookup table indexed by character code that yields the value of a
+ * hexadecimal digit: '0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15.
+ * Every other index is 0, including the entries from 0x70 upwards that are
+ * not listed and are therefore zero-initialised.
+ */
+const char hex2bin[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //00-0f
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //10-1f
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //20-2f
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, //30-3f
+ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, //40-4f
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //50-5f
+ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0 //60-6f
+ //70-ff
+};
+
+/* Forward declarations for the helpers defined later in this file. */
+static uint16_t hex_to_bin8(unsigned char*,unsigned char*);
+static uint16_t hex_to_bin16(unsigned char*,unsigned char*);
+static uint16_t hex_to_bin32(unsigned char*,unsigned char*);
+static uint8_t hex_to_uint8(unsigned char*);
+static uint16_t hex_to_uint16(unsigned char*);
+static uint32_t hex_to_uint32(unsigned char*);
+static char* hex_to_str(char*,char*,uint16_t*);
+/* Not static: no definition of calc_crc_16_ccitt appears in this file. */
+uint16_t calc_crc_16_ccitt(unsigned char*,int);
+static unsigned char rite_fgetcSub(RiteFILE*);
+static unsigned char rite_fgetc(RiteFILE*,int);
+static unsigned char* rite_fgets(RiteFILE*,unsigned char*,int,int);
+static int load_rite_header(FILE*,rite_binary_header*,unsigned char*);
+static int load_rite_irep_record(mrb_state*, RiteFILE*,unsigned char*,uint32_t*);
+static int read_rite_header(mrb_state*,unsigned char*,rite_binary_header*);
+static int read_rite_irep_record(mrb_state*,unsigned char*,mrb_irep*,uint32_t*);
+
+
+/*
+ * Return the next raw byte of the stream behind rfp, refilling the
+ * internal buffer with fread() whenever it has been fully consumed.
+ *
+ * Returns '\0' once fread() yields no more data.
+ *
+ * The refill test relies on readlen == cnt, which already holds for a
+ * zero-initialised RiteFILE (both are 0), so no separate "first call"
+ * check is needed.  Using buf[0] == '\0' for that purpose would re-read,
+ * and silently drop buffered data, whenever a chunk starts with a NUL byte.
+ */
+static unsigned char
+rite_fgetcSub(RiteFILE* rfp)
+{
+  if (rfp->readlen == rfp->cnt) {
+    rfp->readlen = fread(rfp->buf, 1, sizeof(rfp->buf), rfp->fp);
+    rfp->cnt = 0;
+    if (rfp->readlen == 0) {
+      return '\0';
+    }
+  }
+  return rfp->buf[(rfp->cnt)++];
+}
+
+/*
+ * Read the next significant byte from rfp.
+ *
+ * '\n' and '\r' are always skipped.  When ignorecomment is non-zero, a '#'
+ * starts a comment that is discarded up to the next '\n', '\r' or '\0'.
+ * Returns '\0' when the underlying reader reports '\0'.
+ */
+static unsigned char
+rite_fgetc(RiteFILE* rfp, int ignorecomment)
+{
+  unsigned char ch;
+
+  do {
+    ch = rite_fgetcSub(rfp);
+    if (ignorecomment && ch == '#') {
+      /* swallow the comment; the terminator is examined by the outer loop */
+      do {
+        ch = rite_fgetcSub(rfp);
+      } while (ch != '\n' && ch != '\r' && ch != '\0');
+    }
+  } while (ch == '\n' || ch == '\r');
+
+  return ch;
+}
+
+/*
+ * Fill dst with len bytes obtained from rite_fgetc().
+ *
+ * Returns dst on success, or NULL as soon as a '\0' byte is read (that
+ * '\0' has already been stored in dst at that point).  No terminator is
+ * appended.
+ */
+static unsigned char*
+rite_fgets(RiteFILE* rfp, unsigned char* dst, int len, int ignorecomment)
+{
+  unsigned char *out = dst;
+  int remaining = len;
+
+  while (remaining-- > 0) {
+    *out = rite_fgetc(rfp, ignorecomment);
+    if (*out == '\0') {
+      return NULL;
+    }
+    out++;
+  }
+  return dst;
+}
+
+/*
+ * Read the textual file header from fp and convert it into bin_header.
+ *
+ * The identifier and format version are validated; rbds, nirep and sirep
+ * are converted from hex text to binary; the remaining fields are copied
+ * as they are.  The header CRC is copied, still as hex text, into hcrc.
+ *
+ * Returns MRB_DUMP_OK, or MRB_DUMP_INVALID_FILE_HEADER when the file is too
+ * short to contain a header or the identifier/version does not match.
+ */
+static int
+load_rite_header(FILE* fp, rite_binary_header* bin_header, unsigned char* hcrc)
+{
+  rite_file_header file_header;
+
+  /* a short read would leave file_header partly uninitialised */
+  if (fread(&file_header, 1, sizeof(file_header), fp) != sizeof(file_header)) {
+    return MRB_DUMP_INVALID_FILE_HEADER;    //File too short
+  }
+  memcpy(bin_header->rbfi, file_header.rbfi, sizeof(file_header.rbfi));
+  if (memcmp(bin_header->rbfi, RITE_FILE_IDENFIFIER, sizeof(bin_header->rbfi)) != 0) {
+    return MRB_DUMP_INVALID_FILE_HEADER;    //File identifier error
+  }
+  memcpy(bin_header->rbfv, file_header.rbfv, sizeof(file_header.rbfv));
+  if (memcmp(bin_header->rbfv, RITE_FILE_FORMAT_VER, sizeof(bin_header->rbfv)) != 0) {
+    return MRB_DUMP_INVALID_FILE_HEADER;    //File format version error
+  }
+  memcpy(bin_header->risv, file_header.risv, sizeof(file_header.risv));
+  memcpy(bin_header->rct, file_header.rct, sizeof(file_header.rct));
+  memcpy(bin_header->rcv, file_header.rcv, sizeof(file_header.rcv));
+  hex_to_bin32(bin_header->rbds, file_header.rbds);
+  hex_to_bin16(bin_header->nirep, file_header.nirep);
+  hex_to_bin16(bin_header->sirep, file_header.sirep);
+  memcpy(bin_header->rsv, file_header.rsv, sizeof(file_header.rsv));
+  memcpy(hcrc, file_header.hcrc, sizeof(file_header.hcrc));
+
+  return MRB_DUMP_OK;
+}
+
+/*
+ * Make sure *buf can hold `need` bytes plus a terminating NUL.
+ * Returns 1 on success and 0 when the reallocation fails; on failure *buf
+ * and *cap are left untouched so the caller can still free the old buffer.
+ */
+static int
+rite_reserve_buf(mrb_state *mrb, char **buf, size_t *cap, size_t need)
+{
+  char *grown;
+
+  if (need < *cap)
+    return 1;
+  grown = mrb_realloc(mrb, *buf, need + 1);
+  if (grown == 0)
+    return 0;
+  *buf = grown;
+  *cap = need + 1;
+  return 1;
+}
+
+/*
+ * Read one irep record in hex-text form from rfp and write its binary
+ * form to dst.  On success *len receives the number of bytes written.
+ *
+ * Returns MRB_DUMP_OK on success, MRB_DUMP_INVALID_IREP when the record
+ * identifier is wrong, and MRB_DUMP_GENERAL_FAILURE when the scratch
+ * buffer cannot be allocated or grown.  The scratch buffer is released on
+ * every path.
+ *
+ * NOTE(review): the results of rite_fgets() are not checked, so a
+ * truncated file is not detected here.
+ */
+static int
+load_rite_irep_record(mrb_state *mrb, RiteFILE* rfp, unsigned char* dst, uint32_t* len)
+{
+  int ret = MRB_DUMP_GENERAL_FAILURE;   /* reported for allocation failures */
+  int name_len;
+  uint32_t i, blocklen;
+  uint16_t offset, pdl, snl, clen;
+  unsigned char hex2[2], hex4[4], hex8[8], hcrc[4];
+  unsigned char *pStart = dst;
+  char *char_buf;
+  size_t buf_size = MRB_DUMP_DEFAULT_STR_LEN;   /* size_t: length + 1 cannot wrap */
+
+  if ((char_buf = mrb_malloc(mrb, buf_size)) == 0)
+    return MRB_DUMP_GENERAL_FAILURE;
+
+  //IREP HEADER BLOCK
+  *dst = rite_fgetc(rfp, TRUE);                         //record identifier
+  if (*dst != RITE_IREP_IDENFIFIER) {
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+  dst += sizeof(unsigned char);
+  *dst = rite_fgetc(rfp, TRUE);                         //class or module
+  dst += sizeof(unsigned char);
+  rite_fgets(rfp, hex4, sizeof(hex4), TRUE);            //number of local variable
+  dst += hex_to_bin16(dst, hex4);
+  rite_fgets(rfp, hex4, sizeof(hex4), TRUE);            //number of register variable
+  dst += hex_to_bin16(dst, hex4);
+  rite_fgets(rfp, hex4, sizeof(hex4), TRUE);            //offset of isec block
+  offset = hex_to_uint16(hex4);
+  rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE);            //header CRC
+
+  /* the name length comes from the file, so grow the buffer before reading */
+  name_len = offset - (MRB_DUMP_SIZE_OF_SHORT * RITE_FILE_HEX_SIZE);
+  if (name_len > 0 && !rite_reserve_buf(mrb, &char_buf, &buf_size, (size_t)name_len))
+    goto error_exit;
+  memset(char_buf, '\0', buf_size);
+  rite_fgets(rfp, (unsigned char*)char_buf, name_len, TRUE);    //class or module name
+  /* the decoded name is written ahead of dst, after the offset and CRC fields */
+  hex_to_str(char_buf, (char*)(dst + MRB_DUMP_SIZE_OF_SHORT + MRB_DUMP_SIZE_OF_SHORT), &clen);
+  dst += uint16_to_bin((MRB_DUMP_SIZE_OF_SHORT/*crc*/ + clen), (char*)dst);     //offset of isec block
+  dst += hex_to_bin16(dst, hcrc);                       //header CRC
+  dst += clen;
+
+  //ISEQ BLOCK
+  rite_fgets(rfp, hex8, sizeof(hex8), TRUE);            //iseq length
+  dst += hex_to_bin32(dst, hex8);
+  blocklen = hex_to_uint32(hex8);
+  for (i=0; i<blocklen; i++) {
+    rite_fgets(rfp, hex8, sizeof(hex8), TRUE);          //iseq
+    dst += hex_to_bin32(dst, hex8);
+  }
+  rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE);            //iseq CRC
+  dst += hex_to_bin16(dst, hcrc);
+
+  //POOL BLOCK
+  rite_fgets(rfp, hex8, sizeof(hex8), TRUE);            //pool length
+  dst += hex_to_bin32(dst, hex8);
+  blocklen = hex_to_uint32(hex8);
+  for (i=0; i<blocklen; i++) {
+    rite_fgets(rfp, hex2, sizeof(hex2), TRUE);          //TT
+    dst += hex_to_bin8(dst, hex2);
+    rite_fgets(rfp, hex4, sizeof(hex4), TRUE);          //pool data length
+    pdl = hex_to_uint16(hex4);
+
+    if (!rite_reserve_buf(mrb, &char_buf, &buf_size, pdl))
+      goto error_exit;
+    memset(char_buf, '\0', buf_size);
+    rite_fgets(rfp, (unsigned char*)char_buf, pdl, FALSE);      //pool
+    hex_to_str(char_buf, (char*)(dst + MRB_DUMP_SIZE_OF_SHORT), &clen);
+    dst += uint16_to_bin(clen, (char*)dst);
+    dst += clen;
+  }
+  rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE);            //pool CRC
+  dst += hex_to_bin16(dst, hcrc);
+
+  //SYMS BLOCK
+  rite_fgets(rfp, hex8, sizeof(hex8), TRUE);            //syms length
+  dst += hex_to_bin32(dst, hex8);
+  blocklen = hex_to_uint32(hex8);
+  for (i=0; i<blocklen; i++) {
+    rite_fgets(rfp, hex4, sizeof(hex4), TRUE);          //symbol name length
+    snl = hex_to_uint16(hex4);
+
+    if (snl == MRB_DUMP_NULL_SYM_LEN) {
+      dst += uint16_to_bin(snl, (char*)dst);
+      continue;
+    }
+
+    if (!rite_reserve_buf(mrb, &char_buf, &buf_size, snl))
+      goto error_exit;
+    memset(char_buf, '\0', buf_size);
+    rite_fgets(rfp, (unsigned char*)char_buf, snl, FALSE);      //symbol name
+    hex_to_str(char_buf, (char*)(dst + MRB_DUMP_SIZE_OF_SHORT), &clen);
+    dst += uint16_to_bin(clen, (char*)dst);
+    dst += clen;
+  }
+  rite_fgets(rfp, hcrc, sizeof(hcrc), TRUE);            //syms CRC
+  dst += hex_to_bin16(dst, hcrc);
+
+  *len = dst - pStart;
+  ret = MRB_DUMP_OK;
+
+error_exit:
+  mrb_free(mrb, char_buf);
+
+  return ret;
+}
+
+/*
+ * Load a Rite file in hex-text form from fp: convert it into a binary
+ * image held in a temporary buffer and pass that image to mrb_read_irep().
+ *
+ * Returns MRB_DUMP_INVALID_ARGUMENT when mrb or fp is NULL, the error from
+ * the header or record loaders, MRB_DUMP_GENERAL_FAILURE when the image
+ * cannot be allocated, MRB_DUMP_INVALID_IREP when the terminating record
+ * length is not zero, and otherwise whatever mrb_read_irep() returns.
+ */
+int
+mrb_load_irep(mrb_state *mrb, FILE* fp)
+{
+  int status;
+  uint32_t image_size, record_count, idx, record_len;
+  unsigned char hex8[8], hcrc[4];
+  unsigned char *image = NULL;
+  unsigned char *cur;
+  rite_binary_header header;
+  RiteFILE reader;
+
+  if (mrb == NULL || fp == NULL) {
+    return MRB_DUMP_INVALID_ARGUMENT;
+  }
+  memset(&reader, 0, sizeof(reader));
+  reader.fp = fp;
+
+  //Read File Header Section
+  status = load_rite_header(fp, &header, hcrc);
+  if (status != MRB_DUMP_OK)
+    return status;
+
+  image_size = sizeof(rite_binary_header) + bin_to_uint32(header.rbds);
+  image = mrb_malloc(mrb, image_size);
+  if (image == NULL)
+    return MRB_DUMP_GENERAL_FAILURE;
+
+  memset(image, 0x00, image_size);
+  memcpy(image, &header, sizeof(rite_binary_header));
+  cur = image + sizeof(rite_binary_header);
+  cur += hex_to_bin16(cur, hcrc);
+
+  //Read Binary Data Section
+  record_count = bin_to_uint16(header.nirep);
+  for (idx = 0; idx < record_count; idx++) {
+    rite_fgets(&reader, hex8, sizeof(hex8), TRUE);      //record len
+    cur += hex_to_bin32(cur, hex8);
+    status = load_rite_irep_record(mrb, &reader, cur, &record_len);     //irep info
+    if (status != MRB_DUMP_OK)
+      goto error_exit;
+    cur += record_len;
+  }
+
+  rite_fgets(&reader, hex8, sizeof(hex8), TRUE);        //dummy record len
+  cur += hex_to_bin32(cur, hex8);
+  if (hex_to_uint32(hex8) != 0) {
+    status = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+
+  /* every earlier step succeeded, so the image can be decoded */
+  status = mrb_read_irep(mrb, (char*)image);
+
+error_exit:
+  mrb_free(mrb, image);
+
+  return status;
+}
+
+/*
+ * Copy the binary file header found at bin into bin_header and validate it.
+ *
+ * The file identifier, the instruction-set version and the CRC stored
+ * right after the header are checked.  Returns the irep count decoded
+ * from the header's nirep field, or MRB_DUMP_INVALID_FILE_HEADER when any
+ * check fails.
+ */
+static int
+read_rite_header(mrb_state *mrb, unsigned char *bin, rite_binary_header* bin_header)
+{
+  unsigned char *stored_crc = bin + sizeof(rite_binary_header);
+  uint16_t computed;
+
+  memcpy(bin_header, bin, sizeof(rite_binary_header));
+
+  //File identifier, then instruction set version
+  if (memcmp(bin_header->rbfi, RITE_FILE_IDENFIFIER, sizeof(bin_header->rbfi)) != 0 ||
+      memcmp(bin_header->risv, RITE_VM_VER, sizeof(bin_header->risv)) != 0) {
+    return MRB_DUMP_INVALID_FILE_HEADER;
+  }
+
+  computed = calc_crc_16_ccitt((unsigned char *)bin_header, sizeof(*bin_header));
+  if (computed != bin_to_uint16(stored_crc)) {
+    return MRB_DUMP_INVALID_FILE_HEADER;    //CRC error
+  }
+
+  return bin_to_uint16(bin_header->nirep);
+}
+
+/*
+ * Decode one binary irep record starting at src into irep.
+ *
+ * The header, iseq, pool and syms blocks are each checked against their
+ * stored CRC.  irep->iseq, irep->pool and irep->syms are allocated here
+ * and are not released by this function on failure.  On success *len
+ * receives the number of bytes consumed from src.
+ *
+ * Returns MRB_DUMP_OK, MRB_DUMP_INVALID_IREP or MRB_DUMP_GENERAL_FAILURE.
+ * The scratch buffer is released on every path, including the header
+ * checks.
+ *
+ * NOTE(review): the sscanf() conversions assume mrb_int is read with "%d"
+ * and mrb_float with "%le" — confirm against the build configuration.
+ */
+static int
+read_rite_irep_record(mrb_state *mrb, unsigned char *src, mrb_irep *irep, uint32_t* len)
+{
+  int i, ret = MRB_DUMP_OK;
+  char *buf;
+  unsigned char *recordStart, *pStart;
+  uint16_t crc, tt, pdl, snl, offset;
+  size_t bufsize = MRB_DUMP_DEFAULT_STR_LEN;    /* size_t: length + 1 cannot wrap */
+  mrb_int fix_num;
+  mrb_float f;
+  mrb_value str;
+
+  recordStart = src;
+  buf = mrb_malloc(mrb, bufsize);
+  if (buf == NULL) {
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+
+  //Header Section
+  pStart = src;
+  if (*src != RITE_IREP_IDENFIFIER) {
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+  src += (sizeof(unsigned char) * 2);
+  irep->nlocals = bin_to_uint16(src);       //number of local variable
+  src += MRB_DUMP_SIZE_OF_SHORT;
+  irep->nregs = bin_to_uint16(src);         //number of register variable
+  src += MRB_DUMP_SIZE_OF_SHORT;
+  offset = bin_to_uint16(src);              //offset of isec block
+  src += MRB_DUMP_SIZE_OF_SHORT;
+  crc = calc_crc_16_ccitt(pStart, src - pStart);        //Calculate CRC
+  if (crc != bin_to_uint16(src)) {          //header CRC
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+  src += offset;
+
+  //Binary Data Section
+  //ISEQ BLOCK
+  pStart = src;
+  irep->ilen = bin_to_uint32(src);          //iseq length
+  src += MRB_DUMP_SIZE_OF_LONG;
+  if (irep->ilen > 0) {
+    if ((irep->iseq = mrb_malloc(mrb, sizeof(mrb_code) * irep->ilen)) == NULL) {
+      ret = MRB_DUMP_GENERAL_FAILURE;
+      goto error_exit;
+    }
+    for (i=0; i<irep->ilen; i++) {
+      irep->iseq[i] = bin_to_uint32(src);   //iseq
+      src += MRB_DUMP_SIZE_OF_LONG;
+    }
+  }
+  crc = calc_crc_16_ccitt((unsigned char *)pStart, src - pStart);       //Calculate CRC
+  if (crc != bin_to_uint16(src)) {          //iseq CRC
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+  src += MRB_DUMP_SIZE_OF_SHORT;
+
+  //POOL BLOCK
+  pStart = src;
+  irep->plen = bin_to_uint32(src);          //pool length
+  src += MRB_DUMP_SIZE_OF_LONG;
+  if (irep->plen > 0) {
+    irep->pool = mrb_malloc(mrb, sizeof(mrb_value) * irep->plen);
+    if (irep->pool == NULL) {
+      ret = MRB_DUMP_INVALID_IREP;
+      goto error_exit;
+    }
+
+    for (i=0; i<irep->plen; i++) {
+      tt = *src;                            //pool TT
+      src += sizeof(unsigned char);
+      pdl = bin_to_uint16(src);             //pool data length
+      src += MRB_DUMP_SIZE_OF_SHORT;
+      if (pdl >= bufsize) {
+        /* old contents are not needed, so free and allocate afresh */
+        mrb_free(mrb, buf);
+        bufsize = (size_t)pdl + 1;
+        if ((buf = mrb_malloc(mrb, bufsize)) == NULL) {
+          ret = MRB_DUMP_GENERAL_FAILURE;
+          goto error_exit;
+        }
+      }
+      memcpy(buf, src, pdl);
+      src += pdl;
+      buf[pdl] = '\0';
+
+      switch (tt) {                         //pool data
+      case MRB_TT_FIXNUM:
+        sscanf(buf, "%d", &fix_num);
+        irep->pool[i] = mrb_fixnum_value(fix_num);
+        break;
+
+      case MRB_TT_FLOAT:
+        sscanf(buf, "%le", &f);
+        irep->pool[i] = mrb_float_value(f);
+        break;
+
+      case MRB_TT_STRING:
+        irep->pool[i] = mrb_str_new(mrb, buf, pdl);
+        break;
+
+#ifdef INCLUDE_REGEXP
+      case MRB_TT_REGEX:
+        str = mrb_str_new(mrb, buf, pdl);
+        irep->pool[i] = mrb_reg_quote(mrb, str);
+        break;
+#endif
+
+      default:
+        irep->pool[i] = mrb_nil_value();
+        break;
+      }
+    }
+  }
+  crc = calc_crc_16_ccitt((unsigned char *)pStart, src - pStart);       //Calculate CRC
+  if (crc != bin_to_uint16(src)) {          //pool CRC
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+  src += MRB_DUMP_SIZE_OF_SHORT;
+
+  //SYMS BLOCK
+  pStart = src;
+  irep->slen = bin_to_uint32(src);          //syms length
+  src += MRB_DUMP_SIZE_OF_LONG;
+  if (irep->slen > 0) {
+    /* allocate with the same element size that the memset below clears */
+    if ((irep->syms = mrb_malloc(mrb, sizeof(mrb_sym) * irep->slen)) == NULL) {
+      ret = MRB_DUMP_INVALID_IREP;
+      goto error_exit;
+    }
+
+    memset(irep->syms, 0, sizeof(mrb_sym)*(irep->slen));
+    for (i=0; i<irep->slen; i++) {
+      snl = bin_to_uint16(src);             //symbol name length
+      src += MRB_DUMP_SIZE_OF_SHORT;
+
+      if (snl == MRB_DUMP_NULL_SYM_LEN) {
+        irep->syms[i] = 0;
+        continue;
+      }
+
+      if (snl >= bufsize) {
+        mrb_free(mrb, buf);
+        bufsize = (size_t)snl + 1;
+        if ((buf = mrb_malloc(mrb, bufsize)) == NULL) {
+          ret = MRB_DUMP_GENERAL_FAILURE;
+          goto error_exit;
+        }
+      }
+      memcpy(buf, src, snl);                //symbol name
+      src += snl;
+      buf[snl] = '\0';
+      irep->syms[i] = mrb_intern(mrb, buf);
+    }
+  }
+  crc = calc_crc_16_ccitt((unsigned char *)pStart, src - pStart);       //Calculate CRC
+  if (crc != bin_to_uint16(src)) {          //syms CRC
+    ret = MRB_DUMP_INVALID_IREP;
+    goto error_exit;
+  }
+  src += MRB_DUMP_SIZE_OF_SHORT;
+
+  *len = src - recordStart;
+error_exit:
+  if (buf)
+    mrb_free(mrb, buf);
+
+  return ret;
+}
+
+/*
+ * Decode a binary Rite image and append its ireps to mrb->irep.
+ *
+ * Returns MRB_DUMP_INVALID_ARGUMENT when mrb or bin is NULL, or a negative
+ * error code when the header or a record is invalid.  On failure every
+ * irep created by this call is freed, its slot is reset to NULL and
+ * mrb->irep_len is left unchanged.  On success mrb->irep_len grows by the
+ * number of loaded ireps and the return value is the previous irep_len
+ * plus the value decoded from the header's sirep field.
+ */
+int
+mrb_read_irep(mrb_state *mrb, char *bin)
+{
+  int ret = MRB_DUMP_OK, i, n, nirep, sirep;
+  uint32_t len;
+  unsigned char *src;
+  rite_binary_header bin_header;
+
+  if ((mrb == NULL) || (bin == NULL)) {
+    return MRB_DUMP_INVALID_ARGUMENT;
+  }
+  src = (unsigned char*)bin;
+  sirep = mrb->irep_len;
+
+  //Read File Header Section
+  if ((nirep = read_rite_header(mrb, src, &bin_header)) < 0)
+    return nirep;
+
+  mrb_add_irep(mrb, sirep + nirep);
+
+  /* clear the new slots first so the cleanup below never reads unset pointers */
+  for (n=0,i=sirep; n<nirep; n++,i++) {
+    mrb->irep[i] = NULL;
+  }
+  for (n=0,i=sirep; n<nirep; n++,i++) {
+    if ((mrb->irep[i] = mrb_malloc(mrb, sizeof(mrb_irep))) == NULL) {
+      ret = MRB_DUMP_GENERAL_FAILURE;
+      goto error_exit;
+    }
+    memset(mrb->irep[i], 0, sizeof(mrb_irep));
+  }
+  src += sizeof(bin_header) + MRB_DUMP_SIZE_OF_SHORT;   //header + crc
+
+  //Read Binary Data Section
+  for (n=0,i=sirep; n<nirep; n++,i++) {
+    src += MRB_DUMP_SIZE_OF_LONG;                       //record len
+    if ((ret = read_rite_irep_record(mrb, src, mrb->irep[i], &len)) != MRB_DUMP_OK)
+      goto error_exit;
+    mrb->irep[i]->idx = i;
+    src += len;
+  }
+  if (0 != bin_to_uint32(src)) {                        //dummy record len
+    ret = MRB_DUMP_GENERAL_FAILURE;
+    goto error_exit;
+  }
+
+  /* publish the new ireps only once everything has been decoded */
+  mrb->irep_len += nirep;
+
+  /* NOTE(review): nirep is decoded with bin_to_uint16() while sirep goes
+     through hex_to_uint8() — confirm that this is intended */
+  return sirep + hex_to_uint8(bin_header.sirep);
+
+error_exit:
+  for (n=0,i=sirep; n<nirep; n++,i++) {
+    if (mrb->irep[i]) {
+      if (mrb->irep[i]->iseq)
+        mrb_free(mrb, mrb->irep[i]->iseq);
+
+      if (mrb->irep[i]->pool)
+        mrb_free(mrb, mrb->irep[i]->pool);
+
+      if (mrb->irep[i]->syms)
+        mrb_free(mrb, mrb->irep[i]->syms);
+
+      mrb_free(mrb, mrb->irep[i]);
+      mrb->irep[i] = NULL;      /* do not leave a dangling pointer behind */
+    }
+  }
+  return ret;
+}
+
+/*
+ * Pack the two hex digits src[0] and src[1] into the single byte dst[0].
+ * Returns the number of bytes written (1).
+ */
+static uint16_t
+hex_to_bin8(unsigned char *dst, unsigned char *src)
+{
+  const int high = hex2bin[src[0]];
+  const int low = hex2bin[src[1]];
+
+  *dst = (unsigned char)((high << 4) | low);
+  return 1;
+}
+
+/*
+ * Pack the four hex digits at src into the two bytes dst[0] and dst[1],
+ * most significant digit first.  Returns the number of bytes written (2).
+ */
+static uint16_t
+hex_to_bin16(unsigned char *dst, unsigned char *src)
+{
+  int byte;
+
+  for (byte = 0; byte < 2; byte++) {
+    const int high = hex2bin[src[2 * byte]];
+    const int low = hex2bin[src[2 * byte + 1]];
+
+    dst[byte] = (unsigned char)((high << 4) | low);
+  }
+  return 2;
+}
+
+/*
+ * Pack the eight hex digits at src into the four bytes dst[0..3],
+ * most significant digit first.  Returns the number of bytes written (4).
+ */
+static uint16_t
+hex_to_bin32(unsigned char *dst, unsigned char *src)
+{
+  int byte;
+
+  for (byte = 0; byte < 4; byte++) {
+    const int high = hex2bin[src[2 * byte]];
+    const int low = hex2bin[src[2 * byte + 1]];
+
+    dst[byte] = (unsigned char)((high << 4) | low);
+  }
+  return 4;
+}
+
+/* Decode the two hex digits hex[0] and hex[1] into an 8-bit value. */
+static uint8_t
+hex_to_uint8(unsigned char *hex)
+{
+  unsigned int value = (unsigned char)hex2bin[hex[0]];
+
+  value = (value << 4) | (unsigned char)hex2bin[hex[1]];
+  return (uint8_t)value;
+}
+
+/* Decode the four hex digits at hex, most significant first, into a 16-bit value. */
+static uint16_t
+hex_to_uint16(unsigned char *hex)
+{
+  uint16_t value = 0;
+  int digit;
+
+  for (digit = 0; digit < 4; digit++) {
+    value = (uint16_t)((value << 4) | (uint16_t)hex2bin[hex[digit]]);
+  }
+  return value;
+}
+
+/* Decode the eight hex digits at hex, most significant first, into a 32-bit value. */
+static uint32_t
+hex_to_uint32(unsigned char *hex)
+{
+  uint32_t value = 0;
+  int digit;
+
+  for (digit = 0; digit < 8; digit++) {
+    value = (value << 4) | (uint32_t)hex2bin[hex[digit]];
+  }
+  return value;
+}
+
+/*
+ * Copy the NUL-terminated text hex into str, resolving backslash escapes
+ * (\a \b \t \n \v \f \r \" \' \? \\).  No terminator is written to str.
+ *
+ * *str_len receives the number of units processed: one per plain
+ * character and one per escape pair.  An unrecognised escape writes
+ * nothing to str but is still counted.  A backslash that is the last
+ * character is copied as it is.
+ *
+ * Returns str.
+ */
+static char*
+hex_to_str(char *hex, char *str, uint16_t *str_len)
+{
+  uint16_t remaining = strlen(hex);
+  uint16_t count = 0;
+  const char *in = hex;
+  char *out = str;
+
+  while (remaining > 0) {
+    char c = *in++;
+
+    remaining--;
+    if (c == '\\' && remaining > 0) {
+      /* consume the character following the backslash */
+      c = *in++;
+      remaining--;
+      switch (c) {
+      case 'a': *out++ = '\a'; break;   /* BEL */
+      case 'b': *out++ = '\b'; break;   /* BS */
+      case 't': *out++ = '\t'; break;   /* HT */
+      case 'n': *out++ = '\n'; break;   /* LF */
+      case 'v': *out++ = '\v'; break;   /* VT */
+      case 'f': *out++ = '\f'; break;   /* FF */
+      case 'r': *out++ = '\r'; break;   /* CR */
+      case '\"':
+      case '\'':
+      case '\?':
+      case '\\':
+        *out++ = c;
+        break;
+      default:
+        break;
+      }
+    }
+    else {
+      *out++ = c;
+    }
+    count++;
+  }
+
+  *str_len = count;
+  return str;
+}
+
diff --git a/src/mdata.h b/src/mdata.h
new file mode 100644
index 000000000..827f7c114
--- /dev/null
+++ b/src/mdata.h
@@ -0,0 +1,53 @@
+/**********************************************************************
+
+ mdata.h -
+
+
+ Copyright (C) 2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#ifndef RUBY_DATA_H
+#define RUBY_DATA_H 1
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+/* Type descriptor attached to wrapped C data (stored in struct RData.type). */
+struct mrb_data_type {
+ const char *struct_name; /* name of the wrapped struct type */
+ void (*dfree)(mrb_state *mrb, void*); /* presumably releases the wrapped pointer — confirm against the caller */
+};
+
+/* Object that carries an opaque C pointer together with its type descriptor. */
+struct RData {
+ MRUBY_OBJECT_HEADER;
+ struct kh_iv *iv; /* NOTE(review): looks like the instance-variable table — confirm */
+ struct mrb_data_type *type; /* descriptor; read through DATA_TYPE() */
+ void *data; /* wrapped pointer; read through DATA_PTR() */
+};
+
+/* Creates an RData for datap with the given class and type descriptor (defined outside this header). */
+struct RData *mrb_data_object_alloc(mrb_state *mrb, struct RClass* klass, void *datap, const struct mrb_data_type *type);
+
+/* Wrap an existing pointer; note the argument order differs from mrb_data_object_alloc(). */
+#define Data_Wrap_Struct(mrb,klass,type,ptr)\
+ mrb_data_object_alloc(mrb,klass,ptr,type)
+
+/*
+ * Allocate sizeof(strct) bytes with mrb_malloc(), zero-fill them, store the
+ * pointer in sval and wrap it.  The result of mrb_malloc() is not checked
+ * before the memset().  sval is evaluated more than once.
+ */
+#define Data_Make_Struct(mrb,klass,strct,type,sval) (\
+ sval = mrb_malloc(mrb, sizeof(strct)),\
+ memset(sval, 0, sizeof(strct)),\
+ Data_Wrap_Struct(mrb,klass,type,sval)\
+)
+
+/* Accessors for an mrb_value whose value.p points at a struct RData. */
+#define RDATA(obj) ((struct RData *)((obj).value.p))
+#define DATA_PTR(d) (RDATA(d)->data)
+#define DATA_TYPE(d) (RDATA(d)->type)
+/* presumably returns the wrapped pointer when the object matches the given type — confirm */
+void *mrb_check_datatype(mrb_state *mrb, mrb_value, const struct mrb_data_type*);
+/* Store the result of mrb_check_datatype() in sval. */
+#define Data_Get_Struct(mrb,obj,type,sval) do {\
+ sval = mrb_check_datatype(mrb, obj, type); \
+} while (0)
+
+#if defined(__cplusplus)
+} /* extern "C" { */
+#endif
+
+#endif /* RUBY_DATA_H */
diff --git a/src/method.h b/src/method.h
new file mode 100644
index 000000000..3591917a2
--- /dev/null
+++ b/src/method.h
@@ -0,0 +1,103 @@
+/**********************************************************************
+
+ method.h -
+
+ $Author: ko1 $
+ created at: Wed Jul 15 20:02:33 2009
+
+ Copyright (C) 2009 Koichi Sasada
+
+**********************************************************************/
+#ifndef METHOD_H
+#define METHOD_H
+
+/* Method flag bits; the NOEX_SAFE/NOEX_WITH macros place a further value above bit 8. */
+typedef enum {
+ NOEX_PUBLIC = 0x00,
+ NOEX_NOSUPER = 0x01,
+ NOEX_PRIVATE = 0x02,
+ NOEX_PROTECTED = 0x04,
+ NOEX_MASK = 0x06, /* NOEX_PRIVATE | NOEX_PROTECTED */
+ NOEX_BASIC = 0x08,
+ NOEX_UNDEF = NOEX_NOSUPER, /* alias: same value as NOEX_NOSUPER */
+ NOEX_MODFUNC = 0x12, /* 0x10 | NOEX_PRIVATE */
+ NOEX_SUPER = 0x20,
+ NOEX_VCALL = 0x40,
+ NOEX_RESPONDS = 0x80
+} mrb_method_flag_t;
+
+/* Extract the 4-bit value stored in bits 8-11 of n. */
+#define NOEX_SAFE(n) ((int)((n) >> 8) & 0x0F)
+/*
+ * Combine flags n with the value s shifted into bits 8 and up; NOEX_BASIC
+ * is added while ruby_running is false.  s is parenthesised so that an
+ * expression argument such as `a | b` is shifted as a whole.
+ */
+#define NOEX_WITH(n, s) (((s) << 8) | (n) | (ruby_running ? 0 : NOEX_BASIC))
+#define NOEX_WITH_SAFE(n) NOEX_WITH(n, mrb_safe_level())
+
+/* method data type */
+
+/* Kind of method body; stored in mrb_method_definition_t.type. */
+typedef enum {
+ VM_METHOD_TYPE_ISEQ,
+ VM_METHOD_TYPE_CFUNC,
+ VM_METHOD_TYPE_ATTRSET,
+ VM_METHOD_TYPE_IVAR,
+ VM_METHOD_TYPE_BMETHOD,
+ VM_METHOD_TYPE_ZSUPER,
+ VM_METHOD_TYPE_UNDEF, /* tested by UNDEFINED_METHOD_ENTRY_P() */
+ VM_METHOD_TYPE_NOTIMPLEMENTED,
+ VM_METHOD_TYPE_OPTIMIZED, /* Kernel#send, Proc#call, etc */
+ VM_METHOD_TYPE_MISSING /* wrapper for method_missing(id) */
+} mrb_method_type_t;
+
+/* Body of a method implemented by a C function. */
+typedef struct mrb_method_cfunc_struct {
+ mrb_value (*func)(ANYARGS); /* the C function */
+ int argc; /* presumably the declared argument count — confirm */
+} mrb_method_cfunc_t;
+
+/* Body used through the `attr` member of the mrb_method_definition_t union. */
+typedef struct mrb_method_attr_struct {
+ mrb_sym id;
+ mrb_value location;
+} mrb_method_attr_t;
+
+typedef struct mrb_iseq_struct mrb_iseq_t;
+
+/* A method definition: its type tag plus a union holding the type-specific body. */
+typedef struct mrb_method_definition_struct {
+ mrb_method_type_t type; /* method type */
+ mrb_sym original_id;
+ /* presumably `type` selects which member is valid — confirm against the users */
+ union {
+ mrb_iseq_t *iseq; /* should be mark */
+ mrb_method_cfunc_t cfunc;
+ mrb_method_attr_t attr;
+ mrb_value proc; /* should be mark */
+ enum method_optimized_type {
+ OPTIMIZED_METHOD_TYPE_SEND,
+ OPTIMIZED_METHOD_TYPE_CALL
+ } optimize_type;
+ } body;
+ int alias_count;
+} mrb_method_definition_t;
+
+/* A method entry: flags, the definition it points at, and the id and class it is recorded with. */
+typedef struct mrb_method_entry_struct {
+ mrb_method_flag_t flag;
+ char mark;
+ mrb_method_definition_t *def; /* may be NULL; see UNDEFINED_METHOD_ENTRY_P() */
+ mrb_sym called_id;
+ mrb_value klass; /* should be mark */
+} mrb_method_entry_t;
+
+/* Node of a singly linked list of method entries. */
+struct unlinked_method_entry_list_entry {
+ struct unlinked_method_entry_list_entry *next; /* following node */
+ mrb_method_entry_t *me; /* entry held by this node */
+};
+
+/* True when me is NULL, has no definition, or its definition type is VM_METHOD_TYPE_UNDEF. */
+#define UNDEFINED_METHOD_ENTRY_P(me) (!(me) || !(me)->def || (me)->def->type == VM_METHOD_TYPE_UNDEF)
+
+/* Method-table API; the definitions are not part of this header. */
+void mrb_add_method_cfunc(mrb_value klass, mrb_sym mid, mrb_value (*func)(ANYARGS), int argc, mrb_method_flag_t noex);
+mrb_method_entry_t *mrb_add_method(mrb_value klass, mrb_sym mid, mrb_method_type_t type, void *option, mrb_method_flag_t noex);
+mrb_method_entry_t *mrb_method_entry(mrb_state *mrb, mrb_value klass, mrb_sym id);
+
+mrb_method_entry_t *mrb_method_entry_get_without_cache(mrb_value klass, mrb_sym id);
+mrb_method_entry_t *mrb_method_entry_set(mrb_value klass, mrb_sym mid, const mrb_method_entry_t *, mrb_method_flag_t noex);
+
+int mrb_method_entry_arity(const mrb_method_entry_t *me);
+
+void mrb_mark_method_entry(const mrb_method_entry_t *me);
+void mrb_free_method_entry(mrb_method_entry_t *me);
+void mrb_sweep_method_entry(void *vm);
+
+#endif /* METHOD_H */
diff --git a/src/minimain.c b/src/minimain.c
new file mode 100644
index 000000000..b1b70c2af
--- /dev/null
+++ b/src/minimain.c
@@ -0,0 +1,117 @@
+#include "mruby.h"
+#include "mruby/proc.h"
+
+#if 0
+#include "opcode.h"
+
+/* Instruction buffer, symbol table and irep for the hand-assembled "fib" method (inside the #if 0 section). */
+mrb_code fib_iseq[256];
+
+int fib_syms[4];
+
+/* NOTE(review): positional initializer; the meaning of each value depends on the mrb_irep layout, which is declared elsewhere — confirm before editing. */
+mrb_irep fib_irep = {
+ 1,
+ MRB_IREP_NOFREE,
+ 2,
+ 5,
+ fib_iseq,
+ NULL,
+ fib_syms,
+
+ 256, 0, 4,
+};
+
+/* Instruction buffer, symbol table and irep for the hand-assembled top-level program (inside the #if 0 section). */
+mrb_code main_iseq[256];
+
+int main_syms[2];
+
+/* NOTE(review): positional initializer; the meaning of each value depends on the mrb_irep layout, which is declared elsewhere — confirm before editing. */
+mrb_irep main_irep = {
+ 0,
+ MRB_IREP_NOFREE,
+ 1,
+ 3,
+ main_iseq,
+ NULL,
+ main_syms,
+
+ 256, 0, 2,
+};
+
+/*
+ * Disabled variant (inside #if 0): fills main_iseq and fib_iseq by hand,
+ * registers both ireps after the existing ones and runs main_irep.
+ */
+int
+main(int argc, char **argv)
+{
+ mrb_state *mrb = mrb_open();
+ int sirep = mrb->irep_len;
+ int n;
+
+ main_syms[0] = mrb_intern(mrb, "fib");
+ main_syms[1] = mrb_intern(mrb, "p");
+ n = 0;
+
+ main_iseq[n++] = MKOP_AB(OP_LAMBDA, 1, 1); /* r1 := lambda(1) */
+ main_iseq[n++] = MKOP_AB(OP_METHOD, 1, 0); /* defmethod(r1) */
+ main_iseq[n++] = MKOP_AB(OP_MOVE, 1, 0); /* r1 := r0 */
+ main_iseq[n++] = MKOP_AB(OP_MOVE, 2, 0); /* r2 := r0 */
+ main_iseq[n++] = MKOP_AsBx(OP_LOADI, 3, 35); /* r3 := 35 */
+ main_iseq[n++] = MKOP_ABC(OP_SEND, 2, 0, 1); /* r2 .fib r3 */
+ main_iseq[n++] = MKOP_ABC(OP_SEND, 1, 1, 1); /* r1 .p r2 */
+ main_iseq[n++] = MKOP_ABC(OP_STOP, 1, 1, 2); /* stop */
+ main_irep.ilen = n;
+ main_irep.idx = sirep;
+
+ fib_syms[0] = mrb_intern(mrb, "<");
+ fib_syms[1] = mrb_intern(mrb, "-");
+ fib_syms[2] = mrb_intern(mrb, "+");
+ fib_syms[3] = mrb_intern(mrb, "fib");
+ n = 0;
+
+ fib_iseq[n++] = MKOP_AB(OP_MOVE, 2, 1); /* r2 := r1 */
+ fib_iseq[n++] = MKOP_AsBx(OP_LOADI, 3, 3); /* r3 := 3 */
+ fib_iseq[n++] = MKOP_ABC(OP_LT, 2, 0, 2); /* r2 .< r3 */
+ fib_iseq[n++] = MKOP_AsBx(OP_JMPNOT, 2, 2); /* ifnot r2 :else */
+ fib_iseq[n++] = MKOP_AsBx(OP_LOADI, 2, 1); /* r2 := 1 */
+ fib_iseq[n++] = MKOP_A(OP_RETURN, 2); /* return r2 */
+ fib_iseq[n++] = MKOP_AB(OP_MOVE, 3, 0); /* r3 := r0 :else */
+ fib_iseq[n++] = MKOP_AB(OP_MOVE, 4, 1); /* r4 := r1 */
+ fib_iseq[n++] = MKOP_ABC(OP_SUBI, 4, 1, 2); /* r4 .- 2 */
+ fib_iseq[n++] = MKOP_ABC(OP_SEND, 3, 3, 1); /* r3 .fib r4 */
+ fib_iseq[n++] = MKOP_AB(OP_MOVE, 4, 0); /* r4 := r0 */
+ fib_iseq[n++] = MKOP_AB(OP_MOVE, 5, 1); /* r5 := r1 */
+ fib_iseq[n++] = MKOP_ABC(OP_SUBI, 5, 1, 1); /* r5 .- 1 */
+ fib_iseq[n++] = MKOP_ABC(OP_SEND, 4, 3, 1); /* r4 .fib :r5 */
+ fib_iseq[n++] = MKOP_ABC(OP_ADD, 3, 2, 1); /* r3 .+ r4 */
+ fib_iseq[n++] = MKOP_A(OP_RETURN, 3); /* return r3 */
+ fib_irep.ilen = n;
+ fib_irep.idx = sirep+1;
+
+ /* make room for two more ireps, then store them in the new slots */
+ mrb_add_irep(mrb, sirep+2);
+ mrb->irep[sirep ] = &main_irep;
+ mrb->irep[sirep+1] = &fib_irep;
+
+ mrb_run(mrb, mrb_proc_new(mrb, &main_irep), mrb_nil_value());
+}
+
+#else
+#include "compile.h"
+
+/*
+ * Compile a small Ruby program that prints fib(30) and run it.
+ *
+ * Returns 0 after running the program and 1 when the interpreter state
+ * cannot be created or the compile step does not yield a usable irep
+ * index (a negative index can never be used to read mrb->irep).
+ */
+int
+main(void)
+{
+  mrb_state *mrb = mrb_open();
+  int n;
+
+  if (!mrb) {
+    return 1;
+  }
+
+  n = mrb_compile_string(mrb, "\
+def fib(n)\n\
+ if n<2\n\
+ n\n\
+ else\n\
+ fib(n-2)+fib(n-1)\n\
+ end\n\
+end\n\
+p(fib(30), \"\\n\")\n\
+");
+  if (n < 0) {
+    return 1;
+  }
+  mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_nil_value());
+
+  return 0;
+}
+
+#endif
diff --git a/src/name2ctype.h b/src/name2ctype.h
new file mode 100644
index 000000000..2248b8f7c
--- /dev/null
+++ b/src/name2ctype.h
@@ -0,0 +1,17985 @@
+/* C code produced by gperf version 3.0.3 */
+/* Command-line: gperf -7 -c -j1 -i1 -t -C -P -T -H uniname2ctype_hash -Q uniname2ctype_pool -N uniname2ctype_p */
+#ifndef USE_UNICODE_PROPERTIES
+/* Computed positions: -k'1,3' */
+#else /* USE_UNICODE_PROPERTIES */
+/* Computed positions: -k'1-3,6,12,16,$' */
+#endif /* USE_UNICODE_PROPERTIES */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646. */
+error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
+#endif
+
+
+
+#ifdef USE_UNICODE_PROPERTIES
+#ifdef USE_UNICODE_PROPERTIES
+/* 'Any': - */
+/* Layout appears to be a range count followed by inclusive from/to code point pairs — confirm against the consumer. */
+static const OnigCodePoint CR_Any[] = {
+ 1,
+ 0x0000, 0x10ffff,
+}; /* CR_Any */
+
+/* 'Assigned': - */
+static const OnigCodePoint CR_Assigned[] = {
+ 485,
+ 0x0000, 0x0377,
+ 0x037a, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x0606, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0830, 0x083e,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0955,
+ 0x0958, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fb,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0cf1, 0x0cf2,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d75,
+ 0x0d79, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fce, 0x0fd8,
+ 0x1000, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1400, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b7c,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c37,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c7f,
+ 0x1cd0, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfd, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2064,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b8,
+ 0x20d0, 0x20f0,
+ 0x2100, 0x2189,
+ 0x2190, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2cf1,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2e31,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31e3,
+ 0x31f0, 0x321e,
+ 0x3220, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa4d0, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa673,
+ 0xa67c, 0xa697,
+ 0xa6a0, 0xa6f7,
+ 0xa700, 0xa78c,
+ 0xa7fb, 0xa82b,
+ 0xa830, 0xa839,
+ 0xa840, 0xa877,
+ 0xa880, 0xa8c4,
+ 0xa8ce, 0xa8d9,
+ 0xa8e0, 0xa8fb,
+ 0xa900, 0xa953,
+ 0xa95f, 0xa97c,
+ 0xa980, 0xa9cd,
+ 0xa9cf, 0xa9d9,
+ 0xa9de, 0xa9df,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadf,
+ 0xabc0, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xd800, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe26,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fd,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10857, 0x1085f,
+ 0x10900, 0x1091b,
+ 0x1091f, 0x10939,
+ 0x1093f, 0x1093f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x10a60, 0x10a7f,
+ 0x10b00, 0x10b35,
+ 0x10b39, 0x10b55,
+ 0x10b58, 0x10b72,
+ 0x10b78, 0x10b7f,
+ 0x10c00, 0x10c48,
+ 0x10e60, 0x10e7e,
+ 0x11080, 0x110c1,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x12470, 0x12473,
+ 0x13000, 0x1342e,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f100, 0x1f10a,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f200, 0x1f200,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
+}; /* CR_Assigned */
+
+/* 'C': Major Category */
+static const OnigCodePoint CR_C[] = {
+ 20,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+ 0x00ad, 0x00ad,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2064,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x110bd, 0x110bd,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10ffff,
+}; /* CR_C */
+
+/* 'Cc': General Category */
+static const OnigCodePoint CR_Cc[] = {
+ 2,
+ 0x0000, 0x001f,
+ 0x007f, 0x009f,
+}; /* CR_Cc */
+
+/* 'Cf': General Category */
+static const OnigCodePoint CR_Cf[] = {
+ 15,
+ 0x00ad, 0x00ad,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2064,
+ 0x206a, 0x206f,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x110bd, 0x110bd,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+}; /* CR_Cf */
+
+/* 'Cn': General Category */
+static const OnigCodePoint CR_Cn[] = {
+ 485,
+ 0x0378, 0x0379,
+ 0x037f, 0x0383,
+ 0x038b, 0x038b,
+ 0x038d, 0x038d,
+ 0x03a2, 0x03a2,
+ 0x0526, 0x0530,
+ 0x0557, 0x0558,
+ 0x0560, 0x0560,
+ 0x0588, 0x0588,
+ 0x058b, 0x0590,
+ 0x05c8, 0x05cf,
+ 0x05eb, 0x05ef,
+ 0x05f5, 0x05ff,
+ 0x0604, 0x0605,
+ 0x061c, 0x061d,
+ 0x0620, 0x0620,
+ 0x065f, 0x065f,
+ 0x070e, 0x070e,
+ 0x074b, 0x074c,
+ 0x07b2, 0x07bf,
+ 0x07fb, 0x07ff,
+ 0x082e, 0x082f,
+ 0x083f, 0x08ff,
+ 0x093a, 0x093b,
+ 0x094f, 0x094f,
+ 0x0956, 0x0957,
+ 0x0973, 0x0978,
+ 0x0980, 0x0980,
+ 0x0984, 0x0984,
+ 0x098d, 0x098e,
+ 0x0991, 0x0992,
+ 0x09a9, 0x09a9,
+ 0x09b1, 0x09b1,
+ 0x09b3, 0x09b5,
+ 0x09ba, 0x09bb,
+ 0x09c5, 0x09c6,
+ 0x09c9, 0x09ca,
+ 0x09cf, 0x09d6,
+ 0x09d8, 0x09db,
+ 0x09de, 0x09de,
+ 0x09e4, 0x09e5,
+ 0x09fc, 0x0a00,
+ 0x0a04, 0x0a04,
+ 0x0a0b, 0x0a0e,
+ 0x0a11, 0x0a12,
+ 0x0a29, 0x0a29,
+ 0x0a31, 0x0a31,
+ 0x0a34, 0x0a34,
+ 0x0a37, 0x0a37,
+ 0x0a3a, 0x0a3b,
+ 0x0a3d, 0x0a3d,
+ 0x0a43, 0x0a46,
+ 0x0a49, 0x0a4a,
+ 0x0a4e, 0x0a50,
+ 0x0a52, 0x0a58,
+ 0x0a5d, 0x0a5d,
+ 0x0a5f, 0x0a65,
+ 0x0a76, 0x0a80,
+ 0x0a84, 0x0a84,
+ 0x0a8e, 0x0a8e,
+ 0x0a92, 0x0a92,
+ 0x0aa9, 0x0aa9,
+ 0x0ab1, 0x0ab1,
+ 0x0ab4, 0x0ab4,
+ 0x0aba, 0x0abb,
+ 0x0ac6, 0x0ac6,
+ 0x0aca, 0x0aca,
+ 0x0ace, 0x0acf,
+ 0x0ad1, 0x0adf,
+ 0x0ae4, 0x0ae5,
+ 0x0af0, 0x0af0,
+ 0x0af2, 0x0b00,
+ 0x0b04, 0x0b04,
+ 0x0b0d, 0x0b0e,
+ 0x0b11, 0x0b12,
+ 0x0b29, 0x0b29,
+ 0x0b31, 0x0b31,
+ 0x0b34, 0x0b34,
+ 0x0b3a, 0x0b3b,
+ 0x0b45, 0x0b46,
+ 0x0b49, 0x0b4a,
+ 0x0b4e, 0x0b55,
+ 0x0b58, 0x0b5b,
+ 0x0b5e, 0x0b5e,
+ 0x0b64, 0x0b65,
+ 0x0b72, 0x0b81,
+ 0x0b84, 0x0b84,
+ 0x0b8b, 0x0b8d,
+ 0x0b91, 0x0b91,
+ 0x0b96, 0x0b98,
+ 0x0b9b, 0x0b9b,
+ 0x0b9d, 0x0b9d,
+ 0x0ba0, 0x0ba2,
+ 0x0ba5, 0x0ba7,
+ 0x0bab, 0x0bad,
+ 0x0bba, 0x0bbd,
+ 0x0bc3, 0x0bc5,
+ 0x0bc9, 0x0bc9,
+ 0x0bce, 0x0bcf,
+ 0x0bd1, 0x0bd6,
+ 0x0bd8, 0x0be5,
+ 0x0bfb, 0x0c00,
+ 0x0c04, 0x0c04,
+ 0x0c0d, 0x0c0d,
+ 0x0c11, 0x0c11,
+ 0x0c29, 0x0c29,
+ 0x0c34, 0x0c34,
+ 0x0c3a, 0x0c3c,
+ 0x0c45, 0x0c45,
+ 0x0c49, 0x0c49,
+ 0x0c4e, 0x0c54,
+ 0x0c57, 0x0c57,
+ 0x0c5a, 0x0c5f,
+ 0x0c64, 0x0c65,
+ 0x0c70, 0x0c77,
+ 0x0c80, 0x0c81,
+ 0x0c84, 0x0c84,
+ 0x0c8d, 0x0c8d,
+ 0x0c91, 0x0c91,
+ 0x0ca9, 0x0ca9,
+ 0x0cb4, 0x0cb4,
+ 0x0cba, 0x0cbb,
+ 0x0cc5, 0x0cc5,
+ 0x0cc9, 0x0cc9,
+ 0x0cce, 0x0cd4,
+ 0x0cd7, 0x0cdd,
+ 0x0cdf, 0x0cdf,
+ 0x0ce4, 0x0ce5,
+ 0x0cf0, 0x0cf0,
+ 0x0cf3, 0x0d01,
+ 0x0d04, 0x0d04,
+ 0x0d0d, 0x0d0d,
+ 0x0d11, 0x0d11,
+ 0x0d29, 0x0d29,
+ 0x0d3a, 0x0d3c,
+ 0x0d45, 0x0d45,
+ 0x0d49, 0x0d49,
+ 0x0d4e, 0x0d56,
+ 0x0d58, 0x0d5f,
+ 0x0d64, 0x0d65,
+ 0x0d76, 0x0d78,
+ 0x0d80, 0x0d81,
+ 0x0d84, 0x0d84,
+ 0x0d97, 0x0d99,
+ 0x0db2, 0x0db2,
+ 0x0dbc, 0x0dbc,
+ 0x0dbe, 0x0dbf,
+ 0x0dc7, 0x0dc9,
+ 0x0dcb, 0x0dce,
+ 0x0dd5, 0x0dd5,
+ 0x0dd7, 0x0dd7,
+ 0x0de0, 0x0df1,
+ 0x0df5, 0x0e00,
+ 0x0e3b, 0x0e3e,
+ 0x0e5c, 0x0e80,
+ 0x0e83, 0x0e83,
+ 0x0e85, 0x0e86,
+ 0x0e89, 0x0e89,
+ 0x0e8b, 0x0e8c,
+ 0x0e8e, 0x0e93,
+ 0x0e98, 0x0e98,
+ 0x0ea0, 0x0ea0,
+ 0x0ea4, 0x0ea4,
+ 0x0ea6, 0x0ea6,
+ 0x0ea8, 0x0ea9,
+ 0x0eac, 0x0eac,
+ 0x0eba, 0x0eba,
+ 0x0ebe, 0x0ebf,
+ 0x0ec5, 0x0ec5,
+ 0x0ec7, 0x0ec7,
+ 0x0ece, 0x0ecf,
+ 0x0eda, 0x0edb,
+ 0x0ede, 0x0eff,
+ 0x0f48, 0x0f48,
+ 0x0f6d, 0x0f70,
+ 0x0f8c, 0x0f8f,
+ 0x0f98, 0x0f98,
+ 0x0fbd, 0x0fbd,
+ 0x0fcd, 0x0fcd,
+ 0x0fd9, 0x0fff,
+ 0x10c6, 0x10cf,
+ 0x10fd, 0x10ff,
+ 0x1249, 0x1249,
+ 0x124e, 0x124f,
+ 0x1257, 0x1257,
+ 0x1259, 0x1259,
+ 0x125e, 0x125f,
+ 0x1289, 0x1289,
+ 0x128e, 0x128f,
+ 0x12b1, 0x12b1,
+ 0x12b6, 0x12b7,
+ 0x12bf, 0x12bf,
+ 0x12c1, 0x12c1,
+ 0x12c6, 0x12c7,
+ 0x12d7, 0x12d7,
+ 0x1311, 0x1311,
+ 0x1316, 0x1317,
+ 0x135b, 0x135e,
+ 0x137d, 0x137f,
+ 0x139a, 0x139f,
+ 0x13f5, 0x13ff,
+ 0x169d, 0x169f,
+ 0x16f1, 0x16ff,
+ 0x170d, 0x170d,
+ 0x1715, 0x171f,
+ 0x1737, 0x173f,
+ 0x1754, 0x175f,
+ 0x176d, 0x176d,
+ 0x1771, 0x1771,
+ 0x1774, 0x177f,
+ 0x17de, 0x17df,
+ 0x17ea, 0x17ef,
+ 0x17fa, 0x17ff,
+ 0x180f, 0x180f,
+ 0x181a, 0x181f,
+ 0x1878, 0x187f,
+ 0x18ab, 0x18af,
+ 0x18f6, 0x18ff,
+ 0x191d, 0x191f,
+ 0x192c, 0x192f,
+ 0x193c, 0x193f,
+ 0x1941, 0x1943,
+ 0x196e, 0x196f,
+ 0x1975, 0x197f,
+ 0x19ac, 0x19af,
+ 0x19ca, 0x19cf,
+ 0x19db, 0x19dd,
+ 0x1a1c, 0x1a1d,
+ 0x1a5f, 0x1a5f,
+ 0x1a7d, 0x1a7e,
+ 0x1a8a, 0x1a8f,
+ 0x1a9a, 0x1a9f,
+ 0x1aae, 0x1aff,
+ 0x1b4c, 0x1b4f,
+ 0x1b7d, 0x1b7f,
+ 0x1bab, 0x1bad,
+ 0x1bba, 0x1bff,
+ 0x1c38, 0x1c3a,
+ 0x1c4a, 0x1c4c,
+ 0x1c80, 0x1ccf,
+ 0x1cf3, 0x1cff,
+ 0x1de7, 0x1dfc,
+ 0x1f16, 0x1f17,
+ 0x1f1e, 0x1f1f,
+ 0x1f46, 0x1f47,
+ 0x1f4e, 0x1f4f,
+ 0x1f58, 0x1f58,
+ 0x1f5a, 0x1f5a,
+ 0x1f5c, 0x1f5c,
+ 0x1f5e, 0x1f5e,
+ 0x1f7e, 0x1f7f,
+ 0x1fb5, 0x1fb5,
+ 0x1fc5, 0x1fc5,
+ 0x1fd4, 0x1fd5,
+ 0x1fdc, 0x1fdc,
+ 0x1ff0, 0x1ff1,
+ 0x1ff5, 0x1ff5,
+ 0x1fff, 0x1fff,
+ 0x2065, 0x2069,
+ 0x2072, 0x2073,
+ 0x208f, 0x208f,
+ 0x2095, 0x209f,
+ 0x20b9, 0x20cf,
+ 0x20f1, 0x20ff,
+ 0x218a, 0x218f,
+ 0x23e9, 0x23ff,
+ 0x2427, 0x243f,
+ 0x244b, 0x245f,
+ 0x26ce, 0x26ce,
+ 0x26e2, 0x26e2,
+ 0x26e4, 0x26e7,
+ 0x2700, 0x2700,
+ 0x2705, 0x2705,
+ 0x270a, 0x270b,
+ 0x2728, 0x2728,
+ 0x274c, 0x274c,
+ 0x274e, 0x274e,
+ 0x2753, 0x2755,
+ 0x275f, 0x2760,
+ 0x2795, 0x2797,
+ 0x27b0, 0x27b0,
+ 0x27bf, 0x27bf,
+ 0x27cb, 0x27cb,
+ 0x27cd, 0x27cf,
+ 0x2b4d, 0x2b4f,
+ 0x2b5a, 0x2bff,
+ 0x2c2f, 0x2c2f,
+ 0x2c5f, 0x2c5f,
+ 0x2cf2, 0x2cf8,
+ 0x2d26, 0x2d2f,
+ 0x2d66, 0x2d6e,
+ 0x2d70, 0x2d7f,
+ 0x2d97, 0x2d9f,
+ 0x2da7, 0x2da7,
+ 0x2daf, 0x2daf,
+ 0x2db7, 0x2db7,
+ 0x2dbf, 0x2dbf,
+ 0x2dc7, 0x2dc7,
+ 0x2dcf, 0x2dcf,
+ 0x2dd7, 0x2dd7,
+ 0x2ddf, 0x2ddf,
+ 0x2e32, 0x2e7f,
+ 0x2e9a, 0x2e9a,
+ 0x2ef4, 0x2eff,
+ 0x2fd6, 0x2fef,
+ 0x2ffc, 0x2fff,
+ 0x3040, 0x3040,
+ 0x3097, 0x3098,
+ 0x3100, 0x3104,
+ 0x312e, 0x3130,
+ 0x318f, 0x318f,
+ 0x31b8, 0x31bf,
+ 0x31e4, 0x31ef,
+ 0x321f, 0x321f,
+ 0x32ff, 0x32ff,
+ 0x4db6, 0x4dbf,
+ 0x9fcc, 0x9fff,
+ 0xa48d, 0xa48f,
+ 0xa4c7, 0xa4cf,
+ 0xa62c, 0xa63f,
+ 0xa660, 0xa661,
+ 0xa674, 0xa67b,
+ 0xa698, 0xa69f,
+ 0xa6f8, 0xa6ff,
+ 0xa78d, 0xa7fa,
+ 0xa82c, 0xa82f,
+ 0xa83a, 0xa83f,
+ 0xa878, 0xa87f,
+ 0xa8c5, 0xa8cd,
+ 0xa8da, 0xa8df,
+ 0xa8fc, 0xa8ff,
+ 0xa954, 0xa95e,
+ 0xa97d, 0xa97f,
+ 0xa9ce, 0xa9ce,
+ 0xa9da, 0xa9dd,
+ 0xa9e0, 0xa9ff,
+ 0xaa37, 0xaa3f,
+ 0xaa4e, 0xaa4f,
+ 0xaa5a, 0xaa5b,
+ 0xaa7c, 0xaa7f,
+ 0xaac3, 0xaada,
+ 0xaae0, 0xabbf,
+ 0xabee, 0xabef,
+ 0xabfa, 0xabff,
+ 0xd7a4, 0xd7af,
+ 0xd7c7, 0xd7ca,
+ 0xd7fc, 0xd7ff,
+ 0xfa2e, 0xfa2f,
+ 0xfa6e, 0xfa6f,
+ 0xfada, 0xfaff,
+ 0xfb07, 0xfb12,
+ 0xfb18, 0xfb1c,
+ 0xfb37, 0xfb37,
+ 0xfb3d, 0xfb3d,
+ 0xfb3f, 0xfb3f,
+ 0xfb42, 0xfb42,
+ 0xfb45, 0xfb45,
+ 0xfbb2, 0xfbd2,
+ 0xfd40, 0xfd4f,
+ 0xfd90, 0xfd91,
+ 0xfdc8, 0xfdef,
+ 0xfdfe, 0xfdff,
+ 0xfe1a, 0xfe1f,
+ 0xfe27, 0xfe2f,
+ 0xfe53, 0xfe53,
+ 0xfe67, 0xfe67,
+ 0xfe6c, 0xfe6f,
+ 0xfe75, 0xfe75,
+ 0xfefd, 0xfefe,
+ 0xff00, 0xff00,
+ 0xffbf, 0xffc1,
+ 0xffc8, 0xffc9,
+ 0xffd0, 0xffd1,
+ 0xffd8, 0xffd9,
+ 0xffdd, 0xffdf,
+ 0xffe7, 0xffe7,
+ 0xffef, 0xfff8,
+ 0xfffe, 0xffff,
+ 0x1000c, 0x1000c,
+ 0x10027, 0x10027,
+ 0x1003b, 0x1003b,
+ 0x1003e, 0x1003e,
+ 0x1004e, 0x1004f,
+ 0x1005e, 0x1007f,
+ 0x100fb, 0x100ff,
+ 0x10103, 0x10106,
+ 0x10134, 0x10136,
+ 0x1018b, 0x1018f,
+ 0x1019c, 0x101cf,
+ 0x101fe, 0x1027f,
+ 0x1029d, 0x1029f,
+ 0x102d1, 0x102ff,
+ 0x1031f, 0x1031f,
+ 0x10324, 0x1032f,
+ 0x1034b, 0x1037f,
+ 0x1039e, 0x1039e,
+ 0x103c4, 0x103c7,
+ 0x103d6, 0x103ff,
+ 0x1049e, 0x1049f,
+ 0x104aa, 0x107ff,
+ 0x10806, 0x10807,
+ 0x10809, 0x10809,
+ 0x10836, 0x10836,
+ 0x10839, 0x1083b,
+ 0x1083d, 0x1083e,
+ 0x10856, 0x10856,
+ 0x10860, 0x108ff,
+ 0x1091c, 0x1091e,
+ 0x1093a, 0x1093e,
+ 0x10940, 0x109ff,
+ 0x10a04, 0x10a04,
+ 0x10a07, 0x10a0b,
+ 0x10a14, 0x10a14,
+ 0x10a18, 0x10a18,
+ 0x10a34, 0x10a37,
+ 0x10a3b, 0x10a3e,
+ 0x10a48, 0x10a4f,
+ 0x10a59, 0x10a5f,
+ 0x10a80, 0x10aff,
+ 0x10b36, 0x10b38,
+ 0x10b56, 0x10b57,
+ 0x10b73, 0x10b77,
+ 0x10b80, 0x10bff,
+ 0x10c49, 0x10e5f,
+ 0x10e7f, 0x1107f,
+ 0x110c2, 0x11fff,
+ 0x1236f, 0x123ff,
+ 0x12463, 0x1246f,
+ 0x12474, 0x12fff,
+ 0x1342f, 0x1cfff,
+ 0x1d0f6, 0x1d0ff,
+ 0x1d127, 0x1d128,
+ 0x1d1de, 0x1d1ff,
+ 0x1d246, 0x1d2ff,
+ 0x1d357, 0x1d35f,
+ 0x1d372, 0x1d3ff,
+ 0x1d455, 0x1d455,
+ 0x1d49d, 0x1d49d,
+ 0x1d4a0, 0x1d4a1,
+ 0x1d4a3, 0x1d4a4,
+ 0x1d4a7, 0x1d4a8,
+ 0x1d4ad, 0x1d4ad,
+ 0x1d4ba, 0x1d4ba,
+ 0x1d4bc, 0x1d4bc,
+ 0x1d4c4, 0x1d4c4,
+ 0x1d506, 0x1d506,
+ 0x1d50b, 0x1d50c,
+ 0x1d515, 0x1d515,
+ 0x1d51d, 0x1d51d,
+ 0x1d53a, 0x1d53a,
+ 0x1d53f, 0x1d53f,
+ 0x1d545, 0x1d545,
+ 0x1d547, 0x1d549,
+ 0x1d551, 0x1d551,
+ 0x1d6a6, 0x1d6a7,
+ 0x1d7cc, 0x1d7cd,
+ 0x1d800, 0x1efff,
+ 0x1f02c, 0x1f02f,
+ 0x1f094, 0x1f0ff,
+ 0x1f10b, 0x1f10f,
+ 0x1f12f, 0x1f130,
+ 0x1f132, 0x1f13c,
+ 0x1f13e, 0x1f13e,
+ 0x1f140, 0x1f141,
+ 0x1f143, 0x1f145,
+ 0x1f147, 0x1f149,
+ 0x1f14f, 0x1f156,
+ 0x1f158, 0x1f15e,
+ 0x1f160, 0x1f178,
+ 0x1f17a, 0x1f17a,
+ 0x1f17d, 0x1f17e,
+ 0x1f180, 0x1f189,
+ 0x1f18e, 0x1f18f,
+ 0x1f191, 0x1f1ff,
+ 0x1f201, 0x1f20f,
+ 0x1f232, 0x1f23f,
+ 0x1f249, 0x1ffff,
+ 0x2a6d7, 0x2a6ff,
+ 0x2b735, 0x2f7ff,
+ 0x2fa1e, 0xe0000,
+ 0xe0002, 0xe001f,
+ 0xe0080, 0xe00ff,
+ 0xe01f0, 0xeffff,
+ 0xffffe, 0xfffff,
+ 0x10fffe, 0x10ffff,
+}; /* CR_Cn */
+
+/* 'Co': General Category */
+static const OnigCodePoint CR_Co[] = {
+ 3,
+ 0xe000, 0xf8ff,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
+}; /* CR_Co */
+
+/* 'Cs': General Category */
+static const OnigCodePoint CR_Cs[] = {
+ 1,
+ 0xd800, 0xdfff,
+}; /* CR_Cs */
+
+/* 'L': Major Category */
+static const OnigCodePoint CR_L[] = {
+ 422,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x0815,
+ 0x081a, 0x081a,
+ 0x0824, 0x0824,
+ 0x0828, 0x0828,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bd0, 0x0bd0,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c3d,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d3d,
+ 0x0d60, 0x0d61,
+ 0x0d7a, 0x0d7f,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x102a,
+ 0x103f, 0x103f,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1aa7, 0x1aa7,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2183, 0x2184,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3006,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6e5,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9cf, 0xa9cf,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa80, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11083, 0x110af,
+ 0x12000, 0x1236e,
+ 0x13000, 0x1342e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_L */
+
+/* 'Ll': General Category */
+static const OnigCodePoint CR_Ll[] = {
+ 599,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0239,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0242, 0x0242,
+ 0x0247, 0x0247,
+ 0x0249, 0x0249,
+ 0x024b, 0x024b,
+ 0x024d, 0x024d,
+ 0x024f, 0x0293,
+ 0x0295, 0x02af,
+ 0x0371, 0x0371,
+ 0x0373, 0x0373,
+ 0x0377, 0x0377,
+ 0x037b, 0x037d,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04cf,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x04fb, 0x04fb,
+ 0x04fd, 0x04fd,
+ 0x04ff, 0x04ff,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0511, 0x0511,
+ 0x0513, 0x0513,
+ 0x0515, 0x0515,
+ 0x0517, 0x0517,
+ 0x0519, 0x0519,
+ 0x051b, 0x051b,
+ 0x051d, 0x051d,
+ 0x051f, 0x051f,
+ 0x0521, 0x0521,
+ 0x0523, 0x0523,
+ 0x0525, 0x0525,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d77,
+ 0x1d79, 0x1d9a,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9d,
+ 0x1e9f, 0x1e9f,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1efb, 0x1efb,
+ 0x1efd, 0x1efd,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x214e, 0x214e,
+ 0x2184, 0x2184,
+ 0x2c30, 0x2c5e,
+ 0x2c61, 0x2c61,
+ 0x2c65, 0x2c66,
+ 0x2c68, 0x2c68,
+ 0x2c6a, 0x2c6a,
+ 0x2c6c, 0x2c6c,
+ 0x2c71, 0x2c71,
+ 0x2c73, 0x2c74,
+ 0x2c76, 0x2c7c,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2cec, 0x2cec,
+ 0x2cee, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0xa641, 0xa641,
+ 0xa643, 0xa643,
+ 0xa645, 0xa645,
+ 0xa647, 0xa647,
+ 0xa649, 0xa649,
+ 0xa64b, 0xa64b,
+ 0xa64d, 0xa64d,
+ 0xa64f, 0xa64f,
+ 0xa651, 0xa651,
+ 0xa653, 0xa653,
+ 0xa655, 0xa655,
+ 0xa657, 0xa657,
+ 0xa659, 0xa659,
+ 0xa65b, 0xa65b,
+ 0xa65d, 0xa65d,
+ 0xa65f, 0xa65f,
+ 0xa663, 0xa663,
+ 0xa665, 0xa665,
+ 0xa667, 0xa667,
+ 0xa669, 0xa669,
+ 0xa66b, 0xa66b,
+ 0xa66d, 0xa66d,
+ 0xa681, 0xa681,
+ 0xa683, 0xa683,
+ 0xa685, 0xa685,
+ 0xa687, 0xa687,
+ 0xa689, 0xa689,
+ 0xa68b, 0xa68b,
+ 0xa68d, 0xa68d,
+ 0xa68f, 0xa68f,
+ 0xa691, 0xa691,
+ 0xa693, 0xa693,
+ 0xa695, 0xa695,
+ 0xa697, 0xa697,
+ 0xa723, 0xa723,
+ 0xa725, 0xa725,
+ 0xa727, 0xa727,
+ 0xa729, 0xa729,
+ 0xa72b, 0xa72b,
+ 0xa72d, 0xa72d,
+ 0xa72f, 0xa731,
+ 0xa733, 0xa733,
+ 0xa735, 0xa735,
+ 0xa737, 0xa737,
+ 0xa739, 0xa739,
+ 0xa73b, 0xa73b,
+ 0xa73d, 0xa73d,
+ 0xa73f, 0xa73f,
+ 0xa741, 0xa741,
+ 0xa743, 0xa743,
+ 0xa745, 0xa745,
+ 0xa747, 0xa747,
+ 0xa749, 0xa749,
+ 0xa74b, 0xa74b,
+ 0xa74d, 0xa74d,
+ 0xa74f, 0xa74f,
+ 0xa751, 0xa751,
+ 0xa753, 0xa753,
+ 0xa755, 0xa755,
+ 0xa757, 0xa757,
+ 0xa759, 0xa759,
+ 0xa75b, 0xa75b,
+ 0xa75d, 0xa75d,
+ 0xa75f, 0xa75f,
+ 0xa761, 0xa761,
+ 0xa763, 0xa763,
+ 0xa765, 0xa765,
+ 0xa767, 0xa767,
+ 0xa769, 0xa769,
+ 0xa76b, 0xa76b,
+ 0xa76d, 0xa76d,
+ 0xa76f, 0xa76f,
+ 0xa771, 0xa778,
+ 0xa77a, 0xa77a,
+ 0xa77c, 0xa77c,
+ 0xa77f, 0xa77f,
+ 0xa781, 0xa781,
+ 0xa783, 0xa783,
+ 0xa785, 0xa785,
+ 0xa787, 0xa787,
+ 0xa78c, 0xa78c,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7cb, 0x1d7cb,
+}; /* CR_Ll */
+
+/* 'Lm': General Category */
+static const OnigCodePoint CR_Lm[] = {
+ 49,
+ 0x02b0, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0374, 0x0374,
+ 0x037a, 0x037a,
+ 0x0559, 0x0559,
+ 0x0640, 0x0640,
+ 0x06e5, 0x06e6,
+ 0x07f4, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x081a, 0x081a,
+ 0x0824, 0x0824,
+ 0x0828, 0x0828,
+ 0x0971, 0x0971,
+ 0x0e46, 0x0e46,
+ 0x0ec6, 0x0ec6,
+ 0x10fc, 0x10fc,
+ 0x17d7, 0x17d7,
+ 0x1843, 0x1843,
+ 0x1aa7, 0x1aa7,
+ 0x1c78, 0x1c7d,
+ 0x1d2c, 0x1d61,
+ 0x1d78, 0x1d78,
+ 0x1d9b, 0x1dbf,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2c7d, 0x2c7d,
+ 0x2d6f, 0x2d6f,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3005,
+ 0x3031, 0x3035,
+ 0x303b, 0x303b,
+ 0x309d, 0x309e,
+ 0x30fc, 0x30fe,
+ 0xa015, 0xa015,
+ 0xa4f8, 0xa4fd,
+ 0xa60c, 0xa60c,
+ 0xa67f, 0xa67f,
+ 0xa717, 0xa71f,
+ 0xa770, 0xa770,
+ 0xa788, 0xa788,
+ 0xa9cf, 0xa9cf,
+ 0xaa70, 0xaa70,
+ 0xaadd, 0xaadd,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+}; /* CR_Lm */
+
+/* 'Lo': General Category */
+static const OnigCodePoint CR_Lo[] = {
+ 311,
+ 0x01bb, 0x01bb,
+ 0x01c0, 0x01c3,
+ 0x0294, 0x0294,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x063f,
+ 0x0641, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x07ca, 0x07ea,
+ 0x0800, 0x0815,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0972, 0x0972,
+ 0x0979, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bd0, 0x0bd0,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c3d,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d3d,
+ 0x0d60, 0x0d61,
+ 0x0d7a, 0x0d7f,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e45,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x102a,
+ 0x103f, 0x103f,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x10d0, 0x10fa,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1842,
+ 0x1844, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c77,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x2135, 0x2138,
+ 0x2d30, 0x2d65,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3006, 0x3006,
+ 0x303c, 0x303c,
+ 0x3041, 0x3096,
+ 0x309f, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30ff, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa014,
+ 0xa016, 0xa48c,
+ 0xa4d0, 0xa4f7,
+ 0xa500, 0xa60b,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa66e, 0xa66e,
+ 0xa6a0, 0xa6e5,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa6f,
+ 0xaa71, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa80, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadc,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10340,
+ 0x10342, 0x10349,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x10450, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11083, 0x110af,
+ 0x12000, 0x1236e,
+ 0x13000, 0x1342e,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_Lo */
+
+/* 'Lt': General Category */
+static const OnigCodePoint CR_Lt[] = {
+ 10,
+ 0x01c5, 0x01c5,
+ 0x01c8, 0x01c8,
+ 0x01cb, 0x01cb,
+ 0x01f2, 0x01f2,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fbc, 0x1fbc,
+ 0x1fcc, 0x1fcc,
+ 0x1ffc, 0x1ffc,
+}; /* CR_Lt */
+
+/* 'Lu': General Category */
+static const OnigCodePoint CR_Lu[] = {
+ 594,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0243, 0x0246,
+ 0x0248, 0x0248,
+ 0x024a, 0x024a,
+ 0x024c, 0x024c,
+ 0x024e, 0x024e,
+ 0x0370, 0x0370,
+ 0x0372, 0x0372,
+ 0x0376, 0x0376,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03cf, 0x03cf,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x04fa, 0x04fa,
+ 0x04fc, 0x04fc,
+ 0x04fe, 0x04fe,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0510, 0x0510,
+ 0x0512, 0x0512,
+ 0x0514, 0x0514,
+ 0x0516, 0x0516,
+ 0x0518, 0x0518,
+ 0x051a, 0x051a,
+ 0x051c, 0x051c,
+ 0x051e, 0x051e,
+ 0x0520, 0x0520,
+ 0x0522, 0x0522,
+ 0x0524, 0x0524,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1e9e, 0x1e9e,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1efa, 0x1efa,
+ 0x1efc, 0x1efc,
+ 0x1efe, 0x1efe,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2183, 0x2183,
+ 0x2c00, 0x2c2e,
+ 0x2c60, 0x2c60,
+ 0x2c62, 0x2c64,
+ 0x2c67, 0x2c67,
+ 0x2c69, 0x2c69,
+ 0x2c6b, 0x2c6b,
+ 0x2c6d, 0x2c70,
+ 0x2c72, 0x2c72,
+ 0x2c75, 0x2c75,
+ 0x2c7e, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0x2ceb, 0x2ceb,
+ 0x2ced, 0x2ced,
+ 0xa640, 0xa640,
+ 0xa642, 0xa642,
+ 0xa644, 0xa644,
+ 0xa646, 0xa646,
+ 0xa648, 0xa648,
+ 0xa64a, 0xa64a,
+ 0xa64c, 0xa64c,
+ 0xa64e, 0xa64e,
+ 0xa650, 0xa650,
+ 0xa652, 0xa652,
+ 0xa654, 0xa654,
+ 0xa656, 0xa656,
+ 0xa658, 0xa658,
+ 0xa65a, 0xa65a,
+ 0xa65c, 0xa65c,
+ 0xa65e, 0xa65e,
+ 0xa662, 0xa662,
+ 0xa664, 0xa664,
+ 0xa666, 0xa666,
+ 0xa668, 0xa668,
+ 0xa66a, 0xa66a,
+ 0xa66c, 0xa66c,
+ 0xa680, 0xa680,
+ 0xa682, 0xa682,
+ 0xa684, 0xa684,
+ 0xa686, 0xa686,
+ 0xa688, 0xa688,
+ 0xa68a, 0xa68a,
+ 0xa68c, 0xa68c,
+ 0xa68e, 0xa68e,
+ 0xa690, 0xa690,
+ 0xa692, 0xa692,
+ 0xa694, 0xa694,
+ 0xa696, 0xa696,
+ 0xa722, 0xa722,
+ 0xa724, 0xa724,
+ 0xa726, 0xa726,
+ 0xa728, 0xa728,
+ 0xa72a, 0xa72a,
+ 0xa72c, 0xa72c,
+ 0xa72e, 0xa72e,
+ 0xa732, 0xa732,
+ 0xa734, 0xa734,
+ 0xa736, 0xa736,
+ 0xa738, 0xa738,
+ 0xa73a, 0xa73a,
+ 0xa73c, 0xa73c,
+ 0xa73e, 0xa73e,
+ 0xa740, 0xa740,
+ 0xa742, 0xa742,
+ 0xa744, 0xa744,
+ 0xa746, 0xa746,
+ 0xa748, 0xa748,
+ 0xa74a, 0xa74a,
+ 0xa74c, 0xa74c,
+ 0xa74e, 0xa74e,
+ 0xa750, 0xa750,
+ 0xa752, 0xa752,
+ 0xa754, 0xa754,
+ 0xa756, 0xa756,
+ 0xa758, 0xa758,
+ 0xa75a, 0xa75a,
+ 0xa75c, 0xa75c,
+ 0xa75e, 0xa75e,
+ 0xa760, 0xa760,
+ 0xa762, 0xa762,
+ 0xa764, 0xa764,
+ 0xa766, 0xa766,
+ 0xa768, 0xa768,
+ 0xa76a, 0xa76a,
+ 0xa76c, 0xa76c,
+ 0xa76e, 0xa76e,
+ 0xa779, 0xa779,
+ 0xa77b, 0xa77b,
+ 0xa77d, 0xa77e,
+ 0xa780, 0xa780,
+ 0xa782, 0xa782,
+ 0xa784, 0xa784,
+ 0xa786, 0xa786,
+ 0xa78b, 0xa78b,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+ 0x1d7ca, 0x1d7ca,
+}; /* CR_Lu */
+
+/* 'M': Major Category */
+static const OnigCodePoint CR_M[] = {
+ 188,
+ 0x0300, 0x036f,
+ 0x0483, 0x0489,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x061a,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06de, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x07eb, 0x07f3,
+ 0x0816, 0x0819,
+ 0x081b, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082d,
+ 0x0900, 0x0903,
+ 0x093c, 0x093c,
+ 0x093e, 0x094e,
+ 0x0951, 0x0955,
+ 0x0962, 0x0963,
+ 0x0981, 0x0983,
+ 0x09bc, 0x09bc,
+ 0x09be, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a03,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a70, 0x0a71,
+ 0x0a75, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0abc, 0x0abc,
+ 0x0abe, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b3c, 0x0b3c,
+ 0x0b3e, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b62, 0x0b63,
+ 0x0b82, 0x0b82,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c62, 0x0c63,
+ 0x0c82, 0x0c83,
+ 0x0cbc, 0x0cbc,
+ 0x0cbe, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0ce2, 0x0ce3,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d62, 0x0d63,
+ 0x0d82, 0x0d83,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f3f,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102b, 0x103e,
+ 0x1056, 0x1059,
+ 0x105e, 0x1060,
+ 0x1062, 0x1064,
+ 0x1067, 0x106d,
+ 0x1071, 0x1074,
+ 0x1082, 0x108d,
+ 0x108f, 0x108f,
+ 0x109a, 0x109d,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b6, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a17, 0x1a1b,
+ 0x1a55, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1b00, 0x1b04,
+ 0x1b34, 0x1b44,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1b82,
+ 0x1ba1, 0x1baa,
+ 0x1c24, 0x1c37,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1cf2, 0x1cf2,
+ 0x1dc0, 0x1de6,
+ 0x1dfd, 0x1dff,
+ 0x20d0, 0x20f0,
+ 0x2cef, 0x2cf1,
+ 0x2de0, 0x2dff,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa66f, 0xa672,
+ 0xa67c, 0xa67d,
+ 0xa6f0, 0xa6f1,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa823, 0xa827,
+ 0xa880, 0xa881,
+ 0xa8b4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa926, 0xa92d,
+ 0xa947, 0xa953,
+ 0xa980, 0xa983,
+ 0xa9b3, 0xa9c0,
+ 0xaa29, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4d,
+ 0xaa7b, 0xaa7b,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabf,
+ 0xaac1, 0xaac1,
+ 0xabe3, 0xabea,
+ 0xabec, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0x101fd, 0x101fd,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x11080, 0x11082,
+ 0x110b0, 0x110ba,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef,
+}; /* CR_M */
+
+/* 'Mc': General Category */
+static const OnigCodePoint CR_Mc[] = {
+ 106,
+ 0x0903, 0x0903,
+ 0x093e, 0x0940,
+ 0x0949, 0x094c,
+ 0x094e, 0x094e,
+ 0x0982, 0x0983,
+ 0x09be, 0x09c0,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09d7, 0x09d7,
+ 0x0a03, 0x0a03,
+ 0x0a3e, 0x0a40,
+ 0x0a83, 0x0a83,
+ 0x0abe, 0x0ac0,
+ 0x0ac9, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0b02, 0x0b03,
+ 0x0b3e, 0x0b3e,
+ 0x0b40, 0x0b40,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b57, 0x0b57,
+ 0x0bbe, 0x0bbf,
+ 0x0bc1, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c41, 0x0c44,
+ 0x0c82, 0x0c83,
+ 0x0cbe, 0x0cbe,
+ 0x0cc0, 0x0cc4,
+ 0x0cc7, 0x0cc8,
+ 0x0cca, 0x0ccb,
+ 0x0cd5, 0x0cd6,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d40,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d57, 0x0d57,
+ 0x0d82, 0x0d83,
+ 0x0dcf, 0x0dd1,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0f3e, 0x0f3f,
+ 0x0f7f, 0x0f7f,
+ 0x102b, 0x102c,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x103b, 0x103c,
+ 0x1056, 0x1057,
+ 0x1062, 0x1064,
+ 0x1067, 0x106d,
+ 0x1083, 0x1084,
+ 0x1087, 0x108c,
+ 0x108f, 0x108f,
+ 0x109a, 0x109c,
+ 0x17b6, 0x17b6,
+ 0x17be, 0x17c5,
+ 0x17c7, 0x17c8,
+ 0x1923, 0x1926,
+ 0x1929, 0x192b,
+ 0x1930, 0x1931,
+ 0x1933, 0x1938,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a19, 0x1a1b,
+ 0x1a55, 0x1a55,
+ 0x1a57, 0x1a57,
+ 0x1a61, 0x1a61,
+ 0x1a63, 0x1a64,
+ 0x1a6d, 0x1a72,
+ 0x1b04, 0x1b04,
+ 0x1b35, 0x1b35,
+ 0x1b3b, 0x1b3b,
+ 0x1b3d, 0x1b41,
+ 0x1b43, 0x1b44,
+ 0x1b82, 0x1b82,
+ 0x1ba1, 0x1ba1,
+ 0x1ba6, 0x1ba7,
+ 0x1baa, 0x1baa,
+ 0x1c24, 0x1c2b,
+ 0x1c34, 0x1c35,
+ 0x1ce1, 0x1ce1,
+ 0x1cf2, 0x1cf2,
+ 0xa823, 0xa824,
+ 0xa827, 0xa827,
+ 0xa880, 0xa881,
+ 0xa8b4, 0xa8c3,
+ 0xa952, 0xa953,
+ 0xa983, 0xa983,
+ 0xa9b4, 0xa9b5,
+ 0xa9ba, 0xa9bb,
+ 0xa9bd, 0xa9c0,
+ 0xaa2f, 0xaa30,
+ 0xaa33, 0xaa34,
+ 0xaa4d, 0xaa4d,
+ 0xaa7b, 0xaa7b,
+ 0xabe3, 0xabe4,
+ 0xabe6, 0xabe7,
+ 0xabe9, 0xabea,
+ 0xabec, 0xabec,
+ 0x11082, 0x11082,
+ 0x110b0, 0x110b2,
+ 0x110b7, 0x110b8,
+ 0x1d165, 0x1d166,
+ 0x1d16d, 0x1d172,
+}; /* CR_Mc */
+
+/* 'Me': General Category */
+static const OnigCodePoint CR_Me[] = {
+ 5,
+ 0x0488, 0x0489,
+ 0x06de, 0x06de,
+ 0x20dd, 0x20e0,
+ 0x20e2, 0x20e4,
+ 0xa670, 0xa672,
+}; /* CR_Me */
+
+/* 'Mn': General Category */
+static const OnigCodePoint CR_Mn[] = {
+ 194,
+ 0x0300, 0x036f,
+ 0x0483, 0x0487,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x061a,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06df, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x07eb, 0x07f3,
+ 0x0816, 0x0819,
+ 0x081b, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082d,
+ 0x0900, 0x0902,
+ 0x093c, 0x093c,
+ 0x0941, 0x0948,
+ 0x094d, 0x094d,
+ 0x0951, 0x0955,
+ 0x0962, 0x0963,
+ 0x0981, 0x0981,
+ 0x09bc, 0x09bc,
+ 0x09c1, 0x09c4,
+ 0x09cd, 0x09cd,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a02,
+ 0x0a3c, 0x0a3c,
+ 0x0a41, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a70, 0x0a71,
+ 0x0a75, 0x0a75,
+ 0x0a81, 0x0a82,
+ 0x0abc, 0x0abc,
+ 0x0ac1, 0x0ac5,
+ 0x0ac7, 0x0ac8,
+ 0x0acd, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b01,
+ 0x0b3c, 0x0b3c,
+ 0x0b3f, 0x0b3f,
+ 0x0b41, 0x0b44,
+ 0x0b4d, 0x0b4d,
+ 0x0b56, 0x0b56,
+ 0x0b62, 0x0b63,
+ 0x0b82, 0x0b82,
+ 0x0bc0, 0x0bc0,
+ 0x0bcd, 0x0bcd,
+ 0x0c3e, 0x0c40,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c62, 0x0c63,
+ 0x0cbc, 0x0cbc,
+ 0x0cbf, 0x0cbf,
+ 0x0cc6, 0x0cc6,
+ 0x0ccc, 0x0ccd,
+ 0x0ce2, 0x0ce3,
+ 0x0d41, 0x0d44,
+ 0x0d4d, 0x0d4d,
+ 0x0d62, 0x0d63,
+ 0x0dca, 0x0dca,
+ 0x0dd2, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f71, 0x0f7e,
+ 0x0f80, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102d, 0x1030,
+ 0x1032, 0x1037,
+ 0x1039, 0x103a,
+ 0x103d, 0x103e,
+ 0x1058, 0x1059,
+ 0x105e, 0x1060,
+ 0x1071, 0x1074,
+ 0x1082, 0x1082,
+ 0x1085, 0x1086,
+ 0x108d, 0x108d,
+ 0x109d, 0x109d,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b7, 0x17bd,
+ 0x17c6, 0x17c6,
+ 0x17c9, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193b,
+ 0x1a17, 0x1a18,
+ 0x1a56, 0x1a56,
+ 0x1a58, 0x1a5e,
+ 0x1a60, 0x1a60,
+ 0x1a62, 0x1a62,
+ 0x1a65, 0x1a6c,
+ 0x1a73, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1b00, 0x1b03,
+ 0x1b34, 0x1b34,
+ 0x1b36, 0x1b3a,
+ 0x1b3c, 0x1b3c,
+ 0x1b42, 0x1b42,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1b81,
+ 0x1ba2, 0x1ba5,
+ 0x1ba8, 0x1ba9,
+ 0x1c2c, 0x1c33,
+ 0x1c36, 0x1c37,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce0,
+ 0x1ce2, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1dc0, 0x1de6,
+ 0x1dfd, 0x1dff,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20f0,
+ 0x2cef, 0x2cf1,
+ 0x2de0, 0x2dff,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa66f, 0xa66f,
+ 0xa67c, 0xa67d,
+ 0xa6f0, 0xa6f1,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa825, 0xa826,
+ 0xa8c4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa926, 0xa92d,
+ 0xa947, 0xa951,
+ 0xa980, 0xa982,
+ 0xa9b3, 0xa9b3,
+ 0xa9b6, 0xa9b9,
+ 0xa9bc, 0xa9bc,
+ 0xaa29, 0xaa2e,
+ 0xaa31, 0xaa32,
+ 0xaa35, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4c,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabf,
+ 0xaac1, 0xaac1,
+ 0xabe5, 0xabe5,
+ 0xabe8, 0xabe8,
+ 0xabed, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0x101fd, 0x101fd,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x11080, 0x11081,
+ 0x110b3, 0x110b6,
+ 0x110b9, 0x110ba,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef,
+}; /* CR_Mn */
+
+/* 'N': Major Category */
+static const OnigCodePoint CR_N[] = {
+ 81,
+ 0x0030, 0x0039,
+ 0x00b2, 0x00b3,
+ 0x00b9, 0x00b9,
+ 0x00bc, 0x00be,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x09f4, 0x09f9,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bf2,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7e,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d75,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f33,
+ 0x1040, 0x1049,
+ 0x1090, 0x1099,
+ 0x1369, 0x137c,
+ 0x16ee, 0x16f0,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19da,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1b50, 0x1b59,
+ 0x1bb0, 0x1bb9,
+ 0x1c40, 0x1c49,
+ 0x1c50, 0x1c59,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2150, 0x2182,
+ 0x2185, 0x2189,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0xa620, 0xa629,
+ 0xa6e6, 0xa6ef,
+ 0xa830, 0xa835,
+ 0xa8d0, 0xa8d9,
+ 0xa900, 0xa909,
+ 0xa9d0, 0xa9d9,
+ 0xaa50, 0xaa59,
+ 0xabf0, 0xabf9,
+ 0xff10, 0xff19,
+ 0x10107, 0x10133,
+ 0x10140, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10320, 0x10323,
+ 0x10341, 0x10341,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5,
+ 0x104a0, 0x104a9,
+ 0x10858, 0x1085f,
+ 0x10916, 0x1091b,
+ 0x10a40, 0x10a47,
+ 0x10a7d, 0x10a7e,
+ 0x10b58, 0x10b5f,
+ 0x10b78, 0x10b7f,
+ 0x10e60, 0x10e7e,
+ 0x12400, 0x12462,
+ 0x1d360, 0x1d371,
+ 0x1d7ce, 0x1d7ff,
+ 0x1f100, 0x1f10a,
+}; /* CR_N */
+
+/* 'Nd': General Category */
+static const OnigCodePoint CR_Nd[] = {
+ 37,
+ 0x0030, 0x0039,
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x07c0, 0x07c9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be6, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1090, 0x1099,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0x19d0, 0x19da,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1b50, 0x1b59,
+ 0x1bb0, 0x1bb9,
+ 0x1c40, 0x1c49,
+ 0x1c50, 0x1c59,
+ 0xa620, 0xa629,
+ 0xa8d0, 0xa8d9,
+ 0xa900, 0xa909,
+ 0xa9d0, 0xa9d9,
+ 0xaa50, 0xaa59,
+ 0xabf0, 0xabf9,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff,
+}; /* CR_Nd */
+
+/* 'Nl': General Category */
+static const OnigCodePoint CR_Nl[] = {
+ 12,
+ 0x16ee, 0x16f0,
+ 0x2160, 0x2182,
+ 0x2185, 0x2188,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0xa6e6, 0xa6ef,
+ 0x10140, 0x10174,
+ 0x10341, 0x10341,
+ 0x1034a, 0x1034a,
+ 0x103d1, 0x103d5,
+ 0x12400, 0x12462,
+}; /* CR_Nl */
+
+/* 'No': General Category */
+static const OnigCodePoint CR_No[] = {
+ 38,
+ 0x00b2, 0x00b3,
+ 0x00b9, 0x00b9,
+ 0x00bc, 0x00be,
+ 0x09f4, 0x09f9,
+ 0x0bf0, 0x0bf2,
+ 0x0c78, 0x0c7e,
+ 0x0d70, 0x0d75,
+ 0x0f2a, 0x0f33,
+ 0x1369, 0x137c,
+ 0x17f0, 0x17f9,
+ 0x2070, 0x2070,
+ 0x2074, 0x2079,
+ 0x2080, 0x2089,
+ 0x2150, 0x215f,
+ 0x2189, 0x2189,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x2cfd, 0x2cfd,
+ 0x3192, 0x3195,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0xa830, 0xa835,
+ 0x10107, 0x10133,
+ 0x10175, 0x10178,
+ 0x1018a, 0x1018a,
+ 0x10320, 0x10323,
+ 0x10858, 0x1085f,
+ 0x10916, 0x1091b,
+ 0x10a40, 0x10a47,
+ 0x10a7d, 0x10a7e,
+ 0x10b58, 0x10b5f,
+ 0x10b78, 0x10b7f,
+ 0x10e60, 0x10e7e,
+ 0x1d360, 0x1d371,
+ 0x1f100, 0x1f10a,
+}; /* CR_No */
+
+/* 'P': Major Category */
+static const OnigCodePoint CR_P[] = {
+ 129,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d,
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x0609, 0x060a,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x07f7, 0x07f9,
+ 0x0830, 0x083e,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd4,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x1400, 0x1400,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x1aa0, 0x1aa6,
+ 0x1aa8, 0x1aad,
+ 0x1b5a, 0x1b60,
+ 0x1c3b, 0x1c3f,
+ 0x1c7e, 0x1c7f,
+ 0x1cd3, 0x1cd3,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x205e,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x2768, 0x2775,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27ef,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e2e,
+ 0x2e30, 0x2e31,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xa4fe, 0xa4ff,
+ 0xa60d, 0xa60f,
+ 0xa673, 0xa673,
+ 0xa67e, 0xa67e,
+ 0xa6f2, 0xa6f7,
+ 0xa874, 0xa877,
+ 0xa8ce, 0xa8cf,
+ 0xa8f8, 0xa8fa,
+ 0xa92e, 0xa92f,
+ 0xa95f, 0xa95f,
+ 0xa9c1, 0xa9cd,
+ 0xa9de, 0xa9df,
+ 0xaa5c, 0xaa5f,
+ 0xaade, 0xaadf,
+ 0xabeb, 0xabeb,
+ 0xfd3e, 0xfd3f,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x103d0, 0x103d0,
+ 0x10857, 0x10857,
+ 0x1091f, 0x1091f,
+ 0x1093f, 0x1093f,
+ 0x10a50, 0x10a58,
+ 0x10a7f, 0x10a7f,
+ 0x10b39, 0x10b3f,
+ 0x110bb, 0x110bc,
+ 0x110be, 0x110c1,
+ 0x12470, 0x12473,
+}; /* CR_P */
+
+/* 'Pc': General Category */
+static const OnigCodePoint CR_Pc[] = {
+ 6,
+ 0x005f, 0x005f,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xff3f, 0xff3f,
+}; /* CR_Pc */
+
+/* 'Pd': General Category */
+static const OnigCodePoint CR_Pd[] = {
+ 15,
+ 0x002d, 0x002d,
+ 0x058a, 0x058a,
+ 0x05be, 0x05be,
+ 0x1400, 0x1400,
+ 0x1806, 0x1806,
+ 0x2010, 0x2015,
+ 0x2e17, 0x2e17,
+ 0x2e1a, 0x2e1a,
+ 0x301c, 0x301c,
+ 0x3030, 0x3030,
+ 0x30a0, 0x30a0,
+ 0xfe31, 0xfe32,
+ 0xfe58, 0xfe58,
+ 0xfe63, 0xfe63,
+ 0xff0d, 0xff0d,
+}; /* CR_Pd */
+
+/* 'Pe': General Category */
+static const OnigCodePoint CR_Pe[] = {
+ 70,
+ 0x0029, 0x0029,
+ 0x005d, 0x005d,
+ 0x007d, 0x007d,
+ 0x0f3b, 0x0f3b,
+ 0x0f3d, 0x0f3d,
+ 0x169c, 0x169c,
+ 0x2046, 0x2046,
+ 0x207e, 0x207e,
+ 0x208e, 0x208e,
+ 0x232a, 0x232a,
+ 0x2769, 0x2769,
+ 0x276b, 0x276b,
+ 0x276d, 0x276d,
+ 0x276f, 0x276f,
+ 0x2771, 0x2771,
+ 0x2773, 0x2773,
+ 0x2775, 0x2775,
+ 0x27c6, 0x27c6,
+ 0x27e7, 0x27e7,
+ 0x27e9, 0x27e9,
+ 0x27eb, 0x27eb,
+ 0x27ed, 0x27ed,
+ 0x27ef, 0x27ef,
+ 0x2984, 0x2984,
+ 0x2986, 0x2986,
+ 0x2988, 0x2988,
+ 0x298a, 0x298a,
+ 0x298c, 0x298c,
+ 0x298e, 0x298e,
+ 0x2990, 0x2990,
+ 0x2992, 0x2992,
+ 0x2994, 0x2994,
+ 0x2996, 0x2996,
+ 0x2998, 0x2998,
+ 0x29d9, 0x29d9,
+ 0x29db, 0x29db,
+ 0x29fd, 0x29fd,
+ 0x2e23, 0x2e23,
+ 0x2e25, 0x2e25,
+ 0x2e27, 0x2e27,
+ 0x2e29, 0x2e29,
+ 0x3009, 0x3009,
+ 0x300b, 0x300b,
+ 0x300d, 0x300d,
+ 0x300f, 0x300f,
+ 0x3011, 0x3011,
+ 0x3015, 0x3015,
+ 0x3017, 0x3017,
+ 0x3019, 0x3019,
+ 0x301b, 0x301b,
+ 0x301e, 0x301f,
+ 0xfd3f, 0xfd3f,
+ 0xfe18, 0xfe18,
+ 0xfe36, 0xfe36,
+ 0xfe38, 0xfe38,
+ 0xfe3a, 0xfe3a,
+ 0xfe3c, 0xfe3c,
+ 0xfe3e, 0xfe3e,
+ 0xfe40, 0xfe40,
+ 0xfe42, 0xfe42,
+ 0xfe44, 0xfe44,
+ 0xfe48, 0xfe48,
+ 0xfe5a, 0xfe5a,
+ 0xfe5c, 0xfe5c,
+ 0xfe5e, 0xfe5e,
+ 0xff09, 0xff09,
+ 0xff3d, 0xff3d,
+ 0xff5d, 0xff5d,
+ 0xff60, 0xff60,
+ 0xff63, 0xff63,
+}; /* CR_Pe */
+
+/* 'Pf': General Category */
+static const OnigCodePoint CR_Pf[] = {
+ 10,
+ 0x00bb, 0x00bb,
+ 0x2019, 0x2019,
+ 0x201d, 0x201d,
+ 0x203a, 0x203a,
+ 0x2e03, 0x2e03,
+ 0x2e05, 0x2e05,
+ 0x2e0a, 0x2e0a,
+ 0x2e0d, 0x2e0d,
+ 0x2e1d, 0x2e1d,
+ 0x2e21, 0x2e21,
+}; /* CR_Pf */
+
+/* 'Pi': General Category */
+static const OnigCodePoint CR_Pi[] = {
+ 11,
+ 0x00ab, 0x00ab,
+ 0x2018, 0x2018,
+ 0x201b, 0x201c,
+ 0x201f, 0x201f,
+ 0x2039, 0x2039,
+ 0x2e02, 0x2e02,
+ 0x2e04, 0x2e04,
+ 0x2e09, 0x2e09,
+ 0x2e0c, 0x2e0c,
+ 0x2e1c, 0x2e1c,
+ 0x2e20, 0x2e20,
+}; /* CR_Pi */
+
+/* 'Po': General Category */
+static const OnigCodePoint CR_Po[] = {
+ 124,
+ 0x0021, 0x0023,
+ 0x0025, 0x0027,
+ 0x002a, 0x002a,
+ 0x002c, 0x002c,
+ 0x002e, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005c, 0x005c,
+ 0x00a1, 0x00a1,
+ 0x00b7, 0x00b7,
+ 0x00bf, 0x00bf,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x0589,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05f3, 0x05f4,
+ 0x0609, 0x060a,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x07f7, 0x07f9,
+ 0x0830, 0x083e,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f85, 0x0f85,
+ 0x0fd0, 0x0fd4,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x1805,
+ 0x1807, 0x180a,
+ 0x1944, 0x1945,
+ 0x19de, 0x19df,
+ 0x1a1e, 0x1a1f,
+ 0x1aa0, 0x1aa6,
+ 0x1aa8, 0x1aad,
+ 0x1b5a, 0x1b60,
+ 0x1c3b, 0x1c3f,
+ 0x1c7e, 0x1c7f,
+ 0x1cd3, 0x1cd3,
+ 0x2016, 0x2017,
+ 0x2020, 0x2027,
+ 0x2030, 0x2038,
+ 0x203b, 0x203e,
+ 0x2041, 0x2043,
+ 0x2047, 0x2051,
+ 0x2053, 0x2053,
+ 0x2055, 0x205e,
+ 0x2cf9, 0x2cfc,
+ 0x2cfe, 0x2cff,
+ 0x2e00, 0x2e01,
+ 0x2e06, 0x2e08,
+ 0x2e0b, 0x2e0b,
+ 0x2e0e, 0x2e16,
+ 0x2e18, 0x2e19,
+ 0x2e1b, 0x2e1b,
+ 0x2e1e, 0x2e1f,
+ 0x2e2a, 0x2e2e,
+ 0x2e30, 0x2e31,
+ 0x3001, 0x3003,
+ 0x303d, 0x303d,
+ 0x30fb, 0x30fb,
+ 0xa4fe, 0xa4ff,
+ 0xa60d, 0xa60f,
+ 0xa673, 0xa673,
+ 0xa67e, 0xa67e,
+ 0xa6f2, 0xa6f7,
+ 0xa874, 0xa877,
+ 0xa8ce, 0xa8cf,
+ 0xa8f8, 0xa8fa,
+ 0xa92e, 0xa92f,
+ 0xa95f, 0xa95f,
+ 0xa9c1, 0xa9cd,
+ 0xa9de, 0xa9df,
+ 0xaa5c, 0xaa5f,
+ 0xaade, 0xaadf,
+ 0xabeb, 0xabeb,
+ 0xfe10, 0xfe16,
+ 0xfe19, 0xfe19,
+ 0xfe30, 0xfe30,
+ 0xfe45, 0xfe46,
+ 0xfe49, 0xfe4c,
+ 0xfe50, 0xfe52,
+ 0xfe54, 0xfe57,
+ 0xfe5f, 0xfe61,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff07,
+ 0xff0a, 0xff0a,
+ 0xff0c, 0xff0c,
+ 0xff0e, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3c, 0xff3c,
+ 0xff61, 0xff61,
+ 0xff64, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f,
+ 0x103d0, 0x103d0,
+ 0x10857, 0x10857,
+ 0x1091f, 0x1091f,
+ 0x1093f, 0x1093f,
+ 0x10a50, 0x10a58,
+ 0x10a7f, 0x10a7f,
+ 0x10b39, 0x10b3f,
+ 0x110bb, 0x110bc,
+ 0x110be, 0x110c1,
+ 0x12470, 0x12473,
+}; /* CR_Po */
+
+/* 'Ps': General Category */
+static const OnigCodePoint CR_Ps[] = {
+ 72,
+ 0x0028, 0x0028,
+ 0x005b, 0x005b,
+ 0x007b, 0x007b,
+ 0x0f3a, 0x0f3a,
+ 0x0f3c, 0x0f3c,
+ 0x169b, 0x169b,
+ 0x201a, 0x201a,
+ 0x201e, 0x201e,
+ 0x2045, 0x2045,
+ 0x207d, 0x207d,
+ 0x208d, 0x208d,
+ 0x2329, 0x2329,
+ 0x2768, 0x2768,
+ 0x276a, 0x276a,
+ 0x276c, 0x276c,
+ 0x276e, 0x276e,
+ 0x2770, 0x2770,
+ 0x2772, 0x2772,
+ 0x2774, 0x2774,
+ 0x27c5, 0x27c5,
+ 0x27e6, 0x27e6,
+ 0x27e8, 0x27e8,
+ 0x27ea, 0x27ea,
+ 0x27ec, 0x27ec,
+ 0x27ee, 0x27ee,
+ 0x2983, 0x2983,
+ 0x2985, 0x2985,
+ 0x2987, 0x2987,
+ 0x2989, 0x2989,
+ 0x298b, 0x298b,
+ 0x298d, 0x298d,
+ 0x298f, 0x298f,
+ 0x2991, 0x2991,
+ 0x2993, 0x2993,
+ 0x2995, 0x2995,
+ 0x2997, 0x2997,
+ 0x29d8, 0x29d8,
+ 0x29da, 0x29da,
+ 0x29fc, 0x29fc,
+ 0x2e22, 0x2e22,
+ 0x2e24, 0x2e24,
+ 0x2e26, 0x2e26,
+ 0x2e28, 0x2e28,
+ 0x3008, 0x3008,
+ 0x300a, 0x300a,
+ 0x300c, 0x300c,
+ 0x300e, 0x300e,
+ 0x3010, 0x3010,
+ 0x3014, 0x3014,
+ 0x3016, 0x3016,
+ 0x3018, 0x3018,
+ 0x301a, 0x301a,
+ 0x301d, 0x301d,
+ 0xfd3e, 0xfd3e,
+ 0xfe17, 0xfe17,
+ 0xfe35, 0xfe35,
+ 0xfe37, 0xfe37,
+ 0xfe39, 0xfe39,
+ 0xfe3b, 0xfe3b,
+ 0xfe3d, 0xfe3d,
+ 0xfe3f, 0xfe3f,
+ 0xfe41, 0xfe41,
+ 0xfe43, 0xfe43,
+ 0xfe47, 0xfe47,
+ 0xfe59, 0xfe59,
+ 0xfe5b, 0xfe5b,
+ 0xfe5d, 0xfe5d,
+ 0xff08, 0xff08,
+ 0xff3b, 0xff3b,
+ 0xff5b, 0xff5b,
+ 0xff5f, 0xff5f,
+ 0xff62, 0xff62,
+}; /* CR_Ps */
+
+/* 'S': Major Category */
+static const OnigCodePoint CR_S[] = {
+ 198,
+ 0x0024, 0x0024,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00a2, 0x00a9,
+ 0x00ac, 0x00ac,
+ 0x00ae, 0x00b1,
+ 0x00b4, 0x00b4,
+ 0x00b6, 0x00b6,
+ 0x00b8, 0x00b8,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x02c2, 0x02c5,
+ 0x02d2, 0x02df,
+ 0x02e5, 0x02eb,
+ 0x02ed, 0x02ed,
+ 0x02ef, 0x02ff,
+ 0x0375, 0x0375,
+ 0x0384, 0x0385,
+ 0x03f6, 0x03f6,
+ 0x0482, 0x0482,
+ 0x0606, 0x0608,
+ 0x060b, 0x060b,
+ 0x060e, 0x060f,
+ 0x06e9, 0x06e9,
+ 0x06fd, 0x06fe,
+ 0x07f6, 0x07f6,
+ 0x09f2, 0x09f3,
+ 0x09fa, 0x09fb,
+ 0x0af1, 0x0af1,
+ 0x0b70, 0x0b70,
+ 0x0bf3, 0x0bfa,
+ 0x0c7f, 0x0c7f,
+ 0x0cf1, 0x0cf2,
+ 0x0d79, 0x0d79,
+ 0x0e3f, 0x0e3f,
+ 0x0f01, 0x0f03,
+ 0x0f13, 0x0f17,
+ 0x0f1a, 0x0f1f,
+ 0x0f34, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fce, 0x0fcf,
+ 0x0fd5, 0x0fd8,
+ 0x109e, 0x109f,
+ 0x1360, 0x1360,
+ 0x1390, 0x1399,
+ 0x17db, 0x17db,
+ 0x1940, 0x1940,
+ 0x19e0, 0x19ff,
+ 0x1b61, 0x1b6a,
+ 0x1b74, 0x1b7c,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x20a0, 0x20b8,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x213a, 0x213b,
+ 0x2140, 0x2144,
+ 0x214a, 0x214d,
+ 0x214f, 0x214f,
+ 0x2190, 0x2328,
+ 0x232b, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2767,
+ 0x2794, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27c4,
+ 0x27c7, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x27e5,
+ 0x27f0, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x309b, 0x309c,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31e3,
+ 0x3200, 0x321e,
+ 0x322a, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa700, 0xa716,
+ 0xa720, 0xa721,
+ 0xa789, 0xa78a,
+ 0xa828, 0xa82b,
+ 0xa836, 0xa839,
+ 0xaa77, 0xaa79,
+ 0xfb29, 0xfb29,
+ 0xfdfc, 0xfdfd,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10102, 0x10102,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fc,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f200, 0x1f200,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+}; /* CR_S */
+
+/* 'Sc': General Category */
+static const OnigCodePoint CR_Sc[] = {
+ 16,
+ 0x0024, 0x0024,
+ 0x00a2, 0x00a5,
+ 0x060b, 0x060b,
+ 0x09f2, 0x09f3,
+ 0x09fb, 0x09fb,
+ 0x0af1, 0x0af1,
+ 0x0bf9, 0x0bf9,
+ 0x0e3f, 0x0e3f,
+ 0x17db, 0x17db,
+ 0x20a0, 0x20b8,
+ 0xa838, 0xa838,
+ 0xfdfc, 0xfdfc,
+ 0xfe69, 0xfe69,
+ 0xff04, 0xff04,
+ 0xffe0, 0xffe1,
+ 0xffe5, 0xffe6,
+}; /* CR_Sc */
+
+/* 'Sk': General Category */
+static const OnigCodePoint CR_Sk[] = {
+ 26,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x00a8, 0x00a8,
+ 0x00af, 0x00af,
+ 0x00b4, 0x00b4,
+ 0x00b8, 0x00b8,
+ 0x02c2, 0x02c5,
+ 0x02d2, 0x02df,
+ 0x02e5, 0x02eb,
+ 0x02ed, 0x02ed,
+ 0x02ef, 0x02ff,
+ 0x0375, 0x0375,
+ 0x0384, 0x0385,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x309b, 0x309c,
+ 0xa700, 0xa716,
+ 0xa720, 0xa721,
+ 0xa789, 0xa78a,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xffe3, 0xffe3,
+}; /* CR_Sk */
+
+/* 'Sm': General Category */
+static const OnigCodePoint CR_Sm[] = {
+ 65,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00ac, 0x00ac,
+ 0x00b1, 0x00b1,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x03f6, 0x03f6,
+ 0x0606, 0x0608,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x207a, 0x207c,
+ 0x208a, 0x208c,
+ 0x2140, 0x2144,
+ 0x214b, 0x214b,
+ 0x2190, 0x2194,
+ 0x219a, 0x219b,
+ 0x21a0, 0x21a0,
+ 0x21a3, 0x21a3,
+ 0x21a6, 0x21a6,
+ 0x21ae, 0x21ae,
+ 0x21ce, 0x21cf,
+ 0x21d2, 0x21d2,
+ 0x21d4, 0x21d4,
+ 0x21f4, 0x22ff,
+ 0x2308, 0x230b,
+ 0x2320, 0x2321,
+ 0x237c, 0x237c,
+ 0x239b, 0x23b3,
+ 0x23dc, 0x23e1,
+ 0x25b7, 0x25b7,
+ 0x25c1, 0x25c1,
+ 0x25f8, 0x25ff,
+ 0x266f, 0x266f,
+ 0x27c0, 0x27c4,
+ 0x27c7, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x27e5,
+ 0x27f0, 0x27ff,
+ 0x2900, 0x2982,
+ 0x2999, 0x29d7,
+ 0x29dc, 0x29fb,
+ 0x29fe, 0x2aff,
+ 0x2b30, 0x2b44,
+ 0x2b47, 0x2b4c,
+ 0xfb29, 0xfb29,
+ 0xfe62, 0xfe62,
+ 0xfe64, 0xfe66,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe2, 0xffe2,
+ 0xffe9, 0xffec,
+ 0x1d6c1, 0x1d6c1,
+ 0x1d6db, 0x1d6db,
+ 0x1d6fb, 0x1d6fb,
+ 0x1d715, 0x1d715,
+ 0x1d735, 0x1d735,
+ 0x1d74f, 0x1d74f,
+ 0x1d76f, 0x1d76f,
+ 0x1d789, 0x1d789,
+ 0x1d7a9, 0x1d7a9,
+ 0x1d7c3, 0x1d7c3,
+}; /* CR_Sm */
+
+/* 'So': General Category */
+static const OnigCodePoint CR_So[] = {
+ 154,
+ 0x00a6, 0x00a7,
+ 0x00a9, 0x00a9,
+ 0x00ae, 0x00ae,
+ 0x00b0, 0x00b0,
+ 0x00b6, 0x00b6,
+ 0x0482, 0x0482,
+ 0x060e, 0x060f,
+ 0x06e9, 0x06e9,
+ 0x06fd, 0x06fe,
+ 0x07f6, 0x07f6,
+ 0x09fa, 0x09fa,
+ 0x0b70, 0x0b70,
+ 0x0bf3, 0x0bf8,
+ 0x0bfa, 0x0bfa,
+ 0x0c7f, 0x0c7f,
+ 0x0cf1, 0x0cf2,
+ 0x0d79, 0x0d79,
+ 0x0f01, 0x0f03,
+ 0x0f13, 0x0f17,
+ 0x0f1a, 0x0f1f,
+ 0x0f34, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fce, 0x0fcf,
+ 0x0fd5, 0x0fd8,
+ 0x109e, 0x109f,
+ 0x1360, 0x1360,
+ 0x1390, 0x1399,
+ 0x1940, 0x1940,
+ 0x19e0, 0x19ff,
+ 0x1b61, 0x1b6a,
+ 0x1b74, 0x1b7c,
+ 0x2100, 0x2101,
+ 0x2103, 0x2106,
+ 0x2108, 0x2109,
+ 0x2114, 0x2114,
+ 0x2116, 0x2118,
+ 0x211e, 0x2123,
+ 0x2125, 0x2125,
+ 0x2127, 0x2127,
+ 0x2129, 0x2129,
+ 0x212e, 0x212e,
+ 0x213a, 0x213b,
+ 0x214a, 0x214a,
+ 0x214c, 0x214d,
+ 0x214f, 0x214f,
+ 0x2195, 0x2199,
+ 0x219c, 0x219f,
+ 0x21a1, 0x21a2,
+ 0x21a4, 0x21a5,
+ 0x21a7, 0x21ad,
+ 0x21af, 0x21cd,
+ 0x21d0, 0x21d1,
+ 0x21d3, 0x21d3,
+ 0x21d5, 0x21f3,
+ 0x2300, 0x2307,
+ 0x230c, 0x231f,
+ 0x2322, 0x2328,
+ 0x232b, 0x237b,
+ 0x237d, 0x239a,
+ 0x23b4, 0x23db,
+ 0x23e2, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x249c, 0x24e9,
+ 0x2500, 0x25b6,
+ 0x25b8, 0x25c0,
+ 0x25c2, 0x25f7,
+ 0x2600, 0x266e,
+ 0x2670, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2767,
+ 0x2794, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x2800, 0x28ff,
+ 0x2b00, 0x2b2f,
+ 0x2b45, 0x2b46,
+ 0x2b50, 0x2b59,
+ 0x2ce5, 0x2cea,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3004, 0x3004,
+ 0x3012, 0x3013,
+ 0x3020, 0x3020,
+ 0x3036, 0x3037,
+ 0x303e, 0x303f,
+ 0x3190, 0x3191,
+ 0x3196, 0x319f,
+ 0x31c0, 0x31e3,
+ 0x3200, 0x321e,
+ 0x322a, 0x3250,
+ 0x3260, 0x327f,
+ 0x328a, 0x32b0,
+ 0x32c0, 0x32fe,
+ 0x3300, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa490, 0xa4c6,
+ 0xa828, 0xa82b,
+ 0xa836, 0xa837,
+ 0xa839, 0xa839,
+ 0xaa77, 0xaa79,
+ 0xfdfd, 0xfdfd,
+ 0xffe4, 0xffe4,
+ 0xffe8, 0xffe8,
+ 0xffed, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10102, 0x10102,
+ 0x10137, 0x1013f,
+ 0x10179, 0x10189,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fc,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d164,
+ 0x1d16a, 0x1d16c,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f200, 0x1f200,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+}; /* CR_So */
+
+/* 'Z': Major Category */
+static const OnigCodePoint CR_Z[] = {
+ 9,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+}; /* CR_Z */
+
+/* 'Zl': General Category */
+static const OnigCodePoint CR_Zl[] = {
+ 1,
+ 0x2028, 0x2028,
+}; /* CR_Zl */
+
+/* 'Zp': General Category */
+static const OnigCodePoint CR_Zp[] = {
+ 1,
+ 0x2029, 0x2029,
+}; /* CR_Zp */
+
+/* 'Zs': General Category */
+static const OnigCodePoint CR_Zs[] = {
+ 8,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+}; /* CR_Zs */
+
+/* 'Math': Derived Property */
+static const OnigCodePoint CR_Math[] = {
+ 105,
+ 0x002b, 0x002b,
+ 0x003c, 0x003e,
+ 0x005e, 0x005e,
+ 0x007c, 0x007c,
+ 0x007e, 0x007e,
+ 0x00ac, 0x00ac,
+ 0x00b1, 0x00b1,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x03d0, 0x03d2,
+ 0x03d5, 0x03d5,
+ 0x03f0, 0x03f1,
+ 0x03f4, 0x03f6,
+ 0x0606, 0x0608,
+ 0x2016, 0x2016,
+ 0x2032, 0x2034,
+ 0x2040, 0x2040,
+ 0x2044, 0x2044,
+ 0x2052, 0x2052,
+ 0x2061, 0x2064,
+ 0x207a, 0x207e,
+ 0x208a, 0x208e,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20e6,
+ 0x20eb, 0x20ef,
+ 0x2102, 0x2102,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2128, 0x2129,
+ 0x212c, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2138,
+ 0x213c, 0x2149,
+ 0x214b, 0x214b,
+ 0x2190, 0x21a7,
+ 0x21a9, 0x21ae,
+ 0x21b0, 0x21b1,
+ 0x21b6, 0x21b7,
+ 0x21bc, 0x21db,
+ 0x21dd, 0x21dd,
+ 0x21e4, 0x21e5,
+ 0x21f4, 0x22ff,
+ 0x2308, 0x230b,
+ 0x2320, 0x2321,
+ 0x237c, 0x237c,
+ 0x239b, 0x23b5,
+ 0x23b7, 0x23b7,
+ 0x23d0, 0x23d0,
+ 0x23dc, 0x23e2,
+ 0x25a0, 0x25a1,
+ 0x25ae, 0x25b7,
+ 0x25bc, 0x25c1,
+ 0x25c6, 0x25c7,
+ 0x25ca, 0x25cb,
+ 0x25cf, 0x25d3,
+ 0x25e2, 0x25e2,
+ 0x25e4, 0x25e4,
+ 0x25e7, 0x25ec,
+ 0x25f8, 0x25ff,
+ 0x2605, 0x2606,
+ 0x2640, 0x2640,
+ 0x2642, 0x2642,
+ 0x2660, 0x2663,
+ 0x266d, 0x266f,
+ 0x27c0, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x27ff,
+ 0x2900, 0x2aff,
+ 0x2b30, 0x2b44,
+ 0x2b47, 0x2b4c,
+ 0xfb29, 0xfb29,
+ 0xfe61, 0xfe66,
+ 0xfe68, 0xfe68,
+ 0xff0b, 0xff0b,
+ 0xff1c, 0xff1e,
+ 0xff3c, 0xff3c,
+ 0xff3e, 0xff3e,
+ 0xff5c, 0xff5c,
+ 0xff5e, 0xff5e,
+ 0xffe2, 0xffe2,
+ 0xffe9, 0xffec,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+}; /* CR_Math */
+
+/* 'Alphabetic': Derived Property */
+static const OnigCodePoint CR_Alphabetic[] = {
+ 474,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0345, 0x0345,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05b0, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x061a,
+ 0x0621, 0x0657,
+ 0x0659, 0x065e,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06e1, 0x06e8,
+ 0x06ed, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x073f,
+ 0x074d, 0x07b1,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x0817,
+ 0x081a, 0x082c,
+ 0x0900, 0x0939,
+ 0x093d, 0x094c,
+ 0x094e, 0x094e,
+ 0x0950, 0x0950,
+ 0x0955, 0x0955,
+ 0x0958, 0x0963,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09ce, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4c,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4c,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccc,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d7a, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e46,
+ 0x0e4d, 0x0e4d,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ecd, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f81,
+ 0x0f88, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x1000, 0x1036,
+ 0x1038, 0x1038,
+ 0x103b, 0x103f,
+ 0x1050, 0x1062,
+ 0x1065, 0x1068,
+ 0x106e, 0x1086,
+ 0x108e, 0x108e,
+ 0x109c, 0x109d,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1713,
+ 0x1720, 0x1733,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17c8,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x1938,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x1a00, 0x1a1b,
+ 0x1a20, 0x1a5e,
+ 0x1a61, 0x1a74,
+ 0x1aa7, 0x1aa7,
+ 0x1b00, 0x1b33,
+ 0x1b35, 0x1b43,
+ 0x1b45, 0x1b4b,
+ 0x1b80, 0x1ba9,
+ 0x1bae, 0x1baf,
+ 0x1c00, 0x1c35,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf2,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x24b6, 0x24e9,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2dff,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6ef,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa827,
+ 0xa840, 0xa873,
+ 0xa880, 0xa8c3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa92a,
+ 0xa930, 0xa952,
+ 0xa960, 0xa97c,
+ 0xa980, 0xa9bf,
+ 0xa9cf, 0xa9cf,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa80, 0xaabe,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabea,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11082, 0x110b8,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_Alphabetic */
+
+/* 'Lowercase': Derived Property */
+static const OnigCodePoint CR_Lowercase[] = {
+ 602,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0239,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0242, 0x0242,
+ 0x0247, 0x0247,
+ 0x0249, 0x0249,
+ 0x024b, 0x024b,
+ 0x024d, 0x024d,
+ 0x024f, 0x0293,
+ 0x0295, 0x02b8,
+ 0x02c0, 0x02c1,
+ 0x02e0, 0x02e4,
+ 0x0345, 0x0345,
+ 0x0371, 0x0371,
+ 0x0373, 0x0373,
+ 0x0377, 0x0377,
+ 0x037a, 0x037d,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fc,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04cf,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x04fb, 0x04fb,
+ 0x04fd, 0x04fd,
+ 0x04ff, 0x04ff,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0511, 0x0511,
+ 0x0513, 0x0513,
+ 0x0515, 0x0515,
+ 0x0517, 0x0517,
+ 0x0519, 0x0519,
+ 0x051b, 0x051b,
+ 0x051d, 0x051d,
+ 0x051f, 0x051f,
+ 0x0521, 0x0521,
+ 0x0523, 0x0523,
+ 0x0525, 0x0525,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1dbf,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9d,
+ 0x1e9f, 0x1e9f,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1efb, 0x1efb,
+ 0x1efd, 0x1efd,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2090, 0x2094,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213d,
+ 0x2146, 0x2149,
+ 0x214e, 0x214e,
+ 0x2170, 0x217f,
+ 0x2184, 0x2184,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c61, 0x2c61,
+ 0x2c65, 0x2c66,
+ 0x2c68, 0x2c68,
+ 0x2c6a, 0x2c6a,
+ 0x2c6c, 0x2c6c,
+ 0x2c71, 0x2c71,
+ 0x2c73, 0x2c74,
+ 0x2c76, 0x2c7d,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce4,
+ 0x2cec, 0x2cec,
+ 0x2cee, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0xa641, 0xa641,
+ 0xa643, 0xa643,
+ 0xa645, 0xa645,
+ 0xa647, 0xa647,
+ 0xa649, 0xa649,
+ 0xa64b, 0xa64b,
+ 0xa64d, 0xa64d,
+ 0xa64f, 0xa64f,
+ 0xa651, 0xa651,
+ 0xa653, 0xa653,
+ 0xa655, 0xa655,
+ 0xa657, 0xa657,
+ 0xa659, 0xa659,
+ 0xa65b, 0xa65b,
+ 0xa65d, 0xa65d,
+ 0xa65f, 0xa65f,
+ 0xa663, 0xa663,
+ 0xa665, 0xa665,
+ 0xa667, 0xa667,
+ 0xa669, 0xa669,
+ 0xa66b, 0xa66b,
+ 0xa66d, 0xa66d,
+ 0xa681, 0xa681,
+ 0xa683, 0xa683,
+ 0xa685, 0xa685,
+ 0xa687, 0xa687,
+ 0xa689, 0xa689,
+ 0xa68b, 0xa68b,
+ 0xa68d, 0xa68d,
+ 0xa68f, 0xa68f,
+ 0xa691, 0xa691,
+ 0xa693, 0xa693,
+ 0xa695, 0xa695,
+ 0xa697, 0xa697,
+ 0xa723, 0xa723,
+ 0xa725, 0xa725,
+ 0xa727, 0xa727,
+ 0xa729, 0xa729,
+ 0xa72b, 0xa72b,
+ 0xa72d, 0xa72d,
+ 0xa72f, 0xa731,
+ 0xa733, 0xa733,
+ 0xa735, 0xa735,
+ 0xa737, 0xa737,
+ 0xa739, 0xa739,
+ 0xa73b, 0xa73b,
+ 0xa73d, 0xa73d,
+ 0xa73f, 0xa73f,
+ 0xa741, 0xa741,
+ 0xa743, 0xa743,
+ 0xa745, 0xa745,
+ 0xa747, 0xa747,
+ 0xa749, 0xa749,
+ 0xa74b, 0xa74b,
+ 0xa74d, 0xa74d,
+ 0xa74f, 0xa74f,
+ 0xa751, 0xa751,
+ 0xa753, 0xa753,
+ 0xa755, 0xa755,
+ 0xa757, 0xa757,
+ 0xa759, 0xa759,
+ 0xa75b, 0xa75b,
+ 0xa75d, 0xa75d,
+ 0xa75f, 0xa75f,
+ 0xa761, 0xa761,
+ 0xa763, 0xa763,
+ 0xa765, 0xa765,
+ 0xa767, 0xa767,
+ 0xa769, 0xa769,
+ 0xa76b, 0xa76b,
+ 0xa76d, 0xa76d,
+ 0xa76f, 0xa778,
+ 0xa77a, 0xa77a,
+ 0xa77c, 0xa77c,
+ 0xa77f, 0xa77f,
+ 0xa781, 0xa781,
+ 0xa783, 0xa783,
+ 0xa785, 0xa785,
+ 0xa787, 0xa787,
+ 0xa78c, 0xa78c,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a5,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7cb, 0x1d7cb,
+}; /* CR_Lowercase */
+
+/* 'Uppercase': Derived Property */
+static const OnigCodePoint CR_Uppercase[] = {
+ 596,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0243, 0x0246,
+ 0x0248, 0x0248,
+ 0x024a, 0x024a,
+ 0x024c, 0x024c,
+ 0x024e, 0x024e,
+ 0x0370, 0x0370,
+ 0x0372, 0x0372,
+ 0x0376, 0x0376,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03cf, 0x03cf,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x04fa, 0x04fa,
+ 0x04fc, 0x04fc,
+ 0x04fe, 0x04fe,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0510, 0x0510,
+ 0x0512, 0x0512,
+ 0x0514, 0x0514,
+ 0x0516, 0x0516,
+ 0x0518, 0x0518,
+ 0x051a, 0x051a,
+ 0x051c, 0x051c,
+ 0x051e, 0x051e,
+ 0x0520, 0x0520,
+ 0x0522, 0x0522,
+ 0x0524, 0x0524,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1e9e, 0x1e9e,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1efa, 0x1efa,
+ 0x1efc, 0x1efc,
+ 0x1efe, 0x1efe,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0x2160, 0x216f,
+ 0x2183, 0x2183,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c60, 0x2c60,
+ 0x2c62, 0x2c64,
+ 0x2c67, 0x2c67,
+ 0x2c69, 0x2c69,
+ 0x2c6b, 0x2c6b,
+ 0x2c6d, 0x2c70,
+ 0x2c72, 0x2c72,
+ 0x2c75, 0x2c75,
+ 0x2c7e, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0x2ceb, 0x2ceb,
+ 0x2ced, 0x2ced,
+ 0xa640, 0xa640,
+ 0xa642, 0xa642,
+ 0xa644, 0xa644,
+ 0xa646, 0xa646,
+ 0xa648, 0xa648,
+ 0xa64a, 0xa64a,
+ 0xa64c, 0xa64c,
+ 0xa64e, 0xa64e,
+ 0xa650, 0xa650,
+ 0xa652, 0xa652,
+ 0xa654, 0xa654,
+ 0xa656, 0xa656,
+ 0xa658, 0xa658,
+ 0xa65a, 0xa65a,
+ 0xa65c, 0xa65c,
+ 0xa65e, 0xa65e,
+ 0xa662, 0xa662,
+ 0xa664, 0xa664,
+ 0xa666, 0xa666,
+ 0xa668, 0xa668,
+ 0xa66a, 0xa66a,
+ 0xa66c, 0xa66c,
+ 0xa680, 0xa680,
+ 0xa682, 0xa682,
+ 0xa684, 0xa684,
+ 0xa686, 0xa686,
+ 0xa688, 0xa688,
+ 0xa68a, 0xa68a,
+ 0xa68c, 0xa68c,
+ 0xa68e, 0xa68e,
+ 0xa690, 0xa690,
+ 0xa692, 0xa692,
+ 0xa694, 0xa694,
+ 0xa696, 0xa696,
+ 0xa722, 0xa722,
+ 0xa724, 0xa724,
+ 0xa726, 0xa726,
+ 0xa728, 0xa728,
+ 0xa72a, 0xa72a,
+ 0xa72c, 0xa72c,
+ 0xa72e, 0xa72e,
+ 0xa732, 0xa732,
+ 0xa734, 0xa734,
+ 0xa736, 0xa736,
+ 0xa738, 0xa738,
+ 0xa73a, 0xa73a,
+ 0xa73c, 0xa73c,
+ 0xa73e, 0xa73e,
+ 0xa740, 0xa740,
+ 0xa742, 0xa742,
+ 0xa744, 0xa744,
+ 0xa746, 0xa746,
+ 0xa748, 0xa748,
+ 0xa74a, 0xa74a,
+ 0xa74c, 0xa74c,
+ 0xa74e, 0xa74e,
+ 0xa750, 0xa750,
+ 0xa752, 0xa752,
+ 0xa754, 0xa754,
+ 0xa756, 0xa756,
+ 0xa758, 0xa758,
+ 0xa75a, 0xa75a,
+ 0xa75c, 0xa75c,
+ 0xa75e, 0xa75e,
+ 0xa760, 0xa760,
+ 0xa762, 0xa762,
+ 0xa764, 0xa764,
+ 0xa766, 0xa766,
+ 0xa768, 0xa768,
+ 0xa76a, 0xa76a,
+ 0xa76c, 0xa76c,
+ 0xa76e, 0xa76e,
+ 0xa779, 0xa779,
+ 0xa77b, 0xa77b,
+ 0xa77d, 0xa77e,
+ 0xa780, 0xa780,
+ 0xa782, 0xa782,
+ 0xa784, 0xa784,
+ 0xa786, 0xa786,
+ 0xa78b, 0xa78b,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8,
+ 0x1d7ca, 0x1d7ca,
+}; /* CR_Uppercase */
+
+/* 'Cased': Derived Property */
+static const OnigCodePoint CR_Cased[] = {
+ 110,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x01ba,
+ 0x01bc, 0x01bf,
+ 0x01c4, 0x0293,
+ 0x0295, 0x02b8,
+ 0x02c0, 0x02c1,
+ 0x02e0, 0x02e4,
+ 0x0345, 0x0345,
+ 0x0370, 0x0373,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0561, 0x0587,
+ 0x10a0, 0x10c5,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2134,
+ 0x2139, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x217f,
+ 0x2183, 0x2184,
+ 0x24b6, 0x24e9,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66d,
+ 0xa680, 0xa697,
+ 0xa722, 0xa787,
+ 0xa78b, 0xa78c,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0x10400, 0x1044f,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+}; /* CR_Cased */
+
+/* 'Case_Ignorable': Derived Property */
+static const OnigCodePoint CR_Case_Ignorable[] = {
+ 266,
+ 0x0027, 0x0027,
+ 0x002e, 0x002e,
+ 0x003a, 0x003a,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x00a8, 0x00a8,
+ 0x00ad, 0x00ad,
+ 0x00af, 0x00af,
+ 0x00b4, 0x00b4,
+ 0x00b7, 0x00b8,
+ 0x02b0, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x0384, 0x0385,
+ 0x0387, 0x0387,
+ 0x0483, 0x0489,
+ 0x0559, 0x0559,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05f4, 0x05f4,
+ 0x0600, 0x0603,
+ 0x0610, 0x061a,
+ 0x0640, 0x0640,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x070f, 0x070f,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x07eb, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0816, 0x082d,
+ 0x0900, 0x0902,
+ 0x093c, 0x093c,
+ 0x0941, 0x0948,
+ 0x094d, 0x094d,
+ 0x0951, 0x0955,
+ 0x0962, 0x0963,
+ 0x0971, 0x0971,
+ 0x0981, 0x0981,
+ 0x09bc, 0x09bc,
+ 0x09c1, 0x09c4,
+ 0x09cd, 0x09cd,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a02,
+ 0x0a3c, 0x0a3c,
+ 0x0a41, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a70, 0x0a71,
+ 0x0a75, 0x0a75,
+ 0x0a81, 0x0a82,
+ 0x0abc, 0x0abc,
+ 0x0ac1, 0x0ac5,
+ 0x0ac7, 0x0ac8,
+ 0x0acd, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b01,
+ 0x0b3c, 0x0b3c,
+ 0x0b3f, 0x0b3f,
+ 0x0b41, 0x0b44,
+ 0x0b4d, 0x0b4d,
+ 0x0b56, 0x0b56,
+ 0x0b62, 0x0b63,
+ 0x0b82, 0x0b82,
+ 0x0bc0, 0x0bc0,
+ 0x0bcd, 0x0bcd,
+ 0x0c3e, 0x0c40,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c62, 0x0c63,
+ 0x0cbc, 0x0cbc,
+ 0x0cbf, 0x0cbf,
+ 0x0cc6, 0x0cc6,
+ 0x0ccc, 0x0ccd,
+ 0x0ce2, 0x0ce3,
+ 0x0d41, 0x0d44,
+ 0x0d4d, 0x0d4d,
+ 0x0d62, 0x0d63,
+ 0x0dca, 0x0dca,
+ 0x0dd2, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e46, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f71, 0x0f7e,
+ 0x0f80, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102d, 0x1030,
+ 0x1032, 0x1037,
+ 0x1039, 0x103a,
+ 0x103d, 0x103e,
+ 0x1058, 0x1059,
+ 0x105e, 0x1060,
+ 0x1071, 0x1074,
+ 0x1082, 0x1082,
+ 0x1085, 0x1086,
+ 0x108d, 0x108d,
+ 0x109d, 0x109d,
+ 0x10fc, 0x10fc,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b4, 0x17b5,
+ 0x17b7, 0x17bd,
+ 0x17c6, 0x17c6,
+ 0x17c9, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1843, 0x1843,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193b,
+ 0x1a17, 0x1a18,
+ 0x1a56, 0x1a56,
+ 0x1a58, 0x1a5e,
+ 0x1a60, 0x1a60,
+ 0x1a62, 0x1a62,
+ 0x1a65, 0x1a6c,
+ 0x1a73, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1aa7, 0x1aa7,
+ 0x1b00, 0x1b03,
+ 0x1b34, 0x1b34,
+ 0x1b36, 0x1b3a,
+ 0x1b3c, 0x1b3c,
+ 0x1b42, 0x1b42,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1b81,
+ 0x1ba2, 0x1ba5,
+ 0x1ba8, 0x1ba9,
+ 0x1c2c, 0x1c33,
+ 0x1c36, 0x1c37,
+ 0x1c78, 0x1c7d,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce0,
+ 0x1ce2, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1d2c, 0x1d61,
+ 0x1d78, 0x1d78,
+ 0x1d9b, 0x1de6,
+ 0x1dfd, 0x1dff,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x200b, 0x200f,
+ 0x2018, 0x2019,
+ 0x2024, 0x2024,
+ 0x2027, 0x2027,
+ 0x202a, 0x202e,
+ 0x2060, 0x2064,
+ 0x206a, 0x206f,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20f0,
+ 0x2c7d, 0x2c7d,
+ 0x2cef, 0x2cf1,
+ 0x2d6f, 0x2d6f,
+ 0x2de0, 0x2dff,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3005,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303b,
+ 0x3099, 0x309e,
+ 0x30fc, 0x30fe,
+ 0xa015, 0xa015,
+ 0xa4f8, 0xa4fd,
+ 0xa60c, 0xa60c,
+ 0xa66f, 0xa672,
+ 0xa67c, 0xa67d,
+ 0xa67f, 0xa67f,
+ 0xa6f0, 0xa6f1,
+ 0xa700, 0xa721,
+ 0xa770, 0xa770,
+ 0xa788, 0xa78a,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa825, 0xa826,
+ 0xa8c4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa926, 0xa92d,
+ 0xa947, 0xa951,
+ 0xa980, 0xa982,
+ 0xa9b3, 0xa9b3,
+ 0xa9b6, 0xa9b9,
+ 0xa9bc, 0xa9bc,
+ 0xa9cf, 0xa9cf,
+ 0xaa29, 0xaa2e,
+ 0xaa31, 0xaa32,
+ 0xaa35, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4c,
+ 0xaa70, 0xaa70,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabf,
+ 0xaac1, 0xaac1,
+ 0xaadd, 0xaadd,
+ 0xabe5, 0xabe5,
+ 0xabe8, 0xabe8,
+ 0xabed, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe13, 0xfe13,
+ 0xfe20, 0xfe26,
+ 0xfe52, 0xfe52,
+ 0xfe55, 0xfe55,
+ 0xfeff, 0xfeff,
+ 0xff07, 0xff07,
+ 0xff0e, 0xff0e,
+ 0xff1a, 0xff1a,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0xffe3, 0xffe3,
+ 0xfff9, 0xfffb,
+ 0x101fd, 0x101fd,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x11080, 0x11081,
+ 0x110b3, 0x110b6,
+ 0x110b9, 0x110ba,
+ 0x110bd, 0x110bd,
+ 0x1d167, 0x1d169,
+ 0x1d173, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+}; /* CR_Case_Ignorable */
+
+/* 'Changes_When_Lowercased': Derived Property */
+static const OnigCodePoint CR_Changes_When_Lowercased[] = {
+ 557,
+ 0x0041, 0x005a,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c5,
+ 0x01c7, 0x01c8,
+ 0x01ca, 0x01cb,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f2,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0243, 0x0246,
+ 0x0248, 0x0248,
+ 0x024a, 0x024a,
+ 0x024c, 0x024c,
+ 0x024e, 0x024e,
+ 0x0370, 0x0370,
+ 0x0372, 0x0372,
+ 0x0376, 0x0376,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03cf, 0x03cf,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x04fa, 0x04fa,
+ 0x04fc, 0x04fc,
+ 0x04fe, 0x04fe,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0510, 0x0510,
+ 0x0512, 0x0512,
+ 0x0514, 0x0514,
+ 0x0516, 0x0516,
+ 0x0518, 0x0518,
+ 0x051a, 0x051a,
+ 0x051c, 0x051c,
+ 0x051e, 0x051e,
+ 0x0520, 0x0520,
+ 0x0522, 0x0522,
+ 0x0524, 0x0524,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1e9e, 0x1e9e,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1efa, 0x1efa,
+ 0x1efc, 0x1efc,
+ 0x1efe, 0x1efe,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1f88, 0x1f8f,
+ 0x1f98, 0x1f9f,
+ 0x1fa8, 0x1faf,
+ 0x1fb8, 0x1fbc,
+ 0x1fc8, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffc,
+ 0x2126, 0x2126,
+ 0x212a, 0x212b,
+ 0x2132, 0x2132,
+ 0x2160, 0x216f,
+ 0x2183, 0x2183,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c60, 0x2c60,
+ 0x2c62, 0x2c64,
+ 0x2c67, 0x2c67,
+ 0x2c69, 0x2c69,
+ 0x2c6b, 0x2c6b,
+ 0x2c6d, 0x2c70,
+ 0x2c72, 0x2c72,
+ 0x2c75, 0x2c75,
+ 0x2c7e, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0x2ceb, 0x2ceb,
+ 0x2ced, 0x2ced,
+ 0xa640, 0xa640,
+ 0xa642, 0xa642,
+ 0xa644, 0xa644,
+ 0xa646, 0xa646,
+ 0xa648, 0xa648,
+ 0xa64a, 0xa64a,
+ 0xa64c, 0xa64c,
+ 0xa64e, 0xa64e,
+ 0xa650, 0xa650,
+ 0xa652, 0xa652,
+ 0xa654, 0xa654,
+ 0xa656, 0xa656,
+ 0xa658, 0xa658,
+ 0xa65a, 0xa65a,
+ 0xa65c, 0xa65c,
+ 0xa65e, 0xa65e,
+ 0xa662, 0xa662,
+ 0xa664, 0xa664,
+ 0xa666, 0xa666,
+ 0xa668, 0xa668,
+ 0xa66a, 0xa66a,
+ 0xa66c, 0xa66c,
+ 0xa680, 0xa680,
+ 0xa682, 0xa682,
+ 0xa684, 0xa684,
+ 0xa686, 0xa686,
+ 0xa688, 0xa688,
+ 0xa68a, 0xa68a,
+ 0xa68c, 0xa68c,
+ 0xa68e, 0xa68e,
+ 0xa690, 0xa690,
+ 0xa692, 0xa692,
+ 0xa694, 0xa694,
+ 0xa696, 0xa696,
+ 0xa722, 0xa722,
+ 0xa724, 0xa724,
+ 0xa726, 0xa726,
+ 0xa728, 0xa728,
+ 0xa72a, 0xa72a,
+ 0xa72c, 0xa72c,
+ 0xa72e, 0xa72e,
+ 0xa732, 0xa732,
+ 0xa734, 0xa734,
+ 0xa736, 0xa736,
+ 0xa738, 0xa738,
+ 0xa73a, 0xa73a,
+ 0xa73c, 0xa73c,
+ 0xa73e, 0xa73e,
+ 0xa740, 0xa740,
+ 0xa742, 0xa742,
+ 0xa744, 0xa744,
+ 0xa746, 0xa746,
+ 0xa748, 0xa748,
+ 0xa74a, 0xa74a,
+ 0xa74c, 0xa74c,
+ 0xa74e, 0xa74e,
+ 0xa750, 0xa750,
+ 0xa752, 0xa752,
+ 0xa754, 0xa754,
+ 0xa756, 0xa756,
+ 0xa758, 0xa758,
+ 0xa75a, 0xa75a,
+ 0xa75c, 0xa75c,
+ 0xa75e, 0xa75e,
+ 0xa760, 0xa760,
+ 0xa762, 0xa762,
+ 0xa764, 0xa764,
+ 0xa766, 0xa766,
+ 0xa768, 0xa768,
+ 0xa76a, 0xa76a,
+ 0xa76c, 0xa76c,
+ 0xa76e, 0xa76e,
+ 0xa779, 0xa779,
+ 0xa77b, 0xa77b,
+ 0xa77d, 0xa77e,
+ 0xa780, 0xa780,
+ 0xa782, 0xa782,
+ 0xa784, 0xa784,
+ 0xa786, 0xa786,
+ 0xa78b, 0xa78b,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+}; /* CR_Changes_When_Lowercased */
+
+/* 'Changes_When_Uppercased': Derived Property */
+static const OnigCodePoint CR_Changes_When_Uppercased[] = {
+ 573,
+ 0x0061, 0x007a,
+ 0x00b5, 0x00b5,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0137,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018c,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019a,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01b9,
+ 0x01bd, 0x01bd,
+ 0x01bf, 0x01bf,
+ 0x01c5, 0x01c6,
+ 0x01c8, 0x01c9,
+ 0x01cb, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f2, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0233,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0242, 0x0242,
+ 0x0247, 0x0247,
+ 0x0249, 0x0249,
+ 0x024b, 0x024b,
+ 0x024d, 0x024d,
+ 0x024f, 0x0254,
+ 0x0256, 0x0257,
+ 0x0259, 0x0259,
+ 0x025b, 0x025b,
+ 0x0260, 0x0260,
+ 0x0263, 0x0263,
+ 0x0268, 0x0269,
+ 0x026b, 0x026b,
+ 0x026f, 0x026f,
+ 0x0271, 0x0272,
+ 0x0275, 0x0275,
+ 0x027d, 0x027d,
+ 0x0280, 0x0280,
+ 0x0283, 0x0283,
+ 0x0288, 0x028c,
+ 0x0292, 0x0292,
+ 0x0345, 0x0345,
+ 0x0371, 0x0371,
+ 0x0373, 0x0373,
+ 0x0377, 0x0377,
+ 0x037b, 0x037d,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f2,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04cf,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x04fb, 0x04fb,
+ 0x04fd, 0x04fd,
+ 0x04ff, 0x04ff,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0511, 0x0511,
+ 0x0513, 0x0513,
+ 0x0515, 0x0515,
+ 0x0517, 0x0517,
+ 0x0519, 0x0519,
+ 0x051b, 0x051b,
+ 0x051d, 0x051d,
+ 0x051f, 0x051f,
+ 0x0521, 0x0521,
+ 0x0523, 0x0523,
+ 0x0525, 0x0525,
+ 0x0561, 0x0587,
+ 0x1d79, 0x1d79,
+ 0x1d7d, 0x1d7d,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1efb, 0x1efb,
+ 0x1efd, 0x1efd,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbc, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fcc, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x1ffc, 0x1ffc,
+ 0x214e, 0x214e,
+ 0x2170, 0x217f,
+ 0x2184, 0x2184,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c61, 0x2c61,
+ 0x2c65, 0x2c66,
+ 0x2c68, 0x2c68,
+ 0x2c6a, 0x2c6a,
+ 0x2c6c, 0x2c6c,
+ 0x2c73, 0x2c73,
+ 0x2c76, 0x2c76,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce3,
+ 0x2cec, 0x2cec,
+ 0x2cee, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0xa641, 0xa641,
+ 0xa643, 0xa643,
+ 0xa645, 0xa645,
+ 0xa647, 0xa647,
+ 0xa649, 0xa649,
+ 0xa64b, 0xa64b,
+ 0xa64d, 0xa64d,
+ 0xa64f, 0xa64f,
+ 0xa651, 0xa651,
+ 0xa653, 0xa653,
+ 0xa655, 0xa655,
+ 0xa657, 0xa657,
+ 0xa659, 0xa659,
+ 0xa65b, 0xa65b,
+ 0xa65d, 0xa65d,
+ 0xa65f, 0xa65f,
+ 0xa663, 0xa663,
+ 0xa665, 0xa665,
+ 0xa667, 0xa667,
+ 0xa669, 0xa669,
+ 0xa66b, 0xa66b,
+ 0xa66d, 0xa66d,
+ 0xa681, 0xa681,
+ 0xa683, 0xa683,
+ 0xa685, 0xa685,
+ 0xa687, 0xa687,
+ 0xa689, 0xa689,
+ 0xa68b, 0xa68b,
+ 0xa68d, 0xa68d,
+ 0xa68f, 0xa68f,
+ 0xa691, 0xa691,
+ 0xa693, 0xa693,
+ 0xa695, 0xa695,
+ 0xa697, 0xa697,
+ 0xa723, 0xa723,
+ 0xa725, 0xa725,
+ 0xa727, 0xa727,
+ 0xa729, 0xa729,
+ 0xa72b, 0xa72b,
+ 0xa72d, 0xa72d,
+ 0xa72f, 0xa72f,
+ 0xa733, 0xa733,
+ 0xa735, 0xa735,
+ 0xa737, 0xa737,
+ 0xa739, 0xa739,
+ 0xa73b, 0xa73b,
+ 0xa73d, 0xa73d,
+ 0xa73f, 0xa73f,
+ 0xa741, 0xa741,
+ 0xa743, 0xa743,
+ 0xa745, 0xa745,
+ 0xa747, 0xa747,
+ 0xa749, 0xa749,
+ 0xa74b, 0xa74b,
+ 0xa74d, 0xa74d,
+ 0xa74f, 0xa74f,
+ 0xa751, 0xa751,
+ 0xa753, 0xa753,
+ 0xa755, 0xa755,
+ 0xa757, 0xa757,
+ 0xa759, 0xa759,
+ 0xa75b, 0xa75b,
+ 0xa75d, 0xa75d,
+ 0xa75f, 0xa75f,
+ 0xa761, 0xa761,
+ 0xa763, 0xa763,
+ 0xa765, 0xa765,
+ 0xa767, 0xa767,
+ 0xa769, 0xa769,
+ 0xa76b, 0xa76b,
+ 0xa76d, 0xa76d,
+ 0xa76f, 0xa76f,
+ 0xa77a, 0xa77a,
+ 0xa77c, 0xa77c,
+ 0xa77f, 0xa77f,
+ 0xa781, 0xa781,
+ 0xa783, 0xa783,
+ 0xa785, 0xa785,
+ 0xa787, 0xa787,
+ 0xa78c, 0xa78c,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+}; /* CR_Changes_When_Uppercased */
+
+/* 'Changes_When_Titlecased': Derived Property */
+static const OnigCodePoint CR_Changes_When_Titlecased[] = {
+ 574,
+ 0x0061, 0x007a,
+ 0x00b5, 0x00b5,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0137,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018c,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019a,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01b9,
+ 0x01bd, 0x01bd,
+ 0x01bf, 0x01bf,
+ 0x01c4, 0x01c4,
+ 0x01c6, 0x01c7,
+ 0x01c9, 0x01ca,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f1,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0233,
+ 0x023c, 0x023c,
+ 0x023f, 0x0240,
+ 0x0242, 0x0242,
+ 0x0247, 0x0247,
+ 0x0249, 0x0249,
+ 0x024b, 0x024b,
+ 0x024d, 0x024d,
+ 0x024f, 0x0254,
+ 0x0256, 0x0257,
+ 0x0259, 0x0259,
+ 0x025b, 0x025b,
+ 0x0260, 0x0260,
+ 0x0263, 0x0263,
+ 0x0268, 0x0269,
+ 0x026b, 0x026b,
+ 0x026f, 0x026f,
+ 0x0271, 0x0272,
+ 0x0275, 0x0275,
+ 0x027d, 0x027d,
+ 0x0280, 0x0280,
+ 0x0283, 0x0283,
+ 0x0288, 0x028c,
+ 0x0292, 0x0292,
+ 0x0345, 0x0345,
+ 0x0371, 0x0371,
+ 0x0373, 0x0373,
+ 0x0377, 0x0377,
+ 0x037b, 0x037d,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f2,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04cf,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f7, 0x04f7,
+ 0x04f9, 0x04f9,
+ 0x04fb, 0x04fb,
+ 0x04fd, 0x04fd,
+ 0x04ff, 0x04ff,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0511, 0x0511,
+ 0x0513, 0x0513,
+ 0x0515, 0x0515,
+ 0x0517, 0x0517,
+ 0x0519, 0x0519,
+ 0x051b, 0x051b,
+ 0x051d, 0x051d,
+ 0x051f, 0x051f,
+ 0x0521, 0x0521,
+ 0x0523, 0x0523,
+ 0x0525, 0x0525,
+ 0x0561, 0x0587,
+ 0x1d79, 0x1d79,
+ 0x1d7d, 0x1d7d,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1efb, 0x1efb,
+ 0x1efd, 0x1efd,
+ 0x1eff, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x214e, 0x214e,
+ 0x2170, 0x217f,
+ 0x2184, 0x2184,
+ 0x24d0, 0x24e9,
+ 0x2c30, 0x2c5e,
+ 0x2c61, 0x2c61,
+ 0x2c65, 0x2c66,
+ 0x2c68, 0x2c68,
+ 0x2c6a, 0x2c6a,
+ 0x2c6c, 0x2c6c,
+ 0x2c73, 0x2c73,
+ 0x2c76, 0x2c76,
+ 0x2c81, 0x2c81,
+ 0x2c83, 0x2c83,
+ 0x2c85, 0x2c85,
+ 0x2c87, 0x2c87,
+ 0x2c89, 0x2c89,
+ 0x2c8b, 0x2c8b,
+ 0x2c8d, 0x2c8d,
+ 0x2c8f, 0x2c8f,
+ 0x2c91, 0x2c91,
+ 0x2c93, 0x2c93,
+ 0x2c95, 0x2c95,
+ 0x2c97, 0x2c97,
+ 0x2c99, 0x2c99,
+ 0x2c9b, 0x2c9b,
+ 0x2c9d, 0x2c9d,
+ 0x2c9f, 0x2c9f,
+ 0x2ca1, 0x2ca1,
+ 0x2ca3, 0x2ca3,
+ 0x2ca5, 0x2ca5,
+ 0x2ca7, 0x2ca7,
+ 0x2ca9, 0x2ca9,
+ 0x2cab, 0x2cab,
+ 0x2cad, 0x2cad,
+ 0x2caf, 0x2caf,
+ 0x2cb1, 0x2cb1,
+ 0x2cb3, 0x2cb3,
+ 0x2cb5, 0x2cb5,
+ 0x2cb7, 0x2cb7,
+ 0x2cb9, 0x2cb9,
+ 0x2cbb, 0x2cbb,
+ 0x2cbd, 0x2cbd,
+ 0x2cbf, 0x2cbf,
+ 0x2cc1, 0x2cc1,
+ 0x2cc3, 0x2cc3,
+ 0x2cc5, 0x2cc5,
+ 0x2cc7, 0x2cc7,
+ 0x2cc9, 0x2cc9,
+ 0x2ccb, 0x2ccb,
+ 0x2ccd, 0x2ccd,
+ 0x2ccf, 0x2ccf,
+ 0x2cd1, 0x2cd1,
+ 0x2cd3, 0x2cd3,
+ 0x2cd5, 0x2cd5,
+ 0x2cd7, 0x2cd7,
+ 0x2cd9, 0x2cd9,
+ 0x2cdb, 0x2cdb,
+ 0x2cdd, 0x2cdd,
+ 0x2cdf, 0x2cdf,
+ 0x2ce1, 0x2ce1,
+ 0x2ce3, 0x2ce3,
+ 0x2cec, 0x2cec,
+ 0x2cee, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0xa641, 0xa641,
+ 0xa643, 0xa643,
+ 0xa645, 0xa645,
+ 0xa647, 0xa647,
+ 0xa649, 0xa649,
+ 0xa64b, 0xa64b,
+ 0xa64d, 0xa64d,
+ 0xa64f, 0xa64f,
+ 0xa651, 0xa651,
+ 0xa653, 0xa653,
+ 0xa655, 0xa655,
+ 0xa657, 0xa657,
+ 0xa659, 0xa659,
+ 0xa65b, 0xa65b,
+ 0xa65d, 0xa65d,
+ 0xa65f, 0xa65f,
+ 0xa663, 0xa663,
+ 0xa665, 0xa665,
+ 0xa667, 0xa667,
+ 0xa669, 0xa669,
+ 0xa66b, 0xa66b,
+ 0xa66d, 0xa66d,
+ 0xa681, 0xa681,
+ 0xa683, 0xa683,
+ 0xa685, 0xa685,
+ 0xa687, 0xa687,
+ 0xa689, 0xa689,
+ 0xa68b, 0xa68b,
+ 0xa68d, 0xa68d,
+ 0xa68f, 0xa68f,
+ 0xa691, 0xa691,
+ 0xa693, 0xa693,
+ 0xa695, 0xa695,
+ 0xa697, 0xa697,
+ 0xa723, 0xa723,
+ 0xa725, 0xa725,
+ 0xa727, 0xa727,
+ 0xa729, 0xa729,
+ 0xa72b, 0xa72b,
+ 0xa72d, 0xa72d,
+ 0xa72f, 0xa72f,
+ 0xa733, 0xa733,
+ 0xa735, 0xa735,
+ 0xa737, 0xa737,
+ 0xa739, 0xa739,
+ 0xa73b, 0xa73b,
+ 0xa73d, 0xa73d,
+ 0xa73f, 0xa73f,
+ 0xa741, 0xa741,
+ 0xa743, 0xa743,
+ 0xa745, 0xa745,
+ 0xa747, 0xa747,
+ 0xa749, 0xa749,
+ 0xa74b, 0xa74b,
+ 0xa74d, 0xa74d,
+ 0xa74f, 0xa74f,
+ 0xa751, 0xa751,
+ 0xa753, 0xa753,
+ 0xa755, 0xa755,
+ 0xa757, 0xa757,
+ 0xa759, 0xa759,
+ 0xa75b, 0xa75b,
+ 0xa75d, 0xa75d,
+ 0xa75f, 0xa75f,
+ 0xa761, 0xa761,
+ 0xa763, 0xa763,
+ 0xa765, 0xa765,
+ 0xa767, 0xa767,
+ 0xa769, 0xa769,
+ 0xa76b, 0xa76b,
+ 0xa76d, 0xa76d,
+ 0xa76f, 0xa76f,
+ 0xa77a, 0xa77a,
+ 0xa77c, 0xa77c,
+ 0xa77f, 0xa77f,
+ 0xa781, 0xa781,
+ 0xa783, 0xa783,
+ 0xa785, 0xa785,
+ 0xa787, 0xa787,
+ 0xa78c, 0xa78c,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+}; /* CR_Changes_When_Titlecased */
+
+/* 'Changes_When_Casefolded': Derived Property */
+static const OnigCodePoint CR_Changes_When_Casefolded[] = {
+ 568,
+ 0x0041, 0x005a,
+ 0x00b5, 0x00b5,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00df,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x0149, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x017f, 0x017f,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c5,
+ 0x01c7, 0x01c8,
+ 0x01ca, 0x01cb,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f2,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x023a, 0x023b,
+ 0x023d, 0x023e,
+ 0x0241, 0x0241,
+ 0x0243, 0x0246,
+ 0x0248, 0x0248,
+ 0x024a, 0x024a,
+ 0x024c, 0x024c,
+ 0x024e, 0x024e,
+ 0x0345, 0x0345,
+ 0x0370, 0x0370,
+ 0x0372, 0x0372,
+ 0x0376, 0x0376,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03c2, 0x03c2,
+ 0x03cf, 0x03d1,
+ 0x03d5, 0x03d6,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f0, 0x03f1,
+ 0x03f4, 0x03f5,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x03fd, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f6, 0x04f6,
+ 0x04f8, 0x04f8,
+ 0x04fa, 0x04fa,
+ 0x04fc, 0x04fc,
+ 0x04fe, 0x04fe,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0510, 0x0510,
+ 0x0512, 0x0512,
+ 0x0514, 0x0514,
+ 0x0516, 0x0516,
+ 0x0518, 0x0518,
+ 0x051a, 0x051a,
+ 0x051c, 0x051c,
+ 0x051e, 0x051e,
+ 0x0520, 0x0520,
+ 0x0522, 0x0522,
+ 0x0524, 0x0524,
+ 0x0531, 0x0556,
+ 0x0587, 0x0587,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1e9a, 0x1e9b,
+ 0x1e9e, 0x1e9e,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1efa, 0x1efa,
+ 0x1efc, 0x1efc,
+ 0x1efe, 0x1efe,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1f80, 0x1faf,
+ 0x1fb2, 0x1fb4,
+ 0x1fb7, 0x1fbc,
+ 0x1fc2, 0x1fc4,
+ 0x1fc7, 0x1fcc,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff7, 0x1ffc,
+ 0x2126, 0x2126,
+ 0x212a, 0x212b,
+ 0x2132, 0x2132,
+ 0x2160, 0x216f,
+ 0x2183, 0x2183,
+ 0x24b6, 0x24cf,
+ 0x2c00, 0x2c2e,
+ 0x2c60, 0x2c60,
+ 0x2c62, 0x2c64,
+ 0x2c67, 0x2c67,
+ 0x2c69, 0x2c69,
+ 0x2c6b, 0x2c6b,
+ 0x2c6d, 0x2c70,
+ 0x2c72, 0x2c72,
+ 0x2c75, 0x2c75,
+ 0x2c7e, 0x2c80,
+ 0x2c82, 0x2c82,
+ 0x2c84, 0x2c84,
+ 0x2c86, 0x2c86,
+ 0x2c88, 0x2c88,
+ 0x2c8a, 0x2c8a,
+ 0x2c8c, 0x2c8c,
+ 0x2c8e, 0x2c8e,
+ 0x2c90, 0x2c90,
+ 0x2c92, 0x2c92,
+ 0x2c94, 0x2c94,
+ 0x2c96, 0x2c96,
+ 0x2c98, 0x2c98,
+ 0x2c9a, 0x2c9a,
+ 0x2c9c, 0x2c9c,
+ 0x2c9e, 0x2c9e,
+ 0x2ca0, 0x2ca0,
+ 0x2ca2, 0x2ca2,
+ 0x2ca4, 0x2ca4,
+ 0x2ca6, 0x2ca6,
+ 0x2ca8, 0x2ca8,
+ 0x2caa, 0x2caa,
+ 0x2cac, 0x2cac,
+ 0x2cae, 0x2cae,
+ 0x2cb0, 0x2cb0,
+ 0x2cb2, 0x2cb2,
+ 0x2cb4, 0x2cb4,
+ 0x2cb6, 0x2cb6,
+ 0x2cb8, 0x2cb8,
+ 0x2cba, 0x2cba,
+ 0x2cbc, 0x2cbc,
+ 0x2cbe, 0x2cbe,
+ 0x2cc0, 0x2cc0,
+ 0x2cc2, 0x2cc2,
+ 0x2cc4, 0x2cc4,
+ 0x2cc6, 0x2cc6,
+ 0x2cc8, 0x2cc8,
+ 0x2cca, 0x2cca,
+ 0x2ccc, 0x2ccc,
+ 0x2cce, 0x2cce,
+ 0x2cd0, 0x2cd0,
+ 0x2cd2, 0x2cd2,
+ 0x2cd4, 0x2cd4,
+ 0x2cd6, 0x2cd6,
+ 0x2cd8, 0x2cd8,
+ 0x2cda, 0x2cda,
+ 0x2cdc, 0x2cdc,
+ 0x2cde, 0x2cde,
+ 0x2ce0, 0x2ce0,
+ 0x2ce2, 0x2ce2,
+ 0x2ceb, 0x2ceb,
+ 0x2ced, 0x2ced,
+ 0xa640, 0xa640,
+ 0xa642, 0xa642,
+ 0xa644, 0xa644,
+ 0xa646, 0xa646,
+ 0xa648, 0xa648,
+ 0xa64a, 0xa64a,
+ 0xa64c, 0xa64c,
+ 0xa64e, 0xa64e,
+ 0xa650, 0xa650,
+ 0xa652, 0xa652,
+ 0xa654, 0xa654,
+ 0xa656, 0xa656,
+ 0xa658, 0xa658,
+ 0xa65a, 0xa65a,
+ 0xa65c, 0xa65c,
+ 0xa65e, 0xa65e,
+ 0xa662, 0xa662,
+ 0xa664, 0xa664,
+ 0xa666, 0xa666,
+ 0xa668, 0xa668,
+ 0xa66a, 0xa66a,
+ 0xa66c, 0xa66c,
+ 0xa680, 0xa680,
+ 0xa682, 0xa682,
+ 0xa684, 0xa684,
+ 0xa686, 0xa686,
+ 0xa688, 0xa688,
+ 0xa68a, 0xa68a,
+ 0xa68c, 0xa68c,
+ 0xa68e, 0xa68e,
+ 0xa690, 0xa690,
+ 0xa692, 0xa692,
+ 0xa694, 0xa694,
+ 0xa696, 0xa696,
+ 0xa722, 0xa722,
+ 0xa724, 0xa724,
+ 0xa726, 0xa726,
+ 0xa728, 0xa728,
+ 0xa72a, 0xa72a,
+ 0xa72c, 0xa72c,
+ 0xa72e, 0xa72e,
+ 0xa732, 0xa732,
+ 0xa734, 0xa734,
+ 0xa736, 0xa736,
+ 0xa738, 0xa738,
+ 0xa73a, 0xa73a,
+ 0xa73c, 0xa73c,
+ 0xa73e, 0xa73e,
+ 0xa740, 0xa740,
+ 0xa742, 0xa742,
+ 0xa744, 0xa744,
+ 0xa746, 0xa746,
+ 0xa748, 0xa748,
+ 0xa74a, 0xa74a,
+ 0xa74c, 0xa74c,
+ 0xa74e, 0xa74e,
+ 0xa750, 0xa750,
+ 0xa752, 0xa752,
+ 0xa754, 0xa754,
+ 0xa756, 0xa756,
+ 0xa758, 0xa758,
+ 0xa75a, 0xa75a,
+ 0xa75c, 0xa75c,
+ 0xa75e, 0xa75e,
+ 0xa760, 0xa760,
+ 0xa762, 0xa762,
+ 0xa764, 0xa764,
+ 0xa766, 0xa766,
+ 0xa768, 0xa768,
+ 0xa76a, 0xa76a,
+ 0xa76c, 0xa76c,
+ 0xa76e, 0xa76e,
+ 0xa779, 0xa779,
+ 0xa77b, 0xa77b,
+ 0xa77d, 0xa77e,
+ 0xa780, 0xa780,
+ 0xa782, 0xa782,
+ 0xa784, 0xa784,
+ 0xa786, 0xa786,
+ 0xa78b, 0xa78b,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+}; /* CR_Changes_When_Casefolded */
+
+/* 'Changes_When_Casemapped': Derived Property */
+static const OnigCodePoint CR_Changes_When_Casemapped[] = {
+ 97,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00b5, 0x00b5,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0137,
+ 0x0139, 0x018c,
+ 0x018e, 0x019a,
+ 0x019c, 0x01a9,
+ 0x01ac, 0x01b9,
+ 0x01bc, 0x01bd,
+ 0x01bf, 0x01bf,
+ 0x01c4, 0x0220,
+ 0x0222, 0x0233,
+ 0x023a, 0x0254,
+ 0x0256, 0x0257,
+ 0x0259, 0x0259,
+ 0x025b, 0x025b,
+ 0x0260, 0x0260,
+ 0x0263, 0x0263,
+ 0x0268, 0x0269,
+ 0x026b, 0x026b,
+ 0x026f, 0x026f,
+ 0x0271, 0x0272,
+ 0x0275, 0x0275,
+ 0x027d, 0x027d,
+ 0x0280, 0x0280,
+ 0x0283, 0x0283,
+ 0x0288, 0x028c,
+ 0x0292, 0x0292,
+ 0x0345, 0x0345,
+ 0x0370, 0x0373,
+ 0x0376, 0x0377,
+ 0x037b, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03d1,
+ 0x03d5, 0x03f2,
+ 0x03f4, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x03fd, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0561, 0x0587,
+ 0x10a0, 0x10c5,
+ 0x1d79, 0x1d79,
+ 0x1d7d, 0x1d7d,
+ 0x1e00, 0x1e9b,
+ 0x1e9e, 0x1e9e,
+ 0x1ea0, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2126, 0x2126,
+ 0x212a, 0x212b,
+ 0x2132, 0x2132,
+ 0x214e, 0x214e,
+ 0x2160, 0x217f,
+ 0x2183, 0x2184,
+ 0x24b6, 0x24e9,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2c70,
+ 0x2c72, 0x2c73,
+ 0x2c75, 0x2c76,
+ 0x2c7e, 0x2ce3,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66d,
+ 0xa680, 0xa697,
+ 0xa722, 0xa72f,
+ 0xa732, 0xa76f,
+ 0xa779, 0xa787,
+ 0xa78b, 0xa78c,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0x10400, 0x1044f,
+}; /* CR_Changes_When_Casemapped */
+
+/* 'ID_Start': Derived Property */
+static const OnigCodePoint CR_ID_Start[] = {
+ 424,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x0815,
+ 0x081a, 0x081a,
+ 0x0824, 0x0824,
+ 0x0828, 0x0828,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bd0, 0x0bd0,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c3d,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d3d,
+ 0x0d60, 0x0d61,
+ 0x0d7a, 0x0d7f,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x102a,
+ 0x103f, 0x103f,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1aa7, 0x1aa7,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2118, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x309b, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6ef,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9cf, 0xa9cf,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa80, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11083, 0x110af,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_ID_Start */
+
+/* 'ID_Continue': Derived Property */
+static const OnigCodePoint CR_ID_Continue[] = {
+ 506,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00b7, 0x00b7,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0487,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x061a,
+ 0x0621, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06df, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0955,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d6f,
+ 0x0d7a, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1049,
+ 0x1050, 0x109d,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1369, 0x1371,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x1a00, 0x1a1b,
+ 0x1a20, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa7, 0x1aa7,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b59,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c37,
+ 0x1c40, 0x1c49,
+ 0x1c4d, 0x1c7d,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfd, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20f0,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2118, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cf1,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2dff,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66f,
+ 0xa67c, 0xa67d,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6f1,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa827,
+ 0xa840, 0xa873,
+ 0xa880, 0xa8c4,
+ 0xa8d0, 0xa8d9,
+ 0xa8e0, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa900, 0xa92d,
+ 0xa930, 0xa953,
+ 0xa960, 0xa97c,
+ 0xa980, 0xa9c0,
+ 0xa9cf, 0xa9d9,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabea,
+ 0xabec, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x101fd, 0x101fd,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11080, 0x110ba,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef,
+}; /* CR_ID_Continue */
+
+/* 'XID_Start': Derived Property */
+static const OnigCodePoint CR_XID_Start[] = {
+ 431,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037b, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0621, 0x064a,
+ 0x066e, 0x066f,
+ 0x0671, 0x06d3,
+ 0x06d5, 0x06d5,
+ 0x06e5, 0x06e6,
+ 0x06ee, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x07ca, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x0815,
+ 0x081a, 0x081a,
+ 0x0824, 0x0824,
+ 0x0828, 0x0828,
+ 0x0904, 0x0939,
+ 0x093d, 0x093d,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09f0, 0x09f1,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a72, 0x0a74,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0abd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bd0, 0x0bd0,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c3d,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbd,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d3d,
+ 0x0d60, 0x0d61,
+ 0x0d7a, 0x0d7f,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e32,
+ 0x0e40, 0x0e46,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb2,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f88, 0x0f8b,
+ 0x1000, 0x102a,
+ 0x103f, 0x103f,
+ 0x1050, 0x1055,
+ 0x105a, 0x105d,
+ 0x1061, 0x1061,
+ 0x1065, 0x1066,
+ 0x106e, 0x1070,
+ 0x1075, 0x1081,
+ 0x108e, 0x108e,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19c1, 0x19c7,
+ 0x1a00, 0x1a16,
+ 0x1a20, 0x1a54,
+ 0x1aa7, 0x1aa7,
+ 0x1b05, 0x1b33,
+ 0x1b45, 0x1b4b,
+ 0x1b83, 0x1ba0,
+ 0x1bae, 0x1baf,
+ 0x1c00, 0x1c23,
+ 0x1c4d, 0x1c4f,
+ 0x1c5a, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf1,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2118, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa61f,
+ 0xa62a, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6ef,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa822,
+ 0xa840, 0xa873,
+ 0xa882, 0xa8b3,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa90a, 0xa925,
+ 0xa930, 0xa946,
+ 0xa960, 0xa97c,
+ 0xa984, 0xa9b2,
+ 0xa9cf, 0xa9cf,
+ 0xaa00, 0xaa28,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa80, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabe2,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfc5d,
+ 0xfc64, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdf9,
+ 0xfe71, 0xfe71,
+ 0xfe73, 0xfe73,
+ 0xfe77, 0xfe77,
+ 0xfe79, 0xfe79,
+ 0xfe7b, 0xfe7b,
+ 0xfe7d, 0xfe7d,
+ 0xfe7f, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xff9d,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11083, 0x110af,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_XID_Start */
+
+/* 'XID_Continue': Derived Property */
+static const OnigCodePoint CR_XID_Continue[] = {
+ 513,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00b7, 0x00b7,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0374,
+ 0x0376, 0x0377,
+ 0x037b, 0x037d,
+ 0x0386, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0487,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x061a,
+ 0x0621, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06df, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0955,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d6f,
+ 0x0d7a, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1049,
+ 0x1050, 0x109d,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1369, 0x1371,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x1a00, 0x1a1b,
+ 0x1a20, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa7, 0x1aa7,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b59,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c37,
+ 0x1c40, 0x1c49,
+ 0x1c4d, 0x1c7d,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfd, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20f0,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2118, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cf1,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2dff,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66f,
+ 0xa67c, 0xa67d,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6f1,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa827,
+ 0xa840, 0xa873,
+ 0xa880, 0xa8c4,
+ 0xa8d0, 0xa8d9,
+ 0xa8e0, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa900, 0xa92d,
+ 0xa930, 0xa953,
+ 0xa960, 0xa97c,
+ 0xa980, 0xa9c0,
+ 0xa9cf, 0xa9d9,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabea,
+ 0xabec, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfc5d,
+ 0xfc64, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdf9,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe71, 0xfe71,
+ 0xfe73, 0xfe73,
+ 0xfe77, 0xfe77,
+ 0xfe79, 0xfe79,
+ 0xfe7b, 0xfe7b,
+ 0xfe7d, 0xfe7d,
+ 0xfe7f, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x101fd, 0x101fd,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11080, 0x110ba,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef,
+}; /* CR_XID_Continue */
+
+/* 'Default_Ignorable_Code_Point': Derived Property */
+static const OnigCodePoint CR_Default_Ignorable_Code_Point[] = {
+ 15,
+ 0x00ad, 0x00ad,
+ 0x034f, 0x034f,
+ 0x115f, 0x1160,
+ 0x17b4, 0x17b5,
+ 0x180b, 0x180d,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x206f,
+ 0x3164, 0x3164,
+ 0xfe00, 0xfe0f,
+ 0xfeff, 0xfeff,
+ 0xffa0, 0xffa0,
+ 0xfff0, 0xfff8,
+ 0x1d173, 0x1d17a,
+ 0xe0000, 0xe0fff,
+}; /* CR_Default_Ignorable_Code_Point */
+
+/* 'Grapheme_Extend': Derived Property */
+static const OnigCodePoint CR_Grapheme_Extend[] = {
+ 206,
+ 0x0300, 0x036f,
+ 0x0483, 0x0489,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x061a,
+ 0x064b, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06de, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ea, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x07eb, 0x07f3,
+ 0x0816, 0x0819,
+ 0x081b, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082d,
+ 0x0900, 0x0902,
+ 0x093c, 0x093c,
+ 0x0941, 0x0948,
+ 0x094d, 0x094d,
+ 0x0951, 0x0955,
+ 0x0962, 0x0963,
+ 0x0981, 0x0981,
+ 0x09bc, 0x09bc,
+ 0x09be, 0x09be,
+ 0x09c1, 0x09c4,
+ 0x09cd, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a02,
+ 0x0a3c, 0x0a3c,
+ 0x0a41, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a70, 0x0a71,
+ 0x0a75, 0x0a75,
+ 0x0a81, 0x0a82,
+ 0x0abc, 0x0abc,
+ 0x0ac1, 0x0ac5,
+ 0x0ac7, 0x0ac8,
+ 0x0acd, 0x0acd,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b01,
+ 0x0b3c, 0x0b3c,
+ 0x0b3e, 0x0b3f,
+ 0x0b41, 0x0b44,
+ 0x0b4d, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b62, 0x0b63,
+ 0x0b82, 0x0b82,
+ 0x0bbe, 0x0bbe,
+ 0x0bc0, 0x0bc0,
+ 0x0bcd, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c3e, 0x0c40,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c62, 0x0c63,
+ 0x0cbc, 0x0cbc,
+ 0x0cbf, 0x0cbf,
+ 0x0cc2, 0x0cc2,
+ 0x0cc6, 0x0cc6,
+ 0x0ccc, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0ce2, 0x0ce3,
+ 0x0d3e, 0x0d3e,
+ 0x0d41, 0x0d44,
+ 0x0d4d, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d62, 0x0d63,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dcf,
+ 0x0dd2, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0ddf, 0x0ddf,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e47, 0x0e4e,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ec8, 0x0ecd,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f71, 0x0f7e,
+ 0x0f80, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x102d, 0x1030,
+ 0x1032, 0x1037,
+ 0x1039, 0x103a,
+ 0x103d, 0x103e,
+ 0x1058, 0x1059,
+ 0x105e, 0x1060,
+ 0x1071, 0x1074,
+ 0x1082, 0x1082,
+ 0x1085, 0x1086,
+ 0x108d, 0x108d,
+ 0x109d, 0x109d,
+ 0x135f, 0x135f,
+ 0x1712, 0x1714,
+ 0x1732, 0x1734,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b7, 0x17bd,
+ 0x17c6, 0x17c6,
+ 0x17c9, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x180b, 0x180d,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x1922,
+ 0x1927, 0x1928,
+ 0x1932, 0x1932,
+ 0x1939, 0x193b,
+ 0x1a17, 0x1a18,
+ 0x1a56, 0x1a56,
+ 0x1a58, 0x1a5e,
+ 0x1a60, 0x1a60,
+ 0x1a62, 0x1a62,
+ 0x1a65, 0x1a6c,
+ 0x1a73, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1b00, 0x1b03,
+ 0x1b34, 0x1b34,
+ 0x1b36, 0x1b3a,
+ 0x1b3c, 0x1b3c,
+ 0x1b42, 0x1b42,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1b81,
+ 0x1ba2, 0x1ba5,
+ 0x1ba8, 0x1ba9,
+ 0x1c2c, 0x1c33,
+ 0x1c36, 0x1c37,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce0,
+ 0x1ce2, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1dc0, 0x1de6,
+ 0x1dfd, 0x1dff,
+ 0x200c, 0x200d,
+ 0x20d0, 0x20f0,
+ 0x2cef, 0x2cf1,
+ 0x2de0, 0x2dff,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xa66f, 0xa672,
+ 0xa67c, 0xa67d,
+ 0xa6f0, 0xa6f1,
+ 0xa802, 0xa802,
+ 0xa806, 0xa806,
+ 0xa80b, 0xa80b,
+ 0xa825, 0xa826,
+ 0xa8c4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa926, 0xa92d,
+ 0xa947, 0xa951,
+ 0xa980, 0xa982,
+ 0xa9b3, 0xa9b3,
+ 0xa9b6, 0xa9b9,
+ 0xa9bc, 0xa9bc,
+ 0xaa29, 0xaa2e,
+ 0xaa31, 0xaa32,
+ 0xaa35, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4c,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabf,
+ 0xaac1, 0xaac1,
+ 0xabe5, 0xabe5,
+ 0xabe8, 0xabe8,
+ 0xabed, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0xff9e, 0xff9f,
+ 0x101fd, 0x101fd,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x11080, 0x11081,
+ 0x110b3, 0x110b6,
+ 0x110b9, 0x110ba,
+ 0x1d165, 0x1d165,
+ 0x1d167, 0x1d169,
+ 0x1d16e, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0xe0100, 0xe01ef,
+}; /* CR_Grapheme_Extend */
+
+/* 'Grapheme_Base': Derived Property */
+static const OnigCodePoint CR_Grapheme_Base[] = {
+ 567,
+ 0x0020, 0x007e,
+ 0x00a0, 0x00ac,
+ 0x00ae, 0x02ff,
+ 0x0370, 0x0377,
+ 0x037a, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x0482,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05c6, 0x05c6,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0606, 0x060f,
+ 0x061b, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x064a,
+ 0x0660, 0x066f,
+ 0x0671, 0x06d5,
+ 0x06e5, 0x06e6,
+ 0x06e9, 0x06e9,
+ 0x06ee, 0x070d,
+ 0x0710, 0x0710,
+ 0x0712, 0x072f,
+ 0x074d, 0x07a5,
+ 0x07b1, 0x07b1,
+ 0x07c0, 0x07ea,
+ 0x07f4, 0x07fa,
+ 0x0800, 0x0815,
+ 0x081a, 0x081a,
+ 0x0824, 0x0824,
+ 0x0828, 0x0828,
+ 0x0830, 0x083e,
+ 0x0903, 0x0939,
+ 0x093d, 0x0940,
+ 0x0949, 0x094c,
+ 0x094e, 0x094e,
+ 0x0950, 0x0950,
+ 0x0958, 0x0961,
+ 0x0964, 0x0972,
+ 0x0979, 0x097f,
+ 0x0982, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09bd,
+ 0x09bf, 0x09c0,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09ce, 0x09ce,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e1,
+ 0x09e6, 0x09fb,
+ 0x0a03, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3e, 0x0a40,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a6f,
+ 0x0a72, 0x0a74,
+ 0x0a83, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0ac0,
+ 0x0ac9, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae1,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b02, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b3d,
+ 0x0b40, 0x0b40,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b83, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbf, 0x0bbf,
+ 0x0bc1, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd0, 0x0bd0,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c3d,
+ 0x0c41, 0x0c44,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cbe,
+ 0x0cc0, 0x0cc1,
+ 0x0cc3, 0x0cc4,
+ 0x0cc7, 0x0cc8,
+ 0x0cca, 0x0ccb,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0cf1, 0x0cf2,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d3d,
+ 0x0d3f, 0x0d40,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d75,
+ 0x0d79, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dd0, 0x0dd1,
+ 0x0dd8, 0x0dde,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e30,
+ 0x0e32, 0x0e33,
+ 0x0e3f, 0x0e46,
+ 0x0e4f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb0,
+ 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f17,
+ 0x0f1a, 0x0f34,
+ 0x0f36, 0x0f36,
+ 0x0f38, 0x0f38,
+ 0x0f3a, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f7f, 0x0f7f,
+ 0x0f85, 0x0f85,
+ 0x0f88, 0x0f8b,
+ 0x0fbe, 0x0fc5,
+ 0x0fc7, 0x0fcc,
+ 0x0fce, 0x0fd8,
+ 0x1000, 0x102c,
+ 0x1031, 0x1031,
+ 0x1038, 0x1038,
+ 0x103b, 0x103c,
+ 0x103f, 0x1057,
+ 0x105a, 0x105d,
+ 0x1061, 0x1070,
+ 0x1075, 0x1081,
+ 0x1083, 0x1084,
+ 0x1087, 0x108c,
+ 0x108e, 0x109c,
+ 0x109e, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x1360, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1400, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1711,
+ 0x1720, 0x1731,
+ 0x1735, 0x1736,
+ 0x1740, 0x1751,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17b6,
+ 0x17be, 0x17c5,
+ 0x17c7, 0x17c8,
+ 0x17d4, 0x17dc,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180a,
+ 0x180e, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a8,
+ 0x18aa, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1923, 0x1926,
+ 0x1929, 0x192b,
+ 0x1930, 0x1931,
+ 0x1933, 0x1938,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x1a16,
+ 0x1a19, 0x1a1b,
+ 0x1a1e, 0x1a55,
+ 0x1a57, 0x1a57,
+ 0x1a61, 0x1a61,
+ 0x1a63, 0x1a64,
+ 0x1a6d, 0x1a72,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad,
+ 0x1b04, 0x1b33,
+ 0x1b35, 0x1b35,
+ 0x1b3b, 0x1b3b,
+ 0x1b3d, 0x1b41,
+ 0x1b43, 0x1b4b,
+ 0x1b50, 0x1b6a,
+ 0x1b74, 0x1b7c,
+ 0x1b82, 0x1ba1,
+ 0x1ba6, 0x1ba7,
+ 0x1baa, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c2b,
+ 0x1c34, 0x1c35,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c7f,
+ 0x1cd3, 0x1cd3,
+ 0x1ce1, 0x1ce1,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf2,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x200a,
+ 0x2010, 0x2027,
+ 0x202f, 0x205f,
+ 0x2070, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b8,
+ 0x2100, 0x2189,
+ 0x2190, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2cee,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2e00, 0x2e31,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x3029,
+ 0x3030, 0x303f,
+ 0x3041, 0x3096,
+ 0x309b, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31e3,
+ 0x31f0, 0x321e,
+ 0x3220, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa4d0, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa673, 0xa673,
+ 0xa67e, 0xa697,
+ 0xa6a0, 0xa6ef,
+ 0xa6f2, 0xa6f7,
+ 0xa700, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa824,
+ 0xa827, 0xa82b,
+ 0xa830, 0xa839,
+ 0xa840, 0xa877,
+ 0xa880, 0xa8c3,
+ 0xa8ce, 0xa8d9,
+ 0xa8f2, 0xa8fb,
+ 0xa900, 0xa925,
+ 0xa92e, 0xa946,
+ 0xa952, 0xa953,
+ 0xa95f, 0xa97c,
+ 0xa983, 0xa9b2,
+ 0xa9b4, 0xa9b5,
+ 0xa9ba, 0xa9bb,
+ 0xa9bd, 0xa9cd,
+ 0xa9cf, 0xa9d9,
+ 0xa9de, 0xa9df,
+ 0xaa00, 0xaa28,
+ 0xaa2f, 0xaa30,
+ 0xaa33, 0xaa34,
+ 0xaa40, 0xaa42,
+ 0xaa44, 0xaa4b,
+ 0xaa4d, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa7b,
+ 0xaa80, 0xaaaf,
+ 0xaab1, 0xaab1,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaabd,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadf,
+ 0xabc0, 0xabe4,
+ 0xabe6, 0xabe7,
+ 0xabe9, 0xabec,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb1d,
+ 0xfb1f, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff01, 0xff9d,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfffc, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fc,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10857, 0x1085f,
+ 0x10900, 0x1091b,
+ 0x1091f, 0x10939,
+ 0x1093f, 0x1093f,
+ 0x10a00, 0x10a00,
+ 0x10a10, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a40, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x10a60, 0x10a7f,
+ 0x10b00, 0x10b35,
+ 0x10b39, 0x10b55,
+ 0x10b58, 0x10b72,
+ 0x10b78, 0x10b7f,
+ 0x10c00, 0x10c48,
+ 0x10e60, 0x10e7e,
+ 0x11082, 0x110b2,
+ 0x110b7, 0x110b8,
+ 0x110bb, 0x110bc,
+ 0x110be, 0x110c1,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x12470, 0x12473,
+ 0x13000, 0x1342e,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d164,
+ 0x1d166, 0x1d166,
+ 0x1d16a, 0x1d16d,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d200, 0x1d241,
+ 0x1d245, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f100, 0x1f10a,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f200, 0x1f200,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_Grapheme_Base */
+
+/* 'Grapheme_Link': Derived Property */
+static const OnigCodePoint CR_Grapheme_Link[] = {
+ 26,
+ 0x094d, 0x094d,
+ 0x09cd, 0x09cd,
+ 0x0a4d, 0x0a4d,
+ 0x0acd, 0x0acd,
+ 0x0b4d, 0x0b4d,
+ 0x0bcd, 0x0bcd,
+ 0x0c4d, 0x0c4d,
+ 0x0ccd, 0x0ccd,
+ 0x0d4d, 0x0d4d,
+ 0x0dca, 0x0dca,
+ 0x0e3a, 0x0e3a,
+ 0x0f84, 0x0f84,
+ 0x1039, 0x103a,
+ 0x1714, 0x1714,
+ 0x1734, 0x1734,
+ 0x17d2, 0x17d2,
+ 0x1a60, 0x1a60,
+ 0x1b44, 0x1b44,
+ 0x1baa, 0x1baa,
+ 0xa806, 0xa806,
+ 0xa8c4, 0xa8c4,
+ 0xa953, 0xa953,
+ 0xa9c0, 0xa9c0,
+ 0xabed, 0xabed,
+ 0x10a3f, 0x10a3f,
+ 0x110b9, 0x110b9,
+}; /* CR_Grapheme_Link */
+
+/* 'Common': Script */
+static const OnigCodePoint CR_Common[] = {
+ 159,
+ 0x0000, 0x0040,
+ 0x005b, 0x0060,
+ 0x007b, 0x00a9,
+ 0x00ab, 0x00b9,
+ 0x00bb, 0x00bf,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x02b9, 0x02df,
+ 0x02e5, 0x02ff,
+ 0x0374, 0x0374,
+ 0x037e, 0x037e,
+ 0x0385, 0x0385,
+ 0x0387, 0x0387,
+ 0x0589, 0x0589,
+ 0x0600, 0x0603,
+ 0x060c, 0x060c,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0640, 0x0640,
+ 0x0660, 0x0669,
+ 0x06dd, 0x06dd,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0cf1, 0x0cf2,
+ 0x0e3f, 0x0e3f,
+ 0x0fd5, 0x0fd8,
+ 0x10fb, 0x10fb,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x1802, 0x1803,
+ 0x1805, 0x1805,
+ 0x1cd3, 0x1cd3,
+ 0x1ce1, 0x1ce1,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf2,
+ 0x2000, 0x200b,
+ 0x200e, 0x2064,
+ 0x206a, 0x2070,
+ 0x2074, 0x207e,
+ 0x2080, 0x208e,
+ 0x20a0, 0x20b8,
+ 0x2100, 0x2125,
+ 0x2127, 0x2129,
+ 0x212c, 0x2131,
+ 0x2133, 0x214d,
+ 0x214f, 0x215f,
+ 0x2189, 0x2189,
+ 0x2190, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x27ff,
+ 0x2900, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2e00, 0x2e31,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x3004,
+ 0x3006, 0x3006,
+ 0x3008, 0x3020,
+ 0x3030, 0x3037,
+ 0x303c, 0x303f,
+ 0x309b, 0x309c,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fc,
+ 0x3190, 0x319f,
+ 0x31c0, 0x31e3,
+ 0x3220, 0x325f,
+ 0x327f, 0x32cf,
+ 0x3358, 0x33ff,
+ 0x4dc0, 0x4dff,
+ 0xa700, 0xa721,
+ 0xa788, 0xa78a,
+ 0xa830, 0xa839,
+ 0xfd3e, 0xfd3f,
+ 0xfdfd, 0xfdfd,
+ 0xfe10, 0xfe19,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xff20,
+ 0xff3b, 0xff40,
+ 0xff5b, 0xff65,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fc,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d166,
+ 0x1d16a, 0x1d17a,
+ 0x1d183, 0x1d184,
+ 0x1d18c, 0x1d1a9,
+ 0x1d1ae, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f100, 0x1f10a,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+}; /* CR_Common */
+
+/* 'Latin': Script */
+static const OnigCodePoint CR_Latin[] = {
+ 28,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02b8,
+ 0x02e0, 0x02e4,
+ 0x1d00, 0x1d25,
+ 0x1d2c, 0x1d5c,
+ 0x1d62, 0x1d65,
+ 0x1d6b, 0x1d77,
+ 0x1d79, 0x1dbe,
+ 0x1e00, 0x1eff,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x212a, 0x212b,
+ 0x2132, 0x2132,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x2c60, 0x2c7f,
+ 0xa722, 0xa787,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa7ff,
+ 0xfb00, 0xfb06,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+}; /* CR_Latin */
+
+/* 'Greek': Script */
+static const OnigCodePoint CR_Greek[] = {
+ 33,
+ 0x0370, 0x0373,
+ 0x0375, 0x0377,
+ 0x037a, 0x037d,
+ 0x0384, 0x0384,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03e1,
+ 0x03f0, 0x03ff,
+ 0x1d26, 0x1d2a,
+ 0x1d5d, 0x1d61,
+ 0x1d66, 0x1d6a,
+ 0x1dbf, 0x1dbf,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2126, 0x2126,
+ 0x10140, 0x1018a,
+ 0x1d200, 0x1d245,
+}; /* CR_Greek */
+
+/* 'Cyrillic': Script */
+static const OnigCodePoint CR_Cyrillic[] = {
+ 8,
+ 0x0400, 0x0484,
+ 0x0487, 0x0525,
+ 0x1d2b, 0x1d2b,
+ 0x1d78, 0x1d78,
+ 0x2de0, 0x2dff,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa673,
+ 0xa67c, 0xa697,
+}; /* CR_Cyrillic */
+
+/* 'Armenian': Script */
+static const OnigCodePoint CR_Armenian[] = {
+ 5,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x058a, 0x058a,
+ 0xfb13, 0xfb17,
+}; /* CR_Armenian */
+
+/* 'Hebrew': Script */
+static const OnigCodePoint CR_Hebrew[] = {
+ 9,
+ 0x0591, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfb4f,
+}; /* CR_Hebrew */
+
+/* 'Arabic': Script */
+static const OnigCodePoint CR_Arabic[] = {
+ 18,
+ 0x0606, 0x060b,
+ 0x060d, 0x061a,
+ 0x061e, 0x061e,
+ 0x0621, 0x063f,
+ 0x0641, 0x064a,
+ 0x0656, 0x065e,
+ 0x066a, 0x066f,
+ 0x0671, 0x06dc,
+ 0x06de, 0x06ff,
+ 0x0750, 0x077f,
+ 0xfb50, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfc,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0x10e60, 0x10e7e,
+}; /* CR_Arabic */
+
+/* 'Syriac': Script */
+static const OnigCodePoint CR_Syriac[] = {
+ 3,
+ 0x0700, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+}; /* CR_Syriac */
+
+/* 'Thaana': Script */
+static const OnigCodePoint CR_Thaana[] = {
+ 1,
+ 0x0780, 0x07b1,
+}; /* CR_Thaana */
+
+/* 'Devanagari': Script */
+static const OnigCodePoint CR_Devanagari[] = {
+ 9,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0950,
+ 0x0953, 0x0955,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0xa8e0, 0xa8fb,
+}; /* CR_Devanagari */
+
+/* 'Bengali': Script */
+static const OnigCodePoint CR_Bengali[] = {
+ 14,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fb,
+}; /* CR_Bengali */
+
+/* 'Gurmukhi': Script */
+static const OnigCodePoint CR_Gurmukhi[] = {
+ 16,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+}; /* CR_Gurmukhi */
+
+/* 'Gujarati': Script */
+static const OnigCodePoint CR_Gujarati[] = {
+ 14,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+}; /* CR_Gujarati */
+
+/* 'Oriya': Script */
+static const OnigCodePoint CR_Oriya[] = {
+ 14,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b71,
+}; /* CR_Oriya */
+
+/* 'Tamil': Script */
+static const OnigCodePoint CR_Tamil[] = {
+ 16,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+}; /* CR_Tamil */
+
+/* 'Telugu': Script */
+static const OnigCodePoint CR_Telugu[] = {
+ 14,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7f,
+}; /* CR_Telugu */
+
+/* 'Kannada': Script */
+static const OnigCodePoint CR_Kannada[] = {
+ 13,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+}; /* CR_Kannada */
+
+/* 'Malayalam': Script */
+static const OnigCodePoint CR_Malayalam[] = {
+ 12,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d75,
+ 0x0d79, 0x0d7f,
+}; /* CR_Malayalam */
+
+/* 'Sinhala': Script */
+static const OnigCodePoint CR_Sinhala[] = {
+ 11,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+}; /* CR_Sinhala */
+
+/* 'Thai': Script */
+static const OnigCodePoint CR_Thai[] = {
+ 2,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e5b,
+}; /* CR_Thai */
+
+/* 'Lao': Script */
+static const OnigCodePoint CR_Lao[] = {
+ 18,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+}; /* CR_Lao */
+
+/* 'Tibetan': Script */
+static const OnigCodePoint CR_Tibetan[] = {
+ 7,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fce, 0x0fd4,
+}; /* CR_Tibetan */
+
+/* 'Myanmar': Script */
+static const OnigCodePoint CR_Myanmar[] = {
+ 2,
+ 0x1000, 0x109f,
+ 0xaa60, 0xaa7b,
+}; /* CR_Myanmar */
+
+/* 'Georgian': Script */
+static const OnigCodePoint CR_Georgian[] = {
+ 4,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x2d00, 0x2d25,
+}; /* CR_Georgian */
+
+/* 'Hangul': Script */
+static const OnigCodePoint CR_Hangul[] = {
+ 13,
+ 0x1100, 0x11ff,
+ 0x3131, 0x318e,
+ 0x3200, 0x321e,
+ 0x3260, 0x327e,
+ 0xa960, 0xa97c,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xffa0, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+}; /* CR_Hangul */
+
+/* 'Ethiopic': Script */
+static const OnigCodePoint CR_Ethiopic[] = {
+ 27,
+ 0x1200, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+}; /* CR_Ethiopic */
+
+/* 'Cherokee': Script */
+static const OnigCodePoint CR_Cherokee[] = {
+ 1,
+ 0x13a0, 0x13f4,
+}; /* CR_Cherokee */
+
+/* 'Canadian_Aboriginal': Script */
+static const OnigCodePoint CR_Canadian_Aboriginal[] = {
+ 2,
+ 0x1400, 0x167f,
+ 0x18b0, 0x18f5,
+}; /* CR_Canadian_Aboriginal */
+
+/* 'Ogham': Script */
+static const OnigCodePoint CR_Ogham[] = {
+ 1,
+ 0x1680, 0x169c,
+}; /* CR_Ogham */
+
+/* 'Runic': Script */
+static const OnigCodePoint CR_Runic[] = {
+ 2,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+}; /* CR_Runic */
+
+/* 'Khmer': Script */
+static const OnigCodePoint CR_Khmer[] = {
+ 4,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x19e0, 0x19ff,
+}; /* CR_Khmer */
+
+/* 'Mongolian': Script */
+static const OnigCodePoint CR_Mongolian[] = {
+ 6,
+ 0x1800, 0x1801,
+ 0x1804, 0x1804,
+ 0x1806, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+}; /* CR_Mongolian */
+
+/* 'Hiragana': Script */
+static const OnigCodePoint CR_Hiragana[] = {
+ 3,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x1f200, 0x1f200,
+}; /* CR_Hiragana */
+
+/* 'Katakana': Script */
+static const OnigCodePoint CR_Katakana[] = {
+ 7,
+ 0x30a1, 0x30fa,
+ 0x30fd, 0x30ff,
+ 0x31f0, 0x31ff,
+ 0x32d0, 0x32fe,
+ 0x3300, 0x3357,
+ 0xff66, 0xff6f,
+ 0xff71, 0xff9d,
+}; /* CR_Katakana */
+
+/* 'Bopomofo': Script */
+static const OnigCodePoint CR_Bopomofo[] = {
+ 2,
+ 0x3105, 0x312d,
+ 0x31a0, 0x31b7,
+}; /* CR_Bopomofo */
+
+/* 'Han': Script */
+static const OnigCodePoint CR_Han[] = {
+ 15,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x3005, 0x3005,
+ 0x3007, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303b,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_Han */
+
+/* 'Yi': Script */
+static const OnigCodePoint CR_Yi[] = {
+ 2,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+}; /* CR_Yi */
+
+/* 'Old_Italic': Script */
+static const OnigCodePoint CR_Old_Italic[] = {
+ 2,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+}; /* CR_Old_Italic */
+
+/* 'Gothic': Script */
+static const OnigCodePoint CR_Gothic[] = {
+ 1,
+ 0x10330, 0x1034a,
+}; /* CR_Gothic */
+
+/* 'Deseret': Script */
+static const OnigCodePoint CR_Deseret[] = {
+ 1,
+ 0x10400, 0x1044f,
+}; /* CR_Deseret */
+
+/* 'Inherited': Script */
+static const OnigCodePoint CR_Inherited[] = {
+ 23,
+ 0x0300, 0x036f,
+ 0x0485, 0x0486,
+ 0x064b, 0x0655,
+ 0x0670, 0x0670,
+ 0x0951, 0x0952,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1ce0,
+ 0x1ce2, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1dc0, 0x1de6,
+ 0x1dfd, 0x1dff,
+ 0x200c, 0x200d,
+ 0x20d0, 0x20f0,
+ 0x302a, 0x302f,
+ 0x3099, 0x309a,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0x101fd, 0x101fd,
+ 0x1d167, 0x1d169,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0xe0100, 0xe01ef,
+}; /* CR_Inherited */
+
+/* 'Tagalog': Script */
+static const OnigCodePoint CR_Tagalog[] = {
+ 2,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+}; /* CR_Tagalog */
+
+/* 'Hanunoo': Script */
+static const OnigCodePoint CR_Hanunoo[] = {
+ 1,
+ 0x1720, 0x1734,
+}; /* CR_Hanunoo */
+
+/* 'Buhid': Script */
+static const OnigCodePoint CR_Buhid[] = {
+ 1,
+ 0x1740, 0x1753,
+}; /* CR_Buhid */
+
+/* 'Tagbanwa': Script */
+static const OnigCodePoint CR_Tagbanwa[] = {
+ 3,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+}; /* CR_Tagbanwa */
+
+/* 'Limbu': Script */
+static const OnigCodePoint CR_Limbu[] = {
+ 5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x194f,
+}; /* CR_Limbu */
+
+/* 'Tai_Le': Script */
+static const OnigCodePoint CR_Tai_Le[] = {
+ 2,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+}; /* CR_Tai_Le */
+
+/* 'Linear_B': Script */
+static const OnigCodePoint CR_Linear_B[] = {
+ 7,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+}; /* CR_Linear_B */
+
+/* 'Ugaritic': Script */
+static const OnigCodePoint CR_Ugaritic[] = {
+ 2,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+}; /* CR_Ugaritic */
+
+/* 'Shavian': Script */
+static const OnigCodePoint CR_Shavian[] = {
+ 1,
+ 0x10450, 0x1047f,
+}; /* CR_Shavian */
+
+/* 'Osmanya': Script */
+static const OnigCodePoint CR_Osmanya[] = {
+ 2,
+ 0x10480, 0x1049d,
+ 0x104a0, 0x104a9,
+}; /* CR_Osmanya */
+
+/* 'Cypriot': Script */
+static const OnigCodePoint CR_Cypriot[] = {
+ 6,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+}; /* CR_Cypriot */
+
+/* 'Braille': Script */
+static const OnigCodePoint CR_Braille[] = {
+ 1,
+ 0x2800, 0x28ff,
+}; /* CR_Braille */
+
+/* 'Buginese': Script */
+static const OnigCodePoint CR_Buginese[] = {
+ 2,
+ 0x1a00, 0x1a1b,
+ 0x1a1e, 0x1a1f,
+}; /* CR_Buginese */
+
+/* 'Coptic': Script */
+static const OnigCodePoint CR_Coptic[] = {
+ 3,
+ 0x03e2, 0x03ef,
+ 0x2c80, 0x2cf1,
+ 0x2cf9, 0x2cff,
+}; /* CR_Coptic */
+
+/* 'New_Tai_Lue': Script */
+static const OnigCodePoint CR_New_Tai_Lue[] = {
+ 4,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x19df,
+}; /* CR_New_Tai_Lue */
+
+/* 'Glagolitic': Script */
+static const OnigCodePoint CR_Glagolitic[] = {
+ 2,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+}; /* CR_Glagolitic */
+
+/* 'Tifinagh': Script */
+static const OnigCodePoint CR_Tifinagh[] = {
+ 2,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+}; /* CR_Tifinagh */
+
+/* 'Syloti_Nagri': Script */
+static const OnigCodePoint CR_Syloti_Nagri[] = {
+ 1,
+ 0xa800, 0xa82b,
+}; /* CR_Syloti_Nagri */
+
+/* 'Old_Persian': Script */
+static const OnigCodePoint CR_Old_Persian[] = {
+ 2,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103d5,
+}; /* CR_Old_Persian */
+
+/* 'Kharoshthi': Script */
+static const OnigCodePoint CR_Kharoshthi[] = {
+ 8,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+}; /* CR_Kharoshthi */
+
+/* 'Balinese': Script */
+static const OnigCodePoint CR_Balinese[] = {
+ 2,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b7c,
+}; /* CR_Balinese */
+
+/* 'Cuneiform': Script */
+static const OnigCodePoint CR_Cuneiform[] = {
+ 3,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x12470, 0x12473,
+}; /* CR_Cuneiform */
+
+/* 'Phoenician': Script */
+static const OnigCodePoint CR_Phoenician[] = {
+ 2,
+ 0x10900, 0x1091b,
+ 0x1091f, 0x1091f,
+}; /* CR_Phoenician */
+
+/* 'Phags_Pa': Script */
+static const OnigCodePoint CR_Phags_Pa[] = {
+ 1,
+ 0xa840, 0xa877,
+}; /* CR_Phags_Pa */
+
+/* 'Nko': Script */
+static const OnigCodePoint CR_Nko[] = {
+ 1,
+ 0x07c0, 0x07fa,
+}; /* CR_Nko */
+
+/* 'Sundanese': Script */
+static const OnigCodePoint CR_Sundanese[] = {
+ 2,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+}; /* CR_Sundanese */
+
+/* 'Lepcha': Script */
+static const OnigCodePoint CR_Lepcha[] = {
+ 3,
+ 0x1c00, 0x1c37,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c4f,
+}; /* CR_Lepcha */
+
+/* 'Ol_Chiki': Script */
+static const OnigCodePoint CR_Ol_Chiki[] = {
+ 1,
+ 0x1c50, 0x1c7f,
+}; /* CR_Ol_Chiki */
+
+/* 'Vai': Script */
+static const OnigCodePoint CR_Vai[] = {
+ 1,
+ 0xa500, 0xa62b,
+}; /* CR_Vai */
+
+/* 'Saurashtra': Script */
+static const OnigCodePoint CR_Saurashtra[] = {
+ 2,
+ 0xa880, 0xa8c4,
+ 0xa8ce, 0xa8d9,
+}; /* CR_Saurashtra */
+
+/* 'Kayah_Li': Script */
+static const OnigCodePoint CR_Kayah_Li[] = {
+ 1,
+ 0xa900, 0xa92f,
+}; /* CR_Kayah_Li */
+
+/* 'Rejang': Script */
+static const OnigCodePoint CR_Rejang[] = {
+ 2,
+ 0xa930, 0xa953,
+ 0xa95f, 0xa95f,
+}; /* CR_Rejang */
+
+/* 'Lycian': Script */
+static const OnigCodePoint CR_Lycian[] = {
+ 1,
+ 0x10280, 0x1029c,
+}; /* CR_Lycian */
+
+/* 'Carian': Script */
+static const OnigCodePoint CR_Carian[] = {
+ 1,
+ 0x102a0, 0x102d0,
+}; /* CR_Carian */
+
+/* 'Lydian': Script */
+static const OnigCodePoint CR_Lydian[] = {
+ 2,
+ 0x10920, 0x10939,
+ 0x1093f, 0x1093f,
+}; /* CR_Lydian */
+
+/* 'Cham': Script */
+static const OnigCodePoint CR_Cham[] = {
+ 4,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa5f,
+}; /* CR_Cham */
+
+/* 'Tai_Tham': Script */
+static const OnigCodePoint CR_Tai_Tham[] = {
+ 5,
+ 0x1a20, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad,
+}; /* CR_Tai_Tham */
+
+/* 'Tai_Viet': Script */
+static const OnigCodePoint CR_Tai_Viet[] = {
+ 2,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadf,
+}; /* CR_Tai_Viet */
+
+/* 'Avestan': Script */
+static const OnigCodePoint CR_Avestan[] = {
+ 2,
+ 0x10b00, 0x10b35,
+ 0x10b39, 0x10b3f,
+}; /* CR_Avestan */
+
+/* 'Egyptian_Hieroglyphs': Script */
+static const OnigCodePoint CR_Egyptian_Hieroglyphs[] = {
+ 1,
+ 0x13000, 0x1342e,
+}; /* CR_Egyptian_Hieroglyphs */
+
+/* 'Samaritan': Script */
+static const OnigCodePoint CR_Samaritan[] = {
+ 2,
+ 0x0800, 0x082d,
+ 0x0830, 0x083e,
+}; /* CR_Samaritan */
+
+/* 'Lisu': Script */
+static const OnigCodePoint CR_Lisu[] = {
+ 1,
+ 0xa4d0, 0xa4ff,
+}; /* CR_Lisu */
+
+/* 'Bamum': Script */
+static const OnigCodePoint CR_Bamum[] = {
+ 1,
+ 0xa6a0, 0xa6f7,
+}; /* CR_Bamum */
+
+/* 'Javanese': Script */
+static const OnigCodePoint CR_Javanese[] = {
+ 3,
+ 0xa980, 0xa9cd,
+ 0xa9cf, 0xa9d9,
+ 0xa9de, 0xa9df,
+}; /* CR_Javanese */
+
+/* 'Meetei_Mayek': Script */
+static const OnigCodePoint CR_Meetei_Mayek[] = {
+ 2,
+ 0xabc0, 0xabed,
+ 0xabf0, 0xabf9,
+}; /* CR_Meetei_Mayek */
+
+/* 'Imperial_Aramaic': Script */
+static const OnigCodePoint CR_Imperial_Aramaic[] = {
+ 2,
+ 0x10840, 0x10855,
+ 0x10857, 0x1085f,
+}; /* CR_Imperial_Aramaic */
+
+/* 'Old_South_Arabian': Script */
+static const OnigCodePoint CR_Old_South_Arabian[] = {
+ 1,
+ 0x10a60, 0x10a7f,
+}; /* CR_Old_South_Arabian */
+
+/* 'Inscriptional_Parthian': Script */
+static const OnigCodePoint CR_Inscriptional_Parthian[] = {
+ 2,
+ 0x10b40, 0x10b55,
+ 0x10b58, 0x10b5f,
+}; /* CR_Inscriptional_Parthian */
+
+/* 'Inscriptional_Pahlavi': Script */
+static const OnigCodePoint CR_Inscriptional_Pahlavi[] = {
+ 2,
+ 0x10b60, 0x10b72,
+ 0x10b78, 0x10b7f,
+}; /* CR_Inscriptional_Pahlavi */
+
+/* 'Old_Turkic': Script */
+static const OnigCodePoint CR_Old_Turkic[] = {
+ 1,
+ 0x10c00, 0x10c48,
+}; /* CR_Old_Turkic */
+
+/* 'Kaithi': Script */
+static const OnigCodePoint CR_Kaithi[] = {
+ 1,
+ 0x11080, 0x110c1,
+}; /* CR_Kaithi */
+
+/* 'White_Space': Binary Property */
+static const OnigCodePoint CR_White_Space[] = {
+ 11,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+}; /* CR_White_Space */
+
+/* 'Bidi_Control': Binary Property */
+static const OnigCodePoint CR_Bidi_Control[] = {
+ 2,
+ 0x200e, 0x200f,
+ 0x202a, 0x202e,
+}; /* CR_Bidi_Control */
+
+/* 'Join_Control': Binary Property */
+static const OnigCodePoint CR_Join_Control[] = {
+ 1,
+ 0x200c, 0x200d,
+}; /* CR_Join_Control */
+
+/* 'Dash': Binary Property */
+static const OnigCodePoint CR_Dash[] = {
+ 19,
+ 0x002d, 0x002d,
+ 0x058a, 0x058a,
+ 0x05be, 0x05be,
+ 0x1400, 0x1400,
+ 0x1806, 0x1806,
+ 0x2010, 0x2015,
+ 0x2053, 0x2053,
+ 0x207b, 0x207b,
+ 0x208b, 0x208b,
+ 0x2212, 0x2212,
+ 0x2e17, 0x2e17,
+ 0x2e1a, 0x2e1a,
+ 0x301c, 0x301c,
+ 0x3030, 0x3030,
+ 0x30a0, 0x30a0,
+ 0xfe31, 0xfe32,
+ 0xfe58, 0xfe58,
+ 0xfe63, 0xfe63,
+ 0xff0d, 0xff0d,
+}; /* CR_Dash */
+
+/* 'Hyphen': Binary Property */
+static const OnigCodePoint CR_Hyphen[] = {
+ 10,
+ 0x002d, 0x002d,
+ 0x00ad, 0x00ad,
+ 0x058a, 0x058a,
+ 0x1806, 0x1806,
+ 0x2010, 0x2011,
+ 0x2e17, 0x2e17,
+ 0x30fb, 0x30fb,
+ 0xfe63, 0xfe63,
+ 0xff0d, 0xff0d,
+ 0xff65, 0xff65,
+}; /* CR_Hyphen */
+
+/* 'Quotation_Mark': Binary Property */
+static const OnigCodePoint CR_Quotation_Mark[] = {
+ 12,
+ 0x0022, 0x0022,
+ 0x0027, 0x0027,
+ 0x00ab, 0x00ab,
+ 0x00bb, 0x00bb,
+ 0x2018, 0x201f,
+ 0x2039, 0x203a,
+ 0x300c, 0x300f,
+ 0x301d, 0x301f,
+ 0xfe41, 0xfe44,
+ 0xff02, 0xff02,
+ 0xff07, 0xff07,
+ 0xff62, 0xff63,
+}; /* CR_Quotation_Mark */
+
+/* 'Terminal_Punctuation': Binary Property */
+static const OnigCodePoint CR_Terminal_Punctuation[] = {
+ 65,
+ 0x0021, 0x0021,
+ 0x002c, 0x002c,
+ 0x002e, 0x002e,
+ 0x003a, 0x003b,
+ 0x003f, 0x003f,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x0589, 0x0589,
+ 0x05c3, 0x05c3,
+ 0x060c, 0x060c,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070a,
+ 0x070c, 0x070c,
+ 0x07f8, 0x07f9,
+ 0x0830, 0x083e,
+ 0x0964, 0x0965,
+ 0x0e5a, 0x0e5b,
+ 0x0f08, 0x0f08,
+ 0x0f0d, 0x0f12,
+ 0x104a, 0x104b,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x16eb, 0x16ed,
+ 0x17d4, 0x17d6,
+ 0x17da, 0x17da,
+ 0x1802, 0x1805,
+ 0x1808, 0x1809,
+ 0x1944, 0x1945,
+ 0x1aa8, 0x1aab,
+ 0x1b5a, 0x1b5b,
+ 0x1b5d, 0x1b5f,
+ 0x1c3b, 0x1c3f,
+ 0x1c7e, 0x1c7f,
+ 0x203c, 0x203d,
+ 0x2047, 0x2049,
+ 0x2e2e, 0x2e2e,
+ 0x3001, 0x3002,
+ 0xa4fe, 0xa4ff,
+ 0xa60d, 0xa60f,
+ 0xa6f3, 0xa6f7,
+ 0xa876, 0xa877,
+ 0xa8ce, 0xa8cf,
+ 0xa92f, 0xa92f,
+ 0xa9c7, 0xa9c9,
+ 0xaa5d, 0xaa5f,
+ 0xaadf, 0xaadf,
+ 0xabeb, 0xabeb,
+ 0xfe50, 0xfe52,
+ 0xfe54, 0xfe57,
+ 0xff01, 0xff01,
+ 0xff0c, 0xff0c,
+ 0xff0e, 0xff0e,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff1f,
+ 0xff61, 0xff61,
+ 0xff64, 0xff64,
+ 0x1039f, 0x1039f,
+ 0x103d0, 0x103d0,
+ 0x10857, 0x10857,
+ 0x1091f, 0x1091f,
+ 0x10b3a, 0x10b3f,
+ 0x110be, 0x110c1,
+ 0x12470, 0x12473,
+}; /* CR_Terminal_Punctuation */
+
+/* 'Other_Math': Binary Property */
+static const OnigCodePoint CR_Other_Math[] = {
+ 99,
+ 0x005e, 0x005e,
+ 0x03d0, 0x03d2,
+ 0x03d5, 0x03d5,
+ 0x03f0, 0x03f1,
+ 0x03f4, 0x03f5,
+ 0x2016, 0x2016,
+ 0x2032, 0x2034,
+ 0x2040, 0x2040,
+ 0x2061, 0x2064,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x20d0, 0x20dc,
+ 0x20e1, 0x20e1,
+ 0x20e5, 0x20e6,
+ 0x20eb, 0x20ef,
+ 0x2102, 0x2102,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2128, 0x2129,
+ 0x212c, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2138,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x2195, 0x2199,
+ 0x219c, 0x219f,
+ 0x21a1, 0x21a2,
+ 0x21a4, 0x21a5,
+ 0x21a7, 0x21a7,
+ 0x21a9, 0x21ad,
+ 0x21b0, 0x21b1,
+ 0x21b6, 0x21b7,
+ 0x21bc, 0x21cd,
+ 0x21d0, 0x21d1,
+ 0x21d3, 0x21d3,
+ 0x21d5, 0x21db,
+ 0x21dd, 0x21dd,
+ 0x21e4, 0x21e5,
+ 0x23b4, 0x23b5,
+ 0x23b7, 0x23b7,
+ 0x23d0, 0x23d0,
+ 0x23e2, 0x23e2,
+ 0x25a0, 0x25a1,
+ 0x25ae, 0x25b6,
+ 0x25bc, 0x25c0,
+ 0x25c6, 0x25c7,
+ 0x25ca, 0x25cb,
+ 0x25cf, 0x25d3,
+ 0x25e2, 0x25e2,
+ 0x25e4, 0x25e4,
+ 0x25e7, 0x25ec,
+ 0x2605, 0x2606,
+ 0x2640, 0x2640,
+ 0x2642, 0x2642,
+ 0x2660, 0x2663,
+ 0x266d, 0x266e,
+ 0x27c5, 0x27c6,
+ 0x27e6, 0x27ef,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0xfe61, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xff3c, 0xff3c,
+ 0xff3e, 0xff3e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+}; /* CR_Other_Math */
+
+/* 'Hex_Digit': Binary Property */
+static const OnigCodePoint CR_Hex_Digit[] = {
+ 6,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066,
+ 0xff10, 0xff19,
+ 0xff21, 0xff26,
+ 0xff41, 0xff46,
+}; /* CR_Hex_Digit */
+
+/* 'ASCII_Hex_Digit': Binary Property */
+static const OnigCodePoint CR_ASCII_Hex_Digit[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066,
+}; /* CR_ASCII_Hex_Digit */
+
+/* 'Other_Alphabetic': Binary Property */
+static const OnigCodePoint CR_Other_Alphabetic[] = {
+ 141,
+ 0x0345, 0x0345,
+ 0x05b0, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x0610, 0x061a,
+ 0x064b, 0x0657,
+ 0x0659, 0x065e,
+ 0x0670, 0x0670,
+ 0x06d6, 0x06dc,
+ 0x06e1, 0x06e4,
+ 0x06e7, 0x06e8,
+ 0x06ed, 0x06ed,
+ 0x0711, 0x0711,
+ 0x0730, 0x073f,
+ 0x07a6, 0x07b0,
+ 0x0816, 0x0817,
+ 0x081b, 0x0823,
+ 0x0825, 0x0827,
+ 0x0829, 0x082c,
+ 0x0900, 0x0903,
+ 0x093e, 0x094c,
+ 0x094e, 0x094e,
+ 0x0955, 0x0955,
+ 0x0962, 0x0963,
+ 0x0981, 0x0983,
+ 0x09be, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09d7, 0x09d7,
+ 0x09e2, 0x09e3,
+ 0x0a01, 0x0a03,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4c,
+ 0x0a51, 0x0a51,
+ 0x0a70, 0x0a71,
+ 0x0a75, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0abe, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0ae2, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b3e, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b56, 0x0b57,
+ 0x0b62, 0x0b63,
+ 0x0b82, 0x0b82,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4c,
+ 0x0c55, 0x0c56,
+ 0x0c62, 0x0c63,
+ 0x0c82, 0x0c83,
+ 0x0cbe, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccc,
+ 0x0cd5, 0x0cd6,
+ 0x0ce2, 0x0ce3,
+ 0x0d02, 0x0d03,
+ 0x0d3e, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d57, 0x0d57,
+ 0x0d62, 0x0d63,
+ 0x0d82, 0x0d83,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e31, 0x0e31,
+ 0x0e34, 0x0e3a,
+ 0x0e4d, 0x0e4d,
+ 0x0eb1, 0x0eb1,
+ 0x0eb4, 0x0eb9,
+ 0x0ebb, 0x0ebc,
+ 0x0ecd, 0x0ecd,
+ 0x0f71, 0x0f81,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x102b, 0x1036,
+ 0x1038, 0x1038,
+ 0x103b, 0x103e,
+ 0x1056, 0x1059,
+ 0x105e, 0x1060,
+ 0x1062, 0x1062,
+ 0x1067, 0x1068,
+ 0x1071, 0x1074,
+ 0x1082, 0x1086,
+ 0x109c, 0x109d,
+ 0x135f, 0x135f,
+ 0x1712, 0x1713,
+ 0x1732, 0x1733,
+ 0x1752, 0x1753,
+ 0x1772, 0x1773,
+ 0x17b6, 0x17c8,
+ 0x18a9, 0x18a9,
+ 0x1920, 0x192b,
+ 0x1930, 0x1938,
+ 0x19b0, 0x19c0,
+ 0x19c8, 0x19c9,
+ 0x1a17, 0x1a1b,
+ 0x1a55, 0x1a5e,
+ 0x1a61, 0x1a74,
+ 0x1b00, 0x1b04,
+ 0x1b35, 0x1b43,
+ 0x1b80, 0x1b82,
+ 0x1ba1, 0x1ba9,
+ 0x1c24, 0x1c35,
+ 0x1cf2, 0x1cf2,
+ 0x24b6, 0x24e9,
+ 0x2de0, 0x2dff,
+ 0xa823, 0xa827,
+ 0xa880, 0xa881,
+ 0xa8b4, 0xa8c3,
+ 0xa926, 0xa92a,
+ 0xa947, 0xa952,
+ 0xa980, 0xa983,
+ 0xa9b3, 0xa9bf,
+ 0xaa29, 0xaa36,
+ 0xaa43, 0xaa43,
+ 0xaa4c, 0xaa4d,
+ 0xaab0, 0xaab0,
+ 0xaab2, 0xaab4,
+ 0xaab7, 0xaab8,
+ 0xaabe, 0xaabe,
+ 0xabe3, 0xabea,
+ 0xfb1e, 0xfb1e,
+ 0x10a01, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a0f,
+ 0x11082, 0x11082,
+ 0x110b0, 0x110b8,
+}; /* CR_Other_Alphabetic */
+
+/* 'Ideographic': Binary Property */
+static const OnigCodePoint CR_Ideographic[] = {
+ 11,
+ 0x3006, 0x3007,
+ 0x3021, 0x3029,
+ 0x3038, 0x303a,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_Ideographic */
+
+/* 'Diacritic': Binary Property */
+static const OnigCodePoint CR_Diacritic[] = {
+ 117,
+ 0x005e, 0x005e,
+ 0x0060, 0x0060,
+ 0x00a8, 0x00a8,
+ 0x00af, 0x00af,
+ 0x00b4, 0x00b4,
+ 0x00b7, 0x00b8,
+ 0x02b0, 0x034e,
+ 0x0350, 0x0357,
+ 0x035d, 0x0362,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x0384, 0x0385,
+ 0x0483, 0x0487,
+ 0x0559, 0x0559,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x064b, 0x0652,
+ 0x0657, 0x0658,
+ 0x06df, 0x06e0,
+ 0x06e5, 0x06e6,
+ 0x06ea, 0x06ec,
+ 0x0730, 0x074a,
+ 0x07a6, 0x07b0,
+ 0x07eb, 0x07f5,
+ 0x0818, 0x0819,
+ 0x093c, 0x093c,
+ 0x094d, 0x094d,
+ 0x0951, 0x0954,
+ 0x0971, 0x0971,
+ 0x09bc, 0x09bc,
+ 0x09cd, 0x09cd,
+ 0x0a3c, 0x0a3c,
+ 0x0a4d, 0x0a4d,
+ 0x0abc, 0x0abc,
+ 0x0acd, 0x0acd,
+ 0x0b3c, 0x0b3c,
+ 0x0b4d, 0x0b4d,
+ 0x0bcd, 0x0bcd,
+ 0x0c4d, 0x0c4d,
+ 0x0cbc, 0x0cbc,
+ 0x0ccd, 0x0ccd,
+ 0x0d4d, 0x0d4d,
+ 0x0dca, 0x0dca,
+ 0x0e47, 0x0e4c,
+ 0x0e4e, 0x0e4e,
+ 0x0ec8, 0x0ecc,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f3f,
+ 0x0f82, 0x0f84,
+ 0x0f86, 0x0f87,
+ 0x0fc6, 0x0fc6,
+ 0x1037, 0x1037,
+ 0x1039, 0x103a,
+ 0x1087, 0x108d,
+ 0x108f, 0x108f,
+ 0x109a, 0x109b,
+ 0x17c9, 0x17d3,
+ 0x17dd, 0x17dd,
+ 0x1939, 0x193b,
+ 0x1a75, 0x1a7c,
+ 0x1a7f, 0x1a7f,
+ 0x1b34, 0x1b34,
+ 0x1b44, 0x1b44,
+ 0x1b6b, 0x1b73,
+ 0x1baa, 0x1baa,
+ 0x1c36, 0x1c37,
+ 0x1c78, 0x1c7d,
+ 0x1cd0, 0x1ce8,
+ 0x1ced, 0x1ced,
+ 0x1d2c, 0x1d6a,
+ 0x1dc4, 0x1dcf,
+ 0x1dfd, 0x1dff,
+ 0x1fbd, 0x1fbd,
+ 0x1fbf, 0x1fc1,
+ 0x1fcd, 0x1fcf,
+ 0x1fdd, 0x1fdf,
+ 0x1fed, 0x1fef,
+ 0x1ffd, 0x1ffe,
+ 0x2cef, 0x2cf1,
+ 0x2e2f, 0x2e2f,
+ 0x302a, 0x302f,
+ 0x3099, 0x309c,
+ 0x30fc, 0x30fc,
+ 0xa66f, 0xa66f,
+ 0xa67c, 0xa67d,
+ 0xa67f, 0xa67f,
+ 0xa6f0, 0xa6f1,
+ 0xa717, 0xa721,
+ 0xa788, 0xa788,
+ 0xa8c4, 0xa8c4,
+ 0xa8e0, 0xa8f1,
+ 0xa92b, 0xa92e,
+ 0xa953, 0xa953,
+ 0xa9b3, 0xa9b3,
+ 0xa9c0, 0xa9c0,
+ 0xaa7b, 0xaa7b,
+ 0xaabf, 0xaac2,
+ 0xabec, 0xabed,
+ 0xfb1e, 0xfb1e,
+ 0xfe20, 0xfe26,
+ 0xff3e, 0xff3e,
+ 0xff40, 0xff40,
+ 0xff70, 0xff70,
+ 0xff9e, 0xff9f,
+ 0xffe3, 0xffe3,
+ 0x110b9, 0x110ba,
+ 0x1d167, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+}; /* CR_Diacritic */
+
+/* 'Extender': Binary Property */
+static const OnigCodePoint CR_Extender[] = {
+ 20,
+ 0x00b7, 0x00b7,
+ 0x02d0, 0x02d1,
+ 0x0640, 0x0640,
+ 0x07fa, 0x07fa,
+ 0x0e46, 0x0e46,
+ 0x0ec6, 0x0ec6,
+ 0x1843, 0x1843,
+ 0x1aa7, 0x1aa7,
+ 0x1c36, 0x1c36,
+ 0x1c7b, 0x1c7b,
+ 0x3005, 0x3005,
+ 0x3031, 0x3035,
+ 0x309d, 0x309e,
+ 0x30fc, 0x30fe,
+ 0xa015, 0xa015,
+ 0xa60c, 0xa60c,
+ 0xa9cf, 0xa9cf,
+ 0xaa70, 0xaa70,
+ 0xaadd, 0xaadd,
+ 0xff70, 0xff70,
+}; /* CR_Extender */
+
+/* 'Other_Lowercase': Binary Property */
+static const OnigCodePoint CR_Other_Lowercase[] = {
+ 13,
+ 0x02b0, 0x02b8,
+ 0x02c0, 0x02c1,
+ 0x02e0, 0x02e4,
+ 0x0345, 0x0345,
+ 0x037a, 0x037a,
+ 0x1d2c, 0x1d61,
+ 0x1d78, 0x1d78,
+ 0x1d9b, 0x1dbf,
+ 0x2090, 0x2094,
+ 0x2170, 0x217f,
+ 0x24d0, 0x24e9,
+ 0x2c7d, 0x2c7d,
+ 0xa770, 0xa770,
+}; /* CR_Other_Lowercase */
+
+/* 'Other_Uppercase': Binary Property */
+static const OnigCodePoint CR_Other_Uppercase[] = {
+ 2,
+ 0x2160, 0x216f,
+ 0x24b6, 0x24cf,
+}; /* CR_Other_Uppercase */
+
+/* 'Noncharacter_Code_Point': Binary Property */
+static const OnigCodePoint CR_Noncharacter_Code_Point[] = {
+ 18,
+ 0xfdd0, 0xfdef,
+ 0xfffe, 0xffff,
+ 0x1fffe, 0x1ffff,
+ 0x2fffe, 0x2ffff,
+ 0x3fffe, 0x3ffff,
+ 0x4fffe, 0x4ffff,
+ 0x5fffe, 0x5ffff,
+ 0x6fffe, 0x6ffff,
+ 0x7fffe, 0x7ffff,
+ 0x8fffe, 0x8ffff,
+ 0x9fffe, 0x9ffff,
+ 0xafffe, 0xaffff,
+ 0xbfffe, 0xbffff,
+ 0xcfffe, 0xcffff,
+ 0xdfffe, 0xdffff,
+ 0xefffe, 0xeffff,
+ 0xffffe, 0xfffff,
+ 0x10fffe, 0x10ffff,
+}; /* CR_Noncharacter_Code_Point */
+
+/* 'Other_Grapheme_Extend': Binary Property */
+static const OnigCodePoint CR_Other_Grapheme_Extend[] = {
+ 16,
+ 0x09be, 0x09be,
+ 0x09d7, 0x09d7,
+ 0x0b3e, 0x0b3e,
+ 0x0b57, 0x0b57,
+ 0x0bbe, 0x0bbe,
+ 0x0bd7, 0x0bd7,
+ 0x0cc2, 0x0cc2,
+ 0x0cd5, 0x0cd6,
+ 0x0d3e, 0x0d3e,
+ 0x0d57, 0x0d57,
+ 0x0dcf, 0x0dcf,
+ 0x0ddf, 0x0ddf,
+ 0x200c, 0x200d,
+ 0xff9e, 0xff9f,
+ 0x1d165, 0x1d165,
+ 0x1d16e, 0x1d172,
+}; /* CR_Other_Grapheme_Extend */
+
+/* 'IDS_Binary_Operator': Binary Property */
+static const OnigCodePoint CR_IDS_Binary_Operator[] = {
+ 2,
+ 0x2ff0, 0x2ff1,
+ 0x2ff4, 0x2ffb,
+}; /* CR_IDS_Binary_Operator */
+
+/* 'IDS_Trinary_Operator': Binary Property */
+static const OnigCodePoint CR_IDS_Trinary_Operator[] = {
+ 1,
+ 0x2ff2, 0x2ff3,
+}; /* CR_IDS_Trinary_Operator */
+
+/* 'Radical': Binary Property */
+static const OnigCodePoint CR_Radical[] = {
+ 3,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+}; /* CR_Radical */
+
+/* 'Unified_Ideograph': Binary Property */
+static const OnigCodePoint CR_Unified_Ideograph[] = {
+ 11,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xfa0e, 0xfa0f,
+ 0xfa11, 0xfa11,
+ 0xfa13, 0xfa14,
+ 0xfa1f, 0xfa1f,
+ 0xfa21, 0xfa21,
+ 0xfa23, 0xfa24,
+ 0xfa27, 0xfa29,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+}; /* CR_Unified_Ideograph */
+
+/* 'Other_Default_Ignorable_Code_Point': Binary Property */
+static const OnigCodePoint CR_Other_Default_Ignorable_Code_Point[] = {
+ 10,
+ 0x034f, 0x034f,
+ 0x115f, 0x1160,
+ 0x2065, 0x2069,
+ 0x3164, 0x3164,
+ 0xffa0, 0xffa0,
+ 0xfff0, 0xfff8,
+ 0xe0000, 0xe0000,
+ 0xe0002, 0xe001f,
+ 0xe0080, 0xe00ff,
+ 0xe01f0, 0xe0fff,
+}; /* CR_Other_Default_Ignorable_Code_Point */
+
+/* 'Deprecated': Binary Property */
+static const OnigCodePoint CR_Deprecated[] = {
+ 8,
+ 0x0149, 0x0149,
+ 0x0f77, 0x0f77,
+ 0x0f79, 0x0f79,
+ 0x17a3, 0x17a4,
+ 0x206a, 0x206f,
+ 0x2329, 0x232a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+}; /* CR_Deprecated */
+
+/* 'Soft_Dotted': Binary Property */
+static const OnigCodePoint CR_Soft_Dotted[] = {
+ 31,
+ 0x0069, 0x006a,
+ 0x012f, 0x012f,
+ 0x0249, 0x0249,
+ 0x0268, 0x0268,
+ 0x029d, 0x029d,
+ 0x02b2, 0x02b2,
+ 0x03f3, 0x03f3,
+ 0x0456, 0x0456,
+ 0x0458, 0x0458,
+ 0x1d62, 0x1d62,
+ 0x1d96, 0x1d96,
+ 0x1da4, 0x1da4,
+ 0x1da8, 0x1da8,
+ 0x1e2d, 0x1e2d,
+ 0x1ecb, 0x1ecb,
+ 0x2071, 0x2071,
+ 0x2148, 0x2149,
+ 0x2c7c, 0x2c7c,
+ 0x1d422, 0x1d423,
+ 0x1d456, 0x1d457,
+ 0x1d48a, 0x1d48b,
+ 0x1d4be, 0x1d4bf,
+ 0x1d4f2, 0x1d4f3,
+ 0x1d526, 0x1d527,
+ 0x1d55a, 0x1d55b,
+ 0x1d58e, 0x1d58f,
+ 0x1d5c2, 0x1d5c3,
+ 0x1d5f6, 0x1d5f7,
+ 0x1d62a, 0x1d62b,
+ 0x1d65e, 0x1d65f,
+ 0x1d692, 0x1d693,
+}; /* CR_Soft_Dotted */
+
+/* 'Logical_Order_Exception': Binary Property */
+static const OnigCodePoint CR_Logical_Order_Exception[] = {
+ 5,
+ 0x0e40, 0x0e44,
+ 0x0ec0, 0x0ec4,
+ 0xaab5, 0xaab6,
+ 0xaab9, 0xaab9,
+ 0xaabb, 0xaabc,
+}; /* CR_Logical_Order_Exception */
+
+/* 'Other_ID_Start': Binary Property */
+static const OnigCodePoint CR_Other_ID_Start[] = {
+ 3,
+ 0x2118, 0x2118,
+ 0x212e, 0x212e,
+ 0x309b, 0x309c,
+}; /* CR_Other_ID_Start */
+
+/* 'Other_ID_Continue': Binary Property */
+static const OnigCodePoint CR_Other_ID_Continue[] = {
+ 3,
+ 0x00b7, 0x00b7,
+ 0x0387, 0x0387,
+ 0x1369, 0x1371,
+}; /* CR_Other_ID_Continue */
+
+/* 'STerm': Binary Property */
+static const OnigCodePoint CR_STerm[] = {
+ 43,
+ 0x0021, 0x0021,
+ 0x002e, 0x002e,
+ 0x003f, 0x003f,
+ 0x055c, 0x055c,
+ 0x055e, 0x055e,
+ 0x0589, 0x0589,
+ 0x061f, 0x061f,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x0702,
+ 0x07f9, 0x07f9,
+ 0x0964, 0x0965,
+ 0x104a, 0x104b,
+ 0x1362, 0x1362,
+ 0x1367, 0x1368,
+ 0x166e, 0x166e,
+ 0x1803, 0x1803,
+ 0x1809, 0x1809,
+ 0x1944, 0x1945,
+ 0x1b5a, 0x1b5b,
+ 0x1b5e, 0x1b5f,
+ 0x1c3b, 0x1c3c,
+ 0x1c7e, 0x1c7f,
+ 0x203c, 0x203d,
+ 0x2047, 0x2049,
+ 0x2e2e, 0x2e2e,
+ 0x3002, 0x3002,
+ 0xa4ff, 0xa4ff,
+ 0xa60e, 0xa60f,
+ 0xa6f3, 0xa6f3,
+ 0xa6f7, 0xa6f7,
+ 0xa876, 0xa877,
+ 0xa8ce, 0xa8cf,
+ 0xa92f, 0xa92f,
+ 0xa9c8, 0xa9c9,
+ 0xaa5d, 0xaa5f,
+ 0xabeb, 0xabeb,
+ 0xfe52, 0xfe52,
+ 0xfe56, 0xfe57,
+ 0xff01, 0xff01,
+ 0xff0e, 0xff0e,
+ 0xff1f, 0xff1f,
+ 0xff61, 0xff61,
+ 0x110be, 0x110c1,
+}; /* CR_STerm */
+
+/* 'Variation_Selector': Binary Property */
+static const OnigCodePoint CR_Variation_Selector[] = {
+ 3,
+ 0x180b, 0x180d,
+ 0xfe00, 0xfe0f,
+ 0xe0100, 0xe01ef,
+}; /* CR_Variation_Selector */
+
+/* 'Pattern_White_Space': Binary Property */
+static const OnigCodePoint CR_Pattern_White_Space[] = {
+ 5,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020,
+ 0x0085, 0x0085,
+ 0x200e, 0x200f,
+ 0x2028, 0x2029,
+}; /* CR_Pattern_White_Space */
+
+/* 'Pattern_Syntax': Binary Property */
+static const OnigCodePoint CR_Pattern_Syntax[] = {
+ 28,
+ 0x0021, 0x002f,
+ 0x003a, 0x0040,
+ 0x005b, 0x005e,
+ 0x0060, 0x0060,
+ 0x007b, 0x007e,
+ 0x00a1, 0x00a7,
+ 0x00a9, 0x00a9,
+ 0x00ab, 0x00ac,
+ 0x00ae, 0x00ae,
+ 0x00b0, 0x00b1,
+ 0x00b6, 0x00b6,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf,
+ 0x00d7, 0x00d7,
+ 0x00f7, 0x00f7,
+ 0x2010, 0x2027,
+ 0x2030, 0x203e,
+ 0x2041, 0x2053,
+ 0x2055, 0x205e,
+ 0x2190, 0x245f,
+ 0x2500, 0x2775,
+ 0x2794, 0x2bff,
+ 0x2e00, 0x2e7f,
+ 0x3001, 0x3003,
+ 0x3008, 0x3020,
+ 0x3030, 0x3030,
+ 0xfd3e, 0xfd3f,
+ 0xfe45, 0xfe46,
+}; /* CR_Pattern_Syntax */
+#endif /* USE_UNICODE_PROPERTIES */
+#endif /* USE_UNICODE_PROPERTIES */
+
+/* 'NEWLINE': [[:NEWLINE:]] */
+static const OnigCodePoint CR_NEWLINE[] = {
+ 1,
+ 0x000a, 0x000a,
+}; /* CR_NEWLINE */
+
+/* 'Alpha': [[:Alpha:]] */
+#define CR_Alpha CR_Alphabetic
+
+/* 'Blank': [[:Blank:]] */
+static const OnigCodePoint CR_Blank[] = {
+ 9,
+ 0x0009, 0x0009,
+ 0x0020, 0x0020,
+ 0x00a0, 0x00a0,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000,
+}; /* CR_Blank */
+
+/* 'Cntrl': [[:Cntrl:]] */
+#define CR_Cntrl CR_Cc
+
+/* 'Digit': [[:Digit:]] */
+#define CR_Digit CR_Nd
+
+/* 'Graph': [[:Graph:]] */
+static const OnigCodePoint CR_Graph[] = {
+ 490,
+ 0x0021, 0x007e,
+ 0x00a1, 0x0377,
+ 0x037a, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x0606, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0830, 0x083e,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0955,
+ 0x0958, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fb,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0cf1, 0x0cf2,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d75,
+ 0x0d79, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fce, 0x0fd8,
+ 0x1000, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1400, 0x167f,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b7c,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c37,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c7f,
+ 0x1cd0, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfd, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x205e,
+ 0x2060, 0x2064,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b8,
+ 0x20d0, 0x20f0,
+ 0x2100, 0x2189,
+ 0x2190, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2cf1,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2e31,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31e3,
+ 0x31f0, 0x321e,
+ 0x3220, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa4d0, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa673,
+ 0xa67c, 0xa697,
+ 0xa6a0, 0xa6f7,
+ 0xa700, 0xa78c,
+ 0xa7fb, 0xa82b,
+ 0xa830, 0xa839,
+ 0xa840, 0xa877,
+ 0xa880, 0xa8c4,
+ 0xa8ce, 0xa8d9,
+ 0xa8e0, 0xa8fb,
+ 0xa900, 0xa953,
+ 0xa95f, 0xa97c,
+ 0xa980, 0xa9cd,
+ 0xa9cf, 0xa9d9,
+ 0xa9de, 0xa9df,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadf,
+ 0xabc0, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe26,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fd,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10857, 0x1085f,
+ 0x10900, 0x1091b,
+ 0x1091f, 0x10939,
+ 0x1093f, 0x1093f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x10a60, 0x10a7f,
+ 0x10b00, 0x10b35,
+ 0x10b39, 0x10b55,
+ 0x10b58, 0x10b72,
+ 0x10b78, 0x10b7f,
+ 0x10c00, 0x10c48,
+ 0x10e60, 0x10e7e,
+ 0x11080, 0x110c1,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x12470, 0x12473,
+ 0x13000, 0x1342e,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f100, 0x1f10a,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f200, 0x1f200,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
+}; /* CR_Graph */
+
+/* 'Lower': [[:Lower:]] */
+#define CR_Lower CR_Lowercase
+
+/* 'Print': [[:Print:]] */
+static const OnigCodePoint CR_Print[] = {
+ 487,
+ 0x0020, 0x007e,
+ 0x00a0, 0x0377,
+ 0x037a, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x0606, 0x061b,
+ 0x061e, 0x061f,
+ 0x0621, 0x065e,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0830, 0x083e,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0955,
+ 0x0958, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fb,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c78, 0x0c7f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0cf1, 0x0cf2,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d75,
+ 0x0d79, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fce, 0x0fd8,
+ 0x1000, 0x10c5,
+ 0x10d0, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x137c,
+ 0x1380, 0x1399,
+ 0x13a0, 0x13f4,
+ 0x1400, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x19de, 0x1a1b,
+ 0x1a1e, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa0, 0x1aad,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b7c,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c37,
+ 0x1c3b, 0x1c49,
+ 0x1c4d, 0x1c7f,
+ 0x1cd0, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfd, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2027,
+ 0x202a, 0x2064,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x2090, 0x2094,
+ 0x20a0, 0x20b8,
+ 0x20d0, 0x20f0,
+ 0x2100, 0x2189,
+ 0x2190, 0x23e8,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x26cd,
+ 0x26cf, 0x26e1,
+ 0x26e3, 0x26e3,
+ 0x26e8, 0x26ff,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27c0, 0x27ca,
+ 0x27cc, 0x27cc,
+ 0x27d0, 0x2b4c,
+ 0x2b50, 0x2b59,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2cf1,
+ 0x2cf9, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2e31,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31c0, 0x31e3,
+ 0x31f0, 0x321e,
+ 0x3220, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xa4d0, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa673,
+ 0xa67c, 0xa697,
+ 0xa6a0, 0xa6f7,
+ 0xa700, 0xa78c,
+ 0xa7fb, 0xa82b,
+ 0xa830, 0xa839,
+ 0xa840, 0xa877,
+ 0xa880, 0xa8c4,
+ 0xa8ce, 0xa8d9,
+ 0xa8e0, 0xa8fb,
+ 0xa900, 0xa953,
+ 0xa95f, 0xa97c,
+ 0xa980, 0xa9cd,
+ 0xa9cf, 0xa9d9,
+ 0xa9de, 0xa9df,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa5c, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadf,
+ 0xabc0, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe19,
+ 0xfe20, 0xfe26,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1018a,
+ 0x10190, 0x1019b,
+ 0x101d0, 0x101fd,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x103c3,
+ 0x103c8, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10857, 0x1085f,
+ 0x10900, 0x1091b,
+ 0x1091f, 0x10939,
+ 0x1093f, 0x1093f,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a47,
+ 0x10a50, 0x10a58,
+ 0x10a60, 0x10a7f,
+ 0x10b00, 0x10b35,
+ 0x10b39, 0x10b55,
+ 0x10b58, 0x10b72,
+ 0x10b78, 0x10b7f,
+ 0x10c00, 0x10c48,
+ 0x10e60, 0x10e7e,
+ 0x11080, 0x110c1,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x12470, 0x12473,
+ 0x13000, 0x1342e,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d129, 0x1d1dd,
+ 0x1d200, 0x1d245,
+ 0x1d300, 0x1d356,
+ 0x1d360, 0x1d371,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x1f000, 0x1f02b,
+ 0x1f030, 0x1f093,
+ 0x1f100, 0x1f10a,
+ 0x1f110, 0x1f12e,
+ 0x1f131, 0x1f131,
+ 0x1f13d, 0x1f13d,
+ 0x1f13f, 0x1f13f,
+ 0x1f142, 0x1f142,
+ 0x1f146, 0x1f146,
+ 0x1f14a, 0x1f14e,
+ 0x1f157, 0x1f157,
+ 0x1f15f, 0x1f15f,
+ 0x1f179, 0x1f179,
+ 0x1f17b, 0x1f17c,
+ 0x1f17f, 0x1f17f,
+ 0x1f18a, 0x1f18d,
+ 0x1f190, 0x1f190,
+ 0x1f200, 0x1f200,
+ 0x1f210, 0x1f231,
+ 0x1f240, 0x1f248,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd,
+}; /* CR_Print */
+
+/* 'Punct': [[:Punct:]] */
+#define CR_Punct CR_P
+
+/* 'Space': [[:Space:]] */
+#define CR_Space CR_White_Space
+
+/* 'Upper': [[:Upper:]] */
+#define CR_Upper CR_Uppercase
+
+/* 'XDigit': [[:XDigit:]] */
+#define CR_XDigit CR_ASCII_Hex_Digit
+
+/* 'Word': [[:Word:]] */
+static const OnigCodePoint CR_Word[] = {
+ 506,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x0483, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x061a,
+ 0x0621, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x082d,
+ 0x0900, 0x0939,
+ 0x093c, 0x094e,
+ 0x0950, 0x0955,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d6f,
+ 0x0d7a, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1049,
+ 0x1050, 0x109d,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x1a00, 0x1a1b,
+ 0x1a20, 0x1a5e,
+ 0x1a60, 0x1a7c,
+ 0x1a7f, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa7, 0x1aa7,
+ 0x1b00, 0x1b4b,
+ 0x1b50, 0x1b59,
+ 0x1b6b, 0x1b73,
+ 0x1b80, 0x1baa,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c37,
+ 0x1c40, 0x1c49,
+ 0x1c4d, 0x1c7d,
+ 0x1cd0, 0x1cd2,
+ 0x1cd4, 0x1cf2,
+ 0x1d00, 0x1de6,
+ 0x1dfd, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x20d0, 0x20f0,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x24b6, 0x24e9,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cf1,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2dff,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa672,
+ 0xa67c, 0xa67d,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6f1,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa827,
+ 0xa840, 0xa873,
+ 0xa880, 0xa8c4,
+ 0xa8d0, 0xa8d9,
+ 0xa8e0, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa900, 0xa92d,
+ 0xa930, 0xa953,
+ 0xa960, 0xa97c,
+ 0xa980, 0xa9c0,
+ 0xa9cf, 0xa9d9,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7b,
+ 0xaa80, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabea,
+ 0xabec, 0xabed,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe26,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x101fd, 0x101fd,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a38, 0x10a3a,
+ 0x10a3f, 0x10a3f,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11080, 0x110ba,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d242, 0x1d244,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef,
+}; /* CR_Word */
+
+/* 'Alnum': [[:Alnum:]] */
+static const OnigCodePoint CR_Alnum[] = {
+ 497,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a,
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ec, 0x02ec,
+ 0x02ee, 0x02ee,
+ 0x0345, 0x0345,
+ 0x0370, 0x0374,
+ 0x0376, 0x0377,
+ 0x037a, 0x037d,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03f5,
+ 0x03f7, 0x0481,
+ 0x048a, 0x0525,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x05b0, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c5,
+ 0x05c7, 0x05c7,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x061a,
+ 0x0621, 0x0657,
+ 0x0659, 0x065e,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06e1, 0x06e8,
+ 0x06ed, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x073f,
+ 0x074d, 0x07b1,
+ 0x07c0, 0x07ea,
+ 0x07f4, 0x07f5,
+ 0x07fa, 0x07fa,
+ 0x0800, 0x0817,
+ 0x081a, 0x082c,
+ 0x0900, 0x0939,
+ 0x093d, 0x094c,
+ 0x094e, 0x094e,
+ 0x0950, 0x0950,
+ 0x0955, 0x0955,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0971, 0x0972,
+ 0x0979, 0x097f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bd, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cc,
+ 0x09ce, 0x09ce,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4c,
+ 0x0a51, 0x0a51,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a75,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abd, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acc,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3d, 0x0b44,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4c,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b63,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcc,
+ 0x0bd0, 0x0bd0,
+ 0x0bd7, 0x0bd7,
+ 0x0be6, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3d, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4c,
+ 0x0c55, 0x0c56,
+ 0x0c58, 0x0c59,
+ 0x0c60, 0x0c63,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbd, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccc,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce3,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3d, 0x0d44,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4c,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d63,
+ 0x0d66, 0x0d6f,
+ 0x0d7a, 0x0d7f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e46,
+ 0x0e4d, 0x0e4d,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ecd, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f20, 0x0f29,
+ 0x0f40, 0x0f47,
+ 0x0f49, 0x0f6c,
+ 0x0f71, 0x0f81,
+ 0x0f88, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x1000, 0x1036,
+ 0x1038, 0x1038,
+ 0x103b, 0x1049,
+ 0x1050, 0x1062,
+ 0x1065, 0x1068,
+ 0x106e, 0x1086,
+ 0x108e, 0x108e,
+ 0x1090, 0x1099,
+ 0x109c, 0x109d,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10fa,
+ 0x10fc, 0x10fc,
+ 0x1100, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12d6,
+ 0x12d8, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x135a,
+ 0x135f, 0x135f,
+ 0x1380, 0x138f,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x167f,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1713,
+ 0x1720, 0x1733,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17c8,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dc,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18aa,
+ 0x18b0, 0x18f5,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x1938,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1980, 0x19ab,
+ 0x19b0, 0x19c9,
+ 0x19d0, 0x19da,
+ 0x1a00, 0x1a1b,
+ 0x1a20, 0x1a5e,
+ 0x1a61, 0x1a74,
+ 0x1a80, 0x1a89,
+ 0x1a90, 0x1a99,
+ 0x1aa7, 0x1aa7,
+ 0x1b00, 0x1b33,
+ 0x1b35, 0x1b43,
+ 0x1b45, 0x1b4b,
+ 0x1b50, 0x1b59,
+ 0x1b80, 0x1ba9,
+ 0x1bae, 0x1bb9,
+ 0x1c00, 0x1c35,
+ 0x1c40, 0x1c49,
+ 0x1c4d, 0x1c7d,
+ 0x1ce9, 0x1cec,
+ 0x1cee, 0x1cf2,
+ 0x1d00, 0x1dbf,
+ 0x1e00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x2090, 0x2094,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2139,
+ 0x213c, 0x213f,
+ 0x2145, 0x2149,
+ 0x214e, 0x214e,
+ 0x2160, 0x2188,
+ 0x24b6, 0x24e9,
+ 0x2c00, 0x2c2e,
+ 0x2c30, 0x2c5e,
+ 0x2c60, 0x2ce4,
+ 0x2ceb, 0x2cee,
+ 0x2d00, 0x2d25,
+ 0x2d30, 0x2d65,
+ 0x2d6f, 0x2d6f,
+ 0x2d80, 0x2d96,
+ 0x2da0, 0x2da6,
+ 0x2da8, 0x2dae,
+ 0x2db0, 0x2db6,
+ 0x2db8, 0x2dbe,
+ 0x2dc0, 0x2dc6,
+ 0x2dc8, 0x2dce,
+ 0x2dd0, 0x2dd6,
+ 0x2dd8, 0x2dde,
+ 0x2de0, 0x2dff,
+ 0x2e2f, 0x2e2f,
+ 0x3005, 0x3007,
+ 0x3021, 0x3029,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312d,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fcb,
+ 0xa000, 0xa48c,
+ 0xa4d0, 0xa4fd,
+ 0xa500, 0xa60c,
+ 0xa610, 0xa62b,
+ 0xa640, 0xa65f,
+ 0xa662, 0xa66e,
+ 0xa67f, 0xa697,
+ 0xa6a0, 0xa6ef,
+ 0xa717, 0xa71f,
+ 0xa722, 0xa788,
+ 0xa78b, 0xa78c,
+ 0xa7fb, 0xa801,
+ 0xa803, 0xa805,
+ 0xa807, 0xa80a,
+ 0xa80c, 0xa827,
+ 0xa840, 0xa873,
+ 0xa880, 0xa8c3,
+ 0xa8d0, 0xa8d9,
+ 0xa8f2, 0xa8f7,
+ 0xa8fb, 0xa8fb,
+ 0xa900, 0xa92a,
+ 0xa930, 0xa952,
+ 0xa960, 0xa97c,
+ 0xa980, 0xa9bf,
+ 0xa9cf, 0xa9d9,
+ 0xaa00, 0xaa36,
+ 0xaa40, 0xaa4d,
+ 0xaa50, 0xaa59,
+ 0xaa60, 0xaa76,
+ 0xaa7a, 0xaa7a,
+ 0xaa80, 0xaabe,
+ 0xaac0, 0xaac0,
+ 0xaac2, 0xaac2,
+ 0xaadb, 0xaadd,
+ 0xabc0, 0xabea,
+ 0xabf0, 0xabf9,
+ 0xac00, 0xd7a3,
+ 0xd7b0, 0xd7c6,
+ 0xd7cb, 0xd7fb,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6d,
+ 0xfa70, 0xfad9,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10140, 0x10174,
+ 0x10280, 0x1029c,
+ 0x102a0, 0x102d0,
+ 0x10300, 0x1031e,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x103a0, 0x103c3,
+ 0x103c8, 0x103cf,
+ 0x103d1, 0x103d5,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x10855,
+ 0x10900, 0x10915,
+ 0x10920, 0x10939,
+ 0x10a00, 0x10a03,
+ 0x10a05, 0x10a06,
+ 0x10a0c, 0x10a13,
+ 0x10a15, 0x10a17,
+ 0x10a19, 0x10a33,
+ 0x10a60, 0x10a7c,
+ 0x10b00, 0x10b35,
+ 0x10b40, 0x10b55,
+ 0x10b60, 0x10b72,
+ 0x10c00, 0x10c48,
+ 0x11082, 0x110b8,
+ 0x12000, 0x1236e,
+ 0x12400, 0x12462,
+ 0x13000, 0x1342e,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a5,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7cb,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2a700, 0x2b734,
+ 0x2f800, 0x2fa1d,
+}; /* CR_Alnum */
+
+/* 'ASCII': [[:ASCII:]]
+ * Holds a leading 1 followed by exactly one low/high code point pair,
+ * 0x0000 through 0x007f.  The leading element is presumably the number of
+ * pairs that follow -- confirm against the code that walks these tables. */
+static const OnigCodePoint CR_ASCII[] = {
+ 1,
+ 0x0000, 0x007f,
+}; /* CR_ASCII */
+
+static const OnigCodePoint* const CodeRanges[] = { /* Pointers to the CR_* range tables.
+ * The first 15 entries (CR_NEWLINE .. CR_ASCII) are always present; every
+ * later entry is compiled in only when USE_UNICODE_PROPERTIES is defined.
+ * Presumably indexed by the ctype number paired with each property name, so
+ * the order would have to stay fixed -- TODO confirm against the lookup code. */
+ CR_NEWLINE,
+ CR_Alpha,
+ CR_Blank,
+ CR_Cntrl,
+ CR_Digit,
+ CR_Graph,
+ CR_Lower,
+ CR_Print,
+ CR_Punct,
+ CR_Space,
+ CR_Upper,
+ CR_XDigit,
+ CR_Word,
+ CR_Alnum,
+ CR_ASCII,
+#ifdef USE_UNICODE_PROPERTIES
+#ifdef USE_UNICODE_PROPERTIES /* NOTE(review): same guard as the line above; redundant but balanced by the two #endif lines closing this list */
+ CR_Any,
+ CR_Assigned,
+ CR_C,
+ CR_Cc,
+ CR_Cf,
+ CR_Cn,
+ CR_Co,
+ CR_Cs,
+ CR_L,
+ CR_Ll,
+ CR_Lm,
+ CR_Lo,
+ CR_Lt,
+ CR_Lu,
+ CR_M,
+ CR_Mc,
+ CR_Me,
+ CR_Mn,
+ CR_N,
+ CR_Nd,
+ CR_Nl,
+ CR_No,
+ CR_P,
+ CR_Pc,
+ CR_Pd,
+ CR_Pe,
+ CR_Pf,
+ CR_Pi,
+ CR_Po,
+ CR_Ps,
+ CR_S,
+ CR_Sc,
+ CR_Sk,
+ CR_Sm,
+ CR_So,
+ CR_Z,
+ CR_Zl,
+ CR_Zp,
+ CR_Zs,
+ CR_Math,
+ CR_Alphabetic,
+ CR_Lowercase,
+ CR_Uppercase,
+ CR_Cased,
+ CR_Case_Ignorable,
+ CR_Changes_When_Lowercased,
+ CR_Changes_When_Uppercased,
+ CR_Changes_When_Titlecased,
+ CR_Changes_When_Casefolded,
+ CR_Changes_When_Casemapped,
+ CR_ID_Start,
+ CR_ID_Continue,
+ CR_XID_Start,
+ CR_XID_Continue,
+ CR_Default_Ignorable_Code_Point,
+ CR_Grapheme_Extend,
+ CR_Grapheme_Base,
+ CR_Grapheme_Link,
+ CR_Common,
+ CR_Latin,
+ CR_Greek,
+ CR_Cyrillic,
+ CR_Armenian,
+ CR_Hebrew,
+ CR_Arabic,
+ CR_Syriac,
+ CR_Thaana,
+ CR_Devanagari,
+ CR_Bengali,
+ CR_Gurmukhi,
+ CR_Gujarati,
+ CR_Oriya,
+ CR_Tamil,
+ CR_Telugu,
+ CR_Kannada,
+ CR_Malayalam,
+ CR_Sinhala,
+ CR_Thai,
+ CR_Lao,
+ CR_Tibetan,
+ CR_Myanmar,
+ CR_Georgian,
+ CR_Hangul,
+ CR_Ethiopic,
+ CR_Cherokee,
+ CR_Canadian_Aboriginal,
+ CR_Ogham,
+ CR_Runic,
+ CR_Khmer,
+ CR_Mongolian,
+ CR_Hiragana,
+ CR_Katakana,
+ CR_Bopomofo,
+ CR_Han,
+ CR_Yi,
+ CR_Old_Italic,
+ CR_Gothic,
+ CR_Deseret,
+ CR_Inherited,
+ CR_Tagalog,
+ CR_Hanunoo,
+ CR_Buhid,
+ CR_Tagbanwa,
+ CR_Limbu,
+ CR_Tai_Le,
+ CR_Linear_B,
+ CR_Ugaritic,
+ CR_Shavian,
+ CR_Osmanya,
+ CR_Cypriot,
+ CR_Braille,
+ CR_Buginese,
+ CR_Coptic,
+ CR_New_Tai_Lue,
+ CR_Glagolitic,
+ CR_Tifinagh,
+ CR_Syloti_Nagri,
+ CR_Old_Persian,
+ CR_Kharoshthi,
+ CR_Balinese,
+ CR_Cuneiform,
+ CR_Phoenician,
+ CR_Phags_Pa,
+ CR_Nko,
+ CR_Sundanese,
+ CR_Lepcha,
+ CR_Ol_Chiki,
+ CR_Vai,
+ CR_Saurashtra,
+ CR_Kayah_Li,
+ CR_Rejang,
+ CR_Lycian,
+ CR_Carian,
+ CR_Lydian,
+ CR_Cham,
+ CR_Tai_Tham,
+ CR_Tai_Viet,
+ CR_Avestan,
+ CR_Egyptian_Hieroglyphs,
+ CR_Samaritan,
+ CR_Lisu,
+ CR_Bamum,
+ CR_Javanese,
+ CR_Meetei_Mayek,
+ CR_Imperial_Aramaic,
+ CR_Old_South_Arabian,
+ CR_Inscriptional_Parthian,
+ CR_Inscriptional_Pahlavi,
+ CR_Old_Turkic,
+ CR_Kaithi,
+ CR_White_Space,
+ CR_Bidi_Control,
+ CR_Join_Control,
+ CR_Dash,
+ CR_Hyphen,
+ CR_Quotation_Mark,
+ CR_Terminal_Punctuation,
+ CR_Other_Math,
+ CR_Hex_Digit,
+ CR_ASCII_Hex_Digit,
+ CR_Other_Alphabetic,
+ CR_Ideographic,
+ CR_Diacritic,
+ CR_Extender,
+ CR_Other_Lowercase,
+ CR_Other_Uppercase,
+ CR_Noncharacter_Code_Point,
+ CR_Other_Grapheme_Extend,
+ CR_IDS_Binary_Operator,
+ CR_IDS_Trinary_Operator,
+ CR_Radical,
+ CR_Unified_Ideograph,
+ CR_Other_Default_Ignorable_Code_Point,
+ CR_Deprecated,
+ CR_Soft_Dotted,
+ CR_Logical_Order_Exception,
+ CR_Other_ID_Start,
+ CR_Other_ID_Continue,
+ CR_STerm,
+ CR_Variation_Selector,
+ CR_Pattern_White_Space,
+ CR_Pattern_Syntax,
+#endif /* USE_UNICODE_PROPERTIES */
+#endif /* USE_UNICODE_PROPERTIES */
+};
+struct uniname2ctype_struct { /* One name-to-ctype record, the type returned (by pointer) from uniname2ctype_p. */
+ int name, ctype; /* name is an int, not a char pointer -- presumably an offset into the keyword string pool; ctype presumably selects a CodeRanges entry. Confirm both against the word list. */
+};
+
+static const struct uniname2ctype_struct *uniname2ctype_p(const char *, unsigned int); /* forward declaration only; takes a string and an unsigned length (parameters are unnamed here), definition is not in this part of the file */
+
+#ifndef USE_UNICODE_PROPERTIES /* keyword-set limits for the build without Unicode property names */
+#define TOTAL_KEYWORDS 15
+#define MIN_WORD_LENGTH 4
+#define MAX_WORD_LENGTH 7
+#define MIN_HASH_VALUE 7
+#define MAX_HASH_VALUE 21
+/* maximum key range = 15, duplicates = 0
+ * (key range = MAX_HASH_VALUE - MIN_HASH_VALUE + 1 = 21 - 7 + 1) */
+#else /* USE_UNICODE_PROPERTIES */
+#define TOTAL_KEYWORDS 367
+#define MIN_WORD_LENGTH 1
+#define MAX_WORD_LENGTH 30
+#define MIN_HASH_VALUE 3
+#define MAX_HASH_VALUE 1751
+/* maximum key range = 1749, duplicates = 0
+ * (key range = MAX_HASH_VALUE - MIN_HASH_VALUE + 1 = 1751 - 3 + 1) */
+#endif /* USE_UNICODE_PROPERTIES */
+
+#ifdef __GNUC__ /* inline hint for the function below: __inline under GCC, inline under C++, none otherwise */
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int /* Hash of a keyword: len plus table weights of bytes at a few fixed positions (see the two return paths below). */
+uniname2ctype_hash (str, len) /* NOTE(review): old-style (K&R) parameter list; this definition form is removed in C23. */
+ register const char *str; /* keyword bytes; read at fixed indexes with no bounds check inside this function */
+ register unsigned int len; /* keyword length; presumably the caller has already checked it against MIN_WORD_LENGTH/MAX_WORD_LENGTH -- confirm in uniname2ctype_p */
+{
+#ifndef USE_UNICODE_PROPERTIES
+ static const unsigned char asso_values[] = /* per-byte weights, 128 entries */
+#else /* USE_UNICODE_PROPERTIES */
+ static const unsigned short asso_values[] = /* per-byte weights, 128 entries; unsigned short because values reach 1752 */
+#endif /* USE_UNICODE_PROPERTIES */
+ { /* NOTE(review): both tables stop at index 127 but are indexed with an unsigned char, so a byte >= 0x80 would read past the end -- presumably the caller rejects non-ASCII names first; confirm. */
+#ifndef USE_UNICODE_PROPERTIES
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, /* filler 22 == MAX_HASH_VALUE + 1 for this build */
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
+ 22, 22, 22, 22, 22, 22, 22, 3, 13, 6, /* indexes 97..99 = 'a','b','c' */
+ 4, 22, 22, 11, 22, 1, 22, 22, 10, 22,
+ 2, 22, 1, 22, 10, 8, 4, 7, 22, 3,
+ 4, 22, 22, 22, 22, 22, 22, 22
+#else /* USE_UNICODE_PROPERTIES */
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, /* filler 1752 == MAX_HASH_VALUE + 1 for this build */
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752, 1752,
+ 1752, 1752, 1752, 1752, 1752, 1752, 1752, 7, 419, 1, /* indexes 97..99 = 'a','b','c' */
+ 128, 38, 100, 186, 46, 11, 226, 463, 4, 317,
+ 6, 2, 265, 5, 18, 112, 30, 289, 164, 384,
+ 368, 579, 10, 1752, 1752, 1752, 1752, 1752
+#endif /* USE_UNICODE_PROPERTIES */
+ };
+#ifndef USE_UNICODE_PROPERTIES
+ return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]]; /* always reads str[0] and str[2], whatever len is; e.g. "print" -> 5 + 1 + 1 = 7 */
+#else /* USE_UNICODE_PROPERTIES */
+ register int hval = len; /* running sum starts at the length */
+
+ switch (hval) /* enters at the case for len and falls through, adding weights for indexes 15, 11, 5, 2, 1, 0 that exist for that length */
+ {
+ default: /* len >= 16 (or 0, which no case label matches) */
+ hval += asso_values[(unsigned char)str[15]];
+ /*FALLTHROUGH*/
+ case 15:
+ case 14:
+ case 13:
+ case 12:
+ hval += asso_values[(unsigned char)str[11]];
+ /*FALLTHROUGH*/
+ case 11:
+ case 10:
+ case 9:
+ case 8:
+ case 7:
+ case 6:
+ hval += asso_values[(unsigned char)str[5]];
+ /*FALLTHROUGH*/
+ case 5:
+ case 4:
+ case 3:
+ hval += asso_values[(unsigned char)str[2]];
+ /*FALLTHROUGH*/
+ case 2:
+ hval += asso_values[(unsigned char)str[1]];
+ /*FALLTHROUGH*/
+ case 1:
+ hval += asso_values[(unsigned char)str[0]];
+ break;
+ }
+ return hval + asso_values[(unsigned char)str[len - 1]]; /* the last byte is always added too; e.g. "c" -> 1 + 1 + 1 = 3 */
+#endif /* USE_UNICODE_PROPERTIES */
+}
+
+struct uniname2ctype_pool_t /* Layout of the keyword string pool: one char array per keyword, each sized by sizeof of its literal (so the terminating NUL is included). */
+ { /* The numeric suffix of a member matches the keyword's value from uniname2ctype_hash, e.g. "print" -> 7 without Unicode properties and "c" -> 3 with them. */
+#ifndef USE_UNICODE_PROPERTIES
+ char uniname2ctype_pool_str7[sizeof("print")];
+ char uniname2ctype_pool_str8[sizeof("punct")];
+ char uniname2ctype_pool_str9[sizeof("alpha")];
+ char uniname2ctype_pool_str10[sizeof("alnum")];
+ char uniname2ctype_pool_str11[sizeof("xdigit")];
+ char uniname2ctype_pool_str12[sizeof("newline")];
+ char uniname2ctype_pool_str13[sizeof("upper")];
+ char uniname2ctype_pool_str14[sizeof("ascii")];
+ char uniname2ctype_pool_str15[sizeof("cntrl")];
+ char uniname2ctype_pool_str16[sizeof("space")];
+ char uniname2ctype_pool_str17[sizeof("word")];
+ char uniname2ctype_pool_str18[sizeof("lower")];
+ char uniname2ctype_pool_str19[sizeof("graph")];
+ char uniname2ctype_pool_str20[sizeof("digit")];
+ char uniname2ctype_pool_str21[sizeof("blank")];
+#else /* USE_UNICODE_PROPERTIES */
+ char uniname2ctype_pool_str3[sizeof("c")];
+ char uniname2ctype_pool_str5[sizeof("cc")];
+ char uniname2ctype_pool_str7[sizeof("co")];
+ char uniname2ctype_pool_str9[sizeof("l")];
+ char uniname2ctype_pool_str10[sizeof("lo")];
+ char uniname2ctype_pool_str12[sizeof("no")];
+ char uniname2ctype_pool_str13[sizeof("n")];
+ char uniname2ctype_pool_str14[sizeof("ll")];
+ char uniname2ctype_pool_str15[sizeof("cn")];
+ char uniname2ctype_pool_str16[sizeof("nl")];
+ char uniname2ctype_pool_str18[sizeof("lao")];
+ char uniname2ctype_pool_str19[sizeof("laoo")];
+ char uniname2ctype_pool_str20[sizeof("zl")];
+ char uniname2ctype_pool_str21[sizeof("z")];
+ char uniname2ctype_pool_str22[sizeof("control")];
+ char uniname2ctype_pool_str24[sizeof("qaac")];
+ char uniname2ctype_pool_str25[sizeof("ci")];
+ char uniname2ctype_pool_str28[sizeof("lana")];
+ char uniname2ctype_pool_str33[sizeof("oalpha")];
+ char uniname2ctype_pool_str34[sizeof("qaai")];
+ char uniname2ctype_pool_str40[sizeof("arabic")];
+ char uniname2ctype_pool_str41[sizeof("cari")];
+ char uniname2ctype_pool_str43[sizeof("oriya")];
+ char uniname2ctype_pool_str44[sizeof("carian")];
+ char uniname2ctype_pool_str46[sizeof("cntrl")];
+ char uniname2ctype_pool_str49[sizeof("connectorpunctuation")];
+ char uniname2ctype_pool_str50[sizeof("olower")];
+ char uniname2ctype_pool_str51[sizeof("latn")];
+ char uniname2ctype_pool_str52[sizeof("latin")];
+ char uniname2ctype_pool_str56[sizeof("ital")];
+ char uniname2ctype_pool_str65[sizeof("hano")];
+ char uniname2ctype_pool_str66[sizeof("lt")];
+ char uniname2ctype_pool_str68[sizeof("han")];
+ char uniname2ctype_pool_str70[sizeof("hanunoo")];
+ char uniname2ctype_pool_str71[sizeof("canadianaboriginal")];
+ char uniname2ctype_pool_str73[sizeof("hangul")];
+ char uniname2ctype_pool_str74[sizeof("hani")];
+ char uniname2ctype_pool_str76[sizeof("nchar")];
+ char uniname2ctype_pool_str77[sizeof("zinh")];
+ char uniname2ctype_pool_str83[sizeof("tale")];
+ char uniname2ctype_pool_str85[sizeof("loe")];
+ char uniname2ctype_pool_str86[sizeof("hira")];
+ char uniname2ctype_pool_str91[sizeof("taile")];
+ char uniname2ctype_pool_str92[sizeof("lineseparator")];
+ char uniname2ctype_pool_str94[sizeof("thaa")];
+ char uniname2ctype_pool_str97[sizeof("hiragana")];
+ char uniname2ctype_pool_str98[sizeof("thai")];
+ char uniname2ctype_pool_str100[sizeof("initialpunctuation")];
+ char uniname2ctype_pool_str101[sizeof("other")];
+ char uniname2ctype_pool_str103[sizeof("thaana")];
+ char uniname2ctype_pool_str107[sizeof("cher")];
+ char uniname2ctype_pool_str111[sizeof("otherletter")];
+ char uniname2ctype_pool_str113[sizeof("othernumber")];
+ char uniname2ctype_pool_str114[sizeof("letter")];
+ char uniname2ctype_pool_str116[sizeof("sc")];
+ char uniname2ctype_pool_str118[sizeof("so")];
+ char uniname2ctype_pool_str123[sizeof("taiviet")];
+ char uniname2ctype_pool_str128[sizeof("noncharactercodepoint")];
+ char uniname2ctype_pool_str129[sizeof("ethi")];
+ char uniname2ctype_pool_str130[sizeof("cans")];
+ char uniname2ctype_pool_str135[sizeof("titlecaseletter")];
+ char uniname2ctype_pool_str136[sizeof("ascii")];
+ char uniname2ctype_pool_str138[sizeof("letternumber")];
+ char uniname2ctype_pool_str139[sizeof("otheralphabetic")];
+ char uniname2ctype_pool_str141[sizeof("otherlowercase")];
+ char uniname2ctype_pool_str144[sizeof("idc")];
+ char uniname2ctype_pool_str146[sizeof("oidc")];
+ char uniname2ctype_pool_str147[sizeof("sinhala")];
+ char uniname2ctype_pool_str148[sizeof("terminalpunctuation")];
+ char uniname2ctype_pool_str151[sizeof("olditalic")];
+ char uniname2ctype_pool_str152[sizeof("di")];
+ char uniname2ctype_pool_str153[sizeof("otheridcontinue")];
+ char uniname2ctype_pool_str155[sizeof("odi")];
+ char uniname2ctype_pool_str156[sizeof("dia")];
+ char uniname2ctype_pool_str161[sizeof("otheridstart")];
+ char uniname2ctype_pool_str162[sizeof("oldturkic")];
+ char uniname2ctype_pool_str167[sizeof("diacritic")];
+ char uniname2ctype_pool_str168[sizeof("oldpersian")];
+ char uniname2ctype_pool_str171[sizeof("radical")];
+ char uniname2ctype_pool_str179[sizeof("sinh")];
+ char uniname2ctype_pool_str183[sizeof("ideo")];
+ char uniname2ctype_pool_str185[sizeof("shavian")];
+ char uniname2ctype_pool_str186[sizeof("format")];
+ char uniname2ctype_pool_str192[sizeof("inscriptionalparthian")];
+ char uniname2ctype_pool_str196[sizeof("vai")];
+ char uniname2ctype_pool_str197[sizeof("vaii")];
+ char uniname2ctype_pool_str202[sizeof("tifinagh")];
+ char uniname2ctype_pool_str203[sizeof("cf")];
+ char uniname2ctype_pool_str205[sizeof("othersymbol")];
+ char uniname2ctype_pool_str207[sizeof("ideographic")];
+ char uniname2ctype_pool_str211[sizeof("inherited")];
+ char uniname2ctype_pool_str212[sizeof("glagolitic")];
+ char uniname2ctype_pool_str218[sizeof("idcontinue")];
+ char uniname2ctype_pool_str220[sizeof("asciihexdigit")];
+ char uniname2ctype_pool_str224[sizeof("inscriptionalpahlavi")];
+ char uniname2ctype_pool_str225[sizeof("s")];
+ char uniname2ctype_pool_str226[sizeof("gothic")];
+ char uniname2ctype_pool_str227[sizeof("cs")];
+ char uniname2ctype_pool_str229[sizeof("avestan")];
+ char uniname2ctype_pool_str235[sizeof("tavt")];
+ char uniname2ctype_pool_str236[sizeof("zs")];
+ char uniname2ctype_pool_str243[sizeof("decimalnumber")];
+ char uniname2ctype_pool_str244[sizeof("tagbanwa")];
+ char uniname2ctype_pool_str245[sizeof("joinc")];
+ char uniname2ctype_pool_str248[sizeof("geor")];
+ char uniname2ctype_pool_str249[sizeof("hang")];
+ char uniname2ctype_pool_str251[sizeof("georgian")];
+ char uniname2ctype_pool_str253[sizeof("cased")];
+ char uniname2ctype_pool_str256[sizeof("joincontrol")];
+ char uniname2ctype_pool_str257[sizeof("oids")];
+ char uniname2ctype_pool_str260[sizeof("variationselector")];
+ char uniname2ctype_pool_str262[sizeof("graph")];
+ char uniname2ctype_pool_str263[sizeof("changeswhenlowercased")];
+ char uniname2ctype_pool_str264[sizeof("nd")];
+ char uniname2ctype_pool_str268[sizeof("goth")];
+ char uniname2ctype_pool_str269[sizeof("pc")];
+ char uniname2ctype_pool_str271[sizeof("po")];
+ char uniname2ctype_pool_str272[sizeof("ogrext")];
+ char uniname2ctype_pool_str276[sizeof("coptic")];
+ char uniname2ctype_pool_str277[sizeof("grext")];
+ char uniname2ctype_pool_str282[sizeof("logicalorderexception")];
+ char uniname2ctype_pool_str285[sizeof("idst")];
+ char uniname2ctype_pool_str288[sizeof("alpha")];
+ char uniname2ctype_pool_str289[sizeof("pi")];
+ char uniname2ctype_pool_str292[sizeof("dsrt")];
+ char uniname2ctype_pool_str297[sizeof("dash")];
+ char uniname2ctype_pool_str298[sizeof("otherdefaultignorablecodepoint")];
+ char uniname2ctype_pool_str302[sizeof("copt")];
+ char uniname2ctype_pool_str306[sizeof("idstart")];
+ char uniname2ctype_pool_str307[sizeof("closepunctuation")];
+ char uniname2ctype_pool_str309[sizeof("changeswhentitlecased")];
+ char uniname2ctype_pool_str312[sizeof("lepc")];
+ char uniname2ctype_pool_str317[sizeof("avst")];
+ char uniname2ctype_pool_str318[sizeof("cprt")];
+ char uniname2ctype_pool_str319[sizeof("runic")];
+ char uniname2ctype_pool_str320[sizeof("patsyn")];
+ char uniname2ctype_pool_str321[sizeof("mc")];
+ char uniname2ctype_pool_str326[sizeof("tfng")];
+ char uniname2ctype_pool_str327[sizeof("lepcha")];
+ char uniname2ctype_pool_str328[sizeof("prti")];
+ char uniname2ctype_pool_str329[sizeof("print")];
+ char uniname2ctype_pool_str330[sizeof("phli")];
+ char uniname2ctype_pool_str331[sizeof("mn")];
+ char uniname2ctype_pool_str332[sizeof("idsbinaryoperator")];
+ char uniname2ctype_pool_str334[sizeof("talu")];
+ char uniname2ctype_pool_str335[sizeof("runr")];
+ char uniname2ctype_pool_str337[sizeof("graphemebase")];
+ char uniname2ctype_pool_str338[sizeof("common")];
+ char uniname2ctype_pool_str339[sizeof("alnum")];
+ char uniname2ctype_pool_str340[sizeof("phoenician")];
+ char uniname2ctype_pool_str341[sizeof("deva")];
+ char uniname2ctype_pool_str342[sizeof("changeswhencasefolded")];
+ char uniname2ctype_pool_str343[sizeof("pe")];
+ char uniname2ctype_pool_str344[sizeof("mongolian")];
+ char uniname2ctype_pool_str352[sizeof("armn")];
+ char uniname2ctype_pool_str353[sizeof("deseret")];
+ char uniname2ctype_pool_str354[sizeof("softdotted")];
+ char uniname2ctype_pool_str357[sizeof("armi")];
+ char uniname2ctype_pool_str358[sizeof("devanagari")];
+ char uniname2ctype_pool_str360[sizeof("digit")];
+ char uniname2ctype_pool_str361[sizeof("caseignorable")];
+ char uniname2ctype_pool_str362[sizeof("taml")];
+ char uniname2ctype_pool_str363[sizeof("tamil")];
+ char uniname2ctype_pool_str365[sizeof("telu")];
+ char uniname2ctype_pool_str366[sizeof("ids")];
+ char uniname2ctype_pool_str367[sizeof("armenian")];
+ char uniname2ctype_pool_str370[sizeof("sd")];
+ char uniname2ctype_pool_str372[sizeof("privateuse")];
+ char uniname2ctype_pool_str373[sizeof("assigned")];
+ char uniname2ctype_pool_str375[sizeof("cham")];
+ char uniname2ctype_pool_str377[sizeof("omath")];
+ char uniname2ctype_pool_str378[sizeof("otherpunctuation")];
+ char uniname2ctype_pool_str379[sizeof("taitham")];
+ char uniname2ctype_pool_str381[sizeof("defaultignorablecodepoint")];
+ char uniname2ctype_pool_str387[sizeof("glag")];
+ char uniname2ctype_pool_str388[sizeof("ethiopic")];
+ char uniname2ctype_pool_str390[sizeof("vs")];
+ char uniname2ctype_pool_str395[sizeof("me")];
+ char uniname2ctype_pool_str396[sizeof("cwl")];
+ char uniname2ctype_pool_str400[sizeof("mtei")];
+ char uniname2ctype_pool_str404[sizeof("math")];
+ char uniname2ctype_pool_str407[sizeof("term")];
+ char uniname2ctype_pool_str408[sizeof("java")];
+ char uniname2ctype_pool_str410[sizeof("tglg")];
+ char uniname2ctype_pool_str413[sizeof("lower")];
+ char uniname2ctype_pool_str414[sizeof("patternwhitespace")];
+ char uniname2ctype_pool_str417[sizeof("finalpunctuation")];
+ char uniname2ctype_pool_str418[sizeof("tagalog")];
+ char uniname2ctype_pool_str419[sizeof("patws")];
+ char uniname2ctype_pool_str420[sizeof("lisu")];
+ char uniname2ctype_pool_str426[sizeof("otheruppercase")];
+ char uniname2ctype_pool_str427[sizeof("space")];
+ char uniname2ctype_pool_str429[sizeof("graphemeextend")];
+ char uniname2ctype_pool_str430[sizeof("saur")];
+ char uniname2ctype_pool_str435[sizeof("uideo")];
+ char uniname2ctype_pool_str438[sizeof("lowercase")];
+ char uniname2ctype_pool_str440[sizeof("rjng")];
+ char uniname2ctype_pool_str442[sizeof("osma")];
+ char uniname2ctype_pool_str444[sizeof("linb")];
+ char uniname2ctype_pool_str445[sizeof("bali")];
+ char uniname2ctype_pool_str448[sizeof("cwt")];
+ char uniname2ctype_pool_str449[sizeof("separator")];
+ char uniname2ctype_pool_str450[sizeof("othermath")];
+ char uniname2ctype_pool_str451[sizeof("unassigned")];
+ char uniname2ctype_pool_str454[sizeof("lowercaseletter")];
+ char uniname2ctype_pool_str455[sizeof("arab")];
+ char uniname2ctype_pool_str458[sizeof("samr")];
+ char uniname2ctype_pool_str459[sizeof("brai")];
+ char uniname2ctype_pool_str460[sizeof("sundanese")];
+ char uniname2ctype_pool_str462[sizeof("samaritan")];
+ char uniname2ctype_pool_str463[sizeof("ahex")];
+ char uniname2ctype_pool_str465[sizeof("linearb")];
+ char uniname2ctype_pool_str467[sizeof("pf")];
+ char uniname2ctype_pool_str469[sizeof("ext")];
+ char uniname2ctype_pool_str474[sizeof("olck")];
+ char uniname2ctype_pool_str476[sizeof("nko")];
+ char uniname2ctype_pool_str477[sizeof("nkoo")];
+ char uniname2ctype_pool_str479[sizeof("newline")];
+ char uniname2ctype_pool_str480[sizeof("tibetan")];
+ char uniname2ctype_pool_str481[sizeof("javanese")];
+ char uniname2ctype_pool_str485[sizeof("bengali")];
+ char uniname2ctype_pool_str486[sizeof("newtailue")];
+ char uniname2ctype_pool_str487[sizeof("kana")];
+ char uniname2ctype_pool_str488[sizeof("olchiki")];
+ char uniname2ctype_pool_str489[sizeof("kali")];
+ char uniname2ctype_pool_str490[sizeof("cwcf")];
+ char uniname2ctype_pool_str491[sizeof("ps")];
+ char uniname2ctype_pool_str493[sizeof("braille")];
+ char uniname2ctype_pool_str494[sizeof("tibt")];
+ char uniname2ctype_pool_str502[sizeof("sterm")];
+ char uniname2ctype_pool_str504[sizeof("ugar")];
+ char uniname2ctype_pool_str505[sizeof("nonspacingmark")];
+ char uniname2ctype_pool_str508[sizeof("phag")];
+ char uniname2ctype_pool_str509[sizeof("kaithi")];
+ char uniname2ctype_pool_str512[sizeof("xidc")];
+ char uniname2ctype_pool_str514[sizeof("balinese")];
+ char uniname2ctype_pool_str515[sizeof("mong")];
+ char uniname2ctype_pool_str516[sizeof("ogam")];
+ char uniname2ctype_pool_str520[sizeof("modifierletter")];
+ char uniname2ctype_pool_str521[sizeof("ugaritic")];
+ char uniname2ctype_pool_str522[sizeof("katakana")];
+ char uniname2ctype_pool_str523[sizeof("pd")];
+ char uniname2ctype_pool_str525[sizeof("hebr")];
+ char uniname2ctype_pool_str531[sizeof("p")];
+ char uniname2ctype_pool_str533[sizeof("orkh")];
+ char uniname2ctype_pool_str536[sizeof("word")];
+ char uniname2ctype_pool_str537[sizeof("saurashtra")];
+ char uniname2ctype_pool_str538[sizeof("khar")];
+ char uniname2ctype_pool_str539[sizeof("sund")];
+ char uniname2ctype_pool_str542[sizeof("zp")];
+ char uniname2ctype_pool_str548[sizeof("changeswhenuppercased")];
+ char uniname2ctype_pool_str552[sizeof("xidstart")];
+ char uniname2ctype_pool_str553[sizeof("shaw")];
+ char uniname2ctype_pool_str554[sizeof("kthi")];
+ char uniname2ctype_pool_str556[sizeof("ogham")];
+ char uniname2ctype_pool_str558[sizeof("spaceseparator")];
+ char uniname2ctype_pool_str559[sizeof("changeswhencasemapped")];
+ char uniname2ctype_pool_str560[sizeof("sarb")];
+ char uniname2ctype_pool_str562[sizeof("xidcontinue")];
+ char uniname2ctype_pool_str564[sizeof("bidic")];
+ char uniname2ctype_pool_str570[sizeof("deprecated")];
+ char uniname2ctype_pool_str573[sizeof("xdigit")];
+ char uniname2ctype_pool_str575[sizeof("bidicontrol")];
+ char uniname2ctype_pool_str584[sizeof("lu")];
+ char uniname2ctype_pool_str587[sizeof("dashpunctuation")];
+ char uniname2ctype_pool_str590[sizeof("extender")];
+ char uniname2ctype_pool_str593[sizeof("idstrinaryoperator")];
+ char uniname2ctype_pool_str594[sizeof("cherokee")];
+ char uniname2ctype_pool_str595[sizeof("punct")];
+ char uniname2ctype_pool_str597[sizeof("phagspa")];
+ char uniname2ctype_pool_str598[sizeof("oupper")];
+ char uniname2ctype_pool_str599[sizeof("lyci")];
+ char uniname2ctype_pool_str601[sizeof("whitespace")];
+ char uniname2ctype_pool_str602[sizeof("lycian")];
+ char uniname2ctype_pool_str603[sizeof("yi")];
+ char uniname2ctype_pool_str606[sizeof("cyrl")];
+ char uniname2ctype_pool_str608[sizeof("knda")];
+ char uniname2ctype_pool_str610[sizeof("orya")];
+ char uniname2ctype_pool_str611[sizeof("cyrillic")];
+ char uniname2ctype_pool_str616[sizeof("yiii")];
+ char uniname2ctype_pool_str618[sizeof("kannada")];
+ char uniname2ctype_pool_str623[sizeof("xids")];
+ char uniname2ctype_pool_str626[sizeof("limbu")];
+ char uniname2ctype_pool_str635[sizeof("m")];
+ char uniname2ctype_pool_str638[sizeof("unifiedideograph")];
+ char uniname2ctype_pool_str639[sizeof("paragraphseparator")];
+ char uniname2ctype_pool_str640[sizeof("lm")];
+ char uniname2ctype_pool_str645[sizeof("openpunctuation")];
+ char uniname2ctype_pool_str646[sizeof("tagb")];
+ char uniname2ctype_pool_str649[sizeof("kharoshthi")];
+ char uniname2ctype_pool_str651[sizeof("enclosingmark")];
+ char uniname2ctype_pool_str652[sizeof("surrogate")];
+ char uniname2ctype_pool_str653[sizeof("beng")];
+ char uniname2ctype_pool_str654[sizeof("number")];
+ char uniname2ctype_pool_str656[sizeof("telugu")];
+ char uniname2ctype_pool_str660[sizeof("rejang")];
+ char uniname2ctype_pool_str661[sizeof("malayalam")];
+ char uniname2ctype_pool_str674[sizeof("idsb")];
+ char uniname2ctype_pool_str676[sizeof("hexdigit")];
+ char uniname2ctype_pool_str677[sizeof("xpeo")];
+ char uniname2ctype_pool_str689[sizeof("phnx")];
+ char uniname2ctype_pool_str692[sizeof("bopo")];
+ char uniname2ctype_pool_str698[sizeof("bopomofo")];
+ char uniname2ctype_pool_str699[sizeof("dep")];
+ char uniname2ctype_pool_str701[sizeof("sylo")];
+ char uniname2ctype_pool_str705[sizeof("grbase")];
+ char uniname2ctype_pool_str706[sizeof("alphabetic")];
+ char uniname2ctype_pool_str707[sizeof("cwcm")];
+ char uniname2ctype_pool_str708[sizeof("patternsyntax")];
+ char uniname2ctype_pool_str709[sizeof("grek")];
+ char uniname2ctype_pool_str710[sizeof("greek")];
+ char uniname2ctype_pool_str714[sizeof("syrc")];
+ char uniname2ctype_pool_str717[sizeof("syriac")];
+ char uniname2ctype_pool_str722[sizeof("cuneiform")];
+ char uniname2ctype_pool_str723[sizeof("gujr")];
+ char uniname2ctype_pool_str726[sizeof("lydi")];
+ char uniname2ctype_pool_str727[sizeof("gujarati")];
+ char uniname2ctype_pool_str728[sizeof("sylotinagri")];
+ char uniname2ctype_pool_str729[sizeof("lydian")];
+ char uniname2ctype_pool_str748[sizeof("sm")];
+ char uniname2ctype_pool_str751[sizeof("currencysymbol")];
+ char uniname2ctype_pool_str755[sizeof("limb")];
+ char uniname2ctype_pool_str758[sizeof("othergraphemeextend")];
+ char uniname2ctype_pool_str786[sizeof("guru")];
+ char uniname2ctype_pool_str797[sizeof("qmark")];
+ char uniname2ctype_pool_str809[sizeof("mark")];
+ char uniname2ctype_pool_str820[sizeof("quotationmark")];
+ char uniname2ctype_pool_str823[sizeof("hex")];
+ char uniname2ctype_pool_str842[sizeof("upper")];
+ char uniname2ctype_pool_str843[sizeof("wspace")];
+ char uniname2ctype_pool_str848[sizeof("khmr")];
+ char uniname2ctype_pool_str849[sizeof("khmer")];
+ char uniname2ctype_pool_str863[sizeof("oldsoutharabian")];
+ char uniname2ctype_pool_str864[sizeof("spacingmark")];
+ char uniname2ctype_pool_str866[sizeof("punctuation")];
+ char uniname2ctype_pool_str867[sizeof("uppercase")];
+ char uniname2ctype_pool_str878[sizeof("meeteimayek")];
+ char uniname2ctype_pool_str883[sizeof("uppercaseletter")];
+ char uniname2ctype_pool_str884[sizeof("cypriot")];
+ char uniname2ctype_pool_str886[sizeof("buhd")];
+ char uniname2ctype_pool_str887[sizeof("buhid")];
+ char uniname2ctype_pool_str895[sizeof("modifiersymbol")];
+ char uniname2ctype_pool_str898[sizeof("blank")];
+ char uniname2ctype_pool_str908[sizeof("hyphen")];
+ char uniname2ctype_pool_str909[sizeof("bugi")];
+ char uniname2ctype_pool_str935[sizeof("myanmar")];
+ char uniname2ctype_pool_str937[sizeof("imperialaramaic")];
+ char uniname2ctype_pool_str947[sizeof("mathsymbol")];
+ char uniname2ctype_pool_str966[sizeof("cwu")];
+ char uniname2ctype_pool_str975[sizeof("gurmukhi")];
+ char uniname2ctype_pool_str978[sizeof("buginese")];
+ char uniname2ctype_pool_str1022[sizeof("symbol")];
+ char uniname2ctype_pool_str1024[sizeof("osmanya")];
+ char uniname2ctype_pool_str1036[sizeof("bamu")];
+ char uniname2ctype_pool_str1040[sizeof("sk")];
+ char uniname2ctype_pool_str1065[sizeof("bamum")];
+ char uniname2ctype_pool_str1071[sizeof("kayahli")];
+ char uniname2ctype_pool_str1072[sizeof("egyp")];
+ char uniname2ctype_pool_str1140[sizeof("grlink")];
+ char uniname2ctype_pool_str1141[sizeof("xsux")];
+ char uniname2ctype_pool_str1174[sizeof("any")];
+ char uniname2ctype_pool_str1187[sizeof("graphemelink")];
+ char uniname2ctype_pool_str1221[sizeof("mlym")];
+ char uniname2ctype_pool_str1235[sizeof("mymr")];
+ char uniname2ctype_pool_str1277[sizeof("hebrew")];
+ char uniname2ctype_pool_str1542[sizeof("egyptianhieroglyphs")];
+ char uniname2ctype_pool_str1751[sizeof("zyyy")];
+#endif /* USE_UNICODE_PROPERTIES */
+ };
+static const struct uniname2ctype_pool_t uniname2ctype_pool_contents = /* String pool: one literal per uniname2ctype_pool_strN member, consumed positionally in struct declaration order. */
+ {
+#ifndef USE_UNICODE_PROPERTIES /* Reduced configuration: only the short class names (print, punct, alpha, ...) are pooled. */
+ "print",
+ "punct",
+#else /* USE_UNICODE_PROPERTIES */
+ "c",
+ "cc",
+ "co",
+ "l",
+ "lo",
+ "no",
+ "n",
+ "ll",
+ "cn",
+ "nl",
+ "lao",
+ "laoo",
+ "zl",
+ "z",
+ "control",
+ "qaac",
+ "ci",
+ "lana",
+ "oalpha",
+ "qaai",
+ "arabic",
+ "cari",
+ "oriya",
+ "carian",
+ "cntrl",
+ "connectorpunctuation",
+ "olower",
+ "latn",
+ "latin",
+ "ital",
+ "hano",
+ "lt",
+ "han",
+ "hanunoo",
+ "canadianaboriginal",
+ "hangul",
+ "hani",
+ "nchar",
+ "zinh",
+ "tale",
+ "loe",
+ "hira",
+ "taile",
+ "lineseparator",
+ "thaa",
+ "hiragana",
+ "thai",
+ "initialpunctuation",
+ "other",
+ "thaana",
+ "cher",
+ "otherletter",
+ "othernumber",
+ "letter",
+ "sc",
+ "so",
+ "taiviet",
+ "noncharactercodepoint",
+ "ethi",
+ "cans",
+ "titlecaseletter",
+ "ascii",
+ "letternumber",
+ "otheralphabetic",
+ "otherlowercase",
+ "idc",
+ "oidc",
+ "sinhala",
+ "terminalpunctuation",
+ "olditalic",
+ "di",
+ "otheridcontinue",
+ "odi",
+ "dia",
+ "otheridstart",
+ "oldturkic",
+ "diacritic",
+ "oldpersian",
+ "radical",
+ "sinh",
+ "ideo",
+ "shavian",
+ "format",
+ "inscriptionalparthian",
+ "vai",
+ "vaii",
+ "tifinagh",
+ "cf",
+ "othersymbol",
+ "ideographic",
+ "inherited",
+ "glagolitic",
+ "idcontinue",
+ "asciihexdigit",
+ "inscriptionalpahlavi",
+ "s",
+ "gothic",
+ "cs",
+ "avestan",
+ "tavt",
+ "zs",
+ "decimalnumber",
+ "tagbanwa",
+ "joinc",
+ "geor",
+ "hang",
+ "georgian",
+ "cased",
+ "joincontrol",
+ "oids",
+ "variationselector",
+ "graph",
+ "changeswhenlowercased",
+ "nd",
+ "goth",
+ "pc",
+ "po",
+ "ogrext",
+ "coptic",
+ "grext",
+ "logicalorderexception",
+ "idst",
+#endif /* USE_UNICODE_PROPERTIES */
+ "alpha", /* emitted in both configurations */
+#ifdef USE_UNICODE_PROPERTIES
+ "pi",
+ "dsrt",
+ "dash",
+ "otherdefaultignorablecodepoint",
+ "copt",
+ "idstart",
+ "closepunctuation",
+ "changeswhentitlecased",
+ "lepc",
+ "avst",
+ "cprt",
+ "runic",
+ "patsyn",
+ "mc",
+ "tfng",
+ "lepcha",
+ "prti",
+ "print", /* same name as the first entry of the reduced configuration, but at a different pool position */
+ "phli",
+ "mn",
+ "idsbinaryoperator",
+ "talu",
+ "runr",
+ "graphemebase",
+ "common",
+#endif /* USE_UNICODE_PROPERTIES */
+ "alnum", /* emitted in both configurations */
+#ifndef USE_UNICODE_PROPERTIES
+ "xdigit",
+ "newline",
+ "upper",
+ "ascii",
+ "cntrl",
+#else /* USE_UNICODE_PROPERTIES */
+ "phoenician",
+ "deva",
+ "changeswhencasefolded",
+ "pe",
+ "mongolian",
+ "armn",
+ "deseret",
+ "softdotted",
+ "armi",
+ "devanagari",
+ "digit",
+ "caseignorable",
+ "taml",
+ "tamil",
+ "telu",
+ "ids",
+ "armenian",
+ "sd",
+ "privateuse",
+ "assigned",
+ "cham",
+ "omath",
+ "otherpunctuation",
+ "taitham",
+ "defaultignorablecodepoint",
+ "glag",
+ "ethiopic",
+ "vs",
+ "me",
+ "cwl",
+ "mtei",
+ "math",
+ "term",
+ "java",
+ "tglg",
+ "lower",
+ "patternwhitespace",
+ "finalpunctuation",
+ "tagalog",
+ "patws",
+ "lisu",
+ "otheruppercase",
+#endif /* USE_UNICODE_PROPERTIES */
+ "space", /* emitted in both configurations */
+#ifdef USE_UNICODE_PROPERTIES
+ "graphemeextend",
+ "saur",
+ "uideo",
+ "lowercase",
+ "rjng",
+ "osma",
+ "linb",
+ "bali",
+ "cwt",
+ "separator",
+ "othermath",
+ "unassigned",
+ "lowercaseletter",
+ "arab",
+ "samr",
+ "brai",
+ "sundanese",
+ "samaritan",
+ "ahex",
+ "linearb",
+ "pf",
+ "ext",
+ "olck",
+ "nko",
+ "nkoo",
+ "newline",
+ "tibetan",
+ "javanese",
+ "bengali",
+ "newtailue",
+ "kana",
+ "olchiki",
+ "kali",
+ "cwcf",
+ "ps",
+ "braille",
+ "tibt",
+ "sterm",
+ "ugar",
+ "nonspacingmark",
+ "phag",
+ "kaithi",
+ "xidc",
+ "balinese",
+ "mong",
+ "ogam",
+ "modifierletter",
+ "ugaritic",
+ "katakana",
+ "pd",
+ "hebr",
+ "p",
+ "orkh",
+#endif /* USE_UNICODE_PROPERTIES */
+ "word", /* emitted in both configurations */
+#ifndef USE_UNICODE_PROPERTIES
+ "lower",
+ "graph",
+ "digit",
+ "blank" /* final initializer of the reduced configuration, hence no trailing comma */
+#else /* USE_UNICODE_PROPERTIES */
+ "saurashtra",
+ "khar",
+ "sund",
+ "zp",
+ "changeswhenuppercased",
+ "xidstart",
+ "shaw",
+ "kthi",
+ "ogham",
+ "spaceseparator",
+ "changeswhencasemapped",
+ "sarb",
+ "xidcontinue",
+ "bidic",
+ "deprecated",
+ "xdigit",
+ "bidicontrol",
+ "lu",
+ "dashpunctuation",
+ "extender",
+ "idstrinaryoperator",
+ "cherokee",
+ "punct",
+ "phagspa",
+ "oupper",
+ "lyci",
+ "whitespace",
+ "lycian",
+ "yi",
+ "cyrl",
+ "knda",
+ "orya",
+ "cyrillic",
+ "yiii",
+ "kannada",
+ "xids",
+ "limbu",
+ "m",
+ "unifiedideograph",
+ "paragraphseparator",
+ "lm",
+ "openpunctuation",
+ "tagb",
+ "kharoshthi",
+ "enclosingmark",
+ "surrogate",
+ "beng",
+ "number",
+ "telugu",
+ "rejang",
+ "malayalam",
+ "idsb",
+ "hexdigit",
+ "xpeo",
+ "phnx",
+ "bopo",
+ "bopomofo",
+ "dep",
+ "sylo",
+ "grbase",
+ "alphabetic",
+ "cwcm",
+ "patternsyntax",
+ "grek",
+ "greek",
+ "syrc",
+ "syriac",
+ "cuneiform",
+ "gujr",
+ "lydi",
+ "gujarati",
+ "sylotinagri",
+ "lydian",
+ "sm",
+ "currencysymbol",
+ "limb",
+ "othergraphemeextend",
+ "guru",
+ "qmark",
+ "mark",
+ "quotationmark",
+ "hex",
+ "upper",
+ "wspace",
+ "khmr",
+ "khmer",
+ "oldsoutharabian",
+ "spacingmark",
+ "punctuation",
+ "uppercase",
+ "meeteimayek",
+ "uppercaseletter",
+ "cypriot",
+ "buhd",
+ "buhid",
+ "modifiersymbol",
+ "blank",
+ "hyphen",
+ "bugi",
+ "myanmar",
+ "imperialaramaic",
+ "mathsymbol",
+ "cwu",
+ "gurmukhi",
+ "buginese",
+ "symbol",
+ "osmanya",
+ "bamu",
+ "sk",
+ "bamum",
+ "kayahli",
+ "egyp",
+ "grlink",
+ "xsux",
+ "any",
+ "graphemelink",
+ "mlym",
+ "mymr",
+ "hebrew",
+ "egyptianhieroglyphs",
+ "zyyy" /* final initializer of the full configuration, hence no trailing comma */
+#endif /* USE_UNICODE_PROPERTIES */
+ };
+#define uniname2ctype_pool ((const char *) &uniname2ctype_pool_contents) /* Byte-addressable view of the pool struct; wordlist[] entries store member offsets into that struct, presumably added to this base by the lookup code (TODO confirm). */
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+const struct uniname2ctype_struct *
+uniname2ctype_p (str, len)
+ register const char *str;
+ register unsigned int len;
+{
+ static const struct uniname2ctype_struct wordlist[] =
+ {
+#ifdef USE_UNICODE_PROPERTIES
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str3, 17},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str5, 18},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str7, 21},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str9, 23},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str10, 26},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str12, 36},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str13, 33},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str14, 24},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str15, 20},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str16, 35},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str18, 93},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str19, 93},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str20, 51},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str21, 50},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str22, 18},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str24, 127},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str25, 59},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str28, 150},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str33, 175},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str34, 113},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str40, 79},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str41, 147},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str43, 86},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str44, 147},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str46, 3},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str49, 38},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str50, 179},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str51, 74},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str52, 74},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str56, 110},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str65, 115},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str66, 27},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str68, 108},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str70, 115},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str71, 100},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str73, 97},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str74, 108},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str76, 181},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str77, 113},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str83, 119},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str85, 190},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str86, 105},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str91, 119},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str92, 51},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str94, 81},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str97, 105},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str98, 92},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str100, 42},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str101, 17},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str103, 81},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str107, 99},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str111, 26},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str113, 36},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str114, 23},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str116, 46},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str118, 49},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str123, 151},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str128, 181},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str129, 98},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str130, 100},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str135, 27},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str136, 14},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str138, 35},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str139, 175},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str141, 179},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str144, 66},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str146, 192},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str147, 91},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str148, 171},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str151, 110},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str152, 69},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str153, 192},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str155, 187},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str156, 177},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str161, 191},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str162, 163},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str167, 177},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str168, 132},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str171, 185},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str179, 91},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str183, 176},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str185, 122},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str186, 19},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str192, 161},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str196, 142},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str197, 142},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str202, 130},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str203, 19},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str205, 49},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str207, 176},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str211, 113},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str212, 129},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str218, 66},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str220, 174},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str224, 162},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str225, 45},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str226, 111},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str227, 22},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str229, 152},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str235, 151},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str236, 53},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str243, 34},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str244, 117},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str245, 167},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str248, 96},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str249, 97},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str251, 96},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str253, 58},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str256, 167},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str257, 191},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str260, 194},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str262, 5},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str263, 60},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str264, 34},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str268, 111},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str269, 38},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str271, 43},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str272, 182},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str276, 127},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str277, 70},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str282, 190},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str285, 184},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str288, 1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str289, 42},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str292, 112},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str297, 168},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str298, 187},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str302, 127},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str306, 65},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str307, 40},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str309, 62},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str312, 140},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str317, 152},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str318, 124},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str319, 102},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str320, 196},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str321, 30},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str326, 130},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str327, 140},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str328, 161},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str329, 7},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str330, 162},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str331, 32},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str332, 183},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str334, 128},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str335, 102},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str337, 71},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str338, 73},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str339, 13},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str340, 136},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str341, 82},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str342, 63},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str343, 40},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str344, 104},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str352, 77},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str353, 112},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str354, 189},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str357, 159},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str358, 82},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str360, 4},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str361, 59},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str362, 87},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str363, 87},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str365, 88},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str366, 65},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str367, 77},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str370, 189},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str372, 21},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str373, 16},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str375, 149},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str377, 172},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str378, 43},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str379, 150},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str381, 69},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str387, 129},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str388, 98},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str390, 194},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str395, 31},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str396, 60},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str400, 158},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str404, 54},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str407, 171},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str408, 157},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str410, 114},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str413, 6},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str414, 195},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str417, 41},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str418, 114},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str419, 195},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str420, 155},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str426, 180},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str427, 9},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str429, 70},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str430, 143},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str435, 186},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str438, 56},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str440, 145},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str442, 123},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str444, 120},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str445, 134},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str448, 62},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str449, 50},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str450, 172},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str451, 20},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str454, 24},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str455, 79},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str458, 154},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str459, 125},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str460, 139},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str462, 154},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str463, 174},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str465, 120},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str467, 41},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str469, 178},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str474, 141},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str476, 138},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str477, 138},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str479, 0},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str480, 94},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str481, 157},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str485, 83},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str486, 128},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str487, 106},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str488, 141},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str489, 144},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str490, 63},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str491, 44},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str493, 125},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str494, 94},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str502, 193},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str504, 121},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str505, 32},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str508, 137},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str509, 164},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str512, 68},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str514, 134},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str515, 104},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str516, 101},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str520, 25},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str521, 121},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str522, 106},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str523, 39},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str525, 78},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str531, 37},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str533, 163},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str536, 12},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str537, 143},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str538, 133},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str539, 139},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str542, 52},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str548, 61},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str552, 67},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str553, 122},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str554, 164},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str556, 101},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str558, 53},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str559, 64},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str560, 160},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str562, 68},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str564, 166},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str570, 188},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str573, 11},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str575, 166},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str584, 28},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str587, 39},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str590, 178},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str593, 184},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str594, 99},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str595, 8},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str597, 137},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str598, 180},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str599, 146},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str601, 165},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str602, 146},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str603, 109},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str606, 76},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str608, 89},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str610, 86},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str611, 76},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str616, 109},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str618, 89},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str623, 67},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str626, 118},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str635, 29},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str638, 186},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str639, 52},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str640, 25},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str645, 44},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str646, 117},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str649, 133},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str651, 31},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str652, 22},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str653, 83},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str654, 33},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str656, 88},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str660, 145},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str661, 90},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str674, 183},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str676, 173},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str677, 132},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str689, 136},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str692, 107},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str698, 107},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str699, 188},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str701, 131},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str705, 71},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str706, 55},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str707, 64},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str708, 196},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str709, 75},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str710, 75},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str714, 80},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str717, 80},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str722, 135},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str723, 85},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str726, 148},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str727, 85},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str728, 131},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str729, 148},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str748, 48},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str751, 46},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str755, 118},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str758, 182},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str786, 84},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str797, 170},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str809, 29},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str820, 170},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str823, 173},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str842, 10},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str843, 165},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str848, 103},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str849, 103},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str863, 160},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str864, 30},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str866, 37},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str867, 57},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str878, 158},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str883, 28},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str884, 124},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str886, 116},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str887, 116},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str895, 47},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str898, 2},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str908, 169},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str909, 126},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str935, 95},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str937, 159},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str947, 48},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str966, 61},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str975, 84},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str978, 126},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+#endif /* USE_UNICODE_PROPERTIES */
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+#ifndef USE_UNICODE_PROPERTIES
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str7, 7},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str8, 8},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str9, 1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str10, 13},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str11, 11},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str12, 0},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str13, 10},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str14, 14},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str15, 3},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str16, 9},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str17, 12},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str18, 6},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str19, 5},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str20, 4},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str21, 2}
+#else /* USE_UNICODE_PROPERTIES */
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1022, 45},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1024, 123},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1036, 156},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1040, 47},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1065, 156},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1071, 144},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1072, 153},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1140, 72},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1141, 135},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1174, 15},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1187, 72},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1221, 90},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1235, 95},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1277, 78},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1542, 153},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1}, {-1},
+ {-1},
+ {(int)(long)&((struct uniname2ctype_pool_t *)0)->uniname2ctype_pool_str1751, 73}
+#endif /* USE_UNICODE_PROPERTIES */
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = uniname2ctype_hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register int o = wordlist[key].name;
+ if (o >= 0)
+ {
+ register const char *s = o + uniname2ctype_pool;
+
+ if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
+ return &wordlist[key];
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * Map a property name of `len` bytes to its ctype number.
+ * The lookup is delegated to uniname2ctype_p(); when it yields no entry
+ * the function reports -1.
+ */
+static int
+uniname2ctype(const UChar *name, unsigned int len)
+{
+  const struct uniname2ctype_struct *entry;
+
+  entry = uniname2ctype_p((const char *)name, len);
+  return entry ? entry->ctype : -1;
+}
diff --git a/src/node.h b/src/node.h
new file mode 100644
index 000000000..a6e3d58fe
--- /dev/null
+++ b/src/node.h
@@ -0,0 +1,125 @@
+/*
+ * Node kind tags.
+ *
+ * The enumerators carry no explicit values, so they are numbered from 0 in
+ * declaration order; inserting or reordering entries changes every following
+ * numeric value.
+ */
+enum node_type {
+ NODE_METHOD,
+ NODE_FBODY,
+ NODE_CFUNC,
+ NODE_SCOPE,
+ NODE_BLOCK,
+ NODE_IF,
+ NODE_CASE,
+ NODE_WHEN,
+ NODE_OPT_N,
+ NODE_WHILE,
+ NODE_UNTIL,
+ NODE_ITER,
+ NODE_FOR,
+ NODE_BREAK,
+ NODE_NEXT,
+ NODE_REDO,
+ NODE_RETRY,
+ NODE_BEGIN,
+ NODE_RESCUE,
+ NODE_ENSURE,
+ NODE_AND,
+ NODE_OR,
+ NODE_NOT,
+ NODE_MASGN,
+ NODE_ASGN,
+ NODE_CDECL,
+ NODE_CVASGN,
+ NODE_CVDECL,
+ NODE_OP_ASGN,
+ NODE_CALL,
+ NODE_FCALL,
+ NODE_VCALL,
+ NODE_SUPER,
+ NODE_ZSUPER,
+ NODE_ARRAY,
+ NODE_ZARRAY,
+ NODE_HASH,
+ NODE_RETURN,
+ NODE_YIELD,
+ NODE_LVAR,
+ NODE_DVAR,
+ NODE_GVAR,
+ NODE_IVAR,
+ NODE_CONST,
+ NODE_CVAR,
+ NODE_NTH_REF,
+ NODE_BACK_REF,
+ NODE_MATCH,
+ NODE_MATCH2,
+ NODE_MATCH3,
+ NODE_INT,
+ NODE_FLOAT,
+ NODE_NEGATE,
+ NODE_LAMBDA,
+ NODE_SYM,
+ NODE_STR,
+ NODE_DSTR,
+ NODE_DREGX,
+ NODE_DREGX_ONCE,
+ NODE_LIST,
+ NODE_ARG,
+ NODE_ARGSCAT,
+ NODE_ARGSPUSH,
+ NODE_SPLAT,
+ NODE_TO_ARY,
+ NODE_SVALUE,
+ NODE_BLOCK_ARG,
+ NODE_DEF,
+ NODE_SDEF,
+ NODE_ALIAS,
+ NODE_UNDEF,
+ NODE_CLASS,
+ NODE_MODULE,
+ NODE_SCLASS,
+ NODE_COLON2,
+ NODE_COLON3,
+ NODE_CREF,
+ NODE_DOT2,
+ NODE_DOT3,
+ NODE_FLIP2,
+ NODE_FLIP3,
+ NODE_ATTRSET,
+ NODE_SELF,
+ NODE_NIL,
+ NODE_TRUE,
+ NODE_FALSE,
+ NODE_DEFINED,
+ NODE_NEWLINE,
+ NODE_POSTEXE,
+ NODE_ALLOCA,
+ NODE_DMETHOD,
+ NODE_BMETHOD,
+ NODE_MEMO,
+ NODE_IFUNC,
+ NODE_DSYM,
+ NODE_ATTRASGN,
+ /* Last enumerator; presumably a count/sentinel rather than a real node kind -- confirm. */
+ NODE_LAST
+};
+
+/*
+ * Generic node record, also available as NODE.
+ *
+ * Besides `flags` and `nd_file` it holds three slots (u1, u2, u3); each slot
+ * is a union, so only one interpretation of a slot is valid at a time.
+ * NOTE(review): which member is live presumably depends on the node kind
+ * (enum node_type) -- nothing in this header enforces it; confirm at the
+ * use sites.
+ */
+typedef struct RNode {
+ unsigned long flags; /* presumably node kind and flag bits -- confirm */
+ char *nd_file; /* presumably the originating source file name -- confirm */
+ /* First slot: child node, symbol, value or symbol table. */
+ union {
+ struct RNode *node;
+ mrb_sym id;
+ mrb_value value;
+ //mrb_value (*cfunc)((ARGS_ANY()));
+ mrb_sym *tbl;
+ } u1;
+ /* Second slot: child node, symbol, argument count or value. */
+ union {
+ struct RNode *node;
+ mrb_sym id;
+ long argc;
+ mrb_value value;
+ } u2;
+ /* Third slot: child node, symbol, state, global entry, counter or value. */
+ union {
+ struct RNode *node;
+ mrb_sym id;
+ long state;
+ struct global_entry *entry;
+ long cnt;
+ mrb_value value;
+ } u3;
+} NODE;
diff --git a/src/numeric.c b/src/numeric.c
new file mode 100644
index 000000000..f79369d90
--- /dev/null
+++ b/src/numeric.c
@@ -0,0 +1,2018 @@
+/**********************************************************************
+
+ numeric.c -
+
+ $Author: yugui $
+ created at: Fri Aug 13 18:33:09 JST 1993
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "mruby.h"
+#include "mruby/numeric.h"
+#include "mruby/string.h"
+#include "mruby/array.h"
+#include <string.h>
+#include "mruby/class.h"
+#include "variable.h"
+
+#include <ctype.h>
+#include <math.h>
+#include <stdio.h>
+
+#ifdef INCLUDE_REGEXP
+#include "encoding.h"
+#endif
+
+#if defined(__FreeBSD__) && __FreeBSD__ < 4
+#include <floatingpoint.h>
+#endif
+
+#ifdef HAVE_FLOAT_H
+#include <float.h>
+#endif
+
+#ifdef HAVE_IEEEFP_H
+#include <ieeefp.h>
+#endif
+
+/*
+ * Fallback names for the US-ASCII string constructors.
+ *
+ * mrb_usascii_str_new2: with INCLUDE_REGEXP it maps to
+ * mrb_usascii_str_new_cstr, otherwise to the plain mrb_str_new_cstr.
+ */
+#ifndef mrb_usascii_str_new2
+ #ifdef INCLUDE_REGEXP
+ #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
+ #else
+ #define mrb_usascii_str_new2 mrb_str_new_cstr
+ #endif
+#endif
+/*
+ * mrb_usascii_str_new: without INCLUDE_REGEXP it maps to mrb_str_new; with
+ * INCLUDE_REGEXP nothing is defined here.
+ * The guard must test mrb_usascii_str_new itself: testing
+ * mrb_usascii_str_new2 (always defined by the block above) made this
+ * fallback unreachable.
+ */
+#ifndef mrb_usascii_str_new
+ #ifdef INCLUDE_REGEXP
+ #else
+ #define mrb_usascii_str_new mrb_str_new
+ #endif
+#endif
+
+/*
+ * Fallback definitions for the <float.h> limits.
+ * Each macro is defined only when it is still missing at this point (for
+ * example because <float.h> was not included); the values describe an
+ * IEEE 754 64-bit double.
+ */
+#ifndef FLT_RADIX
+#define FLT_RADIX 2
+#endif
+/* In the C standard's encoding, FLT_ROUNDS == 1 means round-to-nearest. */
+#ifndef FLT_ROUNDS
+#define FLT_ROUNDS 1
+#endif
+#ifndef DBL_MIN
+#define DBL_MIN 2.2250738585072014e-308
+#endif
+#ifndef DBL_MAX
+#define DBL_MAX 1.7976931348623157e+308
+#endif
+#ifndef DBL_MIN_EXP
+#define DBL_MIN_EXP (-1021)
+#endif
+#ifndef DBL_MAX_EXP
+#define DBL_MAX_EXP 1024
+#endif
+#ifndef DBL_MIN_10_EXP
+#define DBL_MIN_10_EXP (-307)
+#endif
+#ifndef DBL_MAX_10_EXP
+#define DBL_MAX_10_EXP 308
+#endif
+#ifndef DBL_DIG
+#define DBL_DIG 15
+#endif
+#ifndef DBL_MANT_DIG
+#define DBL_MANT_DIG 53
+#endif
+#ifndef DBL_EPSILON
+#define DBL_EPSILON 2.2204460492503131e-16
+#endif
+
+/* Shorthand: expands to mrb_rational_raw(x, INT2FIX(1)). */
+#define mrb_rational_raw1(x) mrb_rational_raw(x, INT2FIX(1))
+
+/*
+ * Pick the type behind LONG_LONG: `long long` when SIZEOF_LONG_LONG is
+ * positive, otherwise `__int64` when SIZEOF___INT64 is positive. If neither
+ * holds, LONG_LONG stays undefined.
+ */
+#if SIZEOF_LONG_LONG > 0
+# define LONG_LONG long long
+#elif SIZEOF___INT64 > 0
+# define HAVE_LONG_LONG 1
+# define LONG_LONG __int64
+# undef SIZEOF_LONG_LONG
+# define SIZEOF_LONG_LONG SIZEOF___INT64
+#endif
+
+/*
+ * Select SIGNED_VALUE / SIZEOF_VALUE from the pointer size.
+ *
+ * - The first branch is permanently disabled by the trailing `&& 0`.
+ * - An identifier that is not a defined macro evaluates to 0 inside #if, so
+ *   when SIZEOF_LONG and SIZEOF_VOIDP are both undefined the second branch
+ *   is taken (0 == 0).
+ * - NOTE(review): the second branch sets SIGNED_VALUE to `long long` while
+ *   SIZEOF_VALUE is SIZEOF_LONG; confirm the mismatch is intentional.
+ * - Only the third branch still defines the VALUE and ID typedefs; in the
+ *   second branch they are commented out.
+ */
+#if defined HAVE_UINTPTR_T && 0
+typedef uintptr_t VALUE;
+typedef uintptr_t ID;
+# define SIGNED_VALUE intptr_t
+# define SIZEOF_VALUE SIZEOF_UINTPTR_T
+#elif SIZEOF_LONG == SIZEOF_VOIDP
+//typedef unsigned long VALUE;
+//typedef unsigned long ID;
+# define SIGNED_VALUE long long
+# define SIZEOF_VALUE SIZEOF_LONG
+#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP
+typedef unsigned LONG_LONG VALUE;
+typedef unsigned LONG_LONG ID;
+# define SIGNED_VALUE LONG_LONG
+# define LONG_LONG_VALUE 1
+# define SIZEOF_VALUE SIZEOF_LONG_LONG
+#else
+# error ---->> ruby requires sizeof(void*) == sizeof(long) to be compiled. <<----
+#endif
+
+/*
+ * Byte image of +Infinity, emitted only when HAVE_INFINITY is not defined.
+ * The four bytes spell 0x7f800000 (IEEE 754 single-precision +Inf) in
+ * little- or big-endian order. Because the initializer is a string literal
+ * the array also holds a trailing NUL, so its size is 5.
+ * NOTE(review): if BYTE_ORDER and LITTLE_ENDIAN are both undefined, the #elif
+ * compares 0 == 0 and the little-endian layout is chosen.
+ */
+#ifdef HAVE_INFINITY
+#elif BYTE_ORDER == LITTLE_ENDIAN
+const unsigned char mrb_infinity[] = "\x00\x00\x80\x7f";
+#else
+const unsigned char mrb_infinity[] = "\x7f\x80\x00\x00";
+#endif
+
+/*
+ * Byte image of a NaN, emitted only when HAVE_NAN is not defined.
+ * The four bytes spell 0x7fc00000 (an IEEE 754 single-precision quiet NaN);
+ * the same trailing-NUL and byte-order remarks as for mrb_infinity apply.
+ */
+#ifdef HAVE_NAN
+#elif BYTE_ORDER == LITTLE_ENDIAN
+const unsigned char mrb_nan[] = "\x00\x00\xc0\x7f";
+#else
+const unsigned char mrb_nan[] = "\x7f\xc0\x00\x00";
+#endif
+
+extern double round(double);
+
+#ifndef HAVE_ROUND
+/*
+ * Replacement for round(): round to the nearest integral value, with
+ * halfway cases going away from zero. Zero (either sign) and NaN match
+ * neither branch and are returned unchanged.
+ */
+double
+round(double x)
+{
+  double whole;
+
+  if (x > 0.0) {
+    whole = floor(x);
+    /* a fractional part of at least one half rounds up */
+    return (x - whole >= 0.5) ? whole + 1.0 : whole;
+  }
+  if (x < 0.0) {
+    whole = ceil(x);
+    /* mirror image for negative input: round down, away from zero */
+    return (whole - x >= 0.5) ? whole - 1.0 : whole;
+  }
+  return x;
+}
+#endif
+
+
+
+
+void mrb_cmperr(mrb_state *mrb, mrb_value x, mrb_value y);
+
+/*
+ * Raise E_ZERODIVISION_ERROR with the message "divided by 0" through
+ * mrb_raise().
+ */
+void
+mrb_num_zerodiv(mrb_state *mrb)
+{
+ mrb_raise(mrb, E_ZERODIVISION_ERROR, "divided by 0");
+}
+
+
+/*
+ * call-seq:
+ * num.coerce(numeric) -> array
+ *
+ * If <i>aNumeric</i> is the same type as <i>num</i>, returns an array
+ * containing <i>aNumeric</i> and <i>num</i>. Otherwise, returns an
+ * array with both <i>aNumeric</i> and <i>num</i> represented as
+ * <code>Float</code> objects. This coercion mechanism is used by
+ * Ruby to handle mixed-type numeric operations: it is intended to
+ * find a compatible common type between the two operands of the operator.
+ *
+ * 1.coerce(2.5) #=> [2.5, 1.0]
+ * 1.2.coerce(3) #=> [3.0, 1.2]
+ * 1.coerce(2) #=> [2, 1]
+ */
+
+static mrb_value
+num_coerce(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Different classes: convert both operands with mrb_Float() first. */
+  if (mrb_class(mrb, x) != mrb_class(mrb, other)) {
+    x = mrb_Float(mrb, x);
+    other = mrb_Float(mrb, other);
+  }
+  /* The pair is always [argument, receiver]. */
+  return mrb_assoc_new(mrb, other, x);
+}
+
+/*
+ * Call the "coerce" method on x[1], passing x[0] as the single argument,
+ * and return whatever that call returns.
+ */
+static mrb_value
+coerce_body(mrb_state *mrb, mrb_value *x)
+{
+ return mrb_funcall(mrb, x[1], "coerce", 1, x[0]);
+}
+
+/*
+ * Raise E_TYPE_ERROR "<a> can't be coerced into <b>" for the pair in x.
+ * <a> is the inspect string of x[1] when mrb_special_const_p(x[1]) holds,
+ * otherwise the class name of x[1]; <b> is the class name of x[0].
+ * NOTE(review): no caller of this function is visible in this part of the
+ * file -- confirm whether it is still used.
+ */
+static mrb_value
+coerce_rescue(mrb_state *mrb, mrb_value *x)
+{
+ /* The inspect string is computed up front, even if only the class name ends up being printed. */
+ volatile mrb_value v = mrb_inspect(mrb, x[1]);
+
+ mrb_raise(mrb, E_TYPE_ERROR, "%s can't be coerced into %s",
+ mrb_special_const_p(x[1])?
+ RSTRING_PTR(v):
+ mrb_obj_classname(mrb, x[1]),
+ mrb_obj_classname(mrb, x[0]));
+ return mrb_nil_value(); /* dummy */
+}
+
+/*
+ * Coerce the pair (*x, *y) by calling y.coerce(x) through coerce_body().
+ *
+ * On success the two elements of the returned array are stored back into
+ * *x and *y and TRUE is returned. When the result is not an array of
+ * exactly two elements, a TypeError is raised if `err` is non-zero;
+ * otherwise (or if mrb_raise returns) FALSE is returned and *x / *y are
+ * left untouched.
+ */
+static int
+do_coerce(mrb_state *mrb, mrb_value *x, mrb_value *y, int err)
+{
+  mrb_value pair[2];
+  mrb_value result;
+
+  pair[0] = *x;
+  pair[1] = *y;
+  result = coerce_body(mrb, pair);
+
+  if (mrb_type(result) == MRB_TT_ARRAY && RARRAY_LEN(result) == 2) {
+    *x = RARRAY_PTR(result)[0];
+    *y = RARRAY_PTR(result)[1];
+    return TRUE;
+  }
+
+  if (err) {
+    mrb_raise(mrb, E_TYPE_ERROR, "coerce must return [x, y]");
+  }
+  return FALSE;
+}
+
+/*
+ * Binary-operator helper: coerce x and y with do_coerce() (err = TRUE, so a
+ * malformed coerce result raises a TypeError), then call the method named
+ * `func` on the coerced x with the coerced y as its only argument.
+ */
+mrb_value
+mrb_num_coerce_bin(mrb_state *mrb, mrb_value x, mrb_value y, char* func)
+{
+ /* The return value is ignored; failure is reported by raising. */
+ do_coerce(mrb, &x, &y, TRUE);
+ return mrb_funcall(mrb, x, func, 1, y);
+}
+
+/*
+ * Comparison helper: coerce x and y without raising on a malformed coerce
+ * result. If coercion fails the result is nil; otherwise the method named
+ * `func` is called on the coerced x with the coerced y.
+ */
+mrb_value
+mrb_num_coerce_cmp(mrb_state *mrb, mrb_value x, mrb_value y, char* func)
+{
+  if (!do_coerce(mrb, &x, &y, FALSE)) {
+    return mrb_nil_value();
+  }
+  return mrb_funcall(mrb, x, func, 1, y);
+}
+
+/*
+ * Relational-operator helper: coerce x and y (without raising on a
+ * malformed coerce result) and call `func` on the coerced pair. A failed
+ * coercion or a nil result is reported through mrb_cmperr() using the
+ * ORIGINAL operands.
+ */
+mrb_value
+mrb_num_coerce_relop(mrb_state *mrb, mrb_value x, mrb_value y, char* func)
+{
+  mrb_value lhs = x, rhs = y;
+  mrb_value result;
+
+  if (do_coerce(mrb, &lhs, &rhs, FALSE) &&
+      !mrb_nil_p(result = mrb_funcall(mrb, lhs, func, 1, rhs))) {
+    return result;
+  }
+  mrb_cmperr(mrb, x, y);
+  return mrb_nil_value(); /* not reached */
+}
+
+/*
+ * call-seq:
+ * +num -> num
+ *
+ * Unary Plus---Returns the receiver's value.
+ */
+
+static mrb_value
+num_uplus(mrb_state *mrb, mrb_value num)
+{
+ /* Identity: the receiver is handed back unchanged; mrb is not used. */
+ return num;
+}
+
+/*
+ * call-seq:
+ * -num -> numeric
+ *
+ * Unary Minus---Returns the receiver's value, negated.
+ */
+
+static mrb_value
+num_uminus(mrb_state *mrb, mrb_value num)
+{
+ mrb_value zero;
+
+ /* Negation is computed as 0 - num: coerce the pair (0, num) first... */
+ zero = mrb_fixnum_value(0);
+ do_coerce(mrb, &zero, &num, TRUE);
+
+ /* ...then call "-" on the coerced zero with the coerced operand. */
+ return mrb_funcall(mrb, zero, "-", 1, num);
+}
+
+/*
+ * call-seq:
+ * num.quo(numeric) -> real
+ *
+ * Returns most exact division (rational for integers, float for floats).
+ */
+static mrb_value
+num_quo(mrb_state *mrb, mrb_value x)
+{
+ mrb_value y;
+
+ mrb_get_args(mrb, "o", &y);
+ /*
+ * The receiver is read with mrb_fixnum(), converted to double, wrapped as a
+ * float value, and "/" is then called on that float with y.
+ * NOTE(review): mrb_fixnum() is applied unconditionally; presumably the
+ * receiver is always a Fixnum here -- confirm, a Float receiver would be
+ * misread.
+ */
+ return mrb_funcall(mrb, mrb_float_value((double)mrb_fixnum(x)), "/", 1, y);
+}
+
+/*
+ * call-seq:
+ * num.abs -> numeric
+ * num.magnitude -> numeric
+ *
+ * Returns the absolute value of <i>num</i>.
+ *
+ * 12.abs #=> 12
+ * (-34.56).abs #=> 34.56
+ * -34.56.abs #=> 34.56
+ */
+
+static mrb_value
+num_abs(mrb_state *mrb, mrb_value num)
+{
+  /* Anything that is not "< 0" is returned as is. */
+  if (!mrb_test(mrb_funcall(mrb, num, "<", 1, mrb_fixnum_value(0)))) {
+    return num;
+  }
+  /* Negative receiver: negate it through its unary minus method. */
+  return mrb_funcall(mrb, num, "-@", 0);
+}
+
+/********************************************************************
+ *
+ * Document-class: Float
+ *
+ * <code>Float</code> objects represent inexact real numbers using
+ * the native architecture's double-precision floating point
+ * representation.
+ */
+
+/*
+ * Wrap the double `d` as a float value by delegating to mrb_float_value().
+ * The commented-out lines are an earlier object-allocating implementation
+ * that is no longer used.
+ */
+mrb_value
+mrb_float_new(double d)
+{
+ //NEWOBJ(flt, struct RFloat);
+ //OBJSETUP(flt, mrb_cFloat, MRB_TT_FLOAT);
+
+ //flt->float_value = d;
+ //return (mrb_value)flt;
+ return mrb_float_value(d);
+}
+
+/* 15.2.9.3.16(x) */
+/*
+ * call-seq:
+ * flt.to_s -> string
+ *
+ * Returns a string containing a representation of self. As well as a
+ * fixed or exponential form of the number, the call may return
+ * ``<code>NaN</code>'', ``<code>Infinity</code>'', and
+ * ``<code>-Infinity</code>''.
+ */
+
+static mrb_value
+flo_to_s(mrb_state *mrb, mrb_value flt)
+{
+ char buf[32];
+ double value = mrb_float(flt);
+ char *p, *e;
+
+ /* Infinities and NaN get fixed spellings and never reach the formatter. */
+ if (isinf(value))
+ return mrb_str_new2(mrb, value < 0 ? "-Infinity" : "Infinity");
+ else if(isnan(value))
+ return mrb_str_new2(mrb, "NaN");
+
+ sprintf(buf, "%#.15g", value); /* ensure to print decimal point */
+ /* e marks the start of the exponent part, or the end of the string if there is none. */
+ if (!(e = strchr(buf, 'e'))) {
+ e = buf + strlen(buf);
+ }
+ if (!ISDIGIT(e[-1])) { /* reformat if ended with decimal point (ex 111111111111111.) */
+ sprintf(buf, "%#.14e", value);
+ if (!(e = strchr(buf, 'e'))) {
+ e = buf + strlen(buf);
+ }
+ }
+ /*
+ * Walk p back over trailing zeros of the mantissa. A zero is dropped only
+ * while the character before it is a digit, so the zero directly after the
+ * decimal point is kept.
+ */
+ p = e;
+ while (p[-1]=='0' && ISDIGIT(p[-2]))
+ p--;
+ /* Close the gap: move the exponent suffix (including the NUL) down to p. */
+ memmove(p, e, strlen(e)+1);
+ return mrb_str_new2(mrb, buf);
+}
+
+/* 15.2.9.3.2 */
+/*
+ * call-seq:
+ * float - other -> float
+ *
+ * Returns a new float which is the difference of <code>float</code>
+ * and <code>other</code>.
+ */
+
+static mrb_value
+flo_minus(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+  double rhs;
+
+  mrb_get_args(mrb, "o", &y);
+
+  /* Fixnum and Float operands are handled natively; the rest is coerced. */
+  switch (mrb_type(y)) {
+  case MRB_TT_FIXNUM:
+    rhs = (double)mrb_fixnum(y);
+    break;
+  case MRB_TT_FLOAT:
+    rhs = mrb_float(y);
+    break;
+  default:
+    return mrb_num_coerce_bin(mrb, x, y, "-");
+  }
+  return mrb_float_value(mrb_float(x) - rhs);
+}
+
+/* 15.2.9.3.3 */
+/*
+ * call-seq:
+ * float * other -> float
+ *
+ * Returns a new float which is the product of <code>float</code>
+ * and <code>other</code>.
+ */
+
+static mrb_value
+flo_mul(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+  double rhs;
+
+  mrb_get_args(mrb, "o", &y);
+
+  /* Fixnum and Float operands are handled natively; the rest is coerced. */
+  switch (mrb_type(y)) {
+  case MRB_TT_FIXNUM:
+    rhs = (double)mrb_fixnum(y);
+    break;
+  case MRB_TT_FLOAT:
+    rhs = mrb_float(y);
+    break;
+  default:
+    return mrb_num_coerce_bin(mrb, x, y, "*");
+  }
+  return mrb_float_value(mrb_float(x) * rhs);
+}
+
+/* 15.2.9.3.4 */
+/*
+ * call-seq:
+ * float / other -> float
+ *
+ * Returns a new float which is the result of dividing
+ * <code>float</code> by <code>other</code>.
+ */
+
+static mrb_value
+flo_div(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+  double rhs;
+
+  mrb_get_args(mrb, "o", &y);
+
+  /* Fixnum and Float operands are handled natively; the rest is coerced. */
+  switch (mrb_type(y)) {
+  case MRB_TT_FIXNUM: {
+    /* the integer operand passes through a long before becoming a double */
+    long divisor = mrb_fixnum(y);
+    rhs = (double)divisor;
+    break;
+  }
+  case MRB_TT_FLOAT:
+    rhs = mrb_float(y);
+    break;
+  default:
+    return mrb_num_coerce_bin(mrb, x, y, "/");
+  }
+  /* No zero check: the division is plain double arithmetic. */
+  return mrb_float_value(mrb_float(x) / rhs);
+}
+
+/*
+ * call-seq:
+ * float.quo(numeric) -> float
+ *
+ * Returns float / numeric.
+ */
+static mrb_value
+flo_quo(mrb_state *mrb, mrb_value x)
+{
+ mrb_value y;
+
+ mrb_get_args(mrb, "o", &y);
+ /* Delegate to the receiver's "/" method with the single argument y. */
+ return mrb_funcall(mrb, x, "/", 1, y);
+}
+
+/*
+ * Compute quotient and remainder of x / y for doubles.
+ *
+ * divp / modp receive the quotient and the remainder; either pointer may be
+ * null, in which case that result is not stored. When y == 0.0
+ * mrb_num_zerodiv() is called first.
+ * NOTE(review): the code after that call assumes mrb_num_zerodiv() does not
+ * return -- confirm.
+ */
+static void
+flodivmod(mrb_state *mrb, double x, double y, double *divp, double *modp)
+{
+ double div, mod;
+
+ if (y == 0.0) mrb_num_zerodiv(mrb);
+#ifdef HAVE_FMOD
+ mod = fmod(x, y);
+#else
+ {
+ double z;
+
+ /* No fmod(): take the integral part of x/y and subtract it back out. */
+ modf(x/y, &z);
+ mod = x - z * y;
+ }
+#endif
+ /* Infinite x with a finite, non-NaN y: the quotient is x itself. */
+ if (isinf(x) && !isinf(y) && !isnan(y))
+ div = x;
+ else
+ div = (x - mod) / y;
+ /* If remainder and divisor have opposite signs, shift the remainder by y and lower the quotient by one. */
+ if (y*mod < 0) {
+ mod += y;
+ div -= 1.0;
+ }
+ if (modp) *modp = mod;
+ if (divp) *divp = div;
+}
+
+/* 15.2.9.3.5 */
+/*
+ * call-seq:
+ * flt % other -> float
+ * flt.modulo(other) -> float
+ *
+ * Return the modulo after division of <code>flt</code> by <code>other</code>.
+ *
+ * 6543.21.modulo(137) #=> 104.21
+ * 6543.21.modulo(137.24) #=> 92.9299999999996
+ */
+
+static mrb_value
+flo_mod(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+  double remainder;
+
+  mrb_get_args(mrb, "o", &y);
+
+  /* Only the remainder is requested from flodivmod(); the quotient slot is 0. */
+  switch (mrb_type(y)) {
+  case MRB_TT_FLOAT:
+    flodivmod(mrb, mrb_float(x), mrb_float(y), 0, &remainder);
+    return mrb_float_value(remainder);
+  case MRB_TT_FIXNUM:
+    flodivmod(mrb, mrb_float(x), (double)mrb_fixnum(y), 0, &remainder);
+    return mrb_float_value(remainder);
+  default:
+    return mrb_num_coerce_bin(mrb, x, y, "%");
+  }
+}
+
+/*
+ * Convert a double to a fixnum value. Values for which FIXABLE() fails
+ * yield nil (the bignum conversion is disabled); otherwise the value is
+ * rounded with round() and truncated to long.
+ */
+static mrb_value
+dbl2ival(double d)
+{
+  if (!FIXABLE(d)) {
+    return mrb_nil_value(); /* range over */
+  }
+  return mrb_fixnum_value((long)round(d));
+}
+
+
+/* 15.2.8.3.16 */
+/*
+ * call-seq:
+ * num.eql?(numeric) -> true or false
+ *
+ * Returns <code>true</code> if <i>num</i> and <i>numeric</i> are the
+ * same type and have equal values.
+ *
+ * 1 == 1.0 #=> true
+ * 1.eql?(1.0) #=> false
+ * (1.0).eql?(1.0) #=> true
+ */
+static mrb_value
+num_eql(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+
+  mrb_get_args(mrb, "o", &y);
+
+  /* Operands of different type tags are never eql?. */
+  if (mrb_type(x) != mrb_type(y)) {
+    return mrb_false_value();
+  }
+  /* Same type tag: defer to mrb_equal(). */
+  return mrb_equal(mrb, x, y) ? mrb_true_value() : mrb_false_value();
+}
+
+/*
+ * Equality fallback: true when mrb_obj_equal() accepts the pair, otherwise
+ * the result of calling "==" on y with x as the argument (operands swapped).
+ */
+static mrb_value
+num_equal(mrb_state *mrb, mrb_value x, mrb_value y)
+{
+  if (!mrb_obj_equal(mrb, x, y)) {
+    return mrb_funcall(mrb, y, "==", 1, x);
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.9.3.7 */
+/*
+ * call-seq:
+ * flt == obj -> true or false
+ *
+ * Returns <code>true</code> only if <i>obj</i> has the same value
+ * as <i>flt</i>. Contrast this with <code>Float#eql?</code>, which
+ * requires <i>obj</i> to be a <code>Float</code>.
+ *
+ * 1.0 == 1 #=> true
+ *
+ */
+
+static mrb_value
+flo_eq(mrb_state *mrb, mrb_value x)
+{
+ mrb_value y;
+ volatile double a, b;
+ mrb_get_args(mrb, "o", &y);
+
+ /* Fetch the right-hand operand as a double; other types go through num_equal(). */
+ switch (mrb_type(y)) {
+ case MRB_TT_FIXNUM:
+ b = (double)mrb_fixnum(y);
+ break;
+ case MRB_TT_FLOAT:
+ b = mrb_float(y);
+ /* For MSVC older than version 1300 only: NaN is rejected explicitly before the comparison. */
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ if (isnan(b)) return mrb_false_value();
+#endif
+ break;
+ default:
+ return num_equal(mrb, x, y);
+ }
+ a = mrb_float(x);
+#if defined(_MSC_VER) && _MSC_VER < 1300
+ if (isnan(a)) return mrb_false_value();
+#endif
+ return (a == b)?mrb_true_value():mrb_false_value();
+}
+
+/* 15.2.8.3.18 */
+/*
+ * call-seq:
+ * flt.hash -> integer
+ *
+ * Returns a hash code for this float.
+ */
+static mrb_value
+flo_hash(mrb_state *mrb, mrb_value num)
+{
+  double d;
+  const unsigned char *bytes;
+  unsigned int hash = 0;
+  size_t i;
+
+  /* The receiver is a Float, so read it with mrb_float(), not mrb_fixnum(). */
+  d = mrb_float(num);
+  /* Normalize -0.0 to +0.0 so that both zeros hash alike. */
+  if (d == 0) d = fabs(d);
+
+  /* Mix the object representation of the double byte by byte. Unsigned */
+  /* arithmetic keeps the multiplication well defined when it wraps.    */
+  bytes = (const unsigned char *)&d;
+  for (i = 0; i < sizeof(double); i++) {
+    hash = (hash * 971u) ^ bytes[i];
+  }
+
+  /* Drop the top bit so the result is a non-negative int. */
+  return mrb_fixnum_value((int)(hash & (~0u >> 1)));
+}
+
+/*
+ * Three-way comparison of two doubles as a fixnum value: 0 when equal,
+ * 1 when a > b, -1 when a < b. If either operand is NaN (or no relation
+ * holds) the result is nil.
+ */
+mrb_value
+mrb_dbl_cmp(double a, double b)
+{
+  int order;
+
+  if (isnan(a) || isnan(b)) {
+    return mrb_nil_value();
+  }
+
+  if (a == b) {
+    order = 0;
+  }
+  else if (a > b) {
+    order = 1;
+  }
+  else if (a < b) {
+    order = -1;
+  }
+  else {
+    return mrb_nil_value();
+  }
+  return mrb_fixnum_value(order);
+}
+
+/* 15.2.9.3.13 */
+/*
+ * call-seq:
+ * flt.to_f -> self
+ *
+ * As <code>flt</code> is already a float, returns +self+.
+ */
+
+static mrb_value
+flo_to_f(mrb_state *mrb, mrb_value num)
+{
+ /* Identity: the receiver is handed back unchanged; mrb is not used. */
+ return num;
+}
+
+/* 15.2.9.3.11 */
+/*
+ * call-seq:
+ * flt.infinite? -> nil, -1, +1
+ *
+ * Returns <code>nil</code>, -1, or +1 depending on whether <i>flt</i>
+ * is finite, -infinity, or +infinity.
+ *
+ * (0.0).infinite? #=> nil
+ * (-1.0/0.0).infinite? #=> -1
+ * (+1.0/0.0).infinite? #=> 1
+ */
+
+static mrb_value
+flo_is_infinite_p(mrb_state *mrb, mrb_value num)
+{
+  double v = mrb_float(num);
+
+  /* Finite values and NaN both report nil. */
+  if (!isinf(v)) {
+    return mrb_nil_value();
+  }
+  /* The sign of the infinity selects -1 or 1. */
+  return mrb_fixnum_value(v < 0 ? -1 : 1);
+}
+
+/* 15.2.9.3.9 */
+/*
+ * call-seq:
+ * flt.finite? -> true or false
+ *
+ * Returns <code>true</code> if <i>flt</i> is a valid IEEE floating
+ * point number (it is not infinite, and <code>nan?</code> is
+ * <code>false</code>).
+ *
+ */
+
+static mrb_value
+flo_is_finite_p(mrb_state *mrb, mrb_value num)
+{
+  double v = mrb_float(num);
+
+  /* Use finite() when the build provides it, else test inf/NaN by hand. */
+#if HAVE_FINITE
+  if (finite(v)) {
+    return mrb_true_value();
+  }
+#else
+  if (!isinf(v) && !isnan(v)) {
+    return mrb_true_value();
+  }
+#endif
+
+  return mrb_false_value();
+}
+
+/* 15.2.9.3.10 */
+/*
+ * call-seq:
+ * flt.floor -> integer
+ *
+ * Returns the largest integer less than or equal to <i>flt</i>.
+ *
+ * 1.2.floor #=> 1
+ * 2.0.floor #=> 2
+ * (-1.2).floor #=> -2
+ * (-2.0).floor #=> -2
+ */
+
+static mrb_value
+flo_floor(mrb_state *mrb, mrb_value num)
+{
+  double lowered = floor(mrb_float(num));
+
+  /* Results that pass FIXABLE() become a fixnum via a long. */
+  if (FIXABLE(lowered)) {
+    return mrb_fixnum_value((long)lowered);
+  }
+  /* Everything else is handed to mrb_dbl2big(). */
+  return mrb_dbl2big(mrb, lowered);
+}
+
+/* 15.2.9.3.8 */
+/*
+ * call-seq:
+ * flt.ceil -> integer
+ *
+ * Returns the smallest <code>Integer</code> greater than or equal to
+ * <i>flt</i>.
+ *
+ * 1.2.ceil #=> 2
+ * 2.0.ceil #=> 2
+ * (-1.2).ceil #=> -1
+ * (-2.0).ceil #=> -2
+ */
+
+static mrb_value
+flo_ceil(mrb_state *mrb, mrb_value num)
+{
+  double raised = ceil(mrb_float(num));
+
+  if (FIXABLE(raised)) {
+    long as_long = (long)raised;
+    return mrb_fixnum_value(as_long);
+  }
+  /* outside the FIXABLE range: hand the value to mrb_dbl2big() */
+  return mrb_dbl2big(mrb, raised);
+}
+
+/* 15.2.9.3.12 */
+/*
+ * call-seq:
+ * flt.round([ndigits]) -> integer or float
+ *
+ * Rounds <i>flt</i> to a given precision in decimal digits (default 0 digits).
+ * Precision may be negative. Returns a floating point number when ndigits
+ * is more than zero.
+ *
+ * 1.4.round #=> 1
+ * 1.5.round #=> 2
+ * 1.6.round #=> 2
+ * (-1.5).round #=> -2
+ *
+ * 1.234567.round(2) #=> 1.23
+ * 1.234567.round(3) #=> 1.235
+ * 1.234567.round(4) #=> 1.2346
+ * 1.234567.round(5) #=> 1.23457
+ *
+ * 34567.89.round(-5) #=> 0
+ * 34567.89.round(-4) #=> 30000
+ * 34567.89.round(-3) #=> 35000
+ * 34567.89.round(-2) #=> 34600
+ * 34567.89.round(-1) #=> 34570
+ * 34567.89.round(0) #=> 34568
+ * 34567.89.round(1) #=> 34567.9
+ * 34567.89.round(2) #=> 34567.89
+ * 34567.89.round(3) #=> 34567.89
+ *
+ */
+
+/*
+ * Float#round implementation.
+ *
+ * Reads an optional first argument as the number of decimal digits
+ * (taken with mrb_fixnum(); the argument type is not checked here).
+ * With ndigits > 0 a Float is returned, otherwise an integer value.
+ */
+static mrb_value
+flo_round(mrb_state *mrb, mrb_value num)
+{
+  mrb_value *argv;
+  int argc;
+  int ndigits = 0, i;
+  double number, f, scaled;
+  long val;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  if (argc == 1) {
+    ndigits = mrb_fixnum(argv[0]);
+  }
+  number = mrb_float(num);
+
+  /* f = 10 ** |ndigits| (may overflow to infinity for huge ndigits) */
+  f = 1.0;
+  i = abs(ndigits);
+  while (--i >= 0)
+    f = f*10.0;
+
+  if (isinf(f)) {
+    /* rounding to a position far left of the value yields 0;
+       far right of it leaves the value untouched */
+    if (ndigits < 0) number = 0;
+  }
+  else if (ndigits < 0) {
+    number /= f;
+    number = round(number);
+    number *= f;
+  }
+  else {
+    scaled = number * f;
+    /* if scaling overflows, the value is already an integer at this
+       precision: keep it instead of turning it into Infinity */
+    if (!isinf(scaled)) {
+      number = round(scaled);
+      number /= f;
+    }
+  }
+
+  if (ndigits > 0) return mrb_float_value(number);
+
+  if (!FIXABLE(number)) {
+    return mrb_dbl2big(mrb, number);
+  }
+  val = (long)number;
+  return mrb_fixnum_value(val);
+}
+
+/* 15.2.9.3.14 */
+/* 15.2.9.3.15 */
+/*
+ * call-seq:
+ * flt.to_i -> integer
+ * flt.to_int -> integer
+ * flt.truncate -> integer
+ *
+ * Returns <i>flt</i> truncated to an <code>Integer</code>.
+ */
+
+static mrb_value
+flo_truncate(mrb_state *mrb, mrb_value num)
+{
+  double whole = mrb_float(num);
+
+  /* drop the fractional part towards zero */
+  if (whole > 0.0) {
+    whole = floor(whole);
+  }
+  else if (whole < 0.0) {
+    whole = ceil(whole);
+  }
+
+  if (FIXABLE(whole)) {
+    long as_long = (long)whole;
+    return mrb_fixnum_value(as_long);
+  }
+  return mrb_dbl2big(mrb, whole);
+}
+
+/* 15.2.8.3.17 */
+/*
+ * call-seq:
+ * num.floor -> integer
+ *
+ * Returns the largest integer less than or equal to <i>num</i>.
+ * <code>Numeric</code> implements this by converting <i>anInteger</i>
+ * to a <code>Float</code> and invoking <code>Float#floor</code>.
+ *
+ * 1.floor #=> 1
+ * (-1).floor #=> -1
+ */
+
+static mrb_value
+num_floor(mrb_state *mrb, mrb_value num)
+{
+  /* convert the receiver with mrb_Float() and reuse the Float implementation */
+  mrb_value as_float = mrb_Float(mrb, num);
+
+  return flo_floor(mrb, as_float);
+}
+
+/* 15.2.8.3.20 */
+/*
+ * call-seq:
+ * num.round([ndigits]) -> integer or float
+ *
+ * Rounds <i>num</i> to a given precision in decimal digits (default 0 digits).
+ * Precision may be negative. Returns a floating point number when ndigits
+ * is more than zero. <code>Numeric</code> implements this by converting itself
+ * to a <code>Float</code> and invoking <code>Float#round</code>.
+ */
+
+static mrb_value
+num_round(mrb_state *mrb, mrb_value num)
+{
+  /* convert the receiver with mrb_Float(); flo_round() fetches the
+     optional ndigits argument itself through mrb_get_args() */
+  mrb_value as_float = mrb_Float(mrb, num);
+
+  return flo_round(mrb, as_float);
+}
+
+/*
+ * Converts val to a C integer.
+ *
+ * - nil raises a TypeError.
+ * - A Fixnum is unboxed directly.
+ * - A Float is truncated when it lies in [LONG_MIN, LONG_MAX + 1);
+ *   otherwise a RangeError naming the value is raised.
+ * - Anything else is converted with mrb_to_int() and re-examined.
+ *
+ * The upper bound is tested as "< -(double)LONG_MIN" (an exactly
+ * representable power of two) instead of "<= (double)LONG_MAX":
+ * (double)LONG_MAX may round up to LONG_MAX + 1, which would let a value
+ * through whose conversion to an integer is undefined.
+ */
+SIGNED_VALUE
+mrb_num2long(mrb_state *mrb, mrb_value val)
+{
+  for (;;) {
+    if (mrb_nil_p(val)) {
+      mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from nil to integer");
+    }
+
+    if (FIXNUM_P(val)) return mrb_fixnum(val);
+
+    if (mrb_type(val) == MRB_TT_FLOAT) {
+      double d = mrb_float(val);
+
+      if (d < -(double)LONG_MIN && d >= (double)LONG_MIN) {
+        return (SIGNED_VALUE)d;
+      }
+      else {
+        char buf[24];
+        char *s;
+
+        snprintf(buf, sizeof(buf), "%-.10g", d);
+        /* "%-" left-justifies; cut at the first blank if any was emitted */
+        if ((s = strchr(buf, ' ')) != 0) *s = '\0';
+        mrb_raise(mrb, E_RANGE_ERROR, "float %s out of range of integer", buf);
+      }
+    }
+
+    /* not an immediate numeric: coerce and look at the result again */
+    val = mrb_to_int(mrb, val);
+  }
+}
+
+/*
+ * Counterpart of mrb_num2long() that answers a boxed mrb_value:
+ * a Fixnum is returned as is, an in-range Float is boxed as a Fixnum,
+ * nil raises a TypeError, an out-of-range Float raises a RangeError and
+ * any other object is converted with mrb_to_int() and re-examined.
+ */
+mrb_value
+mrb_num2ulong(mrb_state *mrb, mrb_value val)
+{
+  for (;;) {
+    if (mrb_nil_p(val)) {
+      mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from nil to integer");
+    }
+
+    /* this is FIX2LONG, intended */
+    if (FIXNUM_P(val)) return val;
+
+    if (mrb_type(val) == MRB_TT_FLOAT) {
+      if (mrb_float(val) <= (double)LONG_MAX
+          && mrb_float(val) >= (double)LONG_MIN) {
+        return mrb_fixnum_value(mrb_float(val));
+      }
+      else {
+        char text[24];
+        char *blank;
+
+        snprintf(text, sizeof(text), "%-.10g", mrb_float(val));
+        blank = strchr(text, ' ');
+        if (blank != 0) *blank = '\0';
+        mrb_raise(mrb, E_RANGE_ERROR, "float %s out of range of integer", text);
+      }
+    }
+
+    val = mrb_to_int(mrb, val);
+  }
+}
+
+/*
+ * Conversions to the C `int' / `unsigned int' range.
+ *
+ * When SIZEOF_INT < SIZEOF_VALUE the converted value is range-checked and
+ * a RangeError is raised if it does not fit; otherwise (#else branch) the
+ * conversions simply forward to mrb_num2long() / mrb_fixnum().
+ *
+ * NOTE(review): the first branch is not self-consistent as written:
+ *  - check_int() calls mrb_out_of_int() with one argument, but it is
+ *    defined with (mrb_state *, SIGNED_VALUE);
+ *  - check_uint() is defined with three parameters but called with two;
+ *  - mrb_num2int() and mrb_num2uint() use `mrb' without having such a
+ *    parameter, and mrb_fix2uint() calls mrb_num2uint() with two arguments;
+ *  - mrb_num2uint() calls mrb_num2ulong() with one argument;
+ *  - check_uint() applies ~, & and <= to mrb_value operands.
+ * Presumably this branch is never compiled in the supported
+ * configurations -- confirm before enabling it.
+ */
+#if SIZEOF_INT < SIZEOF_VALUE
+/* Raises a RangeError saying num is too small/big to convert to `int'. */
+void
+mrb_out_of_int(mrb_state *mrb, SIGNED_VALUE num)
+{
+ mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIdVALUE " too %s to convert to `int'",
+ num, num < 0 ? "small" : "big");
+}
+
+/* Reports num through mrb_out_of_int() when a round trip through int changes it. */
+static void
+check_int(SIGNED_VALUE num)
+{
+ if ((SIGNED_VALUE)(int)num != num) {
+ mrb_out_of_int(num);
+ }
+}
+
+/*
+ * Range check for `unsigned int'; sign is truthy when the source value
+ * was negative. mask selects the bits above UINT_MAX.
+ */
+static void
+check_uint(mrb_state *mrb, mrb_value num, mrb_value sign)
+{
+ static const mrb_value mask = ~(mrb_value)UINT_MAX;
+
+ if (RTEST(sign)) {
+ /* minus */
+ if ((num & mask) != mask || (num & ~mask) <= INT_MAX + 1UL)
+ mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIdVALUE " too small to convert to `unsigned int'", num);
+ }
+ else {
+ /* plus */
+ if ((num & mask) != 0)
+ mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIuVALUE " too big to convert to `unsigned int'", num);
+ }
+}
+
+/* Converts val with mrb_num2long() and verifies the result fits in int. */
+long
+mrb_num2int(mrb_value val)
+{
+ long num = mrb_num2long(mrb, val);
+
+ check_int(num);
+ return num;
+}
+
+/* Like mrb_num2int(), with a direct unboxing shortcut for Fixnum values. */
+long
+mrb_fix2int(mrb_state *mrb, mrb_value val)
+{
+ long num = FIXNUM_P(val)?mrb_fixnum(val):mrb_num2long(mrb, val);
+
+ check_int(num);
+ return num;
+}
+
+/* Converts val with mrb_num2ulong() and range-checks it; the sign is obtained by calling "<" with 0. */
+unsigned long
+mrb_num2uint(mrb_value val)
+{
+ unsigned long num = mrb_num2ulong(val);
+
+ check_uint(num, mrb_funcall(mrb, val, "<", 1, mrb_fixnum_value(0)));
+ return num;
+}
+
+/* Fixnum variant of mrb_num2uint(); non-Fixnum values are forwarded to it. */
+unsigned long
+mrb_fix2uint(mrb_state *mrb, mrb_value val)
+{
+ unsigned long num;
+
+ if (!FIXNUM_P(val)) {
+ return mrb_num2uint(mrb, val);
+ }
+ num = FIX2ULONG(val);
+
+ check_uint(num, mrb_funcall(mrb, val, "<", 1, mrb_fixnum_value(0)));
+ return num;
+}
+#else
+/* No separate range check in this branch: forwards to mrb_num2long(). */
+long
+mrb_num2int(mrb_state *mrb, mrb_value val)
+{
+ return mrb_num2long(mrb, val);
+}
+
+/* Unboxes val with mrb_fixnum(); val is not checked to be a Fixnum here. */
+long
+mrb_fix2int(mrb_value val)
+{
+ return mrb_fixnum(val);
+}
+#endif
+
+/*
+ * Returns val as a Fixnum: a Fixnum is passed through, anything else is
+ * converted with mrb_num2long() and boxed, raising a RangeError when the
+ * converted value is not FIXABLE.
+ */
+mrb_value
+mrb_num2fix(mrb_state *mrb, mrb_value val)
+{
+  long converted;
+
+  if (!FIXNUM_P(val)) {
+    converted = mrb_num2long(mrb, val);
+    if (!FIXABLE(converted)) {
+      mrb_raise(mrb, E_RANGE_ERROR, "integer %"PRIdVALUE " out of range of fixnum", converted);
+    }
+    return mrb_fixnum_value(converted);
+  }
+  return val;
+}
+
+#if HAVE_LONG_LONG
+
+/*
+ * Converts val to a LONG_LONG.
+ *
+ * nil, String and true/false raise a TypeError; a Float outside
+ * [LLONG_MIN, LLONG_MAX] raises a RangeError; a Fixnum or in-range Float
+ * is converted directly; any other object goes through mrb_to_int() and
+ * NUM2LL().
+ *
+ * The statements after mrb_raise() return 0 rather than mrb_nil_value():
+ * the function's return type is an integer, not an mrb_value. They are
+ * presumably never reached (the originals were marked "not reached").
+ */
+LONG_LONG
+mrb_num2ll(mrb_state *mrb, mrb_value val)
+{
+  if (mrb_nil_p(val)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from nil");
+  }
+
+  if (FIXNUM_P(val)) return (LONG_LONG)mrb_fixnum(val);
+
+  switch (mrb_type(val)) {
+    case MRB_TT_FLOAT:
+      if (mrb_float(val) <= (double)LLONG_MAX
+          && mrb_float(val) >= (double)LLONG_MIN) {
+        return (LONG_LONG)(mrb_float(val));
+      }
+      else {
+        char buf[24];
+        char *s;
+
+        snprintf(buf, sizeof(buf), "%-.10g", mrb_float(val));
+        if ((s = strchr(buf, ' ')) != 0) *s = '\0';
+        mrb_raise(mrb, E_RANGE_ERROR, "float %s out of range of long long", buf);
+      }
+      return 0; /* not reached */
+
+    case MRB_TT_STRING:
+      mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from string");
+      return 0; /* not reached */
+
+    case MRB_TT_TRUE:
+    case MRB_TT_FALSE:
+      mrb_raise(mrb, E_TYPE_ERROR, "no implicit conversion from boolean");
+      return 0; /* not reached */
+
+    default:
+      val = mrb_to_int(mrb, val);
+      return NUM2LL(val);
+  }
+}
+
+/* Unsigned view of mrb_num2ll(): the signed result is cast, not range-checked. */
+unsigned LONG_LONG
+mrb_num2ull(mrb_state *mrb, mrb_value val)
+{
+  LONG_LONG as_signed = mrb_num2ll(mrb, val);
+
+  return (unsigned LONG_LONG)as_signed;
+}
+
+#endif /* HAVE_LONG_LONG */
+
+/*
+ * Document-class: Integer
+ *
+ * <code>Integer</code> is the basis for the two concrete classes that
+ * hold whole numbers, <code>Bignum</code> and <code>Fixnum</code>.
+ *
+ */
+
+
+/* 15.2.8.3.14 */
+/* 15.2.8.3.24 */
+/* 15.2.8.3.26 */
+/*
+ * call-seq:
+ * int.to_i -> integer
+ * int.to_int -> integer
+ * int.floor -> integer
+ * int.ceil -> integer
+ * int.round -> integer
+ * int.truncate -> integer
+ *
+ * As <i>int</i> is already an <code>Integer</code>, all these
+ * methods simply return the receiver.
+ */
+
+static mrb_value
+int_to_i(mrb_state *mrb, mrb_value num)
+{
+  /* the receiver is returned as is; mrb is not used */
+  mrb_value self = num;
+
+  return self;
+}
+
+/* 15.2.8.3.21 */
+/*
+ * call-seq:
+ * fixnum.next -> integer
+ * fixnum.succ -> integer
+ *
+ * Returns the <code>Integer</code> equal to <i>int</i> + 1.
+ *
+ * 1.next #=> 2
+ * (-1).next #=> 0
+ */
+
+static mrb_value
+fix_succ(mrb_state *mrb, mrb_value num)
+{
+  long next = mrb_fixnum(num);
+
+  /* no overflow handling: the incremented value is boxed directly */
+  next = next + 1;
+  return mrb_fixnum_value(next);
+}
+
+/* 15.2.8.3.19 */
+/*
+ * call-seq:
+ * int.next -> integer
+ * int.succ -> integer
+ *
+ * Returns the <code>Integer</code> equal to <i>int</i> + 1.
+ *
+ * 1.next #=> 2
+ * (-1).next #=> 0
+ */
+static mrb_value
+int_succ(mrb_state *mrb, mrb_value num)
+{
+  long next;
+
+  /* non-Fixnum receivers are incremented by dispatching "+" with 1 */
+  if (!FIXNUM_P(num)) {
+    return mrb_funcall(mrb, num, "+", 1, mrb_fixnum_value(1));
+  }
+  next = mrb_fixnum(num) + 1;
+  return mrb_fixnum_value(next);
+}
+
+/*
+ * Formats the Fixnum x in the given radix and returns it as a new string.
+ *
+ * base must be in 2..36, otherwise an ArgumentError is raised.
+ * Digits are produced right-to-left into a stack buffer using
+ * ruby_digitmap; negative values get a leading '-'.
+ *
+ * The magnitude is computed in unsigned arithmetic so that the most
+ * negative value does not overflow on negation (which would leave a
+ * negative remainder and index before ruby_digitmap).
+ */
+mrb_value
+rb_fix2str(mrb_state *mrb, mrb_value x, int base)
+{
+  extern const char ruby_digitmap[];
+  char buf[SIZEOF_VALUE*CHAR_BIT + 2], *b = buf + sizeof buf;
+  long val = mrb_fixnum(x);
+  unsigned long mag;
+  int neg = 0;
+
+  if (base < 2 || 36 < base) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base);
+  }
+  if (val == 0) {
+    return mrb_usascii_str_new2(mrb, "0");
+  }
+  if (val < 0) {
+    /* 0 - (unsigned)val is well defined for every val, including LONG_MIN */
+    mag = 0UL - (unsigned long)val;
+    neg = 1;
+  }
+  else {
+    mag = (unsigned long)val;
+  }
+  *--b = '\0';
+  do {
+    *--b = ruby_digitmap[(int)(mag % (unsigned long)base)];
+  } while (mag /= (unsigned long)base);
+  if (neg) {
+    *--b = '-';
+  }
+
+  return mrb_usascii_str_new2(mrb, b);
+}
+
+/* 2 ** ((bits in long - 1) / 2): magnitude bound used by FIT_SQRT_LONG */
+#define SQRT_LONG_MAX ((SIGNED_VALUE)1<<((SIZEOF_LONG*CHAR_BIT-1)/2))
+/* true when -SQRT_LONG_MAX <= n < SQRT_LONG_MAX, i.e. when the product of
+   two such values is treated as safe from overflow by fix_mul() */
+#define FIT_SQRT_LONG(n) (((n)<SQRT_LONG_MAX)&&((n)>=-SQRT_LONG_MAX))
+
+/* 15.2.8.3.3 */
+/*
+ * call-seq:
+ * fix * numeric -> numeric_result
+ *
+ * Performs multiplication: the class of the resulting object depends on
+ * the class of <code>numeric</code> and on the magnitude of the
+ * result.
+ */
+
+/*
+ * Fixnum#* implementation: fetches one argument y.
+ *  - Fixnum y: integer product, computed in LONG_LONG when that type is
+ *    at least twice as wide as long, otherwise in long with an after-the-
+ *    fact overflow test.
+ *  - Float y: floating point product.
+ *  - anything else: delegated to mrb_num_coerce_bin() with "*".
+ */
+static mrb_value
+fix_mul(mrb_state *mrb, mrb_value x)
+{
+ mrb_value y;
+ mrb_get_args(mrb, "o", &y);
+
+ if (FIXNUM_P(y)) {
+#ifdef __HP_cc
+/* avoids an optimization bug of HP aC++/ANSI C B3910B A.06.05 [Jul 25 2005] */
+ volatile
+#endif
+ long a, b;
+#if SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG
+ LONG_LONG d;
+#else
+ long c;
+ mrb_value r;
+#endif
+
+ a = mrb_fixnum(x);
+ b = mrb_fixnum(y);
+
+#if SIZEOF_LONG * 2 <= SIZEOF_LONG_LONG
+ d = (LONG_LONG)a * b;
+ if (FIXABLE(d)) return mrb_fixnum_value(d);
+ /* NOTE(review): a product that is not FIXABLE yields nil here; the
+    big-integer conversion is commented out */
+ return mrb_nil_value();// rb_ll2inum(d);
+#else
+ /* fast path: both operands small enough that a*b is taken as safe */
+ if (FIT_SQRT_LONG(a) && FIT_SQRT_LONG(b))
+ return mrb_fixnum_value(a*b);
+ c = a * b;
+ r = mrb_fixnum_value(c);
+
+ if (a == 0) return x;
+ /* overflow test: boxing must round-trip and division must undo the product */
+ if (mrb_fixnum(r) != c || c/a != b) {
+ //r = mrb_big_mul(mrb_int2big(a), mrb_int2big(b));
+ /* NOTE(review): with the big-integer call commented out, the fallback
+    stores the same a*b again */
+ r = mrb_fixnum_value(a*b);
+ }
+ return r;
+#endif
+ }
+ switch (mrb_type(y)) {
+ case MRB_TT_FLOAT:
+ return mrb_float_value((double)mrb_fixnum(x) * mrb_float(y));
+ default:
+ return mrb_num_coerce_bin(mrb, x, y, "*");
+ }
+}
+
+/*
+ * Floored integer division of x by y.
+ *
+ * Stores the quotient in *divp and the remainder in *modp (either pointer
+ * may be null to skip that result). The remainder takes the sign of y.
+ * y == 0 is reported through mrb_num_zerodiv().
+ */
+static void
+fixdivmod(mrb_state *mrb, long x, long y, long *divp, long *modp)
+{
+  long quot, rem;
+
+  if (y == 0) mrb_num_zerodiv(mrb);
+
+  /* truncating quotient, computed on non-negative operands */
+  if (y < 0) {
+    quot = (x < 0) ? (-x / -y) : -(x / -y);
+  }
+  else {
+    quot = (x < 0) ? -(-x / y) : (x / y);
+  }
+
+  rem = x - quot*y;
+  /* a remainder whose sign differs from y means the quotient must step down */
+  if ((rem < 0 && y > 0) || (rem > 0 && y < 0)) {
+    rem += y;
+    quot -= 1;
+  }
+
+  if (divp) *divp = quot;
+  if (modp) *modp = rem;
+}
+
+mrb_value rb_big_fdiv(mrb_value x, mrb_value y);
+
+//mrb_value mrb_rational_reciprocal(mrb_value x);
+
+/*
+ * Shared division helper for Fixnum receivers.
+ *
+ * op is the operator name; only its first character is inspected:
+ * '/' with a Float divisor yields a Float, any other op floors the
+ * quotient (raising on a zero Float divisor). A Fixnum divisor uses
+ * fixdivmod(); other types are coerced via mrb_num_coerce_bin().
+ */
+static mrb_value
+fix_divide(mrb_state *mrb, mrb_value x, mrb_value y, char* op)
+{
+  if (FIXNUM_P(y)) {
+    long quot;
+
+    fixdivmod(mrb, mrb_fixnum(x), mrb_fixnum(y), &quot, 0);
+    return mrb_fixnum_value(quot);
+  }
+
+  if (mrb_type(y) == MRB_TT_FLOAT) {
+    double quot;
+
+    if (*op != '/') {
+      if (mrb_float(y) == 0) mrb_num_zerodiv(mrb);
+      quot = (double)mrb_fixnum(x) / mrb_float(y);
+      return mrb_dbl2big(mrb, floor(quot));
+    }
+    quot = (double)mrb_fixnum(x) / mrb_float(y);
+    return mrb_float_value(quot);
+  }
+
+  return mrb_num_coerce_bin(mrb, x, y, op);
+}
+
+/* 15.2.8.3.4 */
+/*
+ * call-seq:
+ * fix / numeric -> numeric_result
+ *
+ * Performs division: the class of the resulting object depends on
+ * the class of <code>numeric</code> and on the magnitude of the
+ * result.
+ */
+
+static mrb_value
+fix_div(mrb_state *mrb, mrb_value x)
+{
+  mrb_value divisor;
+
+  /* single required argument, forwarded to the shared helper as "/" */
+  mrb_get_args(mrb, "o", &divisor);
+  return fix_divide(mrb, x, divisor, "/");
+}
+
+/* 15.2.8.3.5 */
+/*
+ * call-seq:
+ * fix % other -> real
+ * fix.modulo(other) -> real
+ *
+ * Returns <code>fix</code> modulo <code>other</code>.
+ * See <code>numeric.divmod</code> for more information.
+ */
+
+static mrb_value
+fix_mod(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+
+  mrb_get_args(mrb, "o", &y);
+
+  if (FIXNUM_P(y)) {
+    long rem;
+
+    fixdivmod(mrb, mrb_fixnum(x), mrb_fixnum(y), 0, &rem);
+    return mrb_fixnum_value(rem);
+  }
+  if (mrb_type(y) == MRB_TT_FLOAT) {
+    double rem;
+
+    flodivmod(mrb, (double)mrb_fixnum(x), mrb_float(y), 0, &rem);
+    return mrb_float_value(rem);
+  }
+  /* neither Fixnum nor Float: let coercion pick the implementation */
+  return mrb_num_coerce_bin(mrb, x, y, "%");
+}
+
+/*
+ * call-seq:
+ * fix.divmod(numeric) -> array
+ *
+ * See <code>Numeric#divmod</code>.
+ */
+static mrb_value
+fix_divmod(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+
+  mrb_get_args(mrb, "o", &y);
+
+  if (FIXNUM_P(y)) {
+    long quot, rem;
+
+    fixdivmod(mrb, mrb_fixnum(x), mrb_fixnum(y), &quot, &rem);
+    return mrb_assoc_new(mrb, mrb_fixnum_value(quot), mrb_fixnum_value(rem));
+  }
+  if (mrb_type(y) == MRB_TT_FLOAT) {
+    double quot, rem;
+    volatile mrb_value q_val, r_val;
+
+    flodivmod(mrb, (double)mrb_fixnum(x), mrb_float(y), &quot, &rem);
+    /* quotient goes through dbl2ival(), remainder stays a Float */
+    q_val = dbl2ival(quot);
+    r_val = mrb_float_value(rem);
+    return mrb_assoc_new(mrb, q_val, r_val);
+  }
+  return mrb_num_coerce_bin(mrb, x, y, "divmod");
+}
+
+/* 15.2.8.3.7 */
+/*
+ * call-seq:
+ * fix == other -> true or false
+ *
+ * Return <code>true</code> if <code>fix</code> equals <code>other</code>
+ * numerically.
+ *
+ * 1 == 2 #=> false
+ * 1 == 1.0 #=> true
+ */
+
+static mrb_value
+fix_equal(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+
+  mrb_get_args(mrb, "o", &y);
+
+  if (mrb_obj_equal(mrb, x, y)) {
+    return mrb_true_value();
+  }
+  /* two Fixnums that failed the identity test cannot be equal */
+  if (FIXNUM_P(y)) {
+    return mrb_false_value();
+  }
+  if (mrb_type(y) == MRB_TT_FLOAT) {
+    if ((double)mrb_fixnum(x) == mrb_float(y)) {
+      return mrb_true_value();
+    }
+    return mrb_false_value();
+  }
+  return num_equal(mrb, x, y);
+}
+
+/* 15.2.8.3.8 */
+/*
+ * call-seq:
+ * ~fix -> integer
+ *
+ * One's complement: returns a number where each bit is flipped.
+ * ex.0---00001 (1)-> 1---11110 (-2)
+ * ex.0---00010 (2)-> 1---11101 (-3)
+ * ex.0---00100 (4)-> 1---11011 (-5)
+ */
+
+static mrb_value
+fix_rev(mrb_state *mrb, mrb_value num)
+{
+  /* flip every bit of the unboxed value and box the result again */
+  long flipped = ~(long)mrb_fixnum(num);
+
+  return mrb_fixnum_value(flipped);
+}
+
+/*
+ * Coerces an operand of a bitwise operator to a Fixnum.
+ * A Float raises a TypeError; other non-Fixnum values are converted with
+ * mrb_to_int() repeatedly until a Fixnum results.
+ */
+static mrb_value
+bit_coerce(mrb_state *mrb, mrb_value x)
+{
+  for (;;) {
+    if (FIXNUM_P(x)) {
+      return x;
+    }
+    if (mrb_type(x) == MRB_TT_FLOAT) {
+      mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer");
+    }
+    x = mrb_to_int(mrb, x);
+  }
+}
+
+/* 15.2.8.3.9 */
+/*
+ * call-seq:
+ * fix & integer -> integer_result
+ *
+ * Bitwise AND.
+ */
+
+static mrb_value
+fix_and(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other;
+  long bits;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Float operands are rejected before any coercion is attempted */
+  if (mrb_type(other) == MRB_TT_FLOAT) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer");
+  }
+  other = bit_coerce(mrb, other);
+  bits = mrb_fixnum(x) & mrb_fixnum(other);
+  return mrb_fixnum_value(bits);
+}
+
+/* 15.2.8.3.10 */
+/*
+ * call-seq:
+ * fix | integer -> integer_result
+ *
+ * Bitwise OR.
+ */
+
+static mrb_value
+fix_or(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other;
+  long bits;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Float operands are rejected before any coercion is attempted */
+  if (mrb_type(other) == MRB_TT_FLOAT) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer");
+  }
+  other = bit_coerce(mrb, other);
+  bits = mrb_fixnum(x) | mrb_fixnum(other);
+  return mrb_fixnum_value(bits);
+}
+
+/* 15.2.8.3.11 */
+/*
+ * call-seq:
+ * fix ^ integer -> integer_result
+ *
+ * Bitwise EXCLUSIVE OR.
+ */
+
+static mrb_value
+fix_xor(mrb_state *mrb, mrb_value x)
+{
+  mrb_value other;
+  long bits;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* Float operands are rejected before any coercion is attempted */
+  if (mrb_type(other) == MRB_TT_FLOAT) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer");
+  }
+  other = bit_coerce(mrb, other);
+  bits = mrb_fixnum(x) ^ mrb_fixnum(other);
+  return mrb_fixnum_value(bits);
+}
+
+static mrb_value fix_lshift(mrb_state *mrb, long, unsigned long);
+static mrb_value fix_rshift(long, unsigned long);
+
+/* 15.2.8.3.12 */
+/*
+ * call-seq:
+ * fix << count -> integer
+ *
+ * Shifts _fix_ left _count_ positions (right if _count_ is negative).
+ */
+
+static mrb_value
+mrb_fix_lshift(mrb_state *mrb, mrb_value x)
+{
+  mrb_value count;
+  long val, width;
+
+  mrb_get_args(mrb, "o", &count);
+  val = mrb_fixnum(x);
+
+  if (mrb_type(count) == MRB_TT_FLOAT) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer");
+  }
+  width = mrb_fixnum(count);
+
+  /* a negative count shifts in the opposite direction */
+  if (width >= 0) {
+    return fix_lshift(mrb, val, width);
+  }
+  return fix_rshift(val, (unsigned long)-width);
+}
+
+/*
+ * Shifts val left by width bits and boxes the result.
+ *
+ * Raises a RangeError when width exceeds the number of value bits in a
+ * long, or when the magnitude of val has bits that would be shifted out.
+ *
+ * labs() is used because val is a long (abs() takes an int), and the
+ * message uses %lu because width is an unsigned long.
+ */
+static mrb_value
+fix_lshift(mrb_state *mrb, long val, unsigned long width)
+{
+  if (width > (SIZEOF_LONG*CHAR_BIT-1)
+      || ((unsigned long)labs(val))>>(SIZEOF_LONG*CHAR_BIT-1-width) > 0) {
+    mrb_raise(mrb, E_RANGE_ERROR, "width(%lu) > (SIZEOF_LONG*CHAR_BIT-1)", width);
+  }
+  val = val << width;
+  return mrb_fixnum_value(val);
+}
+
+/* 15.2.8.3.13 */
+/*
+ * call-seq:
+ * fix >> count -> integer
+ *
+ * Shifts _fix_ right _count_ positions (left if _count_ is negative).
+ */
+
+static mrb_value
+mrb_fix_rshift(mrb_state *mrb, mrb_value x)
+{
+  mrb_value y;
+  long i, val;
+  mrb_get_args(mrb, "o", &y);
+
+  val = mrb_fixnum(x);
+  /* same guard as mrb_fix_lshift: a Float count must not be read
+     through mrb_fixnum() */
+  if (mrb_type(y) == MRB_TT_FLOAT) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert Float into Integer");
+  }
+  i = mrb_fixnum(y);
+  if (i == 0) return x;
+  /* a negative count shifts in the opposite direction */
+  if (i < 0)
+    return fix_lshift(mrb, val, (unsigned long)-i);
+  return fix_rshift(val, i);
+}
+
+/*
+ * Shifts val right by i bits and boxes the result. Counts that reach the
+ * value width of a long collapse to -1 for negative val and 0 otherwise.
+ */
+static mrb_value
+fix_rshift(long val, unsigned long i)
+{
+  if (i < sizeof(long)*CHAR_BIT-1) {
+    long shifted = RSHIFT(val, i);
+    return mrb_fixnum_value(shifted);
+  }
+  return mrb_fixnum_value(val < 0 ? -1 : 0);
+}
+
+/* 15.2.8.3.23 */
+/*
+ * call-seq:
+ * fix.to_f -> float
+ *
+ * Converts <i>fix</i> to a <code>Float</code>.
+ *
+ */
+
+static mrb_value
+fix_to_f(mrb_state *mrb, mrb_value num)
+{
+  /* widen the unboxed integer to double and box it as a Float */
+  double as_double = (double)mrb_fixnum(num);
+
+  return mrb_float_value(as_double);
+}
+
+/*
+ * Document-class: ZeroDivisionError
+ *
+ * Raised when attempting to divide an integer by 0.
+ *
+ * 42 / 0
+ *
+ * <em>raises the exception:</em>
+ *
+ * ZeroDivisionError: divided by 0
+ *
+ * Note that only division by an exact 0 will raise that exception:
+ *
+ * 42 / 0.0 #=> Float::INFINITY
+ * 42 / -0.0 #=> -Float::INFINITY
+ * 0 / 0.0 #=> NaN
+ */
+
+/*
+ * Document-class: FloatDomainError
+ *
+ * Raised when attempting to convert special float values
+ * (in particular infinite or NaN)
+ * to numerical classes which don't support them.
+ *
+ * Float::INFINITY.to_r
+ *
+ * <em>raises the exception:</em>
+ *
+ * FloatDomainError: Infinity
+ */
+/* ------------------------------------------------------------------------*/
+/*
+ * Converts d to mrb_int by a plain cast.
+ * Infinite and NaN inputs raise a FloatDomainError carrying the text
+ * "-Infinity", "Infinity" or "NaN".
+ * NOTE(review): the parameter type is float, so double arguments are
+ * narrowed on the way in -- confirm that this is intended.
+ */
+static mrb_int
+dbl2big(mrb_state *mrb, float d)
+{
+  mrb_int converted;
+
+  if (isinf(d)) {
+    mrb_raise(mrb, E_FLOATDOMAIN_ERROR, d < 0 ? "-Infinity" : "Infinity");
+  }
+  if (isnan(d)) {
+    mrb_raise(mrb, E_FLOATDOMAIN_ERROR, "NaN");
+  }
+  converted = (mrb_int)d;
+  return converted;
+}
+
+/*
+ * Converts d to an integer with dbl2big() and boxes it as a Fixnum.
+ * NOTE(review): the parameter is declared float although callers in this
+ * file pass double values (e.g. the result of floor()); the narrowing may
+ * lose precision -- confirm whether double was intended.
+ */
+mrb_value
+mrb_dbl2big(mrb_state *mrb, float d)
+{
+ return mrb_fixnum_value(dbl2big(mrb, d));//bignorm(dbl2big(d));
+}
+
+/* 15.2.8.3.1 */
+/*
+ * call-seq:
+ * fix + numeric -> numeric_result
+ *
+ * Performs addition: the class of the resulting object depends on
+ * the class of <code>numeric</code> and on the magnitude of the
+ * result.
+ */
+static mrb_value
+mrb_fixnum_plus(mrb_state *mrb, mrb_value self)
+{
+  mrb_int lhs, rhs;
+
+  /* the argument is fetched with the "i" (integer) format */
+  mrb_get_args(mrb, "i", &rhs);
+  lhs = mrb_fixnum(self);
+
+  DEBUG(printf("%d + %d = %d\n", lhs, rhs, lhs+rhs));
+  return mrb_fixnum_value(lhs + rhs);
+}
+
+/* 15.2.8.3.2 */
+/* 15.2.8.3.16 */
+/*
+ * call-seq:
+ * fix - numeric -> numeric_result
+ *
+ * Performs subtraction: the class of the resulting object depends on
+ * the class of <code>numeric</code> and on the magnitude of the
+ * result.
+ */
+static mrb_value
+mrb_fixnum_minus(mrb_state *mrb, mrb_value self)
+{
+  mrb_int lhs, rhs;
+
+  /* the argument is fetched with the "i" (integer) format */
+  mrb_get_args(mrb, "i", &rhs);
+  lhs = mrb_fixnum(self);
+
+  DEBUG(printf("%d - %d = %d\n", lhs, rhs, lhs-rhs));
+  return mrb_fixnum_value(lhs - rhs);
+}
+
+/* 15.2.8.3.6 */
+/*
+ * call-seq:
+ * self.i <=> other.i => -1, 0, +1
+ * < => -1
+ * = => 0
+ * > => +1
+ * Comparison---Returns -1, 0, or +1 depending on whether <i>fix</i> is
+ * less than, equal to, or greater than <i>numeric</i>. This is the
+ * basis for the tests in <code>Comparable</code>.
+ */
+static mrb_value
+mrb_fixnum_cmp(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+  mrb_int lhs, rhs;
+  int order;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* anything that is not a Fixnum is compared through coercion */
+  if (!FIXNUM_P(other)) {
+    return mrb_num_coerce_cmp(mrb, self, other, "<=>");
+  }
+
+  lhs = mrb_fixnum(self);
+  rhs = mrb_fixnum(other);
+  DEBUG(printf("%d <=> %d\n", lhs, rhs));
+
+  order = 0;
+  if (lhs > rhs) order = 1;
+  else if (lhs < rhs) order = -1;
+  return mrb_fixnum_value(order);
+}
+
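+/* 15.2.8.3.29 (x) */
+/*
+ * call-seq:
+ * fix > other => true or false
+ *
+ * Returns <code>true</code> if the value of <code>fix</code> is
+ * greater than that of <code>other</code>.
+ *
+ * NOTE: no implementation follows this comment; the registration of
+ * ">" for Fixnum in mrb_init_numeric is commented out.
+ */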
+
+/*
+ * Formats the Fixnum x in the given radix and returns it as a new string.
+ *
+ * base must be in 2..36, otherwise an ArgumentError is raised.
+ * Digits are produced right-to-left into a stack buffer using
+ * ruby_digitmap; negative values get a leading '-'.
+ *
+ * The buffer is sized for the worst case (one character per bit of a
+ * long, plus sign and terminator), and the magnitude is computed in
+ * unsigned arithmetic so that the most negative value does not overflow
+ * on negation.
+ */
+mrb_value
+mrb_fix2str(mrb_state *mrb, mrb_value x, int base)
+{
+  char buf[sizeof(long)*CHAR_BIT + 2], *b = buf + sizeof buf;
+  long val = mrb_fixnum(x);
+  unsigned long mag;
+  int neg = 0;
+
+  if (base < 2 || 36 < base) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base);
+  }
+  if (val == 0) {
+    return mrb_str_new2(mrb, "0");
+  }
+  if (val < 0) {
+    /* 0 - (unsigned)val is well defined for every val, including LONG_MIN */
+    mag = 0UL - (unsigned long)val;
+    neg = 1;
+  }
+  else {
+    mag = (unsigned long)val;
+  }
+  *--b = '\0';
+  do {
+    *--b = ruby_digitmap[(int)(mag % (unsigned long)base)];
+  } while (mag /= (unsigned long)base);
+  if (neg) {
+    *--b = '-';
+  }
+
+  return mrb_str_new2(mrb, b);
+}
+
+/*
+ * Converts self to a string via mrb_fix2str(). With no arguments the
+ * radix is 10; otherwise argv[0] is read with mrb_fixnum() as the radix
+ * (its type is not checked here).
+ */
+mrb_value
+mrb_fix_to_s(mrb_state *mrb, mrb_value self, int argc, mrb_value *argv)
+{
+  int radix = 10;
+
+  if (argc != 0) {
+    radix = mrb_fixnum(argv[0]);
+  }
+  return mrb_fix2str(mrb, self, radix);
+}
+
+/* 15.2.8.3.25 */
+/*
+ * call-seq:
+ * fix.to_s(base=10) -> string
+ *
+ * Returns a string containing the representation of <i>fix</i> radix
+ * <i>base</i> (between 2 and 36).
+ *
+ * 12345.to_s #=> "12345"
+ * 12345.to_s(2) #=> "11000000111001"
+ * 12345.to_s(8) #=> "30071"
+ * 12345.to_s(10) #=> "12345"
+ * 12345.to_s(16) #=> "3039"
+ * 12345.to_s(36) #=> "9ix"
+ *
+ */
+static mrb_value
+mrb_fixnum_to_s(mrb_state *mrb, mrb_value self) /* fix_to_s */
+{
+  int count;
+  mrb_value *args;
+
+  /* collect every passed argument and let mrb_fix_to_s() pick the radix */
+  mrb_get_args(mrb, "*", &args, &count);
+  return mrb_fix_to_s(mrb, self, count, args);
+}
+
+/* 15.2.9.3.6 */
+/*
+ * call-seq:
+ * self.f <=> other.f => -1, 0, +1
+ * < => -1
+ * = => 0
+ * > => +1
+ * Comparison---Returns -1, 0, or +1 depending on whether <i>fix</i> is
+ * less than, equal to, or greater than <i>numeric</i>. This is the
+ * basis for the tests in <code>Comparable</code>.
+ */
+static mrb_value
+mrb_float_cmp(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+  mrb_float x, y;
+  int order;
+
+  x = mrb_float(self);
+  mrb_get_args(mrb, "o", &other);
+
+  /* a Fixnum is widened; every other argument is read as a Float */
+  /* NOTE(review): the argument type is not verified before mrb_float(),
+     and a NaN operand falls through to the "equal" result -- confirm */
+  y = FIXNUM_P(other) ? (mrb_float)mrb_fixnum(other) : mrb_float(other);
+
+  DEBUG(printf("%f <=> %f\n", x, y));
+  if (x > y) {
+    order = 1;
+  }
+  else if (x < y) {
+    order = -1;
+  }
+  else {
+    order = 0;
+  }
+  return mrb_fixnum_value(order);
+}
+
+/* 15.2.9.3.1 */
+/*
+ * call-seq:
+ * float + other -> float
+ *
+ * Returns a new float which is the sum of <code>float</code>
+ * and <code>other</code>.
+ */
+static mrb_value
+mrb_float_plus(mrb_state *mrb, mrb_value self)
+{
+  mrb_float lhs, rhs;
+
+  /* the argument is fetched with the "f" (float) format */
+  mrb_get_args(mrb, "f", &rhs);
+  lhs = mrb_float(self);
+
+  return mrb_float_value(lhs + rhs);
+}
+/* ------------------------------------------------------------------------*/
+/*
+ * Defines the Numeric, Integer, Fixnum and Float classes on mrb and
+ * registers their C-implemented methods. The trailing comment on each
+ * registration is the section number the method corresponds to; "(x)"
+ * marks entries tagged that way in the original.
+ * Also stores the Fixnum and Float classes in mrb->fixnum_class and
+ * mrb->float_class.
+ */
+void
+mrb_init_numeric(mrb_state *mrb)
+{
+ struct RClass *numeric, *integer, *fixnum, *fl;
+ /* Numeric Class */
+ numeric = mrb_define_class(mrb, "Numeric", mrb->object_class);
+ mrb_include_module(mrb, numeric, mrb_class_get(mrb, "Comparable"));
+
+ /* NOTE(review): the unary operators "+@" and "-@" are declared with ARGS_REQ(1) -- confirm */
+ mrb_define_method(mrb, numeric, "+@", num_uplus, ARGS_REQ(1)); /* 15.2.7.4.1 */
+ mrb_define_method(mrb, numeric, "-@", num_uminus, ARGS_REQ(1)); /* 15.2.7.4.2 */
+ mrb_define_method(mrb, numeric, "abs", num_abs, ARGS_NONE()); /* 15.2.7.4.3 */
+ mrb_define_method(mrb, numeric, "coerce", num_coerce, ARGS_REQ(1)); /* 15.2.7.4.4 */
+ mrb_define_method(mrb, numeric, "quo", num_quo, ARGS_REQ(1)); /* 15.2.7.4.5 (x) */
+
+ /* Integer Class */
+ integer = mrb_define_class(mrb, "Integer", numeric);
+ fixnum = mrb->fixnum_class = mrb_define_class(mrb, "Fixnum", integer);
+
+ mrb_define_method(mrb, fixnum, "+", mrb_fixnum_plus, ARGS_REQ(1)); /* 15.2.8.3.1 */
+ mrb_define_method(mrb, fixnum, "-", mrb_fixnum_minus, ARGS_REQ(1)); /* 15.2.8.3.2 */
+ mrb_define_method(mrb, fixnum, "*", fix_mul, ARGS_REQ(1)); /* 15.2.8.3.3 */
+ mrb_define_method(mrb, fixnum, "/", fix_div, ARGS_REQ(1)); /* 15.2.8.3.4 */
+ mrb_define_method(mrb, fixnum, "%", fix_mod, ARGS_REQ(1)); /* 15.2.8.3.5 */
+ mrb_define_method(mrb, fixnum, "<=>", mrb_fixnum_cmp, ARGS_REQ(1)); /* 15.2.8.3.6 */
+ mrb_define_method(mrb, fixnum, "==", fix_equal, ARGS_REQ(1)); /* 15.2.8.3.7 */
+ mrb_define_method(mrb, fixnum, "~", fix_rev, ARGS_NONE()); /* 15.2.8.3.8 */
+ mrb_define_method(mrb, fixnum, "&", fix_and, ARGS_REQ(1)); /* 15.2.8.3.9 */
+ mrb_define_method(mrb, fixnum, "|", fix_or, ARGS_REQ(1)); /* 15.2.8.3.10 */
+ mrb_define_method(mrb, fixnum, "^", fix_xor, ARGS_REQ(1)); /* 15.2.8.3.11 */
+ mrb_define_method(mrb, fixnum, "<<", mrb_fix_lshift, ARGS_REQ(1)); /* 15.2.8.3.12 */
+ mrb_define_method(mrb, fixnum, ">>", mrb_fix_rshift, ARGS_REQ(1)); /* 15.2.8.3.13 */
+ mrb_define_method(mrb, fixnum, "ceil", int_to_i, ARGS_NONE()); /* 15.2.8.3.14 */
+ mrb_define_method(mrb, fixnum, "eql?", num_eql, ARGS_REQ(1)); /* 15.2.8.3.16 */
+ mrb_define_method(mrb, fixnum, "floor", num_floor, ARGS_NONE()); /* 15.2.8.3.17 */
+ mrb_define_method(mrb, fixnum, "hash", flo_hash, ARGS_NONE()); /* 15.2.8.3.18 */
+ /* "next" and "succ" are bound to two different functions (int_succ / fix_succ) */
+ mrb_define_method(mrb, fixnum, "next", int_succ, ARGS_NONE()); /* 15.2.8.3.19 */
+ mrb_define_method(mrb, fixnum, "round", num_round, ARGS_ANY()); /* 15.2.8.3.20 */
+ mrb_define_method(mrb, fixnum, "succ", fix_succ, ARGS_NONE()); /* 15.2.8.3.21 */
+ mrb_define_method(mrb, fixnum, "to_f", fix_to_f, ARGS_NONE()); /* 15.2.8.3.23 */
+ mrb_define_method(mrb, fixnum, "to_i", int_to_i, ARGS_NONE()); /* 15.2.8.3.24 */
+ /* NOTE(review): mrb_fixnum_to_s reads an optional radix argument, yet is declared ARGS_NONE() -- confirm */
+ mrb_define_method(mrb, fixnum, "to_s", mrb_fixnum_to_s, ARGS_NONE()); /* 15.2.8.3.25 */
+ mrb_define_method(mrb, fixnum, "truncate", int_to_i, ARGS_NONE()); /* 15.2.8.3.26 */
+ //mrb_define_method(mrb, fixnum, "<", mrb_fixnum_lt, ARGS_REQ(1)); /* 15.2.8.3.28 (x) */
+ //mrb_define_method(mrb, fixnum, ">", mrb_fixnum_gt, ARGS_REQ(1)); /* 15.2.8.3.29 (x) */
+ mrb_define_method(mrb, fixnum, "divmod", fix_divmod, ARGS_REQ(1)); /* 15.2.8.3.30 (x) */
+
+ /* Float Class */
+ fl = mrb->float_class = mrb_define_class(mrb, "Float", numeric);
+ mrb_define_method(mrb, fl, "+", mrb_float_plus, ARGS_REQ(1)); /* 15.2.9.3.1 */
+ mrb_define_method(mrb, fl, "-", flo_minus, ARGS_REQ(1)); /* 15.2.9.3.2 */
+ mrb_define_method(mrb, fl, "*", flo_mul, ARGS_REQ(1)); /* 15.2.9.3.3 */
+ mrb_define_method(mrb, fl, "/", flo_div, ARGS_REQ(1)); /* 15.2.9.3.4 */
+ mrb_define_method(mrb, fl, "%", flo_mod, ARGS_REQ(1)); /* 15.2.9.3.5 */
+ mrb_define_method(mrb, fl, "<=>", mrb_float_cmp, ARGS_REQ(1)); /* 15.2.9.3.6 */
+ mrb_define_method(mrb, fl, "==", flo_eq, ARGS_REQ(1)); /* 15.2.9.3.7 */
+ mrb_define_method(mrb, fl, "ceil", flo_ceil, ARGS_NONE()); /* 15.2.9.3.8 */
+ mrb_define_method(mrb, fl, "finite?", flo_is_finite_p, ARGS_NONE()); /* 15.2.9.3.9 */
+ mrb_define_method(mrb, fl, "floor", flo_floor, ARGS_NONE()); /* 15.2.9.3.10 */
+ mrb_define_method(mrb, fl, "infinite?", flo_is_infinite_p,ARGS_NONE()); /* 15.2.9.3.11 */
+ mrb_define_method(mrb, fl, "round", flo_round, ARGS_ANY()); /* 15.2.9.3.12 */
+ mrb_define_method(mrb, fl, "to_f", flo_to_f, ARGS_NONE()); /* 15.2.9.3.13 */
+ /* "to_i" and "truncate" share flo_truncate */
+ mrb_define_method(mrb, fl, "to_i", flo_truncate, ARGS_NONE()); /* 15.2.9.3.14 */
+ mrb_define_method(mrb, fl, "truncate", flo_truncate, ARGS_NONE()); /* 15.2.9.3.15 */
+
+ mrb_define_method(mrb, fl, "to_s", flo_to_s, ARGS_NONE()); /* 15.2.9.3.16(x) */
+ //mrb_define_method(mrb, fl, "<", flo_lt, ARGS_REQ(1)); /* 15.2.9.3.17(x) */
+ //mrb_define_method(mrb, fl, ">", flo_gt, ARGS_REQ(1)); /* 15.2.9.3.18(x) */
+ mrb_define_method(mrb, fl, "quo", flo_quo, ARGS_REQ(1)); /* 15.2.9.3.19(x) */
+}
diff --git a/src/object.c b/src/object.c
new file mode 100644
index 000000000..c60c2fb7b
--- /dev/null
+++ b/src/object.c
@@ -0,0 +1,632 @@
+#include "mruby.h"
+#include <string.h>
+#include "mruby/string.h"
+#include <stdio.h>
+#include "mruby/class.h"
+#include "method.h"
+#include "mruby/numeric.h"
+#include "mdata.h"
+
+#ifdef INCLUDE_REGEXP
+ #define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
+#else
+ #define mrb_usascii_str_new2 mrb_str_new_cstr
+ #define mrb_usascii_str_new mrb_str_new
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+/*
+ * Identity comparison of two values.
+ *
+ * Values carrying different type tags are never identical.  For
+ * MRB_TT_TRUE the tag alone decides; MRB_TT_FALSE and MRB_TT_FIXNUM compare
+ * the integer field, MRB_TT_SYMBOL the symbol field, MRB_TT_FLOAT the float
+ * field, and every other tag compares the pointer field.
+ * Returns non-zero when identical, 0 otherwise.  `mrb` is not used.
+ */
+int
+mrb_obj_eq(mrb_state *mrb, mrb_value v1, mrb_value v2)
+{
+  if (v1.tt != v2.tt) {
+    return 0;
+  }
+  if (v1.tt == MRB_TT_TRUE) {
+    return 1;
+  }
+  if (v1.tt == MRB_TT_FALSE || v1.tt == MRB_TT_FIXNUM) {
+    return (v1.value.i == v2.value.i);
+  }
+  if (v1.tt == MRB_TT_SYMBOL) {
+    return (v1.value.sym == v2.value.sym);
+  }
+  if (v1.tt == MRB_TT_FLOAT) {
+    return (v1.value.f == v2.value.f);
+  }
+  return (v1.value.p == v2.value.p);
+}
+
+/* Forwards to mrb_obj_eq(); the original marks this as a temporary definition. */
+int
+mrb_obj_equal(mrb_state *mrb, mrb_value v1, mrb_value v2)
+{
+  int same = mrb_obj_eq(mrb, v1, v2);  /* temporary definition */
+
+  return same;
+}
+
+/*
+ * Equality test used from C.
+ *
+ * Identical values (per mrb_obj_eq) are equal without calling into Ruby.
+ * Otherwise obj1's "==" method is called with obj2 and its result is
+ * reduced with mrb_test(), so any truthy result counts as equal rather
+ * than only a value tagged MRB_TT_TRUE.
+ * Returns TRUE or FALSE.
+ */
+int
+mrb_equal(mrb_state *mrb, mrb_value obj1, mrb_value obj2)
+{
+  mrb_value result;
+
+  if (mrb_obj_eq(mrb, obj1, obj2)) return TRUE;
+  result = mrb_funcall(mrb, obj1, "==", 1, obj2);
+  return mrb_test(result) ? TRUE : FALSE;
+}
+
+/*
+ * Document-class: NilClass
+ *
+ * The class of the singleton object <code>nil</code>.
+ */
+
+/* 15.2.4.3.4 */
+/*
+ * call-seq:
+ *   nil.nil? -> true
+ *
+ * Method body that ignores its receiver and always answers true; it is
+ * registered as NilClass#nil? in mrb_init_object().
+ */
+
+static mrb_value
+mrb_true(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value yes = mrb_true_value();
+
+  return yes;
+}
+
+/* 15.2.4.3.5 */
+/*
+ * call-seq:
+ *   nil.to_s -> ""
+ *
+ * Builds a new zero-length string; the receiver is not looked at.
+ */
+
+static mrb_value
+nil_to_s(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value empty = mrb_usascii_str_new(mrb, 0, 0);
+
+  return empty;
+}
+
+/***********************************************************************
+ * Document-class: TrueClass
+ *
+ * The global value <code>true</code> is the only instance of class
+ * <code>TrueClass</code> and represents a logically true value in
+ * boolean expressions. The class provides operators allowing
+ * <code>true</code> to be used in logical expressions.
+ */
+
+/* 15.2.5.3.1 */
+/*
+ * call-seq:
+ *   true & obj -> true or false
+ *
+ * And: the answer is the truthiness of the single argument, i.e.
+ * <code>true</code> when mrb_test() accepts it and <code>false</code>
+ * otherwise.
+ */
+
+static mrb_value
+true_and(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  if (mrb_test(other)) {
+    return mrb_true_value();
+  }
+  return mrb_false_value();
+}
+
+/* 15.2.5.3.2 */
+/*
+ * call-seq:
+ *   true ^ obj -> !obj
+ *
+ * Exclusive Or: the answer is the negated truthiness of the single
+ * argument, i.e. <code>false</code> when mrb_test() accepts it and
+ * <code>true</code> otherwise.
+ */
+
+static mrb_value
+true_xor(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  if (mrb_test(other)) {
+    return mrb_false_value();
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.5.3.3 */
+/*
+ * call-seq:
+ *   true.to_s -> "true"
+ *
+ * Returns a new string holding the text "true".
+ */
+
+static mrb_value
+true_to_s(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value text = mrb_usascii_str_new2(mrb, "true");
+
+  return text;
+}
+
+/* 15.2.5.3.4 */
+/*
+ * call-seq:
+ *   true | obj -> true
+ *
+ * Or: always answers <code>true</code>.  The argument belongs to a method
+ * call, so it is evaluated before this body runs; unlike <code>||</code>
+ * there is no short-circuit:
+ *
+ *   true |  puts("or")
+ *   true || puts("logical or")
+ *
+ * <em>produces:</em>
+ *
+ *   or
+ */
+
+static mrb_value
+true_or(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value ignored;
+
+  /* The argument is still fetched with "o" even though its value is unused. */
+  mrb_get_args(mrb, "o", &ignored);
+  return mrb_true_value();
+}
+
+/*
+ * Document-class: FalseClass
+ *
+ * The global value <code>false</code> is the only instance of class
+ * <code>FalseClass</code> and represents a logically false value in
+ * boolean expressions. The class provides operators allowing
+ * <code>false</code> to participate correctly in logical expressions.
+ *
+ */
+
+/* 15.2.4.3.1 */
+/* 15.2.6.3.1 */
+/*
+ * call-seq:
+ *   false & obj -> false
+ *   nil & obj   -> false
+ *
+ * And: always answers <code>false</code>.  The argument belongs to a
+ * method call, so it is evaluated regardless; there is no short-circuit.
+ * Registered for both NilClass and FalseClass in mrb_init_object().
+ */
+
+static mrb_value
+false_and(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value ignored;
+
+  /* The argument is still fetched with "o" even though its value is unused. */
+  mrb_get_args(mrb, "o", &ignored);
+  return mrb_false_value();
+}
+
+/* 15.2.4.3.2 */
+/* 15.2.6.3.2 */
+/*
+ * call-seq:
+ *   false ^ obj -> true or false
+ *   nil ^ obj   -> true or false
+ *
+ * Exclusive Or: the answer is the truthiness of the single argument,
+ * i.e. <code>true</code> when mrb_test() accepts it and
+ * <code>false</code> otherwise.
+ */
+
+static mrb_value
+false_xor(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  if (mrb_test(other)) {
+    return mrb_true_value();
+  }
+  return mrb_false_value();
+}
+
+/* 15.2.4.3.3 */
+/* 15.2.6.3.4 */
+/*
+ * call-seq:
+ *   false | obj -> true or false
+ *   nil | obj   -> true or false
+ *
+ * Or: the answer is the truthiness of the single argument, i.e.
+ * <code>true</code> when mrb_test() accepts it and <code>false</code>
+ * otherwise.
+ */
+
+static mrb_value
+false_or(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  if (mrb_test(other)) {
+    return mrb_true_value();
+  }
+  return mrb_false_value();
+}
+
+/* 15.2.6.3.3 */
+/*
+ * call-seq:
+ *   false.to_s -> "false"
+ *
+ * Returns a new string holding the text "false".
+ */
+
+static mrb_value
+false_to_s(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value text = mrb_usascii_str_new2(mrb, "false");
+
+  return text;
+}
+
+/*
+ * Defines NilClass, TrueClass and FalseClass under mrb->object_class,
+ * stores each class pointer in the interpreter state and registers the
+ * operator and to_s methods implemented above.  The trailing numbers
+ * repeat the section ids noted next to each implementation.
+ */
+void
+mrb_init_object(mrb_state *mrb)
+{
+  struct RClass *nil_cls;
+  struct RClass *true_cls;
+  struct RClass *false_cls;
+
+  /* NilClass shares the and/xor/or bodies with FalseClass. */
+  nil_cls = mrb_define_class(mrb, "NilClass", mrb->object_class);
+  mrb->nil_class = nil_cls;
+  mrb_define_method(mrb, nil_cls, "&", false_and, ARGS_REQ(1));       /* 15.2.4.3.1 */
+  mrb_define_method(mrb, nil_cls, "^", false_xor, ARGS_REQ(1));       /* 15.2.4.3.2 */
+  mrb_define_method(mrb, nil_cls, "|", false_or, ARGS_REQ(1));        /* 15.2.4.3.3 */
+  mrb_define_method(mrb, nil_cls, "nil?", mrb_true, ARGS_NONE());     /* 15.2.4.3.4 */
+  mrb_define_method(mrb, nil_cls, "to_s", nil_to_s, ARGS_NONE());     /* 15.2.4.3.5 */
+
+  true_cls = mrb_define_class(mrb, "TrueClass", mrb->object_class);
+  mrb->true_class = true_cls;
+  mrb_define_method(mrb, true_cls, "&", true_and, ARGS_REQ(1));       /* 15.2.5.3.1 */
+  mrb_define_method(mrb, true_cls, "^", true_xor, ARGS_REQ(1));       /* 15.2.5.3.2 */
+  mrb_define_method(mrb, true_cls, "to_s", true_to_s, ARGS_NONE());   /* 15.2.5.3.3 */
+  mrb_define_method(mrb, true_cls, "|", true_or, ARGS_REQ(1));        /* 15.2.5.3.4 */
+
+  false_cls = mrb_define_class(mrb, "FalseClass", mrb->object_class);
+  mrb->false_class = false_cls;
+  mrb_define_method(mrb, false_cls, "&", false_and, ARGS_REQ(1));     /* 15.2.6.3.1 */
+  mrb_define_method(mrb, false_cls, "^", false_xor, ARGS_REQ(1));     /* 15.2.6.3.2 */
+  mrb_define_method(mrb, false_cls, "to_s", false_to_s, ARGS_NONE()); /* 15.2.6.3.3 */
+  mrb_define_method(mrb, false_cls, "|", false_or, ARGS_REQ(1));      /* 15.2.6.3.4 */
+}
+
+/*
+ * Calls the zero-argument conversion method `method` on `val`.
+ *
+ * When `val` does not respond to `method`: if `raise` is non-zero an
+ * E_TYPE_ERROR naming the source value and the target type name `tname`
+ * is raised, otherwise (and after the raise call, should it return) the
+ * nil value is returned.
+ */
+mrb_value
+convert_type(mrb_state *mrb, mrb_value val, const char *tname, const char *method, int raise)
+{
+  mrb_sym mid = mrb_intern(mrb, method);
+
+  if (mrb_respond_to(mrb, val, mid)) {
+    return mrb_funcall(mrb, val, method, 0);
+  }
+  if (raise) {
+    const char *from;
+
+    /* nil, true and false are spelled literally; anything else by class name */
+    if (mrb_nil_p(val)) {
+      from = "nil";
+    }
+    else if (mrb_type(val) == MRB_TT_TRUE) {
+      from = "true";
+    }
+    else if (mrb_type(val) == MRB_TT_FALSE) {
+      from = "false";
+    }
+    else {
+      from = mrb_obj_classname(mrb, val);
+    }
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s into %s", from, tname);
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * Non-raising integer conversion.
+ *
+ * A Fixnum is returned unchanged.  Otherwise `method` is called on `val`
+ * through convert_type() without raising; the nil value is returned when
+ * `val` does not respond to `method` or when the result is not a kind of
+ * mrb->fixnum_class.
+ *
+ * The result is checked against mrb->fixnum_class (the same class
+ * mrb_to_integer() checks) rather than against the result's own class,
+ * which every value trivially satisfies.
+ */
+mrb_value
+mrb_check_to_integer(mrb_state *mrb, mrb_value val, const char *method)
+{
+  mrb_value v;
+
+  if (mrb_type(val) == MRB_TT_FIXNUM) return val;
+  v = convert_type(mrb, val, "Integer", method, FALSE);
+  if (mrb_nil_p(v)) return (v);
+  if (!mrb_obj_is_kind_of(mrb, v, mrb->fixnum_class)) {
+    return mrb_nil_value();
+  }
+  return v;
+}
+
+/*
+ * Strict conversion of `val` to the type tag `type`.
+ *
+ * A value already carrying `type` is returned unchanged.  Otherwise
+ * `method` is invoked through convert_type() in raising mode, and an
+ * E_TYPE_ERROR is raised if the result does not carry `type` either.
+ * `tname` is the type name used in the error messages.
+ */
+mrb_value
+mrb_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method)
+{
+  mrb_value converted;
+
+  if (mrb_type(val) != type) {
+    converted = convert_type(mrb, val, tname, method, 1/*Qtrue*/);
+    if (mrb_type(converted) != type) {
+      mrb_raise(mrb, E_TYPE_ERROR, "%s#%s should return %s",
+                mrb_obj_classname(mrb, val), method, tname);
+    }
+    return converted;
+  }
+  return val;
+}
+
+/*
+ * Lenient conversion of `val` to the type tag `type`.
+ *
+ * A value already carrying `type` is returned unchanged, except when
+ * `type` is MRB_TT_DATA, which always goes through the conversion.
+ * `method` is invoked through convert_type() in non-raising mode; a nil
+ * result is passed on as nil, and a non-nil result with the wrong type
+ * tag raises E_TYPE_ERROR.
+ */
+mrb_value
+mrb_check_convert_type(mrb_state *mrb, mrb_value val, mrb_int type, const char *tname, const char *method)
+{
+  mrb_value converted;
+
+  /* always convert T_DATA */
+  if (type != MRB_TT_DATA && mrb_type(val) == type) {
+    return val;
+  }
+  converted = convert_type(mrb, val, tname, method, 0/*Qfalse*/);
+  if (mrb_nil_p(converted)) {
+    return mrb_nil_value();
+  }
+  if (mrb_type(converted) == type) {
+    return converted;
+  }
+  mrb_raise(mrb, E_TYPE_ERROR, "%s#%s should return %s",
+            mrb_obj_classname(mrb, val), method, tname);
+  return converted;
+}
+
+static const struct types { /* Table mapping a type tag to the name mrb_check_type() prints as the expected type. */
+ unsigned char type; /* type tag; mrb_check_type() compares it with MRB_TT_MAXDEFINE and with the requested tag */
+ const char *name; /* text substituted for "expected %s" in the error message */
+} builtin_types[] = {
+// {MRB_TT_NIL, "nil"},
+ {MRB_TT_FALSE, "false"},
+ {MRB_TT_TRUE, "true"},
+ {MRB_TT_FIXNUM, "Fixnum"},
+ {MRB_TT_SYMBOL, "Symbol"}, /* :symbol */
+ {MRB_TT_MODULE, "Module"},
+ {MRB_TT_OBJECT, "Object"},
+ {MRB_TT_CLASS, "Class"},
+ {MRB_TT_ICLASS, "iClass"}, /* internal use: mixed-in module holder */
+ {MRB_TT_SCLASS, "SClass"},
+ {MRB_TT_PROC, "Proc"},
+ {MRB_TT_FLOAT, "Float"},
+ {MRB_TT_ARRAY, "Array"},
+ {MRB_TT_HASH, "Hash"},
+ {MRB_TT_STRING, "String"},
+ {MRB_TT_RANGE, "Range"},
+ {MRB_TT_REGEX, "Regexp"},
+ {MRB_TT_STRUCT, "Struct"},
+// {MRB_TT_BIGNUM, "Bignum"},
+ {MRB_TT_FILE, "File"},
+ {MRB_TT_DATA, "Data"}, /* internal use: wrapped C pointers */
+ {MRB_TT_MATCH, "MatchData"}, /* data of $~ */
+// {MRB_TT_VARMAP, "Varmap"}, /* internal use: dynamic variables */
+// {MRB_TT_NODE, "Node"}, /* internal use: syntax tree node */
+// {MRB_TT_UNDEF, "undef"}, /* internal use: #undef; should not happen */
+ {-1, 0} /* terminator; NOTE(review): -1 is stored in an unsigned char, presumably so it compares >= MRB_TT_MAXDEFINE and ends the scan -- confirm */
+};
+
+/*
+ * Verifies that `x` carries the type tag `t`.
+ *
+ * Returns silently when the tags match and the tag is not MRB_TT_DATA.
+ * Otherwise builtin_types is scanned for `t`; on a hit an E_TYPE_ERROR
+ * "wrong argument type ... (expected ...)" is raised, describing `x` as
+ * "nil", "Fixnum", "Symbol", its string form (other special constants)
+ * or its class name.  If `t` is not in the table a diagnostic is printed
+ * to stdout instead.
+ */
+void
+mrb_check_type(mrb_state *mrb, mrb_value x, enum mrb_vtype t)
+{
+  const struct types *entry;
+  struct RString *s;
+  int actual;
+
+  actual = mrb_type(x);
+  if (actual == t && actual != MRB_TT_DATA) {
+    return;
+  }
+
+  for (entry = builtin_types; entry->type < MRB_TT_MAXDEFINE; entry++) {
+    const char *etype;
+
+    if (entry->type != t) {
+      continue;
+    }
+    if (mrb_nil_p(x)) {
+      etype = "nil";
+    }
+    else if (mrb_type(x) == MRB_TT_FIXNUM) {
+      etype = "Fixnum";
+    }
+    else if (mrb_type(x) == MRB_TT_SYMBOL) {
+      etype = "Symbol";
+    }
+    else if (mrb_special_const_p(x)) {
+      s = mrb_str_ptr(mrb_obj_as_string(mrb, x));
+      etype = s->buf;
+    }
+    else {
+      etype = mrb_obj_classname(mrb, x);
+    }
+    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type %s (expected %s)",
+              etype, entry->name);
+  }
+  /* the requested tag has no entry in builtin_types */
+  printf ("unknown type 0x%x (0x%x given)", t, mrb_type(x));
+}
+
+/* 15.3.1.3.46 */
+/*
+ * call-seq:
+ * obj.to_s => string
+ *
+ * Returns a string representing <i>obj</i>. The default
+ * <code>to_s</code> prints the object's class and an encoding of the
+ * object id. As a special case, the top-level object that is the
+ * initial execution context of Ruby programs returns ``main.''
+ */
+
+/*
+ * Builds the default "#<ClassName:0xADDR>" string for `obj`.
+ *
+ * The string is created with room for the class name, 6 tag characters
+ * and up to 16 address digits.  Formatting is bounded with snprintf() and
+ * the string length is taken from its return value instead of a second
+ * strlen() pass.
+ */
+mrb_value
+mrb_any_to_s(mrb_state *mrb, mrb_value obj)
+{
+  const char *cname = mrb_obj_classname(mrb, obj);
+  size_t len;
+  mrb_value str;
+  struct RString *s;
+  int written;
+
+  len = strlen(cname)+6+16;
+  str = mrb_str_new(mrb, 0, len); /* 6:tags 16:addr */
+  s = mrb_str_ptr(str);
+  /* len+1 assumes the buffer holds a trailing NUL byte, the same room the
+     former unbounded sprintf() needed for a 16-digit address */
+  written = snprintf(s->buf, len+1, "#<%s:0x%lx>", cname, (unsigned long)(obj.value.p));
+  if (written < 0) {
+    written = 0;              /* formatting error: report an empty string */
+  }
+  else if ((size_t)written > len) {
+    written = (int)len;       /* output was truncated to the buffer size */
+  }
+  s->len = written;
+  /*if (OBJ_TAINTED(obj)) OBJ_TAINT(str);*/
+
+  return str;
+}
+
+/*
+ * call-seq:
+ * obj.is_a?(class) => true or false
+ * obj.kind_of?(class) => true or false
+ *
+ * Returns <code>true</code> if <i>class</i> is the class of
+ * <i>obj</i>, or if <i>class</i> is one of the superclasses of
+ * <i>obj</i> or modules included in <i>obj</i>.
+ *
+ * module M; end
+ * class A
+ * include M
+ * end
+ * class B < A; end
+ * class C < B; end
+ * b = B.new
+ * b.instance_of? A #=> false
+ * b.instance_of? B #=> true
+ * b.instance_of? C #=> false
+ * b.instance_of? M #=> false
+ * b.kind_of? A #=> true
+ * b.kind_of? B #=> true
+ * b.kind_of? C #=> false
+ * b.kind_of? M #=> true
+ */
+
+int
+mrb_obj_is_kind_of(mrb_state *mrb, mrb_value obj, struct RClass *c)
+{
+  struct RClass *k = mrb_class(mrb, obj);
+
+  /* only modules, classes and iclasses are acceptable as `c` */
+  if (c->tt != MRB_TT_MODULE && c->tt != MRB_TT_CLASS && c->tt != MRB_TT_ICLASS) {
+    mrb_raise(mrb, E_TYPE_ERROR, "class or module required");
+  }
+
+  /* walk the super chain; an entry matches when it is `c` itself or
+     has the same method table pointer as `c` */
+  for (; k; k = k->super) {
+    if (k == c || k->mt == c->mt) {
+      return 1/* TRUE */;
+    }
+  }
+  return 0/* FALSE */;
+}
+
+/*
+ * Raising integer conversion through `method`.
+ *
+ * A Fixnum is returned unchanged.  Otherwise `method` is invoked via
+ * convert_type() in raising mode, and an E_TYPE_ERROR is raised when the
+ * result is not a kind of mrb->fixnum_class.
+ */
+static mrb_value
+mrb_to_integer(mrb_state *mrb, mrb_value val, const char *method)
+{
+  mrb_value converted;
+
+  if (FIXNUM_P(val)) {
+    return val;
+  }
+  //if (TYPE(val) == T_BIGNUM) return val;
+  converted = convert_type(mrb, val, "Integer", method, TRUE);
+  if (mrb_obj_is_kind_of(mrb, converted, mrb->fixnum_class)) {
+    return converted;
+  }
+  {
+    const char *cname = mrb_obj_classname(mrb, val);
+
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s to Integer (%s#%s gives %s)",
+              cname, cname, method, mrb_obj_classname(mrb, converted));
+  }
+  return converted;
+}
+
+/* Converts `val` with its "to_int" method via mrb_to_integer(). */
+mrb_value
+mrb_to_int(mrb_state *mrb, mrb_value val)
+{
+  mrb_value as_int = mrb_to_integer(mrb, val, "to_int");
+
+  return as_int;
+}
+
+static mrb_value
+mrb_convert_to_integer(mrb_state *mrb, mrb_value val, int base)
+{ /* Converts val to an integer value, dispatching on its type tag.
+ * A non-zero base is accepted only when val is a String or when
+ * mrb_check_string_type() yields a non-nil value for it; every other
+ * combination jumps to arg_error and raises E_ARGUMENT_ERROR.
+ */
+ mrb_value tmp;
+
+ if (mrb_nil_p(val)) {
+ if (base != 0) goto arg_error;
+ mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Integer");
+ }
+ switch (mrb_type(val)) {
+ case MRB_TT_FLOAT:
+ if (base != 0) goto arg_error;
+ if (mrb_float(val) <= (double)FIXNUM_MAX
+ && mrb_float(val) >= (double)FIXNUM_MIN) {
+ break; /* within FIXNUM_MIN..FIXNUM_MAX: leave the switch and use the to_int/to_i path below */
+ }
+ return mrb_dbl2big(mrb, mrb_float(val)); /* outside the Fixnum range */
+
+ case MRB_TT_FIXNUM:
+ if (base != 0) goto arg_error;
+ return val;
+
+ case MRB_TT_STRING:
+string_conv: /* also entered from below once tmp is known to be non-nil; note that val, not tmp, is parsed */
+ return mrb_str_to_inum(mrb, val, base, TRUE);
+
+ default:
+ break;
+ }
+ if (base != 0) {
+ tmp = mrb_check_string_type(mrb, val);
+ if (!mrb_nil_p(tmp)) goto string_conv;
+ arg_error: /* shared raise site for "base given with a non-string value" */
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "base specified for non string value");
+ }
+ tmp = convert_type(mrb, val, "Integer", "to_int", FALSE); /* first try to_int without raising */
+ if (mrb_nil_p(tmp)) {
+ return mrb_to_integer(mrb, val, "to_i"); /* fall back to to_i; this call raises on failure */
+ }
+ return tmp;
+}
+
+/* Converts `val` through mrb_convert_to_integer() with base 0 (no explicit base). */
+mrb_value
+mrb_Integer(mrb_state *mrb, mrb_value val)
+{
+  mrb_value as_int = mrb_convert_to_integer(mrb, val, 0);
+
+  return as_int;
+}
+
+/*
+ * Converts `val` to a Float value.
+ *
+ * nil raises E_TYPE_ERROR.  A Fixnum is cast to mrb_float, a Float is
+ * returned unchanged, a String is parsed with mrb_str_to_dbl(), and any
+ * other value is converted through its "to_f" method via
+ * mrb_convert_type().
+ */
+mrb_value
+mrb_Float(mrb_state *mrb, mrb_value val)
+{
+  if (mrb_nil_p(val)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert nil into Float");
+  }
+
+  if (mrb_type(val) == MRB_TT_FIXNUM) {
+    return mrb_float_value((mrb_float)mrb_fixnum(val));
+  }
+  if (mrb_type(val) == MRB_TT_FLOAT) {
+    return val;
+  }
+  if (mrb_type(val) == MRB_TT_STRING) {
+    return mrb_float_value(mrb_str_to_dbl(mrb, val, TRUE));
+  }
+  return mrb_convert_type(mrb, val, MRB_TT_FLOAT, "Float", "to_f");
+}
+
+/*
+ * Calls obj.inspect and passes the result through mrb_obj_as_string().
+ * "inspect" takes no arguments, so nothing follows the argument count of
+ * 0 in the mrb_funcall() call.
+ */
+mrb_value
+mrb_inspect(mrb_state *mrb, mrb_value obj)
+{
+  return mrb_obj_as_string(mrb, mrb_funcall(mrb, obj, "inspect", 0));
+}
+
+int
+mrb_eql(mrb_state *mrb, mrb_value obj1, mrb_value obj2)
+{ /* Calls obj1.eql?(obj2) and returns that result reduced to a C truth value by RTEST. */
+ return RTEST(mrb_funcall(mrb, obj1, "eql?", 1, obj2));
+}
+
diff --git a/src/oniguruma.h b/src/oniguruma.h
new file mode 100644
index 000000000..3332df023
--- /dev/null
+++ b/src/oniguruma.h
@@ -0,0 +1,771 @@
+#ifndef ONIGURUMA_H
+#define ONIGURUMA_H
+/**********************************************************************
+ oniguruma.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ONIGURUMA
+#define ONIGURUMA_VERSION_MAJOR 5
+#define ONIGURUMA_VERSION_MINOR 9
+#define ONIGURUMA_VERSION_TEENY 2
+
+#ifdef __cplusplus
+# ifndef HAVE_PROTOTYPES
+# define HAVE_PROTOTYPES 1
+# endif
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
+#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+#ifndef ONIG_EXTERN
+#ifdef RUBY_EXTERN
+#define ONIG_EXTERN RUBY_EXTERN
+#else
+#if defined(_WIN32) && !defined(__GNUC__)
+#if defined(EXPORT) || defined(RUBY_EXPORT)
+#define ONIG_EXTERN extern __declspec(dllexport)
+#else
+#define ONIG_EXTERN extern __declspec(dllimport)
+#endif
+#endif
+#endif
+#endif
+
+#ifndef ONIG_EXTERN
+#define ONIG_EXTERN extern
+#endif
+
+/* PART: character encoding */
+
+#ifndef ONIG_ESCAPE_UCHAR_COLLISION
+#define UChar OnigUChar
+#endif
+
+typedef unsigned char OnigUChar;
+typedef unsigned int OnigCodePoint;
+typedef unsigned int OnigCtype;
+typedef size_t OnigDistance;
+
+#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
+
+typedef unsigned int OnigCaseFoldType; /* case fold flag */
+
+ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
+
+/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
+/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
+#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
+#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
+
+#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
+#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
+
+
+#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
+#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
+/* 13 => Unicode:0x1ffc */
+
+/* code range */
+/* `range` is parenthesized so that an expression argument (e.g. `p + 1`)
+   is indexed as a whole: element 0 is the pair count, and pair i occupies
+   elements 2*i+1 (from) and 2*i+2 (to). */
+#define ONIGENC_CODE_RANGE_NUM(range) ((int )(range)[0])
+#define ONIGENC_CODE_RANGE_FROM(range,i) ((range)[((i)*2) + 1])
+#define ONIGENC_CODE_RANGE_TO(range,i) ((range)[((i)*2) + 2])
+
+typedef struct { /* One case-fold candidate; the array form is the acs[] parameter of get_case_fold_codes_by_str. */
+ int byte_len; /* argument(original) character(s) byte length */
+ int code_len; /* number of code */
+ OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; /* code points; capacity is ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN (3) */
+} OnigCaseFoldCodeItem;
+
+typedef struct { /* Set of meta-character code points; embedded in OnigSyntaxType as meta_char_table. */
+ OnigCodePoint esc; /* presumably the escape character -- confirm against the syntax definitions */
+ OnigCodePoint anychar; /* presumably "match any character" -- confirm */
+ OnigCodePoint anytime; /* presumably "zero or more times" -- confirm */
+ OnigCodePoint zero_or_one_time;
+ OnigCodePoint one_or_more_time;
+ OnigCodePoint anychar_anytime; /* presumably any character, any number of times -- confirm */
+} OnigMetaCharTableType;
+
+typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
+
+typedef struct OnigEncodingTypeST { /* Per-encoding dispatch table; the ONIGENC_* macros in this header call through these members. */
+ int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc); /* called by ONIGENC_PRECISE_MBC_ENC_LEN */
+ const char* name; /* read by ONIGENC_NAME */
+ int max_enc_len; /* read by ONIGENC_MBC_MAXLEN */
+ int min_enc_len; /* read by ONIGENC_MBC_MINLEN */
+ int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); /* called by ONIGENC_IS_MBC_NEWLINE */
+ OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); /* called by ONIGENC_MBC_TO_CODE */
+ int (*code_to_mbclen)(OnigCodePoint code, struct OnigEncodingTypeST* enc); /* called by ONIGENC_CODE_TO_MBCLEN */
+ int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, struct OnigEncodingTypeST* enc); /* called by ONIGENC_CODE_TO_MBC */
+ int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, struct OnigEncodingTypeST* enc); /* called by ONIGENC_MBC_CASE_FOLD */
+ int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc); /* called by ONIGENC_APPLY_ALL_CASE_FOLD */
+ int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc); /* called by ONIGENC_GET_CASE_FOLD_CODES_BY_STR */
+ int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); /* called by ONIGENC_PROPERTY_NAME_TO_CTYPE; note enc comes first here */
+ int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc); /* called by ONIGENC_IS_CODE_CTYPE */
+ int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc); /* called by ONIGENC_GET_CTYPE_CODE_RANGE */
+ OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); /* called by ONIGENC_LEFT_ADJUST_CHAR_HEAD */
+ int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); /* called by ONIGENC_IS_ALLOWED_REVERSE_MATCH */
+ int ruby_encoding_index; /* NOTE(review): no accessor for this member is visible in this part of the header -- confirm usage */
+} OnigEncodingType;
+
+typedef OnigEncodingType* OnigEncoding;
+
+ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
+
+#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
+
+#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
+
+
+/* work size */
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
+/* 18: 6(max-byte) * 3(case-fold chars) */
+
+/* character types */
+#define ONIGENC_CTYPE_NEWLINE 0
+#define ONIGENC_CTYPE_ALPHA 1
+#define ONIGENC_CTYPE_BLANK 2
+#define ONIGENC_CTYPE_CNTRL 3
+#define ONIGENC_CTYPE_DIGIT 4
+#define ONIGENC_CTYPE_GRAPH 5
+#define ONIGENC_CTYPE_LOWER 6
+#define ONIGENC_CTYPE_PRINT 7
+#define ONIGENC_CTYPE_PUNCT 8
+#define ONIGENC_CTYPE_SPACE 9
+#define ONIGENC_CTYPE_UPPER 10
+#define ONIGENC_CTYPE_XDIGIT 11
+#define ONIGENC_CTYPE_WORD 12
+#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
+#define ONIGENC_CTYPE_ASCII 14
+#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
+#define ONIGENC_CTYPE_SPECIAL_MASK 128
+/* The combined values are parenthesized so they behave as a single operand
+   inside larger expressions (e.g. `x == ONIGENC_CTYPE_S` or
+   `ONIGENC_CTYPE_W & mask`); the values themselves are unchanged. */
+#define ONIGENC_CTYPE_S /* [\t\n\v\f\r\s] */ \
+ (ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE)
+#define ONIGENC_CTYPE_D /* [0-9] */ \
+ (ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_CTYPE_W /* [0-9A-Za-z_] */ \
+ (ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD)
+#define ONIGENC_CTYPE_SPECIAL_P(ctype) ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK)
+
+
+#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
+
+#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
+#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
+#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
+#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
+#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
+#define ONIGENC_IS_MBC_WORD(enc,s,end) \
+ ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
+
+
+#define ONIGENC_NAME(enc) ((enc)->name)
+
+#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
+ (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ (enc)->is_allowed_reverse_match(s,end,enc)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
+ (enc)->left_adjust_char_head(start, s, end, enc)
+#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
+ (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
+#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
+ (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
+#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
+ onigenc_step_back((enc),(start),(s),(end),(n))
+
+#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
+#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
+#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
+
+#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
+#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
+
+#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
+#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
+#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
+
+#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
+
+ONIG_EXTERN
+int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
+
+#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
+#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
+#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
+#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
+#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
+#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
+#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
+ (enc)->property_name_to_ctype(enc,p,end)
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
+
+#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
+#define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
+#define ONIGENC_IS_CODE_PRINT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
+#define ONIGENC_IS_CODE_LOWER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
+#define ONIGENC_IS_CODE_UPPER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
+#define ONIGENC_IS_CODE_SPACE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
+#define ONIGENC_IS_CODE_BLANK(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
+#define ONIGENC_IS_CODE_WORD(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
+
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
+ (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
+
+ONIG_EXTERN
+OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n);
+
+
+/* encoding API */
+ONIG_EXTERN
+int onigenc_init(void);
+ONIG_EXTERN
+int onigenc_set_default_encoding(OnigEncoding enc);
+ONIG_EXTERN
+OnigEncoding onigenc_get_default_encoding(void);
+ONIG_EXTERN
+void onigenc_set_default_caseconv_table(const OnigUChar* table);
+ONIG_EXTERN
+OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev);
+ONIG_EXTERN
+OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
+ONIG_EXTERN
+OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
+ONIG_EXTERN
+OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
+ONIG_EXTERN
+int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end);
+ONIG_EXTERN
+int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p);
+ONIG_EXTERN
+int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p);
+
+
+
+/* PART: regular expression */
+
+/* config parameters */
+#define ONIG_NREGION 10
+#define ONIG_MAX_BACKREF_NUM 1000
+#define ONIG_MAX_REPEAT_NUM 100000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
+/* constants */
+#define ONIG_MAX_ERROR_MESSAGE_LEN 90
+
+typedef unsigned int OnigOptionType;
+
+#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
+
+/* options */
+#define ONIG_OPTION_NONE 0U
+#define ONIG_OPTION_IGNORECASE 1U
+#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
+#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
+#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
+#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
+#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
+#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
+#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
+#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
+/* options (search time) */
+#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
+#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
+#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
+
+#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
+#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
+#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
+
+/* syntax */
+typedef struct { /* Description of one regex syntax; the predefined instances are the OnigSyntax* externs declared below. */
+ unsigned int op; /* presumably a bit set of ONIG_SYN_OP_* flags -- confirm */
+ unsigned int op2; /* presumably a bit set of ONIG_SYN_OP2_* flags -- confirm */
+ unsigned int behavior; /* presumably a bit set of the "syntax (behavior)" ONIG_SYN_* flags -- confirm */
+ OnigOptionType options; /* default option */
+ OnigMetaCharTableType meta_char_table;
+} OnigSyntaxType;
+
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl_NG;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
+
+/* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
+
+/* default syntax */
+ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
+
+/* syntax (operators) */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
+#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
+#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
+#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
+#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
+#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
+#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
+#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
+/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
+#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
+
+/* syntax (behavior) */
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
+#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
+
+/* syntax (behavior) in char class [...] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
+/* syntax (behavior) warning */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
+#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
+
+/* meta character specifiers (onig_set_meta_char()) */
+#define ONIG_META_CHAR_ESCAPE 0
+#define ONIG_META_CHAR_ANYCHAR 1
+#define ONIG_META_CHAR_ANYTIME 2
+#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
+#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
+#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
+
+#define ONIG_INEFFECTIVE_META_CHAR 0
+
+/* error codes */
+#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
+/* normal return */
+#define ONIG_NORMAL 0
+#define ONIG_MISMATCH -1
+#define ONIG_NO_SUPPORT_CONFIG -2
+
+/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
+#define ONIGERR_PARSER_BUG -11
+#define ONIGERR_STACK_BUG -12
+#define ONIGERR_UNDEFINED_BYTECODE -13
+#define ONIGERR_UNEXPECTED_BYTECODE -14
+#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
+#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
+#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
+/* general error */
+#define ONIGERR_INVALID_ARGUMENT -30
+/* syntax error */
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
+#define ONIGERR_EMPTY_CHAR_CLASS -102
+#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
+#define ONIGERR_END_PATTERN_AT_ESCAPE -104
+#define ONIGERR_END_PATTERN_AT_META -105
+#define ONIGERR_END_PATTERN_AT_CONTROL -106
+#define ONIGERR_META_CODE_SYNTAX -108
+#define ONIGERR_CONTROL_CODE_SYNTAX -109
+#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
+#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
+#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
+#define ONIGERR_NESTED_REPEAT_OPERATOR -115
+#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
+#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
+#define ONIGERR_END_PATTERN_IN_GROUP -118
+#define ONIGERR_UNDEFINED_GROUP_OPTION -119
+#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
+#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
+#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
+/* values error (syntax error) */
+#define ONIGERR_TOO_BIG_NUMBER -200
+#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
+#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
+#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
+#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
+#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
+#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
+#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
+#define ONIGERR_INVALID_BACKREF -208
+#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
+#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
+#define ONIGERR_EMPTY_GROUP_NAME -214
+#define ONIGERR_INVALID_GROUP_NAME -215
+#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
+#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
+#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
+#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
+#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
+#define ONIGERR_NEVER_ENDING_RECURSION -221
+#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
+#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_CODE_POINT_VALUE -400
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
+#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
+
+/* errors related to thread */
+#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
+
+
+/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8). NOTE(review): the test macro below reads (r)->list, a member that struct re_registers as declared in this header does not have, and it checks no lower bound on i -- confirm the intended argument type before using it */
+#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
+#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
+ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
+
+typedef struct OnigCaptureTreeNodeStruct {
+ int group; /* group number */
+ int beg;
+ int end;
+ int allocated;
+ int num_childs;
+ struct OnigCaptureTreeNodeStruct** childs;
+} OnigCaptureTreeNode;
+
+/* match result region type */
+struct re_registers {
+ int allocated;
+ int num_regs;
+ int* beg;
+ int* end;
+ /* extended */
+ OnigCaptureTreeNode* history_root; /* capture history tree root */
+};
+
+/* capture tree traverse */
+#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
+#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
+#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
+ ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
+
+
+#define ONIG_REGION_NOTPOS -1
+
+typedef struct re_registers OnigRegion;
+
+typedef struct {
+ OnigEncoding enc;
+ OnigUChar* par;
+ OnigUChar* par_end;
+} OnigErrorInfo;
+
+typedef struct {
+ int lower;
+ int upper;
+} OnigRepeatRange;
+
+typedef void (*OnigWarnFunc)(const char* s);
+extern void onig_null_warn(const char* s);
+#define ONIG_NULL_WARN onig_null_warn
+
+#define ONIG_CHAR_TABLE_SIZE 256
+
+/* regex_t state */
+#define ONIG_STATE_NORMAL 0
+#define ONIG_STATE_SEARCHING 1
+#define ONIG_STATE_COMPILING -1
+#define ONIG_STATE_MODIFY -2
+
+#define ONIG_STATE(reg) \
+ ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
+
+typedef struct re_pattern_buffer {
+ /* common members of BBuf(bytes-buffer) */
+ unsigned char* p; /* compiled pattern */
+ unsigned int used; /* used space for p */
+ unsigned int alloc; /* allocated space for p */
+
+ int state; /* normal, searching, compiling */
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_comb_exp_check; /* combination explosion check */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
+ int stack_pop_level;
+ int repeat_range_alloc;
+ OnigRepeatRange* repeat_range;
+
+ OnigEncoding enc;
+ OnigOptionType options;
+ const OnigSyntaxType* syntax;
+ OnigCaseFoldType case_fold_flag;
+ void* name_table;
+
+ /* optimization info (string search, char-map and anchors) */
+ int optimize; /* optimize flag */
+ int threshold_len; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ int sub_anchor; /* start-anchor for exact or map */
+ unsigned char *exact;
+ unsigned char *exact_end;
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ int *int_map; /* BM skip for exact_len > 255 */
+ int *int_map_backward; /* BM skip for backward search */
+ OnigDistance dmin; /* min-distance of exact or map */
+ OnigDistance dmax; /* max-distance of exact or map */
+
+ /* regex_t link chain */
+ struct re_pattern_buffer* chain; /* escape compile-conflict */
+} OnigRegexType;
+
+typedef OnigRegexType* OnigRegex;
+
+#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
+ typedef OnigRegexType regex_t;
+#endif
+
+
+typedef struct {
+ int num_of_elements;
+ OnigEncoding pattern_enc;
+ OnigEncoding target_enc;
+ OnigSyntaxType* syntax;
+ OnigOptionType option;
+ OnigCaseFoldType case_fold_flag;
+} OnigCompileInfo;
+
+/* Oniguruma Native API */
+ONIG_EXTERN
+int onig_init(void);
+ONIG_EXTERN
+int onig_error_code_to_str(OnigUChar* s, int err_code, ...);
+ONIG_EXTERN
+void onig_set_warn_func(OnigWarnFunc f);
+ONIG_EXTERN
+void onig_set_verb_warn_func(OnigWarnFunc f);
+ONIG_EXTERN
+int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo);
+ONIG_EXTERN
+int onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax);
+ONIG_EXTERN
+int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
+ONIG_EXTERN
+int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
+ONIG_EXTERN
+void onig_free(OnigRegex);
+ONIG_EXTERN
+void onig_free_body(OnigRegex);
+ONIG_EXTERN
+int onig_recompile(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
+ONIG_EXTERN
+int onig_recompile_deluxe(OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
+ONIG_EXTERN
+long onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
+ONIG_EXTERN
+long onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option);
+ONIG_EXTERN
+OnigRegion* onig_region_new(void);
+ONIG_EXTERN
+void onig_region_init(OnigRegion* region);
+ONIG_EXTERN
+void onig_region_free(OnigRegion* region, int free_self);
+ONIG_EXTERN
+void onig_region_copy(OnigRegion* to, OnigRegion* from);
+ONIG_EXTERN
+void onig_region_clear(OnigRegion* region);
+ONIG_EXTERN
+int onig_region_resize(OnigRegion* region, int n);
+ONIG_EXTERN
+int onig_region_set(OnigRegion* region, int at, int beg, int end);
+ONIG_EXTERN
+int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums);
+ONIG_EXTERN
+int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region);
+ONIG_EXTERN
+int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg);
+ONIG_EXTERN
+int onig_number_of_names(OnigRegex reg);
+ONIG_EXTERN
+int onig_number_of_captures(OnigRegex reg);
+ONIG_EXTERN
+int onig_number_of_capture_histories(OnigRegex reg);
+ONIG_EXTERN
+OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region);
+ONIG_EXTERN
+int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg);
+ONIG_EXTERN
+int onig_noname_group_capture_is_active(OnigRegex reg);
+ONIG_EXTERN
+OnigEncoding onig_get_encoding(OnigRegex reg);
+ONIG_EXTERN
+OnigOptionType onig_get_options(OnigRegex reg);
+ONIG_EXTERN
+OnigCaseFoldType onig_get_case_fold_flag(OnigRegex reg);
+ONIG_EXTERN
+const OnigSyntaxType* onig_get_syntax(OnigRegex reg);
+ONIG_EXTERN
+int onig_set_default_syntax(const OnigSyntaxType* syntax);
+ONIG_EXTERN
+void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from);
+ONIG_EXTERN
+unsigned int onig_get_syntax_op(OnigSyntaxType* syntax);
+ONIG_EXTERN
+unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax);
+ONIG_EXTERN
+unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax);
+ONIG_EXTERN
+OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax);
+ONIG_EXTERN
+void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op);
+ONIG_EXTERN
+void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2);
+ONIG_EXTERN
+void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior);
+ONIG_EXTERN
+void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options);
+ONIG_EXTERN
+int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code);
+ONIG_EXTERN
+void onig_copy_encoding(OnigEncoding to, OnigEncoding from);
+ONIG_EXTERN
+OnigCaseFoldType onig_get_default_case_fold_flag(void);
+ONIG_EXTERN
+int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag);
+ONIG_EXTERN
+unsigned int onig_get_match_stack_limit_size(void);
+ONIG_EXTERN
+int onig_set_match_stack_limit_size(unsigned int size);
+ONIG_EXTERN
+int onig_end(void);
+ONIG_EXTERN
+const char* onig_version(void);
+ONIG_EXTERN
+const char* onig_copyright(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ONIGURUMA_H */
diff --git a/src/opcode.h b/src/opcode.h
new file mode 100644
index 000000000..e6ea74f79
--- /dev/null
+++ b/src/opcode.h
@@ -0,0 +1,148 @@
+#ifndef OPCODE_H
+#define OPCODE_H
+
+#define MAXARG_Bx ((1<<16)-1)
+#define MAXARG_sBx (MAXARG_Bx>>1) /* `sBx' is signed */
+
+/* instructions OP:A:B:C = 7:9:9:7 (32 bits) */
+/* OP:A:Bx = 7:9:16 */
+/* OP:Ax = 7:25 */
+
+#define GET_OPCODE(i) (((mrb_code)(i)) & 0x7f)
+#define GETARG_A(i) ((((mrb_code)(i)) >> 23) & 0x1ff)
+#define GETARG_B(i) ((((mrb_code)(i)) >> 14) & 0x1ff)
+#define GETARG_C(i) ((((mrb_code)(i)) >> 7) & 0x7f)
+#define GETARG_Bx(i) ((((mrb_code)(i)) >> 7) & 0xffff)
+#define GETARG_sBx(i) (GETARG_Bx(i)-MAXARG_sBx)
+#define GETARG_Ax(i) ((((mrb_code)(i)) >> 7) & 0x1ffffff)
+#define GETARG_UNPACK_b(i,n1,n2) ((((mrb_code)(i)) >> (7+(n2))) & (((1<<(n1))-1))) /* n1-bit field stored above the n2-bit field; n1/n2 parenthesized so expression arguments expand safely */
+#define GETARG_UNPACK_c(i,n1,n2) ((((mrb_code)(i)) >> 7) & (((1<<(n2))-1))) /* n2-bit field directly above the 7-bit opcode; n1 is unused here */
+#define GETARG_b(i) GETARG_UNPACK_b(i,14,2)
+#define GETARG_c(i) GETARG_UNPACK_c(i,14,2)
+
+#define MKOPCODE(op) ((op) & 0x7f) /* opcode occupies the low 7 bits */
+#define MKARG_A(c) (((c) & 0x1ff) << 23) /* NOTE(review): with a 32-bit int c, values >= 0x100 shift a 1 into the sign bit; consider converting to mrb_code before shifting -- confirm mrb_code is unsigned */
+#define MKARG_B(c) (((c) & 0x1ff) << 14)
+#define MKARG_C(c) (((c) & 0x7f) << 7)
+#define MKARG_Bx(v) (((v) & 0xffff) << 7)
+#define MKARG_sBx(v) MKARG_Bx((v)+MAXARG_sBx) /* adds the MAXARG_sBx bias that GETARG_sBx subtracts again */
+#define MKARG_Ax(v) (((v) & 0x1ffffff) << 7) /* NOTE(review): same sign-bit concern as MKARG_A for int v >= 0x1000000 */
+#define MKARG_PACK(b,n1,c,n2) ((((b) & ((1<<(n1))-1)) << (7+(n2)))|(((c) & ((1<<(n2))-1)) << 7)) /* packs an n1-bit b above an n2-bit c; n1/n2 parenthesized so expression arguments expand safely */
+#define MKARG_bc(b,c) MKARG_PACK(b,14,c,2)
+
+#define MKOP_A(op,a) (MKOPCODE(op)|MKARG_A(a))
+#define MKOP_AB(op,a,b) (MKOP_A(op,a)|MKARG_B(b))
+#define MKOP_ABC(op,a,b,c) (MKOP_AB(op,a,b)|MKARG_C(c))
+#define MKOP_ABx(op,a,bx) (MKOP_A(op,a)|MKARG_Bx(bx))
+#define MKOP_Bx(op,bx) (MKOPCODE(op)|MKARG_Bx(bx))
+#define MKOP_sBx(op,sbx) (MKOPCODE(op)|MKARG_sBx(sbx))
+#define MKOP_AsBx(op,a,sbx) (MKOP_A(op,a)|MKARG_sBx(sbx))
+#define MKOP_Ax(op,ax) (MKOPCODE(op)|MKARG_Ax(ax))
+#define MKOP_Abc(op,a,b,c) (MKOP_A(op,a)|MKARG_bc(b,c))
+
+enum {
+OP_NOP=0,/* */
+OP_MOVE,/* A B R(A) := R(B) */
+OP_LOADL,/* A Bx R(A) := Lit(Bx) */
+OP_LOADI,/* A sBx R(A) := sBx */
+OP_LOADSYM,/* A Bx R(A) := Sym(Bx) */
+OP_LOADNIL,/* A R(A) := nil */
+OP_LOADSELF,/* A R(A) := self */
+OP_LOADT,/* A R(A) := true */
+OP_LOADF,/* A R(A) := false */
+
+OP_GETGLOBAL,/* A Bx R(A) := getglobal(Sym(Bx)) */
+OP_SETGLOBAL,/* A Bx setglobal(Sym(Bx), R(A)) */
+OP_GETSPECIAL,/*A Bx R(A) := Special[Bx] */
+OP_SETSPECIAL,/*A Bx Special[Bx] := R(A) */
+OP_GETIV,/* A Bx R(A) := ivget(Sym(Bx)) */
+OP_SETIV,/* A Bx ivset(Sym(Bx),R(A)) */
+OP_GETCV,/* A Bx R(A) := cvget(Sym(Bx)) */
+OP_SETCV,/* A Bx cvset(Sym(Bx),R(A)) */
+OP_GETCONST,/* A Bx R(A) := constget(Sym(Bx)) */
+OP_SETCONST,/* A Bx constset(Sym(Bx),R(A)) */
+OP_GETMCNST,/* A Bx R(A) := R(A)::Sym(Bx) */
+OP_SETMCNST,/* A Bx R(A+1)::Sym(Bx) := R(A) */
+OP_GETUPVAR,/* A B C R(A) := uvget(B,C) */
+OP_SETUPVAR,/* A B C uvset(B,C,R(A)) */
+
+OP_JMP,/* sBx pc+=sBx */
+OP_JMPIF,/* A sBx if R(A) pc+=sBx */
+OP_JMPNOT,/* A sBx if !R(A) pc+=sBx */
+OP_ONERR,/* sBx rescue_push(pc+sBx) */
+OP_RESCUE,/* A clear(exc); R(A) := exception (ignore when A=0) */
+OP_POPERR,/* A A.times{rescue_pop()} */
+OP_RAISE,/* A raise(R(A)) */
+OP_EPUSH,/* Bx ensure_push(SEQ[Bx]) */
+OP_EPOP,/* A A.times{ensure_pop().call} */
+
+OP_SEND,/* A B C R(A) := call(R(A),mSym(B),R(A+1),...,R(A+C)) */
+OP_FSEND,/* A B C R(A) := fcall(R(A),mSym(B),R(A+1),...,R(A+C-1)) */
+OP_VSEND,/* A B R(A) := vcall(R(A),mSym(B)) */
+OP_CALL,/* A B C R(A) := self.call(R(A),.., R(A+C)) */
+OP_SUPER,/* A B C R(A) := super(R(A+1),... ,R(A+C-1)) */
+OP_ARGARY,/* A Bx R(A) := argument array (16=6:1:5:4) */
+OP_ENTER,/* Ax arg setup according to flags (23=5:5:1:5:5:1:1) */
+OP_KARG,/* A B C R(A) := kdict[mSym(B)]; if C kdict.rm(mSym(B)) */
+OP_KDICT,/* A C R(A) := kdict */
+
+OP_RETURN,/* A B return R(A) (B=normal,in-block return/break) */
+OP_TAILCALL,/* A B C return call(R(A),mSym(B),*R(C)) */
+OP_BLKPUSH,/* A Bx R(A) := block (16=6:1:5:4) */
+
+OP_ADD,/* A B C R(A) := R(A)+R(A+1) (mSyms[B]=:+,C=1) */
+OP_ADDI,/* A B C R(A) := R(A)+C (mSyms[B]=:+) */
+OP_SUB,/* A B C R(A) := R(A)-R(A+1) (mSyms[B]=:-,C=1) */
+OP_SUBI,/* A B C R(A) := R(A)-C (mSyms[B]=:-) */
+OP_MUL,/* A B C R(A) := R(A)*R(A+1) (mSyms[B]=:*,C=1) */
+OP_DIV,/* A B C R(A) := R(A)/R(A+1) (mSyms[B]=:/,C=1) */
+OP_EQ,/* A B C R(A) := R(A)==R(A+1) (mSyms[B]=:==,C=1) */
+OP_LT,/* A B C R(A) := R(A)<R(A+1) (mSyms[B]=:<,C=1) */
+OP_LE,/* A B C R(A) := R(A)<=R(A+1) (mSyms[B]=:<=,C=1) */
+OP_GT,/* A B C R(A) := R(A)>R(A+1) (mSyms[B]=:>,C=1) */
+OP_GE,/* A B C R(A) := R(A)>=R(A+1) (mSyms[B]=:>=,C=1) */
+
+OP_ARRAY,/* A B C R(A) := ary_new(R(B),R(B+1)..R(B+C)) */
+OP_ARYCAT,/* A B ary_cat(R(A),R(B)) */
+OP_ARYPUSH,/* A B ary_push(R(A),R(B)) */
+OP_AREF,/* A B C R(A) := R(B)[C] */
+OP_ASET,/* A B C R(B)[C] := R(A) */
+OP_APOST,/* A B C *R(A),R(A+1)..R(A+C) := R(A) */
+
+OP_STRING,/* A Bx R(A) := str_dup(Lit(Bx)) */
+OP_STRCAT,/* A B str_cat(R(A),R(B)) */
+
+OP_HASH,/* A B C R(A) := hash_new(R(B),R(B+1)..R(B+C)) */
+OP_LAMBDA,/* A Bz Cz R(A) := lambda(SEQ[Bz],Cz) */
+OP_RANGE,/* A B C R(A) := range_new(R(B),R(B+1),C) */
+
+OP_OCLASS,/* A R(A) := ::Object */
+OP_CLASS,/* A B R(A) := newclass(R(A),mSym(B),R(A+1)) */
+OP_MODULE,/* A B R(A) := newmodule(R(A),mSym(B)) */
+OP_EXEC,/* A Bx R(A) := blockexec(R(A),SEQ[Bx]) */
+OP_METHOD,/* A B R(A).newmethod(mSym(B),R(A+1)) */
+OP_SCLASS,/* A B R(A) := R(B).singleton_class */
+OP_TCLASS,/* A R(A) := target_class */
+
+OP_DEBUG,/* A print R(A) */
+OP_STOP,/* stop VM */
+OP_ERR,/* Bx raise RuntimeError with message Lit(Bx) */
+
+OP_RSVD1,/* reserved instruction #1 */
+OP_RSVD2,/* reserved instruction #2 */
+OP_RSVD3,/* reserved instruction #3 */
+OP_RSVD4,/* reserved instruction #4 */
+OP_RSVD5,/* reserved instruction #5 */
+};
+
+#define OP_L_STRICT 1
+#define OP_L_CAPTURE 2
+#define OP_L_METHOD OP_L_STRICT
+#define OP_L_LAMBDA (OP_L_STRICT|OP_L_CAPTURE)
+#define OP_L_BLOCK OP_L_CAPTURE
+
+#define OP_R_NORMAL 0
+#define OP_R_BREAK 1
+#define OP_R_RETURN 2
+
+#endif /* OPCODE_H */
diff --git a/src/parse.y b/src/parse.y
new file mode 100644
index 000000000..5925b9a5e
--- /dev/null
+++ b/src/parse.y
@@ -0,0 +1,5435 @@
+%{
+#undef PARSER_TEST
+#undef PARSER_DEBUG
+
+#define YYDEBUG 1
+#define YYERROR_VERBOSE 1
+#define YYSTACK_USE_ALLOCA 0
+
+#include "mruby.h"
+#include "st.h"
+#include "compile.h"
+#include <stdio.h>
+#include <errno.h>
+#include <ctype.h>
+#include <string.h>
+
+#define YYLEX_PARAM p
+
+typedef mrb_ast_node node;
+typedef struct mrb_parser_state parser_state;
+
+static int yylex(void *lval, parser_state *p);
+static void yyerror(parser_state *p, const char *s);
+static void yywarn(parser_state *p, const char *s);
+static void yywarning(parser_state *p, const char *s);
+static void backref_error(parser_state *p, node *n);
+
+#define identchar(c) (!isascii(c) || isalnum(c) || (c) == '_') /* true for alnum, '_' and any non-ASCII value; non-ASCII is tested first so isalnum() only ever sees 0..127 */
+
+#define TRUE 1
+#define FALSE 0
+
+typedef unsigned int stack_type;
+
+#define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1))
+#define BITSTACK_POP(stack) ((stack) = (stack) >> 1)
+#define BITSTACK_LEXPOP(stack) ((stack) = ((stack) >> 1) | ((stack) & 1))
+#define BITSTACK_SET_P(stack) ((stack)&1)
+
+#define COND_PUSH(n) BITSTACK_PUSH(p->cond_stack, (n))
+#define COND_POP() BITSTACK_POP(p->cond_stack)
+#define COND_LEXPOP() BITSTACK_LEXPOP(p->cond_stack)
+#define COND_P() BITSTACK_SET_P(p->cond_stack)
+
+#define CMDARG_PUSH(n) BITSTACK_PUSH(p->cmdarg_stack, (n))
+#define CMDARG_POP() BITSTACK_POP(p->cmdarg_stack)
+#define CMDARG_LEXPOP() BITSTACK_LEXPOP(p->cmdarg_stack)
+#define CMDARG_P() BITSTACK_SET_P(p->cmdarg_stack)
+
+static mrb_sym
+intern_gen(parser_state *p, const char *s)
+{
+ return mrb_intern(p->mrb, s); /* symbol for s, obtained through the parser's mrb state */
+}
+#define intern(s) intern_gen(p,(s)) /* shorthand; needs a parser_state *p in scope */
+
+static void
+cons_free_gen(parser_state *p, node *cons)
+{
+ cons->cdr = p->cells; /* link the released cell in front of the p->cells list ... */
+ p->cells = cons; /* ... from which cons_gen takes cells before allocating new ones */
+}
+#define cons_free(c) cons_free_gen(p, (c))
+
+/* Allocate size bytes from p->pool; does not return when the pool yields NULL. */
+static void*
+parser_palloc(parser_state *p, size_t size)
+{
+  void *mem = mrb_pool_alloc(p->pool, size);
+
+  /* on failure, leave through the jump buffer p->jmp with value 1 */
+  if (!mem) longjmp(p->jmp, 1);
+  return mem;
+}
+
+/* Make a cell (car . cdr), reusing one from the p->cells list when available. */
+static node*
+cons_gen(parser_state *p, node *car, node *cdr)
+{
+  node *cell = p->cells;
+
+  if (cell) {
+    /* take the first recycled cell off the list */
+    p->cells = cell->cdr;
+  }
+  else {
+    cell = parser_palloc(p, sizeof(mrb_ast_node));
+  }
+  cell->car = car;
+  cell->cdr = cdr;
+  return cell;
+}
+#define cons(a,b) cons_gen(p,(a),(b))
+
+static node*
+list1_gen(parser_state *p, node *a)
+{
+ return cons(a, 0); /* one-element list: (a) */
+}
+#define list1(a) list1_gen(p, (a))
+
+static node*
+list2_gen(parser_state *p, node *a, node *b)
+{
+  return cons(a, list1(b)); /* two-element list: (a b) */
+}
+#define list2(a,b) list2_gen(p, (a),(b))
+
+static node*
+list3_gen(parser_state *p, node *a, node *b, node *c)
+{
+  return cons(a, list2(b, c)); /* three-element list: (a b c) */
+}
+#define list3(a,b,c) list3_gen(p, (a),(b),(c))
+
+static node*
+list4_gen(parser_state *p, node *a, node *b, node *c, node *d)
+{
+  return cons(a, list3(b, c, d)); /* four-element list: (a b c d) */
+}
+#define list4(a,b,c,d) list4_gen(p, (a),(b),(c),(d))
+
+static node*
+list5_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e)
+{
+  return cons(a, list4(b, c, d, e)); /* five-element list: (a b c d e) */
+}
+#define list5(a,b,c,d,e) list5_gen(p, (a),(b),(c),(d),(e))
+
+static node*
+list6_gen(parser_state *p, node *a, node *b, node *c, node *d, node *e, node *f)
+{
+  return cons(a, list5(b, c, d, e, f)); /* six-element list: (a b c d e f) */
+}
+#define list6(a,b,c,d,e,f) list6_gen(p, (a),(b),(c),(d),(e),(f))
+
+/* Destructively append list b to list a; returns b when a is NULL, otherwise a. */
+static node*
+append_gen(parser_state *p, node *a, node *b)
+{
+  node *last;
+
+  if (!a) return b;
+
+  /* advance to the final cell of a */
+  for (last = a; last->cdr; last = last->cdr)
+    ;
+  if (b) last->cdr = b;
+  return a;
+}
+#define append(a,b) append_gen(p,(a),(b))
+#define push(a,b) append_gen(p,(a),list1(b))
+
+static char* /* returns a NUL-terminated copy of the first len bytes of s, allocated with parser_palloc */
+parser_strndup(parser_state *p, const char *s, size_t len)
+{
+  char *b = parser_palloc(p, len+1);
+  memcpy(b, s, len);
+  b[len] = '\0';
+  return b;
+}
+#undef strndup /* drop any strndup macro from <string.h> first, as is done for strdup below */
+#define strndup(s,len) parser_strndup(p, s, len)
+
+static char*
+parser_strdup(parser_state *p, const char *s)
+{
+ return parser_strndup(p, s, strlen(s)); /* copy of the whole NUL-terminated string s */
+}
+#undef strdup
+#define strdup(s) parser_strdup(p, s) /* replaces any earlier strdup macro; needs a parser_state *p in scope */
+
+// local variable scope bookkeeping -----------------------------
+
+static node*
+local_switch(parser_state *p)
+{
+ node *prev = p->locals; /* remember the current scope chain; it is returned to the caller */
+
+ p->locals = cons(0, 0); /* replace it with a fresh chain holding one empty scope */
+ return prev;
+}
+
+static void
+local_resume(parser_state *p, node *prev)
+{
+ p->locals = prev; /* reinstall a scope chain, e.g. the value local_switch returned */
+}
+
+static void
+local_nest(parser_state *p)
+{
+ p->locals = cons(0, p->locals); /* push an empty scope in front of the existing chain */
+}
+
+static void
+local_unnest(parser_state *p)
+{
+ p->locals = p->locals->cdr; /* drop the first scope; p->locals must be non-NULL here */
+}
+
+/* Return 1 when sym is recorded in any scope reachable from p->locals, else 0. */
+static int
+local_var_p(parser_state *p, mrb_sym sym)
+{
+  node *scope;
+  node *var;
+
+  /* walk every scope on the chain, then every name stored in that scope */
+  for (scope = p->locals; scope; scope = scope->cdr) {
+    for (var = scope->car; var; var = var->cdr) {
+      if ((mrb_sym)var->car == sym) return 1;
+    }
+  }
+  return 0;
+}
+
+static void
+local_add_f(parser_state *p, mrb_sym sym)
+{
+ p->locals->car = push(p->locals->car, (node*)sym); /* append sym (cast into a node* slot) to the first scope's list, without a duplicate check */
+}
+
+static void
+local_add(parser_state *p, mrb_sym sym)
+{
+  /* record sym only when no scope on p->locals already has it */
+  if (local_var_p(p, sym)) return;
+  local_add_f(p, sym);
+}
+
+// (:scope vars . body) -- vars is p->locals->car, the variable list of the first scope
+static node*
+new_scope(parser_state *p, node *body)
+{
+ return cons((node*)NODE_SCOPE, cons(p->locals->car, body));
+}
+
+// (:begin body), or just (:begin) when body is NULL
+static node*
+new_begin(parser_state *p, node *body)
+{
+  if (!body) return cons((node*)NODE_BEGIN, 0);
+
+  return list2((node*)NODE_BEGIN, body);
+}
+
+#define newline_node(n) (n)
+
+// (:rescue body rescue else) -- tag cell followed by a three-element list
+static node*
+new_rescue(parser_state *p, node *body, node *resq, node *els)
+{
+  return cons((node*)NODE_RESCUE, list3(body, resq, els));
+}
+
+// (:ensure body 0 . ensure) -- a NULL element sits between body and the ensure part, which is the final cdr
+static node*
+new_ensure(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_ENSURE, cons(a, cons(0, b)));
+}
+
+// (:nil) -- a single cell carrying only the tag
+static node*
+new_nil(parser_state *p)
+{
+  return cons((node*)NODE_NIL, 0);
+}
+
+// (:true) -- a single cell carrying only the tag
+static node*
+new_true(parser_state *p)
+{
+  return cons((node*)NODE_TRUE, 0);
+}
+
+// (:false)
+static node*
+new_false(parser_state *p)
+{
+ return list1((node*)NODE_FALSE);
+}
+
+// (:alias new . old) -- both mrb_sym values are cast into node* slots
+static node*
+new_alias(parser_state *p, mrb_sym a, mrb_sym b)
+{
+ return cons((node*)NODE_ALIAS, cons((node*)a, (node*)b));
+}
+
+// (:if cond then else) -- tag cell followed by a three-element list
+static node*
+new_if(parser_state *p, node *a, node *b, node *c)
+{
+  return cons((node*)NODE_IF, list3(a, b, c));
+}
+
+// unless is emitted as (:if cond else then): a NODE_IF with the two branches swapped
+static node*
+new_unless(parser_state *p, node *a, node *b, node *c)
+{
+  return cons((node*)NODE_IF, list3(a, c, b));
+}
+
+// (:while cond . body) -- body is the cdr of the cond cell
+static node*
+new_while(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_WHILE, cons(a, b));
+}
+
+// (:until cond . body) -- body is the cdr of the cond cell
+static node*
+new_until(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_UNTIL, cons(a, b));
+}
+
+// (:for var obj body) -- tag cell followed by a three-element list
+static node*
+new_for(parser_state *p, node *v, node *o, node *b)
+{
+  return cons((node*)NODE_FOR, list3(v, o, b));
+}
+
+// (:case a ((when ...) body) ((when...) body))
+static node*
+new_case(parser_state *p, node *a, node *b)
+{
+  node *head = list2((node*)NODE_CASE, a);
+  node *tail;
+
+  /* hang the clause list b off the last cell of (:case a) */
+  for (tail = head; tail->cdr; tail = tail->cdr)
+    ;
+  tail->cdr = b;
+  return head;
+}
+
+// (:postexe . a) -- a is stored as the cdr, not wrapped in a list
+static node*
+new_postexe(parser_state *p, node *a)
+{
+ return cons((node*)NODE_POSTEXE, a);
+}
+
+// (:self) -- a single cell carrying only the tag
+static node*
+new_self(parser_state *p)
+{
+  return cons((node*)NODE_SELF, 0);
+}
+
+// (:call a b c) -- b is an mrb_sym cast into a node* slot
+static node*
+new_call(parser_state *p, node *a, mrb_sym b, node *c)
+{
+  return cons((node*)NODE_CALL, list3(a, (node*)b, c));
+}
+
+// (:fcall self mid args) -- the receiver is a freshly built (:self) node
+static node*
+new_fcall(parser_state *p, mrb_sym b, node *c)
+{
+  return cons((node*)NODE_FCALL, list3(new_self(p), (node*)b, c));
+}
+
+#if 0
+// (:vcall self mid) -- disabled: this definition is compiled out by the surrounding #if 0
+static node*
+new_vcall(parser_state *p, mrb_sym b)
+{
+ return list3((node*)NODE_VCALL, new_self(p), (node*)b);
+}
+#endif
+
+// (:super . c) -- c is stored as the cdr of the tag cell
+static node*
+new_super(parser_state *p, node *c)
+{
+ return cons((node*)NODE_SUPER, c);
+}
+
+// (:zsuper) -- a single cell carrying only the tag
+static node*
+new_zsuper(parser_state *p)
+{
+  return cons((node*)NODE_ZSUPER, 0);
+}
+
+// (:yield . args) -- args is c->car (NULL when c is NULL); a non-NULL c->cdr is reported through yyerror
+static node*
+new_yield(parser_state *p, node *c)
+{
+  node *args = 0;
+
+  if (c) {
+    if (c->cdr) yyerror(p, "both block arg and actual block given");
+    args = c->car;
+  }
+  return cons((node*)NODE_YIELD, args);
+}
+
+// (:return . c) -- c is stored as the cdr of the tag cell
+static node*
+new_return(parser_state *p, node *c)
+{
+ return cons((node*)NODE_RETURN, c);
+}
+
+// (:break . c) -- c is stored as the cdr of the tag cell
+static node*
+new_break(parser_state *p, node *c)
+{
+ return cons((node*)NODE_BREAK, c);
+}
+
+// (:next . c) -- c is stored as the cdr of the tag cell
+static node*
+new_next(parser_state *p, node *c)
+{
+ return cons((node*)NODE_NEXT, c);
+}
+
+// (:redo) -- a single cell carrying only the tag
+static node*
+new_redo(parser_state *p)
+{
+  return cons((node*)NODE_REDO, 0);
+}
+
+// (:retry) -- a single cell carrying only the tag
+static node*
+new_retry(parser_state *p)
+{
+  return cons((node*)NODE_RETRY, 0);
+}
+
+// (:dot2 a . b) -- b is the cdr of the cell holding a
+static node*
+new_dot2(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_DOT2, cons(a, b));
+}
+
+// (:dot3 a . b) -- b is the cdr of the cell holding a
+static node*
+new_dot3(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_DOT3, cons(a, b));
+}
+
+// (:colon2 b . c) -- c is an mrb_sym cast into the cdr slot
+static node*
+new_colon2(parser_state *p, node *b, mrb_sym c)
+{
+ return cons((node*)NODE_COLON2, cons(b, (node*)c));
+}
+
+// (:colon3 . c) -- c is an mrb_sym cast into the cdr slot
+static node*
+new_colon3(parser_state *p, mrb_sym c)
+{
+ return cons((node*)NODE_COLON3, (node*)c);
+}
+
+// (:and a . b) -- b is the cdr of the cell holding a
+static node*
+new_and(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_AND, cons(a, b));
+}
+
+// (:or a . b) -- b is the cdr of the cell holding a
+static node*
+new_or(parser_state *p, node *a, node *b)
+{
+ return cons((node*)NODE_OR, cons(a, b));
+}
+
+// (:array a...) -- the list a becomes the cdr of the tag cell directly
+static node*
+new_array(parser_state *p, node *a)
+{
+ return cons((node*)NODE_ARRAY, a);
+}
+
+// (:splat . a) -- a is stored as the cdr of the tag cell
+static node*
+new_splat(parser_state *p, node *a)
+{
+ return cons((node*)NODE_SPLAT, a);
+}
+
+// (:hash (k . v) (k . v)...) -- a (presumably a list of (k . v) pairs; confirm at the call sites) becomes the cdr directly
+static node*
+new_hash(parser_state *p, node *a)
+{
+ return cons((node*)NODE_HASH, a);
+}
+
+// (:sym . sym) -- the mrb_sym is cast into the cdr slot
+static node*
+new_sym(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_SYM, (node*)sym);
+}
+
+// (:lvar . sym) -- the mrb_sym is cast into the cdr slot
+static node*
+new_lvar(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_LVAR, (node*)sym);
+}
+
+// (:gvar . sym) -- the mrb_sym is cast into the cdr slot
+static node*
+new_gvar(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_GVAR, (node*)sym);
+}
+
+// (:ivar . sym) -- the mrb_sym is cast into the cdr slot
+static node*
+new_ivar(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_IVAR, (node*)sym);
+}
+
+// (:cvar . sym) -- the mrb_sym is cast into the cdr slot
+static node*
+new_cvar(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_CVAR, (node*)sym);
+}
+
+// (:const . sym) -- the mrb_sym is cast into the cdr slot
+static node*
+new_const(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_CONST, (node*)sym);
+}
+
+// (:undef . sym) -- a single mrb_sym cast into the cdr slot, not a list
+static node*
+new_undef(parser_state *p, mrb_sym sym)
+{
+ return cons((node*)NODE_UNDEF, (node*)sym);
+}
+
+// (:class class super (lv . body)) -- lv is p->locals->car, the variable list of the first scope
+static node*
+new_class(parser_state *p, node *c, node *s, node *b)
+{
+ return list4((node*)NODE_CLASS, c, s, cons(p->locals->car, b));
+}
+
+// (:sclass obj (lv . body)) -- lv is p->locals->car, the variable list of the first scope
+static node*
+new_sclass(parser_state *p, node *o, node *b)
+{
+ return list3((node*)NODE_SCLASS, o, cons(p->locals->car, b));
+}
+
+// (:module module (lv . body)) -- lv is p->locals->car, the variable list of the first scope
+static node*
+new_module(parser_state *p, node *m, node *b)
+{
+ return list3((node*)NODE_MODULE, m, cons(p->locals->car, b));
+}
+
+// (:def m lv arg body) -- a flat five-element list; lv is p->locals->car
+static node*
+new_def(parser_state *p, mrb_sym m, node *a, node *b)
+{
+ return list5((node*)NODE_DEF, (node*)m, p->locals->car, a, b);
+}
+
+// (:sdef obj m lv arg body) -- a flat six-element list; lv is p->locals->car
+static node*
+new_sdef(parser_state *p, node *o, mrb_sym m, node *a, node *b)
+{
+ return list6((node*)NODE_SDEF, o, (node*)m, p->locals->car, a, b);
+}
+
+// (:arg . sym)
+// Formal-argument node; the symbol id is stored directly in the cdr slot.
+static node*
+new_arg(parser_state *p, mrb_sym sym)
+{
+  node *payload = (node*)sym;
+
+  return cons((node*)NODE_ARG, payload);
+}
+
+// Parameter list as the chain (m o r m2 . b):
+//   m:  (a b c)               leading mandatory arguments
+//   o:  ((a . e1) (b . e2))   optional arguments with their defaults
+//   r:  a                     rest argument symbol
+//   m2: (a b c)               mandatory arguments after the rest argument
+//   b:  a                     block argument symbol (stored as the final cdr)
+static node*
+new_args(parser_state *p, node *m, node *opt, mrb_sym rest, node *m2, mrb_sym blk)
+{
+  /* built innermost-first, the same order as four successive cons calls */
+  return cons(m, cons(opt, cons((node*)rest, cons(m2, (node*)blk))));
+}
+
+// (:block_arg . a)
+// Wraps the expression passed with `&` in a block-argument node.
+static node*
+new_block_arg(parser_state *p, node *a)
+{
+  node *tag = (node*)NODE_BLOCK_ARG;
+
+  return cons(tag, a);
+}
+
+// (:block lv arg body)
+// lv is the current local-variable table (p->locals->car).
+static node*
+new_block(parser_state *p, node *a, node *b)
+{
+  node *lv = p->locals->car;
+
+  return list4((node*)NODE_BLOCK, lv, a, b);
+}
+
+// (:lambda lv arg body)
+// lv is the current local-variable table (p->locals->car).
+static node*
+new_lambda(parser_state *p, node *a, node *b)
+{
+  node *lv = p->locals->car;
+
+  return list4((node*)NODE_LAMBDA, lv, a, b);
+}
+
+// (:asgn lhs . rhs)
+// Single assignment; lhs and rhs are stored as one dotted pair.
+static node*
+new_asgn(parser_state *p, node *a, node *b)
+{
+  node *pair = cons(a, b);
+
+  return cons((node*)NODE_ASGN, pair);
+}
+
+// (:masgn mlhs . mrhs), where mlhs = (pre rest post)
+// Multiple assignment; targets and values are stored as one dotted pair.
+static node*
+new_masgn(parser_state *p, node *a, node *b)
+{
+  node *pair = cons(a, b);
+
+  return cons((node*)NODE_MASGN, pair);
+}
+
+// (:op_asgn lhs op rhs)
+// Operator assignment (`a += b` etc.); op is the operator symbol.
+static node*
+new_op_asgn(parser_state *p, node *a, mrb_sym op, node *b)
+{
+  node *opsym = (node*)op;
+
+  return list4((node*)NODE_OP_ASGN, a, opsym, b);
+}
+
+// (:int s base)
+// Integer literal kept as text: s is a strdup'ed copy of the digits and
+// base is stored directly in a list slot.
+static node*
+new_int(parser_state *p, const char *s, int base)
+{
+  node *digits = (node*)strdup(s);
+
+  return list3((node*)NODE_INT, digits, (node*)base);
+}
+
+// (:float . s)
+// Float literal kept as text: s is a strdup'ed copy of the source digits.
+static node*
+new_float(parser_state *p, const char *s)
+{
+  node *digits = (node*)strdup(s);
+
+  return cons((node*)NODE_FLOAT, digits);
+}
+
+// (:str s . len)
+// String literal: s is a strndup'ed copy of at most len bytes and len is
+// stored directly in the final cdr.
+// NOTE(review): strndup stops at the first NUL byte, so the copy can be
+// shorter than the recorded len if s contains one - confirm with callers.
+static node*
+new_str(parser_state *p, const char *s, size_t len)
+{
+  node *body = cons((node*)strndup(s, len), (node*)len);
+
+  return cons((node*)NODE_STR, body);
+}
+
+// (:dstr . a)
+// Interpolated string; a is the list of string parts and embedded code.
+static node*
+new_dstr(parser_state *p, node *a)
+{
+  node *tag = (node*)NODE_DSTR;
+
+  return cons(tag, a);
+}
+
+// (:backref . n)
+// Back-reference node; n is stored directly in the cdr slot.
+static node*
+new_back_ref(parser_state *p, int n)
+{
+  node *payload = (node*)n;
+
+  return cons((node*)NODE_BACK_REF, payload);
+}
+
+// (:nthref . n)
+// Numbered-reference node; n is stored directly in the cdr slot.
+static node*
+new_nth_ref(parser_state *p, int n)
+{
+  node *payload = (node*)n;
+
+  return cons((node*)NODE_NTH_REF, payload);
+}
+
+static void
+new_bv(parser_state *p, mrb_sym id) /* currently a no-op; the bvar grammar rule calls it right after local_add_f() */
+{
+}
+
+// xxx -----------------------------
+
+// (:call a op)
+// Unary operator as a method call on recv, with no argument node.
+static node*
+call_uni_op(parser_state *p, node *recv, char *m)
+{
+  node *call = new_call(p, recv, intern(m), 0);
+
+  return call;
+}
+
+// (:call a op b)
+// Binary operator as a method call on recv; the argument node is ((arg1)),
+// i.e. a one-element argument list with an empty cdr.
+static node*
+call_bin_op(parser_state *p, node *recv, char *m, node *arg1)
+{
+  return new_call(p,
+                  recv,
+                  intern(m),
+                  list1(list1(arg1)));
+}
+
+// (:match a . b)
+// Node for `a =~ b`; both operands are stored as one dotted pair.
+static node*
+match_op(parser_state *p, node *a, node *b)
+{
+  node *operands = cons(a, b);
+
+  return cons((node*)NODE_MATCH, operands);
+}
+
+
+/* Attach block b to the argument node a, which is (arglist . blockarg).
+ * Does nothing when b is null. If a already has something in its cdr an
+ * error is reported, and b is stored over it anyway. */
+static void
+args_with_block(parser_state *p, node *a, node *b)
+{
+  if (!b) return;
+
+  if (a->cdr) {
+    yyerror(p, "both block arg and actual block given");
+  }
+  a->cdr = b;
+}
+
+/* Attach block b to call node a. The argument node is taken from the fourth
+ * list cell of a; when that cell is empty a fresh (nil . b) is stored there.
+ * NOTE(review): this assumes a has at least four cells (e.g. a :call built
+ * by new_call). The grammar also passes any `command` followed by do_block
+ * here - confirm nodes such as super/return/break/next have that shape. */
+static void
+call_with_block(parser_state *p, node *a, node *b)
+{
+  node *slot = a->cdr->cdr->cdr;
+
+  if (slot->car) {
+    args_with_block(p, slot->car, b);
+    return;
+  }
+  slot->car = cons(0, b);
+}
+
+/* (:negate . n) - wraps a numeric literal node that followed a unary minus. */
+static node*
+negate_lit(parser_state *p, node *n)
+{
+  node *tag = (node*)NODE_NEGATE;
+
+  return cons(tag, n);
+}
+
+static node*
+cond(node *n) /* pass-through hook for condition expressions: returns n unchanged */
+{
+ return n;
+}
+
+/* Convert the call_args node n = (arglist . blockarg) given to
+ * return/break/next into a value node.
+ *
+ * - a block argument is rejected with a syntax error;
+ * - a single argument is returned as-is;
+ * - several arguments are wrapped in an array node.
+ *
+ * Returns 0 when there is no argument list at all. That happens for
+ * `return &blk`, where call_args builds (0 . blockarg); without the guard
+ * n->car->cdr would dereference a null pointer after the error is reported.
+ */
+static node*
+ret_args(parser_state *p, node *n)
+{
+  if (n->cdr) {
+    yyerror(p, "block argument should not be given");
+  }
+  if (!n->car) return 0;              /* only a block argument was given */
+  if (!n->car->cdr) return n->car->car;
+  return new_array(p, n->car);
+}
+
+/* Called on an assignment target: a local-variable node gets its name
+ * registered with local_add(); every other node kind is left alone. */
+static void
+assignable(parser_state *p, node *lhs)
+{
+  if ((int)lhs->car == NODE_LVAR) {
+    local_add(p, (mrb_sym)lhs->cdr);
+  }
+}
+
+/* Resolve a variable node that is being read. An :lvar whose name is not a
+ * known local (per local_var_p) is replaced by a function call with no
+ * arguments and the original cell is released with cons_free(); any other
+ * node is returned unchanged. */
+static node*
+var_reference(parser_state *p, node *lhs)
+{
+  node *call;
+
+  if ((int)lhs->car != NODE_LVAR) return lhs;
+  if (local_var_p(p, (mrb_sym)lhs->cdr)) return lhs;
+
+  call = new_fcall(p, (mrb_sym)lhs->cdr, 0);
+  cons_free(lhs);
+  return call;
+}
+
+// xxx -----------------------------
+
+%}
+
+%pure_parser /* reentrant parser: semantic values are passed to yylex instead of living in globals */
+%parse-param {parser_state *p} /* every action below reaches parser state through p */
+%lex-param {parser_state *p}
+
+%union {
+ node *node; /* AST fragments (see the %type <node> declarations) */
+ mrb_sym id; /* symbol ids (identifiers, operators, method names) */
+ int num; /* integers saved by mid-rule actions via $<num>$ */
+ const struct vtable *vars; /* NOTE(review): no <vars> declaration appears in this section - confirm it is still used */
+}
+
+%token /* reserved words; none of these is declared with a semantic value type */
+ keyword_class
+ keyword_module
+ keyword_def
+ keyword_undef
+ keyword_begin
+ keyword_rescue
+ keyword_ensure
+ keyword_end
+ keyword_if
+ keyword_unless
+ keyword_then
+ keyword_elsif
+ keyword_else
+ keyword_case
+ keyword_when
+ keyword_while
+ keyword_until
+ keyword_for
+ keyword_break
+ keyword_next
+ keyword_redo
+ keyword_retry
+ keyword_in
+ keyword_do /* `do` opening a brace_block alternative */
+ keyword_do_cond /* `do` accepted by the `do` rule after while/until/for conditions */
+ keyword_do_block /* `do` opening a do_block */
+ keyword_do_LAMBDA /* `do` opening a lambda_body */
+ keyword_return
+ keyword_yield
+ keyword_super
+ keyword_self
+ keyword_nil
+ keyword_true
+ keyword_false
+ keyword_and
+ keyword_or
+ keyword_not
+ modifier_if /* modifier_*: postfix forms, used as `stmt modifier_x ...` in the stmt rule */
+ modifier_unless
+ modifier_while
+ modifier_until
+ modifier_rescue
+ keyword_alias
+ keyword_BEGIN
+ keyword_END
+ keyword__LINE__
+ keyword__FILE__
+ keyword__ENCODING__
+%token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL /* tokens carrying a symbol id */
+%token <node> tINTEGER tFLOAT tCHAR tREGEXP /* literal tokens arrive as ready-made nodes */
+%token <node> tSTRING tSTRING_PART
+%token <node> tNTH_REF tBACK_REF
+%token <num> tREGEXP_END
+
+%type <node> singleton string string_interp regexp /* nonterminals whose value is an AST node */
+%type <node> literal numeric cpath
+%type <node> top_compstmt top_stmts top_stmt
+%type <node> bodystmt compstmt stmts stmt expr arg primary command command_call method_call
+%type <node> expr_value arg_value primary_value
+%type <node> if_tail opt_else case_body cases opt_rescue exc_list exc_var opt_ensure
+%type <node> args call_args opt_call_args
+%type <node> paren_args opt_paren_args variable
+%type <node> command_args aref_args opt_block_arg block_arg var_ref var_lhs
+%type <node> command_asgn mrhs superclass block_call block_command
+%type <node> f_block_optarg f_block_opt
+%type <node> f_arglist f_args f_arg f_arg_item f_optarg f_marg f_marg_list f_margs
+%type <node> assoc_list assocs assoc undef_list backref for_var
+%type <node> block_param opt_block_param block_param_def f_opt
+%type <node> bv_decls opt_bv_decl bvar f_larglist lambda_body
+%type <node> brace_block cmd_brace_block do_block lhs none fitem f_bad_arg
+%type <node> mlhs mlhs_list mlhs_post mlhs_basic mlhs_item mlhs_node mlhs_inner
+%type <id> fsym sym symbol operation operation2 operation3 /* nonterminals whose value is a symbol id */
+%type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg
+
+%token tUPLUS /* unary+ */
+%token tUMINUS /* unary- */
+%token tPOW /* ** */
+%token tCMP /* <=> */
+%token tEQ /* == */
+%token tEQQ /* === */
+%token tNEQ /* != */
+%token tGEQ /* >= */
+%token tLEQ /* <= */
+%token tANDOP tOROP /* && and || */
+%token tMATCH tNMATCH /* =~ and !~ */
+%token tDOT2 tDOT3 /* .. and ... */
+%token tAREF tASET /* [] and []= */
+%token tLSHFT tRSHFT /* << and >> */
+%token tCOLON2 /* :: */
+%token tCOLON3 /* :: at EXPR_BEG */
+%token <id> tOP_ASGN /* +=, -= etc. */
+%token tASSOC /* => */
+%token tLPAREN /* ( */
+%token tLPAREN_ARG /* ( */
+%token tRPAREN /* ) */
+%token tLBRACK /* [ */
+%token tLBRACE /* { */
+%token tLBRACE_ARG /* { */
+%token tSTAR /* * */
+%token tAMPER /* & */
+%token tLAMBDA /* -> */
+%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG /* openers for symbol, regexp and word-list literals */
+%token tSTRING_BEG tSTRING_DVAR tLAMBEG /* tLAMBEG opens the brace form of lambda_body */
+
+/*
+ * precedence table
+ */
+
+%nonassoc tLOWEST /* lowest precedence; used only as a %prec target */
+%nonassoc tLBRACE_ARG
+
+%nonassoc modifier_if modifier_unless modifier_while modifier_until
+%left keyword_or keyword_and
+%right keyword_not
+%right '=' tOP_ASGN
+%left modifier_rescue
+%right '?' ':'
+%nonassoc tDOT2 tDOT3
+%left tOROP
+%left tANDOP
+%nonassoc tCMP tEQ tEQQ tNEQ tMATCH tNMATCH
+%left '>' tGEQ '<' tLEQ
+%left '|' '^'
+%left '&'
+%left tLSHFT tRSHFT
+%left '+' '-'
+%left '*' '/' '%'
+%right tUMINUS_NUM tUMINUS
+%right tPOW
+%right '!' '~' tUPLUS
+
+%nonassoc idNULL /* NOTE(review): no rule in the visible grammar references the id* names below - confirm they are still needed */
+%nonassoc idRespond_to
+%nonassoc idIFUNC
+%nonassoc idCFUNC
+%nonassoc id_core_set_method_alias
+%nonassoc id_core_set_variable_alias
+%nonassoc id_core_undef_method
+%nonassoc id_core_define_method
+%nonassoc id_core_define_singleton_method
+%nonassoc id_core_set_postexe
+
+%token tLAST_TOKEN
+
+%%
+program : { /* start symbol; this mid-rule action runs before any statement is parsed */
+ p->lstate = EXPR_BEG;
+ local_nest(p); /* paired with local_unnest() in the final action */
+ }
+ top_compstmt
+ {
+ p->tree = new_scope(p, $2); /* $2 is top_compstmt ($1 is the mid-rule action); the result is left in p->tree */
+ local_unnest(p);
+ }
+ ;
+
+top_compstmt : top_stmts opt_terms /* top-level statement list followed by optional terminators */
+ {
+ $$ = $1; /* the terminators contribute nothing to the value */
+ }
+ ;
+
+/* Top-level statement list. Every alternative yields a node built by
+ * new_begin(), so the recursive alternative can push() further statements
+ * onto it. */
+top_stmts : none
+ {
+   $$ = new_begin(p, 0);
+ }
+ | top_stmt
+ {
+   $$ = new_begin(p, $1);
+ }
+ | top_stmts terms top_stmt
+ {
+   $$ = push($1, newline_node($3));
+ }
+ | error top_stmt
+ {
+   /* Error recovery: wrap the recovered statement as the `stmts` rule
+    * does. Returning the bare statement would let a following push()
+    * walk a node that is not a list. */
+   $$ = new_begin(p, $2);
+ }
+ ;
+
+top_stmt : stmt /* a top-level statement: an ordinary stmt or a BEGIN { ... } block */
+ | keyword_BEGIN
+ {
+ if (p->in_def || p->in_single) {
+ yyerror(p, "BEGIN in method");
+ }
+ $<node>$ = local_switch(p); /* value of this mid-rule action ($2), handed back to local_resume() below */
+ }
+ '{' top_compstmt '}'
+ {
+ p->begin_tree = push(p->begin_tree, $4); /* BEGIN bodies are collected separately in p->begin_tree */
+ local_resume(p, $<node>2);
+ $$ = 0; /* nothing is left in the ordinary statement list */
+ }
+ ;
+
+/* Body of begin/def/class/module: statements followed by optional
+ * rescue ($2), else ($3) and ensure ($4) clauses. */
+bodystmt : compstmt
+ opt_rescue
+ opt_else
+ opt_ensure
+ {
+   if ($2) {
+     $$ = new_rescue(p, $1, $2, $3);
+   }
+   else if ($3) {
+     yywarn(p, "else without rescue is useless");
+     /* append to the body ($1); the original read $$ before it had
+      * been assigned anywhere in this action */
+     $$ = append($1, $3);
+   }
+   else {
+     $$ = $1;
+   }
+   if ($4) {
+     if ($$) {
+       $$ = new_ensure(p, $$, $4);
+     }
+     else {
+       /* no body at all: the ensure clause runs and nil is the value */
+       $$ = push($4, new_nil(p));
+     }
+   }
+ }
+ ;
+
+compstmt : stmts opt_terms /* statement list followed by optional terminators */
+ {
+ $$ = $1; /* the terminators contribute nothing to the value */
+ }
+ ;
+
+stmts : none /* statement list; every alternative yields a node built by new_begin() */
+ {
+ $$ = new_begin(p, 0);
+ }
+ | stmt
+ {
+ $$ = new_begin(p, $1);
+ }
+ | stmts terms stmt
+ {
+ $$ = push($1, newline_node($3));
+ }
+ | error stmt /* error recovery: continue with the next statement */
+ {
+ $$ = new_begin(p, $2);
+ }
+ ;
+
+stmt : keyword_alias fsym {p->lstate = EXPR_FNAME;} fsym /* the mid-rule action is $3, so the second name is $4 */
+ {
+ $$ = new_alias(p, $2, $4);
+ }
+ | keyword_undef undef_list
+ {
+ $$ = $2;
+ }
+ | stmt modifier_if expr_value /* postfix forms: the statement ($1) becomes the body, $3 the condition */
+ {
+ $$ = new_if(p, cond($3), $1, 0);
+ }
+ | stmt modifier_unless expr_value
+ {
+ $$ = new_unless(p, cond($3), $1, 0);
+ }
+ | stmt modifier_while expr_value
+ {
+ $$ = new_while(p, cond($3), $1);
+ }
+ | stmt modifier_until expr_value
+ {
+ $$ = new_until(p, cond($3), $1);
+ }
+ | stmt modifier_rescue stmt
+ {
+ $$ = new_rescue(p, $1, list1(list3(0, 0, $3)), 0); /* one rescue clause with no exception list and no variable */
+ }
+ | keyword_END '{' compstmt '}'
+ {
+ if (p->in_def || p->in_single) {
+ yywarn(p, "END in method; use at_exit");
+ }
+ $$ = new_postexe(p, $3);
+ }
+ | command_asgn
+ | mlhs '=' command_call
+ {
+ $$ = new_masgn(p, $1, list1($3));
+ }
+ | var_lhs tOP_ASGN command_call
+ {
+ $$ = new_op_asgn(p, $1, $2, $3);
+ }
+ | primary_value '[' opt_call_args rbracket tOP_ASGN command_call
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, intern("[]"), $3), $5, $6);
+ }
+ | primary_value '.' tIDENTIFIER tOP_ASGN command_call
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5);
+ }
+ | primary_value '.' tCONSTANT tOP_ASGN command_call
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5);
+ }
+ | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call
+ {
+ yyerror(p, "constant re-assignment");
+ $$ = 0; /* note: the matching alternative in `arg` yields new_begin(p, 0) instead */
+ }
+ | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5);
+ }
+ | backref tOP_ASGN command_call
+ {
+ backref_error(p, $1);
+ $$ = new_begin(p, 0);
+ }
+ | lhs '=' mrhs
+ {
+ $$ = new_asgn(p, $1, new_array(p, $3)); /* several right-hand values are packed into one array */
+ }
+ | mlhs '=' arg_value
+ {
+ $$ = new_masgn(p, $1, $3);
+ }
+ | mlhs '=' mrhs
+ {
+ $$ = new_masgn(p, $1, new_array(p, $3));
+ }
+ | expr
+ ;
+
+command_asgn : lhs '=' command_call /* assignment whose right side is a command call */
+ {
+ $$ = new_asgn(p, $1, $3);
+ }
+ | lhs '=' command_asgn /* right-recursive, so assignments chain: a = b = cmd arg */
+ {
+ $$ = new_asgn(p, $1, $3);
+ }
+ ;
+
+
+expr : command_call /* expressions joined by the keyword operators and/or/not */
+ | expr keyword_and expr
+ {
+ $$ = new_and(p, $1, $3);
+ }
+ | expr keyword_or expr
+ {
+ $$ = new_or(p, $1, $3);
+ }
+ | keyword_not opt_nl expr
+ {
+ $$ = call_uni_op(p, cond($3), "!"); /* `not x` becomes a call of the "!" method */
+ }
+ | '!' command_call
+ {
+ $$ = call_uni_op(p, cond($2), "!");
+ }
+ | arg
+ ;
+
+expr_value : expr /* an expr used for its value: never yields a null node */
+ {
+ if (!$1) $$ = new_nil(p); /* a null expression is replaced by a nil node */
+ else $$ = $1;
+ }
+ ;
+
+command_call : command /* either alternative is passed through unchanged (no action) */
+ | block_command
+ ;
+
+/* A block call, optionally followed by a chained command call
+ * (`foo do ... end.bar arg`). */
+block_command : block_call
+ | block_call dot_or_colon operation2 command_args
+ {
+   /* build the chained call; without an action the default value $1
+    * was used and the `.operation2 command_args` part was dropped */
+   $$ = new_call(p, $1, $3, $4);
+ }
+ ;
+
+cmd_brace_block : tLBRACE_ARG /* brace block attached to a command call */
+ {
+ local_nest(p); /* paired with local_unnest() in the final action */
+ }
+ opt_block_param
+ compstmt
+ '}'
+ {
+ $$ = new_block(p, $3, $4); /* $3 = parameters, $4 = body ($2 is the mid-rule action) */
+ local_unnest(p);
+ }
+ ;
+
+command : operation command_args %prec tLOWEST /* calls written without parentheses around the arguments */
+ {
+ $$ = new_fcall(p, $1, $2);
+ }
+ | operation command_args cmd_brace_block
+ {
+ args_with_block(p, $2, $3); /* stores the block in the argument node before the call is built */
+ $$ = new_fcall(p, $1, $2);
+ }
+ | primary_value '.' operation2 command_args %prec tLOWEST
+ {
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | primary_value '.' operation2 command_args cmd_brace_block
+ {
+ args_with_block(p, $4, $5);
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | primary_value tCOLON2 operation2 command_args %prec tLOWEST
+ {
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | primary_value tCOLON2 operation2 command_args cmd_brace_block
+ {
+ args_with_block(p, $4, $5);
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | keyword_super command_args
+ {
+ $$ = new_super(p, $2);
+ }
+ | keyword_yield command_args
+ {
+ $$ = new_yield(p, $2);
+ }
+ | keyword_return call_args
+ {
+ $$ = new_return(p, ret_args(p, $2)); /* ret_args turns the argument node into a single value node */
+ }
+ | keyword_break call_args
+ {
+ $$ = new_break(p, ret_args(p, $2));
+ }
+ | keyword_next call_args
+ {
+ $$ = new_next(p, ret_args(p, $2));
+ }
+ ;
+
+mlhs : mlhs_basic /* left-hand side of a multiple assignment, optionally parenthesised */
+ {
+ $$ = $1;
+ }
+ | tLPAREN mlhs_inner rparen
+ {
+ $$ = $2;
+ }
+ ;
+
+mlhs_inner : mlhs_basic /* content of a parenthesised mlhs */
+ | tLPAREN mlhs_inner rparen
+ {
+ $$ = list1($2); /* each further level of parentheses wraps the value in a one-element list */
+ }
+ ;
+
+mlhs_basic : mlhs_list /* value is a list of up to three slots: (pre), (pre rest) or (pre rest post) */
+ {
+ $$ = list1($1);
+ }
+ | mlhs_list mlhs_item
+ {
+ $$ = list1(push($1,$2));
+ }
+ | mlhs_list tSTAR mlhs_node
+ {
+ $$ = list2($1, $3);
+ }
+ | mlhs_list tSTAR mlhs_node ',' mlhs_post
+ {
+ $$ = list3($1, $3, $5);
+ }
+ | mlhs_list tSTAR /* anonymous splat: a nil node fills the rest slot */
+ {
+ $$ = list2($1, new_nil(p));
+ }
+ | mlhs_list tSTAR ',' mlhs_post
+ {
+ $$ = list3($1, new_nil(p), $4);
+ }
+ | tSTAR mlhs_node /* no leading targets: the pre slot is 0 */
+ {
+ $$ = list2(0, $2);
+ }
+ | tSTAR mlhs_node ',' mlhs_post
+ {
+ $$ = list3(0, $2, $4);
+ }
+ | tSTAR
+ {
+ $$ = list2(0, new_nil(p));
+ }
+ | tSTAR ',' mlhs_post
+ {
+ $$ = list3(0, new_nil(p), $3);
+ }
+ ;
+
+mlhs_item : mlhs_node /* one target, or a parenthesised nested target list */
+ | tLPAREN mlhs_inner rparen
+ {
+ $$ = $2;
+ }
+ ;
+
+mlhs_list : mlhs_item ',' /* comma-terminated targets, collected left to right */
+ {
+ $$ = list1($1);
+ }
+ | mlhs_list mlhs_item ','
+ {
+ $$ = push($1, $2);
+ }
+ ;
+
+mlhs_post : mlhs_item /* targets that follow the splat in mlhs_basic */
+ {
+ $$ = list1($1);
+ }
+ | mlhs_list mlhs_item
+ {
+ $$ = push($1, $2);
+ }
+ ;
+
+mlhs_node : variable /* one assignable target inside a multiple assignment */
+ {
+ assignable(p, $1); /* no explicit $$ here - presumably relies on Bison pre-loading $$ with $1; TODO confirm */
+ }
+ | primary_value '[' opt_call_args rbracket
+ {
+ $$ = new_call(p, $1, intern("[]"), $3);
+ }
+ | primary_value '.' tIDENTIFIER
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value tCOLON2 tIDENTIFIER
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value '.' tCONSTANT
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value tCOLON2 tCONSTANT
+ {
+ if (p->in_def || p->in_single)
+ yyerror(p, "dynamic constant assignment");
+ $$ = new_colon2(p, $1, $3); /* the node is still built after the error is reported */
+ }
+ | tCOLON3 tCONSTANT
+ {
+ if (p->in_def || p->in_single)
+ yyerror(p, "dynamic constant assignment");
+ $$ = new_colon3(p, $2);
+ }
+ | backref
+ {
+ backref_error(p, $1);
+ $$ = 0;
+ }
+ ;
+
+lhs : variable /* target of a single assignment; same alternatives as mlhs_node */
+ {
+ assignable(p, $1); /* no explicit $$ here - presumably relies on Bison pre-loading $$ with $1; TODO confirm */
+ }
+ | primary_value '[' opt_call_args rbracket
+ {
+ $$ = new_call(p, $1, intern("[]"), $3);
+ }
+ | primary_value '.' tIDENTIFIER
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value tCOLON2 tIDENTIFIER
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value '.' tCONSTANT
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value tCOLON2 tCONSTANT
+ {
+ if (p->in_def || p->in_single)
+ yyerror(p, "dynamic constant assignment");
+ $$ = new_colon2(p, $1, $3); /* the node is still built after the error is reported */
+ }
+ | tCOLON3 tCONSTANT
+ {
+ if (p->in_def || p->in_single)
+ yyerror(p, "dynamic constant assignment");
+ $$ = new_colon3(p, $2);
+ }
+ | backref
+ {
+ backref_error(p, $1);
+ $$ = 0;
+ }
+ ;
+
+cname : tIDENTIFIER /* class/module name; an identifier is accepted by the grammar but reported as an error */
+ {
+ yyerror(p, "class/module name must be CONSTANT");
+ }
+ | tCONSTANT
+ ;
+
+cpath : tCOLON3 cname /* value is (scope . name): scope is 1 for a leading `::`, 0 for a bare name, else the scope expression */
+ {
+ $$ = cons((node*)1, (node*)$2);
+ }
+ | cname
+ {
+ $$ = cons((node*)0, (node*)$1);
+ }
+ | primary_value tCOLON2 cname
+ {
+ $$ = cons($1, (node*)$3);
+ }
+ ;
+
+fname : tIDENTIFIER /* anything usable as a method name, including operators and reserved words */
+ | tCONSTANT
+ | tFID
+ | op
+ {
+ p->lstate = EXPR_ENDFN;
+ $$ = $1;
+ }
+ | reswords
+ {
+ p->lstate = EXPR_ENDFN;
+ $$ = $<id>1; /* reswords has no declared type, so the id member is selected explicitly */
+ }
+ ;
+
+fsym : fname /* method name written bare or as a symbol literal; value is the symbol id either way */
+ | symbol
+ ;
+
+fitem : fsym /* an fsym wrapped in a symbol node */
+ {
+ $$ = new_sym(p, $1);
+ }
+ ;
+
+undef_list : fsym /* names given to `undef` */
+ {
+ $$ = new_undef(p, $1); /* new_undef builds cons(NODE_UNDEF, sym): the cdr is a symbol id, not a list */
+ }
+ | undef_list ',' {p->lstate = EXPR_FNAME;} fitem /* the mid-rule action is $3, so the item is $4 */
+ {
+ $$ = push($1, (node*)$4); /* NOTE(review): pushes onto the pair above; confirm push() tolerates a non-list cdr for `undef a, b` */
+ }
+ ;
+
+/* Operator tokens usable as method names (after `def`, `alias`, `undef`
+ * or in a symbol literal). Each action interns the operator's method name. */
+op : '|' { $$ = intern("|"); }
+ | '^' { $$ = intern("^"); }
+ | '&' { $$ = intern("&"); }
+ | tCMP { $$ = intern("<=>"); }
+ | tEQ { $$ = intern("=="); }
+ | tEQQ { $$ = intern("==="); }
+ | tMATCH { $$ = intern("=~"); }
+ | tNMATCH { $$ = intern("!~"); }
+ | '>' { $$ = intern(">"); }
+ | tGEQ { $$ = intern(">="); }
+ | '<' { $$ = intern("<"); }
+ | tLEQ { $$ = intern("<="); } /* was ">=", which made `def <=` define `>=` */
+ | tNEQ { $$ = intern("!="); }
+ | tLSHFT { $$ = intern("<<"); }
+ | tRSHFT { $$ = intern(">>"); }
+ | '+' { $$ = intern("+"); }
+ | '-' { $$ = intern("-"); }
+ | '*' { $$ = intern("*"); }
+ | tSTAR { $$ = intern("*"); }
+ | '/' { $$ = intern("/"); }
+ | '%' { $$ = intern("%"); }
+ | tPOW { $$ = intern("**"); }
+ | '!' { $$ = intern("!"); }
+ | '~' { $$ = intern("~"); }
+ | tUPLUS { $$ = intern("+@"); }
+ | tUMINUS { $$ = intern("-@"); }
+ | tAREF { $$ = intern("[]"); }
+ | tASET { $$ = intern("[]="); }
+ ;
+
+reswords : keyword__LINE__ | keyword__FILE__ | keyword__ENCODING__ /* reserved words accepted as method names by fname; no actions */
+ | keyword_BEGIN | keyword_END
+ | keyword_alias | keyword_and | keyword_begin
+ | keyword_break | keyword_case | keyword_class | keyword_def
+ | keyword_do | keyword_else | keyword_elsif
+ | keyword_end | keyword_ensure | keyword_false
+ | keyword_for | keyword_in | keyword_module | keyword_next
+ | keyword_nil | keyword_not | keyword_or | keyword_redo
+ | keyword_rescue | keyword_retry | keyword_return | keyword_self
+ | keyword_super | keyword_then | keyword_true | keyword_undef
+ | keyword_when | keyword_yield | keyword_if | keyword_unless
+ | keyword_while | keyword_until
+ ;
+
+arg : lhs '=' arg /* operator-level expressions: assignments, ranges, operators (as method calls), ternary */
+ {
+ $$ = new_asgn(p, $1, $3);
+ }
+ | lhs '=' arg modifier_rescue arg
+ {
+ $$ = new_asgn(p, $1, new_rescue(p, $3, list1(list3(0, 0, $5)), 0)); /* the rescue wraps only the right-hand side */
+ }
+ | var_lhs tOP_ASGN arg
+ {
+ $$ = new_op_asgn(p, $1, $2, $3);
+ }
+ | var_lhs tOP_ASGN arg modifier_rescue arg
+ {
+ $$ = new_op_asgn(p, $1, $2, new_rescue(p, $3, list1(list3(0, 0, $5)), 0));
+ }
+ | primary_value '[' opt_call_args rbracket tOP_ASGN arg
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, intern("[]"), $3), $5, $6);
+ }
+ | primary_value '.' tIDENTIFIER tOP_ASGN arg
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5);
+ }
+ | primary_value '.' tCONSTANT tOP_ASGN arg
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5);
+ }
+ | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg
+ {
+ $$ = new_op_asgn(p, new_call(p, $1, $3, 0), $4, $5);
+ }
+ | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
+ {
+ yyerror(p, "constant re-assignment");
+ $$ = new_begin(p, 0); /* placeholder node after the error */
+ }
+ | tCOLON3 tCONSTANT tOP_ASGN arg
+ {
+ yyerror(p, "constant re-assignment");
+ $$ = new_begin(p, 0);
+ }
+ | backref tOP_ASGN arg
+ {
+ backref_error(p, $1);
+ $$ = new_begin(p, 0);
+ }
+ | arg tDOT2 arg
+ {
+ $$ = new_dot2(p, $1, $3);
+ }
+ | arg tDOT3 arg
+ {
+ $$ = new_dot3(p, $1, $3);
+ }
+ | arg '+' arg /* binary operators below become method calls on the left operand */
+ {
+ $$ = call_bin_op(p, $1, "+", $3);
+ }
+ | arg '-' arg
+ {
+ $$ = call_bin_op(p, $1, "-", $3);
+ }
+ | arg '*' arg
+ {
+ $$ = call_bin_op(p, $1, "*", $3);
+ }
+ | arg '/' arg
+ {
+ $$ = call_bin_op(p, $1, "/", $3);
+ }
+ | arg '%' arg
+ {
+ $$ = call_bin_op(p, $1, "%", $3);
+ }
+ | arg tPOW arg
+ {
+ $$ = call_bin_op(p, $1, "**", $3);
+ }
+ | tUMINUS_NUM tINTEGER tPOW arg
+ {
+ $$ = call_uni_op(p, call_bin_op(p, $2, "**", $4), "-@"); /* the power is built first, then negated: -2 ** 2 is -(2 ** 2) */
+ }
+ | tUMINUS_NUM tFLOAT tPOW arg
+ {
+ $$ = call_uni_op(p, call_bin_op(p, $2, "**", $4), "-@");
+ }
+ | tUPLUS arg
+ {
+ $$ = call_uni_op(p, $2, "+@");
+ }
+ | tUMINUS arg
+ {
+ $$ = call_uni_op(p, $2, "-@");
+ }
+ | arg '|' arg
+ {
+ $$ = call_bin_op(p, $1, "|", $3);
+ }
+ | arg '^' arg
+ {
+ $$ = call_bin_op(p, $1, "^", $3);
+ }
+ | arg '&' arg
+ {
+ $$ = call_bin_op(p, $1, "&", $3);
+ }
+ | arg tCMP arg
+ {
+ $$ = call_bin_op(p, $1, "<=>", $3);
+ }
+ | arg '>' arg
+ {
+ $$ = call_bin_op(p, $1, ">", $3);
+ }
+ | arg tGEQ arg
+ {
+ $$ = call_bin_op(p, $1, ">=", $3);
+ }
+ | arg '<' arg
+ {
+ $$ = call_bin_op(p, $1, "<", $3);
+ }
+ | arg tLEQ arg
+ {
+ $$ = call_bin_op(p, $1, "<=", $3);
+ }
+ | arg tEQ arg
+ {
+ $$ = call_bin_op(p, $1, "==", $3);
+ }
+ | arg tEQQ arg
+ {
+ $$ = call_bin_op(p, $1, "===", $3);
+ }
+ | arg tNEQ arg
+ {
+ $$ = call_bin_op(p, $1, "!=", $3);
+ }
+ | arg tMATCH arg
+ {
+ $$ = match_op(p, $1, $3); /* =~ gets its own node type instead of a method call */
+#if 0
+ if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) {
+ $$ = reg_named_capture_assign($1->nd_lit, $$);
+ }
+#endif
+ }
+ | arg tNMATCH arg
+ {
+ $$ = call_bin_op(p, $1, "!~", $3);
+ }
+ | '!' arg
+ {
+ $$ = call_uni_op(p, cond($2), "!");
+ }
+ | '~' arg
+ {
+ $$ = call_uni_op(p, cond($2), "~");
+ }
+ | arg tLSHFT arg
+ {
+ $$ = call_bin_op(p, $1, "<<", $3);
+ }
+ | arg tRSHFT arg
+ {
+ $$ = call_bin_op(p, $1, ">>", $3);
+ }
+ | arg tANDOP arg /* && and || get and/or nodes, not method calls */
+ {
+ $$ = new_and(p, $1, $3);
+ }
+ | arg tOROP arg
+ {
+ $$ = new_or(p, $1, $3);
+ }
+ | arg '?' arg opt_nl ':' arg
+ {
+ $$ = new_if(p, cond($1), $3, $6); /* the ternary reuses the if node; $6 is the else value */
+ }
+ | primary
+ {
+ $$ = $1;
+ }
+ ;
+
+arg_value : arg /* an arg used for its value: never yields a null node */
+ {
+ $$ = $1;
+ if (!$$) $$ = new_nil(p); /* a null arg is replaced by a nil node */
+ }
+ ;
+
+/* Elements between `[` and `]` of an array literal. The value is always an
+ * element list, because the caller passes it to new_array(). */
+aref_args : none
+ | args trailer
+ {
+   $$ = $1;
+ }
+ | args ',' assocs trailer
+ {
+   /* trailing `k => v` pairs become one hash element */
+   $$ = push($1, new_hash(p, $3));
+ }
+ | assocs trailer
+ {
+   /* only `k => v` pairs: a one-element list holding the hash, as
+    * opt_call_args does; the bare hash node was previously returned
+    * and then used as if it were the element list */
+   $$ = list1(new_hash(p, $1));
+ }
+ ;
+
+paren_args : '(' opt_call_args rparen /* parenthesised call arguments; value is the inner argument node */
+ {
+ $$ = $2;
+ }
+ ;
+
+opt_paren_args : none /* optional parenthesised arguments; both alternatives pass their value through */
+ | paren_args
+ ;
+
+opt_call_args : none /* call arguments that may be empty or end in a trailing comma */
+ | call_args
+ | args ','
+ {
+ $$ = cons($1,0); /* (arglist . no block argument) */
+ }
+ | args ',' assocs ','
+ {
+ $$ = cons(push($1, new_hash(p, $3)), 0);
+ }
+ | assocs ','
+ {
+ $$ = cons(list1(new_hash(p, $1)), 0);
+ }
+ ;
+
+call_args : command /* value is always a pair (arglist . blockarg) */
+ {
+ $$ = cons(list1($1), 0);
+ }
+ | args opt_block_arg
+ {
+ $$ = cons($1, $2);
+ }
+ | assocs opt_block_arg
+ {
+ $$ = cons(list1(new_hash(p, $1)), $2); /* bare `k => v` pairs are passed as one hash argument */
+ }
+ | args ',' assocs opt_block_arg
+ {
+ $$ = cons(push($1, new_hash(p, $3)), $4);
+ }
+ | block_arg
+ {
+ $$ = cons(0, $1); /* only a block argument: the arglist slot is 0 */
+ }
+ ;
+
+command_args : { /* saves p->cmdarg_stack as this action's value ($1), then pushes 1 */
+ $<num>$ = p->cmdarg_stack;
+ CMDARG_PUSH(1);
+ }
+ call_args
+ {
+ /* CMDARG_POP() */
+ p->cmdarg_stack = $<num>1; /* restores the whole saved value instead of popping */
+ $$ = $2;
+ }
+ ;
+
+block_arg : tAMPER arg_value /* `&expr` passed as the block of a call */
+ {
+ $$ = new_block_arg(p, $2);
+ }
+ ;
+
+opt_block_arg : ',' block_arg /* optional trailing `, &expr` */
+ {
+ $$ = $2;
+ }
+ | none
+ {
+ $$ = 0;
+ }
+ ;
+
+args : arg_value /* comma-separated argument list; `*expr` items become splat nodes */
+ {
+ $$ = cons($1, 0);
+ }
+ | tSTAR arg_value
+ {
+ $$ = cons(new_splat(p, $2), 0);
+ }
+ | args ',' arg_value
+ {
+ $$ = push($1, $3);
+ }
+ | args ',' tSTAR arg_value
+ {
+ $$ = push($1, new_splat(p, $4));
+ }
+ ;
+
+mrhs : args ',' arg_value /* right-hand side with several values or a splat; value is a plain list */
+ {
+ $$ = push($1, $3);
+ }
+ | args ',' tSTAR arg_value
+ {
+ $$ = push($1, new_splat(p, $4));
+ }
+ | tSTAR arg_value
+ {
+ $$ = list1(new_splat(p, $2));
+ }
+ ;
+
+primary : literal /* self-delimiting expressions: literals, calls, and keyword ... end constructs */
+ | string
+ | regexp
+ | var_ref
+ | backref
+ | tFID
+ {
+ $$ = new_fcall(p, $1, 0);
+ }
+ | keyword_begin
+ bodystmt
+ keyword_end
+ {
+ $$ = $2;
+ }
+ | tLPAREN_ARG expr {p->lstate = EXPR_ENDARG;} rparen
+ {
+ yywarning(p, "(...) interpreted as grouped expression");
+ $$ = $2;
+ }
+ | tLPAREN compstmt ')'
+ {
+ $$ = $2;
+ }
+ | primary_value tCOLON2 tCONSTANT
+ {
+ $$ = new_colon2(p, $1, $3);
+ }
+ | tCOLON3 tCONSTANT
+ {
+ $$ = new_colon3(p, $2);
+ }
+ | tLBRACK aref_args ']'
+ {
+ $$ = new_array(p, $2);
+ }
+ | tLBRACE assoc_list '}'
+ {
+ $$ = new_hash(p, $2);
+ }
+ | keyword_return
+ {
+ $$ = new_return(p, 0);
+ }
+ | keyword_yield '(' call_args rparen
+ {
+ $$ = new_yield(p, $3);
+ }
+ | keyword_yield '(' rparen
+ {
+ $$ = new_yield(p, 0);
+ }
+ | keyword_yield
+ {
+ $$ = new_yield(p, 0);
+ }
+ | keyword_not '(' expr rparen
+ {
+ $$ = call_uni_op(p, cond($3), "!");
+ }
+ | keyword_not '(' rparen
+ {
+ $$ = call_uni_op(p, new_nil(p), "!"); /* `not()` negates nil */
+ }
+ | operation brace_block
+ {
+ $$ = new_fcall(p, $1, cons(0, $2)); /* no arguments, only a block */
+ }
+ | method_call
+ | method_call brace_block
+ {
+ call_with_block(p, $1, $2);
+ $$ = $1;
+ }
+ | tLAMBDA
+ {
+ local_nest(p);
+ $<num>$ = p->lpar_beg; /* saved as this action's value ($2) and restored below */
+ p->lpar_beg = ++p->paren_nest;
+ }
+ f_larglist
+ lambda_body
+ {
+ p->lpar_beg = $<num>2;
+ $$ = new_lambda(p, $3, $4);
+ local_unnest(p);
+ }
+ | keyword_if expr_value then
+ compstmt
+ if_tail
+ keyword_end
+ {
+ $$ = new_if(p, cond($2), $4, $5);
+ }
+ | keyword_unless expr_value then
+ compstmt
+ opt_else
+ keyword_end
+ {
+ $$ = new_unless(p, cond($2), $4, $5);
+ }
+ | keyword_while {COND_PUSH(1);} expr_value do {COND_POP();} /* the two mid-rule actions are $2 and $5: condition is $3, body is $6 */
+ compstmt
+ keyword_end
+ {
+ $$ = new_while(p, cond($3), $6);
+ }
+ | keyword_until {COND_PUSH(1);} expr_value do {COND_POP();}
+ compstmt
+ keyword_end
+ {
+ $$ = new_until(p, cond($3), $6);
+ }
+ | keyword_case expr_value opt_terms
+ case_body
+ keyword_end
+ {
+ $$ = new_case(p, $2, $4);
+ }
+ | keyword_case opt_terms case_body keyword_end
+ {
+ $$ = new_case(p, 0, $3); /* case without a subject */
+ }
+ | keyword_for for_var keyword_in
+ {COND_PUSH(1);}
+ expr_value do
+ {COND_POP();}
+ compstmt
+ keyword_end
+ {
+ $$ = new_for(p, $2, $5, $8); /* mid-rule actions are $4 and $7 */
+ }
+ | keyword_class cpath superclass
+ {
+ if (p->in_def || p->in_single)
+ yyerror(p, "class definition in method body");
+ $<node>$ = local_switch(p); /* value of this action ($4), handed back to local_resume() below */
+ }
+ bodystmt
+ keyword_end
+ {
+ $$ = new_class(p, $2, $3, $5);
+ local_resume(p, $<node>4);
+ }
+ | keyword_class tLSHFT expr
+ {
+ $<num>$ = p->in_def; /* $4: saved in_def */
+ p->in_def = 0;
+ }
+ term
+ {
+ $<node>$ = cons(local_switch(p), (node*)p->in_single); /* $6: (result of local_switch . saved in_single) */
+ p->in_single = 0;
+ }
+ bodystmt
+ keyword_end
+ {
+ $$ = new_sclass(p, $3, $7);
+ local_resume(p, $<node>6->car);
+ p->in_def = $<num>4;
+ p->in_single = (int)$<node>6->cdr;
+ }
+ | keyword_module cpath
+ {
+ if (p->in_def || p->in_single)
+ yyerror(p, "module definition in method body");
+ $<node>$ = local_switch(p);
+ }
+ bodystmt
+ keyword_end
+ {
+ $$ = new_module(p, $2, $4);
+ local_resume(p, $<node>3);
+ }
+ | keyword_def fname
+ {
+ p->in_def++; /* decremented again in the final action */
+ $<node>$ = local_switch(p);
+ }
+ f_arglist
+ bodystmt
+ keyword_end
+ {
+ $$ = new_def(p, $2, $4, $5);
+ local_resume(p, $<node>3);
+ p->in_def--;
+ }
+ | keyword_def singleton dot_or_colon {p->lstate = EXPR_FNAME;} fname
+ {
+ p->in_single++; /* decremented again in the final action */
+ p->lstate = EXPR_ENDFN; /* force for args */
+ $<node>$ = local_switch(p);
+ }
+ f_arglist
+ bodystmt
+ keyword_end
+ {
+ $$ = new_sdef(p, $2, $5, $7, $8); /* mid-rule actions are $4 and $6 */
+ local_resume(p, $<node>6);
+ p->in_single--;
+ }
+ | keyword_break
+ {
+ $$ = new_break(p, 0);
+ }
+ | keyword_next
+ {
+ $$ = new_next(p, 0);
+ }
+ | keyword_redo
+ {
+ $$ = new_redo(p);
+ }
+ | keyword_retry
+ {
+ $$ = new_retry(p);
+ }
+ ;
+
+primary_value : primary /* a primary used for its value: never yields a null node */
+ {
+ $$ = $1;
+ if (!$$) $$ = new_nil(p); /* a null primary is replaced by a nil node */
+ }
+ ;
+
+then : term /* separator after an if/unless/elsif/when/rescue head; carries no value */
+ | keyword_then
+ | term keyword_then
+ ;
+
+do : term /* separator after a while/until/for head; carries no value */
+ | keyword_do_cond
+ ;
+
+if_tail : opt_else /* what follows an if body: else, or an elsif chain */
+ | keyword_elsif expr_value then
+ compstmt
+ if_tail
+ {
+ $$ = new_if(p, cond($2), $4, $5); /* each elsif becomes a nested if in the else slot */
+ }
+ ;
+
+opt_else : none /* optional else clause; value is its body */
+ | keyword_else compstmt
+ {
+ $$ = $2;
+ }
+ ;
+
+for_var : lhs /* loop variable(s) of `for` */
+ {
+ $$ = list1(list1($1)); /* a single variable is wrapped as ((var)), the same shape mlhs_basic gives for a one-slot list */
+ }
+ | mlhs
+ ;
+
+f_marg : f_norm_arg /* one item of a destructuring parameter: a name or a nested pattern */
+ {
+ $$ = new_arg(p, $1);
+ }
+ | tLPAREN f_margs rparen
+ {
+ $$ = new_masgn(p, $2, 0); /* a nested pattern is a masgn node with no right-hand side */
+ }
+ ;
+
+f_marg_list : f_marg /* comma-separated f_marg items, collected left to right */
+ {
+ $$ = list1($1);
+ }
+ | f_marg_list ',' f_marg
+ {
+ $$ = push($1, $3);
+ }
+ ;
+
+f_margs : f_marg_list /* value is always list3(pre, rest, post); an anonymous `*` is stored as (node*)-1 */
+ {
+ $$ = list3($1,0,0);
+ }
+ | f_marg_list ',' tSTAR f_norm_arg
+ {
+ $$ = list3($1, new_arg(p, $4), 0);
+ }
+ | f_marg_list ',' tSTAR f_norm_arg ',' f_marg_list
+ {
+ $$ = list3($1, new_arg(p, $4), $6);
+ }
+ | f_marg_list ',' tSTAR
+ {
+ $$ = list3($1, (node*)-1, 0);
+ }
+ | f_marg_list ',' tSTAR ',' f_marg_list
+ {
+ $$ = list3($1, (node*)-1, $5);
+ }
+ | tSTAR f_norm_arg
+ {
+ $$ = list3(0, new_arg(p, $2), 0);
+ }
+ | tSTAR f_norm_arg ',' f_marg_list
+ {
+ $$ = list3(0, new_arg(p, $2), $4);
+ }
+ | tSTAR
+ {
+ $$ = list3(0, (node*)-1, 0);
+ }
+ | tSTAR ',' f_marg_list
+ {
+ $$ = list3(0, (node*)-1, $3);
+ }
+ ;
+
+block_param : f_arg ',' f_block_optarg ',' f_rest_arg opt_f_block_arg /* block parameters; each action calls new_args(p, m, opt, rest, m2, blk) with 0 for absent parts */
+ {
+ $$ = new_args(p, $1, $3, $5, 0, $6);
+ }
+ | f_arg ',' f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, $3, $5, $7, $8);
+ }
+ | f_arg ',' f_block_optarg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, $3, 0, 0, $4);
+ }
+ | f_arg ',' f_block_optarg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, $3, 0, $5, $6);
+ }
+ | f_arg ',' f_rest_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, 0, $3, 0, $4);
+ }
+ | f_arg ',' /* trailing comma, as in |a,| */
+ {
+ $$ = new_args(p, $1, 0, 1, 0, 0); /* the rest slot is set to the literal 1 here - presumably a marker for an implicit rest; TODO confirm in the code generator */
+ }
+ | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, 0, $3, $5, $6);
+ }
+ | f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, 0, 0, 0, $2);
+ }
+ | f_block_optarg ',' f_rest_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, $3, 0, $4);
+ }
+ | f_block_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, $3, $5, $6);
+ }
+ | f_block_optarg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, 0, 0, $2);
+ }
+ | f_block_optarg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, 0, $3, $4);
+ }
+ | f_rest_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, 0, $1, 0, $2);
+ }
+ | f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, 0, $1, $3, $4);
+ }
+ | f_block_arg
+ {
+ $$ = new_args(p, 0, 0, 0, 0, $1);
+ }
+ ;
+
+opt_block_param : none /* optional |...| parameter section of a block */
+ | block_param_def
+ {
+ p->cmd_start = TRUE; /* set once the parameter section has been parsed */
+ $$ = $1;
+ }
+ ;
+
+block_param_def : '|' opt_bv_decl '|' /* the |...| section itself; empty forms yield 0 */
+ {
+ local_add_f(p, 0);
+ $$ = 0;
+ }
+ | tOROP /* `||` with nothing inside arrives as one token */
+ {
+ local_add_f(p, 0);
+ $$ = 0;
+ }
+ | '|' block_param opt_bv_decl '|'
+ {
+ $$ = $2;
+ }
+ ;
+
+
+opt_bv_decl : opt_nl /* optional `; x, y` block-local variable list; the value is always 0 */
+ {
+ $$ = 0;
+ }
+ | opt_nl ';' bv_decls opt_nl
+ {
+ $$ = 0; /* the names are registered by the bvar actions, not returned */
+ }
+ ;
+
+bv_decls : bvar /* comma-separated block-local variables; no actions */
+ | bv_decls ',' bvar
+ ;
+
+bvar : tIDENTIFIER /* one block-local variable name */
+ {
+ local_add_f(p, $1);
+ new_bv(p, $1); /* new_bv has an empty body */
+ }
+ | f_bad_arg
+ ;
+
+f_larglist : '(' f_args opt_bv_decl ')' /* lambda parameters, with or without parentheses */
+ {
+ $$ = $2;
+ }
+ | f_args
+ {
+ $$ = $1;
+ }
+ ;
+
+lambda_body : tLAMBEG compstmt '}' /* lambda body in brace or do...end form; value is the statements */
+ {
+ $$ = $2;
+ }
+ | keyword_do_LAMBDA compstmt keyword_end
+ {
+ $$ = $2;
+ }
+ ;
+
+do_block : keyword_do_block /* do ... end block attached to a command */
+ {
+ local_nest(p); /* paired with local_unnest() in the final action */
+ }
+ opt_block_param
+ compstmt
+ keyword_end
+ {
+ $$ = new_block(p,$3,$4); /* $3 = parameters, $4 = body ($2 is the mid-rule action) */
+ local_unnest(p);
+ }
+ ;
+
+block_call : command do_block /* a command with a do-block, optionally followed by chained calls */
+ {
+ if ($1->car == (node*)NODE_YIELD) {
+ yyerror(p, "block given to yield");
+ }
+ else {
+ call_with_block(p, $1, $2);
+ }
+ $$ = $1;
+ }
+ | block_call dot_or_colon operation2 opt_paren_args
+ {
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | block_call dot_or_colon operation2 opt_paren_args brace_block
+ {
+ $$ = new_call(p, $1, $3, $4);
+ call_with_block(p, $$, $5); /* the block is attached to the new outer call */
+ }
+ | block_call dot_or_colon operation2 command_args do_block
+ {
+ $$ = new_call(p, $1, $3, $4);
+ call_with_block(p, $$, $5);
+ }
+ ;
+
+method_call : operation paren_args /* calls with parenthesised (or no) arguments, plus super and indexing */
+ {
+ $$ = new_fcall(p, $1, $2);
+ }
+ | primary_value '.' operation2 opt_paren_args
+ {
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | primary_value tCOLON2 operation2 paren_args
+ {
+ $$ = new_call(p, $1, $3, $4);
+ }
+ | primary_value tCOLON2 operation3
+ {
+ $$ = new_call(p, $1, $3, 0);
+ }
+ | primary_value '.' paren_args /* recv.(args) is a call of the "call" method */
+ {
+ $$ = new_call(p, $1, intern("call"), $3);
+ }
+ | primary_value tCOLON2 paren_args
+ {
+ $$ = new_call(p, $1, intern("call"), $3);
+ }
+ | keyword_super paren_args
+ {
+ $$ = new_super(p, $2);
+ }
+ | keyword_super /* bare super gets its own node via new_zsuper */
+ {
+ $$ = new_zsuper(p);
+ }
+ | primary_value '[' opt_call_args rbracket
+ {
+ $$ = new_call(p, $1, intern("[]"), $3);
+ }
+ ;
+
+brace_block : '{' /* block after a method_call or operation, in brace or do...end form */
+ {
+ local_nest(p); /* paired with local_unnest() in the final action */
+ }
+ opt_block_param
+ compstmt '}'
+ {
+ $$ = new_block(p,$3,$4); /* $3 = parameters, $4 = body ($2 is the mid-rule action) */
+ local_unnest(p);
+ }
+ | keyword_do
+ {
+ local_nest(p);
+ }
+ opt_block_param
+ compstmt keyword_end
+ {
+ $$ = new_block(p,$3,$4);
+ local_unnest(p);
+ }
+ ;
+
+case_body : keyword_when args then /* one `when` clause followed by the remaining clauses */
+ compstmt
+ cases
+ {
+ $$ = cons(cons($2, $4), $5); /* list of (conditions . body) pairs */
+ }
+ ;
+
+cases : opt_else /* end of a when chain: an optional else, or further when clauses */
+ {
+ if ($1) {
+ $$ = cons(cons(0, $1), 0); /* else is stored as a clause whose condition slot is 0 */
+ }
+ else {
+ $$ = 0;
+ }
+ }
+ | case_body
+ ;
+
+opt_rescue : keyword_rescue exc_list exc_var then /* zero or more rescue clauses */
+ compstmt
+ opt_rescue
+ {
+ $$ = list1(list3($2, $3, $5)); /* one clause is (exception list, variable, body) */
+ if ($6) $$ = append($$, $6); /* later clauses follow in source order */
+ }
+ | none
+ ;
+
+exc_list : arg_value /* exception classes named by a rescue clause; may be empty */
+ {
+ $$ = list1($1);
+ }
+ | mrhs
+ | none
+ ;
+
+exc_var : tASSOC lhs /* optional `=> var` of a rescue clause; value is the target */
+ {
+ $$ = $2;
+ }
+ | none
+ ;
+
+opt_ensure : keyword_ensure compstmt /* optional ensure clause; value is its body */
+ {
+ $$ = $2;
+ }
+ | none
+ ;
+
+literal : numeric /* numeric and symbol literals */
+ | symbol
+ {
+ $$ = new_sym(p, $1); /* symbol yields an id, so it is wrapped in a node here */
+ }
+ ;
+
+string : tCHAR
+ | tSTRING
+ | tSTRING_BEG tSTRING
+ {
+ $$ = $2;
+ }
+ | tSTRING_BEG string_interp tSTRING
+ {
+ $$ = new_dstr(p, push($2, $3));
+ }
+ ;
+
+string_interp : tSTRING_PART /* one or more (text, embedded statements) pairs inside a string */
+ {
+ $<num>$ = p->sterm; /* remember the string terminator as this mid-rule action's value */
+ p->sterm = 0; /* parser_yylex only re-enters parse_string while sterm is non-zero */
+ }
+ compstmt
+ '}'
+ {
+ p->sterm = $<num>2; /* restore the terminator saved by the mid-rule action ($2) */
+ $$ = list2($1, $3);
+ }
+ | string_interp
+ tSTRING_PART
+ {
+ $<num>$ = p->sterm;
+ p->sterm = 0;
+ }
+ compstmt
+ '}'
+ {
+ p->sterm = $<num>3; /* in this alternative the mid-rule action is $3 and the statements are $4 */
+ $$ = push(push($1, $2), $4);
+ }
+ ;
+
+regexp : tREGEXP
+ ;
+
+symbol : tSYMBEG sym /* symbol start token followed by the symbol's name */
+ {
+ p->lstate = EXPR_END; /* the lexer state is set from the grammar once the whole symbol is recognized */
+ $$ = $2;
+ }
+ ;
+
+sym : fname /* besides fname, the @, $ and @@ variable tokens are accepted as symbol names */
+ | tIVAR
+ | tGVAR
+ | tCVAR
+ ;
+
+numeric : tINTEGER
+ | tFLOAT
+ | tUMINUS_NUM tINTEGER %prec tLOWEST /* the lexer returns tUMINUS_NUM for a unary minus directly followed by a digit */
+ {
+ $$ = negate_lit(p, $2);
+ }
+ | tUMINUS_NUM tFLOAT %prec tLOWEST
+ {
+ $$ = negate_lit(p, $2);
+ }
+ ;
+
+variable : tIDENTIFIER /* each name token is wrapped by the constructor matching its kind */
+ {
+ $$ = new_lvar(p, $1);
+ }
+ | tIVAR
+ {
+ $$ = new_ivar(p, $1);
+ }
+ | tGVAR
+ {
+ $$ = new_gvar(p, $1);
+ }
+ | tCVAR
+ {
+ $$ = new_cvar(p, $1);
+ }
+ | tCONSTANT
+ {
+ $$ = new_const(p, $1);
+ }
+ ;
+
+var_lhs : variable /* a variable appearing as an assignment target */
+                    {
+                      assignable(p, $1);
+                      /* Assign the result explicitly: an action that leaves $$
+                         untouched only works because the generated parser
+                         happens to pre-load $$ with $1. */
+                      $$ = $1;
+                    }
+ ;
+
+var_ref : variable /* a variable or pseudo-variable used as a value */
+ {
+ $$ = var_reference(p, $1);
+ }
+ | keyword_nil
+ {
+ $$ = new_nil(p);
+ }
+ | keyword_self
+ {
+ $$ = new_self(p);
+ }
+ | keyword_true
+ {
+ $$ = new_true(p);
+ }
+ | keyword_false
+ {
+ $$ = new_false(p);
+ }
+ | keyword__FILE__
+ {
+ if (!p->filename) {
+ p->filename = "(null)"; /* note: this stores the placeholder in the parser state itself */
+ }
+ $$ = new_str(p, p->filename, strlen(p->filename));
+ }
+ | keyword__LINE__
+ {
+ char buf[16];
+
+ snprintf(buf, 16, "%d", p->lineno); /* new_int takes digit text plus a base, so format the number first */
+ $$ = new_int(p, buf, 10);
+ }
+ ;
+
+backref : tNTH_REF /* produced by the lexer for $1, $2, ... */
+ | tBACK_REF /* produced by the lexer for $&, $`, $' and $+ */
+ ;
+
+superclass : term /* nothing but a terminator: the value is 0 */
+ {
+ $$ = 0;
+ }
+ | '<'
+ {
+ p->lstate = EXPR_BEG; /* mid-rule action: set the lexer state before the expression is scanned */
+ }
+ expr_value term
+ {
+ $$ = $3; /* $2 is the mid-rule action, so the expression is $3 */
+ }
+ | error term /* error recovery: resume at the terminator and yield 0 */
+ {
+ yyerrok;
+ $$ = 0;
+ }
+ ;
+
+f_arglist : '(' f_args rparen /* parameter list written inside parentheses */
+ {
+ $$ = $2;
+ p->lstate = EXPR_BEG;
+ p->cmd_start = TRUE; /* parser_yylex copies cmd_start into its cmd_state local for the next token and then clears it */
+ }
+ | f_args term /* parameter list without parentheses, ended by a terminator */
+ {
+ $$ = $1;
+ }
+ ;
+
+f_args : f_arg ',' f_optarg ',' f_rest_arg opt_f_block_arg /* every accepted shape of a parameter list; judging by the calls below, new_args receives (p, f_arg list, f_optarg list, rest arg, f_arg list after the rest/optional part, block arg), with 0 for an absent part */
+ {
+ $$ = new_args(p, $1, $3, $5, 0, $6);
+ }
+ | f_arg ',' f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, $3, $5, $7, $8);
+ }
+ | f_arg ',' f_optarg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, $3, 0, 0, $4);
+ }
+ | f_arg ',' f_optarg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, $3, 0, $5, $6);
+ }
+ | f_arg ',' f_rest_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, 0, $3, 0, $4);
+ }
+ | f_arg ',' f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, 0, $3, $5, $6);
+ }
+ | f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, $1, 0, 0, 0, $2);
+ }
+ | f_optarg ',' f_rest_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, $3, 0, $4);
+ }
+ | f_optarg ',' f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, $3, $5, $6);
+ }
+ | f_optarg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, 0, 0, $2);
+ }
+ | f_optarg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, $1, 0, $3, $4);
+ }
+ | f_rest_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, 0, $1, 0, $2);
+ }
+ | f_rest_arg ',' f_arg opt_f_block_arg
+ {
+ $$ = new_args(p, 0, 0, $1, $3, $4);
+ }
+ | f_block_arg /* only a block parameter */
+ {
+ $$ = new_args(p, 0, 0, 0, 0, $1);
+ }
+ | /* none */
+ {
+ local_add_f(p, 0); /* same call opt_f_block_arg makes when no block parameter is present */
+ $$ = new_args(p, 0, 0, 0, 0, 0);
+ }
+ ;
+
+f_bad_arg : tCONSTANT /* tokens that cannot name a parameter: each reports an error and yields 0 */
+ {
+ yyerror(p, "formal argument cannot be a constant");
+ $$ = 0;
+ }
+ | tIVAR
+ {
+ yyerror(p, "formal argument cannot be an instance variable");
+ $$ = 0;
+ }
+ | tGVAR
+ {
+ yyerror(p, "formal argument cannot be a global variable");
+ $$ = 0;
+ }
+ | tCVAR
+ {
+ yyerror(p, "formal argument cannot be a class variable");
+ $$ = 0;
+ }
+ ;
+
+f_norm_arg : f_bad_arg /* the error was already reported by f_bad_arg; parsing continues with 0 */
+ {
+ $$ = 0;
+ }
+ | tIDENTIFIER
+ {
+ local_add_f(p, $1); /* called for every named parameter before its value is used */
+ $$ = $1;
+ }
+ ;
+
+f_arg_item : f_norm_arg /* one parameter: a plain name or a parenthesized f_margs pattern */
+ {
+ $$ = new_arg(p, $1);
+ }
+ | tLPAREN f_margs rparen
+ {
+ $$ = new_masgn(p, $2, 0); /* built with new_masgn, passing 0 as its last argument */
+ }
+ ;
+
+f_arg : f_arg_item /* comma-separated list of f_arg_item, built left to right */
+ {
+ $$ = list1($1);
+ }
+ | f_arg ',' f_arg_item
+ {
+ $$ = push($1, $3);
+ }
+ ;
+
+f_opt : tIDENTIFIER '=' arg_value /* parameter with a default: a (name . default expression) pair */
+ {
+ local_add_f(p, $1);
+ $$ = cons((node*)$1, $3); /* the identifier value is stored directly in the car slot via a cast */
+ }
+ ;
+
+f_block_opt : tIDENTIFIER '=' primary_value /* same pair, but the default is a primary_value rather than an arg_value */
+ {
+ local_add_f(p, $1);
+ $$ = cons((node*)$1, $3);
+ }
+ ;
+
+f_block_optarg : f_block_opt /* comma-separated list of f_block_opt */
+ {
+ $$ = list1($1);
+ }
+ | f_block_optarg ',' f_block_opt
+ {
+ $$ = push($1, $3);
+ }
+ ;
+
+f_optarg : f_opt /* comma-separated list of f_opt */
+ {
+ $$ = list1($1);
+ }
+ | f_optarg ',' f_opt
+ {
+ $$ = push($1, $3);
+ }
+ ;
+
+restarg_mark : '*' /* the lexer returns either '*' or tSTAR depending on its state; both are accepted */
+ | tSTAR
+ ;
+
+f_rest_arg : restarg_mark tIDENTIFIER /* named rest parameter */
+ {
+ local_add_f(p, $2);
+ $$ = $2;
+ }
+ | restarg_mark /* a mark with no name yields 0 and makes no local_add_f call */
+ {
+ $$ = 0;
+ }
+ ;
+
+blkarg_mark : '&' /* the lexer returns either '&' or tAMPER depending on its state; both are accepted */
+ | tAMPER
+ ;
+
+f_block_arg : blkarg_mark tIDENTIFIER /* block parameter: the value is its name */
+ {
+ local_add_f(p, $2);
+ $$ = $2;
+ }
+ ;
+
+opt_f_block_arg : ',' f_block_arg /* optional trailing block parameter */
+ {
+ $$ = $2;
+ }
+ | none
+ {
+ local_add_f(p, 0); /* local_add_f is still called, with 0, when there is no block parameter */
+ $$ = 0;
+ }
+ ;
+
+singleton : var_ref /* the object a singleton method is defined on */
+ {
+ $$ = $1;
+ if (!$$) $$ = new_nil(p); /* a 0 value from var_ref is replaced by a nil node */
+ }
+ | '(' {p->lstate = EXPR_BEG;} expr rparen
+ {
+ if ($3 == 0) { /* $2 is the mid-rule action, so the expression is $3 */
+ yyerror(p, "can't define singleton method for ().");
+ }
+ else {
+ switch ((enum node_type)$3->car) { /* the car slot carries the node type */
+ case NODE_STR:
+ case NODE_DSTR:
+ case NODE_DREGX:
+ case NODE_MATCH:
+ case NODE_FLOAT:
+ case NODE_ARRAY:
+ yyerror(p, "can't define singleton method for literals"); /* no break needed: the default label below only breaks */
+ default:
+ break;
+ }
+ }
+ $$ = $3; /* the expression is returned even after an error was reported */
+ }
+ ;
+
+assoc_list : none /* possibly empty list of key/value pairs, with an optional trailer */
+ | assocs trailer
+ {
+ $$ = $1;
+ }
+ ;
+
+assocs : assoc /* comma-separated list of assoc, built left to right */
+ {
+ $$ = list1($1);
+ }
+ | assocs ',' assoc
+ {
+ $$ = push($1, $3);
+ }
+ ;
+
+assoc : arg_value tASSOC arg_value /* key => value, stored as a (key . value) pair */
+ {
+ $$ = cons($1, $3);
+ }
+ | tLABEL arg_value /* label form: the label becomes a symbol node used as the key */
+ {
+ $$ = cons(new_sym(p, $1), $2);
+ }
+ ;
+
+operation : tIDENTIFIER /* name tokens usable as a method name without a receiver */
+ | tCONSTANT
+ | tFID
+ ;
+
+operation2 : tIDENTIFIER /* as operation, plus operator names (op) */
+ | tCONSTANT
+ | tFID
+ | op
+ ;
+
+operation3 : tIDENTIFIER /* as operation2 but without tCONSTANT */
+ | tFID
+ | op
+ ;
+
+dot_or_colon : '.' /* either call separator: '.' or the `::' token */
+ | tCOLON2
+ ;
+
+opt_terms : /* none */
+ | terms
+ ;
+
+opt_nl : /* none */
+ | '\n'
+ ;
+
+rparen : opt_nl ')' /* closing parenthesis, optionally preceded by a newline */
+ ;
+
+rbracket : opt_nl ']' /* closing bracket, optionally preceded by a newline */
+ ;
+
+trailer : /* none */
+ | '\n'
+ | ','
+ ;
+
+term : ';' {yyerrok;} /* a ';' also ends error recovery mode */
+ | '\n'
+ ;
+
+terms : term
+ | terms ';' {yyerrok;}
+ ;
+
+none : /* none */
+ {
+ $$ = 0; /* the empty production yields 0, which other actions test for (e.g. `if ($6)' in opt_rescue) */
+ }
+ ;
+%%
+#define yylval (*((YYSTYPE*)(p->ylval))) /* yylval is the YYSTYPE object p->ylval points to, so every use needs a parser_state *p in scope */
+
+/*
+ * Report a parse error: write the message followed by a newline to
+ * stderr and count it in p->nerr.
+ */
+static void
+yyerror(parser_state *p, const char *s)
+{
+  fprintf(stderr, "%s\n", s);
+  p->nerr += 1;
+}
+
+/*
+ * Report a parse error whose message is built from a format string
+ * taking exactly one int argument.  The formatted text is limited to
+ * a 256-byte buffer and is then handed to yyerror().
+ */
+static void
+yyerror_i(parser_state *p, const char *fmt, int i)
+{
+  char message[256];
+
+  snprintf(message, sizeof(message), fmt, i);
+  yyerror(p, message);
+}
+
+/*
+ * Print a warning line to stderr.  The parser state is accepted for
+ * symmetry with yyerror() but is not used, and no counter is changed.
+ */
+static void
+yywarn(parser_state *p, const char *s)
+{
+  (void)p;
+  fprintf(stderr, "%s\n", s);
+}
+
+/*
+ * Print a warning line to stderr; same behaviour as yywarn().  The
+ * parser state is not used.
+ */
+static void
+yywarning(parser_state *p, const char *s)
+{
+  (void)p;
+  fprintf(stderr, "%s\n", s);
+}
+
+/*
+ * Print a warning built from a format string taking exactly one
+ * string argument.  The formatted text is limited to a 256-byte
+ * buffer and is then handed to yywarning().
+ */
+static void
+yywarning_s(parser_state *p, const char *fmt, const char *s)
+{
+  char message[256];
+
+  snprintf(message, sizeof(message), fmt, s);
+  yywarning(p, message);
+}
+
+/*
+ * Report an attempt to assign to a back-reference variable.  The node
+ * type is read from n->car and the reference itself from n->cdr (a
+ * number for NODE_NTH_REF, a character for NODE_BACK_REF).  Any other
+ * node type is silently ignored.
+ */
+static void
+backref_error(parser_state *p, node *n)
+{
+  const int type = (int)n->car;
+
+  if (type == NODE_NTH_REF) {
+    yyerror_i(p, "can't set variable $%d", (int)n->cdr);
+  }
+  else if (type == NODE_BACK_REF) {
+    yyerror_i(p, "can't set variable $%c", (int)n->cdr);
+  }
+}
+
+static int peeks(parser_state *p, const char *s);
+static int skips(parser_state *p, const char *s);
+
+/*
+ * Fetch the next input character.
+ *
+ * Sources are tried in this order: the push-back list p->pb, then the
+ * stdio stream p->f, then the in-memory range [p->s, p->send).
+ * Returns the character as a non-negative int, or -1 when the input is
+ * exhausted.  p->lineno and p->column are advanced for every character
+ * returned, including ones that come from the push-back list.
+ */
+static inline int
+nextc(parser_state *p)
+{
+  int c;
+
+  if (p->pb) {
+    node *tmp;
+
+    /* characters are stored directly in the car slot of each cell */
+    c = (int)p->pb->car;
+    tmp = p->pb;
+    p->pb = p->pb->cdr;
+    cons_free(tmp);
+  }
+  else if (p->f) {
+    if (feof(p->f)) return -1;
+    c = fgetc(p->f);
+    if (c == EOF) return -1;
+  }
+  else if (!p->s || p->s >= p->send) {
+    return -1;
+  }
+  else {
+    /* Read through unsigned char: with a signed plain char, a byte
+       >= 0x80 would otherwise come out negative and be taken for end
+       of input by every caller that tests c < 0.  This also matches
+       what fgetc() returns for the same byte. */
+    c = (unsigned char)*p->s++;
+  }
+  if (c == '\n') {
+    p->lineno++;
+    p->column = 0;
+    // must understand heredoc
+  }
+  else {
+    p->column++;
+  }
+  return c;
+}
+
+/*
+ * Put one character back so that the next nextc() call returns it.
+ * Negative values (the end-of-input marker) are ignored.  The column
+ * counter is stepped back by one; the line counter is left alone.
+ */
+static void
+pushback(parser_state *p, int c)
+{
+  if (c >= 0) {
+    p->column -= 1;
+    p->pb = cons((node*)c, p->pb);
+  }
+}
+
+/*
+ * Discard input up to and including the first occurrence of term.
+ * Stops at end of input as well; without that check a missing
+ * terminator (for example a comment on the last line of a file that
+ * has no trailing newline) would loop forever, because nextc() keeps
+ * returning -1.
+ */
+static void
+skip(parser_state *p, char term)
+{
+  int c;
+
+  do {
+    c = nextc(p);
+  } while (c >= 0 && c != term);
+}
+
+/*
+ * Look ahead without consuming input: report whether the character n
+ * positions after the next one (n == 0 means the very next character)
+ * equals c.
+ *
+ * All characters read for the look-ahead are put back at the front of
+ * the push-back list, in their original order, so later nextc() calls
+ * see the same input again; this also holds when the input ends before
+ * position n is reached.  p->lineno and p->column are left as they
+ * were on entry.
+ *
+ * Returns TRUE on a match; FALSE on a mismatch, when the input ends
+ * first, or when n is negative.
+ */
+static int
+peek_n(parser_state *p, int c, int n)
+{
+  node *list = 0;
+  int c0 = -1;
+  int i;
+  /* nextc() advances the position; a look-ahead must not move it */
+  int lineno = p->lineno;
+  int column = p->column;
+
+  for (i = 0; i <= n; i++) {
+    c0 = nextc(p);
+    if (c0 < 0) break;
+    list = push(list, (node*)c0);
+  }
+  p->lineno = lineno;
+  p->column = column;
+
+  if (list) {
+    /* what was just read comes before anything still pending */
+    p->pb = p->pb ? append(list, p->pb) : list;
+  }
+  if (c0 >= 0 && c0 == c) return TRUE;
+  return FALSE;
+}
+#define peek(p,c) peek_n((p), (c), 0)
+
+/*
+ * Report whether the upcoming input starts with the string s, without
+ * consuming anything.
+ *
+ * For stream input each character is checked through peek_n().  For
+ * in-memory input the bytes at p->s are compared directly, but only
+ * when at least strlen(s) bytes remain before p->send, so the
+ * comparison never reads past the end of the buffer.
+ * NOTE(review): the in-memory path does not look at p->pb, so it
+ * assumes nothing is pending on the push-back list - confirm at the
+ * call sites.
+ *
+ * Returns TRUE on a match, FALSE otherwise.
+ */
+static int
+peeks(parser_state *p, const char *s)
+{
+  if (p->f) {
+    int n = 0;
+
+    while (*s) {
+      if (!peek_n(p, *s++, n++)) return FALSE;
+    }
+    return TRUE;
+  }
+  if (p->s && p->s <= p->send) {
+    size_t len = strlen(s);
+
+    if ((size_t)(p->send - p->s) >= len && memcmp(p->s, s, len) == 0) {
+      return TRUE;
+    }
+  }
+  return FALSE;
+}
+
+/*
+ * Consume input up to and including the first occurrence of the
+ * string s.
+ *
+ * The scan looks for the first character of s and then uses peeks()
+ * to test for the remainder; on a mismatch it resumes the search with
+ * the complete pattern (the pattern pointer itself is never advanced,
+ * so every attempt starts from s[0]).
+ *
+ * Returns TRUE once s has been consumed (immediately for an empty s),
+ * or the negative value from nextc() when the input ends first.
+ */
+static int
+skips(parser_state *p, const char *s)
+{
+  if (*s == '\0') return TRUE;
+
+  for (;;) {
+    int c;
+
+    /* skip until the first character of the pattern */
+    do {
+      c = nextc(p);
+      if (c < 0) return c;
+    } while (c != *s);
+
+    if (peeks(p, s + 1)) {
+      size_t len = strlen(s + 1);
+
+      while (len--) {
+        nextc(p);
+      }
+      return TRUE;
+    }
+  }
+}
+
+/* Bit flags that are OR-ed together to form the string_type values. */
+#define STR_FUNC_ESCAPE (1 << 0)
+#define STR_FUNC_EXPAND (1 << 1)
+#define STR_FUNC_REGEXP (1 << 2)
+#define STR_FUNC_QWORDS (1 << 3)
+#define STR_FUNC_SYMBOL (1 << 4)
+#define STR_FUNC_INDENT (1 << 5)
+
+/*
+ * Named flag combinations.  str_dquote and str_xquote carry the same
+ * value, so they cannot be told apart by value alone.
+ */
+enum string_type {
+  str_squote = 0,
+  str_dquote = STR_FUNC_EXPAND,
+  str_xquote = STR_FUNC_EXPAND,
+  str_regexp = STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND,
+  str_sword  = STR_FUNC_QWORDS,
+  str_dword  = STR_FUNC_QWORDS | STR_FUNC_EXPAND,
+  str_ssym   = STR_FUNC_SYMBOL,
+  str_dsym   = STR_FUNC_SYMBOL | STR_FUNC_EXPAND
+};
+
+/*
+ * Start a new token: reset the write index so the next tokadd()
+ * stores at the beginning of p->buf.  The old contents are not
+ * cleared.
+ */
+static void
+newtok(parser_state *p)
+{
+  p->bidx = 0;
+}
+
+/*
+ * Append one character to the token buffer.  Once 1024 characters
+ * have been stored, further characters are dropped without any
+ * report here; tokfix() is the function that reports the overflow.
+ */
+static void
+tokadd(parser_state *p, int c)
+{
+  if (p->bidx >= 1024) return;
+
+  p->buf[p->bidx] = c;
+  p->bidx++;
+}
+
+/*
+ * Return the character most recently stored in the token buffer.
+ * It reads p->buf[p->bidx-1], so it is only meaningful after at least
+ * one tokadd() since the last newtok().
+ */
+static int
+toklast(parser_state *p)
+{
+  const int last = p->bidx - 1;
+
+  return p->buf[last];
+}
+
+/*
+ * Terminate the current token with a NUL so that tok() can be used as
+ * a C string.
+ *
+ * tokadd() stops storing at index 1024, so a full buffer would place
+ * the terminator at p->buf[1024].  The index is clamped first so that
+ * the terminator always stays within the first 1024 bytes; the last
+ * stored character is given up in that case, which is reported as a
+ * truncation error as before.
+ * NOTE(review): the declared size of p->buf is not visible here; this
+ * assumes it holds 1024 bytes, as the limit in tokadd() suggests.
+ */
+static void
+tokfix(parser_state *p)
+{
+  if (p->bidx >= 1024) {
+    p->bidx = 1023;
+    yyerror(p, "string too long (truncated)");
+  }
+  p->buf[p->bidx] = '\0';
+}
+
+/*
+ * Return the start of the token buffer.  The text is only
+ * NUL-terminated after tokfix() has been called.
+ */
+static const char*
+tok(parser_state *p)
+{
+  const char *start = p->buf;
+
+  return start;
+}
+
+/*
+ * Return the number of characters stored in the token buffer since
+ * the last newtok().
+ */
+static int
+toklen(parser_state *p)
+{
+  const int length = p->bidx;
+
+  return length;
+}
+
+#define IS_ARG() (p->lstate == EXPR_ARG || p->lstate == EXPR_CMDARG) /* all of these macros read a parser_state *p from the scope they are expanded in */
+#define IS_END() (p->lstate == EXPR_END || p->lstate == EXPR_ENDARG || p->lstate == EXPR_ENDFN)
+#define IS_BEG() (p->lstate == EXPR_BEG || p->lstate == EXPR_MID || p->lstate == EXPR_VALUE || p->lstate == EXPR_CLASS)
+#define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c)) /* additionally needs a local named space_seen in scope */
+#define IS_LABEL_POSSIBLE() ((p->lstate == EXPR_BEG && !cmd_state) || IS_ARG()) /* additionally needs a local named cmd_state in scope */
+#define IS_LABEL_SUFFIX(n) (peek_n(p, ':',(n)) && !peek_n(p, ':', (n)+1)) /* true for a ':' at look-ahead offset n that is not followed by another ':' */
+
+/*
+ * Convert a run of octal digits to a number.
+ *
+ * Reads characters from start until len of them have been used or a
+ * character outside '0'..'7' is met, whichever comes first.  The
+ * number of characters used is stored in *retlen and the accumulated
+ * value is returned (0 when no digit was found).
+ */
+static unsigned long
+scan_oct(const char *start, int len, int *retlen)
+{
+  const char *cur;
+  unsigned long value = 0;
+
+  for (cur = start; len != 0; cur++, len--) {
+    if (*cur < '0' || *cur > '7') break;
+    value = (value << 3) | (unsigned long)(*cur - '0');
+  }
+  *retlen = cur - start;
+  return value;
+}
+
+/*
+ * Convert a run of hexadecimal digits (either letter case) to a
+ * number.
+ *
+ * Reads characters from start until len of them have been used, a NUL
+ * is met, or a character that is not a hexadecimal digit is met.  The
+ * number of characters used is stored in *retlen and the accumulated
+ * value is returned (0 when no digit was found).
+ */
+static unsigned long
+scan_hex(const char *start, int len, int *retlen)
+{
+  const char *cur;
+  unsigned long value = 0;
+
+  for (cur = start; len != 0 && *cur != '\0'; cur++, len--) {
+    int digit;
+
+    if (*cur >= '0' && *cur <= '9') {
+      digit = *cur - '0';
+    }
+    else if (*cur >= 'a' && *cur <= 'f') {
+      digit = *cur - 'a' + 10;
+    }
+    else if (*cur >= 'A' && *cur <= 'F') {
+      digit = *cur - 'A' + 10;
+    }
+    else {
+      break;
+    }
+    value = (value << 4) | (unsigned long)digit;
+  }
+  *retlen = cur - start;
+  return value;
+}
+
+/*
+ * Decode one escape sequence; the introducing backslash has already
+ * been consumed by the caller.
+ *
+ * Handles the single-letter escapes, octal constants of one to three
+ * digits, \x with one or two hex digits, \M-x, \C-x and \cx.  Any
+ * other character is returned unchanged.  Malformed sequences and end
+ * of input are reported through yyerror() and yield 0.
+ *
+ * Characters read from nextc() are kept in an int until they have
+ * been checked, so the end-of-input marker cannot be lost by
+ * narrowing to char and <ctype.h> never sees a negative value.
+ */
+static int
+read_escape(parser_state *p)
+{
+  int c;
+
+  switch (c = nextc(p)) {
+  case '\\':	/* Backslash */
+    return c;
+
+  case 'n':	/* newline */
+    return '\n';
+
+  case 't':	/* horizontal tab */
+    return '\t';
+
+  case 'r':	/* carriage-return */
+    return '\r';
+
+  case 'f':	/* form-feed */
+    return '\f';
+
+  case 'v':	/* vertical tab */
+    return '\13';
+
+  case 'a':	/* alarm(bell) */
+    return '\007';
+
+  case 'e':	/* escape */
+    return 033;
+
+  case '0': case '1': case '2': case '3': /* octal constant */
+  case '4': case '5': case '6': case '7':
+    {
+      char buf[3];
+      int i;
+
+      /* the digit that selected this case is the first digit of the
+         constant; up to two more may follow */
+      buf[0] = c;
+      for (i=1; i<3; i++) {
+        int d = nextc(p);
+
+        if (d < 0) goto eof;
+        if (d < '0' || '7' < d) {
+          pushback(p, d);
+          break;
+        }
+        buf[i] = d;
+      }
+      /* i is the number of digits stored; never scan beyond them */
+      c = scan_oct(buf, i, &i);
+    }
+    return c;
+
+  case 'x':	/* hex constant */
+    {
+      char buf[2];
+      int i;
+
+      for (i=0; i<2; i++) {
+        int d = nextc(p);
+
+        if (d < 0) goto eof;
+        if (!isxdigit(d)) {
+          pushback(p, d);
+          break;
+        }
+        buf[i] = d;
+      }
+      if (i == 0) {
+        yyerror(p, "Invalid escape character syntax");
+        return 0;
+      }
+      c = scan_hex(buf, i, &i);
+    }
+    return c;
+
+  case 'b':	/* backspace */
+    return '\010';
+
+  case 's':	/* space */
+    return ' ';
+
+  case 'M':
+    if ((c = nextc(p)) != '-') {
+      yyerror(p, "Invalid escape character syntax");
+      pushback(p, c);
+      return '\0';
+    }
+    if ((c = nextc(p)) == '\\') {
+      return read_escape(p) | 0x80;
+    }
+    else if (c == -1) goto eof;
+    else {
+      return ((c & 0xff) | 0x80);
+    }
+
+  case 'C':
+    if ((c = nextc(p)) != '-') {
+      yyerror(p, "Invalid escape character syntax");
+      pushback(p, c);
+      return '\0';
+    }
+    /* fall through: \C-x is decoded exactly like \cx */
+  case 'c':
+    if ((c = nextc(p))== '\\') {
+      c = read_escape(p);
+    }
+    else if (c == '?')
+      return 0177;
+    else if (c == -1) goto eof;
+    return c & 0x9f;
+
+  eof:
+  case -1:
+    yyerror(p, "Invalid escape character syntax");
+    return '\0';
+
+  default:
+    return c;
+  }
+}
+
+/*
+ * Lex the body of an expanding string up to the terminator term.
+ *
+ * Returns tSTRING when the terminator is reached (p->sterm is then
+ * cleared), tSTRING_PART when "#{" is met (p->sterm is set to term so
+ * that lexing of the string resumes later), or 0 after reporting an
+ * unterminated string.  In the two token cases yylval.node receives a
+ * string node holding the text collected so far and the lexer state
+ * becomes EXPR_END.
+ */
+static int
+parse_string(parser_state *p, int term)
+{
+  newtok(p);
+
+  for (;;) {
+    int c = nextc(p);
+
+    if (c == term) break;
+    if (c == -1) {
+      yyerror(p, "unterminated string meets end of file");
+      return 0;
+    }
+
+    if (c == '\\') {
+      int esc = nextc(p);
+
+      /* an escaped terminator is taken literally; everything else is
+         handed back and decoded by read_escape() */
+      if (esc != term) {
+        pushback(p, esc);
+        esc = read_escape(p);
+      }
+      tokadd(p, esc);
+      continue;
+    }
+
+    if (c == '#') {
+      int next = nextc(p);
+
+      if (next == '{') {
+        tokfix(p);
+        p->lstate = EXPR_END;
+        p->sterm = term;
+        yylval.node = new_str(p, tok(p), toklen(p));
+        return tSTRING_PART;
+      }
+      /* a '#' that does not open an interpolation is ordinary text */
+      tokadd(p, '#');
+      pushback(p, next);
+      continue;
+    }
+
+    tokadd(p, c);
+  }
+
+  tokfix(p);
+  p->lstate = EXPR_END;
+  p->sterm = 0;
+  yylval.node = new_str(p, tok(p), toklen(p));
+  return tSTRING;
+}
+
+/*
+ * Lex the body of a non-expanding string up to the terminator term.
+ *
+ * Backslash handling: backslash-newline is dropped entirely, a
+ * doubled backslash becomes one backslash, and backslash-quote becomes
+ * a quote when the terminator is a single quote.  In every other case
+ * the backslash is kept together with the character after it.
+ *
+ * Returns tSTRING with yylval.node set to the string node and the
+ * lexer state set to EXPR_END, or 0 after reporting an unterminated
+ * string.
+ */
+static int
+parse_qstring(parser_state *p, int term)
+{
+  newtok(p);
+
+  for (;;) {
+    int c = nextc(p);
+
+    if (c == term) break;
+    if (c == -1) {
+      yyerror(p, "unterminated string meets end of file");
+      return 0;
+    }
+
+    if (c == '\\') {
+      int quote_escape;
+
+      c = nextc(p);
+      if (c == '\n') continue;
+
+      quote_escape = (c == '\'' && term == '\'');
+      if (c != '\\' && !quote_escape) {
+        /* not a recognized escape: keep the backslash itself */
+        tokadd(p, '\\');
+      }
+    }
+    tokadd(p, c);
+  }
+
+  tokfix(p);
+  yylval.node = new_str(p, tok(p), toklen(p));
+  p->lstate = EXPR_END;
+  return tSTRING;
+}
+
+/*
+ * Warn that the first argument is ambiguous.  Always returns 1, which
+ * lets callers use it on the right-hand side of && purely for the
+ * warning.
+ */
+static int
+arg_ambiguous(parser_state *p)
+{
+  static const char message[] =
+    "ambiguous first argument; put parentheses or even spaces";
+
+  yywarning(p, message);
+  return 1;
+}
+
+#include "lex.def"
+
+static int
+parser_yylex(parser_state *p)
+{
+ register int c;
+ int space_seen = 0;
+ int cmd_state;
+ enum mrb_lex_state_enum last_state;
+
+ if (p->sterm) {
+ return parse_string(p, p->sterm);
+ }
+ cmd_state = p->cmd_start;
+ p->cmd_start = FALSE;
+ retry:
+ last_state = p->lstate;
+ switch (c = nextc(p)) {
+ case '\0': /* NUL */
+ case '\004': /* ^D */
+ case '\032': /* ^Z */
+ case -1: /* end of script. */
+ return 0;
+
+ /* white spaces */
+ case ' ': case '\t': case '\f': case '\r':
+ case '\13': /* '\v' */
+ space_seen = 1;
+ goto retry;
+
+ case '#': /* it's a comment */
+ skip(p, '\n');
+ /* fall through */
+ case '\n':
+ switch (p->lstate) {
+ case EXPR_BEG:
+ case EXPR_FNAME:
+ case EXPR_DOT:
+ case EXPR_CLASS:
+ case EXPR_VALUE:
+ goto retry;
+ default:
+ break;
+ }
+ while ((c = nextc(p))) {
+ switch (c) {
+ case ' ': case '\t': case '\f': case '\r':
+ case '\13': /* '\v' */
+ space_seen = 1;
+ break;
+ case '.':
+ if ((c = nextc(p)) != '.') {
+ pushback(p, c);
+ pushback(p, '.');
+ goto retry;
+ }
+ case -1: /* EOF */
+ goto normal_newline;
+ default:
+ pushback(p, c);
+ goto normal_newline;
+ }
+ }
+ normal_newline:
+ p->lstate = EXPR_BEG;
+ return '\n';
+
+ case '*':
+ if ((c = nextc(p)) == '*') {
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("**");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ c = tPOW;
+ }
+ else {
+ if (c == '=') {
+ yylval.id = intern("*");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ if (IS_SPCARG(c)) {
+ yywarning(p, "`*' interpreted as argument prefix");
+ c = tSTAR;
+ }
+ else if (IS_BEG()) {
+ c = tSTAR;
+ }
+ else {
+ c = '*';
+ }
+ }
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ return c;
+
+ case '!':
+ c = nextc(p);
+ if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
+ p->lstate = EXPR_ARG;
+ if (c == '@') {
+ return '!';
+ }
+ }
+ else {
+ p->lstate = EXPR_BEG;
+ }
+ if (c == '=') {
+ return tNEQ;
+ }
+ if (c == '~') {
+ return tNMATCH;
+ }
+ pushback(p, c);
+ return '!';
+
+ case '=':
+ if (p->column == 1) {
+ if (peeks(p, "begin\n")) {
+ skips(p, "\n=end\n");
+ }
+ goto retry;
+ }
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ if ((c = nextc(p)) == '=') {
+ if ((c = nextc(p)) == '=') {
+ return tEQQ;
+ }
+ pushback(p, c);
+ return tEQ;
+ }
+ if (c == '~') {
+ return tMATCH;
+ }
+ else if (c == '>') {
+ return tASSOC;
+ }
+ pushback(p, c);
+ return '=';
+
+ case '<':
+ last_state = p->lstate;
+ c = nextc(p);
+#if 0
+ // no heredoc supported yet
+ if (c == '<' &&
+ p->lstate != EXPR_DOT &&
+ p->lstate != EXPR_CLASS &&
+ !IS_END() &&
+ (!IS_ARG() || space_seen)) {
+ int token = heredoc_identifier();
+ if (token) return token;
+ }
+#endif
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ if (c == '=') {
+ if ((c = nextc(p)) == '>') {
+ return tCMP;
+ }
+ pushback(p, c);
+ return tLEQ;
+ }
+ if (c == '<') {
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("<<");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ return tLSHFT;
+ }
+ pushback(p, c);
+ return '<';
+
+ case '>':
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ if ((c = nextc(p)) == '=') {
+ return tGEQ;
+ }
+ if (c == '>') {
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern(">>");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ return tRSHFT;
+ }
+ pushback(p, c);
+ return '>';
+
+ case '"':
+ p->sterm = '"';
+ return tSTRING_BEG;
+
+ case '\'':
+ return parse_qstring(p, c);
+
+ case '?':
+ if (IS_END()) {
+ p->lstate = EXPR_VALUE;
+ return '?';
+ }
+ c = nextc(p);
+ if (c == -1) {
+ yyerror(p, "incomplete character syntax");
+ return 0;
+ }
+ if (isspace(c)) {
+ if (!IS_ARG()) {
+ int c2 = 0;
+ switch (c) {
+ case ' ':
+ c2 = 's';
+ break;
+ case '\n':
+ c2 = 'n';
+ break;
+ case '\t':
+ c2 = 't';
+ break;
+ case '\v':
+ c2 = 'v';
+ break;
+ case '\r':
+ c2 = 'r';
+ break;
+ case '\f':
+ c2 = 'f';
+ break;
+ }
+ if (c2) {
+ char buf[256];
+ snprintf(buf, 256, "invalid character syntax; use ?\\%c", c2);
+ yyerror(p, buf);
+ }
+ }
+ ternary:
+ pushback(p, c);
+ p->lstate = EXPR_VALUE;
+ return '?';
+ }
+ newtok(p);
+ // need support UTF-8 if configured
+ if ((isalnum(c) || c == '_')) {
+ int c2 = nextc(p);
+ pushback(p, c2);
+ if ((isalnum(c2) || c2 == '_')) {
+ goto ternary;
+ }
+ }
+ if (c == '\\') {
+ c = nextc(p);
+ if (c == 'u') {
+#if 0
+ tokadd_utf8(p);
+#endif
+ }
+ else {
+ pushback(p, c);
+ c = read_escape(p);
+ tokadd(p, c);
+ }
+ }
+ else {
+ tokadd(p, c);
+ }
+ tokfix(p);
+ yylval.node = new_str(p, tok(p), toklen(p));
+ p->lstate = EXPR_END;
+ return tCHAR;
+
+ case '&':
+ if ((c = nextc(p)) == '&') {
+ p->lstate = EXPR_BEG;
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("&&");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ return tANDOP;
+ }
+ else if (c == '=') {
+ yylval.id = intern("&");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ if (IS_SPCARG(c)) {
+ yywarning(p, "`&' interpreted as argument prefix");
+ c = tAMPER;
+ }
+ else if (IS_BEG()) {
+ c = tAMPER;
+ }
+ else {
+ c = '&';
+ }
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG;
+ }
+ return c;
+
+ case '|':
+ if ((c = nextc(p)) == '|') {
+ p->lstate = EXPR_BEG;
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("||");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ return tOROP;
+ }
+ if (c == '=') {
+ yylval.id = intern("|");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
+ p->lstate = EXPR_ARG;
+ }
+ else {
+ p->lstate = EXPR_BEG;
+ }
+ pushback(p, c);
+ return '|';
+
+ case '+':
+ c = nextc(p);
+ if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
+ p->lstate = EXPR_ARG;
+ if (c == '@') {
+ return tUPLUS;
+ }
+ pushback(p, c);
+ return '+';
+ }
+ if (c == '=') {
+ yylval.id = intern("+");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous(p))) {
+ p->lstate = EXPR_BEG;
+ pushback(p, c);
+ if (c != -1 && ISDIGIT(c)) {
+ c = '+';
+ goto start_num;
+ }
+ return tUPLUS;
+ }
+ p->lstate = EXPR_BEG;
+ pushback(p, c);
+ return '+';
+
+ case '-':
+ c = nextc(p);
+ if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
+ p->lstate = EXPR_ARG;
+ if (c == '@') {
+ return tUMINUS;
+ }
+ pushback(p, c);
+ return '-';
+ }
+ if (c == '=') {
+ yylval.id = intern("-");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ if (c == '>') {
+ p->lstate = EXPR_ARG;
+ return tLAMBDA;
+ }
+ if (IS_BEG() || (IS_SPCARG(c) && arg_ambiguous(p))) {
+ p->lstate = EXPR_BEG;
+ pushback(p, c);
+ if (c != -1 && ISDIGIT(c)) {
+ return tUMINUS_NUM;
+ }
+ return tUMINUS;
+ }
+ p->lstate = EXPR_BEG;
+ pushback(p, c);
+ return '-';
+
+ case '.':
+ p->lstate = EXPR_BEG;
+ if ((c = nextc(p)) == '.') {
+ if ((c = nextc(p)) == '.') {
+ return tDOT3;
+ }
+ pushback(p, c);
+ return tDOT2;
+ }
+ pushback(p, c);
+ if (c != -1 && ISDIGIT(c)) {
+ yyerror(p, "no .<digit> floating literal anymore; put 0 before dot");
+ }
+ p->lstate = EXPR_DOT;
+ return '.';
+
+ start_num:
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ {
+ int is_float, seen_point, seen_e, nondigit;
+
+ is_float = seen_point = seen_e = nondigit = 0;
+ p->lstate = EXPR_END;
+ newtok(p);
+ if (c == '-' || c == '+') {
+ tokadd(p, c);
+ c = nextc(p);
+ }
+ if (c == '0') {
+#define no_digits() do {yyerror(p,"numeric literal without digits"); return 0;} while (0)
+ int start = toklen(p);
+ c = nextc(p);
+ if (c == 'x' || c == 'X') {
+ /* hexadecimal */
+ c = nextc(p);
+ if (c != -1 && ISXDIGIT(c)) {
+ do {
+ if (c == '_') {
+ if (nondigit) break;
+ nondigit = c;
+ continue;
+ }
+ if (!ISXDIGIT(c)) break;
+ nondigit = 0;
+ tokadd(p, c);
+ } while ((c = nextc(p)) != -1);
+ }
+ pushback(p, c);
+ tokfix(p);
+ if (toklen(p) == start) {
+ no_digits();
+ }
+ else if (nondigit) goto trailing_uc;
+ yylval.node = new_int(p, tok(p), 16);
+ return tINTEGER;
+ }
+ if (c == 'b' || c == 'B') {
+ /* binary */
+ c = nextc(p);
+ if (c == '0' || c == '1') {
+ do {
+ if (c == '_') {
+ if (nondigit) break;
+ nondigit = c;
+ continue;
+ }
+ if (c != '0' && c != '1') break;
+ nondigit = 0;
+ tokadd(p, c);
+ } while ((c = nextc(p)) != -1);
+ }
+ pushback(p, c);
+ tokfix(p);
+ if (toklen(p) == start) {
+ no_digits();
+ }
+ else if (nondigit) goto trailing_uc;
+ yylval.node = new_int(p, tok(p), 2);
+ return tINTEGER;
+ }
+ if (c == 'd' || c == 'D') {
+ /* decimal */
+ c = nextc(p);
+ if (c != -1 && ISDIGIT(c)) {
+ do {
+ if (c == '_') {
+ if (nondigit) break;
+ nondigit = c;
+ continue;
+ }
+ if (!ISDIGIT(c)) break;
+ nondigit = 0;
+ tokadd(p, c);
+ } while ((c = nextc(p)) != -1);
+ }
+ pushback(p, c);
+ tokfix(p);
+ if (toklen(p) == start) {
+ no_digits();
+ }
+ else if (nondigit) goto trailing_uc;
+ yylval.node = new_int(p, tok(p), 10);
+ return tINTEGER;
+ }
+ if (c == '_') {
+ /* 0_0 */
+ goto octal_number;
+ }
+ if (c == 'o' || c == 'O') {
+ /* prefixed octal */
+ c = nextc(p);
+ if (c == -1 || c == '_' || !ISDIGIT(c)) {
+ no_digits();
+ }
+ }
+ if (c >= '0' && c <= '7') {
+ /* octal */
+ octal_number:
+ do {
+ if (c == '_') {
+ if (nondigit) break;
+ nondigit = c;
+ continue;
+ }
+ if (c < '0' || c > '9') break;
+ if (c > '7') goto invalid_octal;
+ nondigit = 0;
+ tokadd(p, c);
+ } while ((c = nextc(p)) != -1);
+
+ if (toklen(p) > start) {
+ pushback(p, c);
+ tokfix(p);
+ if (nondigit) goto trailing_uc;
+ yylval.node = new_int(p, tok(p), 8);
+ return tINTEGER;
+ }
+ if (nondigit) {
+ pushback(p, c);
+ goto trailing_uc;
+ }
+ }
+ if (c > '7' && c <= '9') {
+ invalid_octal:
+ yyerror(p, "Invalid octal digit");
+ }
+ else if (c == '.' || c == 'e' || c == 'E') {
+ tokadd(p, '0');
+ }
+ else {
+ pushback(p, c);
+ yylval.node = new_int(p, "0", 10);
+ return tINTEGER;
+ }
+ }
+
+ for (;;) {
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ nondigit = 0;
+ tokadd(p, c);
+ break;
+
+ case '.':
+ if (nondigit) goto trailing_uc;
+ if (seen_point || seen_e) {
+ goto decode_num;
+ }
+ else {
+ int c0 = nextc(p);
+ if (c0 == -1 || !ISDIGIT(c0)) {
+ pushback(p, c0);
+ goto decode_num;
+ }
+ c = c0;
+ }
+ tokadd(p, '.');
+ tokadd(p, c);
+ is_float++;
+ seen_point++;
+ nondigit = 0;
+ break;
+
+ case 'e':
+ case 'E':
+ if (nondigit) {
+ pushback(p, c);
+ c = nondigit;
+ goto decode_num;
+ }
+ if (seen_e) {
+ goto decode_num;
+ }
+ tokadd(p, c);
+ seen_e++;
+ is_float++;
+ nondigit = c;
+ c = nextc(p);
+ if (c != '-' && c != '+') continue;
+ tokadd(p, c);
+ nondigit = c;
+ break;
+
+ case '_': /* `_' in number just ignored */
+ if (nondigit) goto decode_num;
+ nondigit = c;
+ break;
+
+ default:
+ goto decode_num;
+ }
+ c = nextc(p);
+ }
+
+ decode_num:
+ pushback(p, c);
+ if (nondigit) {
+ trailing_uc:
+ yyerror_i(p, "trailing `%c' in number", nondigit);
+ }
+ tokfix(p);
+ if (is_float) {
+ strtod(tok(p), 0);
+ if (errno == ERANGE) {
+ yywarning_s(p, "float %s out of range", tok(p));
+ errno = 0;
+ }
+ yylval.node = new_float(p, tok(p));
+ return tFLOAT;
+ }
+ yylval.node = new_int(p, tok(p), 10);
+ return tINTEGER;
+ }
+
+ case ')':
+ case ']':
+ p->paren_nest--;
+ case '}':
+ COND_LEXPOP();
+ CMDARG_LEXPOP();
+ if (c == ')')
+ p->lstate = EXPR_ENDFN;
+ else
+ p->lstate = EXPR_ENDARG;
+ return c;
+
+ case ':':
+ c = nextc(p);
+ if (c == ':') {
+ if (IS_BEG() || p->lstate == EXPR_CLASS || IS_SPCARG(-1)) {
+ p->lstate = EXPR_BEG;
+ return tCOLON3;
+ }
+ p->lstate = EXPR_DOT;
+ return tCOLON2;
+ }
+ if (IS_END() || ISSPACE(c)) {
+ pushback(p, c);
+ p->lstate = EXPR_BEG;
+ return ':';
+ }
+ switch (c) {
+ case '\'':
+#if 0
+ p->lex_strterm = new_strterm(p, str_ssym, c, 0);
+#endif
+ break;
+ case '"':
+#if 0
+ p->lex_strterm = new_strterm(p, str_dsym, c, 0);
+#endif
+ break;
+ default:
+ pushback(p, c);
+ break;
+ }
+ p->lstate = EXPR_FNAME;
+ return tSYMBEG;
+
+ case '/':
+ if (IS_BEG()) {
+#if 0
+ p->lex_strterm = new_strterm(p, str_regexp, '/', 0);
+#endif
+ return tREGEXP_BEG;
+ }
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("/");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ pushback(p, c);
+ if (IS_SPCARG(c)) {
+ arg_ambiguous(p);
+#if 0
+ p->lex_strterm = new_strterm(p, str_regexp, '/', 0);
+#endif
+ return tREGEXP_BEG;
+ }
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ return '/';
+
+ case '^':
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("^");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ pushback(p, c);
+ return '^';
+
+ case ';':
+ p->lstate = EXPR_BEG;
+ return ';';
+
+ case ',':
+ p->lstate = EXPR_BEG;
+ return ',';
+
+ case '~':
+ if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
+ if ((c = nextc(p)) != '@') {
+ pushback(p, c);
+ }
+ p->lstate = EXPR_ARG;
+ }
+ else {
+ p->lstate = EXPR_BEG;
+ }
+ return '~';
+
+ case '(':
+ if (IS_BEG()) {
+ c = tLPAREN;
+ }
+ else if (IS_SPCARG(-1)) {
+ c = tLPAREN_ARG;
+ }
+ p->paren_nest++;
+ COND_PUSH(0);
+ CMDARG_PUSH(0);
+ p->lstate = EXPR_BEG;
+ return c;
+
+ case '[':
+ p->paren_nest++;
+ if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
+ p->lstate = EXPR_ARG;
+ if ((c = nextc(p)) == ']') {
+ if ((c = nextc(p)) == '=') {
+ return tASET;
+ }
+ pushback(p, c);
+ return tAREF;
+ }
+ pushback(p, c);
+ return '[';
+ }
+ else if (IS_BEG()) {
+ c = tLBRACK;
+ }
+ else if (IS_ARG() && space_seen) {
+ c = tLBRACK;
+ }
+ p->lstate = EXPR_BEG;
+ COND_PUSH(0);
+ CMDARG_PUSH(0);
+ return c;
+
+ case '{':
+ if (p->lpar_beg && p->lpar_beg == p->paren_nest) {
+ p->lstate = EXPR_BEG;
+ p->lpar_beg = 0;
+ p->paren_nest--;
+ COND_PUSH(0);
+ CMDARG_PUSH(0);
+ return tLAMBEG;
+ }
+ if (IS_ARG() || p->lstate == EXPR_END || p->lstate == EXPR_ENDFN)
+ c = '{'; /* block (primary) */
+ else if (p->lstate == EXPR_ENDARG)
+ c = tLBRACE_ARG; /* block (expr) */
+ else
+ c = tLBRACE; /* hash */
+ COND_PUSH(0);
+ CMDARG_PUSH(0);
+ p->lstate = EXPR_BEG;
+ return c;
+
+ case '\\':
+ c = nextc(p);
+ if (c == '\n') {
+ space_seen = 1;
+ goto retry; /* skip \\n */
+ }
+ pushback(p, c);
+ return '\\';
+
+ case '%':
+ if (IS_BEG()) {
+ int term;
+#if 0
+ int paren;
+#endif
+
+ c = nextc(p);
+ quotation:
+ if (c == -1 || !ISALNUM(c)) {
+ term = c;
+ c = 'Q';
+ }
+ else {
+ term = nextc(p);
+ if (isalnum(term)) {
+ yyerror(p, "unknown type of %string");
+ return 0;
+ }
+ }
+ if (c == -1 || term == -1) {
+ yyerror(p, "unterminated quoted string meets end of file");
+ return 0;
+ }
+#if 0
+ paren = term;
+#endif
+ if (term == '(') term = ')';
+ else if (term == '[') term = ']';
+ else if (term == '{') term = '}';
+ else if (term == '<') term = '>';
+#if 0
+ else paren = 0;
+#endif
+
+ switch (c) {
+ case 'Q':
+#if 0
+ p->lex_strterm = new_strterm(p, str_dquote, term, paren);
+#endif
+ return tSTRING_BEG;
+
+ case 'q':
+#if 0
+ p->lex_strterm = new_strterm(p, str_squote, term, paren);
+#endif
+ return tSTRING_BEG;
+
+ case 'W':
+#if 0
+ p->lex_strterm = new_strterm(p, str_dword, term, paren);
+#endif
+ do {c = nextc(p);} while (isspace(c));
+ pushback(p, c);
+ return tWORDS_BEG;
+
+ case 'w':
+#if 0
+ p->lex_strterm = new_strterm(p, str_sword, term, paren);
+#endif
+ do {c = nextc(p);} while (isspace(c));
+ pushback(p, c);
+ return tQWORDS_BEG;
+
+ case 'r':
+#if 0
+ p->lex_strterm = new_strterm(p, str_regexp, term, paren);
+#endif
+ return tREGEXP_BEG;
+
+ case 's':
+#if 0
+ p->lex_strterm = new_strterm(p, str_ssym, term, paren);
+#endif
+ p->lstate = EXPR_FNAME;
+ return tSYMBEG;
+
+ default:
+ yyerror(p, "unknown type of %string");
+ return 0;
+ }
+ }
+ if ((c = nextc(p)) == '=') {
+ yylval.id = intern("%");
+ p->lstate = EXPR_BEG;
+ return tOP_ASGN;
+ }
+ if (IS_SPCARG(c)) {
+ goto quotation;
+ }
+ switch (p->lstate) {
+ case EXPR_FNAME: case EXPR_DOT:
+ p->lstate = EXPR_ARG; break;
+ default:
+ p->lstate = EXPR_BEG; break;
+ }
+ pushback(p, c);
+ return '%';
+
+ case '$':
+ p->lstate = EXPR_END;
+ newtok(p);
+ c = nextc(p);
+ switch (c) {
+ case '_': /* $_: last read line string */
+ c = nextc(p);
+ pushback(p, c);
+ c = '_';
+ /* fall through */
+ case '~': /* $~: match-data */
+ case '*': /* $*: argv */
+ case '$': /* $$: pid */
+ case '?': /* $?: last status */
+ case '!': /* $!: error string */
+ case '@': /* $@: error position */
+ case '/': /* $/: input record separator */
+ case '\\': /* $\: output record separator */
+ case ';': /* $;: field separator */
+ case ',': /* $,: output field separator */
+ case '.': /* $.: last read line number */
+ case '=': /* $=: ignorecase */
+ case ':': /* $:: load path */
+ case '<': /* $<: reading filename */
+ case '>': /* $>: default output handle */
+ case '\"': /* $": already loaded files */
+ tokadd(p, '$');
+ tokadd(p, c);
+ tokfix(p);
+ yylval.id = intern(tok(p));
+ return tGVAR;
+
+ case '-':
+ tokadd(p, '$');
+ tokadd(p, c);
+ c = nextc(p);
+ pushback(p, c);
+ gvar:
+ tokfix(p);
+ yylval.id = intern(tok(p));
+ return tGVAR;
+
+ case '&': /* $&: last match */
+ case '`': /* $`: string before last match */
+ case '\'': /* $': string after last match */
+ case '+': /* $+: string matches last paren. */
+ if (last_state == EXPR_FNAME) {
+ tokadd(p, '$');
+ tokadd(p, c);
+ goto gvar;
+ }
+ yylval.node = new_back_ref(p, c);
+ return tBACK_REF;
+
+ case '1': case '2': case '3':
+ case '4': case '5': case '6':
+ case '7': case '8': case '9':
+ tokadd(p, '$');
+ do {
+ tokadd(p, c);
+ c = nextc(p);
+ } while (c != -1 && isdigit(c));
+ pushback(p, c);
+ if (last_state == EXPR_FNAME) goto gvar;
+ tokfix(p);
+ yylval.node = new_nth_ref(p, atoi(tok(p)+1));
+ return tNTH_REF;
+
+ default:
+ if (!identchar(c)) {
+ pushback(p, c);
+ return '$';
+ }
+ case '0':
+ tokadd(p, '$');
+ }
+ break;
+
+ case '@':
+ c = nextc(p);
+ newtok(p);
+ tokadd(p, '@');
+ if (c == '@') {
+ tokadd(p, '@');
+ c = nextc(p);
+ }
+ if (c != -1 && isdigit(c)) {
+ if (p->bidx == 1) {
+ yyerror_i(p, "`@%c' is not allowed as an instance variable name", c);
+ }
+ else {
+ yyerror_i(p, "`@@%c' is not allowed as a class variable name", c);
+ }
+ return 0;
+ }
+ if (!identchar(c)) {
+ pushback(p, c);
+ return '@';
+ }
+ break;
+
+ case '_':
+ newtok(p);
+ break;
+
+ default:
+ if (!identchar(c)) {
+ yyerror_i(p, "Invalid char `\\x%02X' in expression", c);
+ goto retry;
+ }
+
+ newtok(p);
+ break;
+ }
+
+ do {
+ tokadd(p, c);
+ c = nextc(p);
+ if (c < 0) break;
+ } while (identchar(c));
+
+ switch (tok(p)[0]) {
+ case '@': case '$':
+ pushback(p, c);
+ break;
+ default:
+ if ((c == '!' || c == '?') && !peek(p, '=')) {
+ tokadd(p, c);
+ }
+ else {
+ pushback(p, c);
+ }
+ }
+ tokfix(p);
+ {
+ int result = 0;
+
+ last_state = p->lstate;
+ switch (tok(p)[0]) {
+ case '$':
+ p->lstate = EXPR_END;
+ result = tGVAR;
+ break;
+ case '@':
+ p->lstate = EXPR_END;
+ if (tok(p)[1] == '@')
+ result = tCVAR;
+ else
+ result = tIVAR;
+ break;
+
+ default:
+ if (toklast(p) == '!' || toklast(p) == '?') {
+ result = tFID;
+ }
+ else {
+ if (p->lstate == EXPR_FNAME) {
+ if ((c = nextc(p)) == '=' && !peek(p, '~') && !peek(p, '>') &&
+ (!peek(p, '=') || (peek_n(p, '>', 1)))) {
+ result = tIDENTIFIER;
+ tokadd(p, c);
+ tokfix(p);
+ }
+ else {
+ pushback(p, c);
+ }
+ }
+ if (result == 0 && isupper(tok(p)[0])) {
+ result = tCONSTANT;
+ }
+ else {
+ result = tIDENTIFIER;
+ }
+ }
+
+ if (IS_LABEL_POSSIBLE()) {
+ if (IS_LABEL_SUFFIX(0)) {
+ p->lstate = EXPR_BEG;
+ nextc(p);
+ tokfix(p);
+ yylval.id = intern(tok(p));
+ return tLABEL;
+ }
+ }
+ if (p->lstate != EXPR_DOT) {
+ const struct kwtable *kw;
+
+ /* See if it is a reserved word. */
+ kw = mrb_reserved_word(tok(p), toklen(p));
+ if (kw) {
+ enum mrb_lex_state_enum state = p->lstate;
+ p->lstate = kw->state;
+ if (state == EXPR_FNAME) {
+ yylval.id = intern(kw->name);
+ return kw->id[0];
+ }
+ if (kw->id[0] == keyword_do) {
+ if (p->lpar_beg && p->lpar_beg == p->paren_nest) {
+ p->lpar_beg = 0;
+ p->paren_nest--;
+ return keyword_do_LAMBDA;
+ }
+ if (COND_P()) return keyword_do_cond;
+ if (CMDARG_P() && state != EXPR_CMDARG)
+ return keyword_do_block;
+ if (state == EXPR_ENDARG || state == EXPR_BEG)
+ return keyword_do_block;
+ return keyword_do;
+ }
+ if (state == EXPR_BEG || state == EXPR_VALUE)
+ return kw->id[0];
+ else {
+ if (kw->id[0] != kw->id[1])
+ p->lstate = EXPR_BEG;
+ return kw->id[1];
+ }
+ }
+ }
+
+ if (IS_BEG() ||
+ p->lstate == EXPR_DOT ||
+ IS_ARG()) {
+ if (cmd_state) {
+ p->lstate = EXPR_CMDARG;
+ }
+ else {
+ p->lstate = EXPR_ARG;
+ }
+ }
+ else if (p->lstate == EXPR_FNAME) {
+ p->lstate = EXPR_ENDFN;
+ }
+ else {
+ p->lstate = EXPR_END;
+ }
+ }
+ {
+ mrb_sym ident = intern(tok(p));
+
+ yylval.id = ident;
+#if 0
+ if (last_state != EXPR_DOT && islower(tok(p)[0]) && lvar_defined(ident)) {
+ p->lstate = EXPR_END;
+ }
+#endif
+ }
+ return result;
+ }
+}
+
+/*
+ * Lexer entry point: record in the parser state where the semantic value
+ * of the next token has to be stored, then scan that token.
+ * Returns whatever token code parser_yylex() produced.
+ */
+static int
+yylex(void *lval, parser_state *p)
+{
+  p->ylval = lval;
+  return parser_yylex(p);
+}
+
+/*
+ * Run yyparse() over the input attached to `p` and leave the final
+ * syntax tree in p->tree.
+ *
+ * - When something longjmp()s back to p->jmp, the failure is reported as
+ *   a memory allocation error, p->nerr is incremented and both trees are
+ *   cleared.
+ * - When the grammar produced no main tree, p->begin_tree is used if it
+ *   exists, otherwise a nil node.
+ * - When both trees exist, the begin tree is wrapped with new_begin() and
+ *   the main tree is appended to it.
+ */
+static void
+start_parser(parser_state *p)
+{
+  node *tree;
+
+  if (setjmp(p->jmp) != 0) {
+    yyerror(p, "memory allocation error");
+    p->nerr++;
+    p->tree = p->begin_tree = 0;
+    return;
+  }
+  yyparse(p);
+  tree = p->tree;
+  if (!tree) {
+    if (p->begin_tree) {
+      tree = p->begin_tree;
+    }
+    else {
+      tree = new_nil(p);
+    }
+  }
+  else if (p->begin_tree) {
+    tree = new_begin(p, p->begin_tree);
+    append(tree, p->tree);
+  }
+  /* publish the result: previously `tree` was computed and then dropped,
+     so callers never saw the nil node or the begin tree */
+  p->tree = tree;
+}
+
+/*
+ * Create a parser state that lives inside its own memory pool.
+ *
+ * The state is zero-filled, bound to `mrb` and the pool, starts at line 1
+ * and expects a command at the start of input.  Returns 0 when either the
+ * pool or the state itself cannot be allocated; in the latter case the
+ * freshly opened pool is closed again so nothing is leaked.
+ */
+static parser_state*
+parser_new(mrb_state *mrb)
+{
+  mrb_pool *pool;
+  parser_state *p;
+
+  pool = mrb_pool_open(mrb);
+  if (!pool) return 0;
+  p = mrb_pool_alloc(pool, sizeof(parser_state));
+  if (!p) {
+    /* nobody else knows about the pool yet, so release it here */
+    mrb_pool_close(pool);
+    return 0;
+  }
+
+  memset(p, 0, sizeof(parser_state));
+  p->mrb = mrb;
+  p->pool = pool;
+
+  p->cmd_start = TRUE;
+  p->in_def = p->in_single = FALSE;
+
+  p->lineno = 1;
+#if defined(PARSER_TEST) || defined(PARSER_DEBUG)
+  yydebug = 1;
+#endif
+
+  return p;
+}
+
+/*
+ * Parse the program read from the stream `f`.
+ * Returns the parser state holding the result, or 0 when no parser state
+ * could be created.
+ */
+parser_state*
+mrb_parse_file(mrb_state *mrb, FILE *f)
+{
+  parser_state *parser = parser_new(mrb);
+
+  if (parser) {
+    /* stream input: no in-memory source buffer */
+    parser->s = parser->send = NULL;
+    parser->f = f;
+
+    start_parser(parser);
+  }
+  return parser;
+}
+
+/*
+ * Parse the `len` bytes of program text starting at `s`.
+ * Returns the parser state holding the result, or 0 when no parser state
+ * could be created.
+ */
+parser_state*
+mrb_parse_nstring(mrb_state *mrb, char *s, size_t len)
+{
+  parser_state *parser = parser_new(mrb);
+
+  if (parser) {
+    /* in-memory input: [s, s+len), no stream */
+    parser->s = s;
+    parser->send = s + len;
+    parser->f = NULL;
+
+    start_parser(parser);
+  }
+  return parser;
+}
+
+/*
+ * Parse the NUL-terminated program text `s`; the length is taken with
+ * strlen(), so `s` must not be NULL.
+ */
+parser_state*
+mrb_parse_string(mrb_state *mrb, char *s)
+{
+  size_t len = strlen(s);
+
+  return mrb_parse_nstring(mrb, s, len);
+}
+
+#define PARSER_DUMP
+
+void parser_dump(mrb_state *mrb, node *tree, int offset);
+int mrb_generate_code(mrb_state*, mrb_ast_node*);
+
+/*
+ * Parse the program read from `f` and generate code for it.
+ *
+ * Returns the value of mrb_generate_code(), or -1 when the parser could
+ * not be created, produced no tree, or reported errors.  The parser pool
+ * is closed on every path once a parser exists.
+ */
+int
+mrb_compile_file(mrb_state * mrb, FILE *f)
+{
+  parser_state *p;
+  int n = -1;
+
+  p = mrb_parse_file(mrb, f);
+  if (!p) return -1;
+  if (p->tree && !p->nerr) {
+#ifdef PARSER_DUMP
+    parser_dump(mrb, p->tree, 0);
+#endif
+    n = mrb_generate_code(mrb, p->tree);
+  }
+  /* the parser state lives in this pool, so this releases it as well;
+     previously the failure paths returned without closing it */
+  mrb_pool_close(p->pool);
+
+  return n;
+}
+
+/*
+ * Get or set the file name stored in the parser.
+ * With a non-NULL `s` a strdup() copy of it becomes the new name; with a
+ * NULL `s` the stored name is left alone.  Returns the stored name.
+ *
+ * NOTE(review): the previous copy is not released when the name is
+ * replaced -- confirm who owns p->filename before adding a free().
+ */
+const char*
+mrb_parser_filename(parser_state *p, const char *s)
+{
+  if (!s) {
+    return p->filename;
+  }
+  p->filename = strdup(s);
+  return p->filename;
+}
+
+/*
+ * Get or set the parser's current line number.
+ * A positive `n` is stored as the new line number; zero or a negative
+ * value only queries.  Returns the line number in effect afterwards.
+ */
+int
+mrb_parser_lineno(struct mrb_parser_state *p, int n)
+{
+  if (n > 0) {
+    p->lineno = n;
+  }
+  return p->lineno;
+}
+
+/*
+ * Parse the `len` bytes of program text at `s` and generate code for it.
+ *
+ * Returns the value of mrb_generate_code(), or -1 when the parser could
+ * not be created, produced no tree, or reported errors.  The parser pool
+ * is closed on every path once a parser exists.
+ */
+int
+mrb_compile_nstring(mrb_state *mrb, char *s, size_t len)
+{
+  parser_state *p;
+  int n = -1;
+
+  p = mrb_parse_nstring(mrb, s, len);
+  if (!p) return -1;
+  if (p->tree && !p->nerr) {
+#ifdef PARSER_DUMP
+    parser_dump(mrb, p->tree, 0);
+#endif
+    n = mrb_generate_code(mrb, p->tree);
+  }
+  /* the parser state lives in this pool, so this releases it as well;
+     previously the failure paths returned without closing it */
+  mrb_pool_close(p->pool);
+
+  return n;
+}
+
+/*
+ * Compile the NUL-terminated program text `s`; the length is taken with
+ * strlen(), so `s` must not be NULL.  Returns what mrb_compile_nstring()
+ * returns.
+ */
+int
+mrb_compile_string(mrb_state *mrb, char *s)
+{
+  size_t len = strlen(s);
+
+  return mrb_compile_nstring(mrb, s, len);
+}
+
+/*
+ * Write the indentation for nesting depth `offset` to stdout: two spaces
+ * per level.  A zero or negative depth writes nothing (the old countdown
+ * loop never reached zero for a negative depth).
+ */
+static void
+dump_prefix(int offset)
+{
+  int level;
+
+  for (level = 0; level < offset; level++) {
+    putc(' ', stdout);
+    putc(' ', stdout);
+  }
+}
+
+/*
+ * Dump every element of the list `tree` (walking the cdr links and
+ * printing each car) at nesting depth `offset`.
+ */
+static void
+dump_recur(mrb_state *mrb, node *tree, int offset)
+{
+  node *cell;
+
+  for (cell = tree; cell; cell = cell->cdr) {
+    parser_dump(mrb, cell->car, offset);
+  }
+}
+
+/*
+ * Print a human-readable outline of the syntax tree `tree` on stdout.
+ *
+ *   mrb    - interpreter state, used to turn symbols back into names
+ *   tree   - node to print; nothing is printed for a NULL tree
+ *   offset - nesting depth, indented two spaces per level
+ *
+ * The car of a node holds its type tag and the cdr its payload, whose
+ * layout depends on the tag.  Tags without a dedicated case are printed
+ * numerically.
+ */
+void
+parser_dump(mrb_state *mrb, node *tree, int offset)
+{
+  int n;
+
+  if (!tree) return;
+ again:
+  dump_prefix(offset);
+  n = (int)tree->car;
+  tree = tree->cdr;
+  switch (n) {
+  case NODE_BEGIN:
+    printf("NODE_BEGIN:\n");
+    dump_recur(mrb, tree, offset+1);
+    break;
+
+  case NODE_RESCUE:
+    printf("NODE_RESCUE:\n");
+    if (tree->car) {
+      dump_prefix(offset+1);
+      printf("body:\n");
+      parser_dump(mrb, tree->car, offset+2);
+    }
+    tree = tree->cdr;
+    if (tree->car) {
+      node *n2 = tree->car;
+
+      dump_prefix(offset+1);
+      printf("rescue:\n");
+      while (n2) {
+        node *n3 = n2->car;
+        if (n3->car) {
+          dump_prefix(offset+2);
+          printf("handle classes:\n");
+          dump_recur(mrb, n3->car, offset+3);
+        }
+        if (n3->cdr->car) {
+          dump_prefix(offset+2);
+          printf("exc_var:\n");
+          parser_dump(mrb, n3->cdr->car, offset+3);
+        }
+        if (n3->cdr->cdr->car) {
+          dump_prefix(offset+2);
+          printf("rescue body:\n");
+          parser_dump(mrb, n3->cdr->cdr->car, offset+3);
+        }
+        n2 = n2->cdr;
+      }
+    }
+    tree = tree->cdr;
+    if (tree->car) {
+      dump_prefix(offset+1);
+      printf("else:\n");
+      parser_dump(mrb, tree->car, offset+2);
+    }
+    break;
+
+  case NODE_ENSURE:
+    printf("NODE_ENSURE:\n");
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("ensure:\n");
+    parser_dump(mrb, tree->cdr, offset+2);
+    break;
+
+  case NODE_LAMBDA:
+    /* a lambda shares the block layout but is reported under its own
+       tag (it used to print "NODE_BLOCK:" twice) */
+    printf("NODE_LAMBDA:\n");
+    goto block;
+
+  case NODE_BLOCK:
+    printf("NODE_BLOCK:\n");
+  block:
+    tree = tree->cdr;
+    if (tree->car) {
+      node *n = tree->car;
+
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("optional args:\n");
+        {
+          node *n2 = n->car;
+
+          while (n2) {
+            dump_prefix(offset+2);
+            printf("%s=", mrb_sym2name(mrb, (mrb_sym)n2->car->car));
+            parser_dump(mrb, n2->car->cdr, 0);
+            n2 = n2->cdr;
+          }
+        }
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("rest=*%s\n", mrb_sym2name(mrb, (mrb_sym)n->car));
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("post mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n) {
+        dump_prefix(offset+1);
+        printf("blk=&%s\n", mrb_sym2name(mrb, (mrb_sym)n));
+      }
+    }
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr->car, offset+2);
+    break;
+
+  case NODE_IF:
+    printf("NODE_IF:\n");
+    dump_prefix(offset+1);
+    printf("cond:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("then:\n");
+    parser_dump(mrb, tree->cdr->car, offset+2);
+    if (tree->cdr->cdr->car) {
+      dump_prefix(offset+1);
+      printf("else:\n");
+      parser_dump(mrb, tree->cdr->cdr->car, offset+2);
+    }
+    break;
+
+  case NODE_AND:
+    printf("NODE_AND:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    parser_dump(mrb, tree->cdr, offset+1);
+    break;
+
+  case NODE_OR:
+    printf("NODE_OR:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    parser_dump(mrb, tree->cdr, offset+1);
+    break;
+
+  case NODE_CASE:
+    printf("NODE_CASE:\n");
+    if (tree->car) {
+      parser_dump(mrb, tree->car, offset+1);
+    }
+    tree = tree->cdr;
+    while (tree) {
+      dump_prefix(offset+1);
+      printf("case:\n");
+      dump_recur(mrb, tree->car->car, offset+2);
+      dump_prefix(offset+1);
+      printf("body:\n");
+      parser_dump(mrb, tree->car->cdr, offset+2);
+      tree = tree->cdr;
+    }
+    break;
+
+  case NODE_WHILE:
+    printf("NODE_WHILE:\n");
+    dump_prefix(offset+1);
+    printf("cond:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr, offset+2);
+    break;
+
+  case NODE_UNTIL:
+    printf("NODE_UNTIL:\n");
+    dump_prefix(offset+1);
+    printf("cond:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr, offset+2);
+    break;
+
+  case NODE_FOR:
+    printf("NODE_FOR:\n");
+    dump_prefix(offset+1);
+    printf("var:\n");
+    {
+      node *n2 = tree->car;
+
+      if (n2->car) {
+        dump_prefix(offset+2);
+        printf("pre:\n");
+        dump_recur(mrb, n2->car, offset+3);
+      }
+      n2 = n2->cdr;
+      if (n2) {
+        if (n2->car) {
+          dump_prefix(offset+2);
+          printf("rest:\n");
+          parser_dump(mrb, n2->car, offset+3);
+        }
+        n2 = n2->cdr;
+        if (n2) {
+          if (n2->car) {
+            dump_prefix(offset+2);
+            printf("post:\n");
+            dump_recur(mrb, n2->car, offset+3);
+          }
+        }
+      }
+    }
+    tree = tree->cdr;
+    dump_prefix(offset+1);
+    printf("in:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    tree = tree->cdr;
+    dump_prefix(offset+1);
+    printf("do:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    break;
+
+  case NODE_SCOPE:
+    printf("NODE_SCOPE:\n");
+    dump_prefix(offset+1);
+    printf("local variables:\n");
+    {
+      node *n2 = tree->car;
+
+      while (n2) {
+        dump_prefix(offset+2);
+        printf("%s\n", mrb_sym2name(mrb, (mrb_sym)n2->car));
+        n2 = n2->cdr;
+      }
+    }
+    /* the scope body is dumped by looping instead of recursing */
+    tree = tree->cdr;
+    offset++;
+    goto again;
+
+  case NODE_FCALL:
+  case NODE_CALL:
+    printf("NODE_CALL:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    dump_prefix(offset+1);
+    printf("method='%s' (%d)\n",
+           mrb_sym2name(mrb, (mrb_sym)tree->cdr->car),
+           (int)tree->cdr->car);
+    tree = tree->cdr->cdr->car;
+    if (tree) {
+      dump_prefix(offset+1);
+      printf("args:\n");
+      dump_recur(mrb, tree->car, offset+2);
+      if (tree->cdr) {
+        dump_prefix(offset+1);
+        printf("block:\n");
+        parser_dump(mrb, tree->cdr, offset+2);
+      }
+    }
+    break;
+
+  case NODE_DOT2:
+    printf("NODE_DOT2:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    parser_dump(mrb, tree->cdr, offset+1);
+    break;
+
+  case NODE_DOT3:
+    printf("NODE_DOT3:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    parser_dump(mrb, tree->cdr, offset+1);
+    break;
+
+  case NODE_COLON2:
+    printf("NODE_COLON2:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    dump_prefix(offset+1);
+    printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->cdr));
+    break;
+
+  case NODE_COLON3:
+    printf("NODE_COLON3:\n");
+    dump_prefix(offset+1);
+    printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_ARRAY:
+    printf("NODE_ARRAY:\n");
+    dump_recur(mrb, tree, offset+1);
+    break;
+
+  case NODE_HASH:
+    printf("NODE_HASH:\n");
+    while (tree) {
+      dump_prefix(offset+1);
+      printf("key:\n");
+      parser_dump(mrb, tree->car->car, offset+2);
+      dump_prefix(offset+1);
+      printf("value:\n");
+      parser_dump(mrb, tree->car->cdr, offset+2);
+      tree = tree->cdr;
+    }
+    break;
+
+  case NODE_SPLAT:
+    printf("NODE_SPLAT:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_ASGN:
+    printf("NODE_ASGN:\n");
+    dump_prefix(offset+1);
+    printf("lhs:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    dump_prefix(offset+1);
+    printf("rhs:\n");
+    parser_dump(mrb, tree->cdr, offset+2);
+    break;
+
+  case NODE_MASGN:
+    printf("NODE_MASGN:\n");
+    dump_prefix(offset+1);
+    printf("mlhs:\n");
+    {
+      node *n2 = tree->car;
+
+      if (n2->car) {
+        dump_prefix(offset+2);
+        printf("pre:\n");
+        dump_recur(mrb, n2->car, offset+3);
+      }
+      n2 = n2->cdr;
+      if (n2) {
+        if (n2->car) {
+          dump_prefix(offset+2);
+          printf("rest:\n");
+          parser_dump(mrb, n2->car, offset+3);
+        }
+        n2 = n2->cdr;
+        if (n2) {
+          if (n2->car) {
+            dump_prefix(offset+2);
+            printf("post:\n");
+            dump_recur(mrb, n2->car, offset+3);
+          }
+        }
+      }
+    }
+    dump_prefix(offset+1);
+    printf("rhs:\n");
+    parser_dump(mrb, tree->cdr, offset+2);
+    break;
+
+  case NODE_OP_ASGN:
+    printf("NODE_OP_ASGN:\n");
+    dump_prefix(offset+1);
+    printf("lhs:\n");
+    parser_dump(mrb, tree->car, offset+2);
+    tree = tree->cdr;
+    dump_prefix(offset+1);
+    printf("op='%s' (%d)\n", mrb_sym2name(mrb, (mrb_sym)tree->car), (int)tree->car);
+    tree = tree->cdr;
+    parser_dump(mrb, tree->car, offset+1);
+    break;
+
+  case NODE_SUPER:
+    printf("NODE_SUPER:\n");
+    if (tree) {
+      dump_prefix(offset+1);
+      printf("args:\n");
+      dump_recur(mrb, tree->car, offset+2);
+      if (tree->cdr) {
+        dump_prefix(offset+1);
+        printf("block:\n");
+        parser_dump(mrb, tree->cdr, offset+2);
+      }
+    }
+    break;
+
+  case NODE_ZSUPER:
+    printf("NODE_ZSUPER\n");
+    break;
+
+  case NODE_RETURN:
+    printf("NODE_RETURN:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_YIELD:
+    printf("NODE_YIELD:\n");
+    dump_recur(mrb, tree, offset+1);
+    break;
+
+  case NODE_BREAK:
+    printf("NODE_BREAK:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_NEXT:
+    printf("NODE_NEXT:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_REDO:
+    printf("NODE_REDO\n");
+    break;
+
+  case NODE_RETRY:
+    printf("NODE_RETRY\n");
+    break;
+
+  case NODE_LVAR:
+    printf("NODE_LVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_GVAR:
+    printf("NODE_GVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_IVAR:
+    printf("NODE_IVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_CVAR:
+    printf("NODE_CVAR %s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_CONST:
+    printf("NODE_CONST %s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_BACK_REF:
+    printf("NODE_BACK_REF:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_NTH_REF:
+    printf("NODE_NTH_REF:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_ARG:
+    printf("NODE_ARG %s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_BLOCK_ARG:
+    printf("NODE_BLOCK_ARG:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_INT:
+    printf("NODE_INT %s base %d\n", (char*)tree->car, (int)tree->cdr->car);
+    break;
+
+  case NODE_FLOAT:
+    printf("NODE_FLOAT %s\n", (char*)tree);
+    break;
+
+  case NODE_NEGATE:
+    printf("NODE_NEGATE\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  case NODE_STR:
+    printf("NODE_STR \"%s\" len %d\n", (char*)tree->car, (int)tree->cdr);
+    break;
+
+  case NODE_DSTR:
+    printf("NODE_DSTR\n");
+    dump_recur(mrb, tree, offset+1);
+    break;
+
+  case NODE_SYM:
+    printf("NODE_SYM :%s\n", mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_SELF:
+    printf("NODE_SELF\n");
+    break;
+
+  case NODE_NIL:
+    printf("NODE_NIL\n");
+    break;
+
+  case NODE_TRUE:
+    printf("NODE_TRUE\n");
+    break;
+
+  case NODE_FALSE:
+    printf("NODE_FALSE\n");
+    break;
+
+  case NODE_ALIAS:
+    printf("NODE_ALIAS %s %s:\n",
+           mrb_sym2name(mrb, (mrb_sym)tree->car),
+           mrb_sym2name(mrb, (mrb_sym)tree->cdr));
+    break;
+
+  case NODE_UNDEF:
+    printf("NODE_UNDEF %s:\n",
+           mrb_sym2name(mrb, (mrb_sym)tree));
+    break;
+
+  case NODE_CLASS:
+    printf("NODE_CLASS:\n");
+    if (tree->car->car == (node*)0) {
+      dump_prefix(offset+1);
+      printf(":%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr));
+    }
+    else if (tree->car->car == (node*)1) {
+      dump_prefix(offset+1);
+      printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr));
+    }
+    else {
+      parser_dump(mrb, tree->car->car, offset+1);
+      dump_prefix(offset+1);
+      printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr));
+    }
+    if (tree->cdr->car) {
+      dump_prefix(offset+1);
+      printf("super:\n");
+      parser_dump(mrb, tree->cdr->car, offset+2);
+    }
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr->cdr->car->cdr, offset+2);
+    break;
+
+  case NODE_MODULE:
+    printf("NODE_MODULE:\n");
+    if (tree->car->car == (node*)0) {
+      dump_prefix(offset+1);
+      printf(":%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr));
+    }
+    else if (tree->car->car == (node*)1) {
+      dump_prefix(offset+1);
+      printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr));
+    }
+    else {
+      parser_dump(mrb, tree->car->car, offset+1);
+      dump_prefix(offset+1);
+      printf("::%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car->cdr));
+    }
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr->car->cdr, offset+2);
+    break;
+
+  case NODE_SCLASS:
+    printf("NODE_SCLASS:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    dump_prefix(offset+1);
+    printf("body:\n");
+    parser_dump(mrb, tree->cdr->car->cdr, offset+2);
+    break;
+
+  case NODE_DEF:
+    printf("NODE_DEF:\n");
+    dump_prefix(offset+1);
+    printf("%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car));
+    tree = tree->cdr;
+    dump_prefix(offset+1);
+    printf("local variables:\n");
+    {
+      node *n2 = tree->car;
+
+      while (n2) {
+        /* indent only when a name follows, so an empty slot no longer
+           leaves a dangling prefix glued to the next output line */
+        if (n2->car) {
+          dump_prefix(offset+2);
+          printf("%s\n", mrb_sym2name(mrb, (mrb_sym)n2->car));
+        }
+        n2 = n2->cdr;
+      }
+    }
+    tree = tree->cdr;
+    if (tree->car) {
+      node *n = tree->car;
+
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("optional args:\n");
+        {
+          node *n2 = n->car;
+
+          while (n2) {
+            dump_prefix(offset+2);
+            printf("%s=", mrb_sym2name(mrb, (mrb_sym)n2->car->car));
+            parser_dump(mrb, n2->car->cdr, 0);
+            n2 = n2->cdr;
+          }
+        }
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("rest=*%s\n", mrb_sym2name(mrb, (mrb_sym)n->car));
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("post mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n) {
+        dump_prefix(offset+1);
+        printf("blk=&%s\n", mrb_sym2name(mrb, (mrb_sym)n));
+      }
+    }
+    parser_dump(mrb, tree->cdr->car, offset+1);
+    break;
+
+  case NODE_SDEF:
+    printf("NODE_SDEF:\n");
+    parser_dump(mrb, tree->car, offset+1);
+    tree = tree->cdr;
+    dump_prefix(offset+1);
+    printf(":%s\n", mrb_sym2name(mrb, (mrb_sym)tree->car));
+    tree = tree->cdr->cdr;
+    if (tree->car) {
+      node *n = tree->car;
+
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("optional args:\n");
+        {
+          node *n2 = n->car;
+
+          while (n2) {
+            dump_prefix(offset+2);
+            printf("%s=", mrb_sym2name(mrb, (mrb_sym)n2->car->car));
+            parser_dump(mrb, n2->car->cdr, 0);
+            n2 = n2->cdr;
+          }
+        }
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("rest=*%s\n", mrb_sym2name(mrb, (mrb_sym)n->car));
+      }
+      n = n->cdr;
+      if (n->car) {
+        dump_prefix(offset+1);
+        printf("post mandatory args:\n");
+        dump_recur(mrb, n->car, offset+2);
+      }
+      n = n->cdr;
+      if (n) {
+        dump_prefix(offset+1);
+        printf("blk=&%s\n", mrb_sym2name(mrb, (mrb_sym)n));
+      }
+    }
+    tree = tree->cdr;
+    parser_dump(mrb, tree->car, offset+1);
+    break;
+
+  case NODE_POSTEXE:
+    printf("NODE_POSTEXE:\n");
+    parser_dump(mrb, tree, offset+1);
+    break;
+
+  default:
+    printf("node type: %d (0x%x)\n", (int)n, (int)n);
+    break;
+  }
+  return;
+}
+
+#ifdef PARSER_TEST
+/*
+ * Stand-alone smoke test, built only when PARSER_TEST is defined:
+ * compiles a small recursive Fibonacci program from an in-memory string
+ * and prints the value returned by mrb_compile_string().
+ */
+int
+main()
+{
+ mrb_state *mrb = mrb_open();
+ int n;
+
+ /* the program text is one string literal spread over continuation lines */
+ n = mrb_compile_string(mrb, "\
+def fib(n)\n\
+ if n<2\n\
+ n\n\
+ else\n\
+ fib(n-2)+fib(n-1)\n\
+ end\n\
+end\n\
+print(fib(20), \"\\n\")\n\
+");
+ printf("ret: %d\n", n);
+
+ return 0;
+}
+#endif
diff --git a/src/pool.c b/src/pool.c
new file mode 100644
index 000000000..62e1a4d17
--- /dev/null
+++ b/src/pool.c
@@ -0,0 +1,152 @@
+#include "pool.h"
+#include <string.h>
+
+#undef TEST_POOL
+#ifdef TEST_POOL
+#include <stdio.h>
+
+#define mrb_malloc(m,s) malloc(s)
+#define mrb_free(m,p) free(p)
+#endif
+
+#define POOL_PAGE_SIZE 16000
+
+/*
+ * Create an empty pool bound to `mrb`.
+ * Returns the new pool, or 0 when its header cannot be allocated.
+ */
+mrb_pool*
+mrb_pool_open(mrb_state *mrb)
+{
+  mrb_pool *pool;
+
+  pool = mrb_malloc(mrb, sizeof(mrb_pool));
+  if (!pool) return 0;
+
+  pool->mrb = mrb;
+  pool->pages = 0;   /* pages are added on demand by mrb_pool_alloc() */
+  return pool;
+}
+
+/*
+ * Release every page of `pool` and then the pool header itself.
+ * A NULL pool is ignored.
+ */
+void
+mrb_pool_close(mrb_pool *pool)
+{
+  struct mrb_pool_page *cur, *following;
+
+  if (!pool) return;
+  for (cur = pool->pages; cur; cur = following) {
+    /* read the link before the page that holds it is freed */
+    following = cur->next;
+    mrb_free(pool->mrb, cur);
+  }
+  mrb_free(pool->mrb, pool);
+}
+
+/*
+ * Allocate one page able to hold at least `len` bytes (never less than
+ * POOL_PAGE_SIZE).  Only `offset` and `len` are initialised here; the
+ * caller links the page into the pool.  Returns 0 on allocation failure.
+ */
+static struct mrb_pool_page*
+page_alloc(mrb_pool *pool, size_t len)
+{
+  struct mrb_pool_page *page;
+  size_t capacity = len;
+
+  if (capacity < POOL_PAGE_SIZE) {
+    capacity = POOL_PAGE_SIZE;
+  }
+  /* the struct already contains the first byte of the storage area */
+  page = mrb_malloc(pool->mrb, sizeof(struct mrb_pool_page)+capacity-1);
+  if (!page) return page;
+
+  page->offset = 0;
+  page->len = capacity;
+  return page;
+}
+
+/* Every block handed out by the pool starts on a multiple of this size. */
+union mrb_pool_align { void *p; double d; long l; };
+#define POOL_ALIGNMENT (sizeof(union mrb_pool_align))
+
+/*
+ * Carve `len` bytes out of `pool`.
+ *
+ * The first page with enough room left is used; otherwise a new page is
+ * allocated and put at the head of the page list.  The returned block is
+ * also remembered as the page's `last` block, which is what allows
+ * mrb_pool_realloc() to grow it in place.
+ *
+ * Returns 0 when `pool` is NULL or a new page cannot be allocated.
+ */
+void*
+mrb_pool_alloc(mrb_pool *pool, size_t len)
+{
+  struct mrb_pool_page *page;
+  size_t n;
+
+  if (!pool) return 0;
+
+  for (page = pool->pages; page; page = page->next) {
+    /* start the block on an aligned offset so that objects stored after
+       an odd-sized block are not misaligned */
+    n = page->offset;
+    n += (POOL_ALIGNMENT - n % POOL_ALIGNMENT) % POOL_ALIGNMENT;
+    /* written as a subtraction so a huge `len` cannot wrap the sum */
+    if (n <= page->len && len <= page->len - n) {
+      page->offset = n + len;
+      /* page->page is a char array: plain pointer arithmetic, no
+         reliance on the GNU extension for arithmetic on void* */
+      page->last = page->page + n;
+      return page->last;
+    }
+  }
+  page = page_alloc(pool, len);
+  if (!page) return 0;
+  page->offset = len;
+  page->next = pool->pages;
+  pool->pages = page;
+
+  page->last = page->page;
+  return page->last;
+}
+
+/*
+ * Tell whether the block `p` could be resized to `len` bytes without
+ * moving.  Returns 1 only when `p` is the most recent block of some page
+ * and that page is large enough; returns 0 otherwise, including for a
+ * NULL pool.
+ */
+int
+mrb_pool_can_realloc(mrb_pool *pool, void *p, size_t len)
+{
+  struct mrb_pool_page *page;
+
+  if (!pool) return 0;
+  for (page = pool->pages; page; page = page->next) {
+    size_t start;
+
+    if (page->last != p) continue;
+    /* offset of p inside this page's storage area */
+    start = (char*)p - page->page;
+    return (start + len > page->len) ? 0 : 1;
+  }
+  return 0;
+}
+
+/*
+ * Resize the block `p` from `oldlen` to `newlen` bytes.
+ *
+ * When `p` is the most recent block of its page and the page has room,
+ * the block is resized in place and `p` is returned.  Otherwise a new
+ * block is allocated and the old contents are copied over (at most
+ * `newlen` bytes, so shrinking cannot overrun the new block).
+ *
+ * Returns 0 when `pool` is NULL or no memory is available; in the latter
+ * case the old block is left intact.
+ */
+void*
+mrb_pool_realloc(mrb_pool *pool, void *p, size_t oldlen, size_t newlen)
+{
+  struct mrb_pool_page *page;
+  struct mrb_pool_page *released = 0;
+  void *np;
+
+  if (!pool) return 0;
+  page = pool->pages;
+  while (page) {
+    if (page->last == p) {
+      size_t beg;
+
+      beg = (char*)p - page->page;
+      if (beg + oldlen != page->offset) break;
+      if (beg + newlen > page->len) {
+        /* does not fit here: give the old block's space back */
+        page->offset = beg;
+        released = page;
+        break;
+      }
+      page->offset = beg + newlen;
+      return p;
+    }
+    page = page->next;
+  }
+  np = mrb_pool_alloc(pool, newlen);
+  if (!np) {
+    /* undo the release so the caller's block stays reserved */
+    if (released) released->offset += oldlen;
+    return 0;
+  }
+  memcpy(np, p, oldlen < newlen ? oldlen : newlen);
+  return np;
+}
+
+#ifdef TEST_POOL
+/*
+ * Stand-alone exerciser, built only when TEST_POOL is defined: doubles a
+ * single block nineteen times and prints, before each step, its address,
+ * its size and whether it could grow in place.
+ */
+int
+main()
+{
+  mrb_pool *pool;
+  void *p;
+  int len = 250;
+  int round;
+
+  pool = mrb_pool_open(0);
+  p = mrb_pool_alloc(pool, len);
+  for (round = 1; round < 20; round++, len *= 2) {
+    printf("%p (len=%d) %d\n", p, len, mrb_pool_can_realloc(pool, p, len*2));
+    p = mrb_pool_realloc(pool, p, len, len*2);
+  }
+  mrb_pool_close(pool);
+  return 0;
+}
+#endif
diff --git a/src/pool.h b/src/pool.h
new file mode 100644
index 000000000..4f0b906a5
--- /dev/null
+++ b/src/pool.h
@@ -0,0 +1,19 @@
+#include "mruby.h"
+#include <stddef.h>
+
+/*
+ * A pool is a singly linked list of pages from which blocks are carved
+ * sequentially; everything is released at once by mrb_pool_close().
+ */
+typedef struct mrb_pool {
+ mrb_state *mrb; /* state handed to mrb_malloc()/mrb_free() */
+ struct mrb_pool_page {
+ struct mrb_pool_page *next; /* next page in the list, or 0 */
+ size_t offset; /* bytes of page[] handed out so far */
+ size_t len; /* usable size of page[] in bytes */
+ void *last; /* most recently returned block of this page */
+ char page[1]; /* first byte of the storage; pages are over-allocated to hold len bytes */
+ } *pages; /* head of the page list; new pages are pushed at the front */
+} mrb_pool;
+
+mrb_pool* mrb_pool_open(mrb_state*);
+void mrb_pool_close(mrb_pool*);
+void* mrb_pool_alloc(mrb_pool*, size_t);
+void* mrb_pool_realloc(mrb_pool*, void*, size_t oldlen, size_t newlen);
+int mrb_pool_can_realloc(mrb_pool*, void*, size_t);
diff --git a/src/print.c b/src/print.c
new file mode 100644
index 000000000..a3e05b6f0
--- /dev/null
+++ b/src/print.c
@@ -0,0 +1,69 @@
+#include "mruby.h"
+#include "mruby/string.h"
+#include <stdio.h>
+
+/*
+ * Write the bytes of `obj` to stdout when it is a string; any other kind
+ * of value is silently ignored.  Returns `obj` unchanged.
+ */
+mrb_value
+printstr(mrb_state *mrb, mrb_value obj)
+{
+  if (mrb_type(obj) == MRB_TT_STRING) {
+    struct RString *str = mrb_str_ptr(obj);
+    char *cur = str->buf;
+    size_t remaining = str->len;
+
+    /* byte-wise, driven by the stored length rather than a terminator */
+    for (; remaining > 0; remaining--, cur++) {
+      putc(*cur, stdout);
+    }
+  }
+  return obj;
+}
+
+/*
+ * Print the result of calling `inspect` on `obj`, followed by a newline.
+ * Returns the value produced by `inspect`.
+ */
+mrb_value
+mrb_p(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value text = mrb_funcall(mrb, obj, "inspect", 0);
+
+  printstr(mrb, text);
+  putc('\n', stdout);
+  return text;
+}
+
+/* 15.3.1.2.9 */
+/* 15.3.1.3.34 */
+/*
+ * Kernel#p: print the inspected form of every argument, one per line.
+ * Returns the first argument, or nil when called without arguments
+ * (argv[0] used to be read even when argc was 0).
+ */
+static mrb_value
+p_m(mrb_state *mrb, mrb_value self)
+{
+  int argc, i;
+  mrb_value *argv;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  for (i=0; i<argc; i++) {
+    mrb_p(mrb, argv[i]);
+  }
+
+  if (argc == 0) return mrb_nil_value();
+  return argv[0];
+}
+
+/*
+ * Method body of __printstr__: fetch the single argument and hand it to
+ * printstr().  Returns that argument.
+ */
+mrb_value
+mrb_printstr(mrb_state *mrb, mrb_value self)
+{
+  mrb_value str;
+
+  mrb_get_args(mrb, "o", &str);
+  /* printstr() returns its argument unchanged */
+  return printstr(mrb, str);
+}
+
+/*
+ * Register the printing primitives on the kernel module:
+ * `__printstr__` (one required argument) and `p` (any arguments).
+ */
+void
+mrb_init_print(mrb_state *mrb)
+{
+  struct RClass *kernel = mrb->kernel_module;
+
+  mrb_define_method(mrb, kernel, "__printstr__", mrb_printstr, ARGS_REQ(1));
+  mrb_define_method(mrb, kernel, "p", p_m, ARGS_ANY());    /* 15.3.1.3.34 */
+}
diff --git a/src/proc.c b/src/proc.c
new file mode 100644
index 000000000..aae9b1932
--- /dev/null
+++ b/src/proc.c
@@ -0,0 +1,92 @@
+#include "mruby.h"
+#include "mruby/proc.h"
+#include "mruby/array.h"
+#include "mruby/class.h"
+#include "opcode.h"
+
+/*
+ * Create a proc object whose body is the compiled code `irep`.
+ * The target class is taken from the current call frame when there is
+ * one; the proc starts without an environment.
+ */
+struct RProc *
+mrb_proc_new(mrb_state *mrb, mrb_irep *irep)
+{
+  struct RProc *proc = mrb_obj_alloc(mrb, MRB_TT_PROC, mrb->proc_class);
+
+  proc->body.irep = irep;
+  if (mrb->ci) {
+    proc->target_class = mrb->ci->target_class;
+  }
+  else {
+    proc->target_class = 0;
+  }
+  proc->env = 0;
+
+  return proc;
+}
+
+/*
+ * Create a proc for `irep` that shares the environment of the current
+ * call frame.  The frame's environment is created on first use and then
+ * reused by later closures made from the same frame.
+ */
+struct RProc *
+mrb_closure_new(mrb_state *mrb, mrb_irep *irep)
+{
+  struct RProc *closure = mrb_proc_new(mrb, irep);
+  struct REnv *env = mrb->ci->env;
+
+  if (!env) {
+    /* first closure of this frame: build its environment object */
+    env = mrb_obj_alloc(mrb, MRB_TT_ENV, mrb->ci->proc->env);
+    env->flags= (unsigned int)irep->nlocals;
+    env->mid = mrb->ci->mid;
+    env->cioff = mrb->ci - mrb->cibase;
+    env->stack = mrb->stack;
+    mrb->ci->env = env;
+  }
+  closure->env = env;
+  return closure;
+}
+
+/*
+ * Create a proc object whose body is the C function `func`; the proc is
+ * flagged with MRB_PROC_CFUNC.
+ */
+struct RProc *
+mrb_proc_new_cfunc(mrb_state *mrb, mrb_func_t func)
+{
+  struct RProc *proc = mrb_obj_alloc(mrb, MRB_TT_PROC, mrb->proc_class);
+
+  proc->flags |= MRB_PROC_CFUNC;
+  proc->body.func = func;
+
+  return proc;
+}
+
+/*
+ * Function form of the MRB_PROC_CFUNC_P() test for the proc `p`.
+ */
+int
+mrb_proc_cfunc_p(struct RProc *p)
+{
+  int is_cfunc = MRB_PROC_CFUNC_P(p);
+
+  return is_cfunc;
+}
+
+/*
+ * Invoke the C function stored in `p` with `self` as receiver and return
+ * its result.  Whether `p` really holds a C function is not checked here.
+ */
+mrb_value
+mrb_proc_call_cfunc(mrb_state *mrb, struct RProc *p, mrb_value self)
+{
+  mrb_func_t func = p->body.func;
+
+  return func(mrb, self);
+}
+
+/*
+ * Return the instruction sequence of the compiled code held by `p`.
+ */
+mrb_code*
+mrb_proc_iseq(mrb_state *mrb, struct RProc *p)
+{
+  mrb_irep *irep = p->body.irep;
+
+  return irep->iseq;
+}
+
+/*
+ * Define the Proc class and its `call` / `[]` methods.
+ *
+ * Both methods share one proc whose body is a single OP_CALL
+ * instruction.  When the memory for that body cannot be obtained nothing
+ * is defined, and whichever half was obtained is released again.
+ */
+void
+mrb_init_proc(mrb_state *mrb)
+{
+  struct RProc *m;
+  mrb_code *call_iseq = mrb_malloc(mrb, sizeof(mrb_code));
+  mrb_irep *call_irep = mrb_calloc(mrb, sizeof(mrb_irep), 1);
+
+  if ( call_iseq == NULL || call_irep == NULL ) {
+    /* only one of the two may have failed: do not leak the other */
+    if (call_iseq) mrb_free(mrb, call_iseq);
+    if (call_irep) mrb_free(mrb, call_irep);
+    return;
+  }
+
+  *call_iseq = MKOP_A(OP_CALL, 0);
+  call_irep->idx = -1;
+  call_irep->flags = MRB_IREP_NOFREE;
+  call_irep->iseq = call_iseq;
+  call_irep->ilen = 1;
+
+  mrb->proc_class = mrb_define_class(mrb, "Proc", mrb->object_class);
+
+  m = mrb_proc_new(mrb, call_irep);
+  mrb_define_method_raw(mrb, mrb->proc_class, mrb_intern(mrb, "call"), m);
+  mrb_define_method_raw(mrb, mrb->proc_class, mrb_intern(mrb, "[]"), m);
+}
diff --git a/src/range.c b/src/range.c
new file mode 100644
index 000000000..bc85f1f62
--- /dev/null
+++ b/src/range.c
@@ -0,0 +1,499 @@
+#include "mruby.h"
+#include "mruby/class.h"
+#include "mruby/range.h"
+#include "variable.h"
+#include "error.h"
+#include "mruby/numeric.h"
+#include "mruby/string.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef OTHER
+#define OTHER 2
+#endif
+
+mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
+ mrb_value obj, mrb_value paired_obj, void* arg);
+
+int printf (const char*, ...);
+/*--------- <1.8.7>object.c ---------> */
+
+/*
+ * call-seq:
+ * obj.instance_of?(class) => true or false
+ *
+ * Returns <code>true</code> if <i>obj</i> is an instance of the given
+ * class. See also <code>Object#kind_of?</code>.
+ */
+
+/* TRUE when the class reported by mrb_obj_class() for `obj` is exactly `c`. */
+int
+mrb_obj_is_instance_of(mrb_state *mrb, mrb_value obj, struct RClass* c)
+{
+  return (mrb_obj_class(mrb, obj) == c) ? TRUE : FALSE;
+}
+/*--------- <1.8.7>object.c ---------< */
+
+/*
+ * Build a Range from `beg` to `end`; `excl` is stored as the
+ * exclude-end flag.  The endpoints live in a separately allocated
+ * mrb_range_edges record.
+ */
+mrb_value
+mrb_range_new(mrb_state *mrb, mrb_value beg, mrb_value end, int excl)
+{
+  struct RRange *rng = mrb_obj_alloc(mrb, MRB_TT_RANGE, mrb->range_class);
+
+  rng->edges = mrb_malloc(mrb, sizeof(struct mrb_range_edges));
+  rng->edges->beg = beg;
+  rng->edges->end = end;
+  rng->excl = excl;
+
+  return mrb_range_value(rng);
+}
+
+/*
+ * call-seq:
+ * rng.first => obj
+ * rng.begin => obj
+ *
+ * Returns the first object in <i>rng</i>.
+ */
+mrb_value
+mrb_range_beg(mrb_state *mrb, mrb_value range)
+{
+  /* the start value is stored in the range's edges record */
+  return mrb_range_ptr(range)->edges->beg;
+}
+
+/*
+ * call-seq:
+ * rng.end => obj
+ * rng.last => obj
+ *
+ * Returns the object that defines the end of <i>rng</i>.
+ *
+ * (1..10).end #=> 10
+ * (1...10).end #=> 10
+ */
+
+mrb_value
+mrb_range_end(mrb_state *mrb, mrb_value range)
+{
+  /* the end value is stored in the range's edges record */
+  return mrb_range_ptr(range)->edges->end;
+}
+
+/*
+ * call-seq:
+ * range.exclude_end? => true or false
+ *
+ * Returns <code>true</code> if <i>range</i> excludes its end value.
+ */
+mrb_value
+mrb_range_excl(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *rng = mrb_range_ptr(range);
+
+  /* translate the C flag into a Ruby boolean */
+  if (rng->excl) {
+    return mrb_true_value();
+  }
+  return mrb_false_value();
+}
+
+/*
+ * call-seq:
+ * beg end
+ * args[0] <= args[1] => true
+ * args[0] > args[1] => false
+ */
+static int
+range_check(mrb_state *mrb, mrb_value *args)
+{
+  /* args[0] is the start value, args[1] the end value */
+  mrb_value cmp = mrb_funcall(mrb, args[0], "<=>", 1, args[1]);
+
+  /* nil: the two values cannot be compared; 1: start is greater than end */
+  if (mrb_nil_p(cmp) || mrb_fixnum(cmp) == 1) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/*
+ * Store `beg`, `end` and the exclude-end flag into `range`.
+ *
+ * Unless both endpoints are Fixnums they are compared with range_check();
+ * a failed check is currently only reported on stdout.
+ * NOTE(review): the commented-out CRuby code this replaces called
+ * range_failed() here -- confirm whether an exception should be raised.
+ *
+ * The edges record is allocated here when the object does not have one
+ * yet: only mrb_range_new() allocates it, so an object that reaches this
+ * function by another path would otherwise be written through an unset
+ * pointer.  This assumes a fresh object has `edges` zeroed -- TODO confirm
+ * against mrb_obj_alloc().
+ */
+static void
+range_init(mrb_state *mrb, mrb_value range, mrb_value beg, mrb_value end, mrb_int exclude_end)
+{
+  mrb_value args[2];
+  struct RRange *r = mrb_range_ptr(range);
+
+  if ((mrb_type(beg) != MRB_TT_FIXNUM) || (mrb_type(end) != MRB_TT_FIXNUM)) {
+    args[0] = beg;
+    args[1] = end;
+    if (!range_check(mrb, args)) {
+      printf("range_failed()\n");
+    }
+  }
+  if (!r->edges) {
+    r->edges = mrb_malloc(mrb, sizeof(struct mrb_range_edges));
+  }
+  r->excl = exclude_end;
+  r->edges->beg = beg;
+  r->edges->end = end;
+}
+/*
+ * call-seq:
+ * Range.new(start, end, exclusive=false) => range
+ *
+ * Constructs a range using the given <i>start</i> and <i>end</i>. If the third
+ * parameter is omitted or is <code>false</code>, the <i>range</i> will include
+ * the end object; otherwise, it will be excluded.
+ */
+
+mrb_value
+mrb_range_initialize(mrb_state *mrb, mrb_value range)
+{
+  mrb_value beg, end;
+  /*
+   * Default to "end included", as documented for an omitted third
+   * argument, so `flags` is never read uninitialized should
+   * mrb_get_args() leave it untouched.
+   */
+  mrb_value flags = mrb_false_value();
+
+  mrb_get_args(mrb, "ooo", &beg, &end, &flags);
+  /* Ranges are immutable, so that they should be initialized only once. */
+  range_init(mrb, range, beg, end, mrb_test(flags));
+  return range;
+}
+/*
+ * call-seq:
+ * range == obj => true or false
+ *
+ * Returns <code>true</code> only if
+ * 1) <i>obj</i> is a Range,
+ * 2) <i>obj</i> has equivalent beginning and end items (by comparing them with <code>==</code>),
+ * 3) <i>obj</i> has the same #exclude_end? setting as <i>rng</i>.
+ *
+ * (0..2) == (0..2) #=> true
+ * (0..2) == Range.new(0,2) #=> true
+ * (0..2) == (0...2) #=> false
+ *
+ */
+
+/*
+ * Range#==: true when the argument is the receiver itself, or an object
+ * of the receiver's class whose start and end values are equal to the
+ * receiver's and whose exclude-end flag matches.
+ *
+ * Endpoints are first tested with mrb_obj_equal() and, when that fails,
+ * by sending `==` to the receiver's endpoint, which is the comparison
+ * the method documentation describes.
+ */
+mrb_value
+mrb_range_eq(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *rr;
+  struct RRange *ro;
+  mrb_value obj;
+  mrb_value same;
+
+  mrb_get_args(mrb, "o", &obj);
+
+  if (mrb_obj_equal(mrb, range, obj)) return mrb_true_value();
+
+  /* same class? */
+  if (!mrb_obj_is_instance_of(mrb, obj, mrb_obj_class(mrb, range)))
+    return mrb_false_value();
+
+  rr = mrb_range_ptr(range);
+  ro = mrb_range_ptr(obj);
+
+  if (!mrb_obj_equal(mrb, rr->edges->beg, ro->edges->beg)) {
+    same = mrb_funcall(mrb, rr->edges->beg, "==", 1, ro->edges->beg);
+    if (!mrb_test(same)) return mrb_false_value();
+  }
+  if (!mrb_obj_equal(mrb, rr->edges->end, ro->edges->end)) {
+    same = mrb_funcall(mrb, rr->edges->end, "==", 1, ro->edges->end);
+    if (!mrb_test(same)) return mrb_false_value();
+  }
+  if (rr->excl != ro->excl)
+    return mrb_false_value();
+
+  return mrb_true_value();
+}
+
+/*
+ * TRUE when `a <=> b` yields 0 or -1; FALSE for nil (not comparable)
+ * and for every other result.
+ */
+static int
+r_le(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  mrb_value cmp = mrb_funcall(mrb, a, "<=>", 1, b);
+
+  if (mrb_nil_p(cmp)) {
+    return FALSE;
+  }
+  if (mrb_obj_equal(mrb, cmp, mrb_fixnum_value(0)) ||
+      mrb_obj_equal(mrb, cmp, mrb_fixnum_value(-1))) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+ * TRUE only when `a <=> b` yields 1; FALSE for nil (not comparable)
+ * and for every other result.
+ */
+static int
+r_gt(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  mrb_value cmp = mrb_funcall(mrb, a, "<=>", 1, b);
+
+  if (mrb_nil_p(cmp)) {
+    return FALSE;
+  }
+  return mrb_obj_equal(mrb, cmp, mrb_fixnum_value(1)) ? TRUE : FALSE;
+}
+
+/*
+ * TRUE when `a <=> b` yields 0 or 1; FALSE for nil (not comparable)
+ * and for every other result.
+ */
+static int
+r_ge(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  mrb_value cmp = mrb_funcall(mrb, a, "<=>", 1, b);
+
+  if (mrb_nil_p(cmp)) {
+    return FALSE;
+  }
+  if (mrb_obj_equal(mrb, cmp, mrb_fixnum_value(0)) ||
+      mrb_obj_equal(mrb, cmp, mrb_fixnum_value(1))) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+ * call-seq:
+ * range === obj => true or false
+ * range.member?(val) => true or false
+ * range.include?(val) => true or false
+ *
+ */
+mrb_value
+mrb_range_include(mrb_state *mrb, mrb_value range)
+{
+  mrb_value val;
+  struct RRange *rng = mrb_range_ptr(range);
+  mrb_value lower, upper;
+  int below_end;
+
+  mrb_get_args(mrb, "o", &val);
+
+  lower = rng->edges->beg;
+  upper = rng->edges->end;
+
+  /* val must not sort before the start value ... */
+  if (!r_le(mrb, lower, val)) {
+    return mrb_false_value();
+  }
+  /* ... and must sort before (exclusive) or not after (inclusive) the end */
+  if (rng->excl) {
+    below_end = r_gt(mrb, upper, val);
+  }
+  else {
+    below_end = r_ge(mrb, upper, val);
+  }
+  return below_end ? mrb_true_value() : mrb_false_value();
+}
+
+/*
+ * call-seq:
+ * rng.each {| i | block } => rng
+ *
+ * Iterates over the elements <i>rng</i>, passing each in turn to the
+ * block. You can only iterate if the start object of the range
+ * supports the +succ+ method (which means that you can't iterate over
+ * ranges of +Float+ objects).
+ *
+ * (10..15).each do |n|
+ * print n, ' '
+ * end
+ *
+ * <em>produces:</em>
+ *
+ * 10 11 12 13 14 15
+ */
+
+mrb_value
+mrb_range_each(mrb_state *mrb, mrb_value range)
+{ /* Placeholder: no iteration is performed and no block is called here. */
+ return range; /* hand the receiver back unchanged */
+}
+
+/*
+ * Translate `range` into a start offset and a length for a sequence of
+ * `len` elements.  Negative endpoints count from the end of the sequence.
+ *
+ * Returns FALSE when `range` is not a Range, TRUE after storing the
+ * result in *begp / *lenp, and OTHER when the start lies outside the
+ * sequence and `err` is 0.  With a non-zero `err` an out-of-range start
+ * raises RangeError; `err` values 0 and 2 additionally reject a start
+ * past `len` and clip the end to `len`.
+ */
+mrb_int
+mrb_range_beg_len(mrb_state *mrb, mrb_value range, mrb_int *begp, mrb_int *lenp, mrb_int len, mrb_int err)
+{
+  mrb_int beg, end, b, e;
+  struct RRange *r = mrb_range_ptr(range);
+
+  if (mrb_type(range) != MRB_TT_RANGE) return FALSE;
+
+  /* b/e keep the untouched endpoints for the error message */
+  beg = b = mrb_fixnum(r->edges->beg);
+  end = e = mrb_fixnum(r->edges->end);
+
+  if (beg < 0) {
+    beg += len;
+    if (beg < 0) goto out_of_range;
+  }
+  if (err == 0 || err == 2) {
+    if (beg > len) goto out_of_range;
+    if (end > len) end = len;
+  }
+  if (end < 0) end += len;
+  if (!r->excl) end++; /* include end point */
+  len = end - beg;
+  if (len < 0) len = 0;
+
+  *begp = beg;
+  *lenp = len;
+  return TRUE;
+
+out_of_range:
+  if (err) {
+    /* the format uses %ld, so pass long regardless of how mrb_int is defined */
+    mrb_raise(mrb, E_RANGE_ERROR, "%ld..%s%ld out of range",
+              (long)b, r->excl? "." : "", (long)e);
+  }
+  return OTHER;
+}
+
+/* 15.2.14.4.12(x) */
+/*
+ * call-seq:
+ * rng.to_s -> string
+ *
+ * Convert this range object to a printable form.
+ */
+
+static mrb_value
+range_to_s(mrb_state *mrb, mrb_value range)
+{
+  struct RRange *rng = mrb_range_ptr(range);
+  mrb_value head, tail, out;
+  int ndots;
+
+  head = mrb_obj_as_string(mrb, rng->edges->beg);
+  tail = mrb_obj_as_string(mrb, rng->edges->end);
+
+  /* work on a copy so the string of the start value is left alone */
+  out = mrb_str_dup(mrb, head);
+
+  /* three dots for an exclusive range, two otherwise */
+  ndots = rng->excl ? 3 : 2;
+  mrb_str_cat(mrb, out, "...", ndots);
+  mrb_str_append(mrb, out, tail);
+
+  return out;
+}
+
+/*
+ * Build the inspect string of `range`.  When `recur` is set a fixed
+ * placeholder is returned instead of inspecting the endpoints.
+ */
+static mrb_value
+inspect_range(mrb_state *mrb, mrb_value range, mrb_value dummy, int recur)
+{
+  struct RRange *rng = mrb_range_ptr(range);
+  mrb_value head, tail, out;
+
+  if (recur) {
+    if (rng->excl) {
+      return mrb_str_new2(mrb, "(... ... ...)");
+    }
+    return mrb_str_new2(mrb, "(... .. ...)");
+  }
+
+  head = mrb_inspect(mrb, rng->edges->beg);
+  tail = mrb_inspect(mrb, rng->edges->end);
+  out = mrb_str_dup(mrb, head);
+  /* three dots for an exclusive range, two otherwise */
+  mrb_str_cat(mrb, out, "...", rng->excl ? 3 : 2);
+  mrb_str_append(mrb, out, tail);
+
+  return out;
+}
+
+/* 15.2.14.4.13(x) */
+/*
+ * call-seq:
+ * rng.inspect -> string
+ *
+ * Convert this range object to a printable form (using
+ * <code>inspect</code> to convert the start and end
+ * objects).
+ */
+
+static mrb_value
+range_inspect(mrb_state *mrb, mrb_value range)
+{
+  /* always a top-level call: the recursion flag is passed as 0 */
+  const int recur = 0;
+
+  return inspect_range(mrb, range, range, recur);
+}
+
+/*
+ * Comparison body used by range_eql(): both endpoints are compared with
+ * mrb_eql() and the exclude-end flags must match.  A recursive
+ * invocation (`recur` set) is answered with true.
+ */
+static mrb_value
+recursive_eql(mrb_state *mrb, mrb_value range, mrb_value obj, int recur)
+{
+  struct RRange *lhs = mrb_range_ptr(range);
+  struct RRange *rhs = mrb_range_ptr(obj);
+
+  if (recur) {
+    return mrb_true_value(); /* Subtle! */
+  }
+  if (!mrb_eql(mrb, lhs->edges->beg, rhs->edges->beg) ||
+      !mrb_eql(mrb, lhs->edges->end, rhs->edges->end) ||
+      lhs->excl != rhs->excl) {
+    return mrb_false_value();
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.14.4.14(x) */
+/*
+ * call-seq:
+ * rng.eql?(obj) -> true or false
+ *
+ * Returns <code>true</code> only if <i>obj</i> is a Range, has equivalent
+ * beginning and end items (by comparing them with #eql?), and has the same
+ * #exclude_end? setting as <i>rng</i>.
+ *
+ * (0..2).eql?(0..2) #=> true
+ * (0..2).eql?(Range.new(0,2)) #=> true
+ * (0..2).eql?(0...2) #=> false
+ *
+ */
+
+static mrb_value
+range_eql(mrb_state *mrb, mrb_value range)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* the receiver itself is trivially eql? */
+  if (mrb_obj_equal(mrb, range, other)) {
+    return mrb_true_value();
+  }
+  /* anything that is not a kind of Range cannot be eql? */
+  if (!mrb_obj_is_kind_of(mrb, other, mrb->range_class)) {
+    return mrb_false_value();
+  }
+  return mrb_exec_recursive_paired(mrb, recursive_eql, range, other, &other);
+}
+
+/* 15.2.14.4.15(x) */
+/*
+ * Range#initialize_copy: make `copy` describe the same range as the
+ * argument.  Raises TypeError when the argument is not an instance of
+ * the copy's class.
+ *
+ * The endpoints and the exclude-end flag are copied field by field.
+ * Copying the whole struct RRange would make both objects point at the
+ * same heap-allocated edges record (and would overwrite every other
+ * field of the destination object as well).  The destination's edges
+ * record is allocated when missing; this assumes a fresh object has
+ * `edges` zeroed -- TODO confirm against mrb_obj_alloc().
+ */
+mrb_value
+range_initialize_copy(mrb_state *mrb, mrb_value copy)
+{
+  mrb_value src;
+  struct RRange *from;
+  struct RRange *to;
+
+  mrb_get_args(mrb, "o", &src);
+
+  if (mrb_obj_equal(mrb, copy, src)) return copy;
+  //mrb_check_frozen(copy);
+  if (!mrb_obj_is_instance_of(mrb, src, mrb_obj_class(mrb, copy))) {
+    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class");
+  }
+
+  from = mrb_range_ptr(src);
+  to = mrb_range_ptr(copy);
+  if (!to->edges) {
+    to->edges = mrb_malloc(mrb, sizeof(struct mrb_range_edges));
+  }
+  to->edges->beg = from->edges->beg;
+  to->edges->end = from->edges->end;
+  to->excl = from->excl;
+
+  return copy;
+}
+
+/*
+ * Define the Range class, mix in Enumerable and register the C
+ * implementations of its methods.  The trailing comments give the
+ * specification section each method corresponds to.
+ */
+void
+mrb_init_range(mrb_state *mrb)
+{
+  struct RClass *r;
+  r = mrb->range_class = mrb_define_class(mrb, "Range", mrb->object_class);
+  mrb_include_module(mrb, r, mrb_class_get(mrb, "Enumerable"));
+
+  mrb_define_method(mrb, r, "begin", mrb_range_beg, ARGS_NONE()); /* 15.2.14.4.3 */
+  mrb_define_method(mrb, r, "end", mrb_range_end, ARGS_NONE()); /* 15.2.14.4.5 */
+  mrb_define_method(mrb, r, "==", mrb_range_eq, ARGS_REQ(1)); /* 15.2.14.4.1 */
+  mrb_define_method(mrb, r, "===", mrb_range_include, ARGS_REQ(1)); /* 15.2.14.4.2 */
+  mrb_define_method(mrb, r, "each", mrb_range_each, ARGS_NONE()); /* 15.2.14.4.4 */
+  mrb_define_method(mrb, r, "exclude_end?", mrb_range_excl, ARGS_NONE()); /* 15.2.14.4.6 */
+  mrb_define_method(mrb, r, "first", mrb_range_beg, ARGS_NONE()); /* 15.2.14.4.7 */
+  mrb_define_method(mrb, r, "include?", mrb_range_include, ARGS_REQ(1)); /* 15.2.14.4.8 */
+  /* mrb_range_initialize parses three objects ("ooo"), not four */
+  mrb_define_method(mrb, r, "initialize", mrb_range_initialize, ARGS_REQ(3)); /* 15.2.14.4.9 */
+  mrb_define_method(mrb, r, "last", mrb_range_end, ARGS_NONE()); /* 15.2.14.4.10 */
+  mrb_define_method(mrb, r, "member?", mrb_range_include, ARGS_REQ(1)); /* 15.2.14.4.11 */
+
+  mrb_define_method(mrb, r, "to_s", range_to_s, ARGS_NONE()); /* 15.2.14.4.12(x) */
+  mrb_define_method(mrb, r, "inspect", range_inspect, ARGS_NONE()); /* 15.2.14.4.13(x) */
+  mrb_define_method(mrb, r, "eql?", range_eql, ARGS_REQ(1)); /* 15.2.14.4.14(x) */
+  mrb_define_method(mrb, r, "initialize_copy", range_initialize_copy, ARGS_REQ(1)); /* 15.2.14.4.15(x) */
+}
diff --git a/src/re.c b/src/re.c
new file mode 100644
index 000000000..645af6ba6
--- /dev/null
+++ b/src/re.c
@@ -0,0 +1,3306 @@
+/* re.c for RegExp Class */
+#include "mruby.h"
+#include <string.h>
+#include "mruby/string.h"
+#include "ritehash.h"
+#include "encoding.h"
+#include "re.h"
+#include "mruby/numeric.h"
+#include "mruby/range.h"
+#include "mruby/array.h"
+#include "regint.h"
+#include "mruby/class.h"
+#include "mruby/hash.h"
+#include "variable.h"
+#include "error.h"
+#ifdef INCLUDE_REGEXP
+
+//from opcode.h
+#define GETARG_A(i) ((((mrb_code)(i)) >> 24) & 0xff)
+#define GETARG_B(i) ((((mrb_code)(i)) >> 16) & 0xff)
+#define GETARG_C(i) ((((mrb_code)(i)) >> 8) & 0xff)
+#define MKARG_A(c) (((c) & 0xff) << 24)
+#define MKARG_B(c) (((c) & 0xff) << 16)
+#define MKARG_C(c) (((c) & 0xff) << 8)
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#define ARG_REG_OPTION_MASK \
+ (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
+#define ARG_ENCODING_FIXED 16
+#define ARG_ENCODING_NONE 32
+#define REG_LITERAL FL_USER5
+#define REG_ENCODING_NONE FL_USER6
+typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
+#define mrb_bug printf
+#define KCODE_FIXED FL_USER4
+#define scan_oct(s,l,e) (int)ruby_scan_oct(s,l,e)
+unsigned long ruby_scan_oct(const char *, size_t, size_t *);
+#define scan_hex(s,l,e) (int)ruby_scan_hex(s,l,e)
+unsigned long ruby_scan_hex(const char *, size_t, size_t *);
+
+static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match);
+int re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range);
+static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
+ mrb_encoding **fixed_enc, onig_errmsg_buffer err);
+static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
+ mrb_encoding *enc, mrb_encoding *resenc);
+static char * option_to_str(char str[4], int options);
+mrb_value match_alloc(mrb_state *mrb);
+void mrb_warn(const char *fmt, ...);
+
+static mrb_value reg_cache;
+//static int may_need_recompile;
+//static int reg_kcode = DEFAULT_KCODE;
+/* ------------------------------------------------------------------------- */
+/* RegExp Class */
+/* ------------------------------------------------------------------------- */
+/* 15.2.15.6.1 */
+/*
+ * call-seq:
+ * class.new(args, ...) -> obj
+ *
+ * Calls <code>allocate</code> to create a new object of
+ * <i>class</i>'s class, then invokes that object's
+ * <code>initialize</code> method, passing it <i>args</i>.
+ * This is the method that ends up getting called whenever
+ * an object is constructed using .new.
+ *
+ */
+mrb_value
+mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value self)
+{
+  mrb_value argv[16];
+  int argc;
+  struct RRegexp *regexp;
+
+  /* NOTE(review): confirm that the "*" format of mrb_get_args() expects the
+     address of an array like this rather than the address of a pointer. */
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  /* allocate an empty Regexp object, then let #initialize fill it in */
+  regexp = mrb_obj_alloc(mrb, MRB_TT_REGEX, mrb->regex_class);
+  regexp->ptr = 0;
+  regexp->src = mrb_nil_value();
+  regexp->usecnt = 0;
+
+  return mrb_funcall_argv(mrb, mrb_obj_value(regexp), "initialize", argc, argv);
+}
+//#define mrb_enc_mbcput(a,b,c) a
+/*
+ * Return a copy of `str` in which every character with a special meaning
+ * in a regular expression is escaped.
+ *
+ * The first loop only looks for a character that needs escaping; when
+ * none is found a plain copy is returned.  Otherwise a buffer of twice
+ * the source length is allocated (each escaped character becomes two),
+ * the untouched prefix is copied and the rest is rewritten character by
+ * character.
+ */
+mrb_value
+mrb_reg_quote(mrb_state *mrb, mrb_value str)
+{
+  mrb_encoding *enc = mrb_enc_get(mrb, str);
+  char *s, *send, *t;
+  mrb_value tmp;
+  int c,clen;
+  int ascii_only = mrb_enc_str_asciionly_p(mrb, str);
+
+  s = RSTRING_PTR(str);
+  send = s + RSTRING_LEN(str);
+  while (s < send) {
+    c = mrb_enc_ascget(mrb, s, send, &clen, enc);
+    if (c == -1) {
+      /* not an ASCII character: skip the whole multibyte character */
+      s += mbclen(s, send, enc);
+      continue;
+    }
+    switch (c) {
+      /* must list every character the escaping switch below handles;
+         '\v' was missing, so a lone vertical tab was never escaped */
+      case '[': case ']': case '{': case '}':
+      case '(': case ')': case '|': case '-':
+      case '*': case '.': case '\\':
+      case '?': case '+': case '^': case '$':
+      case ' ': case '#':
+      case '\t': case '\f': case '\v': case '\n': case '\r':
+        goto meta_found;
+    }
+    s += clen;
+  }
+  /* nothing to escape */
+  tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
+  if (ascii_only) {
+    mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb));
+  }
+  return tmp;
+
+meta_found:
+  tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2);
+  if (ascii_only) {
+    mrb_enc_associate(mrb, tmp, mrb_usascii_encoding(mrb));
+  }
+  else {
+    mrb_enc_copy(mrb, tmp, str);
+  }
+  t = RSTRING_PTR(tmp);
+  /* copy up to the first metacharacter */
+  memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str));
+  t += s - RSTRING_PTR(str);
+
+  while (s < send) {
+    c = mrb_enc_ascget(mrb, s, send, &clen, enc);
+    if (c == -1) {
+      /* copy a multibyte character through byte by byte */
+      int n = mbclen(s, send, enc);
+
+      while (n--)
+        *t++ = *s++;
+      continue;
+    }
+    s += clen;
+    switch (c) {
+      case '[': case ']': case '{': case '}':
+      case '(': case ')': case '|': case '-':
+      case '*': case '.': case '\\':
+      case '?': case '+': case '^': case '$':
+      case '#':
+        /* backslash here, the character itself after the switch */
+        t += mrb_enc_mbcput('\\', t, enc);
+        break;
+      case ' ':
+        t += mrb_enc_mbcput('\\', t, enc);
+        t += mrb_enc_mbcput(' ', t, enc);
+        continue;
+      case '\t':
+        t += mrb_enc_mbcput('\\', t, enc);
+        t += mrb_enc_mbcput('t', t, enc);
+        continue;
+      case '\n':
+        t += mrb_enc_mbcput('\\', t, enc);
+        t += mrb_enc_mbcput('n', t, enc);
+        continue;
+      case '\r':
+        t += mrb_enc_mbcput('\\', t, enc);
+        t += mrb_enc_mbcput('r', t, enc);
+        continue;
+      case '\f':
+        t += mrb_enc_mbcput('\\', t, enc);
+        t += mrb_enc_mbcput('f', t, enc);
+        continue;
+      case '\v':
+        t += mrb_enc_mbcput('\\', t, enc);
+        t += mrb_enc_mbcput('v', t, enc);
+        continue;
+    }
+    t += mrb_enc_mbcput(c, t, enc);
+  }
+  /* shrink the buffer to what was actually written */
+  mrb_str_resize(mrb, tmp, t - RSTRING_PTR(tmp));
+  /*OBJ_INFECT(tmp, str);*/
+  return tmp;
+}
+
+/*
+ * Coerce `s` into a string operand.  A Symbol goes through
+ * mrb_obj_inspect(); anything else through mrb_check_string_type().
+ * With `check` set, a nil conversion result raises TypeError.
+ */
+static mrb_value
+reg_operand(mrb_state *mrb, mrb_value s, int check)
+{
+  mrb_value converted;
+
+  if (mrb_type(s) == MRB_TT_SYMBOL) {
+    return mrb_obj_inspect(mrb, s);
+  }
+
+  converted = mrb_check_string_type(mrb, s);
+  if (check && mrb_nil_p(converted)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s to String",
+              mrb_obj_classname(mrb, s));
+  }
+  return converted;
+}
+/* 15.2.15.6.2 */
+/* 15.2.15.6.4 */
+/*
+ * call-seq:
+ * Regexp.escape(str) -> string
+ * Regexp.quote(str) -> string
+ *
+ * Escapes any characters that would have special meaning in a regular
+ * expression. Returns a new escaped string, or self if no characters are
+ * escaped. For any string,
+ * <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
+ *
+ * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
+ *
+ */
+
+static mrb_value
+mrb_reg_s_quote(mrb_state *mrb, mrb_value c/*, mrb_value str*/)
+{
+  mrb_value arg;
+  mrb_value operand;
+
+  mrb_get_args(mrb, "o", &arg);
+  /* the argument must convert to a string (check = 1) */
+  operand = reg_operand(mrb, arg, 1/*TRUE*/);
+  return mrb_reg_quote(mrb, operand);
+}
+
+/* Raise TypeError when the type tag of the match's string is zero. */
+static void
+match_check(mrb_state *mrb, mrb_value match)
+{
+  struct RMatch *md = mrb_match_ptr(match);
+
+  if (md->str.tt) {
+    return;
+  }
+  mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Match");
+}
+
+/*
+ * Return the text of capture group `nth` of `match`, or nil when the
+ * match is nil, the index is out of range or the group did not take
+ * part in the match.  A negative index counts from the last group.
+ */
+mrb_value
+mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match)
+{
+  long first, last;
+  struct RMatch *md = mrb_match_ptr(match);
+
+  if (mrb_nil_p(match)) {
+    return mrb_nil_value();
+  }
+  match_check(mrb, match);
+
+  if (nth >= md->rmatch->regs.num_regs) {
+    return mrb_nil_value();
+  }
+  if (nth < 0) {
+    nth += md->rmatch->regs.num_regs;
+    if (nth <= 0) {
+      return mrb_nil_value();
+    }
+  }
+
+  first = md->rmatch->regs.beg[nth];
+  if (first == -1) {
+    return mrb_nil_value(); /* group did not participate */
+  }
+  last = md->rmatch->regs.end[nth];
+
+  return mrb_str_substr(mrb, md->str, first, last - first);
+}
+
+mrb_value
+mrb_reg_last_match(mrb_state *mrb, mrb_value match)
+{
+  /* group 0 is requested from mrb_reg_nth_match() */
+  const mrb_int whole = 0;
+
+  return mrb_reg_nth_match(mrb, whole, match);
+}
+
+
+/*
+ * Resolve `backref` to a capture group number of `match`.  A Symbol or
+ * String is looked up as a group name in the match's regexp and raises
+ * IndexError when the lookup yields a number below 1; any other value
+ * is read as a Fixnum.
+ */
+static int
+match_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref)
+{
+  const char *group;
+  int idx;
+  struct re_registers *regs = RMATCH_REGS(match);
+  mrb_value regexp = RMATCH(match)->regexp;
+
+  match_check(mrb, match);
+
+  if (mrb_type(backref) == MRB_TT_SYMBOL) {
+    group = mrb_sym2name(mrb, SYM2ID(backref));
+  }
+  else if (mrb_type(backref) == MRB_TT_STRING) {
+    group = mrb_string_value_cstr(mrb, &backref);
+  }
+  else {
+    return mrb_fixnum(backref);
+  }
+
+  idx = onig_name_to_backref_number(mrb_regex_ptr(regexp)->ptr,
+                                    (const unsigned char*)group,
+                                    (const unsigned char*)group + strlen(group),
+                                    regs);
+  if (idx < 1) {
+    mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s", group);
+  }
+  return idx;
+}
+/* 15.2.15.6.3 */
+/*
+ * call-seq:
+ * Regexp.last_match -> matchdata
+ * Regexp.last_match(n) -> str
+ *
+ * The first form returns the <code>MatchData</code> object generated by the
+ * last successful pattern match. Equivalent to reading the global variable
+ * <code>$~</code>. The second form returns the <i>n</i>th field in this
+ * <code>MatchData</code> object.
+ * <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ * Note that the <code>last_match</code> is local to the thread and method scope
+ * of the method that did the pattern match.
+ *
+ * /c(.)t/ =~ 'cat' #=> 0
+ * Regexp.last_match #=> #<MatchData "cat" 1:"a">
+ * Regexp.last_match(0) #=> "cat"
+ * Regexp.last_match(1) #=> "a"
+ * Regexp.last_match(2) #=> nil
+ *
+ * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
+ * Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
+ * Regexp.last_match(:lhs) #=> "var"
+ * Regexp.last_match(:rhs) #=> "val"
+ */
+static mrb_value
+mrb_reg_s_last_match(mrb_state *mrb, mrb_value self/*int argc, mrb_value *argv*/)
+{
+  mrb_value argv[16];
+  int argc;
+  int group;
+  mrb_value last = mrb_backref_get(mrb);
+
+  /* NOTE(review): confirm that the "*" format of mrb_get_args() expects the
+     address of an array like this rather than the address of a pointer. */
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  /* without arguments the stored match object itself is returned */
+  if (argc == 0) {
+    return last;
+  }
+  if (mrb_nil_p(last)) {
+    return mrb_nil_value();
+  }
+  group = match_backref_number(mrb, last, argv[0]);
+  return mrb_reg_nth_match(mrb, group, last);
+}
+
+/*
+ * Raise TypeError unless `re` has a compiled pattern, a source value
+ * with a non-zero type tag and a non-null source pointer.
+ */
+static void
+mrb_reg_check(mrb_state *mrb, mrb_value re)
+{
+  /* evaluated left to right; the first failing test raises */
+  if (!(RREGEXP(re)->ptr) ||
+      RREGEXP_SRC(re).tt == 0 ||
+      !RREGEXP_SRC_PTR(re)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp");
+  }
+}
+
+/*
+ * Return the option bits of `re`: the compiled pattern's options masked
+ * with ARG_REG_OPTION_MASK, plus ARG_ENCODING_FIXED / ARG_ENCODING_NONE
+ * when the matching object flags are set.
+ */
+int
+mrb_reg_options(mrb_state *mrb, mrb_value re)
+{
+  int opts;
+
+  mrb_reg_check(mrb, re);
+
+  opts = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK;
+  if (RBASIC(re)->flags & KCODE_FIXED) {
+    opts |= ARG_ENCODING_FIXED;
+  }
+  if (RBASIC(re)->flags & REG_ENCODING_NONE) {
+    opts |= ARG_ENCODING_NONE;
+  }
+  return opts;
+}
+
+/* Raise EncodingError naming the encodings of the regexp and the string. */
+static void
+reg_enc_error(mrb_state *mrb, mrb_value re, mrb_value str)
+{
+  mrb_encoding *re_enc = mrb_enc_get(mrb, re);
+  mrb_encoding *str_enc = mrb_enc_get(mrb, str);
+
+  mrb_raise(mrb, E_ENCODING_ERROR,
+            "incompatible encoding regexp match (%s regexp with %s string)",
+            mrb_enc_name(re_enc),
+            mrb_enc_name(str_enc));
+}
+
+static int
+mrb_reg_fixed_encoding_p(mrb_value re)
+{
+  /* The KCODE_FIXED flag test is disabled:
+       if (FL_TEST(re, KCODE_FIXED)) return Qtrue;
+     so every regexp is reported as not having a fixed encoding. */
+  (void)re;
+  return 0/*Qfalse*/;
+}
+
+static mrb_encoding*
+mrb_reg_prepare_enc(mrb_state *mrb, mrb_value re, mrb_value str, int warn)
+{ /*
+ * Choose the encoding used to match `re` against `str` and return it.
+ * Raises ArgumentError for a string with a broken code range, TypeError
+ * (through mrb_reg_check) for an uninitialized regexp, and an encoding
+ * error (through reg_enc_error) when the two encodings cannot be combined.
+ * With `warn` set, mrb_warn() is called for the case tested at the end.
+ */
+ mrb_encoding *enc = 0;
+
+ if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR,
+ "invalid byte sequence in %s",
+ mrb_enc_name(mrb_enc_get(mrb, str)));
+ }
+
+ mrb_reg_check(mrb, re);
+ enc = mrb_enc_get(mrb, str); /* start from the string's own encoding */
+ if (!mrb_enc_str_asciicompat_p(mrb, str)) {
+ if (RREGEXP(re)->ptr->enc != enc) { /* compiled pattern uses another encoding */
+ reg_enc_error(mrb, re, str);
+ }
+ }
+ else if (mrb_reg_fixed_encoding_p(re)) {
+ if (RREGEXP(re)->ptr->enc != enc &&
+ (!mrb_enc_asciicompat(mrb, RREGEXP(re)->ptr->enc) ||
+ mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT)) {
+ reg_enc_error(mrb, re, str);
+ }
+ enc = RREGEXP(re)->ptr->enc; /* a fixed-encoding regexp dictates the encoding */
+ }
+ if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
+ enc != mrb_ascii8bit_encoding(mrb) &&
+ mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
+ mrb_warn("regexp match /.../n against to %s string",
+ mrb_enc_name(enc));
+ }
+ return enc;
+}
+
+static mrb_value
+mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re)
+{ /*
+ * Build a printable description of a pattern: "/", the source text
+ * `s` (of `len` bytes) as rendered by mrb_reg_expr_str(), and a closing
+ * "/".  When `re` has a non-zero type tag, its option letters follow,
+ * plus "n" if the REG_ENCODING_NONE flag is set.
+ */
+ mrb_encoding *enc = mrb_enc_get(mrb, re);
+ mrb_value str = mrb_str_new_cstr(mrb, "/");//mrb_str_buf_new2("/");
+ mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
+ if (resenc == NULL) resenc = mrb_default_external_encoding(mrb); /* fall back to the external encoding */
+
+ if (re.tt && mrb_enc_asciicompat(mrb, enc)) {
+ mrb_enc_copy(mrb, str, re);
+ }
+ else {
+ mrb_enc_associate(mrb, str, mrb_usascii_encoding(mrb));
+ }
+ mrb_reg_expr_str(mrb, str, s, len, enc, resenc);
+ mrb_str_buf_cat(mrb, str, "/", strlen("/"));//mrb_str_buf_cat2(str, "/");
+ if (re.tt) {
+ char opts[4];
+ mrb_reg_check(mrb, re);
+ if (*option_to_str(opts, RREGEXP(re)->ptr->options)) /* append only a non-empty option string */
+ mrb_str_buf_cat(mrb, str, opts, strlen(opts));//mrb_str_buf_cat2(str, opts);
+ if (RBASIC(re)->flags & REG_ENCODING_NONE)
+ mrb_str_buf_cat(mrb, str, "n", strlen("n"));//mrb_str_buf_cat2(str, "n");
+ }
+ /*OBJ_INFECT(str, re);*/
+ return str;
+}
+static void
+mrb_reg_raise(mrb_state *mrb, const char *s, long len, const char *err, mrb_value re)
+{
+ mrb_value desc = mrb_reg_desc(mrb, s, len, re);
+
+ mrb_raise(mrb, E_REGEXP_ERROR, "%s: %s", err, RSTRING_PTR(desc));
+}
+
+regex_t *
+mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str)
+{ /*
+ * Return a compiled pattern suitable for matching `re` against `str`.
+ * If the pattern stored in `re` already uses the encoding chosen by
+ * mrb_reg_prepare_enc(), that pattern is returned as is.  Otherwise the
+ * source is preprocessed and compiled again for that encoding and the
+ * newly created pattern is returned; `re` itself is not updated here.
+ * Raises ArgumentError when preprocessing fails and a regexp error when
+ * compilation fails.
+ */
+ regex_t *reg = RREGEXP(re)->ptr;
+ onig_errmsg_buffer err = "";
+ int r;
+ OnigErrorInfo einfo;
+ const char *pattern;
+ mrb_value unescaped;
+ mrb_encoding *fixed_enc = 0;
+ mrb_encoding *enc = mrb_reg_prepare_enc(mrb, re, str, 1);
+
+ if (reg->enc == enc) return reg; /* stored pattern can be used directly */
+
+ mrb_reg_check(mrb, re);
+ reg = RREGEXP(re)->ptr;
+ pattern = RREGEXP_SRC_PTR(re);
+
+ unescaped = mrb_reg_preprocess(mrb,
+ pattern, pattern + RREGEXP_SRC_LEN(re), enc,
+ &fixed_enc, err);
+
+ if (mrb_nil_p(unescaped)) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err);
+ }
+
+ r = onig_new(&reg, (UChar* )RSTRING_PTR(unescaped),
+ (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
+ reg->options, enc,
+ OnigDefaultSyntax, &einfo); /* the old pattern's options are passed on */
+ if (r) {
+ onig_error_code_to_str((UChar*)err, r, &einfo);
+ mrb_reg_raise(mrb, pattern, RREGEXP_SRC_LEN(re), err, re);
+ }
+
+ //RB_GC_GUARD(unescaped);
+ return reg;
+}
+
+
+mrb_int
+mrb_reg_search(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse)
+{ /*
+ * Search `str` for `re`, starting at byte offset `pos`.  With `reverse`
+ * set the search range ends at the start of the string, otherwise at
+ * its end.  Returns the value produced by onig_search(), or -1 when
+ * `pos` lies outside the string.  The back-reference slot is set to nil
+ * when nothing matches and to a match object describing the result
+ * otherwise.  Any failure other than a mismatch raises a regexp error.
+ */
+ long result;
+ mrb_value match;
+ struct re_registers regi, *regs = &regi;
+ char *range = RSTRING_PTR(str);
+ regex_t *reg;
+ int tmpreg;
+
+ if (pos > RSTRING_LEN(str) || pos < 0) {
+ mrb_backref_set(mrb, mrb_nil_value());
+ return -1;
+ }
+
+ reg = mrb_reg_prepare_re(mrb, re, str);
+ tmpreg = reg != RREGEXP(re)->ptr; /* true when a new pattern was compiled for this call */
+ if (!tmpreg) RREGEXP(re)->usecnt++;
+
+ match = mrb_backref_get(mrb);
+ if (!mrb_nil_p(match)) {
+ /*if (FL_TEST(match, MATCH_BUSY)) {
+ match = Qnil;
+ }
+ else {
+ regs = RMATCH_REGS(match);
+ }*/
+ regs = RMATCH_REGS(match); /* reuse the registers of the existing match object */
+ }
+ if (mrb_nil_p(match)) {
+ memset(regs, 0, sizeof(struct re_registers));
+ }
+//-->
+ if (!reverse) {
+ range += RSTRING_LEN(str);
+ }
+ result = onig_search(reg,
+ (UChar*)(RSTRING_PTR(str)),
+ ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)),
+ ((UChar*)(RSTRING_PTR(str)) + pos),
+ ((UChar*)range),
+ regs, ONIG_OPTION_NONE);
+ if (!tmpreg) RREGEXP(re)->usecnt--;
+ if (tmpreg) {
+ if (RREGEXP(re)->usecnt) { /* stored pattern is in use: drop the new one */
+ onig_free(reg);
+ }
+ else { /* otherwise the new pattern replaces the stored one */
+ onig_free(RREGEXP(re)->ptr);
+ RREGEXP(re)->ptr = reg;
+ }
+ }
+ if (result < 0) {
+ if (regs == &regi) /* only the local registers are freed here */
+ onig_region_free(regs, 0);
+ if (result == ONIG_MISMATCH) {
+ mrb_backref_set(mrb, mrb_nil_value());
+ return result;
+ }
+ else {
+ onig_errmsg_buffer err = "";
+ onig_error_code_to_str((UChar*)err, (int)result);
+ mrb_reg_raise(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
+ }
+ }
+//--<
+ if (mrb_nil_p(match) ) {
+ match = match_alloc(mrb);
+ onig_region_copy(RMATCH_REGS(match), regs); /* move the local result into the new match */
+ onig_region_free(regs, 0);
+ }
+ /*else {
+ if (mrb_safe_level() >= 3)
+ OBJ_TAINT(match);
+ else
+ FL_UNSET(match, FL_TAINT);
+ }*/
+
+ RMATCH(match)->str = str_new4(mrb, str.tt, str);
+ RMATCH(match)->regexp = re;
+ RMATCH(match)->rmatch->char_offset_updated = 0;
+ mrb_backref_set(mrb, match);
+
+ //OBJ_INFECT(match, re);
+ //OBJ_INFECT(match, str);
+
+ return result;
+}
+
+/*
+ * Check `re`, then delegate to re_adjust_startpos() with a range of
+ * -pos for a reverse search and (string length - pos) otherwise.
+ */
+mrb_int
+mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse)
+{
+  struct RString *text = mrb_str_ptr(str);
+  struct RRegexp *regexp = mrb_regex_ptr(re);
+  mrb_int span;
+
+  mrb_reg_check(mrb, re);
+
+  span = reverse ? -pos : text->len - pos;
+
+  return re_adjust_startpos(regexp->ptr, text->buf, text->len, pos, span);
+}
+
+/*
+ * Allocate a regex_t, initialise it and compile `pattern` into it.
+ * On success *reg points at the new object and 0 is returned.  On
+ * failure *reg is null and the error code is returned
+ * (ONIGERR_MEMORY when the allocation itself fails).
+ */
+static int
+onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+                     OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
+                     OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
+{
+  int status;
+
+  *reg = (regex_t* )malloc/*xmalloc*/(sizeof(regex_t));
+  if ((void*)(*reg) == (void*)0) return ONIGERR_MEMORY;
+
+  status = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
+  if (status == 0) {
+    status = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
+  }
+  if (status != 0) {
+    /* either step failed: release the object and report the code */
+    onig_free(*reg);
+    *reg = 0/*NULL*/;
+  }
+  return status;
+}
+
+/*
+ * Compile the `len` bytes at `s` with the default syntax.  Returns the
+ * compiled pattern, or 0 after writing the error text into `err`.
+ */
+static Regexp*
+make_regexp(const char *s, long len, mrb_encoding *enc, int flags, onig_errmsg_buffer err,
+            const char *sourcefile, int sourceline)
+{
+  Regexp *compiled;
+  OnigErrorInfo einfo;
+  int status;
+
+  status = onig_new_with_source(&compiled, (UChar*)s, (UChar*)(s + len), flags,
+                                enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
+  if (status == 0) {
+    return compiled;
+  }
+
+  onig_error_code_to_str((UChar*)err, status, &einfo);
+  return 0;
+}
+
+/*
+ * Parse up to `len` hexadecimal digits (either case) from `start`.
+ * Scanning stops at the first NUL or non-hex character.  The number of
+ * characters consumed is stored in *retlen and the value is returned.
+ */
+unsigned long
+ruby_scan_hex(const char *start, size_t len, size_t *retlen)
+{
+  /* the index of a digit in this table, modulo 16, is its value */
+  static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF";
+  const char *cur = start;
+  unsigned long value = 0;
+
+  for (; len != 0; len--, cur++) {
+    const char *hit;
+
+    if (*cur == '\0') break;
+    hit = strchr(hexdigit, *cur);
+    if (hit == NULL) break;
+    value <<= 4;
+    value |= (hit - hexdigit) & 15;
+  }
+  *retlen = (int)(cur - start); /* less than len */
+  return value;
+}
+
+/*
+ * Return 0 when `code` is an acceptable Unicode code point, -1 when it
+ * is a surrogate (0xD800-0xDFFF) or above 0x10FFFF.  On failure the
+ * reason is written into `err`, the buffer callers report, instead of
+ * being printed to stdout.
+ */
+static int
+check_unicode_range(unsigned long code, onig_errmsg_buffer err)
+{
+  if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
+      0x10ffff < code) {
+    if (err) {
+      snprintf(err, ONIG_MAX_ERROR_MESSAGE_LEN, "%s", "invalid Unicode range");
+    }
+    return -1;
+  }
+  return 0;
+}
+
+#define BYTEWIDTH 8
+
+/*
+ * Encode `uv` into `buf` using the UTF-8 scheme extended to six bytes
+ * and return the number of bytes written (1 to 6).  Values above
+ * 0x7fffffff raise RangeError.
+ */
+int
+mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv)
+{
+  int nbytes;
+  int lead;
+  int i;
+
+  if (uv <= 0x7f) {
+    buf[0] = (char)uv;
+    return 1;
+  }
+
+  /* choose the sequence length and the marker bits of the first byte */
+  if (uv <= 0x7ff) {
+    nbytes = 2; lead = 0xc0;
+  }
+  else if (uv <= 0xffff) {
+    nbytes = 3; lead = 0xe0;
+  }
+  else if (uv <= 0x1fffff) {
+    nbytes = 4; lead = 0xf0;
+  }
+  else if (uv <= 0x3ffffff) {
+    nbytes = 5; lead = 0xf8;
+  }
+  else if (uv <= 0x7fffffff) {
+    nbytes = 6; lead = 0xfc;
+  }
+  else {
+    mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range");
+    return 0;
+  }
+
+  /* continuation bytes carry six bits each, filled from the last byte */
+  for (i = nbytes - 1; i > 0; i--) {
+    buf[i] = (char)(uv&0x3f)|0x80;
+    uv >>= 6;
+  }
+  /* what is left of the value goes into the first byte */
+  buf[0] = (char)(uv&0xff)|lead;
+  return nbytes;
+}
+
+static int
+append_utf8(mrb_state *mrb, unsigned long uv,
+            mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
+{
+  /*
+   * Append code point uv to buf.  ASCII values are appended as a 4-character
+   * "\xHH" escape; anything else is appended as UTF-8 bytes, which also pins
+   * *encp to the UTF-8 encoding (or fails if *encp is already a different
+   * encoding).  Returns 0 on success, -1 on failure.
+   */
+  char utf8buf[6];
+  int nbytes;
+
+  if (check_unicode_range(uv, err) != 0) {
+    return -1;
+  }
+
+  if (uv < 0x80) {
+    char escbuf[5];
+    snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
+    mrb_str_buf_cat(mrb, buf, escbuf, 4);
+    return 0;
+  }
+
+  nbytes = mrb_uv_to_utf8(mrb, utf8buf, uv);
+  mrb_str_buf_cat(mrb, buf, utf8buf, nbytes);
+
+  if (*encp == 0) {
+    *encp = mrb_utf8_encoding(mrb);
+    return 0;
+  }
+  if (*encp != mrb_utf8_encoding(mrb)) {
+    //errcpy(err, "UTF-8 character in non UTF-8 regexp");
+    printf("UTF-8 character in non UTF-8 regexp");
+    return -1;
+  }
+  return 0;
+}
+
+static int
+unescape_unicode_bmp(mrb_state *mrb, const char **pp, const char *end,
+                     mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
+{
+  /*
+   * Handle the "HHHH" part of a \uHHHH escape located at *pp: exactly four
+   * hex digits are required.  The decoded code point is appended through
+   * append_utf8() and *pp is advanced past the digits.  Returns 0 on
+   * success, -1 on failure (*pp is left alone in that case).
+   */
+  const char *digits = *pp;
+  size_t ndigits = 0;
+  unsigned long code = 0;
+  int well_formed = !(end < digits + 4);
+
+  if (well_formed) {
+    code = ruby_scan_hex(digits, 4, &ndigits);
+    well_formed = (ndigits == 4);
+  }
+  if (!well_formed) {
+    //errcpy(err, "invalid Unicode escape");
+    printf("invalid Unicode escape");
+    return -1;
+  }
+
+  if (append_utf8(mrb, code, buf, encp, err) != 0) {
+    return -1;
+  }
+  *pp = digits + 4;
+  return 0;
+}
+
+unsigned long
+ruby_scan_oct(const char *start, size_t len, size_t *retlen)
+{
+  /*
+   * Parse up to len octal digits from start, stopping at the first
+   * character outside '0'..'7'.  Stores the number of characters consumed
+   * in *retlen and returns the accumulated value.
+   */
+  const char *cur = start;
+  unsigned long value = 0;
+  size_t remaining;
+
+  for (remaining = len; remaining > 0; remaining--, cur++) {
+    if (*cur < '0' || *cur > '7') break;
+    value = (value << 3) | (unsigned long)(*cur - '0');
+  }
+  *retlen = (int)(cur - start); /* less than len */
+  return value;
+}
+
+/*
+ * Decode one backslash escape that denotes a single byte.
+ *
+ * *pp must point at the backslash; end is the end of the input.  On success
+ * the byte value (0..255) is returned and *pp is advanced past the escape.
+ * On failure a message is printed, -1 is returned and *pp is not updated.
+ * The err buffer is currently unused because the errcpy() calls are
+ * commented out.
+ *
+ * \M- and \C- / \c prefixes may be combined; each sets a flag and jumps back
+ * to `again` to read the escape that follows.
+ * NOTE(review): scan_oct / scan_hex are not defined in this part of the
+ * file; presumably thin wrappers around ruby_scan_oct / ruby_scan_hex.
+ */
+static int
+read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
+{
+ const char *p = *pp;
+ int code;
+ int meta_prefix = 0, ctrl_prefix = 0;
+ size_t len;
+ int retbyte;
+
+ retbyte = -1;
+ /* the escape must start with a backslash */
+ if (p == end || *p++ != '\\') {
+ //errcpy(err, "too short escaped multibyte character");
+ printf("too short escaped multibyte character");
+ return -1;
+ }
+
+again:
+ if (p == end) {
+ //errcpy(err, "too short escape sequence");
+ printf("too short escape sequence");
+ return -1;
+ }
+ switch (*p++) {
+ case '\\': code = '\\'; break;
+ case 'n': code = '\n'; break;
+ case 't': code = '\t'; break;
+ case 'r': code = '\r'; break;
+ case 'f': code = '\f'; break;
+ case 'v': code = '\013'; break;
+ case 'a': code = '\007'; break;
+ case 'e': code = '\033'; break;
+
+ /* \OOO */
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ /* step back so the first digit is part of the scan; at most 3 digits */
+ p--;
+ code = scan_oct(p, end < p+3 ? end-p : 3, &len);
+ p += len;
+ break;
+
+ case 'x': /* \xHH */
+ /* one or two hex digits, limited by the remaining input */
+ code = scan_hex(p, end < p+2 ? end-p : 2, &len);
+ if (len < 1) {
+ //errcpy(err, "invalid hex escape");
+ printf("invalid hex escape");
+ return -1;
+ }
+ p += len;
+ break;
+
+ case 'M': /* \M-X, \M-\C-X, \M-\cX */
+ if (meta_prefix) {
+ //errcpy(err, "duplicate meta escape");
+ printf("duplicate meta escape");
+ return -1;
+ }
+ meta_prefix = 1;
+ /* needs '-' plus one more 7-bit character after it */
+ if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) {
+ if (*p == '\\') {
+ /* nested escape: consume the backslash and decode what follows */
+ p++;
+ goto again;
+ }
+ else {
+ code = *p++;
+ break;
+ }
+ }
+ //errcpy(err, "too short meta escape");
+ printf("too short meta escape");
+ return -1;
+
+ case 'C': /* \C-X, \C-\M-X */
+ if (p == end || *p++ != '-') {
+ //errcpy(err, "too short control escape");
+ printf("too short control escape");
+ return -1;
+ }
+ /* fall through: after "\C-" the handling is the same as for "\c" */
+ case 'c': /* \cX, \c\M-X */
+ if (ctrl_prefix) {
+ //errcpy(err, "duplicate control escape");
+ printf("duplicate control escape");
+ return -1;
+ }
+ ctrl_prefix = 1;
+ if (p < end && (*p & 0x80) == 0) {
+ if (*p == '\\') {
+ /* nested escape: consume the backslash and decode what follows */
+ p++;
+ goto again;
+ }
+ else {
+ code = *p++;
+ break;
+ }
+ }
+ //errcpy(err, "too short control escape");
+ printf("too short control escape");
+ return -1;
+
+ default:
+ //errcpy(err, "unexpected escape sequence");
+ printf("unexpected escape sequence");
+ return -1;
+ }
+ /* only values that fit in one byte are acceptable (e.g. \777 is rejected) */
+ if (code < 0 || 0xff < code) {
+ //errcpy(err, "invalid escape code");
+ printf("invalid escape code");
+ return -1;
+ }
+
+ /* apply the prefixes: control keeps the low 5 bits, meta sets the high bit */
+ if (ctrl_prefix)
+ code &= 0x1f;
+ if (meta_prefix)
+ code |= 0x80;
+
+ *pp = p;
+ return code;
+}
+
+/*
+ * Decode a run of byte escapes (\xHH, \OOO, \cX, \M-X ...) starting at *pp
+ * that together form one character in enc.
+ *
+ * A multibyte or high-bit result is appended to buf verbatim and pins *encp
+ * to enc (failing if *encp is already set to a different encoding).  A
+ * single 7-bit byte is re-emitted as a "\xHH" escape instead.
+ *
+ * Returns 0 on success with *pp advanced past the consumed escapes, or -1
+ * on failure with *pp unchanged.
+ *
+ * The scratch buffer used to be alloca'd; now that it comes from
+ * mrb_malloc it is released on every exit path.
+ */
+static int
+unescape_escaped_nonascii(mrb_state *mrb, const char **pp, const char *end, mrb_encoding *enc,
+                          mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
+{
+  const char *p = *pp;
+  int chmaxlen = mrb_enc_mbmaxlen(enc);
+  //char *chbuf = ALLOCA_N(char, chmaxlen);
+  char *chbuf = mrb_malloc(mrb, chmaxlen);
+  int chlen = 0;
+  int byte;
+  int l;
+  int result = -1;
+
+  memset(chbuf, 0, chmaxlen);
+
+  byte = read_escaped_byte(&p, end, err);
+  if (byte == -1) {
+    goto done;
+  }
+
+  chbuf[chlen++] = byte;
+  /* keep reading escaped bytes while the encoding says the character is incomplete */
+  while (chlen < chmaxlen &&
+         MBCLEN_NEEDMORE_P(mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
+    byte = read_escaped_byte(&p, end, err);
+    if (byte == -1) {
+      goto done;
+    }
+    chbuf[chlen++] = byte;
+  }
+
+  l = mrb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
+  if (MBCLEN_INVALID_P(l)) {
+    //errcpy(err, "invalid multibyte escape");
+    printf("invalid multibyte escape");
+    goto done;
+  }
+  if (1 < chlen || (chbuf[0] & 0x80)) {
+    mrb_str_buf_cat(mrb, buf, chbuf, chlen);
+
+    if (*encp == 0)
+      *encp = enc;
+    else if (*encp != enc) {
+      //errcpy(err, "escaped non ASCII character in UTF-8 regexp");
+      printf("escaped non ASCII character in UTF-8 regexp");
+      goto done;
+    }
+  }
+  else {
+    char escbuf[5];
+    snprintf(escbuf, sizeof(escbuf), "\\x%02X", chbuf[0]&0xff);
+    mrb_str_buf_cat(mrb, buf, escbuf, 4);
+  }
+  *pp = p;
+  result = 0;
+
+done:
+  mrb_free(mrb, chbuf);
+  return result;
+}
+
+static int
+unescape_unicode_list(mrb_state *mrb, const char **pp, const char *end,
+                      mrb_value buf, mrb_encoding **encp, onig_errmsg_buffer err)
+{
+  /*
+   * Parse the inside of a \u{...} escape: whitespace-separated hex code
+   * points of at most six digits each.  Every code point is appended via
+   * append_utf8().  At least one code point is required.  On success *pp is
+   * advanced to the first character that is neither whitespace nor a hex
+   * digit (the caller checks for the closing brace) and 0 is returned;
+   * otherwise -1.
+   */
+  const char *cur = *pp;
+  int seen_any = 0;
+
+  for (;;) {
+    unsigned long code;
+    size_t ndigits;
+
+    while (cur < end && ISSPACE(*cur)) cur++;
+
+    code = ruby_scan_hex(cur, end-cur, &ndigits);
+    if (ndigits == 0) {
+      break;
+    }
+    if (ndigits > 6) { /* max 10FFFF */
+      //errcpy(err, "invalid Unicode range");
+      printf("invalid Unicode range");
+      return -1;
+    }
+    cur += ndigits;
+    if (append_utf8(mrb, code, buf, encp, err) != 0) {
+      return -1;
+    }
+    seen_any = 1;
+  }
+
+  if (!seen_any) {
+    //errcpy(err, "invalid Unicode list");
+    printf("invalid Unicode list");
+    return -1;
+  }
+
+  *pp = cur;
+  return 0;
+}
+
+/*
+ * Copy the pattern [p, end) into buf, rewriting escapes that denote
+ * non-ASCII characters.
+ *
+ *  - Multibyte or high-bit characters are copied verbatim and pin *encp to
+ *    enc (an error if *encp is already a different encoding).
+ *  - \1..\7 whose octal value is <= 0177 are kept as written (they may be
+ *    backreferences); larger octal values, \0, \x, \c, \C and \M are handed
+ *    to unescape_escaped_nonascii().
+ *  - \u{...} and \uHHHH are decoded through the unicode helpers.
+ *  - \p / \P set *has_property when no encoding has been pinned yet and are
+ *    kept as written.
+ *  - Any other escape is copied as backslash + character; plain 7-bit
+ *    characters are copied unchanged.
+ *
+ * Returns 0 on success, -1 (after printing a message) on failure.
+ */
+static int
+unescape_nonascii(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
+ mrb_value buf, mrb_encoding **encp, int *has_property,
+ onig_errmsg_buffer err)
+{
+ char c;
+ char smallbuf[2];
+
+ while (p < end) {
+ int chlen = mrb_enc_precise_mbclen(p, end, enc);
+ if (!MBCLEN_CHARFOUND_P(chlen)) {
+ //errcpy(err, "invalid multibyte character");
+ printf("invalid multibyte character");
+ return -1;
+ }
+ chlen = MBCLEN_CHARFOUND_LEN(chlen);
+ /* literal non-ASCII character: copy it and pin the encoding */
+ if (1 < chlen || (*p & 0x80)) {
+ mrb_str_buf_cat(mrb, buf, p, chlen);
+ p += chlen;
+ if (*encp == 0)
+ *encp = enc;
+ else if (*encp != enc) {
+ //errcpy(err, "non ASCII character in UTF-8 regexp");
+ printf("non ASCII character in UTF-8 regexp");
+ return -1;
+ }
+ continue;
+ }
+
+ switch (c = *p++) {
+ case '\\':
+ if (p == end) {
+ //errcpy(err, "too short escape sequence");
+ printf("too short escape sequence");
+ return -1;
+ }
+ switch (c = *p++) {
+ case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
+ {
+ size_t octlen;
+ if (ruby_scan_oct(p-1, end-(p-1), &octlen) <= 0177) {
+ /* backref or 7bit octal.
+ no need to unescape anyway.
+ re-escaping may break backref */
+ goto escape_asis;
+ }
+ }
+ /* xxx: How about more than 199 subexpressions? */
+ /* fall through: an octal value above 0177 is treated as a byte escape */
+
+ case '0': /* \0, \0O, \0OO */
+
+ case 'x': /* \xHH */
+ case 'c': /* \cX, \c\M-X */
+ case 'C': /* \C-X, \C-\M-X */
+ case 'M': /* \M-X, \M-\C-X, \M-\cX */
+ /* rewind to the backslash so the helper sees the whole escape */
+ p = p-2;
+ if (unescape_escaped_nonascii(mrb, &p, end, enc, buf, encp, err) != 0)
+ return -1;
+ break;
+
+ case 'u':
+ if (p == end) {
+ //errcpy(err, "too short escape sequence");
+ printf("too short escape sequence");
+ return -1;
+ }
+ if (*p == '{') {
+ /* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
+ p++;
+ if (unescape_unicode_list(mrb, &p, end, buf, encp, err) != 0)
+ return -1;
+ /* the list must be terminated by a closing brace */
+ if (p == end || *p++ != '}') {
+ //errcpy(err, "invalid Unicode list");
+ printf("invalid Unicode list");
+ return -1;
+ }
+ break;
+ }
+ else {
+ /* \uHHHH */
+ if (unescape_unicode_bmp(mrb, &p, end, buf, encp, err) != 0)
+ return -1;
+ break;
+ }
+
+ case 'p': /* \p{Hiragana} */
+ case 'P':
+ /* remember that a property was used while no encoding is pinned yet */
+ if (!*encp) {
+ *has_property = 1;
+ }
+ goto escape_asis;
+
+ default: /* \n, \\, \d, \9, etc. */
+escape_asis:
+ /* copy the escape through unchanged: backslash followed by c */
+ smallbuf[0] = '\\';
+ smallbuf[1] = c;
+ mrb_str_buf_cat(mrb, buf, smallbuf, 2);
+ break;
+ }
+ break;
+
+ default:
+ /* ordinary 7-bit character */
+ mrb_str_buf_cat(mrb, buf, &c, 1);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+
+static mrb_value
+mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, mrb_encoding *enc,
+                   mrb_encoding **fixed_enc, onig_errmsg_buffer err)
+{
+  /*
+   * Produce the unescaped form of the pattern [p, end) as a new string.
+   * *fixed_enc receives the encoding the pattern is pinned to (0 when it is
+   * not pinned).  Returns nil when unescaping fails.
+   */
+  int has_property = 0;
+  mrb_value out;
+
+  //buf = mrb_str_buf_new(0);
+  out = mrb_str_buf_new(mrb, 0);
+
+  if (!mrb_enc_asciicompat(mrb, enc)) {
+    /* a non ASCII-compatible source encoding is pinned from the start */
+    *fixed_enc = enc;
+    mrb_enc_associate(mrb, out, enc);
+  }
+  else {
+    *fixed_enc = 0;
+  }
+
+  if (unescape_nonascii(mrb, p, end, enc, out, fixed_enc, &has_property, err) != 0) {
+    return mrb_nil_value();
+  }
+
+  /* a \p{...} property with no pinned encoding pins the source encoding */
+  if (has_property && !*fixed_enc) {
+    *fixed_enc = enc;
+  }
+  if (*fixed_enc) {
+    mrb_enc_associate(mrb, out, *fixed_enc);
+  }
+
+  return out;
+}
+
+static int
+mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, mrb_encoding *enc,
+                   int options, onig_errmsg_buffer err,
+                   const char *sourcefile, int sourceline)
+{
+  /*
+   * Initialize the regexp object obj from the source bytes [s, s+len).
+   * Raises E_TYPE_ERROR if obj already holds a compiled pattern.  Returns 0
+   * on success and -1 on failure (a message is printed for failures
+   * detected here; compile errors are reported through err by make_regexp).
+   */
+  struct RRegexp *re = RREGEXP(obj);
+  mrb_encoding *binary_enc = mrb_ascii8bit_encoding(mrb);
+  mrb_encoding *fixed_enc = 0;
+  mrb_value unescaped;
+
+  if (re->ptr)
+    mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp");
+  re->ptr = 0;
+
+  if (mrb_enc_dummy_p(enc)) {
+    //errcpy(err, "can't make regexp with dummy encoding");
+    printf("can't make regexp with dummy encoding");
+    return -1;
+  }
+
+  unescaped = mrb_reg_preprocess(mrb, s, s+len, enc, &fixed_enc, err);
+  if (mrb_nil_p(unescaped)) {
+    return -1;
+  }
+
+  if (fixed_enc == 0) {
+    /* nothing in the pattern pinned an encoding */
+    if ((options & ARG_ENCODING_FIXED) == 0) {
+      enc = mrb_usascii_encoding(mrb);
+    }
+  }
+  else {
+    int clash_with_fixed = (fixed_enc != enc) && (options & ARG_ENCODING_FIXED);
+    int clash_with_none = (fixed_enc != binary_enc) && (options & ARG_ENCODING_NONE);
+
+    if (clash_with_fixed || clash_with_none) {
+      //errcpy(err, "incompatible character encoding");
+      printf("incompatible character encoding");
+      return -1;
+    }
+    if (fixed_enc != binary_enc) {
+      options |= ARG_ENCODING_FIXED;
+      enc = fixed_enc;
+    }
+  }
+
+  mrb_enc_associate(mrb, mrb_obj_value(re), enc);
+  if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
+    //re->basic.flags |= KCODE_FIXED;
+    re->flags |= KCODE_FIXED;
+  }
+  if (options & ARG_ENCODING_NONE) {
+    re->flags |= REG_ENCODING_NONE;
+  }
+
+  re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
+                        options & ARG_REG_OPTION_MASK, err,
+                        sourcefile, sourceline);
+  if (!re->ptr) {
+    return -1;
+  }
+  re->src = mrb_enc_str_new(mrb, s, len, enc);
+
+  /*OBJ_FREEZE(re->src);
+  RB_GC_GUARD(unescaped);*/
+  return 0;
+}
+
+static int
+mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options, onig_errmsg_buffer err,
+                       const char *sourcefile, int sourceline)
+{
+  /*
+   * Initialize regexp obj from the string object str, using the string's
+   * own encoding.  With ARG_ENCODING_NONE the pattern is treated as
+   * ASCII-8BIT, which is only allowed when the string already has that
+   * encoding or its code range is 7-bit.  Returns the result of
+   * mrb_reg_initialize(), or -1 when the encoding check fails.
+   */
+  mrb_encoding *src_enc = mrb_enc_get(mrb, str);
+
+  if ((options & ARG_ENCODING_NONE) != 0) {
+    mrb_encoding *binary = mrb_ascii8bit_encoding(mrb);
+
+    if (src_enc != binary &&
+        mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
+      //errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
+      printf("/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
+      return -1;
+    }
+    src_enc = binary;
+  }
+
+  /*OBJ_INFECT(obj, str);
+  RB_GC_GUARD(str);*/
+  return mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), src_enc,
+                            options, err, sourcefile, sourceline);
+}
+
+/* 15.2.15.7.1 */
+/*
+ * call-seq:
+ * Regexp.initialize(string, [options [, lang]]) -> regexp
+ * Regexp.initialize(regexp) -> regexp
+ *
+ * Constructs a new regular expression from <i>pattern</i>, which can be either
+ * a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
+ * options are propagated, and new options may not be specified (a change as of
+ * Ruby 1.8)). If <i>options</i> is a <code>Fixnum</code>, it should be one or
+ * more of the constants <code>Regexp::EXTENDED</code>,
+ * <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
+ * <em>or</em>-ed together. Otherwise, if <i>options</i> is not
+ * <code>nil</code>, the regexp will be case insensitive.
+ * When the <i>lang</i> parameter is `n' or `N', the regexp is created with no encoding.
+ *
+ * r1 = Regexp.initialize('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
+ * r2 = Regexp.initialize('cat', true) #=> /cat/i
+ * r3 = Regexp.initialize('dog', Regexp::EXTENDED) #=> /dog/x
+ * r4 = Regexp.initialize(r2) #=> /cat/i
+ */
+
+static mrb_value
+mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
+{
+  /*
+   * Method body of Regexp#initialize.  Accepts 1..3 arguments: either an
+   * existing regexp (whose source, options and encoding are reused) or a
+   * pattern string optionally followed by flags and a kcode string.
+   * Initialization failures are currently not raised; see the disabled
+   * mrb_reg_raise_str calls.
+   * NOTE(review): argv is an array whose address is passed for the "*"
+   * format -- confirm this matches what mrb_get_args expects.
+   */
+  mrb_value argv[16];
+  int argc;
+  onig_errmsg_buffer err = "";
+  int flags = 0;
+  mrb_value str;
+  mrb_encoding *enc;
+  const char *ptr;
+  long len;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc == 0 || argc > 3) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..3)", argc);
+  }
+
+  if (mrb_type(argv[0]) == MRB_TT_REGEX) {
+    /* copy-construct from another regexp; extra arguments are ignored */
+    mrb_value re = argv[0];
+
+    if (argc > 1) {
+      /* mrb_warn("flags ignored"); */
+      printf("flags ignored");
+    }
+    mrb_reg_check(mrb, re);
+    flags = mrb_reg_options(mrb, re);
+    ptr = RREGEXP_SRC_PTR(re);
+    len = RREGEXP_SRC_LEN(re);
+    enc = mrb_enc_get(mrb, re);
+    if (mrb_reg_initialize(mrb, self, ptr, len, enc, flags, err, NULL, 0)) {
+      /*str = mrb_enc_str_new(mrb, ptr, len, enc);
+      mrb_reg_raise_str(str, flags, err);*/
+      printf("mrb_reg_raise_str(str, flags, err);");
+    }
+    return self;
+  }
+
+  /* second argument: integer flags, or any truthy value for IGNORECASE */
+  if (argc >= 2) {
+    if (mrb_type(argv[1]) == MRB_TT_FIXNUM) {
+      flags = mrb_fixnum(argv[1]);
+    }
+    else if (mrb_test(argv[1])) {
+      flags = ONIG_OPTION_IGNORECASE;
+    }
+  }
+
+  /* third argument: only 'n' / 'N' (no encoding) is honoured */
+  enc = 0;
+  if (argc == 3 && !mrb_nil_p(argv[2])) {
+    //char *kcode = StringValuePtr(argv[2]);
+    char *kcode = mrb_string_value_ptr(mrb, argv[2]);
+    if (kcode[0] == 'n' || kcode[0] == 'N') {
+      enc = mrb_ascii8bit_encoding(mrb);
+      flags |= ARG_ENCODING_NONE;
+    }
+    else {
+      /*mrb_warn("encoding option is ignored - %s", kcode); */
+      printf("mrb_warn:encoding option is ignored - %s", kcode);
+    }
+  }
+
+  str = argv[0];
+  //ptr = StringValuePtr(str);
+  ptr = mrb_string_value_ptr(mrb, str);
+  /* the status is deliberately dropped: raising is still disabled here */
+  if (enc != 0) {
+    (void)mrb_reg_initialize(mrb, self, ptr, RSTRING_LEN(str), enc, flags, err, NULL, 0);
+  }
+  else {
+    (void)mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0);
+  }
+  //mrb_reg_raise_str(str, flags, err);
+  return self;
+}
+
+/* 15.2.15.7.2 */
+/* :nodoc: */
+static mrb_value
+mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/)
+{
+  /*
+   * Initialize re from the regexp passed as the first argument (named
+   * `copy` here).  When both are the same object nothing is done.  Raises
+   * E_TYPE_ERROR when re is not an instance of the argument's class, and
+   * reports a failed initialization through mrb_reg_raise().
+   */
+  mrb_value argv[16];
+  int argc;
+  onig_errmsg_buffer err = "";
+  mrb_value copy;
+  const char *src;
+  long src_len;
+  int status;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  copy = argv[0];
+  if (mrb_obj_equal(mrb, copy, re)) {
+    return copy;
+  }
+
+  /*mrb_check_frozen(copy);*/
+  /* need better argument type check */
+  if (!mrb_obj_is_instance_of(mrb, re, mrb_obj_class(mrb, copy))) {
+    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type");
+  }
+
+  mrb_reg_check(mrb, copy);
+  src = RREGEXP_SRC_PTR(copy);
+  src_len = RREGEXP_SRC_LEN(copy);
+  status = mrb_reg_initialize(mrb, re, src, src_len,
+                              mrb_enc_get(mrb, copy), mrb_reg_options(mrb, copy),
+                              err, 0/*NULL*/, 0);
+  if (status != 0) {
+    mrb_reg_raise(mrb, src, src_len, err, re);
+  }
+  return re;
+}
+
+static int
+mrb_reg_equal(mrb_state *mrb, mrb_value re1, mrb_value re2)
+{
+  /*
+   * TRUE when re1 and re2 are the same object, or when re2 is a regexp with
+   * the same compiled options and byte-identical source; FALSE otherwise.
+   */
+  int same;
+
+  if (mrb_obj_equal(mrb, re1, re2)) {
+    return TRUE;
+  }
+  if (mrb_type(re2) != MRB_TT_REGEX) {
+    return FALSE;
+  }
+
+  mrb_reg_check(mrb, re1);
+  mrb_reg_check(mrb, re2);
+  /*if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse; */
+  /*if (ENCODING_GET(re1) != ENCODING_GET(re2)) return mrb_false_value();*/
+  same = RREGEXP(re1)->ptr->options == RREGEXP(re2)->ptr->options
+      && RREGEXP_SRC_LEN(re1) == RREGEXP_SRC_LEN(re2)
+      && memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0;
+
+  return same ? TRUE : FALSE;
+}
+
+/* 15.2.15.7.3 */
+/*
+ * call-seq:
+ * rxp == other_rxp -> true or false
+ * rxp.eql?(other_rxp) -> true or false
+ *
+ * Equality---Two regexps are equal if their patterns are identical, they have
+ * the same character set code, and their <code>casefold?</code> values are the
+ * same.
+ *
+ * /abc/ == /abc/x #=> false
+ * /abc/ == /abc/i #=> false
+ * /abc/ == /abc/n #=> false
+ * /abc/u == /abc/n #=> false
+ */
+
+static mrb_value
+mrb_reg_equal_m(mrb_state *mrb, mrb_value re1/*, mrb_value re2*/)
+{
+  /* Method wrapper: compares the receiver with its single argument via mrb_reg_equal(). */
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  return mrb_reg_equal(mrb, re1, other) ? mrb_true_value() : mrb_false_value();
+}
+
+/* 15.2.15.7.4 */
+/*
+ * call-seq:
+ * rxp === str -> true or false
+ *
+ * Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
+ *
+ * a = "HELLO"
+ * case a
+ * when /^[a-z]*$/; print "Lower case\n"
+ * when /^[A-Z]*$/; print "Upper case\n"
+ * else; print "Mixed case\n"
+ * end
+ *
+ * <em>produces:</em>
+ *
+ * Upper case
+ */
+
+mrb_value
+mrb_reg_eqq(mrb_state *mrb, mrb_value re/*, mrb_value str*/)
+{
+  /*
+   * Method body of Regexp#===.  The argument is converted with
+   * reg_operand(); a nil result clears the backref and yields false.
+   * Otherwise the outcome of a search from position 0 decides the result.
+   */
+  mrb_value str;
+  long start;
+
+  mrb_get_args(mrb, "o", &str);
+  str = reg_operand(mrb, str, 0/*FALSE*/);
+  if (mrb_nil_p(str)) {
+    mrb_backref_set(mrb, mrb_nil_value());
+    return mrb_false_value();
+  }
+
+  start = mrb_reg_search(mrb, re, str, 0, 0);
+  return (start < 0) ? mrb_false_value() : mrb_true_value();
+}
+
+static long
+reg_match_pos(mrb_state *mrb, mrb_value re, mrb_value *strp, long pos)
+{
+  /*
+   * Search re in *strp starting at pos.  A nil subject clears the backref
+   * and returns -1.  The subject is converted with reg_operand() and
+   * written back to *strp.  A negative pos counts from the end of the
+   * string; if it is still negative afterwards it is returned unchanged.
+   * Any non-zero starting position is mapped through mrb_str_offset()
+   * before searching.  Returns the result of mrb_reg_search().
+   */
+  mrb_value subject = *strp;
+
+  if (mrb_nil_p(subject)) {
+    mrb_backref_set(mrb, mrb_nil_value());
+    return -1;
+  }
+
+  subject = reg_operand(mrb, subject, 1/*TRUE*/);
+  *strp = subject;
+
+  if (pos < 0) {
+    mrb_value size = mrb_str_size(mrb, subject);
+    pos += mrb_fixnum(size);
+    if (pos < 0) {
+      return pos;
+    }
+    pos = mrb_str_offset(mrb, subject, pos);
+  }
+  else if (pos > 0) {
+    pos = mrb_str_offset(mrb, subject, pos);
+  }
+
+  return mrb_reg_search(mrb, re, subject, pos, 0);
+}
+
+mrb_value
+mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str)
+{
+  /*
+   * Match re against str from the start.  Returns nil when there is no
+   * match, otherwise the match position converted with mrb_str_sublen()
+   * as a fixnum.
+   */
+  mrb_int where = reg_match_pos(mrb, re, &str, 0);
+
+  if (where < 0) {
+    return mrb_nil_value();
+  }
+  where = mrb_str_sublen(mrb, str, where);
+  return mrb_fixnum_value(where);
+}
+/* 15.2.15.7.5 */
+/*
+ * call-seq:
+ * rxp =~ str -> integer or nil
+ *
+ * Match---Matches <i>rxp</i> against <i>str</i>.
+ *
+ * /at/ =~ "input data" #=> 7
+ * /ax/ =~ "input data" #=> nil
+ *
+ * If <code>=~</code> is used with a regexp literal with named captures,
+ * the captured strings (or nil) are assigned to local variables named by
+ * the capture names.
+ *
+ * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y "
+ * p lhs #=> "x"
+ * p rhs #=> "y"
+ *
+ * If it is not matched, nil is assigned for the variables.
+ *
+ * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = "
+ * p lhs #=> nil
+ * p rhs #=> nil
+ *
+ * This assignment is implemented in the Ruby parser.
+ * The parser detects 'regexp-literal =~ expression' for the assignment.
+ * The regexp must be a literal without interpolation and placed at left hand side.
+ *
+ * The assignment does not occur if the regexp is not a literal.
+ *
+ * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
+ * re =~ " x = y "
+ * p lhs # undefined local variable
+ * p rhs # undefined local variable
+ *
+ * A regexp interpolation, <code>#{}</code>, also disables
+ * the assignment.
+ *
+ * rhs_pat = /(?<rhs>\w+)/
+ * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
+ * p lhs # undefined local variable
+ *
+ * The assignment does not occur if the regexp is placed at right hand side.
+ *
+ * " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
+ * p lhs, rhs # undefined local variable
+ *
+ */
+mrb_value
+mrb_reg_match(mrb_state *mrb, mrb_value re/*, mrb_value str*/)
+{
+  /* Method body of Regexp#=~: fetch the single argument and delegate to mrb_reg_match_str(). */
+  mrb_value subject;
+
+  mrb_get_args(mrb, "o", &subject);
+  return mrb_reg_match_str(mrb, re, subject);
+}
+
+/* 15.2.15.7.6 */
+/*
+ * call-seq:
+ * rxp.casefold? -> true or false
+ *
+ * Returns the value of the case-insensitive flag.
+ *
+ * /a/.casefold? #=> false
+ * /a/i.casefold? #=> true
+ * /(?i:a)/.casefold? #=> false
+ */
+
+static mrb_value
+mrb_reg_casefold_p(mrb_state *mrb, mrb_value re)
+{
+  /* True when the compiled pattern carries ONIG_OPTION_IGNORECASE. */
+  int ignorecase;
+
+  mrb_reg_check(mrb, re);
+  ignorecase = (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) != 0;
+  return ignorecase ? mrb_true_value() : mrb_false_value();
+}
+
+/* 15.2.15.7.7 */
+/*
+ * call-seq:
+ * rxp.match(str) -> matchdata or nil
+ * rxp.match(str,pos) -> matchdata or nil
+ *
+ * Returns a <code>MatchData</code> object describing the match, or
+ * <code>nil</code> if there was no match. This is equivalent to retrieving the
+ * value of the special variable <code>$~</code> following a normal match.
+ * If the second parameter is present, it specifies the position in the string
+ * to begin the search.
+ *
+ * /(.)(.)(.)/.match("abc")[2] #=> "b"
+ * /(.)(.)/.match("abc", 1)[2] #=> "c"
+ *
+ * If a block is given, the block is invoked with the MatchData if the match succeeds, so
+ * that you can write
+ *
+ * pat.match(str) {|m| ...}
+ *
+ * instead of
+ *
+ * if m = pat.match(str)
+ * ...
+ * end
+ *
+ * The return value is a value from block execution in this case.
+ */
+
+static mrb_value
+mrb_reg_match_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value re)
+{
+  /*
+   * Method body of Regexp#match.  The first argument is the subject; when
+   * exactly two arguments are given the second is the start position.
+   * Returns nil (and clears the backref) when nothing matches.  Otherwise
+   * returns the current backref, or the block's value when a block was
+   * given and the backref is not nil.
+   */
+  mrb_value argv[16];
+  int argc;
+  mrb_value blk;
+  mrb_value subject;
+  mrb_value matched;
+  long pos = 0;
+
+  //if (mrb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
+  mrb_get_args(mrb, "&*", &blk, &argv, &argc);
+  if (argc == 2) {
+    mrb_value initpos = argv[1];
+    pos = mrb_fixnum(initpos);
+  }
+
+  subject = argv[0];
+  pos = reg_match_pos(mrb, re, &subject, pos);
+  if (pos < 0) {
+    mrb_backref_set(mrb, mrb_nil_value());
+    return mrb_nil_value();
+  }
+
+  matched = mrb_backref_get(mrb);
+  /*mrb_match_busy(result);*/
+  if (mrb_nil_p(matched) || !mrb_block_given_p()) {
+    return matched;
+  }
+  return mrb_yield(mrb, matched, blk);
+}
+
+/* 15.2.15.7.8 */
+
+/*
+ * call-seq:
+ * rxp.source -> str
+ *
+ * Returns the original string of the pattern.
+ *
+ * /ab+c/ix.source #=> "ab+c"
+ *
+ * Note that escape sequences are retained as is.
+ *
+ * /\x20\+/.source #=> "\\x20\\+"
+ *
+ */
+
+static mrb_value
+mrb_reg_source(mrb_state *mrb, mrb_value re)
+{
+  /* Return a new string holding the regexp's source bytes, tagged with the regexp's encoding. */
+  mrb_reg_check(mrb, re);
+  /*if (OBJ_TAINTED(re)) OBJ_TAINT(str);*/
+  return mrb_enc_str_new(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re),
+                         mrb_enc_get(mrb, re));
+}
+
+static int
+name_to_backref_number(mrb_state *mrb, struct re_registers *regs, mrb_value regexp, const char* name, const char* name_end)
+{
+  /*
+   * Resolve the group name [name, name_end) to a backreference number using
+   * the compiled pattern of regexp.  Raises E_INDEX_ERROR when the lookup
+   * yields a number below 1.
+   */
+  int group = onig_name_to_backref_number(RREGEXP(regexp)->ptr,
+                                          (const unsigned char* )name,
+                                          (const unsigned char* )name_end, regs);
+
+  if (group < 1) {
+    mrb_value s = mrb_str_new(mrb, name, (long )(name_end - name));
+    mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s",
+              mrb_string_value_ptr(mrb, s));
+  }
+  return group; /* only reached for group >= 1 unless mrb_raise returns */
+}
+
+/*
+ * Document-class: MatchData
+ *
+ * <code>MatchData</code> is the type of the special variable <code>$~</code>,
+ * and is the type of the object returned by <code>Regexp#match</code> and
+ * <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
+ * match, results normally accessed through the special variables
+ * <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
+ * <code>$2</code>, and so on.
+ *
+ */
+
+mrb_value
+match_alloc(mrb_state *mrb)
+{
+  /*
+   * Allocate a MatchData object with nil str/regexp and a zero-filled
+   * struct rmatch attached.
+   */
+  struct RMatch* m;
+  struct rmatch *data;
+
+  m = mrb_obj_alloc(mrb, MRB_TT_MATCH, mrb->match_class);
+  // NEWOBJ(match, struct RMatch);
+  // OBJSETUP(match, klass, T_MATCH);
+
+  /* leave the object fully initialised before the next allocation happens */
+  m->str = mrb_nil_value();
+  m->rmatch = 0;
+  m->regexp = mrb_nil_value();
+
+  data = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch);
+  m->rmatch = data;
+  memset(data, 0, sizeof(struct rmatch));
+
+  return mrb_obj_value(m);
+}
+
+/* ------------------------------------------------------------------------- */
+/* MatchData Class */
+/* ------------------------------------------------------------------------- */
+/* 15.2.16.3.1 */
+/*
+ * call-seq:
+ * mtch[i] -> str or nil
+ * mtch[start, length] -> array
+ * mtch[range] -> array
+ * mtch[name] -> str or nil
+ *
+ * Match Reference---<code>MatchData</code> acts as an array, and may be
+ * accessed using the normal array indexing techniques. <i>mtch</i>[0] is
+ * equivalent to the special variable <code>$&</code>, and returns the entire
+ * matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
+ * of the matched backreferences (portions of the pattern between parentheses).
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
+ * m[0] #=> "HX1138"
+ * m[1, 2] #=> ["H", "X"]
+ * m[1..3] #=> ["H", "X", "113"]
+ * m[-3, 2] #=> ["X", "113"]
+ *
+ * m = /(?<foo>a+)b/.match("ccaaab")
+ * m #=> #<MatchData "aaab" foo:"aaa">
+ * m["foo"] #=> "aaa"
+ * m[:foo] #=> "aaa"
+ */
+
+/*
+ * Method body of MatchData#[].
+ *
+ * With exactly one argument:
+ *   - a non-negative fixnum returns that numbered group via
+ *     mrb_reg_nth_match();
+ *   - a symbol or string is resolved as a group name (raising E_INDEX_ERROR
+ *     through name_to_backref_number() when unknown).
+ * Every other call shape is forwarded to mrb_ary_aget() on the match
+ * converted with mrb_match_to_a().
+ *
+ * The argument vector is only inspected when an argument was actually
+ * passed; the former unconditional reads of argv[0] / argv[1] touched slots
+ * that are not valid for shorter argument lists.
+ */
+static mrb_value
+mrb_match_aref(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value match)
+{
+  mrb_value argv[16];
+  int argc;
+
+  match_check(mrb, match);
+  //mrb_scan_args(argc, argv, "11", &idx, &rest);
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc == 1) {
+    mrb_value idx = argv[0];
+
+    if (mrb_type(idx) == MRB_TT_FIXNUM) {
+      if (mrb_fixnum(idx) >= 0) {
+        return mrb_reg_nth_match(mrb, mrb_fixnum(idx), match);
+      }
+      /* negative indexes are left to the array fallback below */
+    }
+    else {
+      const char *p = 0;
+      int num;
+
+      switch (mrb_type(idx)) {
+        case MRB_TT_SYMBOL:
+          //p = mrb_id2name(SYM2ID(idx));
+          p = mrb_sym2name(mrb, SYM2ID(idx));
+          break;
+        case MRB_TT_STRING:
+          //p = StringValuePtr(idx);
+          p = mrb_string_value_ptr(mrb, idx);
+          break;
+        default:
+          break;
+      }
+      if (p) {
+        num = name_to_backref_number(mrb, RMATCH_REGS(match),
+                                     RMATCH(match)->regexp, p, p + strlen(p));
+        return mrb_reg_nth_match(mrb, num, match);
+      }
+    }
+  }
+
+  return mrb_ary_aget(mrb, /*argc, argv,*/ mrb_match_to_a(mrb, match));
+}
+
+/* A position expressed both as a byte offset and as a character offset. */
+typedef struct {
+ long byte_pos; /* offset in bytes */
+ long char_pos; /* offset in characters */
+} pair_t;
+
+static int
+pair_byte_cmp(const void *pair1, const void *pair2)
+{
+  /* Comparison callback ordering pair_t entries by ascending byte_pos. */
+  const pair_t *lhs = (const pair_t*)pair1;
+  const pair_t *rhs = (const pair_t*)pair2;
+  long diff = lhs->byte_pos - rhs->byte_pos;
+
+#if SIZEOF_LONG > SIZEOF_INT
+  /* a long difference may not fit in int, so reduce it to its sign */
+  if (diff > 0) return 1;
+  if (diff < 0) return -1;
+  return 0;
+#else
+  return (int)diff;
+#endif
+}
+
+/*
+ * Fill rm->char_offset[] for every register of match, i.e. the begin/end of
+ * each group counted in characters rather than bytes.  Does nothing when
+ * the offsets are already marked as up to date.
+ *
+ * For encodings whose maximum character length is 1 the byte offsets are
+ * copied directly.  Otherwise all byte positions are sorted, the string is
+ * walked once to count characters up to each position, and the results are
+ * looked up per register with bsearch().  Unmatched groups get -1/-1.
+ *
+ * The temporary pair table used to be alloca'd; now that it comes from
+ * mrb_malloc it is released before returning.
+ */
+static void
+update_char_offset(mrb_state *mrb, mrb_value match)
+{
+  struct rmatch *rm = RMATCH(match)->rmatch;
+  struct re_registers *regs;
+  int i, num_regs, num_pos;
+  long c;
+  char *s, *p, *q;
+  mrb_encoding *enc;
+  pair_t *pairs;
+
+  if (rm->char_offset_updated)
+    return;
+
+  regs = &rm->regs;
+  num_regs = rm->regs.num_regs;
+
+  if (rm->char_offset_num_allocated < num_regs) {
+    //REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs);
+    rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)*num_regs);
+    rm->char_offset_num_allocated = num_regs;
+  }
+
+  enc = mrb_enc_get(mrb, RMATCH(match)->str);
+  if (mrb_enc_mbmaxlen(enc) == 1) {
+    /* single-byte encoding: character offsets equal byte offsets */
+    for (i = 0; i < num_regs; i++) {
+      rm->char_offset[i].beg = BEG(i);
+      rm->char_offset[i].end = END(i);
+    }
+    rm->char_offset_updated = 1;
+    return;
+  }
+
+  //pairs = ALLOCA_N(pair_t, num_regs*2);
+  pairs = mrb_malloc(mrb, sizeof(pair_t)*num_regs*2);
+
+  /* collect the begin/end byte positions of every matched group */
+  num_pos = 0;
+  for (i = 0; i < num_regs; i++) {
+    if (BEG(i) < 0)
+      continue;
+    pairs[num_pos++].byte_pos = BEG(i);
+    pairs[num_pos++].byte_pos = END(i);
+  }
+  qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
+
+  /* one pass over the string: count characters between consecutive positions */
+  s = p = RSTRING_PTR(RMATCH(match)->str);
+  c = 0;
+  for (i = 0; i < num_pos; i++) {
+    q = s + pairs[i].byte_pos;
+    c += mrb_enc_strlen(p, q, enc);
+    pairs[i].char_pos = c;
+    p = q;
+  }
+
+  for (i = 0; i < num_regs; i++) {
+    pair_t key, *found;
+    if (BEG(i) < 0) {
+      rm->char_offset[i].beg = -1;
+      rm->char_offset[i].end = -1;
+      continue;
+    }
+
+    /* every key was inserted above, so the lookups always succeed */
+    key.byte_pos = BEG(i);
+    found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
+    rm->char_offset[i].beg = found->char_pos;
+
+    key.byte_pos = END(i);
+    found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
+    rm->char_offset[i].end = found->char_pos;
+  }
+
+  mrb_free(mrb, pairs);
+  rm->char_offset_updated = 1;
+}
+
+/* 15.2.16.3.2 */
+/*
+ * call-seq:
+ * mtch.begin(n) -> integer
+ *
+ * Returns the offset of the start of the <em>n</em>th element of the match
+ * array in the string.
+ * <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.begin(0) #=> 1
+ * m.begin(2) #=> 2
+ *
+ * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+ * p m.begin(:foo) #=> 0
+ * p m.begin(:bar) #=> 2
+ */
+
+static mrb_value
+mrb_match_begin(mrb_state *mrb, mrb_value match/*, mrb_value n*/)
+{
+  /*
+   * Method body of MatchData#begin.  The first argument selects a group
+   * (resolved by match_backref_number()).  Raises E_INDEX_ERROR for an
+   * index outside the registers, returns nil for an unmatched group, and
+   * otherwise the group's starting character offset.
+   */
+  mrb_value argv[16];
+  int argc;
+  struct re_registers *regs;
+  int i;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  i = match_backref_number(mrb, match, argv[0]);
+  regs = RMATCH_REGS(match);
+
+  match_check(mrb, match);
+  if (!(0 <= i && i < regs->num_regs)) {
+    mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i);
+  }
+
+  if (BEG(i) < 0) {
+    return mrb_nil_value();
+  }
+
+  update_char_offset(mrb, match);
+  return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg);
+}
+
+static mrb_value
+match_array(mrb_state *mrb, mrb_value match, int start)
+{
+  /*
+   * Build an array with one entry per register from index start onward:
+   * nil for a register whose begin is -1, otherwise the corresponding
+   * substring of the matched string.
+   */
+  struct re_registers *regs;
+  mrb_value result;
+  mrb_value subject;
+  int idx;
+  /*int taint = OBJ_TAINTED(match);*/
+
+  match_check(mrb, match);
+  regs = RMATCH_REGS(match);
+  result = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs);
+  subject = RMATCH(match)->str;
+
+  idx = start;
+  while (idx < regs->num_regs) {
+    mrb_value item;
+
+    if (regs->beg[idx] != -1) {
+      item = mrb_str_subseq(mrb, subject, regs->beg[idx], regs->end[idx]-regs->beg[idx]);
+      /*if (taint) OBJ_TAINT(str);*/
+    }
+    else {
+      item = mrb_nil_value();
+    }
+    mrb_ary_push(mrb, result, item);
+    idx++;
+  }
+  return result;
+}
+
+/* 15.2.16.3.3 */
+/*
+ * call-seq:
+ * mtch.captures -> array
+ *
+ * Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
+ *
+ * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
+ * f1 #=> "H"
+ * f2 #=> "X"
+ * f3 #=> "113"
+ * f4 #=> "8"
+ */
+static mrb_value
+mrb_match_captures(mrb_state *mrb, mrb_value match)
+{
+  /* Groups only: register 0 (the whole match) is skipped. */
+  mrb_value captures = match_array(mrb, match, 1);
+
+  return captures;
+}
+
+/* 15.2.16.3.4 */
+/*
+ * call-seq:
+ * mtch.end(n) -> integer
+ *
+ * Returns the offset of the character immediately following the end of the
+ * <em>n</em>th element of the match array in the string.
+ * <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.end(0) #=> 7
+ * m.end(2) #=> 3
+ *
+ * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+ * p m.end(:foo) #=> 1
+ * p m.end(:bar) #=> 3
+ */
+
+static mrb_value
+mrb_match_end(mrb_state *mrb, mrb_value match/*, mrb_value n*/)
+{
+  /*
+   * Method body of MatchData#end.  The first argument selects a group
+   * (resolved by match_backref_number()).  Raises E_INDEX_ERROR for an
+   * index outside the registers, returns nil for an unmatched group, and
+   * otherwise the group's ending character offset.
+   */
+  mrb_value argv[16];
+  int argc;
+  struct re_registers *regs;
+  int i;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  i = match_backref_number(mrb, match, argv[0]);
+  regs = RMATCH_REGS(match);
+
+  match_check(mrb, match);
+  if (!(0 <= i && i < regs->num_regs)) {
+    mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i);
+  }
+
+  if (BEG(i) < 0) {
+    return mrb_nil_value();
+  }
+
+  update_char_offset(mrb, match);
+  return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end);
+}
+
+/* 15.2.16.3.5 */
+/* :nodoc: */
+static mrb_value
+mrb_match_init_copy(mrb_state *mrb, mrb_value obj/*, mrb_value orig*/)
+{
+  /*
+   * Make obj a copy of the MatchData passed as first argument: share its
+   * string and regexp, copy the registers, and copy the cached character
+   * offsets when the original has them.  A no-op when both are the same
+   * object; raises E_TYPE_ERROR when the classes do not match.
+   */
+  mrb_value argv[16];
+  int argc;
+  mrb_value orig;
+  struct rmatch *rm;
+  struct rmatch *src_rm;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  orig = argv[0];
+
+  if (mrb_obj_equal(mrb, obj, orig)) {
+    return obj;
+  }
+  if (!mrb_obj_is_instance_of(mrb, orig, mrb_obj_class(mrb, obj))) {
+    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class");
+  }
+
+  RMATCH(obj)->str = RMATCH(orig)->str;
+  RMATCH(obj)->regexp = RMATCH(orig)->regexp;
+
+  /* make sure the destination owns an rmatch before copying into it */
+  if (RMATCH(obj)->rmatch == 0) {
+    RMATCH(obj)->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch);
+    memset(RMATCH(obj)->rmatch, 0, sizeof(struct rmatch));
+  }
+  rm = RMATCH(obj)->rmatch;
+  onig_region_copy(&rm->regs, RMATCH_REGS(orig));
+
+  src_rm = RMATCH(orig)->rmatch;
+  if (src_rm->char_offset_updated) {
+    size_t nbytes = sizeof(struct rmatch_offset) * rm->regs.num_regs;
+
+    if (rm->char_offset_num_allocated < rm->regs.num_regs) {
+      //REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
+      rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)* rm->regs.num_regs);
+      rm->char_offset_num_allocated = rm->regs.num_regs;
+    }
+    memcpy(rm->char_offset, src_rm->char_offset, nbytes);
+    rm->char_offset_updated = 1;
+  }
+  else {
+    rm->char_offset_updated = 0;
+  }
+
+  return obj;
+}
+
+/* 15.2.16.3.6 */
+/* 15.2.16.3.10 */
+/*
+ * call-seq:
+ * mtch.length -> integer
+ * mtch.size -> integer
+ *
+ * Returns the number of elements in the match array.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.length #=> 5
+ * m.size #=> 5
+ */
+
+/* MatchData#length / MatchData#size: number of registers in the match
+ * (whole match plus groups), as a Fixnum. */
+static mrb_value
+mrb_match_size(mrb_state *mrb, mrb_value match)
+{
+  struct re_registers *regs;
+
+  match_check(mrb, match);
+  regs = RMATCH_REGS(match);
+  return mrb_fixnum_value(regs->num_regs);
+}
+
+/* 15.2.16.3.7 */
+/*
+ * call-seq:
+ * mtch.offset(n) -> array
+ *
+ * Returns a two-element array containing the beginning and ending offsets of
+ * the <em>n</em>th match.
+ * <em>n</em> can be a string or symbol to reference a named capture.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.offset(0) #=> [1, 7]
+ * m.offset(4) #=> [6, 7]
+ *
+ * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
+ * p m.offset(:foo) #=> [0, 1]
+ * p m.offset(:bar) #=> [2, 3]
+ *
+ */
+
+/*
+ * MatchData#offset(n): two-element array [begin, end] with the offsets of
+ * the n-th group, or [nil, nil] when that group did not take part in the
+ * match.
+ *
+ * The match is validated first, as match_array and mrb_reg_match_post do,
+ * so that neither the registers nor the back-reference lookup are touched
+ * on a match object that match_check() rejects.
+ *
+ * Raises IndexError when n resolves to an index outside the match.
+ */
+static mrb_value
+mrb_match_offset(mrb_state *mrb, mrb_value match/*, mrb_value n*/)
+{
+  mrb_value n;
+  struct re_registers *regs;
+  int i;
+
+  mrb_get_args(mrb, "o", &n);
+
+  match_check(mrb, match);
+  regs = RMATCH_REGS(match);
+  i = match_backref_number(mrb, match, n);
+  if (i < 0 || regs->num_regs <= i)
+    mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i);
+
+  if (BEG(i) < 0)
+    return mrb_assoc_new(mrb, mrb_nil_value(), mrb_nil_value());
+
+  /* char_offset[] is only valid after update_char_offset() has run. */
+  update_char_offset(mrb, match);
+  return mrb_assoc_new(mrb, mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg),
+                       mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end));
+}
+
+/* 15.2.16.3.8 */
+/*
+ * call-seq:
+ * mtch.post_match -> str
+ *
+ * Returns the portion of the original string after the current match.
+ * Equivalent to the special variable <code>$'</code>.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
+ * m.post_match #=> ": The Movie"
+ */
+/* MatchData#post_match: the part of the subject string that follows the
+ * whole match.  Returns nil for a nil match or when register 0 is unset. */
+mrb_value
+mrb_reg_match_post(mrb_state *mrb, mrb_value match)
+{
+  struct re_registers *regs;
+  mrb_value subject;
+  long tail_start;
+
+  if (mrb_nil_p(match)) return mrb_nil_value();
+  match_check(mrb, match);
+
+  regs = RMATCH_REGS(match);   /* BEG()/END() expand to regs->... */
+  if (BEG(0) == -1) return mrb_nil_value();
+
+  subject = RMATCH(match)->str;
+  tail_start = END(0);
+  return mrb_str_subseq(mrb, subject, tail_start, RSTRING_LEN(subject) - tail_start);
+}
+
+/* 15.2.16.3.9 */
+/*
+ * call-seq:
+ * mtch.pre_match -> str
+ *
+ * Returns the portion of the original string before the current match.
+ * Equivalent to the special variable <code>$`</code>.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.pre_match #=> "T"
+ */
+
+/* MatchData#pre_match: the part of the subject string that precedes the
+ * whole match.  Returns nil for a nil match or when register 0 is unset. */
+mrb_value
+mrb_reg_match_pre(mrb_state *mrb, mrb_value match)
+{
+  struct re_registers *regs;
+
+  if (mrb_nil_p(match)) return mrb_nil_value();
+  match_check(mrb, match);
+
+  regs = RMATCH_REGS(match);   /* BEG() expands to regs->beg[...] */
+  if (BEG(0) == -1) return mrb_nil_value();
+
+  return mrb_str_subseq(mrb, RMATCH(match)->str, 0, BEG(0));
+}
+
+/* 15.2.16.3.11 */
+/*
+ * call-seq:
+ * mtch.string -> str
+ *
+ * Returns a frozen copy of the string passed in to <code>match</code>.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.string #=> "THX1138."
+ */
+
+/* MatchData#string: the subject string stored in the match object. */
+static mrb_value
+mrb_match_string(mrb_state *mrb, mrb_value match)
+{
+  struct RMatch *m;
+
+  match_check(mrb, match);
+  m = RMATCH(match);
+  return m->str;
+}
+
+/* 15.2.16.3.12 */
+/*
+ * call-seq:
+ * mtch.to_a -> anArray
+ *
+ * Returns the array of matches.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.to_a #=> ["HX1138", "H", "X", "113", "8"]
+ *
+ * Because <code>to_a</code> is called when expanding
+ * <code>*</code><em>variable</em>, there's a useful assignment
+ * shortcut for extracting matched fields. This is slightly slower than
+ * accessing the fields directly (as an intermediate array is
+ * generated).
+ *
+ * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
+ * all #=> "HX1138"
+ * f1 #=> "H"
+ * f2 #=> "X"
+ * f3 #=> "113"
+ */
+
+/* MatchData#to_a: every register as an array, starting with the whole
+ * match at index 0. */
+static mrb_value
+mrb_match_to_a(mrb_state *mrb, mrb_value match)
+{
+  const int first_register = 0;
+
+  return match_array(mrb, match, first_register);
+}
+
+/* 15.2.16.3.13 */
+/*
+ * call-seq:
+ * mtch.to_s -> str
+ *
+ * Returns the entire matched string.
+ *
+ * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
+ * m.to_s #=> "HX1138"
+ */
+
+/* MatchData#to_s: the whole matched text as reported by
+ * mrb_reg_last_match(), or a new empty string when that yields nil. */
+static mrb_value
+mrb_match_to_s(mrb_state *mrb, mrb_value match)
+{
+  mrb_value whole = mrb_reg_last_match(mrb, match);
+
+  match_check(mrb, match);
+  if (!mrb_nil_p(whole)) {
+    return whole;
+  }
+  return mrb_str_new(mrb, 0, 0);
+}
+
+/* Map an inline option letter ('i', 'x' or 'm') to its ONIG_OPTION_* flag.
+ * Any other character yields 0. */
+static int
+char_to_option(int c)
+{
+  if (c == 'i') return ONIG_OPTION_IGNORECASE;
+  if (c == 'x') return ONIG_OPTION_EXTEND;
+  if (c == 'm') return ONIG_OPTION_MULTILINE;
+  return 0;
+}
+
+/* Write the letters for the options set in `options` into `str`, always in
+ * the order m, i, x, followed by a terminating NUL.  Returns `str`. */
+static char *
+option_to_str(char str[4], int options)
+{
+  int used = 0;
+
+  if (options & ONIG_OPTION_MULTILINE) {
+    str[used++] = 'm';
+  }
+  if (options & ONIG_OPTION_IGNORECASE) {
+    str[used++] = 'i';
+  }
+  if (options & ONIG_OPTION_EXTEND) {
+    str[used++] = 'x';
+  }
+  str[used] = 0;
+  return str;
+}
+
+#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
+
+/*
+ * Append the regexp source s[0..len) to `str`, escaping what is needed so
+ * the result can sit between '/' delimiters.
+ *
+ * A first pass decides whether any escaping is required at all; if not the
+ * source is appended verbatim.  Otherwise a second pass copies the source
+ * character by character:
+ *   - a backslash and the character after it are copied untouched,
+ *   - '/' gets a backslash in front of it,
+ *   - bytes that are not a valid character become \xHH,
+ *   - valid multibyte characters are emitted as an escaped code point when
+ *     `resenc` is non-NULL, or copied raw otherwise,
+ *   - printable and whitespace characters are copied,
+ *   - other non-printable characters become \xHH.
+ */
+static void
+mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len,
+ mrb_encoding *enc, mrb_encoding *resenc)
+{
+ const char *p, *pend;
+ int need_escape = 0;
+ int c, clen;
+
+ p = s; pend = p + len;
+ /* Pass 1: scan for anything that forces the escaping path. */
+ if (mrb_enc_asciicompat(mrb, enc)) {
+ while (p < pend) {
+ c = mrb_enc_ascget(mrb, p, pend, &clen, enc);
+ if (c == -1) {
+ /* not an ASCII character: acceptable only when no re-encoding is involved */
+ if (enc == resenc) {
+ p += mbclen(p, pend, enc);
+ }
+ else {
+ need_escape = 1;
+ break;
+ }
+ }
+ else if (c != '/' && mrb_enc_isprint(c, enc)) {
+ p += clen;
+ }
+ else {
+ need_escape = 1;
+ break;
+ }
+ }
+ }
+ else {
+ need_escape = 1;
+ }
+
+ if (!need_escape) {
+ mrb_str_buf_cat(mrb, str, s, len);
+ }
+ else {
+ /* Pass 2: copy with escaping. */
+ int unicode_p = mrb_enc_unicode_p(enc);
+ p = s;
+ while (p<pend) {
+ c = mrb_enc_ascget(mrb, p, pend, &clen, enc);
+ if (c == '\\' && p+clen < pend) {
+ /* keep an existing escape sequence (backslash + next char) as is */
+ int n = clen + mbclen(p+clen, pend, enc);
+ mrb_str_buf_cat(mrb, str, p, n);
+ p += n;
+ continue;
+ }
+ else if (c == '/') {
+ char c = '\\';
+ mrb_str_buf_cat(mrb, str, &c, 1);
+ mrb_str_buf_cat(mrb, str, p, clen);
+ }
+ else if (c == -1) {
+ clen = mrb_enc_precise_mbclen(p, pend, enc);
+ if (!MBCLEN_CHARFOUND_P(clen)) {
+ /* invalid byte sequence: dump the single byte in hex */
+ c = (unsigned char)*p;
+ clen = 1;
+ goto hex;
+ }
+ if (resenc) {
+ unsigned int c = mrb_enc_mbc_to_codepoint(p, pend, enc);
+ mrb_str_buf_cat_escaped_char(mrb, str, c, unicode_p);
+ }
+ else {
+ clen = MBCLEN_CHARFOUND_LEN(clen);
+ mrb_str_buf_cat(mrb, str, p, clen);
+ }
+ }
+ else if (mrb_enc_isprint(c, enc)) {
+ mrb_str_buf_cat(mrb, str, p, clen);
+ }
+ else if (!mrb_enc_isspace(c, enc)) {
+ char b[8];
+
+ /* also the jump target for invalid bytes above; emits exactly "\xHH" */
+ hex:
+ snprintf(b, sizeof(b), "\\x%02X", c);
+ mrb_str_buf_cat(mrb, str, b, 4);
+ }
+ else {
+ mrb_str_buf_cat(mrb, str, p, clen);
+ }
+ p += clen;
+ }
+ }
+}
+
+/* 15.2.15.7.9 (x) */
+/*
+ * call-seq:
+ * rxp.to_s -> str
+ *
+ * Returns a string containing the regular expression and its options (using the
+ * <code>(?opts:source)</code> notation). This string can be fed back in to
+ * <code>Regexp::new</code> to create a regular expression with the same semantics as
+ * the original. (However, <code>Regexp#==</code> may not return true when
+ * comparing the two, as the source of the regular expression itself may
+ * differ, as the example shows). <code>Regexp#inspect</code> produces a
+ * generally more readable version of <i>rxp</i>.
+ *
+ * r1 = /ab+c/ix #=> /ab+c/ix
+ * s1 = r1.to_s #=> "(?ix-m:ab+c)"
+ * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
+ * r1 == r2 #=> false
+ * r1.source #=> "ab+c"
+ * r2.source #=> "(?ix-m:ab+c)"
+ */
+
+/*
+ * Regexp#to_s: build "(?<on>-<off>:<source>)" for the regexp `re`.
+ *
+ * Starting from the compiled option flags, the loop at `again:` peels any
+ * leading "(?opts-opts)" or "(?opts-opts:...)" wrapper off the source and
+ * folds the option letters it finds into `options`.  If the wrapper cannot
+ * be validated, the original options and source are restored.  The result
+ * lists the enabled letters, then '-' plus the disabled ones when not all
+ * three embeddable options are on, then ':' and the escaped source.
+ */
+mrb_value
+mrb_reg_to_s(mrb_state *mrb, mrb_value re)
+{
+ int options, opt;
+ const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
+ long len;
+ const UChar* ptr;
+ mrb_value str = mrb_str_new_cstr(mrb, "(?");
+ /* room for '-' + up to three option letters + NUL */
+ char optbuf[5];
+ mrb_encoding *enc = mrb_enc_get(mrb, re);
+
+ mrb_reg_check(mrb, re);
+ memset(optbuf, 0, 5);
+ mrb_enc_copy(mrb, str, re);
+ options = RREGEXP(re)->ptr->options;
+ ptr = (UChar*)RREGEXP_SRC_PTR(re);
+ len = RREGEXP_SRC_LEN(re);
+again:
+ if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
+ /* stays nonzero unless the "(?...:...)" form below compiles cleanly */
+ int err = 1;
+ ptr += 2;
+ /* option letters to switch on */
+ if ((len -= 2) > 0) {
+ do {
+ opt = char_to_option((int )*ptr);
+ if (opt != 0) {
+ options |= opt;
+ }
+ else {
+ break;
+ }
+ ++ptr;
+ } while (--len > 0);
+ }
+ /* option letters to switch off, introduced by '-' */
+ if (len > 1 && *ptr == '-') {
+ ++ptr;
+ --len;
+ do {
+ opt = char_to_option((int )*ptr);
+ if (opt != 0) {
+ options &= ~opt;
+ }
+ else {
+ break;
+ }
+ ++ptr;
+ } while (--len > 0);
+ }
+ /* "(?opts)" with no body: skip it and look for another wrapper */
+ if (*ptr == ')') {
+ --len;
+ ++ptr;
+ goto again;
+ }
+ /* "(?opts:body)": accept it only if body compiles on its own */
+ if (*ptr == ':' && ptr[len-1] == ')') {
+ Regexp *rp;
+
+ ++ptr;
+ len -= 2;
+ err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
+ enc, OnigDefaultSyntax, NULL);
+ onig_free(rp);
+ }
+ /* wrapper not usable: fall back to the untouched source and options */
+ if (err) {
+ options = RREGEXP(re)->ptr->options;
+ ptr = (UChar*)RREGEXP_SRC_PTR(re);
+ len = RREGEXP_SRC_LEN(re);
+ }
+ }
+
+ if (*option_to_str(optbuf, options)) mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf));
+
+ /* at least one embeddable option is off: list the disabled ones after '-' */
+ if ((options & embeddable) != embeddable) {
+ optbuf[0] = '-';
+ option_to_str(optbuf + 1, ~options);
+ mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf));
+ }
+
+ mrb_str_buf_cat(mrb, str, ":", strlen(":"));
+ mrb_reg_expr_str(mrb, str, (char*)ptr, len, enc, NULL);
+ mrb_str_buf_cat(mrb, str, ")", strlen(")"));
+ mrb_enc_copy(mrb, str, re);
+
+ /*OBJ_INFECT(str, re);*/
+ return str;
+}
+
+/* 15.2.15.7.10(x) */
+/*
+ * call-seq:
+ * rxp.inspect -> string
+ *
+ * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
+ * <code>#inspect</code> actually produces the more natural version of
+ * the string than <code>#to_s</code>.
+ *
+ * /ab+c/ix.inspect #=> "/ab+c/ix"
+ *
+ */
+
+/* Regexp#inspect: description built by mrb_reg_desc() from the source
+ * text.  A regexp without a compiled pattern or without source text falls
+ * back to the generic mrb_any_to_s() representation. */
+static mrb_value
+mrb_reg_inspect(mrb_state *mrb, mrb_value re)
+{
+  int usable = RREGEXP(re)->ptr && !mrb_nil_p(RREGEXP_SRC(re)) && RREGEXP_SRC_PTR(re);
+
+  if (usable) {
+    return mrb_reg_desc(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
+  }
+  return mrb_any_to_s(mrb, re);
+}
+
+/* Allocate a blank Regexp object of mrb->regex_class: no compiled pattern,
+ * source tag cleared, use count zero.  `dummy` is not used. */
+static mrb_value
+mrb_reg_s_alloc(mrb_state *mrb, mrb_value dummy)
+{
+  struct RRegexp *fresh;
+
+  fresh = mrb_obj_alloc(mrb, MRB_TT_REGEX, mrb->regex_class);
+  fresh->usecnt = 0;
+  fresh->src.tt = 0;
+  fresh->ptr = 0;
+
+  return mrb_obj_value(fresh);
+}
+
+/*
+ * Return the text of the highest-numbered group that took part in the
+ * match, or nil when `match` is nil, nothing matched, or no group matched.
+ *
+ * The decision is made on the match registers (regs->beg[], via BEG()),
+ * like mrb_reg_match_post/pre and the '+' case of mrb_reg_regsub.  The
+ * char_offset[] table must not be used here: it is only filled in by
+ * update_char_offset(), which this function never calls, and it is a NULL
+ * pointer on a freshly zeroed rmatch.
+ */
+mrb_value
+mrb_reg_match_last(mrb_state *mrb, mrb_value match)
+{
+  struct re_registers *regs;
+  int i;
+
+  if (mrb_nil_p(match)) return mrb_nil_value();
+  match_check(mrb, match);
+
+  regs = RMATCH_REGS(match);
+  if (BEG(0) == -1) return mrb_nil_value();
+
+  /* Walk down from the last register to the first one that is set. */
+  for (i = regs->num_regs - 1; BEG(i) == -1 && i > 0; i--)
+    ;
+  if (i == 0) return mrb_nil_value();
+  return mrb_reg_nth_match(mrb, i, match);
+}
+
+/* 15.2.16.3.14(x) */
+/*
+ * call-seq:
+ * mtch.inspect -> str
+ *
+ * Returns a printable version of <i>mtch</i>.
+ *
+ * puts /.$/.match("foo").inspect
+ * #=> #<MatchData "o">
+ *
+ * puts /(.)(.)(.)/.match("foo").inspect
+ * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
+ *
+ * puts /(.)(.)?(.)/.match("fo").inspect
+ * #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
+ *
+ * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
+ * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
+ *
+ */
+/* Name of one capture group, as recorded by match_inspect_name_iter():
+ * `name` points at the first byte of the name and `len` is its length in
+ * bytes.  A zeroed entry (name == NULL) means the group has no name. */
+struct backref_name_tag {
+ const UChar *name;
+ long len;
+};
+
+/* onig_foreach_name() callback: record the group name [name, name_end) in
+ * the backref_name_tag table passed as arg0, once for every group number
+ * listed in back_refs[0..back_num).  Always returns 0. */
+static int
+match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end,
+    int back_num, int *back_refs, OnigRegex regex, void *arg0)
+{
+  struct backref_name_tag *table = (struct backref_name_tag *)arg0;
+  int k = 0;
+
+  while (k < back_num) {
+    struct backref_name_tag *slot = &table[back_refs[k]];
+
+    slot->name = name;
+    slot->len = name_end - name;
+    k++;
+  }
+  return 0;
+}
+
+/*
+ * MatchData#inspect: "#<Class "whole" 1:"a" name:"b" ...>".
+ *
+ * Register 0 is printed bare; every other register is prefixed with its
+ * group name when it has one, otherwise with its number.  Unset registers
+ * print as nil.  A match without a regexp prints as "#<Class:address>".
+ *
+ * Changes from the previous version:
+ *   - the temporary name table is released with mrb_free() (it used to
+ *     leak on every call),
+ *   - the address printed for a regexp-less match is the object pointer,
+ *     not the address of the local parameter,
+ *   - the registers are only read after the regexp check,
+ *   - the group number is formatted with a bounded snprintf().
+ * NOTE(review): if one of the string helpers raises, the table is still
+ * leaked; that would need a GC-managed buffer to fix.
+ */
+static mrb_value
+mrb_match_inspect(mrb_state *mrb, mrb_value match)
+{
+  const char *cname = mrb_obj_classname(mrb, match);
+  mrb_value str;
+  int i;
+  struct re_registers *regs;
+  int num_regs;
+  struct backref_name_tag *names;
+  mrb_value regexp = RMATCH(match)->regexp;
+
+  if (regexp.value.p == 0) {
+    return mrb_sprintf(mrb, "#<%s:%p>", cname, (void*)match.value.p);
+  }
+
+  regs = RMATCH_REGS(match);
+  num_regs = regs->num_regs;
+
+  /* One slot per register; slots of unnamed groups stay zeroed. */
+  names = mrb_malloc(mrb, sizeof(struct backref_name_tag)*num_regs);
+  memset(names, 0, sizeof(struct backref_name_tag)*num_regs);
+
+  onig_foreach_name(RREGEXP(regexp)->ptr,
+                    match_inspect_name_iter, names);
+
+  str = mrb_str_new_cstr(mrb, "#<");
+  mrb_str_buf_cat(mrb, str, cname, strlen(cname));
+
+  for (i = 0; i < num_regs; i++) {
+    char buf[sizeof(num_regs)*3+1];
+    mrb_value v;
+
+    mrb_str_buf_cat(mrb, str, " ", strlen(" "));
+    if (0 < i) {
+      if (names[i].name)
+        mrb_str_buf_cat(mrb, str, (const char *)names[i].name, names[i].len);
+      else {
+        snprintf(buf, sizeof(buf), "%d", i);
+        mrb_str_buf_cat(mrb, str, (const char *)buf, strlen(buf));
+      }
+      mrb_str_buf_cat(mrb, str, ":", strlen(":"));
+    }
+    v = mrb_reg_nth_match(mrb, i, match);
+    if (mrb_nil_p(v))
+      mrb_str_buf_cat(mrb, str, "nil", strlen("nil"));
+    else
+      mrb_str_buf_append(mrb, str, mrb_str_inspect(mrb, v));
+  }
+  mrb_str_buf_cat(mrb, str, ">", strlen(">"));
+
+  mrb_free(mrb, names);
+  return str;
+}
+
+/* 15.2.16.3.15(x) */
+/* 15.2.16.3.16(x) */
+/*
+ * call-seq:
+ * mtch == mtch2 -> true or false
+ *
+ * Equality---Two matchdata are equal if their target strings,
+ * patterns, and matched positions are identical.
+ */
+
+/* MatchData#== / MatchData#eql?: true when the argument is the same object,
+ * or is a MatchData with an equal subject string, an equal regexp and
+ * identical begin/end registers. */
+static mrb_value
+mrb_match_equal(mrb_state *mrb, mrb_value match1)
+{
+  mrb_value match2;
+  const struct re_registers *lhs, *rhs;
+
+  mrb_get_args(mrb, "o", &match2);
+
+  if (mrb_obj_equal(mrb, match1, match2)) {
+    return mrb_true_value();
+  }
+  if (mrb_type(match2) != MRB_TT_MATCH) {
+    return mrb_false_value();
+  }
+  if (!mrb_str_equal(mrb, RMATCH(match1)->str, RMATCH(match2)->str)) {
+    return mrb_false_value();
+  }
+  if (!mrb_reg_equal(mrb, RMATCH(match1)->regexp, RMATCH(match2)->regexp)) {
+    return mrb_false_value();
+  }
+
+  lhs = RMATCH_REGS(match1);
+  rhs = RMATCH_REGS(match2);
+  if (lhs->num_regs != rhs->num_regs) {
+    return mrb_false_value();
+  }
+  if (memcmp(lhs->beg, rhs->beg, lhs->num_regs * sizeof(*lhs->beg)) != 0) {
+    return mrb_false_value();
+  }
+  if (memcmp(lhs->end, rhs->end, lhs->num_regs * sizeof(*lhs->end)) != 0) {
+    return mrb_false_value();
+  }
+  return mrb_true_value();
+}
+
+/*
+ * Document-class: RegexpError
+ *
+ * Raised when given an invalid regexp expression.
+ *
+ * Regexp.new("?")
+ *
+ * <em>raises the exception:</em>
+ *
+ * RegexpError: target of repeat operator is not specified: /?/
+ */
+
+/*
+ * Document-class: Regexp
+ *
+ * A <code>Regexp</code> holds a regular expression, used to match a pattern
+ * against strings. Regexps are created using the <code>/.../</code> and
+ * <code>%r{...}</code> literals, and by the <code>Regexp::new</code>
+ * constructor.
+ *
+ * :include: doc/re.rdoc
+ */
+
+/*
+ * Define the Regexp and MatchData classes (both subclasses of Object),
+ * store them in mrb->regex_class and mrb->match_class, and register their
+ * class methods, instance methods and option constants.  The numbers in
+ * the trailing comments are section references; entries marked (x) carry
+ * that marker in the original source.
+ */
+void
+mrb_init_regexp(mrb_state *mrb)
+{
+ struct RClass *s;
+ s = mrb->regex_class = mrb_define_class(mrb, "Regexp", mrb->object_class);
+
+ //mrb->encode_class = mrb_define_class(mrb, "Encoding", mrb->object_class);
+ //mrb_define_alloc_func(mrb, s, mrb_reg_s_alloc);
+ mrb_define_class_method(mrb, s, "compile", mrb_reg_s_new_instance, ARGS_ANY()); /* 15.2.15.6.1 */
+ mrb_define_class_method(mrb, s, "escape", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.2 */
+ mrb_define_class_method(mrb, s, "last_match", mrb_reg_s_last_match, ARGS_ANY()); /* 15.2.15.6.3 */
+ mrb_define_class_method(mrb, s, "quote", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.4 */
+ //mrb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
+ //mrb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
+
+ mrb_define_method(mrb, s, "initialize", mrb_reg_initialize_m, ARGS_ANY()); /* 15.2.15.7.1 */
+ mrb_define_method(mrb, s, "initialize_copy", mrb_reg_init_copy, ARGS_REQ(1)); /* 15.2.15.7.2 */
+ mrb_define_method(mrb, s, "==", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.3 */
+ mrb_define_method(mrb, s, "===", mrb_reg_eqq, ARGS_REQ(1)); /* 15.2.15.7.4 */
+ mrb_define_method(mrb, s, "=~", mrb_reg_match, ARGS_REQ(1)); /* 15.2.15.7.5 */
+ mrb_define_method(mrb, s, "casefold?", mrb_reg_casefold_p, ARGS_NONE()); /* 15.2.15.7.6 */
+ mrb_define_method(mrb, s, "match", mrb_reg_match_m, ARGS_ANY()); /* 15.2.15.7.7 */
+ mrb_define_method(mrb, s, "source", mrb_reg_source, ARGS_NONE()); /* 15.2.15.7.8 */
+ //mrb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
+ //mrb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
+ mrb_define_method(mrb, s, "to_s", mrb_reg_to_s, ARGS_NONE()); /* 15.2.15.7.9 (x) */
+ mrb_define_method(mrb, s, "inspect", mrb_reg_inspect, ARGS_NONE()); /* 15.2.15.7.10(x) */
+ mrb_define_method(mrb, s, "eql?", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.11(x) */
+ //mrb_define_method(rb_cRegexp, "options", mrb_reg_options_m, 0);
+ //mrb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
+ //mrb_define_method(rb_cRegexp, "fixed_encoding?", mrb_reg_fixed_encoding_p, 0);
+ //mrb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
+ //mrb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
+
+ //mrb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
+ //mrb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
+ //mrb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
+ //mrb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
+ /* Option flags exposed as Regexp constants. */
+ mrb_define_const(mrb, s, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE));
+ mrb_define_const(mrb, s, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND));
+ mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
+ mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED));
+
+ //mrb_global_variable(&reg_cache);
+
+ /* From here on `s` refers to the MatchData class. */
+ s = mrb->match_class = mrb_define_class(mrb, "MatchData", mrb->object_class);
+ //mrb_undef_method(CLASS_OF(rb_cMatch), "new");
+
+ mrb_define_method(mrb, s, "[]", mrb_match_aref, ARGS_ANY()); /* 15.2.16.3.1 */
+ mrb_define_method(mrb, s, "begin", mrb_match_begin, ARGS_REQ(1)); /* 15.2.16.3.2 */
+ mrb_define_method(mrb, s, "captures", mrb_match_captures, ARGS_NONE()); /* 15.2.16.3.3 */
+ mrb_define_method(mrb, s, "end", mrb_match_end, ARGS_REQ(1)); /* 15.2.16.3.4 */
+ mrb_define_method(mrb, s, "initialize_copy", mrb_match_init_copy, ARGS_REQ(1)); /* 15.2.16.3.5 */
+ mrb_define_method(mrb, s, "length", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.6 */
+ mrb_define_method(mrb, s, "offset", mrb_match_offset, ARGS_REQ(1)); /* 15.2.16.3.7 */
+ mrb_define_method(mrb, s, "post_match", mrb_reg_match_post, ARGS_NONE()); /* 15.2.16.3.8 */
+ mrb_define_method(mrb, s, "pre_match", mrb_reg_match_pre, ARGS_NONE()); /* 15.2.16.3.9 */
+ mrb_define_method(mrb, s, "size", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.10 */
+ mrb_define_method(mrb, s, "string", mrb_match_string, ARGS_NONE()); /* 15.2.16.3.11 */
+ mrb_define_method(mrb, s, "to_a", mrb_match_to_a, ARGS_NONE()); /* 15.2.16.3.12 */
+ mrb_define_method(mrb, s, "to_s", mrb_match_to_s, ARGS_NONE()); /* 15.2.16.3.13 */
+ mrb_define_method(mrb, s, "inspect", mrb_match_inspect, ARGS_NONE()); /* 15.2.16.3.14(x) */
+ mrb_define_method(mrb, s, "==", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.15(x) */
+ mrb_define_method(mrb, s, "eql?", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.16(x) */
+ //mrb_define_method(rb_cMatch, "regexp", match_regexp, 0);
+ //mrb_define_method(rb_cMatch, "names", match_names, 0);
+ //mrb_define_method(rb_cMatch, "values_at", match_values_at, -1);
+ //mrb_define_method(rb_cMatch, "hash", match_hash, 0);
+ //mrb_define_method(rb_cMatch, "==", match_equal, 1);
+}
+/* ----------------1_8_7---------------------------------------- */
+//`mrb_check_type'
+//`mrb_reg_regsub'
+//`mrb_backref_get'
+//`mrb_memsearch'
+//`mrb_reg_mbclen2'
+//`mrb_reg_regcomp'
+//`mrb_yield'
+
+
+/*
+ * Expand the backslash sequences of a replacement string.
+ *
+ * str    - replacement template
+ * src    - string the match was made against
+ * regs   - match registers over src (BEG()/END() read this variable)
+ * regexp - regexp that produced the match (used for group lookups)
+ *
+ * Recognised sequences: \1..\9 (numbered group, only when unnamed group
+ * capture is active), \k<name> (named group), \0 and \& (whole match),
+ * \` (text before the match), \' (text after the match), \+ (highest
+ * group that matched) and \\ (a literal backslash).  Any other sequence is
+ * copied through unchanged.  Groups that are out of range or unset expand
+ * to nothing.
+ *
+ * Returns `str` itself when it contains no backslash sequence, otherwise a
+ * newly built string.  Raises a RuntimeError for an unterminated \k<name.
+ */
+mrb_value
+mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers *regs, mrb_value regexp)
+{
+ mrb_value val;
+ char *p, *s, *e;
+ int no, clen;
+ mrb_encoding *str_enc = mrb_enc_get(mrb, str);
+ mrb_encoding *src_enc = mrb_enc_get(mrb, src);
+ int acompat = mrb_enc_asciicompat(mrb, str_enc);
+ /* Fetch the ASCII character at s, or -1 if it is not ASCII; *cl gets its byte length. */
+#define ASCGET(mrb,s,e,cl) (acompat ? (*cl=1,ISASCII(s[0])?s[0]:-1) : mrb_enc_ascget(mrb, s, e, cl, str_enc))
+ struct RString *ps = mrb_str_ptr(str);
+
+ /* val.tt == 0 means "no output buffer created yet" */
+ val.tt = 0;
+ /* p: start of the pending literal run; s: scan position; e: end of str */
+ p = s = ps->buf;
+ e = s + ps->len;
+
+ while (s < e) {
+ int c = ASCGET(mrb, s, e, &clen);
+ char *ss;
+
+ if (c == -1) {
+ s += mbclen(s, e, str_enc);
+ continue;
+ }
+ /* ss remembers where the (possible) backslash sequence starts */
+ ss = s;
+ s += clen;
+
+ if (c != '\\' || s == e) continue;
+
+ //if (!val) {
+ if (!val.tt) {
+ val = mrb_str_buf_new(mrb, ss-p);
+ }
+ /* flush the literal text seen before the backslash */
+ mrb_enc_str_buf_cat(mrb, val, p, ss-p, str_enc);
+
+ c = ASCGET(mrb, s, e, &clen);
+ if (c == -1) {
+ s += mbclen(s, e, str_enc);
+ mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ p = s;
+ continue;
+ }
+ s += clen;
+
+ p = s;
+ switch (c) {
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
+ no = c - '0';
+ }
+ else {
+ continue;
+ }
+ break;
+
+ case 'k':
+ if (s < e && ASCGET(mrb, s, e, &clen) == '<') {
+ char *name, *name_end;
+
+ /* scan up to the closing '>' of \k<name> */
+ name_end = name = s + clen;
+ while (name_end < e) {
+ c = ASCGET(mrb, name_end, e, &clen);
+ if (c == '>') break;
+ name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
+ }
+ if (name_end < e) {
+ no = name_to_backref_number(mrb, regs, regexp, name, name_end);
+ p = s = name_end + clen;
+ break;
+ }
+ else {
+ mrb_raise(mrb, mrb->eRuntimeError_class, "invalid group name reference format");
+ }
+ }
+
+ /* "\k" not followed by '<': keep it literally */
+ mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ continue;
+
+ case '0':
+ case '&':
+ no = 0;
+ break;
+
+ case '`':
+ mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0), src_enc);
+ continue;
+
+ case '\'':
+ mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
+ continue;
+
+ case '+':
+ /* highest-numbered group that is set; nothing is emitted if none is */
+ no = regs->num_regs-1;
+ while (BEG(no) == -1 && no > 0) no--;
+ if (no == 0) continue;
+ break;
+
+ case '\\':
+ mrb_enc_str_buf_cat(mrb, val, s-clen, clen, str_enc);
+ continue;
+
+ default:
+ mrb_enc_str_buf_cat(mrb, val, ss, s-ss, str_enc);
+ continue;
+ }
+
+ /* emit the text of group `no` when it exists and took part in the match */
+ if (no >= 0) {
+ if (no >= regs->num_regs) continue;
+ if (BEG(no) == -1) continue;
+ mrb_enc_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
+ }
+ } /* while (s < e) { */
+
+
+ /* no backslash sequence was seen: hand back the template itself */
+ if (!val.tt) return str;
+ if (p < e) {
+ mrb_enc_str_buf_cat(mrb, val, p, e-p, str_enc);
+ }
+ return val;
+}
+
+//#define NEW_NODE(t,a0,a1,a2) mrb_node_newnode((t),(int)(a0),(int)(a1),(int)(a2))
+//#define NEW_IF(c,t,e) NEW_NODE(NODE_IF,c,t,e)
+/* Return the node that holds the special variables.  This port keeps a
+ * single node in mrb->local_svar; `lfp` is accepted but not consulted. */
+static inline NODE *
+lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp)
+{
+  NODE *place = mrb->local_svar;
+
+  return place;
+}
+
+/* Read a special variable: key 0 maps to slot u1, key 1 to slot u2, and
+ * every other key to slot u3 of the node from lfp_svar_place(). */
+static mrb_value
+lfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key)
+{
+  NODE *slots = lfp_svar_place(mrb, /*th,*/ lfp);
+
+  if (key == 0) {
+    return slots->u1.value;
+  }
+  if (key == 1) {
+    return slots->u2.value;
+  }
+  return slots->u3.value;
+}
+
+/* Store a special variable: key 0 maps to slot u1, key 1 to slot u2, and
+ * every other key to slot u3 of the node from lfp_svar_place(). */
+static void
+lfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key, mrb_value val)
+{
+  NODE *slots = lfp_svar_place(mrb, /*th,*/ lfp);
+
+  if (key == 0) {
+    slots->u1.value = val;
+  }
+  else if (key == 1) {
+    slots->u2.value = val;
+  }
+  else {
+    slots->u3.value = val;
+  }
+}
+
+/* Read special variable `key`; forwards to lfp_svar_get() with a null
+ * frame pointer. */
+static mrb_value
+vm_cfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key)
+{
+  mrb_value *no_frame = 0;
+
+  return lfp_svar_get(mrb, no_frame, key);
+}
+
+/* Store special variable `key`; forwards to lfp_svar_set() with a null
+ * frame pointer. */
+static void
+vm_cfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key, const mrb_value val)
+{
+  mrb_value *no_frame = 0;
+
+  lfp_svar_set(mrb, no_frame, key, val);
+}
+
+/* Read special variable `key` through vm_cfp_svar_get(). */
+static mrb_value
+vm_svar_get(mrb_state *mrb, mrb_int key)
+{
+  mrb_value current = vm_cfp_svar_get(mrb, key);
+
+  return current;
+}
+
+/* Store `val` as special variable `key` through vm_cfp_svar_set(). */
+static void
+vm_svar_set(mrb_state *mrb, mrb_int key, mrb_value val)
+{
+  vm_cfp_svar_set(mrb, key, val);
+}
+
+
+/* Public wrapper around match_backref_number(): resolve `backref` to a
+ * register index of `match`. */
+int
+mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref)
+{
+  int index = match_backref_number(mrb, match, backref);
+
+  return index;
+}
+
+/* Fetch the stored back-reference value, kept in special-variable slot 1. */
+mrb_value
+mrb_backref_get(mrb_state *mrb)
+{
+  const mrb_int backref_slot = 1;
+
+  return vm_svar_get(mrb, backref_slot);
+}
+
+/* Store `val` as the back-reference value, kept in special-variable slot 1. */
+void
+mrb_backref_set(mrb_state *mrb, mrb_value val)
+{
+  const mrb_int backref_slot = 1;
+
+  vm_svar_set(mrb, backref_slot, val);
+}
+#endif //INCLUDE_REGEXP
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Search for the short pattern xs[0..m) in ys[0..n) by packing m bytes into
+ * one integer and sliding that window over the haystack.
+ *
+ * Returns the offset of the first occurrence, or -1 when there is none.
+ * The caller must pass 1 <= m <= SIZEOF_VALUE and m <= n (mrb_memsearch
+ * only calls this with 2 <= m <= SIZEOF_VALUE and m < n).
+ *
+ * The window values are kept in an unsigned type: the previous `int`
+ * version shifted set bits into the sign bit, which is undefined
+ * behaviour.  The mask is also computed only after the pattern-length
+ * guard, so the shift count is never negative.
+ *
+ * The preprocessor block is unchanged: later code in this file relies on
+ * the SIZEOF_VALUE definition made here.
+ */
+static inline long
+mrb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+  const unsigned char *x = xs, *xe = xs + m;
+  const unsigned char *y = ys, *ye = ys + n;
+#define SIZEOF_VOIDP 4
+#define SIZEOF_LONG 4
+
+#ifndef VALUE_MAX
+# if SIZEOF_VALUE == 8
+# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
+# elif SIZEOF_VALUE == 4
+# define VALUE_MAX 0xFFFFFFFFUL
+# elif SIZEOF_LONG == SIZEOF_VOIDP
+# define SIZEOF_VALUE 4
+# define VALUE_MAX 0xFFFFFFFFUL
+# endif
+#endif
+  unsigned long long hx, hy, mask;
+
+  if (m > SIZEOF_VALUE)
+    mrb_bug("!!too long pattern string!!");
+
+  /* Keep only the low m bytes of the sliding window. */
+  mask = (unsigned long long)VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
+
+  /* Prepare hash value */
+  for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
+    hx <<= CHAR_BIT;
+    hy <<= CHAR_BIT;
+    hx |= *x;
+    hy |= *y;
+  }
+  /* Searching */
+  while (hx != hy) {
+    if (y == ye)
+      return -1;
+    hy <<= CHAR_BIT;
+    hy |= *y;
+    hy &= mask;
+    y++;
+  }
+  return y - ys - m;
+}
+
+/*
+ * Quick Search for the pattern xs[0..m) in ys[0..n).
+ *
+ * Returns the offset of the first occurrence, or -1 when there is none.
+ *
+ * The shift table is indexed by the byte just after the current window.
+ * For the last possible window that byte is ys[n], which lies outside the
+ * haystack; the previous version read it anyway.  The search now stops
+ * before that read.  The result is unchanged because every shift is at
+ * least 1, so the loop would have ended on the next test regardless.
+ */
+static inline long
+mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+  const unsigned char *x = xs, *xe = xs + m;
+  long pos = 0;
+  int i, qstable[256];
+
+  /* Preprocessing: bytes absent from the pattern skip the whole window. */
+  for (i = 0; i < 256; ++i)
+    qstable[i] = m + 1;
+  for (; x < xe; ++x)
+    qstable[*x] = xe - x;
+
+  /* Searching */
+  while (pos + m <= n) {
+    if (*xs == ys[pos] && memcmp(xs, ys + pos, m) == 0)
+      return pos;
+    if (pos + m == n)
+      break;                      /* last window: nothing left to look at */
+    pos += qstable[ys[pos + m]];
+  }
+  return -1;
+}
+
+/*
+ * Bucket index (0..511) for the byte sequence starting at x, used by the
+ * UTF-8 Quick Search shift table.
+ *
+ * A first byte below 0xC0 or at/above 0xF5 maps to itself plus 256.
+ * A first byte in 0xC0..0xF4 is mixed with the 1, 2 or 3 bytes that follow
+ * it (by lead-byte range 0xC0-0xDF, 0xE0-0xEF, 0xF0-0xF4) and the result is
+ * reduced to its low 8 bits.  The following bytes are read without any
+ * bounds check.
+ */
+static inline unsigned int
+mrb_memsearch_qs_utf8_hash(const unsigned char *x)
+{
+  const unsigned int mix = 8353;
+  unsigned int h = *x;
+  int trailing, k;
+
+  if (h < 0xC0 || h >= 0xF5) {
+    return h + 256;
+  }
+
+  if (h < 0xE0) {
+    trailing = 1;
+  }
+  else if (h < 0xF0) {
+    trailing = 2;
+  }
+  else {
+    trailing = 3;
+  }
+
+  for (k = 1; k <= trailing; k++) {
+    h = h * mix + x[k];
+  }
+  return (unsigned char)h;
+}
+
+/*
+ * Quick Search for the pattern xs[0..m) in ys[0..n), with the shift table
+ * keyed by mrb_memsearch_qs_utf8_hash() instead of by a single byte.
+ *
+ * Returns the offset of the first occurrence, or -1 when there is none.
+ *
+ * The shift lookup hashes the bytes starting just after the current
+ * window.  For the last possible window that position is ys + n, outside
+ * the haystack; the previous version hashed it anyway.  The search now
+ * stops before that lookup.  The result is unchanged because every shift
+ * is at least 1.
+ * NOTE(review): the hash helper can still look at up to three bytes after
+ * a lead byte, so a haystack or pattern that ends in a truncated multibyte
+ * sequence is still read past its end; confirm callers guarantee padding.
+ */
+static inline long
+mrb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+  const unsigned char *x = xs, *xe = xs + m;
+  long pos = 0;
+  int i, qstable[512];
+
+  /* Preprocessing */
+  for (i = 0; i < 512; ++i) {
+    qstable[i] = m + 1;
+  }
+  for (; x < xe; ++x) {
+    qstable[mrb_memsearch_qs_utf8_hash(x)] = xe - x;
+  }
+
+  /* Searching */
+  while (pos + m <= n) {
+    if (*xs == ys[pos] && memcmp(xs, ys + pos, m) == 0)
+      return pos;
+    if (pos + m == n)
+      break;                      /* last window: nothing left to look at */
+    pos += qstable[mrb_memsearch_qs_utf8_hash(ys + pos + m)];
+  }
+  return -1;
+}
+
+/*
+ * Find the first occurrence of the m-byte pattern x0 in the n-byte buffer
+ * y0.  Returns its offset, or -1 when it does not occur.
+ *
+ * Dispatch, in this order:
+ *   m > n                -> -1
+ *   m == n               -> plain memcmp
+ *   m < 1                -> 0
+ *   m == 1               -> single-byte scan
+ *   m <= SIZEOF_VALUE    -> mrb_memsearch_ss
+ *   enc is the UTF-8 one -> mrb_memsearch_qs_utf8
+ *   otherwise            -> mrb_memsearch_qs
+ */
+int
+mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_encoding *enc)
+{
+  const unsigned char *x = x0, *y = y0;
+
+  if (m > n) {
+    return -1;
+  }
+  if (m == n) {
+    return memcmp(x0, y0, m) == 0 ? 0 : -1;
+  }
+  if (m < 1) {
+    return 0;
+  }
+  if (m == 1) {
+    const unsigned char *hit = memchr(y, *x, n);
+
+    return hit ? hit - y : -1;
+  }
+  if (m <= SIZEOF_VALUE) {
+    return mrb_memsearch_ss(x0, m, y0, n);
+  }
+  if (enc == mrb_utf8_encoding(mrb)) {
+    return mrb_memsearch_qs_utf8(x0, m, y0, n);
+  }
+  return mrb_memsearch_qs(x0, m, y0, n);
+}
+#endif //INCLUDE_ENCODING
+
+#ifdef INCLUDE_REGEXP
+/* Initialise the regexp object `re` from the source string `s` with the
+ * given options and return `re`.  When mrb_reg_initialize_str() reports a
+ * failure, a placeholder line is printed to stdout; no exception is
+ * raised. */
+mrb_value
+mrb_reg_init_str(mrb_state *mrb, mrb_value re, mrb_value s, int options)
+{
+  onig_errmsg_buffer err = "";
+
+  if (mrb_reg_initialize_str(mrb, re, s, options, err, NULL, 0) == 0) {
+    return re;
+  }
+
+  printf("mrb_reg_raise_str(s, options, err);");
+  return re;
+}
+
+/* Allocate a blank Regexp object via mrb_reg_s_alloc(), passing nil for
+ * its second argument. */
+mrb_value
+mrb_reg_alloc(mrb_state *mrb)
+{
+  return mrb_reg_s_alloc(mrb, mrb_nil_value());
+}
+
+/* Create a new Regexp object from the source string `s` and `options`. */
+mrb_value
+mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options)
+{
+  mrb_value blank = mrb_reg_alloc(mrb);
+
+  return mrb_reg_init_str(mrb, blank, s, options);
+}
+
+/* Compile `str` into a Regexp, reusing the one-entry `reg_cache` when the
+ * cached regexp has the same source length, encoding and source bytes.
+ * On a miss the new regexp (built with options 0) replaces the cache. */
+mrb_value
+mrb_reg_regcomp(mrb_state *mrb, mrb_value str)
+{
+  int cached = reg_cache.tt
+    && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
+    && ENCODING_GET(mrb, reg_cache) == ENCODING_GET(mrb, str)
+    && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0;
+
+  if (!cached) {
+    reg_cache = mrb_reg_new_str(mrb, str, 0);
+  }
+  return reg_cache;
+}
+
+/* Return the position at which a search should start.  In this port it is
+ * a pass-through: `startpos` is returned unchanged and the other arguments
+ * are not used (the fastmap update and multibyte adjustment steps are
+ * disabled). */
+int
+re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range)
+{
+  int adjusted = startpos;
+
+  return adjusted;
+}
+#endif //INCLUDE_REGEXP
+
+#ifdef INCLUDE_ENCODING
+/* Table with one entry per byte value (256 entries), all zero. */
+static const unsigned char mbctab_ascii[256] = { 0 };
+
+/* Table currently in effect; points at the all-zero ASCII table. */
+const unsigned char *re_mbctab = mbctab_ascii;
+
+#define is_identchar(p,e,enc) (mrb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p))
+
+/*
+ * Check whether [m, e) is the part after '$' of a special global variable
+ * name: a single punctuation character from the list below, '-' followed
+ * by at most one identifier character, or a run of digits.
+ *
+ * Returns 0 when it is not (including when the range is empty or has
+ * trailing characters), 1 when it is and only ASCII bytes were seen in the
+ * '-' or digit forms, and 2 when a non-ASCII byte was seen there.
+ */
+static int
+is_special_global_name(const char *m, const char *e, mrb_encoding *enc)
+{
+ /* set to 1 once a non-ASCII byte has been consumed */
+ int mb = 0;
+
+ if (m >= e) return 0;
+ switch (*m) {
+ case '~': case '*': case '$': case '?': case '!': case '@':
+ case '/': case '\\': case ';': case ',': case '.': case '=':
+ case ':': case '<': case '>': case '\"':
+ case '&': case '`': case '\'': case '+':
+ case '0':
+ ++m;
+ break;
+ case '-':
+ ++m;
+ if (m < e && is_identchar(m, e, enc)) {
+ if (!ISASCII(*m)) mb = 1;
+ m += mrb_enc_mbclen(m, e, enc);
+ }
+ break;
+ default:
+ if (!mrb_enc_isdigit(*m, enc)) return 0;
+ do {
+ if (!ISASCII(*m)) mb = 1;
+ ++m;
+ } while (m < e && mrb_enc_isdigit(*m, enc));
+ }
+ /* the whole range must have been consumed */
+ return m == e ? mb + 1 : 0;
+}
+
+/*
+ * Return TRUE when the `len` bytes at `name` form a complete symbol name,
+ * FALSE otherwise (also for a NULL or empty name).
+ *
+ * Accepted forms, chosen by the first character:
+ *   $...            special global name, or '$' followed by an identifier
+ *   @name, @@name   '@' or '@@' followed by an identifier
+ *   operators       < << <= <=> > >> >= == === =~ * ** + - +@ -@
+ *                   | ^ & / % ~ ` [] []= ! != !~
+ *   identifier      optionally ending in '!', '?' or '=' when it does not
+ *                   start with an uppercase letter
+ * In every case the name must end exactly at name + len.
+ *
+ * NOTE(review): several branches read *m without comparing m against e
+ * first, so the buffer presumably has to be NUL-terminated; confirm
+ * against callers.
+ */
+int
+mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc)
+{
+ const char *m = name;
+ const char *e = m + len;
+ /* TRUE only for plain identifiers that do not start with an uppercase letter */
+ int localid = FALSE;
+
+ if (!m) return FALSE;
+ switch (*m) {
+ case '\0':
+ return FALSE;
+
+ case '$':
+ if (is_special_global_name(++m, e, enc)) return TRUE;
+ goto id;
+
+ case '@':
+ if (*++m == '@') ++m;
+ goto id;
+
+ case '<':
+ switch (*++m) {
+ case '<': ++m; break;
+ case '=': if (*++m == '>') ++m; break;
+ default: break;
+ }
+ break;
+
+ case '>':
+ switch (*++m) {
+ case '>': case '=': ++m; break;
+ }
+ break;
+
+ case '=':
+ switch (*++m) {
+ case '~': ++m; break;
+ case '=': if (*++m == '=') ++m; break;
+ default: return FALSE;
+ }
+ break;
+
+ case '*':
+ if (*++m == '*') ++m;
+ break;
+
+ case '+': case '-':
+ if (*++m == '@') ++m;
+ break;
+
+ case '|': case '^': case '&': case '/': case '%': case '~': case '`':
+ ++m;
+ break;
+
+ case '[':
+ if (*++m != ']') return FALSE;
+ if (*++m == '=') ++m;
+ break;
+
+ case '!':
+ switch (*++m) {
+ case '\0': return TRUE;
+ case '=': case '~': ++m; break;
+ default: return FALSE;
+ }
+ break;
+
+ default:
+ localid = !mrb_enc_isupper(*m, enc);
+id:
+ /* the first character must be '_', a letter, or a non-ASCII byte */
+ if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m)))
+ return FALSE;
+ while (m < e && is_identchar(m, e, enc)) m += mrb_enc_mbclen(m, e, enc);
+ if (localid) {
+ switch (*m) {
+ case '!': case '?': case '=': ++m;
+ }
+ }
+ break;
+ }
+ return m == e;
+}
+
+/* Same check as mrb_enc_symname2_p() for a NUL-terminated name; the length
+ * is taken with strlen(). */
+int
+mrb_enc_symname_p(const char *name, mrb_encoding *enc)
+{
+  long len = strlen(name);
+
+  return mrb_enc_symname2_p(name, len, enc);
+}
+#endif //INCLUDE_ENCODING
diff --git a/src/re.h b/src/re.h
new file mode 100644
index 000000000..e388f602c
--- /dev/null
+++ b/src/re.h
@@ -0,0 +1,85 @@
+/**********************************************************************
+
+ re.h -
+
+**********************************************************************/
+
+#ifndef RE_H
+#define RE_H
+
+//#include <sys/types.h>
+#include <stdio.h>
+
+#include "node.h"
+#include "regex.h"
+#include "encoding.h"
+
+/* Entry `no` of regs->beg / regs->end.  Both macros expand to an access
+ * through a variable named `regs`, which must be in scope where they are
+ * used. */
+#define BEG(no) regs->beg[no]
+#define END(no) regs->end[no]
+
+/* A begin/end pair of offsets; used as the element type of
+ * struct rmatch's char_offset array.
+ * NOTE(review): presumably character (not byte) offsets - confirm in re.c. */
+struct rmatch_offset {
+ long beg;
+ long end;
+};
+
+/* Match result storage: the regex engine's registers plus an array of
+ * rmatch_offset entries with its bookkeeping fields. */
+struct rmatch {
+ struct re_registers regs;
+
+ /* NOTE(review): looks like a "char_offset is valid" flag - confirm in re.c */
+ int char_offset_updated;
+ /* NOTE(review): presumably the allocated element count of char_offset - confirm */
+ int char_offset_num_allocated;
+ struct rmatch_offset *char_offset;
+};
+
+//struct RMatch {
+// MRUBY_OBJECT_HEADER;
+// mrb_value str;
+// struct re_registers *regs;
+//};
+/* Heap object for a match result: the object header followed by a string
+ * value, a pointer to the rmatch data and the regexp value.
+ * NOTE(review): str is presumably the matched subject string - confirm. */
+struct RMatch {
+ MRUBY_OBJECT_HEADER;
+ mrb_value str;
+ struct rmatch *rmatch;
+ mrb_value regexp; /* RRegexp */
+};
+
+/* Heap object for a regular expression: the object header, the compiled
+ * pattern buffer, the source value (read as an RString by
+ * RREGEXP_SRC_PTR) and a use counter.
+ * NOTE(review): semantics of usecnt are not visible here - confirm. */
+struct RRegexp {
+ MRUBY_OBJECT_HEADER;
+ struct re_pattern_buffer *ptr;
+ mrb_value src;
+ unsigned long usecnt;
+};
+
+/* Accessors for an mrb_value `r` whose value.p points at a struct RRegexp.
+ * None of them check the value's type; mrb_regex_ptr and RREGEXP are
+ * identical casts.  RREGEXP_SRC_PTR reads the source's RString buffer. */
+#define mrb_regex_ptr(r) ((struct RRegexp*)((r).value.p))
+#define RREGEXP(r) ((struct RRegexp*)((r).value.p))
+#define RREGEXP_SRC(r) (RREGEXP(r)->src)
+#define RREGEXP_SRC_PTR(r) (((struct RString*)(RREGEXP_SRC(r).value.p))->buf)
+#define RREGEXP_SRC_LEN(r) RSTRING_LEN(RREGEXP(r)->src)
+int re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range);
+
+typedef struct re_pattern_buffer Regexp;
+
+//#define RMATCH(obj) (R_CAST(RMatch)(obj))
+/* Accessors for an mrb_value `v` whose value.p points at a struct RMatch.
+ * No type check is done; RMATCH and mrb_match_ptr are identical casts and
+ * RMATCH_REGS yields the address of the registers inside v's rmatch. */
+#define RMATCH_REGS(v) (&((struct RMatch*)((v).value.p))->rmatch->regs)
+#define RMATCH(v) ((struct RMatch*)((v).value.p))
+#define mrb_match_ptr(v) ((struct RMatch*)((v).value.p))
+
+int mrb_memcmp(const void *p1, const void *p2, int len);
+
+mrb_int mrb_reg_search (mrb_state *mrb, mrb_value, mrb_value, mrb_int, mrb_int);
+mrb_value mrb_reg_regsub (mrb_state *mrb, mrb_value, mrb_value, struct re_registers *, mrb_value);
+//mrb_value mrb_reg_regsub(mrb_value, mrb_value, struct re_registers *, mrb_value);
+mrb_int mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse);
+void mrb_match_busy (mrb_value);
+
+mrb_value mrb_reg_quote(mrb_state *mrb, mrb_value str);
+mrb_value mrb_reg_regcomp(mrb_state *mrb, mrb_value str);
+mrb_value mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str);
+mrb_value mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match);
+mrb_value mrb_backref_get(mrb_state *mrb);
+//mrb_int mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n);
+mrb_value mrb_reg_to_s(mrb_state *mrb, mrb_value re);
+void mrb_backref_set(mrb_state *mrb, mrb_value val);
+mrb_value match_alloc(mrb_state *mrb);
+int mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref);
+
+#endif
diff --git a/src/regcomp.c b/src/regcomp.c
new file mode 100644
index 000000000..3aaac4194
--- /dev/null
+++ b/src/regcomp.c
@@ -0,0 +1,6286 @@
+/**********************************************************************
+ regcomp.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#include <string.h>
+#include "regparse.h"
+#ifdef INCLUDE_REGEXP
+
+OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
+
+/* Returns the current value of the global OnigDefaultCaseFoldFlag. */
+OnigCaseFoldType
+onig_get_default_case_fold_flag(void)
+{
+  const OnigCaseFoldType current = OnigDefaultCaseFoldFlag;
+
+  return current;
+}
+
+/* Stores case_fold_flag in the global OnigDefaultCaseFoldFlag.
+ * No validation is performed; the return value is always 0. */
+int
+onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
+{
+  const int status = 0;
+
+  OnigDefaultCaseFoldFlag = case_fold_flag;
+  return status;
+}
+
+
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+#endif
+
+/*
+ * Copies the bytes in [s, end) into a freshly xmalloc'ed buffer and appends
+ * a zero byte.  Returns NULL when the range is empty or inverted.
+ * Allocation failure is handled by CHECK_NULL_RETURN (defined elsewhere;
+ * presumably it returns NULL - confirm).
+ */
+static UChar*
+str_dup(UChar* s, UChar* end)
+{
+  UChar* copy;
+  ptrdiff_t nbytes = end - s;
+
+  if (nbytes <= 0) return NULL;
+
+  copy = (UChar* )xmalloc(nbytes + 1);
+  CHECK_NULL_RETURN(copy);
+  xmemcpy(copy, s, nbytes);
+  copy[nbytes] = (UChar )0;
+  return copy;
+}
+
+/*
+ * Exchanges the contents of *a and *b.  A string node with capa == 0 has
+ * its s/end pointers re-based onto its own buf afterwards, because the
+ * struct copy leaves them pointing into the other node.
+ */
+static void
+swap_node(Node* a, Node* b)
+{
+  Node tmp;
+  Node* moved[2];
+  int k;
+
+  tmp = *a;
+  *a = *b;
+  *b = tmp;
+
+  moved[0] = a;
+  moved[1] = b;
+  for (k = 0; k < 2; k++) {
+    Node* cur = moved[k];
+    StrNode* sn;
+    size_t len;
+
+    if (NTYPE(cur) != NT_STR) continue;
+    sn = NSTR(cur);
+    if (sn->capa != 0) continue;
+
+    /* keep the length, move the window onto this node's own buffer */
+    len = sn->end - sn->s;
+    sn->s = sn->buf;
+    sn->end = sn->s + len;
+  }
+}
+
+/*
+ * Saturating addition: the result is ONIG_INFINITE_DISTANCE when either
+ * operand already is, or when the sum would exceed it.
+ */
+static OnigDistance
+distance_add(OnigDistance d1, OnigDistance d2)
+{
+  if (d1 == ONIG_INFINITE_DISTANCE) return ONIG_INFINITE_DISTANCE;
+  if (d2 == ONIG_INFINITE_DISTANCE) return ONIG_INFINITE_DISTANCE;
+
+  /* d1 + d2 fits only if d1 leaves at least d2 of headroom */
+  if (d1 > ONIG_INFINITE_DISTANCE - d2) return ONIG_INFINITE_DISTANCE;
+  return d1 + d2;
+}
+
+/*
+ * Saturating multiplication of d by m: 0 when m is 0, d * m while d is
+ * below ONIG_INFINITE_DISTANCE / m, otherwise ONIG_INFINITE_DISTANCE.
+ */
+static OnigDistance
+distance_multiply(OnigDistance d, int m)
+{
+  if (m == 0) return 0;
+
+  if (d >= ONIG_INFINITE_DISTANCE / m)
+    return ONIG_INFINITE_DISTANCE;
+
+  return d * m;
+}
+
+/* Returns 1 when all BITSET_SIZE words of bs are zero, 0 otherwise. */
+static int
+bitset_is_empty(BitSetRef bs)
+{
+  int idx = (int )BITSET_SIZE;
+
+  while (idx-- > 0) {
+    if (bs[idx] != 0) return 0;
+  }
+  return 1;
+}
+
+#ifdef ONIG_DEBUG
+/* Debug helper: counts the positions below SINGLE_BYTE_SIZE for which
+ * BITSET_AT(bs, pos) is non-zero. */
+static int
+bitset_on_num(BitSetRef bs)
+{
+  int pos;
+  int count = 0;
+
+  for (pos = 0; pos < SINGLE_BYTE_SIZE; pos++)
+    count += (BITSET_AT(bs, pos) ? 1 : 0);
+
+  return count;
+}
+#endif
+
+/*
+ * Initializes buf with a storage area of `size` bytes and used == 0.
+ * A non-positive size gives an empty buffer (p == NULL, alloc == 0).
+ * Returns 0 on success, ONIGERR_MEMORY when xmalloc fails; in that case
+ * buf->p has been set to NULL and alloc/used are left untouched.
+ */
+extern int
+onig_bbuf_init(BBuf* buf, int size)
+{
+  if (size > 0) {
+    buf->p = (UChar* )xmalloc(size);
+    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
+  }
+  else {
+    buf->p = NULL;
+    size = 0;
+  }
+
+  buf->used = 0;
+  buf->alloc = size;
+  return 0;
+}
+
+
+#ifdef USE_SUBEXP_CALL
+
+/*
+ * Allocates room for `size` UnsetAddr entries and resets the list to
+ * empty.  Returns 0 on success; allocation failure is handled by
+ * CHECK_NULL_RETURN_MEMERR (defined elsewhere; presumably it returns
+ * ONIGERR_MEMORY - confirm).
+ */
+static int
+unset_addr_list_init(UnsetAddrList* uslist, int size)
+{
+  UnsetAddr* entries = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
+
+  CHECK_NULL_RETURN_MEMERR(entries);
+  uslist->us = entries;
+  uslist->alloc = size;
+  uslist->num = 0;
+  return 0;
+}
+
+/* Releases the entry array of uslist, if there is one.  The fields of
+ * uslist itself are not modified. */
+static void
+unset_addr_list_end(UnsetAddrList* uslist)
+{
+  if (IS_NULL(uslist->us)) return;
+
+  xfree(uslist->us);
+}
+
+/*
+ * Appends an (offset, node) entry to uslist, doubling the capacity with
+ * xrealloc when the list is full.  Returns 0 on success; a failed
+ * xrealloc is handled by CHECK_NULL_RETURN_MEMERR and leaves the list
+ * unchanged.
+ */
+static int
+unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
+{
+  UnsetAddr* slot;
+
+  if (uslist->num >= uslist->alloc) {
+    int grown = uslist->alloc * 2;
+    UnsetAddr* bigger =
+      (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * grown);
+
+    CHECK_NULL_RETURN_MEMERR(bigger);
+    uslist->us = bigger;
+    uslist->alloc = grown;
+  }
+
+  slot = &uslist->us[uslist->num];
+  slot->offset = offset;
+  slot->target = node;
+  uslist->num++;
+  return 0;
+}
+#endif /* USE_SUBEXP_CALL */
+
+
+/*
+ * Appends `opcode` to reg's code buffer via BBUF_ADD1 and returns 0.
+ * NOTE(review): BBUF_ADD1 is a macro defined outside this chunk; callers
+ * test this function's result for errors, so the macro presumably returns
+ * an error code from here when the buffer cannot grow - confirm.
+ */
+static int
+add_opcode(regex_t* reg, int opcode)
+{
+ BBUF_ADD1(reg, opcode);
+ return 0;
+}
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+/*
+ * Appends `num`, converted to StateCheckNumType, to reg's code buffer
+ * (SIZE_STATE_CHECK_NUM bytes) and returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_state_check_num(regex_t* reg, int num)
+{
+ StateCheckNumType n = (StateCheckNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
+ return 0;
+}
+#endif
+
+/*
+ * Appends `addr`, converted to RelAddrType, to reg's code buffer
+ * (SIZE_RELADDR bytes) and returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_rel_addr(regex_t* reg, int addr)
+{
+ RelAddrType ra = (RelAddrType )addr;
+
+ BBUF_ADD(reg, &ra, SIZE_RELADDR);
+ return 0;
+}
+
+/*
+ * Appends `addr`, converted to AbsAddrType, to reg's code buffer
+ * (SIZE_ABSADDR bytes) and returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_abs_addr(regex_t* reg, int addr)
+{
+ AbsAddrType ra = (AbsAddrType )addr;
+
+ BBUF_ADD(reg, &ra, SIZE_ABSADDR);
+ return 0;
+}
+
+/*
+ * Appends `len`, converted to LengthType, to reg's code buffer
+ * (SIZE_LENGTH bytes) and returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_length(regex_t* reg, int len)
+{
+ LengthType l = (LengthType )len;
+
+ BBUF_ADD(reg, &l, SIZE_LENGTH);
+ return 0;
+}
+
+/*
+ * Appends `num`, converted to MemNumType, to reg's code buffer
+ * (SIZE_MEMNUM bytes) and returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_mem_num(regex_t* reg, int num)
+{
+ MemNumType n = (MemNumType )num;
+
+ BBUF_ADD(reg, &n, SIZE_MEMNUM);
+ return 0;
+}
+
+/*
+ * Appends the pointer value `addr`, converted to PointerType, to reg's
+ * code buffer (SIZE_POINTER bytes) and returns 0.  Only the address is
+ * stored; the pointed-to object is not copied.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_pointer(regex_t* reg, void* addr)
+{
+ PointerType ptr = (PointerType )addr;
+
+ BBUF_ADD(reg, &ptr, SIZE_POINTER);
+ return 0;
+}
+
+/*
+ * Appends `option` to reg's code buffer (SIZE_OPTION bytes) and returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_option(regex_t* reg, OnigOptionType option)
+{
+ BBUF_ADD(reg, &option, SIZE_OPTION);
+ return 0;
+}
+
+/* Emits `opcode` followed by the relative address `addr`.  Returns the
+ * first non-zero result of the two emit steps, or 0. */
+static int
+add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
+{
+  int status = add_opcode(reg, opcode);
+
+  if (status == 0)
+    status = add_rel_addr(reg, addr);
+
+  return status;
+}
+
+/*
+ * Appends `len` bytes starting at `bytes` to reg's code buffer and
+ * returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_bytes(regex_t* reg, UChar* bytes, int len)
+{
+ BBUF_ADD(reg, bytes, len);
+ return 0;
+}
+
+/*
+ * Appends the SIZE_BITSET bytes of bitset `bs` to reg's code buffer and
+ * returns 0.
+ * NOTE(review): BBUF_ADD is defined elsewhere and may return early on
+ * allocation failure - confirm.
+ */
+static int
+add_bitset(regex_t* reg, BitSetRef bs)
+{
+ BBUF_ADD(reg, bs, SIZE_BITSET);
+ return 0;
+}
+
+/* Emits `opcode` followed by the option word `option`.  Returns the first
+ * non-zero result of the two emit steps, or 0. */
+static int
+add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
+{
+  int status = add_opcode(reg, opcode);
+
+  if (status == 0)
+    status = add_option(reg, option);
+
+  return status;
+}
+
+static int compile_length_tree(Node* node, regex_t* reg);
+static int compile_tree(Node* node, regex_t* reg);
+
+
+#define IS_NEED_STR_LEN_OP_EXACT(op) \
+ ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
+ (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
+
+/*
+ * Picks the exact-match opcode for a run of str_len characters that are
+ * each mb_len bytes wide.
+ *
+ * With ignore_case set only str_len matters (1 -> OP_EXACT1_IC, otherwise
+ * OP_EXACTN_IC).  Otherwise 1-byte runs of length 1..5 and 2-byte runs of
+ * length 1..3 get dedicated opcodes, longer runs the corresponding "N"
+ * opcode; 3-byte characters use OP_EXACTMB3N and every other width
+ * OP_EXACTMBN.
+ */
+static int
+select_str_opcode(int mb_len, int str_len, int ignore_case)
+{
+  if (ignore_case)
+    return (str_len == 1) ? OP_EXACT1_IC : OP_EXACTN_IC;
+
+  if (mb_len == 1) {
+    if (str_len == 1) return OP_EXACT1;
+    if (str_len == 2) return OP_EXACT2;
+    if (str_len == 3) return OP_EXACT3;
+    if (str_len == 4) return OP_EXACT4;
+    if (str_len == 5) return OP_EXACT5;
+    return OP_EXACTN;
+  }
+
+  if (mb_len == 2) {
+    if (str_len == 1) return OP_EXACTMB2N1;
+    if (str_len == 2) return OP_EXACTMB2N2;
+    if (str_len == 3) return OP_EXACTMB2N3;
+    return OP_EXACTMB2N;
+  }
+
+  if (mb_len == 3)
+    return OP_EXACTMB3N;
+
+  return OP_EXACTMBN;
+}
+
+/*
+ * Compiles `node`, and when empty_info is non-zero wraps it in a
+ * null-check pair: OP_NULL_CHECK_START + id before the body and the
+ * OP_NULL_CHECK_END variant selected by empty_info + the same id after it.
+ * The id is reg->num_null_check at entry, which is then incremented.
+ * Returns 0 or the first non-zero result of an emit/compile step.
+ */
+static int
+compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
+{
+  int r;
+  int check_id;
+
+  if (empty_info == 0)
+    return compile_tree(node, reg);
+
+  check_id = reg->num_null_check;
+
+  r = add_opcode(reg, OP_NULL_CHECK_START);
+  if (r) return r;
+  r = add_mem_num(reg, check_id); /* NULL CHECK ID */
+  if (r) return r;
+  reg->num_null_check++;
+
+  r = compile_tree(node, reg);
+  if (r) return r;
+
+  /* an unrecognized empty_info emits no end opcode, only the id below */
+  if (empty_info == NQ_TARGET_IS_EMPTY)
+    r = add_opcode(reg, OP_NULL_CHECK_END);
+  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
+    r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
+  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
+    r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
+  if (r) return r;
+
+  return add_mem_num(reg, check_id); /* NULL CHECK ID */
+}
+
+#ifdef USE_SUBEXP_CALL
+/*
+ * Emits OP_CALL with a placeholder absolute address of 0 and records the
+ * placeholder's buffer offset together with node->target in
+ * node->unset_addr_list.  Returns 0 or the first non-zero step result.
+ */
+static int
+compile_call(CallNode* node, regex_t* reg)
+{
+  int r = add_opcode(reg, OP_CALL);
+
+  if (r == 0)
+    r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
+                            node->target);
+  if (r == 0)
+    r = add_abs_addr(reg, 0 /*dummy addr.*/);
+
+  return r;
+}
+#endif
+
+/* Compiles `node` n times in a row (not at all when n <= 0).  Stops at
+ * and returns the first non-zero compile_tree result, otherwise 0. */
+static int
+compile_tree_n_times(Node* node, int n, regex_t* reg)
+{
+  int remaining = n;
+
+  while (remaining-- > 0) {
+    int r = compile_tree(node, reg);
+    if (r) return r;
+  }
+  return 0;
+}
+
+/*
+ * Returns the number of code bytes add_compile_string() emits for a run
+ * of str_len characters of mb_len bytes each: the opcode, one length
+ * field for OP_EXACTMBN, one more for opcodes that carry a string length,
+ * and the string bytes.  `s` and `reg` are unused.
+ */
+static int
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
+                          regex_t* reg ARG_UNUSED, int ignore_case)
+{
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+  int total = SIZE_OPCODE;
+
+  if (op == OP_EXACTMBN) {
+    total += SIZE_LENGTH;   /* character width field */
+  }
+  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+    total += SIZE_LENGTH;   /* string length field */
+  }
+
+  total += mb_len * str_len;
+  return total;
+}
+
+/*
+ * Emits the exact-match instruction for a run of str_len characters of
+ * mb_len bytes each starting at s:
+ *   opcode, [mb_len if OP_EXACTMBN], [length if the opcode needs one],
+ *   string bytes.
+ * For OP_EXACTN_IC the length field is the byte count (mb_len * str_len),
+ * for the other opcodes it is the character count.
+ *
+ * Returns 0 on success or the first non-zero result of an emit step.
+ * Every caller tests this function's result, so emit failures are
+ * propagated instead of being dropped.
+ */
+static int
+add_compile_string(UChar* s, int mb_len, int str_len,
+                   regex_t* reg, int ignore_case)
+{
+  int r;
+  int op = select_str_opcode(mb_len, str_len, ignore_case);
+
+  r = add_opcode(reg, op);
+  if (r) return r;
+
+  if (op == OP_EXACTMBN) {
+    r = add_length(reg, mb_len);
+    if (r) return r;
+  }
+
+  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+    if (op == OP_EXACTN_IC)
+      r = add_length(reg, mb_len * str_len);
+    else
+      r = add_length(reg, str_len);
+    if (r) return r;
+  }
+
+  return add_bytes(reg, s, mb_len * str_len);
+}
+
+
+/*
+ * Returns the code size of a string node.  The string is split into
+ * maximal runs of characters with the same encoded byte length (enclen);
+ * the result is the sum of add_compile_string_length() over those runs.
+ * An empty string contributes 0.
+ */
+static int
+compile_length_string_node(Node* node, regex_t* reg)
+{
+  int total, width, run_width, run_chars, ambig;
+  OnigEncoding enc = reg->enc;
+  UChar *p, *run_start;
+  StrNode* sn = NSTR(node);
+
+  if (sn->end <= sn->s)
+    return 0;
+
+  ambig = NSTRING_IS_AMBIG(node);
+
+  /* the first character opens the first run */
+  run_start = sn->s;
+  run_width = enclen(enc, run_start, sn->end);
+  p = run_start + run_width;
+  run_chars = 1;
+  total = 0;
+
+  while (p < sn->end) {
+    width = enclen(enc, p, sn->end);
+    if (width != run_width) {
+      /* width changed: account for the finished run, start a new one */
+      total += add_compile_string_length(run_start, run_width, run_chars,
+                                         reg, ambig);
+      run_start = p;
+      run_chars = 1;
+      run_width = width;
+    }
+    else {
+      run_chars++;
+    }
+    p += width;
+  }
+
+  total += add_compile_string_length(run_start, run_width, run_chars,
+                                     reg, ambig);
+  return total;
+}
+
+/* Returns the code size of a raw string node: 0 when empty, otherwise
+ * the size of one single-byte, case-sensitive run covering all bytes. */
+static int
+compile_length_string_raw_node(StrNode* sn, regex_t* reg)
+{
+  if (sn->end > sn->s) {
+    return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s,
+                                     reg, 0);
+  }
+  return 0;
+}
+
+/*
+ * Emits code for a string node.  The string is split into maximal runs
+ * of characters with the same encoded byte length (enclen) and each run
+ * is emitted with add_compile_string().  Returns 0 for an empty string,
+ * otherwise the first non-zero add_compile_string() result or the result
+ * for the final run.
+ */
+static int
+compile_string_node(Node* node, regex_t* reg)
+{
+  int r, width, run_width, run_chars, ambig;
+  OnigEncoding enc = reg->enc;
+  UChar *p, *run_start, *end;
+  StrNode* sn = NSTR(node);
+
+  if (sn->end <= sn->s)
+    return 0;
+
+  end = sn->end;
+  ambig = NSTRING_IS_AMBIG(node);
+
+  /* the first character opens the first run */
+  run_start = sn->s;
+  run_width = enclen(enc, run_start, end);
+  p = run_start + run_width;
+  run_chars = 1;
+
+  while (p < end) {
+    width = enclen(enc, p, end);
+    if (width != run_width) {
+      /* width changed: flush the finished run, start a new one */
+      r = add_compile_string(run_start, run_width, run_chars, reg, ambig);
+      if (r) return r;
+
+      run_start = p;
+      run_chars = 1;
+      run_width = width;
+    }
+    else {
+      run_chars++;
+    }
+    p += width;
+  }
+
+  return add_compile_string(run_start, run_width, run_chars, reg, ambig);
+}
+
+/* Emits a raw string node as one single-byte, case-sensitive run.
+ * Returns 0 for an empty node, otherwise add_compile_string()'s result. */
+static int
+compile_string_raw_node(StrNode* sn, regex_t* reg)
+{
+  if (sn->end > sn->s) {
+    return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
+  }
+  return 0;
+}
+
+/*
+ * Emits the multi-byte part of a character class: a length field followed
+ * by the mbuf->used bytes of mbuf->p.
+ *
+ * Without PLATFORM_UNALIGNED_WORD_ACCESS the data is preceded by padding
+ * derived from GET_ALIGNMENT_PAD_SIZE and followed by the remaining
+ * padding, so the total is always used + (WORD_ALIGNMENT_SIZE - 1) bytes,
+ * matching what compile_length_cclass_node() reports.
+ *
+ * Returns 0 on success or the first non-zero result of an emit step.
+ * Failures of the length and padding writes are now propagated as well,
+ * not only the one from the data write.
+ */
+static int
+add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
+{
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+  int r;
+
+  r = add_length(reg, mbuf->used);
+  if (r) return r;
+  return add_bytes(reg, mbuf->p, mbuf->used);
+#else
+  int r, pad_size;
+  UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
+
+  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
+  r = add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
+  if (r) return r;
+  if (pad_size != 0) {
+    r = add_bytes(reg, PadBuf, pad_size);
+    if (r) return r;
+  }
+
+  r = add_bytes(reg, mbuf->p, mbuf->used);
+  if (r) return r;
+
+  /* padding for return value from compile_length_cclass_node() to be fix. */
+  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
+  if (pad_size != 0) r = add_bytes(reg, PadBuf, pad_size);
+  return r;
+#endif
+}
+
+/*
+ * Returns the code size of a character class node:
+ *   shared class        -> opcode + pointer
+ *   no multi-byte part  -> opcode + bitset
+ *   multi-byte part     -> opcode [+ bitset unless the encoding's minimum
+ *                          character length exceeds 1 or the bitset is
+ *                          empty] + length field + mbuf bytes (plus
+ *                          alignment slack on aligned-access platforms)
+ */
+static int
+compile_length_cclass_node(CClassNode* cc, regex_t* reg)
+{
+  int len;
+
+  if (IS_NCCLASS_SHARE(cc))
+    return SIZE_OPCODE + SIZE_POINTER;
+
+  if (IS_NULL(cc->mbuf))
+    return SIZE_OPCODE + SIZE_BITSET;
+
+  len = SIZE_OPCODE;
+  if (!(ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)))
+    len += SIZE_BITSET;
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+  len += SIZE_LENGTH + cc->mbuf->used;
+#else
+  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
+#endif
+
+  return len;
+}
+
+/*
+ * Emits code for a character class node:
+ *   shared class        -> OP_CCLASS_NODE + pointer to cc
+ *   no multi-byte part  -> OP_CCLASS[_NOT] + bitset
+ *   multi-byte only     -> OP_CCLASS_MB[_NOT] + multi-byte data (chosen
+ *                          when the encoding's minimum character length
+ *                          exceeds 1 or the bitset is empty)
+ *   both parts          -> OP_CCLASS_MIX[_NOT] + bitset + multi-byte data
+ * The _NOT opcode is used when IS_NCCLASS_NOT(cc) holds.
+ *
+ * Returns 0 on success or the first non-zero result of an emit step.  The
+ * opcode writes are checked too, so a failed opcode write is no longer
+ * followed by writing the operands.
+ */
+static int
+compile_cclass_node(CClassNode* cc, regex_t* reg)
+{
+  int r;
+
+  if (IS_NCCLASS_SHARE(cc)) {
+    r = add_opcode(reg, OP_CCLASS_NODE);
+    if (r) return r;
+    return add_pointer(reg, cc);
+  }
+
+  if (IS_NULL(cc->mbuf)) {
+    if (IS_NCCLASS_NOT(cc))
+      r = add_opcode(reg, OP_CCLASS_NOT);
+    else
+      r = add_opcode(reg, OP_CCLASS);
+    if (r) return r;
+
+    return add_bitset(reg, cc->bs);
+  }
+
+  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
+    if (IS_NCCLASS_NOT(cc))
+      r = add_opcode(reg, OP_CCLASS_MB_NOT);
+    else
+      r = add_opcode(reg, OP_CCLASS_MB);
+    if (r) return r;
+
+    return add_multi_byte_cclass(cc->mbuf, reg);
+  }
+
+  if (IS_NCCLASS_NOT(cc))
+    r = add_opcode(reg, OP_CCLASS_MIX_NOT);
+  else
+    r = add_opcode(reg, OP_CCLASS_MIX);
+  if (r) return r;
+
+  r = add_bitset(reg, cc->bs);
+  if (r) return r;
+  return add_multi_byte_cclass(cc->mbuf, reg);
+}
+
+/*
+ * Stores the bounds of repeat number `id` in reg->repeat_range[id].
+ * The table is created with REPEAT_RANGE_ALLOC entries on first use and
+ * enlarged by REPEAT_RANGE_ALLOC entries when id is not below the current
+ * capacity.  An infinite upper bound is stored as 0x7fffffff.
+ * Returns 0 on success; allocation failure is handled by
+ * CHECK_NULL_RETURN_MEMERR.
+ */
+static int
+entry_repeat_range(regex_t* reg, int id, int lower, int upper)
+{
+#define REPEAT_RANGE_ALLOC 4
+
+  OnigRepeatRange* table;
+
+  if (reg->repeat_range_alloc == 0) {
+    table = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
+    CHECK_NULL_RETURN_MEMERR(table);
+    reg->repeat_range = table;
+    reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
+  }
+  else if (id >= reg->repeat_range_alloc) {
+    int capacity = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
+
+    table = (OnigRepeatRange* )xrealloc(reg->repeat_range,
+                                        sizeof(OnigRepeatRange) * capacity);
+    CHECK_NULL_RETURN_MEMERR(table);
+    reg->repeat_range = table;
+    reg->repeat_range_alloc = capacity;
+  }
+  else {
+    table = reg->repeat_range;
+  }
+
+  table[id].lower = lower;
+  if (IS_REPEAT_INFINITE(upper))
+    table[id].upper = 0x7fffffff;
+  else
+    table[id].upper = upper;
+  return 0;
+}
+
+/*
+ * Emits a counted repeat for qn:
+ *   OP_REPEAT[_NG] id reladdr, <target with optional empty check>,
+ *   OP_REPEAT_INC[_NG][_SG] id
+ * The id is reg->num_repeat at entry (incremented here) and the bounds
+ * are registered through entry_repeat_range().  The _SG increment
+ * opcodes are chosen when the quantifier is inside another repeat or,
+ * with USE_SUBEXP_CALL, when reg->num_call > 0.
+ * Returns 0 or the first non-zero step result.
+ */
+static int
+compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
+                          regex_t* reg)
+{
+  int r, inc_op;
+  const int id = reg->num_repeat;
+
+  if (qn->greedy)
+    r = add_opcode(reg, OP_REPEAT);
+  else
+    r = add_opcode(reg, OP_REPEAT_NG);
+  if (r) return r;
+
+  r = add_mem_num(reg, id); /* OP_REPEAT ID */
+  reg->num_repeat++;          /* counted even when the write above failed */
+  if (r) return r;
+
+  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
+  if (r) return r;
+
+  r = entry_repeat_range(reg, id, qn->lower, qn->upper);
+  if (r) return r;
+
+  r = compile_tree_empty_check(qn->target, reg, empty_info);
+  if (r) return r;
+
+  if (
+#ifdef USE_SUBEXP_CALL
+      reg->num_call > 0 ||
+#endif
+      IS_QUANTIFIER_IN_REPEAT(qn)) {
+    inc_op = qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG;
+  }
+  else {
+    inc_op = qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG;
+  }
+
+  r = add_opcode(reg, inc_op);
+  if (r) return r;
+  return add_mem_num(reg, id); /* OP_REPEAT ID */
+}
+
+/* Returns 1 when qn is a greedy quantifier with an infinite upper bound
+ * whose target is an any-char node (NT_CANY), 0 otherwise. */
+static int
+is_anychar_star_quantifier(QtfrNode* qn)
+{
+  if (!qn->greedy) return 0;
+  if (!IS_REPEAT_INFINITE(qn->upper)) return 0;
+
+  return (NTYPE(qn->target) == NT_CANY) ? 1 : 0;
+}
+
+#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
+#define CKN_ON (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+/*
+ * (USE_COMBINATION_EXPLOSION_CHECK build)
+ * Returns the number of code bytes compile_quantifier_node() emits for
+ * qn, or a negative error code from compile_length_tree().  The branch
+ * conditions follow those of compile_quantifier_node() in this same
+ * #ifdef section.
+ */
+static int
+compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
+{
+ int len, mod_tlen, cklen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ /* state-check number; 0 (CKN_ON false) when the regex has no check slots */
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
+
+ /* anychar repeat */
+ if (NTYPE(qn->target) == NT_CANY) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+ }
+ }
+
+ /* target size including the null-check wrapper, when one is needed */
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ if (qn->lower == 1)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
+ }
+ else {
+ if (qn->lower == 0)
+ len = SIZE_OP_JUMP;
+ else
+ len = 0;
+
+ len += mod_tlen + SIZE_OP_PUSH + cklen;
+ }
+ }
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ else
+ len = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ }
+ else {
+ len = SIZE_OP_PUSH + tlen;
+ }
+ }
+ else {
+ len = tlen;
+ }
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+ }
+ else {
+ /* general counted repeat: OP_REPEAT header + body + OP_REPEAT_INC */
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ if (CKN_ON)
+ len += SIZE_OP_STATE_CHECK;
+ }
+
+ return len;
+}
+
+/*
+ * (USE_COMBINATION_EXPLOSION_CHECK build)
+ * Emits code for quantifier node qn.  Returns 0 on success, a negative
+ * value from compile_length_tree(), or the first non-zero result of an
+ * emit/compile step.  When CKN_ON (ckn > 0) the state-check variants of
+ * the push opcodes are used and followed by the check number.
+ */
+static int
+compile_quantifier_node(QtfrNode* qn, regex_t* reg)
+{
+ int r, mod_tlen;
+ int ckn;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
+
+ /* greedy, unbounded any-char: `lower` copies of the target, then a STAR opcode */
+ if (is_anychar_star_quantifier(qn)) {
+ r = compile_tree_n_times(qn->target, qn->lower, reg);
+ if (r) return r;
+ if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ else
+ r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ if (r) return r;
+ /* NOTE(review): unreachable - the enclosing condition requires !CKN_ON */
+ if (CKN_ON) {
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ }
+
+ /* operand: first byte of the exact string that follows */
+ return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+ }
+ else {
+ if (IS_MULTILINE(reg->options)) {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_ML_STAR
+ : OP_ANYCHAR_ML_STAR));
+ }
+ else {
+ r = add_opcode(reg, (CKN_ON ?
+ OP_STATE_CHECK_ANYCHAR_STAR
+ : OP_ANYCHAR_STAR));
+ }
+ if (r) return r;
+ if (CKN_ON)
+ r = add_state_check_num(reg, ckn);
+
+ return r;
+ }
+ }
+
+ /* target size including the null-check wrapper, when one is needed */
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ if (infinite && qn->lower <= 1) {
+ if (qn->greedy) {
+ /* lower == 1: jump over the PUSH so the body runs once unconditionally */
+ if (qn->lower == 1) {
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
+ if (r) return r;
+ }
+
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ }
+ if (r) return r;
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ /* loop back to the PUSH */
+ r = add_opcode_rel_addr(reg, OP_JUMP,
+ -(mod_tlen + (int )SIZE_OP_JUMP
+ + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
+ }
+ else {
+ /* non-greedy: lower == 0 first jumps past the body to the trailing PUSH */
+ if (qn->lower == 0) {
+ r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ if (r) return r;
+ }
+ r = compile_tree_empty_check(qn->target, reg, empty_info);
+ if (r) return r;
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg,
+ -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+ }
+ else
+ r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+ }
+ }
+ else if (qn->upper == 0) {
+ /* {0}: the body is emitted only if it is referenced, and is jumped over */
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else
+ r = 0;
+ }
+ else if (qn->upper == 1 && qn->greedy) {
+ if (qn->lower == 0) {
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, tlen);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+ }
+ if (r) return r;
+ }
+
+ r = compile_tree(qn->target, reg);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ if (CKN_ON) {
+ r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ if (r) return r;
+ r = add_rel_addr(reg, SIZE_OP_JUMP);
+ }
+ else {
+ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+ }
+
+ if (r) return r;
+ r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ if (r) return r;
+ r = compile_tree(qn->target, reg);
+ }
+ else {
+ /* general counted repeat */
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ if (CKN_ON) {
+ if (r) return r;
+ r = add_opcode(reg, OP_STATE_CHECK);
+ if (r) return r;
+ r = add_state_check_num(reg, ckn);
+ }
+ }
+ return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+/*
+ * (build without USE_COMBINATION_EXPLOSION_CHECK)
+ * Returns the number of code bytes compile_quantifier_node() emits for
+ * qn, or a negative error code from compile_length_tree().  The branch
+ * conditions follow those of compile_quantifier_node() in this same
+ * #else section.
+ */
+static int
+compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
+{
+ int len, mod_tlen;
+ int infinite = IS_REPEAT_INFINITE(qn->upper);
+ int empty_info = qn->target_empty_info;
+ int tlen = compile_length_tree(qn->target, reg);
+
+ if (tlen < 0) return tlen;
+
+ /* anychar repeat */
+ if (NTYPE(qn->target) == NT_CANY) {
+ if (qn->greedy && infinite) {
+ if (IS_NOT_NULL(qn->next_head_exact))
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ else
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ }
+ }
+
+ /* target size including the null-check wrapper, when one is needed */
+ if (empty_info != 0)
+ mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+ else
+ mod_tlen = tlen;
+
+ /* unbounded repeat: the mandatory copies are expanded inline when small enough */
+ if (infinite &&
+ (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ len = SIZE_OP_JUMP;
+ }
+ else {
+ len = tlen * qn->lower;
+ }
+
+ if (qn->greedy) {
+ if (IS_NOT_NULL(qn->head_exact))
+ len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
+ else if (IS_NOT_NULL(qn->next_head_exact))
+ len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
+ else
+ len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
+ }
+ else
+ len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
+ }
+ else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ }
+ /* small bounded greedy repeat: expanded into lower copies + optional copies */
+ else if (!infinite && qn->greedy &&
+ (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+ <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+ len = tlen * qn->lower;
+ len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
+ }
+ else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+ len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
+ }
+ else {
+ /* general counted repeat: OP_REPEAT header + body + OP_REPEAT_INC */
+ len = SIZE_OP_REPEAT_INC
+ + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ }
+
+ return len;
+}
+
+/*
+ * (build without USE_COMBINATION_EXPLOSION_CHECK)
+ * Emits code for quantifier node qn.  Strategies, in the order tested:
+ *   - greedy unbounded any-char: `lower` copies + OP_ANYCHAR[_ML]_STAR
+ *     (or the PEEK_NEXT form with the first byte of the following exact
+ *     string as operand);
+ *   - unbounded repeat with a small mandatory part: inline copies, then a
+ *     PUSH/JUMP loop (greedy) or JUMP/PUSH loop (non-greedy);
+ *   - {0} on a referenced target: the body is emitted but jumped over;
+ *   - small bounded greedy repeat: `lower` copies, then PUSH + copy for
+ *     each optional repetition;
+ *   - '??': PUSH, JUMP over the body, body;
+ *   - anything else: counted repeat via compile_range_repeat_node().
+ *
+ * Returns 0 on success, a negative value from compile_length_tree(), or
+ * the first non-zero result of an emit/compile step.  The head-byte
+ * operand writes are checked like every other emit step.
+ */
+static int
+compile_quantifier_node(QtfrNode* qn, regex_t* reg)
+{
+  int i, r, mod_tlen;
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);
+
+  if (tlen < 0) return tlen;
+
+  if (is_anychar_star_quantifier(qn)) {
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+    if (IS_NOT_NULL(qn->next_head_exact)) {
+      if (IS_MULTILINE(reg->options))
+        r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+      else
+        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+      if (r) return r;
+      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+    }
+    else {
+      if (IS_MULTILINE(reg->options))
+        return add_opcode(reg, OP_ANYCHAR_ML_STAR);
+      else
+        return add_opcode(reg, OP_ANYCHAR_STAR);
+    }
+  }
+
+  /* target size including the null-check wrapper, when one is needed */
+  if (empty_info != 0)
+    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
+  else
+    mod_tlen = tlen;
+
+  if (infinite &&
+      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+      /* large body needed once: jump over the loop head into the body */
+      if (qn->greedy) {
+        if (IS_NOT_NULL(qn->head_exact))
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
+        else if (IS_NOT_NULL(qn->next_head_exact))
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
+        else
+          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
+      }
+      if (r) return r;
+    }
+    else {
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+    }
+
+    if (qn->greedy) {
+      if (IS_NOT_NULL(qn->head_exact)) {
+        r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
+                                mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = add_bytes(reg, NSTR(qn->head_exact)->s, 1);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
+      }
+      else if (IS_NOT_NULL(qn->next_head_exact)) {
+        r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
+                                mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
+      }
+      else {
+        r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+        if (r) return r;
+        r = compile_tree_empty_check(qn->target, reg, empty_info);
+        if (r) return r;
+        r = add_opcode_rel_addr(reg, OP_JUMP,
+          -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
+      }
+    }
+    else {
+      r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+      if (r) return r;
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+    }
+  }
+  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else if (!infinite && qn->greedy &&
+           (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
+                              <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    int n = qn->upper - qn->lower;
+
+    r = compile_tree_n_times(qn->target, qn->lower, reg);
+    if (r) return r;
+
+    /* each optional copy is guarded by a PUSH that skips all remaining copies */
+    for (i = 0; i < n; i++) {
+      r = add_opcode_rel_addr(reg, OP_PUSH,
+                              (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+    }
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+    if (r) return r;
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else {
+    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+  }
+  return r;
+}
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+/*
+ * Returns the code size of an option enclose node.  The target is
+ * measured with reg->options temporarily set to node->option.  When the
+ * change involves a dynamic option, the SET_OPTION_PUSH / SET_OPTION /
+ * FAIL prologue and the trailing SET_OPTION are added to the target size.
+ * A negative target size is returned unchanged.
+ */
+static int
+compile_length_option_node(EncloseNode* node, regex_t* reg)
+{
+  int body_len;
+  OnigOptionType outer = reg->options;
+
+  reg->options = node->option;
+  body_len = compile_length_tree(node->target, reg);
+  reg->options = outer;
+
+  if (body_len < 0) return body_len;
+
+  if (!IS_DYNAMIC_OPTION(outer ^ node->option))
+    return body_len;
+
+  return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
+         + body_len + SIZE_OP_SET_OPTION;
+}
+
+/*
+ * Emits code for an option enclose node.  When the change involves a
+ * dynamic option the target is bracketed by
+ *   OP_SET_OPTION_PUSH(new), OP_SET_OPTION(old), OP_FAIL ... OP_SET_OPTION(old).
+ * The target is compiled with reg->options set to node->option, and the
+ * previous options are restored afterwards even if compilation failed.
+ * Returns 0 or the first non-zero step result.
+ */
+static int
+compile_option_node(EncloseNode* node, regex_t* reg)
+{
+  int r;
+  OnigOptionType outer = reg->options;
+
+  if (IS_DYNAMIC_OPTION(outer ^ node->option)) {
+    r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
+    if (r) return r;
+    r = add_opcode_option(reg, OP_SET_OPTION, outer);
+    if (r) return r;
+    r = add_opcode(reg, OP_FAIL);
+    if (r) return r;
+  }
+
+  reg->options = node->option;
+  r = compile_tree(node->target, reg);
+  reg->options = outer;
+  if (r) return r;
+
+  if (IS_DYNAMIC_OPTION(outer ^ node->option))
+    r = add_opcode_option(reg, OP_SET_OPTION, outer);
+
+  return r;
+}
+
+/*
+ * Returns the code size of an enclose node, a negative error code from
+ * compile_length_tree(), or ONIGERR_TYPE_BUG for an unknown enclose type.
+ * Option nodes are delegated to compile_length_option_node().
+ */
+static int
+compile_length_enclose_node(EncloseNode* node, regex_t* reg)
+{
+ int len;
+ int tlen;
+
+ if (node->type == ENCLOSE_OPTION)
+ return compile_length_option_node(node, reg);
+
+ /* size of the enclosed target; 0 when there is none */
+ if (node->target) {
+ tlen = compile_length_tree(node->target, reg);
+ if (tlen < 0) return tlen;
+ }
+ else
+ tlen = 0;
+
+ switch (node->type) {
+ case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* a called group additionally carries OP_CALL, OP_JUMP and OP_RETURN */
+ if (IS_ENCLOSE_CALLED(node)) {
+ len = SIZE_OP_MEMORY_START_PUSH + tlen
+ + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ else
+ len += (IS_ENCLOSE_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ }
+ else
+#endif
+ {
+ /* the PUSH variants are selected by the group's bit in bt_mem_start / bt_mem_end */
+ if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+ len = SIZE_OP_MEMORY_START_PUSH;
+ else
+ len = SIZE_OP_MEMORY_START;
+
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+ }
+ break;
+
+ case ENCLOSE_STOP_BACKTRACK:
+ if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+ /* simple repeat: measured from the quantifier's own target, replacing tlen */
+ QtfrNode* qn = NQTFR(node->target);
+ tlen = compile_length_tree(qn->target, reg);
+ if (tlen < 0) return tlen;
+
+ len = tlen * qn->lower
+ + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+ }
+ else {
+ len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
+ }
+ break;
+
+ default:
+ return ONIGERR_TYPE_BUG;
+ break;
+ }
+
+ return len;
+}
+
+static int get_char_length_tree(Node* node, regex_t* reg, int* len);
+
+/*
+ * Emit the bytecode for an enclose node into reg.
+ *
+ * ENCLOSE_OPTION is delegated to compile_option_node().
+ * ENCLOSE_MEMORY emits a memory-start opcode, the group number, the body and
+ * a memory-end opcode; with USE_SUBEXP_CALL a called group is additionally
+ * prefixed by OP_CALL + OP_JUMP and terminated by OP_RETURN, and its entry
+ * address is recorded in node->call_addr (NST_ADDR_FIXED is set).
+ * ENCLOSE_STOP_BACKTRACK emits either the simple-repeat loop
+ * (PUSH / body / POP / JUMP back) or PUSH_STOP_BT / body / POP_STOP_BT.
+ *
+ * Returns 0 on success, a non-zero status from an emit helper, a negative
+ * value from compile_length_tree(), or ONIGERR_TYPE_BUG for an unrecognized
+ * enclose type.
+ */
+static int
+compile_enclose_node(EncloseNode* node, regex_t* reg)
+{
+  int r, len;
+
+  if (node->type == ENCLOSE_OPTION)
+    return compile_option_node(node, reg);
+
+  switch (node->type) {
+  case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+    if (IS_ENCLOSE_CALLED(node)) {
+      r = add_opcode(reg, OP_CALL);
+      if (r) return r;
+      /* The group body starts right after the address operand and the JUMP. */
+      node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+      node->state |= NST_ADDR_FIXED;
+      r = add_abs_addr(reg, (int )node->call_addr);
+      if (r) return r;
+      len = compile_length_tree(node->target, reg);
+      /* Do not fold an error code into the jump distance. */
+      if (len < 0) return len;
+      len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        len += (IS_ENCLOSE_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+      else
+        len += (IS_ENCLOSE_RECURSION(node)
+                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+
+      /* Skip over the callable body when reached by normal control flow. */
+      r = add_opcode_rel_addr(reg, OP_JUMP, len);
+      if (r) return r;
+    }
+#endif
+    if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+      r = add_opcode(reg, OP_MEMORY_START_PUSH);
+    else
+      r = add_opcode(reg, OP_MEMORY_START);
+    if (r) return r;
+    r = add_mem_num(reg, node->regnum);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+#ifdef USE_SUBEXP_CALL
+    if (IS_ENCLOSE_CALLED(node)) {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+                             ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+      else
+        r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
+                             ? OP_MEMORY_END_REC : OP_MEMORY_END));
+
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+      if (r) return r;
+      r = add_opcode(reg, OP_RETURN);
+    }
+    else
+#endif
+    {
+      if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+        r = add_opcode(reg, OP_MEMORY_END_PUSH);
+      else
+        r = add_opcode(reg, OP_MEMORY_END);
+      if (r) return r;
+      r = add_mem_num(reg, node->regnum);
+    }
+    break;
+
+  case ENCLOSE_STOP_BACKTRACK:
+    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+      QtfrNode* qn = NQTFR(node->target);
+      /* Mandatory repetitions first, then the PUSH/body/POP/JUMP loop. */
+      r = compile_tree_n_times(qn->target, qn->lower, reg);
+      if (r) return r;
+
+      len = compile_length_tree(qn->target, reg);
+      if (len < 0) return len;
+
+      r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP);
+      if (r) return r;
+      r = add_opcode_rel_addr(reg, OP_JUMP,
+         -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+    }
+    else {
+      r = add_opcode(reg, OP_PUSH_STOP_BT);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_POP_STOP_BT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Compute the number of bytecode bytes needed for an anchor node.
+ *
+ * Look-ahead and look-behind anchors add their bracket opcodes to the length
+ * of node->target; every other anchor type costs a single opcode.
+ *
+ * Returns the length, or a negative value propagated from
+ * compile_length_tree() for the target.
+ */
+static int
+compile_length_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int total;
+  int body_len = 0;
+
+  if (node->target) {
+    body_len = compile_length_tree(node->target, reg);
+    if (body_len < 0) return body_len;
+  }
+
+  if (node->type == ANCHOR_PREC_READ)
+    total = SIZE_OP_PUSH_POS + body_len + SIZE_OP_POP_POS;
+  else if (node->type == ANCHOR_PREC_READ_NOT)
+    total = SIZE_OP_PUSH_POS_NOT + body_len + SIZE_OP_FAIL_POS;
+  else if (node->type == ANCHOR_LOOK_BEHIND)
+    total = SIZE_OP_LOOK_BEHIND + body_len;
+  else if (node->type == ANCHOR_LOOK_BEHIND_NOT)
+    total = SIZE_OP_PUSH_LOOK_BEHIND_NOT + body_len
+          + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
+  else
+    total = SIZE_OPCODE;
+
+  return total;
+}
+
+/*
+ * Emit the bytecode for an anchor node into reg.
+ *
+ * Simple anchors map to one opcode each.  Look-ahead anchors bracket the
+ * compiled target with PUSH_POS/POP_POS or PUSH_POS_NOT/FAIL_POS.
+ * Look-behind anchors emit their opcode, then the character length to step
+ * back (node->char_len, or the value computed by get_char_length_tree() when
+ * char_len is negative), then the target.
+ *
+ * Returns 0 on success, a non-zero status from an emit helper, a negative
+ * value from compile_length_tree(), ONIGERR_INVALID_LOOK_BEHIND_PATTERN when
+ * the look-behind length cannot be determined, or ONIGERR_TYPE_BUG for an
+ * unrecognized anchor type.
+ */
+static int
+compile_anchor_node(AnchorNode* node, regex_t* reg)
+{
+  int r, len;
+
+  switch (node->type) {
+  case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
+  case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
+  case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
+  case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
+  case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
+  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
+
+  case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
+  case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
+#ifdef USE_WORD_BEGIN_END
+  case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
+  case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
+#endif
+
+  case ANCHOR_PREC_READ:
+    r = add_opcode(reg, OP_PUSH_POS);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+    r = add_opcode(reg, OP_POP_POS);
+    break;
+
+  case ANCHOR_PREC_READ_NOT:
+    len = compile_length_tree(node->target, reg);
+    if (len < 0) return len;
+    r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
+    if (r) return r;
+    r = compile_tree(node->target, reg);
+    if (r) return r;
+    r = add_opcode(reg, OP_FAIL_POS);
+    break;
+
+  case ANCHOR_LOOK_BEHIND:
+    {
+      int n;
+      r = add_opcode(reg, OP_LOOK_BEHIND);
+      if (r) return r;
+      if (node->char_len < 0) {
+        r = get_char_length_tree(node->target, reg, &n);
+        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+      }
+      else
+        n = node->char_len;
+      r = add_length(reg, n);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+    }
+    break;
+
+  case ANCHOR_LOOK_BEHIND_NOT:
+    {
+      int n;
+      len = compile_length_tree(node->target, reg);
+      /* Same guard as ANCHOR_PREC_READ_NOT: never use an error code as a
+         relative jump distance. */
+      if (len < 0) return len;
+      r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
+                              len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
+      if (r) return r;
+      if (node->char_len < 0) {
+        r = get_char_length_tree(node->target, reg, &n);
+        if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+      }
+      else
+        n = node->char_len;
+      r = add_length(reg, n);
+      if (r) return r;
+      r = compile_tree(node->target, reg);
+      if (r) return r;
+      r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
+    }
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Compute the number of bytecode bytes that compile_tree() will emit for the
+ * subtree rooted at node.
+ *
+ * Returns the length (>= 0) on success, a negative error code propagated
+ * from a child computation, or ONIGERR_TYPE_BUG for an unrecognized node
+ * type.
+ */
+static int
+compile_length_tree(Node* node, regex_t* reg)
+{
+  int len, type, r;
+
+  type = NTYPE(node);
+  switch (type) {
+  case NT_LIST:
+    len = 0;
+    do {
+      r = compile_length_tree(NCAR(node), reg);
+      if (r < 0) return r;
+      len += r;
+    } while (IS_NOT_NULL(node = NCDR(node)));
+    r = len;
+    break;
+
+  case NT_ALT:
+    {
+      int n;
+
+      n = r = 0;
+      do {
+        len = compile_length_tree(NCAR(node), reg);
+        /* Propagate errors instead of summing them into the length. */
+        if (len < 0) return len;
+        r += len;
+        n++;
+      } while (IS_NOT_NULL(node = NCDR(node)));
+      /* Every alternative except the last needs a PUSH and a JUMP. */
+      r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
+    }
+    break;
+
+  case NT_STR:
+    if (NSTRING_IS_RAW(node))
+      r = compile_length_string_raw_node(NSTR(node), reg);
+    else
+      r = compile_length_string_node(node, reg);
+    break;
+
+  case NT_CCLASS:
+    r = compile_length_cclass_node(NCCLASS(node), reg);
+    break;
+
+  case NT_CTYPE:
+  case NT_CANY:
+    r = SIZE_OPCODE;
+    break;
+
+  case NT_BREF:
+    {
+      BRefNode* br = NBREF(node);
+
+#ifdef USE_BACKREF_WITH_LEVEL
+      if (IS_BACKREF_NEST_LEVEL(br)) {
+        r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+      }
+      else
+#endif
+      if (br->back_num == 1) {
+        /* Case-sensitive references to group 1 or 2 use an operand-less
+           opcode (see compile_tree). */
+        r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
+             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
+      }
+      else {
+        r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case NT_CALL:
+    r = SIZE_OP_CALL;
+    break;
+#endif
+
+  case NT_QTFR:
+    r = compile_length_quantifier_node(NQTFR(node), reg);
+    break;
+
+  case NT_ENCLOSE:
+    r = compile_length_enclose_node(NENCLOSE(node), reg);
+    break;
+
+  case NT_ANCHOR:
+    r = compile_length_anchor_node(NANCHOR(node), reg);
+    break;
+
+  default:
+    return ONIGERR_TYPE_BUG;
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Emit the bytecode for the subtree rooted at node into reg.
+ *
+ * Dispatches on the node type to the per-node compile helpers.  An
+ * unrecognized node type emits nothing (and, under ONIG_DEBUG, prints a
+ * diagnostic to stderr).
+ *
+ * Returns 0 on success, a non-zero status from a helper, or a negative value
+ * propagated from compile_length_tree().
+ */
+static int
+compile_tree(Node* node, regex_t* reg)
+{
+  int n, type, len, pos, r = 0;
+
+  type = NTYPE(node);
+  switch (type) {
+  case NT_LIST:
+    do {
+      r = compile_tree(NCAR(node), reg);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+    break;
+
+  case NT_ALT:
+    {
+      Node* x = node;
+      int alt_len;
+
+      /* First pass: total size of the alternation, to find where every
+         alternative jumps to once it has matched. */
+      len = 0;
+      do {
+        alt_len = compile_length_tree(NCAR(x), reg);
+        if (alt_len < 0) return alt_len;
+        len += alt_len;
+        if (NCDR(x) != NULL) {
+          len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+        }
+      } while (IS_NOT_NULL(x = NCDR(x)));
+      pos = reg->used + len;  /* goal position */
+
+      /* Second pass: PUSH <next alternative>, body, JUMP <goal>. */
+      do {
+        len = compile_length_tree(NCAR(node), reg);
+        if (len < 0) {
+          r = len;
+          break;
+        }
+        if (IS_NOT_NULL(NCDR(node))) {
+          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
+          if (r) break;
+        }
+        r = compile_tree(NCAR(node), reg);
+        if (r) break;
+        if (IS_NOT_NULL(NCDR(node))) {
+          len = pos - (reg->used + SIZE_OP_JUMP);
+          r = add_opcode_rel_addr(reg, OP_JUMP, len);
+          if (r) break;
+        }
+      } while (IS_NOT_NULL(node = NCDR(node)));
+    }
+    break;
+
+  case NT_STR:
+    if (NSTRING_IS_RAW(node))
+      r = compile_string_raw_node(NSTR(node), reg);
+    else
+      r = compile_string_node(node, reg);
+    break;
+
+  case NT_CCLASS:
+    r = compile_cclass_node(NCCLASS(node), reg);
+    break;
+
+  case NT_CTYPE:
+    {
+      int op;
+
+      switch (NCTYPE(node)->ctype) {
+      case ONIGENC_CTYPE_WORD:
+        if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
+        else                         op = OP_WORD;
+        break;
+      default:
+        return ONIGERR_TYPE_BUG;
+        break;
+      }
+      r = add_opcode(reg, op);
+    }
+    break;
+
+  case NT_CANY:
+    if (IS_MULTILINE(reg->options))
+      r = add_opcode(reg, OP_ANYCHAR_ML);
+    else
+      r = add_opcode(reg, OP_ANYCHAR);
+    break;
+
+  case NT_BREF:
+    {
+      BRefNode* br = NBREF(node);
+
+#ifdef USE_BACKREF_WITH_LEVEL
+      if (IS_BACKREF_NEST_LEVEL(br)) {
+        r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
+        if (r) return r;
+        r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+        if (r) return r;
+        r = add_length(reg, br->nest_level);
+        if (r) return r;
+
+        goto add_bacref_mems;
+      }
+      else
+#endif
+      if (br->back_num == 1) {
+        n = br->back_static[0];
+        if (IS_IGNORECASE(reg->options)) {
+          r = add_opcode(reg, OP_BACKREFN_IC);
+          if (r) return r;
+          r = add_mem_num(reg, n);
+        }
+        else {
+          switch (n) {
+          case 1:  r = add_opcode(reg, OP_BACKREF1); break;
+          case 2:  r = add_opcode(reg, OP_BACKREF2); break;
+          default:
+            r = add_opcode(reg, OP_BACKREFN);
+            if (r) return r;
+            r = add_mem_num(reg, n);
+            break;
+          }
+        }
+      }
+      else {
+        int i;
+        int* p;
+
+        if (IS_IGNORECASE(reg->options)) {
+          r = add_opcode(reg, OP_BACKREF_MULTI_IC);
+        }
+        else {
+          r = add_opcode(reg, OP_BACKREF_MULTI);
+        }
+        if (r) return r;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+      add_bacref_mems:
+#endif
+        /* Group count followed by the group numbers, last one first. */
+        r = add_length(reg, br->back_num);
+        if (r) return r;
+        p = BACKREFS_P(br);
+        for (i = br->back_num - 1; i >= 0; i--) {
+          r = add_mem_num(reg, p[i]);
+          if (r) return r;
+        }
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case NT_CALL:
+    r = compile_call(NCALL(node), reg);
+    break;
+#endif
+
+  case NT_QTFR:
+    r = compile_quantifier_node(NQTFR(node), reg);
+    break;
+
+  case NT_ENCLOSE:
+    r = compile_enclose_node(NENCLOSE(node), reg);
+    break;
+
+  case NT_ANCHOR:
+    r = compile_anchor_node(NANCHOR(node), reg);
+    break;
+
+  default:
+#ifdef ONIG_DEBUG
+    fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
+#endif
+    break;
+  }
+
+  return r;
+}
+
+#ifdef USE_NAMED_GROUP
+
+/*
+ * Walk the tree at *plink, keeping named memory groups and removing unnamed
+ * ones, and record the new group numbers in map.
+ *
+ * plink   - address of the link holding the current node; it is overwritten
+ *           when the node it points at is removed.
+ * map     - indexed by the old group number; new_val receives the new number
+ *           of each named group that is kept.
+ * counter - running count of named groups seen so far; incremented in place.
+ *
+ * Returns the status of the last recursive call made (0 when no recursion
+ * reported an error).
+ */
+static int
+noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
+{
+ int r = 0;
+ Node* node = *plink;
+
+ switch (NTYPE(node)) {
+ case NT_LIST:
+ case NT_ALT:
+ do {
+ r = noname_disable_map(&(NCAR(node)), map, counter);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ break;
+
+ case NT_QTFR:
+ {
+ Node** ptarget = &(NQTFR(node)->target);
+ Node* old = *ptarget;
+ r = noname_disable_map(ptarget, map, counter);
+ /* If removing a group exposed a quantifier directly under this one,
+ merge the two nested quantifiers. */
+ if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
+ onig_reduce_nested_quantifier(node, *ptarget);
+ }
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+ if (en->type == ENCLOSE_MEMORY) {
+ if (IS_ENCLOSE_NAMED_GROUP(en)) {
+ /* Named group: assign the next number and remember old -> new. */
+ (*counter)++;
+ map[en->regnum].new_val = *counter;
+ en->regnum = *counter;
+ r = noname_disable_map(&(en->target), map, counter);
+ }
+ else {
+ /* Unnamed group: splice its target into the parent link, free the
+ group node itself, then process the spliced-in node. */
+ *plink = en->target;
+ en->target = NULL_NODE;
+ onig_node_free(node);
+ r = noname_disable_map(plink, map, counter);
+ }
+ }
+ else
+ r = noname_disable_map(&(en->target), map, counter);
+ }
+ break;
+
+ case NT_ANCHOR:
+ {
+ AnchorNode* an = NANCHOR(node);
+ /* Only look-ahead / look-behind anchors are descended into. */
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = noname_disable_map(&(an->target), map, counter);
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Rewrite the group numbers stored in a back-reference node using map.
+ *
+ * Entries whose mapped value is not positive are dropped and the remaining
+ * ones are compacted to the front of the array; back_num is updated to the
+ * number of entries kept.
+ *
+ * Returns 0 on success, or ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED when
+ * the node is not a by-name reference.
+ */
+static int
+renumber_node_backref(Node* node, GroupNumRemap* map)
+{
+  BRefNode* bref = NBREF(node);
+  int* refs;
+  int total, src, dst;
+
+  if (! IS_BACKREF_NAME_REF(bref))
+    return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+
+  total = bref->back_num;
+  refs = IS_NULL(bref->back_dynamic) ? bref->back_static : bref->back_dynamic;
+
+  dst = 0;
+  for (src = 0; src < total; src++) {
+    int mapped = map[refs[src]].new_val;
+
+    if (mapped <= 0)
+      continue;
+
+    refs[dst] = mapped;
+    dst++;
+  }
+
+  bref->back_num = dst;
+  return 0;
+}
+
+/*
+ * Apply renumber_node_backref() to every back-reference node reachable from
+ * node through lists, alternations, quantifiers, enclose nodes and
+ * look-ahead / look-behind anchors.
+ *
+ * Returns 0 on success or the first non-zero status encountered.
+ */
+static int
+renumber_by_map(Node* node, GroupNumRemap* map)
+{
+  int status = 0;
+
+  switch (NTYPE(node)) {
+  case NT_LIST:
+  case NT_ALT:
+    for (;;) {
+      status = renumber_by_map(NCAR(node), map);
+      if (status != 0) break;
+      node = NCDR(node);
+      if (IS_NULL(node)) break;
+    }
+    break;
+
+  case NT_QTFR:
+    status = renumber_by_map(NQTFR(node)->target, map);
+    break;
+
+  case NT_ENCLOSE:
+    status = renumber_by_map(NENCLOSE(node)->target, map);
+    break;
+
+  case NT_BREF:
+    status = renumber_node_backref(node, map);
+    break;
+
+  case NT_ANCHOR:
+    {
+      AnchorNode* anchor = NANCHOR(node);
+
+      if (anchor->type == ANCHOR_PREC_READ ||
+          anchor->type == ANCHOR_PREC_READ_NOT ||
+          anchor->type == ANCHOR_LOOK_BEHIND ||
+          anchor->type == ANCHOR_LOOK_BEHIND_NOT) {
+        status = renumber_by_map(anchor->target, map);
+      }
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return status;
+}
+
+/*
+ * Check that the tree contains no back-reference that is not a by-name
+ * reference.
+ *
+ * Descends through lists, alternations, quantifiers, enclose nodes and the
+ * targets of look-ahead / look-behind anchors (the same set of nodes that
+ * renumber_by_map() visits), so a numbered reference inside a look-around
+ * is reported as well.
+ *
+ * Returns 0 when no such reference is found, otherwise
+ * ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED.
+ */
+static int
+numbered_ref_check(Node* node)
+{
+  int r = 0;
+
+  switch (NTYPE(node)) {
+  case NT_LIST:
+  case NT_ALT:
+    do {
+      r = numbered_ref_check(NCAR(node));
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+    break;
+  case NT_QTFR:
+    r = numbered_ref_check(NQTFR(node)->target);
+    break;
+  case NT_ENCLOSE:
+    r = numbered_ref_check(NENCLOSE(node)->target);
+    break;
+
+  case NT_BREF:
+    if (! IS_BACKREF_NAME_REF(NBREF(node)))
+      return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+    break;
+
+  case NT_ANCHOR:
+    {
+      AnchorNode* an = NANCHOR(node);
+      /* Only these anchor types carry a target subtree. */
+      switch (an->type) {
+      case ANCHOR_PREC_READ:
+      case ANCHOR_PREC_READ_NOT:
+      case ANCHOR_LOOK_BEHIND:
+      case ANCHOR_LOOK_BEHIND_NOT:
+        r = numbered_ref_check(an->target);
+        break;
+      }
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Remove unnamed capture groups from the tree at *root and renumber the
+ * named ones, updating back-references, the scan environment's group table,
+ * the capture-history bits and the regex's name table to match.
+ *
+ * Returns 0 on success or the first non-zero status from a helper
+ * (CHECK_NULL_RETURN_MEMERR presumably returns a memory error when the
+ * xalloca result is NULL -- confirm against its definition).
+ */
+static int
+disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
+{
+ int r, i, pos, counter;
+ BitStatusType loc;
+ GroupNumRemap* map;
+
+ /* One slot per existing group; index 0 is not initialized or used below. */
+ map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
+ CHECK_NULL_RETURN_MEMERR(map);
+ for (i = 1; i <= env->num_mem; i++) {
+ map[i].new_val = 0;
+ }
+ counter = 0;
+ r = noname_disable_map(root, map, &counter);
+ if (r != 0) return r;
+
+ r = renumber_by_map(*root, map);
+ if (r != 0) return r;
+
+ /* Compact the group-node table so surviving groups occupy 1..N. */
+ for (i = 1, pos = 1; i <= env->num_mem; i++) {
+ if (map[i].new_val > 0) {
+ SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
+ pos++;
+ }
+ }
+
+ /* Rebuild the capture-history bit set under the new numbering. */
+ loc = env->capture_history;
+ BIT_STATUS_CLEAR(env->capture_history);
+ for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
+ if (BIT_STATUS_AT(loc, i)) {
+ BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
+ }
+ }
+
+ env->num_mem = env->num_named;
+ reg->num_mem = env->num_named;
+
+ return onig_renumber_name_table(reg, map);
+}
+#endif /* USE_NAMED_GROUP */
+
+#ifdef USE_SUBEXP_CALL
+/*
+ * Patch every recorded call site in uslist with the address of the group it
+ * calls.
+ *
+ * For each entry, the target enclose node's call_addr is written into the
+ * compiled buffer at the entry's offset.
+ *
+ * Returns 0 on success, or ONIGERR_PARSER_BUG when a target group has no
+ * fixed address yet.
+ */
+static int
+unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
+{
+  int idx;
+
+  for (idx = 0; idx < uslist->num; idx++) {
+    EncloseNode* callee = NENCLOSE(uslist->us[idx].target);
+    AbsAddrType target_addr;
+    int patch_pos;
+
+    if (! IS_ENCLOSE_ADDR_FIXED(callee))
+      return ONIGERR_PARSER_BUG;
+
+    target_addr = callee->call_addr;
+    patch_pos = uslist->us[idx].offset;
+
+    BBUF_WRITE(reg, patch_pos, &target_addr, SIZE_ABSADDR);
+  }
+
+  return 0;
+}
+#endif
+
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+/*
+ * Classify the subtree for the quantifier empty-check.
+ *
+ * Returns NQ_TARGET_IS_EMPTY_MEM as soon as a memory group is found,
+ * NQ_TARGET_IS_EMPTY_REC for a recursive call, the largest value reported by
+ * the elements of a list/alternation, and 0 when nothing of interest is
+ * found.
+ */
+static int
+quantifiers_memory_node_info(Node* node)
+{
+  int info = 0;
+
+  switch (NTYPE(node)) {
+  case NT_LIST:
+  case NT_ALT:
+    for (;;) {
+      int item = quantifiers_memory_node_info(NCAR(node));
+
+      if (item > info) info = item;
+      if (item < 0) break;
+
+      node = NCDR(node);
+      if (IS_NULL(node)) break;
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case NT_CALL:
+    if (IS_CALL_RECURSION(NCALL(node)))
+      return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
+
+    info = quantifiers_memory_node_info(NCALL(node)->target);
+    break;
+#endif
+
+  case NT_QTFR:
+    /* A quantifier with upper == 0 is not descended into. */
+    if (NQTFR(node)->upper != 0)
+      info = quantifiers_memory_node_info(NQTFR(node)->target);
+    break;
+
+  case NT_ENCLOSE:
+    {
+      EncloseNode* enc = NENCLOSE(node);
+
+      if (enc->type == ENCLOSE_MEMORY)
+        return NQ_TARGET_IS_EMPTY_MEM;
+
+      if (enc->type == ENCLOSE_OPTION ||
+          enc->type == ENCLOSE_STOP_BACKTRACK)
+        info = quantifiers_memory_node_info(enc->target);
+    }
+    break;
+
+  default:
+    /* NT_BREF, NT_STR, NT_CTYPE, NT_CCLASS, NT_CANY, NT_ANCHOR, others */
+    break;
+  }
+
+  return info;
+}
+#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
+
+/*
+ * Compute a lower bound on the length matched by the subtree at node and
+ * store it in *min (which is reset to 0 on entry).
+ *
+ * Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference names
+ * a group number greater than env->num_mem, or a non-zero status from a
+ * recursive call.
+ */
+static int
+get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
+{
+ OnigDistance tmin;
+ int r = 0;
+
+ *min = 0;
+ switch (NTYPE(node)) {
+ case NT_BREF:
+ {
+ int i;
+ int* backs;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BRefNode* br = NBREF(node);
+ /* A back-reference flagged as recursive keeps the minimum at 0. */
+ if (br->state & NST_RECURSION) break;
+
+ /* Otherwise take the smallest minimum among the referenced groups. */
+ backs = BACKREFS_P(br);
+ if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[0]], min, env);
+ if (r != 0) break;
+ for (i = 1; i < br->back_num; i++) {
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_min_match_length(nodes[backs[i]], &tmin, env);
+ if (r != 0) break;
+ if (*min > tmin) *min = tmin;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ /* For a recursive call only an already cached minimum is used; the
+ target is not descended into. */
+ if (IS_CALL_RECURSION(NCALL(node))) {
+ EncloseNode* en = NENCLOSE(NCALL(node)->target);
+ if (IS_ENCLOSE_MIN_FIXED(en))
+ *min = en->min_len;
+ }
+ else
+ r = get_min_match_length(NCALL(node)->target, min, env);
+ break;
+#endif
+
+ case NT_LIST:
+ /* Concatenation: the minimums add up. */
+ do {
+ r = get_min_match_length(NCAR(node), &tmin, env);
+ if (r == 0) *min += tmin;
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ break;
+
+ case NT_ALT:
+ /* Alternation: the smallest minimum of the alternatives. */
+ {
+ Node *x, *y;
+ y = node;
+ do {
+ x = NCAR(y);
+ r = get_min_match_length(x, &tmin, env);
+ if (r != 0) break;
+ if (y == node) *min = tmin;
+ else if (*min > tmin) *min = tmin;
+ } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
+ }
+ break;
+
+ case NT_STR:
+ {
+ StrNode* sn = NSTR(node);
+ *min = sn->end - sn->s;
+ }
+ break;
+
+ case NT_CTYPE:
+ *min = 1;
+ break;
+
+ case NT_CCLASS:
+ case NT_CANY:
+ *min = 1;
+ break;
+
+ case NT_QTFR:
+ {
+ QtfrNode* qn = NQTFR(node);
+
+ /* lower == 0 leaves the minimum at 0. */
+ if (qn->lower > 0) {
+ r = get_min_match_length(qn->target, min, env);
+ if (r == 0)
+ *min = distance_multiply(*min, qn->lower);
+ }
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+ switch (en->type) {
+ case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* Cache the result on the group node (NST_MIN_FIXED) so later
+ queries reuse en->min_len. */
+ if (IS_ENCLOSE_MIN_FIXED(en))
+ *min = en->min_len;
+ else {
+ r = get_min_match_length(en->target, min, env);
+ if (r == 0) {
+ en->min_len = *min;
+ SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
+ }
+ }
+ break;
+#endif
+ /* Without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to here. */
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ r = get_min_match_length(en->target, min, env);
+ break;
+ }
+ }
+ break;
+
+ case NT_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Compute an upper bound on the length matched by the subtree at node and
+ * store it in *max (which is reset to 0 on entry).  Unbounded cases are
+ * reported as ONIG_INFINITE_DISTANCE.
+ *
+ * Returns 0 on success, ONIGERR_INVALID_BACKREF when a back-reference names
+ * a group number greater than env->num_mem, or a non-zero status from a
+ * recursive call.
+ */
+static int
+get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
+{
+ OnigDistance tmax;
+ int r = 0;
+
+ *max = 0;
+ switch (NTYPE(node)) {
+ case NT_LIST:
+ /* Concatenation: the maximums add up (via distance_add). */
+ do {
+ r = get_max_match_length(NCAR(node), &tmax, env);
+ if (r == 0)
+ *max = distance_add(*max, tmax);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ break;
+
+ case NT_ALT:
+ /* Alternation: the largest maximum of the alternatives. */
+ do {
+ r = get_max_match_length(NCAR(node), &tmax, env);
+ if (r == 0 && *max < tmax) *max = tmax;
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ break;
+
+ case NT_STR:
+ {
+ StrNode* sn = NSTR(node);
+ *max = sn->end - sn->s;
+ }
+ break;
+
+ case NT_CTYPE:
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ break;
+
+ case NT_CCLASS:
+ case NT_CANY:
+ *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ break;
+
+ case NT_BREF:
+ {
+ int i;
+ int* backs;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BRefNode* br = NBREF(node);
+ /* A back-reference flagged as recursive is treated as unbounded. */
+ if (br->state & NST_RECURSION) {
+ *max = ONIG_INFINITE_DISTANCE;
+ break;
+ }
+ /* Otherwise take the largest maximum among the referenced groups. */
+ backs = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env);
+ if (r != 0) break;
+ if (*max < tmax) *max = tmax;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ if (! IS_CALL_RECURSION(NCALL(node)))
+ r = get_max_match_length(NCALL(node)->target, max, env);
+ else
+ *max = ONIG_INFINITE_DISTANCE;
+ break;
+#endif
+
+ case NT_QTFR:
+ {
+ QtfrNode* qn = NQTFR(node);
+
+ /* upper == 0 leaves the maximum at 0; a target whose maximum is 0
+ stays 0 regardless of the repeat count. */
+ if (qn->upper != 0) {
+ r = get_max_match_length(qn->target, max, env);
+ if (r == 0 && *max != 0) {
+ if (! IS_REPEAT_INFINITE(qn->upper))
+ *max = distance_multiply(*max, qn->upper);
+ else
+ *max = ONIG_INFINITE_DISTANCE;
+ }
+ }
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+ switch (en->type) {
+ case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* Cache the result on the group node (NST_MAX_FIXED) so later
+ queries reuse en->max_len. */
+ if (IS_ENCLOSE_MAX_FIXED(en))
+ *max = en->max_len;
+ else {
+ r = get_max_match_length(en->target, max, env);
+ if (r == 0) {
+ en->max_len = *max;
+ SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
+ }
+ }
+ break;
+#endif
+ /* Without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to here. */
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ r = get_max_match_length(en->target, max, env);
+ break;
+ }
+ }
+ break;
+
+ case NT_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+}
+
+#define GET_CHAR_LEN_VARLEN -1
+#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
+
+/*
+ * Compute the length, in characters, of a fixed-length pattern subtree and
+ * store it in *len (which is reset to 0 on entry).
+ *
+ * level is the nesting depth of the caller; it is incremented on entry, so
+ * the root call (made with level 0) runs at level 1.
+ *
+ * Returns 0 when the subtree has a fixed character length,
+ * GET_CHAR_LEN_TOP_ALT_VARLEN when an alternation at level 1 has branches of
+ * different lengths, and GET_CHAR_LEN_VARLEN for any other variable-length
+ * construct or unhandled node type.
+ */
+static int
+get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
+{
+ int tlen;
+ int r = 0;
+
+ level++;
+ *len = 0;
+ switch (NTYPE(node)) {
+ case NT_LIST:
+ do {
+ r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
+ if (r == 0)
+ *len = distance_add(*len, tlen);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ break;
+
+ case NT_ALT:
+ {
+ int tlen2;
+ int varlen = 0;
+
+ /* Every branch is compared against the length of the first one. */
+ r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
+ while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
+ r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
+ if (r == 0) {
+ if (tlen != tlen2)
+ varlen = 1;
+ }
+ }
+ if (r == 0) {
+ if (varlen != 0) {
+ if (level == 1)
+ r = GET_CHAR_LEN_TOP_ALT_VARLEN;
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ else
+ *len = tlen;
+ }
+ }
+ break;
+
+ case NT_STR:
+ {
+ StrNode* sn = NSTR(node);
+ UChar *s = sn->s;
+ /* Count characters by stepping over each encoded character. */
+ while (s < sn->end) {
+ s += enclen(reg->enc, s, sn->end);
+ (*len)++;
+ }
+ }
+ break;
+
+ case NT_QTFR:
+ {
+ QtfrNode* qn = NQTFR(node);
+ /* Only an exact repeat count gives a fixed length. */
+ if (qn->lower == qn->upper) {
+ r = get_char_length_tree1(qn->target, reg, &tlen, level);
+ if (r == 0)
+ *len = distance_multiply(tlen, qn->lower);
+ }
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ if (! IS_CALL_RECURSION(NCALL(node)))
+ r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
+ else
+ r = GET_CHAR_LEN_VARLEN;
+ break;
+#endif
+
+ case NT_CTYPE:
+ *len = 1;
+ break;
+
+ case NT_CCLASS:
+ case NT_CANY:
+ *len = 1;
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+ switch (en->type) {
+ case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* Cache the result on the group node (NST_CLEN_FIXED) so later
+ queries reuse en->char_len. */
+ if (IS_ENCLOSE_CLEN_FIXED(en))
+ *len = en->char_len;
+ else {
+ r = get_char_length_tree1(en->target, reg, len, level);
+ if (r == 0) {
+ en->char_len = *len;
+ SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
+ }
+ }
+ break;
+#endif
+ /* Without USE_SUBEXP_CALL, ENCLOSE_MEMORY falls through to here. */
+ case ENCLOSE_OPTION:
+ case ENCLOSE_STOP_BACKTRACK:
+ r = get_char_length_tree1(en->target, reg, len, level);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ case NT_ANCHOR:
+ /* Anchors contribute length 0. */
+ break;
+
+ default:
+ r = GET_CHAR_LEN_VARLEN;
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Entry point for the fixed character length computation: runs
+ * get_char_length_tree1() on node starting from nesting level 0 and returns
+ * its status, with the length stored in *len.
+ */
+static int
+get_char_length_tree(Node* node, regex_t* reg, int* len)
+{
+  int status;
+
+  status = get_char_length_tree1(node, reg, len, 0);
+  return status;
+}
+
+/*
+ * Conservative exclusivity test between two value nodes.
+ *
+ * Returns 1 when the checks below establish that x is not included in y,
+ * and 0 otherwise -- including every node-type combination that is not
+ * handled.  Only NT_CTYPE, NT_CCLASS and NT_STR nodes are examined; the
+ * pair is swapped where needed so that each combination is handled once.
+ */
+static int
+is_not_included(Node* x, Node* y, regex_t* reg)
+{
+ int i, len;
+ OnigCodePoint code;
+ UChar *p, c;
+ int ytype;
+
+ retry:
+ ytype = NTYPE(y);
+ switch (NTYPE(x)) {
+ case NT_CTYPE:
+ {
+ switch (ytype) {
+ case NT_CTYPE:
+ /* Same character type with opposite negation is exclusive. */
+ if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
+ NCTYPE(y)->not != NCTYPE(x)->not)
+ return 1;
+ else
+ return 0;
+ break;
+
+ case NT_CCLASS:
+ swap:
+ /* Exchange x and y and restart; the swapped combination is handled
+ in another branch. */
+ {
+ Node* tmp;
+ tmp = x; x = y; y = tmp;
+ goto retry;
+ }
+ break;
+
+ case NT_STR:
+ goto swap;
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ case NT_CCLASS:
+ {
+ CClassNode* xc = NCCLASS(x);
+ switch (ytype) {
+ case NT_CTYPE:
+ switch (NCTYPE(y)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(y)->not == 0) {
+ /* y is "word": exclusive only if x is a plain, non-negated
+ single-byte class containing no word code. */
+ if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (BITSET_AT(xc->bs, i)) {
+ if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
+ }
+ }
+ return 1;
+ }
+ return 0;
+ }
+ else {
+ /* y is "not word": x must not accept any single-byte non-word
+ code, taking x's own negation flag into account. */
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! IS_CODE_SB_WORD(reg->enc, i)) {
+ if (!IS_NCCLASS_NOT(xc)) {
+ if (BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ else {
+ if (! BITSET_AT(xc->bs, i))
+ return 0;
+ }
+ }
+ }
+ return 1;
+ }
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case NT_CCLASS:
+ {
+ int v;
+ CClassNode* yc = NCCLASS(y);
+
+ /* Any single-byte code accepted by both classes means overlap. */
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ v = BITSET_AT(xc->bs, i);
+ if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
+ (v == 0 && IS_NCCLASS_NOT(xc))) {
+ v = BITSET_AT(yc->bs, i);
+ if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
+ (v == 0 && IS_NCCLASS_NOT(yc)))
+ return 0;
+ }
+ }
+ /* Exclusive only when at least one side has no multibyte buffer and
+ is not negated. */
+ if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
+ (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
+ return 1;
+ return 0;
+ }
+ break;
+
+ case NT_STR:
+ goto swap;
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ case NT_STR:
+ {
+ StrNode* xs = NSTR(x);
+ /* An empty string gives no information. */
+ if (NSTRING_LEN(x) == 0)
+ break;
+
+ /* NOTE(review): c is assigned here but not read anywhere below. */
+ c = *(xs->s);
+ switch (ytype) {
+ case NT_CTYPE:
+ switch (NCTYPE(y)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
+ return NCTYPE(y)->not;
+ else
+ return !(NCTYPE(y)->not);
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case NT_CCLASS:
+ {
+ CClassNode* cc = NCCLASS(y);
+
+ /* NOTE(review): the end pointer passed here is xs->s plus the
+ encoding's maximum character length, not xs->end -- confirm the
+ string buffer is always large enough for that. */
+ code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+ xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
+ return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+ }
+ break;
+
+ case NT_STR:
+ {
+ UChar *q;
+ StrNode* ys = NSTR(y);
+ /* Compare the bytes of the common-length prefix. */
+ len = NSTRING_LEN(x);
+ if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
+ if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ /* tiny version */
+ return 0;
+ }
+ else {
+ for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
+ if (*p != *q) return 1;
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Find the node that determines the first value matched by the subtree.
+ *
+ * When exact is non-zero, character type / character class nodes are not
+ * accepted, and neither are non-raw strings while the ignore-case option is
+ * active.  Option groups are searched with reg->options temporarily set to
+ * the group's options.
+ *
+ * Returns the head node, or NULL_NODE when none can be determined.
+ */
+static Node*
+get_head_value_node(Node* node, int exact, regex_t* reg)
+{
+  Node* head = NULL_NODE;
+
+  switch (NTYPE(node)) {
+  case NT_CTYPE:
+  case NT_CCLASS:
+    if (exact == 0)
+      head = node;
+    break;
+
+  case NT_LIST:
+    head = get_head_value_node(NCAR(node), exact, reg);
+    break;
+
+  case NT_STR:
+    {
+      StrNode* str = NSTR(node);
+
+      if (str->end > str->s) {
+        int rejected = (exact != 0 &&
+                        !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options));
+        if (! rejected)
+          head = node;
+      }
+    }
+    break;
+
+  case NT_QTFR:
+    {
+      QtfrNode* rep = NQTFR(node);
+
+      /* A quantifier that may match zero times has no guaranteed head. */
+      if (rep->lower > 0) {
+        head = IS_NOT_NULL(rep->head_exact)
+             ? rep->head_exact
+             : get_head_value_node(rep->target, exact, reg);
+      }
+    }
+    break;
+
+  case NT_ENCLOSE:
+    {
+      EncloseNode* enc = NENCLOSE(node);
+
+      if (enc->type == ENCLOSE_OPTION) {
+        OnigOptionType saved = reg->options;
+
+        reg->options = enc->option;
+        head = get_head_value_node(enc->target, exact, reg);
+        reg->options = saved;
+      }
+      else if (enc->type == ENCLOSE_MEMORY ||
+               enc->type == ENCLOSE_STOP_BACKTRACK) {
+        head = get_head_value_node(enc->target, exact, reg);
+      }
+    }
+    break;
+
+  case NT_ANCHOR:
+    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
+      head = get_head_value_node(NANCHOR(node)->target, exact, reg);
+    break;
+
+  default:
+    /* NT_BREF, NT_ALT, NT_CANY, NT_CALL and anything else: no head value. */
+    break;
+  }
+
+  return head;
+}
+
+/*
+ * Check that every node in the subtree is of an allowed kind.
+ *
+ * type_mask, enclose_mask and anchor_mask are bit sets of the permitted
+ * node types, enclose types and anchor types respectively.
+ *
+ * Returns 0 when the whole subtree passes and 1 as soon as a node outside
+ * the masks is found.
+ */
+static int
+check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
+{
+  int status = 0;
+  int kind = NTYPE(node);
+
+  if ((NTYPE2BIT(kind) & type_mask) == 0)
+    return 1;
+
+  switch (kind) {
+  case NT_LIST:
+  case NT_ALT:
+    for (; IS_NOT_NULL(node); node = NCDR(node)) {
+      status = check_type_tree(NCAR(node), type_mask, enclose_mask,
+                               anchor_mask);
+      if (status != 0) break;
+    }
+    break;
+
+  case NT_QTFR:
+    status = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
+                             anchor_mask);
+    break;
+
+  case NT_ENCLOSE:
+    {
+      EncloseNode* enc = NENCLOSE(node);
+
+      if ((enc->type & enclose_mask) == 0)
+        return 1;
+
+      status = check_type_tree(enc->target, type_mask, enclose_mask,
+                               anchor_mask);
+    }
+    break;
+
+  case NT_ANCHOR:
+    {
+      int anchor_kind = NANCHOR(node)->type;
+
+      if ((anchor_kind & anchor_mask) == 0)
+        return 1;
+
+      if (NANCHOR(node)->target)
+        status = check_type_tree(NANCHOR(node)->target,
+                                 type_mask, enclose_mask, anchor_mask);
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return status;
+}
+
+#ifdef USE_SUBEXP_CALL
+
+#define RECURSION_EXIST 1
+#define RECURSION_INFINITE 2
+
+/*
+ * Search the subtree for a path that leads back to the group currently
+ * marked NST_MARK1.
+ *
+ * head is non-zero while no preceding list element with a non-zero minimum
+ * match length has been passed.
+ *
+ * Returns 0 when the marked group is not reached, RECURSION_EXIST when it is
+ * reached with head == 0, RECURSION_INFINITE when it is reached while head
+ * is still non-zero, or a negative error code from get_min_match_length().
+ */
+static int
+subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
+{
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case NT_LIST:
+ {
+ Node *x;
+ OnigDistance min;
+ int ret;
+
+ x = node;
+ do {
+ ret = subexp_inf_recursive_check(NCAR(x), env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r |= ret;
+ /* Once an element with a non-zero minimum length has been passed,
+ later elements are no longer at the head. */
+ if (head) {
+ ret = get_min_match_length(NCAR(x), &min, env);
+ if (ret != 0) return ret;
+ if (min != 0) head = 0;
+ }
+ } while (IS_NOT_NULL(x = NCDR(x)));
+ }
+ break;
+
+ case NT_ALT:
+ {
+ int ret;
+ /* The results are AND-ed: RECURSION_EXIST survives only if every
+ alternative reports it. */
+ r = RECURSION_EXIST;
+ do {
+ ret = subexp_inf_recursive_check(NCAR(node), env, head);
+ if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ r &= ret;
+ } while (IS_NOT_NULL(node = NCDR(node)));
+ }
+ break;
+
+ case NT_QTFR:
+ r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
+ /* A quantifier with lower == 0 can be skipped, so RECURSION_EXIST is
+ dropped. */
+ if (r == RECURSION_EXIST) {
+ if (NQTFR(node)->lower == 0) r = 0;
+ }
+ break;
+
+ case NT_ANCHOR:
+ {
+ AnchorNode* an = NANCHOR(node);
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = subexp_inf_recursive_check(an->target, env, head);
+ break;
+ }
+ }
+ break;
+
+ case NT_CALL:
+ r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
+ break;
+
+ case NT_ENCLOSE:
+ /* NST_MARK2 flags a group that is already being visited by this walk;
+ NST_MARK1 flags the group the walk started from. */
+ if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
+ return 0;
+ else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK2);
+ r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Traverse the tree and run subexp_inf_recursive_check() on the body of
+ * every group flagged as recursive.
+ *
+ * Returns 0 when no problem is found, ONIGERR_NEVER_ENDING_RECURSION when a
+ * recursive group's check returns a positive result, or the negative error
+ * code reported by the check.
+ */
+static int
+subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
+{
+  int type;
+  int r = 0;
+
+  type = NTYPE(node);
+  switch (type) {
+  case NT_LIST:
+  case NT_ALT:
+    do {
+      r = subexp_inf_recursive_check_trav(NCAR(node), env);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+    break;
+
+  case NT_QTFR:
+    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
+    break;
+
+  case NT_ANCHOR:
+    {
+      AnchorNode* an = NANCHOR(node);
+      switch (an->type) {
+      case ANCHOR_PREC_READ:
+      case ANCHOR_PREC_READ_NOT:
+      case ANCHOR_LOOK_BEHIND:
+      case ANCHOR_LOOK_BEHIND_NOT:
+        r = subexp_inf_recursive_check_trav(an->target, env);
+        break;
+      }
+    }
+    break;
+
+  case NT_ENCLOSE:
+    {
+      EncloseNode* en = NENCLOSE(node);
+
+      if (IS_ENCLOSE_RECURSION(en)) {
+        /* Mark this group as the start point, then look for a way back. */
+        SET_ENCLOSE_STATUS(node, NST_MARK1);
+        r = subexp_inf_recursive_check(en->target, env, 1);
+        if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
+        /* Report errors from the check instead of letting the traversal
+           below overwrite them. */
+        if (r < 0) return r;
+        CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+      }
+      r = subexp_inf_recursive_check_trav(en->target, env);
+    }
+
+    break;
+
+  default:
+    break;
+  }
+
+  return r;
+}
+
+/*
+ * Report whether the subtree can reach the group currently marked
+ * NST_MARK1.
+ *
+ * Call nodes whose target reports a non-zero result are flagged with
+ * SET_CALL_RECURSION().  NST_MARK2 is set on each group while it is being
+ * visited so that it is not entered twice.
+ *
+ * Returns non-zero when the marked group is reachable, 0 otherwise.
+ */
+static int
+subexp_recursive_check(Node* node)
+{
+  int found = 0;
+
+  switch (NTYPE(node)) {
+  case NT_LIST:
+  case NT_ALT:
+    for (; IS_NOT_NULL(node); node = NCDR(node))
+      found |= subexp_recursive_check(NCAR(node));
+    break;
+
+  case NT_QTFR:
+    found = subexp_recursive_check(NQTFR(node)->target);
+    break;
+
+  case NT_ANCHOR:
+    {
+      AnchorNode* anchor = NANCHOR(node);
+
+      if (anchor->type == ANCHOR_PREC_READ ||
+          anchor->type == ANCHOR_PREC_READ_NOT ||
+          anchor->type == ANCHOR_LOOK_BEHIND ||
+          anchor->type == ANCHOR_LOOK_BEHIND_NOT) {
+        found = subexp_recursive_check(anchor->target);
+      }
+    }
+    break;
+
+  case NT_CALL:
+    found = subexp_recursive_check(NCALL(node)->target);
+    if (found != 0)
+      SET_CALL_RECURSION(node);
+    break;
+
+  case NT_ENCLOSE:
+    {
+      EncloseNode* enc = NENCLOSE(node);
+
+      if (IS_ENCLOSE_MARK2(enc))
+        return 0;
+      if (IS_ENCLOSE_MARK1(enc))
+        return 1;  /* recursion */
+
+      SET_ENCLOSE_STATUS(node, NST_MARK2);
+      found = subexp_recursive_check(enc->target);
+      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return found;
+}
+
+
+/* Traverse the tree.  For each called group (IS_ENCLOSE_CALLED) that is not
+ * yet flagged as recursive, mark it NST_MARK1, run subexp_recursive_check()
+ * on its body and set NST_RECURSION when that reports a hit.
+ * A quantifier with upper == 0 whose target reports FOUND_CALLED_NODE gets
+ * is_refered set.
+ *
+ * Returns a value with FOUND_CALLED_NODE set when a called group was seen
+ * below NODE; a negative result coming from a list/alternative element is
+ * returned immediately.
+ */
+static int
+subexp_recursive_check_trav(Node* node, ScanEnv* env)
+{
+#define FOUND_CALLED_NODE 1
+
+  int result = 0;
+
+  switch (NTYPE(node)) {
+  case NT_LIST:
+  case NT_ALT:
+    for (; IS_NOT_NULL(node); node = NCDR(node)) {
+      int sub = subexp_recursive_check_trav(NCAR(node), env);
+
+      if (sub == FOUND_CALLED_NODE) result = FOUND_CALLED_NODE;
+      else if (sub < 0) return sub;
+    }
+    break;
+
+  case NT_QTFR:
+    {
+      QtfrNode* qn = NQTFR(node);
+
+      result = subexp_recursive_check_trav(qn->target, env);
+      if (qn->upper == 0 && result == FOUND_CALLED_NODE)
+        qn->is_refered = 1;
+    }
+    break;
+
+  case NT_ANCHOR:
+    {
+      AnchorNode* anchor = NANCHOR(node);
+
+      if (anchor->type == ANCHOR_PREC_READ ||
+          anchor->type == ANCHOR_PREC_READ_NOT ||
+          anchor->type == ANCHOR_LOOK_BEHIND ||
+          anchor->type == ANCHOR_LOOK_BEHIND_NOT) {
+        result = subexp_recursive_check_trav(anchor->target, env);
+      }
+    }
+    break;
+
+  case NT_ENCLOSE:
+    {
+      EncloseNode* group = NENCLOSE(node);
+
+      if (! IS_ENCLOSE_RECURSION(group) && IS_ENCLOSE_CALLED(group)) {
+        SET_ENCLOSE_STATUS(node, NST_MARK1);
+        if (subexp_recursive_check(group->target) != 0) {
+          SET_ENCLOSE_STATUS(node, NST_RECURSION);
+        }
+        CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+      }
+
+      result = subexp_recursive_check_trav(group->target, env);
+      if (IS_ENCLOSE_CALLED(group))
+        result |= FOUND_CALLED_NODE;
+    }
+    break;
+
+  default:
+    break;
+  }
+
+  return result;
+}
+
+static int /*
+ * setup_subexp_call: bind every NT_CALL node found under `node` to the group
+ * node it refers to (cn->target), flag that group NST_CALLED and switch on
+ * its bit in env->bt_mem_start.
+ * Lists, alternatives, quantifiers, groups and look-ahead/look-behind
+ * anchors are descended into; other node types are left untouched.
+ * Returns 0 on success, otherwise the first non-zero (ONIGERR_*) result.
+ */
+setup_subexp_call(Node* node, ScanEnv* env)
+{
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case NT_LIST:
+ do {
+ r = setup_subexp_call(NCAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); /* stop at the first error */
+ break;
+
+ case NT_ALT:
+ do {
+ r = setup_subexp_call(NCAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); /* stop at the first error */
+ break;
+
+ case NT_QTFR:
+ r = setup_subexp_call(NQTFR(node)->target, env);
+ break;
+ case NT_ENCLOSE:
+ r = setup_subexp_call(NENCLOSE(node)->target, env);
+ break;
+
+ case NT_CALL:
+ {
+ CallNode* cn = NCALL(node);
+ Node** nodes = SCANENV_MEM_NODES(env);
+
+ if (cn->group_num != 0) { /* group number already known */
+ int gnum = cn->group_num;
+
+#ifdef USE_NAMED_GROUP
+ if (env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+#endif
+ if (gnum > env->num_mem) { /* number beyond the groups known to env */
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
+
+#ifdef USE_NAMED_GROUP
+ set_call_attr: /* also entered by goto from the name-lookup branch below */
+#endif
+ cn->target = nodes[cn->group_num];
+ if (IS_NULL(cn->target)) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
+ BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
+ cn->unset_addr_list = env->unset_addr_list;
+ }
+#ifdef USE_NAMED_GROUP
+ else { /* group_num == 0: look the group up by cn->name */
+ int *refs;
+
+ int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
+ &refs);
+ if (n <= 0) { /* no group found for the name */
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ else if (n > 1) { /* more than one group for the name: rejected */
+ onig_scan_env_set_error_string(env,
+ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ }
+ else {
+ cn->group_num = refs[0];
+ goto set_call_attr; /* continue with the common binding code */
+ }
+ }
+#endif
+ }
+ break;
+
+ case NT_ANCHOR:
+ {
+ AnchorNode* an = NANCHOR(node);
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ r = setup_subexp_call(an->target, env);
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+#endif
+
+/* Split a look-behind whose top-level alternatives have different lengths
+ * into one look-behind per alternative:
+ *    (?<=A|B)  ==>  (?<=A)|(?<=B)
+ *    (?<!A|B)  ==>  (?<!A)(?<!B)
+ *
+ * NODE is the look-behind anchor; its contents are swapped with the
+ * alternative node below it, so on return NODE is the alternative (or, for
+ * the negative form, the list) of per-branch anchors.
+ * Returns 0, or the memory error raised by CHECK_NULL_RETURN_MEMERR when a
+ * new anchor node cannot be allocated.
+ */
+static int
+divide_look_behind_alternatives(Node* node)
+{
+  Node *alt, *first_branch, *cell, *wrapper;
+  int anc_type = NANCHOR(node)->type;
+
+  /* exchange anchor and alternative, then hook the old anchor (now living
+     in `alt`) around the first branch */
+  alt = NANCHOR(node)->target;
+  first_branch = NCAR(alt);
+  swap_node(node, alt);
+  NCAR(node) = alt;
+  NANCHOR(alt)->target = first_branch;
+
+  /* wrap each remaining branch in a fresh anchor of the same type */
+  for (cell = NCDR(node); cell != NULL_NODE; cell = NCDR(cell)) {
+    wrapper = onig_node_new_anchor(anc_type);
+    CHECK_NULL_RETURN_MEMERR(wrapper);
+    NANCHOR(wrapper)->target = NCAR(cell);
+    NCAR(cell) = wrapper;
+  }
+
+  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
+    for (cell = node; cell != NULL_NODE; cell = NCDR(cell)) {
+      SET_NTYPE(cell, NT_LIST); /* alt -> list */
+    }
+  }
+  return 0;
+}
+
+/* Compute the fixed character length of a look-behind body and store it in
+ * the anchor (an->char_len).
+ * A variable-length body is rejected with
+ * ONIGERR_INVALID_LOOK_BEHIND_PATTERN.  When only the top-level alternatives
+ * differ in length and the syntax allows
+ * ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND, the look-behind is split by
+ * divide_look_behind_alternatives() instead.
+ * Any other result of get_char_length_tree() is returned unchanged.
+ */
+static int
+setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
+{
+  int len;
+  AnchorNode* an = NANCHOR(node);
+  int r = get_char_length_tree(an->target, reg, &len);
+
+  if (r == 0) {
+    an->char_len = len;
+    return 0;
+  }
+  if (r == GET_CHAR_LEN_VARLEN)
+    return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+  if (r != GET_CHAR_LEN_TOP_ALT_VARLEN)
+    return r;
+
+  if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
+    return divide_look_behind_alternatives(node);
+
+  return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+}
+
+/* Tune NODE using NEXT_NODE, the element that follows it in a list.
+ * Capture groups (ENCLOSE_MEMORY) are looked through; everything else
+ * applies only to a greedy quantifier without upper bound:
+ *  - with USE_QTFR_PEEK_NEXT, the head value node of NEXT_NODE is stored in
+ *    qn->next_head_exact (skipped when its first byte is '\0');
+ *  - automatic possessification  a*b ==> (?>a*)b  when lower <= 1, the
+ *    target has a simple node type and is_not_included() holds for the head
+ *    value nodes of target and NEXT_NODE.
+ *
+ * Returns 0, or the memory error raised by CHECK_NULL_RETURN_MEMERR when the
+ * stop-backtrack wrapper cannot be allocated.
+ */
+static int
+next_setup(Node* node, Node* next_node, regex_t* reg)
+{
+  QtfrNode* qn;
+  int target_type;
+  Node *head, *next_head, *wrapper;
+
+  /* step through nested capture groups */
+  while (NTYPE(node) == NT_ENCLOSE) {
+    EncloseNode* group = NENCLOSE(node);
+
+    if (group->type != ENCLOSE_MEMORY) return 0;
+    node = group->target;
+  }
+  if (NTYPE(node) != NT_QTFR) return 0;
+
+  qn = NQTFR(node);
+  if (! qn->greedy || ! IS_REPEAT_INFINITE(qn->upper)) return 0;
+
+#ifdef USE_QTFR_PEEK_NEXT
+  {
+    Node* peek = get_head_value_node(next_node, 1, reg);
+
+    /* '\0': for UTF-16BE etc... */
+    if (IS_NOT_NULL(peek) && NSTR(peek)->s[0] != '\0') {
+      qn->next_head_exact = peek;
+    }
+  }
+#endif
+
+  /* automatic possessification a*b ==> (?>a*)b */
+  if (qn->lower > 1) return 0;
+
+  target_type = NTYPE(qn->target);
+  if (! (IS_NODE_TYPE_SIMPLE(target_type))) return 0;
+
+  head = get_head_value_node(qn->target, 0, reg);
+  if (IS_NULL(head)) return 0;
+
+  next_head = get_head_value_node(next_node, 0, reg);
+  if (IS_NULL(next_head) || ! is_not_included(head, next_head, reg)) return 0;
+
+  wrapper = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+  CHECK_NULL_RETURN_MEMERR(wrapper);
+  SET_ENCLOSE_STATUS(wrapper, NST_STOP_BT_SIMPLE_REPEAT);
+  /* after the swap NODE is the wrapper and `wrapper` holds the quantifier */
+  swap_node(node, wrapper);
+  NENCLOSE(node)->target = wrapper;
+  return 0;
+}
+
+
+/* Replace the contents of string node NODE by its case-folded form.
+ *
+ * Each character is folded with ONIGENC_MBC_CASE_FOLD (reg->enc,
+ * reg->case_fold_flag) into a temporary buffer that starts at twice the
+ * byte length of the string and doubles whenever it is full; the result is
+ * stored back with onig_node_str_set().
+ *
+ * Returns 0 on success, ONIGERR_MEMORY when the buffer cannot be allocated
+ * or grown, or the non-zero result of onig_node_str_set().  The temporary
+ * buffer is released on every path.
+ */
+static int
+update_string_node_case_fold(regex_t* reg, Node *node)
+{
+  UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+  UChar *sbuf, *ebuf, *sp;
+  int r, i, len, sbuf_size;
+  StrNode* sn = NSTR(node);
+
+  end = sn->end;
+  sbuf_size = (end - sn->s) * 2;
+  sbuf = (UChar* )xmalloc(sbuf_size);
+  CHECK_NULL_RETURN_MEMERR(sbuf);
+  ebuf = sbuf + sbuf_size;
+
+  sp = sbuf;
+  p = sn->s;
+  while (p < end) {
+    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
+    for (i = 0; i < len; i++) {
+      if (sp >= ebuf) {
+        /* grow through a temporary: when xrealloc fails the old block is
+           still ours and has to be released here */
+        UChar* grown = (UChar* )xrealloc(sbuf, sbuf_size * 2);
+        if (IS_NULL(grown)) {
+          xfree(sbuf);
+          return ONIGERR_MEMORY;
+        }
+        sbuf = grown;
+        sp = sbuf + sbuf_size;      /* the write position was the old end */
+        sbuf_size *= 2;
+        ebuf = sbuf + sbuf_size;
+      }
+
+      *sp++ = buf[i];
+    }
+  }
+
+  r = onig_node_str_set(node, sbuf, sp);
+  xfree(sbuf);
+  return r;
+}
+
+/* Create a string node for [s, end), case-fold its contents, and flag it
+ * with NSTRING_SET_AMBIG and NSTRING_SET_DONT_GET_OPT_INFO.
+ * On success the node is stored in *RNODE and 0 is returned.  On failure
+ * *RNODE is left untouched and ONIGERR_MEMORY or the error of
+ * update_string_node_case_fold() is returned.
+ */
+static int
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
+                                 regex_t* reg)
+{
+  int status;
+  Node* folded = onig_node_new_str(s, end);
+
+  if (IS_NULL(folded)) return ONIGERR_MEMORY;
+
+  status = update_string_node_case_fold(reg, folded);
+  if (status == 0) {
+    NSTRING_SET_AMBIG(folded);
+    NSTRING_SET_DONT_GET_OPT_INFO(folded);
+    *rnode = folded;
+  }
+  else {
+    onig_node_free(folded);
+  }
+  return status;
+}
+
+static int /*
+ * expand_case_fold_string_alt: build, in *rnode, an alternation for the
+ * character at p (slen bytes) and its item_num case-fold items.
+ *
+ * The first alternative is the literal bytes [p, p+slen).  Each item is
+ * turned into a string node by encoding items[i].code[0..code_len) with
+ * ONIGENC_CODE_TO_MBC.  Items whose byte_len equals slen are chained onto
+ * the alternation that holds the literal.  If at least one item has a
+ * different byte_len (varlen), *rnode becomes an outer alternation whose
+ * first branch is a list holding that inner alternation; every such item is
+ * chained onto the outer alternation and, when input remains after it
+ * (p + byte_len < end), is followed by the case-folded rest of the string.
+ *
+ * Returns varlen (1 or 0) on success.  On failure the tree in *rnode is
+ * freed and ONIGERR_MEMORY is returned.
+ * NOTE(review): on the goto paths *rnode keeps pointing at the freed tree
+ * and callers must not use it.  The specific code in `r` is discarded
+ * there: ONIGERR_MEMORY is returned regardless.
+ */
+expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
+ UChar *p, int slen, UChar *end,
+ regex_t* reg, Node **rnode)
+{
+ int r, i, j, len, varlen;
+ Node *anode, *var_anode, *snode, *xnode, *an;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+
+ *rnode = var_anode = NULL_NODE;
+
+ varlen = 0; /* becomes 1 when some item has a byte_len different from slen */
+ for (i = 0; i < item_num; i++) {
+ if (items[i].byte_len != slen) {
+ varlen = 1;
+ break;
+ }
+ }
+
+ if (varlen != 0) { /* outer alt -> list -> inner alt */
+ *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
+
+ xnode = onig_node_new_list(NULL, NULL);
+ if (IS_NULL(xnode)) goto mem_err;
+ NCAR(var_anode) = xnode;
+
+ anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(anode)) goto mem_err;
+ NCAR(xnode) = anode;
+ }
+ else {
+ *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(anode)) return ONIGERR_MEMORY;
+ }
+
+ snode = onig_node_new_str(p, p + slen); /* the literal character itself */
+ if (IS_NULL(snode)) goto mem_err;
+
+ NCAR(anode) = snode;
+
+ for (i = 0; i < item_num; i++) {
+ snode = onig_node_new_str(NULL, NULL); /* not linked into the tree yet */
+ if (IS_NULL(snode)) goto mem_err;
+
+ for (j = 0; j < items[i].code_len; j++) {
+ len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
+ if (len < 0) {
+ r = len;
+ goto mem_err2;
+ }
+
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r != 0) goto mem_err2;
+ }
+
+ an = onig_node_new_alt(NULL_NODE, NULL_NODE);
+ if (IS_NULL(an)) {
+ goto mem_err2;
+ }
+
+ if (items[i].byte_len != slen) { /* different-length item: goes on the outer alternation */
+ Node *rem;
+ UChar *q = p + items[i].byte_len;
+
+ if (q < end) { /* input remains: branch is list(snode, folded rest) */
+ r = expand_case_fold_make_rem_string(&rem, q, end, reg);
+ if (r != 0) {
+ onig_node_free(an);
+ goto mem_err2;
+ }
+
+ xnode = onig_node_list_add(NULL_NODE, snode);
+ if (IS_NULL(xnode)) {
+ onig_node_free(an);
+ onig_node_free(rem);
+ goto mem_err2;
+ }
+ if (IS_NULL(onig_node_list_add(xnode, rem))) {
+ onig_node_free(an);
+ onig_node_free(xnode); /* xnode already holds snode, hence mem_err rather than mem_err2 */
+ onig_node_free(rem);
+ goto mem_err;
+ }
+
+ NCAR(an) = xnode;
+ }
+ else {
+ NCAR(an) = snode;
+ }
+
+ NCDR(var_anode) = an;
+ var_anode = an;
+ }
+ else { /* same-length item: goes on the alternation holding the literal */
+ NCAR(an) = snode;
+ NCDR(anode) = an;
+ anode = an;
+ }
+ }
+
+ return varlen;
+
+ mem_err2: /* snode is still unlinked here: free it separately */
+ onig_node_free(snode);
+
+ mem_err:
+ onig_node_free(*rnode);
+
+ return ONIGERR_MEMORY;
+}
+
+static int /*
+ * expand_case_fold_string: rewrite string node `node` for case-insensitive
+ * matching.
+ *
+ * Nothing is done (0 is returned) when the node is already flagged
+ * NSTRING_IS_AMBIG or is empty.  Otherwise the string is walked character
+ * by character:
+ *  - a character without case-fold items (n == 0) is appended to a plain
+ *    string node;
+ *  - a character with items is expanded by expand_case_fold_string_alt(),
+ *    as long as the running product alt_num of (n + 1) stays within
+ *    THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION;
+ *  - once the threshold is exceeded the rest of the string becomes one
+ *    node built by expand_case_fold_make_rem_string().
+ * The pieces are linked into a list (top_root), which is swapped into
+ * `node`; the old contents are then freed.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY or another negative code on failure.
+ * NOTE(review): the error paths free only top_root; a prev_node that has
+ * not been linked under top_root yet looks like it is leaked - confirm.
+ */
+expand_case_fold_string(Node* node, regex_t* reg)
+{
+#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+
+ int r, n, len, alt_num;
+ UChar *start, *end, *p;
+ Node *top_root, *root, *snode, *prev_node;
+ OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+ StrNode* sn = NSTR(node);
+
+ if (NSTRING_IS_AMBIG(node)) return 0;
+
+ start = sn->s;
+ end = sn->end;
+ if (start >= end) return 0;
+
+ r = 0;
+ top_root = root = prev_node = snode = NULL_NODE;
+ alt_num = 1; /* running product of (n + 1) over the expanded characters */
+ p = start;
+ while (p < end) {
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
+ p, end, items);
+ if (n < 0) {
+ r = n;
+ goto err;
+ }
+
+ len = enclen(reg->enc, p, end);
+
+ if (n == 0) { /* no case variants: collect into a plain string node */
+ if (IS_NULL(snode)) {
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { /* second piece: start the list */
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ prev_node = snode = onig_node_new_str(NULL, NULL);
+ if (IS_NULL(snode)) goto mem_err;
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, snode))) {
+ onig_node_free(snode);
+ goto mem_err;
+ }
+ }
+ }
+
+ r = onig_node_str_cat(snode, p, p + len);
+ if (r != 0) goto err;
+ }
+ else {
+ alt_num *= (n + 1);
+ if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; /* rest is handled after the loop */
+
+ if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
+ if (r < 0) goto mem_err;
+ if (r == 1) { /* some item had a different byte length */
+ if (IS_NULL(root)) {
+ top_root = prev_node;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ root = NCAR(prev_node); /* later pieces are appended to the list inside the first branch */
+ }
+ else { /* r == 0 */
+ if (IS_NOT_NULL(root)) {
+ if (IS_NULL(onig_node_list_add(root, prev_node))) {
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+ }
+
+ snode = NULL_NODE; /* the next plain character starts a new string node */
+ }
+
+ p += len;
+ }
+
+ if (p < end) { /* loop left early: fold the remainder as one string */
+ Node *srem;
+
+ r = expand_case_fold_make_rem_string(&srem, p, end, reg);
+ if (r != 0) goto mem_err;
+
+ if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
+ top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+ if (IS_NULL(root)) {
+ onig_node_free(srem);
+ onig_node_free(prev_node);
+ goto mem_err;
+ }
+ }
+
+ if (IS_NULL(root)) {
+ prev_node = srem;
+ }
+ else {
+ if (IS_NULL(onig_node_list_add(root, srem))) {
+ onig_node_free(srem);
+ goto mem_err;
+ }
+ }
+ }
+
+ /* ending */
+ top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
+ swap_node(node, top_root); /* node takes the new tree; top_root now holds the old string */
+ onig_node_free(top_root);
+ return 0;
+
+ mem_err:
+ r = ONIGERR_MEMORY;
+
+ err:
+ onig_node_free(top_root);
+ return r;
+}
+
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define CEC_THRES_NUM_BIG_REPEAT 512 /* repeat-count spread from which a quantifier counts as "big" */
+#define CEC_INFINITE_NUM 0x7fffffff /* var_num used for an unbounded quantifier */
+
+#define CEC_IN_INFINITE_REPEAT (1<<0) /* state: below an unbounded quantifier */
+#define CEC_IN_FINITE_REPEAT (1<<1) /* state: below a bounded quantifier with upper > 1 */
+#define CEC_CONT_BIG_REPEAT (1<<2) /* state: a big repeat was returned by an earlier node */
+
+static int /*
+ * setup_comb_exp_check: walk the tree and give a check number
+ * (qn->comb_exp_check_num, counted in env->num_comb_exp_check) to each
+ * quantifier that sits inside an unbounded repeat or follows a big repeat.
+ * `state` is a set of CEC_* flags describing the context of `node`.
+ * The return value is the state to hand to the following list element
+ * (CEC_* flags); negative values are treated as errors by the loops.
+ * Side effects on env: num_comb_exp_check, curr_max_regnum,
+ * comb_exp_max_regnum, has_recursion.
+ */
+setup_comb_exp_check(Node* node, int state, ScanEnv* env)
+{
+ int type;
+ int r = state;
+
+ type = NTYPE(node);
+ switch (type) {
+ case NT_LIST:
+ {
+ Node* prev = NULL_NODE; /* NOTE(review): written below but never read */
+ do {
+ r = setup_comb_exp_check(NCAR(node), r, env); /* state flows from element to element */
+ prev = NCAR(node);
+ } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
+ }
+ break;
+
+ case NT_ALT:
+ {
+ int ret;
+ do {
+ ret = setup_comb_exp_check(NCAR(node), state, env); /* every branch starts from the same state */
+ r |= ret;
+ } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
+ }
+ break;
+
+ case NT_QTFR:
+ {
+ int child_state = state;
+ int add_state = 0;
+ QtfrNode* qn = NQTFR(node);
+ Node* target = qn->target;
+ int var_num;
+
+ if (! IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->upper > 1) {
+ /* {0,1}, {1,1} are allowed */
+ child_state |= CEC_IN_FINITE_REPEAT;
+
+ /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+ if (env->backrefed_mem == 0) { /* only when no group is back-referenced */
+ if (NTYPE(qn->target) == NT_ENCLOSE) {
+ EncloseNode* en = NENCLOSE(qn->target);
+ if (en->type == ENCLOSE_MEMORY) {
+ if (NTYPE(en->target) == NT_QTFR) {
+ QtfrNode* q = NQTFR(en->target);
+ if (IS_REPEAT_INFINITE(q->upper)
+ && q->greedy == qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ if (qn->upper == 1)
+ child_state = state;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (state & CEC_IN_FINITE_REPEAT) {
+ qn->comb_exp_check_num = -1;
+ }
+ else {
+ if (IS_REPEAT_INFINITE(qn->upper)) {
+ var_num = CEC_INFINITE_NUM;
+ child_state |= CEC_IN_INFINITE_REPEAT;
+ }
+ else {
+ var_num = qn->upper - qn->lower;
+ }
+
+ if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+ add_state |= CEC_CONT_BIG_REPEAT;
+
+ if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+ ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+ var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+ if (qn->comb_exp_check_num == 0) { /* not numbered yet */
+ env->num_comb_exp_check++;
+ qn->comb_exp_check_num = env->num_comb_exp_check;
+ if (env->curr_max_regnum > env->comb_exp_max_regnum)
+ env->comb_exp_max_regnum = env->curr_max_regnum;
+ }
+ }
+ }
+
+ r = setup_comb_exp_check(target, child_state, env);
+ r |= add_state;
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+
+ switch (en->type) {
+ case ENCLOSE_MEMORY:
+ {
+ if (env->curr_max_regnum < en->regnum) /* track the highest group number seen */
+ env->curr_max_regnum = en->regnum;
+
+ r = setup_comb_exp_check(en->target, state, env);
+ }
+ break;
+
+ default:
+ r = setup_comb_exp_check(en->target, state, env);
+ break;
+ }
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ if (IS_CALL_RECURSION(NCALL(node)))
+ env->has_recursion = 1; /* recursive call: noted, target is not entered */
+ else
+ r = setup_comb_exp_check(NCALL(node)->target, state, env);
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ return r;
+}
+#endif
+
+#define IN_ALT (1<<0) /* below an alternative */
+#define IN_NOT (1<<1) /* below a negative look-ahead / look-behind */
+#define IN_REPEAT (1<<2) /* below a quantifier */
+#define IN_VAR_REPEAT (1<<3) /* below a quantifier with lower != upper */
+
+/* setup_tree does the following work.
+ 1. check empty loop. (set qn->target_empty_info)
+ 2. expand ignore-case in char class.
+ 3. set memory status bit flags. (reg->mem_stats)
+ 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+ 5. find invalid patterns in look-behind.
+ 6. expand repeated string.
+
+ `state` is a set of the IN_* flags above describing the context of `node`.
+ Returns 0 on success; any other value is returned to the caller unchanged
+ (e.g. ONIGERR_INVALID_BACKREF, ONIGERR_INVALID_LOOK_BEHIND_PATTERN).
+ */
+static int
+setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+{
+ int type;
+ int r = 0;
+
+ type = NTYPE(node);
+ switch (type) {
+ case NT_LIST:
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_tree(NCAR(node), reg, state, env);
+ if (IS_NOT_NULL(prev) && r == 0) {
+ r = next_setup(prev, NCAR(node), reg); /* tune the previous element against this one */
+ }
+ prev = NCAR(node);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ }
+ break;
+
+ case NT_ALT:
+ do {
+ r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
+ } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ break;
+
+ case NT_CCLASS:
+ break;
+
+ case NT_STR:
+ if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
+ r = expand_case_fold_string(node, reg);
+ }
+ break;
+
+ case NT_CTYPE:
+ case NT_CANY:
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ break;
+#endif
+
+ case NT_BREF:
+ {
+ int i;
+ int* p;
+ Node** nodes = SCANENV_MEM_NODES(env);
+ BRefNode* br = NBREF(node);
+ p = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
+ BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (IS_BACKREF_NEST_LEVEL(br)) {
+ BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ }
+#endif
+ SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+ }
+ }
+ break;
+
+ case NT_QTFR:
+ {
+ OnigDistance d;
+ QtfrNode* qn = NQTFR(node);
+ Node* target = qn->target;
+
+ if ((state & IN_REPEAT) != 0) {
+ qn->state |= NST_IN_REPEAT;
+ }
+
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ r = get_min_match_length(target, &d, env);
+ if (r) break;
+ if (d == 0) { /* the target has minimum match length 0 */
+ qn->target_empty_info = NQ_TARGET_IS_EMPTY;
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+ r = quantifiers_memory_node_info(target);
+ if (r < 0) break;
+ if (r > 0) {
+ qn->target_empty_info = r;
+ }
+#endif
+ }
+ }
+
+ state |= IN_REPEAT;
+ if (qn->lower != qn->upper)
+ state |= IN_VAR_REPEAT;
+ r = setup_tree(target, reg, state, env);
+ if (r) break;
+
+ /* expand string */
+#define EXPAND_STRING_MAX_LENGTH 100
+ if (NTYPE(target) == NT_STR) {
+ if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int len = NSTRING_LEN(target);
+ StrNode* sn = NSTR(target);
+
+ if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int i, n = qn->lower;
+ onig_node_conv_to_str_node(node, NSTR(target)->flag); /* the quantifier node itself becomes the string */
+ for (i = 0; i < n; i++) {
+ r = onig_node_str_cat(node, sn->s, sn->end);
+ if (r) break;
+ }
+ onig_node_free(target);
+ break; /* break case NT_QTFR: */
+ }
+ }
+ }
+
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
+ if (qn->greedy && (qn->target_empty_info != 0)) {
+ if (NTYPE(target) == NT_QTFR) {
+ QtfrNode* tqn = NQTFR(target);
+ if (IS_NOT_NULL(tqn->head_exact)) {
+ qn->head_exact = tqn->head_exact;
+ tqn->head_exact = NULL;
+ }
+ }
+ else {
+ qn->head_exact = get_head_value_node(qn->target, 1, reg);
+ }
+ }
+#endif
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+
+ switch (en->type) {
+ case ENCLOSE_OPTION:
+ {
+ OnigOptionType options = reg->options; /* saved and restored around the sub-tree */
+ reg->options = NENCLOSE(node)->option;
+ r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ reg->options = options;
+ }
+ break;
+
+ case ENCLOSE_MEMORY:
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+ BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
+ /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+ }
+ r = setup_tree(en->target, reg, state, env);
+ break;
+
+ case ENCLOSE_STOP_BACKTRACK:
+ {
+ Node* target = en->target;
+ r = setup_tree(target, reg, state, env);
+ if (NTYPE(target) == NT_QTFR) {
+ QtfrNode* tqn = NQTFR(target);
+ if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ tqn->greedy != 0) { /* (?>a*), a*+ etc... */
+ int qtype = NTYPE(tqn->target);
+ if (IS_NODE_TYPE_SIMPLE(qtype))
+ SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
+ }
+ }
+ }
+ break;
+ }
+ }
+ break;
+
+ case NT_ANCHOR:
+ {
+ AnchorNode* an = NANCHOR(node);
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ r = setup_tree(an->target, reg, state, env);
+ break;
+ case ANCHOR_PREC_READ_NOT:
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ break;
+
+/* allowed node types in look-behind */
+#define ALLOWED_TYPE_IN_LB \
+ ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
+ BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
+
+#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
+#define ALLOWED_ENCLOSE_IN_LB_NOT 0
+
+#define ALLOWED_ANCHOR_IN_LB \
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+#define ALLOWED_ANCHOR_IN_LB_NOT \
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+
+ case ANCHOR_LOOK_BEHIND:
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = setup_look_behind(node, reg, env);
+ if (r != 0) return r;
+ r = setup_tree(an->target, reg, state, env);
+ }
+ break;
+
+ case ANCHOR_LOOK_BEHIND_NOT:
+ {
+ r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = setup_look_behind(node, reg, env);
+ if (r != 0) return r;
+ r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ }
+ break;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+/* Set the skip map for Boyer-Moore search over the byte string [s, end).
+ *
+ * Strings shorter than ONIG_CHAR_TABLE_SIZE use the byte-sized table SKIP;
+ * longer ones use an int table kept in *INT_SKIP, which is allocated here
+ * when it is still NULL.  Every entry starts out as the string length; for
+ * each byte except the last one the entry becomes its distance from the
+ * last position (a later occurrence overrides an earlier one).
+ *
+ * Returns 0, or ONIGERR_MEMORY when the int table cannot be allocated.
+ */
+static int
+set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
+            UChar skip[], int** int_skip)
+{
+  int idx;
+  int len = end - s;
+  int last = len - 1;
+
+  if (len >= ONIG_CHAR_TABLE_SIZE) {
+    int* table = *int_skip;
+
+    if (IS_NULL(table)) {
+      table = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+      if (IS_NULL(table)) return ONIGERR_MEMORY;
+      *int_skip = table;
+    }
+
+    for (idx = 0; idx < ONIG_CHAR_TABLE_SIZE; idx++)
+      table[idx] = len;
+    for (idx = 0; idx < last; idx++)
+      table[s[idx]] = last - idx;
+
+    return 0;
+  }
+
+  for (idx = 0; idx < ONIG_CHAR_TABLE_SIZE; idx++)
+    skip[idx] = len;
+  for (idx = 0; idx < last; idx++)
+    skip[s[idx]] = last - idx;
+
+  return 0;
+}
+
+#define OPT_EXACT_MAXLEN 24 /* capacity in bytes of OptExactInfo.s */
+
+typedef struct { /* a [min, max] pair of byte distances */
+ OnigDistance min; /* min byte length */
+ OnigDistance max; /* max byte length */
+} MinMaxLen;
+
+typedef struct { /* context handed along while optimization info is collected */
+ MinMaxLen mmd; /* presumably the distance from the pattern start to the current node - TODO confirm */
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigCaseFoldType case_fold_flag;
+ ScanEnv* scan_env;
+} OptEnv;
+
+typedef struct { /* two sets of anchor flags, see add_opt_anc_info() */
+ int left_anchor; /* anchors for which is_left_anchor() is true */
+ int right_anchor; /* END_BUF, SEMI_END_BUF, END_LINE, PREC_READ, PREC_READ_NOT */
+} OptAncInfo;
+
+typedef struct { /* an exact byte string candidate */
+ MinMaxLen mmd; /* info position */
+ OptAncInfo anc;
+
+ int reach_end; /* cleared when the string was cut short or only partly merged */
+ int ignore_case; /* non-zero when the string is a case-insensitive one */
+ int len; /* number of bytes used in s */
+ UChar s[OPT_EXACT_MAXLEN];
+} OptExactInfo;
+
+typedef struct { /* a set of candidate bytes */
+ MinMaxLen mmd; /* info position */
+ OptAncInfo anc;
+
+ int value; /* weighted value */
+ UChar map[ONIG_CHAR_TABLE_SIZE]; /* map[c] != 0 when byte c belongs to the set */
+} OptMapInfo;
+
+typedef struct { /* all optimization info gathered for one node */
+ MinMaxLen len;
+
+ OptAncInfo anc;
+ OptExactInfo exb; /* boundary */
+ OptExactInfo exm; /* middle */
+ OptExactInfo expr; /* prec read (?=...) */
+
+ OptMapInfo map; /* boundary */
+} NodeOptInfo;
+
+
+/* Weight of byte value I when it is used as a search candidate.
+ * Values inside the table get their table entry, except that byte 0 is
+ * weighted 20 when the encoding's minimum character length exceeds one
+ * byte; values beyond the table get 4.
+ */
+static int
+map_position_value(OnigEncoding enc, int i)
+{
+  static const short int ByteValTable[] = {
+     5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+    12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
+     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
+     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
+     5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+     6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
+  };
+  const int table_len = (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]));
+
+  if (i >= table_len)
+    return 4;   /* Take it easy. */
+
+  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
+    return 20;
+
+  return (int )ByteValTable[i];
+}
+
+/* Weight a position range by its width: roughly 1000 / (max - min + 1).
+ *
+ * Returns 0 when mm->max is ONIG_INFINITE_DISTANCE, the table entry for
+ * widths covered by the table, and 1 for anything wider.
+ *
+ * The width is kept in OnigDistance, the type of the operands.  Storing it
+ * in an int could turn a large finite width into a negative value, which
+ * passed the bounds test and indexed the table out of range.
+ */
+static int
+distance_value(MinMaxLen* mm)
+{
+  /* 1000 / (min-max-dist + 1) */
+  static const short int dist_vals[] = {
+    1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
+      91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
+      48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
+      32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
+      24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
+      20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
+      16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
+      14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
+      12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
+      11,   11,   11,   11,   11,   10,   10,   10,   10,   10
+  };
+
+  OnigDistance d;
+
+  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
+
+  d = mm->max - mm->min;
+  if (d < (OnigDistance )(sizeof(dist_vals)/sizeof(dist_vals[0])))
+    return (int )dist_vals[d];
+
+  return 1;
+}
+
+/* Compare two candidates whose raw scores are V1 and V2 and whose position
+ * ranges are D1 and D2.  A score <= 0 loses outright (V2 is tested first).
+ * Otherwise each score is multiplied by distance_value() of its range and
+ * the larger product wins; on a tie the smaller minimum distance wins.
+ *
+ * Returns 1 when the second candidate is preferable, -1 when the first one
+ * is, 0 when they are equivalent.
+ */
+static int
+comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
+{
+  int w1, w2;
+
+  if (v2 <= 0) return -1;
+  if (v1 <= 0) return 1;
+
+  w1 = v1 * distance_value(d1);
+  w2 = v2 * distance_value(d2);
+
+  if (w2 != w1)
+    return (w2 > w1) ? 1 : -1;
+
+  if (d2->min != d1->min)
+    return (d2->min < d1->min) ? 1 : -1;
+
+  return 0;
+}
+
+/* Return 1 when both ranges have the same min and the same max, else 0. */
+static int
+is_equal_mml(MinMaxLen* a, MinMaxLen* b)
+{
+  if (a->min != b->min) return 0;
+  if (a->max != b->max) return 0;
+  return 1;
+}
+
+
+/* Store the bounds MIN and MAX in *MML. */
+static void
+set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
+{
+  mml->max = max;
+  mml->min = min;
+}
+
+/* Reset both bounds of *MML to 0. */
+static void
+clear_mml(MinMaxLen* mml)
+{
+  mml->min = 0;
+  mml->max = 0;
+}
+
+/* Copy the range *FROM into *TO (MinMaxLen consists of min and max only). */
+static void
+copy_mml(MinMaxLen* to, MinMaxLen* from)
+{
+  *to = *from;
+}
+
+/* Add the bounds of *FROM to those of *TO, using distance_add() for each. */
+static void
+add_mml(MinMaxLen* to, MinMaxLen* from)
+{
+  OnigDistance lo = distance_add(to->min, from->min);
+  OnigDistance hi = distance_add(to->max, from->max);
+
+  to->min = lo;
+  to->max = hi;
+}
+
+/* Widen *TO so that it also covers *FROM: smaller min, larger max. */
+static void
+alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
+{
+  to->min = (from->min < to->min) ? from->min : to->min;
+  to->max = (from->max > to->max) ? from->max : to->max;
+}
+
+static void /* Copy the whole OptEnv structure *from into *to by struct assignment. */
+copy_opt_env(OptEnv* to, OptEnv* from)
+{
+ *to = *from;
+}
+
+/* Drop every anchor flag, left and right, from *ANC. */
+static void
+clear_opt_anc_info(OptAncInfo* anc)
+{
+  anc->left_anchor = anc->right_anchor = 0;
+}
+
+static void /* Copy both anchor flag sets of *from into *to by struct assignment. */
+copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
+{
+ *to = *from;
+}
+
+/* Anchor info of the concatenation LEFT RIGHT, written to *TO.
+ * The left anchors come from LEFT, plus those of RIGHT when LEFT_LEN is 0;
+ * the right anchors come from RIGHT, plus those of LEFT when RIGHT_LEN is 0.
+ * *TO is cleared first.
+ */
+static void
+concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
+                    OnigDistance left_len, OnigDistance right_len)
+{
+  clear_opt_anc_info(to);
+
+  to->left_anchor = left->left_anchor
+                  | (left_len == 0 ? right->left_anchor : 0);
+
+  to->right_anchor = right->right_anchor
+                   | (right_len == 0 ? left->right_anchor : 0);
+}
+
+/* Return 0 for the anchors that are stored on the right side
+ * (END_BUF, SEMI_END_BUF, END_LINE, PREC_READ, PREC_READ_NOT), 1 for any
+ * other value.
+ */
+static int
+is_left_anchor(int anc)
+{
+  int on_right = (anc == ANCHOR_END_BUF ||
+                  anc == ANCHOR_SEMI_END_BUF ||
+                  anc == ANCHOR_END_LINE ||
+                  anc == ANCHOR_PREC_READ ||
+                  anc == ANCHOR_PREC_READ_NOT);
+
+  return on_right ? 0 : 1;
+}
+
+/* Return 1 when any bit of ANC is present in the left or the right anchor
+ * set of *TO, else 0.
+ */
+static int
+is_set_opt_anc_info(OptAncInfo* to, int anc)
+{
+  int all = to->left_anchor | to->right_anchor;
+
+  return ((all & anc) != 0) ? 1 : 0;
+}
+
+/* Add anchor ANC to the side of *TO chosen by is_left_anchor(). */
+static void
+add_opt_anc_info(OptAncInfo* to, int anc)
+{
+  int* side = is_left_anchor(anc) ? &to->left_anchor : &to->right_anchor;
+
+  *side |= anc;
+}
+
+/* Remove anchor ANC from the side of *TO chosen by is_left_anchor(). */
+static void
+remove_opt_anc_info(OptAncInfo* to, int anc)
+{
+  int* side = is_left_anchor(anc) ? &to->left_anchor : &to->right_anchor;
+
+  *side &= ~anc;
+}
+
+/* Merge for alternatives: keep in *TO only the anchors that *ADD has too. */
+static void
+alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
+{
+  to->left_anchor  = to->left_anchor  & add->left_anchor;
+  to->right_anchor = to->right_anchor & add->right_anchor;
+}
+
+/* Return 1 when the stored string has reached OPT_EXACT_MAXLEN, else 0. */
+static int
+is_full_opt_exact_info(OptExactInfo* ex)
+{
+  if (ex->len < OPT_EXACT_MAXLEN) return 0;
+  return 1;
+}
+
+/* Reset *EX to "no exact string": zero range, no anchors, flags off,
+ * length 0 and a NUL in the first byte of the buffer.
+ */
+static void
+clear_opt_exact_info(OptExactInfo* ex)
+{
+  ex->s[0] = '\0';
+  ex->len = 0;
+  ex->ignore_case = 0;
+  ex->reach_end = 0;
+  clear_opt_anc_info(&ex->anc);
+  clear_mml(&ex->mmd);
+}
+
+static void /* Copy the whole OptExactInfo (range, anchors, flags, length and bytes) from *from into *to. */
+copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
+{
+ *to = *from;
+}
+
+/* Append the exact string of *ADD to that of *TO.
+ *
+ * When *TO is case-sensitive and *ADD is not, nothing is appended if *TO is
+ * already at least as long as *ADD; otherwise *TO becomes case-insensitive.
+ * Characters are copied whole (enclen) and copying stops before the first
+ * one that would exceed OPT_EXACT_MAXLEN.  reach_end is taken from *ADD
+ * only when all of it was copied, and the right anchors are dropped when
+ * the end was not reached.
+ */
+static void
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
+{
+  int pos, k, clen;
+  UChar *src, *src_end;
+  OptAncInfo merged;
+
+  if (add->ignore_case && ! to->ignore_case) {
+    if (to->len >= add->len) return ;  /* avoid */
+
+    to->ignore_case = 1;
+  }
+
+  src = add->s;
+  src_end = src + add->len;
+  pos = to->len;
+  while (src < src_end) {
+    clen = enclen(enc, src, src_end);
+    if (pos + clen > OPT_EXACT_MAXLEN) break;
+    for (k = 0; k < clen && src < src_end; k++)
+      to->s[pos++] = *src++;
+  }
+
+  to->len = pos;
+  to->reach_end = (src == src_end) ? add->reach_end : 0;
+
+  concat_opt_anc_info(&merged, &to->anc, &add->anc, 1, 1);
+  if (! to->reach_end) merged.right_anchor = 0;
+  copy_opt_anc_info(&to->anc, &merged);
+}
+
+/* Append the bytes [s, end) to the exact string of *TO.
+ * Characters are copied whole (enclen); copying stops when the buffer is
+ * full or the next character would exceed OPT_EXACT_MAXLEN.  Only to->s and
+ * to->len are changed.  RAW is not used.
+ */
+static void
+concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
+                          int raw ARG_UNUSED, OnigEncoding enc)
+{
+  int pos = to->len;
+  int k, clen;
+  UChar* src = s;
+
+  while (src < end && pos < OPT_EXACT_MAXLEN) {
+    clen = enclen(enc, src, end);
+    if (pos + clen > OPT_EXACT_MAXLEN) break;
+    for (k = 0; k < clen && src < end; k++)
+      to->s[pos++] = *src++;
+  }
+
+  to->len = pos;
+}
+
+/* Merge for alternatives: reduce *TO to the prefix it shares with *ADD.
+ *
+ * *TO is cleared when either string is empty or the two position ranges
+ * differ.  Otherwise the strings are compared character by character
+ * (whole characters, by enclen) and *TO is cut at the first difference.
+ * reach_end survives only when *ADD reaches its end and both strings were
+ * matched completely; ignore_case is OR-ed, anchors are intersected, and the
+ * right anchors are dropped when the end is not reached.
+ */
+static void
+alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
+{
+  int common, k, clen;
+
+  if (add->len == 0 || to->len == 0 ||
+      ! is_equal_mml(&to->mmd, &add->mmd)) {
+    clear_opt_exact_info(to);
+    return ;
+  }
+
+  common = 0;
+  while (common < to->len && common < add->len) {
+    if (to->s[common] != add->s[common]) break;
+    clen = enclen(env->enc, to->s + common, to->s + to->len);
+
+    /* the remaining bytes of this character have to agree as well */
+    k = 1;
+    while (k < clen && to->s[common + k] == add->s[common + k])
+      k++;
+    if (k < clen) break;
+
+    common += clen;
+  }
+
+  if (! add->reach_end || common < add->len || common < to->len) {
+    to->reach_end = 0;
+  }
+  to->len = common;
+  to->ignore_case |= add->ignore_case;
+
+  alt_merge_opt_anc_info(&to->anc, &add->anc);
+  if (! to->reach_end) to->anc.right_anchor = 0;
+}
+
+/* Keep in *NOW the better of the two exact strings *NOW and *ALT.
+ *
+ * An empty *ALT never wins; an empty *NOW always loses.  The scores start
+ * as the string lengths.  When both strings are at most 2 bytes long they
+ * are replaced by byte weights, crossed on purpose: the score of NOW is the
+ * weight of ALT's first byte and vice versa (a big table value means a low
+ * price), plus 5 for a string longer than one byte.  Case-sensitive strings
+ * count double.  comp_distance_value() makes the final decision.
+ */
+static void
+select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
+{
+  int score_now = now->len;
+  int score_alt = alt->len;
+
+  if (score_alt == 0) return ;
+
+  if (score_now == 0) {
+    copy_opt_exact_info(now, alt);
+    return ;
+  }
+
+  if (score_now <= 2 && score_alt <= 2) {
+    /* ByteValTable[x] is big value --> low price */
+    score_alt = map_position_value(enc, now->s[0]);
+    score_now = map_position_value(enc, alt->s[0]);
+
+    if (now->len > 1) score_now += 5;
+    if (alt->len > 1) score_alt += 5;
+  }
+
+  if (now->ignore_case == 0) score_now *= 2;
+  if (alt->ignore_case == 0) score_alt *= 2;
+
+  if (comp_distance_value(&now->mmd, &alt->mmd, score_now, score_alt) > 0)
+    copy_opt_exact_info(now, alt);
+}
+
+/* Reset *MAP to the empty state: zero range, no anchors, value 0 and an
+ * all-zero byte map.  The state is copied from a constant template.
+ */
+static void
+clear_opt_map_info(OptMapInfo* map)
+{
+  /* array elements without an initializer are zero, so one 0 suffices */
+  static const OptMapInfo clean_info = {
+    {0, 0}, {0, 0}, 0,
+    { 0 }
+  };
+
+  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
+}
+
+static void /* Copy the whole OptMapInfo (range, anchors, value and byte map) from *from into *to. */
+copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
+{
+ *to = *from;
+}
+
+/* Add byte C to the map unless it is already there; a newly added byte
+ * raises map->value by its map_position_value().
+ */
+static void
+add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
+{
+  if (map->map[c] != 0) return ;
+
+  map->map[c] = 1;
+  map->value += map_position_value(enc, c);
+}
+
+/* Add the first byte of the character at P and the first byte of each of
+ * its single-character case-fold variants to the map.
+ * Multi-character folding is switched off in the flag before the variants
+ * are looked up.
+ *
+ * Returns 0, or the negative result of the case-fold lookup.
+ */
+static int
+add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
+                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
+{
+  OnigCaseFoldCodeItem variants[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+  UChar encoded[ONIGENC_CODE_TO_MBC_MAXLEN];
+  int k, count;
+
+  add_char_opt_map_info(map, p[0], enc);
+
+  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
+  count = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end,
+                                             variants);
+  if (count < 0) return count;
+
+  k = 0;
+  while (k < count) {
+    ONIGENC_CODE_TO_MBC(enc, variants[k].code[0], encoded);
+    add_char_opt_map_info(map, encoded[0], enc);
+    k++;
+  }
+
+  return 0;
+}
+
+/*
+ * Keep the better of two character maps in `now`.
+ *
+ * A map whose value is 0 counts as empty.  For two non-empty maps a score
+ * inversely proportional to the value is handed to comp_distance_value();
+ * a positive result makes `alt` replace `now`.
+ */
+static void
+select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
+{
+  const int scale = 1<<15;  /* 32768: something big value */
+  int now_score, alt_score;
+
+  if (alt->value == 0) return;
+
+  if (now->value == 0) {
+    copy_opt_map_info(now, alt);
+    return;
+  }
+
+  now_score = scale / now->value;
+  alt_score = scale / alt->value;
+
+  if (comp_distance_value(&now->mmd, &alt->mmd, now_score, alt_score) > 0)
+    copy_opt_map_info(now, alt);
+}
+
+/*
+ * Compare an exact-string candidate `e` with a character map `m`.
+ *
+ * Returns -1 right away when the map has no positive value; otherwise
+ * returns the result of comp_distance_value() on the two scores.
+ */
+static int
+comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
+{
+#define COMP_EM_BASE 20
+  int case_weight;
+  int exact_score, map_score;
+
+  if (m->value <= 0) return -1;
+
+  /* a case-sensitive string counts twice as much as a case-folded one */
+  case_weight = e->ignore_case ? 1 : 2;
+  exact_score = COMP_EM_BASE * e->len * case_weight;
+  map_score   = COMP_EM_BASE * 5 * 2 / m->value;
+
+  return comp_distance_value(&e->mmd, &m->mmd, exact_score, map_score);
+}
+
+/*
+ * Merge the map of another alternative (`add`) into `to`.
+ *
+ * An already-empty `to` stays empty.  `to` is cleared when `add` is empty or
+ * when `to`'s maximum distance is smaller than `add`'s minimum distance.
+ * Otherwise the byte sets are united, the value is recomputed from the
+ * united set, and the distance and anchor info are merged.
+ */
+static void
+alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
+{
+  int ch, total;
+
+  if (to->value == 0) return;
+
+  if (add->value == 0 || to->mmd.max < add->mmd.min) {
+    clear_opt_map_info(to);
+    return;
+  }
+
+  alt_merge_mml(&to->mmd, &add->mmd);
+
+  total = 0;
+  for (ch = 0; ch < ONIG_CHAR_TABLE_SIZE; ch++) {
+    if (add->map[ch])
+      to->map[ch] = 1;
+
+    if (! to->map[ch]) continue;
+    total += map_position_value(enc, ch);
+  }
+  to->value = total;
+
+  alt_merge_opt_anc_info(&to->anc, &add->anc);
+}
+
+/*
+ * Copy the distance range `mmd` into the exb, expr and map members of `opt`.
+ * (exm is not touched here.)
+ */
+static void
+set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
+{
+ copy_mml(&(opt->exb.mmd), mmd);
+ copy_mml(&(opt->expr.mmd), mmd);
+ copy_mml(&(opt->map.mmd), mmd);
+}
+
+/*
+ * Reset every member of `opt`: length range, anchors, the three exact-string
+ * candidates (exb, exm, expr) and the character map.
+ */
+static void
+clear_node_opt_info(NodeOptInfo* opt)
+{
+ clear_mml(&opt->len);
+ clear_opt_anc_info(&opt->anc);
+ clear_opt_exact_info(&opt->exb);
+ clear_opt_exact_info(&opt->exm);
+ clear_opt_exact_info(&opt->expr);
+ clear_opt_map_info(&opt->map);
+}
+
+/* Copy the whole node optimization record `from` into `to`. */
+static void
+copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
+{
+ *to = *from;
+}
+
+/*
+ * Append the optimization info of `add` (the next element of a sequence) to
+ * `to` (the info accumulated for the elements before it).
+ *
+ * Note that `add` is modified too: the anchor info of its exb/map may be
+ * updated, and its exb is cleared once it has been concatenated into `to`.
+ */
+static void
+concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
+{
+ int exb_reach, exm_reach;
+ OptAncInfo tanc;
+
+ /* combine the anchors through a temporary, then store them back into `to` */
+ concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
+ copy_opt_anc_info(&to->anc, &tanc);
+
+ /* nothing consumed so far (max length 0): anchors of `to` also apply to add's exb */
+ if (add->exb.len > 0 && to->len.max == 0) {
+ concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
+ to->len.max, add->len.max);
+ copy_opt_anc_info(&add->exb.anc, &tanc);
+ }
+
+ if (add->map.value > 0 && to->len.max == 0) {
+ if (add->map.mmd.max == 0)
+ add->map.anc.left_anchor |= to->anc.left_anchor;
+ }
+
+ /* remember the reach_end flags before they are reset below */
+ exb_reach = to->exb.reach_end;
+ exm_reach = to->exm.reach_end;
+
+ if (add->len.max != 0)
+ to->exb.reach_end = to->exm.reach_end = 0;
+
+ /* extend whichever exact string of `to` reached its end with add's exb */
+ if (add->exb.len > 0) {
+ if (exb_reach) {
+ concat_opt_exact_info(&to->exb, &add->exb, enc);
+ clear_opt_exact_info(&add->exb);
+ }
+ else if (exm_reach) {
+ concat_opt_exact_info(&to->exm, &add->exb, enc);
+ clear_opt_exact_info(&add->exb);
+ }
+ }
+ select_opt_exact_info(enc, &to->exm, &add->exb);
+ select_opt_exact_info(enc, &to->exm, &add->exm);
+
+ if (to->expr.len > 0) {
+ if (add->len.max > 0) {
+ /* clamp expr to the maximum length of `add` */
+ if (to->expr.len > (int )add->len.max)
+ to->expr.len = add->len.max;
+
+ if (to->expr.mmd.max == 0)
+ select_opt_exact_info(enc, &to->exb, &to->expr);
+ else
+ select_opt_exact_info(enc, &to->exm, &to->expr);
+ }
+ }
+ else if (add->expr.len > 0) {
+ copy_opt_exact_info(&to->expr, &add->expr);
+ }
+
+ select_opt_map_info(&to->map, &add->map);
+
+ add_mml(&to->len, &add->len);
+}
+
+/*
+ * Merge the optimization info of another alternative (`add`) into `to`,
+ * member by member: anchors, the three exact-string candidates, the
+ * character map and finally the length range.
+ */
+static void
+alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
+{
+ alt_merge_opt_anc_info (&to->anc, &add->anc);
+ alt_merge_opt_exact_info(&to->exb, &add->exb, env);
+ alt_merge_opt_exact_info(&to->exm, &add->exm, env);
+ alt_merge_opt_exact_info(&to->expr, &add->expr, env);
+ alt_merge_opt_map_info(env->enc, &to->map, &add->map);
+
+ alt_merge_mml(&to->len, &add->len);
+}
+
+
+#define MAX_NODE_OPT_INFO_REF_COUNT 5
+
+/*
+ * Recursively collect optimization info for the parse tree rooted at `node`.
+ *
+ * `opt` is cleared first and then filled with the length range, anchors,
+ * exact-string candidates and first-byte map found for the node.  `env`
+ * carries the encoding, the active options, the case-fold flag, the scan
+ * environment and the distance range at which the node starts.
+ *
+ * Returns 0 on success, a non-zero code propagated from a helper or a
+ * recursive call, or ONIGERR_TYPE_BUG for an unknown node type.
+ */
+static int
+optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
+{
+ int type;
+ int r = 0;
+
+ clear_node_opt_info(opt);
+ set_bound_node_opt_info(opt, &env->mmd);
+
+ type = NTYPE(node);
+ switch (type) {
+ case NT_LIST:
+ {
+ /* sequence: analyze each element with a private env whose distance
+ advances by the length of the elements already seen */
+ OptEnv nenv;
+ NodeOptInfo nopt;
+ Node* nd = node;
+
+ copy_opt_env(&nenv, env);
+ do {
+ r = optimize_node_left(NCAR(nd), &nopt, &nenv);
+ if (r == 0) {
+ add_mml(&nenv.mmd, &nopt.len);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
+ }
+ } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
+ }
+ break;
+
+ case NT_ALT:
+ {
+ /* alternation: the first branch is copied, later ones are merged in */
+ NodeOptInfo nopt;
+ Node* nd = node;
+
+ do {
+ r = optimize_node_left(NCAR(nd), &nopt, env);
+ if (r == 0) {
+ if (nd == node) copy_node_opt_info(opt, &nopt);
+ else alt_merge_node_opt_info(opt, &nopt, env);
+ }
+ } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
+ }
+ break;
+
+ case NT_STR:
+ {
+ StrNode* sn = NSTR(node);
+ int slen = sn->end - sn->s;
+ int is_raw = NSTRING_IS_RAW(node);
+
+ if (! NSTRING_IS_AMBIG(node)) {
+ /* unambiguous string: it is its own exact prefix, length is fixed */
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ NSTRING_IS_RAW(node), env->enc);
+ if (slen > 0) {
+ add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
+ }
+ set_mml(&opt->len, slen, slen);
+ }
+ else {
+ int max;
+
+ if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
+ /* no exact/map info is taken; only a maximum length is estimated */
+ int n = onigenc_strlen(env->enc, sn->s, sn->end);
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+ }
+ else {
+ concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+ is_raw, env->enc);
+ opt->exb.ignore_case = 1;
+
+ if (slen > 0) {
+ r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+ env->enc, env->case_fold_flag);
+ if (r != 0) break;
+ }
+
+ max = slen;
+ }
+
+ set_mml(&opt->len, slen, max);
+ }
+
+ /* the whole string was stored in exb */
+ if (opt->exb.len == slen)
+ opt->exb.reach_end = 1;
+ }
+ break;
+
+ case NT_CCLASS:
+ {
+ int i, z;
+ CClassNode* cc = NCCLASS(node);
+
+ /* no need to check ignore case. (set in setup_tree()) */
+
+ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
+ /* multibyte ranges or a negated class: only a length range is recorded */
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ set_mml(&opt->len, min, max);
+ }
+ else {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = BITSET_AT(cc->bs, i);
+ if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ set_mml(&opt->len, 1, 1);
+ }
+ }
+ break;
+
+ case NT_CTYPE:
+ {
+ int i, min, max;
+
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+
+ if (max == 1) {
+ min = 1;
+
+ /* only the WORD ctype contributes to the map; other ctypes add nothing */
+ switch (NCTYPE(node)->ctype) {
+ case ONIGENC_CTYPE_WORD:
+ if (NCTYPE(node)->not != 0) {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ else {
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
+ }
+ break;
+ }
+ }
+ else {
+ min = ONIGENC_MBC_MINLEN(env->enc);
+ }
+ set_mml(&opt->len, min, max);
+ }
+ break;
+
+ case NT_CANY:
+ {
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ set_mml(&opt->len, min, max);
+ }
+ break;
+
+ case NT_ANCHOR:
+ switch (NANCHOR(node)->type) {
+ case ANCHOR_BEGIN_BUF:
+ case ANCHOR_BEGIN_POSITION:
+ case ANCHOR_BEGIN_LINE:
+ case ANCHOR_END_BUF:
+ case ANCHOR_SEMI_END_BUF:
+ case ANCHOR_END_LINE:
+ add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
+ break;
+
+ case ANCHOR_PREC_READ:
+ {
+ /* look-ahead: its exact string becomes `expr`, its map is taken over;
+ opt->len is left as cleared */
+ NodeOptInfo nopt;
+
+ r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
+ if (r == 0) {
+ if (nopt.exb.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exb);
+ else if (nopt.exm.len > 0)
+ copy_opt_exact_info(&opt->expr, &nopt.exm);
+
+ opt->expr.reach_end = 0;
+
+ if (nopt.map.value > 0)
+ copy_opt_map_info(&opt->map, &nopt.map);
+ }
+ }
+ break;
+
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
+ case ANCHOR_LOOK_BEHIND_NOT:
+ break;
+ }
+ break;
+
+ case NT_BREF:
+ {
+ int i;
+ int* backs;
+ OnigDistance min, max, tmin, tmax;
+ Node** nodes = SCANENV_MEM_NODES(env->scan_env);
+ BRefNode* br = NBREF(node);
+
+ if (br->state & NST_RECURSION) {
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ break;
+ }
+ /* length range = smallest minimum and largest maximum over all
+ referenced groups */
+ backs = BACKREFS_P(br);
+ r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
+ if (r != 0) break;
+ r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
+ if (r != 0) break;
+ for (i = 1; i < br->back_num; i++) {
+ r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
+ if (r != 0) break;
+ r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
+ if (r != 0) break;
+ if (min > tmin) min = tmin;
+ if (max < tmax) max = tmax;
+ }
+ if (r == 0) set_mml(&opt->len, min, max);
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NT_CALL:
+ if (IS_CALL_RECURSION(NCALL(node)))
+ set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
+ else {
+ /* analyze the call target under the target's own options */
+ OnigOptionType save = env->options;
+ env->options = NENCLOSE(NCALL(node)->target)->option;
+ r = optimize_node_left(NCALL(node)->target, opt, env);
+ env->options = save;
+ }
+ break;
+#endif
+
+ case NT_QTFR:
+ {
+ int i;
+ OnigDistance min, max;
+ NodeOptInfo nopt;
+ QtfrNode* qn = NQTFR(node);
+
+ r = optimize_node_left(qn->target, &nopt, env);
+ if (r) break;
+
+ if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
+ /* greedy any-char star at distance 0 is recorded as an implicit anchor */
+ if (env->mmd.max == 0 &&
+ NTYPE(qn->target) == NT_CANY && qn->greedy) {
+ if (IS_MULTILINE(env->options))
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
+ else
+ add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
+ }
+ }
+ else {
+ if (qn->lower > 0) {
+ /* at least one repetition is mandatory: start from the target's info */
+ copy_node_opt_info(opt, &nopt);
+ if (nopt.exb.len > 0) {
+ if (nopt.exb.reach_end) {
+ /* repeat the exact string up to `lower` times or until exb is full */
+ for (i = 2; i <= qn->lower &&
+ ! is_full_opt_exact_info(&opt->exb); i++) {
+ concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
+ }
+ if (i < qn->lower) {
+ opt->exb.reach_end = 0;
+ }
+ }
+ }
+
+ if (qn->lower != qn->upper) {
+ opt->exb.reach_end = 0;
+ opt->exm.reach_end = 0;
+ }
+ if (qn->lower > 1)
+ opt->exm.reach_end = 0;
+ }
+ }
+
+ min = distance_multiply(nopt.len.min, qn->lower);
+ if (IS_REPEAT_INFINITE(qn->upper))
+ max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
+ else
+ max = distance_multiply(nopt.len.max, qn->upper);
+
+ set_mml(&opt->len, min, max);
+ }
+ break;
+
+ case NT_ENCLOSE:
+ {
+ EncloseNode* en = NENCLOSE(node);
+
+ switch (en->type) {
+ case ENCLOSE_OPTION:
+ {
+ /* analyze the body under the group's options, then restore them */
+ OnigOptionType save = env->options;
+
+ env->options = en->option;
+ r = optimize_node_left(en->target, opt, env);
+ env->options = save;
+ }
+ break;
+
+ case ENCLOSE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ /* after MAX_NODE_OPT_INFO_REF_COUNT visits of this group, stop
+ descending and use only its fixed min/max length if known */
+ en->opt_count++;
+ if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
+ OnigDistance min, max;
+
+ min = 0;
+ max = ONIG_INFINITE_DISTANCE;
+ if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
+ if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
+ set_mml(&opt->len, min, max);
+ }
+ else
+#endif
+ {
+ r = optimize_node_left(en->target, opt, env);
+
+ /* drop the any-char-star anchor when this group is back-referenced */
+ if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
+ if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
+ remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
+ }
+ }
+ break;
+
+ case ENCLOSE_STOP_BACKTRACK:
+ r = optimize_node_left(en->target, opt, env);
+ break;
+ }
+ }
+ break;
+
+ default:
+#ifdef ONIG_DEBUG
+ fprintf(stderr, "optimize_node_left: undefined node type %d\n",
+ NTYPE(node));
+#endif
+ r = ONIGERR_TYPE_BUG;
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Install the exact-string candidate `e` as the search optimization of `reg`.
+ *
+ * A copy of the string is stored in reg->exact / reg->exact_end and
+ * reg->optimize is set to one of the ONIG_OPTIMIZE_EXACT* kinds (the BM
+ * kinds also build the skip table through set_bm_skip()).  Nothing is done
+ * for an empty string.
+ *
+ * Returns 0 on success, or a non-zero error code (allocation failure via
+ * CHECK_NULL_RETURN_MEMERR, or the result of set_bm_skip()).
+ */
+static int
+set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
+{
+  if (e->len == 0) return 0;
+
+  if (e->ignore_case) {
+    reg->exact = (UChar* )xmalloc(e->len);
+    CHECK_NULL_RETURN_MEMERR(reg->exact);
+    xmemcpy(reg->exact, e->s, e->len);
+    reg->exact_end = reg->exact + e->len;
+    reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
+  }
+  else {
+    int reversible;
+
+    reg->exact = str_dup(e->s, e->s + e->len);
+    CHECK_NULL_RETURN_MEMERR(reg->exact);
+    reg->exact_end = reg->exact + e->len;
+
+    reversible =
+      ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
+
+    if (e->len < 3 && !(e->len >= 2 && reversible)) {
+      /* too short for a skip table */
+      reg->optimize = ONIG_OPTIMIZE_EXACT;
+    }
+    else {
+      int r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
+                          reg->map, &(reg->int_map));
+      if (r) return r;
+
+      if (reversible)
+        reg->optimize = ONIG_OPTIMIZE_EXACT_BM;
+      else
+        reg->optimize = ONIG_OPTIMIZE_EXACT_BM_NOT_REV;
+    }
+  }
+
+  reg->dmin = e->mmd.min;
+  reg->dmax = e->mmd.max;
+
+  if (reg->dmin != ONIG_INFINITE_DISTANCE)
+    reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
+
+  return 0;
+}
+
+/*
+ * Install the character map `m` as the search optimization of `reg`:
+ * copies the table, sets ONIG_OPTIMIZE_MAP and the distance range, and
+ * derives threshold_len when the minimum distance is finite.
+ */
+static void
+set_optimize_map_info(regex_t* reg, OptMapInfo* m)
+{
+  int ch;
+
+  reg->optimize = ONIG_OPTIMIZE_MAP;
+
+  ch = 0;
+  while (ch < ONIG_CHAR_TABLE_SIZE) {
+    reg->map[ch] = m->map[ch];
+    ch++;
+  }
+
+  reg->dmin = m->mmd.min;
+  reg->dmax = m->mmd.max;
+
+  if (reg->dmin == ONIG_INFINITE_DISTANCE) return;
+  reg->threshold_len = reg->dmin + 1;
+}
+
+/*
+ * Add to reg->sub_anchor the begin-line bit of the left anchors and the
+ * end-line bit of the right anchors found in `anc`.
+ */
+static void
+set_sub_anchor(regex_t* reg, OptAncInfo* anc)
+{
+  const int begin_line = anc->left_anchor & ANCHOR_BEGIN_LINE;
+  const int end_line   = anc->right_anchor & ANCHOR_END_LINE;
+
+  reg->sub_anchor |= begin_line;
+  reg->sub_anchor |= end_line;
+}
+
+#ifdef ONIG_DEBUG
+static void print_optimize_info(FILE* f, regex_t* reg);
+#endif
+
+/*
+ * Analyze the parse tree `node` and store the resulting search optimization
+ * (anchors plus either an exact string or a character map) in `reg`.
+ *
+ * Returns 0 on success or the non-zero code from optimize_node_left() /
+ * set_optimize_exact_info().
+ */
+static int
+set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
+{
+
+ int r;
+ NodeOptInfo opt;
+ OptEnv env;
+
+ env.enc = reg->enc;
+ env.options = reg->options;
+ env.case_fold_flag = reg->case_fold_flag;
+ env.scan_env = scan_env;
+ clear_mml(&env.mmd);
+
+ r = optimize_node_left(node, &opt, &env);
+ if (r) return r;
+
+ /* only these left/right anchor kinds are kept in reg->anchor */
+ reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
+ ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
+
+ reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
+
+ if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
+ reg->anchor_dmin = opt.len.min;
+ reg->anchor_dmax = opt.len.max;
+ }
+
+ if (opt.exb.len > 0 || opt.exm.len > 0) {
+ /* pick the better exact string into exb, then weigh it against the map */
+ select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
+ if (opt.map.value > 0 &&
+ comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
+ goto set_map;
+ }
+ else {
+ r = set_optimize_exact_info(reg, &opt.exb);
+ set_sub_anchor(reg, &opt.exb.anc);
+ }
+ }
+ else if (opt.map.value > 0) {
+ /* also entered through `goto set_map` above when the map wins */
+ set_map:
+ set_optimize_map_info(reg, &opt.map);
+ set_sub_anchor(reg, &opt.map.anc);
+ }
+ else {
+ /* neither exact string nor map: only line anchors can be recorded */
+ reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
+ if (opt.len.max == 0)
+ reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
+ }
+
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
+ print_optimize_info(stderr, reg);
+#endif
+ return r;
+}
+
+/*
+ * Drop any previously stored optimization from `reg`: frees the exact
+ * string if one is held and resets the optimize kind, anchors and lengths.
+ */
+static void
+clear_optimize_info(regex_t* reg)
+{
+  if (IS_NOT_NULL(reg->exact)) {
+    xfree(reg->exact);
+    reg->exact = (UChar* )NULL;
+  }
+  reg->exact_end     = (UChar* )NULL;
+
+  reg->optimize      = ONIG_OPTIMIZE_NONE;
+  reg->threshold_len = 0;
+
+  reg->anchor        = 0;
+  reg->anchor_dmin   = 0;
+  reg->anchor_dmax   = 0;
+  reg->sub_anchor    = 0;
+}
+
+#ifdef ONIG_DEBUG
+
+/*
+ * Debug helper: write the pattern bytes `s`..`end` to `fp` between
+ * "PATTERN: /" and "/".  For encodings whose minimum character length is
+ * greater than one byte, code points >= 0x80 are written in hex.
+ */
+static void print_enc_string(FILE* fp, OnigEncoding enc,
+                             const UChar *s, const UChar *end)
+{
+  fprintf(fp, "\nPATTERN: /");
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    const UChar *cur;
+
+    for (cur = s; cur < end; cur += enclen(enc, cur, end)) {
+      OnigCodePoint cp = ONIGENC_MBC_TO_CODE(enc, cur, end);
+
+      if (cp < 0x80)
+        fputc((int )cp, fp);
+      else
+        fprintf(fp, " 0x%04x ", (int )cp);
+    }
+  }
+  else {
+    for (; s < end; s++)
+      fputc((int )*s, fp);
+  }
+
+  fprintf(fp, "/\n");
+}
+
+/*
+ * Debug helper: write the range `a`-`b` to `f`; each bound is shown as
+ * "inf" when it equals ONIG_INFINITE_DISTANCE and as "(value)" otherwise.
+ */
+static void
+print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
+{
+  OnigDistance bound[2];
+  int k;
+
+  bound[0] = a;
+  bound[1] = b;
+
+  for (k = 0; k < 2; k++) {
+    if (k == 1) fputs("-", f);
+
+    if (bound[k] == ONIG_INFINITE_DISTANCE)
+      fputs("inf", f);
+    else
+      fprintf(f, "(%u)", bound[k]);
+  }
+}
+
+/*
+ * Debug helper: write the names of the anchor bits set in `anchor` to `f`
+ * as a comma-separated list enclosed in brackets.
+ */
+static void
+print_anchor(FILE* f, int anchor)
+{
+  /* printed in this order */
+  static const struct {
+    int         bit;
+    const char* label;
+  } names[] = {
+    { ANCHOR_BEGIN_BUF,       "begin-buf"       },
+    { ANCHOR_BEGIN_LINE,      "begin-line"      },
+    { ANCHOR_BEGIN_POSITION,  "begin-pos"       },
+    { ANCHOR_END_BUF,         "end-buf"         },
+    { ANCHOR_SEMI_END_BUF,    "semi-end-buf"    },
+    { ANCHOR_END_LINE,        "end-line"        },
+    { ANCHOR_ANYCHAR_STAR,    "anychar-star"    },
+    { ANCHOR_ANYCHAR_STAR_ML, "anychar-star-pl" }
+  };
+  int printed = 0;
+  int k;
+
+  fputs("[", f);
+
+  for (k = 0; k < (int )(sizeof(names) / sizeof(names[0])); k++) {
+    if ((anchor & names[k].bit) == 0) continue;
+
+    if (printed) fputs(", ", f);
+    fputs(names[k].label, f);
+    printed = 1;
+  }
+
+  fputs("]", f);
+}
+
+/*
+ * Debug helper: dump the search optimization stored in `reg` to `f`:
+ * the optimize kind, the anchors (with the distance range when an
+ * end-of-buffer anchor is present), the sub anchors, and then either the
+ * exact string or the set of bytes in the character map.
+ */
+static void
+print_optimize_info(FILE* f, regex_t* reg)
+{
+  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+                              "EXACT_IC", "MAP" };
+
+  fprintf(f, "optimize: %s\n", on[reg->optimize]);
+  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
+  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
+    print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
+  fprintf(f, "\n");
+
+  if (reg->optimize) {
+    fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
+    fprintf(f, "\n");
+  }
+  fprintf(f, "\n");
+
+  if (reg->exact) {
+    UChar *p;
+    fprintf(f, "exact: [");
+    for (p = reg->exact; p < reg->exact_end; p++) {
+      fputc(*p, f);
+    }
+    /* a pointer difference is ptrdiff_t; convert it to match "%d" */
+    fprintf(f, "]: length: %d\n", (int )(reg->exact_end - reg->exact));
+  }
+  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
+    int c, i, n = 0;
+
+    for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
+      if (reg->map[i]) n++;
+
+    fprintf(f, "map: n=%d\n", n);
+    if (n > 0) {
+      c = 0;
+      fputc('[', f);
+      for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
+        if (reg->map[i] != 0) {
+          if (c > 0) fputs(", ", f);
+          c++;
+          /* printable single-byte characters are shown as themselves */
+          if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
+              ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
+            fputc(i, f);
+          else
+            fprintf(f, "%d", i);
+        }
+      }
+      fprintf(f, "]\n");
+    }
+  }
+}
+#endif /* ONIG_DEBUG */
+
+
+/*
+ * Release everything owned by `reg` (byte code, exact string, skip tables,
+ * repeat ranges, the chained regex and, with USE_NAMED_GROUP, the name
+ * table) but not `reg` itself.  A NULL `reg` is ignored.
+ */
+extern void
+onig_free_body(regex_t* reg)
+{
+  if (IS_NULL(reg)) return;
+
+  if (IS_NOT_NULL(reg->p))                xfree(reg->p);
+  if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
+  if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
+  if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
+  if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
+  if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
+
+#ifdef USE_NAMED_GROUP
+  onig_names_free(reg);
+#endif
+}
+
+/*
+ * Release `reg` together with everything it owns.  A NULL `reg` is ignored.
+ */
+extern void
+onig_free(regex_t* reg)
+{
+  if (IS_NULL(reg)) return;
+
+  onig_free_body(reg);
+  xfree(reg);
+}
+
+/*
+ * Return the number of bytes accounted to `reg`: the regex_t itself plus
+ * its byte code buffer, exact string, skip tables, repeat ranges and,
+ * recursively, the chained regex.  Returns 0 for a NULL `reg`, matching the
+ * NULL tolerance of onig_free() and onig_free_body().
+ */
+size_t
+onig_memsize(regex_t *reg)
+{
+  size_t size;
+
+  if (IS_NULL(reg)) return 0;
+
+  size = sizeof(regex_t);
+  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
+  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
+  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
+  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
+  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
+  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
+
+  return size;
+}
+
+/*
+ * REGEX_TRANSFER(to, from): replace the contents of `to` with those of
+ * `from`.  `to` is marked ONIG_STATE_MODIFY and its body is freed, then the
+ * whole regex_t is copied over it and the `from` structure itself is freed
+ * (what `from` pointed to is now referenced by `to`).
+ */
+#define REGEX_TRANSFER(to,from) do {\
+ (to)->state = ONIG_STATE_MODIFY;\
+ onig_free_body(to);\
+ xmemcpy(to, from, sizeof(regex_t));\
+ xfree(from);\
+} while (0)
+
+/*
+ * Move the compiled regex `from` into `to`, between THREAD_ATOMIC_START and
+ * THREAD_ATOMIC_END.  `from` must not be used afterwards: REGEX_TRANSFER
+ * frees it.
+ */
+extern void
+onig_transfer(regex_t* to, regex_t* from)
+{
+ THREAD_ATOMIC_START;
+ REGEX_TRANSFER(to, from);
+ THREAD_ATOMIC_END;
+}
+
+/*
+ * REGEX_CHAIN_HEAD(reg): advance `reg` along the ->chain links until it
+ * designates the last regex of the chain.  The argument is assigned to, so
+ * it must be a modifiable lvalue.
+ */
+#define REGEX_CHAIN_HEAD(reg) do {\
+ while (IS_NOT_NULL((reg)->chain)) {\
+ (reg) = (reg)->chain;\
+ }\
+} while (0)
+
+/*
+ * Append `add` to the end of the chain that starts at `to`, between
+ * THREAD_ATOMIC_START and THREAD_ATOMIC_END.
+ */
+extern void
+onig_chain_link_add(regex_t* to, regex_t* add)
+{
+ THREAD_ATOMIC_START;
+ REGEX_CHAIN_HEAD(to);
+ to->chain = add;
+ THREAD_ATOMIC_END;
+}
+
+/*
+ * Collapse the chain hanging off `reg`: the last regex of the chain is
+ * unlinked from its predecessor and transferred into `reg`.  Nothing
+ * happens when `reg` has no chain.
+ */
+extern void
+onig_chain_reduce(regex_t* reg)
+{
+  regex_t *last, *before_last;
+
+  last = reg->chain;
+  if (IS_NULL(last)) return;
+
+  reg->state = ONIG_STATE_MODIFY;
+
+  /* walk to the final element, remembering the one in front of it */
+  before_last = reg;
+  while (IS_NOT_NULL(last->chain)) {
+    before_last = last;
+    last = last->chain;
+  }
+
+  before_last->chain = (regex_t* )NULL;
+  REGEX_TRANSFER(reg, last);
+}
+
+#ifdef ONIG_DEBUG
+static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
+#endif
+#ifdef ONIG_DEBUG_PARSE_TREE
+static void print_tree P_((FILE* f, Node* node));
+#endif
+
+/*
+ * Compile the pattern `pattern`..`pattern_end` into `reg`.
+ *
+ * Steps: allocate/reset the byte code buffer, parse the pattern into a
+ * tree, resolve named groups and subexpression calls, set up the tree,
+ * derive the search optimization, emit the byte code and pick the stack
+ * pop level.
+ *
+ * `einfo` may be NULL; when it is not, einfo->par is cleared on entry and,
+ * on failure, enc/par/par_end are filled in if the scan environment
+ * recorded an error position.  `sourcefile` and `sourceline` are only
+ * stored in the scan environment.
+ *
+ * Returns 0 on success or a non-zero error code.  reg->state is put back to
+ * ONIG_STATE_NORMAL only on the paths that leave through the `end` label.
+ */
+extern int
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+             OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
+{
+#define COMPILE_INIT_SIZE 20
+
+  int r, init_size;
+  /* start from NULL so the error path never frees an indeterminate pointer */
+  Node* root = (Node* )NULL;
+  ScanEnv scan_env = {0};
+#ifdef USE_SUBEXP_CALL
+  UnsetAddrList uslist;
+#endif
+
+  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+
+  scan_env.sourcefile = sourcefile;
+  scan_env.sourceline = sourceline;
+  reg->state = ONIG_STATE_COMPILING;
+
+#ifdef ONIG_DEBUG
+  print_enc_string(stderr, reg->enc, pattern, pattern_end);
+#endif
+
+  if (reg->alloc == 0) {
+    /* first compilation: size the buffer from the pattern length */
+    init_size = (pattern_end - pattern) * 2;
+    if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
+    r = BBUF_INIT(reg, init_size);
+    if (r != 0) goto end;
+  }
+  else
+    reg->used = 0;
+
+  reg->num_mem = 0;
+  reg->num_repeat = 0;
+  reg->num_null_check = 0;
+  reg->repeat_range_alloc = 0;
+  reg->repeat_range = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  reg->num_comb_exp_check = 0;
+#endif
+
+  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
+  if (r != 0) goto err;
+
+#ifdef USE_NAMED_GROUP
+  /* mixed use named group and no-named group */
+  if (scan_env.num_named > 0 &&
+      IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+    if (scan_env.num_named != scan_env.num_mem)
+      r = disable_noname_group_capture(&root, reg, &scan_env);
+    else
+      r = numbered_ref_check(root);
+
+    if (r != 0) goto err;
+  }
+#endif
+
+#ifdef USE_SUBEXP_CALL
+  if (scan_env.num_call > 0) {
+    r = unset_addr_list_init(&uslist, scan_env.num_call);
+    if (r != 0) goto err;
+    scan_env.unset_addr_list = &uslist;
+    r = setup_subexp_call(root, &scan_env);
+    if (r != 0) goto err_unset;
+    r = subexp_recursive_check_trav(root, &scan_env);
+    if (r < 0) goto err_unset;
+    r = subexp_inf_recursive_check_trav(root, &scan_env);
+    if (r != 0) goto err_unset;
+
+    reg->num_call = scan_env.num_call;
+  }
+  else
+    reg->num_call = 0;
+#endif
+
+  r = setup_tree(root, reg, 0, &scan_env);
+  if (r != 0) goto err_unset;
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+  print_tree(stderr, root);
+#endif
+
+  reg->capture_history = scan_env.capture_history;
+  reg->bt_mem_start = scan_env.bt_mem_start;
+  reg->bt_mem_start |= reg->capture_history;
+  if (IS_FIND_CONDITION(reg->options))
+    BIT_STATUS_ON_ALL(reg->bt_mem_end);
+  else {
+    reg->bt_mem_end = scan_env.bt_mem_end;
+    reg->bt_mem_end |= reg->capture_history;
+  }
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  if (scan_env.backrefed_mem == 0
+#ifdef USE_SUBEXP_CALL
+      || scan_env.num_call == 0
+#endif
+      ) {
+    setup_comb_exp_check(root, 0, &scan_env);
+#ifdef USE_SUBEXP_CALL
+    if (scan_env.has_recursion != 0) {
+      scan_env.num_comb_exp_check = 0;
+    }
+    else
+#endif
+    if (scan_env.comb_exp_max_regnum > 0) {
+      int i;
+      /* the check is dropped as soon as one checked group is back-referenced */
+      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+        if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+          scan_env.num_comb_exp_check = 0;
+          break;
+        }
+      }
+    }
+  }
+
+  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
+  clear_optimize_info(reg);
+#ifndef ONIG_DONT_OPTIMIZE
+  r = set_optimize_info_from_tree(root, reg, &scan_env);
+  if (r != 0) goto err_unset;
+#endif
+
+  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
+    xfree(scan_env.mem_nodes_dynamic);
+    scan_env.mem_nodes_dynamic = (Node** )NULL;
+  }
+
+  r = compile_tree(root, reg);
+  if (r == 0) {
+    r = add_opcode(reg, OP_END);
+#ifdef USE_SUBEXP_CALL
+    if (scan_env.num_call > 0) {
+      /* keep a failure of add_opcode(): only fix addresses when it succeeded */
+      if (r == 0) r = unset_addr_list_fix(&uslist, reg);
+      unset_addr_list_end(&uslist);
+      if (r) goto err;
+    }
+#endif
+
+    if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
+      reg->stack_pop_level = STACK_POP_LEVEL_ALL;
+    else {
+      if (reg->bt_mem_start != 0)
+        reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
+      else
+        reg->stack_pop_level = STACK_POP_LEVEL_FREE;
+    }
+  }
+#ifdef USE_SUBEXP_CALL
+  else if (scan_env.num_call > 0) {
+    unset_addr_list_end(&uslist);
+  }
+#endif
+  onig_node_free(root);
+
+#ifdef ONIG_DEBUG_COMPILE
+#ifdef USE_NAMED_GROUP
+  onig_print_names(stderr, reg);
+#endif
+  print_compiled_byte_code_list(stderr, reg);
+#endif
+
+ end:
+  reg->state = ONIG_STATE_NORMAL;
+  return r;
+
+ err_unset:
+#ifdef USE_SUBEXP_CALL
+  if (scan_env.num_call > 0) {
+    unset_addr_list_end(&uslist);
+  }
+#endif
+ err:
+  if (IS_NOT_NULL(scan_env.error)) {
+    if (IS_NOT_NULL(einfo)) {
+      einfo->enc = scan_env.enc;
+      einfo->par = scan_env.error;
+      einfo->par_end = scan_env.error_end;
+    }
+  }
+
+  onig_node_free(root);
+  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
+    xfree(scan_env.mem_nodes_dynamic);
+  return r;
+}
+
+#ifdef USE_RECOMPILE_API
+/*
+ * Compile `pattern` into a fresh regex and hand it to `reg`: it is
+ * transferred into `reg` directly when `reg` is in the normal state, and
+ * appended to `reg`'s chain otherwise.
+ *
+ * Returns 0 on success or the error code from onig_new(); `reg` is left
+ * untouched on failure.
+ */
+extern int
+onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+               OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+               OnigErrorInfo* einfo)
+{
+  regex_t *fresh;
+  int r = onig_new(&fresh, pattern, pattern_end, option, enc, syntax, einfo);
+
+  if (r) return r;
+
+  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
+    onig_chain_link_add(reg, fresh);
+  else
+    onig_transfer(reg, fresh);
+
+  return 0;
+}
+#endif
+
+static int onig_inited = 0;
+
+/*
+ * Initialize the members of a regex_t before compilation.
+ *
+ * Calls onig_init() first if the library has not been initialized yet.
+ * Returns ONIGERR_INVALID_ARGUMENT for a NULL `reg`,
+ * ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED for an undefined encoding and
+ * ONIGERR_INVALID_COMBINATION_OF_OPTIONS when both capture-group options
+ * are given; in these cases no member of `reg` has been written.
+ * Returns 0 on success.
+ *
+ * Every pointer that onig_free_body() inspects is reset here, so a regex
+ * can be released safely even if compilation stops early.
+ */
+extern int
+onig_reg_init(regex_t* reg, OnigOptionType option,
+              OnigCaseFoldType case_fold_flag,
+              OnigEncoding enc, const OnigSyntaxType* syntax)
+{
+  if (! onig_inited)
+    onig_init();
+
+  if (IS_NULL(reg))
+    return ONIGERR_INVALID_ARGUMENT;
+
+  if (ONIGENC_IS_UNDEF(enc))
+    return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
+
+  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
+      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
+    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
+  }
+
+  reg->state = ONIG_STATE_MODIFY;
+
+  /* the negate request is tested on the caller's options, before the
+     syntax defaults are merged in */
+  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
+    option |= syntax->options;
+    option &= ~ONIG_OPTION_SINGLELINE;
+  }
+  else
+    option |= syntax->options;
+
+  reg->enc              = enc;
+  reg->options          = option;
+  reg->syntax           = syntax;
+  reg->optimize         = 0;
+  reg->exact            = (UChar* )NULL;
+  reg->int_map          = (int* )NULL;
+  reg->int_map_backward = (int* )NULL;
+  reg->chain            = (regex_t* )NULL;
+
+  reg->p                = (UChar* )NULL;
+  reg->alloc            = 0;
+  reg->used             = 0;
+  reg->name_table       = (void* )NULL;
+
+  /* freed by onig_free_body(), so it must never be left indeterminate */
+  reg->repeat_range       = (OnigRepeatRange* )NULL;
+  reg->repeat_range_alloc = 0;
+
+  reg->case_fold_flag   = case_fold_flag;
+  return 0;
+}
+
+/*
+ * Initialize and compile a caller-provided regex_t (no allocation of the
+ * structure itself).  Returns 0 on success or the error code from
+ * onig_reg_init() / onig_compile().
+ */
+extern int
+onig_new_without_alloc(regex_t* reg, const UChar* pattern,
+                       const UChar* pattern_end, OnigOptionType option,
+                       OnigEncoding enc, OnigSyntaxType* syntax,
+                       OnigErrorInfo* einfo)
+{
+  int r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
+
+  if (r != 0) return r;
+
+  return onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
+}
+
+/*
+ * Allocate a regex_t, initialize it and compile `pattern` into it.
+ *
+ * On success returns 0 and stores the new regex in *reg.  On failure
+ * returns a non-zero error code (ONIGERR_MEMORY when the allocation fails)
+ * and *reg is NULL.
+ */
+extern int
+onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
+         OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
+         OnigErrorInfo* einfo)
+{
+  int r;
+
+  *reg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+
+  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
+  if (r != 0) {
+    /* onig_reg_init() rejects its arguments before writing any member, so
+       the structure still holds indeterminate pointers: release only the
+       raw allocation instead of going through onig_free() */
+    xfree(*reg);
+    *reg = NULL;
+    return r;
+  }
+
+  r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
+  if (r != 0) {
+    onig_free(*reg);
+    *reg = NULL;
+  }
+  return r;
+}
+
+
+/*
+ * One-time library initialization.  Does nothing when onig_inited is
+ * already set; otherwise sets the flag and initializes the encoding layer
+ * (and the statistics counters with ONIG_DEBUG_STATISTICS) between
+ * THREAD_ATOMIC_START and THREAD_ATOMIC_END.  Always returns 0.
+ */
+extern int
+onig_init(void)
+{
+ /* NOTE(review): this check runs before THREAD_ATOMIC_START */
+ if (onig_inited != 0)
+ return 0;
+
+ THREAD_SYSTEM_INIT;
+ THREAD_ATOMIC_START;
+
+ onig_inited = 1;
+
+ onigenc_init();
+ /* onigenc_set_default_caseconv_table((UChar* )0); */
+
+#ifdef ONIG_DEBUG_STATISTICS
+ onig_statistics_init();
+#endif
+
+ THREAD_ATOMIC_END;
+ return 0;
+}
+
+
+/*
+ * Library shutdown: prints statistics and frees the shared character-class
+ * table and the recycled parse-node list when the corresponding build
+ * options are enabled, then clears onig_inited so that onig_init() will run
+ * again.  Always returns 0.
+ */
+extern int
+onig_end(void)
+{
+ THREAD_ATOMIC_START;
+
+#ifdef ONIG_DEBUG_STATISTICS
+ onig_print_statistics(stderr);
+#endif
+
+#ifdef USE_SHARED_CCLASS_TABLE
+ onig_free_shared_cclass_table();
+#endif
+
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
+ onig_free_node_list();
+#endif
+
+ onig_inited = 0;
+
+ THREAD_ATOMIC_END;
+ THREAD_SYSTEM_END;
+ return 0;
+}
+#endif //INCLUDE_REGEXP
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Test whether `code` lies in one of the ranges stored at `p`.
+ *
+ * The data is read as a count fetched with GET_CODE_POINT followed by that
+ * many (low, high) OnigCodePoint pairs; the binary search below relies on
+ * the pairs being in ascending order.
+ *
+ * Returns 1 when `code` is inside a range, 0 otherwise.
+ */
+extern int
+onig_is_in_code_range(const UChar* p, OnigCodePoint code)
+{
+  OnigCodePoint count, lo, hi, mid;
+  OnigCodePoint* pairs;
+
+  GET_CODE_POINT(count, p);
+  pairs = (OnigCodePoint* )p + 1;   /* skip the leading count */
+
+  /* find the first range whose upper bound is not below `code` */
+  lo = 0;
+  hi = count;
+  while (lo < hi) {
+    mid = (lo + hi) >> 1;
+    if (code > pairs[mid * 2 + 1])
+      lo = mid + 1;
+    else
+      hi = mid;
+  }
+
+  if (lo < count && code >= pairs[lo * 2])
+    return 1;
+  return 0;
+}
+#endif //INCLUDE_ENCODING
+
+#ifdef INCLUDE_REGEXP
+/*
+ * Test whether `code`, whose encoded length is `elen`, matches the
+ * character class `cc`.
+ *
+ * Single-byte codes below SINGLE_BYTE_SIZE are looked up in the bit set;
+ * anything else is looked up in the multibyte range buffer (no buffer means
+ * no match).  The result is inverted for a negated class.
+ */
+extern int
+onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
+{
+  int hit;
+
+  if (elen <= 1 && code < SINGLE_BYTE_SIZE)
+    hit = (BITSET_AT(cc->bs, code) != 0);
+  else if (IS_NULL(cc->mbuf))
+    hit = 0;
+  else
+    hit = (onig_is_in_code_range(cc->mbuf->p, code) != 0);
+
+  return IS_NCCLASS_NOT(cc) ? !hit : hit;
+}
+
+/*
+ * Test whether `code` matches the character class `cc` under encoding `enc`.
+ * The encoded length handed to onig_is_code_in_cc_len() is 2 for encodings
+ * whose minimum character length exceeds one byte, and the real encoded
+ * length of `code` otherwise.
+ */
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+  int elen = (ONIGENC_MBC_MINLEN(enc) > 1)
+             ? 2
+             : ONIGENC_CODE_TO_MBCLEN(enc, code);
+
+  return onig_is_code_in_cc_len(elen, code, cc);
+}
+
+
+#ifdef ONIG_DEBUG
+
+/* arguments type */
+#define ARG_SPECIAL -1
+#define ARG_NON 0
+#define ARG_RELADDR 1
+#define ARG_ABSADDR 2
+#define ARG_LENGTH 3
+#define ARG_MEMNUM 4
+#define ARG_OPTION 5
+#define ARG_STATE_CHECK 6
+
+/*
+ * Debug table describing each opcode: the opcode value, its printable name
+ * and the kind of argument it carries (one of the ARG_* constants above).
+ * The entry with opcode -1 terminates the table.
+ */
+OnigOpInfoType OnigOpInfo[] = {
+ { OP_FINISH, "finish", ARG_NON },
+ { OP_END, "end", ARG_NON },
+ { OP_EXACT1, "exact1", ARG_SPECIAL },
+ { OP_EXACT2, "exact2", ARG_SPECIAL },
+ { OP_EXACT3, "exact3", ARG_SPECIAL },
+ { OP_EXACT4, "exact4", ARG_SPECIAL },
+ { OP_EXACT5, "exact5", ARG_SPECIAL },
+ { OP_EXACTN, "exactn", ARG_SPECIAL },
+ { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
+ { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
+ { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
+ { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
+ { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
+ { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
+ { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
+ { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
+ { OP_CCLASS, "cclass", ARG_SPECIAL },
+ { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
+ { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
+ { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
+ { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+ { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
+ { OP_ANYCHAR, "anychar", ARG_NON },
+ { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
+ { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
+ { OP_WORD, "word", ARG_NON },
+ { OP_NOT_WORD, "not-word", ARG_NON },
+ { OP_WORD_BOUND, "word-bound", ARG_NON },
+ { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
+ { OP_WORD_BEGIN, "word-begin", ARG_NON },
+ { OP_WORD_END, "word-end", ARG_NON },
+ { OP_BEGIN_BUF, "begin-buf", ARG_NON },
+ { OP_END_BUF, "end-buf", ARG_NON },
+ { OP_BEGIN_LINE, "begin-line", ARG_NON },
+ { OP_END_LINE, "end-line", ARG_NON },
+ { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
+ { OP_BEGIN_POSITION, "begin-position", ARG_NON },
+ { OP_BACKREF1, "backref1", ARG_NON },
+ { OP_BACKREF2, "backref2", ARG_NON },
+ { OP_BACKREFN, "backrefn", ARG_MEMNUM },
+ { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
+ { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
+ { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
+ { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
+ { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
+ { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
+ { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
+ { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
+ { OP_SET_OPTION, "set-option", ARG_OPTION },
+ { OP_FAIL, "fail", ARG_NON },
+ { OP_JUMP, "jump", ARG_RELADDR },
+ { OP_PUSH, "push", ARG_RELADDR },
+ { OP_POP, "pop", ARG_NON },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
+ { OP_REPEAT, "repeat", ARG_SPECIAL },
+ { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
+ { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
+ { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
+ { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
+ { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
+ { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
+ { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_PUSH_POS, "push-pos", ARG_NON },
+ { OP_POP_POS, "pop-pos", ARG_NON },
+ { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
+ { OP_FAIL_POS, "fail-pos", ARG_NON },
+ { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
+ { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
+ { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
+ { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
+ { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
+ { OP_CALL, "call", ARG_ABSADDR },
+ { OP_RETURN, "return", ARG_NON },
+ { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
+ { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+ { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
+ { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+ "state-check-anychar-ml*", ARG_STATE_CHECK },
+ { -1, "", ARG_NON }
+};
+
+/* Return the mnemonic registered in OnigOpInfo for `opcode`, or an empty
+ * string when the table (terminated by a negative opcode) has no entry. */
+static char*
+op2name(int opcode)
+{
+  int idx = 0;
+
+  while (OnigOpInfo[idx].opcode >= 0) {
+    if (OnigOpInfo[idx].opcode == opcode) {
+      return OnigOpInfo[idx].name;
+    }
+    idx++;
+  }
+  return "";
+}
+
+/* Return the operand-layout tag registered in OnigOpInfo for `opcode`.
+ * Opcodes missing from the table are reported as ARG_SPECIAL. */
+static int
+op2arg_type(int opcode)
+{
+  int idx = 0;
+
+  while (OnigOpInfo[idx].opcode >= 0) {
+    if (OnigOpInfo[idx].opcode == opcode) {
+      return OnigOpInfo[idx].arg_type;
+    }
+    idx++;
+  }
+  return ARG_SPECIAL;
+}
+
+/* Write `indent` space characters to `f` (nothing when indent <= 0). */
+static void
+Indent(FILE* f, int indent)
+{
+  int remaining = indent;
+
+  while (remaining > 0) {
+    putc(' ', f);
+    remaining--;
+  }
+}
+
+/* Write a ':' followed by the first `len` raw bytes of `s` to `f`. */
+static void
+p_string(FILE* f, int len, UChar* s)
+{
+  int k;
+
+  fputs(":", f);
+  for (k = 0; k < len; k++) {
+    fputc(s[k], f);
+  }
+}
+
+/* Write ":<len>:" followed by len * mb_len raw bytes of `s` to `f`. */
+static void
+p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
+{
+  int nbytes = len * mb_len;
+  int k;
+
+  fprintf(f, ":%d:", len);
+  for (k = 0; k < nbytes; k++) {
+    fputc(s[k], f);
+  }
+}
+
+/*
+ * Print one compiled regex instruction, starting at `bp`, to `f` in the
+ * form "[mnemonic:operands]".
+ *
+ * f     - destination stream.
+ * bp    - first byte (the opcode) of the instruction to print.
+ * bpend - end of the code buffer; only passed to enclen() for OP_EXACT1_IC.
+ * nextp - when non-NULL, receives the address just past the bytes consumed.
+ * enc   - encoding used to size the character operand of OP_EXACT1_IC.
+ *
+ * An opcode with no decoding rule is reported on stderr and consumed as a
+ * single byte, so a caller that iterates through `nextp` always advances.
+ */
+extern void
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
+                              OnigEncoding enc)
+{
+  int i, n, arg_type;
+  RelAddrType addr;
+  LengthType len;
+  MemNumType mem;
+  StateCheckNumType scn;
+  OnigCodePoint code;
+  UChar *q;
+
+  fprintf(f, "[%s", op2name(*bp));
+  arg_type = op2arg_type(*bp);
+  if (arg_type != ARG_SPECIAL) {
+    /* Fixed operand layout: skip the opcode and decode by layout tag. */
+    bp++;
+    switch (arg_type) {
+    case ARG_NON:
+      break;
+    case ARG_RELADDR:
+      GET_RELADDR_INC(addr, bp);
+      fprintf(f, ":(%d)", addr);
+      break;
+    case ARG_ABSADDR:
+      GET_ABSADDR_INC(addr, bp);
+      fprintf(f, ":(%d)", addr);
+      break;
+    case ARG_LENGTH:
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d", len);
+      break;
+    case ARG_MEMNUM:
+      GET_MEMNUM_INC(mem, bp);
+      fprintf(f, ":%d", mem);
+      break;
+    case ARG_OPTION:
+      {
+        OnigOptionType option;
+
+        GET_OPTION_INC(option, bp);
+        fprintf(f, ":%d", option);
+      }
+      break;
+
+    case ARG_STATE_CHECK:
+      scn = *((StateCheckNumType* )bp);
+      bp += SIZE_STATE_CHECK_NUM;
+      fprintf(f, ":%d", scn);
+      break;
+    }
+  }
+  else {
+    /* Per-opcode operand layout; the switch expression consumes the opcode. */
+    switch (*bp++) {
+    case OP_EXACT1:
+    case OP_ANYCHAR_STAR_PEEK_NEXT:
+    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
+      p_string(f, 1, bp++); break;
+    case OP_EXACT2:
+      p_string(f, 2, bp); bp += 2; break;
+    case OP_EXACT3:
+      p_string(f, 3, bp); bp += 3; break;
+    case OP_EXACT4:
+      p_string(f, 4, bp); bp += 4; break;
+    case OP_EXACT5:
+      p_string(f, 5, bp); bp += 5; break;
+    case OP_EXACTN:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 1, bp);
+      bp += len;
+      break;
+
+    case OP_EXACTMB2N1:
+      p_string(f, 2, bp); bp += 2; break;
+    case OP_EXACTMB2N2:
+      p_string(f, 4, bp); bp += 4; break;
+    case OP_EXACTMB2N3:
+      p_string(f, 6, bp); bp += 6; break;
+    case OP_EXACTMB2N:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 2, bp);
+      bp += len * 2;
+      break;
+    case OP_EXACTMB3N:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 3, bp);
+      bp += len * 3;
+      break;
+    case OP_EXACTMBN:
+      {
+        int mb_len;
+
+        GET_LENGTH_INC(mb_len, bp);
+        GET_LENGTH_INC(len, bp);
+        fprintf(f, ":%d:%d:", mb_len, len);
+        n = len * mb_len;
+        while (n-- > 0) { fputc(*bp++, f); }
+      }
+      break;
+
+    case OP_EXACT1_IC:
+      len = enclen(enc, bp, bpend);
+      p_string(f, len, bp);
+      bp += len;
+      break;
+    case OP_EXACTN_IC:
+      GET_LENGTH_INC(len, bp);
+      p_len_string(f, len, 1, bp);
+      bp += len;
+      break;
+
+    case OP_CCLASS:
+    case OP_CCLASS_NOT:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      fprintf(f, ":%d", n);
+      break;
+
+    case OP_CCLASS_MB:
+    case OP_CCLASS_MB_NOT:
+      GET_LENGTH_INC(len, bp);
+      q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+      ALIGNMENT_RIGHT(q);
+#endif
+      GET_CODE_POINT(code, q);
+      bp += len;
+      fprintf(f, ":%d:%d", (int )code, len);
+      break;
+
+    case OP_CCLASS_MIX:
+    case OP_CCLASS_MIX_NOT:
+      n = bitset_on_num((BitSetRef )bp);
+      bp += SIZE_BITSET;
+      GET_LENGTH_INC(len, bp);
+      q = bp;
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+      ALIGNMENT_RIGHT(q);
+#endif
+      GET_CODE_POINT(code, q);
+      bp += len;
+      fprintf(f, ":%d:%d:%d", n, (int )code, len);
+      break;
+
+    case OP_CCLASS_NODE:
+      {
+        CClassNode *cc;
+
+        GET_POINTER_INC(cc, bp);
+        n = bitset_on_num(cc->bs);
+        /* %p: casting the pointer to unsigned int drops bits on LP64 */
+        fprintf(f, ":%p:%d", (void* )cc, n);
+      }
+      break;
+
+    case OP_BACKREFN_IC:
+      GET_MEMNUM_INC(mem, bp);
+      fprintf(f, ":%d", mem);
+      break;
+
+    case OP_BACKREF_MULTI_IC:
+    case OP_BACKREF_MULTI:
+      fputs(" ", f);
+      GET_LENGTH_INC(len, bp);
+      for (i = 0; i < len; i++) {
+        GET_MEMNUM_INC(mem, bp);
+        if (i > 0) fputs(", ", f);
+        fprintf(f, "%d", mem);
+      }
+      break;
+
+    case OP_BACKREF_WITH_LEVEL:
+      {
+        OnigOptionType option;
+        LengthType level;
+
+        GET_OPTION_INC(option, bp);
+        fprintf(f, ":%d", option);
+        GET_LENGTH_INC(level, bp);
+        fprintf(f, ":%d", level);
+
+        fputs(" ", f);
+        GET_LENGTH_INC(len, bp);
+        for (i = 0; i < len; i++) {
+          GET_MEMNUM_INC(mem, bp);
+          if (i > 0) fputs(", ", f);
+          fprintf(f, "%d", mem);
+        }
+      }
+      break;
+
+    case OP_REPEAT:
+    case OP_REPEAT_NG:
+      GET_MEMNUM_INC(mem, bp);
+      GET_RELADDR_INC(addr, bp);
+      fprintf(f, ":%d:%d", mem, addr);
+      break;
+
+    case OP_PUSH_OR_JUMP_EXACT1:
+    case OP_PUSH_IF_PEEK_NEXT:
+      GET_RELADDR_INC(addr, bp);
+      fprintf(f, ":(%d)", addr);
+      p_string(f, 1, bp);
+      bp += 1;
+      break;
+
+    case OP_LOOK_BEHIND:
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d", len);
+      break;
+
+    case OP_PUSH_LOOK_BEHIND_NOT:
+      GET_RELADDR_INC(addr, bp);
+      GET_LENGTH_INC(len, bp);
+      fprintf(f, ":%d:(%d)", len, addr);
+      break;
+
+    case OP_STATE_CHECK_PUSH:
+    case OP_STATE_CHECK_PUSH_OR_JUMP:
+      scn = *((StateCheckNumType* )bp);
+      bp += SIZE_STATE_CHECK_NUM;
+      GET_RELADDR_INC(addr, bp);
+      fprintf(f, ":%d:(%d)", scn, addr);
+      break;
+
+    default:
+      /* Report the opcode byte without rewinding bp: stepping back here
+         would hand the same address to *nextp and stall the caller's loop. */
+      fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
+              bp[-1]);
+      break;
+    }
+  }
+  fputs("]", f);
+  if (nextp) *nextp = bp;
+}
+
+/* Dump every instruction of the compiled program in `reg` to `f`: a
+ * "code length" header line, then the instructions separated by spaces,
+ * with a line break inserted before every fifth one. */
+static void
+print_compiled_byte_code_list(FILE* f, regex_t* reg)
+{
+  UChar* const first = reg->p;
+  UChar* const limit = reg->p + reg->used;
+  UChar* cur = first;
+  int count = 0;
+
+  fprintf(f, "code length: %d\n", reg->used);
+
+  while (cur < limit) {
+    count++;
+    if (cur > first) {
+      /* separator goes in front of every instruction except the first */
+      if (count % 5 == 0) {
+        fprintf(f, "\n");
+      }
+      else {
+        fputs(" ", f);
+      }
+    }
+    onig_print_compiled_byte_code(f, cur, limit, &cur, reg->enc);
+  }
+
+  fprintf(f, "\n");
+}
+
+/*
+ * Recursively print the parse tree rooted at `node` to `f`, one node per
+ * line, indenting each nesting level by three more columns than `indent`.
+ *
+ * The process is terminated with exit(0) when a NULL node, a list/alt chain
+ * whose tail has a different node type, or an unknown ctype is met.
+ * Node addresses are printed with %p so they are not truncated where a
+ * pointer is wider than int.
+ */
+static void
+print_indent_tree(FILE* f, Node* node, int indent)
+{
+  int i, type;
+  int add = 3;
+  UChar* p;
+
+  Indent(f, indent);
+  if (IS_NULL(node)) {
+    fprintf(f, "ERROR: null node!!!\n");
+    exit (0);
+  }
+
+  type = NTYPE(node);
+  switch (type) {
+  case NT_LIST:
+  case NT_ALT:
+    if (type == NT_LIST)
+      fprintf(f, "<list:%p>\n", (void* )node);
+    else
+      fprintf(f, "<alt:%p>\n", (void* )node);
+
+    print_indent_tree(f, NCAR(node), indent + add);
+    while (IS_NOT_NULL(node = NCDR(node))) {
+      if (NTYPE(node) != type) {
+        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
+        exit(0);
+      }
+      print_indent_tree(f, NCAR(node), indent + add);
+    }
+    break;
+
+  case NT_STR:
+    fprintf(f, "<string%s:%p>",
+            (NSTRING_IS_RAW(node) ? "-raw" : ""), (void* )node);
+    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
+      /* printable ASCII is echoed, every other byte is shown in hex */
+      if (*p >= 0x20 && *p < 0x7f)
+        fputc(*p, f);
+      else {
+        fprintf(f, " 0x%02x", *p);
+      }
+    }
+    break;
+
+  case NT_CCLASS:
+    fprintf(f, "<cclass:%p>", (void* )node);
+    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
+    if (NCCLASS(node)->mbuf) {
+      BBuf* bbuf = NCCLASS(node)->mbuf;
+      for (i = 0; i < (int )bbuf->used; i++) {
+        if (i > 0) fprintf(f, ",");
+        fprintf(f, "%0x", bbuf->p[i]);
+      }
+    }
+    break;
+
+  case NT_CTYPE:
+    fprintf(f, "<ctype:%p> ", (void* )node);
+    switch (NCTYPE(node)->ctype) {
+    case ONIGENC_CTYPE_WORD:
+      if (NCTYPE(node)->not != 0)
+        fputs("not word", f);
+      else
+        fputs("word", f);
+      break;
+
+    default:
+      fprintf(f, "ERROR: undefined ctype.\n");
+      exit(0);
+    }
+    break;
+
+  case NT_CANY:
+    fprintf(f, "<anychar:%p>", (void* )node);
+    break;
+
+  case NT_ANCHOR:
+    fprintf(f, "<anchor:%p> ", (void* )node);
+    switch (NANCHOR(node)->type) {
+    case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
+    case ANCHOR_END_BUF: fputs("end buf", f); break;
+    case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
+    case ANCHOR_END_LINE: fputs("end line", f); break;
+    case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
+    case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
+
+    case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
+    case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
+#ifdef USE_WORD_BEGIN_END
+    case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
+    case ANCHOR_WORD_END: fputs("word end", f); break;
+#endif
+    case ANCHOR_PREC_READ: fputs("prec read", f); break;
+    case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
+    case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
+    case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
+
+    default:
+      fprintf(f, "ERROR: undefined anchor type.\n");
+      break;
+    }
+    break;
+
+  case NT_BREF:
+    {
+      BRefNode* br = NBREF(node);
+      int* refs = BACKREFS_P(br);
+
+      fprintf(f, "<backref:%p>", (void* )node);
+      for (i = 0; i < br->back_num; i++) {
+        if (i > 0) fputs(", ", f);
+        fprintf(f, "%d", refs[i]);
+      }
+    }
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case NT_CALL:
+    {
+      CallNode* cn = NCALL(node);
+      fprintf(f, "<call:%p>", (void* )node);
+      p_string(f, (int )(cn->name_end - cn->name), cn->name);
+    }
+    break;
+#endif
+
+  case NT_QTFR:
+    fprintf(f, "<quantifier:%p>{%d,%d}%s\n", (void* )node,
+            NQTFR(node)->lower, NQTFR(node)->upper,
+            (NQTFR(node)->greedy ? "" : "?"));
+    print_indent_tree(f, NQTFR(node)->target, indent + add);
+    break;
+
+  case NT_ENCLOSE:
+    fprintf(f, "<enclose:%p> ", (void* )node);
+    switch (NENCLOSE(node)->type) {
+    case ENCLOSE_OPTION:
+      /* only the label here: the target subtree is printed once, by the
+         code shared by all enclose kinds after this switch */
+      fprintf(f, "option:%d", NENCLOSE(node)->option);
+      break;
+    case ENCLOSE_MEMORY:
+      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
+      break;
+    case ENCLOSE_STOP_BACKTRACK:
+      fprintf(f, "stop-bt");
+      break;
+
+    default:
+      break;
+    }
+    fprintf(f, "\n");
+    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
+    break;
+
+  default:
+    fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
+    break;
+  }
+
+  /* container nodes already ended their own line before recursing */
+  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
+      type != NT_ENCLOSE)
+    fprintf(f, "\n");
+  fflush(f);
+}
+#endif /* ONIG_DEBUG */
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+/* Print the whole parse tree rooted at `node` to `f`, starting at
+ * indentation column zero. */
+static void
+print_tree(FILE* f, Node* node)
+{
+  const int top_level_indent = 0;
+
+  print_indent_tree(f, node, top_level_indent);
+}
+#endif
+#endif //INCLUDE_REGEXP
diff --git a/src/regenc.c b/src/regenc.c
new file mode 100644
index 000000000..70978cde8
--- /dev/null
+++ b/src/regenc.c
@@ -0,0 +1,909 @@
+/**********************************************************************
+ regenc.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include <string.h>
+#include "regint.h"
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
+
+/* Encoding-layer initialisation hook; performs no work and always
+ * returns 0. */
+extern int
+onigenc_init(void)
+{
+  const int status = 0;
+
+  return status;
+}
+
+/* Return the current value of the global OnigEncDefaultCharEncoding. */
+extern OnigEncoding
+onigenc_get_default_encoding(void)
+{
+  OnigEncoding current = OnigEncDefaultCharEncoding;
+
+  return current;
+}
+
+/* Store `enc` in the global OnigEncDefaultCharEncoding; always returns 0. */
+extern int
+onigenc_set_default_encoding(OnigEncoding enc)
+{
+  const int status = 0;
+
+  OnigEncDefaultCharEncoding = enc;
+  return status;
+}
+
+/* Byte length of the character at `p` (buffer ends at `e`) as an int:
+ * the exact length when a full character was found, the bytes available
+ * plus the reported shortfall when more input is needed, and 1 otherwise. */
+extern int
+onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
+{
+  int r = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
+
+  if (ONIGENC_MBCLEN_CHARFOUND_P(r)) {
+    return ONIGENC_MBCLEN_CHARFOUND_LEN(r);
+  }
+  if (ONIGENC_MBCLEN_NEEDMORE_P(r)) {
+    return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(r);
+  }
+  return 1;
+}
+
+/* Return `s` when it already sits on a character head; otherwise return
+ * the head of the character following the one that contains `s`. */
+extern UChar*
+onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
+
+  if (head >= s) {
+    return head;
+  }
+  /* s was inside a character: skip over that character */
+  return head + enclen(enc, head, end);
+}
+
+/* Same adjustment as onigenc_get_right_adjust_char_head(). When `prev` is
+ * non-NULL it additionally receives the head of the character containing
+ * `s` if `s` was inside one, or NULL if `s` was already on a head. */
+extern UChar*
+onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
+    const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
+
+  if (head >= s) {
+    /* no previous head is computed in this case */
+    if (prev) *prev = (const UChar* )NULL;
+    return head;
+  }
+
+  if (prev) *prev = (const UChar* )head;
+  head += enclen(enc, head, end);
+  return head;
+}
+
+/* Return the head of the character that ends just before `s`, or NULL
+ * when `s` is at or before `start`. */
+extern UChar*
+onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
+{
+  if (s > start) {
+    return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
+  }
+  return (UChar* )NULL;
+}
+
+/* Move `s` back by `n` characters, never going before `start`. Returns
+ * the new position, or NULL when the start of the buffer is reached while
+ * steps are still outstanding. */
+extern UChar*
+onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
+{
+  int left;
+
+  for (left = n; ONIG_IS_NOT_NULL(s) && left > 0; left--) {
+    if (s <= start) {
+      return (UChar* )NULL;
+    }
+    s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
+  }
+  return (UChar* )s;
+}
+
+/*
+ * Advance `n` characters from `p` and return the resulting position, or
+ * NULL when that position would lie beyond `end`.
+ *
+ * The walk stops as soon as the cursor reaches `end` with steps still
+ * outstanding, so the character-length lookup never reads at or past `end`.
+ */
+extern UChar*
+onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
+{
+  UChar* q = (UChar* )p;
+
+  while (n-- > 0) {
+    /* another character is required but none is left in [q, end) */
+    if (q >= end) return NULL;
+    q += ONIGENC_MBC_ENC_LEN(enc, q, end);
+  }
+  return (q <= end ? q : NULL);
+}
+
+/* Count the characters found by stepping from `p` until `end` is reached
+ * or passed. */
+extern int
+onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+  UChar* cur;
+  int count = 0;
+
+  for (cur = (UChar* )p; cur < end; cur += ONIGENC_MBC_ENC_LEN(enc, cur, end)) {
+    count++;
+  }
+  return count;
+}
+
+/* Count the characters of the terminator-ended string `s`. The terminator
+ * is a run of ONIGENC_MBC_MINLEN(enc) zero bytes beginning at a character
+ * position; it is not counted. */
+extern int
+onigenc_strlen_null(OnigEncoding enc, const UChar* s)
+{
+  UChar* cur = (UChar* )s;
+  UChar* limit;
+  int count = 0;
+
+  for (;;) {
+    if (*cur == '\0') {
+      /* zero bytes still required after this one to form a terminator */
+      int missing = ONIGENC_MBC_MINLEN(enc) - 1;
+      UChar* look = cur + 1;
+
+      while (missing > 0 && *look == '\0') {
+        look++;
+        missing--;
+      }
+      if (missing == 0) return count;
+    }
+    limit = cur + ONIGENC_MBC_MAXLEN(enc);
+    cur += ONIGENC_MBC_ENC_LEN(enc, cur, limit);
+    count++;
+  }
+}
+
+/* Byte length of the terminator-ended string `s`, excluding the
+ * terminator, which is a run of ONIGENC_MBC_MINLEN(enc) zero bytes
+ * beginning at a character position. */
+extern int
+onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
+{
+  UChar* const base = (UChar* )s;
+  UChar* cur = (UChar* )s;
+  UChar* limit;
+
+  for (;;) {
+    if (*cur == '\0') {
+      /* zero bytes still required after this one to form a terminator */
+      int missing = ONIGENC_MBC_MINLEN(enc) - 1;
+      UChar* look = cur + 1;
+
+      while (missing > 0 && *look == '\0') {
+        look++;
+        missing--;
+      }
+      if (missing == 0) return (int )(cur - base);
+    }
+    limit = cur + ONIGENC_MBC_MAXLEN(enc);
+    cur += ONIGENC_MBC_ENC_LEN(enc, cur, limit);
+  }
+}
+
+const UChar OnigEncAsciiToLowerCaseTable[] = { /* 256-entry byte map indexed by byte value:
+   'A'-'Z' (0x41-0x5a) map to 'a'-'z' (0x61-0x7a); every other byte,
+   including all of 0x80-0xff, maps to itself. */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+
+#ifdef USE_UPPER_CASE_TABLE
+const UChar OnigEncAsciiToUpperCaseTable[256] = { /* byte map indexed by byte value:
+   'a'-'z' (0x61-0x7a) map to 'A'-'Z' (0x41-0x5a); every other byte,
+   including all of 0x80-0xff, maps to itself. */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+#endif
+
+const unsigned short OnigEncAsciiCtypeTable[256] = { /* one 16-bit flag word per byte value;
+   the entries for 0x80-0xff are all zero.
+   NOTE(review): presumably each bit stands for one ONIGENC_CTYPE_* class --
+   confirm against the header that defines the ctype bit positions. */
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { /* byte map indexed by byte value:
+   0x41-0x5a map to 0x61-0x7a and 0xc0-0xde map to 0xe0-0xfe, except
+   0xd7, which maps to itself; 0xdf and every remaining byte also map
+   to themselves. */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+ '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+ '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+ '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+
+#ifdef USE_UPPER_CASE_TABLE
+const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { /* byte map indexed by byte value:
+   0x61-0x7a map to 0x41-0x5a and 0xe0-0xfe map to 0xc0-0xde, except
+   0xf7, which maps to itself; 0xff and every remaining byte also map
+   to themselves. */
+ '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+ '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+ '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+ '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+ '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+ '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+ '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+ '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+ '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
+ '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
+ '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
+ '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
+ '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+ '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+ '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+ '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
+ '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
+};
+#endif
+
+/* Obsolete entry point: the `table` argument is ignored and nothing is
+ * done. */
+extern void
+onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
+{
+  /* intentionally empty -- obsoleted */
+  return;
+}
+
+/* Function form of the ONIGENC_LEFT_ADJUST_CHAR_HEAD macro: returns
+ * whatever the macro yields for (enc, start, s, end). */
+extern UChar*
+onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
+{
+  UChar* head = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
+
+  return head;
+}
+
+const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { /* {from, to} pairs: each ASCII
+   upper-case letter 0x41-0x5a with its lower-case counterpart 0x61-0x7a. */
+ { 0x41, 0x61 },
+ { 0x42, 0x62 },
+ { 0x43, 0x63 },
+ { 0x44, 0x64 },
+ { 0x45, 0x65 },
+ { 0x46, 0x66 },
+ { 0x47, 0x67 },
+ { 0x48, 0x68 },
+ { 0x49, 0x69 },
+ { 0x4a, 0x6a },
+ { 0x4b, 0x6b },
+ { 0x4c, 0x6c },
+ { 0x4d, 0x6d },
+ { 0x4e, 0x6e },
+ { 0x4f, 0x6f },
+ { 0x50, 0x70 },
+ { 0x51, 0x71 },
+ { 0x52, 0x72 },
+ { 0x53, 0x73 },
+ { 0x54, 0x74 },
+ { 0x55, 0x75 },
+ { 0x56, 0x76 },
+ { 0x57, 0x77 },
+ { 0x58, 0x78 },
+ { 0x59, 0x79 },
+ { 0x5a, 0x7a }
+};
+
+/* Invoke `f` twice for every pair in OnigAsciiLowerMap: first
+ * (from -> to), then (to -> from). Stops at, and returns, the first
+ * non-zero result of `f`; returns 0 when every call returned 0. */
+extern int
+onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+                                  OnigApplyAllCaseFoldFunc f, void* arg,
+                                  OnigEncoding enc ARG_UNUSED)
+{
+  const int npairs =
+    (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
+  OnigCodePoint code;
+  int idx, rc;
+
+  for (idx = 0; idx < npairs; idx++) {
+    const OnigPairCaseFoldCodes* pair = &OnigAsciiLowerMap[idx];
+
+    code = pair->to;
+    rc = (*f)(pair->from, &code, 1, arg);
+    if (rc != 0) return rc;
+
+    code = pair->from;
+    rc = (*f)(pair->to, &code, 1, arg);
+    if (rc != 0) return rc;
+  }
+
+  return 0;
+}
+
+/* When the byte at `p` is an ASCII letter, store its opposite-case code
+ * in items[0] and return 1; otherwise return 0 and leave `items`
+ * untouched. */
+extern int
+onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
+    const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
+    OnigEncoding enc ARG_UNUSED)
+{
+  const OnigUChar c = *p;
+  int delta;
+
+  if (0x41 <= c && c <= 0x5a) {
+    delta = 0x20;        /* upper -> lower */
+  }
+  else if (0x61 <= c && c <= 0x7a) {
+    delta = -0x20;       /* lower -> upper */
+  }
+  else {
+    return 0;
+  }
+
+  items[0].byte_len = 1;
+  items[0].code_len = 1;
+  items[0].code[0] = (OnigCodePoint )(c + delta);
+  return 1;
+}
+
+/* Report the fold of code point 0xdf to the two-code sequence
+ * 0x73 0x73 through `f`, and return what `f` returns. */
+static int
+ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+                       OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  OnigCodePoint folded[2];
+
+  folded[0] = 0x73;
+  folded[1] = 0x73;
+  return (*f)((OnigCodePoint )0xdf, folded, 2, arg);
+}
+
+/* Report every case-fold pair to `f`: the ASCII pairs first, then both
+ * directions of each of the `map_size` entries of `map`, and finally --
+ * when `ess_tsett_flag` is non-zero -- the 0xdf fold. The first non-zero
+ * result from any step is returned immediately; otherwise 0. */
+extern int
+onigenc_apply_all_case_fold_with_map(int map_size,
+    const OnigPairCaseFoldCodes map[],
+    int ess_tsett_flag, OnigCaseFoldType flag,
+    OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  OnigCodePoint code;
+  int idx, rc;
+
+  rc = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
+  if (rc != 0) return rc;
+
+  for (idx = 0; idx < map_size; idx++) {
+    const OnigPairCaseFoldCodes* pair = &map[idx];
+
+    code = pair->to;
+    rc = (*f)(pair->from, &code, 1, arg);
+    if (rc != 0) return rc;
+
+    code = pair->from;
+    rc = (*f)(pair->to, &code, 1, arg);
+    if (rc != 0) return rc;
+  }
+
+  if (ess_tsett_flag == 0) {
+    return 0;
+  }
+  return ss_apply_all_case_fold(flag, f, arg);
+}
+
+/* Fill `items` with the case-fold alternatives of the text at `p` and
+ * return how many were stored:
+ *   - ASCII letter: its opposite case, plus 0xdf (consuming two bytes)
+ *     when `ess_tsett_flag` is set and the text starts with "ss" in any
+ *     letter-case combination;
+ *   - 0xdf with `ess_tsett_flag` set: the four two-code spellings
+ *     "ss", "SS", "sS", "Ss";
+ *   - otherwise: the partner of the first `map` entry containing the byte;
+ *   - 0 when nothing applies. */
+extern int
+onigenc_get_case_fold_codes_by_str_with_map(int map_size,
+    const OnigPairCaseFoldCodes map[],
+    int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  const OnigUChar c = *p;
+  const int is_upper = (0x41 <= c && c <= 0x5a);
+  const int is_lower = (0x61 <= c && c <= 0x7a);
+  int k;
+
+  if (is_upper || is_lower) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(is_upper ? c + 0x20 : c - 0x20);
+
+    /* 'S' or 's' followed by 'S' or 's' */
+    if ((c == 0x53 || c == 0x73) && ess_tsett_flag != 0 && end > p + 1
+        && (p[1] == 0x53 || p[1] == 0x73)) {
+      items[1].byte_len = 2;
+      items[1].code_len = 1;
+      items[1].code[0] = (OnigCodePoint )0xdf;
+      return 2;
+    }
+    return 1;
+  }
+
+  if (c == 0xdf && ess_tsett_flag != 0) {
+    static const char spellings[4][2] = {
+      { 's', 's' }, { 'S', 'S' }, { 's', 'S' }, { 'S', 's' }
+    };
+
+    for (k = 0; k < 4; k++) {
+      items[k].byte_len = 1;
+      items[k].code_len = 2;
+      items[k].code[0] = (OnigCodePoint )spellings[k][0];
+      items[k].code[1] = (OnigCodePoint )spellings[k][1];
+    }
+    return 4;
+  }
+
+  for (k = 0; k < map_size; k++) {
+    if (c == map[k].from) {
+      items[0].byte_len = 1;
+      items[0].code_len = 1;
+      items[0].code[0] = map[k].to;
+      return 1;
+    }
+    if (c == map[k].to) {
+      items[0].byte_len = 1;
+      items[0].code_len = 1;
+      items[0].code[0] = map[k].from;
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+
+/*
+ * Stub ctype-range lookup: none of the arguments are examined and the
+ * result is always ONIG_NO_SUPPORT_CONFIG.
+ */
+extern int
+onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
+    OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
+    OnigEncoding enc)
+{
+  return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/*
+ * Return 1 when p lies before end and the byte at p is 0x0a, else 0.
+ */
+extern int
+onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
+{
+  return (p < end && *p == 0x0a) ? 1 : 0;
+}
+
+/*
+ * for single byte encodings
+ *
+ * Store the ASCII lower-case form of the byte at **p into *lower, advance
+ * *p by one byte and return 1 (the byte length written to lower).
+ */
+extern int
+onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
+    const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar *src = *p;
+
+  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
+  *p = src + 1;
+
+  return 1; /* byte length of the lower-cased character */
+}
+
+/*
+ * Character length for single-byte encodings: always 1, regardless of
+ * the arguments.
+ */
+extern int
+onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
+                                OnigEncoding enc ARG_UNUSED)
+{
+  return 1;
+}
+
+/*
+ * Return the byte at p widened to OnigCodePoint.  end is not checked.
+ */
+extern OnigCodePoint
+onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
+                                OnigEncoding enc ARG_UNUSED)
+{
+  const UChar byte = *p;
+
+  return (OnigCodePoint )byte;
+}
+
+/*
+ * Encoded length of a code point in a single-byte encoding: always 1.
+ */
+extern int
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
+{
+  return 1;
+}
+
+/*
+ * Write the low 8 bits of code to buf[0] and return 1 (bytes written).
+ */
+extern int
+onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
+{
+  buf[0] = (UChar )(code & 0xff);
+  return 1;
+}
+
+/*
+ * Return s unchanged (with const cast away); start, end and enc are
+ * not consulted.
+ */
+extern UChar*
+onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
+                                          const UChar* end,
+                                          OnigEncoding enc ARG_UNUSED)
+{
+  UChar *head = (UChar* )s;
+
+  return head;
+}
+
+/*
+ * Reverse-match predicate that ignores its arguments and returns TRUE.
+ */
+extern int
+onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
+                                             OnigEncoding enc ARG_UNUSED)
+{
+  return TRUE;
+}
+
+/*
+ * Reverse-match predicate that ignores its arguments and returns FALSE.
+ */
+extern int
+onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
+                                              OnigEncoding enc ARG_UNUSED)
+{
+  return FALSE;
+}
+
+/*
+ * For code < 128, return the result of ONIGENC_IS_ASCII_CODE_CTYPE for
+ * (code, ctype); every code >= 128 yields FALSE.
+ */
+extern int
+onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
+                            OnigEncoding enc ARG_UNUSED)
+{
+  if (code >= 128)
+    return FALSE;
+
+  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+}
+
+/*
+ * Pack the bytes of the character at p into one code point, most
+ * significant byte first.  The character length comes from enclen();
+ * the first byte is read unconditionally, and the remaining bytes are
+ * read only while p stays before end.
+ */
+extern OnigCodePoint
+onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
+{
+  const int nbytes = enclen(enc, p, end);
+  OnigCodePoint value = (OnigCodePoint )(*p++);
+  int remaining;
+
+  for (remaining = nbytes - 1; remaining > 0 && p < end; remaining--) {
+    const int next = *p++;
+
+    value <<= 8;
+    value += next;
+  }
+
+  return value;
+}
+
+/*
+ * Case-fold one character of a multi-byte encoding.
+ *
+ * When ONIGENC_IS_MBC_ASCII holds for the character, its ASCII lower-case
+ * form is stored in lower[0].  Otherwise the enclen() bytes of the
+ * character are copied to lower unchanged.  *pp is advanced past the
+ * character and the number of bytes written to lower is returned.
+ */
+extern int
+onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
+                          const UChar** pp, const UChar* end ARG_UNUSED,
+                          UChar* lower)
+{
+  const UChar *src = *pp;
+  int nbytes, k;
+
+  if (ONIGENC_IS_MBC_ASCII(src)) {
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
+    (*pp)++;
+    return 1;
+  }
+
+  nbytes = enclen(enc, src, end);
+  for (k = 0; k < nbytes; k++) {
+    lower[k] = src[k];
+  }
+  (*pp) += nbytes;
+
+  return nbytes; /* byte length of the (unchanged) multi-byte character */
+}
+
+/*
+ * Return 2 when any of bits 8-15 of code is set, otherwise 1.
+ */
+extern int
+onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
+{
+  return ((code & 0xff00) != 0) ? 2 : 1;
+}
+
+/*
+ * Return the index (1-4) of the highest non-zero byte among the low four
+ * bytes of code; 1 when only the lowest byte (or nothing) is set.
+ */
+extern int
+onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
+{
+  if ((code & 0xff000000) != 0) return 4;
+  if ((code & 0xff0000) != 0) return 3;
+  if ((code & 0xff00) != 0) return 2;
+  return 1;
+}
+
+/*
+ * Serialize code into buf as one or two bytes (high byte first; the high
+ * byte is emitted only when bits 8-15 are non-zero).  The written bytes
+ * are then checked with enclen(): if the encoding reports a different
+ * length, ONIGERR_INVALID_CODE_POINT_VALUE is returned; otherwise the
+ * number of bytes written.
+ */
+extern int
+onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int nbytes = 0;
+
+  if ((code & 0xff00) != 0) {
+    buf[nbytes++] = (UChar )((code >> 8) & 0xff);
+  }
+  buf[nbytes++] = (UChar )(code & 0xff);
+
+  if (enclen(enc, buf, buf + nbytes) != nbytes)
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+  return nbytes;
+}
+
+/*
+ * Serialize code into buf as one to four bytes, most significant first.
+ * Leading zero bytes are skipped; once a byte has been emitted every
+ * lower byte is emitted too.  The written bytes are then checked with
+ * enclen(): if the encoding reports a different length,
+ * ONIGERR_INVALID_CODE_POINT_VALUE is returned; otherwise the number of
+ * bytes written.
+ */
+extern int
+onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
+{
+  int nbytes = 0;
+
+  if ((code & 0xff000000) != 0) {
+    buf[nbytes++] = (UChar )((code >> 24) & 0xff);
+  }
+  if (nbytes != 0 || (code & 0xff0000) != 0) {
+    buf[nbytes++] = (UChar )((code >> 16) & 0xff);
+  }
+  if (nbytes != 0 || (code & 0xff00) != 0) {
+    buf[nbytes++] = (UChar )((code >> 8) & 0xff);
+  }
+  buf[nbytes++] = (UChar )(code & 0xff);
+
+  if (enclen(enc, buf, buf + nbytes) != nbytes)
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+  return nbytes;
+}
+
+/*
+ * Map a property name (the text p..end in encoding enc) to a ctype.
+ *
+ * The name must have the same character count as a table entry and
+ * compare equal to it under onigenc_with_ascii_strncmp().  Returns the
+ * matching ONIGENC_CTYPE_* value, or ONIGERR_INVALID_CHAR_PROPERTY_NAME
+ * when no entry matches.
+ */
+extern int
+onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+  static const PosixBracketEntryType PBS[] = {
+    PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
+    PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
+    PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
+    PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
+    PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
+    PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
+    PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
+    PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
+    PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
+    PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
+    PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
+    PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
+    PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
+    PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
+  };
+  const int entry_count = (int )(sizeof(PBS)/sizeof(PBS[0]));
+  int name_len, k;
+
+  name_len = onigenc_strlen(enc, p, end);
+
+  for (k = 0; k < entry_count; k++) {
+    const PosixBracketEntryType *entry = &PBS[k];
+
+    if (name_len != entry->len)
+      continue;
+    if (onigenc_with_ascii_strncmp(enc, p, end, entry->name, entry->len) == 0)
+      return entry->ctype;
+  }
+
+  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+}
+
+/*
+ * ctype test for a multi-byte encoding.
+ *
+ * Codes below 128 use ONIGENC_IS_ASCII_CODE_CTYPE.  Other codes are TRUE
+ * only when ctype is WORD, GRAPH or PRINT and ONIGENC_CODE_TO_MBCLEN
+ * reports more than one byte for the code; everything else is FALSE.
+ */
+extern int
+onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
+                          unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if (!CTYPE_IS_WORD_GRAPH_PRINT(ctype))
+    return FALSE;
+
+  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
+}
+
+/*
+ * ctype test for a multi-byte encoding.
+ *
+ * Codes below 128 use ONIGENC_IS_ASCII_CODE_CTYPE.  Other codes are TRUE
+ * only when ctype is WORD, GRAPH or PRINT and ONIGENC_CODE_TO_MBCLEN
+ * reports more than one byte for the code; everything else is FALSE.
+ */
+extern int
+onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
+                          unsigned int ctype)
+{
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+
+  if (!CTYPE_IS_WORD_GRAPH_PRINT(ctype))
+    return FALSE;
+
+  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
+}
+
+/*
+ * Compare up to n characters of the encoded text p..end with the byte
+ * string sascii.
+ *
+ * Returns 0 when the first n characters are equal.  On the first
+ * difference it returns (*sascii - code), where code is the decoded
+ * character.  If the encoded text ends first, the current byte of
+ * sascii is returned.
+ */
+extern int
+onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
+                           const UChar* sascii /* ascii */, int n)
+{
+  int diff, code;
+
+  for (; n > 0; n--) {
+    if (p >= end)
+      return (int )(*sascii);
+
+    code = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
+    diff = *sascii - code;
+    if (diff != 0)
+      return diff;
+
+    sascii++;
+    p += enclen(enc, p, end);
+  }
+
+  return 0;
+}
+
+/* Property management */
+
+/*
+ * Allocate (when *plist is NULL) or reallocate the pointer array *plist
+ * so that it holds new_size entries.  On success *plist and *psize are
+ * updated and 0 is returned.  On allocation failure ONIGERR_MEMORY is
+ * returned and neither *plist nor *psize is modified.
+ */
+static int
+resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
+{
+  const size_t nbytes = sizeof(OnigCodePoint*) * new_size;
+  const OnigCodePoint **resized;
+
+  if (IS_NULL(*plist))
+    resized = (const OnigCodePoint** )xmalloc(nbytes);
+  else
+    resized = (const OnigCodePoint** )xrealloc((void* )*plist, nbytes);
+
+  if (IS_NULL(resized)) return ONIGERR_MEMORY;
+
+  *plist = resized;
+  *psize = new_size;
+
+  return 0;
+}
+
+/*
+ * Append prop to the property list and register name in the lookup table.
+ *
+ * The list grows (PROP_INIT_SIZE entries at first, then doubling) when
+ * *pnum has reached *psize.  The table is created on first use.  After
+ * the append *pnum is incremented, and name is inserted into the table
+ * with the value (*pnum + ONIGENC_MAX_STD_CTYPE).
+ *
+ * Returns 0 on success, or ONIGERR_MEMORY / the error from
+ * resize_property_list() on allocation failure.  The result of the table
+ * insertion is not checked.
+ */
+extern int
+onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
+    hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
+    int *psize)
+{
+#define PROP_INIT_SIZE 16
+
+  int r;
+
+  if (*pnum >= *psize) {
+    int grown = PROP_INIT_SIZE;
+
+    if (*psize != 0)
+      grown = *psize * 2;
+
+    r = resize_property_list(grown, plist, psize);
+    if (r != 0) return r;
+  }
+
+  (*plist)[*pnum] = prop;
+
+  if (ONIG_IS_NULL(*table)) {
+    *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
+    if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
+  }
+
+  (*pnum)++;
+  onig_st_insert_strend(*table, name, name + strlen((char* )name),
+                        (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
+
+  return 0;
+}
+
+/*
+ * Call the initializer f between THREAD_ATOMIC_START and
+ * THREAD_ATOMIC_END and return its result.
+ */
+extern int
+onigenc_property_list_init(int (*f)(void))
+{
+  int status;
+
+  THREAD_ATOMIC_START;
+  status = f();
+  THREAD_ATOMIC_END;
+
+  return status;
+}
+#endif //INCLUDE_ENCODING
diff --git a/src/regenc.h b/src/regenc.h
new file mode 100644
index 000000000..1d8e752bc
--- /dev/null
+++ b/src/regenc.h
@@ -0,0 +1,203 @@
+#ifndef ONIGURUMA_REGENC_H
+#define ONIGURUMA_REGENC_H
+/**********************************************************************
+ regenc.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#define RUBY
+
+/*
+ * Fallback used when no mrb_compile_warn is defined: the first two
+ * arguments are dropped and the last two are passed to printf() as the
+ * format string and its single argument.
+ */
+#ifndef mrb_compile_warn
+#define mrb_compile_warn(a,b,c,d) printf(c,d)
+#endif
+
+#ifndef REGINT_H
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+#endif
+#include "oniguruma.h"
+
+/*
+ * One case-fold pair.  The *_with_map helpers in regenc.c report and
+ * match each entry in both directions (from -> to and to -> from).
+ */
+typedef struct {
+ OnigCodePoint from;
+ OnigCodePoint to;
+} OnigPairCaseFoldCodes;
+
+
+/*
+ * ARG_UNUSED marks a parameter as intentionally unused.  With GCC it
+ * expands to the `unused` attribute; elsewhere it expands to nothing.
+ */
+#ifndef ARG_UNUSED
+#if defined(__GNUC__)
+# define ARG_UNUSED __attribute__ ((unused))
+#else
+# define ARG_UNUSED
+#endif
+#endif
+
+/* NULL tests and early-return helpers for pointer arguments. */
+#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
+#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
+#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
+
+/*
+ * Byte length of the character at p.  For fixed-width encodings
+ * (max_enc_len == min_enc_len) the constant width is used without
+ * consulting the encoding; otherwise ONIGENC_MBC_ENC_LEN decides.
+ * enc is parenthesized so that any pointer expression may be passed;
+ * note that it is still evaluated more than once.
+ */
+#define enclen(enc,p,e) (((enc)->max_enc_len == (enc)->min_enc_len) ? (enc)->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
+
+/* character types bit flag */
+/* Each BIT_CTYPE_x is the single-bit mask (1 << ONIGENC_CTYPE_x). */
+#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
+#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
+#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
+#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
+#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
+#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
+#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
+#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
+#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
+#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
+#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
+#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
+#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
+#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
+#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
+
+/* Mask for a ctype number that is only known at run time. */
+#define CTYPE_TO_BIT(ctype) (1<<(ctype))
+/* True when ctype is one of WORD, GRAPH or PRINT. */
+#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
+ ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
+ (ctype) == ONIGENC_CTYPE_PRINT)
+
+
+/*
+ * One entry of a name -> ctype lookup table: the name bytes, the ctype
+ * value, and the name length.
+ */
+typedef struct {
+ const UChar *name;
+ int ctype;
+ short int len;
+} PosixBracketEntryType;
+
+/*
+ * Initializer for a PosixBracketEntryType.  The length is computed as
+ * sizeof(name) - 1, so name must be a string literal (or char array),
+ * not a pointer.
+ */
+#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
+
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+#define USE_UNICODE_PROPERTIES
+/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
+
+
+#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+
+/* for encoding system implementation (internal) */
+ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
+ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc);
+ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
+ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
+ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc);
+
+/* methods for single byte encoding */
+ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc);
+ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc);
+ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
+
+/* methods for multi byte encoding */
+ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end);
+ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower);
+ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
+ONIG_EXTERN int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
+ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end);
+ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end);
+ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
+ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
+ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
+ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
+
+
+/* in enc/unicode.c */
+ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
+ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
+ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]);
+ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
+ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold);
+ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
+
+
+/*
+ * Byte tests: true when c is in 0xd8-0xdb (FIRST) or 0xdc-0xdf (SECOND).
+ * NOTE(review): presumably c is the high-order byte of a UTF-16 code
+ * unit -- confirm against the UTF-16 encoding sources.
+ */
+#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
+#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
+
+/* Table lookups; c is used directly as the array index. */
+#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
+ OnigEncISO_8859_1_ToLowerCaseTable[c]
+#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
+ OnigEncISO_8859_1_ToUpperCaseTable[c]
+
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+
+ONIG_EXTERN int
+onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
+ONIG_EXTERN UChar*
+onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n);
+
+/* defined in regexec.c, but used in enc/xxx.c */
+extern int onig_is_in_code_range (const UChar* p, OnigCodePoint code);
+
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
+
+/*
+ * ASCII helpers.  The TO_LOWER/TO_UPPER and CTYPE macros index the
+ * OnigEncAscii* tables directly with their argument and perform no
+ * range check of their own.
+ */
+#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
+#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
+#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
+#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
+ ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
+/* True when code is classified as UPPER or LOWER in the ASCII ctype table. */
+#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
+ (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
+ ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
+
+/*
+ * Helpers for declaring and defining encoding objects.
+ *
+ * With ONIG_ENC_REGISTER defined, an encoding is a file-static object
+ * named encoding_<n>, and OnigEncodingDefine(f,n) also emits Init_<f>(),
+ * which passes the object's name and address to ONIG_ENC_REGISTER.
+ * Without it, the encoding is a global object named OnigEncoding<n>
+ * and no init function is generated.
+ */
+#ifdef ONIG_ENC_REGISTER
+extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*);
+#define OnigEncodingName(n) encoding_##n
+#define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n)
+#define OnigEncodingDefine(f,n) \
+ OnigEncodingDeclare(n); \
+ void Init_##f(void) { \
+ ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
+ &OnigEncodingName(n)); \
+ } \
+ OnigEncodingDeclare(n)
+#else
+#define OnigEncodingName(n) OnigEncoding##n
+#define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n)
+#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
+#endif
+
+/* macros for define replica encoding and encoding alias */
+/* In this header all three expand to nothing. */
+#define ENC_REPLICATE(name, orig)
+#define ENC_ALIAS(name, orig)
+#define ENC_DUMMY(name)
+
+#endif /* ONIGURUMA_REGENC_H */
diff --git a/src/regerror.c b/src/regerror.c
new file mode 100644
index 000000000..72db3a502
--- /dev/null
+++ b/src/regerror.c
@@ -0,0 +1,375 @@
+/**********************************************************************
+ regerror.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#ifdef INCLUDE_REGEXP
+#include <string.h>
+#include "regint.h"
+#include <stdio.h> /* for vsnprintf() */
+#include <stdarg.h>
+
+/*
+ * Return the message template for an error code.
+ *
+ * Non-negative codes are not errors and yield a null pointer.  Unknown
+ * negative codes yield "undefined error code".  Some templates contain
+ * the placeholder "%n", which onig_error_code_to_str() replaces with the
+ * offending name.  The returned pointer refers to a string literal.
+ */
+extern UChar*
+onig_error_code_to_format(int code)
+{
+  if (code >= 0) return (UChar* )0;
+
+  switch (code) {
+  case ONIG_MISMATCH:
+    return (UChar* )"mismatch";
+  case ONIG_NO_SUPPORT_CONFIG:
+    return (UChar* )"no support in this configuration";
+  case ONIGERR_MEMORY:
+    return (UChar* )"failed to allocate memory";
+  case ONIGERR_MATCH_STACK_LIMIT_OVER:
+    return (UChar* )"match-stack limit over";
+  case ONIGERR_TYPE_BUG:
+    return (UChar* )"undefined type (bug)";
+  case ONIGERR_PARSER_BUG:
+    return (UChar* )"internal parser error (bug)";
+  case ONIGERR_STACK_BUG:
+    return (UChar* )"stack error (bug)";
+  case ONIGERR_UNDEFINED_BYTECODE:
+    return (UChar* )"undefined bytecode (bug)";
+  case ONIGERR_UNEXPECTED_BYTECODE:
+    return (UChar* )"unexpected bytecode (bug)";
+  case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
+    return (UChar* )"default multibyte-encoding is not setted";
+  case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
+    return (UChar* )"can't convert to wide-char on specified multibyte-encoding";
+  case ONIGERR_INVALID_ARGUMENT:
+    return (UChar* )"invalid argument";
+  case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
+    return (UChar* )"end pattern at left brace";
+  case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
+    return (UChar* )"end pattern at left bracket";
+  case ONIGERR_EMPTY_CHAR_CLASS:
+    return (UChar* )"empty char-class";
+  case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
+    return (UChar* )"premature end of char-class";
+  case ONIGERR_END_PATTERN_AT_ESCAPE:
+    return (UChar* )"end pattern at escape";
+  case ONIGERR_END_PATTERN_AT_META:
+    return (UChar* )"end pattern at meta";
+  case ONIGERR_END_PATTERN_AT_CONTROL:
+    return (UChar* )"end pattern at control";
+  case ONIGERR_META_CODE_SYNTAX:
+    return (UChar* )"invalid meta-code syntax";
+  case ONIGERR_CONTROL_CODE_SYNTAX:
+    return (UChar* )"invalid control-code syntax";
+  case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
+    return (UChar* )"char-class value at end of range";
+  case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
+    return (UChar* )"char-class value at start of range";
+  case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
+    return (UChar* )"unmatched range specifier in char-class";
+  case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
+    return (UChar* )"target of repeat operator is not specified";
+  case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
+    return (UChar* )"target of repeat operator is invalid";
+  case ONIGERR_NESTED_REPEAT_OPERATOR:
+    return (UChar* )"nested repeat operator";
+  case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
+    return (UChar* )"unmatched close parenthesis";
+  case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
+    return (UChar* )"end pattern with unmatched parenthesis";
+  case ONIGERR_END_PATTERN_IN_GROUP:
+    return (UChar* )"end pattern in group";
+  case ONIGERR_UNDEFINED_GROUP_OPTION:
+    return (UChar* )"undefined group option";
+  case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
+    return (UChar* )"invalid POSIX bracket type";
+  case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
+    return (UChar* )"invalid pattern in look-behind";
+  case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
+    return (UChar* )"invalid repeat range {lower,upper}";
+  case ONIGERR_TOO_BIG_NUMBER:
+    return (UChar* )"too big number";
+  case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
+    return (UChar* )"too big number for repeat range";
+  case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
+    return (UChar* )"upper is smaller than lower in repeat range";
+  case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
+    return (UChar* )"empty range in char class";
+  case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
+    return (UChar* )"mismatch multibyte code length in char-class range";
+  case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
+    return (UChar* )"too many multibyte code ranges are specified";
+  case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
+    return (UChar* )"too short multibyte code string";
+  case ONIGERR_TOO_BIG_BACKREF_NUMBER:
+    return (UChar* )"too big backref number";
+  case ONIGERR_INVALID_BACKREF:
+#ifdef USE_NAMED_GROUP
+    return (UChar* )"invalid backref number/name";
+#else
+    return (UChar* )"invalid backref number";
+#endif
+  case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
+    return (UChar* )"numbered backref/call is not allowed. (use name)";
+  case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
+    return (UChar* )"too big wide-char value";
+  case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
+    return (UChar* )"too long wide-char value";
+  case ONIGERR_INVALID_CODE_POINT_VALUE:
+    return (UChar* )"invalid code point value";
+  case ONIGERR_EMPTY_GROUP_NAME:
+    return (UChar* )"group name is empty";
+  case ONIGERR_INVALID_GROUP_NAME:
+    return (UChar* )"invalid group name <%n>";
+  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+#ifdef USE_NAMED_GROUP
+    return (UChar* )"invalid char in group name <%n>";
+#else
+    return (UChar* )"invalid char in group number <%n>";
+#endif
+  case ONIGERR_UNDEFINED_NAME_REFERENCE:
+    return (UChar* )"undefined name <%n> reference";
+  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+    return (UChar* )"undefined group <%n> reference";
+  case ONIGERR_MULTIPLEX_DEFINED_NAME:
+    return (UChar* )"multiplex defined name <%n>";
+  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+    return (UChar* )"multiplex definition name <%n> call";
+  case ONIGERR_NEVER_ENDING_RECURSION:
+    return (UChar* )"never ending recursion";
+  case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
+    return (UChar* )"group number is too big for capture history";
+  case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+    return (UChar* )"invalid character property name {%n}";
+  case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
+    return (UChar* )"not supported encoding combination";
+  case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
+    return (UChar* )"invalid combination of options";
+  case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
+    return (UChar* )"over thread pass limit count";
+
+  default:
+    return (UChar* )"undefined error code";
+  }
+}
+
+/*
+ * Write the low 8 bits of v to s as two lower-case hex digits followed
+ * by a terminating NUL (three bytes in total).
+ */
+static void sprint_byte(char* s, unsigned int v)
+{
+  const unsigned int low_byte = v & 0377;
+
+  sprintf(s, "%02x", low_byte);
+}
+
+/*
+ * Write the low 8 bits of v to s as "\xNN" (backslash, 'x', two
+ * lower-case hex digits) followed by a terminating NUL (five bytes in
+ * total).
+ */
+static void sprint_byte_with_x(char* s, unsigned int v)
+{
+  const unsigned int low_byte = v & 0377;
+
+  sprintf(s, "\\x%02x", low_byte);
+}
+
+/*
+ * Copy the text s..end into buf in a form made of single bytes.
+ *
+ * When the encoding's minimum character length is 1, at most buf_size
+ * bytes are copied verbatim.  Otherwise each character is decoded:
+ * codes below 0x80 are stored as one byte; codes above 0xffff are
+ * written as "\x" plus eight hex digits when 10 bytes still fit; other
+ * codes (or wide codes that no longer fit in 10) are written as "\x"
+ * plus four hex digits when 6 bytes still fit; conversion stops when
+ * nothing fits.
+ *
+ * *is_over is set to 1 when part of the input was left out, else 0.
+ * Returns the number of bytes stored in buf.  The hex writers append a
+ * NUL after their digits, so buf needs room beyond buf_size for it.
+ */
+static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
+                    UChar buf[], int buf_size, int *is_over)
+{
+  int used;
+  UChar *cur;
+  OnigCodePoint code;
+
+  if (ONIGENC_MBC_MINLEN(enc) <= 1) {
+    used = (int)MIN((end - s), buf_size);
+    xmemcpy(buf, s, (size_t )used);
+    *is_over = ((buf_size < (end - s)) ? 1 : 0);
+    return used;
+  }
+
+  used = 0;
+  for (cur = s; cur < end; ) {
+    code = ONIGENC_MBC_TO_CODE(enc, cur, end);
+
+    if (code < 0x80) {
+      buf[used++] = (UChar )code;
+    }
+    else if (code > 0xffff && used + 10 <= buf_size) {
+      sprint_byte_with_x((char*)(&(buf[used])), (unsigned int)(code >> 24));
+      sprint_byte((char*)(&(buf[used+4])), (unsigned int)(code >> 16));
+      sprint_byte((char*)(&(buf[used+6])), (unsigned int)(code >> 8));
+      sprint_byte((char*)(&(buf[used+8])), (unsigned int)code);
+      used += 10;
+    }
+    else if (used + 6 <= buf_size) {
+      sprint_byte_with_x((char*)(&(buf[used])), (unsigned int)(code >> 8));
+      sprint_byte((char*)(&(buf[used+4])), (unsigned int)code);
+      used += 6;
+    }
+    else {
+      break;
+    }
+
+    cur += enclen(enc, cur, end);
+    if (used >= buf_size) break;
+  }
+
+  *is_over = ((cur < end) ? 1 : 0);
+  return used;
+}
+
+
+
+/* for ONIG_MAX_ERROR_MESSAGE_LEN */
+#define MAX_ERROR_PAR_LEN 30
+
+/*
+ * Format the message for an error code into s and return its length.
+ *
+ * For the name-related codes listed in the switch, one extra argument of
+ * type OnigErrorInfo* is read; its par..par_end text is converted with
+ * to_ascii() and substituted for "%n" in the message template, followed
+ * by "..." when the text had to be shortened.  Any other character that
+ * follows a '%' is copied without the '%'.  For all remaining codes the
+ * template is copied unchanged.  s is always NUL-terminated; its size is
+ * not checked here.
+ */
+extern int
+onig_error_code_to_str(UChar* s, int code, ...)
+{
+  UChar *out, *fmt;
+  OnigErrorInfo* einfo;
+  size_t len;
+  int is_over;
+  UChar parbuf[MAX_ERROR_PAR_LEN];
+  va_list vargs;
+
+  va_start(vargs, code);
+
+  switch (code) {
+  case ONIGERR_UNDEFINED_NAME_REFERENCE:
+  case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+  case ONIGERR_MULTIPLEX_DEFINED_NAME:
+  case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+  case ONIGERR_INVALID_GROUP_NAME:
+  case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+  case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+    einfo = va_arg(vargs, OnigErrorInfo*);
+    /* keep 3 bytes of parbuf in reserve (see to_ascii's NUL handling) */
+    len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
+                   parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
+    fmt = onig_error_code_to_format(code);
+    out = s;
+    while (*fmt != '\0') {
+      if (*fmt == '%') {
+        fmt++;
+        if (*fmt == 'n') { /* '%n': name */
+          xmemcpy(out, parbuf, len);
+          out += len;
+          if (is_over != 0) {
+            xmemcpy(out, "...", 3);
+            out += 3;
+          }
+          fmt++;
+          continue;
+        }
+        /* any other character after '%' is copied as-is below */
+      }
+      *out++ = *fmt++;
+    }
+    *out = '\0';
+    len = out - s;
+    break;
+
+  default:
+    fmt = onig_error_code_to_format(code);
+    len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, fmt);
+    xmemcpy(s, fmt, len);
+    s[len] = '\0';
+    break;
+  }
+
+  va_end(vargs);
+  return (int)len;
+}
+
+/*
+ * Format a message into buf and, when there is room, append the pattern
+ * as ": /<pattern>/".
+ *
+ * The message is produced by xvsnprintf(fmt, args).  The pattern is only
+ * appended when the message length plus a worst-case estimate of four
+ * output bytes per pattern byte (plus 4 for the delimiters) is smaller
+ * than bufsize.  While copying the pattern:
+ *   - a backslash is copied together with the character that follows it;
+ *   - '/' is escaped as "\/";
+ *   - multi-byte characters are copied verbatim (minimum length 1) or
+ *     byte-wise as "\xNN" (wider encodings);
+ *   - bytes that are neither printable nor space are written as "\xNN".
+ *
+ * If xvsnprintf() reports an error (negative result) buf is left as the
+ * formatter produced it and nothing is appended: the contents and
+ * termination of buf cannot be relied on in that case.
+ */
+void
+onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+                            UChar* pat, UChar* pat_end, const UChar *fmt, va_list args)
+{
+  size_t need;
+  int n, len;
+  UChar *p, *s, *bp;
+  UChar bs[6];
+
+  n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
+
+  /* A negative n would wrap around in the unsigned sum below and could
+     pass the size check, leading to strcat() on an unusable buffer. */
+  if (n < 0) return;
+
+  need = (pat_end - pat) * 4 + 4;
+
+  if (n + need < (size_t)bufsize) {
+    strcat((char* )buf, ": /");
+    s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
+
+    p = pat;
+    while (p < pat_end) {
+      if (*p == '\\') {
+        *s++ = *p++;
+        /* A trailing backslash has no following character to copy. */
+        if (p < pat_end) {
+          len = enclen(enc, p, pat_end);
+          /* never read past pat_end, even for a truncated character */
+          while (len-- > 0 && p < pat_end) *s++ = *p++;
+        }
+      }
+      else if (*p == '/') {
+        *s++ = (unsigned char )'\\';
+        *s++ = *p++;
+      }
+      else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
+        len = enclen(enc, p, pat_end);
+        if (ONIGENC_MBC_MINLEN(enc) == 1) {
+          while (len-- > 0 && p < pat_end) *s++ = *p++;
+        }
+        else { /* for UTF16 */
+          int blen;
+
+          while (len-- > 0 && p < pat_end) {
+            sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
+            blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+            bp = bs;
+            while (blen-- > 0) *s++ = *bp++;
+          }
+        }
+      }
+      else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
+               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
+        sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
+        len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+        bp = bs;
+        while (len-- > 0) *s++ = *bp++;
+      }
+      else {
+        *s++ = *p++;
+      }
+    }
+
+    *s++ = '/';
+    *s = '\0';
+  }
+}
+
+/*
+ * Variadic front end: collects the arguments following fmt into a
+ * va_list and forwards everything to onig_vsnprintf_with_pattern().
+ */
+void
+onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+                           UChar* pat, UChar* pat_end, const UChar *fmt, ...)
+{
+  va_list ap;
+
+  va_start(ap, fmt);
+  onig_vsnprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, ap);
+  va_end(ap);
+}
+#endif //INCLUDE_REGEXP
diff --git a/src/regex.h b/src/regex.h
new file mode 100644
index 000000000..b5472122a
--- /dev/null
+++ b/src/regex.h
@@ -0,0 +1,32 @@
+/**********************************************************************
+
+ regex.h -
+
+ $Author: akr $
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#ifndef ONIGURUMA_REGEX_H
+#define ONIGURUMA_REGEX_H 1
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include "oniguruma.h"
+
+/* The declarations below are only provided when ONIG_RUBY_M17N is not defined. */
+#ifndef ONIG_RUBY_M17N
+
+ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
+
+/* mbclen(p,e,enc) forwards its three arguments unchanged to mrb_enc_mbclen(). */
+#define mbclen(p,e,enc) mrb_enc_mbclen((p),(e),(enc))
+
+#endif /* ifndef ONIG_RUBY_M17N */
+
+#if defined(__cplusplus)
+} /* extern "C" { */
+#endif
+
+#endif /* ONIGURUMA_REGEX_H */
diff --git a/src/regexec.c b/src/regexec.c
new file mode 100644
index 000000000..3ef38bc8f
--- /dev/null
+++ b/src/regexec.c
@@ -0,0 +1,3757 @@
+/**********************************************************************
+ regexec.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#ifdef INCLUDE_REGEXP
+#include <string.h>
+#include "regint.h"
+
+/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+/*
+ * True when the character at p decodes to code point 13 (CR) and the
+ * character immediately after it is a newline in encoding enc.
+ * enclen() is given the end pointer as its third argument, matching the
+ * other enclen() calls; p is parenthesized before the pointer addition.
+ */
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+   ONIGENC_IS_MBC_NEWLINE(enc,((p)+enclen(enc,p,end)),end))
+#endif
+
+#ifdef USE_CAPTURE_HISTORY
+static void history_tree_free(OnigCaptureTreeNode* node);
+
+/*
+ * Empty a capture-history node in place: free every attached subtree,
+ * null out all allocated child slots and reset group/beg/end to their
+ * "unset" values.  The child array itself is kept.  A null node is a no-op.
+ */
+static void
+history_tree_clear(OnigCaptureTreeNode* node)
+{
+  int slot;
+
+  if (IS_NULL(node)) return;
+
+  /* release every attached subtree */
+  for (slot = 0; slot < node->num_childs; slot++) {
+    OnigCaptureTreeNode* sub = node->childs[slot];
+
+    if (IS_NOT_NULL(sub)) history_tree_free(sub);
+  }
+
+  /* wipe the whole slot array, including capacity that was never used */
+  for (slot = 0; slot < node->allocated; slot++)
+    node->childs[slot] = (OnigCaptureTreeNode* )0;
+
+  node->num_childs = 0;
+  node->beg = ONIG_REGION_NOTPOS;
+  node->end = ONIG_REGION_NOTPOS;
+  node->group = -1;
+}
+
+/*
+ * Destroy a capture-history node: clear it (which frees its subtrees)
+ * and then hand the node itself to xfree().
+ */
+static void
+history_tree_free(OnigCaptureTreeNode* node)
+{
+  history_tree_clear(node);   /* children first */
+  xfree(node);                /* then the node */
+}
+
+/*
+ * Drop the capture-history tree attached to region r, if there is one,
+ * and reset r->history_root to null.
+ */
+static void
+history_root_free(OnigRegion* r)
+{
+  OnigCaptureTreeNode* root = r->history_root;
+
+  if (IS_NULL(root)) return;
+
+  history_tree_free(root);
+  r->history_root = (OnigCaptureTreeNode* )0;
+}
+
+/*
+ * Allocate a capture-history node with no children, group -1 and
+ * beg/end set to ONIG_REGION_NOTPOS.  Allocation failure is handled by
+ * CHECK_NULL_RETURN.
+ */
+static OnigCaptureTreeNode*
+history_node_new(void)
+{
+  OnigCaptureTreeNode* fresh =
+    (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
+
+  CHECK_NULL_RETURN(fresh);
+
+  /* position / identity: not set yet */
+  fresh->group = -1;
+  fresh->beg = ONIG_REGION_NOTPOS;
+  fresh->end = ONIG_REGION_NOTPOS;
+
+  /* child array: empty, nothing allocated */
+  fresh->childs = (OnigCaptureTreeNode** )0;
+  fresh->allocated = 0;
+  fresh->num_childs = 0;
+
+  return fresh;
+}
+
+/*
+ * Append child to parent's child array, growing the array when it is full
+ * (first allocation: HISTORY_TREE_INIT_ALLOC_SIZE slots, afterwards the
+ * capacity is doubled).  Newly added slots are nulled.
+ *
+ * Returns 0 on success.  On allocation failure CHECK_NULL_RETURN_MEMERR
+ * returns from the function; parent is left exactly as it was, because the
+ * new block is held in a temporary and only stored once it is known to be
+ * valid (assigning the xrealloc result straight to parent->childs would
+ * lose the old array and leave a null array with a non-zero child count).
+ */
+static int
+history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
+{
+#define HISTORY_TREE_INIT_ALLOC_SIZE 8
+
+  if (parent->num_childs >= parent->allocated) {
+    int n, i;
+    OnigCaptureTreeNode** grown;
+
+    if (IS_NULL(parent->childs)) {
+      n = HISTORY_TREE_INIT_ALLOC_SIZE;
+      grown =
+        (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+    }
+    else {
+      n = parent->allocated * 2;
+      grown =
+        (OnigCaptureTreeNode** )xrealloc(parent->childs,
+                                         sizeof(OnigCaptureTreeNode*) * n);
+    }
+    CHECK_NULL_RETURN_MEMERR(grown);
+    parent->childs = grown;
+    for (i = parent->allocated; i < n; i++) {
+      parent->childs[i] = (OnigCaptureTreeNode* )0;
+    }
+    parent->allocated = n;
+  }
+
+  parent->childs[parent->num_childs] = child;
+  parent->num_childs++;
+  return 0;
+}
+
+/*
+ * Deep-copy the capture-history tree rooted at node.
+ *
+ * The copy carries the same group, beg and end as the source and a clone
+ * of every child, in order.  Returns a null pointer when any allocation
+ * fails; in that case everything built so far is released.
+ */
+static OnigCaptureTreeNode*
+history_tree_clone(OnigCaptureTreeNode* node)
+{
+  int i;
+  OnigCaptureTreeNode *clone, *child;
+
+  clone = history_node_new();
+  CHECK_NULL_RETURN(clone);
+
+  clone->group = node->group;   /* history_node_new() leaves this at -1 */
+  clone->beg = node->beg;
+  clone->end = node->end;
+  for (i = 0; i < node->num_childs; i++) {
+    child = history_tree_clone(node->childs[i]);
+    if (IS_NULL(child)) {
+      history_tree_free(clone);
+      return (OnigCaptureTreeNode* )0;
+    }
+    if (history_tree_add_child(clone, child) != 0) {
+      /* child was never attached, so it must be released separately */
+      history_tree_free(child);
+      /* do not let the free below walk a child array that is null */
+      if (IS_NULL(clone->childs)) {
+        clone->num_childs = 0;
+        clone->allocated = 0;
+      }
+      history_tree_free(clone);
+      return (OnigCaptureTreeNode* )0;
+    }
+  }
+
+  return clone;
+}
+
+/*
+ * Accessor: hand back the history_root pointer stored in region
+ * (it may be null).  The region is not modified.
+ */
+extern OnigCaptureTreeNode*
+onig_get_capture_tree(OnigRegion* region)
+{
+  OnigCaptureTreeNode* root = region->history_root;
+
+  return root;
+}
+#endif /* USE_CAPTURE_HISTORY */
+
+/*
+ * Mark the first num_regs beg/end pairs of region as ONIG_REGION_NOTPOS
+ * and, when capture history is compiled in, release the history tree.
+ */
+extern void
+onig_region_clear(OnigRegion* region)
+{
+  int idx = 0;
+
+  while (idx < region->num_regs) {
+    region->end[idx] = ONIG_REGION_NOTPOS;
+    region->beg[idx] = ONIG_REGION_NOTPOS;
+    idx++;
+  }
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(region);
+#endif
+}
+
+/*
+ * Make region able to hold n registers and set region->num_regs to n.
+ * The arrays are never sized below ONIG_NREGION entries and are only
+ * (re)allocated when the current capacity is insufficient.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY on allocation failure.  After a
+ * failure both arrays have been released and the region is left empty
+ * (beg/end null, allocated 0, num_regs 0), so no dangling pointer or
+ * stale register count remains and nothing is leaked.
+ */
+extern int
+onig_region_resize(OnigRegion* region, int n)
+{
+  region->num_regs = n;
+
+  if (n < ONIG_NREGION)
+    n = ONIG_NREGION;
+
+  if (region->allocated == 0) {
+    int *new_beg, *new_end;
+
+    new_beg = (int* )xmalloc(n * sizeof(int));
+    if (new_beg == 0)
+      goto mem_error;
+
+    new_end = (int* )xmalloc(n * sizeof(int));
+    if (new_end == 0) {
+      xfree(new_beg);
+      goto mem_error;
+    }
+
+    region->beg = new_beg;
+    region->end = new_end;
+    region->allocated = n;
+  }
+  else if (region->allocated < n) {
+    int *tmp;
+
+    /* a failed xrealloc leaves the old block valid: free both arrays */
+    tmp = (int* )xrealloc(region->beg, n * sizeof(int));
+    if (tmp == 0) {
+      xfree(region->beg);
+      xfree(region->end);
+      goto mem_error;
+    }
+    region->beg = tmp;
+
+    tmp = (int* )xrealloc(region->end, n * sizeof(int));
+    if (tmp == 0) {
+      xfree(region->beg);
+      xfree(region->end);
+      goto mem_error;
+    }
+    region->end = tmp;
+
+    region->allocated = n;
+  }
+
+  return 0;
+
+ mem_error:
+  region->beg = (int* )0;
+  region->end = (int* )0;
+  region->allocated = 0;
+  region->num_regs = 0;
+  return ONIGERR_MEMORY;
+}
+
+/*
+ * Resize region to n registers and, if that worked, clear it.
+ * Returns whatever onig_region_resize() returned (0 on success).
+ */
+static int
+onig_region_resize_clear(OnigRegion* region, int n)
+{
+  int status = onig_region_resize(region, n);
+
+  if (status == 0)
+    onig_region_clear(region);
+
+  return status;
+}
+
+/*
+ * Store (beg, end) in register slot `at`, growing the region first when
+ * `at` is not below its allocated capacity.
+ * Returns 0 on success, ONIGERR_INVALID_ARGUMENT for a negative index, or
+ * the negative error produced by onig_region_resize().
+ */
+extern int
+onig_region_set(OnigRegion* region, int at, int beg, int end)
+{
+  if (at < 0) return ONIGERR_INVALID_ARGUMENT;
+
+  if (region->allocated <= at) {
+    int status = onig_region_resize(region, at + 1);
+
+    if (status < 0) return status;
+  }
+
+  region->end[at] = end;
+  region->beg[at] = beg;
+  return 0;
+}
+
+/*
+ * Put region into the empty state: no registers, no allocated arrays,
+ * no capture-history tree.  Nothing is freed here.
+ */
+extern void
+onig_region_init(OnigRegion* region)
+{
+  /* register arrays */
+  region->beg = (int* )0;
+  region->end = (int* )0;
+  region->allocated = 0;
+  region->num_regs = 0;
+
+  /* capture history */
+  region->history_root = (OnigCaptureTreeNode* )0;
+}
+
+/*
+ * Allocate an OnigRegion with xmalloc() and initialize it to the empty
+ * state.  Returns the xmalloc() result as is, so a failed allocation
+ * yields a null pointer.
+ */
+extern OnigRegion*
+onig_region_new(void)
+{
+  OnigRegion* fresh = (OnigRegion* )xmalloc(sizeof(OnigRegion));
+
+  if (fresh != 0)
+    onig_region_init(fresh);
+
+  return fresh;
+}
+
+/*
+ * Release what region r owns: the beg/end arrays (only when allocated > 0)
+ * and, when capture history is compiled in, the history tree.  The
+ * OnigRegion object itself is freed only when free_self is non-zero.
+ * A null r is ignored.
+ */
+extern void
+onig_region_free(OnigRegion* r, int free_self)
+{
+  if (!r) return;
+
+  if (r->allocated > 0) {
+    if (r->beg) xfree(r->beg);
+    if (r->end) xfree(r->end);
+    r->allocated = 0;
+  }
+
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(r);
+#endif
+
+  if (free_self) xfree(r);
+}
+
+/*
+ * Copy the register positions (and, with USE_CAPTURE_HISTORY, a deep copy
+ * of the history tree) from `from` into `to`.  Copying a region onto
+ * itself is a no-op.
+ *
+ * The function returns void, so a failed resize of `to` cannot be
+ * reported; in that case `to` is left with zero registers and no history
+ * tree instead of being written through arrays that are no longer valid.
+ */
+extern void
+onig_region_copy(OnigRegion* to, OnigRegion* from)
+{
+#define RREGC_SIZE (sizeof(int) * from->num_regs)
+  int i;
+
+  if (to == from) return;
+
+  if (onig_region_resize(to, from->num_regs) != 0) {
+    to->num_regs = 0;
+#ifdef USE_CAPTURE_HISTORY
+    history_root_free(to);
+#endif
+    return;
+  }
+
+  for (i = 0; i < from->num_regs; i++) {
+    to->beg[i] = from->beg[i];
+    to->end[i] = from->end[i];
+  }
+  to->num_regs = from->num_regs;
+
+#ifdef USE_CAPTURE_HISTORY
+  history_root_free(to);
+
+  if (IS_NOT_NULL(from->history_root)) {
+    to->history_root = history_tree_clone(from->history_root);
+  }
+#endif
+}
+
+
+/** stack **/
+/* Value stored in a stack-index slot that does not refer to any entry. */
+#define INVALID_STACK_INDEX -1
+
+/* stack type */
+/*
+ * The numeric layout of the type codes is what makes the masks at the end
+ * of this list work:
+ *  - only STK_ALT, STK_LOOK_BEHIND_NOT and STK_POS_NOT have a non-zero low
+ *    byte, so (type & STK_MASK_POP_USED) != 0 selects exactly those three;
+ *  - bit 0x8000 is set only in STK_MEM_END and STK_MEM_END_MARK, which is
+ *    what STK_MASK_MEM_END_OR_MARK tests;
+ *  - STK_MASK_TO_VOID_TARGET additionally covers bit 0x1000, which is set
+ *    in STK_STATE_CHECK_MARK, STK_NULL_CHECK_START and STK_NULL_CHECK_END.
+ */
+/* used by normal-POP */
+#define STK_ALT 0x0001
+#define STK_LOOK_BEHIND_NOT 0x0002
+#define STK_POS_NOT 0x0003
+/* handled by normal-POP */
+#define STK_MEM_START 0x0100
+#define STK_MEM_END 0x8200
+#define STK_REPEAT_INC 0x0300
+#define STK_STATE_CHECK_MARK 0x1000
+/* avoided by normal-POP */
+#define STK_NULL_CHECK_START 0x3000
+#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
+#define STK_MEM_END_MARK 0x8400
+#define STK_POS 0x0500 /* used when POP-POS */
+#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
+#define STK_REPEAT 0x0700
+#define STK_CALL_FRAME 0x0800
+#define STK_RETURN 0x0900
+#define STK_VOID 0x0a00 /* for fill a blank */
+
+/* stack type check mask */
+#define STK_MASK_POP_USED 0x00ff
+#define STK_MASK_TO_VOID_TARGET 0x10ff
+#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
+
+/*
+ * MATCH_ARG_INIT(msa, option, region, start): fill in a match-argument
+ * block before matching.  stack_p starts out null (no heap stack yet);
+ * options, region and start are copied from the arguments.  When
+ * USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is defined, best_len is also reset
+ * to ONIG_MISMATCH.  msa is accessed with '.', i.e. it is the object
+ * itself, not a pointer.
+ */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+ (msa).best_len = ONIG_MISMATCH;\
+} while(0)
+#else
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+} while(0)
+#endif
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+/* Buffers of at least this many bytes come from xmalloc(); smaller ones from xalloca(). */
+#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
+
+/*
+ * STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num): set up the
+ * state-check bit buffer of msa.
+ *
+ * A buffer is created only when state_num > 0, str_len is at least
+ * STATE_CHECK_STRING_THRESHOLD_LEN, and the computed byte size is non-zero,
+ * larger than the converted offset and below STATE_CHECK_BUFF_MAX_SIZE.
+ * Otherwise state_check_buff is null and state_check_buff_size is 0.
+ *
+ * Things a caller must be aware of:
+ *  - `offset` is assigned to (converted to a byte offset), so it must be a
+ *    modifiable lvalue;
+ *  - only the bytes from that offset to the end of the buffer are zeroed;
+ *  - the small-buffer case uses xalloca() in the expanding function;
+ *  - CHECK_NULL_RETURN_MEMERR is applied to the xmalloc() result inside
+ *    the expanding function.
+ */
+#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
+ if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
+ unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
+ offset = ((offset) * (state_num)) >> 3;\
+ if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
+ if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
+ (msa).state_check_buff = (void* )xmalloc(size);\
+ CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
+ }\
+ else \
+ (msa).state_check_buff = (void* )xalloca(size);\
+ xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
+ (size_t )(size - (offset))); \
+ (msa).state_check_buff_size = size;\
+ }\
+ else {\
+ (msa).state_check_buff = (void* )0;\
+ (msa).state_check_buff_size = 0;\
+ }\
+ }\
+ else {\
+ (msa).state_check_buff = (void* )0;\
+ (msa).state_check_buff_size = 0;\
+ }\
+ } while(0)
+
+/*
+ * MATCH_ARG_FREE(msa): release the heap memory owned by a match-argument
+ * block.  stack_p is freed when non-null.  With the combination-explosion
+ * check enabled, state_check_buff is freed only when its recorded size is
+ * at least STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE (smaller buffers were
+ * obtained with xalloca()).
+ *
+ * Both variants are wrapped in do { } while(0) so the macro is a single
+ * statement and cannot capture a following `else`.
+ */
+#define MATCH_ARG_FREE(msa) do {\
+ if ((msa).stack_p) xfree((msa).stack_p);\
+ if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
+ if ((msa).state_check_buff) xfree((msa).state_check_buff);\
+ }\
+} while(0)
+#else
+#define MATCH_ARG_FREE(msa) do {\
+ if ((msa).stack_p) xfree((msa).stack_p);\
+} while(0)
+#endif
+
+
+
+/*
+ * STACK_INIT(alloc_addr, ptr_num, stack_num): set up the match stack.
+ * Relies on msa, stk_alloc, stk_base, stk and stk_end being variables of
+ * the expanding function.
+ *
+ * alloc_addr always receives an xalloca() block whose first
+ * ptr_num * sizeof(char*) bytes are left for the caller.
+ *  - If msa->stack_p is set, that saved stack (msa->stack_n entries) is
+ *    used as the match stack.
+ *  - Otherwise the xalloca() block is made large enough to also hold
+ *    stack_num OnigStackType entries, placed right after the pointer area.
+ */
+#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
+ if (msa->stack_p) {\
+ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
+ stk_alloc = (OnigStackType* )(msa->stack_p);\
+ stk_base = stk_alloc;\
+ stk = stk_base;\
+ stk_end = stk_base + msa->stack_n;\
+ }\
+ else {\
+ alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
+ + sizeof(OnigStackType) * (stack_num));\
+ stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
+ stk_base = stk_alloc;\
+ stk = stk_base;\
+ stk_end = stk_base + (stack_num);\
+ }\
+} while(0)
+
+/*
+ * STACK_SAVE: when the current stack base differs from the block the
+ * stack started in (stk_alloc), record the base and its capacity in
+ * msa->stack_p / msa->stack_n.  Uses stk_base, stk_alloc, stk_end and msa
+ * from the expanding scope.
+ */
+#define STACK_SAVE do{\
+ if (stk_base != stk_alloc) {\
+ msa->stack_p = stk_base;\
+ msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
+ };\
+} while(0)
+
+/*
+ * File-wide cap on the number of match stack entries, consulted by
+ * stack_double(); a value of 0 turns the check there off.
+ */
+static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
+
+/* Return the currently configured match stack limit. */
+extern unsigned int
+onig_get_match_stack_limit_size(void)
+{
+  unsigned int current = MatchStackLimitSize;
+
+  return current;
+}
+
+/*
+ * Replace the match stack limit with `size`.  No validation is done;
+ * the function always returns 0.
+ */
+extern int
+onig_set_match_stack_limit_size(unsigned int size)
+{
+  const int ok = 0;
+
+  MatchStackLimitSize = size;
+  return ok;
+}
+
+/*
+ * Grow the match stack.
+ *
+ * arg_stk_base / arg_stk_end / arg_stk point at the caller's base, end and
+ * top pointers and are rewritten on success so that they refer to the new
+ * storage (the top keeps its offset from the base).  stk_alloc is the
+ * block the stack originally started in; msa is the match-argument block.
+ *
+ * Returns 0 on success, ONIGERR_MEMORY when allocation fails, or
+ * ONIGERR_MATCH_STACK_LIMIT_OVER when the stack already has exactly
+ * MatchStackLimitSize entries.
+ */
+static int
+stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
+ OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
+{
+ size_t n;
+ OnigStackType *x, *stk_base, *stk_end, *stk;
+
+ /* local names match what STACK_SAVE expects */
+ stk_base = *arg_stk_base;
+ stk_end = *arg_stk_end;
+ stk = *arg_stk;
+
+ n = stk_end - stk_base;
+ if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
+ /* still in the initial block and no heap stack saved: allocate a new
+ block of twice the size and copy the existing entries over */
+ x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
+ if (IS_NULL(x)) {
+ STACK_SAVE;
+ return ONIGERR_MEMORY;
+ }
+ xmemcpy(x, stk_base, n * sizeof(OnigStackType));
+ n *= 2;
+ }
+ else {
+ /* grow with xrealloc(); the limit is only enforced on this path */
+ unsigned int limit_size = MatchStackLimitSize;
+ n *= 2;
+ if (limit_size != 0 && n > limit_size) {
+ if ((unsigned int )(stk_end - stk_base) == limit_size)
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ else
+ n = limit_size; /* clamp the new size to the limit */
+ }
+ x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
+ if (IS_NULL(x)) {
+ STACK_SAVE;
+ return ONIGERR_MEMORY;
+ }
+ }
+ *arg_stk = x + (stk - stk_base);
+ *arg_stk_base = x;
+ *arg_stk_end = x + n;
+ return 0;
+}
+
+/*
+ * STACK_ENSURE(n): make sure at least n free entries remain above stk.
+ * When there are fewer, stack_double() is called once; if it fails, the
+ * stack is saved with STACK_SAVE and the error code is returned from the
+ * function in which the macro is expanded.
+ */
+#define STACK_ENSURE(n) do {\
+ if (stk_end - stk < (n)) {\
+ int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
+ if (r != 0) { STACK_SAVE; return r; } \
+ }\
+} while(0)
+
+/* Convert between a stack index and an entry pointer relative to the current stk_base. */
+#define STACK_AT(index) (stk_base + (index))
+#define GET_STACK_INDEX(stk) ((stk) - stk_base)
+
+/* Push an entry that carries only a type code (no payload fields are written). */
+#define STACK_PUSH_TYPE(stack_type) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ STACK_INC;\
+} while(0)
+
+/* Non-zero when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
+#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
+
+/*
+ * State-entry push macros.  Two variants exist: with
+ * USE_COMBINATION_EXPLOSION_CHECK every state entry also gets its
+ * state_check field written, and the state-check helpers are available;
+ * without it ELSE_IF_STATE_CHECK_MARK expands to nothing.
+ * All of them use stk / stk_base (and, in the first variant, str, s,
+ * num_comb_exp_check and state_check_buff) from the expanding scope.
+ */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+/* Bit position for (string position s, state number snum) in state_check_buff. */
+#define STATE_CHECK_POS(s,snum) \
+ (((s) - str) * num_comb_exp_check + ((snum) - 1))
+/* Read the bit for the current `s` and snum into v (0 when there is no buffer);
+   note that the macro parameter list does not include the position. */
+#define STATE_CHECK_VAL(v,snum) do {\
+ if (state_check_buff != NULL) {\
+ int x = STATE_CHECK_POS(s,snum);\
+ (v) = state_check_buff[x/8] & (1<<(x%8));\
+ }\
+ else (v) = 0;\
+} while(0)
+
+
+/* Extra `else if` arm for the pop loops: a popped STK_STATE_CHECK_MARK
+   entry sets its bit in state_check_buff. */
+#define ELSE_IF_STATE_CHECK_MARK(stk) \
+ else if ((stk)->type == STK_STATE_CHECK_MARK) { \
+ int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
+ state_check_buff[x/8] |= (1<<(x%8)); \
+ }
+
+/* Push a state entry (type, code pointer, string pointer, previous pointer). */
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = 0;\
+ STACK_INC;\
+} while(0)
+
+/* Like STACK_PUSH but without the capacity check and without writing
+   pstr / pstr_prev. */
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.state_check = 0;\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_ALT entry that records snum as its state-check number
+   (0 when no state-check buffer is in use). */
+#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_ALT;\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_STATE_CHECK_MARK entry; does nothing without a buffer. */
+#define STACK_PUSH_STATE_CHECK(s,snum) do {\
+ if (state_check_buff != NULL) {\
+ STACK_ENSURE(1);\
+ stk->type = STK_STATE_CHECK_MARK;\
+ stk->u.state.pstr = (s);\
+ stk->u.state.state_check = (snum);\
+ STACK_INC;\
+ }\
+} while(0)
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+#define ELSE_IF_STATE_CHECK_MARK(stk)
+
+/* Push a state entry (type, code pointer, string pointer, previous pointer). */
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+ STACK_ENSURE(1);\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ stk->u.state.pstr = (s);\
+ stk->u.state.pstr_prev = (sprev);\
+ STACK_INC;\
+} while(0)
+
+/* Like STACK_PUSH but without the capacity check and without writing
+   pstr / pstr_prev. */
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+ stk->type = (stack_type);\
+ stk->u.state.pcode = (pat);\
+ STACK_INC;\
+} while(0)
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+/* Thin wrappers that fix the type code passed to STACK_PUSH / STACK_PUSH_TYPE. */
+#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
+#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
+#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
+#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
+#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
+ STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
+
+/* Push an STK_REPEAT entry for repeat `id` at code position pat, count 0. */
+#define STACK_PUSH_REPEAT(id, pat) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_REPEAT;\
+ stk->u.repeat.num = (id);\
+ stk->u.repeat.pcode = (pat);\
+ stk->u.repeat.count = 0;\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_REPEAT_INC entry that remembers the stack index (sindex) of
+   the STK_REPEAT entry it belongs to; the pop macros use that index to
+   decrement the repeat count again. */
+#define STACK_PUSH_REPEAT_INC(sindex) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_REPEAT_INC;\
+ stk->u.repeat_inc.si = (sindex);\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_MEM_START entry for group mnum at string position s.
+   The previous mem_start_stk / mem_end_stk values are stored in the entry
+   so a pop can restore them; mem_start_stk[mnum] then becomes this entry's
+   index and mem_end_stk[mnum] becomes INVALID_STACK_INDEX. */
+#define STACK_PUSH_MEM_START(mnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_START;\
+ stk->u.mem.num = (mnum);\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.start = mem_start_stk[mnum];\
+ stk->u.mem.end = mem_end_stk[mnum];\
+ mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum] = INVALID_STACK_INDEX;\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_MEM_END entry for group mnum at string position s, saving
+   the previous start/end values; mem_end_stk[mnum] becomes this entry's
+   index (mem_start_stk is left unchanged). */
+#define STACK_PUSH_MEM_END(mnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_END;\
+ stk->u.mem.num = (mnum);\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.start = mem_start_stk[mnum];\
+ stk->u.mem.end = mem_end_stk[mnum];\
+ mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_MEM_END_MARK entry carrying only the group number. */
+#define STACK_PUSH_MEM_END_MARK(mnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_MEM_END_MARK;\
+ stk->u.mem.num = (mnum);\
+ STACK_INC;\
+} while(0)
+
+/*
+ * STACK_GET_MEM_START(mnum, k): walk down from the stack top and leave k
+ * on the STK_MEM_START entry of group mnum that is not paired with a
+ * MEM_END / MEM_END_MARK entry of the same group seen on the way (each
+ * such end entry raises `level`, each start entry met at level > 0 lowers
+ * it).  If the bottom is reached first, k ends up at stk_base.
+ */
+#define STACK_GET_MEM_START(mnum, k) do {\
+ int level = 0;\
+ k = stk;\
+ while (k > stk_base) {\
+ k--;\
+ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
+ && k->u.mem.num == (mnum)) {\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) break;\
+ level--;\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_GET_MEM_RANGE(k, mnum, start, end): walk upward from k towards the
+ * stack top.  `start` receives the string position of the first
+ * STK_MEM_START entry of group mnum seen at level 0, and `end` the
+ * position of the STK_MEM_END entry that brings the level back to 0.
+ * Neither output is written when no such entry is found, and k is advanced
+ * by the macro.
+ */
+#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
+ int level = 0;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
+ if (level == 0) (start) = k->u.mem.pstr;\
+ level++;\
+ }\
+ else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
+ level--;\
+ if (level == 0) {\
+ (end) = k->u.mem.pstr;\
+ break;\
+ }\
+ }\
+ k++;\
+ }\
+} while(0)
+
+/* Push an STK_NULL_CHECK_START entry for check cnum, remembering string
+   position s for the later comparison done by the STACK_NULL_CHECK macros. */
+#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_START;\
+ stk->u.null_check.num = (cnum);\
+ stk->u.null_check.pstr = (s);\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_NULL_CHECK_END entry for check cnum (no string position). */
+#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_NULL_CHECK_END;\
+ stk->u.null_check.num = (cnum);\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_CALL_FRAME entry holding the return address pat. */
+#define STACK_PUSH_CALL_FRAME(pat) do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_CALL_FRAME;\
+ stk->u.call_frame.ret_addr = (pat);\
+ STACK_INC;\
+} while(0)
+
+/* Push an STK_RETURN marker entry (type only). */
+#define STACK_PUSH_RETURN do {\
+ STACK_ENSURE(1);\
+ stk->type = STK_RETURN;\
+ STACK_INC;\
+} while(0)
+
+
+/*
+ * STACK_BASE_CHECK(p, at): debug-only underflow guard.  With ONIG_DEBUG it
+ * prints the location string and jumps to the stack_error label of the
+ * expanding function when p is below stk_base; otherwise it expands to
+ * nothing, so the scanning macros below perform no bounds check.
+ */
+#ifdef ONIG_DEBUG
+#define STACK_BASE_CHECK(p, at) \
+ if ((p) < stk_base) {\
+ fprintf(stderr, "at %s\n", at);\
+ goto stack_error;\
+ }
+#else
+#define STACK_BASE_CHECK(p, at)
+#endif
+
+/* Drop exactly one entry from the top of the stack. */
+#define STACK_POP_ONE do {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
+} while(0)
+
+/*
+ * STACK_POP: pop entries until one whose type matches STK_MASK_POP_USED is
+ * on top (stk is left pointing at it).  How much bookkeeping is undone
+ * while skipping the other entries depends on pop_level:
+ *  - STACK_POP_LEVEL_FREE: nothing is restored;
+ *  - STACK_POP_LEVEL_MEM_START: mem_start_stk / mem_end_stk are restored
+ *    from each STK_MEM_START entry;
+ *  - any other level: additionally, each STK_REPEAT_INC entry decrements
+ *    the count of the STK_REPEAT entry it refers to, and STK_MEM_END
+ *    entries restore mem_start_stk / mem_end_stk as well.
+ * In every mode ELSE_IF_STATE_CHECK_MARK is applied to skipped entries.
+ */
+#define STACK_POP do {\
+ switch (pop_level) {\
+ case STACK_POP_LEVEL_FREE:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP"); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+ break;\
+ case STACK_POP_LEVEL_MEM_START:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP 2"); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+ break;\
+ default:\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP 3"); \
+ if ((stk->type & STK_MASK_POP_USED) != 0) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+ break;\
+ }\
+} while(0)
+
+/*
+ * STACK_POP_TIL_POS_NOT: pop until an STK_POS_NOT entry is on top.  The
+ * skipped entries are undone the same way as in the default branch of
+ * STACK_POP (group start/end restored, repeat counts decremented, state
+ * check marks applied), independent of pop_level.
+ */
+#define STACK_POP_TIL_POS_NOT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
+ if (stk->type == STK_POS_NOT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+} while(0)
+
+/*
+ * STACK_POP_TIL_LOOK_BEHIND_NOT: same as STACK_POP_TIL_POS_NOT, except
+ * that the loop stops at an STK_LOOK_BEHIND_NOT entry.
+ */
+#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
+ while (1) {\
+ stk--;\
+ STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
+ if (stk->type == STK_LOOK_BEHIND_NOT) break;\
+ else if (stk->type == STK_MEM_START) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ else if (stk->type == STK_REPEAT_INC) {\
+ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
+ }\
+ else if (stk->type == STK_MEM_END) {\
+ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
+ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
+ }\
+ ELSE_IF_STATE_CHECK_MARK(stk);\
+ }\
+} while(0)
+
+/*
+ * STACK_POS_END(k): scan down from the stack top without popping.  Every
+ * entry matching IS_TO_VOID_TARGET is overwritten with STK_VOID; the scan
+ * stops at the first STK_POS entry, which is turned into STK_VOID too.
+ * k is a caller-supplied variable and is left on that former STK_POS entry.
+ */
+#define STACK_POS_END(k) do {\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_POS_END"); \
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_POS) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_STOP_BT_END: same scan as STACK_POS_END, but it stops at the first
+ * STK_STOP_BT entry and uses a local cursor of its own.
+ */
+#define STACK_STOP_BT_END do {\
+ OnigStackType *k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_STOP_BT) {\
+ k->type = STK_VOID;\
+ break;\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_NULL_CHECK(isnull, id, s): find the nearest STK_NULL_CHECK_START
+ * entry with number id below the stack top and set isnull to whether the
+ * string position stored there equals s.  The stack is not modified.
+ */
+#define STACK_NULL_CHECK(isnull,id,s) do {\
+ OnigStackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ (isnull) = (k->u.null_check.pstr == (s));\
+ break;\
+ }\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_NULL_CHECK_REC(isnull, id, s): variant of STACK_NULL_CHECK that
+ * keeps a nesting level while scanning: STK_NULL_CHECK_END entries raise
+ * it, matching STK_NULL_CHECK_START entries met at level > 0 lower it, and
+ * the comparison is made on the matching start entry met at level 0.
+ * NOTE(review): level is raised for every STK_NULL_CHECK_END entry but
+ * only lowered for start entries whose number equals id, whereas
+ * STACK_NULL_CHECK_MEMST_REC also tests the number on the END entry --
+ * confirm whether the difference is intended.
+ */
+#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
+ int level = 0;\
+ OnigStackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ (isnull) = (k->u.null_check.pstr == (s));\
+ break;\
+ }\
+ else level--;\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ level++;\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_NULL_CHECK_MEMST(isnull, id, s, reg): null check that also looks
+ * at the capture groups opened since the matching STK_NULL_CHECK_START
+ * entry.
+ *
+ * Result in isnull:
+ *   0  - the string position moved since the start entry, or some group
+ *        opened since then has no end yet, or its start and end positions
+ *        differ;
+ *   1  - position unchanged and every such group is empty and ends at s;
+ *  -1  - position unchanged and every such group is empty, but at least
+ *        one of them ends at a position other than s.
+ *
+ * For a group flagged in reg->bt_mem_end the end position is read from the
+ * stack entry indexed by u.mem.end; otherwise u.mem.end itself is cast to
+ * a string pointer.
+ */
+#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
+ OnigStackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_NULL_CHECK_MEMST_REC(isnull, id, s, reg): same result values as
+ * STACK_NULL_CHECK_MEMST, but the matching start entry is located with a
+ * nesting level: STK_NULL_CHECK_END entries with number id raise the
+ * level, start entries with number id met at level > 0 lower it.
+ */
+#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
+ int level = 0;\
+ OnigStackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
+ if (k->type == STK_NULL_CHECK_START) {\
+ if (k->u.null_check.num == (id)) {\
+ if (level == 0) {\
+ if (k->u.null_check.pstr != (s)) {\
+ (isnull) = 0;\
+ break;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START) {\
+ if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ (isnull) = 0; break;\
+ }\
+ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
+ else\
+ endp = (UChar* )k->u.mem.end;\
+ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
+ }\
+ }\
+ k++;\
+ }\
+ break;\
+ }\
+ }\
+ else {\
+ level--;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_NULL_CHECK_END) {\
+ if (k->u.null_check.num == (id)) level++;\
+ }\
+ }\
+} while(0)
+
+/*
+ * STACK_GET_REPEAT(id, k): scan down from the stack top and leave k on the
+ * STK_REPEAT entry with number id that is seen while `level` is 0.
+ * STK_RETURN entries raise the level and STK_CALL_FRAME entries lower it,
+ * so repeat entries inside an already completed call are skipped.
+ */
+#define STACK_GET_REPEAT(id, k) do {\
+ int level = 0;\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
+ if (k->type == STK_REPEAT) {\
+ if (level == 0) {\
+ if (k->u.repeat.num == (id)) {\
+ break;\
+ }\
+ }\
+ }\
+ else if (k->type == STK_CALL_FRAME) level--;\
+ else if (k->type == STK_RETURN) level++;\
+ }\
+} while(0)
+
+/*
+ * STACK_RETURN(addr): scan down for the STK_CALL_FRAME entry at level 0
+ * (STK_RETURN entries raise the level, call frames met at level > 0 lower
+ * it) and copy its saved return address into addr.
+ */
+#define STACK_RETURN(addr) do {\
+ int level = 0;\
+ OnigStackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_RETURN"); \
+ if (k->type == STK_CALL_FRAME) {\
+ if (level == 0) {\
+ (addr) = k->u.call_frame.ret_addr;\
+ break;\
+ }\
+ else level--;\
+ }\
+ else if (k->type == STK_RETURN)\
+ level++;\
+ }\
+} while(0)
+
+
+/*
+ * STRING_CMP(s1, s2, len): compare len bytes; jumps to the `fail` label of
+ * the expanding function on the first mismatch.  All three arguments are
+ * modified (both pointers advance, len counts down), so they must be
+ * lvalues.
+ */
+#define STRING_CMP(s1,s2,len) do {\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) goto fail;\
+ }\
+} while(0)
+
+/*
+ * STRING_CMP_IC(...): case-insensitive counterpart built on
+ * string_cmp_ic(); jumps to `fail` when that function returns 0.  Uses the
+ * variable `encode` from the expanding scope.
+ */
+#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
+ if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
+ goto fail; \
+} while(0)
+
+/*
+ * Case-insensitive comparison of the mblen bytes starting at s1 with the
+ * text at *ps2.  Both sides are case-folded one character at a time with
+ * ONIGENC_MBC_CASE_FOLD and the folded byte sequences are compared.
+ *
+ * Returns 1 when everything matched, in which case *ps2 is advanced past
+ * the text that was consumed on the second side; returns 0 on the first
+ * difference and leaves *ps2 untouched.
+ */
+static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
+                         UChar* s1, UChar** ps2, int mblen, const UChar* text_end)
+{
+  UChar folded1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+  UChar folded2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+  UChar *cursor2 = *ps2;
+  UChar *stop1 = s1 + mblen;
+  int n1, n2, i;
+
+  while (s1 < stop1) {
+    n1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, folded1);
+    n2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &cursor2, text_end, folded2);
+
+    /* folded forms must have the same length and the same bytes */
+    if (n1 != n2) return 0;
+    for (i = 0; i < n1; i++) {
+      if (folded1[i] != folded2[i]) return 0;
+    }
+  }
+
+  *ps2 = cursor2;
+  return 1;
+}
+
+/*
+ * STRING_CMP_VALUE / STRING_CMP_VALUE_IC: like STRING_CMP / STRING_CMP_IC,
+ * but instead of jumping to `fail` they report the outcome in is_fail
+ * (1 = mismatch, 0 = equal).
+ */
+#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
+ is_fail = 0;\
+ while (len-- > 0) {\
+ if (*s1++ != *s2++) {\
+ is_fail = 1; break;\
+ }\
+ }\
+} while(0)
+
+#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
+ if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
+ is_fail = 1; \
+ else \
+ is_fail = 0; \
+} while(0)
+
+
+/* Position tests against the subject string; they use str / end / s (and
+   right_range in the first variant) from the expanding scope. */
+#define IS_EMPTY_STR (str == end)
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
+/*
+ * DATA_ENSURE(n) jumps to `fail` unless n more bytes are available after
+ * s; the _CHECK forms only evaluate the condition.  The bound is
+ * right_range when USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is
+ * defined and `end` otherwise.
+ * NOTE(review): DATA_ENSURE expands to a bare `if`, so it is not safe as
+ * the body of an unbraced if/else.
+ */
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#define DATA_ENSURE_CHECK1 (s < right_range)
+#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
+#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+#else
+#define DATA_ENSURE_CHECK1 (s < end)
+#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+#define DATA_ENSURE(n) if (s + (n) > end) goto fail
+#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+
+
+#ifdef USE_CAPTURE_HISTORY
+/*
+ * Build the capture-history subtree of `node` from the stack entries in
+ * [*kp, stk_top).
+ *
+ * Each STK_MEM_START entry of a group that is flagged in
+ * reg->capture_history (and whose number does not exceed
+ * ONIG_MAX_CAPTURE_HISTORY_GROUP) becomes a child of node and is filled in
+ * recursively.  An STK_MEM_END entry for node's own group records the end
+ * offset, stores the entry in *kp and ends this level.  Offsets are
+ * relative to str.
+ *
+ * Returns 0 when node's end entry was found, 1 when the scan ran to
+ * stk_top (the root level ends this way), or a negative error code on
+ * allocation failure.  A child that could not be attached to node is
+ * freed here, since nothing else references it.
+ */
+static int
+make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
+                          OnigStackType* stk_top, UChar* str, regex_t* reg)
+{
+  int n, r;
+  OnigCaptureTreeNode* child;
+  OnigStackType* k = *kp;
+
+  while (k < stk_top) {
+    if (k->type == STK_MEM_START) {
+      n = k->u.mem.num;
+      if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
+          BIT_STATUS_AT(reg->capture_history, n) != 0) {
+        child = history_node_new();
+        CHECK_NULL_RETURN_MEMERR(child);
+        child->group = n;
+        child->beg = (int )(k->u.mem.pstr - str);
+        r = history_tree_add_child(node, child);
+        if (r != 0) {
+          history_tree_free(child);   /* not owned by node: release it */
+          return r;
+        }
+        *kp = (k + 1);
+        r = make_capture_history_tree(child, kp, stk_top, str, reg);
+        if (r != 0) return r;
+
+        /* resume at the entry where the child's scan stopped */
+        k = *kp;
+        child->end = (int )(k->u.mem.pstr - str);
+      }
+    }
+    else if (k->type == STK_MEM_END) {
+      if (k->u.mem.num == node->group) {
+        node->end = (int )(k->u.mem.pstr - str);
+        *kp = k;
+        return 0;
+      }
+    }
+    k++;
+  }
+
+  return 1; /* 1: root node ending. */
+}
+#endif
+
+#ifdef USE_BACKREF_WITH_LEVEL
+/*
+ * Return 1 when group number `mem` is among the `num` MemNumType values
+ * read one after another from memp with GET_MEMNUM_INC, 0 otherwise.
+ */
+static int mem_is_in_memp(int mem, int num, UChar* memp)
+{
+  MemNumType candidate;
+
+  while (num-- > 0) {
+    GET_MEMNUM_INC(candidate, memp);
+    if ((int )candidate == mem)
+      return 1;
+  }
+
+  return 0;
+}
+
+/*
+ * Match a back-reference against the capture recorded at call-nesting
+ * level `nest`.
+ *
+ * The stack is scanned from top-1 down to stk_base.  STK_RETURN entries
+ * raise the current level and STK_CALL_FRAME entries lower it; only
+ * entries seen while the level equals `nest` are considered.  For a group
+ * listed in memp (mem_num entries) the most recently seen STK_MEM_END
+ * entry gives the end of the captured text and the next STK_MEM_START
+ * entry its beginning.  That text is compared with the subject at *s
+ * (case-insensitively when ignore_case is non-zero).
+ *
+ * Returns 1 and advances *s past the compared text on a match; returns 0
+ * when the first candidate does not match, does not fit before send, or
+ * no candidate exists.
+ */
+static int backref_match_at_nested_level(regex_t* reg
+  , OnigStackType* top, OnigStackType* stk_base
+  , int ignore_case, int case_fold_flag
+  , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
+{
+  UChar *cap_beg, *cap_end = NULL_UCHARP;
+  UChar *subject, *cur;
+  OnigStackType* k;
+  int level = 0;
+
+  for (k = top - 1; k >= stk_base; k--) {
+    if (k->type == STK_CALL_FRAME) {
+      level--;
+      continue;
+    }
+    if (k->type == STK_RETURN) {
+      level++;
+      continue;
+    }
+    if (level != nest) continue;
+
+    if (k->type == STK_MEM_END) {
+      if (mem_is_in_memp(k->u.mem.num, mem_num, memp))
+        cap_end = k->u.mem.pstr;
+      continue;
+    }
+
+    if (k->type != STK_MEM_START) continue;
+    if (!mem_is_in_memp(k->u.mem.num, mem_num, memp)) continue;
+
+    cap_beg = k->u.mem.pstr;
+    if (cap_end == NULL_UCHARP) continue;   /* no end seen yet for this start */
+
+    if (cap_end - cap_beg > send - *s) return 0; /* or goto next_mem; */
+    subject = *s;
+
+    if (ignore_case != 0) {
+      if (string_cmp_ic(reg->enc, case_fold_flag,
+                        cap_beg, &subject, (int )(cap_end - cap_beg), send) == 0)
+        return 0; /* or goto next_mem; */
+    }
+    else {
+      for (cur = cap_beg; cur < cap_end; ) {
+        if (*cur++ != *subject++) return 0; /* or goto next_mem; */
+      }
+    }
+
+    *s = subject;
+    return 1;
+  }
+
+  return 0;
+}
+#endif /* USE_BACKREF_WITH_LEVEL */
+
+
+/*
+ * Optional per-opcode statistics, compiled in only with
+ * ONIG_DEBUG_STATISTICS.  USE_TIMEOFDAY is defined unconditionally just
+ * below, so the gettimeofday() based GETTIME / TIMEDIFF pair is the one in
+ * effect; the times() based pair in the #else branch is not compiled.
+ */
+#ifdef ONIG_DEBUG_STATISTICS
+
+#define USE_TIMEOFDAY
+
+#ifdef USE_TIMEOFDAY
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+static struct timeval ts, te;
+#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
+/* elapsed time between two timevals, in microseconds */
+#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
+ (((te).tv_sec - (ts).tv_sec)*1000000))
+#else
+#ifdef HAVE_SYS_TIMES_H
+#include <sys/times.h>
+#endif
+static struct tms ts, te;
+#define GETTIME(t) times(&(t))
+#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
+#endif
+
+/* Tables with 256 slots each, indexed with the opcode value by MOP_IN / MOP_OUT. */
+static int OpCounter[256];
+static int OpPrevCounter[256];
+static unsigned long OpTime[256];
+static int OpCurr = OP_FINISH;
+static int OpPrevTarget = OP_FAIL;
+static int MaxStackDepth = 0;
+
+/* MOP_IN(opcode): count the opcode, remember it as the current one and
+   take the start timestamp.  When the opcode equals OpPrevTarget, the
+   counter of the opcode that was current before it is bumped as well. */
+#define MOP_IN(opcode) do {\
+ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
+ OpCurr = opcode;\
+ OpCounter[opcode]++;\
+ GETTIME(ts);\
+} while(0)
+
+/* MOP_OUT: take the end timestamp and add the elapsed time to the current opcode. */
+#define MOP_OUT do {\
+ GETTIME(te);\
+ OpTime[OpCurr] += TIMEDIFF(te, ts);\
+} while(0)
+
+/*
+ * Reset all statistics: every slot of the three per-opcode tables and the
+ * recorded maximum stack depth go back to zero.
+ */
+extern void
+onig_statistics_init(void)
+{
+  int op = 0;
+
+  while (op < 256) {
+    OpTime[op] = 0;
+    OpPrevCounter[op] = 0;
+    OpCounter[op] = 0;
+    op++;
+  }
+
+  MaxStackDepth = 0;
+}
+
+/*
+ * Write the collected statistics to f: one line per OnigOpInfo entry (up
+ * to the entry with a negative opcode) showing the execution count, the
+ * "previous" count, the accumulated time and the opcode name, followed by
+ * the maximum stack depth.
+ *
+ * OpTime holds unsigned long values, so the time column is printed with
+ * %lu.
+ */
+extern void
+onig_print_statistics(FILE* f)
+{
+  int i;
+  fprintf(f, " count prev time\n");
+  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
+    fprintf(f, "%8d: %8d: %10lu: %s\n",
+            OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
+  }
+  fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
+}
+
+/* Statistics build: advance the stack top and track the deepest level reached. */
+#define STACK_INC do {\
+ stk++;\
+ if (stk - stk_base > MaxStackDepth) \
+ MaxStackDepth = stk - stk_base;\
+} while(0)
+
+#else
+/* Normal build: STACK_INC only advances the stack top, and the MOP_IN /
+   MOP_OUT hooks expand to nothing. */
+#define STACK_INC stk++
+
+#define MOP_IN(opcode)
+#define MOP_OUT
+#endif
+
+
+/* matching region of POSIX API */
+/* Offset type used by the structure below (a plain int here). */
+typedef int regoff_t;
+
+/* One pair of offsets; the field names follow POSIX regmatch_t
+   (rm_so / rm_eo). */
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} posix_regmatch_t;
+
+/* match data(str - end) from position (sstart). */
+/* if sstart == str then set sprev to NULL. */
+/* */
+/* Interprets the compiled byte code at reg->p against the subject, one */
+/* opcode per iteration of the main loop, using an explicit backtrack stack. */
+/* */
+/* reg    : compiled pattern (byte code, options, encoding, capture info). */
+/* str    : start of the subject; reported offsets are relative to it. */
+/* end    : end of the subject. */
+/* sstart : position at which this match attempt is anchored. */
+/* sprev  : head of the character before sstart (see note above). */
+/* msa    : per-search state; msa->region (if non-NULL) receives the */
+/*          capture offsets of the best match found. */
+/* */
+/* Returns the match length (s - sstart) of the best match, ONIG_MISMATCH */
+/* when nothing matched, or a negative ONIGERR_* code on a byte code error. */
+static long
+match_at(regex_t* reg, const UChar* str, const UChar* end,
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+ const UChar* right_range,
+#endif
+ const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
+{
+ static const UChar FinishCode[] = { OP_FINISH };
+
+ int i, n, num_mem, best_len, pop_level;
+ LengthType tlen, tlen2;
+ MemNumType mem;
+ RelAddrType addr;
+ OnigOptionType option = reg->options;
+ OnigEncoding encode = reg->enc;
+ OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
+ UChar *s, *q, *sbegin;
+ UChar *p = reg->p;
+ char *alloca_base;
+ OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+ OnigStackType *stkp; /* used as any purpose. */
+ OnigStackIndex si;
+ OnigStackIndex *repeat_stk;
+ OnigStackIndex *mem_start_stk, *mem_end_stk;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int scv;
+ unsigned char* state_check_buff = msa->state_check_buff;
+ int num_comb_exp_check = reg->num_comb_exp_check;
+#endif
+ /* index area layout: num_repeat repeat slots, then num_mem start slots,
+ then num_mem end slots (see the pointer setup below). */
+ n = reg->num_repeat + reg->num_mem * 2;
+
+ STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
+ pop_level = reg->stack_pop_level;
+ num_mem = reg->num_mem;
+ repeat_stk = (OnigStackIndex* )alloca_base;
+
+ mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
+ mem_end_stk = mem_start_stk + num_mem;
+ mem_start_stk--; /* for index start from 1,
+ mem_start_stk[1]..mem_start_stk[num_mem] */
+ mem_end_stk--; /* for index start from 1,
+ mem_end_stk[1]..mem_end_stk[num_mem] */
+ for (i = 1; i <= num_mem; i++) {
+ mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
+ }
+
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
+ (int )str, (int )end, (int )sstart, (int )sprev);
+ fprintf(stderr, "size: %d, start offset: %d\n",
+ (int )(end - str), (int )(sstart - str));
+#endif
+
+ /* The bottom entry points at OP_FINISH, so backtracking past every
+ alternative ends the loop through the OP_FINISH case. */
+ STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */
+ best_len = ONIG_MISMATCH;
+ s = (UChar* )sstart;
+ while (1) {
+#ifdef ONIG_DEBUG_MATCH
+ if (s) {
+ UChar *q, *bp, buf[50];
+ int len;
+ fprintf(stderr, "%4d> \"", (int )(s - str));
+ bp = buf;
+ for (i = 0, q = s; i < 7 && q < end; i++) {
+ len = enclen(encode, q, end);
+ while (len-- > 0) *bp++ = *q++;
+ }
+ if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
+ else { xmemcpy(bp, "\"", 1); bp += 1; }
+ *bp = 0;
+ fputs((char* )buf, stderr);
+ for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
+ onig_print_compiled_byte_code(stderr, p, NULL, encode);
+ fprintf(stderr, "\n");
+ }
+#endif
+
+ /* Cases that leave the switch with `break` get sprev = sbegin at the
+ bottom of the loop; cases that `continue` manage sprev themselves. */
+ sbegin = s;
+ switch (*p++) {
+ case OP_END: MOP_IN(OP_END);
+ n = s - sstart;
+ if (n > best_len) {
+ OnigRegion* region;
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (IS_FIND_LONGEST(option)) {
+ if (n > msa->best_len) {
+ msa->best_len = n;
+ msa->best_s = (UChar* )sstart;
+ }
+ else
+ goto end_best_len;
+ }
+#endif
+ best_len = n;
+ region = msa->region;
+ if (region) {
+ region->beg[0] = sstart - str;
+ region->end[0] = s - str;
+ /* A capture slot holds either a stack index (when the bt_mem_* bit
+ is set) or the subject pointer itself, stored as an index. */
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
+
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ ? STACK_AT(mem_end_stk[i])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[i])) - str;
+ }
+ else {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
+ }
+ }
+
+#ifdef USE_CAPTURE_HISTORY
+ if (reg->capture_history != 0) {
+ int r;
+ OnigCaptureTreeNode* node;
+
+ if (IS_NULL(region->history_root)) {
+ region->history_root = node = history_node_new();
+ CHECK_NULL_RETURN_MEMERR(node);
+ }
+ else {
+ node = region->history_root;
+ history_tree_clear(node);
+ }
+
+ node->group = 0;
+ node->beg = sstart - str;
+ node->end = s - str;
+
+ stkp = stk_base;
+ r = make_capture_history_tree(region->history_root, &stkp,
+ stk, (UChar* )str, reg);
+ if (r < 0) {
+ best_len = r; /* error code */
+ goto finish;
+ }
+ }
+#endif /* USE_CAPTURE_HISTORY */
+ } /* if (region) */
+ } /* n > best_len */
+
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ end_best_len:
+#endif
+ MOP_OUT;
+
+ if (IS_FIND_CONDITION(option)) {
+ if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
+ best_len = ONIG_MISMATCH;
+ goto fail; /* for retry */
+ }
+ if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
+ goto fail; /* for retry */
+ }
+ }
+
+ /* default behavior: return first-matching result. */
+ goto finish;
+ break;
+
+ case OP_EXACT1: MOP_IN(OP_EXACT1);
+ if (*p != *s++) goto fail;
+ DATA_ENSURE(0);
+ p++;
+ MOP_OUT;
+ break;
+
+ case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
+ {
+ int len;
+ UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ DATA_ENSURE(1);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) {
+ goto fail;
+ }
+ p++; q++;
+ }
+ }
+ MOP_OUT;
+ break;
+
+ case OP_EXACT2: MOP_IN(OP_EXACT2);
+ DATA_ENSURE(2);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT3: MOP_IN(OP_EXACT3);
+ DATA_ENSURE(3);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT4: MOP_IN(OP_EXACT4);
+ DATA_ENSURE(4);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACT5: MOP_IN(OP_EXACT5);
+ DATA_ENSURE(5);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ sprev = s;
+ p++; s++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTN: MOP_IN(OP_EXACTN);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen);
+ while (tlen-- > 0) {
+ if (*p++ != *s++) goto fail;
+ }
+ sprev = s - 1;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
+ {
+ int len;
+ UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+ GET_LENGTH_INC(tlen, p);
+ endp = p + tlen;
+
+ while (p < endp) {
+ sprev = s;
+ DATA_ENSURE(1);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
+ DATA_ENSURE(0);
+ q = lowbuf;
+ while (len-- > 0) {
+ if (*p != *q) goto fail;
+ p++; q++;
+ }
+ }
+ }
+
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
+ DATA_ENSURE(2);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ MOP_OUT;
+ break;
+
+ case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
+ DATA_ENSURE(4);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ sprev = s;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
+ DATA_ENSURE(6);
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ sprev = s;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen * 2);
+ while (tlen-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - 2;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
+ GET_LENGTH_INC(tlen, p);
+ DATA_ENSURE(tlen * 3);
+ while (tlen-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - 3;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
+ GET_LENGTH_INC(tlen, p); /* mb-len */
+ GET_LENGTH_INC(tlen2, p); /* string len */
+ tlen2 *= tlen;
+ DATA_ENSURE(tlen2);
+ while (tlen2-- > 0) {
+ if (*p != *s) goto fail;
+ p++; s++;
+ }
+ sprev = s - tlen;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_CCLASS: MOP_IN(OP_CCLASS);
+ DATA_ENSURE(1);
+ if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
+ p += SIZE_BITSET;
+ s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
+ MOP_OUT;
+ break;
+
+ case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
+ if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
+
+ cclass_mb:
+ GET_LENGTH_INC(tlen, p);
+ {
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len;
+
+ DATA_ENSURE(1);
+ mb_len = enclen(encode, s, end);
+ DATA_ENSURE(mb_len);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (! onig_is_in_code_range(p, code)) goto fail;
+#else
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (! onig_is_in_code_range(q, code)) goto fail;
+#endif
+ }
+ p += tlen;
+ MOP_OUT;
+ break;
+
+ case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
+ p += SIZE_BITSET;
+ goto cclass_mb;
+ }
+ else {
+ if (BITSET_AT(((BitSetRef )p), *s) == 0)
+ goto fail;
+
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
+ }
+ MOP_OUT;
+ break;
+
+ case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
+ DATA_ENSURE(1);
+ if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
+ p += SIZE_BITSET;
+ s += enclen(encode, s, end);
+ MOP_OUT;
+ break;
+
+ case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
+ s++;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ goto cc_mb_not_success;
+ }
+
+ cclass_mb_not:
+ GET_LENGTH_INC(tlen, p);
+ {
+ OnigCodePoint code;
+ UChar *ss;
+ int mb_len = enclen(encode, s, end);
+
+ if (! DATA_ENSURE_CHECK(mb_len)) {
+ DATA_ENSURE(1);
+ s = (UChar* )end;
+ p += tlen;
+ goto cc_mb_not_success;
+ }
+
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+ if (onig_is_in_code_range(p, code)) goto fail;
+#else
+ q = p;
+ ALIGNMENT_RIGHT(q);
+ if (onig_is_in_code_range(q, code)) goto fail;
+#endif
+ }
+ p += tlen;
+
+ cc_mb_not_success:
+ MOP_OUT;
+ break;
+
+ case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
+ p += SIZE_BITSET;
+ goto cclass_mb_not;
+ }
+ else {
+ if (BITSET_AT(((BitSetRef )p), *s) != 0)
+ goto fail;
+
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(tlen, p);
+ p += tlen;
+ s++;
+ }
+ MOP_OUT;
+ break;
+
+ case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
+ {
+ OnigCodePoint code;
+ void *node;
+ int mb_len;
+ UChar *ss;
+
+ DATA_ENSURE(1);
+ GET_POINTER_INC(node, p);
+ mb_len = enclen(encode, s, end);
+ ss = s;
+ s += mb_len;
+ DATA_ENSURE(0);
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
+ }
+ MOP_OUT;
+ break;
+
+ case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
+ DATA_ENSURE(1);
+ n = enclen(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ s += n;
+ MOP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
+ DATA_ENSURE(1);
+ n = enclen(encode, s, end);
+ DATA_ENSURE(n);
+ s += n;
+ MOP_OUT;
+ break;
+
+ case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
+ while (DATA_ENSURE_CHECK1) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enclen(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ MOP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
+ while (DATA_ENSURE_CHECK1) {
+ STACK_PUSH_ALT(p, s, sprev);
+ n = enclen(encode, s, end);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ MOP_OUT;
+ break;
+
+ case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+ while (DATA_ENSURE_CHECK1) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enclen(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ p++;
+ MOP_OUT;
+ break;
+
+ case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+ while (DATA_ENSURE_CHECK1) {
+ if (*p == *s) {
+ STACK_PUSH_ALT(p + 1, s, sprev);
+ }
+ n = enclen(encode, s, end);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ p++;
+ MOP_OUT;
+ break;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (DATA_ENSURE_CHECK1) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enclen(encode, s, end);
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
+ sprev = s;
+ s += n;
+ }
+ MOP_OUT;
+ break;
+
+ case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+ MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
+
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ while (DATA_ENSURE_CHECK1) {
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+ n = enclen(encode, s, end);
+ if (n > 1) {
+ DATA_ENSURE(n);
+ sprev = s;
+ s += n;
+ }
+ else {
+ sprev = s;
+ s++;
+ }
+ }
+ MOP_OUT;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+ case OP_WORD: MOP_IN(OP_WORD);
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+
+ s += enclen(encode, s, end);
+ MOP_OUT;
+ break;
+
+ case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
+ DATA_ENSURE(1);
+ if (ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+
+ s += enclen(encode, s, end);
+ MOP_OUT;
+ break;
+
+ case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
+ if (ON_STR_BEGIN(s)) {
+ DATA_ENSURE(1);
+ if (! ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ == ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
+ if (ON_STR_BEGIN(s)) {
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
+ goto fail;
+ }
+ else if (ON_STR_END(s)) {
+ if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ else {
+ if (ONIGENC_IS_MBC_WORD(encode, s, end)
+ != ONIGENC_IS_MBC_WORD(encode, sprev, end))
+ goto fail;
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+#ifdef USE_WORD_BEGIN_END
+ case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ MOP_OUT;
+ continue;
+ }
+ }
+ goto fail;
+ break;
+
+ case OP_WORD_END: MOP_IN(OP_WORD_END);
+ if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
+ if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ MOP_OUT;
+ continue;
+ }
+ }
+ goto fail;
+ break;
+#endif
+
+ case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
+ if (! ON_STR_BEGIN(s)) goto fail;
+
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_END_BUF: MOP_IN(OP_END_BUF);
+ if (! ON_STR_END(s)) goto fail;
+
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
+ if (ON_STR_BEGIN(s)) {
+ if (IS_NOTBOL(msa->options)) goto fail;
+ MOP_OUT;
+ continue;
+ }
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
+ MOP_OUT;
+ continue;
+ }
+ goto fail;
+ break;
+
+ case OP_END_LINE: MOP_IN(OP_END_LINE);
+ if (ON_STR_END(s)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+#endif
+ if (IS_NOTEOL(msa->options)) goto fail;
+ MOP_OUT;
+ continue;
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ }
+#endif
+ }
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
+ MOP_OUT;
+ continue;
+ }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ MOP_OUT;
+ continue;
+ }
+#endif
+ goto fail;
+ break;
+
+ case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
+ if (ON_STR_END(s)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+#endif
+ if (IS_NOTEOL(msa->options)) goto fail;
+ MOP_OUT;
+ continue;
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ }
+#endif
+ }
+ else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
+ ON_STR_END(s + enclen(encode, s, end))) {
+ MOP_OUT;
+ continue;
+ }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+ /* enclen takes (enc, p, end) everywhere else in this function; the
+ two-argument calls that used to be here did not match that form. */
+ UChar* ss = s + enclen(encode, s, end);
+ ss += enclen(encode, ss, end);
+ if (ON_STR_END(ss)) {
+ MOP_OUT;
+ continue;
+ }
+ }
+#endif
+ goto fail;
+ break;
+
+ case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
+ if (s != msa->start)
+ goto fail;
+
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
+ GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_START(mem, s);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
+ GET_MEMNUM_INC(mem, p);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )s);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
+ GET_MEMNUM_INC(mem, p);
+ STACK_PUSH_MEM_END(mem, s);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
+ GET_MEMNUM_INC(mem, p);
+ mem_end_stk[mem] = (OnigStackIndex )((void* )s);
+ MOP_OUT;
+ continue;
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
+ GET_MEMNUM_INC(mem, p);
+ STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
+ STACK_PUSH_MEM_END(mem, s);
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
+ GET_MEMNUM_INC(mem, p);
+ mem_end_stk[mem] = (OnigStackIndex )((void* )s);
+ STACK_GET_MEM_START(mem, stkp);
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ else
+ mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
+
+ STACK_PUSH_MEM_END_MARK(mem);
+ MOP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_BACKREF1: MOP_IN(OP_BACKREF1);
+ mem = 1;
+ goto backref;
+ break;
+
+ case OP_BACKREF2: MOP_IN(OP_BACKREF2);
+ mem = 2;
+ goto backref;
+ break;
+
+ case OP_BACKREFN: MOP_IN(OP_BACKREFN);
+ GET_MEMNUM_INC(mem, p);
+ backref:
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(pstart, s, n);
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ MOP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
+ GET_MEMNUM_INC(mem, p);
+ {
+ int len;
+ UChar *pstart, *pend;
+
+ /* if you want to remove following line,
+ you should check in parse and compile time. */
+ if (mem > num_mem) goto fail;
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ MOP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(pstart, swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ /* skip the operands of the candidates that were not tried */
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ MOP_OUT;
+ continue;
+ }
+ break;
+
+ case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
+ {
+ int len, is_fail;
+ UChar *pstart, *pend, *swork;
+
+ GET_LENGTH_INC(tlen, p);
+ for (i = 0; i < tlen; i++) {
+ GET_MEMNUM_INC(mem, p);
+
+ if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+
+ if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
+ else
+ pstart = (UChar* )((void* )mem_start_stk[mem]);
+
+ pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
+ : (UChar* )((void* )mem_end_stk[mem]));
+ n = pend - pstart;
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * (tlen - i - 1));
+ break; /* success */
+ }
+ if (i == tlen) goto fail;
+ MOP_OUT;
+ continue;
+ }
+ break;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ case OP_BACKREF_WITH_LEVEL:
+ {
+ int len;
+ OnigOptionType ic;
+ LengthType level;
+
+ GET_OPTION_INC(ic, p);
+ GET_LENGTH_INC(level, p);
+ GET_LENGTH_INC(tlen, p);
+
+ sprev = s;
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic
+ , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
+ sprev += len;
+
+ p += (SIZE_MEMNUM * tlen);
+ }
+ else
+ goto fail;
+
+ MOP_OUT;
+ continue;
+ }
+
+ break;
+#endif
+
+ case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_PUSH_NULL_CHECK_START(mem, s);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK(isnull, mem, s);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ null_check_found:
+ /* empty loop found, skip next instruction */
+ switch (*p++) {
+ case OP_JUMP:
+ case OP_PUSH:
+ p += SIZE_RELADDR;
+ break;
+ case OP_REPEAT_INC:
+ case OP_REPEAT_INC_NG:
+ case OP_REPEAT_INC_SG:
+ case OP_REPEAT_INC_NG_SG:
+ p += SIZE_MEMNUM;
+ break;
+ default:
+ goto unexpected_bytecode_error;
+ break;
+ }
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+ case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+ STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+#endif
+
+#ifdef USE_SUBEXP_CALL
+ case OP_NULL_CHECK_END_MEMST_PUSH:
+ MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
+ {
+ int isnull;
+
+ GET_MEMNUM_INC(mem, p); /* mem: null check id */
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+ STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+#else
+ STACK_NULL_CHECK_REC(isnull, mem, s);
+#endif
+ if (isnull) {
+#ifdef ONIG_DEBUG_MATCH
+ fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
+ (int )mem, (int )s);
+#endif
+ if (isnull == -1) goto fail;
+ goto null_check_found;
+ }
+ else {
+ STACK_PUSH_NULL_CHECK_END(mem);
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_JUMP: MOP_IN(OP_JUMP);
+ GET_RELADDR_INC(addr, p);
+ p += addr;
+ MOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
+ continue;
+ break;
+
+ case OP_PUSH: MOP_IN(OP_PUSH);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
+ break;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ GET_RELADDR_INC(addr, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) {
+ p += addr;
+ }
+ else {
+ STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
+ GET_STATE_CHECK_NUM_INC(mem, p);
+ STATE_CHECK_VAL(scv, mem);
+ if (scv) goto fail;
+
+ STACK_PUSH_STATE_CHECK(s, mem);
+ MOP_OUT;
+ continue;
+ break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
+ case OP_POP: MOP_IN(OP_POP);
+ STACK_POP_ONE;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
+ GET_RELADDR_INC(addr, p);
+ if (*p == *s && DATA_ENSURE_CHECK1) {
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
+ }
+ p += (addr + 1);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
+ GET_RELADDR_INC(addr, p);
+ if (*p == *s) {
+ p++;
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
+ }
+ p++;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT: MOP_IN(OP_REPEAT);
+ {
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p + addr, s, sprev);
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
+ {
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ GET_RELADDR_INC(addr, p);
+
+ STACK_ENSURE(1);
+ repeat_stk[mem] = GET_STACK_INDEX(stk);
+ STACK_PUSH_REPEAT(mem, p);
+
+ if (reg->repeat_range[mem].lower == 0) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p += addr;
+ }
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+
+ repeat_inc:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
+ /* end of repeat. Nothing to do. */
+ }
+ else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(p, s, sprev);
+ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ }
+ STACK_PUSH_REPEAT_INC(si);
+ MOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc;
+ break;
+
+ case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ si = repeat_stk[mem];
+ stkp = STACK_AT(si);
+
+ repeat_inc_ng:
+ stkp->u.repeat.count++;
+ if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ }
+ else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+ STACK_PUSH_REPEAT_INC(si);
+ }
+ MOP_OUT;
+ CHECK_INTERRUPT_IN_MATCH_AT;
+ continue;
+ break;
+
+ case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
+ GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
+ STACK_GET_REPEAT(mem, stkp);
+ si = GET_STACK_INDEX(stkp);
+ goto repeat_inc_ng;
+ break;
+
+ case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
+ STACK_PUSH_POS(s, sprev);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_POP_POS: MOP_IN(OP_POP_POS);
+ {
+ STACK_POS_END(stkp);
+ s = stkp->u.state.pstr;
+ sprev = stkp->u.state.pstr_prev;
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
+ GET_RELADDR_INC(addr, p);
+ STACK_PUSH_POS_NOT(p + addr, s, sprev);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
+ STACK_POP_TIL_POS_NOT;
+ goto fail;
+ break;
+
+ case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
+ STACK_PUSH_STOP_BT;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
+ STACK_STOP_BT_END;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
+ GET_LENGTH_INC(tlen, p);
+ s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
+ if (IS_NULL(s)) goto fail;
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
+ GET_RELADDR_INC(addr, p);
+ GET_LENGTH_INC(tlen, p);
+ q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
+ if (IS_NULL(q)) {
+ /* too short case -> success. ex. /(?<!XXX)a/.match("a")
+ If you want to change to fail, replace following line. */
+ p += addr;
+ /* goto fail; */
+ }
+ else {
+ STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
+ s = q;
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
+ }
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
+ STACK_POP_TIL_LOOK_BEHIND_NOT;
+ goto fail;
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case OP_CALL: MOP_IN(OP_CALL);
+ GET_ABSADDR_INC(addr, p);
+ STACK_PUSH_CALL_FRAME(p);
+ p = reg->p + addr;
+ MOP_OUT;
+ continue;
+ break;
+
+ case OP_RETURN: MOP_IN(OP_RETURN);
+ STACK_RETURN(p);
+ STACK_PUSH_RETURN;
+ MOP_OUT;
+ continue;
+ break;
+#endif
+
+ case OP_FINISH:
+ goto finish;
+ break;
+
+ /* Common backtrack path: restore code pointer and subject position
+ from the popped stack entry and resume the main loop there. */
+ fail:
+ MOP_OUT;
+ /* fall */
+ case OP_FAIL: MOP_IN(OP_FAIL);
+ STACK_POP;
+ p = stk->u.state.pcode;
+ s = stk->u.state.pstr;
+ sprev = stk->u.state.pstr_prev;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ if (stk->u.state.state_check != 0) {
+ stk->type = STK_STATE_CHECK_MARK;
+ stk++;
+ }
+#endif
+
+ MOP_OUT;
+ continue;
+ break;
+
+ default:
+ goto bytecode_error;
+
+ } /* end of switch */
+ sprev = sbegin;
+ } /* end of while(1) */
+
+ finish:
+ STACK_SAVE;
+ return best_len;
+
+#ifdef ONIG_DEBUG
+ stack_error:
+ STACK_SAVE;
+ return ONIGERR_STACK_BUG;
+#endif
+
+ bytecode_error:
+ STACK_SAVE;
+ return ONIGERR_UNDEFINED_BYTECODE;
+
+ unexpected_bytecode_error:
+ STACK_SAVE;
+ return ONIGERR_UNEXPECTED_BYTECODE;
+}
+
+
+/* Forward search for the byte string [target, target_end) in the text. */
+/* Candidate start positions run from text up to (but not including) the */
+/* smaller of text_range and the last position where the target still fits */
+/* before text_end.  Returns the first matching position, or NULL. */
+static UChar*
+slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
+            const UChar* text, const UChar* text_end, UChar* text_range)
+{
+  /* when every character has the same byte width, step by that width
+     instead of asking the encoding for each character's length */
+  const int fixed_width = (enc->max_enc_len == enc->min_enc_len);
+  UChar *stop = (UChar* )text_end;
+  UChar *cur = (UChar* )text;
+
+  stop -= target_end - target - 1;
+  if (stop > text_range)
+    stop = text_range;
+
+  while (cur < stop) {
+    if (*cur == *target) {
+      UChar *rest = target + 1;
+
+      /* first byte matched: compare the remainder, if there is one */
+      if (rest == target_end || memcmp(rest, cur + 1, target_end - rest) == 0)
+        return cur;
+    }
+
+    if (fixed_width)
+      cur += enc->max_enc_len;
+    else
+      cur += enclen(enc, cur, text_end);
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Compare the (already case-folded) target [t, tend) with the text at p, */
+/* folding the text one character at a time through the encoding. */
+/* Returns 1 when every target byte was matched, 0 on the first mismatch. */
+static int
+str_lower_case_match(OnigEncoding enc, int case_fold_flag,
+                     const UChar* t, const UChar* tend,
+                     const UChar* p, const UChar* end)
+{
+  UChar lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+
+  while (t < tend) {
+    /* the fold macro advances p past the character it consumed */
+    int remaining = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
+    UChar *folded = lowbuf;
+
+    for (; remaining > 0; remaining--) {
+      if (*t++ != *folded++)
+        return 0;
+    }
+  }
+
+  return 1;
+}
+
+/* Case-insensitive forward search: tries str_lower_case_match() at each */
+/* character head from text up to the smaller of text_range and the last */
+/* position where the target still fits.  Returns the hit, or NULL. */
+static UChar*
+slow_search_ic(OnigEncoding enc, int case_fold_flag,
+               UChar* target, UChar* target_end,
+               const UChar* text, const UChar* text_end, UChar* text_range)
+{
+  UChar *stop = (UChar* )text_end;
+  UChar *cur;
+
+  stop -= target_end - target - 1;
+  if (stop > text_range)
+    stop = text_range;
+
+  for (cur = (UChar* )text; cur < stop; cur += enclen(enc, cur, text_end)) {
+    if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+                             cur, text_end))
+      return cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Backward search for [target, target_end): starts at text_start (or at */
+/* the last character head where the target still fits before text_end, */
+/* whichever is lower) and walks toward text one character at a time. */
+/* Returns the first position found this way, or NULL. */
+static UChar*
+slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
+                     const UChar* text, const UChar* adjust_text,
+                     const UChar* text_end, const UChar* text_start)
+{
+  UChar *cur = (UChar* )text_end;
+
+  cur -= (target_end - target);
+  if (cur > text_start)
+    cur = (UChar* )text_start;
+  else
+    cur = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, cur, text_end);
+
+  for (; cur >= text;
+       cur = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, cur, text_end)) {
+    UChar *tp, *sp;
+
+    if (*cur != *target)
+      continue;
+
+    /* first byte matched: walk the remaining bytes in lock step */
+    for (tp = target + 1, sp = cur + 1;
+         tp < target_end && *tp == *sp;
+         tp++, sp++)
+      ;
+    if (tp == target_end)
+      return cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/* Case-insensitive backward search: same starting point and walk as */
+/* slow_search_backward(), but each candidate is checked with */
+/* str_lower_case_match().  Returns the hit, or NULL. */
+static UChar*
+slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
+                        UChar* target, UChar* target_end,
+                        const UChar* text, const UChar* adjust_text,
+                        const UChar* text_end, const UChar* text_start)
+{
+  UChar *cur = (UChar* )text_end;
+
+  cur -= (target_end - target);
+  if (cur > text_start)
+    cur = (UChar* )text_start;
+  else
+    cur = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, cur, text_end);
+
+  for (; cur >= text;
+       cur = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, cur, text_end)) {
+    if (str_lower_case_match(enc, case_fold_flag,
+                             target, target_end, cur, text_end))
+      return cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Skip-table forward search, selected by forward_search_range() for
+ * ONIG_OPTIMIZE_EXACT_BM_NOT_REV.
+ *
+ * Candidate start positions s run over [text, text_range), capped so that
+ * the byte aligned with the last target byte stays before text_end.  Each
+ * candidate is compared right-to-left; on mismatch the skip amount is
+ * looked up by the text byte aligned with the last target byte (reg->map,
+ * or reg->int_map when it is non-NULL) and s is advanced in whole
+ * characters until at least that many bytes have been covered.
+ *
+ * Returns the start of the match, or NULL when there is none.
+ */
+static UChar*
+bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end,
+ const UChar* text_range)
+{
+ const UChar *s, *se, *t, *p, *end;
+ const UChar *tail;
+ int skip, tlen1;
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
+ (int )text, (int )text_end, (int )text_range);
+#endif
+
+ /* tail: last target byte; tlen1: target length minus one */
+ tail = target_end - 1;
+ tlen1 = tail - target;
+ end = text_range;
+ if (end + tlen1 > text_end)
+ end = text_end - tlen1;
+
+ s = text;
+
+ if (IS_NULL(reg->int_map)) {
+ while (s < end) {
+ /* compare backwards from the byte aligned with the target tail */
+ p = se = s + tlen1;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )s;
+ p--; t--;
+ }
+ skip = reg->map[*se];
+ t = s;
+ /* advance by whole characters until `skip' bytes are covered */
+ do {
+ s += enclen(reg->enc, s, end);
+ } while ((s - t) < skip && s < end);
+ }
+ }
+ else {
+ /* same loop, with the skip taken from reg->int_map */
+ while (s < end) {
+ p = se = s + tlen1;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )s;
+ p--; t--;
+ }
+ skip = reg->int_map[*se];
+ t = s;
+ do {
+ s += enclen(reg->enc, s, end);
+ } while ((s - t) < skip && s < end);
+ }
+ }
+
+ return (UChar* )NULL;
+}
+
+/*
+ * Skip-table forward search, selected by forward_search_range() for
+ * ONIG_OPTIMIZE_EXACT_BM.
+ *
+ * Here s tracks the text byte aligned with the LAST target byte.  Each
+ * alignment is compared right-to-left; on mismatch s moves forward by the
+ * table entry for the byte at s (reg->map, or reg->int_map when it is
+ * non-NULL), in bytes rather than characters.
+ *
+ * Returns the start of the match, or NULL when there is none.
+ */
+static UChar*
+bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
+ const UChar* text, const UChar* text_end, const UChar* text_range)
+{
+ const UChar *s, *t, *p, *end;
+ const UChar *tail;
+
+ /* limit for the tail-aligned pointer, never beyond text_end */
+ end = text_range + (target_end - target) - 1;
+ if (end > text_end)
+ end = text_end;
+
+ tail = target_end - 1;
+ s = text + (target_end - target) - 1;
+ if (IS_NULL(reg->int_map)) {
+ while (s < end) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ /* p has walked back to the first target byte: match starts at p */
+ if (t == target) return (UChar* )p;
+ p--; t--;
+ }
+ s += reg->map[*s];
+ }
+ }
+ else { /* see int_map[] */
+ while (s < end) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
+ }
+ s += reg->int_map[*s];
+ }
+ }
+ return (UChar* )NULL;
+}
+
+/*
+ * Build the skip table used by bm_search_backward() for the pattern
+ * [s, end).
+ *
+ * *skip is allocated (ONIG_CHAR_TABLE_SIZE ints) when it is still NULL.
+ * Every entry starts as the pattern length; then, going from the last
+ * pattern byte down to index 1, the entry for each byte is set to its
+ * index, so the smallest index of a byte wins.
+ *
+ * Returns 0 on success or ONIGERR_MEMORY when the allocation fails.
+ */
+static int
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
+                     int** skip)
+{
+  int *table;
+  int pat_len, idx;
+
+  table = *skip;
+  if (IS_NULL(table)) {
+    table = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
+    *skip = table;
+    if (IS_NULL(table)) return ONIGERR_MEMORY;
+  }
+
+  pat_len = end - s;
+
+  idx = 0;
+  while (idx < ONIG_CHAR_TABLE_SIZE) {
+    table[idx] = pat_len;
+    idx++;
+  }
+
+  idx = pat_len - 1;
+  while (idx > 0) {
+    table[s[idx]] = idx;
+    idx--;
+  }
+
+  return 0;
+}
+
+/*
+ * Backward search for [target, target_end) using reg->int_map_backward.
+ *
+ * Starts at text_start, or at the last position where the target still
+ * fits before text_end (adjusted left to a character head) when that is
+ * not later.  On mismatch the position moves back by the table entry for
+ * the byte at the current position and is re-adjusted to a character head.
+ * Returns the matching position, or NULL once the position drops below
+ * text.
+ */
+static UChar*
+bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
+                   const UChar* text, const UChar* adjust_text,
+                   const UChar* text_end, const UChar* text_start)
+{
+  const UChar *pos, *tp, *sp;
+
+  pos = text_end - (target_end - target);
+  if (text_start < pos)
+    pos = text_start;
+  else
+    pos = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, pos, text_end);
+
+  while (pos >= text) {
+    sp = pos;
+    for (tp = target; tp < target_end && *sp == *tp; tp++)
+      sp++;
+
+    if (tp == target_end)
+      return (UChar* )pos;
+
+    pos -= reg->int_map_backward[*pos];
+    pos = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, pos, text_end);
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Forward scan over [text, text_range), one character at a time, for the
+ * first character whose leading byte has a non-zero entry in map[].
+ * Returns that position, or NULL when none is found.
+ */
+static UChar*
+map_search(OnigEncoding enc, UChar map[],
+           const UChar* text, const UChar* text_range, const UChar* text_end)
+{
+  const UChar *cur;
+
+  for (cur = text; cur < text_range; cur += enclen(enc, cur, text_end)) {
+    if (map[*cur])
+      return (UChar* )cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Backward scan from text_start down to text, one character at a time,
+ * for a character whose leading byte has a non-zero entry in map[].
+ * Returns that position, or NULL when none is found.
+ */
+static UChar*
+map_search_backward(OnigEncoding enc, UChar map[],
+                    const UChar* text, const UChar* adjust_text,
+                    const UChar* text_start, const UChar* text_end)
+{
+  const UChar *cur;
+
+  for (cur = text_start; cur >= text;
+       cur = onigenc_get_prev_char_head(enc, adjust_text, cur, text_end)) {
+    if (map[*cur])
+      return (UChar* )cur;
+  }
+
+  return (UChar* )NULL;
+}
+
+/*
+ * Try to match reg against the string [str, end) at the single position
+ * `at' (no searching).
+ *
+ * When region is non-NULL it is first resized/cleared for
+ * reg->num_mem + 1 groups; a failure there is returned as-is.  Otherwise
+ * the value returned by match_at() is passed through.
+ */
+extern long
+onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
+ OnigOptionType option)
+{
+ long r;
+ UChar *prev;
+ OnigMatchArg msa;
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ /* wait until reg is in a usable state, then register this user */
+ start:
+ THREAD_ATOMIC_START;
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_chain_reduce(reg);
+ ONIG_STATE_INC(reg);
+ }
+ }
+ else {
+ int n;
+
+ THREAD_ATOMIC_END;
+ n = 0;
+ /* give up after THREAD_PASS_LIMIT_COUNT yields */
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ goto start;
+ }
+ THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+ MATCH_ARG_INIT(msa, option, region, at);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ {
+ int offset = at - str;
+ STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+ }
+#endif
+
+ if (region
+ ) {
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
+ }
+ else
+ r = 0;
+
+ if (r == 0) {
+ /* match_at() also receives the head of the character before `at' */
+ prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
+ r = match_at(reg, str, end,
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+ end,
+#endif
+ at, prev, &msa);
+ }
+
+ MATCH_ARG_FREE(msa);
+ ONIG_STATE_DEC_THREAD(reg);
+ return r;
+}
+
+/*
+ * Forward pre-scan for onig_search().
+ *
+ * Runs the search selected by reg->optimize (exact string, Boyer-Moore or
+ * character map) from s, starting reg->dmin bytes in, and converts a hit
+ * at p into a window of candidate match start positions.  Hits that do not
+ * satisfy reg->sub_anchor (begin-line / end-line) are skipped and the scan
+ * is retried from the next character.
+ *
+ * On success returns 1 and sets *high = p - reg->dmin.  *low is set unless
+ * reg->dmax is ONIG_INFINITE_DISTANCE; *low_prev (the character head
+ * before *low) is set in those same cases when low_prev is non-NULL.
+ * Returns 0 when nothing is found before range.
+ */
+static int
+forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
+                     UChar* range, UChar** low, UChar** high, UChar** low_prev)
+{
+  UChar *p, *pprev = (UChar* )NULL;
+
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
+          (int )str, (int )end, (int )s, (int )range);
+#endif
+
+  p = s;
+  if (reg->dmin > 0) {
+    if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
+      p += reg->dmin;
+    }
+    else {
+      /* multibyte: advance by whole characters until dmin bytes are passed */
+      UChar *q = p + reg->dmin;
+      while (p < q) p += enclen(reg->enc, p, end);
+    }
+  }
+
+ retry:
+  switch (reg->optimize) {
+  case ONIG_OPTIMIZE_EXACT:
+    p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
+    break;
+  case ONIG_OPTIMIZE_EXACT_IC:
+    p = slow_search_ic(reg->enc, reg->case_fold_flag,
+                       reg->exact, reg->exact_end, p, end, range);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_BM:
+    p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+    p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
+    break;
+
+  case ONIG_OPTIMIZE_MAP:
+    p = map_search(reg->enc, reg->map, p, range, end);
+    break;
+  }
+
+  if (p && p < range) {
+    if (p - reg->dmin < s) {
+    retry_gate:
+      /* reject this hit and rescan from the next character */
+      pprev = p;
+      p += enclen(reg->enc, p, end);
+      goto retry;
+    }
+
+    if (reg->sub_anchor) {
+      UChar* prev;
+
+      switch (reg->sub_anchor) {
+      case ANCHOR_BEGIN_LINE:
+        if (!ON_STR_BEGIN(p)) {
+          prev = onigenc_get_prev_char_head(reg->enc,
+                                            (pprev ? pprev : str), p, end);
+          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
+            goto retry_gate;
+        }
+        break;
+
+      case ANCHOR_END_LINE:
+        if (ON_STR_END(p)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+          /* pass `end' like every other onigenc_get_prev_char_head() call */
+          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
+                                            (pprev ? pprev : str), p, end);
+          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
+            goto retry_gate;
+#endif
+        }
+        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                 )
+          goto retry_gate;
+        break;
+      }
+    }
+
+    if (reg->dmax == 0) {
+      *low = p;
+      if (low_prev) {
+        if (*low > s)
+          *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
+        else
+          *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                 (pprev ? pprev : str), p, end);
+      }
+    }
+    else {
+      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+        *low = p - reg->dmax;
+        if (*low > s) {
+          *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+                                          *low, end, (const UChar** )low_prev);
+          if (low_prev && IS_NULL(*low_prev))
+            *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                   (pprev ? pprev : s), *low, end);
+        }
+        else {
+          if (low_prev)
+            *low_prev = onigenc_get_prev_char_head(reg->enc,
+                                                   (pprev ? pprev : str), *low, end);
+        }
+      }
+    }
+    /* no need to adjust *high; *high is used as a range check only */
+    *high = p - reg->dmin;
+
+#ifdef ONIG_DEBUG_SEARCH
+    fprintf(stderr,
+    "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
+            (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
+#endif
+    return 1; /* success */
+  }
+
+  return 0; /* fail */
+}
+
+/* Below this distance (s - range, in bytes) the backward skip table is not
+   built and the plain backward search is used instead. */
+#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD   100
+
+/*
+ * Backward pre-scan for onig_search().
+ *
+ * Runs the backward search selected by reg->optimize from s down to
+ * range + reg->dmin.  Hits that do not satisfy reg->sub_anchor are skipped
+ * and the scan is retried from the previous character.
+ *
+ * Returns 1 on success, 0 when nothing is found, or the non-zero error
+ * code from set_bm_backward_skip().  *low and *high are written only when
+ * reg->dmax is not ONIG_INFINITE_DISTANCE.
+ */
+static long
+backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
+                      UChar* s, const UChar* range, UChar* adjrange,
+                      UChar** low, UChar** high)
+{
+  int r;
+  UChar *p;
+
+  range += reg->dmin;
+  p = s;
+
+ retry:
+  switch (reg->optimize) {
+  case ONIG_OPTIMIZE_EXACT:
+  exact_method:
+    p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
+                             range, adjrange, end, p);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_IC:
+    p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
+                                reg->exact, reg->exact_end,
+                                range, adjrange, end, p);
+    break;
+
+  case ONIG_OPTIMIZE_EXACT_BM:
+  case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+    /* the backward skip table is built lazily, on first use */
+    if (IS_NULL(reg->int_map_backward)) {
+      if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
+        goto exact_method;
+
+      r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
+                               &(reg->int_map_backward));
+      if (r) return r;
+    }
+    p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
+                           end, p);
+    break;
+
+  case ONIG_OPTIMIZE_MAP:
+    p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
+    break;
+  }
+
+  if (p) {
+    if (reg->sub_anchor) {
+      UChar* prev;
+
+      switch (reg->sub_anchor) {
+      case ANCHOR_BEGIN_LINE:
+        if (!ON_STR_BEGIN(p)) {
+          prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
+          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+            p = prev;
+            goto retry;
+          }
+        }
+        break;
+
+      case ANCHOR_END_LINE:
+        if (ON_STR_END(p)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+          /* pass `end' like every other onigenc_get_prev_char_head() call */
+          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
+          if (IS_NULL(prev)) goto fail;
+          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
+            p = prev;
+            goto retry;
+          }
+#endif
+        }
+        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+                 && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                 ) {
+          p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
+          if (IS_NULL(p)) goto fail;
+          goto retry;
+        }
+        break;
+      }
+    }
+
+    /* no need to adjust *low; it is used as a range check only */
+    if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+      *low  = p - reg->dmax;
+      *high = p - reg->dmin;
+      *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
+    }
+
+#ifdef ONIG_DEBUG_SEARCH
+    fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
+            (int )(*low - str), (int )(*high - str));
+#endif
+    return 1; /* success */
+  }
+
+ fail:
+#ifdef ONIG_DEBUG_SEARCH
+  fprintf(stderr, "backward_search_range: fail.\n");
+#endif
+  return 0; /* fail */
+}
+
+
+/*
+ * Search the string [str, end) for a match of reg.
+ *
+ * The search runs forward when range > start and backward otherwise,
+ * trying match_at() at successive character positions.  Anchor
+ * information in reg->anchor is used first to narrow start/range, and,
+ * when reg->optimize is not ONIG_OPTIMIZE_NONE, forward_search_range() /
+ * backward_search_range() are used to skip positions.
+ *
+ * Returns the byte offset (from str) of the position where the match was
+ * found, ONIG_MISMATCH when nothing matched, or the error value produced
+ * by onig_region_resize_clear() / match_at().
+ */
+extern long
+onig_search(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
+{
+ int r;
+ UChar *s, *prev;
+ OnigMatchArg msa;
+ const UChar *orig_start = start;
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+ const UChar *orig_range = range;
+#endif
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ /* wait until reg is in a usable state, then register this user */
+ start:
+ THREAD_ATOMIC_START;
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
+ onig_chain_reduce(reg);
+ ONIG_STATE_INC(reg);
+ }
+ }
+ else {
+ int n;
+
+ THREAD_ATOMIC_END;
+ n = 0;
+ while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
+ if (++n > THREAD_PASS_LIMIT_COUNT)
+ return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
+ THREAD_PASS;
+ }
+ goto start;
+ }
+ THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr,
+ "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
+ (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+ if (region
+ ) {
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
+ if (r) goto finish_no_msa;
+ }
+
+ /* a start position outside [str, end] can never match */
+ if (start > end || start < str) goto mismatch_no_msa;
+
+
+ /* MATCH_AND_RETURN_CHECK: try match_at() at s; jump to `match' on
+ success (unless FIND_LONGEST keeps searching) or to `finish' on error,
+ and fall through on ONIG_MISMATCH. */
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#else
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(none) \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK(none) \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+
+
+ /* anchor optimize: resume search range */
+ if (reg->anchor != 0 && str < end) {
+ UChar *min_semi_end, *max_semi_end;
+
+ if (reg->anchor & ANCHOR_BEGIN_POSITION) {
+ /* search start-position only */
+ begin_position:
+ if (range > start)
+ range = start + 1;
+ else
+ range = start;
+ }
+ else if (reg->anchor & ANCHOR_BEGIN_BUF) {
+ /* search str-position only */
+ if (range > start) {
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
+ }
+ else {
+ if (range <= str) {
+ start = str;
+ range = str;
+ }
+ else
+ goto mismatch_no_msa;
+ }
+ }
+ else if (reg->anchor & ANCHOR_END_BUF) {
+ min_semi_end = max_semi_end = (UChar* )end;
+
+ /* narrow start/range using the anchor distances
+ reg->anchor_dmin / reg->anchor_dmax measured from the end */
+ end_buf:
+ if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
+ goto mismatch_no_msa;
+
+ if (range > start) {
+ if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
+ start = min_semi_end - reg->anchor_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
+ else { /* match with empty at end */
+ start = onigenc_get_prev_char_head(reg->enc, str, end, end);
+ }
+ }
+ if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+ range = max_semi_end - reg->anchor_dmin + 1;
+ }
+
+ if (start >= range) goto mismatch_no_msa;
+ }
+ else {
+ if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
+ range = min_semi_end - reg->anchor_dmax;
+ }
+ if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
+ start = max_semi_end - reg->anchor_dmin;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
+ }
+ if (range > start) goto mismatch_no_msa;
+ }
+ }
+ else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
+ UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
+
+ max_semi_end = (UChar* )end;
+ /* a trailing newline moves the earliest accepted end before it */
+ if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
+ min_semi_end = pre_end;
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
+ if (IS_NOT_NULL(pre_end) &&
+ ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
+ min_semi_end = pre_end;
+ }
+#endif
+ if (min_semi_end > str && start <= min_semi_end) {
+ goto end_buf;
+ }
+ }
+ else {
+ min_semi_end = (UChar* )end;
+ goto end_buf;
+ }
+ }
+ else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
+ goto begin_position;
+ }
+ }
+ else if (str == end) { /* empty string */
+ static const UChar address_for_empty_string[] = "";
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search: empty string.\n");
+#endif
+
+ if (reg->threshold_len == 0) {
+ /* run the single match attempt on a static empty string */
+ start = end = str = address_for_empty_string;
+ s = (UChar* )start;
+ prev = (UChar* )NULL;
+
+ MATCH_ARG_INIT(msa, option, region, start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ msa.state_check_buff = (void* )0;
+ msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
+#endif
+ MATCH_AND_RETURN_CHECK(end);
+ goto mismatch;
+ }
+ goto mismatch_no_msa;
+ }
+
+#ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
+ (int )(end - str), (int )(start - str), (int )(range - str));
+#endif
+
+ /* from here on msa is initialized, so exits go through finish/match */
+ MATCH_ARG_INIT(msa, option, region, orig_start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ {
+ int offset = (MIN(start, range) - str);
+ STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+ }
+#endif
+
+ s = (UChar* )start;
+ if (range > start) { /* forward search */
+ if (s > str)
+ prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
+ else
+ prev = (UChar* )NULL;
+
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ UChar *sch_range, *low, *high, *low_prev;
+
+ /* the pre-scan may look up to reg->dmax bytes beyond range */
+ sch_range = (UChar* )range;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_range = (UChar* )end;
+ else {
+ sch_range += reg->dmax;
+ if (sch_range > end) sch_range = (UChar* )end;
+ }
+ }
+
+ if ((end - start) < reg->threshold_len)
+ goto mismatch;
+
+ if (reg->dmax != ONIG_INFINITE_DISTANCE) {
+ do {
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, &low_prev)) goto mismatch;
+ if (s < low) {
+ s = low;
+ prev = low_prev;
+ }
+ /* try every position of the window [low, high] */
+ while (s <= high) {
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s, end);
+ }
+ } while (s < range);
+ goto mismatch;
+ }
+ else { /* check only. */
+ if (! forward_search_range(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) goto mismatch;
+
+ if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ do {
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s, end);
+ } while (s < range);
+ goto mismatch;
+ }
+ }
+ }
+
+ /* plain scan: try every character position in [s, range) */
+ do {
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s, end);
+ } while (s < range);
+
+ if (s == range) { /* because empty match with /$/. */
+ MATCH_AND_RETURN_CHECK(orig_range);
+ }
+ }
+ else { /* backward search */
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+ if (orig_start < end)
+ orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
+#endif
+
+ if (reg->optimize != ONIG_OPTIMIZE_NONE) {
+ UChar *low, *high, *adjrange, *sch_start;
+
+ if (range < end)
+ adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
+ else
+ adjrange = (UChar* )end;
+
+ if (reg->dmax != ONIG_INFINITE_DISTANCE &&
+ (end - range) >= reg->threshold_len) {
+ do {
+ sch_start = s + reg->dmax;
+ if (sch_start > end) sch_start = (UChar* )end;
+ /* <= 0 covers both "not found" and error returns */
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0)
+ goto mismatch;
+
+ if (s > high)
+ s = high;
+
+ while (s >= low) {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
+ MATCH_AND_RETURN_CHECK(orig_start);
+ s = prev;
+ }
+ } while (s >= range);
+ goto mismatch;
+ }
+ else { /* check only. */
+ if ((end - range) < reg->threshold_len) goto mismatch;
+
+ sch_start = s;
+ if (reg->dmax != 0) {
+ if (reg->dmax == ONIG_INFINITE_DISTANCE)
+ sch_start = (UChar* )end;
+ else {
+ sch_start += reg->dmax;
+ if (sch_start > end) sch_start = (UChar* )end;
+ else
+ sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
+ start, sch_start, end);
+ }
+ }
+ if (backward_search_range(reg, str, end, sch_start, range, adjrange,
+ &low, &high) <= 0) goto mismatch;
+ }
+ }
+
+ /* plain scan: try every character position from s down to range */
+ do {
+ prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
+ MATCH_AND_RETURN_CHECK(orig_start);
+ s = prev;
+ } while (s >= range);
+ }
+
+ mismatch:
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ /* FIND_LONGEST: report the best match recorded in msa, if any */
+ if (IS_FIND_LONGEST(reg->options)) {
+ if (msa.best_len >= 0) {
+ s = msa.best_s;
+ goto match;
+ }
+ }
+#endif
+ r = ONIG_MISMATCH;
+
+ finish:
+ MATCH_ARG_FREE(msa);
+ ONIG_STATE_DEC_THREAD(reg);
+
+ /* If the result is a mismatch and the FIND_NOT_EMPTY option is not
+ given, then the region is not set in match_at(). */
+ if (IS_FIND_NOT_EMPTY(reg->options) && region
+ ) {
+ onig_region_clear(region);
+ }
+
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+ return r;
+
+ /* exits taken before msa has been initialized: nothing to free */
+ mismatch_no_msa:
+ r = ONIG_MISMATCH;
+ finish_no_msa:
+ ONIG_STATE_DEC_THREAD(reg);
+#ifdef ONIG_DEBUG
+ if (r != ONIG_MISMATCH)
+ fprintf(stderr, "onig_search: error %d\n", r);
+#endif
+ return r;
+
+ match:
+ ONIG_STATE_DEC_THREAD(reg);
+ MATCH_ARG_FREE(msa);
+ return s - str;
+}
+
+/* Return the encoding stored in reg (reg->enc). */
+extern OnigEncoding
+onig_get_encoding(regex_t* reg)
+{
+ return reg->enc;
+}
+
+/* Return the option bits stored in reg (reg->options). */
+extern OnigOptionType
+onig_get_options(regex_t* reg)
+{
+ return reg->options;
+}
+
+/* Return the case-fold flag stored in reg (reg->case_fold_flag). */
+extern OnigCaseFoldType
+onig_get_case_fold_flag(regex_t* reg)
+{
+ return reg->case_fold_flag;
+}
+
+/* Return the syntax pointer stored in reg (reg->syntax); the pointee is
+   const for the caller. */
+extern const OnigSyntaxType*
+onig_get_syntax(regex_t* reg)
+{
+ return reg->syntax;
+}
+
+/* Return reg->num_mem, the group count that onig_match()/onig_search()
+   size the region from (as num_mem + 1). */
+extern int
+onig_number_of_captures(regex_t* reg)
+{
+ return reg->num_mem;
+}
+
+/*
+ * Count the groups 0..ONIG_MAX_CAPTURE_HISTORY_GROUP whose bit is set in
+ * reg->capture_history.  Always 0 when USE_CAPTURE_HISTORY is not defined.
+ */
+extern int
+onig_number_of_capture_histories(regex_t* reg)
+{
+#ifdef USE_CAPTURE_HISTORY
+  int group = 0;
+  int count = 0;
+
+  while (group <= ONIG_MAX_CAPTURE_HISTORY_GROUP) {
+    if (BIT_STATUS_AT(reg->capture_history, group) != 0)
+      count++;
+    group++;
+  }
+  return count;
+#else
+  return 0;
+#endif
+}
+
+/* Copy the object that `from' points to over the one `to' points to
+   (plain assignment; both pointers must be valid). */
+extern void
+onig_copy_encoding(OnigEncoding to, OnigEncoding from)
+{
+ *to = *from;
+}
+#endif //INCLUDE_REGEXP
diff --git a/src/regint.h b/src/regint.h
new file mode 100644
index 000000000..e86a95f27
--- /dev/null
+++ b/src/regint.h
@@ -0,0 +1,833 @@
+#ifndef ONIGURUMA_REGINT_H
+#define ONIGURUMA_REGINT_H
+/**********************************************************************
+ regint.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* for debug */
+/* #define ONIG_DEBUG_PARSE_TREE */
+/* #define ONIG_DEBUG_COMPILE */
+/* #define ONIG_DEBUG_SEARCH */
+/* #define ONIG_DEBUG_MATCH */
+/* #define ONIG_DONT_OPTIMIZE */
+
+/* for byte-code statistical data. */
+/* #define ONIG_DEBUG_STATISTICS */
+
+#ifndef RUBY
+#define RUBY
+#endif
+
+#include <stddef.h> //typedef unsigned int ptrdiff_t;
+
+#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
+ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
+ defined(ONIG_DEBUG_STATISTICS)
+#ifndef ONIG_DEBUG
+#define ONIG_DEBUG
+#endif
+#endif
+
+/*
+ * Targets on which PLATFORM_GET_INC reads a value through a cast pointer
+ * instead of copying it out with xmemcpy (see the two definitions of
+ * PLATFORM_GET_INC below).
+ * _M_AMD64 is the macro MSVC predefines for x64 targets; the previous
+ * spelling _M_AMD86 is not defined by any compiler, so that branch could
+ * never be selected.
+ */
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+    (defined(__ppc__) && defined(__APPLE__)) || \
+    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
+    defined(__mc68020__)
+#define PLATFORM_UNALIGNED_WORD_ACCESS
+#endif
+
+/* config */
+/* spec. config */
+#define USE_NAMED_GROUP
+#define USE_SUBEXP_CALL
+#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
+#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
+#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
+#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* #define USE_RECOMPILE_API */
+/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
+
+/* internal config */
+#define USE_PARSE_TREE_NODE_RECYCLE
+#define USE_OP_PUSH_OR_JUMP_EXACT
+#define USE_QTFR_PEEK_NEXT
+#define USE_ST_LIBRARY
+#define USE_SHARED_CCLASS_TABLE
+
+#define INIT_MATCH_STACK_SIZE 160
+#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+
+#if defined(__GNUC__)
+# define ARG_UNUSED __attribute__ ((unused))
+#else
+# define ARG_UNUSED
+#endif
+
+/* */
+/* escape other system UChar definition */
+#ifndef RUBY_DEFINES_H
+#include "mruby.h"
+#endif
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#undef USE_CAPTURE_HISTORY
+#define USE_VARIABLE_META_CHARS
+#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
+#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
+/* #define USE_MULTI_THREAD_SYSTEM */
+#define THREAD_SYSTEM_INIT /* depend on thread system */
+#define THREAD_SYSTEM_END /* depend on thread system */
+#define THREAD_ATOMIC_START /* depend on thread system */
+#define THREAD_ATOMIC_END /* depend on thread system */
+#define THREAD_PASS /* depend on thread system */
+
+#ifdef RUBY
+
+//#define CHECK_INTERRUPT_IN_MATCH_AT mrb_thread_check_ints()
+#define CHECK_INTERRUPT_IN_MATCH_AT
+#define onig_st_init_table st_init_table
+#define onig_st_init_table_with_size st_init_table_with_size
+#define onig_st_init_numtable st_init_numtable
+#define onig_st_init_numtable_with_size st_init_numtable_with_size
+#define onig_st_init_strtable st_init_strtable
+#define onig_st_init_strtable_with_size st_init_strtable_with_size
+#define onig_st_delete st_delete
+#define onig_st_delete_safe st_delete_safe
+#define onig_st_insert st_insert
+#define onig_st_lookup st_lookup
+#define onig_st_foreach st_foreach
+#define onig_st_add_direct st_add_direct
+#define onig_st_free_table st_free_table
+#define onig_st_cleanup_safe st_cleanup_safe
+#define onig_st_copy st_copy
+#define onig_st_nothing_key_clone st_nothing_key_clone
+#define onig_st_nothing_key_free st_nothing_key_free
+#define onig_st_is_member st_is_member
+
+#define USE_UPPER_CASE_TABLE
+#else
+
+#define st_init_table onig_st_init_table
+#define st_init_table_with_size onig_st_init_table_with_size
+#define st_init_numtable onig_st_init_numtable
+#define st_init_numtable_with_size onig_st_init_numtable_with_size
+#define st_init_strtable onig_st_init_strtable
+#define st_init_strtable_with_size onig_st_init_strtable_with_size
+#define st_delete onig_st_delete
+#define st_delete_safe onig_st_delete_safe
+#define st_insert onig_st_insert
+#define st_lookup onig_st_lookup
+#define st_foreach onig_st_foreach
+#define st_add_direct onig_st_add_direct
+#define st_free_table onig_st_free_table
+#define st_cleanup_safe onig_st_cleanup_safe
+#define st_copy onig_st_copy
+#define st_nothing_key_clone onig_st_nothing_key_clone
+#define st_nothing_key_free onig_st_nothing_key_free
+/* */
+#define onig_st_is_member st_is_member
+
+#define CHECK_INTERRUPT_IN_MATCH_AT
+
+#endif
+
+#define STATE_CHECK_STRING_THRESHOLD_LEN 7
+#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
+
+#define THREAD_PASS_LIMIT_COUNT 8
+#define xmemset memset
+#define xmemcpy memcpy
+#define xmemmove memmove
+
+#if defined(_WIN32) && !defined(__GNUC__)
+#define xalloca _alloca
+#define xvsnprintf _vsnprintf
+#else
+#define xalloca malloc
+#define xvsnprintf vsnprintf
+#endif
+
+
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+#define ONIG_STATE_INC(reg) (reg)->state++
+#define ONIG_STATE_DEC(reg) (reg)->state--
+
+#define ONIG_STATE_INC_THREAD(reg) do {\
+ THREAD_ATOMIC_START;\
+ (reg)->state++;\
+ THREAD_ATOMIC_END;\
+} while(0)
+#define ONIG_STATE_DEC_THREAD(reg) do {\
+ THREAD_ATOMIC_START;\
+ (reg)->state--;\
+ THREAD_ATOMIC_END;\
+} while(0)
+#else
+#define ONIG_STATE_INC(reg) /* Nothing */
+#define ONIG_STATE_DEC(reg) /* Nothing */
+#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
+#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
+
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
+#include <alloca.h>
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+#include <ctype.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef ONIG_DEBUG
+# include <stdio.h>
+#endif
+
+#include "regenc.h"
+
+#define MIN(a,b) (((a)>(b))?(b):(a))
+#define MAX(a,b) (((a)<(b))?(b):(a))
+
+#define IS_NULL(p) (((void*)(p)) == (void*)0)
+#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
+#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
+#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
+#define NULL_UCHARP ((UChar* )0)
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+
+#define PLATFORM_GET_INC(val,p,type) do{\
+ val = *(type* )p;\
+ (p) += sizeof(type);\
+} while(0)
+
+#else
+
+#define PLATFORM_GET_INC(val,p,type) do{\
+ xmemcpy(&val, (p), sizeof(type));\
+ (p) += sizeof(type);\
+} while(0)
+
+/* sizeof(OnigCodePoint) */
+#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
+
+#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
+ (pad_size) = WORD_ALIGNMENT_SIZE \
+ - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
+ if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
+} while (0)
+
+#define ALIGNMENT_RIGHT(addr) do {\
+ (addr) += (WORD_ALIGNMENT_SIZE - 1);\
+ (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
+} while (0)
+
+#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+
+/* stack pop level */
+#define STACK_POP_LEVEL_FREE 0
+#define STACK_POP_LEVEL_MEM_START 1
+#define STACK_POP_LEVEL_ALL 2
+
+/* optimize flags */
+#define ONIG_OPTIMIZE_NONE 0
+#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
+#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
+#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
+#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
+#define ONIG_OPTIMIZE_MAP 5 /* char map */
+
+/*
+ * bit status
+ *
+ * A BitStatusType holds one flag per index n.  Indexes that do not fit in
+ * the word (n >= BIT_STATUS_BITS_NUM) share bit 0 in BIT_STATUS_AT and
+ * BIT_STATUS_ON_AT, and are ignored by BIT_STATUS_ON_AT_SIMPLE.
+ *
+ * Every macro argument is parenthesized, and the shifted one has type
+ * BitStatusType so that setting or testing the top bit does not shift into
+ * the sign bit of an int.
+ */
+typedef unsigned int  BitStatusType;
+
+#define BIT_STATUS_BITS_NUM          (sizeof(BitStatusType) * 8)
+#define BIT_STATUS_CLEAR(stats)      (stats) = 0
+#define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)
+#define BIT_STATUS_AT(stats,n) \
+  ((n) < (int )BIT_STATUS_BITS_NUM ? \
+   ((stats) & ((BitStatusType )1 << (n))) : ((stats) & 1))
+
+#define BIT_STATUS_ON_AT(stats,n) do {\
+  if ((n) < (int )BIT_STATUS_BITS_NUM) \
+    (stats) |= ((BitStatusType )1 << (n));\
+  else\
+    (stats) |= 1;\
+} while (0)
+
+#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
+  if ((n) < (int )BIT_STATUS_BITS_NUM)\
+    (stats) |= ((BitStatusType )1 << (n));\
+} while (0)
+
+
+#define INT_MAX_LIMIT ((1UL << (sizeof(int) * 8 - 1)) - 1)
+
+#define DIGITVAL(code) ((code) - '0')
+#define ODIGITVAL(code) DIGITVAL(code)
+#define XDIGITVAL(enc,code) \
+ (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
+ : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
+
+#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
+#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
+#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
+#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
+#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
+#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
+#define IS_FIND_CONDITION(option) ((option) & \
+ (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
+#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
+#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
+#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
+
+/* OP_SET_OPTION is required for these options.
+#define IS_DYNAMIC_OPTION(option) \
+ (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
+*/
+/* ignore-case and multibyte status are included in compiled code. */
+#define IS_DYNAMIC_OPTION(option) 0
+
+#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
+ ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
+
+#define REPEAT_INFINITE -1
+#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
+
+/* bitset */
+#define BITS_PER_BYTE 8
+#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
+#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
+#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
+
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+typedef unsigned int Bits;
+#else
+typedef unsigned char Bits;
+#endif
+typedef Bits BitSet[BITSET_SIZE];
+typedef Bits* BitSetRef;
+
+#define SIZE_BITSET (int)sizeof(BitSet)
+
+#define BITSET_CLEAR(bs) do {\
+ int i;\
+ for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
+} while (0) /* zeroes all BITSET_SIZE words of bs; declares a local `i`, so the bs expression must not itself refer to a variable named i */
+
+/* BS_ROOM selects the Bits word that holds bit `pos`; BS_BIT is the mask of
+ * that bit inside the word.  `pos` is parenthesized so expression arguments
+ * divide/modulo as a whole, and the mask starts from an unsigned 1 so that
+ * bit 31 of an unsigned int word is reachable without signed overflow. */
+#define BS_ROOM(bs,pos) (bs)[(pos) / BITS_IN_ROOM]
+#define BS_BIT(pos) (1u << ((pos) % BITS_IN_ROOM))
+
+#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
+#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
+#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
+#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos)
+
+/* bytes buffer: a growable byte array managed by the BBUF_* macros below */
+typedef struct _BBuf {
+ UChar* p; /* storage; reallocated by the BBUF_* growth macros */
+ unsigned int used; /* high-water mark of written bytes (raised by BBUF_WRITE / BBUF_WRITE1) */
+ unsigned int alloc; /* size last requested from xrealloc for p */
+} BBuf;
+
+#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
+
+/* Grow the buffer by `inc` bytes.  On allocation failure the enclosing
+ * function returns ONIGERR_MEMORY and the buffer is left untouched: the old
+ * block stays owned by buf->p (so it can still be freed) and buf->alloc still
+ * describes it. */
+#define BBUF_SIZE_INC(buf,inc) do{\
+  unsigned int bbuf_inc_alloc_ = (buf)->alloc + (inc);\
+  UChar* bbuf_inc_p_ = (UChar* )xrealloc((buf)->p, bbuf_inc_alloc_);\
+  if (IS_NULL(bbuf_inc_p_)) return(ONIGERR_MEMORY);\
+  (buf)->p = bbuf_inc_p_;\
+  (buf)->alloc = bbuf_inc_alloc_;\
+} while (0)
+
+/* Double the allocation (at least once) until it reaches `low` bytes.
+ * - `low` is parenthesized before the cast so expression arguments work.
+ * - A zero allocation is bumped to 1 first; doubling 0 would never terminate.
+ * - The new size and pointer are committed only after xrealloc succeeds, so
+ *   on failure (enclosing function returns ONIGERR_MEMORY) the old block is
+ *   neither leaked nor mis-described by buf->alloc. */
+#define BBUF_EXPAND(buf,low) do{\
+  unsigned int bbuf_exp_alloc_ = (buf)->alloc;\
+  UChar* bbuf_exp_p_;\
+  if (bbuf_exp_alloc_ == 0) bbuf_exp_alloc_ = 1;\
+  do { bbuf_exp_alloc_ *= 2; } while (bbuf_exp_alloc_ < (unsigned int )(low));\
+  bbuf_exp_p_ = (UChar* )xrealloc((buf)->p, bbuf_exp_alloc_);\
+  if (IS_NULL(bbuf_exp_p_)) return(ONIGERR_MEMORY);\
+  (buf)->p = bbuf_exp_p_;\
+  (buf)->alloc = bbuf_exp_alloc_;\
+} while (0)
+
+/* Make sure at least `size` bytes are allocated, doubling as needed.
+ * A zero allocation is seeded with 1 so the doubling loop terminates, and the
+ * reallocated pointer is stored only on success so a failed xrealloc (the
+ * enclosing function returns ONIGERR_MEMORY) does not drop the old block. */
+#define BBUF_ENSURE_SIZE(buf,size) do{\
+  unsigned int new_alloc = (buf)->alloc;\
+  if (new_alloc == 0 && (unsigned int )(size) > 0) new_alloc = 1;\
+  while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\
+  if ((buf)->alloc != new_alloc) {\
+    UChar* bbuf_new_p_ = (UChar* )xrealloc((buf)->p, new_alloc);\
+    if (IS_NULL(bbuf_new_p_)) return(ONIGERR_MEMORY);\
+    (buf)->p = bbuf_new_p_;\
+    (buf)->alloc = new_alloc;\
+  }\
+} while (0)
+
+#define BBUF_WRITE(buf,pos,bytes,n) do{\
+ int used = (pos) + (n);\
+ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
+ xmemcpy((buf)->p + (pos), (bytes), (n));\
+ if ((buf)->used < (unsigned int )used) (buf)->used = used;\
+} while (0) /* copies n bytes to offset pos, growing via BBUF_EXPAND when pos+n exceeds alloc and raising buf->used to pos+n if it was lower; BBUF_EXPAND may make the enclosing function return ONIGERR_MEMORY */
+
+#define BBUF_WRITE1(buf,pos,byte) do{\
+ int used = (pos) + 1;\
+ if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
+ (buf)->p[(pos)] = (byte);\
+ if ((buf)->used < (unsigned int )used) (buf)->used = used;\
+} while (0) /* single-byte form of BBUF_WRITE: stores byte at offset pos with the same growth and used-tracking rules */
+
+#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n))
+#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte))
+#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used)
+#define BBUF_GET_OFFSET_POS(buf) ((buf)->used)
+
+/* from < to: move n bytes from offset `from` up to offset `to`.  Grows the
+ * buffer through BBUF_EXPAND when to+n exceeds alloc (which may make the
+ * enclosing function return ONIGERR_MEMORY) and raises buf->used to to+n if
+ * it was lower.  xmemmove is used because the two ranges may overlap. */
+#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
+ if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
+ xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
+ if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
+} while (0)
+
+/* from > to: move n bytes from offset `from` down to offset `to`; neither
+ * buf->used nor buf->alloc is changed. */
+#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
+ xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
+} while (0)
+
+/* from > to: slide the tail [from, used) down to offset `to` and shrink
+ * buf->used by the size of the removed gap.  Both offsets are parenthesized
+ * in the subtraction so that expression arguments (e.g. `a + b`) are not
+ * re-associated by the expansion. */
+#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
+  xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
+  (buf)->used -= ((from) - (to));\
+} while (0)
+
+/* Insert n bytes at offset pos.  At or past the current end this is a plain
+ * BBUF_WRITE; otherwise the tail [pos, used) is first moved right by n bytes.
+ * Every macro argument is parenthesized where it is used or forwarded so
+ * expression arguments keep their meaning.  Growth may make the enclosing
+ * function return ONIGERR_MEMORY. */
+#define BBUF_INSERT(buf,pos,bytes,n) do {\
+  if ((pos) >= (buf)->used) {\
+    BBUF_WRITE((buf),(pos),(bytes),(n));\
+  }\
+  else {\
+    BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
+    xmemcpy((buf)->p + (pos), (bytes), (n));\
+  }\
+} while (0)
+
+#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
+
+
+#define ANCHOR_BEGIN_BUF (1<<0)
+#define ANCHOR_BEGIN_LINE (1<<1)
+#define ANCHOR_BEGIN_POSITION (1<<2)
+#define ANCHOR_END_BUF (1<<3)
+#define ANCHOR_SEMI_END_BUF (1<<4)
+#define ANCHOR_END_LINE (1<<5)
+
+#define ANCHOR_WORD_BOUND (1<<6)
+#define ANCHOR_NOT_WORD_BOUND (1<<7)
+#define ANCHOR_WORD_BEGIN (1<<8)
+#define ANCHOR_WORD_END (1<<9)
+#define ANCHOR_PREC_READ (1<<10)
+#define ANCHOR_PREC_READ_NOT (1<<11)
+#define ANCHOR_LOOK_BEHIND (1<<12)
+#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
+
+#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
+#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
+
+/* operation codes of the compiled pattern; each opcode occupies SIZE_OPCODE
+ * byte(s) in the byte code and may be followed by operands */
+enum OpCode {
+ OP_FINISH = 0, /* matching process terminator (no more alternative) */
+ OP_END = 1, /* pattern code terminator (success end) */
+
+ OP_EXACT1 = 2, /* single byte, N = 1 */
+ OP_EXACT2, /* single byte, N = 2 */
+ OP_EXACT3, /* single byte, N = 3 */
+ OP_EXACT4, /* single byte, N = 4 */
+ OP_EXACT5, /* single byte, N = 5 */
+ OP_EXACTN, /* single byte */
+ OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
+ OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
+ OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
+ OP_EXACTMB2N, /* mb-length = 2 */
+ OP_EXACTMB3N, /* mb-length = 3 */
+ OP_EXACTMBN, /* other length */
+
+ OP_EXACT1_IC, /* single byte, N = 1, ignore case */
+ OP_EXACTN_IC, /* single byte, ignore case */
+
+ OP_CCLASS,
+ OP_CCLASS_MB,
+ OP_CCLASS_MIX,
+ OP_CCLASS_NOT,
+ OP_CCLASS_MB_NOT,
+ OP_CCLASS_MIX_NOT,
+ OP_CCLASS_NODE, /* pointer to CClassNode node */
+
+ OP_ANYCHAR, /* "." */
+ OP_ANYCHAR_ML, /* "." multi-line */
+ OP_ANYCHAR_STAR, /* ".*" */
+ OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
+ OP_ANYCHAR_STAR_PEEK_NEXT,
+ OP_ANYCHAR_ML_STAR_PEEK_NEXT,
+
+ OP_WORD,
+ OP_NOT_WORD,
+ OP_WORD_BOUND,
+ OP_NOT_WORD_BOUND,
+ OP_WORD_BEGIN,
+ OP_WORD_END,
+
+ OP_BEGIN_BUF,
+ OP_END_BUF,
+ OP_BEGIN_LINE,
+ OP_END_LINE,
+ OP_SEMI_END_BUF,
+ OP_BEGIN_POSITION,
+
+ OP_BACKREF1,
+ OP_BACKREF2,
+ OP_BACKREFN,
+ OP_BACKREFN_IC,
+ OP_BACKREF_MULTI,
+ OP_BACKREF_MULTI_IC,
+ OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+
+ OP_MEMORY_START,
+ OP_MEMORY_START_PUSH, /* push back-tracker to stack */
+ OP_MEMORY_END_PUSH, /* push back-tracker to stack */
+ OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
+ OP_MEMORY_END,
+ OP_MEMORY_END_REC, /* push marker to stack */
+
+ OP_FAIL, /* pop stack and move */
+ OP_JUMP,
+ OP_PUSH,
+ OP_POP,
+ OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
+ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
+ OP_REPEAT, /* {n,m} */
+ OP_REPEAT_NG, /* {n,m}? (non greedy) */
+ OP_REPEAT_INC,
+ OP_REPEAT_INC_NG, /* non greedy */
+ OP_REPEAT_INC_SG, /* search and get in stack */
+ OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
+ OP_NULL_CHECK_START, /* null loop checker start */
+ OP_NULL_CHECK_END, /* null loop checker end */
+ OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+ OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
+
+ OP_PUSH_POS, /* (?=...) start */
+ OP_POP_POS, /* (?=...) end */
+ OP_PUSH_POS_NOT, /* (?!...) start */
+ OP_FAIL_POS, /* (?!...) end */
+ OP_PUSH_STOP_BT, /* (?>...) start */
+ OP_POP_STOP_BT, /* (?>...) end */
+ OP_LOOK_BEHIND, /* (?<=...) start (no end opcode needed) */
+ OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
+ OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
+
+ OP_CALL, /* \g<name> */
+ OP_RETURN,
+
+ OP_STATE_CHECK_PUSH, /* combination explosion check and push */
+ OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
+ OP_STATE_CHECK, /* check only */
+ OP_STATE_CHECK_ANYCHAR_STAR,
+ OP_STATE_CHECK_ANYCHAR_ML_STAR,
+
+ /* not needed while IS_DYNAMIC_OPTION() == 0 */
+ OP_SET_OPTION_PUSH, /* set option and push recover option */
+ OP_SET_OPTION /* set option */
+};
+
+typedef int RelAddrType;
+typedef int AbsAddrType;
+typedef int LengthType;
+typedef int RepeatNumType;
+typedef short int MemNumType;
+typedef short int StateCheckNumType;
+typedef void* PointerType;
+
+#define SIZE_OPCODE 1
+#define SIZE_RELADDR (int)sizeof(RelAddrType)
+#define SIZE_ABSADDR (int)sizeof(AbsAddrType)
+#define SIZE_LENGTH (int)sizeof(LengthType)
+#define SIZE_MEMNUM (int)sizeof(MemNumType)
+#define SIZE_STATE_CHECK_NUM (int)sizeof(StateCheckNumType)
+#define SIZE_REPEATNUM (int)sizeof(RepeatNumType)
+#define SIZE_OPTION (int)sizeof(OnigOptionType)
+#define SIZE_CODE_POINT (int)sizeof(OnigCodePoint)
+#define SIZE_POINTER (int)sizeof(PointerType)
+
+
+#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
+#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
+#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
+#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
+#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
+#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
+#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
+#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
+
+/* code point's address must be aligned address. */
+#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
+#define GET_BYTE_INC(byte,p) do{\
+ byte = *(p);\
+ (p)++;\
+} while(0)
+
+
+/* op-code + arg size */
+#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
+#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
+#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_POP SIZE_OPCODE
+#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
+#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
+#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_PUSH_POS SIZE_OPCODE
+#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
+#define SIZE_OP_POP_POS SIZE_OPCODE
+#define SIZE_OP_FAIL_POS SIZE_OPCODE
+#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
+#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
+#define SIZE_OP_FAIL SIZE_OPCODE
+#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
+#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
+#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
+#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
+#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
+#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
+#define SIZE_OP_RETURN SIZE_OPCODE
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#endif
+
+#define MC_ESC(syn) (syn)->meta_char_table.esc
+#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
+#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
+#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
+#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
+
+#define IS_MC_ESC_CODE(code, syn) \
+ ((code) == MC_ESC(syn) && \
+ !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
+
+
+#define SYN_POSIX_COMMON_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
+ ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
+ ONIG_SYN_OP_LINE_ANCHOR | \
+ ONIG_SYN_OP_ESC_CONTROL_CHARS )
+
+#define SYN_GNU_REGEX_OP \
+ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
+ ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
+ ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
+ ONIG_SYN_OP_VBAR_ALT | \
+ ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
+ ONIG_SYN_OP_QMARK_ZERO_ONE | \
+ ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
+ ONIG_SYN_OP_ESC_W_WORD | \
+ ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
+ ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
+ ONIG_SYN_OP_LINE_ANCHOR )
+
+#define SYN_GNU_REGEX_BV \
+ ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
+ ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
+ ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+
+
+#define NCCLASS_FLAGS(cc) ((cc)->flags)
+#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag))
+#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag))
+#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
+
+/* cclass node */
+#define FLAG_NCCLASS_NOT (1<<0)
+#define FLAG_NCCLASS_SHARE (1<<1)
+
+#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
+#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
+#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
+#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
+#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
+
+typedef struct { /* common leading member of node structs (CClassNode embeds it as its first field) */
+ int type; /* node kind discriminator -- NOTE(review): value set is defined outside this chunk */
+ /* struct _Node* next; */
+ /* unsigned int flags; */
+} NodeBase;
+
+typedef struct { /* character-class node */
+ NodeBase base;
+ unsigned int flags; /* FLAG_NCCLASS_* bits, accessed through the NCCLASS_* macros */
+ BitSet bs; /* one bit per single-byte value (SINGLE_BYTE_SIZE bits in total) */
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+
+typedef intptr_t OnigStackIndex;
+
+typedef struct _OnigStackType { /* one match-stack entry: `type` selects which member of `u` is meaningful */
+ unsigned int type;
+ union {
+ struct {
+ UChar *pcode; /* byte code position */
+ UChar *pstr; /* string position */
+ UChar *pstr_prev; /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ unsigned int state_check;
+#endif
+ } state;
+ struct {
+ int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
+ UChar *pcode; /* byte code position (head of repeated target) */
+ int num; /* repeat id */
+ } repeat;
+ struct {
+ OnigStackIndex si; /* index of stack */
+ } repeat_inc;
+ struct {
+ int num; /* memory num */
+ UChar *pstr; /* start/end position */
+ /* The following fields are set only when this stack entry is a MEM-START */
+ OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
+ OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
+ } mem;
+ struct {
+ int num; /* null check id */
+ UChar *pstr; /* start position */
+ } null_check;
+#ifdef USE_SUBEXP_CALL
+ struct {
+ UChar *ret_addr; /* byte code position */
+ int num; /* null check id */
+ UChar *pstr; /* string position */
+ } call_frame;
+#endif
+ } u;
+} OnigStackType;
+
+typedef struct { /* per-match argument bundle -- NOTE(review): consumers live outside this chunk */
+ void* stack_p; /* presumably a reusable match-stack buffer -- confirm in the matcher */
+ size_t stack_n; /* presumably the capacity/entry count that goes with stack_p -- confirm */
+ OnigOptionType options;
+ OnigRegion* region;
+ const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ int best_len; /* for ONIG_OPTION_FIND_LONGEST */
+ UChar* best_s;
+#endif
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ void* state_check_buff;
+ int state_check_buff_size;
+#endif
+} OnigMatchArg;
+
+
+#define IS_CODE_SB_WORD(enc,code) \
+ (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
+
+#ifdef ONIG_DEBUG
+
+typedef struct { /* debug-only (ONIG_DEBUG) description of one opcode; element type of OnigOpInfo[] */
+ short int opcode;
+ char* name;
+ short int arg_type;
+} OnigOpInfoType;
+
+extern OnigOpInfoType OnigOpInfo[];
+
+/* extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc); */
+
+#ifdef ONIG_DEBUG_STATISTICS
+extern void onig_statistics_init(void);
+extern void onig_print_statistics(FILE* f);
+#endif
+#endif
+
+extern UChar* onig_error_code_to_format(int code);
+extern void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...);
+extern int onig_bbuf_init(BBuf* buf, int size);
+extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline);
+extern void onig_chain_reduce(regex_t* reg);
+extern void onig_chain_link_add(regex_t* to, regex_t* add);
+extern void onig_transfer(regex_t* to, regex_t* from);
+extern int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc);
+extern int onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc);
+
+/* strend hash */
+typedef void hash_table_type;
+#ifdef RUBY
+#include "st.h"
+
+typedef st_data_t hash_data_type;
+#else
+typedef unsigned long hash_data_type;
+#endif
+
+extern hash_table_type* onig_st_init_strend_table_with_size(st_index_t size);
+extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value);
+extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value);
+
+/* encoding property management
+ *
+ * Both macros expand to bare statements (no do/while wrapper) and depend on
+ * the surrounding scope:
+ *  - PROPERTY_LIST_ADD_PROP assigns to a variable `r`, jumps to a label `end`
+ *    on a non-zero result, and refers to PropertyNameTable, PropertyList,
+ *    PropertyListNum and PropertyListSize by name.
+ *  - PROPERTY_LIST_INIT_CHECK reads PropertyInited and passes
+ *    init_property_list to onigenc_property_list_init; a non-zero result is
+ *    returned from the enclosing function.
+ */
+#define PROPERTY_LIST_ADD_PROP(Name, CR) \
+ r = onigenc_property_list_add_property((UChar* )Name, CR,\
+ &PropertyNameTable, &PropertyList, &PropertyListNum,\
+ &PropertyListSize);\
+ if (r != 0) goto end
+
+#define PROPERTY_LIST_INIT_CHECK \
+ if (PropertyInited == 0) {\
+ int r = onigenc_property_list_init(init_property_list);\
+ if (r != 0) return r;\
+ }
+
+extern int onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize);
+
+typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
+
+extern int onigenc_property_list_init(ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE);
+
+#endif /* ONIGURUMA_REGINT_H */
diff --git a/src/regparse.c b/src/regparse.c
new file mode 100644
index 000000000..c07d5c35e
--- /dev/null
+++ b/src/regparse.c
@@ -0,0 +1,5600 @@
+/* -*- mode:c; c-file-style:"gnu" -*- */
+/**********************************************************************
+ regparse.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#include <string.h>
+#include "regparse.h"
+#include <stdarg.h>
+#ifdef INCLUDE_REGEXP
+
+#define WARN_BUFSIZE 256
+
+#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+
+
+const OnigSyntaxType OnigSyntaxRuby = { /* Ruby syntax definition; OnigDefaultSyntax is initialized to ONIG_SYNTAX_RUBY */
+ (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
+ ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_C_CONTROL )
+ & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) /* ONIG_SYN_OP_* set: GNU regex operators plus extras, with the LTGT word begin/end flag masked out */
+ , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
+ ONIG_SYN_OP2_OPTION_RUBY |
+ ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
+ ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+ ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
+ ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+ ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
+ ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
+ ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
+ ONIG_SYN_OP2_ESC_H_XDIGIT ) /* ONIG_SYN_OP2_* set */
+ , ( SYN_GNU_REGEX_BV |
+ ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
+ ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
+ ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
+ ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
+ ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
+ ONIG_SYN_WARN_CC_DUP |
+ ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) /* behavior and warning flags on top of the GNU regex set */
+ , ONIG_OPTION_NONE
+ ,
+ { /* meta character table (read through the MC_* macros): only the escape character is active */
+ (OnigCodePoint )'\\' /* esc */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+ }
+};
+
+const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
+
+/* Warning sink that discards its message; the default for both warn hooks
+ * when no DEFAULT_*WARN_FUNCTION is configured. */
+extern void
+onig_null_warn(const char* s ARG_UNUSED)
+{
+}
+
+#ifdef DEFAULT_WARN_FUNCTION
+static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_warn = onig_null_warn;
+#endif
+
+#ifdef DEFAULT_VERB_WARN_FUNCTION
+static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
+#else
+static OnigWarnFunc onig_verb_warn = onig_null_warn;
+#endif
+
+/* Install `f` as the callback stored in the file-local onig_warn hook. */
+extern void
+onig_set_warn_func(OnigWarnFunc f)
+{
+  onig_warn = f;
+}
+
+/* Install `f` as the callback stored in the file-local onig_verb_warn hook. */
+extern void
+onig_set_verb_warn_func(OnigWarnFunc f)
+{
+  onig_verb_warn = f;
+}
+
+static void CC_DUP_WARN(ScanEnv *env);
+
+/* Release a byte buffer together with the storage it owns.
+ * A NULL buffer is accepted and ignored. */
+static void
+bbuf_free(BBuf* bbuf)
+{
+  if (IS_NULL(bbuf)) return;
+
+  if (IS_NOT_NULL(bbuf->p)) {
+    xfree(bbuf->p);
+  }
+  xfree(bbuf);
+}
+
+/* Create a copy of `from` and store it in *rto.
+ *
+ * Returns 0 on success.  On failure a non-zero error code is returned (the
+ * memory error from CHECK_NULL_RETURN_MEMERR, or whatever BBUF_INIT reports)
+ * and *rto is NULL: the partially built clone is released here instead of
+ * being leaked or handed back half-initialized.
+ */
+static int
+bbuf_clone(BBuf** rto, BBuf* from)
+{
+  int r;
+  BBuf *to;
+
+  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
+  CHECK_NULL_RETURN_MEMERR(to);
+  r = BBUF_INIT(to, from->alloc);
+  if (r != 0) {
+    /* storage allocation failed: drop the header we just allocated */
+    xfree(to);
+    *rto = (BBuf* )NULL;
+    return r;
+  }
+  to->used = from->used;
+  xmemcpy(to->p, from->p, from->used);
+  return 0;
+}
+
+#define BACKREF_REL_TO_ABS(rel_no, env) \
+ ((env)->num_mem + 1 + (rel_no))
+
+/* Clear flag bits `f` in `v` when `negative` is non-zero, otherwise set them.
+ * The whole conditional is parenthesized so the macro stays one expression
+ * when it is combined with neighbouring operators. */
+#define ONOFF(v,f,negative) ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
+
+#define MBCODE_START_POS(enc) \
+ (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
+
+#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
+ add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) /* adds the range MBCODE_START_POS(enc)..max code point; uses a variable `env` from the caller's scope */
+
+#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
+ r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
+ if (r) return r;\
+ }\
+} while (0) /* no-op for single-byte encodings; assigns the caller's `r` and returns it from the enclosing function when non-zero */
+
+
+#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
+ if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
+ BS_ROOM(bs, pos) |= BS_BIT(pos); \
+} while (0) /* sets bit pos, first calling CC_DUP_WARN if it was already set; uses a variable `env` from the caller's scope */
+
+#define BITSET_IS_EMPTY(bs,empty) do {\
+ int i;\
+ empty = 1;\
+ for (i = 0; i < (int )BITSET_SIZE; i++) {\
+ if ((bs)[i] != 0) {\
+ empty = 0; break;\
+ }\
+ }\
+} while (0) /* stores 1 in `empty` when every word of bs is zero, otherwise 0 */
+
+/* Set every bit from `from` through `to` (inclusive) in `bs`, never going
+ * past the single-byte range.  Bits that were already set go through the
+ * duplicate warning of BITSET_SET_BIT_CHKDUP, which reads `env`. */
+static void
+bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
+{
+  int pos = from;
+
+  while (pos <= to && pos < SINGLE_BYTE_SIZE) {
+    BITSET_SET_BIT_CHKDUP(bs, pos);
+    pos++;
+  }
+}
+
+/* Complement every word of `bs` in place. */
+static void
+bitset_invert(BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < (int )BITSET_SIZE) {
+    bs[room] = ~(bs[room]);
+    room++;
+  }
+}
+
+/* Store the complement of `from` into `to`, word by word. */
+static void
+bitset_invert_to(BitSetRef from, BitSetRef to)
+{
+  int room = 0;
+
+  while (room < (int )BITSET_SIZE) {
+    to[room] = ~(from[room]);
+    room++;
+  }
+}
+
+/* dest &= bs, word by word. */
+static void
+bitset_and(BitSetRef dest, BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < (int )BITSET_SIZE) {
+    dest[room] &= bs[room];
+    room++;
+  }
+}
+
+/* dest |= bs, word by word. */
+static void
+bitset_or(BitSetRef dest, BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < (int )BITSET_SIZE) {
+    dest[room] |= bs[room];
+    room++;
+  }
+}
+
+/* Copy all words of `bs` into `dest`. */
+static void
+bitset_copy(BitSetRef dest, BitSetRef bs)
+{
+  int room = 0;
+
+  while (room < (int )BITSET_SIZE) {
+    dest[room] = bs[room];
+    room++;
+  }
+}
+
+/* Compare the first n bytes of s1 and s2.
+ * Returns 0 when they are equal; otherwise the difference at the first
+ * mismatch, computed as (byte of s2) - (byte of s1).  Note that this sign is
+ * the opposite of the C library's strncmp. */
+extern int
+onig_strncmp(const UChar* s1, const UChar* s2, int n)
+{
+  for (; n > 0; n--, s1++, s2++) {
+    int diff = *s2 - *s1;
+    if (diff != 0) return diff;
+  }
+  return 0;
+}
+
+/* Copy the bytes [src, end) to dest and append a single zero byte.
+ * When the range is empty nothing at all is written -- not even the
+ * terminator. */
+extern void
+onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
+{
+  ptrdiff_t nbytes = end - src;
+
+  if (nbytes <= 0) return;
+
+  xmemcpy(dest, src, nbytes);
+  dest[nbytes] = (UChar )0;
+}
+
+#ifdef USE_NAMED_GROUP
+/* Duplicate the bytes [s, end) into a fresh xmalloc'ed buffer and terminate
+ * the copy with ONIGENC_MBC_MINLEN(enc) zero bytes.  An allocation failure is
+ * handled by CHECK_NULL_RETURN. */
+static UChar*
+strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
+{
+  ptrdiff_t body_len = end - s;
+  int nul_len = ONIGENC_MBC_MINLEN(enc);
+  int k = 0;
+  UChar *copy;
+
+  copy = (UChar* )xmalloc(body_len + nul_len);
+  CHECK_NULL_RETURN(copy);
+  xmemcpy(copy, s, body_len);
+
+  while (k < nul_len) {
+    copy[body_len + k] = (UChar )0;
+    k++;
+  }
+
+  return copy;
+}
+#endif
+
+/* scan pattern methods
+ *
+ * These macros operate on variables of the function that uses them: `p`
+ * (current position), `end` (end of pattern), `enc` (encoding) and
+ * `pfetch_prev`, which PFETCH_READY declares and PINC / PFETCH update so that
+ * PUNFETCH can step back one character.
+ */
+#define PEND_VALUE 0
+
+#define PFETCH_READY UChar* pfetch_prev
+#define PEND (p < end ? 0 : 1)
+#define PUNFETCH p = pfetch_prev
+#define PINC do { \
+ pfetch_prev = p; \
+ p += enclen(enc, p, end); \
+} while (0)
+#define PFETCH(c) do { \
+ c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
+ pfetch_prev = p; \
+ p += enclen(enc, p, end); \
+} while (0)
+
+#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
+/* True when the next character (PPEEK) equals code point `c`.  The argument
+ * is parenthesized so the cast applies to the whole expression passed in. */
+#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )(c))
+
+/* Grow (or create) a heap string to hold `capa` bytes plus a terminator and
+ * append [src, src_end) after the existing content [dest, dest_end).
+ *
+ * The length of the existing content is captured before xrealloc is called:
+ * once `dest` has been passed to xrealloc its old value must not be used any
+ * more, not even in pointer arithmetic.
+ *
+ * Returns the (possibly moved) buffer; allocation failure is handled by
+ * CHECK_NULL_RETURN.
+ */
+static UChar*
+strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
+            int capa)
+{
+  UChar* r;
+  ptrdiff_t dest_len = dest_end - dest;
+
+  if (dest)
+    r = (UChar* )xrealloc(dest, capa + 1);
+  else
+    r = (UChar* )xmalloc(capa + 1);
+
+  CHECK_NULL_RETURN(r);
+  onig_strcpy(r + dest_len, src, src_end);
+  return r;
+}
+
+/* Variant for a `dest` that must not be reallocated (it lives in a static
+ * area): a new buffer of capa + 1 bytes is allocated, the existing content
+ * [dest, dest_end) is copied in, and [src, src_end) is appended after it. */
+static UChar*
+strcat_capa_from_static(UChar* dest, UChar* dest_end,
+                        const UChar* src, const UChar* src_end, int capa)
+{
+  UChar* joined = (UChar* )xmalloc(capa + 1);
+
+  CHECK_NULL_RETURN(joined);
+
+  onig_strcpy(joined, dest, dest_end);
+  onig_strcpy(joined + (dest_end - dest), src, src_end);
+
+  return joined;
+}
+#endif //INCLUDE_REGEXP
+
+#ifdef INCLUDE_ENCODING
+#ifdef USE_ST_LIBRARY
+
+//#include "st.h"
+
+typedef struct { /* hash key describing the byte range [s, end); the bytes themselves are not copied */
+ const UChar* s;
+ const UChar* end;
+} st_str_end_key;
+
+/* Key comparison for the str/end hash table.
+ * Returns 0 when both keys cover identical byte sequences.  Keys of
+ * different length yield 1; otherwise the result is the byte difference at
+ * the first mismatch. */
+static int
+str_end_cmp(st_data_t xp, st_data_t yp)
+{
+  const st_str_end_key *lhs = (const st_str_end_key *)xp;
+  const st_str_end_key *rhs = (const st_str_end_key *)yp;
+  const UChar *a, *b;
+
+  if ((lhs->end - lhs->s) != (rhs->end - rhs->s))
+    return 1;
+
+  for (a = lhs->s, b = rhs->s; a < lhs->end; a++, b++) {
+    int diff = (int )*a - (int )*b;
+    if (diff != 0) return diff;
+  }
+
+  return 0;
+}
+
+/* Hash function for the str/end hash table: a multiplicative hash
+ * (factor 997) over the key's bytes, finished by adding the value shifted
+ * right by five bits. */
+static st_index_t
+str_end_hash(st_data_t xp)
+{
+  const st_str_end_key *key = (const st_str_end_key *)xp;
+  const UChar *cur;
+  st_index_t acc = 0;
+
+  for (cur = key->s; cur < key->end; cur++) {
+    acc = acc * 997 + (int )*cur;
+  }
+
+  return acc + (acc >> 5);
+}
+
+/* Create a hash table keyed by st_str_end_key byte ranges, using
+ * str_end_cmp / str_end_hash, with the given initial size. */
+extern hash_table_type*
+onig_st_init_strend_table_with_size(st_index_t size)
+{
+  static const struct st_hash_type strend_hash_type = {
+    str_end_cmp,
+    str_end_hash,
+  };
+
+  return (hash_table_type* )onig_st_init_table_with_size(&strend_hash_type,
+                                                         size);
+}
+
+/* Look up the byte range [str_key, end_key) in `table`.  The result of
+ * onig_st_lookup is returned unchanged, and the associated value (if any) is
+ * stored through `value`. */
+extern int
+onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
+                      const UChar* end_key, hash_data_type *value)
+{
+  st_str_end_key probe;
+
+  probe.s   = (UChar* )str_key;
+  probe.end = (UChar* )end_key;
+
+  return onig_st_lookup(table, (st_data_t )(&probe), value);
+}
+
+/* Insert `value` under the byte range [str_key, end_key).
+ *
+ * A heap-allocated key record is handed to the table; when onig_st_insert
+ * reports a non-zero result the record is freed here.  The range itself is
+ * not copied, so the caller must keep those bytes alive.
+ *
+ * Returns the result of onig_st_insert, or a memory error (through
+ * CHECK_NULL_RETURN_MEMERR) when the key record cannot be allocated --
+ * previously that case dereferenced a NULL pointer.
+ */
+extern int
+onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
+                      const UChar* end_key, hash_data_type value)
+{
+  st_str_end_key* key;
+  int result;
+
+  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
+  CHECK_NULL_RETURN_MEMERR(key);
+  key->s = (UChar* )str_key;
+  key->end = (UChar* )end_key;
+  result = onig_st_insert(table, (st_data_t )key, value);
+  if (result) {
+    xfree(key);
+  }
+  return result;
+}
+
+#endif /* USE_ST_LIBRARY */
+#endif //INCLUDE_ENCODING
+
+#ifdef INCLUDE_REGEXP
+#ifdef USE_NAMED_GROUP
+
+#define INIT_NAME_BACKREFS_ALLOC_NUM 8
+
+typedef struct { /* one named group and the group numbers registered under that name */
+ UChar* name;
+ size_t name_len; /* byte length */
+ int back_num; /* number of backrefs */
+ int back_alloc;
+ int back_ref1; /* the group number when back_num == 1 */
+ int* back_refs; /* the group numbers when back_num > 1 */
+} NameEntry;
+
+#ifdef USE_ST_LIBRARY
+
+typedef st_table NameTable;
+typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
+
+#define NAMEBUF_SIZE 24
+#define NAMEBUF_SIZE_1 25
+
+#ifdef ONIG_DEBUG
+/* onig_st_foreach callback: print one name entry as "name: refs" to the
+ * FILE* passed in `arg`.  An entry without references prints "-". */
+static int
+i_print_name_entry(UChar* key, NameEntry* e, void* arg)
+{
+  FILE* out = (FILE* )arg;
+  int k;
+
+  fprintf(out, "%s: ", e->name);
+
+  switch (e->back_num) {
+  case 0:
+    fputs("-", out);
+    break;
+  case 1:
+    fprintf(out, "%d", e->back_ref1);
+    break;
+  default:
+    for (k = 0; k < e->back_num; k++) {
+      if (k > 0) fprintf(out, ", ");
+      fprintf(out, "%d", e->back_refs[k]);
+    }
+    break;
+  }
+
+  fputs("\n", out);
+  return ST_CONTINUE;
+}
+
+/* Debug helper: dump the name table of `reg` to `fp`.  Nothing is printed
+ * when the regex has no name table.  Always returns 0. */
+extern int
+onig_print_names(FILE* fp, regex_t* reg)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table)) return 0;
+
+  fprintf(fp, "name table\n");
+  onig_st_foreach(table, i_print_name_entry, (HashDataType )fp);
+  fputs("\n", fp);
+  return 0;
+}
+#endif /* ONIG_DEBUG */
+
+/* onig_st_foreach callback: free a name entry -- its name, its back
+ * reference array (if any), the hash key record and the entry itself -- and
+ * ask the table to delete the slot. */
+static int
+i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
+{
+  xfree(e->name);
+  if (IS_NOT_NULL(e->back_refs)) {
+    xfree(e->back_refs);
+  }
+
+  xfree(key);
+  xfree(e);
+
+  return ST_DELETE;
+}
+
+/* Free every entry of the regex's name table; the table object itself is
+ * kept.  Always returns 0. */
+static int
+names_clear(regex_t* reg)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table)) return 0;
+
+  onig_st_foreach(table, i_free_name_entry, 0);
+  return 0;
+}
+
+/* Free all name entries and then the name table itself, leaving
+ * reg->name_table NULL.  A non-zero result from names_clear is returned
+ * immediately; otherwise the result is 0. */
+extern int
+onig_names_free(regex_t* reg)
+{
+  NameTable* table;
+  int status = names_clear(reg);
+
+  if (status != 0) return status;
+
+  table = (NameTable* )reg->name_table;
+  if (IS_NOT_NULL(table)) {
+    onig_st_free_table(table);
+  }
+  reg->name_table = (void* )NULL;
+  return 0;
+}
+
+/* Find the entry registered for the name [name, name_end).
+ * Returns NULL when the regex has no name table or the lookup leaves the
+ * result untouched. */
+static NameEntry*
+name_find(regex_t* reg, const UChar* name, const UChar* name_end)
+{
+  NameEntry* found = (NameEntry* )NULL;
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table)) return found;
+
+  onig_st_lookup_strend(table, name, name_end,
+                        (HashDataType* )((void* )(&found)));
+  return found;
+}
+
+typedef struct { /* state that onig_foreach_name passes to the i_names callback */
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); /* user callback */
+ regex_t* reg;
+ void* arg; /* opaque user argument forwarded to func */
+ int ret; /* first non-zero value returned by func, else 0 */
+ OnigEncoding enc;
+} INamesArg;
+
+/* onig_st_foreach callback used by onig_foreach_name: call the user function
+ * for one entry.  A non-zero result is recorded in arg->ret and stops the
+ * iteration. */
+static int
+i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
+{
+  int* refs = (e->back_num > 1 ? e->back_refs : &(e->back_ref1));
+  int status;
+
+  status = (*(arg->func))(e->name, e->name + e->name_len,
+                          e->back_num, refs, arg->reg, arg->arg);
+  if (status == 0) return ST_CONTINUE;
+
+  arg->ret = status;
+  return ST_STOP;
+}
+
+/* Call `func` once per named group of `reg`, passing the name range, the
+ * number of group numbers, the group numbers, the regex and `arg`.
+ * Iteration stops at the first non-zero result, which is returned; otherwise
+ * (including when there is no name table) the result is 0. */
+extern int
+onig_foreach_name(regex_t* reg,
+  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
+{
+  INamesArg ctx;
+  NameTable* table = (NameTable* )reg->name_table;
+
+  ctx.ret = 0;
+  if (IS_NULL(table)) return ctx.ret;
+
+  ctx.func = func;
+  ctx.reg  = reg;
+  ctx.arg  = arg;
+  ctx.enc  = reg->enc; /* should be pattern encoding. */
+  onig_st_foreach(table, i_names, (HashDataType )&ctx);
+
+  return ctx.ret;
+}
+
+/* onig_st_foreach callback: replace every group number stored in the entry
+ * by map[old].new_val.  Entries without references are left alone. */
+static int
+i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
+{
+  if (e->back_num == 1) {
+    e->back_ref1 = map[e->back_ref1].new_val;
+  }
+  else if (e->back_num > 1) {
+    int k = 0;
+
+    while (k < e->back_num) {
+      e->back_refs[k] = map[e->back_refs[k]].new_val;
+      k++;
+    }
+  }
+
+  return ST_CONTINUE;
+}
+
+/* Apply the group-number remapping `map` to every entry of the regex's name
+ * table.  Always returns 0. */
+extern int
+onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table)) return 0;
+
+  onig_st_foreach(table, i_renumber_name, (HashDataType )map);
+  return 0;
+}
+
+
+/* Number of entries in the regex's name table; 0 when there is no table. */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+  NameTable* table = (NameTable* )reg->name_table;
+
+  if (IS_NULL(table)) return 0;
+
+  return table->num_entries;
+}
+
+#else /* USE_ST_LIBRARY */
+
+/* Number of NameEntry slots allocated when the table is first created. */
+#define INIT_NAMES_ALLOC_NUM 8
+
+/* Name table used when USE_ST_LIBRARY is not defined: a growable array of
+ * entries that is searched linearly. */
+typedef struct {
+ NameEntry* e; /* entry array */
+ int num; /* entries in use */
+ int alloc; /* entries allocated */
+} NameTable;
+
+#ifdef ONIG_DEBUG
+/* Debug helper: write the name table of `reg` to `fp`, one line per name
+ * followed by its group numbers ("-" when it has none).  Nothing is
+ * printed for a missing or empty table.  Always returns 0. */
+extern int
+onig_print_names(FILE* fp, regex_t* reg)
+{
+ int idx, k;
+ NameEntry* ent;
+ NameTable* table = (NameTable* )reg->name_table;
+
+ if (IS_NULL(table) || table->num <= 0) return 0;
+
+ fprintf(fp, "name table\n");
+ for (idx = 0; idx < table->num; idx++) {
+ ent = &(table->e[idx]);
+ fprintf(fp, "%s: ", ent->name);
+ switch (ent->back_num) {
+ case 0:
+ fputs("-", fp);
+ break;
+ case 1:
+ fprintf(fp, "%d", ent->back_ref1);
+ break;
+ default:
+ for (k = 0; k < ent->back_num; k++) {
+ if (k > 0) fprintf(fp, ", ");
+ fprintf(fp, "%d", ent->back_refs[k]);
+ }
+ break;
+ }
+ fputs("\n", fp);
+ }
+ fputs("\n", fp);
+ return 0;
+}
+#endif
+
+/* Release everything owned by the array-based name table of `reg` (names,
+ * back-reference arrays, the entry array) and reset its entry count.  The
+ * NameTable object itself is kept.  Always returns 0. */
+static int
+names_clear(regex_t* reg)
+{
+ int idx;
+ NameEntry* ent;
+ NameTable* table = (NameTable* )reg->name_table;
+
+ if (IS_NULL(table)) return 0;
+
+ for (idx = 0; idx < table->num; idx++) {
+ ent = &(table->e[idx]);
+ /* entries without a name are skipped entirely */
+ if (IS_NULL(ent->name)) continue;
+
+ xfree(ent->name);
+ ent->name = NULL;
+ ent->name_len = 0;
+ ent->back_num = 0;
+ ent->back_alloc = 0;
+ if (IS_NOT_NULL(ent->back_refs)) xfree(ent->back_refs);
+ ent->back_refs = (int* )NULL;
+ }
+
+ if (IS_NOT_NULL(table->e)) {
+ xfree(table->e);
+ table->e = NULL;
+ }
+ table->num = 0;
+ return 0;
+}
+
+/* Destroy the name table of `reg`: clear its contents, free the table
+ * object and set reg->name_table to NULL.  Returns 0, or the non-zero
+ * status of names_clear() (in which case nothing more is freed). */
+extern int
+onig_names_free(regex_t* reg)
+{
+ NameTable* table;
+ int status = names_clear(reg);
+
+ if (status != 0) return status;
+
+ table = (NameTable* )reg->name_table;
+ if (IS_NOT_NULL(table)) xfree(table);
+ reg->name_table = NULL;
+ return 0;
+}
+
+/* Linear search of the array-based name table of `reg` for the name
+ * [name, name_end).  Returns the matching entry, or NULL when there is no
+ * table or no entry with the same length and bytes.
+ *
+ * The name is only read, so both pointers are const-qualified; this matches
+ * the USE_ST_LIBRARY variant and lets onig_name_to_group_numbers() pass its
+ * const arguments without discarding the qualifier. */
+static NameEntry*
+name_find(regex_t* reg, const UChar* name, const UChar* name_end)
+{
+ int i, len;
+ NameEntry* e;
+ NameTable* t = (NameTable* )reg->name_table;
+
+ if (IS_NOT_NULL(t)) {
+ len = (int )(name_end - name);
+ for (i = 0; i < t->num; i++) {
+ e = &(t->e[i]);
+ /* compare lengths first so the byte comparison only runs on candidates */
+ if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
+ return e;
+ }
+ }
+ return (NameEntry* )NULL;
+}
+
+/* Call `func` once for every named group of `reg` (array variant), in
+ * table order.  Stops at and returns the first non-zero result; returns 0
+ * when there is no table or every call returned 0. */
+extern int
+onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
+{
+ int idx, status;
+ int* group_nums;
+ NameEntry* ent;
+ NameTable* table = (NameTable* )reg->name_table;
+
+ if (IS_NULL(table)) return 0;
+
+ for (idx = 0; idx < table->num; idx++) {
+ ent = &(table->e[idx]);
+ /* a single group number is kept inline in back_ref1 */
+ if (ent->back_num > 1)
+ group_nums = ent->back_refs;
+ else
+ group_nums = &(ent->back_ref1);
+
+ status = (*func)(ent->name, ent->name + ent->name_len, ent->back_num,
+ group_nums, reg, arg);
+ if (status != 0) return status;
+ }
+ return 0;
+}
+
+/* Number of entries in use in the name table of `reg`; 0 without a table. */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ NameTable* table = (NameTable* )reg->name_table;
+
+ if (IS_NULL(table)) return 0;
+
+ return table->num;
+}
+
+#endif /* else USE_ST_LIBRARY */
+
+/* Bind capture group number `backref` to the name [name, name_end) in the
+ * name table of `reg`, creating the table and/or the entry on first use.
+ *
+ * Returns 0 on success, ONIGERR_EMPTY_GROUP_NAME for an empty name,
+ * ONIGERR_MEMORY when an allocation fails (CHECK_NULL_RETURN_MEMERR is
+ * presumed to return the same code), and ONIGERR_MULTIPLEX_DEFINED_NAME
+ * when the name is already bound and the syntax does not set
+ * ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME.
+ *
+ * Allocation failures leave the table consistent: a failed xrealloc() never
+ * overwrites the pointer to the still-valid old buffer, and the entry and
+ * back-reference counters are only advanced once the storage behind them
+ * exists. */
+static int
+name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
+{
+ int alloc;
+ int n;
+ int* refs;
+ NameEntry* e;
+ NameTable* t = (NameTable* )reg->name_table;
+
+ if (name_end - name <= 0)
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ e = name_find(reg, name, name_end);
+ if (IS_NULL(e)) {
+#ifdef USE_ST_LIBRARY
+ if (IS_NULL(t)) {
+ t = onig_st_init_strend_table_with_size(5);
+ /* do not register (or later insert into) a table that was not created */
+ CHECK_NULL_RETURN_MEMERR(t);
+ reg->name_table = (void* )t;
+ }
+ e = (NameEntry* )xmalloc(sizeof(NameEntry));
+ CHECK_NULL_RETURN_MEMERR(e);
+
+ e->name = strdup_with_null(reg->enc, name, name_end);
+ if (IS_NULL(e->name)) {
+ xfree(e);
+ return ONIGERR_MEMORY;
+ }
+ onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
+ (HashDataType )e);
+
+ e->name_len = name_end - name;
+ e->back_num = 0;
+ e->back_alloc = 0;
+ e->back_refs = (int* )NULL;
+
+#else
+
+ if (IS_NULL(t)) {
+ alloc = INIT_NAMES_ALLOC_NUM;
+ t = (NameTable* )xmalloc(sizeof(NameTable));
+ CHECK_NULL_RETURN_MEMERR(t);
+ t->e = NULL;
+ t->alloc = 0;
+ t->num = 0;
+
+ t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
+ if (IS_NULL(t->e)) {
+ xfree(t);
+ return ONIGERR_MEMORY;
+ }
+ t->alloc = alloc;
+ reg->name_table = t;
+ goto clear;
+ }
+ else if (t->num == t->alloc) {
+ int i;
+ NameEntry* grown;
+
+ alloc = t->alloc * 2;
+ /* keep t->e valid if the reallocation fails */
+ grown = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
+ CHECK_NULL_RETURN_MEMERR(grown);
+ t->e = grown;
+ t->alloc = alloc;
+
+ clear:
+ /* blank every slot that is not in use yet */
+ for (i = t->num; i < t->alloc; i++) {
+ t->e[i].name = NULL;
+ t->e[i].name_len = 0;
+ t->e[i].back_num = 0;
+ t->e[i].back_alloc = 0;
+ t->e[i].back_refs = (int* )NULL;
+ }
+ }
+ e = &(t->e[t->num]);
+ e->name = strdup_with_null(reg->enc, name, name_end);
+ if (IS_NULL(e->name)) return ONIGERR_MEMORY;
+ e->name_len = name_end - name;
+ /* count the slot only once it holds a name */
+ t->num++;
+#endif
+ }
+
+ if (e->back_num >= 1 &&
+ ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
+ onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+ name, name_end);
+ return ONIGERR_MULTIPLEX_DEFINED_NAME;
+ }
+
+ /* n is the number of group numbers the entry will hold after this call */
+ n = e->back_num + 1;
+ if (n == 1) {
+ /* the first group number is stored inline */
+ e->back_ref1 = backref;
+ }
+ else if (n == 2) {
+ /* second definition: move to a heap array, carrying back_ref1 over */
+ alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
+ refs = (int* )xmalloc(sizeof(int) * alloc);
+ CHECK_NULL_RETURN_MEMERR(refs);
+ e->back_refs = refs;
+ e->back_alloc = alloc;
+ e->back_refs[0] = e->back_ref1;
+ e->back_refs[1] = backref;
+ }
+ else {
+ if (n > e->back_alloc) {
+ alloc = e->back_alloc * 2;
+ /* keep e->back_refs valid if the reallocation fails */
+ refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
+ CHECK_NULL_RETURN_MEMERR(refs);
+ e->back_refs = refs;
+ e->back_alloc = alloc;
+ }
+ e->back_refs[n - 1] = backref;
+ }
+ e->back_num = n;
+
+ return 0;
+}
+
+/* Find the group numbers bound to the name [name, name_end).
+ * On success stores a pointer to them in *nums (0 when the name has no
+ * group numbers) and returns how many there are.  Returns
+ * ONIGERR_UNDEFINED_NAME_REFERENCE, leaving *nums untouched, when the name
+ * is unknown. */
+extern int
+onig_name_to_group_numbers(regex_t* reg, const UChar* name,
+ const UChar* name_end, int** nums)
+{
+ NameEntry* entry = name_find(reg, name, name_end);
+
+ if (IS_NULL(entry)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
+
+ if (entry->back_num == 0)
+ *nums = 0;
+ else if (entry->back_num == 1)
+ *nums = &(entry->back_ref1); /* single number is stored inline */
+ else
+ *nums = entry->back_refs;
+
+ return entry->back_num;
+}
+
+/* Resolve a group name to a single group number.
+ * A negative lookup result is passed through; a name without group numbers
+ * yields ONIGERR_PARSER_BUG.  When the name is bound to several groups and
+ * `region` is given, the last listed group whose region->beg slot is not
+ * ONIG_REGION_NOTPOS is chosen; otherwise the last listed group is used. */
+extern int
+onig_name_to_backref_number(regex_t* reg, const UChar* name,
+ const UChar* name_end, OnigRegion *region)
+{
+ int idx, count, *groups;
+
+ count = onig_name_to_group_numbers(reg, name, name_end, &groups);
+ if (count < 0) return count;
+ if (count == 0) return ONIGERR_PARSER_BUG;
+ if (count == 1) return groups[0];
+
+ if (IS_NOT_NULL(region)) {
+ /* scan from the last definition towards the first */
+ for (idx = count - 1; idx >= 0; idx--) {
+ if (region->beg[groups[idx]] != ONIG_REGION_NOTPOS)
+ return groups[idx];
+ }
+ }
+ return groups[count - 1];
+}
+
+#else /* USE_NAMED_GROUP */
+
+/* Stub compiled when USE_NAMED_GROUP is not defined: ignores its arguments
+ * and reports ONIG_NO_SUPPORT_CONFIG. */
+extern int
+onig_name_to_group_numbers(regex_t* reg, const UChar* name,
+ const UChar* name_end, int** nums)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Stub compiled when USE_NAMED_GROUP is not defined: ignores its arguments
+ * and reports ONIG_NO_SUPPORT_CONFIG. */
+extern int
+onig_name_to_backref_number(regex_t* reg, const UChar* name,
+ const UChar* name_end, OnigRegion* region)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Stub compiled when USE_NAMED_GROUP is not defined: never calls `func`
+ * and reports ONIG_NO_SUPPORT_CONFIG. */
+extern int
+onig_foreach_name(regex_t* reg,
+ int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
+{
+ return ONIG_NO_SUPPORT_CONFIG;
+}
+
+/* Stub compiled when USE_NAMED_GROUP is not defined: there are never any
+ * names, so the count is always 0. */
+extern int
+onig_number_of_names(regex_t* reg)
+{
+ return 0;
+}
+#endif /* else USE_NAMED_GROUP */
+
+/* Returns 1 when unnamed groups of `reg` capture, 0 otherwise.
+ * They do not capture when ONIG_OPTION_DONT_CAPTURE_GROUP is on, nor (with
+ * named-group support) when the regex has named groups, its syntax sets
+ * ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP and ONIG_OPTION_CAPTURE_GROUP is off. */
+extern int
+onig_noname_group_capture_is_active(regex_t* reg)
+{
+ if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
+ return 0;
+
+#ifdef USE_NAMED_GROUP
+ if (onig_number_of_names(reg) > 0) {
+ if (IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP)) {
+ if (! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP))
+ return 0;
+ }
+ }
+#endif
+
+ return 1;
+}
+
+
+#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
+
+/* Reset the fields of the scan environment touched here to their initial
+ * values: status bit sets cleared, counters and flags zeroed, the error
+ * span and the dynamic mem-node array set to NULL, and every static
+ * mem-node slot set to NULL_NODE. */
+static void
+scan_env_clear(ScanEnv* env)
+{
+ int i;
+
+ BIT_STATUS_CLEAR(env->capture_history);
+ BIT_STATUS_CLEAR(env->bt_mem_start);
+ BIT_STATUS_CLEAR(env->bt_mem_end);
+ BIT_STATUS_CLEAR(env->backrefed_mem);
+ env->error = (UChar* )NULL;
+ env->error_end = (UChar* )NULL;
+ env->num_call = 0;
+ env->num_mem = 0;
+#ifdef USE_NAMED_GROUP
+ env->num_named = 0;
+#endif
+ env->mem_alloc = 0;
+ env->mem_nodes_dynamic = (Node** )NULL;
+
+ for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
+ env->mem_nodes_static[i] = NULL_NODE;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ env->num_comb_exp_check = 0;
+ env->comb_exp_max_regnum = 0;
+ env->curr_max_regnum = 0;
+ env->has_recursion = 0;
+#endif
+ env->warnings_flag = 0;
+}
+
+/* Reserve the next capture-group slot in `env`.
+ * While the new number fits below SCANENV_MEMNODES_SIZE nothing is
+ * allocated.  Beyond that a heap array is used: it is created on first
+ * need (seeded with a copy of the static slots) and doubled whenever it is
+ * too small; slots above the current count are set to NULL_NODE.
+ *
+ * Returns the new group number (env->num_mem after the increment), or the
+ * value CHECK_NULL_RETURN_MEMERR returns when an allocation fails, in which
+ * case `env` is left unchanged. */
+static int
+scan_env_add_mem_entry(ScanEnv* env)
+{
+ int i, need, alloc;
+ Node** p;
+
+ need = env->num_mem + 1;
+ if (need >= SCANENV_MEMNODES_SIZE) {
+ if (env->mem_alloc <= need) {
+ if (IS_NULL(env->mem_nodes_dynamic)) {
+ alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
+ p = (Node** )xmalloc(sizeof(Node*) * alloc);
+ /* must be checked before the copy below writes through p */
+ CHECK_NULL_RETURN_MEMERR(p);
+ xmemcpy(p, env->mem_nodes_static,
+ sizeof(Node*) * SCANENV_MEMNODES_SIZE);
+ }
+ else {
+ alloc = env->mem_alloc * 2;
+ /* the old array stays in env until the new pointer is known good */
+ p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
+ CHECK_NULL_RETURN_MEMERR(p);
+ }
+
+ for (i = env->num_mem + 1; i < alloc; i++)
+ p[i] = NULL_NODE;
+
+ env->mem_nodes_dynamic = p;
+ env->mem_alloc = alloc;
+ }
+ }
+
+ env->num_mem++;
+ return env->num_mem;
+}
+
+/* Record `node` as the parse node of capture group `num`.
+ * Returns 0, or ONIGERR_PARSER_BUG when `num` is greater than the number of
+ * groups registered in `env`. */
+static int
+scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
+{
+ if (env->num_mem < num)
+ return ONIGERR_PARSER_BUG;
+
+ SCANENV_MEM_NODES(env)[num] = node;
+ return 0;
+}
+
+
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
+/* Link overlaid on a released Node so that it can sit on the recycle list. */
+typedef struct _FreeNode {
+ struct _FreeNode* next;
+} FreeNode;
+
+/* Head of the list of released nodes; node_new() takes nodes from it. */
+static FreeNode* FreeNodeList = (FreeNode* )NULL;
+#endif
+
+/* Release `node` and the sub-trees it owns.  NULL is accepted.
+ * Children are released recursively, except that list/alternation chains
+ * are followed iteratively through their cdr link.  With
+ * USE_PARSE_TREE_NODE_RECYCLE the node structs are pushed onto FreeNodeList
+ * instead of being passed to xfree(). */
+extern void
+onig_node_free(Node* node)
+{
+ start:
+ if (IS_NULL(node)) return ;
+
+ switch (NTYPE(node)) {
+ case NT_STR:
+ /* free the string storage only when it is not the node's inline buf */
+ if (NSTR(node)->capa != 0 &&
+ IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
+ xfree(NSTR(node)->s);
+ }
+ break;
+
+ case NT_LIST:
+ case NT_ALT:
+ onig_node_free(NCAR(node));
+ {
+ /* read the cdr link before the cell is released */
+ Node* next_node = NCDR(node);
+
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
+ {
+ FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
+ n->next = FreeNodeList;
+ FreeNodeList = n;
+ THREAD_ATOMIC_END;
+ }
+#else
+ xfree(node);
+#endif
+ /* continue with the rest of the chain without recursing */
+ node = next_node;
+ goto start;
+ }
+ break;
+
+ case NT_CCLASS:
+ {
+ CClassNode* cc = NCCLASS(node);
+
+ /* a class flagged as shared is left completely untouched */
+ if (IS_NCCLASS_SHARE(cc)) return ;
+ if (cc->mbuf)
+ bbuf_free(cc->mbuf);
+ }
+ break;
+
+ case NT_QTFR:
+ if (NQTFR(node)->target)
+ onig_node_free(NQTFR(node)->target);
+ break;
+
+ case NT_ENCLOSE:
+ if (NENCLOSE(node)->target)
+ onig_node_free(NENCLOSE(node)->target);
+ break;
+
+ case NT_BREF:
+ if (IS_NOT_NULL(NBREF(node)->back_dynamic))
+ xfree(NBREF(node)->back_dynamic);
+ break;
+
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ onig_node_free(NANCHOR(node)->target);
+ break;
+ }
+
+ /* finally release the node struct itself */
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
+ {
+ FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
+ n->next = FreeNodeList;
+ FreeNodeList = n;
+ THREAD_ATOMIC_END;
+ }
+#else
+ xfree(node);
+#endif
+}
+
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
+/* Pass every node parked on FreeNodeList to xfree(), leaving the list
+ * empty.  Always returns 0.  The list is walked without taking the
+ * THREAD_ATOMIC lock (the calls are commented out). */
+extern int
+onig_free_node_list(void)
+{
+ FreeNode* head;
+
+ /* THREAD_ATOMIC_START; */
+ for (head = FreeNodeList; IS_NOT_NULL(head); head = FreeNodeList) {
+ FreeNodeList = head->next;
+ xfree(head);
+ }
+ /* THREAD_ATOMIC_END; */
+ return 0;
+}
+#endif
+
+/* Obtain storage for one Node: the head of FreeNodeList when recycling is
+ * compiled in and the list is not empty, otherwise a fresh xmalloc().
+ * The contents are NOT initialised (recycled nodes keep stale data), and
+ * the xmalloc() result is returned unchecked, so callers must both test
+ * for NULL and set every field they rely on. */
+static Node*
+node_new(void)
+{
+ Node* node;
+
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
+ THREAD_ATOMIC_START;
+ if (IS_NOT_NULL(FreeNodeList)) {
+ node = (Node* )FreeNodeList;
+ FreeNodeList = FreeNodeList->next;
+ THREAD_ATOMIC_END;
+ return node;
+ }
+ THREAD_ATOMIC_END;
+#endif
+
+ node = (Node* )xmalloc(sizeof(Node));
+ /* xmemset(node, 0, sizeof(Node)); */
+ return node;
+}
+
+
+/* Put a character-class node into its empty state: bit set cleared, flags
+ * zeroed, no multi-byte range buffer. */
+static void
+initialize_cclass(CClassNode* cc)
+{
+ BITSET_CLEAR(cc->bs);
+ /* cc->base.flags = 0; */
+ cc->flags = 0;
+ cc->mbuf = NULL;
+}
+
+/* Create an empty NT_CCLASS node.  A failed node allocation is handled by
+ * CHECK_NULL_RETURN (presumably by returning NULL -- confirm against the
+ * macro definition). */
+static Node*
+node_new_cclass(void)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_CCLASS);
+ initialize_cclass(NCCLASS(node));
+ return node;
+}
+
+/* Build a character-class node from a code point range table.
+ *
+ *  not     non-zero marks the class as negated
+ *  sb_out  code points below this value are entered into the bit set
+ *  ranges  range table read through the ONIGENC_CODE_RANGE_* macros; may
+ *          be NULL
+ *
+ * When `ranges` holds at least one range, the node's multi-byte buffer is a
+ * BBuf whose data pointer aliases `ranges` itself -- the table is not
+ * copied.  Returns the node, or NULL when the node or the BBuf cannot be
+ * allocated; in the latter case the node is released instead of leaked. */
+static Node*
+node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
+ const OnigCodePoint ranges[])
+{
+ int n, i;
+ CClassNode* cc;
+ OnigCodePoint j;
+
+ Node* node = node_new_cclass();
+ CHECK_NULL_RETURN(node);
+
+ cc = NCCLASS(node);
+ if (not != 0) NCCLASS_SET_NOT(cc);
+
+ BITSET_CLEAR(cc->bs);
+ if (sb_out > 0 && IS_NOT_NULL(ranges)) {
+ n = ONIGENC_CODE_RANGE_NUM(ranges);
+ for (i = 0; i < n; i++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
+ j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
+ /* stop filling the bit set at the first code point >= sb_out */
+ if (j >= sb_out) goto sb_end;
+
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ }
+ }
+
+ sb_end:
+ if (IS_NULL(ranges)) {
+ is_null:
+ cc->mbuf = NULL;
+ }
+ else {
+ BBuf* bbuf;
+
+ n = ONIGENC_CODE_RANGE_NUM(ranges);
+ if (n == 0) goto is_null;
+
+ bbuf = (BBuf* )xmalloc(sizeof(BBuf));
+ if (IS_NULL(bbuf)) {
+ /* cc->mbuf is still NULL here, so this only releases the node */
+ onig_node_free(node);
+ return NULL;
+ }
+ bbuf->alloc = n + 1;
+ bbuf->used = n + 1;
+ bbuf->p = (UChar* )((void* )ranges);
+
+ cc->mbuf = bbuf;
+ }
+
+ return node;
+}
+
+/* Create an NT_CTYPE node holding character type `type`; `not` is stored
+ * as the node's negation flag. */
+static Node*
+node_new_ctype(int type, int not)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_CTYPE);
+ NCTYPE(node)->ctype = type;
+ NCTYPE(node)->not = not;
+ return node;
+}
+
+/* Create an NT_CANY node; only the node type is set. */
+static Node*
+node_new_anychar(void)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_CANY);
+ return node;
+}
+
+/* Create an NT_LIST cell whose car is `left` and whose cdr is `right`. */
+static Node*
+node_new_list(Node* left, Node* right)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_LIST);
+ NCAR(node) = left;
+ NCDR(node) = right;
+ return node;
+}
+
+/* Exported wrapper around node_new_list(). */
+extern Node*
+onig_node_new_list(Node* left, Node* right)
+{
+ return node_new_list(left, right);
+}
+
+/* Wrap `x` in a new list cell and, when `list` is not NULL, link that cell
+ * after the last cell of `list`.  Returns the new cell (not the list
+ * head), or NULL_NODE when the cell cannot be created. */
+extern Node*
+onig_node_list_add(Node* list, Node* x)
+{
+ Node* cell = onig_node_new_list(x, NULL);
+ Node* tail;
+
+ if (IS_NULL(cell)) return NULL_NODE;
+ if (IS_NULL(list)) return cell;
+
+ /* walk to the last cell of the existing chain */
+ for (tail = list; IS_NOT_NULL(NCDR(tail)); tail = NCDR(tail))
+ ;
+ NCDR(tail) = cell;
+ return cell;
+}
+
+/* Create an NT_ALT cell whose car is `left` and whose cdr is `right`. */
+extern Node*
+onig_node_new_alt(Node* left, Node* right)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_ALT);
+ NCAR(node) = left;
+ NCDR(node) = right;
+ return node;
+}
+
+/* Create an NT_ANCHOR node of the given anchor `type`, with no target and
+ * char_len set to -1. */
+extern Node*
+onig_node_new_anchor(int type)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_ANCHOR);
+ NANCHOR(node)->type = type;
+ NANCHOR(node)->target = NULL;
+ NANCHOR(node)->char_len = -1;
+ return node;
+}
+
+/* Create an NT_BREF node referring to the `back_num` group numbers in
+ * `backrefs`.
+ *
+ *  by_name      non-zero sets NST_NAME_REF
+ *  exist_level  (USE_BACKREF_WITH_LEVEL) non-zero sets NST_NEST_LEVEL and
+ *  nest_level   stores this level in the node
+ *  env          scan environment used to look at the referenced groups
+ *
+ * The numbers are copied into the node's inline array when they fit in
+ * NODE_BACKREFS_SIZE, otherwise into a heap array.  Returns NULL when the
+ * node or that array cannot be allocated. */
+static Node*
+node_new_backref(int back_num, int* backrefs, int by_name,
+#ifdef USE_BACKREF_WITH_LEVEL
+ int exist_level, int nest_level,
+#endif
+ ScanEnv* env)
+{
+ int i;
+ Node* node = node_new();
+
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_BREF);
+ NBREF(node)->state = 0;
+ NBREF(node)->back_num = back_num;
+ NBREF(node)->back_dynamic = (int* )NULL;
+ if (by_name != 0)
+ NBREF(node)->state |= NST_NAME_REF;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (exist_level != 0) {
+ NBREF(node)->state |= NST_NEST_LEVEL;
+ NBREF(node)->nest_level = nest_level;
+ }
+#endif
+
+ /* a known group number without a registered node yet is treated as a
+ reference made from inside that group */
+ for (i = 0; i < back_num; i++) {
+ if (backrefs[i] <= env->num_mem &&
+ IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
+ NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
+ break;
+ }
+ }
+
+ if (back_num <= NODE_BACKREFS_SIZE) {
+ for (i = 0; i < back_num; i++)
+ NBREF(node)->back_static[i] = backrefs[i];
+ }
+ else {
+ int* p = (int* )xmalloc(sizeof(int) * back_num);
+ if (IS_NULL(p)) {
+ /* back_dynamic is still NULL, so only the node itself is released */
+ onig_node_free(node);
+ return NULL;
+ }
+ NBREF(node)->back_dynamic = p;
+ for (i = 0; i < back_num; i++)
+ p[i] = backrefs[i];
+ }
+ return node;
+}
+
+#ifdef USE_SUBEXP_CALL
+/* Create an NT_CALL node for a subexpression call.  The name span
+ * [name, name_end) is stored by pointer, not copied; the target is left
+ * unresolved (NULL_NODE). */
+static Node*
+node_new_call(UChar* name, UChar* name_end, int gnum)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_CALL);
+ NCALL(node)->state = 0;
+ NCALL(node)->target = NULL_NODE;
+ NCALL(node)->name = name;
+ NCALL(node)->name_end = name_end;
+ NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
+ return node;
+}
+#endif
+
+/* Create a greedy NT_QTFR node repeating between `lower` and `upper`
+ * times, with no target yet.  A non-zero `by_number` sets NST_BY_NUMBER in
+ * the node state. */
+static Node*
+node_new_quantifier(int lower, int upper, int by_number)
+{
+ QtfrNode* q;
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_QTFR);
+ q = NQTFR(node);
+
+ q->state = 0;
+ if (by_number != 0)
+ q->state |= NST_BY_NUMBER;
+
+ q->target = NULL;
+ q->lower = lower;
+ q->upper = upper;
+ q->greedy = 1;
+ q->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+ q->head_exact = NULL_NODE;
+ q->next_head_exact = NULL_NODE;
+ q->is_refered = 0;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ q->comb_exp_check_num = 0;
+#endif
+
+ return node;
+}
+
+/* Create an NT_ENCLOSE node of the given enclose `type` with no target;
+ * state, regnum, option and opt_count start at 0 and call_addr at -1. */
+static Node*
+node_new_enclose(int type)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_ENCLOSE);
+ NENCLOSE(node)->type = type;
+ NENCLOSE(node)->state = 0;
+ NENCLOSE(node)->regnum = 0;
+ NENCLOSE(node)->option = 0;
+ NENCLOSE(node)->target = NULL;
+ NENCLOSE(node)->call_addr = -1;
+ NENCLOSE(node)->opt_count = 0;
+ return node;
+}
+
+/* Exported wrapper around node_new_enclose(). */
+extern Node*
+onig_node_new_enclose(int type)
+{
+ return node_new_enclose(type);
+}
+
+/* Create an ENCLOSE_MEMORY (capture group) node.  A non-zero `is_named`
+ * sets NST_NAMED_GROUP.  `option` is stored in the node only when
+ * USE_SUBEXP_CALL is defined; otherwise it is ignored. */
+static Node*
+node_new_enclose_memory(OnigOptionType option, int is_named)
+{
+ Node* node = node_new_enclose(ENCLOSE_MEMORY);
+ CHECK_NULL_RETURN(node);
+ if (is_named != 0)
+ SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
+
+#ifdef USE_SUBEXP_CALL
+ NENCLOSE(node)->option = option;
+#endif
+ return node;
+}
+
+/* Create an ENCLOSE_OPTION node carrying the option set `option`. */
+static Node*
+node_new_option(OnigOptionType option)
+{
+ Node* node = node_new_enclose(ENCLOSE_OPTION);
+ CHECK_NULL_RETURN(node);
+ NENCLOSE(node)->option = option;
+ return node;
+}
+
+/* Append the bytes [s, end) to the string node `node`.
+ * Short strings stay in the node's inline buf; once a capacity has been
+ * recorded, or the result would not fit in NODE_STR_BUF_SIZE - 1 bytes,
+ * the data lives in a separately allocated buffer that is grown as needed
+ * (requested size plus NODE_STR_MARGIN).  An empty or reversed span is a
+ * no-op.  Returns 0, or the value CHECK_NULL_RETURN_MEMERR returns when
+ * the buffer cannot be grown. */
+extern int
+onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
+{
+ ptrdiff_t addlen = end - s;
+
+ if (addlen > 0) {
+ ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
+
+ if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
+ UChar* p;
+ ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
+
+ if (capa <= NSTR(node)->capa) {
+ /* the current buffer is already large enough */
+ onig_strcpy(NSTR(node)->s + len, s, end);
+ }
+ else {
+ /* moving out of the inline buf uses the "from_static" variant;
+ otherwise the existing buffer is handed to strcat_capa() */
+ if (NSTR(node)->s == NSTR(node)->buf)
+ p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
+ s, end, capa);
+ else
+ p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
+
+ CHECK_NULL_RETURN_MEMERR(p);
+ NSTR(node)->s = p;
+ NSTR(node)->capa = capa;
+ }
+ }
+ else {
+ /* still fits in the inline buf */
+ onig_strcpy(NSTR(node)->s + len, s, end);
+ }
+ NSTR(node)->end = NSTR(node)->s + len + addlen;
+ }
+
+ return 0;
+}
+
+/* Replace the contents of string node `node` with [s, end): the node is
+ * cleared (which also resets its flag) and the bytes are appended.
+ * Returns the result of onig_node_str_cat(). */
+extern int
+onig_node_str_set(Node* node, const UChar* s, const UChar* end)
+{
+ onig_node_str_clear(node);
+ return onig_node_str_cat(node, s, end);
+}
+
+/* Append the single byte `c` to string node `node`; returns the result of
+ * onig_node_str_cat(). */
+static int
+node_str_cat_char(Node* node, UChar c)
+{
+ UChar one = c;
+
+ return onig_node_str_cat(node, &one, &one + 1);
+}
+
+/* Re-type `node` in place as an empty NT_STR node with the given `flag`,
+ * pointing at its inline buf.  Nothing owned by the node's previous type
+ * is released here. */
+extern void
+onig_node_conv_to_str_node(Node* node, int flag)
+{
+ SET_NTYPE(node, NT_STR);
+ NSTR(node)->flag = flag;
+ NSTR(node)->capa = 0;
+ NSTR(node)->s = NSTR(node)->buf;
+ NSTR(node)->end = NSTR(node)->buf;
+}
+
+/* Empty the string node `node`: free its separately allocated buffer, if
+ * it has one, and reset capacity, flag and both pointers so that the node
+ * uses its inline buf again. */
+extern void
+onig_node_str_clear(Node* node)
+{
+ if (NSTR(node)->capa != 0 &&
+ IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
+ xfree(NSTR(node)->s);
+ }
+
+ NSTR(node)->capa = 0;
+ NSTR(node)->flag = 0;
+ NSTR(node)->s = NSTR(node)->buf;
+ NSTR(node)->end = NSTR(node)->buf;
+}
+
+/* Create an NT_STR node holding a copy of the bytes [s, end).
+ * Returns NULL when the node cannot be allocated or the copy fails; in the
+ * latter case the node is released first. */
+static Node*
+node_new_str(const UChar* s, const UChar* end)
+{
+ Node* node = node_new();
+ CHECK_NULL_RETURN(node);
+
+ SET_NTYPE(node, NT_STR);
+ NSTR(node)->capa = 0;
+ NSTR(node)->flag = 0;
+ NSTR(node)->s = NSTR(node)->buf;
+ NSTR(node)->end = NSTR(node)->buf;
+ if (onig_node_str_cat(node, s, end)) {
+ onig_node_free(node);
+ return NULL;
+ }
+ return node;
+}
+
+/* Exported wrapper around node_new_str(). */
+extern Node*
+onig_node_new_str(const UChar* s, const UChar* end)
+{
+ return node_new_str(s, end);
+}
+
+/* Create a string node holding a copy of [s, end) and mark it raw.
+ * Returns NULL when node_new_str() fails; the raw flag is only applied to
+ * a node that actually exists. */
+static Node*
+node_new_str_raw(UChar* s, UChar* end)
+{
+ Node* node = node_new_str(s, end);
+
+ if (IS_NOT_NULL(node))
+ NSTRING_SET_RAW(node);
+ return node;
+}
+
+/* Create an empty string node (node_new_str() with a NULL, zero-length
+ * span). */
+static Node*
+node_new_empty(void)
+{
+ return node_new_str(NULL, NULL);
+}
+
+/* Create a raw string node containing only the byte `c`. */
+static Node*
+node_new_str_raw_char(UChar c)
+{
+ UChar one = c;
+
+ return node_new_str_raw(&one, &one + 1);
+}
+
+/* Split the last character off the string `sn`.
+ * When the string holds more than one character, a new string node with a
+ * copy of the last character is returned (inheriting the NSTR_RAW flag)
+ * and `sn` is shortened to exclude it.  Returns NULL_NODE, leaving `sn`
+ * untouched, when the string is empty, consists of a single character, or
+ * the new node cannot be allocated. */
+static Node*
+str_node_split_last_char(StrNode* sn, OnigEncoding enc)
+{
+ const UChar *p;
+ Node* n = NULL_NODE;
+
+ if (sn->end > sn->s) {
+ p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
+ if (p && p > sn->s) { /* can be split. */
+ n = node_new_str(p, sn->end);
+ /* only flag and truncate once the remainder node really exists, so an
+ allocation failure neither dereferences NULL nor drops the last
+ character from sn */
+ if (IS_NOT_NULL(n)) {
+ if ((sn->flag & NSTR_RAW) != 0)
+ NSTRING_SET_RAW(n);
+ sn->end = (UChar* )p;
+ }
+ }
+ }
+ return n;
+}
+
+/* Returns 1 when the string `sn` is longer than its first character (as
+ * measured by enclen()), 0 otherwise, including for an empty string. */
+static int
+str_node_can_be_split(StrNode* sn, OnigEncoding enc)
+{
+ if (sn->end <= sn->s) return 0;
+
+ return (enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0;
+}
+
+/* Scan a run of decimal digits starting at *src (bounded by `end`).
+ * On success *src is advanced past the digits and the value is returned
+ * (0 when there are none).  On overflow -1 is returned and *src is left
+ * unchanged.
+ * NOTE(review): PFETCH_READY/PEND/PFETCH/PUNFETCH are macros defined
+ * elsewhere; they presumably operate on the locals `p`, `end` and `enc`,
+ * so those names must not be changed. */
+extern int
+onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
+{
+ unsigned int num, val;
+ OnigCodePoint c;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ num = 0;
+ while (!PEND) {
+ PFETCH(c);
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ val = (unsigned int )DIGITVAL(c);
+ /* reject the digit if num * 10 + val would exceed INT_MAX_LIMIT */
+ if ((INT_MAX_LIMIT - val) / 10UL < num)
+ return -1; /* overflow */
+
+ num = num * 10 + val;
+ }
+ else {
+ /* not a digit: push it back and stop */
+ PUNFETCH;
+ break;
+ }
+ }
+ *src = p;
+ return num;
+}
+
+/* Scan up to `maxlen` hexadecimal digits starting at *src (bounded by
+ * `end`); a negative `maxlen` never reaches 0 and so imposes no limit.
+ * On success *src is advanced past the digits and the value is returned
+ * (0 when there are none).  On overflow -1 is returned and *src is left
+ * unchanged.
+ * NOTE(review): the PFETCH* macros presumably operate on the locals `p`,
+ * `end` and `enc`, so those names must not be changed. */
+static int
+scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
+ OnigEncoding enc)
+{
+ OnigCodePoint c;
+ unsigned int num, val;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ num = 0;
+ while (!PEND && maxlen-- != 0) {
+ PFETCH(c);
+ if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
+ val = (unsigned int )XDIGITVAL(enc,c);
+ /* reject the digit if num * 16 + val would exceed INT_MAX_LIMIT */
+ if ((INT_MAX_LIMIT - val) / 16UL < num)
+ return -1; /* overflow */
+
+ /* the digit value is recomputed here rather than reusing val */
+ num = (num << 4) + XDIGITVAL(enc,c);
+ }
+ else {
+ /* not a hex digit: push it back and stop */
+ PUNFETCH;
+ break;
+ }
+ }
+ *src = p;
+ return num;
+}
+
+/* Scan up to `maxlen` octal digits starting at *src (bounded by `end`); a
+ * negative `maxlen` never reaches 0 and so imposes no limit.
+ * On success *src is advanced past the digits and the value is returned
+ * (0 when there are none).  On overflow -1 is returned and *src is left
+ * unchanged.
+ * NOTE(review): the PFETCH* macros presumably operate on the locals `p`,
+ * `end` and `enc`, so those names must not be changed. */
+static int
+scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
+ OnigEncoding enc)
+{
+ OnigCodePoint c;
+ unsigned int num, val;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ num = 0;
+ while (!PEND && maxlen-- != 0) {
+ PFETCH(c);
+ /* a digit below '8', i.e. an octal digit */
+ if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
+ val = ODIGITVAL(c);
+ /* reject the digit if num * 8 + val would exceed INT_MAX_LIMIT */
+ if ((INT_MAX_LIMIT - val) / 8UL < num)
+ return -1; /* overflow */
+
+ num = (num << 3) + val;
+ }
+ else {
+ /* not an octal digit: push it back and stop */
+ PUNFETCH;
+ break;
+ }
+ }
+ *src = p;
+ return num;
+}
+
+
+#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
+ BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
+
+/* data format:
+ [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
+ (all data size is OnigCodePoint)
+ */
+/* Allocate a new range buffer, store it in *pbuf and write a range count
+ * of 0 at its start.  Returns 0 on success.
+ * NOTE(review): when BBUF_INIT fails, *pbuf keeps pointing at the BBuf
+ * allocated here; callers presumably release it through their own error
+ * paths -- confirm.  `r` may also be written by the BBUF_WRITE macro. */
+static int
+new_code_range(BBuf** pbuf)
+{
+#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
+ int r;
+ OnigCodePoint n;
+ BBuf* bbuf;
+
+ bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
+ CHECK_NULL_RETURN_MEMERR(*pbuf);
+ r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
+ if (r) return r;
+
+ n = 0;
+ BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+ return 0;
+}
+
+/* Insert the code point range [from, to] into the sorted range buffer
+ * *pbuf (created on demand), merging it with every stored range it
+ * overlaps or touches.  Reversed bounds are swapped first.
+ *
+ *  checkdup  non-zero: emit CC_DUP_WARN(env) when the new range overlaps
+ *            an existing one
+ *
+ * Returns 0, ONIGERR_TOO_MANY_MULTI_BYTE_RANGES when the range count would
+ * exceed ONIG_MAX_MULTI_BYTE_RANGES_NUM, or an error from creating the
+ * buffer.
+ * NOTE(review): `r` is never assigned in the visible code after the
+ * creation branch; the BBUF_* macros presumably use it (and may return on
+ * failure) -- confirm before renaming or removing it.
+ * NOTE(review): `data[x*2] - 1` wraps if a stored range starts at 0 and
+ * OnigCodePoint is unsigned -- confirm whether that can occur here. */
+static int
+add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
+ int checkdup)
+{
+ int r, inc_n, pos;
+ int low, high, bound, x;
+ OnigCodePoint n, *data;
+ BBuf* bbuf;
+
+ if (from > to) {
+ n = from; from = to; to = n;
+ }
+
+ if (IS_NULL(*pbuf)) {
+ r = new_code_range(pbuf);
+ if (r) return r;
+ bbuf = *pbuf;
+ n = 0;
+ }
+ else {
+ bbuf = *pbuf;
+ GET_CODE_POINT(n, bbuf->p);
+ }
+ /* data[2*i] / data[2*i + 1] are the from / to of stored range i */
+ data = (OnigCodePoint* )(bbuf->p);
+ data++;
+
+ /* low: index of the first stored range whose end is not below `from` */
+ for (low = 0, bound = n; low < bound; ) {
+ x = (low + bound) >> 1;
+ if (from > data[x*2 + 1])
+ low = x + 1;
+ else
+ bound = x;
+ }
+
+ /* high: index just past the last stored range that starts at or before
+ to + 1, i.e. that overlaps or directly follows the new range */
+ for (high = low, bound = n; high < bound; ) {
+ x = (high + bound) >> 1;
+ if (to >= data[x*2] - 1)
+ high = x + 1;
+ else
+ bound = x;
+ }
+
+ /* ranges low..high-1 collapse into one; inc_n is the net count change */
+ inc_n = low + 1 - high;
+ if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
+ return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
+
+ if (inc_n != 1) {
+ /* at least one stored range is absorbed: widen [from, to] to cover it */
+ if (checkdup && to >= data[low*2]) CC_DUP_WARN(env);
+ if (from > data[low*2])
+ from = data[low*2];
+ if (to < data[(high - 1)*2 + 1])
+ to = data[(high - 1)*2 + 1];
+ }
+
+ if (inc_n != 0 && (OnigCodePoint )high < n) {
+ /* shift the ranges behind the insertion point to their new position */
+ int from_pos = SIZE_CODE_POINT * (1 + high * 2);
+ int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
+ int size = (n - high) * 2 * SIZE_CODE_POINT;
+
+ if (inc_n > 0) {
+ BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
+ }
+ else {
+ BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
+ }
+ }
+
+ /* write the merged range into slot `low`, then the updated count */
+ pos = SIZE_CODE_POINT * (1 + low * 2);
+ BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
+ BBUF_WRITE_CODE_POINT(bbuf, pos, from);
+ BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
+ n += inc_n;
+ BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+
+ return 0;
+}
+
+/* add_code_range_to_buf0() with duplicate checking enabled. */
+static int
+add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
+{
+ return add_code_range_to_buf0(pbuf, env, from, to, 1);
+}
+
+/* Add [from, to] to the range buffer *pbuf, treating from > to as an empty
+ * range instead of swapping the bounds: it is silently accepted (returns
+ * 0, nothing added) when the syntax sets ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
+ * and rejected with ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS otherwise. */
+static int
+add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
+{
+ if (from <= to)
+ return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
+
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ return 0;
+
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+}
+
+/* add_code_range0() with duplicate checking enabled. */
+static int
+add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
+{
+ return add_code_range0(pbuf, env, from, to, 1);
+}
+
+/* Build in *pbuf the complement of the range buffer `bbuf`, starting from
+ * MBCODE_START_POS(enc) and running up to the largest OnigCodePoint value.
+ * A NULL or empty `bbuf` yields SET_ALL_MULTI_BYTE_RANGE(enc, pbuf).
+ * *pbuf is set to NULL first; returns 0 or the first error from adding a
+ * range.
+ * NOTE(review): `from - 1` wraps if a stored range starts at 0 and
+ * OnigCodePoint is unsigned -- confirm whether that can occur here. */
+static int
+not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
+{
+ int r, i, n;
+ OnigCodePoint pre, from, *data, to = 0;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf)) {
+ set_all:
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+ }
+
+ data = (OnigCodePoint* )(bbuf->p);
+ GET_CODE_POINT(n, data);
+ data++;
+ if (n <= 0) goto set_all;
+
+ r = 0;
+ /* pre is the first code point not yet covered by a visited range */
+ pre = MBCODE_START_POS(enc);
+ for (i = 0; i < n; i++) {
+ from = data[i*2];
+ to = data[i*2+1];
+ if (pre <= from - 1) {
+ /* emit the gap in front of this range */
+ r = add_code_range_to_buf(pbuf, env, pre, from - 1);
+ if (r != 0) return r;
+ }
+ /* range reaches the maximum code point: nothing can follow it */
+ if (to == ~((OnigCodePoint )0)) break;
+ pre = to + 1;
+ }
+ /* emit the gap behind the last range, if there is one */
+ if (to < ~((OnigCodePoint )0)) {
+ r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
+ }
+ return r;
+}
+
+/* Exchange two (range buffer, negation flag) pairs.  All four arguments
+ * must be assignable lvalues. */
+#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
+ BBuf *tbuf; \
+ int tnot; \
+ tnot = not1; not1 = not2; not2 = tnot; \
+ tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
+} while (0)
+
+/* Compute in *pbuf the union of two multi-byte range sets, each given as a
+ * buffer plus a negation flag (a NULL buffer is an empty set, so a negated
+ * NULL buffer is the full set).  *pbuf is set to NULL first; it stays NULL
+ * when the result is empty.  Returns 0 or the first error encountered. */
+static int
+or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
+ BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
+{
+ int r;
+ OnigCodePoint i, n1, *data1;
+ OnigCodePoint from, to;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
+ /* empty OR empty is empty; any negated empty side gives everything */
+ if (not1 != 0 || not2 != 0)
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+ return 0;
+ }
+
+ r = 0;
+ /* make sure that a single NULL buffer, if any, is operand 1 */
+ if (IS_NULL(bbuf2))
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ if (IS_NULL(bbuf1)) {
+ if (not1 != 0) {
+ return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
+ }
+ else {
+ /* operand 1 is empty: the result is operand 2 (or its complement) */
+ if (not2 == 0) {
+ return bbuf_clone(pbuf, bbuf2);
+ }
+ else {
+ return not_code_range_buf(enc, bbuf2, pbuf, env);
+ }
+ }
+ }
+
+ /* make sure that a single negated operand, if any, is operand 2 */
+ if (not1 != 0)
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ GET_CODE_POINT(n1, data1);
+ data1++;
+
+ /* start from operand 2 (or its complement) ... */
+ if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
+ r = bbuf_clone(pbuf, bbuf2);
+ }
+ else if (not1 == 0) { /* 1 OR (not 2) */
+ r = not_code_range_buf(enc, bbuf2, pbuf, env);
+ }
+ if (r != 0) return r;
+
+ /* ... and merge every range of operand 1 into it */
+ for (i = 0; i < n1; i++) {
+ from = data1[i*2];
+ to = data1[i*2+1];
+ r = add_code_range_to_buf(pbuf, env, from, to);
+ if (r != 0) return r;
+ }
+ return 0;
+}
+
+/* Add to *pbuf the part of [from1, to1] that is NOT covered by any of the
+ * `n` ranges in `data` (pairs of from/to, walked in stored order).
+ * Returns 0 or the first error from adding a range. */
+static int
+and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
+ OnigCodePoint* data, int n)
+{
+ int i, r;
+ OnigCodePoint from2, to2;
+
+ for (i = 0; i < n; i++) {
+ from2 = data[i*2];
+ to2 = data[i*2+1];
+ if (from2 < from1) {
+ /* excluded range starts before the remaining span */
+ if (to2 < from1) continue;
+ else {
+ /* it covers the head of the span: cut the head off */
+ from1 = to2 + 1;
+ }
+ }
+ else if (from2 <= to1) {
+ /* excluded range starts inside the remaining span */
+ if (to2 < to1) {
+ /* it ends inside too: emit the piece in front, keep the rest */
+ if (from1 <= from2 - 1) {
+ r = add_code_range_to_buf(pbuf, env, from1, from2-1);
+ if (r != 0) return r;
+ }
+ from1 = to2 + 1;
+ }
+ else {
+ /* it covers the tail of the span: cut the tail off */
+ to1 = from2 - 1;
+ }
+ }
+ else {
+ from1 = from2;
+ }
+ if (from1 > to1) break;
+ }
+ /* emit whatever is left of the span */
+ if (from1 <= to1) {
+ r = add_code_range_to_buf(pbuf, env, from1, to1);
+ if (r != 0) return r;
+ }
+ return 0;
+}
+
+/* Compute in *pbuf the intersection of two multi-byte range sets, each
+ * given as a buffer plus a negation flag (a NULL buffer is an empty set).
+ * *pbuf is set to NULL first and stays NULL when nothing is added.
+ * When both buffers are non-NULL and both flags are set, neither branch
+ * below runs and *pbuf stays NULL; and_cclass() handles that combination
+ * itself before calling here.  Returns 0 or the first error encountered. */
+static int
+and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
+{
+ int r;
+ OnigCodePoint i, j, n1, n2, *data1, *data2;
+ OnigCodePoint from, to, from1, to1, from2, to2;
+
+ *pbuf = (BBuf* )NULL;
+ if (IS_NULL(bbuf1)) {
+ /* (not empty) AND 2 is 2; empty AND anything is empty */
+ if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
+ return bbuf_clone(pbuf, bbuf2);
+ return 0;
+ }
+ else if (IS_NULL(bbuf2)) {
+ if (not2 != 0)
+ return bbuf_clone(pbuf, bbuf1);
+ return 0;
+ }
+
+ /* make sure that a single negated operand, if any, is operand 2 */
+ if (not1 != 0)
+ SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+
+ data1 = (OnigCodePoint* )(bbuf1->p);
+ data2 = (OnigCodePoint* )(bbuf2->p);
+ GET_CODE_POINT(n1, data1);
+ GET_CODE_POINT(n2, data2);
+ data1++;
+ data2++;
+
+ if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
+ /* pairwise overlap of every range of 1 with every range of 2 */
+ for (i = 0; i < n1; i++) {
+ from1 = data1[i*2];
+ to1 = data1[i*2+1];
+ for (j = 0; j < n2; j++) {
+ from2 = data2[j*2];
+ to2 = data2[j*2+1];
+ if (from2 > to1) break;
+ if (to2 < from1) continue;
+ from = MAX(from1, from2);
+ to = MIN(to1, to2);
+ r = add_code_range_to_buf(pbuf, env, from, to);
+ if (r != 0) return r;
+ }
+ }
+ }
+ else if (not1 == 0) { /* 1 AND (not 2) */
+ /* every range of 1 minus all ranges of 2 */
+ for (i = 0; i < n1; i++) {
+ from1 = data1[i*2];
+ to1 = data1[i*2+1];
+ r = and_code_range1(pbuf, env, from1, to1, data2, n2);
+ if (r != 0) return r;
+ }
+ }
+
+ return 0;
+}
+
+/* Intersect character class `dest` with `cc`, storing the result in
+ * `dest`.  The negation flag of `dest` is kept, so when `dest` is negated
+ * the complement of the result is what gets stored.  `cc` is not modified.
+ * For non-single-byte encodings the multi-byte buffer of `dest` is
+ * replaced and the old one freed.  Returns 0 or an error code; on error
+ * the bit set of `dest` has already been updated but its buffer has not. */
+static int
+and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
+{
+ OnigEncoding enc = env->enc;
+ int r, not1, not2;
+ BBuf *buf1, *buf2, *pbuf = 0;
+ BitSetRef bsr1, bsr2;
+ BitSet bs1, bs2;
+
+ not1 = IS_NCCLASS_NOT(dest);
+ bsr1 = dest->bs;
+ buf1 = dest->mbuf;
+ not2 = IS_NCCLASS_NOT(cc);
+ bsr2 = cc->bs;
+ buf2 = cc->mbuf;
+
+ /* work on the effective (de-negated) bit sets, using local copies */
+ if (not1 != 0) {
+ bitset_invert_to(bsr1, bs1);
+ bsr1 = bs1;
+ }
+ if (not2 != 0) {
+ bitset_invert_to(bsr2, bs2);
+ bsr2 = bs2;
+ }
+ bitset_and(bsr1, bsr2);
+ if (bsr1 != dest->bs) {
+ /* the result was computed in the local copy: store it back */
+ bitset_copy(dest->bs, bsr1);
+ bsr1 = dest->bs;
+ }
+ if (not1 != 0) {
+ /* dest stays negated, so keep the complement of the result */
+ bitset_invert(dest->bs);
+ }
+
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ if (not1 != 0 && not2 != 0) {
+ /* (not A) AND (not B) == not (A OR B); dest keeps its NOT flag */
+ r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
+ }
+ else {
+ r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
+ if (r == 0 && not1 != 0) {
+ /* store the complement, for the same reason as the bit set above */
+ BBuf *tbuf = 0;
+ r = not_code_range_buf(enc, pbuf, &tbuf, env);
+ bbuf_free(pbuf);
+ pbuf = tbuf;
+ }
+ }
+ if (r != 0) {
+ bbuf_free(pbuf);
+ return r;
+ }
+
+ dest->mbuf = pbuf;
+ bbuf_free(buf1);
+ return r;
+ }
+ return 0;
+}
+
+/* Unite character class `dest` with `cc`, storing the result in `dest`.
+ * The negation flag of `dest` is kept, so when `dest` is negated the
+ * complement of the result is what gets stored.  `cc` is not modified.
+ * For non-single-byte encodings the multi-byte buffer of `dest` is
+ * replaced and the old one freed.  Returns 0 or an error code; on error
+ * the bit set of `dest` has already been updated but its buffer has not. */
+static int
+or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
+{
+ OnigEncoding enc = env->enc;
+ int r, not1, not2;
+ BBuf *buf1, *buf2, *pbuf = 0;
+ BitSetRef bsr1, bsr2;
+ BitSet bs1, bs2;
+
+ not1 = IS_NCCLASS_NOT(dest);
+ bsr1 = dest->bs;
+ buf1 = dest->mbuf;
+ not2 = IS_NCCLASS_NOT(cc);
+ bsr2 = cc->bs;
+ buf2 = cc->mbuf;
+
+ /* work on the effective (de-negated) bit sets, using local copies */
+ if (not1 != 0) {
+ bitset_invert_to(bsr1, bs1);
+ bsr1 = bs1;
+ }
+ if (not2 != 0) {
+ bitset_invert_to(bsr2, bs2);
+ bsr2 = bs2;
+ }
+ bitset_or(bsr1, bsr2);
+ if (bsr1 != dest->bs) {
+ /* the result was computed in the local copy: store it back */
+ bitset_copy(dest->bs, bsr1);
+ bsr1 = dest->bs;
+ }
+ if (not1 != 0) {
+ /* dest stays negated, so keep the complement of the result */
+ bitset_invert(dest->bs);
+ }
+
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ if (not1 != 0 && not2 != 0) {
+ /* (not A) OR (not B) == not (A AND B); dest keeps its NOT flag */
+ r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
+ }
+ else {
+ r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
+ if (r == 0 && not1 != 0) {
+ /* store the complement, for the same reason as the bit set above */
+ BBuf *tbuf = 0;
+ r = not_code_range_buf(enc, pbuf, &tbuf, env);
+ bbuf_free(pbuf);
+ pbuf = tbuf;
+ }
+ }
+ if (r != 0) {
+ bbuf_free(pbuf);
+ return r;
+ }
+
+ dest->mbuf = pbuf;
+ bbuf_free(buf1);
+ return r;
+ }
+ else
+ return 0;
+}
+
+static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
+
+/* Translate the character `c` that followed a backslash into the control
+ * character it denotes.  Only active when the syntax enables
+ * ONIG_SYN_OP_ESC_CONTROL_CHARS; otherwise, and for characters without a
+ * mapping, `c` is returned unchanged.  "\v" maps to a vertical tab only
+ * with ONIG_SYN_OP2_ESC_V_VTAB.  Any other ASCII letter triggers
+ * UNKNOWN_ESC_WARN() before being returned as is. */
+static int
+conv_backslash_value(int c, ScanEnv* env)
+{
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
+ switch (c) {
+ case 'n': return '\n';
+ case 't': return '\t';
+ case 'r': return '\r';
+ case 'f': return '\f';
+ case 'a': return '\007';
+ case 'b': return '\010';
+ case 'e': return '\033';
+ case 'v':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
+ return '\v';
+ /* without the option 'v' is returned unchanged, and with no warning */
+ break;
+
+ default:
+ if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
+ UNKNOWN_ESC_WARN(env, c);
+ break;
+ }
+ }
+ return c;
+}
+
+#define is_invalid_quantifier_target(node) 0
+
+/* Classify a quantifier as one of the six common forms:
+ *   ?:0, *:1, +:2, ??:3, *?:4, +?:5
+ * Returns -1 for every other lower/upper combination. */
+static int
+popular_quantifier_num(QtfrNode* q)
+{
+ int kind = -1; /* index of the greedy form, -1 if not a common one */
+
+ if (q->lower == 0) {
+ if (q->upper == 1) kind = 0;
+ else if (IS_REPEAT_INFINITE(q->upper)) kind = 1;
+ }
+ else if (q->lower == 1) {
+ if (IS_REPEAT_INFINITE(q->upper)) kind = 2;
+ }
+
+ if (kind < 0) return -1;
+
+ /* the reluctant forms follow the greedy ones in the same order */
+ if (q->greedy) return kind;
+ return kind + 3;
+}
+
+
+/* Actions onig_reduce_nested_quantifier() can take for a quantifier
+ * applied directly to another quantifier. */
+enum ReduceType {
+ RQ_ASIS = 0, /* as is */
+ RQ_DEL = 1, /* delete parent */
+ RQ_A, /* to '*' */
+ RQ_AQ, /* to '*?' */
+ RQ_QQ, /* to '??' */
+ RQ_P_QQ, /* to '+)??' */
+ RQ_PQ_Q /* to '+?)?' */
+};
+
+/* Reduction to apply for each pair of nested quantifiers.  The first index
+ * is the kind of the inner (child) quantifier, named in the row comments;
+ * the second is the kind of the outer (parent) one.  Both use the
+ * numbering of popular_quantifier_num(). */
+static enum ReduceType const ReduceTypeTable[6][6] = {
+ {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
+ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
+ {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
+ {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
+ {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
+ {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
+};
+
+/*
+ * Simplify a quantifier (pnode) whose target is itself a quantifier (cnode).
+ *
+ * Both nodes are classified with popular_quantifier_num(); if either is not
+ * one of the six common forms nothing is changed.  Otherwise the rule from
+ * ReduceTypeTable[child][parent] is applied:
+ *   - rules that keep both nodes re-link cnode as the parent's target,
+ *   - rules that collapse the pair rewrite the parent and free the child.
+ */
+extern void
+onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
+{
+  QtfrNode *parent = NQTFR(pnode);
+  QtfrNode *child  = NQTFR(cnode);
+  int pidx = popular_quantifier_num(parent);
+  int cidx = popular_quantifier_num(child);
+
+  if (pidx < 0 || cidx < 0) return ;
+
+  switch (ReduceTypeTable[cidx][pidx]) {
+  /* --- both quantifiers survive: nothing to free --- */
+  case RQ_ASIS:
+    parent->target = cnode;
+    return ;
+
+  case RQ_P_QQ:
+    parent->target = cnode;
+    parent->lower = 0;
+    parent->upper = 1;
+    parent->greedy = 0;
+    child->lower = 1;
+    child->upper = REPEAT_INFINITE;
+    child->greedy = 1;
+    return ;
+
+  case RQ_PQ_Q:
+    parent->target = cnode;
+    parent->lower = 0;
+    parent->upper = 1;
+    parent->greedy = 1;
+    child->lower = 1;
+    child->upper = REPEAT_INFINITE;
+    child->greedy = 0;
+    return ;
+
+  /* --- the pair collapses into the parent node --- */
+  case RQ_DEL:
+    *pnode = *cnode;
+    break;
+
+  case RQ_A:
+    parent->target = child->target;
+    parent->lower = 0;
+    parent->upper = REPEAT_INFINITE;
+    parent->greedy = 1;
+    break;
+
+  case RQ_AQ:
+    parent->target = child->target;
+    parent->lower = 0;
+    parent->upper = REPEAT_INFINITE;
+    parent->greedy = 0;
+    break;
+
+  case RQ_QQ:
+    parent->target = child->target;
+    parent->lower = 0;
+    parent->upper = 1;
+    parent->greedy = 0;
+    break;
+  }
+
+  /* the parent now owns the child's target; detach it before freeing */
+  child->target = NULL_NODE;
+  onig_node_free(cnode);
+}
+
+
+/*
+ * Token kinds stored in OnigToken.type by fetch_token() and
+ * fetch_token_in_cc().  The kinds after the "in cc" marker are assigned
+ * only by fetch_token_in_cc().
+ */
+enum TokenSyms {
+ TK_EOT = 0, /* end of token */
+ TK_RAW_BYTE = 1,
+ TK_CHAR,
+ TK_STRING,
+ TK_CODE_POINT,
+ TK_ANYCHAR,
+ TK_CHAR_TYPE,
+ TK_BACKREF,
+ TK_CALL,
+ TK_ANCHOR,
+ TK_OP_REPEAT,
+ TK_INTERVAL,
+ TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
+ TK_ALT,
+ TK_SUBEXP_OPEN,
+ TK_SUBEXP_CLOSE,
+ TK_CC_OPEN,
+ TK_QUOTE_OPEN,
+ TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
+ /* in cc */
+ TK_CC_CLOSE,
+ TK_CC_RANGE,
+ TK_POSIX_BRACKET_OPEN,
+ TK_CC_AND, /* && */
+ TK_CC_CC_OPEN /* [ */
+};
+
+/*
+ * One lexical token of a pattern.  `type` selects which member of `u`
+ * carries the payload.
+ */
+typedef struct {
+ enum TokenSyms type;
+ int escaped; /* 1 if the token was introduced by the escape character */
+ int base; /* is number: 8, 16 (used in [....]) */
+ UChar* backp; /* pattern position saved by the tokenizer for this token */
+ union {
+ UChar* s;
+ int c; /* plain character, or TK_RAW_BYTE value */
+ OnigCodePoint code; /* TK_CODE_POINT value */
+ int anchor; /* TK_ANCHOR: ANCHOR_* kind */
+ int subtype; /* TK_ANCHOR: ANCHOR_* kind (second name used by the tokenizer) */
+ struct { /* TK_OP_REPEAT / TK_INTERVAL */
+ int lower;
+ int upper; /* may be REPEAT_INFINITE */
+ int greedy;
+ int possessive;
+ } repeat;
+ struct { /* TK_BACKREF */
+ int num; /* number of referenced groups */
+ int ref1; /* the group number when num == 1 */
+ int* refs; /* the group numbers when num > 1 */
+ int by_name; /* 1 if the reference was resolved from a name */
+#ifdef USE_BACKREF_WITH_LEVEL
+ int exist_level;
+ int level; /* \k<name+n> */
+#endif
+ } backref;
+ struct { /* TK_CALL */
+ UChar* name;
+ UChar* name_end;
+ int gnum;
+ } call;
+ struct { /* TK_CHAR_TYPE / TK_CHAR_PROPERTY */
+ int ctype; /* ONIGENC_CTYPE_* */
+ int not; /* 1 for the negated form (\W, \D, \S, \H, \P, \p{^..}) */
+ } prop;
+ } u;
+} OnigToken;
+
+
+/*
+ * Parse the body of an interval quantifier.  *src points just past the
+ * opening brace.
+ *
+ * On success tok->type is set to TK_INTERVAL, tok->u.repeat.lower/upper are
+ * filled in and *src is advanced past the closing brace.  The greedy and
+ * possessive flags are left for the caller to set.
+ *
+ * Returns:
+ *    0  normal interval: {n,m}, {n,} or (if allowed) {,n}
+ *    2  fixed interval: {n}
+ *    1  not an interval, but ONIG_SYN_ALLOW_INVALID_INTERVAL is set;
+ *       *src and tok are left untouched and the caller treats the brace
+ *       as a normal character
+ *   <0  ONIGERR_* code
+ */
+static int
+fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+{
+  int low, up, syn_allow, non_low = 0;
+  int r = 0;
+  OnigCodePoint c;
+  OnigEncoding enc = env->enc;
+  UChar* p = *src;
+  PFETCH_READY;
+
+  syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
+
+  if (PEND) {
+    if (syn_allow)
+      return 1;  /* "....{" : OK! */
+    else
+      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;  /* "....{" syntax error */
+  }
+
+  if (! syn_allow) {
+    c = PPEEK;
+    if (c == ')' || c == '(' || c == '|') {
+      return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
+    }
+  }
+
+  low = onig_scan_unsigned_number(&p, end, env->enc);
+  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+  if (low > ONIG_MAX_REPEAT_NUM)
+    return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+  if (p == *src) { /* can't read low */
+    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
+      /* allow {,n} as {0,n} */
+      low = 0;
+      non_low = 1;
+    }
+    else
+      goto invalid;
+  }
+
+  if (PEND) goto invalid;
+  PFETCH(c);
+  if (c == ',') {
+    UChar* prev = p;
+    up = onig_scan_unsigned_number(&p, end, env->enc);
+    if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+    if (up > ONIG_MAX_REPEAT_NUM)
+      return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+    if (p == prev) {
+      if (non_low != 0)  /* "{,}" has neither bound */
+        goto invalid;
+      up = REPEAT_INFINITE;  /* {n,} : {n,infinite} */
+    }
+  }
+  else {
+    if (non_low != 0)
+      goto invalid;
+
+    PUNFETCH;
+    up = low;  /* {n} : exact n times */
+    r = 2;     /* fixed */
+  }
+
+  if (PEND) goto invalid;
+  PFETCH(c);
+  if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
+    /* escaped-brace syntax: the interval is closed by <esc> + '}' */
+    if (c != MC_ESC(env->syntax)) goto invalid;
+    /* the pattern may end right after the escape char: don't read past it */
+    if (PEND) goto invalid;
+    PFETCH(c);
+  }
+  if (c != '}') goto invalid;
+
+  if (!IS_REPEAT_INFINITE(up) && low > up) {
+    return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
+  }
+
+  tok->type = TK_INTERVAL;
+  tok->u.repeat.lower = low;
+  tok->u.repeat.upper = up;
+  *src = p;
+  return r; /* 0: normal {n,m}, 2: fixed {n} */
+
+ invalid:
+  if (syn_allow)
+    return 1;  /* OK */
+  else
+    return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
+}
+
+/*
+ * Decode the value of an escape sequence: \M-x, \C-x, \cx, or \... .
+ * *src points just past the escape character.
+ *
+ * Returns the resulting code value, or a negative ONIGERR_* code.  *src is
+ * advanced past the consumed characters only on success.  The operand of
+ * \M-, \C- and \c may itself be an escape sequence, which is decoded by a
+ * recursive call.
+ */
+static int
+fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
+{
+ int v;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+ PFETCH(c);
+ switch (c) {
+ case 'M':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
+ PFETCH(c);
+ if (c != '-') return ONIGERR_META_CODE_SYNTAX;
+ if (PEND) return ONIGERR_END_PATTERN_AT_META;
+ PFETCH(c);
+ if (c == MC_ESC(env->syntax)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
+ }
+ c = ((c & 0xff) | 0x80); /* meta: keep the low byte and set bit 7 */
+ }
+ else
+ goto backslash;
+ break;
+
+ case 'C':
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
+ PFETCH(c);
+ if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
+ goto control; /* \C-x shares the operand handling of \cx */
+ }
+ else
+ goto backslash;
+
+ case 'c':
+ if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
+ control:
+ if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
+ PFETCH(c);
+ if (c == '?') {
+ c = 0177;
+ }
+ else {
+ if (c == MC_ESC(env->syntax)) {
+ v = fetch_escaped_value(&p, end, env);
+ if (v < 0) return v;
+ c = (OnigCodePoint )v;
+ }
+ c &= 0x9f; /* control: clear bits 5 and 6 */
+ }
+ break;
+ }
+ /* fall through */
+
+ default:
+ {
+ backslash:
+ c = conv_backslash_value(c, env);
+ }
+ break;
+ }
+
+ *src = p;
+ return c;
+}
+
+static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
+
+/*
+ * Return the delimiter that closes a group name opened by `start`:
+ * '>' for '<', '\'' for '\''.  Any other opener yields 0.
+ */
+static OnigCodePoint
+get_name_end_code_point(OnigCodePoint start)
+{
+  if (start == '<')
+    return (OnigCodePoint )'>';
+
+  if (start == '\'')
+    return (OnigCodePoint )'\'';
+
+  return (OnigCodePoint )0;
+}
+
+#ifdef USE_NAMED_GROUP
+#ifdef USE_BACKREF_WITH_LEVEL
+/*
+ \k<name+n>, \k<name-n>
+ \k<num+n>, \k<num-n>
+ \k<-num+n>, \k<-num-n>
+*/
+/*
+ * Parse the body of a \k<...> back reference that may carry a nest level
+ * suffix (+n or -n).  *src points just past the opening delimiter
+ * start_code ('<' or '\'').
+ *
+ * Outputs on success:
+ *   *rname_end  end of the name / number part (before any level suffix)
+ *   *rback_num  0 for a name, otherwise the signed group number
+ *   *rlevel     the signed level; written only when a level is present
+ *   *src        advanced past the closing delimiter
+ *
+ * Returns 1 if a level suffix was present, 0 if not, or a negative
+ * ONIGERR_* code.  For errors other than an empty name or a too-big number
+ * the offending text is recorded via onig_scan_env_set_error_string().
+ */
+static int
+fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
+                      UChar** rname_end, ScanEnv* env,
+                      int* rback_num, int* rlevel)
+{
+  int r, sign, is_num, exist_level;
+  OnigCodePoint end_code;
+  OnigCodePoint c = 0;
+  OnigEncoding enc = env->enc;
+  UChar *name_end;
+  UChar *pnum_head;
+  UChar *p = *src;
+  PFETCH_READY;
+
+  *rback_num = 0;
+  is_num = exist_level = 0;
+  sign = 1;
+  pnum_head = *src;
+
+  end_code = get_name_end_code_point(start_code);
+
+  name_end = end;
+  r = 0;
+  if (PEND) {
+    return ONIGERR_EMPTY_GROUP_NAME;
+  }
+  else {
+    PFETCH(c);
+    if (c == end_code)
+      return ONIGERR_EMPTY_GROUP_NAME;
+
+    /* is_num: 0 = name, 1 = number, 2 = '-' seen but no digit yet */
+    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+      is_num = 1;
+    }
+    else if (c == '-') {
+      is_num = 2;
+      sign = -1;
+      pnum_head = p;
+    }
+    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+    }
+  }
+
+  while (!PEND) {
+    name_end = p;
+    PFETCH(c);
+    if (c == end_code || c == ')' || c == '+' || c == '-') {
+      if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
+      break;
+    }
+
+    if (is_num != 0) {
+      if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+        is_num = 1;
+      }
+      else {
+        r = ONIGERR_INVALID_GROUP_NAME;
+        is_num = 0;
+      }
+    }
+    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+    }
+  }
+
+  if (r == 0 && c != end_code) {
+    if (c == '+' || c == '-') {
+      int level;
+      int flag = (c == '-' ? -1 : 1);
+
+      /* the pattern may end right after the sign: don't read past it */
+      if (PEND) goto err;
+      PFETCH(c);
+      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
+      PUNFETCH;
+      level = onig_scan_unsigned_number(&p, end, enc);
+      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
+      *rlevel = (level * flag);
+      exist_level = 1;
+
+      /* a level with no closing delimiter after it is an error (falls to err) */
+      if (!PEND) {
+        PFETCH(c);
+        if (c == end_code)
+          goto end;
+      }
+    }
+
+  err:
+    r = ONIGERR_INVALID_GROUP_NAME;
+    name_end = end;
+  }
+
+ end:
+  if (r == 0) {
+    if (is_num != 0) {
+      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+      else if (*rback_num == 0) goto err;  /* group number 0 is not valid */
+
+      *rback_num *= sign;
+    }
+
+    *rname_end = name_end;
+    *src = p;
+    return (exist_level ? 1 : 0);
+  }
+  else {
+    onig_scan_env_set_error_string(env, r, *src, name_end);
+    return r;
+  }
+}
+#endif /* USE_BACKREF_WITH_LEVEL */
+
+/*
+ * Parse a group name or number.  *src points just past the opening
+ * delimiter start_code ('<' or '\'').
+ *
+ *   ref: 0 -> define name    (don't allow number name)
+ *        1 -> reference name (allow number name)
+ *
+ * Outputs on success:
+ *   *rname_end  end of the name (position of the closing delimiter)
+ *   *rback_num  0 for a name, otherwise the signed group number
+ *   *src        advanced past the closing delimiter
+ *
+ * Returns 0 on success or a negative ONIGERR_* code.  For errors other
+ * than an empty name or a too-big number the offending text is recorded
+ * via onig_scan_env_set_error_string().
+ */
+static int
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+           UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
+{
+  int r, is_num, sign;
+  OnigCodePoint end_code;
+  OnigCodePoint c = 0;
+  OnigEncoding enc = env->enc;
+  UChar *name_end;
+  UChar *pnum_head;
+  UChar *p = *src;
+  PFETCH_READY;
+
+  *rback_num = 0;
+
+  end_code = get_name_end_code_point(start_code);
+
+  name_end = end;
+  pnum_head = *src;
+  r = 0;
+  is_num = 0;
+  sign = 1;
+  if (PEND) {
+    return ONIGERR_EMPTY_GROUP_NAME;
+  }
+  else {
+    PFETCH(c);
+    if (c == end_code)
+      return ONIGERR_EMPTY_GROUP_NAME;
+
+    /* is_num: 0 = name, 1 = number, 2 = '-' seen but no digit yet */
+    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+      if (ref == 1)
+        is_num = 1;
+      else {
+        r = ONIGERR_INVALID_GROUP_NAME;
+        is_num = 0;
+      }
+    }
+    else if (c == '-') {
+      if (ref == 1) {
+        is_num = 2;
+        sign = -1;
+        pnum_head = p;
+      }
+      else {
+        r = ONIGERR_INVALID_GROUP_NAME;
+        is_num = 0;
+      }
+    }
+    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+    }
+  }
+
+  if (r == 0) {
+    /* keep scanning after an error so name_end reaches the delimiter */
+    while (!PEND) {
+      name_end = p;
+      PFETCH(c);
+      if (c == end_code || c == ')') {
+        if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
+        break;
+      }
+
+      if (is_num != 0) {
+        if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+          is_num = 1;
+        }
+        else {
+          if (!ONIGENC_IS_CODE_WORD(enc, c))
+            r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+          else
+            r = ONIGERR_INVALID_GROUP_NAME;
+
+          is_num = 0;
+        }
+      }
+      else {
+        if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+          r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+        }
+      }
+    }
+
+    if (c != end_code) {
+      r = ONIGERR_INVALID_GROUP_NAME;
+      name_end = end;
+    }
+
+    /* report errors found while scanning instead of returning success */
+    if (r != 0) goto err;
+
+    if (is_num != 0) {
+      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+      else if (*rback_num == 0) {
+        r = ONIGERR_INVALID_GROUP_NAME;
+        goto err;
+      }
+
+      *rback_num *= sign;
+    }
+
+    *rname_end = name_end;
+    *src = p;
+    return 0;
+  }
+  else {
+    /* first character was bad: skip to the delimiter for the error text */
+    while (!PEND) {
+      name_end = p;
+      PFETCH(c);
+      if (c == end_code || c == ')')
+        break;
+    }
+    if (PEND)
+      name_end = end;
+
+  err:
+    onig_scan_env_set_error_string(env, r, *src, name_end);
+    return r;
+  }
+}
+#else
+/*
+ * Variant built when USE_NAMED_GROUP is not defined: only numeric
+ * references (digits, optionally preceded by '-') are accepted.
+ * *src points just past the opening delimiter start_code.
+ *
+ * On success stores the signed group number in *rback_num, the end of the
+ * number in *rname_end, advances *src past the closing delimiter and
+ * returns 0.  Otherwise returns a negative ONIGERR_* code.  The `ref`
+ * argument is not used by this variant.
+ */
+static int
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+ UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
+{
+ int r, is_num, sign;
+ OnigCodePoint end_code;
+ OnigCodePoint c = 0;
+ UChar *name_end;
+ OnigEncoding enc = env->enc;
+ UChar *pnum_head;
+ UChar *p = *src;
+ PFETCH_READY;
+
+ *rback_num = 0;
+
+ end_code = get_name_end_code_point(start_code);
+
+ *rname_end = name_end = end;
+ r = 0;
+ pnum_head = *src;
+ is_num = 0;
+ sign = 1;
+
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
+ PFETCH(c);
+ if (c == end_code)
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ is_num = 1;
+ }
+ else if (c == '-') {
+ is_num = 2;
+ sign = -1;
+ pnum_head = p; /* the number starts after the sign */
+ }
+ else {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
+ /* scan to the delimiter even after an error so name_end is set for the message */
+ while (!PEND) {
+ name_end = p;
+
+ PFETCH(c);
+ if (c == end_code || c == ')') break;
+ if (! ONIGENC_IS_CODE_DIGIT(enc, c))
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ if (r == 0 && c != end_code) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+
+ if (r == 0) {
+ *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+ if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ else if (*rback_num == 0) { /* group number 0 (or a lone '-') is rejected */
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto err;
+ }
+ *rback_num *= sign;
+
+ *rname_end = name_end;
+ *src = p;
+ return 0;
+ }
+ else {
+ err:
+ onig_scan_env_set_error_string(env, r, *src, name_end);
+ return r;
+ }
+}
+#endif /* USE_NAMED_GROUP */
+
+void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+ UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
+
+/*
+ * Format a printf-style warning about the pattern being parsed and emit it.
+ *
+ * The text is built by onig_vsnprintf_with_pattern() from env->pattern and
+ * fmt.  It goes to mrb_compile_warn() when the environment has a source
+ * file, and to mrb_warn() otherwise.
+ */
+static void
+onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
+{
+  UChar msg[WARN_BUFSIZE];
+  va_list ap;
+
+  va_start(ap, fmt);
+  onig_vsnprintf_with_pattern(msg, WARN_BUFSIZE, env->enc,
+                              env->pattern, env->pattern_end,
+                              (const UChar *)fmt, ap);
+  va_end(ap);
+
+  if (env->sourcefile != NULL) {
+    mrb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)msg);
+    return ;
+  }
+
+  mrb_warn("%s", (char *)msg);
+}
+
+/*
+ * Warn that the operator text `c` appears unescaped inside a character
+ * class.  Does nothing when warnings are disabled, or unless both
+ * ONIG_SYN_WARN_CC_OP_NOT_ESCAPED and ONIG_SYN_BACKSLASH_ESCAPE_IN_CC are
+ * set in the syntax.
+ */
+static void
+CC_ESC_WARN(ScanEnv *env, UChar *c)
+{
+  if (onig_warn == onig_null_warn) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) return ;
+
+  onig_syntax_warn(env, "character class has '%s' without escape", c);
+}
+
+/*
+ * Warn that the text `c` appears unescaped in the pattern.  Does nothing
+ * when warnings are disabled or ONIG_SYN_WARN_CC_OP_NOT_ESCAPED is not set.
+ */
+static void
+CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
+{
+  if (onig_warn == onig_null_warn) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) return ;
+
+  onig_syntax_warn(env, "regular expression has '%s' without escape", c);
+}
+
+/*
+ * Warn about a duplicated range in a character class, at most once per
+ * scan environment: the ONIG_SYN_WARN_CC_DUP bit of env->warnings_flag
+ * records that the warning was already issued.
+ */
+static void
+CC_DUP_WARN(ScanEnv *env)
+{
+  if (onig_warn == onig_null_warn) return ;
+  if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP)) return ;
+  if ((env->warnings_flag & ONIG_SYN_WARN_CC_DUP) != 0) return ;
+
+  env->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
+  onig_syntax_warn(env, "character class has duplicated range");
+}
+
+/*
+ * Warn that the escape \<c> is not recognised and is ignored.
+ * Does nothing when warnings are disabled.
+ */
+static void
+UNKNOWN_ESC_WARN(ScanEnv *env, int c)
+{
+  if (onig_warn != onig_null_warn) {
+    onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
+  }
+}
+
+/*
+ * Search [from, to) for the code point sequence s[0..n-1].
+ *
+ * Returns a pointer to the first character of the first match, or
+ * NULL_UCHARP if there is none.  When `next` is not null, *next receives
+ * the position just past the match.
+ */
+static UChar*
+find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
+                  UChar **next, OnigEncoding enc)
+{
+  int k;
+  OnigCodePoint code;
+  UChar *cur;
+  UChar *after;
+
+  for (cur = from; cur < to; cur = after) {
+    code = ONIGENC_MBC_TO_CODE(enc, cur, to);
+    after = cur + enclen(enc, cur, to);
+    if (code != s[0]) continue;
+
+    /* first code point matched: compare the rest of the sequence */
+    for (k = 1; k < n && after < to; k++) {
+      code = ONIGENC_MBC_TO_CODE(enc, after, to);
+      if (code != s[k]) break;
+      after += enclen(enc, after, to);
+    }
+
+    if (k >= n) {
+      if (IS_NOT_NULL(next))
+        *next = after;
+      return cur;
+    }
+    /* partial match: the search resumes after the part that did match */
+  }
+
+  return NULL_UCHARP;
+}
+
+/*
+ * Check whether the code point sequence s[0..n-1] occurs in [from, to)
+ * before an unescaped occurrence of `bad`.
+ *
+ * A character that follows the syntax escape character MC_ESC(syn) is
+ * skipped without being examined.
+ *
+ * Returns 1 if the sequence is found, 0 if `bad` is met first or the end
+ * of the range is reached.
+ */
+static int
+str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
+                         OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
+{
+  int i, in_esc;
+  OnigCodePoint x, y;
+  UChar *q;
+  UChar *next;
+  UChar *p = from;
+
+  in_esc = 0;
+  while (p < to) {
+    /* every path advances by exactly one character: compute it once */
+    next = p + enclen(enc, p, to);
+
+    if (in_esc) {
+      in_esc = 0;  /* escaped character: skip it unexamined */
+    }
+    else {
+      x = ONIGENC_MBC_TO_CODE(enc, p, to);
+      if (x == s[0]) {
+        q = next;
+        for (i = 1; i < n && q < to; i++) {
+          y = ONIGENC_MBC_TO_CODE(enc, q, to);
+          if (y != s[i]) break;
+          q += enclen(enc, q, to);
+        }
+        if (i >= n) return 1;
+      }
+      else if (x == bad) {
+        return 0;
+      }
+      else if (x == MC_ESC(syn)) {
+        in_esc = 1;
+      }
+    }
+
+    p = next;
+  }
+  return 0;
+}
+
+/*
+ * Fetch the next token while inside a character class ([...]).
+ *
+ * Fills *tok and advances *src past the consumed characters.
+ * Returns the token type (tok->type, which is TK_EOT at end of pattern),
+ * or a negative ONIGERR_* code.
+ *
+ * Anything not recognised as a class operator or escape is returned as
+ * TK_CHAR with the character in tok->u.c.
+ */
+static int
+fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+{
+  int num;
+  OnigCodePoint c, c2;
+  const OnigSyntaxType* syn = env->syntax;
+  OnigEncoding enc = env->enc;
+  UChar* prev;
+  UChar* p = *src;
+  PFETCH_READY;
+
+  if (PEND) {
+    tok->type = TK_EOT;
+    return tok->type;
+  }
+
+  PFETCH(c);
+  tok->type = TK_CHAR;
+  tok->base = 0;
+  tok->u.c = c;
+  tok->escaped = 0;
+
+  if (c == ']') {
+    tok->type = TK_CC_CLOSE;
+  }
+  else if (c == '-') {
+    tok->type = TK_CC_RANGE;
+  }
+  else if (c == MC_ESC(syn)) {
+    /* without backslash escapes in classes the escape char is a plain char */
+    if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
+      goto end;
+
+    if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+    PFETCH(c);
+    tok->escaped = 1;
+    tok->u.c = c;
+    switch (c) {
+    case 'w':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_W;
+      tok->u.prop.not = 0;
+      break;
+    case 'W':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_W;
+      tok->u.prop.not = 1;
+      break;
+    case 'd':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_D;
+      tok->u.prop.not = 0;
+      break;
+    case 'D':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_D;
+      tok->u.prop.not = 1;
+      break;
+    case 's':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_S;
+      tok->u.prop.not = 0;
+      break;
+    case 'S':
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_S;
+      tok->u.prop.not = 1;
+      break;
+    case 'h':
+      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+      tok->u.prop.not = 0;
+      break;
+    case 'H':
+      if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+      tok->type = TK_CHAR_TYPE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+      tok->u.prop.not = 1;
+      break;
+
+    case 'p':
+    case 'P':
+      c2 = PPEEK;
+      if (c2 == '{' &&
+          IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
+        PINC;
+        tok->type = TK_CHAR_PROPERTY;
+        tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+        /* the pattern may end right after "\p{": don't fetch past it */
+        if (!PEND &&
+            IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+          PFETCH(c2);
+          if (c2 == '^') {
+            /* \p{^...} flips the sense of \p / \P */
+            tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+          }
+          else
+            PUNFETCH;
+        }
+      }
+      else {
+        onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
+      }
+      break;
+
+    case 'x':
+      if (PEND) break;
+
+      prev = p;
+      if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+        PINC;
+        num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
+        if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+        if (!PEND) {
+          c2 = PPEEK;
+          if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
+            return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+        }
+
+        /* need at least one hex digit after '{' and a closing '}' */
+        if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
+          PINC;
+          tok->type = TK_CODE_POINT;
+          tok->base = 16;
+          tok->u.code = (OnigCodePoint )num;
+        }
+        else {
+          /* can't read nothing or invalid format */
+          p = prev;
+        }
+      }
+      else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
+        num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
+        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+        if (p == prev) { /* can't read nothing. */
+          num = 0;       /* but, it's not error */
+        }
+        tok->type = TK_RAW_BYTE;
+        tok->base = 16;
+        tok->u.c = num;
+      }
+      break;
+
+    case 'u':
+      if (PEND) break;
+
+      prev = p;
+      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
+        num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
+        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+        if (p == prev) { /* can't read nothing. */
+          num = 0;       /* but, it's not error */
+        }
+        tok->type = TK_CODE_POINT;
+        tok->base = 16;
+        tok->u.code = (OnigCodePoint )num;
+      }
+      break;
+
+    case '0':
+    case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+      if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
+        PUNFETCH;  /* the digit just read is part of the octal number */
+        prev = p;
+        num = scan_unsigned_octal_number(&p, end, 3, enc);
+        if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+        if (p == prev) { /* can't read nothing. */
+          num = 0;       /* but, it's not error */
+        }
+        tok->type = TK_RAW_BYTE;
+        tok->base = 8;
+        tok->u.c = num;
+      }
+      break;
+
+    default:
+      PUNFETCH;
+      num = fetch_escaped_value(&p, end, env);
+      if (num < 0) return num;
+      /* only escapes that changed the value become code points */
+      if (tok->u.c != num) {
+        tok->u.code = (OnigCodePoint )num;
+        tok->type = TK_CODE_POINT;
+      }
+      break;
+    }
+  }
+  else if (c == '[') {
+    if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
+      OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
+      tok->backp = p; /* position just after the '[' that was read */
+      PINC;
+      /* "[:" opens a POSIX bracket only if ":]" follows before a ']' */
+      if (str_exist_check_with_esc(send, 2, p, end,
+                                   (OnigCodePoint )']', enc, syn)) {
+        tok->type = TK_POSIX_BRACKET_OPEN;
+      }
+      else {
+        PUNFETCH;
+        goto cc_in_cc;
+      }
+    }
+    else {
+    cc_in_cc:
+      if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
+        tok->type = TK_CC_CC_OPEN;
+      }
+      else {
+        CC_ESC_WARN(env, (UChar* )"[");
+      }
+    }
+  }
+  else if (c == '&') {
+    if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
+        !PEND && (PPEEK_IS('&'))) {
+      PINC;
+      tok->type = TK_CC_AND;
+    }
+  }
+
+ end:
+  *src = p;
+  return tok->type;
+}
+
+static int
+fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+{
+ int r, num;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ const OnigSyntaxType* syn = env->syntax;
+ UChar* prev;
+ UChar* p = *src;
+ PFETCH_READY;
+
+ start:
+ if (PEND) {
+ tok->type = TK_EOT;
+ return tok->type;
+ }
+
+ tok->type = TK_STRING;
+ tok->base = 0;
+ tok->backp = p;
+
+ PFETCH(c);
+ if (IS_MC_ESC_CODE(c, syn)) {
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
+
+ tok->backp = p;
+ PFETCH(c);
+
+ tok->u.c = c;
+ tok->escaped = 1;
+ switch (c) {
+ case '*':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '+':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 1;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '?':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = 1;
+ greedy_check:
+ if (!PEND && PPEEK_IS('?') &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 0;
+ tok->u.repeat.possessive = 0;
+ }
+ else {
+ possessive_check:
+ if (!PEND && PPEEK_IS('+') &&
+ ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
+ tok->type != TK_INTERVAL) ||
+ (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
+ tok->type == TK_INTERVAL))) {
+ PFETCH(c);
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 1;
+ }
+ else {
+ tok->u.repeat.greedy = 1;
+ tok->u.repeat.possessive = 0;
+ }
+ }
+ break;
+
+ case '{':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
+ r = fetch_range_quantifier(&p, end, tok, env);
+ if (r < 0) return r; /* error */
+ if (r == 0) goto greedy_check;
+ else if (r == 2) { /* {n} */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
+ goto possessive_check;
+
+ goto greedy_check;
+ }
+ /* r == 1 : normal char */
+ break;
+
+ case '|':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
+ tok->type = TK_ALT;
+ break;
+
+ case '(':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_CLOSE;
+ break;
+
+ case 'w':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_W;
+ tok->u.prop.not = 0;
+ break;
+
+ case 'W':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_W;
+ tok->u.prop.not = 1;
+ break;
+
+ case 'b':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_BOUND;
+ break;
+
+ case 'B':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
+ break;
+
+#ifdef USE_WORD_BEGIN_END
+ case '<':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_BEGIN;
+ break;
+
+ case '>':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_WORD_END;
+ break;
+#endif
+
+ case 's':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_S;
+ tok->u.prop.not = 0;
+ break;
+
+ case 'S':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_S;
+ tok->u.prop.not = 1;
+ break;
+
+ case 'd':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_D;
+ tok->u.prop.not = 0;
+ break;
+
+ case 'D':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_D;
+ tok->u.prop.not = 1;
+ break;
+
+ case 'h':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 0;
+ break;
+
+ case 'H':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
+ tok->type = TK_CHAR_TYPE;
+ tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+ tok->u.prop.not = 1;
+ break;
+
+ case 'A':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ begin_buf:
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_BEGIN_BUF;
+ break;
+
+ case 'Z':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_SEMI_END_BUF;
+ break;
+
+ case 'z':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
+ end_buf:
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_END_BUF;
+ break;
+
+ case 'G':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = ANCHOR_BEGIN_POSITION;
+ break;
+
+ case '`':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
+ goto begin_buf;
+ break;
+
+ case '\'':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
+ goto end_buf;
+ break;
+
+ case 'x':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
+ PINC;
+ num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 16;
+ tok->u.c = num;
+ }
+ break;
+
+ case 'u':
+ if (PEND) break;
+
+ prev = p;
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
+ num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ break;
+
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ PUNFETCH;
+ prev = p;
+ num = onig_scan_unsigned_number(&p, end, enc);
+ if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
+ goto skip_backref;
+ }
+
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
+ (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = num;
+ tok->u.backref.by_name = 0;
+#ifdef USE_BACKREF_WITH_LEVEL
+ tok->u.backref.exist_level = 0;
+#endif
+ break;
+ }
+
+ skip_backref:
+ if (c == '8' || c == '9') {
+ /* normal char */
+ p = prev; PINC;
+ break;
+ }
+
+ p = prev;
+ /* fall through */
+ case '0':
+ if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
+ prev = p;
+ num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
+ if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (p == prev) { /* can't read nothing. */
+ num = 0; /* but, it's not error */
+ }
+ tok->type = TK_RAW_BYTE;
+ tok->base = 8;
+ tok->u.c = num;
+ }
+ else if (c != '0') {
+ PINC;
+ }
+ break;
+
+#ifdef USE_NAMED_GROUP
+ case 'k':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
+ PFETCH(c);
+ if (c == '<' || c == '\'') {
+ UChar* name_end;
+ int* backs;
+ int back_num;
+
+ prev = p;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ name_end = NULL_UCHARP; /* no need. escape gcc warning. */
+ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
+ env, &back_num, &tok->u.backref.level);
+ if (r == 1) tok->u.backref.exist_level = 1;
+ else tok->u.backref.exist_level = 0;
+#else
+ r = fetch_name(&p, end, &name_end, env, &back_num, 1);
+#endif
+ if (r < 0) return r;
+
+ if (back_num != 0) {
+ if (back_num < 0) {
+ back_num = BACKREF_REL_TO_ABS(back_num, env);
+ if (back_num <= 0)
+ return ONIGERR_INVALID_BACKREF;
+ }
+
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ if (back_num > env->num_mem ||
+ IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 0;
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = back_num;
+ }
+ else {
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ if (num <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ int i;
+ for (i = 0; i < num; i++) {
+ if (backs[i] > env->num_mem ||
+ IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 1;
+ if (num == 1) {
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = backs[0];
+ }
+ else {
+ tok->u.backref.num = num;
+ tok->u.backref.refs = backs;
+ }
+ }
+ }
+ else {
+ PUNFETCH;
+ onig_syntax_warn(env, "invalid back reference");
+ }
+ }
+ break;
+#endif
+
+#ifdef USE_SUBEXP_CALL
+ case 'g':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
+ PFETCH(c);
+ if (c == '<' || c == '\'') {
+ int gnum;
+ UChar* name_end;
+
+ prev = p;
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
+ if (r < 0) return r;
+
+ tok->type = TK_CALL;
+ tok->u.call.name = prev;
+ tok->u.call.name_end = name_end;
+ tok->u.call.gnum = gnum;
+ }
+ else {
+ onig_syntax_warn(env, "invalid subexp call");
+ PUNFETCH;
+ }
+ }
+ break;
+#endif
+
+ case 'Q':
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
+ tok->type = TK_QUOTE_OPEN;
+ }
+ break;
+
+ case 'p':
+ case 'P':
+ if (PPEEK_IS('{') &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c);
+ if (c == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
+ }
+ else {
+ onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
+ }
+ break;
+
+ default:
+ PUNFETCH;
+ num = fetch_escaped_value(&p, end, env);
+ if (num < 0) return num;
+ /* set_raw: */
+ if (tok->u.c != num) {
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else { /* string */
+ p = tok->backp + enclen(enc, tok->backp, end);
+ }
+ break;
+ }
+ }
+ else {
+ tok->u.c = c;
+ tok->escaped = 0;
+
+#ifdef USE_VARIABLE_META_CHARS
+ if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
+ if (c == MC_ANYCHAR(syn))
+ goto any_char;
+ else if (c == MC_ANYTIME(syn))
+ goto anytime;
+ else if (c == MC_ZERO_OR_ONE_TIME(syn))
+ goto zero_or_one_time;
+ else if (c == MC_ONE_OR_MORE_TIME(syn))
+ goto one_or_more_time;
+ else if (c == MC_ANYCHAR_ANYTIME(syn)) {
+ tok->type = TK_ANYCHAR_ANYTIME;
+ goto out;
+ }
+ }
+#endif
+
+ switch (c) {
+ case '.':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
+#ifdef USE_VARIABLE_META_CHARS
+ any_char:
+#endif
+ tok->type = TK_ANYCHAR;
+ break;
+
+ case '*':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
+#ifdef USE_VARIABLE_META_CHARS
+ anytime:
+#endif
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '+':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
+#ifdef USE_VARIABLE_META_CHARS
+ one_or_more_time:
+#endif
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 1;
+ tok->u.repeat.upper = REPEAT_INFINITE;
+ goto greedy_check;
+ break;
+
+ case '?':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
+#ifdef USE_VARIABLE_META_CHARS
+ zero_or_one_time:
+#endif
+ tok->type = TK_OP_REPEAT;
+ tok->u.repeat.lower = 0;
+ tok->u.repeat.upper = 1;
+ goto greedy_check;
+ break;
+
+ case '{':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
+ r = fetch_range_quantifier(&p, end, tok, env);
+ if (r < 0) return r; /* error */
+ if (r == 0) goto greedy_check;
+ else if (r == 2) { /* {n} */
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
+ goto possessive_check;
+
+ goto greedy_check;
+ }
+ /* r == 1 : normal char */
+ break;
+
+ case '|':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
+ tok->type = TK_ALT;
+ break;
+
+ case '(':
+ if (PPEEK_IS('?') &&
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+ PINC;
+ if (PPEEK_IS('#')) {
+ PFETCH(c);
+ while (1) {
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (c == MC_ESC(syn)) {
+ if (!PEND) PFETCH(c);
+ }
+ else {
+ if (c == ')') break;
+ }
+ }
+ goto start;
+ }
+ PUNFETCH;
+ }
+
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_OPEN;
+ break;
+
+ case ')':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
+ tok->type = TK_SUBEXP_CLOSE;
+ break;
+
+ case '^':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = (IS_SINGLELINE(env->option)
+ ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
+ break;
+
+ case '$':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
+ tok->type = TK_ANCHOR;
+ tok->u.subtype = (IS_SINGLELINE(env->option)
+ ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
+ break;
+
+ case '[':
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
+ tok->type = TK_CC_OPEN;
+ break;
+
+ case ']':
+ if (*src > env->pattern) /* /].../ is allowed. */
+ CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
+ break;
+
+ case '#':
+ if (IS_EXTEND(env->option)) {
+ while (!PEND) {
+ PFETCH(c);
+ if (ONIGENC_IS_CODE_NEWLINE(enc, c))
+ break;
+ }
+ goto start;
+ break;
+ }
+ break;
+
+ case ' ': case '\t': case '\n': case '\r': case '\f':
+ if (IS_EXTEND(env->option))
+ goto start;
+ break;
+
+ default:
+ /* string */
+ break;
+ }
+ }
+
+#ifdef USE_VARIABLE_META_CHARS
+ out:
+#endif
+ *src = p;
+ return tok->type;
+}
+/* Adds the code points listed in the range table mbr to cc (not == 0), or
+ * the code points that are NOT listed in it (not != 0).
+ * Code points below sb_out are recorded in the bitset cc->bs; code points
+ * at or above sb_out are appended to cc->mbuf as ranges.
+ * The ctype argument is unused.
+ * Returns 0 on success, or the non-zero result of add_code_range_to_buf().
+ */
+static int
+add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
+ ScanEnv* env,
+ OnigCodePoint sb_out, const OnigCodePoint mbr[])
+{
+ int i, r;
+ OnigCodePoint j;
+
+ int n = ONIGENC_CODE_RANGE_NUM(mbr); /* number of ranges in the table */
+
+ if (not == 0) {
+ for (i = 0; i < n; i++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
+ if (j >= sb_out) { /* reached the part that belongs in mbuf */
+ if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+ r = add_code_range_to_buf(&(cc->mbuf), env, j,
+ ONIGENC_CODE_RANGE_TO(mbr, i)); /* remainder of a range that straddles sb_out */
+ if (r != 0) return r;
+ i++; /* this range is done; sb_end resumes with the next one */
+ }
+
+ goto sb_end;
+ }
+ BITSET_SET_BIT_CHKDUP(cc->bs, j);
+ }
+ }
+
+ sb_end:
+ for ( ; i < n; i++) { /* every remaining range goes to mbuf unchanged */
+ r = add_code_range_to_buf(&(cc->mbuf), env,
+ ONIGENC_CODE_RANGE_FROM(mbr, i),
+ ONIGENC_CODE_RANGE_TO(mbr, i));
+ if (r != 0) return r;
+ }
+ }
+ else {
+ OnigCodePoint prev = 0; /* first code point not yet covered by a range */
+
+ for (i = 0; i < n; i++) { /* set bitset bits for the gaps between ranges */
+ for (j = prev;
+ j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
+ if (j >= sb_out) {
+ goto sb_end2;
+ }
+ BITSET_SET_BIT_CHKDUP(cc->bs, j);
+ }
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
+ }
+ for (j = prev; j < sb_out; j++) { /* gap after the last range, below sb_out */
+ BITSET_SET_BIT_CHKDUP(cc->bs, j);
+ }
+
+ sb_end2:
+ prev = sb_out; /* second pass: gaps at or above sb_out go to mbuf */
+
+ for (i = 0; i < n; i++) {
+ if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+ r = add_code_range_to_buf(&(cc->mbuf), env, prev,
+ ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
+ if (r != 0) return r;
+ }
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
+ }
+ if (prev < 0x7fffffff) { /* tail gap up to the largest code point used here */
+ r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
+ if (r != 0) return r;
+ }
+ }
+
+ return 0;
+}
+/* Adds the character type ctype to cc, or its complement when not != 0.
+ * Three strategies are tried in order:
+ *  1. ONIGENC_CTYPE_D/S/W: the special mask bit is toggled off and only
+ *     single-byte codes are tested with ONIGENC_IS_ASCII_CODE_CTYPE.
+ *  2. The encoding is asked for a code range table; if it supplies one the
+ *     work is delegated to add_ctype_to_cc_by_range().
+ *  3. If the encoding answers ONIG_NO_SUPPORT_CONFIG, every single-byte
+ *     code is tested individually.
+ * ADD_ALL_MULTI_BYTE_RANGE is defined elsewhere; presumably it adds every
+ * multi-byte code point of the encoding to the buffer -- TODO confirm.
+ * Returns 0 on success, an error code from the encoding or range helpers,
+ * or ONIGERR_PARSER_BUG for a ctype not handled by the fallback switch.
+ */
+static int
+add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
+{
+ int c, r;
+ const OnigCodePoint *ranges;
+ OnigCodePoint sb_out;
+ OnigEncoding enc = env->enc;
+
+ switch (ctype) {
+ case ONIGENC_CTYPE_D:
+ case ONIGENC_CTYPE_S:
+ case ONIGENC_CTYPE_W:
+ ctype ^= ONIGENC_CTYPE_SPECIAL_MASK; /* toggle the special-mask bits before the ASCII test */
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ }
+ return 0;
+ break;
+ }
+
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
+ if (r == 0) {
+ return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
+ }
+ else if (r != ONIG_NO_SUPPORT_CONFIG) {
+ return r; /* a real error, not merely "no range table available" */
+ }
+
+ r = 0;
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ case ONIGENC_CTYPE_BLANK:
+ case ONIGENC_CTYPE_CNTRL:
+ case ONIGENC_CTYPE_DIGIT:
+ case ONIGENC_CTYPE_LOWER:
+ case ONIGENC_CTYPE_PUNCT:
+ case ONIGENC_CTYPE_SPACE:
+ case ONIGENC_CTYPE_UPPER:
+ case ONIGENC_CTYPE_XDIGIT:
+ case ONIGENC_CTYPE_ASCII:
+ case ONIGENC_CTYPE_ALNUM:
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); /* multi-byte range is added only for the negated form */
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ }
+ break;
+
+ case ONIGENC_CTYPE_GRAPH:
+ case ONIGENC_CTYPE_PRINT:
+ if (not != 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); /* here the multi-byte range is added for the positive form */
+ }
+ break;
+
+ case ONIGENC_CTYPE_WORD:
+ if (not == 0) {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ }
+ else {
+ for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
+ && ! ONIGENC_IS_CODE_WORD(enc, c))
+ BITSET_SET_BIT_CHKDUP(cc->bs, c);
+ }
+ }
+ break;
+
+ default:
+ return ONIGERR_PARSER_BUG;
+ break;
+ }
+
+ return r;
+}
+/* Tries to read a POSIX bracket name such as "alpha:]" (optionally preceded
+ * by '^' for negation) starting at *src and, on a match, adds the
+ * corresponding ctype to cc.
+ * Returns:
+ *   0  a known name followed by ":]" was consumed; *src is advanced past it
+ *   1  the text is not a POSIX bracket; *src is left unchanged
+ *   ONIGERR_INVALID_POSIX_BRACKET_TYPE  a known name is not followed by
+ *      ":]", or an unknown name is closed by ":]" within the scan limit
+ *   any non-zero result of add_ctype_to_cc()
+ */
+static int
+parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
+{
+#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
+#define POSIX_BRACKET_NAME_MIN_LEN 4
+
+ static const PosixBracketEntryType PBS[] = { /* name, ctype, name length; NULL-terminated */
+ { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
+ { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
+ { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
+ { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
+ { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
+ { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
+ { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
+ { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
+ { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
+ { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
+ { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
+ { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+ { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
+ { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
+ { (UChar* )NULL, -1, 0 }
+ };
+
+ const PosixBracketEntryType *pb;
+ int not, i, r;
+ OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+ UChar *p = *src;
+ PFETCH_READY;
+
+ if (PPEEK_IS('^')) {
+ PINC;
+ not = 1;
+ }
+ else
+ not = 0;
+
+ if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) /* too short to hold the shortest name plus 3 more characters */
+ goto not_posix_bracket;
+
+ for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+ if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
+ p = (UChar* )onigenc_step(enc, p, end, pb->len);
+ if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+
+ r = add_ctype_to_cc(cc, pb->ctype, not, env);
+ if (r != 0) return r;
+
+ PINC; PINC; /* step over the two characters ":]" */
+ *src = p;
+ return 0;
+ }
+ }
+
+ not_posix_bracket:
+ c = 0;
+ i = 0;
+ while (!PEND && ((c = PPEEK) != ':') && c != ']') { /* look ahead a bounded distance for ':' or ']' */
+ PINC;
+ if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
+ }
+ if (c == ':' && ! PEND) {
+ PINC;
+ if (! PEND) {
+ PFETCH(c);
+ if (c == ']') /* "[:unknown:]" -- looks like a POSIX bracket with a bad name */
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+ }
+ }
+
+ return 1; /* 1: is not POSIX bracket, but no error. */
+}
+
+/*
+ * Reads a character property name that runs from *src up to a closing '}'
+ * and converts it to a ctype with ONIGENC_PROPERTY_NAME_TO_CTYPE.
+ *
+ * On success the non-negative ctype is returned and *src is advanced past
+ * the '}'.  On failure a negative error code is returned, *src is left
+ * unchanged and the offending text is recorded in env through
+ * onig_scan_env_set_error_string().
+ *
+ * Failure cases:
+ *   - the encoding rejects the name (its negative result is returned);
+ *   - one of '(' ')' '{' '|' appears before the '}';
+ *   - the pattern ends before any '}' is found.
+ */
+static int
+fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  OnigCodePoint c;
+  OnigEncoding enc = env->enc;
+  UChar *prev, *start, *p = *src;
+  PFETCH_READY;
+
+  /* Start from the error value: if the loop below runs off the end of the
+     pattern without meeting '}', the name is unterminated and must not be
+     reported as ctype 0. */
+  r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+  start = prev = p;
+
+  while (!PEND) {
+    prev = p;
+    PFETCH(c);
+    if (c == '}') {
+      /* prev points at the '}', so [start, prev) is the bare name */
+      r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
+      if (r < 0) break;
+
+      *src = p;
+      return r;
+    }
+    else if (c == '(' || c == ')' || c == '{' || c == '|') {
+      r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+      break;
+    }
+  }
+
+  onig_scan_env_set_error_string(env, r, *src, prev);
+  return r;
+}
+
+/*
+ * Builds a character-class node for a character property escape.
+ *
+ * The property name is read from *src, a new cclass node is stored in *np,
+ * the property's ctype is added to it, and the node is flagged as negated
+ * when tok->u.prop.not is set.  Returns 0 on success, otherwise the
+ * negative/non-zero code produced by the failing step.
+ */
+static int
+parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
+                    ScanEnv* env)
+{
+  int prop, status;
+  Node* fresh;
+  CClassNode* klass;
+
+  prop = fetch_char_property_to_ctype(src, end, env);
+  if (prop < 0) return prop;
+
+  fresh = node_new_cclass();
+  *np = fresh;
+  CHECK_NULL_RETURN_MEMERR(fresh);
+
+  klass = NCCLASS(fresh);
+  status = add_ctype_to_cc(klass, prop, 0, env);
+  if (status == 0 && tok->u.prop.not != 0) {
+    NCCLASS_SET_NOT(klass);
+  }
+
+  return status;
+}
+
+/* State of the character-class member scanner driven by parse_char_class(),
+ * next_state_val() and next_state_class(). */
+enum CCSTATE {
+ CCS_VALUE, /* a value or class was just read; a single value may still be pending in vs */
+ CCS_RANGE, /* a range operator followed a value; the next value closes the range */
+ CCS_COMPLETE, /* a range has just been closed */
+ CCS_START /* at the start of the class, or right after "&&" */
+};
+/* Kind of the most recently read character-class member. */
+enum CCVALTYPE {
+ CCV_SB, /* value stored through the bitset cc->bs */
+ CCV_CODE_POINT, /* value stored through add_code_range() into cc->mbuf */
+ CCV_CLASS /* a whole class/ctype was added; there is no single pending value */
+};
+
+/*
+ * State transition taken after a whole class (ctype, POSIX bracket or
+ * property) has been added to cc.
+ *
+ * A class may not terminate a range, so CCS_RANGE is an error.  If a single
+ * value is still pending in *vs it is committed to cc first.  Afterwards
+ * the state becomes CCS_VALUE with value type CCV_CLASS.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
+                 enum CCSTATE* state, ScanEnv* env)
+{
+  if (*state == CCS_RANGE)
+    return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
+
+  if (*state == CCS_VALUE) {
+    /* commit the pending single value, if there is one */
+    switch (*type) {
+    case CCV_SB:
+      {
+        BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
+      }
+      break;
+
+    case CCV_CODE_POINT:
+      {
+        int status = add_code_range(&(cc->mbuf), env, *vs, *vs);
+        if (status < 0) return status;
+      }
+      break;
+
+    default: /* CCV_CLASS: nothing is pending */
+      break;
+    }
+  }
+
+  *type  = CCV_CLASS;
+  *state = CCS_VALUE;
+  return 0;
+}
+/* State transition taken when a single value v of kind intype is read
+ * inside a character class.
+ *   CCS_VALUE:    the previously pending value *vs is committed to cc.
+ *   CCS_RANGE:    the range *vs..v is committed and the state becomes
+ *                 CCS_COMPLETE.
+ *   CCS_COMPLETE, CCS_START: the state becomes CCS_VALUE.
+ * In every non-error case v, v_israw and intype are then stored in *vs,
+ * *vs_israw and *type as the new pending value.
+ * Returns 0 on success or a negative error code.
+ */
+static int
+next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
+ int* vs_israw, int v_israw,
+ enum CCVALTYPE intype, enum CCVALTYPE* type,
+ enum CCSTATE* state, ScanEnv* env)
+{
+ int r;
+
+ switch (*state) {
+ case CCS_VALUE:
+ if (*type == CCV_SB)
+ BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
+ else if (*type == CCV_CODE_POINT) {
+ r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+ if (r < 0) return r;
+ }
+ break;
+
+ case CCS_RANGE:
+ if (intype == *type) { /* both ends of the range are of the same kind */
+ if (intype == CCV_SB) {
+ if (*vs > 0xff || v > 0xff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ if (*vs > v) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ goto ccs_range_end;
+ else
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+ }
+ bitset_set_range(env, cc->bs, (int )*vs, (int )v);
+ }
+ else {
+ r = add_code_range(&(cc->mbuf), env, *vs, v);
+ if (r < 0) return r;
+ }
+ }
+ else { /* mixed kinds: record the range in both the bitset and mbuf */
+ if (*vs > v) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ goto ccs_range_end;
+ else
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
+ }
+ bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); /* bitset part is capped at 0xff */
+ r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
+ if (r < 0) return r;
+ }
+ ccs_range_end:
+ *state = CCS_COMPLETE;
+ break;
+
+ case CCS_COMPLETE:
+ case CCS_START:
+ *state = CCS_VALUE;
+ break;
+
+ default:
+ break;
+ }
+
+ *vs_israw = v_israw;
+ *vs = v;
+ *type = intype;
+ return 0;
+}
+/* Scans the text from `from` up to `end` for the code point c.
+ * Returns 1 as soon as c is fetched, 0 if the end is reached without it.
+ * in_esc is set after the syntax's escape character has been fetched.
+ * NOTE(review): when ignore_escaped is set, the escape branch only clears
+ * in_esc and does not consume a character, so the character following the
+ * escape is still compared with c on the next iteration -- confirm whether
+ * skipping that character was intended.
+ */
+static int
+code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
+ ScanEnv* env)
+{
+ int in_esc;
+ OnigCodePoint code;
+ OnigEncoding enc = env->enc;
+ UChar* p = from;
+ PFETCH_READY;
+
+ in_esc = 0;
+ while (! PEND) {
+ if (ignore_escaped && in_esc) {
+ in_esc = 0; /* no character is fetched on this iteration */
+ }
+ else {
+ PFETCH(code);
+ if (code == c) return 1;
+ if (code == MC_ESC(env->syntax)) in_esc = 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Parses the members of a character class up to its closing bracket and
+ * stores a new cclass node in *np.
+ *
+ * tok/src/end are the token scanner state; on success *src is advanced past
+ * the closing bracket and 0 is returned.  On failure a negative error code
+ * is returned; *np may already hold the partially built node.
+ *
+ * "&&" handling: the first operand is built in the node's own CClassNode.
+ * After the first "&&" that class becomes prev_cc and further operands are
+ * built in the stack-allocated work_cc, which is and-ed into prev_cc at the
+ * next "&&" or at the end.  work_cc.mbuf is heap allocated, so every error
+ * exit taken inside the main loop must go through the err label, which
+ * releases it.
+ */
+static int
+parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
+                 ScanEnv* env)
+{
+  int r, neg, len, fetched, and_start;
+  OnigCodePoint v, vs;
+  UChar *p;
+  Node* node;
+  CClassNode *cc, *prev_cc;
+  CClassNode work_cc;
+
+  enum CCSTATE state;
+  enum CCVALTYPE val_type, in_type;
+  int val_israw, in_israw;
+
+  prev_cc = (CClassNode* )NULL;
+  *np = NULL_NODE;
+  r = fetch_token_in_cc(tok, src, end, env);
+  if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
+    neg = 1;
+    r = fetch_token_in_cc(tok, src, end, env);
+  }
+  else {
+    neg = 0;
+  }
+
+  if (r < 0) return r;
+  if (r == TK_CC_CLOSE) {
+    /* "[]" is only an empty class if no later ']' could close it */
+    if (! code_exist_check((OnigCodePoint )']',
+                           *src, env->pattern_end, 1, env))
+      return ONIGERR_EMPTY_CHAR_CLASS;
+
+    CC_ESC_WARN(env, (UChar* )"]");
+    r = tok->type = TK_CHAR;  /* allow []...] */
+  }
+
+  *np = node = node_new_cclass();
+  CHECK_NULL_RETURN_MEMERR(node);
+  cc = NCCLASS(node);
+
+  and_start = 0;
+  state = CCS_START;
+  p = *src;
+  while (r != TK_CC_CLOSE) {
+    fetched = 0;
+    switch (r) {
+    case TK_CHAR:
+      if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
+          (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
+        in_type = CCV_CODE_POINT;
+      }
+      else if (len < 0) {
+        r = len;
+        goto err;
+      }
+      else {
+      sb_char:
+        in_type = CCV_SB;
+      }
+      v = (OnigCodePoint )tok->u.c;
+      in_israw = 0;
+      goto val_entry2;
+      break;
+
+    case TK_RAW_BYTE:
+      /* tok->base != 0 : octal or hexadec. */
+      if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
+        UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+        UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
+        UChar* psave = p;
+        int i, base = tok->base;
+
+        /* collect consecutive raw bytes of the same base into one
+           multi-byte character */
+        buf[0] = tok->u.c;
+        for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
+          r = fetch_token_in_cc(tok, &p, end, env);
+          if (r < 0) goto err;
+          if (r != TK_RAW_BYTE || tok->base != base) {
+            fetched = 1;
+            break;
+          }
+          buf[i] = tok->u.c;
+        }
+
+        if (i < ONIGENC_MBC_MINLEN(env->enc)) {
+          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+          goto err;
+        }
+
+        len = enclen(env->enc, buf, buf+i);
+        if (i < len) {
+          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+          goto err;
+        }
+        else if (i > len) { /* fetch back */
+          p = psave;
+          for (i = 1; i < len; i++) {
+            r = fetch_token_in_cc(tok, &p, end, env);
+          }
+          fetched = 0;
+        }
+
+        if (i == 1) {
+          v = (OnigCodePoint )buf[0];
+          goto raw_single;
+        }
+        else {
+          v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
+          in_type = CCV_CODE_POINT;
+        }
+      }
+      else {
+        v = (OnigCodePoint )tok->u.c;
+      raw_single:
+        in_type = CCV_SB;
+      }
+      in_israw = 1;
+      goto val_entry2;
+      break;
+
+    case TK_CODE_POINT:
+      v = tok->u.code;
+      in_israw = 1;
+    val_entry:
+      len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
+      if (len < 0) {
+        r = len;
+        goto err;
+      }
+      in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
+    val_entry2:
+      r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
+                         &state, env);
+      if (r != 0) goto err;
+      break;
+
+    case TK_POSIX_BRACKET_OPEN:
+      r = parse_posix_bracket(cc, &p, end, env);
+      if (r < 0) goto err;
+      if (r == 1) {  /* is not POSIX bracket */
+        CC_ESC_WARN(env, (UChar* )"[");
+        p = tok->backp;
+        v = (OnigCodePoint )tok->u.c;
+        in_israw = 0;
+        goto val_entry;
+      }
+      goto next_class;
+      break;
+
+    case TK_CHAR_TYPE:
+      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
+      /* go through err so that work_cc.mbuf is released after "&&" */
+      if (r != 0) goto err;
+
+    next_class:
+      r = next_state_class(cc, &vs, &val_type, &state, env);
+      if (r != 0) goto err;
+      break;
+
+    case TK_CHAR_PROPERTY:
+      {
+        int ctype;
+
+        ctype = fetch_char_property_to_ctype(&p, end, env);
+        if (ctype < 0) {
+          /* go through err so that work_cc.mbuf is released after "&&" */
+          r = ctype;
+          goto err;
+        }
+        r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
+        if (r != 0) goto err;
+        goto next_class;
+      }
+      break;
+
+    case TK_CC_RANGE:
+      if (state == CCS_VALUE) {
+        r = fetch_token_in_cc(tok, &p, end, env);
+        if (r < 0) goto err;
+        fetched = 1;
+        if (r == TK_CC_CLOSE) { /* allow [x-] */
+        range_end_val:
+          v = (OnigCodePoint )'-';
+          in_israw = 0;
+          goto val_entry;
+        }
+        else if (r == TK_CC_AND) {
+          CC_ESC_WARN(env, (UChar* )"-");
+          goto range_end_val;
+        }
+        state = CCS_RANGE;
+      }
+      else if (state == CCS_START) {
+        /* [-xa] is allowed */
+        v = (OnigCodePoint )tok->u.c;
+        in_israw = 0;
+
+        r = fetch_token_in_cc(tok, &p, end, env);
+        if (r < 0) goto err;
+        fetched = 1;
+        /* [--x] or [a&&-x] is warned. */
+        if (r == TK_CC_RANGE || and_start != 0)
+          CC_ESC_WARN(env, (UChar* )"-");
+
+        goto val_entry;
+      }
+      else if (state == CCS_RANGE) {
+        CC_ESC_WARN(env, (UChar* )"-");
+        goto sb_char;  /* [!--x] is allowed */
+      }
+      else { /* CCS_COMPLETE */
+        r = fetch_token_in_cc(tok, &p, end, env);
+        if (r < 0) goto err;
+        fetched = 1;
+        if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
+        else if (r == TK_CC_AND) {
+          CC_ESC_WARN(env, (UChar* )"-");
+          goto range_end_val;
+        }
+
+        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
+          CC_ESC_WARN(env, (UChar* )"-");
+          goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
+        }
+        r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
+        goto err;
+      }
+      break;
+
+    case TK_CC_CC_OPEN: /* [ */
+      {
+        Node *anode;
+        CClassNode* acc;
+
+        /* nested class: parse it recursively and merge it into cc */
+        r = parse_char_class(&anode, tok, &p, end, env);
+        if (r == 0) {
+          acc = NCCLASS(anode);
+          r = or_cclass(cc, acc, env);
+        }
+        onig_node_free(anode);
+        if (r != 0) goto err;
+      }
+      break;
+
+    case TK_CC_AND: /* && */
+      {
+        if (state == CCS_VALUE) {
+          /* flush the pending value of the left operand */
+          r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+                             &val_type, &state, env);
+          if (r != 0) goto err;
+        }
+        /* initialize local variables */
+        and_start = 1;
+        state = CCS_START;
+
+        if (IS_NOT_NULL(prev_cc)) {
+          r = and_cclass(prev_cc, cc, env);
+          if (r != 0) goto err;
+          bbuf_free(cc->mbuf);
+        }
+        else {
+          prev_cc = cc;
+          cc = &work_cc;
+        }
+        initialize_cclass(cc);
+      }
+      break;
+
+    case TK_EOT:
+      r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
+      goto err;
+      break;
+    default:
+      r = ONIGERR_PARSER_BUG;
+      goto err;
+      break;
+    }
+
+    if (fetched)
+      r = tok->type;
+    else {
+      r = fetch_token_in_cc(tok, &p, end, env);
+      if (r < 0) goto err;
+    }
+  }
+
+  if (state == CCS_VALUE) {
+    r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+                       &val_type, &state, env);
+    if (r != 0) goto err;
+  }
+
+  if (IS_NOT_NULL(prev_cc)) {
+    r = and_cclass(prev_cc, cc, env);
+    if (r != 0) goto err;
+    bbuf_free(cc->mbuf);
+    cc = prev_cc;
+  }
+
+  if (neg != 0)
+    NCCLASS_SET_NOT(cc);
+  else
+    NCCLASS_CLEAR_NOT(cc);
+  if (IS_NCCLASS_NOT(cc) &&
+      IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
+    int is_empty;
+
+    is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
+    if (is_empty != 0)
+      BITSET_IS_EMPTY(cc->bs, is_empty);
+
+    if (is_empty == 0) {
+#define NEWLINE_CODE    0x0a
+
+      /* add the newline to the negated set so the class does not match it */
+      if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
+        if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
+          BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
+        else {
+          r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
+          if (r < 0) goto err;
+        }
+      }
+    }
+  }
+  *src = p;
+  return 0;
+
+ err:
+  /* the node's own class is released with the node; only the temporary
+     "&&" operand owns a buffer that must be freed here */
+  if (cc != NCCLASS(*np))
+    bbuf_free(cc->mbuf);
+  return r;
+}
+
+static int parse_subexp(Node** top, OnigToken* tok, int term,
+ UChar** src, UChar* end, ScanEnv* env);
+
+/*
+ * Parses what follows an opening parenthesis: a plain capture group or one
+ * of the "(?...)" forms (non-capturing group, look-ahead/behind, atomic
+ * group, named group, capture history, inline options).
+ *
+ * Returns:
+ *    0  *np holds an anchor/enclose/option node whose target is the parsed
+ *       sub-expression
+ *    1  grouping only: *np holds the sub-expression itself
+ *    2  option only, e.g. "(?i)": *np holds an option node with no target
+ *   <0  error code
+ * On success *src is advanced past the consumed text.
+ */
+static int
+parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+              ScanEnv* env)
+{
+  int r, num;
+  Node *target;
+  OnigOptionType option;
+  OnigCodePoint c;
+  OnigEncoding enc = env->enc;
+
+#ifdef USE_NAMED_GROUP
+  int list_capture;
+#endif
+
+  UChar* p = *src;
+  PFETCH_READY;
+
+  *np = NULL;
+  if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+
+  option = env->option;
+  if (PPEEK_IS('?') &&
+      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+    PINC;
+    if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+
+    PFETCH(c);
+    switch (c) {
+    case ':':   /* (?:...) grouping only */
+    group:
+      r = fetch_token(tok, &p, end, env);
+      if (r < 0) return r;
+      r = parse_subexp(np, tok, term, &p, end, env);
+      if (r < 0) return r;
+      *src = p;
+      return 1; /* group */
+      break;
+
+    case '=':
+      *np = onig_node_new_anchor(ANCHOR_PREC_READ);
+      break;
+    case '!':  /*         preceding read */
+      *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
+      break;
+    case '>':            /* (?>...) stop backtrack */
+      *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+      break;
+
+#ifdef USE_NAMED_GROUP
+    case '\'':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+        goto named_group1;
+      }
+      else
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      break;
+#endif
+
+    case '<':   /* look behind (?<=...), (?<!...) */
+      /* every other fetch in this function is guarded by PEND; do the same
+         here so "(?<" at the end of the pattern is rejected before the
+         fetch */
+      if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+      PFETCH(c);
+      if (c == '=')
+        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
+      else if (c == '!')
+        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
+#ifdef USE_NAMED_GROUP
+      else {
+        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+          UChar *name;
+          UChar *name_end;
+
+          PUNFETCH;
+          c = '<';
+
+        named_group1:
+          list_capture = 0;
+
+        named_group2:
+          name = p;
+          r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
+          if (r < 0) return r;
+
+          num = scan_env_add_mem_entry(env);
+          if (num < 0) return num;
+          if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
+            return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+          r = name_add(env->reg, name, name_end, num, env);
+          if (r != 0) return r;
+          *np = node_new_enclose_memory(env->option, 1);
+          CHECK_NULL_RETURN_MEMERR(*np);
+          NENCLOSE(*np)->regnum = num;
+          if (list_capture != 0)
+            BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+          env->num_named++;
+        }
+        else {
+          return ONIGERR_UNDEFINED_GROUP_OPTION;
+        }
+      }
+#else
+      else {
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      }
+#endif
+      break;
+
+    case '@':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
+#ifdef USE_NAMED_GROUP
+        /* only peek at the next character when there is one */
+        if (!PEND &&
+            IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+          PFETCH(c);
+          if (c == '<' || c == '\'') {
+            list_capture = 1;
+            goto named_group2; /* (?@<name>...) */
+          }
+          PUNFETCH;
+        }
+#endif
+        *np = node_new_enclose_memory(env->option, 0);
+        CHECK_NULL_RETURN_MEMERR(*np);
+        num = scan_env_add_mem_entry(env);
+        if (num < 0) {
+          onig_node_free(*np);
+          *np = NULL;  /* do not hand a freed node back to the caller */
+          return num;
+        }
+        else if (num >= (int )BIT_STATUS_BITS_NUM) {
+          onig_node_free(*np);
+          *np = NULL;  /* do not hand a freed node back to the caller */
+          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+        }
+        NENCLOSE(*np)->regnum = num;
+        BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+      }
+      else {
+        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      }
+      break;
+
+    case '-': case 'i': case 'm': case 's': case 'x':
+      {
+        int neg = 0;
+
+        /* consume option letters until ')' (option only) or ':' (scoped) */
+        while (1) {
+          switch (c) {
+          case ':':
+          case ')':
+            break;
+
+          case '-':  neg = 1; break;
+          case 'x':  ONOFF(option, ONIG_OPTION_EXTEND,     neg); break;
+          case 'i':  ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
+          case 's':
+            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
+            }
+            else
+              return ONIGERR_UNDEFINED_GROUP_OPTION;
+            break;
+
+          case 'm':
+            if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+              ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+            }
+            else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+              ONOFF(option, ONIG_OPTION_MULTILINE,  neg);
+            }
+            else
+              return ONIGERR_UNDEFINED_GROUP_OPTION;
+            break;
+          default:
+            return ONIGERR_UNDEFINED_GROUP_OPTION;
+          }
+
+          if (c == ')') {
+            *np = node_new_option(option);
+            CHECK_NULL_RETURN_MEMERR(*np);
+            *src = p;
+            return 2; /* option only */
+          }
+          else if (c == ':') {
+            OnigOptionType prev = env->option;
+
+            /* the new options apply only while the group body is parsed */
+            env->option = option;
+            r = fetch_token(tok, &p, end, env);
+            if (r < 0) return r;
+            r = parse_subexp(&target, tok, term, &p, end, env);
+            env->option = prev;
+            if (r < 0) {
+              /* same cleanup as the common tail below: the partial tree
+                 is not attached to anything yet */
+              onig_node_free(target);
+              return r;
+            }
+            *np = node_new_option(option);
+            CHECK_NULL_RETURN_MEMERR(*np);
+            NENCLOSE(*np)->target = target;
+            *src = p;
+            return 0;
+          }
+
+          if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+          PFETCH(c);
+        }
+      }
+      break;
+
+    default:
+      return ONIGERR_UNDEFINED_GROUP_OPTION;
+    }
+  }
+  else {
+    if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
+      goto group;
+
+    *np = node_new_enclose_memory(env->option, 0);
+    CHECK_NULL_RETURN_MEMERR(*np);
+    num = scan_env_add_mem_entry(env);
+    if (num < 0) return num;
+    NENCLOSE(*np)->regnum = num;
+  }
+
+  /* common tail: *np is an anchor or enclose node still lacking a target */
+  CHECK_NULL_RETURN_MEMERR(*np);
+  r = fetch_token(tok, &p, end, env);
+  if (r < 0) return r;
+  r = parse_subexp(&target, tok, term, &p, end, env);
+  if (r < 0) {
+    onig_node_free(target);
+    return r;
+  }
+
+  if (NTYPE(*np) == NT_ANCHOR)
+    NANCHOR(*np)->target = target;
+  else {
+    NENCLOSE(*np)->target = target;
+    if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
+      /* Don't move this to previous of parse_subexp() */
+      r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
+      if (r != 0) return r;
+    }
+  }
+
+  *src = p;
+  return 0;
+}
+/* Printable quantifier spellings; set_quantifier() indexes this table with a popular_quantifier_num() result when it builds a warning. */
+static const char* const PopularQStr[] = {
+ "?", "*", "+", "??", "*?", "+?"
+};
+/* Replacement text for the nested-repeat warning; set_quantifier() indexes this table with a ReduceTypeTable entry. */
+static const char* const ReduceQStr[] = {
+ "", "", "*", "*?", "??", "+ and ??", "+? and ?"
+};
+/* Attaches target to the quantifier node qnode.
+ * Returns:
+ *   1  the quantifier is exactly {1,1}; nothing is attached
+ *   2  target is a string that was split (group == 0); only the node
+ *      returned by str_node_split_last_char() was attached
+ *   0  otherwise; target is attached, or onig_reduce_nested_quantifier()
+ *      was called on the pair when both are "popular" quantifiers
+ * When USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR is defined, a warning
+ * about nested repeat operators may be sent to onig_verb_warn.
+ */
+static int
+set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
+{
+ QtfrNode* qn;
+
+ qn = NQTFR(qnode);
+ if (qn->lower == 1 && qn->upper == 1) {
+ return 1;
+ }
+
+ switch (NTYPE(target)) {
+ case NT_STR:
+ if (! group) { /* the string did not come from a group, so only part of it may be quantified */
+ StrNode* sn = NSTR(target);
+ if (str_node_can_be_split(sn, env->enc)) {
+ Node* n = str_node_split_last_char(sn, env->enc);
+ if (IS_NOT_NULL(n)) {
+ qn->target = n;
+ return 2;
+ }
+ }
+ }
+ break;
+
+ case NT_QTFR:
+ { /* check redundant double repeat. */
+ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
+ QtfrNode* qnt = NQTFR(target);
+ int nestq_num = popular_quantifier_num(qn);
+ int targetq_num = popular_quantifier_num(qnt);
+
+#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+ if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
+ UChar buf[WARN_BUFSIZE];
+
+ switch(ReduceTypeTable[targetq_num][nestq_num]) { /* NOTE(review): assumes both indexes are non-negative here -- confirm against IS_QUANTIFIER_BY_NUMBER */
+ case RQ_ASIS:
+ break;
+
+ case RQ_DEL:
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ (UChar* )"redundant nested repeat operator");
+ (*onig_verb_warn)((char* )buf);
+ }
+ goto warn_exit;
+ break;
+
+ default:
+ if (onig_verb_warn != onig_null_warn) {
+ onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
+ env->pattern, env->pattern_end,
+ (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
+ PopularQStr[targetq_num], PopularQStr[nestq_num],
+ ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
+ (*onig_verb_warn)((char* )buf);
+ }
+ goto warn_exit;
+ break;
+ }
+ }
+
+ warn_exit:
+#endif
+ if (targetq_num >= 0) {
+ if (nestq_num >= 0) {
+ onig_reduce_nested_quantifier(qnode, target);
+ goto q_exit; /* skip the plain attachment below */
+ }
+ else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ qn->target = target;
+ q_exit:
+ return 0;
+}
+
+
+#ifdef USE_SHARED_CCLASS_TABLE
+
+#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
+
+/* for ctype node hash table */
+/* Hash key: all three fields are compared by type_cclass_cmp() and mixed into the hash by type_cclass_hash(). */
+typedef struct {
+ OnigEncoding enc;
+ int not;
+ int type;
+} type_cclass_key;
+
+/* Key comparison for the ctype node hash table: returns 0 when the two
+   keys agree in type, encoding and negation flag, and 1 otherwise. */
+static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
+{
+  int same = (x->type == y->type) &&
+             (x->enc  == y->enc)  &&
+             (x->not  == y->not);
+
+  return same ? 0 : 1;
+}
+
+/*
+ * Hash function for the ctype node hash table.
+ *
+ * Mixes the raw bytes of key->enc and key->type with a multiplicative
+ * step, adds key->not, and folds the high bits down.  The accumulator is
+ * unsigned so that the repeated multiplication wraps around with defined
+ * behaviour; with a signed int it would overflow.  Keys that compare equal
+ * under type_cclass_cmp() still hash to the same value.
+ */
+static st_index_t type_cclass_hash(type_cclass_key* key)
+{
+  int i;
+  unsigned int val;
+  const UChar *p;
+
+  val = 0;
+
+  p = (const UChar* )&(key->enc);
+  for (i = 0; i < (int )sizeof(key->enc); i++) {
+    val = val * 997u + (unsigned int )*p++;
+  }
+
+  p = (const UChar* )(&key->type);
+  for (i = 0; i < (int )sizeof(key->type); i++) {
+    val = val * 997u + (unsigned int )*p++;
+  }
+
+  val += (unsigned int )key->not;
+  return (st_index_t )(val + (val >> 5));
+}
+/* Callback pair for a hash table keyed by type_cclass_key: comparison function first, hash function second. */
+static const struct st_hash_type type_type_cclass_hash = {
+ type_cclass_cmp,
+ type_cclass_hash,
+};
+/* Shared table of character-class nodes; emptied, freed and reset to NULL by onig_free_shared_cclass_table(). */
+static st_table* OnigTypeCClassTable;
+
+
+/*
+ * Per-entry callback used while tearing down the shared cclass table.
+ * Releases the node's multi-byte buffer (if any), the node itself and the
+ * key, then asks the table to delete the entry.  arg is unused.
+ */
+static int
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
+{
+  if (! IS_NULL(node)) {
+    BBuf* ranges = NCCLASS(node)->mbuf;
+
+    if (! IS_NULL(ranges)) xfree(ranges);
+    xfree(node);
+  }
+
+  if (! IS_NULL(key)) xfree(key);
+
+  return ST_DELETE;
+}
+
+/*
+ * Destroys the shared cclass table, if one exists: every entry is released
+ * through i_free_shared_class(), the table itself is freed and the global
+ * pointer is reset.  The work is bracketed by THREAD_ATOMIC_START/END.
+ * Always returns 0.
+ */
+extern int
+onig_free_shared_cclass_table(void)
+{
+  THREAD_ATOMIC_START;
+  {
+    st_table* shared = OnigTypeCClassTable;
+
+    if (! IS_NULL(shared)) {
+      onig_st_foreach(shared, i_free_shared_class, 0);
+      onig_st_free_table(shared);
+      OnigTypeCClassTable = NULL;
+    }
+  }
+  THREAD_ATOMIC_END;
+
+  return 0;
+}
+
+#endif /* USE_SHARED_CCLASS_TABLE */
+
+
+#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+/*
+ * Rewrites a negated character class as an equivalent non-negated one.
+ *
+ * The bitset is inverted in place.  For encodings that are not single-byte
+ * the multi-byte range buffer is replaced by its complement.  Finally the
+ * NOT flag is cleared.  A class that is not negated is left untouched.
+ * Returns 0 on success or the non-zero result of not_code_range_buf().
+ */
+static int
+clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
+{
+  if (! IS_NCCLASS_NOT(cc))
+    return 0;
+
+  bitset_invert(cc->bs);
+
+  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+    BBuf *inverted;
+    int status;
+
+    status = not_code_range_buf(enc, cc->mbuf, &inverted);
+    if (status != 0) return status;
+
+    /* swap the old range buffer for its complement */
+    bbuf_free(cc->mbuf);
+    cc->mbuf = inverted;
+  }
+
+  NCCLASS_CLEAR_NOT(cc);
+  return 0;
+}
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+/* Context that i_apply_case_fold() receives through its void* argument. */
+typedef struct {
+ ScanEnv* env; /* scan environment; supplies the encoding */
+ CClassNode* cc; /* class being extended with case-fold targets */
+ Node* alt_root; /* not accessed by i_apply_case_fold(); presumably the head of the list built through ptail -- TODO confirm */
+ Node** ptail; /* slot where the next alternative node is stored */
+} IApplyCaseFoldArg;
+/* Case-fold callback: `from` folds to the to_len code points in to[].
+ * arg is an IApplyCaseFoldArg.
+ *   to_len == 1: when `from` belongs to the class, the single target is
+ *     added to it (bitset or multi-byte buffer); the exact condition
+ *     depends on CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS.
+ *   to_len  > 1: when `from` belongs to the class, the targets are encoded
+ *     and concatenated into one string node, which is appended to the
+ *     alternative list through iarg->ptail.
+ * Returns 0, or a negative error code from node construction.
+ * NOTE(review): the results of add_code_range0() and
+ * clear_not_flag_cclass() are not checked here.
+ */
+static int
+i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
+ int to_len, void* arg)
+{
+ IApplyCaseFoldArg* iarg;
+ ScanEnv* env;
+ CClassNode* cc;
+ BitSetRef bs;
+
+ iarg = (IApplyCaseFoldArg* )arg;
+ env = iarg->env;
+ cc = iarg->cc;
+ bs = cc->bs;
+
+ if (to_len == 1) {
+ int is_in = onig_is_code_in_cc(env->enc, from, cc);
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+ if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
+ (is_in == 0 && IS_NCCLASS_NOT(cc))) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+ add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ }
+ else {
+ BITSET_SET_BIT(bs, *to);
+ }
+ }
+#else
+ if (is_in != 0) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+ if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); /* make the class positive before adding the target */
+ add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ }
+ else {
+ if (IS_NCCLASS_NOT(cc)) {
+ BITSET_CLEAR_BIT(bs, *to); /* the class is negated, so the target's bit is cleared instead of set */
+ }
+ else
+ BITSET_SET_BIT(bs, *to);
+ }
+ }
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+ }
+ else {
+ int r, i, len;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ Node *snode = NULL_NODE;
+
+ if (onig_is_code_in_cc(env->enc, from, cc)
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+ && !IS_NCCLASS_NOT(cc)
+#endif
+ ) {
+ for (i = 0; i < to_len; i++) {
+ len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
+ if (i == 0) {
+ snode = onig_node_new_str(buf, buf + len);
+ CHECK_NULL_RETURN_MEMERR(snode);
+
+ /* char-class expanded multi-char only
+ compare with string folded at match time. */
+ NSTRING_SET_AMBIG(snode);
+ }
+ else {
+ r = onig_node_str_cat(snode, buf, buf + len);
+ if (r < 0) {
+ onig_node_free(snode);
+ return r;
+ }
+ }
+ }
+
+ *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
+ CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); /* NOTE(review): snode is not freed on this path -- confirm */
+ iarg->ptail = &(NCDR((*(iarg->ptail)))); /* advance the tail to the new node's next-alternative slot */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * parse_exp - parse one atom of the pattern and any quantifiers after it.
+ *
+ *   np    receives the node built for the atom (set to NULL on entry).
+ *   tok   current token; advanced with fetch_token() as input is consumed.
+ *   term  token type that terminates the enclosing sub-expression.
+ *   src   in/out cursor into the pattern, end is the pattern limit.
+ *   env   scan environment (encoding, syntax, options).
+ *
+ * Returns a negative ONIGERR_* code on failure.  Otherwise it returns the
+ * result of the last fetch_token() call (or tok->type for an empty
+ * expression / option-only group), which the caller compares against
+ * TK_EOT, TK_ALT and term.
+ *
+ * On failure *np may hold a partially built node; callers in this file
+ * release it with onig_node_free().
+ */
+static int
+parse_exp(Node** np, OnigToken* tok, int term,
+          UChar** src, UChar* end, ScanEnv* env)
+{
+  int r, len, group = 0;
+  Node* qn;
+  Node** targetp;
+
+  *np = NULL;
+  if (tok->type == (enum TokenSyms )term)
+    goto end_of_token;
+
+  switch (tok->type) {
+  case TK_ALT:
+  case TK_EOT:
+  end_of_token:
+    /* nothing before the terminator: represent it by an empty node */
+    *np = node_new_empty();
+    CHECK_NULL_RETURN_MEMERR(*np);
+    return tok->type;
+
+  case TK_SUBEXP_OPEN:
+    r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+    if (r < 0) return r;
+    if (r == 1) group = 1;
+    else if (r == 2) { /* option only */
+      Node* target;
+      OnigOptionType prev = env->option;
+
+      /* the option applies to the rest of the enclosing sub-expression */
+      env->option = NENCLOSE(*np)->option;
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_subexp(&target, tok, term, src, end, env);
+      env->option = prev;
+      if (r < 0) {
+        onig_node_free(target);
+        return r;
+      }
+      NENCLOSE(*np)->target = target;
+      return tok->type;
+    }
+    break;
+
+  case TK_SUBEXP_CLOSE:
+    if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
+      return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
+
+    /* syntax allows a stray ')': treat it as a literal */
+    if (tok->escaped) goto tk_raw_byte;
+    else goto tk_byte;
+    break;
+
+  case TK_STRING:
+  tk_byte:
+    {
+      *np = node_new_str(tok->backp, *src);
+      CHECK_NULL_RETURN_MEMERR(*np);
+
+      /* merge consecutive string tokens into a single string node */
+      while (1) {
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_STRING) break;
+
+        r = onig_node_str_cat(*np, tok->backp, *src);
+        if (r < 0) return r;
+      }
+
+    string_end:
+      /* r already holds the next token: skip the fetch at re_entry */
+      targetp = np;
+      goto repeat;
+    }
+    break;
+
+  case TK_RAW_BYTE:
+  tk_raw_byte:
+    {
+      *np = node_new_str_raw_char((UChar )tok->u.c);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      len = 1;
+      /* collect raw bytes until they form one complete character */
+      while (1) {
+        if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
+          if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
+            /* a negative r falls through the repeat check and is returned */
+            r = fetch_token(tok, src, end, env);
+            NSTRING_CLEAR_RAW(*np);
+            goto string_end;
+          }
+        }
+
+        r = fetch_token(tok, src, end, env);
+        if (r < 0) return r;
+        if (r != TK_RAW_BYTE) {
+          /* Don't use this, it is wrong for little endian encodings. */
+          return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
+        }
+
+        r = node_str_cat_char(*np, (UChar )tok->u.c);
+        if (r < 0) return r;
+
+        len++;
+      }
+    }
+    break;
+
+  case TK_CODE_POINT:
+    {
+      UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+      int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
+      if (num < 0) return num;
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
+      *np = node_new_str_raw(buf, buf + num);
+#else
+      *np = node_new_str(buf, buf + num);
+#endif
+      CHECK_NULL_RETURN_MEMERR(*np);
+    }
+    break;
+
+  case TK_QUOTE_OPEN:
+    {
+      OnigCodePoint end_op[2];
+      UChar *qstart, *qend, *nextp;
+
+      /* the quoted text runs up to <escape>E, or to the end of the pattern */
+      end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
+      end_op[1] = (OnigCodePoint )'E';
+      qstart = *src;
+      qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
+      if (IS_NULL(qend)) {
+        nextp = qend = end;
+      }
+      *np = node_new_str(qstart, qend);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      *src = nextp;
+    }
+    break;
+
+  case TK_CHAR_TYPE:
+    {
+      switch (tok->u.prop.ctype) {
+      case ONIGENC_CTYPE_D:
+      case ONIGENC_CTYPE_S:
+      case ONIGENC_CTYPE_W:
+        {
+          CClassNode* cc;
+          *np = node_new_cclass();
+          CHECK_NULL_RETURN_MEMERR(*np);
+          cc = NCCLASS(*np);
+          add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+          if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
+        }
+        break;
+
+      case ONIGENC_CTYPE_WORD:
+        *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
+        CHECK_NULL_RETURN_MEMERR(*np);
+        break;
+
+      case ONIGENC_CTYPE_SPACE:
+      case ONIGENC_CTYPE_DIGIT:
+      case ONIGENC_CTYPE_XDIGIT:
+        {
+          CClassNode* cc;
+
+#ifdef USE_SHARED_CCLASS_TABLE
+          const OnigCodePoint *mbr;
+          OnigCodePoint sb_out;
+
+          r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
+                                           &sb_out, &mbr);
+          if (r == 0 &&
+              ONIGENC_CODE_RANGE_NUM(mbr)
+              >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
+            type_cclass_key  key;
+            type_cclass_key* new_key;
+
+            key.enc  = env->enc;
+            key.not  = tok->u.prop.not;
+            key.type = tok->u.prop.ctype;
+
+            THREAD_ATOMIC_START;
+
+            if (IS_NULL(OnigTypeCClassTable)) {
+              OnigTypeCClassTable
+                = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
+              if (IS_NULL(OnigTypeCClassTable)) {
+                THREAD_ATOMIC_END;
+                return ONIGERR_MEMORY;
+              }
+            }
+            else {
+              /* reuse a class node already registered for this key */
+              if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
+                                 (st_data_t* )np)) {
+                THREAD_ATOMIC_END;
+                break;
+              }
+            }
+
+            *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
+                                                     sb_out, mbr);
+            if (IS_NULL(*np)) {
+              THREAD_ATOMIC_END;
+              return ONIGERR_MEMORY;
+            }
+
+            /* Allocate the table key before the node is marked shared and
+               published, so an allocation failure leaves an ordinary node. */
+            new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+            if (IS_NULL(new_key)) {
+              THREAD_ATOMIC_END;
+              return ONIGERR_MEMORY;
+            }
+
+            cc = NCCLASS(*np);
+            NCCLASS_SET_SHARE(cc);
+            xmemcpy(new_key, &key, sizeof(type_cclass_key));
+            onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
+                               (st_data_t )*np);
+
+            THREAD_ATOMIC_END;
+          }
+          else {
+#endif
+            *np = node_new_cclass();
+            CHECK_NULL_RETURN_MEMERR(*np);
+            cc = NCCLASS(*np);
+            add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+            if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
+#ifdef USE_SHARED_CCLASS_TABLE
+          }
+#endif
+        }
+        break;
+
+      default:
+        return ONIGERR_PARSER_BUG;
+        break;
+      }
+    }
+    break;
+
+  case TK_CHAR_PROPERTY:
+    r = parse_char_property(np, tok, src, end, env);
+    if (r != 0) return r;
+    break;
+
+  case TK_CC_OPEN:
+    {
+      CClassNode* cc;
+
+      r = parse_char_class(np, tok, src, end, env);
+      if (r != 0) return r;
+
+      cc = NCCLASS(*np);
+      if (IS_IGNORECASE(env->option)) {
+        IApplyCaseFoldArg iarg;
+
+        iarg.env      = env;
+        iarg.cc       = cc;
+        iarg.alt_root = NULL_NODE;
+        iarg.ptail    = &(iarg.alt_root);
+
+        r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+                                        i_apply_case_fold, &iarg);
+        if (r != 0) {
+          onig_node_free(iarg.alt_root);
+          return r;
+        }
+        /* multi-char fold results become alternatives next to the class */
+        if (IS_NOT_NULL(iarg.alt_root)) {
+          Node* work = onig_node_new_alt(*np, iarg.alt_root);
+          if (IS_NULL(work)) {
+            onig_node_free(iarg.alt_root);
+            return ONIGERR_MEMORY;
+          }
+          *np = work;
+        }
+      }
+    }
+    break;
+
+  case TK_ANYCHAR:
+    *np = node_new_anychar();
+    CHECK_NULL_RETURN_MEMERR(*np);
+    break;
+
+  case TK_ANYCHAR_ANYTIME:
+    /* any-char wrapped in a {0,infinite} quantifier */
+    *np = node_new_anychar();
+    CHECK_NULL_RETURN_MEMERR(*np);
+    qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
+    CHECK_NULL_RETURN_MEMERR(qn);
+    NQTFR(qn)->target = *np;
+    *np = qn;
+    break;
+
+  case TK_BACKREF:
+    len = tok->u.backref.num;
+    *np = node_new_backref(len,
+                   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+                           tok->u.backref.by_name,
+#ifdef USE_BACKREF_WITH_LEVEL
+                           tok->u.backref.exist_level,
+                           tok->u.backref.level,
+#endif
+                           env);
+    CHECK_NULL_RETURN_MEMERR(*np);
+    break;
+
+#ifdef USE_SUBEXP_CALL
+  case TK_CALL:
+    {
+      int gnum = tok->u.call.gnum;
+
+      if (gnum < 0) {
+        gnum = BACKREF_REL_TO_ABS(gnum, env);
+        if (gnum <= 0)
+          return ONIGERR_INVALID_BACKREF;
+      }
+      *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      env->num_call++;
+    }
+    break;
+#endif
+
+  case TK_ANCHOR:
+    *np = onig_node_new_anchor(tok->u.anchor);
+    CHECK_NULL_RETURN_MEMERR(*np);
+    break;
+
+  case TK_OP_REPEAT:
+  case TK_INTERVAL:
+    /* a repeat operator with nothing in front of it */
+    if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
+      if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
+        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
+      else {
+        *np = node_new_empty();
+        CHECK_NULL_RETURN_MEMERR(*np);
+      }
+    }
+    else {
+      goto tk_byte;
+    }
+    break;
+
+  default:
+    return ONIGERR_PARSER_BUG;
+    break;
+  }
+
+  {
+    targetp = np;
+
+  re_entry:
+    r = fetch_token(tok, src, end, env);
+    if (r < 0) return r;
+
+  repeat:
+    if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+      if (is_invalid_quantifier_target(*targetp))
+        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+
+      qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
+                               (r == TK_INTERVAL ? 1 : 0));
+      CHECK_NULL_RETURN_MEMERR(qn);
+      NQTFR(qn)->greedy = tok->u.repeat.greedy;
+      r = set_quantifier(qn, *targetp, group, env);
+      if (r < 0) {
+        onig_node_free(qn);
+        return r;
+      }
+
+      if (tok->u.repeat.possessive != 0) {
+        Node* en;
+        en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+        if (IS_NULL(en)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        NENCLOSE(en)->target = qn;
+        qn = en;
+      }
+
+      /* r is the result of set_quantifier(): 0, 1 or 2 */
+      if (r == 0) {
+        *targetp = qn;
+      }
+      else if (r == 1) {
+        onig_node_free(qn);
+      }
+      else if (r == 2) { /* split case: /abc+/ */
+        Node *tmp;
+
+        *targetp = node_new_list(*targetp, NULL);
+        if (IS_NULL(*targetp)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        tmp = NCDR(*targetp) = node_new_list(qn, NULL);
+        if (IS_NULL(tmp)) {
+          onig_node_free(qn);
+          return ONIGERR_MEMORY;
+        }
+        /* further quantifiers apply to the split-off tail */
+        targetp = &(NCAR(tmp));
+      }
+      goto re_entry;
+    }
+  }
+
+  return r;
+}
+
+/*
+ * parse_branch - parse a sequence of expressions (one alternative).
+ *
+ * Calls parse_exp() until it reports TK_EOT, TK_ALT or term.  A single
+ * expression is returned as-is in *top; several are chained into a list.
+ *
+ * Returns the terminating token type, or a negative ONIGERR_* code.
+ * On failure *top holds whatever was linked so far (possibly NULL) and
+ * is left for the caller to release.
+ */
+static int
+parse_branch(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_exp(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == TK_EOT || r == term || r == TK_ALT) {
+    *top = node;
+  }
+  else {
+    *top = node_new_list(node, NULL);
+    if (IS_NULL(*top)) {
+      /* node is not linked into anything yet: release it here */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCDR(*top));
+    while (r != TK_EOT && r != term && r != TK_ALT) {
+      r = parse_exp(&node, tok, term, src, end, env);
+      if (r < 0) {
+        onig_node_free(node);
+        return r;
+      }
+
+      if (NTYPE(node) == NT_LIST) {
+        /* splice an already-built list in and continue after its tail */
+        *headp = node;
+        while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
+        headp = &(NCDR(node));
+      }
+      else {
+        *headp = node_new_list(node, NULL);
+        if (IS_NULL(*headp)) {
+          onig_node_free(node);
+          return ONIGERR_MEMORY;
+        }
+        headp = &(NCDR(*headp));
+      }
+    }
+  }
+
+  return r;
+}
+
+/*
+ * parse_subexp - parse alternatives ("a|b|...") up to the terminator.
+ *
+ * term: TK_EOT or TK_SUBEXP_CLOSE
+ *
+ * A single branch is returned as-is in *top; branches separated by
+ * TK_ALT are chained into an alternation.
+ *
+ * Returns the terminating token type, or a negative ONIGERR_* code.
+ * On failure *top holds whatever was linked so far (possibly NULL) and
+ * is left for the caller to release.
+ */
+static int
+parse_subexp(Node** top, OnigToken* tok, int term,
+             UChar** src, UChar* end, ScanEnv* env)
+{
+  int r;
+  Node *node, **headp;
+
+  *top = NULL;
+  r = parse_branch(&node, tok, term, src, end, env);
+  if (r < 0) {
+    onig_node_free(node);
+    return r;
+  }
+
+  if (r == term) {
+    *top = node;
+  }
+  else if (r == TK_ALT) {
+    *top = onig_node_new_alt(node, NULL);
+    if (IS_NULL(*top)) {
+      /* node is not linked into anything yet: release it here */
+      onig_node_free(node);
+      return ONIGERR_MEMORY;
+    }
+    headp = &(NCDR(*top));
+    while (r == TK_ALT) {
+      r = fetch_token(tok, src, end, env);
+      if (r < 0) return r;
+      r = parse_branch(&node, tok, term, src, end, env);
+      if (r < 0) {
+        onig_node_free(node);
+        return r;
+      }
+
+      *headp = onig_node_new_alt(node, NULL);
+      if (IS_NULL(*headp)) {
+        onig_node_free(node);
+        return ONIGERR_MEMORY;
+      }
+      headp = &(NCDR(*headp));
+    }
+
+    if (tok->type != (enum TokenSyms )term)
+      goto err;
+  }
+  else {
+    onig_node_free(node);
+  err:
+    /* input ended on something other than the expected terminator */
+    if (term == TK_SUBEXP_CLOSE)
+      return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
+    else
+      return ONIGERR_PARSER_BUG;
+  }
+
+  return r;
+}
+
+/*
+ * parse_regexp - parse a whole pattern into *top.
+ *
+ * Fetches the first token and parses sub-expressions up to TK_EOT.
+ * Returns 0 on success or the negative error code of the failing step.
+ */
+static int
+parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
+{
+  OnigToken tok;
+  int status = fetch_token(&tok, src, end, env);
+
+  if (status >= 0)
+    status = parse_subexp(top, &tok, TK_EOT, src, end, env);
+
+  /* a non-negative result is a token type; callers only need success */
+  return (status < 0) ? status : 0;
+}
+
+/*
+ * onig_parse_make_tree - parse pattern..end into a node tree.
+ *
+ * Resets env (and, with USE_NAMED_GROUP, the name table of reg), copies
+ * the parse settings from reg into env, and runs the parser.  *root is
+ * NULL before parsing starts.  reg->num_mem is updated from env whether
+ * or not parsing succeeded.
+ *
+ * Returns the result of parse_regexp().
+ */
+extern int
+onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
+                     regex_t* reg, ScanEnv* env)
+{
+  UChar* cursor = (UChar* )pattern;
+  int status;
+
+#ifdef USE_NAMED_GROUP
+  names_clear(reg);
+#endif
+
+  scan_env_clear(env);
+
+  /* where the pattern lives */
+  env->reg         = reg;
+  env->pattern     = (UChar* )pattern;
+  env->pattern_end = (UChar* )end;
+
+  /* how it is to be interpreted */
+  env->enc            = reg->enc;
+  env->syntax         = reg->syntax;
+  env->option         = reg->options;
+  env->case_fold_flag = reg->case_fold_flag;
+
+  *root = NULL;
+  status = parse_regexp(root, &cursor, (UChar* )end, env);
+  reg->num_mem = env->num_mem;
+  return status;
+}
+
+/*
+ * onig_scan_env_set_error_string - record the span [arg, arg_end) in env
+ * as the error argument.  ecode is accepted but not used.
+ */
+extern void
+onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
+                               UChar* arg, UChar* arg_end)
+{
+  env->error_end = arg_end;
+  env->error     = arg;
+}
+#endif //INCLUDE_REGEXP
diff --git a/src/regparse.h b/src/regparse.h
new file mode 100644
index 000000000..ac8758bd1
--- /dev/null
+++ b/src/regparse.h
@@ -0,0 +1,354 @@
+#ifndef ONIGURUMA_REGPARSE_H
+#define ONIGURUMA_REGPARSE_H
+/**********************************************************************
+ regparse.h - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regint.h"
+
+/* node type */
+#define NT_STR 0
+#define NT_CCLASS 1
+#define NT_CTYPE 2
+#define NT_CANY 3
+#define NT_BREF 4
+#define NT_QTFR 5
+#define NT_ENCLOSE 6
+#define NT_ANCHOR 7
+#define NT_LIST 8
+#define NT_ALT 9
+#define NT_CALL 10
+
+/* node type bit */
+#define NTYPE2BIT(type) (1<<(type))
+
+#define BIT_NT_STR NTYPE2BIT(NT_STR)
+#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
+#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
+#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
+#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
+#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
+#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
+#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
+#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
+#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
+#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
+
+#define IS_NODE_TYPE_SIMPLE(type) \
+ ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
+ BIT_NT_CANY | BIT_NT_BREF)) != 0)
+
+#define NTYPE(node) ((node)->u.base.type)
+#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
+
+#define NSTR(node) (&((node)->u.str))
+#define NCCLASS(node) (&((node)->u.cclass))
+#define NCTYPE(node) (&((node)->u.ctype))
+#define NBREF(node) (&((node)->u.bref))
+#define NQTFR(node) (&((node)->u.qtfr))
+#define NENCLOSE(node) (&((node)->u.enclose))
+#define NANCHOR(node) (&((node)->u.anchor))
+#define NCONS(node) (&((node)->u.cons))
+#define NCALL(node) (&((node)->u.call))
+
+#define NCAR(node) (NCONS(node)->car)
+#define NCDR(node) (NCONS(node)->cdr)
+
+
+
+#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
+#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
+
+#define ENCLOSE_MEMORY (1<<0)
+#define ENCLOSE_OPTION (1<<1)
+#define ENCLOSE_STOP_BACKTRACK (1<<2)
+
+#define NODE_STR_MARGIN 16
+#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_BACKREFS_SIZE 6
+
+#define NSTR_RAW (1<<0) /* by backslashed number */
+#define NSTR_AMBIG (1<<1)
+#define NSTR_DONT_GET_OPT_INFO (1<<2)
+
+#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
+#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
+#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
+#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
+ (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
+#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
+#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
+#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
+ (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
+
+#define BACKREFS_P(br) \
+  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
+
+#define NQ_TARGET_ISNOT_EMPTY 0
+#define NQ_TARGET_IS_EMPTY 1
+#define NQ_TARGET_IS_EMPTY_MEM 2
+#define NQ_TARGET_IS_EMPTY_REC 3
+
+/* status bits */
+#define NST_MIN_FIXED (1<<0)
+#define NST_MAX_FIXED (1<<1)
+#define NST_CLEN_FIXED (1<<2)
+#define NST_MARK1 (1<<3)
+#define NST_MARK2 (1<<4)
+#define NST_MEM_BACKREFED (1<<5)
+#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
+#define NST_RECURSION (1<<7)
+#define NST_CALLED (1<<8)
+#define NST_ADDR_FIXED (1<<9)
+#define NST_NAMED_GROUP (1<<10)
+#define NST_NAME_REF (1<<11)
+#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_NEST_LEVEL (1<<13)
+#define NST_BY_NUMBER (1<<14) /* {n,m} */
+
+#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
+#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
+
+#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
+#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
+#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
+#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
+#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
+#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
+#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
+#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
+#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
+ (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
+
+#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
+#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
+#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
+#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
+#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
+#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
+
+#define CALLNODE_REFNUM_UNDEF -1
+
+typedef struct {
+ NodeBase base;
+ UChar* s; /* start of the string bytes */
+ UChar* end; /* end of the string bytes; NSTRING_LEN is end - s */
+ unsigned int flag; /* NSTR_* bits, see NSTRING_SET_RAW / NSTRING_SET_AMBIG */
+ int capa; /* (allocated size - 1) or 0: use buf[] */
+ UChar buf[NODE_STR_BUF_SIZE];
+} StrNode;
+
+typedef struct {
+ NodeBase base;
+ int state;
+ struct _Node* target;
+ int lower;
+ int upper;
+ int greedy;
+ int target_empty_info;
+ struct _Node* head_exact;
+ struct _Node* next_head_exact;
+ int is_refered; /* include called node. don't eliminate even if {0} */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
+#endif
+} QtfrNode;
+
+typedef struct {
+ NodeBase base;
+ int state;
+ int type;
+ int regnum;
+ OnigOptionType option;
+ struct _Node* target;
+ AbsAddrType call_addr;
+ /* for multiple call reference */
+ OnigDistance min_len; /* min length (byte) */
+ OnigDistance max_len; /* max length (byte) */
+ int char_len; /* character length */
+ int opt_count; /* referenced count in optimize_node_left() */
+} EncloseNode;
+
+#ifdef USE_SUBEXP_CALL
+
+typedef struct {
+ int offset;
+ struct _Node* target;
+} UnsetAddr;
+
+typedef struct {
+ int num;
+ int alloc;
+ UnsetAddr* us;
+} UnsetAddrList;
+
+typedef struct {
+ NodeBase base;
+ int state;
+ int group_num;
+ UChar* name;
+ UChar* name_end;
+ struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
+ UnsetAddrList* unset_addr_list;
+} CallNode;
+
+#endif
+
+typedef struct {
+ NodeBase base;
+ int state; /* NST_* bits, see IS_BACKREF_NAME_REF / IS_BACKREF_NEST_LEVEL */
+ int back_num;
+ int back_static[NODE_BACKREFS_SIZE]; /* used by BACKREFS_P when back_dynamic is NULL */
+ int* back_dynamic; /* when non-NULL, BACKREFS_P selects this instead */
+ int nest_level;
+} BRefNode;
+
+typedef struct {
+ NodeBase base;
+ int type;
+ struct _Node* target;
+ int char_len;
+} AnchorNode;
+
+typedef struct {
+ NodeBase base;
+ struct _Node* car;
+ struct _Node* cdr;
+} ConsAltNode;
+
+typedef struct {
+ NodeBase base;
+ int ctype;
+ int not;
+} CtypeNode;
+
+typedef struct _Node {
+ union {
+ NodeBase base;
+ StrNode str;
+ CClassNode cclass;
+ QtfrNode qtfr;
+ EncloseNode enclose;
+ BRefNode bref;
+ AnchorNode anchor;
+ ConsAltNode cons;
+ CtypeNode ctype;
+#ifdef USE_SUBEXP_CALL
+ CallNode call;
+#endif
+ } u;
+} Node;
+
+
+#define NULL_NODE ((Node* )0)
+
+#define SCANENV_MEMNODES_SIZE 8
+#define SCANENV_MEM_NODES(senv) \
+ (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
+ (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
+
+typedef struct {
+ OnigOptionType option;
+ OnigCaseFoldType case_fold_flag;
+ OnigEncoding enc;
+ const OnigSyntaxType* syntax;
+ BitStatusType capture_history;
+ BitStatusType bt_mem_start;
+ BitStatusType bt_mem_end;
+ BitStatusType backrefed_mem;
+ UChar* pattern;
+ UChar* pattern_end;
+ UChar* error;
+ UChar* error_end;
+ regex_t* reg; /* for reg->names only */
+ int num_call;
+#ifdef USE_SUBEXP_CALL
+ UnsetAddrList* unset_addr_list;
+#endif
+ int num_mem;
+#ifdef USE_NAMED_GROUP
+ int num_named;
+#endif
+ int mem_alloc;
+ Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
+ Node** mem_nodes_dynamic;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+ int num_comb_exp_check;
+ int comb_exp_max_regnum;
+ int curr_max_regnum;
+ int has_recursion;
+#endif
+ int warnings_flag;
+ const char* sourcefile;
+ int sourceline;
+} ScanEnv;
+
+
+#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
+#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
+#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
+
+#ifdef USE_NAMED_GROUP
+typedef struct {
+ int new_val;
+} GroupNumRemap;
+
+extern int onig_renumber_name_table(regex_t* reg, GroupNumRemap* map);
+#endif
+
+extern int onig_strncmp(const UChar* s1, const UChar* s2, int n);
+extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end);
+extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end);
+extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc);
+extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode);
+extern void onig_node_conv_to_str_node(Node* node, int raw);
+extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end);
+extern int onig_node_str_set(Node* node, const UChar* s, const UChar* end);
+extern void onig_node_free(Node* node);
+extern Node* onig_node_new_enclose(int type);
+extern Node* onig_node_new_anchor(int type);
+extern Node* onig_node_new_str(const UChar* s, const UChar* end);
+extern Node* onig_node_new_list(Node* left, Node* right);
+extern Node* onig_node_list_add(Node* list, Node* x);
+extern Node* onig_node_new_alt(Node* left, Node* right);
+extern void onig_node_str_clear(Node* node);
+extern int onig_free_node_list(void);
+extern int onig_names_free(regex_t* reg);
+extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env);
+extern int onig_free_shared_cclass_table(void);
+
+#ifdef ONIG_DEBUG
+#ifdef USE_NAMED_GROUP
+extern int onig_print_names(FILE*, regex_t*);
+#endif
+#endif
+
+#endif /* ONIGURUMA_REGPARSE_H */
diff --git a/src/ritehash.h b/src/ritehash.h
new file mode 100644
index 000000000..48feb9656
--- /dev/null
+++ b/src/ritehash.h
@@ -0,0 +1,203 @@
+/*
+ * Rite Hash
+ *
+ *
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define INITIAL_HASH_SIZE 32
+#define UPPER_BOUND(x) ((x)>>2|((x)>>1)) /* x/4 | x/2: 3/4 of x when x is a power of two */
+
+//extern uint8_t __m[];
+
+/* mask for flags */
+static uint8_t __m[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
+
+
+#define __ac_isempty(e_flag, d_flag, i) (e_flag[(i)/8]&__m[(i)%8])
+#define __ac_isdel(e_flag, d_flag, i) (d_flag[(i)/8]&__m[(i)%8])
+#define __ac_iseither(e_flag, d_flag, i) (__ac_isempty(e_flag,d_flag,i)||__ac_isdel(e_flag,d_flag,i))
+
+
+/* struct kh_xxx
+
+ name: hash name
+ khkey_t: key data type
+ khval_t: value data type
+ kh_is_map: (not implemented / not used in RiteVM)
+ __hash_func: hash function
+ __hash_equal: hash comparison function
+*/
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+  typedef struct kh_##name { \
+    khint_t n_buckets; \
+    khint_t size; \
+    khint_t n_occupied; \
+    khint_t upper_bound; \
+    uint8_t *e_flags; \
+    uint8_t *d_flags; \
+    khkey_t *keys; \
+    khval_t *vals; \
+    khint_t mask; \
+    khint_t inc; \
+    mrb_state *mrb; \
+  } kh_##name##_t; \
+  /* allocate flag/key/value arrays for h->n_buckets and mark all empty */ \
+  static void kh_alloc_##name(kh_##name##_t *h) \
+  { \
+    khint_t sz = h->n_buckets; \
+    h->size = h->n_occupied = 0; \
+    h->upper_bound = UPPER_BOUND(sz); \
+    /* one allocation: first half is e_flags, second half is d_flags */ \
+    h->e_flags = (uint8_t *)mrb_malloc(h->mrb, sizeof(uint8_t)*sz/4); \
+    h->d_flags = h->e_flags + sz/8; \
+    memset(h->e_flags, 0xff, sz/8*sizeof(uint8_t)); \
+    memset(h->d_flags, 0x00, sz/8*sizeof(uint8_t)); \
+    h->keys = (khkey_t *)mrb_malloc(h->mrb, sizeof(khkey_t)*sz); \
+    h->vals = (khval_t *)mrb_malloc(h->mrb, sizeof(khval_t)*sz); \
+    h->mask = sz-1; \
+    h->inc = sz/2-1; \
+  } \
+  /* create a table with INITIAL_HASH_SIZE buckets */ \
+  static inline kh_##name##_t *kh_init_##name(mrb_state *mrb){ \
+    kh_##name##_t *h = (kh_##name##_t*)mrb_calloc(mrb, 1, sizeof(kh_##name##_t)); \
+    h->n_buckets = INITIAL_HASH_SIZE; \
+    h->mrb = mrb; \
+    kh_alloc_##name(h); \
+    return h; \
+  } \
+  /* release the table and its arrays (d_flags lives inside e_flags) */ \
+  static inline void kh_destroy_##name(kh_##name##_t *h) \
+  { \
+    if( h ){ \
+      mrb_free(h->mrb, h->keys); \
+      mrb_free(h->mrb, h->vals); \
+      mrb_free(h->mrb, h->e_flags); \
+      mrb_free(h->mrb, h); \
+    } \
+  } \
+  /* mark every bucket empty without releasing memory */ \
+  static inline void kh_clear_##name(kh_##name##_t *h) \
+  { \
+    if( h && h->e_flags ){ \
+      memset(h->e_flags, 0xff, h->n_buckets/8*sizeof(uint8_t)); \
+      memset(h->d_flags, 0x00, h->n_buckets/8*sizeof(uint8_t)); \
+      h->size = h->n_occupied = 0; \
+    } \
+  } \
+  /* return the bucket index of key, or h->n_buckets when absent */ \
+  static inline khint_t kh_get_##name(kh_##name##_t *h, khkey_t key) \
+  { \
+    khint_t k = __hash_func(h->mrb,key) & (h->mask); \
+    while( !__ac_isempty(h->e_flags, h->d_flags, k) ){ \
+      if( !__ac_isdel(h->e_flags, h->d_flags, k) ){ \
+        if( __hash_equal(h->mrb,h->keys[k], key) ) return k; \
+      } \
+      k = (k+h->inc) & (h->mask); \
+    } \
+    return h->n_buckets; \
+  } \
+  static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key); \
+  /* rebuild the table with at least new_n_buckets buckets (power of two) */ \
+  static void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+  { \
+    if( new_n_buckets<INITIAL_HASH_SIZE ){ \
+      new_n_buckets = INITIAL_HASH_SIZE; \
+    } else { \
+      khint_t limit = new_n_buckets; \
+      new_n_buckets = INITIAL_HASH_SIZE; \
+      while( new_n_buckets < limit ) new_n_buckets *= 2; \
+    } \
+    uint8_t *old_e_flags = h->e_flags; \
+    uint8_t *old_d_flags = h->d_flags; \
+    khkey_t *old_keys = h->keys; \
+    khval_t *old_vals = h->vals; \
+    khint_t old_n_buckets = h->n_buckets; \
+    h->n_buckets = new_n_buckets; \
+    kh_alloc_##name(h); \
+    /* relocate live entries only: deleted slots must not come back */ \
+    khint_t i; \
+    for( i=0 ; i<old_n_buckets ; i++ ){ \
+      if( !__ac_iseither(old_e_flags, old_d_flags, i) ){ \
+        khint_t k = kh_put_##name(h, old_keys[i]); \
+        kh_value(h,k) = old_vals[i]; \
+      } \
+    } \
+    /* the old arrays are no longer referenced; old_d_flags points into */ \
+    /* the old_e_flags allocation and is released together with it      */ \
+    mrb_free(h->mrb, old_e_flags); \
+    mrb_free(h->mrb, old_keys); \
+    mrb_free(h->mrb, old_vals); \
+  } \
+  /* insert key if absent and return its bucket index; the value slot */ \
+  /* is left for the caller to fill in                                */ \
+  static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key) \
+  { \
+    khint_t k; \
+    if( h->n_occupied >= h->upper_bound ){ \
+      kh_resize_##name(h, h->n_buckets*2); \
+    } \
+    k = __hash_func(h->mrb,key) & (h->mask); \
+    while( !__ac_iseither(h->e_flags, h->d_flags, k) ){ \
+      if( __hash_equal(h->mrb,h->keys[k], key) ) break; \
+      k = (k+h->inc) & (h->mask); \
+    } \
+    if( __ac_isempty(h->e_flags, h->d_flags, k) ) { \
+      /* put at empty */ \
+      h->keys[k] = key; \
+      h->e_flags[k/8] &= ~__m[k%8]; \
+      h->size++; \
+      h->n_occupied++; \
+    } else if( __ac_isdel(h->e_flags, h->d_flags, k) ) { \
+      /* put at del */ \
+      h->keys[k] = key; \
+      h->d_flags[k/8] &= ~__m[k%8]; \
+      h->size++; \
+    } \
+    return k; \
+  } \
+  /* mark bucket x deleted; the slot stays occupied for probing */ \
+  static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \
+  { \
+    h->d_flags[x/8] |= __m[x%8]; \
+    h->size--; \
+  } \
+  /* dump the flag bytes; needs <stdio.h> at the point of use */ \
+  static inline void kh_debug_##name(kh_##name##_t *h) \
+  { \
+    khint_t i; \
+    printf("idx:e_flag:d_flag\n"); \
+    for( i=0 ; i<h->n_buckets/8 ; i++ ){ \
+      printf("%4u:%02X:%02X\n", (unsigned)i, h->e_flags[i], h->d_flags[i]); \
+    } \
+  } \
+
+#define khash_t(name) kh_##name##_t
+
+#define kh_init(name,mrb) kh_init_##name(mrb)
+#define kh_destroy(name, h) kh_destroy_##name(h)
+#define kh_clear(name, h) kh_clear_##name(h)
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+#define kh_put(name, h, k, r) kh_put_##name(h, k)
+#define kh_get(name, h, k) kh_get_##name(h, k)
+#define kh_del(name, h, k) kh_del_##name(h, k)
+#define kh_debug(name, h) kh_debug_##name(h)
+
+#define kh_exist(h, x) (!__ac_iseither((h)->e_flags, (h)->d_flags, (x)))
+#define kh_key(h, x) ((h)->keys[x])
+#define kh_val(h, x) ((h)->vals[x])
+#define kh_value(h, x) ((h)->vals[x])
+#define kh_begin(h) (khint_t)(0)
+#define kh_end(h) ((h)->n_buckets)
+#define kh_size(h) ((h)->size)
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+//#define kh_int_hash_func(mrb,key) (uint32_t)(key)
+#define kh_int_hash_func(mrb,key) (uint32_t)((key)^((key)<<2)^((key)>>2))
+#define kh_int_hash_equal(mrb,a, b) (a == b)
+#define kh_int64_hash_func(mrb,key) (uint32_t)((key)>>33^(key)^(key)<<11)
+#define kh_int64_hash_equal(mrb,a, b) (a == b)
+/* X31 string hash: seeded with the first character, then h = h*31 + c
+   for each following character; the empty string hashes to 0. */
+static inline khint_t __ac_X31_hash_string(const char *s)
+{
+  khint_t hash = *s;
+
+  if (hash == 0) return hash;
+  while (*++s != '\0') {
+    hash = (hash << 5) - hash + *s;
+  }
+  return hash;
+}
+#define kh_str_hash_func(mrb,key) __ac_X31_hash_string(key)
+#define kh_str_hash_equal(mrb,a, b) (strcmp(a, b) == 0)
+
+#define KHASH_MAP_INIT_INT(name, khval_t) \
+ KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+typedef const char *kh_cstr_t;
+#define KHASH_MAP_INIT_STR(name, khval_t) \
+ KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
diff --git a/src/sprintf.c b/src/sprintf.c
new file mode 100644
index 000000000..06e6b33d7
--- /dev/null
+++ b/src/sprintf.c
@@ -0,0 +1,1112 @@
+/**********************************************************************
+
+ sprintf.c -
+
+ $Author: yugui $
+ created at: Fri Oct 15 10:39:26 JST 1993
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+ Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
+ Copyright (C) 2000 Information-technology Promotion Agency, Japan
+
+**********************************************************************/
+
+#include "mruby.h"
+#include <stdio.h>
+#include <string.h>
+#include "encoding.h"
+#include "mruby/string.h"
+#include "mruby/hash.h"
+#include "mruby/numeric.h"
+#include <math.h>
+#include <stdarg.h>
+#include <ctype.h>
+
+#ifdef HAVE_IEEEFP_H
+#include <ieeefp.h>
+#endif
+
+#ifndef MRB_TAINTED_P /* guard on the macro that is actually defined here */
+ #define MRB_TAINTED_P(p) FALSE /* fallback: every value counts as untainted */
+#endif
+
+#define BIT_DIGITS(N) (((N)*146)/485 + 1) /* decimal digits needed for N bits; log10(2) =~ 146/485 */
+#define BITSPERDIG (sizeof(mrb_int)*CHAR_BIT) /* number of bits in an mrb_int */
+#define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n))) /* n-bit mask ORed into the leading octal digit by remove_sign_bits(); l is the digit count */
+
+static void fmt_setup(char*,size_t,int,int,int,int);
+
+static char*
+remove_sign_bits(char *str, int base)
+{
+  /* Returns a pointer just past the leading run of sign digits in str:
+   * 'f' for base 16, '7' for base 8, '1' for base 2.  Any other base
+   * returns str unchanged. */
+  char *cur = str;
+  char digit;
+
+  switch (base) {
+  case 16: digit = 'f'; break;
+  case 8:
+    /* First OR the EXTENDSIGN bits into the leading octal digit. */
+    *cur |= EXTENDSIGN(3, strlen(cur));
+    digit = '7';
+    break;
+  case 2:  digit = '1'; break;
+  default: return cur;
+  }
+
+  /* Skip every leading copy of that digit. */
+  while (*cur == digit) {
+    cur++;
+  }
+  return cur;
+}
+
+static char
+sign_bits(int base, const char *p)
+{
+  /* Returns the digit used to pad a negative number that is shown as
+   * a complement in the given base.  p points at the conversion
+   * character, so that `X' selects the uppercase hex digit. */
+  if (base == 16) {
+    return (*p == 'X') ? 'F' : 'f';
+  }
+  if (base == 8) {
+    return '7';
+  }
+  if (base == 2) {
+    return '1';
+  }
+  return '.'; /* any other base */
+}
+
+static mrb_value
+mrb_fix2binstr(mrb_state *mrb, mrb_value x, int base)
+{ /* Returns the digits of fixnum x as a new string; any base other than 2 raises an ArgumentError. */
+ char buf[64], *b = buf + sizeof buf; /* b starts one past the end; digits are written right to left */
+ unsigned long val = mrb_fixnum(x); /* a negative x becomes a large unsigned value here */
+ char d = 0;
+
+ if (base != 2) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid radix %d", base);
+ }
+
+ if (val >= (1 << 10))
+ val &= 0x3ff; /* values of 1024 or more keep only their low 10 bits */
+
+ if (val == 0) {
+ return mrb_str_new2(mrb, "0");
+ }
+ *--b = '\0';
+ do {
+ *--b = ruby_digitmap[(int)(val % base)]; /* least significant digit first */
+ } while (val /= base);
+
+ if (mrb_fixnum(x) < 0) {
+ b = remove_sign_bits(b, base); /* drop the leading run of sign digits */
+ switch (base) {
+ case 16: d = 'f'; break;
+ case 8: d = '7'; break;
+ case 2: d = '1'; break;
+ }
+
+ if (d && *b != d) {
+ *--b = d; /* keep exactly one sign digit in front */
+ }
+ }
+
+ return mrb_str_new2(mrb, b);
+}
+
+#define FNONE 0 /* no flag seen for the current directive */
+#define FSHARP 1 /* `#' flag */
+#define FMINUS 2 /* `-' flag, also set for a negative `*' width */
+#define FPLUS 4 /* `+' flag */
+#define FZERO 8 /* `0' flag */
+#define FSPACE 16 /* ` ' flag */
+#define FWIDTH 32 /* a field width was given */
+#define FPREC 64 /* a precision is in effect (cleared again for a negative `*' precision) */
+#define FPREC0 128 /* a `.' was seen; used to reject a second precision and later flags/width */
+
+#define CHECK(l) do { /* make room for l more bytes after blen, then refresh buf */\
+/* int cr = ENC_CODERANGE(result);*/\
+ while (blen + (l) >= bsiz) {\
+ bsiz*=2; /* double the capacity until the request fits */\
+ }\
+ mrb_str_resize(mrb, result, bsiz);\
+/* ENC_CODERANGE_SET(result, cr);*/\
+ buf = RSTRING_PTR(result); /* re-read the pointer after the resize */\
+} while (0)
+
+#define PUSH(s, l) do { /* append the l bytes at s to the result */ \
+ CHECK(l);\
+ memcpy(&buf[blen], s, l);\
+ blen += (l);\
+} while (0)
+
+#define FILL(c, l) do { /* append l copies of c; does nothing when l <= 0 */ \
+  if ((l) > 0) { /* a negative l would reach memset as a huge size_t */ \
+    CHECK(l); memset(&buf[blen], c, l); blen += (l); \
+  } \
+} while (0)
+
+#define GETARG() (!UNDEF_P(nextvalue) ? nextvalue : /* a value already selected by N$ or a name wins */ \
+ posarg == -1 ? \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with numbered", nextarg), mrb_undef_value()) : \
+ posarg == -2 ? \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "unnumbered(%d) mixed with named", nextarg), mrb_undef_value()) : \
+ (posarg = nextarg++, GETNTHARG(posarg))) /* otherwise consume the next unnumbered argument */
+
+#define GETPOSARG(n) (posarg > 0 ? /* fetch argument n for an `n$' reference */ \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "numbered(%d) after unnumbered(%d)", n, posarg), mrb_undef_value()) : \
+ posarg == -2 ? \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "numbered(%d) after named", n), mrb_undef_value()) : \
+ ((n < 1) ? \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid index - %d$", n), mrb_undef_value()) : \
+ (posarg = -1, GETNTHARG(n)))) /* posarg = -1 records that numbered references are in use */
+
+#define GETNTHARG(nth) /* argv[nth], or an ArgumentError when nth >= argc */ \
+ ((nth >= argc) ? (mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments"), mrb_undef_value()) : argv[nth])
+
+#define GETNAMEARG(id, name, len) ( /* look id up in the argument hash; yields the undef value when the key is absent */ \
+ posarg > 0 ? \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "named%.*s after unnumbered(%d)", (len), (name), posarg), mrb_undef_value()) : \
+ posarg == -1 ? \
+ (mrb_raise(mrb, E_ARGUMENT_ERROR, "named%.*s after numbered", (len), (name)), mrb_undef_value()) : \
+ (posarg = -2, mrb_hash_getWithDef(mrb, get_hash(mrb, &hash, argc, argv), id, mrb_undef_value()))) /* posarg = -2 records that named references are in use */
+
+#define GETNUM(n, val) /* accumulate the decimal digits at p into n; val only names the field in the error text */ \
+ for (; p < end && ISDIGIT(*p); p++) {\
+ int next_n = 10 * n + (*p - '0'); \
+ if (next_n / 10 != n) { /* overflow check; NOTE(review): it runs after the int arithmetic has already been done */\
+ mrb_raise(mrb, E_ARGUMENT_ERROR, #val " too big"); \
+ } \
+ n = next_n; \
+ } \
+ if (p >= end) { /* the digits ran to the end of the format with no conversion character */ \
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%*[0-9]"); \
+ }
+
+#define GETASTER(num) do { /* handle `*' or `*N$': take the width/precision from an argument */ \
+  t = p++; /* remember the position of the `*' */ \
+  n = 0; \
+  GETNUM(n, num); /* pass num so an overflow is reported against the field being parsed */ \
+  if (*p == '$') { \
+    tmp = GETPOSARG(n); /* `*N$': argument number N */ \
+  } \
+  else { \
+    tmp = GETARG(); /* plain `*': next unnumbered argument */ \
+    p = t; /* rewind to the `*'; the caller advances past it */ \
+  } \
+  num = mrb_fixnum(tmp); \
+} while (0)
+
+static mrb_value
+get_hash(mrb_state *mrb, volatile mrb_value *hash, int argc, const mrb_value *argv)
+{
+  /* Returns the hash used for named references.  It is converted from
+   * argv[1] on first use and cached in *hash for later calls. */
+  if (UNDEF_P(*hash)) {
+    mrb_value converted;
+
+    if (argc != 2) mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
+    converted = mrb_check_convert_type(mrb, argv[1], MRB_TT_HASH, "Hash", "to_hash");
+    if (mrb_nil_p(converted)) mrb_raise(mrb, E_ARGUMENT_ERROR, "one hash required");
+    *hash = converted;
+  }
+  return *hash;
+}
+
+/*
+ * call-seq:
+ * format(format_string [, arguments...] ) -> string
+ * sprintf(format_string [, arguments...] ) -> string
+ *
+ * Returns the string resulting from applying <i>format_string</i> to
+ * any additional arguments. Within the format string, any characters
+ * other than format sequences are copied to the result.
+ *
+ * The syntax of a format sequence is as follows.
+ *
+ * %[flags][width][.precision]type
+ *
+ * A format
+ * sequence consists of a percent sign, followed by optional flags,
+ * width, and precision indicators, then terminated with a field type
+ * character. The field type controls how the corresponding
+ * <code>sprintf</code> argument is to be interpreted, while the flags
+ * modify that interpretation.
+ *
+ * The field type characters are:
+ *
+ * Field | Integer Format
+ * ------+--------------------------------------------------------------
+ * b | Convert argument as a binary number.
+ * | Negative numbers will be displayed as a two's complement
+ * | prefixed with `..1'.
+ * B | Equivalent to `b', but uses an uppercase 0B for prefix
+ * | in the alternative format by #.
+ * d | Convert argument as a decimal number.
+ * i | Identical to `d'.
+ * o | Convert argument as an octal number.
+ * | Negative numbers will be displayed as a two's complement
+ * | prefixed with `..7'.
+ * u | Identical to `d'.
+ * x | Convert argument as a hexadecimal number.
+ * | Negative numbers will be displayed as a two's complement
+ * | prefixed with `..f' (representing an infinite string of
+ * | leading 'ff's).
+ * X | Equivalent to `x', but uses uppercase letters.
+ *
+ * Field | Float Format
+ * ------+--------------------------------------------------------------
+ * e | Convert floating point argument into exponential notation
+ * | with one digit before the decimal point as [-]d.dddddde[+-]dd.
+ * | The precision specifies the number of digits after the decimal
+ * | point (defaulting to six).
+ * E | Equivalent to `e', but uses an uppercase E to indicate
+ * | the exponent.
+ * f | Convert floating point argument as [-]ddd.dddddd,
+ * | where the precision specifies the number of digits after
+ * | the decimal point.
+ * g | Convert a floating point number using exponential form
+ * | if the exponent is less than -4 or greater than or
+ * | equal to the precision, or in dd.dddd form otherwise.
+ * | The precision specifies the number of significant digits.
+ * G | Equivalent to `g', but use an uppercase `E' in exponent form.
+ * a | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
+ * | which consists of an optional sign, "0x", fraction part
+ * | as hexadecimal, "p", and exponential part as decimal.
+ * A | Equivalent to `a', but use uppercase `X' and `P'.
+ *
+ * Field | Other Format
+ * ------+--------------------------------------------------------------
+ * c | Argument is the numeric code for a single character or
+ * | a single character string itself.
+ * p | The value of argument.inspect.
+ * s | Argument is a string to be substituted. If the format
+ * | sequence contains a precision, at most that many characters
+ * | will be copied.
+ * % | A percent sign itself will be displayed. No argument taken.
+ *
+ * The flags modify the behavior of the formats.
+ * The flag characters are:
+ *
+ * Flag | Applies to | Meaning
+ * ---------+---------------+-----------------------------------------
+ * space | bBdiouxX | Leave a space at the start of
+ * | aAeEfgG | non-negative numbers.
+ * | (numeric fmt) | For `o', `x', `X', `b' and `B', use
+ * | | a minus sign with absolute value for
+ * | | negative values.
+ * ---------+---------------+-----------------------------------------
+ * (digit)$ | all | Specifies the absolute argument number
+ * | | for this field. Absolute and relative
+ * | | argument numbers cannot be mixed in a
+ * | | sprintf string.
+ * ---------+---------------+-----------------------------------------
+ * # | bBoxX | Use an alternative format.
+ * | aAeEfgG | For the conversions `o', increase the precision
+ * | | until the first digit will be `0' if
+ * | | it is not formatted as complements.
+ * | | For the conversions `x', `X', `b' and `B'
+ * | | on non-zero, prefix the result with ``0x'',
+ * | | ``0X'', ``0b'' and ``0B'', respectively.
+ * | | For `a', `A', `e', `E', `f', `g', and 'G',
+ * | | force a decimal point to be added,
+ * | | even if no digits follow.
+ * | | For `g' and 'G', do not remove trailing zeros.
+ * ---------+---------------+-----------------------------------------
+ * + | bBdiouxX | Add a leading plus sign to non-negative
+ * | aAeEfgG | numbers.
+ * | (numeric fmt) | For `o', `x', `X', `b' and `B', use
+ * | | a minus sign with absolute value for
+ * | | negative values.
+ * ---------+---------------+-----------------------------------------
+ * - | all | Left-justify the result of this conversion.
+ * ---------+---------------+-----------------------------------------
+ * 0 (zero) | bBdiouxX | Pad with zeros, not spaces.
+ * | aAeEfgG | For `o', `x', `X', `b' and `B', radix-1
+ * | (numeric fmt) | is used for negative numbers formatted as
+ * | | complements.
+ * ---------+---------------+-----------------------------------------
+ * * | all | Use the next argument as the field width.
+ * | | If negative, left-justify the result. If the
+ * | | asterisk is followed by a number and a dollar
+ * | | sign, use the indicated argument as the width.
+ *
+ * Examples of flags:
+ *
+ * # `+' and space flag specifies the sign of non-negative numbers.
+ * sprintf("%d", 123) #=> "123"
+ * sprintf("%+d", 123) #=> "+123"
+ * sprintf("% d", 123) #=> " 123"
+ *
+ * # `#' flag for `o' increases number of digits to show `0'.
+ * # `+' and space flag changes format of negative numbers.
+ * sprintf("%o", 123) #=> "173"
+ * sprintf("%#o", 123) #=> "0173"
+ * sprintf("%+o", -123) #=> "-173"
+ * sprintf("%o", -123) #=> "..7605"
+ * sprintf("%#o", -123) #=> "..7605"
+ *
+ * # `#' flag for `x' add a prefix `0x' for non-zero numbers.
+ * # `+' and space flag disables complements for negative numbers.
+ * sprintf("%x", 123) #=> "7b"
+ * sprintf("%#x", 123) #=> "0x7b"
+ * sprintf("%+x", -123) #=> "-7b"
+ * sprintf("%x", -123) #=> "..f85"
+ * sprintf("%#x", -123) #=> "0x..f85"
+ * sprintf("%#x", 0) #=> "0"
+ *
+ * # `#' for `X' uses the prefix `0X'.
+ * sprintf("%X", 123) #=> "7B"
+ * sprintf("%#X", 123) #=> "0X7B"
+ *
+ * # `#' flag for `b' add a prefix `0b' for non-zero numbers.
+ * # `+' and space flag disables complements for negative numbers.
+ * sprintf("%b", 123) #=> "1111011"
+ * sprintf("%#b", 123) #=> "0b1111011"
+ * sprintf("%+b", -123) #=> "-1111011"
+ * sprintf("%b", -123) #=> "..10000101"
+ * sprintf("%#b", -123) #=> "0b..10000101"
+ * sprintf("%#b", 0) #=> "0"
+ *
+ * # `#' for `B' uses the prefix `0B'.
+ * sprintf("%B", 123) #=> "1111011"
+ * sprintf("%#B", 123) #=> "0B1111011"
+ *
+ * # `#' for `e' forces to show the decimal point.
+ * sprintf("%.0e", 1) #=> "1e+00"
+ * sprintf("%#.0e", 1) #=> "1.e+00"
+ *
+ * # `#' for `f' forces to show the decimal point.
+ * sprintf("%.0f", 1234) #=> "1234"
+ * sprintf("%#.0f", 1234) #=> "1234."
+ *
+ * # `#' for `g' forces to show the decimal point.
+ * # It also disables stripping lowest zeros.
+ * sprintf("%g", 123.4) #=> "123.4"
+ * sprintf("%#g", 123.4) #=> "123.400"
+ * sprintf("%g", 123456) #=> "123456"
+ * sprintf("%#g", 123456) #=> "123456."
+ *
+ * The field width is an optional integer, followed optionally by a
+ * period and a precision. The width specifies the minimum number of
+ * characters that will be written to the result for this field.
+ *
+ * Examples of width:
+ *
+ * # padding is done by spaces, width=20
+ * # 0 or radix-1. <------------------>
+ * sprintf("%20d", 123) #=> " 123"
+ * sprintf("%+20d", 123) #=> " +123"
+ * sprintf("%020d", 123) #=> "00000000000000000123"
+ * sprintf("%+020d", 123) #=> "+0000000000000000123"
+ * sprintf("% 020d", 123) #=> " 0000000000000000123"
+ * sprintf("%-20d", 123) #=> "123 "
+ * sprintf("%-+20d", 123) #=> "+123 "
+ * sprintf("%- 20d", 123) #=> " 123 "
+ * sprintf("%020x", -123) #=> "..ffffffffffffffff85"
+ *
+ * For
+ * numeric fields, the precision controls the number of decimal places
+ * displayed. For string fields, the precision determines the maximum
+ * number of characters to be copied from the string. (Thus, the format
+ * sequence <code>%10.10s</code> will always contribute exactly ten
+ * characters to the result.)
+ *
+ * Examples of precisions:
+ *
+ * # precision for `d', 'o', 'x' and 'b' is
+ * # minimum number of digits <------>
+ * sprintf("%20.8d", 123) #=> " 00000123"
+ * sprintf("%20.8o", 123) #=> " 00000173"
+ * sprintf("%20.8x", 123) #=> " 0000007b"
+ * sprintf("%20.8b", 123) #=> " 01111011"
+ * sprintf("%20.8d", -123) #=> " -00000123"
+ * sprintf("%20.8o", -123) #=> " ..777605"
+ * sprintf("%20.8x", -123) #=> " ..ffff85"
+ * sprintf("%20.8b", -11) #=> " ..110101"
+ *
+ * # "0x" and "0b" for `#x' and `#b' is not counted for
+ * # precision but "0" for `#o' is counted. <------>
+ * sprintf("%#20.8d", 123) #=> " 00000123"
+ * sprintf("%#20.8o", 123) #=> " 00000173"
+ * sprintf("%#20.8x", 123) #=> " 0x0000007b"
+ * sprintf("%#20.8b", 123) #=> " 0b01111011"
+ * sprintf("%#20.8d", -123) #=> " -00000123"
+ * sprintf("%#20.8o", -123) #=> " ..777605"
+ * sprintf("%#20.8x", -123) #=> " 0x..ffff85"
+ * sprintf("%#20.8b", -11) #=> " 0b..110101"
+ *
+ * # precision for `e' is number of
+ * # digits after the decimal point <------>
+ * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03"
+ *
+ * # precision for `f' is number of
+ * # digits after the decimal point <------>
+ * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000"
+ *
+ * # precision for `g' is number of
+ * # significant digits <------->
+ * sprintf("%20.8g", 1234.56789) #=> " 1234.5679"
+ *
+ * # <------->
+ * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08"
+ *
+ * # precision for `s' is
+ * # maximum number of characters <------>
+ * sprintf("%20.8s", "string test") #=> " string t"
+ *
+ * Examples:
+ *
+ * sprintf("%d %04x", 123, 123) #=> "123 007b"
+ * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'"
+ * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello"
+ * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8"
+ * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23"
+ * sprintf("%u", -123) #=> "-123"
+ *
+ * For more complex formatting, Ruby supports a reference by name.
+ * %<name>s style uses format style, but %{name} style doesn't.
+ *
+ * Examples:
+ * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
+ * #=> 1 : 2.000000
+ * sprintf("%{foo}f", { :foo => 1 })
+ * # => "1f"
+ */
+
+mrb_value
+mrb_f_sprintf(mrb_state *mrb, mrb_value obj)
+{
+  mrb_value *args;
+  int nargs;
+
+  /* Fetch every argument; args[0] is the format string. */
+  mrb_get_args(mrb, "*", &args, &nargs);
+  if (nargs > 0) {
+    /* Hand the format and the remaining arguments to the formatter. */
+    return mrb_str_format(mrb, nargs - 1, args + 1, args[0]);
+  }
+  mrb_raise(mrb, E_ARGUMENT_ERROR, "too few arguments");
+  return mrb_nil_value();
+}
+
+mrb_value
+mrb_str_format(mrb_state *mrb, int argc, const mrb_value *argv, mrb_value fmt)
+{ /* Expands the directives in fmt using argv[0..argc-1] and returns the resulting string. */
+ const char *p, *end;
+ char *buf;
+ long blen, bsiz; /* bytes written so far / current capacity of the result */
+ mrb_value result;
+
+ int width, prec, flags = FNONE;
+ int nextarg = 1; /* index of the next unnumbered argument (argv is shifted below) */
+ int posarg = 0; /* 0: nothing used yet, >0: unnumbered, -1: numbered (N$), -2: named */
+ int tainted = 0; /* NOTE(review): assigned below but never read in this function */
+ mrb_value nextvalue;
+ mrb_value tmp;
+ mrb_value str;
+ volatile mrb_value hash = mrb_undef_value(); /* filled in on demand by get_hash() */
+
+#define CHECK_FOR_WIDTH(f) \
+ if ((f) & FWIDTH) { \
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "width given twice"); \
+ } \
+ if ((f) & FPREC0) { \
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "width after precision"); \
+ }
+#define CHECK_FOR_FLAGS(f) \
+ if ((f) & FWIDTH) { \
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after width"); \
+ } \
+ if ((f) & FPREC0) { \
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "flag after precision"); \
+ }
+
+ ++argc; /* shift the argument window so the first format argument is argv[1] */
+ --argv;
+ if (MRB_TAINTED_P(fmt)) tainted = 1;
+ mrb_string_value(mrb, &fmt);
+ fmt = mrb_str_new4(mrb, fmt);
+ p = RSTRING_PTR(fmt);
+ end = p + RSTRING_LEN(fmt);
+ blen = 0;
+ bsiz = 120; /* initial capacity; CHECK doubles it on demand */
+ result = mrb_str_buf_new(mrb, bsiz);
+ buf = RSTRING_PTR(result);
+ memset(buf, 0, bsiz);
+
+ for (; p < end; p++) {
+ const char *t;
+ int n;
+ mrb_sym id = 0;
+
+ for (t = p; t < end && *t != '%'; t++) ; /* find the next `%' */
+ PUSH(p, t - p); /* copy the literal text before it */
+ if (t >= end)
+ goto sprint_exit; /* end of fmt string */
+
+ p = t + 1; /* skip `%' */
+
+ width = prec = -1;
+ nextvalue = mrb_undef_value();
+
+retry: /* re-entered after each flag, width, precision or argument selector */
+ switch (*p) {
+ default:
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed format string - %%%c", *p);
+ break;
+
+ case ' ':
+ CHECK_FOR_FLAGS(flags);
+ flags |= FSPACE;
+ p++;
+ goto retry;
+
+ case '#':
+ CHECK_FOR_FLAGS(flags);
+ flags |= FSHARP;
+ p++;
+ goto retry;
+
+ case '+':
+ CHECK_FOR_FLAGS(flags);
+ flags |= FPLUS;
+ p++;
+ goto retry;
+
+ case '-':
+ CHECK_FOR_FLAGS(flags);
+ flags |= FMINUS;
+ p++;
+ goto retry;
+
+ case '0':
+ CHECK_FOR_FLAGS(flags);
+ flags |= FZERO;
+ p++;
+ goto retry;
+
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ n = 0;
+ GETNUM(n, width);
+ if (*p == '$') { /* `N$': the number selects an argument instead of a width */
+ if (!UNDEF_P(nextvalue)) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "value given twice - %d$", n);
+ }
+ nextvalue = GETPOSARG(n);
+ p++;
+ goto retry;
+ }
+ CHECK_FOR_WIDTH(flags);
+ width = n;
+ flags |= FWIDTH;
+ goto retry;
+
+ case '<': /* %<name>: the named value is formatted by the directive that follows */
+ case '{': /* %{name}: the named value is substituted through the %s code below */
+ {
+ const char *start = p;
+ char term = (*p == '<') ? '>' : '}';
+
+ for (; p < end && *p != term; ) /* advance to the closing delimiter, or to end if there is none */
+ p++;
+ if (id) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "name%.*s after <%s>",
+ (int)(p - start + 1), start, mrb_sym2name(mrb, id));
+ }
+ mrb_value symname = mrb_str_new(mrb, start + 1, p - start - 1); /* the text between the delimiters */
+ id = mrb_intern(mrb, RSTRING_PTR(symname));
+ nextvalue = GETNAMEARG(mrb_symbol_value(id), start, (int)(p - start + 1));
+ if (UNDEF_P(nextvalue)) {
+ mrb_raise(mrb, E_KEY_ERROR, "key%.*s not found", (int)(p - start + 1), start);
+ }
+ if (term == '}') goto format_s;
+ p++;
+ goto retry;
+ }
+
+ case '*':
+ CHECK_FOR_WIDTH(flags);
+ flags |= FWIDTH;
+ GETASTER(width);
+ if (width < 0) { /* a negative width means left-justify */
+ flags |= FMINUS;
+ width = -width;
+ }
+ p++;
+ goto retry;
+
+ case '.':
+ if (flags & FPREC0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "precision given twice");
+ }
+ flags |= FPREC|FPREC0;
+
+ prec = 0;
+ p++;
+ if (*p == '*') {
+ GETASTER(prec);
+ if (prec < 0) { /* ignore negative precision */
+ flags &= ~FPREC;
+ }
+ p++;
+ goto retry;
+ }
+
+ GETNUM(prec, precision);
+ goto retry;
+
+ case '\n':
+ case '\0':
+ p--; /* step back so the terminator is seen again, then fall through and emit `%' */
+ case '%':
+ if (flags != FNONE) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid format character - %%");
+ }
+ PUSH("%", 1);
+ break;
+
+ case 'c':
+ {
+ mrb_value val = GETARG();
+ mrb_value tmp;
+ unsigned int c; /* character code to emit */
+ int n; /* its length in bytes */
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc = mrb_enc_get(mrb, fmt);
+#endif //INCLUDE_ENCODING
+
+ tmp = mrb_check_string_type(mrb, val);
+ if (!mrb_nil_p(tmp)) { /* string argument: it must be exactly one byte long */
+ if (RSTRING_LEN(tmp) != 1 ) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "%%c requires a character");
+ }
+#ifdef INCLUDE_ENCODING
+ c = mrb_enc_codepoint_len(mrb, RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
+#else
+ c = RSTRING_PTR(tmp)[0];
+ n = 1;
+#endif //INCLUDE_ENCODING
+ }
+ else { /* otherwise the argument is read as a fixnum character code */
+ c = mrb_fixnum(val);
+ n = mrb_enc_codelen(mrb, c, enc); /* NOTE(review): enc is declared only under INCLUDE_ENCODING above */
+ }
+ if (n <= 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
+ }
+ if (!(flags & FWIDTH)) {
+ CHECK(n);
+ mrb_enc_mbcput(c, &buf[blen], enc);
+ blen += n;
+ }
+ else if ((flags & FMINUS)) { /* left-justified: character first, then padding */
+ CHECK(n);
+ mrb_enc_mbcput(c, &buf[blen], enc);
+ blen += n;
+ FILL(' ', width-1);
+ }
+ else { /* right-justified: padding first */
+ FILL(' ', width-1);
+ CHECK(n);
+ mrb_enc_mbcput(c, &buf[blen], enc);
+ blen += n;
+ }
+ }
+ break;
+
+ case 's':
+ case 'p':
+format_s: /* also the target of %{name} */
+ {
+ mrb_value arg = GETARG();
+ long len, slen; /* len: bytes to copy; slen: length compared against prec and width */
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc = mrb_enc_get(mrb, fmt);
+#endif //INCLUDE_ENCODING
+
+ if (*p == 'p') arg = mrb_inspect(mrb, arg); /* %p formats the inspect result */
+ str = mrb_obj_as_string(mrb, arg);
+ if (MRB_TAINTED_P(str)) tainted = 1;
+ len = RSTRING_LEN(str);
+ mrb_str_set_len(mrb, result, blen);
+ if (flags&(FPREC|FWIDTH)) {
+ slen = RSTRING_LEN(str);
+ if (slen < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid mbstring sequence");
+ }
+ if ((flags&FPREC) && (prec < slen)) { /* the precision truncates the string */
+#ifdef INCLUDE_ENCODING
+ char *p = mrb_enc_nth(mrb, RSTRING_PTR(str), RSTRING_END(str),prec, enc);
+#else
+ char *p = RSTRING_PTR(str) + prec;
+#endif //INCLUDE_ENCODING
+ slen = prec;
+ len = p - RSTRING_PTR(str);
+ }
+ /* need to adjust multi-byte string pos */
+ if ((flags&FWIDTH) && (width > slen)) {
+ width -= (int)slen; /* width now holds the number of padding spaces */
+ if (!(flags&FMINUS)) {
+ CHECK(width);
+ while (width--) {
+ buf[blen++] = ' ';
+ }
+ }
+ CHECK(len);
+ memcpy(&buf[blen], RSTRING_PTR(str), len);
+ blen += len;
+ if (flags&FMINUS) {
+ CHECK(width);
+ while (width--) {
+ buf[blen++] = ' ';
+ }
+ }
+ mrb_enc_associate(mrb, result, enc); /* NOTE(review): enc is declared only under INCLUDE_ENCODING above */
+ break;
+ }
+ }
+ PUSH(RSTRING_PTR(str), len);
+ mrb_enc_associate(mrb, result, enc);
+ }
+ break;
+
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'b':
+ case 'B':
+ case 'u':
+ {
+ volatile mrb_value val = GETARG();
+ char fbuf[32], nbuf[64], *s; /* fbuf: C format for snprintf; nbuf: the formatted digits */
+ const char *prefix = 0;
+ int sign = 0, dots = 0; /* sign: print a sign and the magnitude; dots: print a leading ".." */
+ char sc = 0; /* sign character to emit, 0 for none */
+ long v = 0, org_v = 0;
+ int base;
+ int len, pos;
+
+ switch (*p) {
+ case 'd':
+ case 'i':
+ case 'u':
+ sign = 1; break;
+ case 'o':
+ case 'x':
+ case 'X':
+ case 'b':
+ case 'B':
+ if (flags&(FPLUS|FSPACE)) sign = 1; /* `+' or ` ' switches these to sign-and-magnitude output */
+ break;
+ }
+ if (flags & FSHARP) {
+ switch (*p) {
+ case 'o': prefix = "0"; break;
+ case 'x': prefix = "0x"; break;
+ case 'X': prefix = "0X"; break;
+ case 'b': prefix = "0b"; break;
+ case 'B': prefix = "0B"; break;
+ }
+ }
+
+bin_retry: /* coerce val step by step until it is a Fixnum */
+ switch (mrb_type(val)) {
+ case MRB_TT_FLOAT:
+ if (FIXABLE(mrb_float(val))) {
+ val = mrb_fixnum_value((mrb_int)mrb_float(val));
+ goto bin_retry;
+ }
+ val = mrb_dbl2big(mrb, mrb_float(val));
+ if (FIXNUM_P(val)) goto bin_retry;
+ break; /* NOTE(review): v keeps its initial 0 when this path is taken */
+ case MRB_TT_STRING:
+ val = mrb_str_to_inum(mrb, val, 0, TRUE);
+ goto bin_retry;
+ case MRB_TT_FIXNUM:
+ v = (long)mrb_fixnum(val);
+ break;
+ default:
+ val = mrb_Integer(mrb, val);
+ goto bin_retry;
+ }
+
+ switch (*p) {
+ case 'o':
+ base = 8; break;
+ case 'x':
+ case 'X':
+ base = 16; break;
+ case 'b':
+ case 'B':
+ base = 2; break;
+ case 'u':
+ case 'd':
+ case 'i':
+ default:
+ base = 10; break;
+ }
+
+ if (base == 2) { /* binary: build the digit string, then re-read those digits as a decimal number */
+ org_v = v;
+ if ( v < 0 && !sign ) {
+ val = mrb_fix2binstr(mrb, mrb_fixnum_value(v), base);
+ dots = 1;
+ }
+ else {
+ val = mrb_fix2str(mrb, mrb_fixnum_value(v), base);
+ }
+ v = mrb_fixnum(mrb_str_to_inum(mrb, val, 10, 0/*Qfalse*/));
+ }
+ if (sign) {
+ char c = *p;
+ if (c == 'i') c = 'd'; /* %d and %i are identical */
+ if (base == 2) c = 'd';
+ if (v < 0) {
+ v = -v;
+ sc = '-';
+ width--;
+ }
+ else if (flags & FPLUS) {
+ sc = '+';
+ width--;
+ }
+ else if (flags & FSPACE) {
+ sc = ' ';
+ width--;
+ }
+ snprintf(fbuf, sizeof(fbuf), "%%l%c", c); /* builds "%ld", "%lx", ... */
+ snprintf(nbuf, sizeof(nbuf), fbuf, v);
+ s = nbuf;
+ }
+ else {
+ char c = *p;
+ if (c == 'X') c = 'x'; /* uppercased further down */
+ if (base == 2) c = 'd';
+ s = nbuf;
+ if (v < 0) {
+ dots = 1;
+ }
+ snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
+ snprintf(++s, sizeof(nbuf) - 1, fbuf, v); /* nbuf[0] stays free for the sign digit written at *--s below */
+ if (v < 0) {
+ char d = 0;
+
+ s = remove_sign_bits(s, base);
+ switch (base) {
+ case 16: d = 'f'; break;
+ case 8: d = '7'; break;
+ case 2: d = '1'; break;
+ }
+
+ if (d && *s != d) {
+ *--s = d;
+ }
+ }
+ }
+ len = (int)strlen(s);
+
+ pos = -1;
+ if (dots) { /* reserve two columns for the ".." emitted below */
+ prec -= 2;
+ width -= 2;
+ }
+
+ if (*p == 'X') {
+ char *pp = s;
+ int c;
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc = mrb_enc_get(mrb, fmt);
+#endif //INCLUDE_ENCODING
+ while ((c = (int)(unsigned char)*pp) != 0) {
+#ifdef INCLUDE_ENCODING
+ *pp = mrb_enc_toupper(c, enc);
+#else
+ *pp = toupper(c);
+#endif //INCLUDE_ENCODING
+ pp++;
+ }
+ }
+ if (prefix && !prefix[1]) { /* octal */
+ if (dots) {
+ prefix = 0;
+ }
+ else if (len == 1 && *s == '0') {
+ len = 0;
+ if (flags & FPREC) prec--;
+ }
+ else if ((flags & FPREC) && (prec > len)) {
+ prefix = 0;
+ }
+ }
+ else if (len == 1 && *s == '0') { /* no prefix is printed for a plain "0" */
+ prefix = 0;
+ }
+ if (prefix) {
+ width -= (int)strlen(prefix);
+ }
+ if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { /* `0' flag alone: the whole width becomes digit padding */
+ prec = width;
+ width = 0;
+ }
+ else {
+ if (prec < len) {
+ if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
+ prec = len;
+ }
+ width -= prec;
+ }
+ if (!(flags&FMINUS)) {
+ CHECK(width);
+ while (width-- > 0) {
+ buf[blen++] = ' ';
+ }
+ }
+ if (sc) PUSH(&sc, 1);
+ if (prefix) {
+ int plen = (int)strlen(prefix);
+ PUSH(prefix, plen);
+ }
+ CHECK(prec - len);
+ if (dots) PUSH("..", 2);
+ if (v < 0 || (base == 2 && org_v < 0)) { /* negative value: pad with the character from sign_bits() */
+ char c = sign_bits(base, p);
+ while (len < prec--) {
+ buf[blen++] = c;
+ }
+ }
+ else if ((flags & (FMINUS|FPREC)) != FMINUS) {
+ char c = '0';
+ while (len < prec--) {
+ buf[blen++] = c;
+ }
+ }
+ PUSH(s, len);
+ CHECK(width);
+ while (width-- > 0) { /* padding still left here ends up on the right */
+ buf[blen++] = ' ';
+ }
+ }
+ break;
+
+ case 'f':
+ case 'g':
+ case 'G':
+ case 'e':
+ case 'E':
+ case 'a':
+ case 'A':
+ {
+ mrb_value val = GETARG();
+ double fval;
+ int i, need = 6; /* need: number of bytes reserved for this field */
+ char fbuf[32];
+
+ fval = mrb_float(mrb_Float(mrb, val));
+ if (isnan(fval) || isinf(fval)) { /* NaN and infinities are written by hand as "NaN" / "Inf" */
+ const char *expr;
+
+ if (isnan(fval)) {
+ expr = "NaN";
+ }
+ else {
+ expr = "Inf";
+ }
+ need = (int)strlen(expr);
+ if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
+ need++;
+ if ((flags & FWIDTH) && need < width)
+ need = width;
+
+ CHECK(need + 1);
+ snprintf(&buf[blen], need + 1, "%*s", need, ""); /* pre-fill the field with `need' spaces */
+ if (flags & FMINUS) {
+ if (!isnan(fval) && fval < 0.0)
+ buf[blen++] = '-';
+ else if (flags & FPLUS)
+ buf[blen++] = '+';
+ else if (flags & FSPACE)
+ blen++;
+ memcpy(&buf[blen], expr, strlen(expr));
+ }
+ else {
+ if (!isnan(fval) && fval < 0.0)
+ buf[blen + need - strlen(expr) - 1] = '-';
+ else if (flags & FPLUS)
+ buf[blen + need - strlen(expr) - 1] = '+';
+ else if ((flags & FSPACE) && need > width)
+ blen++;
+ memcpy(&buf[blen + need - strlen(expr)], expr,
+ strlen(expr));
+ }
+ blen += strlen(&buf[blen]);
+ break;
+ }
+
+ fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec); /* rebuild the directive for the C library's snprintf */
+ need = 0;
+ if (*p != 'e' && *p != 'E') {
+ i = INT_MIN;
+ frexp(fval, &i); /* i receives the binary exponent of fval */
+ if (i > 0)
+ need = BIT_DIGITS(i); /* estimate of the decimal digits before the point */
+ }
+ need += (flags&FPREC) ? prec : 6;
+ if ((flags&FWIDTH) && need < width)
+ need = width;
+ need += 20; /* extra headroom -- presumably for sign, point and exponent; confirm */
+
+ CHECK(need);
+ snprintf(&buf[blen], need, fbuf, fval);
+ blen += strlen(&buf[blen]);
+ }
+ break;
+ }
+ flags = FNONE; /* flags do not carry over to the next directive */
+ }
+
+ sprint_exit:
+ /* XXX - We cannot validate the number of arguments if (digit)$ style used.
+ */
+ if (posarg >= 0 && nextarg < argc) { /* unused arguments remain and no numbered/named reference was used */
+ const char *mesg = "too many arguments for format string";
+ if (RTEST(ruby_debug)) mrb_raise(mrb, E_ARGUMENT_ERROR, "%s", mesg);
+ if (RTEST(ruby_verbose)) mrb_warn("%s", mesg);
+ }
+ mrb_str_resize(mrb, result, blen); /* trim the result to the bytes actually written */
+
+ return result;
+}
+
+static void
+fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec)
+{
+  /* Rebuilds a C printf directive ("%", flags, width, ".prec", c) in buf. */
+  static const struct { int bit; char ch; } marks[] = {
+    {FSHARP, '#'}, {FPLUS, '+'}, {FMINUS, '-'}, {FZERO, '0'}, {FSPACE, ' '}
+  };
+  char *const limit = buf + size;
+  size_t i;
+  *buf++ = '%';
+  for (i = 0; i < sizeof(marks) / sizeof(marks[0]); i++) {
+    if (flags & marks[i].bit) *buf++ = marks[i].ch;
+  }
+  if (flags & FWIDTH) {
+    snprintf(buf, limit - buf, "%d", width);
+    buf += strlen(buf);
+  }
+  if (flags & FPREC) {
+    snprintf(buf, limit - buf, ".%d", prec);
+    buf += strlen(buf);
+  }
+  *buf++ = c;
+  *buf = '\0';
+}
diff --git a/src/st.c b/src/st.c
new file mode 100644
index 000000000..c4bbc702c
--- /dev/null
+++ b/src/st.c
@@ -0,0 +1,1283 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+
+/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
+#define NOT_RUBY
+#ifdef NOT_RUBY
+#include "regint.h"
+#include "st.h"
+#else
+#include "ruby/ruby.h"
+#endif
+
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+
+#define ST_DEFAULT_MAX_DENSITY 5
+#define ST_DEFAULT_INIT_TABLE_SIZE 11
+
+ /*
+ * DEFAULT_MAX_DENSITY is the default for the largest we allow the
+ * average number of items per bin before increasing the number of
+ * bins
+ *
+ * DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
+ * allocated initially
+ *
+ */
+/* Hash type for keys used directly as integers: st_numcmp for equality, st_numhash for hashing. */
+static const struct st_hash_type type_numhash = {
+ st_numcmp,
+ st_numhash,
+};
+/* Hash type for NUL-terminated strings: strcmp for equality, strhash for hashing. */
+/* extern int strcmp(const char *, const char *); */
+static st_index_t strhash(st_data_t);
+static const struct st_hash_type type_strhash = {
+ strcmp,
+ strhash,
+};
+/* Hash type for strings compared without regard to ASCII case. */
+static st_index_t strcasehash(st_data_t);
+static const struct st_hash_type type_strcasehash = {
+ st_strcasecmp,
+ strcasehash,
+};
+
+static void rehash(st_table *);
+
+#ifdef RUBY
+#define malloc xmalloc
+#define calloc xcalloc
+#define free(x) xfree(x)
+#endif
+/* Element count of a real array (not a pointer), converted to int. */
+#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
+/* alloc() mallocs one object of the given type; Calloc() zero-allocates n items of s bytes. */
+#define alloc(type) (type*)malloc((size_t)sizeof(type))
+#define Calloc(n,s) (char*)calloc((n),(s))
+/* Keys match when the values are identical or the table's compare callback returns 0. */
+#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0)
+/* do_hash() calls the table's hash callback; do_hash_bin() reduces the result to a bin index. */
+/* remove cast to unsigned int in the future */
+#define do_hash(key,table) (unsigned int)(st_index_t)(*(table)->type->hash)((key))
+#define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins)
+
+/*
+ * MINSIZE is the minimum size of a dictionary.
+ */
+
+#define MINSIZE 8
+
+/*
+Table of prime numbers 2^n+a, 3<=n<=30, followed by a trailing 0 entry.
+*/
+static const unsigned int primes[] = {
+ 8 + 3,
+ 16 + 3,
+ 32 + 5,
+ 64 + 3,
+ 128 + 3,
+ 256 + 27,
+ 512 + 9,
+ 1024 + 9,
+ 2048 + 5,
+ 4096 + 3,
+ 8192 + 27,
+ 16384 + 43,
+ 32768 + 3,
+ 65536 + 45,
+ 131072 + 29,
+ 262144 + 3,
+ 524288 + 21,
+ 1048576 + 7,
+ 2097152 + 17,
+ 4194304 + 15,
+ 8388608 + 9,
+ 16777216 + 43,
+ 33554432 + 35,
+ 67108864 + 15,
+ 134217728 + 29,
+ 268435456 + 3,
+ 536870912 + 11,
+ 1073741824 + 85,
+ 0
+};
+
+/*
+ * Pick the number of bins for a table that must hold `size`: the first
+ * prime from primes[] whose paired power of two (MINSIZE, 2*MINSIZE, ...)
+ * is strictly greater than size.
+ *
+ * Returns (st_index_t)-1 when size is beyond the largest prime in the
+ * table (with NOT_RUBY undefined an exception is raised instead).
+ */
+static st_index_t
+new_size(st_index_t size)
+{
+  int i;
+  st_index_t newsize;
+
+  /*
+   * primes[] ends with a 0 entry that numberof() counts too.  Stop one
+   * short of it: otherwise a request in [2^30, 2^31) would be answered
+   * with 0 bins, and every later `hash % num_bins` would divide by zero.
+   */
+  for (i = 0, newsize = MINSIZE; i < numberof(primes) - 1; i++, newsize <<= 1) {
+    if (newsize > size) return primes[i];
+  }
+  /* Ran out of primes */
+#ifndef NOT_RUBY
+  rb_raise(rb_eRuntimeError, "st_table too big");
+#endif
+  return -1; /* should raise exception */
+}
+
+#define MAX_PACKED_NUMHASH (ST_DEFAULT_INIT_TABLE_SIZE/2)
+
+/*
+ * Allocate an empty table of the given hash type with room for at least
+ * `size` entries (rounded up to a prime bin count by new_size).
+ *
+ * A table using type_numhash whose bin count is small enough starts in
+ * "packed" mode, where key/value pairs are stored inline in bins[].
+ *
+ * Returns the new table, or 0 when either allocation fails (the same
+ * convention st_copy uses).
+ */
+st_table*
+st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
+{
+  st_table *tbl;
+
+  size = new_size(size); /* round up to prime number */
+
+  tbl = alloc(st_table);
+  if (tbl == 0) {
+    return 0;
+  }
+  tbl->type = type;
+  tbl->num_entries = 0;
+  tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH;
+  tbl->num_bins = size;
+  tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+  if (tbl->bins == 0) {
+    /* do not hand out a table whose bin array is missing */
+    free(tbl);
+    return 0;
+  }
+  tbl->head = 0;
+  tbl->tail = 0;
+
+  return tbl;
+}
+/* Create an empty table of the given hash type, requesting size 0 (the smallest bin count). */
+st_table*
+st_init_table(const struct st_hash_type *type)
+{
+ return st_init_table_with_size(type, 0);
+}
+/* Create an empty integer-keyed table with the smallest bin count. */
+st_table*
+st_init_numtable(void)
+{
+ return st_init_table(&type_numhash);
+}
+/* Create an empty integer-keyed table sized for at least `size` entries. */
+st_table*
+st_init_numtable_with_size(st_index_t size)
+{
+ return st_init_table_with_size(&type_numhash, size);
+}
+/* Create an empty C-string-keyed table with the smallest bin count. */
+st_table*
+st_init_strtable(void)
+{
+ return st_init_table(&type_strhash);
+}
+/* Create an empty C-string-keyed table sized for at least `size` entries. */
+st_table*
+st_init_strtable_with_size(st_index_t size)
+{
+ return st_init_table_with_size(&type_strhash, size);
+}
+/* Create an empty case-insensitive string-keyed table with the smallest bin count. */
+st_table*
+st_init_strcasetable(void)
+{
+ return st_init_table(&type_strcasehash);
+}
+/* Create an empty case-insensitive string-keyed table sized for at least `size` entries. */
+st_table*
+st_init_strcasetable_with_size(st_index_t size)
+{
+ return st_init_table_with_size(&type_strcasehash, size);
+}
+
+/*
+ * Remove every entry from the table while keeping the table and its bin
+ * array allocated.  A packed table only needs its count reset; a chained
+ * table has each bin's entries freed and its ordering list emptied.
+ */
+void
+st_clear(st_table *table)
+{
+  st_index_t bin;
+
+  if (!table->entries_packed) {
+    for (bin = 0; bin < table->num_bins; bin++) {
+      st_table_entry *node = table->bins[bin];
+
+      table->bins[bin] = 0;
+      while (node != 0) {
+        st_table_entry *following = node->next;
+
+        free(node);
+        node = following;
+      }
+    }
+    table->head = 0;
+    table->tail = 0;
+  }
+  table->num_entries = 0;
+}
+/* Destroy a table: drop all entries via st_clear, then free the bin array and the table itself. */
+void
+st_free_table(st_table *table)
+{
+ st_clear(table);
+ free(table->bins);
+ free(table);
+}
+
+/*
+ * Report the bytes attributed to the table: the header and the bin array,
+ * plus one st_table_entry per entry when the table is not packed.
+ */
+size_t
+st_memsize(const st_table *table)
+{
+  size_t total = table->num_bins * sizeof (void *) + sizeof(st_table);
+
+  if (!table->entries_packed) {
+    total += table->num_entries * sizeof(struct st_table_entry);
+  }
+  return total;
+}
+/* True when ptr is a non-null entry that does NOT match (hash_val, key); false for null or a match. */
+#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
+((ptr) != 0 && (ptr->hash != (hash_val) || !EQUAL((table), (key), (ptr)->key)))
+/* Hooks expanded inside FIND_ENTRY; both are defined empty here. */
+#define COLLISION
+#define FOUND_ENTRY
+/*
+ * Set bin_pos to hash_val's bin and walk that chain, leaving ptr at the
+ * matching entry or 0. The expansion uses `key` directly (it is not a
+ * macro parameter), so the caller must have a variable named key in scope.
+ */
+#define FIND_ENTRY(table, ptr, hash_val, bin_pos) do {\
+ bin_pos = hash_val%(table)->num_bins;\
+ ptr = (table)->bins[bin_pos];\
+ FOUND_ENTRY;\
+ if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
+ COLLISION;\
+ while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
+ ptr = ptr->next;\
+ }\
+ ptr = ptr->next;\
+ }\
+} while (0)
+
+#define collision_check 0
+/*
+ * Look up key. Returns 1 when it is present, storing the associated record
+ * in *value if value is non-null; returns 0 when it is absent.
+ */
+int
+st_lookup(st_table *table, register st_data_t key, st_data_t *value)
+{
+ st_index_t hash_val, bin_pos;
+ register st_table_entry *ptr;
+
+ if (table->entries_packed) { /* pairs live inline: bins[2i] is a key, bins[2i+1] its value */
+ st_index_t i;
+ for (i = 0; i < table->num_entries; i++) {
+ if ((st_data_t)table->bins[i*2] == key) {
+ if (value !=0) *value = (st_data_t)table->bins[i*2+1];
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos); /* reads the local `key` implicitly */
+
+ if (ptr == 0) {
+ return 0;
+ }
+ else {
+ if (value != 0) *value = ptr->record;
+ return 1;
+ }
+}
+/*
+ * Like st_lookup, but yields the key as stored in the table rather than its
+ * record. Returns 1 and sets *result (when non-null) if found, else 0.
+ */
+int
+st_get_key(st_table *table, register st_data_t key, st_data_t *result)
+{
+ st_index_t hash_val, bin_pos;
+ register st_table_entry *ptr;
+
+ if (table->entries_packed) { /* linear scan of the inline key slots */
+ st_index_t i;
+ for (i = 0; i < table->num_entries; i++) {
+ if ((st_data_t)table->bins[i*2] == key) {
+ if (result !=0) *result = (st_data_t)table->bins[i*2];
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos); /* reads the local `key` implicitly */
+
+ if (ptr == 0) {
+ return 0;
+ }
+ else {
+ if (result != 0) *result = ptr->key;
+ return 1;
+ }
+}
+
+#undef collision_check
+#define collision_check 1
+/* True when a packed table can take one more inline pair: it must fit in bins[] and stay within MAX_PACKED_NUMHASH. */
+#define MORE_PACKABLE_P(table) \
+ ((st_index_t)((table)->num_entries+1) * 2 <= (table)->num_bins && \
+ (table)->num_entries+1 <= MAX_PACKED_NUMHASH)
+/*
+ * Insert a new entry without searching for an existing key: rehash first
+ * when the density limit is exceeded (recomputing bin_pos), allocate the
+ * entry, push it at the front of its bin chain and append it to the
+ * head/tail list that records insertion order.
+ */
+#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
+do {\
+ st_table_entry *entry;\
+ if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {\
+ rehash(table);\
+ bin_pos = hash_val % table->num_bins;\
+ }\
+ \
+ entry = alloc(st_table_entry);\
+ \
+ entry->hash = hash_val;\
+ entry->key = key;\
+ entry->record = value;\
+ entry->next = table->bins[bin_pos];\
+ if (table->head != 0) {\
+ entry->fore = 0;\
+ (entry->back = table->tail)->fore = entry;\
+ table->tail = entry;\
+ }\
+ else {\
+ table->head = table->tail = entry;\
+ entry->fore = entry->back = 0;\
+ }\
+ table->bins[bin_pos] = entry;\
+ table->num_entries++;\
+} while (0)
+/*
+ * Convert a packed table into a chained one in place: the inline pairs are
+ * copied to a stack buffer, the bin array is zeroed, and each pair is
+ * re-inserted through st_insert into a non-packed copy of the header that
+ * shares the same bin array. The header copy is then written back.
+ */
+static void
+unpack_entries(register st_table *table)
+{
+ st_index_t i;
+ struct st_table_entry *packed_bins[MAX_PACKED_NUMHASH*2];
+ st_table tmp_table = *table;
+
+ memcpy(packed_bins, table->bins, sizeof(struct st_table_entry *) * table->num_entries*2);
+ table->bins = packed_bins; /* temporary: replaced by the `*table = tmp_table` assignment below */
+ tmp_table.entries_packed = 0;
+ tmp_table.num_entries = 0;
+ memset(tmp_table.bins, 0, sizeof(struct st_table_entry *) * tmp_table.num_bins); /* tmp_table.bins is still the original heap array */
+ for (i = 0; i < table->num_entries; i++) {
+ st_insert(&tmp_table, (st_data_t)packed_bins[i*2], (st_data_t)packed_bins[i*2+1]);
+ }
+ *table = tmp_table;
+}
+/*
+ * Store value under key. Returns 1 when the key already existed (its value
+ * is overwritten) and 0 when a new entry was added.
+ */
+int
+st_insert(register st_table *table, register st_data_t key, st_data_t value)
+{
+ st_index_t hash_val, bin_pos;
+ register st_table_entry *ptr;
+
+ if (table->entries_packed) {
+ st_index_t i;
+ for (i = 0; i < table->num_entries; i++) {
+ if ((st_data_t)table->bins[i*2] == key) {
+ table->bins[i*2+1] = (struct st_table_entry*)value;
+ return 1;
+ }
+ }
+ if (MORE_PACKABLE_P(table)) {
+ i = table->num_entries++;
+ table->bins[i*2] = (struct st_table_entry*)key;
+ table->bins[i*2+1] = (struct st_table_entry*)value;
+ return 0;
+ }
+ else {
+ unpack_entries(table); /* no room inline: switch to chained form, then fall through */
+ }
+ }
+
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos);
+
+ if (ptr == 0) {
+ ADD_DIRECT(table, key, value, hash_val, bin_pos);
+ return 0;
+ }
+ else {
+ ptr->record = value;
+ return 1;
+ }
+}
+/*
+ * Same contract as st_insert, except that when a new entry is added to a
+ * chained table the stored key is (*func)(key) instead of key itself.
+ * The packed branch stores key unchanged and does not call func.
+ */
+int
+st_insert2(register st_table *table, register st_data_t key, st_data_t value,
+ st_data_t (*func)(st_data_t))
+{
+ st_index_t hash_val, bin_pos;
+ register st_table_entry *ptr;
+
+ if (table->entries_packed) {
+ st_index_t i;
+ for (i = 0; i < table->num_entries; i++) {
+ if ((st_data_t)table->bins[i*2] == key) {
+ table->bins[i*2+1] = (struct st_table_entry*)value;
+ return 1;
+ }
+ }
+ if (MORE_PACKABLE_P(table)) {
+ i = table->num_entries++;
+ table->bins[i*2] = (struct st_table_entry*)key;
+ table->bins[i*2+1] = (struct st_table_entry*)value;
+ return 0;
+ }
+ else {
+ unpack_entries(table);
+ }
+ }
+
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos);
+
+ if (ptr == 0) {
+ key = (*func)(key); /* hash_val was computed from the key before this call */
+ ADD_DIRECT(table, key, value, hash_val, bin_pos);
+ return 0;
+ }
+ else {
+ ptr->record = value;
+ return 1;
+ }
+}
+
+/*
+ * Add a key/value pair without checking whether the key is already
+ * present.  A packed table takes the pair inline while it still fits;
+ * otherwise it is unpacked and the pair is added as a chained entry.
+ */
+void
+st_add_direct(st_table *table, st_data_t key, st_data_t value)
+{
+  st_index_t hash_val, bin_pos;
+
+  if (table->entries_packed) {
+    /* st_index_t, matching num_entries and the index type st_insert uses */
+    st_index_t i;
+    if (MORE_PACKABLE_P(table)) {
+      i = table->num_entries++;
+      table->bins[i*2] = (struct st_table_entry*)key;
+      table->bins[i*2+1] = (struct st_table_entry*)value;
+      return;
+    }
+    else {
+      unpack_entries(table);
+    }
+  }
+
+  hash_val = do_hash(key, table);
+  bin_pos = hash_val % table->num_bins;
+  ADD_DIRECT(table, key, value, hash_val, bin_pos);
+}
+
+/*
+ * Grow the bin array to the next size from new_size() and redistribute
+ * every entry by walking the insertion-order list.
+ *
+ * If no larger size is available, the byte count would overflow, or the
+ * reallocation fails, the table is left exactly as it was: it stays valid
+ * and simply keeps running at a higher density.
+ */
+static void
+rehash(register st_table *table)
+{
+  register st_table_entry *ptr, **new_bins;
+  st_index_t i, new_num_bins, hash_val;
+
+  new_num_bins = new_size(table->num_bins+1);
+  /* reject a non-growing result and the (st_index_t)-1 "too big" value */
+  if (new_num_bins <= table->num_bins ||
+      new_num_bins > ((size_t)-1) / sizeof(st_table_entry*)) {
+    return;
+  }
+  new_bins = (st_table_entry**)
+    xrealloc(table->bins, new_num_bins * sizeof(st_table_entry*));
+  if (new_bins == 0) {
+    /* table->bins is untouched on failure, so keep using it */
+    return;
+  }
+  for (i = 0; i < new_num_bins; ++i) new_bins[i] = 0;
+  table->num_bins = new_num_bins;
+  table->bins = new_bins;
+
+  if ((ptr = table->head) != 0) {
+    do {
+      hash_val = ptr->hash % new_num_bins;
+      ptr->next = new_bins[hash_val];
+      new_bins[hash_val] = ptr;
+    } while ((ptr = ptr->fore) != 0);
+  }
+}
+/*
+ * Duplicate a table. Keys and records are copied as raw st_data_t values.
+ * Returns the new table, or 0 when an allocation fails (anything already
+ * allocated for the copy is released first).
+ */
+st_table*
+st_copy(st_table *old_table)
+{
+ st_table *new_table;
+ st_table_entry *ptr, *entry, *prev, **tail;
+ st_index_t num_bins = old_table->num_bins;
+ st_index_t hash_val;
+
+ new_table = alloc(st_table);
+ if (new_table == 0) {
+ return 0;
+ }
+
+ *new_table = *old_table;
+ new_table->bins = (st_table_entry**)
+ Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+
+ if (new_table->bins == 0) {
+ free(new_table);
+ return 0;
+ }
+
+ if (old_table->entries_packed) { /* packed pairs live in bins[] itself, so copying the array is enough */
+ memcpy(new_table->bins, old_table->bins, sizeof(struct st_table_entry *) * old_table->num_bins);
+ return new_table;
+ }
+
+ if ((ptr = old_table->head) != 0) {
+ prev = 0;
+ tail = &new_table->head; /* where the next copied entry gets linked */
+ do {
+ entry = alloc(st_table_entry);
+ if (entry == 0) {
+ st_free_table(new_table);
+ return 0;
+ }
+ *entry = *ptr;
+ hash_val = entry->hash % num_bins;
+ entry->next = new_table->bins[hash_val];
+ new_table->bins[hash_val] = entry;
+ entry->back = prev;
+ *tail = prev = entry;
+ tail = &entry->fore;
+ } while ((ptr = ptr->fore) != 0);
+ new_table->tail = prev;
+ }
+
+ return new_table;
+}
+/* Unlink ptr from the head/tail (fore/back) list and decrement num_entries; the bin chain is not touched and ptr is not freed. */
+#define REMOVE_ENTRY(table, ptr) do \
+ { \
+ if (ptr->fore == 0 && ptr->back == 0) { \
+ table->head = 0; \
+ table->tail = 0; \
+ } \
+ else { \
+ st_table_entry *fore = ptr->fore, *back = ptr->back; \
+ if (fore) fore->back = back; \
+ if (back) back->fore = fore; \
+ if (ptr == table->head) table->head = fore; \
+ if (ptr == table->tail) table->tail = back; \
+ } \
+ table->num_entries--; \
+ } while (0)
+/*
+ * Remove the entry for *key. On success returns 1 and stores the removed
+ * record in *value (when non-null); for a chained table *key is also
+ * overwritten with the key as stored. When the key is absent returns 0
+ * and sets *value (when non-null) to 0.
+ */
+int
+st_delete(register st_table *table, register st_data_t *key, st_data_t *value)
+{
+ st_index_t hash_val;
+ st_table_entry **prev;
+ register st_table_entry *ptr;
+
+ if (table->entries_packed) {
+ st_index_t i;
+ for (i = 0; i < table->num_entries; i++) {
+ if ((st_data_t)table->bins[i*2] == *key) {
+ if (value != 0) *value = (st_data_t)table->bins[i*2+1];
+ table->num_entries--;
+ memmove(&table->bins[i*2], &table->bins[(i+1)*2],
+ sizeof(struct st_table_entry*) * 2*(table->num_entries-i)); /* slide the later pairs down over the hole */
+ return 1;
+ }
+ }
+ if (value != 0) *value = 0;
+ return 0;
+ }
+
+ hash_val = do_hash_bin(*key, table); /* already reduced to a bin index */
+
+ for (prev = &table->bins[hash_val]; (ptr = *prev) != 0; prev = &ptr->next) {
+ if (EQUAL(table, *key, ptr->key)) {
+ *prev = ptr->next;
+ REMOVE_ENTRY(table, ptr);
+ if (value != 0) *value = ptr->record;
+ *key = ptr->key;
+ free(ptr);
+ return 1;
+ }
+ }
+
+ if (value != 0) *value = 0;
+ return 0;
+}
+/*
+ * Deletion that leaves the storage in place: the matching entry is marked
+ * with the `never` value instead of being removed from its bin chain (or
+ * from the packed array). st_cleanup_safe later drops the marked entries.
+ * Return value and *value handling match st_delete.
+ */
+int
+st_delete_safe(register st_table *table, register st_data_t *key, st_data_t *value, st_data_t never)
+{
+ st_index_t hash_val;
+ register st_table_entry *ptr;
+
+ if (table->entries_packed) {
+ st_index_t i;
+ for (i = 0; i < table->num_entries; i++) {
+ if ((st_data_t)table->bins[i*2] == *key) {
+ if (value != 0) *value = (st_data_t)table->bins[i*2+1];
+ table->bins[i*2] = (void *)never; /* num_entries is left unchanged here */
+ return 1;
+ }
+ }
+ if (value != 0) *value = 0;
+ return 0;
+ }
+
+ hash_val = do_hash_bin(*key, table);
+ ptr = table->bins[hash_val];
+
+ for (; ptr != 0; ptr = ptr->next) {
+ if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
+ REMOVE_ENTRY(table, ptr); /* leaves the ordering list and the count, stays in the bin chain */
+ *key = ptr->key;
+ if (value != 0) *value = ptr->record;
+ ptr->key = ptr->record = never;
+ return 1;
+ }
+ }
+
+ if (value != 0) *value = 0;
+ return 0;
+}
+
+/*
+ * Physically remove the entries that st_delete_safe marked with `never`.
+ *
+ * Packed table: surviving pairs are compacted to the front of bins[] and
+ * num_entries becomes the number of survivors.
+ * Chained table: marked entries are unlinked from their bin chains and
+ * freed.
+ */
+void
+st_cleanup_safe(st_table *table, st_data_t never)
+{
+  st_table_entry *ptr, **last, *tmp;
+  st_index_t i;
+
+  if (table->entries_packed) {
+    st_index_t kept = 0;
+
+    /* only slots below num_entries are inspected; nothing past the
+       logical end of the packed array is read */
+    for (i = 0; i < table->num_entries; i++) {
+      if ((st_data_t)table->bins[i*2] == never) continue;
+      if (kept != i) {
+        table->bins[kept*2] = table->bins[i*2];
+        table->bins[kept*2+1] = table->bins[i*2+1];
+      }
+      kept++;
+    }
+    table->num_entries = kept;
+    return;
+  }
+
+  for (i = 0; i < table->num_bins; i++) {
+    ptr = *(last = &table->bins[i]);
+    while (ptr != 0) {
+      if (ptr->key == never) {
+        tmp = ptr;
+        *last = ptr = ptr->next;
+        free(tmp);
+      }
+      else {
+        ptr = *(last = &ptr->next);
+      }
+    }
+  }
+}
+/*
+ * Call func(key, record, arg) for each entry: in slot order for a packed
+ * table, in head-to-tail list order otherwise. The callback's return value
+ * steers the walk:
+ *   ST_CONTINUE  go on to the next entry
+ *   ST_CHECK     go on, after confirming the current entry is still present
+ *   ST_STOP      end the walk
+ *   ST_DELETE    remove the current entry, then go on
+ * Returns 1 when an ST_CHECK verification fails (func is then called once
+ * more as func(0, 0, arg, 1)); returns 0 otherwise.
+ */
+int
+st_foreach(st_table *table, int (*func)(ANYARGS), st_data_t arg)
+{
+ st_table_entry *ptr, **last, *tmp;
+ enum st_retval retval;
+ st_index_t i;
+
+ if (table->entries_packed) {
+ for (i = 0; i < table->num_entries; i++) {
+ st_index_t j;
+ st_data_t key, val;
+ key = (st_data_t)table->bins[i*2];
+ val = (st_data_t)table->bins[i*2+1];
+ retval = (*func)(key, val, arg);
+ switch (retval) {
+ case ST_CHECK: /* check if hash is modified during iteration */
+ for (j = 0; j < table->num_entries; j++) {
+ if ((st_data_t)table->bins[j*2] == key)
+ break;
+ }
+ if (j == table->num_entries) {
+ /* call func with error notice */
+ retval = (*func)(0, 0, arg, 1);
+ return 1;
+ }
+ /* fall through */
+ case ST_CONTINUE:
+ break;
+ case ST_STOP:
+ return 0;
+ case ST_DELETE:
+ table->num_entries--;
+ memmove(&table->bins[i*2], &table->bins[(i+1)*2],
+ sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+ i--; /* revisit this slot: it now holds the next pair */
+ break;
+ }
+ }
+ return 0;
+ }
+
+ if ((ptr = table->head) != 0) {
+ do {
+ i = ptr->hash % table->num_bins;
+ retval = (*func)(ptr->key, ptr->record, (void*)arg);
+ switch (retval) {
+ case ST_CHECK: /* check if hash is modified during iteration */
+ for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) {
+ if (!tmp) {
+ /* call func with error notice */
+ retval = (*func)(0, 0, arg, 1);
+ return 1;
+ }
+ }
+ /* fall through */
+ case ST_CONTINUE:
+ ptr = ptr->fore;
+ break;
+ case ST_STOP:
+ return 0;
+ case ST_DELETE:
+ last = &table->bins[ptr->hash % table->num_bins];
+ for (; (tmp = *last) != 0; last = &tmp->next) {
+ if (ptr == tmp) {
+ tmp = ptr->fore; /* remember the successor before ptr is freed */
+ *last = ptr->next;
+ REMOVE_ENTRY(table, ptr);
+ free(ptr);
+ if (ptr == tmp) return 0;
+ ptr = tmp;
+ break;
+ }
+ }
+ }
+ } while (ptr && table->head);
+ }
+ return 0;
+}
+/* Callback shape expected from st_foreach_safe users: (key, value, user argument). */
+typedef int st_foreach_func(mrb_sym, void*, void *);
+
+struct foreach_safe_arg {
+ st_table *tbl; /* table being walked */
+ st_foreach_func *func; /* user callback */
+ void *arg; /* opaque argument forwarded to func */
+};
+/*
+ * Adapter used by st_foreach_safe: skips entries whose key is 0xffffffff,
+ * forwards the rest to the user callback, and turns ST_CONTINUE into
+ * ST_CHECK so the iterator verifies the entry after each call.
+ * NOTE(review): mrb is the first parameter here, while st_foreach invokes
+ * its callback as func(key, record, arg) with no leading mrb - confirm the
+ * iterator this is paired with passes mrb.
+ */
+static int
+foreach_safe_i(mrb_state *mrb, mrb_sym key, void* value, struct foreach_safe_arg *arg)
+{
+ int status;
+
+ if (key == 0xffffffff/*key == Qundef*/) return ST_CONTINUE;
+ status = (*arg->func)(key, value, arg->arg);
+ if (status == ST_CONTINUE) {
+ return ST_CHECK;
+ }
+ return status;
+}
+
+/*
+ * Iterate over table with func(key, value, a), raising a RuntimeError
+ * ("hash modified during iteration") when the iterator reports that the
+ * current entry vanished while func was running.
+ *
+ * The walk goes through st_foreachNew because the adapter foreach_safe_i
+ * takes mrb as its first parameter: st_foreachNew calls its callback as
+ * (mrb, key, record, arg), whereas st_foreach passes only
+ * (key, record, arg), which would shift every argument by one.
+ */
+void
+st_foreach_safe(mrb_state *mrb, void *table, int (*func)(ANYARGS), void* a)
+{
+  struct foreach_safe_arg arg;
+
+  arg.tbl = table;
+  arg.func = (st_foreach_func *)func;
+  arg.arg = a;
+  if (st_foreachNew(mrb, table, foreach_safe_i, &arg)) {
+    mrb_raise(mrb, mrb->eRuntimeError_class, "hash modified during iteration");
+  }
+}
+
+/*
+ * Variant of st_foreach whose callback receives the interpreter state
+ * first: func(mrb, key, record, arg).
+ *
+ * The callback's return value steers the walk:
+ *   ST_CONTINUE  go on to the next entry
+ *   ST_CHECK     go on, after confirming the current entry is still present
+ *   ST_STOP      end the walk
+ *   ST_DELETE    remove the current entry, then go on
+ *
+ * Returns 1 when an ST_CHECK verification fails; func is then called one
+ * last time as func(mrb, 0, 0, arg, 1) so that its parameters line up with
+ * the normal call.  Returns 0 otherwise.
+ */
+int
+st_foreachNew(mrb_state *mrb, st_table *table, int (*func)(ANYARGS), void* arg)
+{
+  st_table_entry *ptr, **last, *tmp;
+  enum st_retval retval;
+  st_index_t i;
+
+  if (table->entries_packed) {
+    for (i = 0; i < table->num_entries; i++) {
+      st_index_t j;
+      st_data_t key, val;
+      key = (st_data_t)table->bins[i*2];
+      val = (st_data_t)table->bins[i*2+1];
+      retval = (*func)(mrb, key, val, arg);
+      switch (retval) {
+      case ST_CHECK: /* check if hash is modified during iteration */
+        for (j = 0; j < table->num_entries; j++) {
+          if ((st_data_t)table->bins[j*2] == key)
+            break;
+        }
+        if (j == table->num_entries) {
+          /* call func with error notice; mrb stays the first argument */
+          (*func)(mrb, 0, 0, arg, 1);
+          return 1;
+        }
+        /* fall through */
+      case ST_CONTINUE:
+        break;
+      case ST_STOP:
+        return 0;
+      case ST_DELETE:
+        table->num_entries--;
+        memmove(&table->bins[i*2], &table->bins[(i+1)*2],
+                sizeof(struct st_table_entry*) * 2*(table->num_entries-i));
+        i--; /* revisit this slot: it now holds the next pair */
+        break;
+      }
+    }
+    return 0;
+  }
+
+  if ((ptr = table->head) != 0) {
+    do {
+      i = ptr->hash % table->num_bins;
+      retval = (*func)(mrb, ptr->key, ptr->record, arg);
+      switch (retval) {
+      case ST_CHECK: /* check if hash is modified during iteration */
+        for (tmp = table->bins[i]; tmp != ptr; tmp = tmp->next) {
+          if (!tmp) {
+            /* call func with error notice; mrb stays the first argument */
+            (*func)(mrb, 0, 0, arg, 1);
+            return 1;
+          }
+        }
+        /* fall through */
+      case ST_CONTINUE:
+        ptr = ptr->fore;
+        break;
+      case ST_STOP:
+        return 0;
+      case ST_DELETE:
+        last = &table->bins[ptr->hash % table->num_bins];
+        for (; (tmp = *last) != 0; last = &tmp->next) {
+          if (ptr == tmp) {
+            tmp = ptr->fore; /* remember the successor before ptr is freed */
+            *last = ptr->next;
+            REMOVE_ENTRY(table, ptr);
+            free(ptr);
+            if (ptr == tmp) return 0;
+            ptr = tmp;
+            break;
+          }
+        }
+      }
+    } while (ptr && table->head);
+  }
+  return 0;
+}
+
+/*
+ * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
+ *
+ * @(#) $Hash32: Revision: 1.1 $
+ * @(#) $Hash32: Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $
+ * @(#) $Hash32: Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $
+ *
+ ***
+ *
+ * Fowler/Noll/Vo hash
+ *
+ * The basis of this hash algorithm was taken from an idea sent
+ * as reviewer comments to the IEEE POSIX P1003.2 committee by:
+ *
+ * Phong Vo (http://www.research.att.com/info/kpv/)
+ * Glenn Fowler (http://www.research.att.com/~gsf/)
+ *
+ * In a subsequent ballot round:
+ *
+ * Landon Curt Noll (http://www.isthe.com/chongo/)
+ *
+ * improved on their algorithm. Some people tried this hash
+ * and found that it worked rather well. In an EMail message
+ * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
+ *
+ * FNV hashes are designed to be fast while maintaining a low
+ * collision rate. The FNV speed allows one to quickly hash lots
+ * of data while maintaining a reasonable collision rate. See:
+ *
+ * http://www.isthe.com/chongo/tech/comp/fnv/index.html
+ *
+ * for more details as well as other forms of the FNV hash.
+ ***
+ *
+ * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
+ * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
+ *
+ ***
+ *
+ * Please do not copyright this code. This code is in the public domain.
+ *
+ * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
+ * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ *
+ * By:
+ * chongo <Landon Curt Noll> /\oo/\
+ * http://www.isthe.com/chongo/
+ *
+ * Share and Enjoy! :-)
+ */
+
+/*
+ * 32 bit FNV-1 and FNV-1a non-zero initial basis
+ *
+ * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets:
+ *
+ * chongo <Landon Curt Noll> /\../\
+ *
+ * NOTE: The \'s above are not back-slashing escape characters.
+ * They are literal ASCII backslash 0x5c characters.
+ *
+ * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition.
+ */
+#define FNV1_32A_INIT 0x811c9dc5
+
+/*
+ * 32 bit magic FNV-1a prime
+ */
+#define FNV_32_PRIME 0x01000193
+
+#ifdef ST_USE_FNV1
+/*
+ * FNV-1a hash of the NUL-terminated string whose address is passed in arg
+ * (the variant compiled when ST_USE_FNV1 is defined).
+ */
+static st_index_t
+strhash(st_data_t arg)
+{
+  register const char *string = (const char *)arg;
+  register st_index_t hval = FNV1_32A_INIT;
+
+  /*
+   * FNV-1a hash each octet in the buffer
+   */
+  while (*string) {
+    /* xor the bottom with the current octet; go through unsigned char so
+       that bytes >= 0x80 are not sign-extended where char is signed
+       (strcasehash reads its input the same way) */
+    hval ^= (unsigned int)(unsigned char)*string++;
+
+    /* multiply by the 32 bit FNV magic prime mod 2^32 */
+    hval *= FNV_32_PRIME;
+  }
+  return hval;
+}
+#else
+/* UNALIGNED_WORD_ACCESS defaults to 1 only when __i386__ or _M_IX86 is defined, and to 0 everywhere else. */
+#ifndef UNALIGNED_WORD_ACCESS
+# if defined __i386__ || defined _M_IX86
+# define UNALIGNED_WORD_ACCESS 1
+# endif
+#endif
+#ifndef UNALIGNED_WORD_ACCESS
+# define UNALIGNED_WORD_ACCESS 0
+#endif
+/* MURMUR selects the mixing variant (default 2); MurmurMagic is the multiplier that variant uses. */
+/* MurmurHash described in http://murmurhash.googlepages.com/ */
+#ifndef MURMUR
+#define MURMUR 2
+#endif
+
+#if MURMUR == 1
+#define MurmurMagic 0xc6a4a793
+#elif MURMUR == 2
+#if SIZEOF_ST_INDEX_T > 4
+#define MurmurMagic 0xc6a4a7935bd1e995
+#else
+#define MurmurMagic 0x5bd1e995
+#endif
+#endif
+/* One mixing round: fold the word k into the running hash h, using r as the right-shift amount. */
+static inline st_index_t
+murmur(st_index_t h, st_index_t k, int r)
+{
+ const st_index_t m = MurmurMagic;
+#if MURMUR == 1
+ h += k;
+ h *= m;
+ h ^= h >> r;
+#elif MURMUR == 2
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h *= m;
+ h ^= k;
+#endif
+ return h;
+}
+/* Final scrambling applied to the running hash once all input has been mixed in. */
+static inline st_index_t
+murmur_finish(st_index_t h)
+{
+#if MURMUR == 1
+ h = murmur(h, 0, 10);
+ h = murmur(h, 0, 17);
+#elif MURMUR == 2
+ h ^= h >> 13;
+ h *= MurmurMagic;
+ h ^= h >> 15;
+#endif
+ return h;
+}
+/* murmur_step(h, k) is a mixing round with shift 16; murmur1(h) mixes a fixed constant word into h. */
+#define murmur_step(h, k) murmur(h, k, 16)
+
+#if MURMUR == 1
+#define murmur1(h) murmur_step(h, 16)
+#else
+#define murmur1(h) murmur_step(h, 24)
+#endif
+/*
+ * Hash len bytes starting at ptr, seeded with h (0xdeadbeef is added to
+ * the seed first). Input is consumed one st_index_t-sized word at a time
+ * through murmur_step; leftover bytes are gathered into one last word, and
+ * the result goes through murmur_finish.
+ *
+ * When UNALIGNED_WORD_ACCESS is 0 and ptr is not word-aligned, words are
+ * rebuilt from aligned loads with shifts (sl / sr) instead of being read
+ * directly from the unaligned address.
+ */
+st_index_t
+st_hash(const void *ptr, size_t len, st_index_t h)
+{
+ const char *data = ptr;
+ st_index_t t = 0;
+
+ h += 0xdeadbeef;
+
+#define data_at(n) (st_index_t)((unsigned char)data[n])
+#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
+#if SIZEOF_ST_INDEX_T > 4
+#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
+#if SIZEOF_ST_INDEX_T > 8
+#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
+ UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
+#endif
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
+#else
+#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
+#endif
+ if (len >= sizeof(st_index_t)) {
+#if !UNALIGNED_WORD_ACCESS
+ int align = (int)((st_data_t)data % sizeof(st_index_t)); /* distance past the previous word boundary */
+ if (align) {
+ st_index_t d = 0;
+ int sl, sr, pack;
+
+ switch (align) { /* the cases fall through, collecting the bytes up to the next boundary into t */
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
+ t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
+#else
+# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
+ t |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+ }
+
+#ifdef WORDS_BIGENDIAN
+ t >>= (CHAR_BIT * align) - CHAR_BIT;
+#else
+ t <<= (CHAR_BIT * align);
+#endif
+
+ data += sizeof(st_index_t)-align;
+ len -= sizeof(st_index_t)-align;
+
+ sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
+ sr = CHAR_BIT * align;
+
+ while (len >= sizeof(st_index_t)) {
+ d = *(st_index_t *)data; /* data is word-aligned from here on */
+#ifdef WORDS_BIGENDIAN
+ t = (t << sr) | (d >> sl);
+#else
+ t = (t >> sr) | (d << sl);
+#endif
+ h = murmur_step(h, t);
+ t = d;
+ data += sizeof(st_index_t);
+ len -= sizeof(st_index_t);
+ }
+
+ pack = len < (size_t)align ? (int)len : align;
+ d = 0;
+ switch (pack) {
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
+#else
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ d |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+ }
+#ifdef WORDS_BIGENDIAN
+ t = (t << sr) | (d >> sl);
+#else
+ t = (t >> sr) | (d << sl);
+#endif
+
+#if MURMUR == 2
+ if (len < (size_t)align) goto skip_tail;
+#endif
+ h = murmur_step(h, t);
+ data += pack;
+ len -= pack;
+ }
+ else
+#endif
+ {
+ do {
+ h = murmur_step(h, *(st_index_t *)data);
+ data += sizeof(st_index_t);
+ len -= sizeof(st_index_t);
+ } while (len >= sizeof(st_index_t));
+ }
+ }
+
+ t = 0;
+ switch (len) { /* fewer than one word left: gather the trailing bytes into t */
+#ifdef WORDS_BIGENDIAN
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
+#else
+# define UNALIGNED_ADD(n) case (n) + 1: \
+ t |= data_at(n) << CHAR_BIT*(n)
+#endif
+ UNALIGNED_ADD_ALL;
+#undef UNALIGNED_ADD
+#if MURMUR == 1
+ h = murmur_step(h, t);
+#elif MURMUR == 2
+# if !UNALIGNED_WORD_ACCESS
+ skip_tail:
+# endif
+ h ^= t;
+ h *= MurmurMagic;
+#endif
+ }
+
+ return murmur_finish(h);
+}
+/* Add i to the running hash h, then apply one murmur_step with the constant word 16. */
+st_index_t
+st_hash_uint32(st_index_t h, uint32_t i)
+{
+ return murmur_step(h + i, 16);
+}
+/*
+ * Add i to h and mix the sum into a fresh accumulator with murmur1, once
+ * for the whole value and, when st_index_t is wider than 32 bits, once
+ * more per additional 32-bit shift; WORDS_BIGENDIAN selects the order.
+ */
+st_index_t
+st_hash_uint(st_index_t h, st_index_t i)
+{
+ st_index_t v = 0;
+ h += i;
+#ifdef WORDS_BIGENDIAN
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
+ v = murmur1(v + (h >> 12*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
+ v = murmur1(v + (h >> 8*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
+ v = murmur1(v + (h >> 4*8));
+#endif
+#endif
+ v = murmur1(v + h);
+#ifndef WORDS_BIGENDIAN
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
+ v = murmur1(v + (h >> 4*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
+ v = murmur1(v + (h >> 8*8));
+#endif
+#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
+ v = murmur1(v + (h >> 12*8));
+#endif
+#endif
+ return v;
+}
+/* Finish an incrementally built hash with two further murmur_step rounds (constant words 10 and 17). */
+st_index_t
+st_hash_end(st_index_t h)
+{
+ h = murmur_step(h, 10);
+ h = murmur_step(h, 17);
+ return h;
+}
+/* Function form of st_hash_start: returns the seed unchanged. The #undef removes the same-named macro so a real function is defined. */
+#undef st_hash_start
+st_index_t
+st_hash_start(st_index_t h)
+{
+ return h;
+}
+/* String hash used when ST_USE_FNV1 is not defined: st_hash over the bytes up to (not including) the NUL, seeded with FNV1_32A_INIT. */
+static st_index_t
+strhash(st_data_t arg)
+{
+ register const char *string = (const char *)arg;
+ return st_hash(string, strlen(string), FNV1_32A_INIT);
+}
+#endif
+
+/*
+ * Compare two NUL-terminated strings, treating ASCII 'A'-'Z' as equal to
+ * 'a'-'z'.  Returns 0 when equal, 1 when s1 sorts after s2 (or s2 is a
+ * proper prefix of s1) and -1 in the opposite case.
+ */
+int
+st_strcasecmp(const char *s1, const char *s2)
+{
+  const unsigned char *a = (const unsigned char *)s1;
+  const unsigned char *b = (const unsigned char *)s2;
+
+  for (;; a++, b++) {
+    unsigned int ca = *a;
+    unsigned int cb = *b;
+
+    if (ca == '\0' || cb == '\0') {
+      /* at least one string ended: the one with bytes left is greater */
+      return (ca != '\0') - (cb != '\0');
+    }
+    if (ca >= 'A' && ca <= 'Z') ca += 'a' - 'A';
+    if (cb >= 'A' && cb <= 'Z') cb += 'a' - 'A';
+    if (ca != cb) {
+      return ca > cb ? 1 : -1;
+    }
+  }
+}
+
+/*
+ * Length-limited form of st_strcasecmp: compares at most n bytes, treating
+ * ASCII 'A'-'Z' as equal to 'a'-'z'.  Returns 0 when the compared prefixes
+ * are equal, otherwise 1 or -1 as st_strcasecmp does.
+ */
+int
+st_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+  size_t k;
+
+  for (k = 0; k < n; k++) {
+    unsigned int ca = (unsigned char)s1[k];
+    unsigned int cb = (unsigned char)s2[k];
+
+    if (ca == '\0' || cb == '\0') {
+      /* at least one string ended: the one with bytes left is greater */
+      return (ca != '\0') - (cb != '\0');
+    }
+    if (ca >= 'A' && ca <= 'Z') ca += 'a' - 'A';
+    if (cb >= 'A' && cb <= 'Z') cb += 'a' - 'A';
+    if (ca != cb) {
+      return ca > cb ? 1 : -1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Case-insensitive FNV-1a hash of the NUL-terminated string whose address
+ * is passed in arg: ASCII 'A'-'Z' are folded to lower case before each
+ * byte is mixed in.
+ */
+static st_index_t
+strcasehash(st_data_t arg)
+{
+  const unsigned char *p = (const unsigned char *)arg;
+  st_index_t acc = FNV1_32A_INIT;
+
+  for (; *p != '\0'; p++) {
+    unsigned int folded = *p;
+
+    if (folded >= 'A' && folded <= 'Z') folded += 'a' - 'A';
+    /* xor in the octet, then multiply by the 32 bit FNV prime */
+    acc ^= folded;
+    acc *= FNV_32_PRIME;
+  }
+  return acc;
+}
+/* Compare callback for integer keys: 0 when x and y are equal, 1 otherwise. */
+int
+st_numcmp(st_data_t x, st_data_t y)
+{
+ return x != y;
+}
+/* Hash callback for integer keys: the key value itself. */
+st_index_t
+st_numhash(st_data_t n)
+{
+ return (st_index_t)n;
+}
diff --git a/src/st.h b/src/st.h
new file mode 100644
index 000000000..7324e8da7
--- /dev/null
+++ b/src/st.h
@@ -0,0 +1,139 @@
+/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
+
+/* @(#) st.h 5.1 89/12/14 */
+
+#ifndef RUBY_ST_H
+#define RUBY_ST_H 1
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#ifndef RUBY_LIB_PREFIX
+
+#ifdef RUBY_EXTCONF_H
+#include RUBY_EXTCONF_H
+#endif
+#endif
+
+#if defined STDC_HEADERS
+#include <stddef.h>
+#elif defined HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#include <inttypes.h>
+
+#ifndef CHAR_BIT
+# ifdef HAVE_LIMITS_H
+# include <limits.h>
+# else
+# define CHAR_BIT 8
+# endif
+#endif
+
+#ifndef _
+# define _(args) args
+#endif
+
+#ifndef ANYARGS
+# ifdef __cplusplus
+# define ANYARGS ...
+# else
+# define ANYARGS
+# endif
+#endif
+/* st_data_t carries a key or a record; st_index_t is the same integer type, used for hashes, counts and bin indexes. */
+typedef uintptr_t st_data_t;
+typedef struct st_table st_table;
+
+typedef st_data_t st_index_t;
+typedef int st_compare_func(st_data_t, st_data_t);
+typedef st_index_t st_hash_func(st_data_t);
+
+typedef struct st_table_entry st_table_entry;
+
+struct st_table_entry {
+ st_index_t hash; /* hash value computed for key when the entry was added */
+ st_data_t key;
+ st_data_t record; /* value stored under key */
+ st_table_entry *next; /* next entry in the same bin chain */
+ st_table_entry *fore, *back; /* neighbours in the table's head-to-tail insertion-order list */
+};
+
+/*
+ * SIZEOF_ST_INDEX_T must equal sizeof(st_index_t), i.e. sizeof(uintptr_t):
+ * st.c uses it to pick the murmur constant and to compute shift counts.
+ * When the build does not provide SIZEOF_VOIDP, derive it from
+ * UINTPTR_MAX (from <stdint.h>, pulled in above through <inttypes.h>)
+ * instead of assuming a 32-bit target.
+ */
+#ifndef SIZEOF_VOIDP
+# if defined(UINTPTR_MAX) && UINTPTR_MAX > 0xffffffffUL
+#  define SIZEOF_VOIDP 8
+# else
+#  define SIZEOF_VOIDP 4
+# endif
+#endif
+
+#define SIZEOF_ST_INDEX_T SIZEOF_VOIDP
+/* Per-table behaviour: how two keys are compared (0 means equal) and how a key is hashed. */
+struct st_hash_type {
+ int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
+ st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
+};
+
+#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
+
+struct st_table {
+ const struct st_hash_type *type;
+ st_index_t num_bins; /* length of bins[] */
+ unsigned int entries_packed : 1; /* 1: bins[] holds key/value pairs inline instead of chain heads */
+#ifdef __GNUC__
+ __extension__
+#endif
+ st_index_t num_entries : ST_INDEX_BITS - 1;
+ struct st_table_entry **bins;
+ struct st_table_entry *head, *tail; /* first and last entry in insertion order (chained mode) */
+};
+/* Membership test: st_lookup with the value output discarded. */
+#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
+/* Callback verdicts for the foreach iterators: go on, stop, delete the current entry, or go on after checking the entry still exists. */
+enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
+
+st_table *st_init_table(const struct st_hash_type *);
+st_table *st_init_table_with_size(const struct st_hash_type *, st_index_t);
+st_table *st_init_numtable(void);
+st_table *st_init_numtable_with_size(st_index_t);
+st_table *st_init_strtable(void);
+st_table *st_init_strtable_with_size(st_index_t);
+st_table *st_init_strcasetable(void);
+st_table *st_init_strcasetable_with_size(st_index_t);
+int st_delete(st_table *, st_data_t *, st_data_t *); /* returns 0:notfound 1:deleted */
+int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t);
+int st_insert(st_table *, st_data_t, st_data_t);
+int st_insert2(st_table *, st_data_t, st_data_t, st_data_t (*)(st_data_t));
+int st_lookup(st_table *, st_data_t, st_data_t *);
+int st_get_key(st_table *, st_data_t, st_data_t *);
+int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
+int st_foreachNew(mrb_state *mrb, st_table *, int (*)(ANYARGS), void*); /* needs mrb_state declared before this header is included */
+int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t); /* NOTE(review): st.c in this commit has no definition for this */
+void st_add_direct(st_table *, st_data_t, st_data_t);
+void st_free_table(st_table *);
+void st_cleanup_safe(st_table *, st_data_t);
+void st_clear(st_table *);
+st_table *st_copy(st_table *);
+int st_numcmp(st_data_t, st_data_t);
+st_index_t st_numhash(st_data_t);
+int st_strcasecmp(const char *s1, const char *s2);
+int st_strncasecmp(const char *s1, const char *s2, size_t n);
+size_t st_memsize(const st_table *);
+st_index_t st_hash(const void *ptr, size_t len, st_index_t h);
+st_index_t st_hash_uint32(st_index_t h, uint32_t i);
+st_index_t st_hash_uint(st_index_t h, st_index_t i);
+st_index_t st_hash_end(st_index_t h);
+st_index_t st_hash_start(st_index_t h);
+#define st_hash_start(h) ((st_index_t)(h))
+
+int st_strcasecmp(const char *s1, const char *s2); /* repeats the declaration a few lines above */
+int st_strncasecmp(const char *s1, const char *s2, size_t n); /* repeats the declaration a few lines above */
+#define STRCASECMP(s1, s2) (st_strcasecmp(s1, s2))
+#define STRNCASECMP(s1, s2, n) (st_strncasecmp(s1, s2, n))
+
+#if defined(__cplusplus)
+} /* extern "C" { */
+#endif
+
+#endif /* RUBY_ST_H */
diff --git a/src/state.c b/src/state.c
new file mode 100644
index 000000000..14efed986
--- /dev/null
+++ b/src/state.c
@@ -0,0 +1,88 @@
+#include "mruby.h"
+#include "irep.h"
+#include <string.h>
+
+void mrb_init_heap(mrb_state*);
+void mrb_init_core(mrb_state*);
+void mrb_init_ext(mrb_state*);
+
+/*
+ * Create an interpreter state whose memory comes from the allocator
+ * callback `f`.
+ *
+ * `f` is first called as f(NULL, NULL, sizeof(mrb_state)) to obtain the
+ * state object itself.  The state is zero-filled, `f` is stored in
+ * mrb->allocf, current_white_part is set to MRB_GC_WHITE_A, and the heap,
+ * core and extension initialisers are run in that order.
+ *
+ * Returns the new state, or NULL when `f` could not supply memory for it.
+ */
+mrb_state*
+mrb_open_allocf(mrb_allocf f)
+{
+  mrb_state *mrb = (f)(NULL, NULL, sizeof(mrb_state));
+
+  /* the allocator may fail; never hand NULL to memset() */
+  if (mrb == NULL) return NULL;
+
+  memset(mrb, 0, sizeof(mrb_state));
+  mrb->allocf = f;
+  mrb->current_white_part = MRB_GC_WHITE_A;
+
+  mrb_init_heap(mrb);
+  mrb_init_core(mrb);
+  mrb_init_ext(mrb);
+  return mrb;
+}
+
+/*
+ * Default allocator callback used by mrb_open().
+ *
+ * A request for size 0 releases `p` with free() and yields NULL; any other
+ * size is forwarded to realloc().  The `mrb` argument is not used.
+ */
+static void*
+allocf(mrb_state *mrb, void *p, size_t size)
+{
+  if (size != 0) {
+    return realloc(p, size);
+  }
+  free(p);
+  return NULL;
+}
+
+/*
+ * Create an interpreter state that uses the default allocator (allocf).
+ * Returns whatever mrb_open_allocf() returns.
+ */
+mrb_state*
+mrb_open()
+{
+  return mrb_open_allocf(allocf);
+}
+
+/*
+ * Release the memory owned by an interpreter state.
+ *
+ * Frees the stack base and call-info base, then every registered irep
+ * (skipping ireps flagged MRB_IREP_NOFREE, and skipping only the iseq of
+ * ireps flagged MRB_ISEQ_NOFREE), then the irep table and finally the
+ * state itself.  `mrb` must not be used afterwards.
+ */
+void
+mrb_close(mrb_state *mrb)
+{
+  int i;
+
+  mrb_free(mrb, mrb->stbase);
+  mrb_free(mrb, mrb->cibase);
+
+  for (i = 0; i < mrb->irep_len; i++) {
+    mrb_irep *irep = mrb->irep[i];
+
+    if (irep->flags & MRB_IREP_NOFREE) continue;
+    if (!(irep->flags & MRB_ISEQ_NOFREE)) {
+      mrb_free(mrb, irep->iseq);
+    }
+    mrb_free(mrb, irep->pool);
+    mrb_free(mrb, irep->syms);
+    mrb_free(mrb, irep);
+  }
+
+  mrb_free(mrb, mrb->irep);
+  mrb_free(mrb, mrb);
+}
+
+/*
+ * Make sure the irep table can hold an entry at index `idx`.
+ *
+ * The table is created on first use with room for at least 256 entries
+ * and is doubled until its capacity is strictly greater than `idx`.
+ * Newly obtained slots are not initialised here.
+ */
+void
+mrb_add_irep(mrb_state *mrb, int idx)
+{
+  if (!mrb->irep) {
+    int max = 256;
+
+    /* slot idx needs idx+1 entries, so idx == max must grow as well */
+    if (idx >= max) max = idx+1;
+    mrb->irep = mrb_malloc(mrb, sizeof(mrb_irep*)*max);
+    mrb->irep_capa = max;
+  }
+  else if (mrb->irep_capa <= idx) {
+    while (mrb->irep_capa <= idx) {
+      mrb->irep_capa *= 2;
+    }
+    /* the table stores pointers, so size it by sizeof(mrb_irep*) */
+    mrb->irep = mrb_realloc(mrb, mrb->irep, sizeof(mrb_irep*)*mrb->irep_capa);
+  }
+}
+
+/*
+ * Return the top-level `self` object.
+ * Placeholder implementation: currently always nil.
+ */
+mrb_value
+mrb_top_self(mrb_state *mrb)
+{
+  mrb_value self = mrb_nil_value();   /* for now */
+
+  return self;
+}
diff --git a/src/string.c b/src/string.c
new file mode 100644
index 000000000..da52172f7
--- /dev/null
+++ b/src/string.c
@@ -0,0 +1,5234 @@
+#include "mruby.h"
+
+#include <stdarg.h>
+#include <string.h>
+#include "mruby/string.h"
+#include "mruby/numeric.h"
+#include "mruby/range.h"
+#include <ctype.h>
+#include "mruby/array.h"
+#include "mruby/class.h"
+#include "variable.h"
+#include "mruby/hash.h"
+#include <stdio.h>
+#include "variable.h"
+#include "re.h"
+#ifdef INCLUDE_REGEXP
+#include "regex.h"
+#include "st.h"
+#endif //INCLUDE_REGEXP
+
+#define mrb_usascii_str_new2 mrb_usascii_str_new_cstr
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+const char ruby_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+
+#ifdef INCLUDE_REGEXP
+static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote);
+#endif //INCLUDE_REGEXP
+#ifdef INCLUDE_ENCODING
+static void mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src);
+#else
+#define mrb_enc_cr_str_copy_for_substr(mrb, dest, src)
+#endif //INCLUDE_ENCODING
+static mrb_value str_replace(mrb_state *mrb, mrb_value str, mrb_value str2);
+#ifdef INCLUDE_ENCODING
+static long str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc);
+#endif //INCLUDE_ENCODING
+int mrb_block_given_p();
+#ifdef INCLUDE_ENCODING
+#define is_ascii_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT)
+#define is_broken_string(mrb, str) (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_BROKEN)
+#define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str))
+#endif //INCLUDE_ENCODING
+
+/*
+ * Set the byte length of `str` to `len` and write a NUL terminator at
+ * that position.  The string is made modifiable first via
+ * mrb_str_modify(); the buffer is not resized.
+ */
+void
+mrb_str_set_len(mrb_state *mrb, mrb_value str, long len)
+{
+  char *buf;
+
+  mrb_str_modify(mrb, str);
+  buf = RSTRING_PTR(str);
+  RSTRING_LEN(str) = len;
+  buf[len] = '\0';
+}
+
+/*
+ * Reallocate the buffer of `str` to hold `capacity` bytes plus a
+ * terminator, and record the new capacity unless the string is flagged
+ * as having no capacity field.  Expects a variable named `mrb` in scope.
+ * `capacity` is parenthesised at every use so that expression arguments
+ * expand safely.
+ */
+#define RESIZE_CAPA(str,capacity) do {\
+  RSTRING(str)->buf = mrb_realloc(mrb, RSTRING(str)->buf, (capacity)+1);\
+  if (!MRB_STR_NOCAPA_P(str))\
+    RSTRING_CAPA(str) = (capacity);\
+} while (0)
+
+/* Store `n` as the byte length of `str`; the buffer is left untouched. */
+#define STR_SET_LEN(str, n) do { RSTRING(str)->len = (n); } while (0)
+
+/* Decrease the byte length of `str` by one; the buffer is left untouched. */
+#define STR_DEC_LEN(str) do { RSTRING(str)->len--; } while (0)
+
+#ifdef INCLUDE_ENCODING
+static mrb_value mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len,
+ int ptr_encindex, int ptr_cr, int *ptr_cr_ret);
+#endif //INCLUDE_ENCODING
+/*
+ * Create a string from `len` bytes at `ptr`.
+ * Tainting is disabled (OBJ_TAINT is commented out), so this is currently
+ * equivalent to mrb_str_new().
+ */
+mrb_value
+mrb_tainted_str_new(mrb_state *mrb, const char *ptr, long len)
+{
+  //OBJ_TAINT(str);
+  return mrb_str_new(mrb, ptr, len);
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Create a string from the NUL-terminated C string `ptr` and tag it with
+ * the US-ASCII encoding index and the 7-bit code range.
+ */
+mrb_value
+mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr)
+{
+  mrb_value s = mrb_str_new_cstr(mrb, ptr);
+
+  ENCODING_CODERANGE_SET(mrb, s, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
+  return s;
+}
+
+/*
+ * Create a string from external bytes that are declared to be in `eenc`.
+ *
+ * When `eenc` is US-ASCII but the bytes are not 7-bit clean, the string is
+ * associated with the ASCII-8BIT encoding and returned as is.  Otherwise
+ * it is associated with `eenc` and passed through mrb_str_conv_enc()
+ * towards the default internal encoding.
+ */
+mrb_value
+mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *eenc)
+{
+  mrb_value str = mrb_tainted_str_new(mrb, ptr, len);
+  int not_really_ascii =
+    eenc == mrb_usascii_encoding(mrb) &&
+    mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT;
+
+  if (!not_really_ascii) {
+    mrb_enc_associate(mrb, str, eenc);
+    return mrb_str_conv_enc(mrb, str, eenc, mrb_default_internal_encoding(mrb));
+  }
+  mrb_enc_associate(mrb, str, mrb_ascii8bit_encoding(mrb));
+  return str;
+}
+
+/*
+ * Create a string from `len` external bytes at `ptr`, treating them as
+ * being in the locale encoding.
+ */
+mrb_value
+mrb_locale_str_new(mrb_state *mrb, const char *ptr, long len)
+{
+  mrb_encoding *locale = mrb_locale_encoding(mrb);
+
+  return mrb_external_str_new_with_enc(mrb, ptr, len, locale);
+}
+
+/*
+ * Append the NUL-terminated ASCII string `ptr` to `str`.
+ *
+ * If the encoding of `str` is ASCII compatible the bytes are appended
+ * directly.  Otherwise every byte is re-encoded into the string's
+ * encoding through a scratch buffer and appended one character at a time.
+ *
+ * Returns the value of mrb_enc_cr_str_buf_cat() in the first case and
+ * `str` in the second.
+ */
+mrb_value
+mrb_str_buf_cat_ascii(mrb_state *mrb, mrb_value str, const char *ptr)
+{
+  /* ptr must reference NUL terminated ASCII string. */
+  int encindex = ENCODING_GET(mrb, str);
+  mrb_encoding *enc = mrb_enc_from_index(mrb, encindex);
+
+  if (mrb_enc_asciicompat(mrb, enc)) {
+    return mrb_enc_cr_str_buf_cat(mrb, str, ptr, strlen(ptr),
+                                  encindex, ENC_CODERANGE_7BIT, 0);
+  }
+  else {
+    /* heap scratch buffer (replaces the alloca of the code this was ported from) */
+    char *buf = mrb_malloc(mrb, mrb_enc_mbmaxlen(enc));
+
+    while (*ptr) {
+      unsigned int c = (unsigned char)*ptr;
+      int len = mrb_enc_codelen(mrb, c, enc);
+
+      mrb_enc_mbcput(c, buf, enc);
+      mrb_enc_cr_str_buf_cat(mrb, str, buf, len,
+                             encindex, ENC_CODERANGE_VALID, 0);
+      ptr++;
+    }
+    /* the scratch buffer was previously leaked on every call */
+    mrb_free(mrb, buf);
+    return str;
+  }
+}
+
+/*
+ * Create a string from the NUL-terminated C string `ptr`, treating it as
+ * being in the filesystem encoding.
+ */
+mrb_value
+mrb_filesystem_str_new_cstr(mrb_state *mrb, const char *ptr)
+{
+  mrb_encoding *fsenc = mrb_filesystem_encoding(mrb);
+
+  return mrb_external_str_new_with_enc(mrb, ptr, strlen(ptr), fsenc);
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Change the byte length of `str` to `len` and return `str`.
+ *
+ * The buffer is reallocated when the string grows or shrinks by more than
+ * 1024 bytes; a NUL terminator is always written at the new end.
+ * Raises ArgumentError when `len` is larger than LONG_MAX, which is how a
+ * negative length passed by a caller appears in an unsigned parameter.
+ */
+mrb_value
+mrb_str_resize(mrb_state *mrb, mrb_value str, size_t len)
+{
+  size_t slen;
+
+  /* `len < 0` can never be true for size_t; test the wrapped range instead */
+  if (len > (size_t)LONG_MAX) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
+  }
+
+  mrb_str_modify(mrb, str);
+  slen = RSTRING_LEN(str);
+  if (len != slen) {
+    if (slen < len || slen -len > 1024) {
+      RSTRING_PTR(str) = mrb_realloc(mrb, RSTRING_PTR(str), len+1);
+    }
+    if (!MRB_STR_NOCAPA_P(str)) {
+      RSTRING(str)->aux.capa = len;
+    }
+    RSTRING(str)->len = len;
+    RSTRING(str)->buf[len] = '\0'; /* sentinel */
+  }
+  return str;
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Create a string from `len` bytes at `ptr` and tag it with the US-ASCII
+ * encoding index and the 7-bit code range.
+ */
+mrb_value
+mrb_usascii_str_new(mrb_state *mrb, const char *ptr, long len)
+{
+  mrb_value s = mrb_str_new(mrb, ptr, len);
+
+  ENCODING_CODERANGE_SET(mrb, s, mrb_usascii_encindex(), ENC_CODERANGE_7BIT);
+  return s;
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Raise RuntimeError ("string modified") unless `str` still has buffer
+ * pointer `p` and byte length `len`.
+ */
+static inline void
+str_mod_check(mrb_state *mrb, mrb_value str, char *p, mrb_int len)
+{
+  struct RString *s = mrb_str_ptr(str);
+
+  if (s->buf == p && s->len == len) return;
+  mrb_raise(mrb, mrb->eRuntimeError_class, "string modified");
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Return 1 when every character of `str` is known to occupy one byte:
+ * either the cached code range is 7-bit, or the encoding's maximum
+ * character length is 1.  Return 0 otherwise.
+ *
+ * The answer is conservative: the cached code range may still be
+ * ENC_CODERANGE_UNKNOWN, and a string may happen to contain only
+ * single-byte characters ("\xa1" in Shift_JIS for example) and still
+ * yield 0.
+ */
+static inline int
+single_byte_optimizable(mrb_state *mrb, mrb_value str)
+{
+  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+    return 1;
+  }
+  if (mrb_enc_mbmaxlen(STR_ENC_GET(mrb, str)) == 1) {
+    return 1;
+  }
+  return 0;
+}
+
+static inline const char *
+search_nonascii(const char *p, const char *e)
+{ /* Return a pointer to the first byte in [p, e) for which ISASCII() is
+   * false, or NULL when every byte in the range passes ISASCII().
+   *
+   * When SIZEOF_VALUE is 8 or 4, NONASCII_MASK is defined (and stays
+   * defined for the rest of the file) and ranges longer than two machine
+   * words are pre-scanned one intptr_t at a time; the byte loop at the
+   * end then pinpoints the exact byte. */
+#if SIZEOF_VALUE == 8
+# define NONASCII_MASK 0x8080808080808080ULL
+#elif SIZEOF_VALUE == 4
+# define NONASCII_MASK 0x80808080UL
+#endif
+#ifdef NONASCII_MASK
+ if ((int)sizeof(intptr_t) * 2 < e - p) {
+ const intptr_t *s, *t;
+ const intptr_t lowbits = sizeof(intptr_t) - 1;
+ s = (const intptr_t*)(~lowbits & ((intptr_t)p + lowbits)); /* p rounded up to a word boundary */
+ while (p < (const char *)s) { /* unaligned leading bytes, one at a time */
+ if (!ISASCII(*p))
+ return p;
+ p++;
+ }
+ t = (const intptr_t*)(~lowbits & (intptr_t)e); /* e rounded down to a word boundary */
+ while (s < t) {
+ if (*s & (intptr_t)NONASCII_MASK) { /* some byte of this word has its top bit set */
+ t = s;
+ break;
+ }
+ s++;
+ }
+ p = (const char *)t; /* resume byte-wise at the hit word, or at the aligned end */
+ }
+#endif
+ while (p < e) {
+ if (!ISASCII(*p))
+ return p;
+ p++;
+ }
+ return NULL;
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Hook that is called before a string is modified.
+ * It currently performs no check at all.
+ */
+static inline void
+str_modifiable(mrb_value str)
+{
+  (void)str;
+}
+
+/*
+ * Return 1 when `str` is not flagged as sharing its buffer, 0 when it is.
+ * str_modifiable() is invoked first.
+ */
+static inline int
+str_independent(mrb_value str)
+{
+  str_modifiable(str);
+  if (MRB_STR_SHARED_P(str)) {
+    return 0;
+  }
+  return 1;
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Give `str1` the same encoding index as `str2`.
+ * (An unused local that cached ENCODING_GET_INLINED(str2) was removed.)
+ */
+static inline void
+str_enc_copy(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  mrb_enc_set_index(mrb, str1, ENCODING_GET(mrb, str2));
+}
+
+static inline long
+enc_strlen(const char *p, const char *e, mrb_encoding *enc, int cr)
+{ /* Count the characters in the byte range [p, e) for encoding `enc`.
+   * `cr` is the caller's code-range knowledge and only selects which
+   * character-length routine is used for non-ASCII characters.
+   *
+   * NOTE(review): `mrb` is passed to mrb_enc_asciicompat() below although
+   * this function has no such parameter; presumably that name is a macro
+   * that ignores its first argument -- confirm in encoding.h. */
+ long c;
+ const char *q;
+
+ if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { /* fixed-width: bytes / width, rounded up */
+ return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc);
+ }
+ else if (mrb_enc_asciicompat(mrb, enc)) {
+ c = 0;
+ if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) { /* uses mrb_enc_fast_mbclen() */
+ while (p < e) {
+ if (ISASCII(*p)) { /* count the whole ASCII run in one step */
+ q = search_nonascii(p, e);
+ if (!q)
+ return c + (e - p);
+ c += q - p;
+ p = q;
+ }
+ p += mrb_enc_fast_mbclen(p, e, enc);
+ c++;
+ }
+ }
+ else { /* same walk, but with mrb_enc_mbclen() */
+ while (p < e) {
+ if (ISASCII(*p)) {
+ q = search_nonascii(p, e);
+ if (!q)
+ return c + (e - p);
+ c += q - p;
+ p = q;
+ }
+ p += mrb_enc_mbclen(p, e, enc);
+ c++;
+ }
+ }
+ return c;
+ }
+
+ for (c=0; p<e; c++) { /* not ASCII compatible: step one character at a time */
+ p += mrb_enc_mbclen(p, e, enc);
+ }
+ return c;
+}
+
+/*
+ * Return the capacity of `str`: its recorded capacity, or its length when
+ * the string is flagged as having no capacity field.
+ */
+size_t
+mrb_str_capacity(mrb_value str)
+{
+  if (!MRB_STR_NOCAPA_P(str)) {
+    return RSTRING_CAPA(str);
+  }
+  return RSTRING_LEN(str);
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Allocate a String object of class mrb->string_class with no buffer
+ * (buf = 0), length 0 and capacity 0, and return it as a value.
+ */
+static inline mrb_value
+str_alloc(mrb_state *mrb)
+{
+  struct RString *fresh = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class);
+
+  fresh->aux.capa = 0;
+  fresh->len = 0;
+  fresh->buf = 0;
+
+  return mrb_obj_value(fresh);
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Count the characters in the byte range [p, e) for encoding `enc`,
+ * without assuming anything about the code range.
+ */
+long
+mrb_enc_strlen(const char *p, const char *e, mrb_encoding *enc)
+{
+  long chars = enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
+
+  return chars;
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Give `str` a private buffer: allocate len+1 bytes, copy the current
+ * contents (if there is a buffer), NUL-terminate, install the copy with
+ * capacity == length, and clear the no-capacity flag.
+ * The previous buffer is not released here.
+ */
+static void
+str_make_independent(mrb_state *mrb, mrb_value str)
+{
+  long len = RSTRING_LEN(str);
+  char *copy = mrb_malloc(mrb, sizeof(char)*(len+1));
+
+  if (RSTRING_PTR(str)) {
+    memcpy(copy, RSTRING_PTR(str), len);
+  }
+  copy[len] = 0;
+
+  RSTRING(str)->buf = copy;
+  RSTRING(str)->len = len;
+  RSTRING(str)->aux.capa = len;
+  MRB_STR_UNSET_NOCAPA(str);
+}
+
+#ifdef INCLUDE_ENCODING
+static int
+coderange_scan(const char *p, long len, mrb_encoding *enc)
+{ /* Classify the `len` bytes at `p` for encoding `enc` and return
+   * ENC_CODERANGE_7BIT, ENC_CODERANGE_VALID or ENC_CODERANGE_BROKEN.
+   *
+   * NOTE(review): `mrb` is passed to mrb_enc_asciicompat() below although
+   * this function has no such parameter; presumably that name is a macro
+   * that ignores its first argument -- confirm in encoding.h. */
+ const char *e = p + len;
+
+ if (mrb_enc_to_index(enc) == 0) {
+ /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */
+ p = search_nonascii(p, e);
+ return p ? ENC_CODERANGE_VALID : ENC_CODERANGE_7BIT;
+ }
+
+ if (mrb_enc_asciicompat(mrb, enc)) {
+ p = search_nonascii(p, e);
+ if (!p) {
+ return ENC_CODERANGE_7BIT;
+ }
+ while (p < e) { /* validate each non-ASCII character, skipping ASCII runs */
+ int ret = mrb_enc_precise_mbclen(p, e, enc);
+ if (!MBCLEN_CHARFOUND_P(ret)) {
+ return ENC_CODERANGE_BROKEN;
+ }
+ p += MBCLEN_CHARFOUND_LEN(ret);
+ if (p < e) {
+ p = search_nonascii(p, e);
+ if (!p) {
+ return ENC_CODERANGE_VALID;
+ }
+ }
+ }
+ if (e < p) { /* last character claimed more bytes than were available */
+ return ENC_CODERANGE_BROKEN;
+ }
+ return ENC_CODERANGE_VALID;
+ }
+
+ while (p < e) { /* not ASCII compatible: validate character by character */
+ int ret = mrb_enc_precise_mbclen(p, e, enc);
+
+ if (!MBCLEN_CHARFOUND_P(ret)) {
+ return ENC_CODERANGE_BROKEN;
+ }
+ p += MBCLEN_CHARFOUND_LEN(ret);
+ }
+ if (e < p) {
+ return ENC_CODERANGE_BROKEN;
+ }
+ return ENC_CODERANGE_VALID;
+}
+
+/*
+ * Return the code range of `str`.  When the cached value is
+ * ENC_CODERANGE_UNKNOWN the string is scanned and the result is cached on
+ * the string before being returned.
+ */
+int
+mrb_enc_str_coderange(mrb_state *mrb, mrb_value str)
+{
+  mrb_encoding *enc;
+  int cr = ENC_CODERANGE(str);
+
+  if (cr != ENC_CODERANGE_UNKNOWN) {
+    return cr;
+  }
+  enc = STR_ENC_GET(mrb, str);
+  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
+  ENC_CODERANGE_SET(str, cr);
+  return cr;
+}
+
+char*
+mrb_enc_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc)
+{ /* Return a pointer to the start of character number `nth` (0-based)
+   * within the byte range [p, e) for encoding `enc`.  The result never
+   * exceeds `e`; `e` itself is returned when the range holds fewer than
+   * `nth` characters. */
+ if (mrb_enc_mbmaxlen(enc) == 1) { /* single-byte encoding */
+ p += nth;
+ }
+ else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { /* fixed-width encoding */
+ p += nth * mrb_enc_mbmaxlen(enc);
+ }
+ else if (mrb_enc_asciicompat(mrb, enc)) {
+ const char *p2, *e2;
+ int n;
+
+ while (p < e && 0 < nth) {
+ e2 = p + nth; /* earliest possible position: nth more one-byte characters */
+ if (e < e2)
+ return (char *)e;
+ if (ISASCII(*p)) { /* skip the whole ASCII run in one step */
+ p2 = search_nonascii(p, e2);
+ if (!p2)
+ return (char *)e2;
+ nth -= p2 - p;
+ p = p2;
+ }
+ n = mrb_enc_mbclen(p, e, enc);
+ p += n;
+ nth--;
+ }
+ if (nth != 0)
+ return (char *)e;
+ return (char *)p;
+ }
+ else {
+ while (p<e && nth--) { /* variable width, not ASCII compatible: step by character */
+ p += mrb_enc_mbclen(p, e, enc);
+ }
+ }
+ if (p > e) p = e; /* clamp to the end of the range */
+ return (char*)p;
+}
+
+/*
+ * Return a pointer to character number `nth` within [p, e), clamped to
+ * `e`.  With `singlebyte` set the position is plain pointer arithmetic;
+ * otherwise mrb_enc_nth() is used.  A null result from the lookup is
+ * passed through as 0.
+ */
+static char*
+str_nth(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte)
+{
+  const char *pos;
+
+  if (singlebyte) {
+    pos = p + nth;
+  }
+  else {
+    pos = mrb_enc_nth(mrb, p, e, nth, enc);
+  }
+
+  if (!pos) return 0;
+  return (char *)((pos > e) ? e : pos);
+}
+
+/*
+ * Convert a character offset to a byte offset: return how many bytes lie
+ * between `p` and character number `nth` in [p, e).  When the lookup
+ * yields a null pointer the full byte length e - p is returned.
+ */
+static long
+str_offset(mrb_state *mrb, const char *p, const char *e, long nth, mrb_encoding *enc, int singlebyte)
+{
+  const char *target = str_nth(mrb, p, e, nth, enc, singlebyte);
+
+  if (target) {
+    return target - p;
+  }
+  return e - p;
+}
+
+/*
+ * Return the byte offset of character number `pos` inside `str`, using
+ * the string's own encoding.
+ */
+long
+mrb_str_offset(mrb_state *mrb, mrb_value str, long pos)
+{
+  const char *head = RSTRING_PTR(str);
+  const char *tail = RSTRING_END(str);
+
+  return str_offset(mrb, head, tail, pos,
+                    STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str));
+}
+
+/*
+ * Copy the encoding index of `src` to `dest`, then copy the cached code
+ * range as well.
+ */
+static void
+mrb_enc_cr_str_exact_copy(mrb_state *mrb, mrb_value dest, mrb_value src)
+{
+  /* the code range is read only after the encoding has been copied */
+  str_enc_copy(mrb, dest, src);
+  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
+}
+#else
+#define mrb_enc_cr_str_exact_copy(mrb, dest, src)
+#endif //INCLUDE_ENCODING
+
+/*
+ * Create a new object of type `ttype` that points at the same buffer as
+ * `str` (no bytes are copied) and return it.
+ *
+ * If `str` is already flagged as shared, the new object is flagged too
+ * and records the same shared value.  Otherwise `str` itself becomes
+ * flagged as shared and records the new object.
+ */
+mrb_value
+str_new4(mrb_state *mrb, enum mrb_vtype ttype, mrb_value str)
+{
+  mrb_value twin = mrb_obj_value(mrb_obj_alloc(mrb, ttype, mrb->string_class));
+
+  RSTRING(twin)->len = RSTRING_LEN(str);
+  RSTRING(twin)->buf = RSTRING_PTR(str);
+
+  if (!MRB_STR_SHARED_P(str)) {
+    FL_SET(str, MRB_STR_SHARED);
+    RSTRING_SHARED(str) = twin;
+  }
+  else {
+    mrb_value shared = RSTRING_SHARED(str);
+
+    FL_SET(twin, MRB_STR_SHARED);
+    RSTRING_SHARED(twin) = shared;
+  }
+  mrb_enc_cr_str_exact_copy(mrb, twin, str);
+  return twin;
+}
+
+/*
+ * Create a string of `len` bytes.  When `p` is non-null the bytes are
+ * copied from it; otherwise the contents are left uninitialised.  A NUL
+ * terminator is always written.  `ttype` is currently not used.
+ *
+ * Raises ArgumentError when `len` is larger than LONG_MAX, which is how a
+ * negative length passed by a caller appears in an unsigned parameter.
+ */
+static mrb_value
+str_new(mrb_state *mrb, enum mrb_vtype ttype, const char *p, size_t len)
+{
+  mrb_value str;
+
+  /* `len < 0` can never be true for size_t; test the wrapped range instead */
+  if (len > (size_t)LONG_MAX) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
+  }
+
+  //str = str_alloc(mrb);
+  str = mrb_str_buf_new(mrb, len);
+#ifdef INCLUDE_ENCODING
+  if (len == 0) {
+    ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
+  }
+#endif //INCLUDE_ENCODING
+  if (p) {
+    memcpy(RSTRING_PTR(str), p, len);
+  }
+  STR_SET_LEN(str, len);
+  RSTRING_PTR(str)[len] = '\0';
+  return str;
+}
+
+/*
+ * Create a string of `len` bytes copied from `ptr`, passing the value
+ * type of `obj` on to str_new().
+ */
+mrb_value
+mrb_str_new_with_class(mrb_state *mrb, mrb_value obj, const char *ptr, long len)
+{
+  enum mrb_vtype ttype = mrb_type(obj);
+
+  return str_new(mrb, ttype, ptr, len);
+}
+
+#define mrb_str_new5 mrb_str_new_with_class
+
+/* Create an empty string, passing `str` as the template object. */
+static mrb_value
+str_new_empty(mrb_state *mrb, mrb_value str)
+{
+  return mrb_str_new5(mrb, str, 0, 0);
+}
+
+/*
+ * Create an empty string whose buffer can hold at least `capa` bytes.
+ * Requests smaller than STR_BUF_MIN_SIZE are rounded up to that minimum.
+ */
+mrb_value
+mrb_str_buf_new(mrb_state *mrb, size_t capa)
+{
+  struct RString *s = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class);
+
+  if (capa < STR_BUF_MIN_SIZE) {
+    capa = STR_BUF_MIN_SIZE;
+  }
+
+  s->buf = mrb_malloc(mrb, capa+1);   /* +1 for the terminator */
+  s->buf[0] = '\0';
+  s->aux.capa = capa;
+  s->len = 0;
+
+  return mrb_obj_value(s);
+}
+
+/*
+ * Append `len` bytes at `ptr` to `str`, growing the buffer as needed, and
+ * return `str`.
+ *
+ * `ptr` may point into the buffer of `str` itself: its offset is
+ * remembered before the buffer can move and re-applied afterwards.
+ * Raises ArgumentError when the combined length would exceed LONG_MAX.
+ */
+mrb_value
+str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+{
+  long capa, total, off = -1;
+
+  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
+    off = ptr - RSTRING_PTR(str);
+  }
+  mrb_str_modify(mrb, str);
+  /* nothing to append: hand back the string itself, not Fixnum 0 */
+  if (len == 0) return str;
+  capa = RSTRING_CAPA(str);
+  if (RSTRING_LEN(str) >= LONG_MAX - len) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "string sizes too big");
+  }
+  total = RSTRING_LEN(str)+len;
+  if (capa <= total) {
+    while (total > capa) {
+      if (capa + 1 >= LONG_MAX / 2) {
+        /* Doubling would overflow: round `total` up to a 4096-byte
+         * multiple instead (or take it as is when rounding would
+         * overflow).  The old code divided by 4096 without multiplying
+         * back, leaving the capacity far below `total`. */
+        capa = (total > LONG_MAX - 4095) ? total
+                                         : (total + 4095) / 4096 * 4096;
+        break;
+      }
+      capa = (capa + 1) * 2;
+    }
+    RESIZE_CAPA(str, capa);
+  }
+  if (off != -1) {
+    /* the buffer may have moved; rebuild the source pointer */
+    ptr = RSTRING_PTR(str) + off;
+  }
+  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
+  STR_SET_LEN(str, total);
+  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
+
+  return str;
+}
+
+/*
+ * Append `len` bytes at `ptr` to `str` and return `str`.
+ * A zero length is a no-op.  Raises ArgumentError when `len` is larger
+ * than LONG_MAX, which is how a negative length passed by a caller
+ * appears in an unsigned parameter.
+ */
+mrb_value
+mrb_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
+{
+  if (len == 0) return str;
+  /* `len < 0` can never be true for size_t; test the wrapped range instead */
+  if (len > (size_t)LONG_MAX) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
+  }
+  return str_buf_cat(mrb, str, ptr, len);
+}
+
+/*
+ * Create a String object of `len` bytes.
+ *
+ * When `p` is non-null, `len` bytes are copied from it; when `p` is null
+ * the contents are left uninitialised for the caller to fill in.  The
+ * buffer is always NUL-terminated.  A zero length yields an empty string
+ * from mrb_str_buf_new().
+ */
+
+mrb_value
+mrb_str_new(mrb_state *mrb, const char *p, size_t len)
+{
+  struct RString *s;
+
+  if (len == 0) {
+    return mrb_str_buf_new(mrb, len);
+  }
+
+  s = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class);
+  s->buf = mrb_malloc(mrb, len+1);
+  if (p) {
+    memcpy(s->buf, p, len);
+  }
+  s->buf[len] ='\0';
+  s->aux.capa = len;
+  s->len = len;
+
+  return mrb_obj_value(s);
+}
+
+/*
+ * Create a string from the NUL-terminated C string `ptr`.
+ * Raises ArgumentError when `ptr` is a null pointer.  With
+ * INCLUDE_ENCODING the result is tagged as US-ASCII.
+ */
+mrb_value
+mrb_str_new2(mrb_state *mrb, const char *ptr)
+{
+  if (ptr == NULL) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "NULL pointer given");
+  }
+#ifdef INCLUDE_ENCODING
+  return mrb_usascii_str_new2(mrb, ptr);
+#else
+  return mrb_str_new(mrb, ptr, strlen(ptr));
+#endif //INCLUDE_ENCODING
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Create a string from `len` bytes at `ptr` and associate it with the
+ * encoding `enc`.
+ */
+mrb_value
+mrb_enc_str_new(mrb_state *mrb, const char *ptr, long len, mrb_encoding *enc)
+{
+  mrb_value s = mrb_str_new(mrb, ptr, len);
+
+  mrb_enc_associate(mrb, s, enc);
+  return s;
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Create a String object holding a copy of the NUL-terminated C string
+ * `p`.
+ *
+ * Caution: `p` must not be a null pointer; it is handed to strlen()
+ * without any check.
+ */
+
+mrb_value
+mrb_str_new_cstr(mrb_state *mrb, const char *p)
+{
+  size_t len = strlen(p);
+  struct RString *s = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class);
+
+  s->buf = mrb_malloc(mrb, len+1);
+  memcpy(s->buf, p, len);
+  s->buf[len] = 0;
+
+  s->aux.capa = len;
+  s->len = len;
+
+  return mrb_obj_value(s);
+}
+
+/*
+ * Create a new String object holding a copy of the bytes of the string
+ * value `lit`.
+ *
+ * Caution: `lit` must be a string value; it is not type-checked.
+ */
+
+mrb_value
+mrb_str_literal(mrb_state *mrb, mrb_value lit)
+{
+  struct RString *src = mrb_str_ptr(lit);
+
+  return mrb_str_new(mrb, src->buf, src->len);
+}
+
+/*
+ * Expose the raw contents of `str`: store its byte length in *len_p and
+ * return a pointer to its buffer.  Nothing is copied; the pointer refers
+ * to the string's own storage.
+ */
+const char*
+mrb_str_body(mrb_value str, int *len_p)
+{
+  struct RString *rs = mrb_str_ptr(str);
+
+  *len_p = rs->len;
+  return rs->buf;
+}
+
+/*
+ * Append `other` to `self` in place (Caution: `self` is modified).
+ *
+ * A non-string `other` is converted with mrb_str_to_str() first.  `self`
+ * is then passed through mrb_str_modify() so that a string sharing its
+ * buffer gets a private copy before anything is written; previously the
+ * capacity field was read and the buffer reallocated without that step.
+ */
+void
+mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
+{
+  struct RString *s1, *s2;
+  size_t len;
+
+  if (mrb_type(other) != MRB_TT_STRING) {
+    other = mrb_str_to_str(mrb, other);
+  }
+  /* after the conversion, because mrb_str_to_str() may run arbitrary code */
+  mrb_str_modify(mrb, self);
+
+  s1 = mrb_str_ptr(self);
+  s2 = mrb_str_ptr(other);
+  len = s1->len + s2->len;
+
+  if (s1->aux.capa < len) {
+    s1->aux.capa = len;
+    s1->buf = mrb_realloc(mrb, s1->buf, len+1);
+  }
+  memcpy(s1->buf+s1->len, s2->buf, s2->len);
+  s1->len = len;
+  s1->buf[len] = 0;
+}
+
+/*
+ * Return a new string made of the bytes of `a` followed by the bytes of
+ * `b`.  Neither argument is modified.  Both must already be string
+ * values; they are not type-checked.
+ */
+mrb_value
+mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
+{
+  struct RString *lhs = mrb_str_ptr(a);
+  struct RString *rhs = mrb_str_ptr(b);
+  mrb_value sum = mrb_str_new(mrb, 0, lhs->len + rhs->len);
+  struct RString *dst = mrb_str_ptr(sum);
+
+  memcpy(dst->buf, lhs->buf, lhs->len);
+  memcpy(dst->buf + lhs->len, rhs->buf, rhs->len);
+
+  return sum;
+}
+
+/* 15.2.10.5.2 */
+
+/*
+ * call-seq:
+ *   str + other_str  => new_str
+ *
+ * Method form of string concatenation: takes one argument from the call
+ * frame, converts it to a string, and returns a new string made of the
+ * receiver followed by the argument.  The receiver is not modified.
+ * With INCLUDE_ENCODING the encodings are checked for compatibility and
+ * the result carries the combined code range.
+ */
+static mrb_value
+mrb_str_plus_m(mrb_state *mrb, mrb_value self)
+{
+  mrb_value rhs;
+  mrb_value sum;
+#ifdef INCLUDE_ENCODING
+  mrb_encoding *enc;
+#endif //INCLUDE_ENCODING
+
+  mrb_get_args(mrb, "o", &rhs);
+  mrb_string_value(mrb, &rhs);
+#ifdef INCLUDE_ENCODING
+  enc = mrb_enc_check(mrb, self, rhs);
+#endif //INCLUDE_ENCODING
+
+  sum = mrb_str_new(mrb, 0, RSTRING_LEN(self)+RSTRING_LEN(rhs));
+  memcpy(RSTRING_PTR(sum), RSTRING_PTR(self), RSTRING_LEN(self));
+  memcpy(RSTRING_PTR(sum) + RSTRING_LEN(self),
+         RSTRING_PTR(rhs), RSTRING_LEN(rhs));
+  RSTRING_PTR(sum)[RSTRING_LEN(sum)] = '\0';
+#ifdef INCLUDE_ENCODING
+  ENCODING_CODERANGE_SET(mrb, sum, mrb_enc_to_index(enc),
+                         ENC_CODERANGE_AND(ENC_CODERANGE(self), ENC_CODERANGE(rhs)));
+#endif //INCLUDE_ENCODING
+
+  return sum;
+}
+
+/*
+ * call-seq:
+ *   str.bytesize  => integer
+ *
+ * Returns the length of the receiver in bytes as a Fixnum.
+ */
+static mrb_value
+mrb_str_bytesize(mrb_state *mrb, mrb_value self)
+{
+  return mrb_fixnum_value(mrb_str_ptr(self)->len);
+}
+
+/* 15.2.10.5.26 */
+/* 15.2.10.5.33 */
+/*
+ * call-seq:
+ *   str.size  => integer
+ *
+ * Returns the length of the receiver as a Fixnum: the character count in
+ * the string's encoding when INCLUDE_ENCODING is defined, the byte count
+ * otherwise.
+ */
+mrb_value
+mrb_str_size(mrb_state *mrb, mrb_value self)
+{
+#ifdef INCLUDE_ENCODING
+  long chars = str_strlen(mrb, self, STR_ENC_GET(mrb, self));
+
+  return mrb_fixnum_value(chars);
+#else
+  return mrb_str_bytesize(mrb, self);
+#endif //INCLUDE_ENCODING
+}
+
+/*
+ * Prepare `str` for in-place modification: a string that shares its
+ * buffer is given a private copy; any other string is left as it is.
+ */
+void
+mrb_str_modify(mrb_state *mrb, mrb_value str)
+{
+  if (str_independent(str)) {
+    return;
+  }
+  str_make_independent(mrb, str);
+}
+
+
+/* 15.2.10.5.1 */
+
+/*
+ * call-seq:
+ * str * integer => new_str
+ *
+ * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
+ * the receiver.
+ *
+ * "Ho! " * 3 #=> "Ho! Ho! Ho! "
+ */
+static mrb_value
+mrb_str_times(mrb_state *mrb, mrb_value self)
+{ /* Implements String#*: reads one integer argument `times` and returns
+   * a new string holding `times` copies of the receiver.  Raises
+   * ArgumentError for a negative count or when the resulting length
+   * would exceed INT32_MAX. */
+ mrb_value str2;
+ mrb_int n,len,times;
+ char *ptr2;
+
+ mrb_get_args(mrb, "i", &times);
+
+ if (times < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
+ }
+ if (times && INT32_MAX/times < RSTRING_LEN(self)) { /* overflow check on len * times */
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
+ }
+
+ str2 = mrb_str_new5(mrb, self, 0, len = RSTRING_LEN(self)*times);
+ ptr2 = RSTRING_PTR(str2);
+ if (len > 0) {
+ n = RSTRING_LEN(self);
+ memcpy(ptr2, RSTRING_PTR(self), n); /* first copy of the receiver */
+ while (n <= len/2) { /* double the filled prefix while it still fits twice */
+ memcpy(ptr2 + n, ptr2, n);
+ n *= 2;
+ }
+ memcpy(ptr2 + n, ptr2, len-n); /* fill whatever is left from the prefix */
+ }
+ ptr2[RSTRING_LEN(str2)] = '\0';
+
+ //OBJ_INFECT(str2, str);
+ mrb_enc_cr_str_copy_for_substr(mrb, str2, self);
+
+ return str2;
+}
+/* -------------------------------------------------------------- */
+
+#define lesser(a,b) (((a)>(b))?(b):(a))
+
+/* ---------------------------*/
+/*
+ * call-seq:
+ * mrb_value str1 <=> mrb_value str2 => int
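+ *   returns  1 when str1 is greater than str2
+ *   returns  0 when str1 and str2 are equal
+ *   returns -1 when str1 is less than str2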
+ */
+/*
+ * Compare two string values byte-wise.  The common prefix is compared
+ * with memcmp(); when it is equal, the longer string is the greater one.
+ * Returns 1, 0 or -1.
+ */
+int
+mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  struct RString *lhs = mrb_str_ptr(str1);
+  struct RString *rhs = mrb_str_ptr(str2);
+  mrb_int common = lesser(lhs->len, rhs->len);
+  mrb_int order = memcmp(lhs->buf, rhs->buf, common);
+
+  if (order > 0) return 1;
+  if (order < 0) return -1;
+
+  /* identical prefix: decide by length */
+  if (lhs->len == rhs->len) return 0;
+  return (lhs->len > rhs->len) ? 1 : -1;
+}
+
+/* 15.2.10.5.3 */
+
+/*
+ * call-seq:
+ * str <=> other_str => -1, 0, +1
+ *
+ * Comparison---Returns -1 if <i>str</i> is less than, 0 if
+ * <i>str</i> is equal to, and +1 if <i>str</i> is greater than
+ * <i>other_str</i>. If the strings are of different lengths, and the strings are
+ * equal when compared up to the shortest length, then the longer string is
+ * considered greater than the shorter one. If the variable <code>$=</code> is
+ * <code>false</code>, the comparison is based on comparing the binary values
+ * of each character in the string. In older versions of Ruby, setting
+ * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
+ * in favor of using <code>String#casecmp</code>.
+ *
+ * <code><=></code> is the basis for the methods <code><</code>,
+ * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
+ * included from module <code>Comparable</code>. The method
+ * <code>String#==</code> does not use <code>Comparable#==</code>.
+ *
+ * "abcdef" <=> "abcde" #=> 1
+ * "abcdef" <=> "abcdef" #=> 0
+ * "abcdef" <=> "abcdefg" #=> -1
+ * "abcdef" <=> "ABCDEF" #=> 1
+ */
+/*
+ * Method body of String#<=>.
+ *
+ * For a string argument the result of mrb_str_cmp() is returned as a
+ * Fixnum.  For any other argument nil is returned unless the argument
+ * responds to both `to_s` and `<=>`; in that case `arg <=> self` is
+ * evaluated and its result is negated (nil is passed through).
+ */
+static mrb_value
+mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
+{
+  mrb_value str2;
+  mrb_int result;
+
+  mrb_get_args(mrb, "o", &str2);
+  if (mrb_type(str2) != MRB_TT_STRING) {
+    if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_s"))) {
+      return mrb_nil_value();
+    }
+    else if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "<=>"))) {
+      return mrb_nil_value();
+    }
+    else
+    {
+      mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
+
+      if (mrb_nil_p(tmp)) return mrb_nil_value();
+      /* Test the TYPE of the result.  The old `!mrb_fixnum(tmp)` looked
+       * at the integer payload, which is meaningless for non-Fixnum
+       * values and sent a Fixnum 0 down the generic path. */
+      if (mrb_type(tmp) != MRB_TT_FIXNUM) {
+        return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
+      }
+      result = -mrb_fixnum(tmp);
+    }
+  }
+  else {
+    result = mrb_str_cmp(mrb, str1, str2);
+  }
+  return mrb_fixnum_value(result);
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Return TRUE when the bytes of `str1` and `str2` may be compared
+ * directly: one of them is empty, both have the same encoding index, or
+ * one is 7-bit clean and the other is 7-bit clean too or has an ASCII
+ * compatible encoding.  Return FALSE otherwise.
+ */
+int
+mrb_str_comparable(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  int enc1, enc2;
+  int cr1, cr2;
+
+  if (RSTRING_LEN(str1) == 0) return TRUE;
+  if (RSTRING_LEN(str2) == 0) return TRUE;
+
+  enc1 = ENCODING_GET(mrb, str1);
+  enc2 = ENCODING_GET(mrb, str2);
+  if (enc1 == enc2) return TRUE;
+
+  cr1 = mrb_enc_str_coderange(mrb, str1);
+  cr2 = mrb_enc_str_coderange(mrb, str2);
+  if (cr1 == ENC_CODERANGE_7BIT &&
+      (cr2 == ENC_CODERANGE_7BIT ||
+       mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, enc2)))) {
+    return TRUE;
+  }
+  if (cr2 == ENC_CODERANGE_7BIT &&
+      mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, enc1))) {
+    return TRUE;
+  }
+  return FALSE;
+}
+
+/*
+ * Equality test in the shape of a comparison callback: returns 0 when
+ * the two strings are comparable, have the same byte length and the same
+ * bytes; returns 1 in every other case.
+ */
+int
+mrb_str_hash_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  long len;
+
+  if (!mrb_str_comparable(mrb, str1, str2)) return 1;
+
+  len = RSTRING_LEN(str2);
+  if (RSTRING_LEN(str1) != len) return 1;
+
+  return (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) ? 0 : 1;
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Return TRUE when `str1` and `str2` have the same byte length and the
+ * same bytes (and, with INCLUDE_ENCODING, are comparable); FALSE
+ * otherwise.
+ */
+static int
+str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
+{
+  const long len = RSTRING_LEN(str1);
+
+  if (len != RSTRING_LEN(str2)) {
+    return FALSE;
+  }
+#ifdef INCLUDE_ENCODING
+  if (!mrb_str_comparable(mrb, str1, str2)) {
+    return FALSE;
+  }
+#endif //INCLUDE_ENCODING
+  if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) != 0) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/*
+ * Equality test between the string `str1` and an arbitrary value `str2`.
+ *
+ * Identical objects are equal.  Two strings are compared by content.  For
+ * a non-string `str2`: nil and objects that do not respond to `to_str`
+ * are unequal; otherwise the result of `to_str` is compared against
+ * `str1` with mrb_equal().
+ */
+int
+mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
+{
+  if (mrb_obj_equal(mrb, str1, str2)) return TRUE;
+
+  if (mrb_type(str2) == MRB_TT_STRING) {
+    return str_eql(mrb, str1, str2);
+  }
+
+  if (mrb_nil_p(str2)) return FALSE;
+  if (!mrb_respond_to(mrb, str2, mrb_intern(mrb, "to_str"))) {
+    return FALSE;
+  }
+  str2 = mrb_funcall(mrb, str2, "to_str", 0);
+  return mrb_equal(mrb, str2, str1);
+}
+
+/* 15.2.10.5.4 */
+/*
+ * call-seq:
+ * str == obj => true or false
+ *
+ * Equality---
+ * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
+ * Otherwise, returns <code>false</code> or <code>true</code>
+ *
+ * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
+ */
+/*
+ * Method body of String#==: reads one argument and returns true or false
+ * according to mrb_str_equal().
+ */
+static mrb_value
+mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  return mrb_str_equal(mrb, str1, other) ? mrb_true_value()
+                                         : mrb_false_value();
+}
+/* ---------------------------------- */
+/*
+ * Convert `str` to a string value.  A string is returned unchanged.
+ * Anything else is converted with `to_str` when that yields a result and
+ * with `to_s` otherwise.
+ */
+mrb_value
+mrb_str_to_str(mrb_state *mrb, mrb_value str)
+{
+  mrb_value converted;
+
+  if (mrb_type(str) == MRB_TT_STRING) {
+    return str;
+  }
+
+  converted = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+  if (mrb_nil_p(converted)) {
+    converted = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
+  }
+  return converted;
+}
+
+/*
+ * Make sure *ptr holds a string.  A non-string value is converted with
+ * mrb_str_to_str() and the converted value is written back through `ptr`.
+ * Returns the resulting string value.
+ * (An unused local holding the RString pointer was removed.)
+ */
+mrb_value
+mrb_string_value(mrb_state *mrb, mrb_value *ptr)
+{
+  mrb_value s = *ptr;
+
+  if (mrb_type(s) != MRB_TT_STRING) {
+    s = mrb_str_to_str(mrb, s);
+    *ptr = s;
+  }
+  return s;
+}
+
+/*
+ * Convert `ptr` to a string if necessary and return a pointer to that
+ * string's buffer.  The conversion result is only written to the local
+ * copy of `ptr`.
+ */
+char *
+mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
+{
+  return RSTRING_PTR(mrb_string_value(mrb, &ptr));
+}
+/* 15.2.10.5.5 */
+
+/*
+ * call-seq:
+ * str =~ obj -> fixnum or nil
+ *
+ * Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
+ * against <i>str</i>,and returns the position the match starts, or
+ * <code>nil</code> if there is no match. Otherwise, invokes
+ * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
+ * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
+ *
+ * "cat o' 9 tails" =~ /\d/ #=> 7
+ * "cat o' 9 tails" =~ 9 #=> nil
+ */
+
+static mrb_value
+mrb_str_match(mrb_state *mrb, mrb_value self/* x */)
+{ /* Method body of String#=~: reads one argument and dispatches on its
+   * type.  A String argument raises TypeError; a Regexp is matched
+   * against the receiver (or raises TypeError when INCLUDE_REGEXP is not
+   * defined); anything else is asked via its own `=~` method if it has
+   * one, and yields nil otherwise.
+   *
+   * NOTE(review): the cases have no `break`; this presumably relies on
+   * mrb_raise() never returning -- confirm. */
+ mrb_value y;
+ mrb_get_args(mrb, "o", &y);
+ switch (mrb_type(y)) {
+ case MRB_TT_STRING:
+ mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: String given");
+ case MRB_TT_REGEX:
+#ifdef INCLUDE_REGEXP
+ return mrb_reg_match_str(mrb, y, self);
+#else
+ mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+ default:
+ if (mrb_respond_to(mrb, y, mrb_intern(mrb, "=~"))) {
+ return mrb_funcall(mrb, y, "=~", 1, self); /* delegate to the argument's own =~ */
+ }
+ else {
+ return mrb_nil_value();
+ }
+ }
+}
+/* ---------------------------------- */
+#ifdef INCLUDE_ENCODING
+#ifdef NONASCII_MASK
+#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
+/*
+ * Count how many bytes of the machine word at `s` are UTF-8 lead bytes,
+ * i.e. bytes that are not of the continuation form 10xxxxxx.
+ *
+ * The word is processed as an unsigned, pointer-sized integer.  It used
+ * to be loaded into an `int`, which dropped the upper half of a 64-bit
+ * word and made the `>> 32` step undefined.
+ */
+static inline int
+count_utf8_lead_bytes_with_word(const intptr_t *s)
+{
+  uintptr_t d = (uintptr_t)*s;
+
+  d |= ~(d>>1);             /* per byte: bit 6 becomes (bit6 | !bit7) */
+  d >>= 6;                  /* move that flag down to bit 0 of each byte */
+  d &= NONASCII_MASK >> 7;  /* keep one flag bit per byte */
+  d += (d>>8);              /* fold the per-byte flags into a sum */
+  d += (d>>16);
+#if SIZEOF_VALUE == 8
+  d += (d>>32);
+#endif
+  return (int)(d&0xF);
+}
+#endif
+
+#ifdef NONASCII_MASK
+/*
+ * Return a pointer to character number `nth` (0-based) in the UTF-8 byte
+ * range [p, e), or a pointer no further than `e` when the range holds
+ * fewer characters.  Characters are located by counting lead bytes.
+ *
+ * Long ranges are first skipped through one machine word at a time.
+ * Two defects of the word-wise part are fixed here:
+ *  - the alignment mask was derived from sizeof(int) although the words
+ *    are intptr_t, so words could be misaligned and the last one could
+ *    extend past `e` where the two sizes differ;
+ *  - the do/while loop read one word unconditionally, even when no whole
+ *    aligned word fits between the two boundaries.
+ */
+static char *
+str_utf8_nth(const char *p, const char *e, long nth)
+{
+  if ((int)SIZEOF_VALUE < e - p && (int)SIZEOF_VALUE * 2 < nth) {
+    const intptr_t *s, *t;
+    const intptr_t lowbits = sizeof(intptr_t) - 1;
+
+    s = (const intptr_t*)(~lowbits & ((intptr_t)p + lowbits)); /* p rounded up */
+    t = (const intptr_t*)(~lowbits & (intptr_t)e);             /* e rounded down */
+    while (p < (const char *)s) {
+      if (is_utf8_lead_byte(*p)) nth--;
+      p++;
+    }
+    /* only whole aligned words inside [s, t) are read */
+    while (s < t && (int)sizeof(intptr_t) <= nth) {
+      nth -= count_utf8_lead_bytes_with_word(s);
+      s++;
+    }
+    p = (char *)s;
+  }
+  while (p < e) {
+    if (is_utf8_lead_byte(*p)) {
+      if (nth == 0) break;
+      nth--;
+    }
+    p++;
+  }
+  return (char *)p;
+}
+
+/*
+ * Return the number of bytes between `p` and character number `nth` of
+ * the UTF-8 byte range [p, e).
+ */
+static long
+str_utf8_offset(const char *p, const char *e, long nth)
+{
+  return str_utf8_nth(p, e, nth) - p;
+}
+#endif
+#endif //INCLUDE_ENCODING
+
+mrb_value
+mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, int len)
+{ /* Return the substring of `str` that starts at position `beg` and is
+   * at most `len` units long, or nil when `len` is negative or `beg`
+   * lies outside the string.  A negative `beg` counts from the end.
+   * Positions are bytes without INCLUDE_ENCODING (and for strings that
+   * are single-byte optimizable); otherwise they are characters and are
+   * translated to a byte pointer `p` and a byte count `len` before the
+   * result is built at the `sub` label.
+   *
+   * NOTE(review): without INCLUDE_ENCODING the tail branch below builds a
+   * fresh copy with mrb_str_new() and then advances that copy's `buf`
+   * pointer.  If string buffers are released through the allocator, the
+   * pointer would no longer be the one that was allocated -- confirm
+   * against the GC's string free path. */
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc = STR_ENC_GET(mrb, str);
+#endif //INCLUDE_ENCODING
+ mrb_value str2;
+#ifdef INCLUDE_ENCODING
+ char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
+#else
+ char *p, *s = RSTRING_PTR(str);
+#endif //INCLUDE_ENCODING
+
+ if (len < 0) return mrb_nil_value();
+ if (!RSTRING_LEN(str)) {
+ len = 0;
+ }
+#ifdef INCLUDE_ENCODING
+ if (single_byte_optimizable(mrb, str)) {
+#endif //INCLUDE_ENCODING
+ if (beg > RSTRING_LEN(str)) return mrb_nil_value();
+ if (beg < 0) { /* negative start counts from the end */
+ beg += RSTRING_LEN(str);
+ if (beg < 0) return mrb_nil_value();
+ }
+ if (beg + len > RSTRING_LEN(str)) /* clip the length to the end of the string */
+ len = RSTRING_LEN(str) - beg;
+ if (len <= 0) {
+ len = 0;
+ p = 0;
+ }
+ else
+ p = s + beg;
+#ifdef INCLUDE_ENCODING
+ goto sub;
+ }
+ if (beg < 0) {
+ if (len > -beg) len = -beg;
+ if (-beg * mrb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) { /* close to the end: walk backwards */
+ beg = -beg;
+ while (beg-- > len && (e = mrb_enc_prev_char(s, e, e, enc)) != 0);
+ p = e;
+ if (!p) return mrb_nil_value();
+ while (len-- > 0 && (p = mrb_enc_prev_char(s, p, e, enc)) != 0);
+ if (!p) return mrb_nil_value();
+ len = e - p;
+ goto sub;
+ }
+ else {
+ beg += str_strlen(mrb, str, enc);
+ if (beg < 0) return mrb_nil_value();
+ }
+ }
+ else if (beg > 0 && beg > str_strlen(mrb, str, enc)) {
+ return mrb_nil_value();
+ }
+ if (len == 0) {
+ p = 0;
+ }
+#ifdef NONASCII_MASK
+ else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
+ enc == mrb_utf8_encoding(mrb)) { /* valid UTF-8: count lead bytes */
+ p = str_utf8_nth(s, e, beg);
+ len = str_utf8_offset(p, e, len);
+ }
+#endif
+ else if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) { /* fixed-width encoding */
+ int char_sz = mrb_enc_mbmaxlen(enc);
+
+ p = s + beg * char_sz;
+ if (p > e) {
+ p = e;
+ len = 0;
+ }
+ else if (len * char_sz > e - p)
+ len = e - p;
+ else
+ len *= char_sz;
+ }
+ else if ((p = str_nth(mrb, s, e, beg, enc, 0)) == e) {
+ len = 0;
+ }
+ else {
+ len = str_offset(mrb, p, e, len, enc, 0);
+ }
+sub:
+#endif //INCLUDE_ENCODING
+ if (len > STR_BUF_MIN_SIZE && beg + len == RSTRING_LEN(str)) { /* long tail of the string */
+#ifdef INCLUDE_ENCODING
+ str2 = mrb_str_new4(mrb, str);
+ str2 = str_new3(mrb, mrb_obj_class(mrb, str2), str2);
+#else
+ str2 = mrb_str_new(mrb, s, RSTRING_LEN(str));
+#endif //INCLUDE_ENCODING
+ RSTRING(str2)->buf += RSTRING(str2)->len - len; /* point at the last `len` bytes */
+ RSTRING(str2)->len = len;
+ }
+ else {
+ str2 = mrb_str_new5(mrb, str, p, len);
+ mrb_enc_cr_str_copy_for_substr(mrb, str2, str);
+ }
+
+ return str2;
+}
+
+#ifdef INCLUDE_REGEXP
+/* Searches str with the regexp re and returns the capture selected by
+ * backref, or nil when the search reports no match. */
+static mrb_value
+mrb_str_subpat(mrb_state *mrb, mrb_value str, mrb_value re, mrb_int backref)
+{
+  mrb_value match;
+  int nth;
+
+  if (mrb_reg_search(mrb, re, str, 0, 0) < 0) {
+    return mrb_nil_value();
+  }
+  match = mrb_backref_get(mrb);
+  nth = mrb_reg_backref_number(mrb, match, mrb_fixnum_value(backref));
+  return mrb_reg_nth_match(mrb, nth, mrb_backref_get(mrb));
+}
+#endif //INCLUDE_REGEXP
+
+/* --- 1-8-7parse.c --> */
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Counts the characters in the byte range [p, e) under encoding enc and
+ * reports a coderange through *cr.
+ *
+ * Fixed-width encodings return the byte length divided by the character
+ * width (rounded up) and leave *cr at 0.  The other paths walk the bytes
+ * with mrb_enc_precise_mbclen(); a position that does not decode as a
+ * character is still counted as one and sets *cr to ENC_CODERANGE_BROKEN.
+ * If nothing was recorded, *cr ends up as ENC_CODERANGE_7BIT.
+ */
+long
+mrb_enc_strlen_cr(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc, int *cr)
+{
+ long c;
+ const char *q;
+ int ret;
+
+ *cr = 0;
+ if (mrb_enc_mbmaxlen(enc) == mrb_enc_mbminlen(enc)) {
+ return (e - p + mrb_enc_mbminlen(enc) - 1) / mrb_enc_mbminlen(enc);
+ }
+ else if (mrb_enc_asciicompat(mrb, enc)) {
+ c = 0;
+ while (p < e) {
+ if (ISASCII(*p)) {
+ q = search_nonascii(p, e); /* skip the whole ASCII run in one step */
+ if (!q) {
+ if (!*cr) *cr = ENC_CODERANGE_7BIT;
+ return c + (e - p);
+ }
+ c += q - p;
+ p = q;
+ }
+ ret = mrb_enc_precise_mbclen(p, e, enc);
+ if (MBCLEN_CHARFOUND_P(ret)) {
+ *cr |= ENC_CODERANGE_VALID;
+ p += MBCLEN_CHARFOUND_LEN(ret);
+ }
+ else {
+ *cr = ENC_CODERANGE_BROKEN;
+ p++; /* resynchronise one byte at a time */
+ }
+ c++;
+ }
+ if (!*cr) *cr = ENC_CODERANGE_7BIT;
+ return c;
+ }
+
+ /* Variable-width encoding that is not ASCII compatible. */
+ for (c=0; p<e; c++) {
+ ret = mrb_enc_precise_mbclen(p, e, enc);
+ if (MBCLEN_CHARFOUND_P(ret)) {
+ *cr |= ENC_CODERANGE_VALID;
+ p += MBCLEN_CHARFOUND_LEN(ret);
+ }
+ else {
+ *cr = ENC_CODERANGE_BROKEN;
+ if (p + mrb_enc_mbminlen(enc) <= e)
+ p += mrb_enc_mbminlen(enc);
+ else
+ p = e; /* fewer than mbminlen bytes left: stop at the end */
+ }
+ }
+ if (!*cr) *cr = ENC_CODERANGE_7BIT;
+ return c;
+}
+#endif //INCLUDE_ENCODING
+
+/* --- 1-8-7parse.c --< */
+
+#ifndef INCLUDE_ENCODING
+/*
+ * Short-pattern search: packs the m pattern bytes into one integer and
+ * slides a same-sized window over the haystack ys[0..n).
+ *
+ * Callers are expected to pass 1 <= m <= SIZEOF_VALUE and m <= n
+ * (mrb_memsearch() only dispatches here for 2 <= m < n).
+ * Returns the byte index of the first match, or -1 when there is none.
+ */
+static inline long
+mrb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+  const unsigned char *x = xs, *xe = xs + m;
+  const unsigned char *y = ys, *ye = ys + n;
+//2011/06/30 #define SIZEOF_VALUE 4
+#ifndef VALUE_MAX
+# if SIZEOF_VALUE == 8
+# define VALUE_MAX 0xFFFFFFFFFFFFFFFFULL
+# elif SIZEOF_VALUE == 4
+# define VALUE_MAX 0xFFFFFFFFUL
+# elif SIZEOF_LONG == SIZEOF_VOIDP
+# define SIZEOF_VALUE 4
+# define VALUE_MAX 0xFFFFFFFFUL
+# endif
+#endif
+  /* Unsigned and at least 64 bits wide: holds up to SIZEOF_VALUE bytes and
+   * keeps every shift below well defined. */
+  unsigned long long hx, hy, mask;
+
+  if (m > SIZEOF_VALUE) {
+    mrb_bug("!!too long pattern string!!");
+    return -1; /* only reached if mrb_bug() returns */
+  }
+  /* Computed after the length check so the shift count is never negative. */
+  mask = (unsigned long long)VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
+
+  /* Prepare hash value */
+  for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
+    hx <<= CHAR_BIT;
+    hy <<= CHAR_BIT;
+    hx |= *x;
+    hy |= *y;
+  }
+  /* Searching */
+  while (hx != hy) {
+    if (y == ye)
+      return -1;
+    hy <<= CHAR_BIT;
+    hy |= *y;
+    hy &= mask; /* drop the byte that just left the window */
+    y++;
+  }
+  return y - ys - m;
+}
+
+/*
+ * Quick Search (Sunday) substring search for the pattern xs[0..m) in the
+ * haystack ys[0..n).
+ *
+ * Returns the byte index of the first match, -1 when there is none, and 0
+ * for an empty pattern.  The byte just past the current window selects the
+ * shift, so it is only read while it still lies inside the haystack.
+ */
+static inline long
+mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
+{
+  long shift[256]; /* long, so that m + 1 is never truncated */
+  long pos = 0;
+  long i;
+
+  if (m > n) return -1;
+  if (m < 1) return 0;
+
+  /* Preprocessing */
+  for (i = 0; i < 256; ++i)
+    shift[i] = m + 1;
+  for (i = 0; i < m; ++i)
+    shift[xs[i]] = m - i;
+
+  /* Searching */
+  while (pos + m <= n) {
+    if (xs[0] == ys[pos] && memcmp(xs, ys + pos, m) == 0)
+      return pos;
+    if (pos + m == n)
+      break; /* last window: ys[pos + m] would be out of bounds */
+    pos += shift[ys[pos + m]];
+  }
+  return -1;
+}
+/*
+ * Finds the first occurrence of the m-byte pattern x0 in the n-byte buffer
+ * y0 and returns its byte index, or -1 when it does not occur.  An empty
+ * pattern matches at index 0.  Longer patterns are handed to the
+ * shift-based or Quick Search helper depending on SIZEOF_VALUE.
+ */
+int
+mrb_memsearch(const void *x0, int m, const void *y0, int n)
+{
+  const unsigned char *needle = x0;
+  const unsigned char *hay = y0;
+  int i;
+
+  if (m > n) return -1;
+  if (m == n) return memcmp(x0, y0, m) == 0 ? 0 : -1;
+  if (m < 1) return 0;
+  if (m == 1) {
+    /* Single byte: a plain linear scan. */
+    for (i = 0; i < n; i++) {
+      if (hay[i] == *needle) return i;
+    }
+    return -1;
+  }
+  if (m <= SIZEOF_VALUE) return mrb_memsearch_ss(x0, m, y0, n);
+  return mrb_memsearch_qs(x0, m, y0, n);
+}
+#endif //INCLUDE_ENCODING
+
+/* --- 1-8-7parse.c --< */
+#ifdef INCLUDE_ENCODING
+/*
+ * Returns the length of str in characters under enc (the string's own
+ * encoding is used when enc is NULL).
+ *
+ * Single-byte-optimizable strings return their byte length directly.
+ * With NONASCII_MASK, valid UTF-8 is counted by lead bytes, a machine word
+ * at a time over the aligned middle of the buffer.  Any other string is
+ * measured by mrb_enc_strlen_cr(), and a non-zero coderange found on the
+ * way is stored back on str.
+ */
+static long
+str_strlen(mrb_state *mrb, mrb_value str, mrb_encoding *enc)
+{
+  const char *p, *e;
+  long n;
+  int cr;
+
+  if (single_byte_optimizable(mrb, str)) return RSTRING_LEN(str);
+  if (!enc) enc = STR_ENC_GET(mrb, str);
+  p = RSTRING_PTR(str);
+  e = RSTRING_END(str);
+  cr = ENC_CODERANGE(str);
+#ifdef NONASCII_MASK
+  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
+      enc == mrb_utf8_encoding(mrb)) {
+
+    long len = 0;
+    if ((int)sizeof(intptr_t) * 2 < e - p) {
+      const intptr_t *s, *t;
+      /* The mask must match the word type read below; a narrower mask
+       * would misalign s and let the last word read run past e. */
+      const intptr_t lowbits = sizeof(intptr_t) - 1;
+      s = (const intptr_t*)(~lowbits & ((intptr_t)p + lowbits));
+      t = (const intptr_t*)(~lowbits & (intptr_t)e);
+      /* Unaligned head, byte by byte. */
+      while (p < (const char *)s) {
+        if (is_utf8_lead_byte(*p)) len++;
+        p++;
+      }
+      /* Aligned middle, one word per step. */
+      while (s < t) {
+        len += count_utf8_lead_bytes_with_word(s);
+        s++;
+      }
+      p = (const char *)s;
+    }
+    /* Remaining tail (or the whole string when it is short). */
+    while (p < e) {
+      if (is_utf8_lead_byte(*p)) len++;
+      p++;
+    }
+    return len;
+  }
+#endif
+  n = mrb_enc_strlen_cr(mrb, p, e, enc, &cr);
+  if (cr) {
+    ENC_CODERANGE_SET(str, cr);
+  }
+  return n;
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Finds the first occurrence of sub in str at or after offset.
+ * A negative offset counts back from the end of str.
+ *
+ * Returns the position of the match, or -1 when sub does not occur, the
+ * offset is out of range, or (with INCLUDE_ENCODING) sub is a broken
+ * string.  An empty sub matches at the (adjusted) offset.
+ */
+static mrb_int
+mrb_str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
+{
+  mrb_int pos;
+  char *s, *sptr, *e;
+  int len, slen;
+#ifdef INCLUDE_ENCODING
+  mrb_encoding *enc;
+
+  enc = mrb_enc_check(mrb, str, sub);
+  if (is_broken_string(mrb, sub)) {
+    return -1;
+  }
+  len = str_strlen(mrb, str, enc);
+  slen = str_strlen(mrb, sub, enc);
+#else
+  len = RSTRING_LEN(str);
+  slen = RSTRING_LEN(sub);
+#endif //INCLUDE_ENCODING
+  if (offset < 0) {
+    offset += len;
+    if (offset < 0) return -1;
+  }
+  if (len - offset < slen) return -1;
+  s = RSTRING_PTR(str);
+  e = s + RSTRING_LEN(str);
+  if (offset) {
+#ifdef INCLUDE_ENCODING
+    /* Turn the character offset into a byte offset. */
+    offset = str_offset(mrb, s, RSTRING_END(str), offset, enc, single_byte_optimizable(mrb, str));
+#endif //INCLUDE_ENCODING
+    s += offset;
+  }
+  if (slen == 0) return offset;
+  /* need proceed one character at a time */
+  sptr = RSTRING_PTR(sub);
+  slen = RSTRING_LEN(sub);
+  len = RSTRING_LEN(str) - offset;
+#ifdef INCLUDE_ENCODING
+  for (;;) {
+    char *t;
+    pos = mrb_memsearch(mrb, sptr, slen, s, len, enc);
+    if (pos < 0) return pos;
+    /* Accept the hit only if it starts on a character boundary. */
+    t = mrb_enc_right_char_head(s, s+pos, e, enc);
+    if (t == s + pos) break;
+    if ((len -= t - s) <= 0) return -1;
+    offset += t - s;
+    s = t;
+  }
+#else
+  /* s and len already account for offset; applying it a second time would
+   * skip bytes and shorten the search window. */
+  pos = mrb_memsearch(sptr, slen, s, len);
+  if (pos < 0) return pos;
+#endif //INCLUDE_ENCODING
+  return pos + offset;
+}
+
+/*
+ * Returns a new String object of mrb->string_class holding a private copy
+ * of str's bytes.  The copy is always NUL-terminated; when the source has
+ * no buffer the new one is zero-filled instead of left uninitialised.
+ */
+mrb_value
+mrb_str_dup(mrb_state *mrb, mrb_value str)
+{
+  struct RString *s = mrb_str_ptr(str);
+  struct RString *dup;
+
+  dup = mrb_obj_alloc(mrb, MRB_TT_STRING, mrb->string_class);
+  dup->buf = mrb_malloc(mrb, s->len+1);
+  if (s->buf) {
+    memcpy(dup->buf, s->buf, s->len);
+  }
+  else {
+    memset(dup->buf, 0, s->len);
+  }
+  dup->buf[s->len] = 0;
+  dup->len = s->len;
+  dup->aux.capa = s->len;
+  return mrb_obj_value(dup);
+}
+
+/*
+ * Single-argument element reference, dispatched on the type of indx:
+ *  - Fixnum: the one-element substring at that index, or nil when
+ *    mrb_str_substr() returns nil or an empty string;
+ *  - Regexp: the matched text (an E_TYPE_ERROR without INCLUDE_REGEXP);
+ *  - String: a copy of indx when it occurs in str, else nil;
+ *  - anything else: first tried as a Range via mrb_range_beg_len(), and
+ *    otherwise handled like a Fixnum index.
+ */
+static mrb_value
+mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
+{
+ long idx;
+
+ switch (mrb_type(indx)) {
+ case MRB_TT_FIXNUM:
+ idx = mrb_fixnum(indx);
+
+num_index:
+ str = mrb_str_substr(mrb, str, idx, 1);
+ if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
+ return str;
+
+ case MRB_TT_REGEX:
+#ifdef INCLUDE_REGEXP
+ return mrb_str_subpat(mrb, str, indx, 0); //mrb_str_subpat(str, indx, INT2FIX(0));
+#else
+ mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+ return mrb_nil_value();
+#endif //INCLUDE_REGEXP
+
+ case MRB_TT_STRING:
+ if (mrb_str_index(mrb, str, indx, 0) != -1)
+ return mrb_str_dup(mrb, indx);
+ return mrb_nil_value();
+
+ default:
+ /* check if indx is Range */
+ {
+ mrb_int beg, len;
+ mrb_value tmp;
+
+#ifdef INCLUDE_ENCODING
+ len = str_strlen(mrb, str, STR_ENC_GET(mrb, str));
+#else
+ len = RSTRING_LEN(str);
+#endif //INCLUDE_ENCODING
+ switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, 0)) {
+ case 0/*FALSE*/:
+ break; /* not handled as a range: fall back to the numeric path below */
+ case 2/*OTHER*/:
+ return mrb_nil_value();
+ default:
+ tmp = mrb_str_substr(mrb, str, beg, len);
+ /*OBJ_INFECT(tmp, indx);*/
+ return tmp;
+ }
+ }
+ idx = mrb_fixnum(indx);
+ goto num_index;
+ }
+ return mrb_nil_value(); /* not reached */
+}
+
+/* 15.2.10.5.6 */
+/* 15.2.10.5.34 */
+/*
+ * call-seq:
+ * str[fixnum] => fixnum or nil
+ * str[fixnum, fixnum] => new_str or nil
+ * str[range] => new_str or nil
+ * str[regexp] => new_str or nil
+ * str[regexp, fixnum] => new_str or nil
+ * str[other_str] => new_str or nil
+ * str.slice(fixnum) => fixnum or nil
+ * str.slice(fixnum, fixnum) => new_str or nil
+ * str.slice(range) => new_str or nil
+ * str.slice(regexp) => new_str or nil
+ * str.slice(regexp, fixnum) => new_str or nil
+ * str.slice(other_str) => new_str or nil
+ *
+ * Element Reference---If passed a single <code>Fixnum</code>, returns a
+ * one-character substring at that position. If passed two <code>Fixnum</code>
+ * objects, returns a substring starting at the offset given by the first, and
+ * a length given by the second. If given a range, a substring containing
+ * characters at offsets given by the range is returned. In all three cases, if
+ * an offset is negative, it is counted from the end of <i>str</i>. Returns
+ * <code>nil</code> if the initial offset falls outside the string, the length
+ * is negative, or the beginning of the range is greater than the end.
+ *
+ * If a <code>Regexp</code> is supplied, the matching portion of <i>str</i> is
+ * returned. If a numeric parameter follows the regular expression, that
+ * component of the <code>MatchData</code> is returned instead. If a
+ * <code>String</code> is given, that string is returned if it occurs in
+ * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
+ * match.
+ *
+ * a = "hello there"
+ * a[1] #=> 101(1.8.7) "e"(1.9.2)
+ * a[1,3] #=> "ell"
+ * a[1..3] #=> "ell"
+ * a[-3,2] #=> "er"
+ * a[-4..-2] #=> "her"
+ * a[12..-1] #=> nil
+ * a[-2..-4] #=> ""
+ * a[/[aeiou](.)\1/] #=> "ell"
+ * a[/[aeiou](.)\1/, 0] #=> "ell"
+ * a[/[aeiou](.)\1/, 1] #=> "l"
+ * a[/[aeiou](.)\1/, 2] #=> nil
+ * a["lo"] #=> "lo"
+ * a["bye"] #=> nil
+ */
+/*
+ * Method entry point for String#[] and String#slice.
+ * One argument is forwarded to mrb_str_aref().  Two arguments select either
+ * a regexp capture (Regexp, Fixnum) or a substring (Fixnum, Fixnum).
+ * Any other argument count raises E_ARGUMENT_ERROR.
+ */
+static mrb_value
+mrb_str_aref_m(mrb_state *mrb, mrb_value str)
+{
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc != 2) {
+    if (argc != 1) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1)", argc);
+    }
+    return mrb_str_aref(mrb, str, argv[0]);
+  }
+  if (mrb_type(argv[0]) != MRB_TT_REGEX) {
+    return mrb_str_substr(mrb, str, mrb_fixnum(argv[0]), mrb_fixnum(argv[1]));
+  }
+#ifdef INCLUDE_REGEXP
+  return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1]));
+#else
+  mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+  return mrb_nil_value();
+#endif //INCLUDE_REGEXP
+}
+
+#ifdef INCLUDE_ENCODING
+/* Like mrb_str_modify(), except that a known coderange is kept. */
+static void
+str_modify_keep_cr(mrb_state *mrb, mrb_value str)
+{
+  if (!str_independent(str)) {
+    str_make_independent(mrb, str);
+  }
+  /* A broken coderange is dropped so that the string is re-scanned later. */
+  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN) {
+    ENC_CODERANGE_CLEAR(str);
+  }
+}
+
+/* Raises E_ENCODING_ERROR when enc is a dummy encoding; otherwise a no-op. */
+static void
+mrb_str_check_dummy_enc(mrb_state *mrb, mrb_encoding *enc)
+{
+  if (!mrb_enc_dummy_p(enc)) {
+    return;
+  }
+  mrb_raise(mrb, E_ENCODING_ERROR, "incompatible encoding with this operation: %s",
+            mrb_enc_name(enc));
+}
+#else
+#define str_modify_keep_cr(mrb, str) mrb_str_modify((mrb), (str))
+#endif //INCLUDE_ENCODING
+
+/* 15.2.10.5.8 */
+/*
+ * call-seq:
+ * str.capitalize! => str or nil
+ *
+ * Modifies <i>str</i> by converting the first character to uppercase and the
+ * remainder to lowercase. Returns <code>nil</code> if no changes are made.
+ *
+ * a = "hello"
+ * a.capitalize! #=> "Hello"
+ * a #=> "Hello"
+ * a.capitalize! #=> nil
+ */
+/*
+ * In-place capitalize: upper-cases the first character and lower-cases the
+ * rest.  Returns str when at least one character changed, nil otherwise
+ * (including for an empty string or one without a buffer).
+ * With INCLUDE_ENCODING the string is walked by code point and a dummy
+ * encoding raises; without it the walk is byte by byte.
+ */
+static mrb_value
+mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
+{
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc;
+#endif //INCLUDE_ENCODING
+ char *s, *send;
+ int modify = 0; /* set once any character has been rewritten */
+#ifdef INCLUDE_ENCODING
+ unsigned int c;
+ int n;
+#endif //INCLUDE_ENCODING
+
+ str_modify_keep_cr(mrb, str);
+#ifdef INCLUDE_ENCODING
+ enc = STR_ENC_GET(mrb, str);
+ mrb_str_check_dummy_enc(mrb, enc);
+#endif //INCLUDE_ENCODING
+ if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return mrb_nil_value();
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
+#ifdef INCLUDE_ENCODING
+ /* First character: force upper case. */
+ c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
+ if (mrb_enc_islower(c, enc)) {
+ mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc);
+ modify = 1;
+ }
+ s += n;
+ /* Remaining characters: force lower case. */
+ while (s < send) {
+ c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
+ if (mrb_enc_isupper(c, enc)) {
+ mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc);
+ modify = 1;
+ }
+ s += n;
+ }
+#else
+ if (ISLOWER(*s)) {
+ *s = toupper(*s);
+ modify = 1;
+ }
+ while (++s < send) {
+ if (ISUPPER(*s)) {
+ *s = tolower(*s);
+ modify = 1;
+ }
+ }
+#endif //INCLUDE_ENCODING
+ if (modify) return str;
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.7 */
+/*
+ * call-seq:
+ * str.capitalize => new_str
+ *
+ * Returns a copy of <i>str</i> with the first character converted to uppercase
+ * and the remainder to lowercase.
+ *
+ * "hello".capitalize #=> "Hello"
+ * "HELLO".capitalize #=> "Hello"
+ * "123ABC".capitalize #=> "123abc"
+ */
+static mrb_value
+mrb_str_capitalize(mrb_state *mrb, mrb_value self)
+{
+  mrb_value copy = mrb_str_dup(mrb, self);
+
+  /* The bang variant's nil ("unchanged") result is ignored on purpose:
+   * the copy is returned either way. */
+  mrb_str_capitalize_bang(mrb, copy);
+  return copy;
+}
+
+/* 15.2.10.5.10 */
+/*
+ * call-seq:
+ * str.chomp!(separator=$/) => str or nil
+ *
+ * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
+ * returning <i>str</i>, or <code>nil</code> if no modifications were made.
+ */
+/*
+ * In-place chomp.  Returns str when something was removed, nil otherwise.
+ *
+ * Separator handling, as implemented below:
+ *  - no argument, or a separator that is exactly "\n": "smart chomp",
+ *    which removes a trailing "\n", "\r\n" or "\r";
+ *  - nil: nothing is removed;
+ *  - "" (empty): removes all trailing newlines ("\n" or "\r\n");
+ *  - anything else: removed only when str ends with exactly that text.
+ */
+static mrb_value
+mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
+{
+ mrb_value *argv;
+ int argc;
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc;
+#endif //INCLUDE_ENCODING
+ mrb_value rs;
+ mrb_int newline;
+ char *p, *pp, *e;
+ long len, rslen;
+
+ str_modify_keep_cr(mrb, str);
+ len = RSTRING_LEN(str);
+ if (len == 0) return mrb_nil_value();
+ p = RSTRING_PTR(str);
+ e = p + len;
+ //if (mrb_scan_args(argc, argv, "01", &rs) == 0) {
+ mrb_get_args(mrb, "*", &argv, &argc);
+ if (argc == 0) {
+ rs = mrb_str_new2(mrb, "\n");
+smart_chomp:
+#ifdef INCLUDE_ENCODING
+ enc = mrb_enc_get(mrb, str);
+ if (mrb_enc_mbminlen(enc) > 1) {
+ /* Wide encoding: step back by whole characters. */
+ pp = mrb_enc_left_char_head(p, e-mrb_enc_mbminlen(enc), e, enc);
+ if (mrb_enc_is_newline(pp, e, enc)) {
+ e = pp;
+ }
+ pp = e - mrb_enc_mbminlen(enc);
+ if (pp >= p) {
+ pp = mrb_enc_left_char_head(p, pp, e, enc);
+ if (mrb_enc_ascget(mrb, pp, e, 0, enc) == '\r') {
+ e = pp;
+ }
+ }
+ if (e == RSTRING_END(str)) {
+ return mrb_nil_value(); /* nothing was trimmed */
+ }
+ len = e - RSTRING_PTR(str);
+ STR_SET_LEN(str, len);
+ }
+ else {
+#endif //INCLUDE_ENCODING
+ if (RSTRING_PTR(str)[len-1] == '\n') {
+ STR_DEC_LEN(str);
+ if (RSTRING_LEN(str) > 0 &&
+ RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
+ STR_DEC_LEN(str); /* "\r\n" is removed as a pair */
+ }
+ }
+ else if (RSTRING_PTR(str)[len-1] == '\r') {
+ STR_DEC_LEN(str);
+ }
+ else {
+ return mrb_nil_value();
+ }
+#ifdef INCLUDE_ENCODING
+ }
+#endif //INCLUDE_ENCODING
+ RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
+ return str;
+ }
+ rs = argv[0];
+ if (mrb_nil_p(rs)) return mrb_nil_value();
+ //StringValue(rs);
+ mrb_string_value(mrb, &rs);
+ rslen = RSTRING_LEN(rs);
+ if (rslen == 0) {
+ /* Empty separator: strip every trailing "\n" / "\r\n". */
+ while (len>0 && p[len-1] == '\n') {
+ len--;
+ if (len>0 && p[len-1] == '\r')
+ len--;
+ }
+ if (len < RSTRING_LEN(str)) {
+ STR_SET_LEN(str, len);
+ RSTRING_PTR(str)[len] = '\0';
+ return str;
+ }
+ return mrb_nil_value();
+ }
+ if (rslen > len) return mrb_nil_value();
+ newline = RSTRING_PTR(rs)[rslen-1];
+ if (rslen == 1 && newline == '\n')
+ goto smart_chomp;
+
+#ifdef INCLUDE_ENCODING
+ enc = mrb_enc_check(mrb, str, rs);
+ if (is_broken_string(mrb, rs)) {
+ return mrb_nil_value();
+ }
+ pp = e - rslen;
+#else
+ pp = p + len - rslen;
+#endif //INCLUDE_ENCODING
+ /* Cheap last-byte test first, then the full suffix comparison. */
+ if (p[len-1] == newline &&
+ (rslen <= 1 ||
+ memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
+#ifdef INCLUDE_ENCODING
+ /* Refuse to cut in the middle of a multibyte character. */
+ if (mrb_enc_left_char_head(p, pp, e, enc) != pp)
+ return mrb_nil_value();
+ if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
+ ENC_CODERANGE_CLEAR(str);
+ }
+#endif //INCLUDE_ENCODING
+ STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
+ RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
+ return str;
+ }
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.9 */
+/*
+ * call-seq:
+ * str.chomp(separator=$/) => new_str
+ *
+ * Returns a new <code>String</code> with the given record separator removed
+ * from the end of <i>str</i> (if present). If <code>$/</code> has not been
+ * changed from the default Ruby record separator, then <code>chomp</code> also
+ * removes carriage return characters (that is it will remove <code>\n</code>,
+ * <code>\r</code>, and <code>\r\n</code>).
+ *
+ * "hello".chomp #=> "hello"
+ * "hello\n".chomp #=> "hello"
+ * "hello\r\n".chomp #=> "hello"
+ * "hello\n\r".chomp #=> "hello\n"
+ * "hello\r".chomp #=> "hello"
+ * "hello \n there".chomp #=> "hello \n there"
+ * "hello".chomp("llo") #=> "he"
+ */
+static mrb_value
+mrb_str_chomp(mrb_state *mrb, mrb_value self)
+{
+  mrb_value copy = mrb_str_dup(mrb, self);
+
+  /* chomp! fetches the optional separator from the call arguments itself;
+   * its nil ("unchanged") result is ignored. */
+  mrb_str_chomp_bang(mrb, copy);
+  return copy;
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Byte length str would have after chop: the last character is dropped,
+ * and a trailing "\r\n" pair is dropped as a whole.  Returns 0 when there
+ * is no previous character to step back to.
+ */
+static long
+chopped_length(mrb_state *mrb, mrb_value str)
+{
+  mrb_encoding *enc = STR_ENC_GET(mrb, str);
+  const char *head = RSTRING_PTR(str);
+  const char *tail = head + RSTRING_LEN(str);
+  const char *last, *before;
+
+  if (head > tail) return 0;
+  last = mrb_enc_prev_char(head, tail, tail, enc);
+  if (!last) return 0;
+  if (last > head && mrb_enc_ascget(mrb, last, tail, 0, enc) == '\n') {
+    before = mrb_enc_prev_char(head, last, tail, enc);
+    if (before && mrb_enc_ascget(mrb, before, tail, 0, enc) == '\r') {
+      last = before;
+    }
+  }
+  return last - head;
+}
+#endif //INCLUDE_ENCODING
+
+/* 15.2.10.5.12 */
+/*
+ * call-seq:
+ * str.chop! => str or nil
+ *
+ * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
+ * or <code>nil</code> if <i>str</i> is the empty string. See also
+ * <code>String#chomp!</code>.
+ */
+/*
+ * In-place chop: removes the last character, or a trailing "\r\n" pair as
+ * a whole.  Returns str, or nil when str is empty.
+ */
+static mrb_value
+mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
+{
+ str_modify_keep_cr(mrb, str);
+ if (RSTRING_LEN(str) > 0) {
+#ifdef INCLUDE_ENCODING
+ long len;
+ len = chopped_length(mrb, str);
+#else
+ size_t len;
+ len = RSTRING_LEN(str) - 1;
+ if (RSTRING_PTR(str)[len] == '\n') {
+ if (len > 0 &&
+ RSTRING_PTR(str)[len-1] == '\r') {
+ len--; /* drop the '\r' of a "\r\n" pair as well */
+ }
+ }
+#endif //INCLUDE_ENCODING
+ STR_SET_LEN(str, len);
+ RSTRING_PTR(str)[len] = '\0';
+#ifdef INCLUDE_ENCODING
+ if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
+ ENC_CODERANGE_CLEAR(str);
+ }
+#endif //INCLUDE_ENCODING
+ return str;
+ }
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.11 */
+/*
+ * call-seq:
+ * str.chop => new_str
+ *
+ * Returns a new <code>String</code> with the last character removed. If the
+ * string ends with <code>\r\n</code>, both characters are removed. Applying
+ * <code>chop</code> to an empty string returns an empty
+ * string. <code>String#chomp</code> is often a safer alternative, as it leaves
+ * the string unchanged if it doesn't end in a record separator.
+ *
+ * "string\r\n".chop #=> "string"
+ * "string\n\r".chop #=> "string\n"
+ * "string\n".chop #=> "string"
+ * "string".chop #=> "strin"
+ * "x".chop #=> ""
+ */
+static mrb_value
+mrb_str_chop(mrb_state *mrb, mrb_value self)
+{
+#ifdef INCLUDE_ENCODING
+  /* Build the shortened string directly from the chopped length. */
+  mrb_value chopped = mrb_str_new5(mrb, self, RSTRING_PTR(self), chopped_length(mrb, self));
+
+  mrb_enc_cr_str_copy_for_substr(mrb, chopped, self);
+  return chopped;
+#else
+  /* Copy first, then chop the copy in place. */
+  mrb_value chopped = mrb_str_dup(mrb, self);
+
+  mrb_str_chop_bang(mrb, chopped);
+  return chopped;
+#endif //INCLUDE_ENCODING
+}
+
+/* 15.2.10.5.14 */
+/*
+ * call-seq:
+ * str.downcase! => str or nil
+ *
+ * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
+ * changes were made.
+ */
+/*
+ * In-place downcase.  Returns str when at least one character changed and
+ * nil otherwise.
+ * Single-byte strings (and every string without INCLUDE_ENCODING) only map
+ * 'A'..'Z'.  Other strings are walked by code point, with a byte shortcut
+ * for values below 0x80 in ASCII-compatible encodings.
+ */
+static mrb_value
+mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
+{
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc;
+#endif //INCLUDE_ENCODING
+ char *s, *send;
+ int modify = 0; /* set once any character has been rewritten */
+
+ str_modify_keep_cr(mrb, str);
+#ifdef INCLUDE_ENCODING
+ enc = STR_ENC_GET(mrb, str);
+ mrb_str_check_dummy_enc(mrb, enc);
+#endif //INCLUDE_ENCODING
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
+#ifdef INCLUDE_ENCODING
+ if (single_byte_optimizable(mrb, str)) {
+#endif //INCLUDE_ENCODING
+ while (s < send) {
+ unsigned int c = *(unsigned char*)s;
+
+#ifdef INCLUDE_ENCODING
+ if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
+#else
+ if ('A' <= c && c <= 'Z') {
+#endif //INCLUDE_ENCODING
+ *s = 'a' + (c - 'A');
+ modify = 1;
+ }
+ s++;
+ }
+#ifdef INCLUDE_ENCODING
+ }
+ else {
+ int ascompat = mrb_enc_asciicompat(mrb, enc);
+
+ while (s < send) {
+ unsigned int c;
+ int n;
+
+ if (ascompat && (c = *(unsigned char*)s) < 0x80) {
+ if (mrb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
+ *s = 'a' + (c - 'A');
+ modify = 1;
+ }
+ s++;
+ }
+ else {
+ c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
+ if (mrb_enc_isupper(c, enc)) {
+ /* assuming tolower returns codepoint with same size */
+ mrb_enc_mbcput(mrb_enc_tolower(c, enc), s, enc);
+ modify = 1;
+ }
+ s += n;
+ }
+ }
+ }
+#endif //INCLUDE_ENCODING
+ if (modify) return str;
+ return mrb_nil_value();
+}
+
+/* 15.2.10.5.13 */
+/*
+ * call-seq:
+ * str.downcase => new_str
+ *
+ * Returns a copy of <i>str</i> with all uppercase letters replaced with their
+ * lowercase counterparts. The operation is locale insensitive---only
+ * characters ``A'' to ``Z'' are affected.
+ *
+ * "hEllO".downcase #=> "hello"
+ */
+static mrb_value
+mrb_str_downcase(mrb_state *mrb, mrb_value self)
+{
+  mrb_value copy = mrb_str_dup(mrb, self);
+
+  /* The bang variant's nil ("unchanged") result is ignored on purpose. */
+  mrb_str_downcase_bang(mrb, copy);
+  return copy;
+}
+
+/* 15.2.10.5.15 */
+/*
+ * call-seq:
+ * str.each(separator=$/) {|substr| block } => str
+ * str.each_line(separator=$/) {|substr| block } => str
+ *
+ * Splits <i>str</i> using the supplied parameter as the record separator
+ * (<code>$/</code> by default), passing each substring in turn to the supplied
+ * block. If a zero-length record separator is supplied, the string is split
+ * into paragraphs delimited by multiple successive newlines.
+ *
+ * print "Example one\n"
+ * "hello\nworld".each {|s| p s}
+ * print "Example two\n"
+ * "hello\nworld".each('l') {|s| p s}
+ * print "Example three\n"
+ * "hello\n\n\nworld".each('') {|s| p s}
+ *
+ * <em>produces:</em>
+ *
+ * Example one
+ * "hello\n"
+ * "world"
+ * Example two
+ * "hel"
+ * "l"
+ * "o\nworl"
+ * "d"
+ * Example three
+ * "hello\n\n\n"
+ * "world"
+ */
+/*
+ * Yields successive records of str to the given block and returns str.
+ * The record separator is the first argument, or "\n" when none is given.
+ * A nil separator yields the whole string once.  An empty separator
+ * selects paragraph mode, where records end at runs of two or more
+ * newlines.
+ */
+static mrb_value
+mrb_str_each_line(mrb_state *mrb, mrb_value str)
+{
+ mrb_value rs;
+ int newline;
+ struct RString *ps = mrb_str_ptr(str);
+ char *p = ps->buf, *pend = p + ps->len, *s;
+ char *ptr = p; /* original buffer and length, handed to str_mod_check() */
+ long len = ps->len, rslen;
+ mrb_value line;
+ struct RString *prs;
+ mrb_value *argv, b;
+ int argc;
+
+ //if (mrb_scan_args(argc, argv, "01", &rs) == 0) {
+ mrb_get_args(mrb, "&*", &b, &argv, &argc);
+ if (argc > 0) {
+ rs = argv[0];
+ } else {
+ rs = mrb_str_new2(mrb, "\n");
+ }
+ /*RETURN_ENUMERATOR(str, argc, argv);*/
+ if (mrb_nil_p(rs)) {
+ mrb_yield(mrb, b, str);
+ return str;
+ }
+ //StringValue(rs);
+ mrb_string_value(mrb, &rs);
+ prs = mrb_str_ptr(rs);
+ rslen = prs->len;
+ if (rslen == 0) {
+ newline = '\n';
+ }
+ else {
+ newline = prs->buf[rslen-1]; /* last separator byte, used as a quick pre-check */
+ }
+
+ /* s marks the start of the current record, p the scan position. */
+ for (s = p, p += rslen; p < pend; p++) {
+ if (rslen == 0 && *p == '\n') {
+ /* NOTE(review): the next two lines read past p without checking pend;
+  * presumably they rely on a NUL terminator after the data -- confirm. */
+ if (*++p != '\n') continue;
+ while (*p == '\n') p++;
+ }
+ if (ps->buf < p && p[-1] == newline &&
+ (rslen <= 1 ||
+ memcmp(prs->buf, p-rslen, rslen) == 0)) {
+ line = mrb_str_new5(mrb, str, s, p - s);
+ /*OBJ_INFECT(line, str);*/
+ mrb_yield(mrb, b, line);
+ str_mod_check(mrb, str, ptr, len);
+ s = p;
+ }
+ }
+
+ /* Trailing record without a terminating separator. */
+ if (s != pend) {
+ if (p > pend) p = pend;
+ line = mrb_str_new5(mrb, str, s, p - s);
+ /*OBJ_INFECT(line, str);*/
+ mrb_yield(mrb, b, line);
+ }
+
+ return str;
+}
+
+/* 15.2.10.5.16 */
+/*
+ * call-seq:
+ * str.empty? => true or false
+ *
+ * Returns <code>true</code> if <i>str</i> has a length of zero.
+ *
+ * "hello".empty? #=> false
+ * "".empty? #=> true
+ */
+static mrb_value
+mrb_str_empty(mrb_state *mrb, mrb_value self)
+{
+  /* Empty means a byte length of zero. */
+  return (mrb_str_ptr(self)->len == 0) ? mrb_true_value() : mrb_false_value();
+}
+
+/* 15.2.10.5.17 */
+/*
+ * call-seq:
+ * str.eql?(other) => true or false
+ *
+ * Two strings are equal if the have the same length and content.
+ */
+static mrb_value
+mrb_str_eql(mrb_state *mrb, mrb_value self)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+  /* Only another String can be eql?; the content check is str_eql()'s job. */
+  if (mrb_type(other) == MRB_TT_STRING && str_eql(mrb, self, other)) {
+    return mrb_true_value();
+  }
+  return mrb_false_value();
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * Copies the encoding from src to dest, where dest was built from a part
+ * of src, and derives dest's coderange from src's:
+ *  - 7BIT stays 7BIT;
+ *  - VALID stays VALID unless the encoding is ASCII compatible and dest
+ *    holds no non-ASCII byte, in which case dest is 7BIT;
+ *  - any other coderange is only settled for an empty dest (7BIT for
+ *    ASCII-compatible encodings, VALID otherwise) and left alone otherwise.
+ */
+static void
+mrb_enc_cr_str_copy_for_substr(mrb_state *mrb, mrb_value dest, mrb_value src)
+{
+  int src_cr;
+  int dest_cr;
+
+  str_enc_copy(mrb, dest, src);
+  src_cr = ENC_CODERANGE(src);
+  if (src_cr == ENC_CODERANGE_7BIT) {
+    dest_cr = ENC_CODERANGE_7BIT;
+  }
+  else if (src_cr == ENC_CODERANGE_VALID) {
+    if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src)) ||
+        search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest))) {
+      dest_cr = ENC_CODERANGE_VALID;
+    }
+    else {
+      dest_cr = ENC_CODERANGE_7BIT;
+    }
+  }
+  else {
+    if (RSTRING_LEN(dest) != 0) {
+      return;
+    }
+    if (!mrb_enc_asciicompat(mrb, STR_ENC_GET(mrb, src))) {
+      dest_cr = ENC_CODERANGE_VALID;
+    }
+    else {
+      dest_cr = ENC_CODERANGE_7BIT;
+    }
+  }
+  ENC_CODERANGE_SET(dest, dest_cr);
+}
+#endif //INCLUDE_ENCODING
+
+/*
+ * Makes str2 a shared view of str: str2 takes the buffer pointer and
+ * length of a frozen version of str, records that frozen string as its
+ * shared owner, and copies its encoding/coderange.  Returns str2.
+ */
+static mrb_value
+str_replace_shared(mrb_state *mrb, mrb_value str2, mrb_value str)
+{
+  mrb_value frozen = mrb_str_new_frozen(mrb, str);
+
+  RSTRING(str2)->buf = RSTRING_PTR(frozen);
+  RSTRING(str2)->len = RSTRING_LEN(frozen);
+  RSTRING_SHARED(str2) = frozen;
+  FL_SET(str2, MRB_STR_SHARED);
+  mrb_enc_cr_str_exact_copy(mrb, str2, frozen);
+
+  return str2;
+}
+
+/* Allocates a fresh string and makes it a shared view of str.
+ * The klass argument is not used by this implementation. */
+static mrb_value
+str_new_shared(mrb_state *mrb, struct RClass* klass, mrb_value str)
+{
+  mrb_value fresh = str_alloc(mrb);
+
+  return str_replace_shared(mrb, fresh, str);
+}
+
+/* Thin alias for str_new_shared(): a new string sharing str's buffer. */
+mrb_value
+str_new3(mrb_state *mrb, struct RClass* klass, mrb_value str)
+{
+  mrb_value shared = str_new_shared(mrb, klass, str);
+
+  return shared;
+}
+
+/* Returns a new string that shares str's buffer; str's own class is the
+ * one passed on to str_new3(). */
+mrb_value
+mrb_str_new_shared(mrb_state *mrb, mrb_value str)
+{
+  return str_new3(mrb, mrb_obj_class(mrb, str), str);
+}
+
+/*
+ * Returns a frozen string with the contents of orig.
+ *
+ * When orig is already a shared string, its shared owner is reused as is
+ * if it has the same length, class and (with INCLUDE_ENCODING) encoding.
+ * Otherwise a new shared string over that owner is created and trimmed to
+ * orig's tail.  Non-shared strings go through str_new4().
+ */
+mrb_value
+mrb_str_new_frozen(mrb_state *mrb, mrb_value orig)
+{
+  struct RClass* klass;
+  mrb_value str;
+
+  klass = mrb_obj_class(mrb, orig);
+
+  if (MRB_STR_SHARED_P(orig) && !mrb_nil_p(RSTRING_SHARED(orig))) {
+    long ofs;
+
+    /* Start from the shared owner; it was previously read before ever
+     * being assigned. */
+    str = RSTRING_SHARED(orig);
+    ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
+#ifdef INCLUDE_ENCODING
+    if ((ofs > 0) || (klass != RBASIC(str)->c) ||
+        ENCODING_GET(mrb, str) != ENCODING_GET(mrb, orig)) {
+#else
+    if ((ofs > 0) || (klass != RBASIC(str)->c)) {
+#endif //INCLUDE_ENCODING
+      str = str_new3(mrb, klass, str);
+      /* orig is a tail of the owner: skip the leading ofs bytes. */
+      RSTRING_PTR(str) += ofs;
+      RSTRING_LEN(str) -= ofs;
+      mrb_enc_cr_str_exact_copy(mrb, str, orig);
+    }
+  }
+  else {
+    str = str_new4(mrb, orig.tt, orig);
+  }
+  return str;
+}
+
+/*
+ * Drops the first len bytes of str in place (at most the whole string) by
+ * advancing its buffer pointer, and returns str.  A string that is not yet
+ * shared is passed through mrb_str_new4() before the pointer is moved.
+ */
+mrb_value
+mrb_str_drop_bytes(mrb_state *mrb, mrb_value str, long len)
+{
+  const long olen = RSTRING_LEN(str);
+  long remaining;
+  char *head;
+
+  str_modifiable(str);
+  if (len > olen) {
+    len = olen;
+  }
+  remaining = olen - len;
+  if (!MRB_STR_SHARED_P(str)) {
+    mrb_str_new4(mrb, str);
+  }
+  RSTRING(str)->buf += len;
+  head = RSTRING(str)->buf;
+  RSTRING(str)->len = remaining;
+  head[remaining] = 0;
+  return str;
+}
+
+/*
+ * Returns the byte range [beg, beg + len) of str as a new string.
+ * A tail of str longer than STR_BUF_MIN_SIZE is produced by sharing the
+ * buffer and dropping the leading bytes; everything else is copied.
+ */
+mrb_value
+mrb_str_subseq(mrb_state *mrb, mrb_value str, long beg, long len)
+{
+  mrb_value piece;
+
+  if (RSTRING_LEN(str) != beg + len || STR_BUF_MIN_SIZE >= len) {
+    piece = mrb_str_new5(mrb, str, RSTRING_PTR(str)+beg, len);
+  }
+  else {
+    piece = mrb_str_new_shared(mrb, mrb_str_new_frozen(mrb, str));
+    mrb_str_drop_bytes(mrb, piece, beg);
+  }
+  mrb_enc_cr_str_copy_for_substr(mrb, piece, str);
+
+  return piece;
+}
+
+#ifdef INCLUDE_ENCODING
+/* Returns 1 when str has an ASCII-compatible encoding and its coderange
+ * is 7BIT, and 0 otherwise. */
+int
+mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value str)
+{
+  mrb_encoding *enc = STR_ENC_GET(mrb, str);
+
+  if (!mrb_enc_asciicompat(mrb, enc)) {
+    return 0/*FALSE*/;
+  }
+  return (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) ? 1/*TRUE*/ : 0/*FALSE*/;
+}
+
+/*
+ * Appends the len bytes at ptr to str and updates str's encoding index
+ * and coderange to describe the combined contents.
+ *
+ * ptr_encindex and ptr_cr describe the appended bytes; an unknown ptr_cr
+ * is scanned on demand.  The coderange finally used for ptr is written to
+ * *ptr_cr_ret when that pointer is non-NULL.
+ * Raises E_ENCODING_ERROR for incompatible encodings and E_ARGUMENT_ERROR
+ * for a negative len.  Returns str.
+ */
+static mrb_value
+mrb_enc_cr_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len,
+ int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
+{
+ int str_encindex = ENCODING_GET(mrb, str);
+ int res_encindex;
+ int str_cr, res_cr;
+ int str_a8 = ENCODING_IS_ASCII8BIT(str);
+ int ptr_a8 = ptr_encindex == 0;
+
+ str_cr = ENC_CODERANGE(str);
+
+ if (str_encindex == ptr_encindex) {
+ if (str_cr == ENC_CODERANGE_UNKNOWN ||
+ (ptr_a8 && str_cr != ENC_CODERANGE_7BIT)) {
+ ptr_cr = ENC_CODERANGE_UNKNOWN;
+ }
+ else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
+ ptr_cr = coderange_scan(ptr, len, mrb_enc_from_index(mrb, ptr_encindex));
+ }
+ }
+ else {
+ mrb_encoding *str_enc = mrb_enc_from_index(mrb, str_encindex);
+ mrb_encoding *ptr_enc = mrb_enc_from_index(mrb, ptr_encindex);
+ if (!mrb_enc_asciicompat(mrb, str_enc) || !mrb_enc_asciicompat(mrb, ptr_enc)) {
+ /* Mixing is only possible when one side is empty. */
+ if (len == 0)
+ return str;
+ if (RSTRING_LEN(str) == 0) {
+ mrb_str_buf_cat(mrb, str, ptr, len);
+ ENCODING_CODERANGE_SET(mrb, str, ptr_encindex, ptr_cr);
+ return str;
+ }
+ goto incompatible;
+ }
+ if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
+ ptr_cr = coderange_scan(ptr, len, ptr_enc);
+ }
+ if (str_cr == ENC_CODERANGE_UNKNOWN) {
+ if (str_a8 || ptr_cr != ENC_CODERANGE_7BIT) {
+ str_cr = mrb_enc_str_coderange(mrb, str);
+ }
+ }
+ }
+ if (ptr_cr_ret)
+ *ptr_cr_ret = ptr_cr;
+
+ /* Different encodings can only be combined when at least one side is
+ * pure 7-bit. */
+ if (str_encindex != ptr_encindex &&
+ str_cr != ENC_CODERANGE_7BIT &&
+ ptr_cr != ENC_CODERANGE_7BIT) {
+incompatible:
+ mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
+ mrb_enc_name(mrb_enc_from_index(mrb, str_encindex)),
+ mrb_enc_name(mrb_enc_from_index(mrb, ptr_encindex)));
+ }
+
+ /* Pick the encoding index and coderange of the result. */
+ if (str_cr == ENC_CODERANGE_UNKNOWN) {
+ res_encindex = str_encindex;
+ res_cr = ENC_CODERANGE_UNKNOWN;
+ }
+ else if (str_cr == ENC_CODERANGE_7BIT) {
+ if (ptr_cr == ENC_CODERANGE_7BIT) {
+ res_encindex = !str_a8 ? str_encindex : ptr_encindex;
+ res_cr = ENC_CODERANGE_7BIT;
+ }
+ else {
+ res_encindex = ptr_encindex;
+ res_cr = ptr_cr;
+ }
+ }
+ else if (str_cr == ENC_CODERANGE_VALID) {
+ res_encindex = str_encindex;
+ if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
+ res_cr = str_cr;
+ else
+ res_cr = ptr_cr;
+ }
+ else { /* str_cr == ENC_CODERANGE_BROKEN */
+ res_encindex = str_encindex;
+ res_cr = str_cr;
+ if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
+ }
+
+ if (len < 0) {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
+ }
+ str_buf_cat(mrb, str, ptr, len);
+ ENCODING_CODERANGE_SET(mrb, str, res_encindex, res_cr);
+ return str;
+}
+
+/* Appends len bytes at ptr, encoded as ptr_enc, to str.  The coderange of
+ * the appended bytes is passed as unknown and no coderange is reported
+ * back. */
+mrb_value
+mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *ptr_enc)
+{
+  int ptr_encindex = mrb_enc_to_index(ptr_enc);
+
+  return mrb_enc_cr_str_buf_cat(mrb, str, ptr, len,
+                                ptr_encindex, ENC_CODERANGE_UNKNOWN, NULL);
+}
+
+/*
+ * Appends str2 to str with encoding checks and returns str.  The coderange
+ * that the append reports for str2 is stored back on str2.
+ */
+mrb_value
+mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
+{
+  const int known_cr = ENC_CODERANGE(str2);
+  int scanned_cr = known_cr; /* kept as is unless the callee overwrites it */
+
+  mrb_enc_cr_str_buf_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2),
+                         ENCODING_GET(mrb, str2), known_cr, &scanned_cr);
+  ENC_CODERANGE_SET(str2, scanned_cr);
+
+  return str;
+}
+#endif //INCLUDE_ENCODING
+
+/* Releases str's buffer and resets it to an empty, buffer-less string.
+ * A shared string is left untouched. */
+static inline void
+str_discard(mrb_state *mrb, mrb_value str)
+{
+  str_modifiable(str);
+  if (MRB_STR_SHARED_P(str)) {
+    return;
+  }
+  mrb_free(mrb, RSTRING_PTR(str));
+  RSTRING(str)->buf = 0;
+  RSTRING(str)->len = 0;
+}
+
+/*
+ * Move the contents of str2 into str and leave str2 empty.
+ *
+ * str's previous buffer is released through str_discard(); str then takes
+ * over str2's pointer and length, plus either its shared reference (when
+ * str2 carries the NOCAPA flag) or its capacity.  When encodings are
+ * compiled in, str2's encoding and code range are carried over as well.
+ * Does nothing when str and str2 are the same object.
+ */
+void
+mrb_str_shared_replace(mrb_state *mrb, mrb_value str, mrb_value str2)
+{
+#ifdef INCLUDE_ENCODING
+  mrb_encoding *enc;
+  int cr;
+#endif //INCLUDE_ENCODING
+
+  if (mrb_obj_equal(mrb, str, str2)) return;
+#ifdef INCLUDE_ENCODING
+  /* read these before str2 is emptied below */
+  enc = STR_ENC_GET(mrb, str2);
+  cr = ENC_CODERANGE(str2);
+#endif //INCLUDE_ENCODING
+  str_discard(mrb, str);
+  MRB_STR_UNSET_NOCAPA(str);
+  RSTRING_PTR(str) = RSTRING_PTR(str2);
+  RSTRING_LEN(str) = RSTRING_LEN(str2);
+  if (MRB_STR_NOCAPA_P(str2)) {
+    FL_SET(str, RBASIC(str2)->flags & MRB_STR_NOCAPA);
+    RSTRING_SHARED(str) = RSTRING_SHARED(str2);
+  }
+  else {
+    RSTRING_CAPA(str) = RSTRING_CAPA(str2);
+  }
+
+  MRB_STR_UNSET_NOCAPA(str2); /* abandon str2 */
+  /* NOTE(review): str2's pointer was just handed to str and is not visibly
+   * reset here, so this store may land in the buffer str now uses --
+   * confirm against the MRB_STR_UNSET_NOCAPA / RSTRING_PTR definitions. */
+  RSTRING_PTR(str2)[0] = 0;
+  RSTRING_LEN(str2) = 0;
+#ifdef INCLUDE_ENCODING
+  /* enc and cr only exist when encodings are compiled in */
+  mrb_enc_associate(mrb, str, enc);
+  ENC_CODERANGE_SET(str, cr);
+#endif //INCLUDE_ENCODING
+}
+
+#ifdef INCLUDE_REGEXP
+/*
+ * Shared implementation of String#gsub and String#gsub!.
+ *
+ * Expects exactly two method arguments (pattern, replacement).  The block
+ * form is not implemented: nothing in this function ever yields, so a call
+ * with only a pattern is rejected with the same ArgumentError that
+ * mrb_str_sub_bang raises, instead of running with an uninitialised
+ * replacement value.
+ *
+ * bang == 0: returns a new string (a dup of str when nothing matched).
+ * bang != 0: returns nil when nothing matched, otherwise moves the result
+ *            into str via mrb_str_shared_replace() and returns str.
+ */
+static mrb_value
+str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
+{
+  mrb_value *argv;
+  int argc;
+  mrb_value pat, val, repl, match, dest = mrb_nil_value();
+  struct re_registers *regs;
+  mrb_int beg;
+  mrb_int beg0, end0;
+  mrb_int offset, blen, len, last;
+  char *cp;
+  mrb_encoding *str_enc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc != 2) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc);
+  }
+  repl = argv[1];
+  mrb_string_value(mrb, &repl);
+
+  pat = get_pat(mrb, argv[0], 1);
+  beg = mrb_reg_search(mrb, pat, str, 0, 0);
+  if (beg < 0) {
+    if (bang) return mrb_nil_value();   /* no match, no substitution */
+    return mrb_str_dup(mrb, str);
+  }
+
+  offset = 0;
+  blen = RSTRING_LEN(str) + 30;         /* source length plus some headroom */
+  dest = mrb_str_buf_new(mrb, blen);
+  cp = RSTRING_PTR(str);
+  str_enc = STR_ENC_GET(mrb, str);
+
+  do {
+    match = mrb_backref_get(mrb);
+    regs = RMATCH_REGS(match);
+    beg0 = BEG(0);
+    end0 = END(0);
+    val = mrb_reg_regsub(mrb, repl, str, regs, pat);
+
+    len = beg - offset;                 /* copy pre-match substr */
+    if (len) {
+      mrb_enc_str_buf_cat(mrb, dest, cp, len, str_enc);
+    }
+
+    mrb_str_buf_append(mrb, dest, val);
+
+    last = offset;
+    offset = end0;
+    if (beg0 == end0) {
+      /*
+       * Always consume at least one character of the input string
+       * in order to prevent infinite loops.
+       */
+      if (RSTRING_LEN(str) <= end0) break;
+      len = mrb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
+      mrb_enc_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len, str_enc);
+      offset = end0 + len;
+    }
+    cp = RSTRING_PTR(str) + offset;
+    if (offset > RSTRING_LEN(str)) break;
+    beg = mrb_reg_search(mrb, pat, str, offset, 0);
+  } while (beg >= 0);
+
+  /* copy whatever follows the last match */
+  if (RSTRING_LEN(str) > offset) {
+    mrb_enc_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset, str_enc);
+  }
+  /* search once more from the start offset of the final iteration */
+  mrb_reg_search(mrb, pat, str, last, 0);
+  if (bang) {
+    mrb_str_shared_replace(mrb, str, dest);
+  }
+  else {
+    RBASIC(dest)->c = mrb_obj_class(mrb, str);
+    str = dest;
+  }
+
+  return str;
+}
+
+/* 15.2.10.5.18 */
+/*
+ * call-seq:
+ * str.gsub(pattern, replacement) => new_str
+ * str.gsub(pattern) {|match| block } => new_str
+ *
+ * Returns a copy of <i>str</i> with <em>all</em> occurrences of <i>pattern</i>
+ * replaced with either <i>replacement</i> or the value of the block. The
+ * <i>pattern</i> will typically be a <code>Regexp</code>; if it is a
+ * <code>String</code> then no regular expression metacharacters will be
+ * interpreted (that is <code>/\d/</code> will match a digit, but
+ * <code>'\d'</code> will match a backslash followed by a 'd').
+ *
+ * If a string is used as the replacement, special variables from the match
+ * (such as <code>$&</code> and <code>$1</code>) cannot be substituted into it,
+ * as substitution into the string occurs before the pattern match
+ * starts. However, the sequences <code>\1</code>, <code>\2</code>, and so on
+ * may be used to interpolate successive groups in the match.
+ *
+ * In the block form, the current match string is passed in as a parameter, and
+ * variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
+ * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
+ * returned by the block will be substituted for the match on each call.
+ *
+ * The result inherits any tainting in the original string or any supplied
+ * replacement string.
+ *
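+ * In CRuby, when neither a block nor a second argument is supplied, an
+ * <code>Enumerator</code> is returned.  This implementation does not do
+ * that (the RETURN_ENUMERATOR call is commented out and no block is ever
+ * yielded to), so always pass an explicit replacement string.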
+ *
+ * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
+ * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
+ * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
+ * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
+ * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
+ */
+static mrb_value
+mrb_str_gsub(mrb_state *mrb, mrb_value self)
+{ /* String#gsub entry point; arguments are fetched inside str_gsub via mrb_get_args. */
+ //return str_gsub(argc, argv, self, 0);
+ return str_gsub(mrb, self, 0); /* bang = 0: the result is a new string, self is not replaced */
+}
+
+/* 15.2.10.5.19 */
+/*
+ * call-seq:
+ * str.gsub!(pattern, replacement) => str or nil
+ * str.gsub!(pattern) {|match| block } => str or nil
+ *
+ * Performs the substitutions of <code>String#gsub</code> in place, returning
+ * <i>str</i>, or <code>nil</code> if no substitutions were performed.
+ */
+static mrb_value
+mrb_str_gsub_bang(mrb_state *mrb, mrb_value self)
+{ /* String#gsub! entry point; arguments are fetched inside str_gsub via mrb_get_args. */
+ str_modify_keep_cr(mrb, self); /* called before the substitution; presumably prepares self for in-place modification -- helper not visible here */
+ //return str_gsub(argc, argv, self, 1);
+ return str_gsub(mrb, self, 1); /* bang = 1: nil when nothing matched, otherwise the result is moved into self */
+}
+#endif //INCLUDE_REGEXP
+
+mrb_int
+mrb_str_hash(mrb_state *mrb, mrb_value str)
+{
+  /* Multiplicative hash over the string's bytes (tagged "1-8-7" in the
+   * original source): h = h*65599 + byte for each byte, followed by one
+   * final mixing step h + (h >> 5). */
+  struct RString *rs = mrb_str_ptr(str);
+  char *cur = rs->buf;
+  long remaining;
+  mrb_int h = 0;
+
+  for (remaining = rs->len; remaining != 0; remaining--, cur++) {
+    h = h*65599 + *cur;
+  }
+  h = h + (h>>5);
+  return h;
+}
+
+/* 15.2.10.5.20 */
+/*
+ * call-seq:
+ * str.hash => fixnum
+ *
+ * Return a hash based on the string's length and content.
+ */
+static mrb_value
+mrb_str_hash_m(mrb_state *mrb, mrb_value self)
+{
+  /* String#hash: wrap the integer produced by mrb_str_hash as a Fixnum. */
+  return mrb_fixnum_value(mrb_str_hash(mrb, self));
+}
+
+/* 15.2.10.5.21 */
+/*
+ * call-seq:
+ * str.include? other_str => true or false
+ * str.include? fixnum => true or false
+ *
+ * Returns <code>true</code> if <i>str</i> contains the given string or
+ * character.
+ *
+ * "hello".include? "lo" #=> true
+ * "hello".include? "ol" #=> false
+ * "hello".include? ?h #=> true
+ */
+static mrb_value
+mrb_str_include(mrb_state *mrb, mrb_value self)
+{
+  /* String#include?: true when self contains the given string, or -- for a
+   * Fixnum argument -- the given byte value. */
+  mrb_value needle;
+  mrb_int found;
+
+  mrb_get_args(mrb, "o", &needle);
+
+  if (mrb_type(needle) == MRB_TT_FIXNUM) {
+    /* byte search over the whole buffer */
+    return memchr(RSTRING_PTR(self), mrb_fixnum(needle), RSTRING_LEN(self))
+             ? mrb_true_value()
+             : mrb_false_value();
+  }
+
+  /* anything else goes through mrb_string_value() and a substring search */
+  mrb_string_value(mrb, &needle);
+  found = mrb_str_index(mrb, self, needle, 0);
+
+  return (found == -1) ? mrb_false_value() : mrb_true_value();
+}
+
+/* 15.2.10.5.22 */
+/*
+ * call-seq:
+ * str.index(substring [, offset]) => fixnum or nil
+ * str.index(fixnum [, offset]) => fixnum or nil
+ * str.index(regexp [, offset]) => fixnum or nil
+ *
+ * Returns the index of the first occurrence of the given
+ * <i>substring</i>,
+ * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
+ * Returns
+ * <code>nil</code> if not found.
+ * If the second parameter is present, it
+ * specifies the position in the string to begin the search.
+ *
+ * "hello".index('e') #=> 1
+ * "hello".index('lo') #=> 3
+ * "hello".index('a') #=> nil
+ * "hello".index(101) #=> 1(101=0x65='e')
+ * "hello".index(/[aeiou]/, -3) #=> 4
+ */
+static mrb_value
+mrb_str_index_m(mrb_state *mrb, mrb_value str)
+{ /* String#index: dispatches on the type of the first argument (Regexp, Fixnum byte value, or String-convertible). */
+ mrb_value *argv;
+ int argc;
+
+ mrb_value sub;
+ //mrb_value initpos;
+ mrb_int pos;
+
+ //if (mrb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
+ mrb_get_args(mrb, "*", &argv, &argc);
+ if (argc == 2) {
+ pos = mrb_fixnum(argv[1]); /* NOTE(review): argv[1] is read as a Fixnum without a type check */
+ sub = argv[0];
+ }
+ else {
+ pos = 0;
+ if (argc > 0)
+ sub = argv[0];
+ else
+ sub = mrb_nil_value(); /* no arguments: nil reaches the default case below and raises a type mismatch */
+
+ }
+ if (pos < 0) { /* negative start offset counts back from the end of the string */
+#ifdef INCLUDE_ENCODING
+ pos += str_strlen(mrb, str, STR_ENC_GET(mrb, str));
+#else
+ pos += RSTRING_LEN(str);
+#endif //INCLUDE_ENCODING
+ if (pos < 0) { /* still negative: the offset lies before the start, so there is no match */
+ if (mrb_type(sub) == MRB_TT_REGEX) {
+#ifdef INCLUDE_REGEXP
+ mrb_backref_set(mrb, mrb_nil_value());
+#else
+ mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+ }
+ return mrb_nil_value();
+ }
+ }
+
+ switch (mrb_type(sub)) {
+ case MRB_TT_REGEX:
+#ifdef INCLUDE_REGEXP
+ if (pos > str_strlen(mrb, str, STR_ENC_GET(mrb, str)))
+ return mrb_nil_value();
+ pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
+ mrb_enc_check(mrb, str, sub), single_byte_optimizable(mrb, str));
+
+ pos = mrb_reg_search(mrb, sub, str, pos, 0);
+ pos = mrb_str_sublen(mrb, str, pos); /* byte offset back to character offset; a negative pos is passed through unchanged */
+#else
+ mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+ break;
+
+ case MRB_TT_FIXNUM: { /* Fixnum: forward byte-wise scan starting at pos */
+ int c = mrb_fixnum(sub);
+ long len = RSTRING_LEN(str);
+ unsigned char *p = (unsigned char*)RSTRING_PTR(str);
+
+ for (;pos<len;pos++) {
+ if (p[pos] == c) return mrb_fixnum_value(pos);
+ }
+ return mrb_nil_value();
+ }
+
+ default: { /* any other type: must convert to String, else TypeError */
+ mrb_value tmp;
+
+ tmp = mrb_check_string_type(mrb, sub);
+ if (mrb_nil_p(tmp)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: %s given",
+ mrb_obj_classname(mrb, sub));
+ }
+ sub = tmp;
+ }
+ /* fall through */
+ case MRB_TT_STRING:
+ pos = mrb_str_index(mrb, str, sub, pos);
+#ifdef INCLUDE_ENCODING
+ pos = mrb_str_sublen(mrb, str, pos);
+#endif //INCLUDE_ENCODING
+ break;
+ }
+
+ if (pos == -1) return mrb_nil_value(); /* -1 is treated as "not found" */
+ return mrb_fixnum_value(pos);
+}
+
+static mrb_value
+str_replace(mrb_state *mrb, mrb_value str, mrb_value str2)
+{
+  /* Make str refer to the contents of str2 and return str.
+   * A str2 that is already flagged as shared is aliased directly (same
+   * pointer, same shared reference); otherwise the work is delegated to
+   * str_replace_shared().  Encoding data is then copied from str2 with
+   * mrb_enc_cr_str_exact_copy(). */
+  long n = RSTRING_LEN(str2);
+
+  if (!MRB_STR_SHARED_P(str2)) {
+    str_replace_shared(mrb, str, str2);
+  }
+  else {
+    mrb_value owner = RSTRING_SHARED(str2);
+
+    RSTRING_LEN(str) = n;
+    RSTRING_PTR(str) = RSTRING_PTR(str2);
+    FL_SET(str, MRB_STR_SHARED);
+    RSTRING_SHARED(str) = owner;
+  }
+
+  mrb_enc_cr_str_exact_copy(mrb, str, str2);
+  return str;
+}
+
+/* 15.2.10.5.24 */
+/* 15.2.10.5.28 */
+/*
+ * call-seq:
+ * str.replace(other_str) => str
+ *
+ * Replaces the contents and taintedness of <i>str</i> with the corresponding
+ * values in <i>other_str</i>.
+ *
+ * s = "hello" #=> "hello"
+ * s.replace "world" #=> "world"
+ */
+static mrb_value
+mrb_str_replace(mrb_state *mrb, mrb_value str)
+{
+  /* String#replace: take over the contents of the single argument.
+   * str_modifiable() runs before anything else; replacing a string with
+   * itself is a no-op that simply returns str. */
+  mrb_value src;
+
+  mrb_get_args(mrb, "o", &src);
+  str_modifiable(str);
+  if (!mrb_obj_equal(mrb, str, src)) {
+    mrb_string_value(mrb, &src);
+    return str_replace(mrb, str, src);
+  }
+  return str;
+}
+
+/* 15.2.10.5.23 */
+/*
+ * call-seq:
+ * String.new(str="") => new_str
+ *
+ * Returns a new string object containing a copy of <i>str</i>.
+ */
+static mrb_value
+mrb_str_init(mrb_state *mrb, mrb_value self)
+{ /* String#initialize: with exactly one argument, replace self's contents with it; otherwise leave self untouched. */
+ //mrb_value orig;
+ mrb_value *argv;
+ int argc;
+
+ mrb_get_args(mrb, "*", &argv, &argc); /* only argc is used here */
+ if (argc == 1)
+ mrb_str_replace(mrb, self); /* mrb_str_replace fetches the argument itself via mrb_get_args */
+ return self;
+}
+
+#ifdef INCLUDE_ENCODING
+mrb_sym
+mrb_intern3(mrb_state *mrb, const char *name, long len, mrb_encoding *enc)
+{ /* NOTE(review): len and enc are accepted but unused; only name is forwarded, so the callee never sees the explicit length. */
+ return mrb_intern(mrb, name);
+}
+#endif //INCLUDE_ENCODING
+
+mrb_sym
+mrb_intern_str(mrb_state *mrb, mrb_value str)
+{ /* Returns the symbol id for the contents of the string object str. */
+ mrb_sym id;
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc;
+
+ if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) { /* 7-bit content is interned as US-ASCII */
+ enc = mrb_usascii_encoding(mrb);
+ }
+ else {
+ enc = mrb_enc_get(mrb, str); /* otherwise the string's own encoding is used */
+ }
+ id = mrb_intern3(mrb, RSTRING_PTR(str), RSTRING_LEN(str), enc); /* mrb_intern3 as defined in this file ignores the length and encoding */
+#else
+ id = mrb_intern(mrb, RSTRING_PTR(str));
+#endif //INCLUDE_ENCODING
+ str = RB_GC_GUARD(str); /* presumably keeps str referenced until the buffer pointer is no longer needed -- macro not visible here */
+ return id;
+}
+
+/* 15.2.10.5.25 */
+/* 15.2.10.5.41 */
+/*
+ * call-seq:
+ * str.intern => symbol
+ * str.to_sym => symbol
+ *
+ * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
+ * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
+ *
+ * "Koala".intern #=> :Koala
+ * s = 'cat'.to_sym #=> :cat
+ * s == :cat #=> true
+ * s = '@cat'.to_sym #=> :@cat
+ * s == :@cat #=> true
+ *
+ * This can also be used to create symbols that cannot be represented using the
+ * <code>:xxx</code> notation.
+ *
+ * 'cat and dog'.to_sym #=> :"cat and dog"
+ */
+mrb_value
+mrb_str_intern(mrb_state *mrb, mrb_value self)
+{ /* String#intern / String#to_sym: intern self's contents and return the result as a Symbol value. */
+ mrb_sym id;
+ mrb_value str = RB_GC_GUARD(self);
+
+ id = mrb_intern_str(mrb, str);
+ return mrb_symbol_value(id);
+
+}
+/* ---------------------------------- */
+mrb_value
+mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
+{
+  /* Return obj as a String value: strings pass through unchanged, anything
+   * else is asked for its to_s; when to_s does not produce a String the
+   * result of mrb_any_to_s() is used instead. */
+  mrb_value converted;
+
+  if (mrb_type(obj) == MRB_TT_STRING) return obj;
+
+  converted = mrb_funcall(mrb, obj, "to_s", 0);
+  return (mrb_type(converted) == MRB_TT_STRING) ? converted
+                                                : mrb_any_to_s(mrb, obj);
+}
+
+mrb_value
+mrb_check_string_type(mrb_state *mrb, mrb_value str)
+{ /* Attempts a String conversion via "to_str"; callers in this file treat a nil result as "not convertible". */
+ return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
+}
+
+#ifdef INCLUDE_REGEXP
+static mrb_value
+get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote)
+{
+  /* Turn a pattern argument into a Regexp value.
+   * A Regexp is returned as is.  A String (or something convertible through
+   * mrb_check_string_type) is compiled with mrb_reg_regcomp(), after being
+   * passed through mrb_reg_quote() when quote is non-zero.  For anything
+   * else mrb_check_type() is invoked against MRB_TT_REGEX. */
+  if (mrb_type(pat) == MRB_TT_REGEX) {
+    return pat;
+  }
+
+  if (mrb_type(pat) != MRB_TT_STRING) {
+    mrb_value converted = mrb_check_string_type(mrb, pat);
+
+    if (mrb_nil_p(converted)) {
+      mrb_check_type(mrb, pat, MRB_TT_REGEX);
+    }
+    pat = converted;
+  }
+
+  if (quote) {
+    pat = mrb_reg_quote(mrb, pat);
+  }
+  return mrb_reg_regcomp(mrb, pat);
+}
+#endif //INCLUDE_REGEXP
+
+/* 15.2.10.5.27 */
+/*
+ * call-seq:
+ * str.match(pattern) => matchdata or nil
+ *
+ * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
+ * then invokes its <code>match</code> method on <i>str</i>.
+ *
+ * 'hello'.match('(.)\1') #=> #<MatchData:0x401b3d30>
+ * 'hello'.match('(.)\1')[0] #=> "ll"
+ * 'hello'.match(/(.)\1/)[0] #=> "ll"
+ * 'hello'.match('xx') #=> nil
+ */
+#ifdef INCLUDE_REGEXP
+static mrb_value
+mrb_str_match_m(mrb_state *mrb, mrb_value self)
+{ /* String#match: converts the first argument to a Regexp (without quoting) and calls its "match" method with self. */
+ mrb_value *argv;
+ int argc;
+ mrb_value re, result, b;
+ mrb_get_args(mrb, "&*", &b, &argv, &argc);
+ if (argc < 1)
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..2)", argc);
+ re = argv[0];
+ argv[0] = self; /* leftover from the commented-out mrb_funcall2 call below; argv is not read again */
+ // result = mrb_funcall2(get_pat(re, 0), mrb_intern("match"), argc, argv);
+ result = mrb_funcall(mrb, get_pat(mrb, re, 0), "match", 1, self); /* only self is passed on; any extra arguments are dropped */
+ if (!mrb_nil_p(result) && mrb_block_given_p()) { /* mrb_block_given_p() in this file always returns 0, so this branch is currently never taken */
+ return mrb_yield(mrb, b, result);
+ }
+ return result;
+}
+#endif //INCLUDE_REGEXP
+
+/* ---------------------------------- */
+/* 15.2.10.5.29 */
+/*
+ * call-seq:
+ * str.reverse => new_str
+ *
+ * Returns a new string with the characters from <i>str</i> in reverse order.
+ *
+ * "stressed".reverse #=> "desserts"
+ */
+static mrb_value
+mrb_str_reverse(mrb_state *mrb, mrb_value str)
+{ /* String#reverse: returns a new string; note that brace pairing below differs between INCLUDE_ENCODING builds and plain builds. */
+#ifdef INCLUDE_ENCODING
+ mrb_encoding *enc;
+#endif //INCLUDE_ENCODING
+ mrb_value rev;
+ char *s, *e, *p;
+#ifdef INCLUDE_ENCODING
+ int single = 1;
+#endif //INCLUDE_ENCODING
+
+ if (RSTRING_LEN(str) <= 1) return mrb_str_dup(mrb, str); /* nothing to reverse for 0 or 1 bytes */
+#ifdef INCLUDE_ENCODING
+ enc = STR_ENC_GET(mrb, str);
+#endif //INCLUDE_ENCODING
+ rev = mrb_str_new5(mrb, str, 0, RSTRING_LEN(str));
+ s = RSTRING_PTR(str); e = RSTRING_END(str);
+ p = RSTRING_END(rev); /* rev is filled from its end backwards */
+
+ if (RSTRING_LEN(str) > 1) {
+#ifdef INCLUDE_ENCODING
+ if (single_byte_optimizable(mrb, str)) {
+#endif //INCLUDE_ENCODING
+ while (s < e) { /* byte-wise reversal */
+ *--p = *s++;
+ }
+#ifdef INCLUDE_ENCODING
+ }
+ else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
+ while (s < e) { /* character-wise reversal: each character's bytes are copied as a unit */
+ int clen = mrb_enc_fast_mbclen(s, e, enc);
+
+ if (clen > 1 || (*s & 0x80)) single = 0;
+ p -= clen;
+ memcpy(p, s, clen);
+ s += clen;
+ }
+ }
+ else {
+ while (s < e) { /* same loop, but using mrb_enc_mbclen instead of the fast variant */
+ int clen = mrb_enc_mbclen(s, e, enc);
+
+ if (clen > 1 || (*s & 0x80)) single = 0;
+ p -= clen;
+ memcpy(p, s, clen);
+ s += clen;
+ }
+ }
+ } /* closes "if (RSTRING_LEN(str) > 1)" in INCLUDE_ENCODING builds */
+ STR_SET_LEN(rev, RSTRING_LEN(str));
+ if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) { /* record what the scan learned; note the code range is set on str, not rev */
+ if (single) {
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT);
+ }
+ else {
+ ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID);
+ }
+#endif //INCLUDE_ENCODING
+ } /* closes the code-range "if" in INCLUDE_ENCODING builds, and "if (RSTRING_LEN(str) > 1)" otherwise */
+ mrb_enc_cr_str_copy_for_substr(mrb, rev, str);
+
+ return rev;
+}
+
+/* 15.2.10.5.30 */
+/*
+ * call-seq:
+ * str.reverse! => str
+ *
+ * Reverses <i>str</i> in place.
+ */
+static mrb_value
+mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
+{ /* String#reverse!: reverses str in place and returns it; without INCLUDE_ENCODING only the byte-swap section below is compiled. */
+#ifdef INCLUDE_ENCODING
+ if (RSTRING_LEN(str) > 1) {
+ if (single_byte_optimizable(mrb, str)) {
+#endif //INCLUDE_ENCODING
+ char *s, *e, c;
+ str_modify_keep_cr(mrb, str);
+ s = RSTRING_PTR(str);
+ e = RSTRING_END(str) - 1;
+ while (s < e) { /* swap bytes from both ends toward the middle */
+ c = *s;
+ *s++ = *e;
+ *e-- = c;
+ }
+#ifdef INCLUDE_ENCODING
+ }
+ else {
+ mrb_str_shared_replace(mrb, str, mrb_str_reverse(mrb, str)); /* otherwise build a reversed copy and move it into str */
+ }
+ }
+ else {
+ str_modify_keep_cr(mrb, str); /* 0 or 1 bytes: nothing to reverse, but the same modification hook is still called */
+ }
+#endif //INCLUDE_ENCODING
+ return str;
+}
+
+/*
+ * call-seq:
+ * str.rindex(substring [, fixnum]) => fixnum or nil
+ * str.rindex(fixnum [, fixnum]) => fixnum or nil
+ * str.rindex(regexp [, fixnum]) => fixnum or nil
+ *
+ * Returns the index of the last occurrence of the given <i>substring</i>,
+ * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
+ * <code>nil</code> if not found. If the second parameter is present, it
+ * specifies the position in the string to end the search---characters beyond
+ * this point will not be considered.
+ *
+ * "hello".rindex('e') #=> 1
+ * "hello".rindex('l') #=> 3
+ * "hello".rindex('a') #=> nil
+ * "hello".rindex(101) #=> 1
+ * "hello".rindex(/[aeiou]/, -2) #=> 1
+ */
+static mrb_int
+mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
+{
+  /* Byte-wise backward search helper: returns the largest offset <= pos at
+   * which sub occurs in str, or -1 when there is none.  pos is first pulled
+   * back so that sub still fits before the end of str; an empty sub matches
+   * at that (possibly adjusted) pos. */
+  struct RString *hay = mrb_str_ptr(str);
+  struct RString *needle = mrb_str_ptr(sub);
+  long nlen = needle->len;
+  char *first, *cur, *pat;
+
+  /* substring longer than string */
+  if (hay->len < nlen) return -1;
+  if (hay->len - pos < nlen) {
+    pos = hay->len - nlen;
+  }
+  first = hay->buf;
+  cur = hay->buf + pos;
+  pat = needle->buf;
+
+  if (!nlen) {
+    return pos;
+  }
+  for (; first <= cur; cur--) {
+    if (memcmp(cur, pat, nlen) == 0) {
+      return cur - hay->buf;
+    }
+  }
+  return -1;
+}
+
+#ifdef INCLUDE_ENCODING
+/* byte offset to char offset */
+size_t
+mrb_str_sublen(mrb_state *mrb, mrb_value str, long pos)
+{
+  /* Convert a byte offset into a character offset.  pos is returned
+   * unchanged when str is single-byte optimizable or when pos is negative;
+   * otherwise the characters in the first pos bytes are counted. */
+  char *head;
+
+  if (single_byte_optimizable(mrb, str) || pos < 0) {
+    return pos;
+  }
+
+  head = RSTRING_PTR(str);
+  return enc_strlen(head, head + pos, STR_ENC_GET(mrb, str), ENC_CODERANGE(str));
+}
+#endif //INCLUDE_ENCODING
+
+/* 15.2.10.5.31 */
+/*
+ * call-seq:
+ * str.rindex(substring [, fixnum]) => fixnum or nil
+ * str.rindex(fixnum [, fixnum]) => fixnum or nil
+ * str.rindex(regexp [, fixnum]) => fixnum or nil
+ *
+ * Returns the index of the last occurrence of the given <i>substring</i>,
+ * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
+ * <code>nil</code> if not found. If the second parameter is present, it
+ * specifies the position in the string to end the search---characters beyond
+ * this point will not be considered.
+ *
+ * "hello".rindex('e') #=> 1
+ * "hello".rindex('l') #=> 3
+ * "hello".rindex('a') #=> nil
+ * "hello".rindex(101) #=> 1
+ * "hello".rindex(/[aeiou]/, -2) #=> 1
+ */
+/*
+ * String#rindex entry point.
+ *
+ * Arguments (fetched with mrb_get_args): the thing to search for and an
+ * optional end position.  A negative position counts back from the end;
+ * one that is still negative afterwards yields nil.  Dispatches on the
+ * type of the first argument: Regexp, Fixnum (byte value) or anything
+ * convertible to String (TypeError otherwise).
+ *
+ * Returns the index as a Fixnum, or nil when nothing is found.
+ */
+static mrb_value
+mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
+{
+  mrb_value *argv;
+  int argc;
+  mrb_value sub;
+  mrb_value vpos;
+#ifdef INCLUDE_ENCODING
+  mrb_encoding *enc = STR_ENC_GET(mrb, str);
+  int pos, len = str_strlen(mrb, str, enc);
+#else
+  int pos, len = RSTRING_LEN(str);
+#endif //INCLUDE_ENCODING
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc == 2) {
+    sub = argv[0];
+    vpos = argv[1];
+    pos = mrb_fixnum(vpos);
+    if (pos < 0) {
+      pos += len;
+      if (pos < 0) {
+        if (mrb_type(sub) == MRB_TT_REGEX) {
+#ifdef INCLUDE_REGEXP
+          mrb_backref_set(mrb, mrb_nil_value());
+#else
+          mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+        }
+        return mrb_nil_value();
+      }
+    }
+    if (pos > len) pos = len;
+  }
+  else {
+    pos = len;
+    if (argc > 0)
+      sub = argv[0];
+    else
+      sub = mrb_nil_value();    /* ends up in the default case: type mismatch */
+  }
+
+  switch (mrb_type(sub)) {
+    case MRB_TT_REGEX:
+#ifdef INCLUDE_REGEXP
+      pos = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
+                       STR_ENC_GET(mrb, str), single_byte_optimizable(mrb, str));
+
+      if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
+        pos = mrb_reg_search(mrb, sub, str, pos, 1);
+        pos = mrb_str_sublen(mrb, str, pos);
+      }
+      if (pos >= 0) return mrb_fixnum_value(pos);
+#else
+      mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+      break;
+
+    case MRB_TT_FIXNUM: {
+      int c = mrb_fixnum(sub);
+      long blen = RSTRING_LEN(str);
+      unsigned char *p = (unsigned char*)RSTRING_PTR(str);
+      long i;
+
+      /*
+       * Backward byte scan.  Start at the requested position instead of
+       * ignoring it, and never at blen itself: that index is one past the
+       * last byte of the string.
+       */
+#ifdef INCLUDE_ENCODING
+      /* pos counts characters here; turn it into a byte offset first */
+      i = str_offset(mrb, RSTRING_PTR(str), RSTRING_END(str), pos,
+                     enc, single_byte_optimizable(mrb, str));
+#else
+      i = pos;
+#endif //INCLUDE_ENCODING
+      if (i >= blen) i = blen - 1;
+      for (; i >= 0; i--) {
+        if (p[i] == c) return mrb_fixnum_value(i);
+      }
+      return mrb_nil_value();
+    }
+
+    default: {
+      mrb_value tmp;
+
+      tmp = mrb_check_string_type(mrb, sub);
+      if (mrb_nil_p(tmp)) {
+        mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: %s given",
+                  mrb_obj_classname(mrb, sub));
+      }
+      sub = tmp;
+    }
+      /* fall through */
+    case MRB_TT_STRING:
+      pos = mrb_str_rindex(mrb, str, sub, pos);
+      if (pos >= 0) return mrb_fixnum_value(pos);
+      break;
+
+  } /* end of switch (TYPE(sub)) */
+  return mrb_nil_value();
+}
+
+#ifdef INCLUDE_REGEXP
+static mrb_value
+scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
+{ /* One step of String#scan: search from *start, advance *start past the match, return the match result or nil when there is none. */
+ mrb_value result, match;
+ struct re_registers *regs;
+ long i;
+ struct RString *ps = mrb_str_ptr(str);
+ struct RMatch *pmatch;
+
+ if (mrb_reg_search(mrb, pat, str, *start, 0) >= 0) {
+ match = mrb_backref_get(mrb);
+ //regs = RMATCH(match)->regs;
+ pmatch = mrb_match_ptr(match);
+ regs = &pmatch->rmatch->regs;
+ if (regs->beg[0] == regs->end[0]) { /* empty match */
+ mrb_encoding *enc = STR_ENC_GET(mrb, str);
+ /*
+ * Always consume at least one character of the input string
+ */
+ if (ps->len > regs->end[0])
+ *start = regs->end[0] + mrb_enc_fast_mbclen(RSTRING_PTR(str)+regs->end[0],RSTRING_END(str), enc);
+ else
+ *start = regs->end[0] + 1; /* already at the end: step past it */
+ }
+ else {
+ *start = regs->end[0]; /* resume right after the match */
+ }
+ if (regs->num_regs == 1) { /* no capture groups: the result is the whole match */
+ return mrb_reg_nth_match(mrb, 0, match);
+ }
+ result = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs);
+ for (i=1; i < regs->num_regs; i++) { /* with groups: collect groups 1..num_regs-1; group 0 is not included */
+ mrb_ary_push(mrb, result, mrb_reg_nth_match(mrb, i, match));
+ }
+
+ return result;
+ }
+ return mrb_nil_value();
+}
+#endif //INCLUDE_REGEXP
+
+/* 15.2.10.5.32 */
+/*
+ * call-seq:
+ * str.scan(pattern) => array
+ * str.scan(pattern) {|match, ...| block } => str
+ *
+ * Both forms iterate through <i>str</i>, matching the pattern (which may be a
+ * <code>Regexp</code> or a <code>String</code>). For each match, a result is
+ * generated and either added to the result array or passed to the block. If
+ * the pattern contains no groups, each individual result consists of the
+ * matched string, <code>$&</code>. If the pattern contains groups, each
+ * individual result is itself an array containing one entry per group.
+ *
+ * a = "cruel world"
+ * a.scan(/\w+/) #=> ["cruel", "world"]
+ * a.scan(/.../) #=> ["cru", "el ", "wor"]
+ * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
+ * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
+ *
+ * And the block form:
+ *
+ * a.scan(/\w+/) {|w| print "<<#{w}>> " }
+ * print "\n"
+ * a.scan(/(.)(.)/) {|x,y| print y, x }
+ * print "\n"
+ *
+ * <em>produces:</em>
+ *
+ * <<cruel>> <<world>>
+ * rceu lowlr
+ */
+#ifdef INCLUDE_REGEXP
+static mrb_value
+mrb_str_scan(mrb_state *mrb, mrb_value str)
+{ /* String#scan: repeatedly calls scan_once; collects the results into an array, or yields them when a block is reported. */
+ mrb_value result;
+ mrb_value pat, b;
+ mrb_int start = 0;
+ mrb_value match = mrb_nil_value();
+ struct RString *ps = mrb_str_ptr(str);
+ char *p = ps->buf; /* buffer pointer and length captured up front for str_mod_check below */
+ long len = ps->len;
+
+ mrb_get_args(mrb, "&o", &b, &pat);
+ pat = get_pat(mrb, pat, 1);
+ if (!mrb_block_given_p()) { /* mrb_block_given_p() in this file always returns 0, so this array-building path is the one taken */
+ mrb_value ary = mrb_ary_new(mrb);
+
+ while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) {
+ match = mrb_backref_get(mrb); /* remember the last successful match */
+ mrb_ary_push(mrb, ary, result);
+ }
+ mrb_backref_set(mrb, match); /* set the back-reference to the last successful match (nil if there was none) */
+ return ary;
+ }
+
+ while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) {
+ match = mrb_backref_get(mrb);
+ mrb_yield(mrb, b, result);
+ str_mod_check(mrb, str, p, len); /* called after each yield with the pointer/length captured above */
+ mrb_backref_set(mrb, match); /* restore $~ value */
+ }
+ mrb_backref_set(mrb, match);
+ return str;
+}
+#endif //INCLUDE_REGEXP
+
+/*
+ * Lookup table indexed by byte value: 1 for the six ASCII whitespace
+ * bytes (tab, newline, vertical tab, form feed, carriage return, space),
+ * 0 for every other value.
+ */
+static const char isspacetable[256] = {
+  ['\t'] = 1, ['\n'] = 1, ['\v'] = 1, ['\f'] = 1, ['\r'] = 1,
+  [' '] = 1
+};
+
+#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
+
+/* 15.2.10.5.35 */
+
+/*
+ * call-seq:
+ * str.split(pattern=$;, [limit]) => anArray
+ *
+ * Divides <i>str</i> into substrings based on a delimiter, returning an array
+ * of these substrings.
+ *
+ * If <i>pattern</i> is a <code>String</code>, then its contents are used as
+ * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
+ * space, <i>str</i> is split on whitespace, with leading whitespace and runs
+ * of contiguous whitespace characters ignored.
+ *
+ * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
+ * pattern matches. Whenever the pattern matches a zero-length string,
+ * <i>str</i> is split into individual characters.
+ *
+ * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
+ * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
+ * split on whitespace as if ` ' were specified.
+ *
+ * If the <i>limit</i> parameter is omitted, trailing null fields are
+ * suppressed. If <i>limit</i> is a positive number, at most that number of
+ * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
+ * string is returned as the only entry in an array). If negative, there is no
+ * limit to the number of fields returned, and trailing null fields are not
+ * suppressed.
+ *
+ * " now's the time".split #=> ["now's", "the", "time"]
+ * " now's the time".split(' ') #=> ["now's", "the", "time"]
+ * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
+ * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
+ * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
+ * "hello".split(//, 3) #=> ["h", "e", "llo"]
+ * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
+ *
+ * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
+ * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
+ * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
+ * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
+ */
+
+//static mrb_value
+//mrb_str_split_m(int argc, mrb_value *argv, mrb_value str)
+/*
+ * String#split entry point.
+ *
+ * Arguments (fetched with mrb_get_args): an optional pattern and an
+ * optional Fixnum limit.  Three strategies are used:
+ *   awk    - no pattern, or a pattern that is a single space: split on
+ *            runs of whitespace, ignoring leading whitespace;
+ *   string - any other String pattern: split on that exact byte sequence;
+ *   regexp - an empty String or a non-String pattern: split on regexp
+ *            matches (requires INCLUDE_REGEXP, TypeError otherwise).
+ *
+ * The string strategy is compiled in both with and without
+ * INCLUDE_ENCODING; the plain build searches byte-wise with
+ * mrb_str_index().
+ *
+ * With no limit (or limit 0) trailing empty fields are dropped.
+ * Returns the array of fields.
+ */
+static mrb_value
+mrb_str_split_m(mrb_state *mrb, mrb_value str)
+{
+  mrb_value *argv;
+  int argc;
+#ifdef INCLUDE_ENCODING
+  mrb_encoding *enc;
+#endif //INCLUDE_ENCODING
+  mrb_value spat;
+  mrb_value limit;
+  enum {awk, string, regexp} split_type;
+  long beg, end, i = 0;
+  int lim = 0;
+  mrb_value result, tmp;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (argc > 0)
+    spat = argv[0];
+  else
+    spat = mrb_nil_value();     /* never read in this case; avoids an indeterminate value */
+  if (argc > 1)
+    limit = argv[1];
+  else
+    limit = mrb_nil_value();
+
+  if (argc == 2) {
+    lim = mrb_fixnum(limit);
+    if (lim <= 0) limit = mrb_nil_value();
+    else if (lim == 1) {
+      /* limit 1: the whole string is the only field */
+      if (RSTRING_LEN(str) == 0)
+        return mrb_ary_new_capa(mrb, 0);
+      return mrb_ary_new_from_values(mrb, &str, 1);
+    }
+    i = 1;
+  }
+
+#ifdef INCLUDE_ENCODING
+  enc = STR_ENC_GET(mrb, str);
+#endif //INCLUDE_ENCODING
+  if (argc == 0) {
+    split_type = awk;
+  }
+  else {
+    if (mrb_type(spat) == MRB_TT_STRING) {
+#ifdef INCLUDE_REGEXP
+      mrb_encoding *enc2 = STR_ENC_GET(mrb, spat);
+#endif //INCLUDE_REGEXP
+      split_type = string;
+#ifdef INCLUDE_REGEXP
+      if (RSTRING_LEN(spat) == 0) {
+        /* Special case - split into chars */
+        spat = mrb_reg_regcomp(mrb, spat);
+        split_type = regexp;
+      }
+      else if (mrb_enc_asciicompat(mrb, enc2) == 1) {
+#endif //INCLUDE_REGEXP
+        if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
+          split_type = awk;
+        }
+#ifdef INCLUDE_REGEXP
+      }
+      else {
+        int l;
+        if (mrb_enc_ascget(mrb, RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
+            RSTRING_LEN(spat) == l) {
+          split_type = awk;
+        }
+      }
+#endif //INCLUDE_REGEXP
+    }
+    else {
+#ifdef INCLUDE_REGEXP
+      spat = get_pat(mrb, spat, 1);
+      split_type = regexp;
+#else
+      mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+    }
+  }
+
+  result = mrb_ary_new(mrb);
+  beg = 0;
+  if (split_type == awk) {
+    char *ptr = RSTRING_PTR(str);
+    char *eptr = RSTRING_END(str);
+    char *bptr = ptr;
+    int skip = 1;               /* 1 while skipping whitespace between fields */
+    unsigned int c;
+
+    end = beg;
+#ifdef INCLUDE_ENCODING
+    if (is_ascii_string(mrb, str)) {
+#endif //INCLUDE_ENCODING
+      while (ptr < eptr) {
+        c = (unsigned char)*ptr++;
+        if (skip) {
+          if (ascii_isspace(c)) {
+            beg = ptr - bptr;
+          }
+          else {
+            end = ptr - bptr;
+            skip = 0;
+            if (!mrb_nil_p(limit) && lim <= i) break;
+          }
+        }
+        else if (ascii_isspace(c)) {
+          mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+          skip = 1;
+          beg = ptr - bptr;
+          if (!mrb_nil_p(limit)) ++i;
+        }
+        else {
+          end = ptr - bptr;
+        }
+      }
+#ifdef INCLUDE_ENCODING
+    }
+    else {
+      while (ptr < eptr) {
+        int n;
+
+        c = mrb_enc_codepoint_len(mrb, ptr, eptr, &n, enc);
+        ptr += n;
+        if (skip) {
+          if (mrb_isspace(c)) {
+            beg = ptr - bptr;
+          }
+          else {
+            end = ptr - bptr;
+            skip = 0;
+            if (!mrb_nil_p(limit) && lim <= i) break;
+          }
+        }
+        else if (mrb_isspace(c)) {
+          mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+          skip = 1;
+          beg = ptr - bptr;
+          if (!mrb_nil_p(limit)) ++i;
+        }
+        else {
+          end = ptr - bptr;
+        }
+      }
+    }
+#endif //INCLUDE_ENCODING
+  }
+  else if (split_type == string) {
+#ifdef INCLUDE_ENCODING
+    char *ptr = RSTRING_PTR(str);
+    char *temp = ptr;
+    char *eptr = RSTRING_END(str);
+    char *sptr = RSTRING_PTR(spat);
+    long slen = RSTRING_LEN(spat);
+
+    if (is_broken_string(mrb, str)) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, str)));
+    }
+    if (is_broken_string(mrb, spat)) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in %s", mrb_enc_name(STR_ENC_GET(mrb, spat)));
+    }
+    enc = mrb_enc_check(mrb, str, spat);
+    while (ptr < eptr &&
+           (end = mrb_memsearch(mrb, sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
+      /* Check we are at the start of a char */
+      char *t = mrb_enc_right_char_head(ptr, ptr + end, eptr, enc);
+      if (t != ptr + end) {
+        ptr = t;
+        continue;
+      }
+      mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, ptr - temp, end));
+      ptr += end + slen;
+      if (!mrb_nil_p(limit) && lim <= ++i) break;
+    }
+    beg = ptr - temp;
+#else
+    /* Plain build: byte-wise search for the delimiter. */
+    long slen = RSTRING_LEN(spat);
+
+    /* slen > 0 keeps an empty delimiter from looping forever */
+    while (slen > 0 && beg < RSTRING_LEN(str) &&
+           (end = mrb_str_index(mrb, str, spat, beg)) >= 0) {
+      mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end - beg));
+      beg = end + slen;
+      if (!mrb_nil_p(limit) && lim <= ++i) break;
+    }
+#endif //INCLUDE_ENCODING
+  }
+  else {
+#ifdef INCLUDE_REGEXP
+    char *ptr = RSTRING_PTR(str);
+    long len = RSTRING_LEN(str);
+    long start = beg;
+    long idx;
+    int last_null = 0;          /* 1 right after an empty match was stepped over */
+    struct re_registers *regs;
+
+    while ((end = mrb_reg_search(mrb, spat, str, start, 0)) >= 0) {
+      regs = RMATCH_REGS(mrb_backref_get(mrb));
+      if (start == end && BEG(0) == END(0)) {
+        if (!ptr) {
+          mrb_ary_push(mrb, result, str_new_empty(mrb, str));
+          break;
+        }
+        else if (last_null == 1) {
+          mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg,
+                                                   mrb_enc_fast_mbclen(ptr+beg,
+                                                                       ptr+len,
+                                                                       enc)));
+          beg = start;
+        }
+        else {
+          if (ptr+start == ptr+len)
+            start++;
+          else
+            start += mrb_enc_fast_mbclen(ptr+start,ptr+len,enc);
+          last_null = 1;
+          continue;
+        }
+      }
+      else {
+        mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
+        beg = start = END(0);
+      }
+      last_null = 0;
+
+      /* capture groups of the separator are appended as extra fields */
+      for (idx=1; idx < regs->num_regs; idx++) {
+        if (BEG(idx) == -1) continue;
+        if (BEG(idx) == END(idx))
+          tmp = str_new_empty(mrb, str);
+        else
+          tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx));
+        mrb_ary_push(mrb, result, tmp);
+      }
+      if (!mrb_nil_p(limit) && lim <= ++i) break;
+    }
+#else
+    mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
+#endif //INCLUDE_REGEXP
+  }
+
+  /* remainder after the last separator */
+  if (RSTRING_LEN(str) > 0 && (!mrb_nil_p(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
+    if (RSTRING_LEN(str) == beg)
+      tmp = str_new_empty(mrb, str);
+    else
+      tmp = mrb_str_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
+    mrb_ary_push(mrb, result, tmp);
+  }
+  /* no limit given (or limit 0): drop trailing empty fields */
+  if (mrb_nil_p(limit) && lim == 0) {
+    long len;
+    while ((len = RARRAY_LEN(result)) > 0 &&
+           (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
+      mrb_ary_pop(mrb, result);
+  }
+
+  return result;
+}
+
+
+ /*
+  * Reports whether the running method was called with a block.
+  *
+  * Block detection is not wired up yet (the CRuby frame check this was
+  * ported from has no counterpart here), so the answer is always 0 (false).
+  */
+ int
+ mrb_block_given_p(void)
+ {
+   return 0/*Qfalse*/;
+ }
+
+/* 15.2.10.5.37 */
+/*
+ * call-seq:
+ * str.sub!(pattern, replacement) => str or nil
+ * str.sub!(pattern) {|match| block } => str or nil
+ *
+ * Performs the substitutions of <code>String#sub</code> in place,
+ * returning <i>str</i>, or <code>nil</code> if no substitutions were
+ * performed.
+ */
+#ifdef INCLUDE_REGEXP
+ static mrb_value
+ mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
+ {
+ mrb_value *argv;
+ int argc;
+ mrb_value pat, repl;
+ int iter = 0;
+ long plen;
+
+ /* "*" hands back every argument of the call in argv/argc. */
+ mrb_get_args(mrb, "*", &argv, &argc);
+ /* NOTE(review): the block form only sets `iter`, which is never read again,
+ * and leaves `repl` unassigned even though it is passed to
+ * mrb_reg_regsub() below -- confirm before enabling block support. */
+ if (argc == 1 && mrb_block_given_p()) {
+ iter = 1;
+ }
+ else if (argc == 2) {
+ repl = argv[1];
+ //StringValue(repl);
+ mrb_string_value(mrb, &repl);
+ }
+ else {
+ mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc);
+ }
+
+ pat = get_pat(mrb, argv[0], 1);
+ str_modifiable(str);
+ /* Only the first match is replaced; without a match nil is returned. */
+ if (mrb_reg_search(mrb, pat, str, 0, 0) >= 0) {
+ mrb_encoding *enc;
+ int cr = ENC_CODERANGE(str);
+ mrb_value match = mrb_backref_get(mrb);
+ struct re_registers *regs = RMATCH_REGS(match);
+ /* beg0/end0: BEG(0)/END(0) of the match registers (presumably the byte
+ * offsets of the whole match -- confirm against the regexp module). */
+ long beg0 = BEG(0);
+ long end0 = END(0);
+ char *p, *rp;
+ long len, rlen;
+
+ repl = mrb_reg_regsub(mrb, repl, str, regs, pat);
+ enc = mrb_enc_compatible(mrb, str, repl);
+ if (!enc) {
+ /* No common encoding: accepted only when the text before and after the
+ * match both scan as 7BIT; otherwise an encoding error is raised. */
+ mrb_encoding *str_enc = STR_ENC_GET(mrb, str);
+ p = RSTRING_PTR(str); len = RSTRING_LEN(str);
+ if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
+ coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
+ mrb_raise(mrb, E_ENCODING_ERROR, "incompatible character encodings: %s and %s",
+ mrb_enc_name(str_enc),
+ mrb_enc_name(STR_ENC_GET(mrb, repl)));
+ }
+ enc = STR_ENC_GET(mrb, repl);
+ }
+ mrb_str_modify(mrb, str);
+ mrb_enc_associate(mrb, str, enc);
+ //if (OBJ_TAINTED(repl)) tainted = 1;
+ //if (OBJ_UNTRUSTED(repl)) untrusted = 1;
+ /* Combine the receiver's cached code range with the replacement's. */
+ if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
+ int cr2 = ENC_CODERANGE(repl);
+ if (cr2 == ENC_CODERANGE_BROKEN ||
+ (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
+ cr = ENC_CODERANGE_UNKNOWN;
+ else
+ cr = cr2;
+ }
+ /* plen: length of the matched text, rlen: length of its replacement. */
+ plen = end0 - beg0;
+ rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
+ len = RSTRING_LEN(str);
+ if (rlen > plen) {
+ RESIZE_CAPA(str, len + rlen - plen);
+ }
+ /* Re-read the buffer pointer after the possible resize. */
+ p = RSTRING_PTR(str);
+ if (rlen != plen) {
+ /* Shift the text after the match so the replacement fits exactly. */
+ memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
+ }
+ memcpy(p + beg0, rp, rlen);
+ len += rlen - plen;
+ STR_SET_LEN(str, len);
+ RSTRING_PTR(str)[len] = '\0';
+ ENC_CODERANGE_SET(str, cr);
+
+ return str;
+ }
+ return mrb_nil_value();
+ }
+#endif //INCLUDE_REGEXP
+
+/* 15.2.10.5.36 */
+
+/*
+ * call-seq:
+ * str.sub(pattern, replacement) -> new_str
+ * str.sub(pattern, hash) -> new_str
+ * str.sub(pattern) {|match| block } -> new_str
+ *
+ * Returns a copy of <i>str</i> with the <em>first</em> occurrence of
+ * <i>pattern</i> substituted for the second argument. The <i>pattern</i> is
+ * typically a <code>Regexp</code>; if given as a <code>String</code>, any
+ * regular expression metacharacters it contains will be interpreted
+ * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
+ * instead of a digit.
+ *
+ * If <i>replacement</i> is a <code>String</code> it will be substituted for
+ * the matched text. It may contain back-references to the pattern's capture
+ * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
+ * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
+ * double-quoted string, both back-references must be preceded by an
+ * additional backslash. However, within <i>replacement</i> the special match
+ * variables, such as <code>$&</code>, will not refer to the current match.
+ *
+ * If the second argument is a <code>Hash</code>, and the matched text is one
+ * of its keys, the corresponding value is the replacement string.
+ *
+ * In the block form, the current match string is passed in as a parameter,
+ * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
+ * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
+ * returned by the block will be substituted for the match on each call.
+ *
+ * The result inherits any tainting in the original string or any supplied
+ * replacement string.
+ *
+ * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
+ * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
+ * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
+ * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
+ * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
+ * #=> "Is /bin/bash your preferred shell?"
+ */
+
+#ifdef INCLUDE_REGEXP
+ static mrb_value
+ mrb_str_sub(mrb_state *mrb, mrb_value self)
+ {
+   /* Substitute on a duplicate so the receiver itself is never modified. */
+   mrb_value copy = mrb_str_dup(mrb, self);
+
+   /* The in-place variant's own return value is not needed here: the copy
+    * is returned whether or not a substitution took place. */
+   mrb_str_sub_bang(mrb, copy);
+
+   return copy;
+ }
+#endif //INCLUDE_REGEXP
+
+ /*
+  * Converts the NUL-terminated text `str` into a Fixnum.
+  *
+  *   str      - text to parse; NULL is treated as invalid input
+  *   base     - radix 2..36; a value <= 0 asks for auto-detection from a
+  *              "0x"/"0b"/"0o"/"0d"/"0" prefix (values below -1 supply the
+  *              radix to use when no such prefix is present)
+  *   badcheck - non-zero: invalid input (NULL, no digits, a doubled sign,
+  *              trailing garbage) raises ArgumentError;
+  *              zero: invalid input yields Fixnum 0 and trailing text is
+  *              ignored
+  *
+  * Raises ArgumentError for a radix outside 2..36.
+  *
+  * NOTE(review): the digits are converted with strtoul() and no overflow
+  * check is made, so values beyond the Fixnum range are not detected.
+  */
+ mrb_value
+ mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
+ {
+   #define BDIGIT unsigned int
+   #define BDIGIT_DBL unsigned long
+
+   char *end;
+   char sign = 1;
+   int c;
+   unsigned long val;
+
+ #undef ISDIGIT
+ #define ISDIGIT(c) ('0' <= (c) && (c) <= '9')
+ #define conv_digit(c) \
+     (!ISASCII(c) ? -1 : \
+      isdigit(c) ? ((c) - '0') : \
+      islower(c) ? ((c) - 'a' + 10) : \
+      isupper(c) ? ((c) - 'A' + 10) : \
+      -1)
+
+   if (!str) {
+     if (badcheck) goto bad;
+     return mrb_fixnum_value(0);
+   }
+   while (ISSPACE(*str)) str++;
+
+   /* At most one sign is accepted. */
+   if (str[0] == '+') {
+     str++;
+   }
+   else if (str[0] == '-') {
+     str++;
+     sign = 0;
+   }
+   if (str[0] == '+' || str[0] == '-') {
+     if (badcheck) goto bad;
+     return mrb_fixnum_value(0);
+   }
+
+   /* Radix auto-detection. */
+   if (base <= 0) {
+     if (str[0] == '0') {
+       switch (str[1]) {
+         case 'x': case 'X':
+           base = 16;
+           break;
+         case 'b': case 'B':
+           base = 2;
+           break;
+         case 'o': case 'O':
+           base = 8;
+           break;
+         case 'd': case 'D':
+           base = 10;
+           break;
+         default:
+           base = 8;
+       }
+     }
+     else if (base < -1) {
+       base = -base;
+     }
+     else {
+       base = 10;
+     }
+   }
+
+   /* Drop the prefix that matches the radix; reject unsupported radixes. */
+   switch (base) {
+     case 2:
+       if (str[0] == '0' && (str[1] == 'b'||str[1] == 'B')) {
+         str += 2;
+       }
+       break;
+     case 8:
+       if (str[0] == '0' && (str[1] == 'o'||str[1] == 'O')) {
+         str += 2;
+       }
+       break;
+     case 10:
+       if (str[0] == '0' && (str[1] == 'd'||str[1] == 'D')) {
+         str += 2;
+       }
+       break;
+     case 16:
+       if (str[0] == '0' && (str[1] == 'x'||str[1] == 'X')) {
+         str += 2;
+       }
+       break;
+     default:
+       if (base < 2 || 36 < base) {
+         mrb_raise(mrb, E_ARGUMENT_ERROR, "illegal radix %d", base);
+       }
+       break;
+   } /* end of switch (base) { */
+
+   if (*str == '0') { /* squeeze preceding 0s */
+     int us = 0;
+     while ((c = *++str) == '0' || c == '_') {
+       if (c == '_') {
+         if (++us >= 2)
+           break;
+       }
+       else
+         us = 0;
+     }
+     /* Keep one '0' when nothing but zeros preceded the end or a space. */
+     if (!(c = *str) || ISSPACE(c)) --str;
+   }
+
+   /* The first remaining character must be a digit of the chosen radix. */
+   c = *str;
+   c = conv_digit(c);
+   if (c < 0 || c >= base) {
+     if (badcheck) goto bad;
+     return mrb_fixnum_value(0);
+   }
+
+   val = strtoul((char*)str, &end, base);
+
+   if (badcheck) {
+     if (end == str) goto bad; /* no number */
+     while (*end && ISSPACE(*end)) end++;
+     if (*end) goto bad; /* trailing garbage */
+   }
+
+   if (sign) return mrb_fixnum_value(val);
+   else {
+     long result = -(long)val;
+     return mrb_fixnum_value(result);
+   }
+ bad:
+   /* Report invalid input as an error rather than printing to stdout. */
+   mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid value for Integer()");
+   /* mrb_raise() is expected not to return; this keeps the function
+    * well-formed if it ever does. */
+   return mrb_fixnum_value(0);
+ }
+ /*
+  * Returns the byte buffer of the string referenced by `ptr` for use as a
+  * C string.  Raises ArgumentError when the buffer is missing or when its
+  * recorded length differs from strlen() of the buffer, i.e. when a NUL
+  * byte occurs before the recorded end.
+  */
+ char *
+ mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
+ {
+   struct RString *rstr = mrb_str_ptr(*ptr);
+   char *cstr = rstr->buf;
+   int usable = cstr && rstr->len == strlen(cstr);
+
+   if (!usable) {
+     mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
+   }
+   return cstr;
+ }
+
+ /*
+  * Converts the string object `str` to a Fixnum via mrb_cstr_to_inum().
+  *
+  *   base     - radix handed through to mrb_cstr_to_inum()
+  *   badcheck - non-zero enables strict checking: the string must not
+  *              contain an embedded NUL byte and parse errors are reported
+  *              by mrb_cstr_to_inum()
+  *
+  * When the string buffer has no terminating NUL at its recorded length, a
+  * terminated temporary copy is parsed instead and released afterwards.
+  */
+ mrb_value
+ mrb_str_to_inum(mrb_state *mrb, mrb_value str, int base, int badcheck)
+ {
+   char *s;
+   char *copy = NULL;
+   size_t len;
+   mrb_value num;
+
+   //StringValue(str);
+   mrb_string_value(mrb, &str);
+   if (badcheck) {
+     //s = StringValueCStr(str);
+     s = mrb_string_value_cstr(mrb, &str);
+   }
+   else {
+     s = RSTRING_PTR(str);
+   }
+   if (s) {
+     len = RSTRING_LEN(str);
+     if (s[len]) { /* no sentinel somehow */
+       copy = mrb_malloc(mrb, len+1);
+       memcpy(copy, s, sizeof(char)*len);
+       copy[len] = '\0';
+       s = copy;
+     }
+   }
+   num = mrb_cstr_to_inum(mrb, s, base, badcheck);
+   /* Release the temporary copy; it used to be leaked on every call.
+    * NOTE(review): it is still leaked if mrb_cstr_to_inum() raises. */
+   if (copy) mrb_free(mrb, copy);
+   return num;
+ }
+
+/* 15.2.10.5.38 */
+/*
+ * call-seq:
+ * str.to_i(base=10) => integer
+ *
+ * Returns the result of interpreting leading characters in <i>str</i> as an
+ * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
+ * end of a valid number are ignored. If there is not a valid number at the
+ * start of <i>str</i>, <code>0</code> is returned. This method never raises an
+ * exception.
+ *
+ * "12345".to_i #=> 12345
+ * "99 red balloons".to_i #=> 99
+ * "0a".to_i #=> 0
+ * "0a".to_i(16) #=> 10
+ * "hello".to_i #=> 0
+ * "1100101".to_i(2) #=> 101
+ * "1100101".to_i(8) #=> 294977
+ * "1100101".to_i(10) #=> 1100101
+ * "1100101".to_i(16) #=> 17826049
+ */
+ static mrb_value
+ mrb_str_to_i(mrb_state *mrb, mrb_value self)
+ {
+   mrb_value *argv;
+   int argc;
+   int base = 10; /* radix used when the caller passes no argument */
+
+   mrb_get_args(mrb, "*", &argv, &argc);
+   if (argc != 0) {
+     /* Only the first argument is consulted. */
+     base = mrb_fixnum(argv[0]);
+   }
+
+   if (base < 0) {
+     mrb_raise(mrb, E_ARGUMENT_ERROR, "illegal radix %d", base);
+   }
+   /* badcheck is off: unparsable text yields 0 instead of an error. */
+   return mrb_str_to_inum(mrb, self, base, 0/*Qfalse*/);
+ }
+
+ /*
+ * Parses the NUL-terminated text `p` as a double using strtod().
+ *
+ * Returns 0.0 for a NULL pointer.  Leading whitespace is skipped.  With
+ * badcheck == 0 a leading "0x"/"0X" yields 0.0.  If characters remain after
+ * the first strtod() call, the text is copied into a local buffer with
+ * underscores removed and parsed a second time.
+ *
+ * With badcheck != 0 malformed input currently only prints "Float()" to
+ * stdout (the original error call is commented out); no error is raised.
+ */
+ double
+ mrb_cstr_to_dbl(mrb_state *mrb, const char * p, int badcheck)
+ {
+ const char *q;
+ char *end;
+ double d;
+ // const char *ellipsis = "";
+ // int w;
+ /* NOTE(review): DBL_DIG is also a <float.h> macro; this definition would
+ * clash if that header is included in this translation unit -- confirm. */
+ #define DBL_DIG 16
+ enum {max_width = 20};
+ /* OutOfRange() refers to `w` and `ellipsis`, both commented out above; the
+ * macro is not expanded anywhere in this function. */
+ #define OutOfRange() (((w = end - p) > max_width) ? \
+ (w = max_width, ellipsis = "...") : \
+ (w = (int)(end - p), ellipsis = ""))
+
+ if (!p) return 0.0;
+ /* q keeps the start of the input; it is only used by the disabled
+ * mrb_invalid_str() call below. */
+ q = p;
+ while (ISSPACE(*p)) p++;
+
+ if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ return 0.0;
+ }
+ d = strtod(p, &end);
+ if (p == end) {
+ /* strtod() consumed nothing.  The `bad` label is also the target of the
+ * gotos further down, which jump into this block. */
+ if (badcheck) {
+ bad:
+ //mrb_invalid_str(q, "Float()");
+ printf("Float()\n");
+ }
+ return d;
+ }
+ if (*end) {
+ /* Unparsed text remains: rebuild the number without underscores. */
+ char buf[DBL_DIG * 4 + 10];
+ char *n = buf;
+ char *e = buf + sizeof(buf) - 1;
+ char prev = 0;
+
+ /* Copy the part strtod() already accepted. */
+ while (p < end && n < e) prev = *n++ = *p++;
+ while (*p) {
+ if (*p == '_') {
+ /* remove underscores between digits */
+ if (badcheck) {
+ /* Strict mode: an underscore must sit between two digits. */
+ if (n == buf || !ISDIGIT(prev)) goto bad;
+ ++p;
+ if (!ISDIGIT(*p)) goto bad;
+ }
+ else {
+ /* Lenient mode: skip any run of underscores. */
+ while (*++p == '_');
+ continue;
+ }
+ }
+ prev = *p++;
+ /* Characters beyond the buffer capacity are dropped. */
+ if (n < e) *n++ = prev;
+ }
+ *n = '\0';
+ p = buf;
+
+ if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ return 0.0;
+ }
+
+ d = strtod(p, &end);
+ if (badcheck) {
+ /* Strict mode: only trailing whitespace may follow the number. */
+ if (!end || p == end) goto bad;
+ while (*end && ISSPACE(*end)) end++;
+ if (*end) goto bad;
+ }
+ }
+ return d;
+ }
+
+ /*
+  * Converts the string object `str` to a double via mrb_cstr_to_dbl().
+  *
+  * With badcheck != 0 a string containing a NUL byte raises ArgumentError.
+  * When the buffer has no terminating NUL at its recorded length, a
+  * terminated temporary copy is parsed instead and released afterwards.
+  */
+ double
+ mrb_str_to_dbl(mrb_state *mrb, mrb_value str, int badcheck)
+ {
+   char *s;
+   char *copy = NULL;
+   size_t len;
+   double d;
+
+   //StringValue(str);
+   mrb_string_value(mrb, &str);
+   s = RSTRING_PTR(str);
+   len = RSTRING_LEN(str);
+   if (s) {
+     if (badcheck && memchr(s, '\0', len)) {
+       mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
+     }
+     if (s[len]) { /* no sentinel somehow */
+       copy = mrb_malloc(mrb, len+1);
+       memcpy(copy, s, sizeof(char)*len);
+       copy[len] = '\0';
+       s = copy;
+     }
+   }
+   d = mrb_cstr_to_dbl(mrb, s, badcheck);
+   /* Release the temporary copy; it used to be leaked on every call. */
+   if (copy) mrb_free(mrb, copy);
+   return d;
+ }
+
+/* 15.2.10.5.39 */
+/*
+ * call-seq:
+ * str.to_f => float
+ *
+ * Returns the result of interpreting leading characters in <i>str</i> as a
+ * floating point number. Extraneous characters past the end of a valid number
+ * are ignored. If there is not a valid number at the start of <i>str</i>,
+ * <code>0.0</code> is returned. This method never raises an exception.
+ *
+ * "123.45e1".to_f #=> 1234.5
+ * "45.67 degrees".to_f #=> 45.67
+ * "thx1138".to_f #=> 0.0
+ */
+ static mrb_value
+ mrb_str_to_f(mrb_state *mrb, mrb_value self)
+ {
+   /* badcheck is off: text that is not a number converts without error. */
+   double parsed = mrb_str_to_dbl(mrb, self, 0/*Qfalse*/);
+
+   return mrb_float_value(parsed);
+ }
+
+/* 15.2.10.5.40 */
+/*
+ * call-seq:
+ * str.to_s => str
+ * str.to_str => str
+ *
+ * Returns the receiver.
+ */
+ static mrb_value
+ mrb_str_to_s(mrb_state *mrb, mrb_value self)
+ {
+   /* A receiver whose class is exactly String is returned as-is. */
+   if (mrb_obj_class(mrb, self) == mrb->string_class) {
+     return self;
+   }
+   /* Any other class gets a duplicate made with mrb_str_dup(). */
+   return mrb_str_dup(mrb, self);
+ }
+
+/* 15.2.10.5.43 */
+/*
+ * call-seq:
+ * str.upcase! => str or nil
+ *
+ * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
+ * were made.
+ */
+ static mrb_value
+ mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
+ {
+ #ifdef INCLUDE_ENCODING
+ mrb_encoding *enc;
+ #endif //INCLUDE_ENCODING
+ char *s, *send;
+ /* Set to 1 as soon as at least one byte/character has been rewritten. */
+ int modify = 0;
+ #ifdef INCLUDE_ENCODING
+ int n;
+
+ str_modify_keep_cr(mrb, str);
+ enc = STR_ENC_GET(mrb, str);
+ mrb_str_check_dummy_enc(mrb, enc);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
+ if (single_byte_optimizable(mrb, str)) {
+ /* Byte-at-a-time path: only ASCII 'a'..'z' are converted. */
+ while (s < send) {
+ unsigned int c = *(unsigned char*)s;
+
+ if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
+ *s = 'A' + (c - 'a');
+ modify = 1;
+ }
+ s++;
+ }
+ }
+ else {
+ int ascompat = mrb_enc_asciicompat(mrb, enc);
+
+ while (s < send) {
+ unsigned int c;
+
+ /* Bytes below 0x80 in an ASCII-compatible encoding take the cheap path. */
+ if (ascompat && (c = *(unsigned char*)s) < 0x80) {
+ if (mrb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
+ *s = 'A' + (c - 'a');
+ modify = 1;
+ }
+ s++;
+ }
+ else {
+ /* Decode one character; n receives the step used to advance s. */
+ c = mrb_enc_codepoint_len(mrb, s, send, &n, enc);
+ if (mrb_enc_islower(c, enc)) {
+ /* assuming toupper returns codepoint with same size */
+ mrb_enc_mbcput(mrb_enc_toupper(c, enc), s, enc);
+ modify = 1;
+ }
+ s += n;
+ }
+ }
+ }
+ #else
+ /* Build without encoding support: plain ASCII conversion of every byte. */
+ mrb_str_modify(mrb, str);
+ s = RSTRING_PTR(str); send = RSTRING_END(str);
+ while (s < send) {
+ unsigned int c = *(unsigned char*)s;
+
+ if ('a' <= c && c <= 'z') {
+ *s = 'A' + (c - 'a');
+ modify = 1;
+ }
+ s++;
+ }
+ #endif //INCLUDE_ENCODING
+ /* str when something changed, nil otherwise. */
+ if (modify) return str;
+ return mrb_nil_value();
+ }
+
+/* 15.2.10.5.42 */
+/*
+ * call-seq:
+ * str.upcase => new_str
+ *
+ * Returns a copy of <i>str</i> with all lowercase letters replaced with their
+ * uppercase counterparts. The operation is locale insensitive---only
+ * characters ``a'' to ``z'' are affected.
+ *
+ * "hEllO".upcase #=> "HELLO"
+ */
+ static mrb_value
+ mrb_str_upcase(mrb_state *mrb, mrb_value self)
+ {
+   /* Upcase a duplicate in place; the receiver is left as it was. */
+   mrb_value copy = mrb_str_dup(mrb, self);
+
+   /* The bang variant returns nil when nothing changed; the copy is
+    * returned in either case. */
+   mrb_str_upcase_bang(mrb, copy);
+   return copy;
+ }
+
+/* 15.2.10.5.xx */
+/*
+ * call-seq:
+ * str.force_encoding(encoding) -> str
+ *
+ * Changes the encoding to +encoding+ and returns self.
+ */
+#ifdef INCLUDE_ENCODING
+ static mrb_value
+ mrb_str_force_encoding(mrb_state *mrb, mrb_value self)
+ {
+   mrb_value encoding_arg;
+
+   /* One required argument: the encoding to attach. */
+   mrb_get_args(mrb, "o", &encoding_arg);
+   str_modifiable(self);
+   /* Attach the encoding; the bytes of the string are not touched here. */
+   mrb_enc_associate(mrb, self, mrb_to_encoding(mrb, encoding_arg));
+   /* Drop the cached code range so it is not reused for the new encoding. */
+   ENC_CODERANGE_CLEAR(self);
+
+   return self;
+ }
+
+ /*
+ * Scans the bytes in [s, e) under encoding `enc` and updates the code range
+ * stored in *cr.
+ *
+ * Returns the number of bytes examined: e - s when the whole range was
+ * handled, or the offset (p - s) at which scanning stopped.
+ * If *cr is already ENC_CODERANGE_BROKEN nothing is scanned.
+ */
+ long
+ mrb_str_coderange_scan_restartable(const char *s, const char *e, mrb_encoding *enc, int *cr)
+ {
+ const char *p = s;
+
+ if (*cr == ENC_CODERANGE_BROKEN)
+ return e - s;
+
+ if (mrb_enc_to_index(enc) == 0) {
+ /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */
+ p = search_nonascii(p, e);
+ *cr = (!p && *cr != ENC_CODERANGE_VALID) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
+ return e - s;
+ }
+ /* NOTE(review): `mrb` is neither a parameter nor a local of this function;
+ * this only compiles if mrb_enc_asciicompat() is a macro that ignores its
+ * first argument -- confirm. */
+ else if (mrb_enc_asciicompat(mrb, enc)) {
+ p = search_nonascii(p, e);
+ if (!p) {
+ /* No non-ASCII byte found: keep VALID if already set, else 7BIT. */
+ if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
+ return e - s;
+ }
+ while (p < e) {
+ int ret = mrb_enc_precise_mbclen(p, e, enc);
+ if (!MBCLEN_CHARFOUND_P(ret)) {
+ /* Not a complete character: BROKEN if invalid, otherwise UNKNOWN. */
+ *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
+ return p - s;
+ }
+ p += MBCLEN_CHARFOUND_LEN(ret);
+ if (p < e) {
+ /* Jump ahead to the next non-ASCII byte, if any. */
+ p = search_nonascii(p, e);
+ if (!p) {
+ *cr = ENC_CODERANGE_VALID;
+ return e - s;
+ }
+ }
+ }
+ *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
+ return p - s;
+ }
+ else {
+ /* Encoding not ASCII-compatible: validate character by character. */
+ while (p < e) {
+ int ret = mrb_enc_precise_mbclen(p, e, enc);
+ if (!MBCLEN_CHARFOUND_P(ret)) {
+ *cr = MBCLEN_INVALID_P(ret) ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_UNKNOWN;
+ return p - s;
+ }
+ p += MBCLEN_CHARFOUND_LEN(ret);
+ }
+ *cr = e < p ? ENC_CODERANGE_BROKEN: ENC_CODERANGE_VALID;
+ return p - s;
+ }
+ }
+
+ /*
+ * Converts `str` from encoding `from` to encoding `to`.
+ *
+ * Returns `str` itself when `to` is NULL, when both encodings are the same
+ * object, when no converter can be opened, or when the conversion ends in
+ * any state other than "finished".  When only the encoding tag has to
+ * change, a duplicate tagged with `to` is returned.  Otherwise a new string
+ * holding the converted bytes is returned.
+ *
+ * ecflags and ecopts are passed unchanged to mrb_econv_open_opts().
+ */
+ mrb_value
+ mrb_str_conv_enc_opts(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to, int ecflags, mrb_value ecopts)
+ {
+ mrb_econv_t *ec;
+ mrb_econv_result_t ret;
+ long len;
+ mrb_value newstr;
+ const unsigned char *sp;
+ unsigned char *dp;
+
+ if (!to) return str;
+ if (from == to) return str;
+ /* 7BIT content going to an ASCII-compatible target, or any content going
+ * to ASCII-8BIT, is only re-tagged, never converted. */
+ if ((mrb_enc_asciicompat(mrb, to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
+ to == mrb_ascii8bit_encoding(mrb)) {
+ if (STR_ENC_GET(mrb, str) != to) {
+ str = mrb_str_dup(mrb, str);
+ mrb_enc_associate(mrb, str, to);
+ }
+ return str;
+ }
+
+ /* Start with a destination as long as the source. */
+ len = RSTRING_LEN(str);
+ newstr = mrb_str_new(mrb, 0, len);
+
+ retry:
+ /* Each attempt opens a fresh converter and restarts from the beginning of
+ * the source. */
+ ec = mrb_econv_open_opts(mrb, from->name, to->name, ecflags, ecopts);
+ if (!ec) return str;
+
+ sp = (unsigned char*)RSTRING_PTR(str);
+ dp = (unsigned char*)RSTRING_PTR(newstr);
+ ret = mrb_econv_convert(mrb, ec, &sp, (unsigned char*)RSTRING_END(str),
+ &dp, (unsigned char*)RSTRING_END(newstr), 0);
+ mrb_econv_close(ec);
+ switch (ret) {
+ case econv_destination_buffer_full:
+ /* destination buffer short */
+ /* Double the destination (at least 2 bytes) and try again. */
+ len = len < 2 ? 2 : len * 2;
+ mrb_str_resize(mrb, newstr, len);
+ goto retry;
+
+ case econv_finished:
+ /* dp was advanced past the last byte written; trim to that length. */
+ len = dp - (unsigned char*)RSTRING_PTR(newstr);
+ mrb_str_set_len(mrb, newstr, len);
+ mrb_enc_associate(mrb, newstr, to);
+ return newstr;
+
+ default:
+ /* some error, return original */
+ return str;
+ }
+ }
+
+ /*
+  * Convenience form of mrb_str_conv_enc_opts(): same conversion with the
+  * flags set to 0 and nil as the options value.
+  */
+ mrb_value
+ mrb_str_conv_enc(mrb_state *mrb, mrb_value str, mrb_encoding *from, mrb_encoding *to)
+ {
+   const int no_flags = 0;
+   mrb_value no_opts = mrb_nil_value();
+
+   return mrb_str_conv_enc_opts(mrb, str, from, to, no_flags, no_opts);
+ }
+#endif //INCLUDE_ENCODING
+
+#ifndef INCLUDE_ENCODING
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
+#endif
+#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
+
+ /*
+  * Checks whether `m` -- the text following a '$' -- spells a special
+  * global variable name.
+  *
+  * Accepted spellings are: one of the punctuation characters listed below
+  * (or '0'), '-' optionally followed by a single identifier character, or
+  * a run of decimal digits.  Returns non-zero only when the accepted
+  * spelling covers the whole string.
+  */
+ static int
+ is_special_global_name(const char *m)
+ {
+   switch (*m) {
+     case '~': case '*': case '$': case '?': case '!': case '@':
+     case '/': case '\\': case ';': case ',': case '.': case '=':
+     case ':': case '<': case '>': case '\"':
+     case '&': case '`': case '\'': case '+':
+     case '0':
+       ++m;
+       break;
+     case '-':
+       ++m;
+       if (is_identchar(*m)) m += 1;
+       break;
+     default:
+       if (!ISDIGIT(*m)) return 0;
+       do ++m; while (ISDIGIT(*m));
+   }
+   /* Anything left over means the name is not a special global. */
+   return !*m;
+ }
+
+ /*
+ * Returns TRUE when `name` is spelled entirely as one of the forms
+ * recognised below, FALSE otherwise (including NULL and the empty string):
+ *
+ *   - '$' followed by a special global name or an identifier
+ *   - '@' or '@@' followed by an identifier
+ *   - an operator spelling:  <  <<  <=  <=>  >  >>  >=  =~  ==  ===
+ *     *  **  +  -  +@  -@  |  ^  &  /  %  ~  `  []  []=
+ *   - an identifier; when it does not start with an upper-case letter it may
+ *     end in one of '!', '?' or '='
+ */
+ int
+ mrb_symname_p(const char *name)
+ {
+ const char *m = name;
+ /* TRUE only for plain identifiers that do not start with an upper-case
+ * letter; enables the optional '!', '?', '=' suffix. */
+ int localid = FALSE;
+
+ if (!m) return FALSE;
+ switch (*m) {
+ case '\0':
+ return FALSE;
+
+ case '$':
+ if (is_special_global_name(++m)) return TRUE;
+ goto id;
+
+ case '@':
+ if (*++m == '@') ++m;
+ goto id;
+
+ case '<':
+ switch (*++m) {
+ case '<': ++m; break;
+ case '=': if (*++m == '>') ++m; break;
+ default: break;
+ }
+ break;
+
+ case '>':
+ switch (*++m) {
+ case '>': case '=': ++m; break;
+ }
+ break;
+
+ case '=':
+ /* A lone '=' is rejected; only =~, == and === are accepted. */
+ switch (*++m) {
+ case '~': ++m; break;
+ case '=': if (*++m == '=') ++m; break;
+ default: return FALSE;
+ }
+ break;
+
+ case '*':
+ if (*++m == '*') ++m;
+ break;
+
+ case '+': case '-':
+ if (*++m == '@') ++m;
+ break;
+
+ case '|': case '^': case '&': case '/': case '%': case '~': case '`':
+ ++m;
+ break;
+
+ case '[':
+ if (*++m != ']') return FALSE;
+ if (*++m == '=') ++m;
+ break;
+
+ default:
+ localid = !ISUPPER(*m);
+ /* The '$' and '@' cases jump here with m already past their prefix. */
+ id:
+ if (*m != '_' && !ISALPHA(*m)) return FALSE;
+ while (is_identchar(*m)) m += 1;
+ if (localid) {
+ switch (*m) {
+ case '!': case '?': case '=': ++m;
+ }
+ }
+ break;
+ }
+ /* The recognised spelling must cover the whole string. */
+ return *m ? FALSE : TRUE;
+ }
+#endif //INCLUDE_ENCODING
+
+/*
+ * call-seq:
+ * str.dump -> new_str
+ *
+ * Produces a version of <i>str</i> with all nonprinting characters replaced by
+ * <code>\nnn</code> notation and all special characters escaped.
+ */
+ mrb_value
+ mrb_str_dump(mrb_state *mrb, mrb_value str)
+ {
+ #ifdef INCLUDE_ENCODING
+ mrb_encoding *enc = mrb_enc_get(mrb, str);
+ #endif //INCLUDE_ENCODING
+ long len;
+ const char *p, *pend;
+ char *q, *qend;
+ mrb_value result;
+ #ifdef INCLUDE_ENCODING
+ int u8 = (enc == mrb_utf8_encoding(mrb));
+ #endif //INCLUDE_ENCODING
+
+ /* Pass 1: compute the length of the escaped text.  The per-character
+ * sizes counted here must match what pass 2 writes. */
+ len = 2; /* "" */
+ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+ while (p < pend) {
+ unsigned char c = *p++;
+ switch (c) {
+ case '"': case '\\':
+ case '\n': case '\r':
+ case '\t': case '\f':
+ case '\013': case '\010': case '\007': case '\033':
+ len += 2;
+ break;
+
+ case '#':
+ /* '#' gets a backslash only when IS_EVSTR() says so. */
+ len += IS_EVSTR(p, pend) ? 2 : 1;
+ break;
+
+ default:
+ if (ISPRINT(c)) {
+ len++;
+ }
+ else {
+ #ifdef INCLUDE_ENCODING
+ if (u8) { /* \u{NN} */
+ int n = mrb_enc_precise_mbclen(p-1, pend, enc);
+ if (MBCLEN_CHARFOUND_P(n-1)) {
+ unsigned int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc);
+ /* One extra byte per hex digit beyond the first, plus 5 for the
+ * surrounding characters and that first digit. */
+ while (cc >>= 4) len++;
+ len += 5;
+ p += MBCLEN_CHARFOUND_LEN(n)-1;
+ break;
+ }
+ }
+ #endif //INCLUDE_ENCODING
+ len += 4; /* \xNN */
+ }
+ break;
+ }
+ }
+ #ifdef INCLUDE_ENCODING
+ if (!mrb_enc_asciicompat(mrb, enc)) {
+ len += 19; /* ".force_encoding('')" */
+ len += strlen(enc->name);
+ }
+ #endif //INCLUDE_ENCODING
+
+ /* Pass 2: write the escaped text into a result of the computed length. */
+ result = mrb_str_new5(mrb, str, 0, len);
+ p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
+ q = RSTRING_PTR(result); qend = q + len + 1;
+
+ *q++ = '"';
+ while (p < pend) {
+ unsigned char c = *p++;
+
+ if (c == '"' || c == '\\') {
+ *q++ = '\\';
+ *q++ = c;
+ }
+ else if (c == '#') {
+ if (IS_EVSTR(p, pend)) *q++ = '\\';
+ *q++ = '#';
+ }
+ else if (c == '\n') {
+ *q++ = '\\';
+ *q++ = 'n';
+ }
+ else if (c == '\r') {
+ *q++ = '\\';
+ *q++ = 'r';
+ }
+ else if (c == '\t') {
+ *q++ = '\\';
+ *q++ = 't';
+ }
+ else if (c == '\f') {
+ *q++ = '\\';
+ *q++ = 'f';
+ }
+ else if (c == '\013') {
+ *q++ = '\\';
+ *q++ = 'v';
+ }
+ else if (c == '\010') {
+ *q++ = '\\';
+ *q++ = 'b';
+ }
+ else if (c == '\007') {
+ *q++ = '\\';
+ *q++ = 'a';
+ }
+ else if (c == '\033') {
+ *q++ = '\\';
+ *q++ = 'e';
+ }
+ else if (ISPRINT(c)) {
+ *q++ = c;
+ }
+ else {
+ *q++ = '\\';
+ #ifdef INCLUDE_ENCODING
+ if (u8) {
+ int n = mrb_enc_precise_mbclen(p-1, pend, enc) - 1;
+ if (MBCLEN_CHARFOUND_P(n)) {
+ int cc = mrb_enc_mbc_to_codepoint(p-1, pend, enc);
+ p += n;
+ snprintf(q, qend-q, "u{%x}", cc);
+ q += strlen(q);
+ continue;
+ }
+ }
+ snprintf(q, qend-q, "x%02X", c);
+ #else
+ /* Without encoding support the byte is written as three octal digits. */
+ sprintf(q, "%03o", c&0xff);
+ #endif //INCLUDE_ENCODING
+ /* Both escape forms above emit exactly three characters. */
+ q += 3;
+ }
+ }
+ *q++ = '"';
+ #ifdef INCLUDE_ENCODING
+ *q = '\0';
+ if (!mrb_enc_asciicompat(mrb, enc)) {
+ /* Append the encoding name so the original encoding stays visible. */
+ snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
+ enc = mrb_ascii8bit_encoding(mrb);
+ }
+ //OBJ_INFECT(result, str);
+ /* result from dump is ASCII */
+ mrb_enc_associate(mrb, result, enc);
+ ENC_CODERANGE_SET(result, ENC_CODERANGE_7BIT);
+ #endif //INCLUDE_ENCODING
+ return result;
+ }
+
+ /*
+  * Appends `len` bytes starting at `ptr` to `str` and returns `str`.
+  *
+  * Raises ArgumentError when `len` is negative.  The append itself is done
+  * by str_buf_cat().
+  */
+ mrb_value
+ mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len)
+ {
+   if (len < 0) {
+     mrb_raise(mrb, E_ARGUMENT_ERROR, "negative string size (or size too big)");
+   }
+   /* The former `if (0)` branch could never run (and dropped the result of
+    * mrb_realloc()), so it has been removed. */
+   return str_buf_cat(mrb, str, ptr, len);
+ }
+
+ /*
+  * Appends the NUL-terminated C string `ptr` to `str`; the length is
+  * measured with strlen() and the work is delegated to mrb_str_cat().
+  */
+ mrb_value
+ mrb_str_cat2(mrb_state *mrb, mrb_value str, const char *ptr)
+ {
+   size_t nbytes = strlen(ptr);
+
+   return mrb_str_cat(mrb, str, ptr, nbytes);
+ }
+
+ /*
+ * Placeholder for appending printf-style formatted text to `str`.
+ *
+ * NOTE(review): `fmt` and `ap` are not used by this body, so no formatted
+ * text is appended; the string is only made modifiable and resized to its
+ * current length before being returned.
+ */
+ mrb_value
+ mrb_str_vcatf(mrb_state *mrb, mrb_value str, const char *fmt, va_list ap)
+ {
+ //mrb_printf_buffer f;
+ //mrb_value klass;
+
+ //StringValue(str);
+ mrb_string_value(mrb, &str);
+ mrb_str_modify(mrb, str);
+ /* RSTRING_END - RSTRING_PTR is the current length, so the size is kept. */
+ mrb_str_resize(mrb, str, (char *)RSTRING_END(str) - RSTRING_PTR(str));
+
+ return str;
+ }
+
+ /*
+  * Variadic front end of mrb_str_vcatf(): packs the arguments following
+  * `format` into a va_list and forwards them.
+  */
+ mrb_value
+ mrb_str_catf(mrb_state *mrb, mrb_value str, const char *format, ...)
+ {
+   va_list args;
+   mrb_value out;
+
+   va_start(args, format);
+   out = mrb_str_vcatf(mrb, str, format, args);
+   va_end(args);
+
+   return out;
+ }
+
+ /*
+ * Stub: the body is empty, so `val` is ignored and nothing is stored.  The
+ * call it was ported from is left commented out below.
+ */
+ void
+ mrb_lastline_set(mrb_value val)
+ {
+ //vm_svar_set(0, val);
+ }
+
+ /*
+  * Appends the string value of `str2` to `str` in place and returns `str`.
+  *
+  * `str2` is converted with mrb_string_value() first.  When it is empty the
+  * call falls through to mrb_str_buf_append() (encoding builds) or returns
+  * `str` unchanged (builds without encoding support).
+  */
+ mrb_value
+ mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2)
+ {
+ #ifdef INCLUDE_ENCODING
+   mrb_encoding *enc;
+   int cr, cr2;
+ #endif //INCLUDE_ENCODING
+
+   //StringValue(str2);
+   mrb_string_value(mrb, &str2);
+   if (RSTRING_LEN(str2) > 0 /*&& STR_ASSOC_P(str)*/) {
+     long len1 = RSTRING_LEN(str);
+     long len2 = RSTRING_LEN(str2);
+     long len = len1 + len2;
+ #ifdef INCLUDE_ENCODING
+     enc = mrb_enc_check(mrb, str, str2);
+     cr = ENC_CODERANGE(str);
+     if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2;
+ #endif //INCLUDE_ENCODING
+     mrb_str_modify(mrb, str);
+     REALLOC_N(mrb, RSTRING(str)->buf, char, len+1);
+     /* Copy only the payload and write the terminator separately: copying
+      * len2+1 bytes overlapped source and destination by one byte when a
+      * string was appended to itself.  RSTRING_PTR(str2) is read after the
+      * reallocation so it is current in that case too. */
+     memcpy(RSTRING(str)->buf + len1, RSTRING_PTR(str2), len2);
+     RSTRING(str)->buf[len] = '\0';
+     RSTRING(str)->len = len;
+ #ifdef INCLUDE_ENCODING
+     /* enc and cr exist only in encoding builds, so their uses are guarded
+      * the same way as their declarations. */
+     mrb_enc_associate(mrb, str, enc);
+     ENC_CODERANGE_SET(str, cr);
+ #endif //INCLUDE_ENCODING
+     //OBJ_INFECT(str, str2);
+     return str;
+   }
+ #ifdef INCLUDE_ENCODING
+   return mrb_str_buf_append(mrb, str, str2);
+ #else
+   return str;
+ #endif //INCLUDE_ENCODING
+ }
+
+void
+mrb_str_setter(mrb_state *mrb, mrb_value val, mrb_sym id, mrb_value *var)
+{
+ if (!mrb_nil_p(val) && (mrb_type(val) != MRB_TT_STRING)) {
+ mrb_raise(mrb, E_TYPE_ERROR, "value of %s must be String", mrb_sym2name(mrb, id));
+ }
+ *var = val;
+}
+
+#ifdef INCLUDE_ENCODING
+/*
+ * call-seq:
+ * str.ascii_only? -> true or false
+ *
+ * Returns true for a string which has only ASCII characters.
+ *
+ * "abc".force_encoding("UTF-8").ascii_only? #=> true
+ * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
+ */
+
+ int
+ mrb_str_is_ascii_only_p(mrb_state *mrb, mrb_value str)
+ {
+   /* The string counts as ASCII-only exactly when its code range is 7BIT. */
+   if (mrb_enc_str_coderange(mrb, str) == ENC_CODERANGE_7BIT) {
+     return TRUE;
+   }
+   return FALSE;
+ }
+
+#endif //INCLUDE_ENCODING
+
+#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
+ /*
+  * Appends an escaped spelling of the code point `c` to `result` and
+  * returns the number of bytes appended.
+  *
+  * unicode_p != 0: a printable character below 0x7F is appended as itself,
+  *                 values below 0x10000 as \uXXXX, larger ones as \u{X...}.
+  * unicode_p == 0: values below 0x100 as \xXX, larger ones as \x{X...}.
+  */
+ int
+ mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p)
+ {
+   char escaped[CHAR_ESC_LEN + 1];
+   int written;
+
+ #if SIZEOF_INT > 4
+   c &= 0xffffffff;
+ #endif
+   if (!unicode_p) {
+     if (c < 0x100) {
+       snprintf(escaped, CHAR_ESC_LEN, "\\x%02X", c);
+     }
+     else {
+       snprintf(escaped, CHAR_ESC_LEN, "\\x{%X}", c);
+     }
+   }
+   else if (c < 0x7F && ISPRINT(c)) {
+     snprintf(escaped, CHAR_ESC_LEN, "%c", c);
+   }
+   else if (c < 0x10000) {
+     snprintf(escaped, CHAR_ESC_LEN, "\\u%04X", c);
+   }
+   else {
+     snprintf(escaped, CHAR_ESC_LEN, "\\u{%X}", c);
+   }
+   written = (int)strlen(escaped); /* CHAR_ESC_LEN cannot exceed INT_MAX */
+   mrb_str_buf_cat(mrb, result, escaped, written);
+   return written;
+ }
+
+/*
+ * call-seq:
+ * str.inspect -> string
+ *
+ * Returns a printable version of _str_, surrounded by quote marks,
+ * with special characters escaped.
+ *
+ * str = "hello"
+ * str[3] = "\b"
+ * str.inspect #=> "\"hel\\bo\""
+ */
+ mrb_value
+ mrb_str_inspect(mrb_state *mrb, mrb_value str)
+ {
+ #ifdef INCLUDE_ENCODING
+ mrb_encoding *enc = STR_ENC_GET(mrb, str);
+ #endif //INCLUDE_ENCODING
+ /* p: read position; prev: start of the pending run of source bytes that
+ * will be copied to the result unchanged. */
+ const char *p, *pend, *prev;
+ char buf[CHAR_ESC_LEN + 1];
+ #ifdef INCLUDE_ENCODING
+ mrb_value result = mrb_str_buf_new(mrb, 0);
+ mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
+ int unicode_p = mrb_enc_unicode_p(enc);
+ int asciicompat = mrb_enc_asciicompat(mrb, enc);
+
+ /* Result encoding: default internal, else default external; US-ASCII when
+ * that choice is not ASCII-compatible. */
+ if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
+ if (!mrb_enc_asciicompat(mrb, resenc)) resenc = mrb_usascii_encoding(mrb);
+ mrb_enc_associate(mrb, result, resenc);
+ mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); //str_buf_cat2(result, "\"");
+ #else
+ mrb_value result = mrb_str_new_cstr(mrb, "\"");//mrb_str_buf_new2("\"");
+ #endif //INCLUDE_ENCODING
+
+ p = RSTRING_PTR(str); pend = RSTRING_END(str);
+ prev = p;
+ while (p < pend) {
+ /* c: current character; cc: look-ahead character / escape letter. */
+ unsigned int c, cc;
+ int n;
+
+ #ifdef INCLUDE_ENCODING
+ n = mrb_enc_precise_mbclen(p, pend, enc);
+ if (!MBCLEN_CHARFOUND_P(n)) {
+ /* No complete character here: flush the pending run, then emit up to
+ * mbminlen bytes as \xNN escapes. */
+ if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev);
+ n = mrb_enc_mbminlen(enc);
+ if (pend < p + n)
+ n = (int)(pend - p);
+ while (n--) {
+ snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
+ mrb_str_buf_cat(mrb, result, buf, strlen(buf));
+ prev = ++p;
+ }
+ continue;
+ }
+ n = MBCLEN_CHARFOUND_LEN(n);
+ c = mrb_enc_mbc_to_codepoint(p, pend, enc);
+ p += n;
+ /* '"', '\\', and '#' when followed by '$', '@' or '{', get a backslash. */
+ if (c == '"'|| c == '\\' ||
+ (c == '#' &&
+ p < pend &&
+ MBCLEN_CHARFOUND_P(mrb_enc_precise_mbclen(p,pend,enc)) &&
+ (cc = mrb_enc_codepoint(mrb, p, pend, enc),
+ (cc == '$' || cc == '@' || cc == '{')))) {
+ if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
+ mrb_str_buf_cat(mrb, result, "\\", strlen("\\")); //str_buf_cat2(result, "\\");
+ if (asciicompat || enc == resenc) {
+ /* The character itself stays in the pending run and is copied later. */
+ prev = p - n;
+ continue;
+ }
+ }
+ #else
+ /* Build without encoding support: one byte is one character. */
+ c = *p++;
+ n = 1;
+ if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p, pend))) {
+ buf[0] = '\\'; buf[1] = c;
+ mrb_str_buf_cat(mrb, result, buf, 2);
+ continue;
+ }
+ if (ISPRINT(c)) {
+ buf[0] = c;
+ mrb_str_buf_cat(mrb, result, buf, 1);
+ continue;
+ }
+ #endif //INCLUDE_ENCODING
+ /* Control characters that have a one-letter escape. */
+ switch (c) {
+ case '\n': cc = 'n'; break;
+ case '\r': cc = 'r'; break;
+ case '\t': cc = 't'; break;
+ case '\f': cc = 'f'; break;
+ case '\013': cc = 'v'; break;
+ case '\010': cc = 'b'; break;
+ case '\007': cc = 'a'; break;
+ case 033: cc = 'e'; break;
+ default: cc = 0; break;
+ }
+ if (cc) {
+ if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
+ buf[0] = '\\';
+ buf[1] = (char)cc;
+ mrb_str_buf_cat(mrb, result, buf, 2);
+ prev = p;
+ continue;
+ }
+ #ifdef INCLUDE_ENCODING
+ /* Printable characters stay in the pending run and are copied as-is. */
+ if ((enc == resenc && mrb_enc_isprint(c, enc)) ||
+ (asciicompat && mrb_enc_isascii(c, enc) && ISPRINT(c))) {
+ continue;
+ }
+ #endif //INCLUDE_ENCODING
+ /* In builds without encoding support this `else` pairs with `if (cc)`. */
+ else {
+ if (p - n > prev) mrb_str_buf_cat(mrb, result, prev, p - n - prev);
+ #ifdef INCLUDE_ENCODING
+ mrb_str_buf_cat_escaped_char(mrb, result, c, unicode_p);
+ #else
+ sprintf(buf, "\\%03o", c & 0377);
+ mrb_str_buf_cat(mrb, result, buf, strlen(buf));
+ #endif //INCLUDE_ENCODING
+ prev = p;
+ continue;
+ }
+ }
+ /* Flush whatever is still pending, then close the quote. */
+ if (p > prev) mrb_str_buf_cat(mrb, result, prev, p - prev);
+ mrb_str_buf_cat(mrb, result, "\"", strlen("\"")); //str_buf_cat2(result, "\"");
+
+ //OBJ_INFECT(result, str);
+ return result;
+ }
+
+#ifdef INCLUDE_ENCODING
+ /*
+  * Returns TRUE when every character decoded from [s, send) under `enc`
+  * is printable according to mrb_enc_isprint(), FALSE at the first one
+  * that is not.  An empty range yields TRUE.
+  */
+ int
+ sym_printable(mrb_state *mrb, const char *s, const char *send, mrb_encoding *enc)
+ {
+   const char *cursor = s;
+
+   while (cursor < send) {
+     int width;
+     int codepoint = mrb_enc_codepoint_len(mrb, cursor, send, &width, enc);
+
+     if (!mrb_enc_isprint(codepoint, enc)) {
+       return FALSE;
+     }
+     cursor += width;
+   }
+   return TRUE;
+ }
+#endif //INCLUDE_ENCODING
+
+/* ---------------------------*/
+ /*
+  * Creates the String class (a subclass of Object that includes
+  * Comparable), stores it in mrb->string_class and registers its methods.
+  * The trailing numbers are the section numbers already used throughout
+  * this file's method comments.
+  *
+  * NOTE(review): several other entries ("capitalize!", "chop", "chop!",
+  * "each_line", "hash") declare ARGS_REQ(1) although their names suggest
+  * they take no argument; their implementations are not visible from
+  * here, so they are left untouched -- confirm.
+  */
+ void
+ mrb_init_string(mrb_state *mrb)
+ {
+   struct RClass *s;
+
+   s = mrb->string_class = mrb_define_class(mrb, "String", mrb->object_class);
+   MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
+   mrb_include_module(mrb, s, mrb_class_get(mrb, "Comparable"));
+
+   mrb_define_method(mrb, s, "+", mrb_str_plus_m, ARGS_REQ(1)); /* 15.2.10.5.2 */
+   mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, ARGS_NONE());
+   mrb_define_method(mrb, s, "size", mrb_str_size, ARGS_NONE()); /* 15.2.10.5.33 */
+   mrb_define_method(mrb, s, "length", mrb_str_size, ARGS_NONE()); /* 15.2.10.5.26 */
+   mrb_define_method(mrb, s, "*", mrb_str_times, ARGS_REQ(1)); /* 15.2.10.5.1 */
+   mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, ARGS_REQ(1)); /* 15.2.10.5.3 */
+   mrb_define_method(mrb, s, "==", mrb_str_equal_m, ARGS_REQ(1)); /* 15.2.10.5.4 */
+   mrb_define_method(mrb, s, "=~", mrb_str_match, ARGS_REQ(1)); /* 15.2.10.5.5 */
+   mrb_define_method(mrb, s, "[]", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.6 */
+   mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, ARGS_NONE()); /* 15.2.10.5.7 */
+   mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, ARGS_REQ(1)); /* 15.2.10.5.8 */
+   mrb_define_method(mrb, s, "chomp", mrb_str_chomp, ARGS_ANY()); /* 15.2.10.5.9 */
+   mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, ARGS_ANY()); /* 15.2.10.5.10 */
+   mrb_define_method(mrb, s, "chop", mrb_str_chop, ARGS_REQ(1)); /* 15.2.10.5.11 */
+   mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, ARGS_REQ(1)); /* 15.2.10.5.12 */
+   mrb_define_method(mrb, s, "downcase", mrb_str_downcase, ARGS_NONE()); /* 15.2.10.5.13 */
+   mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */
+   mrb_define_method(mrb, s, "each_line", mrb_str_each_line, ARGS_REQ(1)); /* 15.2.10.5.15 */
+   mrb_define_method(mrb, s, "empty?", mrb_str_empty, ARGS_NONE()); /* 15.2.10.5.16 */
+   mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */
+ #ifdef INCLUDE_REGEXP
+   mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */
+   mrb_define_method(mrb, s, "gsub!", mrb_str_gsub_bang, ARGS_REQ(1)); /* 15.2.10.5.19 */
+ #endif
+   mrb_define_method(mrb, s, "hash", mrb_str_hash_m, ARGS_REQ(1)); /* 15.2.10.5.20 */
+   mrb_define_method(mrb, s, "include?", mrb_str_include, ARGS_REQ(1)); /* 15.2.10.5.21 */
+   mrb_define_method(mrb, s, "index", mrb_str_index_m, ARGS_ANY()); /* 15.2.10.5.22 */
+   mrb_define_method(mrb, s, "initialize", mrb_str_init, ARGS_REQ(1)); /* 15.2.10.5.23 */
+   mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.24 */
+   mrb_define_method(mrb, s, "intern", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.25 */
+ #ifdef INCLUDE_REGEXP
+   mrb_define_method(mrb, s, "match", mrb_str_match_m, ARGS_REQ(1)); /* 15.2.10.5.27 */
+ #endif
+   mrb_define_method(mrb, s, "replace", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.28 */
+   mrb_define_method(mrb, s, "reverse", mrb_str_reverse, ARGS_NONE()); /* 15.2.10.5.29 */
+   mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, ARGS_NONE()); /* 15.2.10.5.30 */
+   mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, ARGS_ANY()); /* 15.2.10.5.31 */
+ #ifdef INCLUDE_REGEXP
+   mrb_define_method(mrb, s, "scan", mrb_str_scan, ARGS_REQ(1)); /* 15.2.10.5.32 */
+ #endif
+   mrb_define_method(mrb, s, "slice", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.34 */
+   mrb_define_method(mrb, s, "split", mrb_str_split_m, ARGS_ANY()); /* 15.2.10.5.35 */
+ #ifdef INCLUDE_REGEXP
+   mrb_define_method(mrb, s, "sub", mrb_str_sub, ARGS_REQ(1)); /* 15.2.10.5.36 */
+   mrb_define_method(mrb, s, "sub!", mrb_str_sub_bang, ARGS_REQ(1)); /* 15.2.10.5.37 */
+ #endif
+   mrb_define_method(mrb, s, "to_i", mrb_str_to_i, ARGS_ANY()); /* 15.2.10.5.38 */
+   mrb_define_method(mrb, s, "to_f", mrb_str_to_f, ARGS_NONE()); /* 15.2.10.5.39 */
+   mrb_define_method(mrb, s, "to_s", mrb_str_to_s, ARGS_NONE()); /* 15.2.10.5.40 */
+   mrb_define_method(mrb, s, "to_str", mrb_str_to_s, ARGS_NONE()); /* 15.2.10.5.40 */
+   mrb_define_method(mrb, s, "to_sym", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.41 */
+   /* upcase / upcase! read no arguments, like downcase / downcase!. */
+   mrb_define_method(mrb, s, "upcase", mrb_str_upcase, ARGS_NONE()); /* 15.2.10.5.42 */
+   mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, ARGS_NONE()); /* 15.2.10.5.43 */
+ #ifdef INCLUDE_ENCODING
+   mrb_define_method(mrb, s, "encoding", mrb_obj_encoding, ARGS_NONE()); /* 15.2.10.5.44(x) */
+   mrb_define_method(mrb, s, "force_encoding", mrb_str_force_encoding, ARGS_REQ(1)); /* 15.2.10.5.45(x) */
+ #endif
+   mrb_define_method(mrb, s, "inspect", mrb_str_inspect, ARGS_NONE()); /* 15.2.10.5.46(x) */
+ }
diff --git a/src/struct.c b/src/struct.c
new file mode 100644
index 000000000..9af6e2ee4
--- /dev/null
+++ b/src/struct.c
@@ -0,0 +1,824 @@
+/**********************************************************************
+
+ struct.c -
+
+ $Author: marcandre $
+ created at: Tue Mar 22 18:44:30 JST 1995
+
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "mruby.h"
+#include <string.h>
+#include "error.h"
+#include "mruby/struct.h"
+#include "mruby/array.h"
+#include <stdarg.h>
+//#include "defines.h"
+
+#ifdef INCLUDE_REGEXP
+#include "encoding.h"
+#endif
+mrb_sym rb_frame_this_func(mrb_state *mrb);
+mrb_sym mrb_frame_callee(mrb_state *mrb);
+mrb_value mrb_exec_recursive_paired(mrb_state *mrb, mrb_value (*func) (mrb_state *, mrb_value, mrb_value, int),
+ mrb_value obj, mrb_value paired_obj, void* arg);
+
+#include "mruby/numeric.h"
+#include "mruby/hash.h"
+#include "mruby/string.h"
+#include "mruby/class.h"
+#include "variable.h"
+#include "mruby/range.h"
+#include "error.h"
+//#include "defines.h"
+#define mrb_long2int(n) ((int)(n))
+
+
+static mrb_value struct_alloc(mrb_state *mrb, mrb_value);
+
+/* Fetch the class registered under the name "Struct". */
+static struct RClass *
+struct_class(mrb_state *mrb)
+{
+  struct RClass *base = mrb_class_get(mrb, "Struct");
+
+  return base;
+}
+
+/*
+ * Read instance variable `id` from class value `c`, falling back to each
+ * superclass in turn.  The walk ends with nil when there is no superclass
+ * or the superclass is Struct itself.  A variable whose value is nil is
+ * treated the same as a missing one.
+ */
+static inline mrb_value
+struct_ivar_get(mrb_state *mrb, mrb_value c, mrb_sym id)
+{
+  struct RClass *stop = struct_class(mrb);
+  struct RClass *parent;
+  mrb_value found = mrb_iv_get(mrb, c, id);
+
+  while (mrb_nil_p(found)) {
+    parent = RCLASS_SUPER(c);
+    if (parent == 0 || parent == stop) {
+      return mrb_nil_value();
+    }
+    c = mrb_obj_value(parent);
+    found = mrb_iv_get(mrb, c, id);
+  }
+  return found;
+}
+
+/* Same lookup as struct_ivar_get, with the variable given as a C string. */
+mrb_value
+mrb_struct_iv_get(mrb_state *mrb, mrb_value c, const char *name)
+{
+  mrb_sym id = mrb_intern(mrb, name);
+
+  return struct_ivar_get(mrb, c, id);
+}
+
+/*
+ * Return the "__members__" array stored on klass (or inherited from a
+ * superclass below Struct).  Raises TypeError when the value is nil
+ * ("uninitialized struct") or is not an array ("corrupted struct").
+ */
+mrb_value
+mrb_struct_s_members(mrb_state *mrb, mrb_value klass)
+{
+  mrb_sym key = mrb_intern(mrb, "__members__");
+  mrb_value list = struct_ivar_get(mrb, klass, key);
+
+  if (mrb_nil_p(list)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "uninitialized struct");
+  }
+  if (mrb_type(list) != MRB_TT_ARRAY) {
+    mrb_raise(mrb, E_TYPE_ERROR, "corrupted struct");
+  }
+
+  return list;
+}
+
+/*
+ * Return the member list of the class of s.  When s is tagged
+ * MRB_TT_STRUCT, its slot count must equal the number of members,
+ * otherwise TypeError is raised.
+ */
+mrb_value
+mrb_struct_members(mrb_state *mrb, mrb_value s)
+{
+  struct RClass *cls = mrb_obj_class(mrb, s);
+  mrb_value members = mrb_struct_s_members(mrb, mrb_obj_value(cls));
+
+  if (mrb_type(s) == MRB_TT_STRUCT && RSTRUCT_LEN(s) != RARRAY_LEN(members)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "struct size differs (%ld required %ld given)",
+              RARRAY_LEN(members), RSTRUCT_LEN(s));
+  }
+  return members;
+}
+
+/*
+ * Class-level "members": returns a new array holding the same elements
+ * as the stored member list, so callers never receive the stored array.
+ */
+static mrb_value
+mrb_struct_s_members_m(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value src = mrb_struct_s_members(mrb, klass);
+  mrb_value copy = mrb_ary_new_capa(mrb, RARRAY_LEN(src));
+  mrb_value *cur = RARRAY_PTR(src);
+  mrb_value *stop = cur + RARRAY_LEN(src);
+
+  for (; cur < stop; cur++) {
+    mrb_ary_push(mrb, copy, *cur);
+  }
+
+  return copy;
+}
+
+/* 15.2.18.4.6 */
+/*
+ * call-seq:
+ * struct.members -> array
+ *
+ * Returns an array of symbols representing the names of the instance
+ * variables.
+ *
+ * Customer = Struct.new(:name, :address, :zip)
+ * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345)
+ * joe.members #=> [:name, :address, :zip]
+ */
+
+static mrb_value
+mrb_struct_members_m(mrb_state *mrb, mrb_value obj)
+{
+  /* instance-level "members" delegates to the class-level method */
+  struct RClass *cls = mrb_obj_class(mrb, obj);
+
+  return mrb_struct_s_members_m(mrb, mrb_obj_value(cls));
+}
+
+/*
+ * Return the value of the member of obj named by symbol id.
+ *
+ * Raises a name error when no member matches.
+ *
+ * The previous version built a fresh string from the symbol name and
+ * compared it to each member with mrb_obj_equal, which this file uses as
+ * an identity test (see mrb_struct_init_copy).  A newly allocated string
+ * is never identical to a stored member, so the lookup always failed.
+ * Members are now matched by symbol id, as mrb_struct_aref_id does.
+ * Because mrb_struct_define stores member names as strings, string
+ * entries are matched by name as well.
+ */
+mrb_value
+mrb_struct_getmember(mrb_state *mrb, mrb_value obj, mrb_sym id)
+{
+  mrb_value members, *ptr, *ptr_members;
+  const char *name;
+  long i, len;
+
+  ptr = RSTRUCT_PTR(obj);
+  members = mrb_struct_members(mrb, obj);
+  ptr_members = RARRAY_PTR(members);
+  name = mrb_sym2name(mrb, id);   /* NULL when the id is unknown */
+  len = RARRAY_LEN(members);
+  for (i=0; i<len; i++) {
+    mrb_value member = ptr_members[i];
+
+    if (mrb_type(member) == MRB_TT_SYMBOL) {
+      if (SYM2ID(member) == id) {
+        return ptr[i];
+      }
+    }
+    else if (name && mrb_type(member) == MRB_TT_STRING &&
+             strcmp(RSTRING_PTR(member), name) == 0) {
+      return ptr[i];
+    }
+  }
+  mrb_name_error(mrb, id, "%s is not struct member", mrb_sym2name(mrb, id));
+  return mrb_nil_value(); /* not reached */
+}
+
+/*
+ * Generic member reader, installed by make_struct for members whose index
+ * is not covered by the ref_func table.
+ *
+ * Currently a stub: the lookup by calling-method name is commented out,
+ * so this always returns nil regardless of the stored value.
+ */
+static mrb_value
+mrb_struct_ref(mrb_state *mrb, mrb_value obj)
+{
+ //return mrb_struct_getmember(mrb, obj, rb_frame_this_func(mrb));
+ return mrb_nil_value();
+}
+
+/*
+ * Fixed-slot readers: mrb_struct_refN returns slot N of the receiver's
+ * value array.  No bounds check is performed here.
+ */
+#define STRUCT_REF_FUNC(n) \
+static mrb_value mrb_struct_ref##n(mrb_state* mrb, mrb_value obj) \
+{ return RSTRUCT_PTR(obj)[n]; }
+
+STRUCT_REF_FUNC(0)
+STRUCT_REF_FUNC(1)
+STRUCT_REF_FUNC(2)
+STRUCT_REF_FUNC(3)
+STRUCT_REF_FUNC(4)
+STRUCT_REF_FUNC(5)
+STRUCT_REF_FUNC(6)
+STRUCT_REF_FUNC(7)
+STRUCT_REF_FUNC(8)
+STRUCT_REF_FUNC(9)
+
+#undef STRUCT_REF_FUNC
+
+/* element count of a true array (not a pointer), as an int */
+#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
+/* number of fixed-slot readers available in ref_func */
+#define N_REF_FUNC numberof(ref_func)
+
+/* signature shared by the fixed-slot member readers */
+typedef mrb_value (*struct_ref_fn)(mrb_state*, mrb_value);
+
+/* ref_func[i] reads slot i; make_struct picks from here by member index */
+static const struct_ref_fn ref_func[] = {
+  mrb_struct_ref0, mrb_struct_ref1, mrb_struct_ref2, mrb_struct_ref3,
+  mrb_struct_ref4, mrb_struct_ref5, mrb_struct_ref6, mrb_struct_ref7,
+  mrb_struct_ref8, mrb_struct_ref9,
+};
+
+/*
+ * Hook called before a struct is written to.  The frozen-object check is
+ * commented out, so this currently does nothing.
+ */
+static void
+mrb_struct_modify(mrb_value s)
+{
+ //if (OBJ_FROZEN(s)) mrb_error_frozen("Struct");
+}
+
+/*
+ * Intended to map a member id to the id of its setter.  The bit
+ * manipulation is commented out, so the id is returned unchanged and the
+ * "setter id" equals the getter id.
+ */
+mrb_sym
+mrb_id_attrset(mrb_sym id)
+{
+ //id &= ~ID_SCOPE_MASK;
+ //id |= ID_ATTRSET;
+ return id;
+}
+
+/*
+ * Member setter body, installed by make_struct through a cast to
+ * mrb_func_t.
+ *
+ * Stores val into the slot of the member whose setter id equals the id of
+ * the calling method.  The calling-method lookup (rb_frame_this_func) is
+ * commented out and replaced by the literal 0, so as written the loop
+ * compares every member's id against 0 and the name error below is
+ * raised for symbol 0.
+ *
+ * NOTE(review): the three-parameter signature differs from the
+ * two-parameter method functions used elsewhere in this file; how `val`
+ * is populated when called through mrb_func_t is not visible here.
+ */
+static mrb_value
+mrb_struct_set(mrb_state *mrb, mrb_value obj, mrb_value val)
+{
+ mrb_value members, slot, *ptr, *ptr_members;
+ long i, len;
+
+ members = mrb_struct_members(mrb, obj);
+ ptr_members = RARRAY_PTR(members);
+ len = RARRAY_LEN(members);
+ mrb_struct_modify(obj);
+ ptr = RSTRUCT_PTR(obj);
+ for (i=0; i<len; i++) {
+ slot = ptr_members[i];
+ /* placeholder comparison: 0 stands in for the calling method's id */
+ if (mrb_id_attrset(SYM2ID(slot)) == 0/*rb_frame_this_func(mrb)*/) {
+ return ptr[i] = val;
+ }
+ }
+ mrb_name_error(mrb, 0/*rb_frame_this_func(mrb)*/, "`%s' is not a struct member",
+ mrb_sym2name(mrb, 0/*rb_frame_this_func(mrb)*/));
+ return mrb_nil_value(); /* not reached */
+}
+
+/* With the token/scope tests commented out, both macros reduce to the id itself. */
+#define is_notop_id(id) (id)//((id)>tLAST_TOKEN)
+#define is_local_id(id) (is_notop_id(id))//&&((id)&ID_SCOPE_MASK)==ID_LOCAL)
+/* Returns the id converted to int, i.e. non-zero for any non-zero id. */
+int
+mrb_is_local_id(mrb_sym id)
+{
+  int is_local = is_local_id(id);
+
+  return is_local;
+}
+
+/* The scope test is commented out, so this reduces to is_notop_id(id). */
+#define is_const_id(id) (is_notop_id(id))//&&((id)&ID_SCOPE_MASK)==ID_CONST)
+/* Returns the id converted to int, i.e. non-zero for any non-zero id. */
+int
+mrb_is_const_id(mrb_sym id)
+{
+  int is_const = is_const_id(id);
+
+  return is_const;
+}
+
+/*
+ * Create a struct class below klass and return it as a value.
+ *
+ *   name    - nil for an anonymous class; otherwise converted to a string
+ *             and used as the constant name under klass
+ *   members - array of member names; entries are read with SYM2ID
+ *   klass   - superclass (and namespace for a named struct)
+ *
+ * The new class gets the member list in "__members__", the class methods
+ * new / [] / members, and one reader plus one writer per member.
+ *
+ * The writer used to be registered under mrb_id_attrset(id).  That
+ * function returns its argument unchanged, so the writer replaced the
+ * reader that had just been defined under the same id.  The writer is
+ * now registered under "<member>=" so the reader survives.
+ */
+static mrb_value
+make_struct(mrb_state *mrb, mrb_value name, mrb_value members, struct RClass * klass)
+{
+  mrb_value nstr, *ptr_members;
+  mrb_sym id;
+  long i, len;
+  struct RClass *c;
+
+  //OBJ_FREEZE(members);
+  if (mrb_nil_p(name)) {
+    c = mrb_class_new(mrb, klass);
+  }
+  else {
+    /* old style: should we warn? */
+    name = mrb_str_to_str(mrb, name);
+    id = mrb_to_id(mrb, name);
+    if (!mrb_is_const_id(id)) {
+      mrb_name_error(mrb, id, "identifier %s needs to be constant", mrb_string_value_ptr(mrb, name));
+    }
+    if (mrb_const_defined_at(mrb, klass, id)) {
+      mrb_warn("redefining constant Struct::%s", mrb_string_value_ptr(mrb, name));
+    }
+    c = mrb_define_class_under(mrb, klass, RSTRING_PTR(name), klass);
+  }
+  MRB_SET_INSTANCE_TT(c, MRB_TT_STRUCT);
+  nstr = mrb_obj_value(c);
+  mrb_iv_set(mrb, nstr, mrb_intern(mrb, "__members__"), members);
+
+  mrb_define_class_method(mrb, c, "new", mrb_class_new_instance_m, ARGS_ANY());
+  mrb_define_class_method(mrb, c, "[]", mrb_class_new_instance_m, ARGS_ANY());
+  mrb_define_class_method(mrb, c, "members", mrb_struct_s_members_m, ARGS_NONE());
+
+  ptr_members = RARRAY_PTR(members);
+  len = RARRAY_LEN(members);
+  for (i=0; i< len; i++) {
+    mrb_sym mid = SYM2ID(ptr_members[i]);
+
+    if (mrb_is_local_id(mid) || mrb_is_const_id(mid)) {
+      mrb_value setter_name;
+
+      /* reader: a fixed-slot function when one exists for this index */
+      if (i < N_REF_FUNC) {
+        mrb_define_method_id(mrb, c, mid, (mrb_func_t)ref_func[i], 0);
+      }
+      else {
+        mrb_define_method_id(mrb, c, mid, mrb_struct_ref, 0);
+      }
+
+      /* writer: registered as "<member>=" so it cannot shadow the reader */
+      setter_name = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, mid));
+      mrb_str_cat2(mrb, setter_name, "=");
+      mrb_define_method_id(mrb, c, mrb_intern(mrb, RSTRING_PTR(setter_name)),
+                           (mrb_func_t)mrb_struct_set, 1);
+    }
+  }
+
+  return nstr;
+}
+
+/*
+ * C-level way to define a struct class.  `name` may be NULL for an
+ * anonymous class; the variadic tail is a 0-terminated list of member
+ * names given as C strings.  Returns the new class as a value.
+ *
+ * NOTE(review): members are pushed as strings here, while make_struct
+ * reads each entry with SYM2ID -- confirm which representation is intended.
+ */
+mrb_value
+mrb_struct_define(mrb_state *mrb, const char *name, ...)
+{
+  va_list ap;
+  mrb_value class_name = name ? mrb_str_new2(mrb, name) : mrb_nil_value();
+  mrb_value member_list = mrb_ary_new(mrb);
+  char *field;
+
+  va_start(ap, name);
+  for (;;) {
+    mrb_sym sym;
+
+    field = va_arg(ap, char*);
+    if (field == 0) break;
+    sym = mrb_intern(mrb, field);
+    mrb_ary_push(mrb, member_list, mrb_str_new_cstr(mrb, mrb_sym2name(mrb, sym)));
+  }
+  va_end(ap);
+
+  return make_struct(mrb, class_name, member_list, struct_class(mrb));
+}
+
+/* 15.2.18.3.1 */
+/*
+ * call-seq:
+ * Struct.new( [aString] [, aSym]+ ) -> StructClass
+ * StructClass.new(arg, ...) -> obj
+ * StructClass[arg, ...] -> obj
+ *
+ * Creates a new class, named by <i>aString</i>, containing accessor
+ * methods for the given symbols. If the name <i>aString</i> is
+ * omitted, an anonymous structure class will be created. Otherwise,
+ * the name of this struct will appear as a constant in class
+ * <code>Struct</code>, so it must be unique for all
+ * <code>Struct</code>s in the system and should start with a capital
+ * letter. Assigning a structure class to a constant effectively gives
+ * the class the name of the constant.
+ *
+ * <code>Struct::new</code> returns a new <code>Class</code> object,
+ * which can then be used to create specific instances of the new
+ * structure. The number of actual parameters must be
+ * less than or equal to the number of attributes defined for this
+ * class; unset parameters default to <code>nil</code>. Passing too many
+ * parameters will raise an <code>ArgumentError</code>.
+ *
+ * The remaining methods listed in this section (class and instance)
+ * are defined for this generated class.
+ *
+ * # Create a structure with a name in Struct
+ * Struct.new("Customer", :name, :address) #=> Struct::Customer
+ * Struct::Customer.new("Dave", "123 Main") #=> #<struct Struct::Customer name="Dave", address="123 Main">
+ *
+ * # Create a structure named by its constant
+ * Customer = Struct.new(:name, :address) #=> Customer
+ * Customer.new("Dave", "123 Main") #=> #<struct Customer name="Dave", address="123 Main">
+ */
+static mrb_value
+mrb_struct_s_def(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value name, rest;
+  mrb_value *pargv;
+  int argcnt;
+  long i;
+  mrb_value b, st;
+  mrb_sym id;
+  mrb_value *argv;
+  int argc;
+
+  name = mrb_nil_value();
+  rest = mrb_nil_value();
+  mrb_get_args(mrb, "&*", &b, &argv, &argc);
+  if (argc < 1) {
+    /*
+     * At least a name or one member is required.  Without this guard the
+     * non-array branch below computes argcnt = argc-1 = -1 and builds the
+     * member array from &argv[1] with a negative count.
+     */
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (0 for 1+)");
+    return mrb_nil_value(); /* not reached */
+  }
+  name = argv[0];
+  if (argc > 1) rest = argv[1];
+  //mrb_scan_args(argc, argv, "1*", &name, &rest);
+  if (mrb_type(rest) == MRB_TT_ARRAY) {
+    /* second argument is already an array of member names */
+    if (!mrb_nil_p(name) && SYMBOL_P(name)) {
+      /* 1stArgument:symbol -> name=nil rest=argv[0]-[n] */
+      mrb_ary_unshift(mrb, rest, name);
+      name = mrb_nil_value();
+    }
+    /* each entry is replaced in place by a string of its name */
+    for (i=0; i<RARRAY_LEN(rest); i++) {
+      id = mrb_to_id(mrb, RARRAY_PTR(rest)[i]);
+      RARRAY_PTR(rest)[i] = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));
+    }
+  }
+  else {
+    /* members are the arguments after the name ... */
+    pargv = &argv[1];
+    argcnt = argc-1;
+    if (!mrb_nil_p(name) && SYMBOL_P(name)) {
+      /* 1stArgument:symbol -> name=nil rest=argv[0]-[n] */
+      //mrb_ary_unshift(mrb, rest, name);
+      name = mrb_nil_value();
+      pargv = &argv[0];
+      argcnt++;
+    }
+    rest = mrb_ary_new_from_values(mrb, pargv, argcnt);
+  }
+  /* the new class is always created under Struct; `klass` is not consulted */
+  st = make_struct(mrb, name, rest, struct_class(mrb));
+  if (!mrb_nil_p(b)) {
+    /* NOTE(review): passes the address of st as the block argument -- confirm against mrb_funcall's contract */
+    mrb_funcall(mrb, b, "call", 1, &st);
+  }
+
+  return st;
+}
+
+/*
+ * Number of members declared for klass, taken from the length of its
+ * "__members__" array.  Raises TypeError ("broken members") when the
+ * stored value is not an array.
+ */
+static long
+num_members(mrb_state *mrb, struct RClass *klass)
+{
+  mrb_sym key = mrb_intern(mrb, "__members__");
+  mrb_value list = struct_ivar_get(mrb, mrb_obj_value(klass), key);
+
+  if (mrb_type(list) != MRB_TT_ARRAY) {
+    mrb_raise(mrb, E_TYPE_ERROR, "broken members");
+  }
+  return RARRAY_LEN(list);
+}
+
+/* 15.2.18.4.8 */
+/*
+ */
+static mrb_value
+mrb_struct_initialize_withArg(mrb_state *mrb, int argc, mrb_value *argv, mrb_value self)
+{
+  /*
+   * Fill the value slots of `self` from argv[0..argc-1].
+   * Raises ArgumentError when more values are given than the class has
+   * members; members without a supplied value are set to nil.
+   */
+  struct RClass *klass = mrb_obj_class(mrb, self);
+  long n, i;
+  struct RStruct *st;
+
+  mrb_struct_modify(self);
+  n = num_members(mrb, klass);
+  if (n < argc) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "struct size differs");
+  }
+  st = RSTRUCT(self);
+  /*
+   * The buffer must hold all n members because len is set to n.  It was
+   * previously sized for argc values only, which left slots argc..n-1
+   * outside the allocation whenever fewer values than members were given.
+   * NOTE(review): the malloc result is not checked, as before.
+   */
+  st->ptr = malloc(sizeof(mrb_value)*n);
+  st->len = n;
+  if (argc > 0) {
+    memcpy(st->ptr, argv, sizeof(mrb_value)*argc);
+  }
+  /* unset members default to nil */
+  for (i = argc; i < n; i++) {
+    st->ptr[i] = mrb_nil_value();
+  }
+  return self;
+}
+
+/* Method entry for "initialize": forwards all call arguments as member values. */
+static mrb_value
+mrb_struct_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
+{
+  int count;
+  mrb_value *values;
+
+  mrb_get_args(mrb, "*", &values, &count);
+
+  return mrb_struct_initialize_withArg(mrb, count, values, self);
+}
+
+/* Initialize `self` from the elements of the array `values`. */
+mrb_value
+mrb_struct_initialize(mrb_state *mrb, mrb_value self, mrb_value values)
+{
+  mrb_value *items = RARRAY_PTR(values);
+
+  return mrb_struct_initialize_withArg(mrb, RARRAY_LEN(values), items, self);
+}
+
+/* Create an instance through mrb_class_new_instance, passing the array elements as arguments. */
+mrb_value
+mrb_struct_alloc(mrb_state *mrb, mrb_value klass, mrb_value values)
+{
+  mrb_value *items = RARRAY_PTR(values);
+
+  return mrb_class_new_instance(mrb, RARRAY_LEN(values), items, mrb_class(mrb, klass));
+}
+
+/*
+ * Create an instance of klass from variadic mrb_value arguments.  Exactly
+ * as many values are read as klass has members.  Up to N_REF_FUNC values
+ * are staged in a stack buffer; larger member counts use a temporary array.
+ */
+mrb_value
+mrb_struct_new(mrb_state *mrb, struct RClass *klass, ...)
+{
+  mrb_value stack_buf[N_REF_FUNC];
+  mrb_value *slots = stack_buf;
+  int count, filled;
+  va_list ap;
+
+  count = mrb_long2int(num_members(mrb, klass));
+  if (count > numberof(stack_buf)) {
+    /* slot 0 of the stack buffer keeps the temporary array referenced */
+    stack_buf[0] = mrb_ary_tmp_new(mrb, count);
+    slots = RARRAY_PTR(stack_buf[0]);
+  }
+
+  va_start(ap, klass);
+  filled = 0;
+  while (filled < count) {
+    slots[filled] = va_arg(ap, mrb_value);
+    filled++;
+  }
+  va_end(ap);
+
+  return mrb_class_new_instance(mrb, count, slots, klass);
+}
+
+/*
+ * Build the "#<struct ...>" description of s.  `dummy` is not used.  A
+ * non-zero `recur` yields the abbreviated "...:...>" form without listing
+ * members.
+ */
+static mrb_value
+inspect_struct(mrb_state *mrb, mrb_value s, mrb_value dummy, int recur)
+{
+  const char *cn = mrb_class_name(mrb, mrb_obj_class(mrb, s));
+  mrb_value out = mrb_str_new2(mrb, "#<struct ");
+  mrb_value members;
+  mrb_value *names, *values;
+  long idx, count;
+
+  if (cn) {
+    mrb_str_append(mrb, out, mrb_str_new_cstr(mrb, cn));
+  }
+  if (recur) {
+    return mrb_str_cat2(mrb, out, ":...>");
+  }
+
+  members = mrb_struct_members(mrb, s);
+  names = RARRAY_PTR(members);
+  values = RSTRUCT_PTR(s);
+  count = RSTRUCT_LEN(s);
+  for (idx = 0; idx < count; idx++) {
+    const char *sep = 0;
+    mrb_value member;
+    mrb_sym id;
+
+    /* ", " between members; a single blank after the class name before the first */
+    if (idx > 0) {
+      sep = ", ";
+    }
+    else if (cn) {
+      sep = " ";
+    }
+    if (sep) {
+      mrb_str_cat2(mrb, out, sep);
+    }
+
+    member = names[idx];
+    id = SYM2ID(member);
+    if (mrb_is_local_id(id) || mrb_is_const_id(id)) {
+      mrb_str_append(mrb, out, mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id)));
+    }
+    else {
+      mrb_str_append(mrb, out, mrb_inspect(mrb, member));
+    }
+    mrb_str_cat2(mrb, out, "=");
+    mrb_str_append(mrb, out, mrb_inspect(mrb, values[idx]));
+  }
+  mrb_str_cat2(mrb, out, ">");
+
+  return out;
+}
+
+/*
+ * call-seq:
+ * struct.to_s -> string
+ * struct.inspect -> string
+ *
+ * Describe the contents of this struct in a string.
+ */
+static mrb_value
+mrb_struct_inspect(mrb_state *mrb, mrb_value s)
+{
+  /* always the full listing: the recursion flag is passed as 0 */
+  const int recur = 0;
+
+  return inspect_struct(mrb, s, s, recur);
+}
+
+/* 15.2.18.4.9 */
+/* :nodoc: */
+mrb_value
+mrb_struct_init_copy(mrb_state *mrb, mrb_value copy)
+{
+  mrb_value src;
+
+  mrb_get_args(mrb, "o", &src);
+
+  /* copying an object onto itself is a no-op */
+  if (mrb_obj_equal(mrb, copy, src)) {
+    return copy;
+  }
+  /* source must be a direct instance of the receiver's class ... */
+  if (!mrb_obj_is_instance_of(mrb, src, mrb_obj_class(mrb, copy))) {
+    mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class");
+  }
+  /* ... and both sides must already have the same number of slots */
+  if (RSTRUCT_LEN(copy) != RSTRUCT_LEN(src)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "struct size mismatch");
+  }
+  memcpy(RSTRUCT_PTR(copy), RSTRUCT_PTR(src), sizeof(mrb_value)*RSTRUCT_LEN(copy));
+
+  return copy;
+}
+
+/*
+ * Return the slot of s whose member symbol has id `id`; raises a name
+ * error when no member matches.
+ */
+static mrb_value
+mrb_struct_aref_id(mrb_state *mrb, mrb_value s, mrb_sym id)
+{
+  mrb_value *values = RSTRUCT_PTR(s);
+  mrb_value members = mrb_struct_members(mrb, s);
+  mrb_value *names = RARRAY_PTR(members);
+  long count = RARRAY_LEN(members);
+  long pos = 0;
+
+  while (pos < count) {
+    if (SYM2ID(names[pos]) == id) {
+      return values[pos];
+    }
+    pos++;
+  }
+  mrb_name_error(mrb, id, "no member '%s' in struct", mrb_sym2name(mrb, id));
+  return mrb_nil_value(); /* not reached */
+}
+
+/* 15.2.18.4.2 */
+/*
+ * call-seq:
+ * struct[symbol] -> anObject
+ * struct[fixnum] -> anObject
+ *
+ * Attribute Reference---Returns the value of the instance variable
+ * named by <i>symbol</i>, or indexed (0..length-1) by
+ * <i>fixnum</i>. Will raise <code>NameError</code> if the named
+ * variable does not exist, or <code>IndexError</code> if the index is
+ * out of range.
+ *
+ * Customer = Struct.new(:name, :address, :zip)
+ * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345)
+ *
+ * joe["name"] #=> "Joe Smith"
+ * joe[:name] #=> "Joe Smith"
+ * joe[0] #=> "Joe Smith"
+ */
+mrb_value
+mrb_struct_aref_n(mrb_state *mrb, mrb_value s, mrb_value idx)
+{
+  long pos;
+  int by_name = (mrb_type(idx) == MRB_TT_STRING || mrb_type(idx) == MRB_TT_SYMBOL);
+
+  if (by_name) {
+    return mrb_struct_aref_id(mrb, s, mrb_to_id(mrb, idx));
+  }
+
+  /* numeric index: negative values count back from the end */
+  pos = mrb_fixnum(idx);
+  if (pos < 0) {
+    pos = RSTRUCT_LEN(s) + pos;
+  }
+  if (pos < 0) {
+    mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too small for struct(size:%ld)",
+              pos, RSTRUCT_LEN(s));
+  }
+  if (RSTRUCT_LEN(s) <= pos) {
+    mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too large for struct(size:%ld)",
+              pos, RSTRUCT_LEN(s));
+  }
+  return RSTRUCT_PTR(s)[pos];
+}
+
+/* Method entry for "[]": reads the single index argument and delegates. */
+mrb_value
+mrb_struct_aref(mrb_state *mrb, mrb_value s)
+{
+  mrb_value key;
+
+  mrb_get_args(mrb, "o", &key);
+
+  return mrb_struct_aref_n(mrb, s, key);
+}
+
+/*
+ * Store val in the slot of s whose member symbol has id `id` and return
+ * val.  Raises TypeError when the slot count differs from the member
+ * count, and a name error when no member matches.
+ */
+static mrb_value
+mrb_struct_aset_id(mrb_state *mrb, mrb_value s, mrb_sym id, mrb_value val)
+{
+  mrb_value members = mrb_struct_members(mrb, s);
+  long len = RARRAY_LEN(members);
+  mrb_value *values, *names;
+  long pos;
+
+  mrb_struct_modify(s);
+  if (RSTRUCT_LEN(s) != len) {
+    mrb_raise(mrb, E_TYPE_ERROR, "struct size differs (%ld required %ld given)",
+              len, RSTRUCT_LEN(s));
+  }
+  values = RSTRUCT_PTR(s);
+  names = RARRAY_PTR(members);
+  for (pos = 0; pos < len; pos++) {
+    if (SYM2ID(names[pos]) != id) continue;
+    values[pos] = val;
+    return val;
+  }
+  mrb_name_error(mrb, id, "no member '%s' in struct", mrb_sym2name(mrb, id));
+  return val; /* not reach */
+}
+
+/* 15.2.18.4.3 */
+/*
+ * call-seq:
+ * struct[symbol] = obj -> obj
+ * struct[fixnum] = obj -> obj
+ *
+ * Attribute Assignment---Assigns to the instance variable named by
+ * <i>symbol</i> or <i>fixnum</i> the value <i>obj</i> and
+ * returns it. Will raise a <code>NameError</code> if the named
+ * variable does not exist, or an <code>IndexError</code> if the index
+ * is out of range.
+ *
+ * Customer = Struct.new(:name, :address, :zip)
+ * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345)
+ *
+ * joe["name"] = "Luke"
+ * joe[:zip] = "90210"
+ *
+ * joe.name #=> "Luke"
+ * joe.zip #=> "90210"
+ */
+
+mrb_value
+mrb_struct_aset(mrb_state *mrb, mrb_value s)
+{
+  mrb_value key, val;
+  long pos;
+  int by_name;
+
+  mrb_get_args(mrb, "oo", &key, &val);
+
+  by_name = (mrb_type(key) == MRB_TT_STRING || mrb_type(key) == MRB_TT_SYMBOL);
+  if (by_name) {
+    return mrb_struct_aset_id(mrb, s, mrb_to_id(mrb, key), val);
+  }
+
+  /* numeric index: negative values count back from the end */
+  pos = mrb_fixnum(key);
+  if (pos < 0) {
+    pos = RSTRUCT_LEN(s) + pos;
+  }
+  if (pos < 0) {
+    mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too small for struct(size:%ld)",
+              pos, RSTRUCT_LEN(s));
+  }
+  if (RSTRUCT_LEN(s) <= pos) {
+    mrb_raise(mrb, E_INDEX_ERROR, "offset %ld too large for struct(size:%ld)",
+              pos, RSTRUCT_LEN(s));
+  }
+  mrb_struct_modify(s);
+  RSTRUCT_PTR(s)[pos] = val;
+  return val;
+}
+
+/*
+ * Slot-by-slot comparison of s and s2 with mrb_equal, over the slot count
+ * of s.  When `recur` is set the pair is reported equal without looking
+ * at the slots.
+ */
+static mrb_value
+recursive_equal(mrb_state *mrb, mrb_value s, mrb_value s2, int recur)
+{
+  mrb_value *lhs, *rhs;
+  long remaining;
+
+  if (recur) return mrb_true_value(); /* Subtle! */
+
+  lhs = RSTRUCT_PTR(s);
+  rhs = RSTRUCT_PTR(s2);
+  for (remaining = RSTRUCT_LEN(s); remaining > 0; remaining--, lhs++, rhs++) {
+    if (!mrb_equal(mrb, *lhs, *rhs)) {
+      return mrb_false_value();
+    }
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.18.4.1 */
+/*
+ * call-seq:
+ * struct == other_struct -> true or false
+ *
+ * Equality---Returns <code>true</code> if <i>other_struct</i> is
+ * equal to this one: they must be of the same class as generated by
+ * <code>Struct::new</code>, and the values of all instance variables
+ * must be equal (according to <code>Object#==</code>).
+ *
+ * Customer = Struct.new(:name, :address, :zip)
+ * joe = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345)
+ * joejr = Customer.new("Joe Smith", "123 Maple, Anytown NC", 12345)
+ * jane = Customer.new("Jane Doe", "456 Elm, Anytown NC", 12345)
+ * joe == joejr #=> true
+ * joe == jane #=> false
+ */
+
+static mrb_value
+mrb_struct_equal(mrb_state *mrb, mrb_value s)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* identical object: equal without inspecting slots */
+  if (mrb_obj_equal(mrb, s, other)) {
+    return mrb_true_value();
+  }
+  /* anything that is not a struct of exactly the same class is unequal */
+  if (mrb_type(other) != MRB_TT_STRUCT ||
+      mrb_obj_class(mrb, s) != mrb_obj_class(mrb, other)) {
+    return mrb_false_value();
+  }
+  if (RSTRUCT_LEN(s) != RSTRUCT_LEN(other)) {
+    mrb_bug("inconsistent struct"); /* should never happen */
+  }
+
+  return mrb_exec_recursive_paired(mrb, recursive_equal, s, other, (void*)0);
+}
+
+/*
+ * Slot-by-slot comparison of s and s2 with mrb_eql, over the slot count
+ * of s.  When `recur` is set the pair is reported equal without looking
+ * at the slots.
+ */
+static mrb_value
+recursive_eql(mrb_state *mrb, mrb_value s, mrb_value s2, int recur)
+{
+  mrb_value *lhs, *rhs;
+  long remaining;
+
+  if (recur) return mrb_true_value(); /* Subtle! */
+
+  lhs = RSTRUCT_PTR(s);
+  rhs = RSTRUCT_PTR(s2);
+  for (remaining = RSTRUCT_LEN(s); remaining > 0; remaining--, lhs++, rhs++) {
+    if (!mrb_eql(mrb, *lhs, *rhs)) {
+      return mrb_false_value();
+    }
+  }
+  return mrb_true_value();
+}
+
+/* 15.2.18.4.12(x) */
+/*
+ * call-seq:
+ * struct.eql?(other) -> true or false
+ *
+ * Two structures are equal if they are the same object, or if all their
+ * fields are equal (using <code>eql?</code>).
+ */
+static mrb_value
+mrb_struct_eql(mrb_state *mrb, mrb_value s)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* identical object: equal without inspecting slots */
+  if (mrb_obj_equal(mrb, s, other)) {
+    return mrb_true_value();
+  }
+  /* anything that is not a struct of exactly the same class is unequal */
+  if (mrb_type(other) != MRB_TT_STRUCT ||
+      mrb_obj_class(mrb, s) != mrb_obj_class(mrb, other)) {
+    return mrb_false_value();
+  }
+  if (RSTRUCT_LEN(s) != RSTRUCT_LEN(other)) {
+    mrb_bug("inconsistent struct"); /* should never happen */
+  }
+
+  return mrb_exec_recursive_paired(mrb, recursive_eql, s, other, (void*)0);
+}
+
+/*
+ * A <code>Struct</code> is a convenient way to bundle a number of
+ * attributes together, using accessor methods, without having to write
+ * an explicit class.
+ *
+ * The <code>Struct</code> class is a generator of specific classes,
+ * each one of which is defined to hold a set of variables and their
+ * accessors. In these examples, we'll call the generated class
+ * ``<i>Customer</i>Class,'' and we'll show an example instance of that
+ * class as ``<i>Customer</i>Inst.''
+ *
+ * In the descriptions that follow, the parameter <i>symbol</i> refers
+ * to a symbol, which is either a quoted string or a
+ * <code>Symbol</code> (such as <code>:name</code>).
+ */
+void
+mrb_init_struct(mrb_state *mrb)
+{
+ /* Define class Struct under Object and register its class and instance methods. */
+ struct RClass *st;
+ st = mrb_define_class(mrb, "Struct", mrb->object_class);
+ //mrb_include_module(mrb_cStruct, rb_mEnumerable);
+
+ //mrb_undef_alloc_func(mrb_cStruct);
+ /* Struct.new builds a new struct class rather than an instance */
+ mrb_define_class_method(mrb, st, "new", mrb_struct_s_def, ARGS_ANY()); /* 15.2.18.3.1 */
+
+ /* instance methods; the trailing numbers are the section tags used throughout this file */
+ mrb_define_method(mrb, st, "==", mrb_struct_equal, ARGS_REQ(1)); /* 15.2.18.4.1 */
+ mrb_define_method(mrb, st, "[]", mrb_struct_aref, ARGS_REQ(1)); /* 15.2.18.4.2 */
+ mrb_define_method(mrb, st, "[]=", mrb_struct_aset, ARGS_REQ(2)); /* 15.2.18.4.3 */
+ mrb_define_method(mrb, st, "members", mrb_struct_members_m, ARGS_NONE()); /* 15.2.18.4.6 */
+ mrb_define_method(mrb, st, "initialize", mrb_struct_initialize_m,ARGS_ANY()); /* 15.2.18.4.8 */
+ mrb_define_method(mrb, st, "initialize_copy", mrb_struct_init_copy, ARGS_REQ(1)); /* 15.2.18.4.9 */
+ mrb_define_method(mrb, st, "inspect", mrb_struct_inspect, ARGS_NONE()); /* 15.2.18.4.10(x) */
+ /* to_s is an alias, so it must be registered after inspect */
+ mrb_define_alias(mrb, st, "to_s", "inspect"); /* 15.2.18.4.11(x) */
+ mrb_define_method(mrb, st, "eql?", mrb_struct_eql, ARGS_REQ(1)); /* 15.2.18.4.12(x) */
+
+}
diff --git a/src/symbol.c b/src/symbol.c
new file mode 100644
index 000000000..ad3400834
--- /dev/null
+++ b/src/symbol.c
@@ -0,0 +1,255 @@
+#include "mruby.h"
+#include "ritehash.h"
+#include <string.h>
+
+#include <stdarg.h>
+#include <string.h>
+#include "mruby/string.h"
+#include <ctype.h>
+#include "mruby/class.h"
+#include "variable.h"
+#include <stdio.h>
+
+#ifdef INCLUDE_REGEXP
+#include "re.h"
+#include "regex.h"
+#include "st.h"
+#endif
+
+/* ------------------------------------------------------ */
+KHASH_MAP_INIT_INT(s2n, const char*);
+KHASH_MAP_INIT_STR(n2s, mrb_sym);
+/* ------------------------------------------------------ */
+/*
+ * Return the symbol id for `name`, creating it on first use.
+ *
+ * A new name gets the next value of mrb->symidx and a private
+ * NUL-terminated copy of the text; that copy serves both as the key of
+ * the name->symbol table and as the value of the symbol->name table.
+ */
+mrb_sym
+mrb_intern(mrb_state *mrb, const char *name)
+{
+  khash_t(n2s) *by_name = mrb->name2sym;
+  khash_t(s2n) *by_sym = mrb->sym2name;
+  khiter_t it;
+  int ret;
+  size_t n;
+  char *copy;
+  mrb_sym sym;
+
+  /* already interned? */
+  it = kh_get(n2s, by_name, name);
+  if (it != kh_end(by_name)) {
+    return kh_value(by_name, it);
+  }
+
+  mrb->symidx += 1;
+  sym = mrb->symidx;
+
+  n = strlen(name);
+  copy = mrb_malloc(mrb, n+1);
+  memcpy(copy, name, n);
+  copy[n] = 0;
+
+  it = kh_put(n2s, by_name, copy, &ret);
+  kh_value(by_name, it) = sym;
+
+  it = kh_put(s2n, by_sym, sym, &ret);
+  kh_value(by_sym, it) = copy;
+
+  return sym;
+}
+
+/*
+ * Return the stored name for `sym`, or NULL when the id is not in the
+ * symbol->name table.  The returned pointer is the table's own copy.
+ */
+const char*
+mrb_sym2name(mrb_state *mrb, mrb_sym sym)
+{
+  khash_t(s2n) *names = mrb->sym2name;
+  khiter_t it = kh_get(s2n, names, sym);
+
+  return (it == kh_end(names)) ? NULL /* missing */ : kh_value(names, it);
+}
+
+/*
+ * Release the symbol tables: free every name stored in the symbol->name
+ * table, then destroy both hash tables.
+ */
+void
+mrb_free_symtbls(mrb_state *mrb)
+{
+  khash_t(s2n) *names = mrb->sym2name;
+  khiter_t it = kh_begin(names);
+
+  while (it != kh_end(names)) {
+    if (kh_exist(names, it)) {
+      mrb_free(mrb, (char*)kh_value(names, it));
+    }
+    ++it;
+  }
+  kh_destroy(s2n,mrb->sym2name);
+  kh_destroy(n2s,mrb->name2sym);
+}
+
+/* Create the two empty symbol tables (name->symbol and symbol->name) on mrb. */
+void
+mrb_init_symtbl(mrb_state *mrb)
+{
+ mrb->name2sym = kh_init(n2s, mrb);
+ mrb->sym2name = kh_init(s2n, mrb);
+}
+
+/**********************************************************************
+ * Document-class: Symbol
+ *
+ * <code>Symbol</code> objects represent names and some strings
+ * inside the Ruby
+ * interpreter. They are generated using the <code>:name</code> and
+ * <code>:"string"</code> literals
+ * syntax, and by the various <code>to_sym</code> methods. The same
+ * <code>Symbol</code> object will be created for a given name or string
+ * for the duration of a program's execution, regardless of the context
+ * or meaning of that name. Thus if <code>Fred</code> is a constant in
+ * one context, a method in another, and a class in a third, the
+ * <code>Symbol</code> <code>:Fred</code> will be the same object in
+ * all three contexts.
+ *
+ * module One
+ * class Fred
+ * end
+ * $f1 = :Fred
+ * end
+ * module Two
+ * Fred = 1
+ * $f2 = :Fred
+ * end
+ * def Fred()
+ * end
+ * $f3 = :Fred
+ * $f1.object_id #=> 2514190
+ * $f2.object_id #=> 2514190
+ * $f3.object_id #=> 2514190
+ *
+ */
+
+
+/* 15.2.11.3.1 */
+/*
+ * call-seq:
+ * sym === obj -> true or false
+ *
+ * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
+ * symbol, returns <code>true</code>.
+ */
+
+static mrb_value
+sym_equal(mrb_state *mrb, mrb_value sym1)
+{
+  mrb_value other;
+
+  mrb_get_args(mrb, "o", &other);
+
+  /* true only when mrb_obj_equal accepts the pair */
+  return mrb_obj_equal(mrb, sym1, other) ? mrb_true_value() : mrb_false_value();
+}
+
+/* 15.2.11.3.2 */
+/* 15.2.11.3.3 */
+/*
+ * call-seq:
+ * sym.id2name -> string
+ * sym.to_s -> string
+ *
+ * Returns the name or string corresponding to <i>sym</i>.
+ *
+ * :fred.id2name #=> "fred"
+ */
+mrb_value
+mrb_sym_to_s(mrb_state *mrb, mrb_value sym)
+{
+  /* text registered for this symbol in the symbol->name table */
+  const char *name = mrb_sym2name(mrb, SYM2ID(sym));
+
+#ifdef INCLUDE_REGEXP
+  return str_new3(mrb, mrb_obj_class(mrb, sym), mrb_str_new_cstr(mrb, name));
+#else
+  return mrb_str_new_cstr(mrb, name);
+#endif
+}
+
+/* 15.2.11.3.4 */
+/*
+ * call-seq:
+ * sym.to_sym -> sym
+ * sym.intern -> sym
+ *
+ * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
+ * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
+ * in this case.
+ */
+
+static mrb_value
+sym_to_sym(mrb_state *mrb, mrb_value sym)
+{
+ /* the receiver is already a symbol: return it unchanged */
+ return sym;
+}
+
+/* 15.2.11.3.5(x) */
+/*
+ * call-seq:
+ * sym.inspect -> string
+ *
+ * Returns the representation of <i>sym</i> as a symbol literal.
+ *
+ * :fred.inspect #=> ":fred"
+ */
+
+static mrb_value
+sym_inspect(mrb_state *mrb, mrb_value sym)
+{
+#ifdef INCLUDE_ENCODING
+ /* Encoding-aware build: the result is ':' followed by the name or by its inspected form. */
+ #define STR_ENC_GET(mrb, str) mrb_enc_from_index(mrb, ENCODING_GET(mrb, str))
+ mrb_value str;
+ mrb_sym id = SYM2ID(sym);
+ mrb_encoding *enc;
+ const char *ptr;
+ long len;
+ char *dest;
+ mrb_encoding *resenc = mrb_default_internal_encoding(mrb);
+
+ /* fall back to the external encoding when no internal one is set */
+ if (resenc == NULL) resenc = mrb_default_external_encoding(mrb);
+ /* from here on `sym` holds a string of the symbol's name, not the symbol */
+ sym = mrb_str_new_cstr(mrb, mrb_sym2name(mrb, id));//mrb_id2str(id);
+ enc = STR_ENC_GET(mrb, sym);
+ ptr = RSTRING_PTR(sym);
+ len = RSTRING_LEN(sym);
+ /*
+  * Use the inspected form when the name is non-ASCII in a different
+  * encoding, contains an embedded NUL (length differs from strlen), is
+  * not a valid symbol name, or is not printable.
+  */
+ if ((resenc != enc && !mrb_str_is_ascii_only_p(mrb, sym)) || len != (long)strlen(ptr) ||
+ !mrb_enc_symname_p(ptr, enc) || !sym_printable(mrb, ptr, ptr + len, enc)) {
+ str = mrb_str_inspect(mrb, sym);
+ len = RSTRING_LEN(str);
+ /* grow by one byte and shift the text right to make room for ':' */
+ mrb_str_resize(mrb, str, len + 1);
+ dest = RSTRING_PTR(str);
+ memmove(dest + 1, dest, len);
+ dest[0] = ':';
+ }
+ else {
+ /* plain name: new string of len+1 bytes holding ':' + name */
+ char *dest;
+ str = mrb_enc_str_new(mrb, 0, len + 1, enc);
+ dest = RSTRING_PTR(str);
+ dest[0] = ':';
+ memcpy(dest + 1, ptr, len);
+ }
+ return str;
+#else
+ /* Build without encoding support. */
+ mrb_value str;
+ const char *name;
+ mrb_sym id = SYM2ID(sym);
+
+ name = mrb_sym2name(mrb, id); //mrb_id2name(id);
+ /* string of strlen(name)+1 bytes: ':' followed by the name */
+ str = mrb_str_new(mrb, 0, strlen(name)+1);
+ RSTRING(str)->buf[0] = ':';
+ strcpy(RSTRING(str)->buf+1, name);
+ if (!mrb_symname_p(name)) {
+ /*
+  * Not a plain symbol name: dump the ":name" string, then overwrite its
+  * first two bytes with `:"`.
+  * NOTE(review): presumably the dump starts with `":`, so this swaps the
+  * two characters to give :"name" -- confirm against mrb_str_dump.
+  */
+ str = mrb_str_dump(mrb, str);
+ strncpy(RSTRING(str)->buf, ":\"", 2);
+ }
+ return str;
+#endif
+}
+
+
+/*
+ * Define class Symbol under Object, record it in mrb->symbol_class, and
+ * register its instance methods.
+ */
+void
+mrb_init_symbols(mrb_state *mrb)
+{
+ struct RClass *sym;
+
+ sym = mrb->symbol_class = mrb_define_class(mrb, "Symbol", mrb->object_class);
+
+ /* the trailing numbers are the section tags used throughout this file */
+ mrb_define_method(mrb, sym, "===", sym_equal, ARGS_REQ(1)); /* 15.2.11.3.1 */
+ /* id2name and to_s share one implementation */
+ mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, ARGS_NONE()); /* 15.2.11.3.2 */
+ mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, ARGS_NONE()); /* 15.2.11.3.3 */
+ mrb_define_method(mrb, sym, "to_sym", sym_to_sym, ARGS_NONE()); /* 15.2.11.3.4 */
+
+ mrb_define_method(mrb, sym, "inspect", sym_inspect, ARGS_NONE()); /* 15.2.11.3.5(x) */
+}
diff --git a/src/transcode.c b/src/transcode.c
new file mode 100644
index 000000000..cb2f86731
--- /dev/null
+++ b/src/transcode.c
@@ -0,0 +1,4366 @@
+/**********************************************************************
+
+ transcode.c -
+
+ $Author: usa $
+ created at: Tue Oct 30 16:10:22 JST 2007
+
+ Copyright (C) 2007 Martin Duerst
+
+**********************************************************************/
+
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include "encoding.h"
+#include <sys/types.h> /* for ssize_t */
+#include "transcode_data.h"
+#include <ctype.h>
+#include "st.h"
+#include "variable.h"
+#include <string.h>
+#include "mruby/string.h"
+#include "mruby/array.h"
+#include "mruby/hash.h"
+#include "error.h"
+#include "mruby/numeric.h"
+//#include "mio.h"
+#include <stdio.h>
+
+
+/* Read the type tag directly from the `tt` member of an mrb_value. */
+#define TYPE(o) (o).tt//mrb_type(o)
+
+/* Converter exception classes, looked up by name through mrb_class_obj_get().
+ * Each expansion requires a variable named `mrb` in scope at the use site. */
+#define E_CONVERTERNOTFOUND_ERROR (mrb_class_obj_get(mrb, "ConverterNotFoundError"))
+#define E_INVALIDBYTESEQUENCE_ERROR (mrb_class_obj_get(mrb, "InvalidByteSequenceError"))
+#define E_UNDEFINEDCONVERSION_ERROR (mrb_class_obj_get(mrb, "UndefinedConversionError"))
+
+/* mrb_value mrb_cEncoding = rb_define_class("Encoding", rb_cObject); */
+/* NOTE(review): these globals mix the rb_ and mrb_ prefixes; where they are
+ * initialised is not visible in this part of the file. */
+mrb_value rb_eUndefinedConversionError;
+mrb_value mrb_eInvalidByteSequenceError;
+mrb_value rb_eConverterNotFoundError;
+
+mrb_value mrb_cEncodingConverter;
+
+/* presumably cached symbol values for converter option keys -- TODO confirm
+ * against the initialisation code */
+static mrb_value sym_invalid, sym_undef, sym_replace, sym_fallback;
+static mrb_value sym_xml, sym_text, sym_attr;
+static mrb_value sym_universal_newline;
+static mrb_value sym_crlf_newline;
+static mrb_value sym_cr_newline;
+static mrb_value sym_partial_input;
+
+/* presumably cached symbol values naming the mrb_econv_result_t outcomes --
+ * TODO confirm */
+static mrb_value sym_invalid_byte_sequence;
+static mrb_value sym_undefined_conversion;
+static mrb_value sym_destination_buffer_full;
+static mrb_value sym_source_buffer_empty;
+static mrb_value sym_finished;
+static mrb_value sym_after_output;
+static mrb_value sym_incomplete_input;
+
+/* Forward declaration so the helper can be referenced before its definition. */
+static unsigned char *
+allocate_converted_string(mrb_state *mrb,
+ const char *sname, const char *dname,
+ const unsigned char *str, size_t len,
+ unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
+ size_t *dst_len_ptr);
+
+/* dynamic structure, one per conversion (similar to iconv_t) */
+/* may carry conversion state (e.g. for iso-2022-jp) */
+typedef struct mrb_transcoding {
+ const mrb_transcoder *transcoder;
+
+ int flags;
+
+ /* Saved position of the resumable loop in transcode_restartable0():
+ * 0 means start from the top, N means jump to resume_label<N>. */
+ int resume_position;
+ /* Table-walk state that must survive a suspend/resume cycle. */
+ unsigned int next_table;
+ mrb_value next_info;
+ unsigned char next_byte;
+ unsigned int output_index; /* bytes of a STR1 output already emitted */
+
+ ssize_t recognized_len; /* already interpreted */
+ ssize_t readagain_len; /* not yet interpreted */
+ /* Inline array when the transcoder's max_input fits, heap pointer otherwise
+ * (see TRANSCODING_READBUF). */
+ union {
+ unsigned char ary[8]; /* max_input <= sizeof(ary) */
+ unsigned char *ptr; /* length: max_input */
+ } readbuf; /* recognized_len + readagain_len used */
+
+ /* Staging area for output that did not fit the destination in one go;
+ * bytes writebuf_off..writebuf_len-1 are still to be copied out. */
+ ssize_t writebuf_off;
+ ssize_t writebuf_len;
+ union {
+ unsigned char ary[8]; /* max_output <= sizeof(ary) */
+ unsigned char *ptr; /* length: max_output */
+ } writebuf;
+
+ union mrb_transcoding_state_t { /* opaque data for stateful encoding */
+ void *ptr;
+ char ary[sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*)];
+ double dummy_for_alignment;
+ } state;
+} mrb_transcoding;
+/* Select the inline array or the heap pointer of readbuf, depending on
+ * whether the transcoder's max_input fits in the inline array. */
+#define TRANSCODING_READBUF(tc) \
+ ((tc)->transcoder->max_input <= (int)sizeof((tc)->readbuf.ary) ? \
+ (tc)->readbuf.ary : \
+ (tc)->readbuf.ptr)
+/* Same selection for writebuf, keyed on max_output. */
+#define TRANSCODING_WRITEBUF(tc) \
+ ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
+ (tc)->writebuf.ary : \
+ (tc)->writebuf.ptr)
+/* Capacity in bytes of whichever write buffer TRANSCODING_WRITEBUF selects. */
+#define TRANSCODING_WRITEBUF_SIZE(tc) \
+ ((tc)->transcoder->max_output <= (int)sizeof((tc)->writebuf.ary) ? \
+ sizeof((tc)->writebuf.ary) : \
+ (size_t)(tc)->transcoder->max_output)
+/* Largest state_size that is stored inline in the state union. */
+#define TRANSCODING_STATE_EMBED_MAX ((int)sizeof(union mrb_transcoding_state_t))
+/* Pointer to the transcoder's private state: inline bytes when state_size
+ * fits in the union, otherwise the separately allocated block. */
+#define TRANSCODING_STATE(tc) \
+ ((tc)->transcoder->state_size <= (int)sizeof((tc)->state) ? \
+ (tc)->state.ary : \
+ (tc)->state.ptr)
+
+/*
+ * One stage of a converter pipeline: a transcoding plus the buffer that holds
+ * its output.  out_buf_start..out_buf_end is the allocated buffer;
+ * out_data_start..out_data_end is the part currently holding data.
+ */
+typedef struct {
+ struct mrb_transcoding *tc;
+ unsigned char *out_buf_start;
+ unsigned char *out_data_start;
+ unsigned char *out_data_end;
+ unsigned char *out_buf_end;
+ mrb_econv_result_t last_result; /* result of this stage's latest convert call */
+} mrb_econv_elem_t;
+
+/*
+ * A converter: an ordered array of pipeline stages (elems) plus replacement
+ * settings, an input buffer, and a record of the most recent error.
+ */
+struct mrb_econv_t {
+ int flags;
+ const char *source_encoding_name;
+ const char *destination_encoding_name;
+
+ int started;
+
+ const unsigned char *replacement_str;
+ size_t replacement_len;
+ const char *replacement_enc;
+ int replacement_allocated;
+
+ unsigned char *in_buf_start;
+ unsigned char *in_data_start;
+ unsigned char *in_data_end;
+ unsigned char *in_buf_end;
+ mrb_econv_elem_t *elems; /* num_allocated slots, first num_trans in use */
+ int num_allocated;
+ int num_trans;
+ int num_finished; /* set to i+1 when stage i reports econv_finished */
+ struct mrb_transcoding *last_tc; /* a non-decorator stage; see mrb_econv_add_transcoder_at */
+
+ /* last error */
+ struct {
+ mrb_econv_result_t result;
+ struct mrb_transcoding *error_tc;
+ const char *source_encoding;
+ const char *destination_encoding;
+ const unsigned char *error_bytes_start;
+ size_t error_bytes_len;
+ size_t readagain_len;
+ } last_error;
+
+ /* The following fields are only for Encoding::Converter.
+ * mrb_econv_open sets them to NULL. */
+ mrb_encoding *source_encoding;
+ mrb_encoding *destination_encoding;
+};
+
+/*
+ * Dispatch data and logic
+ */
+
+/* A transcoder counts as a decorator when its source encoding name is the
+ * empty string; dname is accepted but not inspected. */
+#define DECORATOR_P(sname, dname) (*(sname) == '\0')
+
+/* Registry record for one (source, destination) encoding pair. */
+typedef struct {
+ const char *sname;
+ const char *dname;
+ const char *lib; /* null means no need to load a library */
+ const mrb_transcoder *transcoder; /* NULL until a transcoder is registered */
+} transcoder_entry_t;
+
+/* Two-level registry: source name -> (st_table: destination name ->
+ * transcoder_entry_t *). */
+static st_table *transcoder_table;
+
+/*
+ * Return the registry entry for the (sname, dname) pair, creating the
+ * per-source table and/or the entry itself when they do not exist yet.
+ * A freshly created entry has no library and no transcoder attached.
+ * The name pointers are stored as given, not copied.
+ */
+static transcoder_entry_t *
+make_transcoder_entry(const char *sname, const char *dname)
+{
+  st_data_t slot;
+  st_table *by_destination;
+  transcoder_entry_t *fresh;
+
+  /* first level: source encoding name -> table of destinations */
+  if (!st_lookup(transcoder_table, (st_data_t)sname, &slot)) {
+    slot = (st_data_t)st_init_strcasetable();
+    st_add_direct(transcoder_table, (st_data_t)sname, slot);
+  }
+  by_destination = (st_table *)slot;
+
+  /* second level: destination encoding name -> entry */
+  if (st_lookup(by_destination, (st_data_t)dname, &slot)) {
+    return (transcoder_entry_t *)slot;
+  }
+
+  fresh = malloc(sizeof(transcoder_entry_t));
+  fresh->sname = sname;
+  fresh->dname = dname;
+  fresh->lib = NULL;
+  fresh->transcoder = NULL;
+  st_add_direct(by_destination, (st_data_t)dname, (st_data_t)fresh);
+  return fresh;
+}
+
+/*
+ * Look up the registry entry for the (sname, dname) pair without creating
+ * anything.  Returns NULL when either level of the lookup misses.
+ */
+static transcoder_entry_t *
+get_transcoder_entry(const char *sname, const char *dname)
+{
+  st_data_t found;
+  st_table *by_destination;
+
+  if (st_lookup(transcoder_table, (st_data_t)sname, &found)) {
+    by_destination = (st_table *)found;
+    if (st_lookup(by_destination, (st_data_t)dname, &found)) {
+      return (transcoder_entry_t *)found;
+    }
+  }
+  return NULL;
+}
+
+/*
+ * Attach transcoder `tr` to the registry entry for its source/destination
+ * pair.  Raises E_ARGUMENT_ERROR when that pair already has a transcoder.
+ */
+void
+mrb_register_transcoder(mrb_state *mrb, const mrb_transcoder *tr)
+{
+  const char *const from = tr->src_encoding;
+  const char *const to = tr->dst_encoding;
+  transcoder_entry_t *slot = make_transcoder_entry(from, to);
+
+  if (slot->transcoder != NULL) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "transcoder from %s to %s has been already registered",
+              from, to);
+  }
+
+  slot->transcoder = tr;
+}
+
+/*
+ * Record that the (sname, dname) conversion is provided by library `lib`,
+ * creating the registry entry if needed.
+ */
+static void
+declare_transcoder(const char *sname, const char *dname, const char *lib)
+{
+  make_transcoder_entry(sname, dname)->lib = lib;
+}
+
+#define MAX_TRANSCODER_LIBNAME_LEN 64
+static const char transcoder_lib_prefix[] = "enc/trans/";
+
+/*
+ * Public wrapper around declare_transcoder() that validates the library
+ * name first: it must be non-NULL and at most MAX_TRANSCODER_LIBNAME_LEN
+ * characters long, otherwise E_ARGUMENT_ERROR is raised.
+ */
+void
+mrb_declare_transcoder(mrb_state *mrb, const char *enc1, const char *enc2, const char *lib)
+{
+  const int name_ok = (lib != NULL) && (strlen(lib) <= MAX_TRANSCODER_LIBNAME_LEN);
+
+  if (!name_ok) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid library name - %s",
+              lib ? lib : "(null)");
+  }
+  declare_transcoder(enc1, enc2, lib);
+}
+
+/* Compare two encoding names with STRCASECMP. */
+#define encoding_equal(enc1, enc2) (STRCASECMP(enc1, enc2) == 0)
+
+/* Singly linked queue node holding one encoding name still to be expanded. */
+typedef struct search_path_queue_tag {
+ struct search_path_queue_tag *next;
+ const char *enc;
+} search_path_queue_t;
+
+/* Working state of the breadth-first search in transcode_search_path(). */
+typedef struct {
+ st_table *visited; /* encoding name -> name of the encoding it was reached from */
+ search_path_queue_t *queue; /* head of the pending queue */
+ search_path_queue_t **queue_last_ptr; /* where the next node gets linked in */
+ const char *base_enc; /* encoding whose neighbours are being enumerated */
+} search_path_bfs_t;
+
+/*
+ * st_foreach callback used by transcode_search_path().  `key` is a
+ * destination encoding reachable from bfs->base_enc.  If it has not been
+ * visited yet, append it to the queue and record base_enc as its predecessor.
+ * Always returns ST_CONTINUE.
+ */
+static int
+transcode_search_path_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+  search_path_bfs_t *bfs = (search_path_bfs_t *)arg;
+  const char *dname = (const char *)key;
+
+  /* `val` is only used as scratch space for the lookup result */
+  if (!st_lookup(bfs->visited, (st_data_t)dname, &val)) {
+    search_path_queue_t *node = malloc(sizeof(search_path_queue_t));
+
+    node->enc = dname;
+    node->next = NULL;
+    *bfs->queue_last_ptr = node;
+    bfs->queue_last_ptr = &node->next;
+
+    st_add_direct(bfs->visited, (st_data_t)dname, (st_data_t)bfs->base_enc);
+  }
+  return ST_CONTINUE;
+}
+
+/*
+ * Breadth-first search through transcoder_table for a chain of registered
+ * conversions leading from sname to dname.
+ *
+ * When a chain is found, `callback` is invoked once per hop, starting with
+ * the last hop (the one ending at dname) and walking back towards sname;
+ * `depth` counts down from pathlen-1 to 0.
+ *
+ * Returns the number of hops, or -1 when the two names are equal
+ * (case-insensitively) or no chain exists.
+ */
+static int
+transcode_search_path(mrb_state *mrb, const char *sname, const char *dname,
+ void (*callback)(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg),
+ void *arg)
+{
+ search_path_bfs_t bfs;
+ search_path_queue_t *q;
+ st_data_t val;
+ st_table *table2;
+ int found;
+ int pathlen = -1;
+
+ if (encoding_equal(sname, dname))
+ return -1;
+
+ /* seed the queue with the source encoding */
+ q = malloc(sizeof(search_path_queue_t));//ALLOC(search_path_queue_t);
+ q->enc = sname;
+ q->next = NULL;
+ bfs.queue_last_ptr = &q->next;
+ bfs.queue = q;
+
+ /* the source has no predecessor; NULL marks the start of the chain */
+ bfs.visited = st_init_strcasetable();
+ st_add_direct(bfs.visited, (st_data_t)sname, (st_data_t)NULL);
+
+ while (bfs.queue) {
+ q = bfs.queue;
+ bfs.queue = q->next;
+ if (!bfs.queue)
+ bfs.queue_last_ptr = &bfs.queue;
+
+ /* nothing converts from this encoding */
+ if (!st_lookup(transcoder_table, (st_data_t)q->enc, &val)) {
+ xfree(q);
+ continue;
+ }
+ table2 = (st_table *)val;
+
+ /* direct hop to the destination: record it and stop searching */
+ if (st_lookup(table2, (st_data_t)dname, &val)) {
+ st_add_direct(bfs.visited, (st_data_t)dname, (st_data_t)q->enc);
+ xfree(q);
+ found = 1;
+ goto cleanup;
+ }
+
+ /* enqueue every not-yet-visited neighbour of q->enc */
+ bfs.base_enc = q->enc;
+ st_foreach(table2, transcode_search_path_i, (st_data_t)&bfs);
+ bfs.base_enc = NULL;
+
+ xfree(q);
+ }
+ found = 0;
+
+ cleanup:
+ /* release whatever is still queued */
+ while (bfs.queue) {
+ q = bfs.queue;
+ bfs.queue = q->next;
+ xfree(q);
+ }
+
+ if (found) {
+ const char *enc = dname;
+ int depth;
+ pathlen = 0;
+ /* first pass: count hops by following predecessors back to the source */
+ while (1) {
+ st_lookup(bfs.visited, (st_data_t)enc, &val);
+ if (!val)
+ break;
+ pathlen++;
+ enc = (const char *)val;
+ }
+ depth = pathlen;
+ enc = dname;
+ /* second pass: report each hop, last hop first */
+ while (1) {
+ st_lookup(bfs.visited, (st_data_t)enc, &val);
+ if (!val)
+ break;
+ callback(mrb, (const char *)val, enc, --depth, arg);
+ enc = (const char *)val;
+ }
+ }
+
+ st_free_table(bfs.visited);
+
+ return pathlen; /* is -1 if not found */
+}
+
+/*
+ * Placeholder for library loading: builds a string object from `fname`,
+ * discards it, and always reports success (1).  No file is loaded.
+ */
+int
+mrb_require(mrb_state *mrb, const char *fname)
+{
+  const int ok = 1;
+
+  /* the original require logic (freeze + mrb_require_safe) is not wired up */
+  mrb_str_new2(mrb, fname);
+  return ok;
+}
+
+/*
+ * Return the transcoder attached to `entry`.  If none is attached yet but a
+ * library name is recorded, clear the library name, build the path
+ * "enc/trans/<lib>", pass it to mrb_require(), and then return whatever
+ * transcoder the entry holds afterwards.  Returns NULL when no transcoder
+ * becomes available.
+ */
+static const mrb_transcoder *
+load_transcoder_entry(mrb_state *mrb, transcoder_entry_t *entry)
+{
+  const size_t prefix_len = sizeof(transcoder_lib_prefix) - 1;
+
+  if (entry->transcoder != NULL) {
+    return entry->transcoder;
+  }
+
+  if (entry->lib != NULL) {
+    char path[sizeof(transcoder_lib_prefix) + MAX_TRANSCODER_LIBNAME_LEN];
+    const char *lib = entry->lib;
+    size_t lib_len = strlen(lib);
+
+    /* one attempt only: forget the library name before trying to load it */
+    entry->lib = NULL;
+
+    if (lib_len > MAX_TRANSCODER_LIBNAME_LEN) {
+      return NULL;
+    }
+    memcpy(path, transcoder_lib_prefix, prefix_len);
+    memcpy(path + prefix_len, lib, lib_len + 1); /* +1 copies the terminator */
+    if (!mrb_require(mrb, path)) {
+      return NULL;
+    }
+  }
+
+  /* NULL here when loading did not register a transcoder */
+  return entry->transcoder;
+}
+
+/*
+ * Pick the default replacement string for encoding `encname`.
+ * For UTF-8 this is the three bytes EF BF BD, reported as encoded in
+ * "UTF-8"; for every other encoding it is "?", reported as "US-ASCII".
+ * The byte length is stored in *len_ret and the encoding name of the
+ * replacement in *repl_encname_ptr.
+ */
+static const char*
+get_replacement_character(const char *encname, size_t *len_ret, const char **repl_encname_ptr)
+{
+  if (!encoding_equal(encname, "UTF-8")) {
+    *len_ret = 1;
+    *repl_encname_ptr = "US-ASCII";
+    return "?";
+  }
+
+  *len_ret = 3;
+  *repl_encname_ptr = "UTF-8";
+  return "\xEF\xBF\xBD";
+}
+
+/*
+ * Transcoding engine logic
+ */
+
+/*
+ * Return a pointer to the first byte of the character currently being
+ * converted and store its length in *char_len_ptr.
+ *
+ * If all tc->recognized_len already-recognized bytes still lie in the input
+ * buffer before inchar_start, the character is read in place.  Otherwise
+ * the bytes inchar_start..in_p are appended to the read buffer after the
+ * recognized ones, and the read buffer is returned.
+ */
+static const unsigned char *
+transcode_char_start(mrb_transcoding *tc,
+                     const unsigned char *in_start,
+                     const unsigned char *inchar_start,
+                     const unsigned char *in_p,
+                     size_t *char_len_ptr)
+{
+  const unsigned char *char_begin;
+
+  if (inchar_start - in_start >= tc->recognized_len) {
+    /* the whole character is contiguous in the caller's input */
+    char_begin = inchar_start - tc->recognized_len;
+  }
+  else {
+    memcpy(TRANSCODING_READBUF(tc) + tc->recognized_len,
+           inchar_start, in_p - inchar_start);
+    char_begin = TRANSCODING_READBUF(tc);
+  }
+
+  *char_len_ptr = tc->recognized_len + (in_p - inchar_start);
+  return char_begin;
+}
+
+/*
+ * Core conversion loop, written so that it can be suspended and resumed.
+ *
+ * Every SUSPEND(ret, num) stores `num` in tc->resume_position, saves the
+ * bytes of the character in progress into the read buffer, publishes the
+ * current positions through *in_pos / *out_pos and returns `ret`.  It also
+ * places the label resume_label<num> right after the return, so that the
+ * switch at the top of the next call jumps back to the statement following
+ * the suspension point.
+ *
+ * State that has to survive a suspension (next_table, next_info, next_byte,
+ * writebuf_len, writebuf_off) lives in `tc`; the macros of the same names
+ * make those fields look like locals inside this function.
+ *
+ * in_pos/in_stop bound the input, out_pos/out_stop the output; `opt` carries
+ * ECONV_PARTIAL_INPUT / ECONV_AFTER_OUTPUT.  The return value says why the
+ * function stopped.
+ */
+static mrb_econv_result_t
+transcode_restartable0(mrb_state *mrb,
+ const unsigned char **in_pos, unsigned char **out_pos,
+ const unsigned char *in_stop, unsigned char *out_stop,
+ mrb_transcoding *tc,
+ const int opt)
+{
+ const mrb_transcoder *tr = tc->transcoder;
+ int unitlen = tr->input_unit_length;
+ ssize_t readagain_len = 0;
+
+ const unsigned char *inchar_start;
+ const unsigned char *in_p;
+
+ unsigned char *out_p;
+
+ in_p = inchar_start = *in_pos;
+
+ out_p = *out_pos;
+
+#define SUSPEND(ret, num) \
+ do { \
+ tc->resume_position = (num); \
+ if (0 < in_p - inchar_start) \
+ memmove(TRANSCODING_READBUF(tc)+tc->recognized_len, \
+ inchar_start, in_p - inchar_start); \
+ *in_pos = in_p; \
+ *out_pos = out_p; \
+ tc->recognized_len += in_p - inchar_start; \
+ if (readagain_len) { \
+ tc->recognized_len -= readagain_len; \
+ tc->readagain_len = readagain_len; \
+ } \
+ return ret; \
+ resume_label ## num:; \
+ } while (0)
+#define SUSPEND_OBUF(num) \
+ do { \
+ while (out_stop - out_p < 1) { SUSPEND(econv_destination_buffer_full, num); } \
+ } while (0)
+
+#define SUSPEND_AFTER_OUTPUT(num) \
+ if ((opt & ECONV_AFTER_OUTPUT) && *out_pos != out_p) { \
+ SUSPEND(econv_after_output, num); \
+ }
+
+#define next_table (tc->next_table)
+#define next_info (tc->next_info)
+#define next_byte (tc->next_byte)
+#define writebuf_len (tc->writebuf_len)
+#define writebuf_off (tc->writebuf_off)
+
+ /* jump back to wherever the previous call suspended */
+ switch (tc->resume_position) {
+ case 0: break;
+ case 1: goto resume_label1;
+ case 2: goto resume_label2;
+ case 3: goto resume_label3;
+ case 4: goto resume_label4;
+ case 5: goto resume_label5;
+ case 6: goto resume_label6;
+ case 7: goto resume_label7;
+ case 8: goto resume_label8;
+ case 9: goto resume_label9;
+ case 10: goto resume_label10;
+ case 11: goto resume_label11;
+ case 12: goto resume_label12;
+ case 13: goto resume_label13;
+ case 14: goto resume_label14;
+ case 15: goto resume_label15;
+ case 16: goto resume_label16;
+ case 17: goto resume_label17;
+ case 18: goto resume_label18;
+ case 19: goto resume_label19;
+ case 20: goto resume_label20;
+ case 21: goto resume_label21;
+ case 22: goto resume_label22;
+ case 23: goto resume_label23;
+ case 24: goto resume_label24;
+ case 25: goto resume_label25;
+ case 26: goto resume_label26;
+ case 27: goto resume_label27;
+ case 28: goto resume_label28;
+ case 29: goto resume_label29;
+ case 30: goto resume_label30;
+ case 31: goto resume_label31;
+ case 32: goto resume_label32;
+ case 33: goto resume_label33;
+ case 34: goto resume_label34;
+ }
+
+ /* one iteration per input character */
+ while (1) {
+ inchar_start = in_p;
+ tc->recognized_len = 0;
+ next_table = tr->conv_tree_start;
+
+ SUSPEND_AFTER_OUTPUT(24);
+
+ if (in_stop <= in_p) {
+ if (!(opt & ECONV_PARTIAL_INPUT))
+ break;
+ SUSPEND(econv_source_buffer_empty, 7);
+ continue;
+ }
+
+#define BYTE_ADDR(index) (tr->byte_array + (index))
+#define WORD_ADDR(index) (tr->word_array + INFO2WORDINDEX(index))
+#define BL_BASE BYTE_ADDR(BYTE_LOOKUP_BASE(WORD_ADDR(next_table)))
+#define BL_INFO WORD_ADDR(BYTE_LOOKUP_INFO(WORD_ADDR(next_table)))
+#define BL_MIN_BYTE (BL_BASE[0])
+#define BL_MAX_BYTE (BL_BASE[1])
+#define BL_OFFSET(byte) (BL_BASE[2+(byte)-BL_MIN_BYTE])
+#define BL_ACTION(byte) (BL_INFO[BL_OFFSET((byte))])
+
+ next_byte = (unsigned char)*in_p++;
+ follow_byte:
+ /* look next_byte up in the current table; out-of-range bytes are invalid */
+ if (next_byte < BL_MIN_BYTE || BL_MAX_BYTE < next_byte)
+ next_info = mrb_fixnum_value(INVALID);
+ else {
+ next_info = mrb_fixnum_value(BL_ACTION(next_byte));
+ }
+ follow_info:
+ /* the low five bits of the info word select the action */
+ switch (mrb_fixnum(next_info) & 0x1F) {
+ case NOMAP:
+ {
+ /* copy the input character to the output unchanged, via writebuf */
+ const unsigned char *p = inchar_start;
+ writebuf_off = 0;
+ while (p < in_p) {
+ TRANSCODING_WRITEBUF(tc)[writebuf_off++] = (unsigned char)*p++;
+ }
+ writebuf_len = writebuf_off;
+ writebuf_off = 0;
+ while (writebuf_off < writebuf_len) {
+ SUSPEND_OBUF(3);
+ *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+ }
+ }
+ continue;
+ case 0x00: case 0x04: case 0x08: case 0x0C:
+ case 0x10: case 0x14: case 0x18: case 0x1C:
+ /* info is the next lookup table: fetch another input byte and descend */
+ SUSPEND_AFTER_OUTPUT(25);
+ while (in_p >= in_stop) {
+ if (!(opt & ECONV_PARTIAL_INPUT))
+ goto incomplete;
+ SUSPEND(econv_source_buffer_empty, 5);
+ }
+ next_byte = (unsigned char)*in_p++;
+ next_table = (unsigned int)mrb_fixnum(next_info);
+ goto follow_byte;
+ case ZERObt: /* drop input */
+ continue;
+ case ONEbt:
+ SUSPEND_OBUF(9); *out_p++ = getBT1(mrb_fixnum(next_info));
+ continue;
+ case TWObt:
+ SUSPEND_OBUF(10); *out_p++ = getBT1(mrb_fixnum(next_info));
+ SUSPEND_OBUF(21); *out_p++ = getBT2(mrb_fixnum(next_info));
+ continue;
+ case THREEbt:
+ SUSPEND_OBUF(11); *out_p++ = getBT1(mrb_fixnum(next_info));
+ SUSPEND_OBUF(15); *out_p++ = getBT2(mrb_fixnum(next_info));
+ SUSPEND_OBUF(16); *out_p++ = getBT3(mrb_fixnum(next_info));
+ continue;
+ case FOURbt:
+ SUSPEND_OBUF(12); *out_p++ = getBT0(mrb_fixnum(next_info));
+ SUSPEND_OBUF(17); *out_p++ = getBT1(mrb_fixnum(next_info));
+ SUSPEND_OBUF(18); *out_p++ = getBT2(mrb_fixnum(next_info));
+ SUSPEND_OBUF(19); *out_p++ = getBT3(mrb_fixnum(next_info));
+ continue;
+ case GB4bt:
+ SUSPEND_OBUF(29); *out_p++ = getGB4bt0((unsigned char)mrb_fixnum(next_info));
+ SUSPEND_OBUF(30); *out_p++ = getGB4bt1((mrb_fixnum(next_info)));
+ SUSPEND_OBUF(31); *out_p++ = getGB4bt2((unsigned char)mrb_fixnum(next_info));
+ SUSPEND_OBUF(32); *out_p++ = getGB4bt3(mrb_fixnum(next_info));
+ continue;
+ case STR1:
+ /* output a byte string from byte_array; output_index tracks progress
+ * across suspensions */
+ tc->output_index = 0;
+ while (tc->output_index < STR1_LENGTH(BYTE_ADDR(STR1_BYTEINDEX(mrb_fixnum(next_info))))) {
+ SUSPEND_OBUF(28); *out_p++ = BYTE_ADDR(STR1_BYTEINDEX(mrb_fixnum(next_info)))[1+tc->output_index];
+ tc->output_index++;
+ }
+ continue;
+ case FUNii:
+ next_info = (mrb_value)(*tr->func_ii)(TRANSCODING_STATE(tc), next_info);
+ goto follow_info;
+ case FUNsi:
+ {
+ const unsigned char *char_start;
+ size_t char_len;
+ char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+ next_info = (mrb_value)(*tr->func_si)(TRANSCODING_STATE(tc), char_start, (size_t)char_len);
+ goto follow_info;
+ }
+ case FUNio:
+ SUSPEND_OBUF(13);
+ /* write straight to the output when max_output bytes fit, otherwise go
+ * through writebuf and drain it byte by byte */
+ if (tr->max_output <= out_stop - out_p)
+ out_p += tr->func_io(TRANSCODING_STATE(tc),
+ next_info, out_p, out_stop - out_p);
+ else {
+ writebuf_len = tr->func_io(TRANSCODING_STATE(tc),
+ next_info,
+ TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+ writebuf_off = 0;
+ while (writebuf_off < writebuf_len) {
+ SUSPEND_OBUF(20);
+ *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+ }
+ }
+ break;
+ case FUNso:
+ {
+ const unsigned char *char_start;
+ size_t char_len;
+ SUSPEND_OBUF(14);
+ if (tr->max_output <= out_stop - out_p) {
+ char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+ out_p += tr->func_so(TRANSCODING_STATE(tc),
+ char_start, (size_t)char_len,
+ out_p, out_stop - out_p);
+ }
+ else {
+ char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+ writebuf_len = tr->func_so(TRANSCODING_STATE(tc),
+ char_start, (size_t)char_len,
+ TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+ writebuf_off = 0;
+ while (writebuf_off < writebuf_len) {
+ SUSPEND_OBUF(22);
+ *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+ }
+ }
+ break;
+ }
+ case FUNsio:
+ {
+ const unsigned char *char_start;
+ size_t char_len;
+ SUSPEND_OBUF(33);
+ if (tr->max_output <= out_stop - out_p) {
+ char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+ out_p += tr->func_sio(TRANSCODING_STATE(tc),
+ char_start, (size_t)char_len, next_info,
+ out_p, out_stop - out_p);
+ }
+ else {
+ char_start = transcode_char_start(tc, *in_pos, inchar_start, in_p, &char_len);
+ writebuf_len = tr->func_sio(TRANSCODING_STATE(tc),
+ char_start, (size_t)char_len, next_info,
+ TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+ writebuf_off = 0;
+ while (writebuf_off < writebuf_len) {
+ SUSPEND_OBUF(34);
+ *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+ }
+ }
+ break;
+ }
+ case INVALID:
+ /* at most one input unit seen: extend the reported span to a full unit
+ * (or to the end of input); otherwise schedule the trailing bytes for
+ * re-reading via readagain_len */
+ if (tc->recognized_len + (in_p - inchar_start) <= unitlen) {
+ if (tc->recognized_len + (in_p - inchar_start) < unitlen)
+ SUSPEND_AFTER_OUTPUT(26);
+ while ((opt & ECONV_PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
+ in_p = in_stop;
+ SUSPEND(econv_source_buffer_empty, 8);
+ }
+ if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
+ in_p = in_stop;
+ }
+ else {
+ in_p = inchar_start + (unitlen - tc->recognized_len);
+ }
+ }
+ else {
+ ssize_t invalid_len; /* including the last byte which causes invalid */
+ ssize_t discard_len;
+ invalid_len = tc->recognized_len + (in_p - inchar_start);
+ discard_len = ((invalid_len - 1) / unitlen) * unitlen;
+ readagain_len = invalid_len - discard_len;
+ }
+ goto invalid;
+ case UNDEF:
+ goto undef;
+ default:
+ mrb_raise(mrb, mrb->eRuntimeError_class, "unknown transcoding instruction");
+ }
+ continue;
+
+ invalid:
+ SUSPEND(econv_invalid_byte_sequence, 1);
+ continue;
+
+ incomplete:
+ SUSPEND(econv_incomplete_input, 27);
+ continue;
+
+ undef:
+ SUSPEND(econv_undefined_conversion, 2);
+ continue;
+ }
+
+ /* cleanup */
+ if (tr->finish_func) {
+ SUSPEND_OBUF(4);
+ if (tr->max_output <= out_stop - out_p) {
+ out_p += tr->finish_func(TRANSCODING_STATE(tc),
+ out_p, out_stop - out_p);
+ }
+ else {
+ writebuf_len = tr->finish_func(TRANSCODING_STATE(tc),
+ TRANSCODING_WRITEBUF(tc), TRANSCODING_WRITEBUF_SIZE(tc));
+ writebuf_off = 0;
+ while (writebuf_off < writebuf_len) {
+ SUSPEND_OBUF(23);
+ *out_p++ = TRANSCODING_WRITEBUF(tc)[writebuf_off++];
+ }
+ }
+ }
+ /* once finished, every further call reports econv_finished again */
+ while (1)
+ SUSPEND(econv_finished, 6);
+#undef SUSPEND
+#undef next_table
+#undef next_info
+#undef next_byte
+#undef writebuf_len
+#undef writebuf_off
+}
+
+/*
+ * Run one conversion step on `tc`, first replaying any bytes that an earlier
+ * call set aside for re-reading (tc->readagain_len bytes stored in the read
+ * buffer right after the tc->recognized_len recognized ones).
+ *
+ * The replay runs with ECONV_PARTIAL_INPUT forced on.  If it stops for any
+ * reason other than econv_source_buffer_empty, the unconsumed replay bytes
+ * are put back into the read-again area and that result is returned without
+ * touching the caller's input.  Otherwise conversion continues with the
+ * caller's input.
+ *
+ * The scratch copy of the replay bytes is heap-allocated (it used to be an
+ * alloca) and is released on every path out of the replay branch.
+ */
+static mrb_econv_result_t
+transcode_restartable(mrb_state *mrb,
+                      const unsigned char **in_pos, unsigned char **out_pos,
+                      const unsigned char *in_stop, unsigned char *out_stop,
+                      mrb_transcoding *tc,
+                      const int opt)
+{
+  if (tc->readagain_len) {
+    unsigned char *readagain_buf = malloc(tc->readagain_len);
+    const unsigned char *readagain_pos = readagain_buf;
+    const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
+    mrb_econv_result_t res;
+
+    memcpy(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
+           tc->readagain_len);
+    tc->readagain_len = 0;
+    res = transcode_restartable0(mrb, &readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|ECONV_PARTIAL_INPUT);
+    if (res != econv_source_buffer_empty) {
+      /* keep the bytes the replay did not consume for the next call */
+      memcpy(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len,
+             readagain_pos, readagain_stop - readagain_pos);
+      tc->readagain_len += readagain_stop - readagain_pos;
+      xfree(readagain_buf);
+      return res;
+    }
+    /* transcode_restartable0 copies what it needs into tc's read buffer
+     * before returning, so the scratch buffer is no longer referenced */
+    xfree(readagain_buf);
+  }
+  return transcode_restartable0(mrb, in_pos, out_pos, in_stop, out_stop, tc, opt);
+}
+
+/*
+ * Allocate and initialise a transcoding for transcoder `tr`.
+ * State, read buffer and write buffer are allocated separately only when
+ * the transcoder's requirements exceed the inline storage of the struct.
+ * The transcoder's state_init_func, if any, is run on the state area.
+ */
+static mrb_transcoding *
+mrb_transcoding_open_by_transcoder(const mrb_transcoder *tr, int flags)
+{
+  mrb_transcoding *tc = malloc(sizeof(mrb_transcoding));
+
+  tc->transcoder = tr;
+  tc->flags = flags;
+
+  /* private state: out-of-line when it does not fit the embedded union */
+  if (tr->state_size > TRANSCODING_STATE_EMBED_MAX) {
+    tc->state.ptr = xmalloc(tr->state_size);
+  }
+  if (tr->state_init_func != NULL) {
+    (tr->state_init_func)(TRANSCODING_STATE(tc)); /* xxx: check return value */
+  }
+
+  /* fresh conversion: nothing pending on either side */
+  tc->resume_position = 0;
+  tc->recognized_len = 0;
+  tc->readagain_len = 0;
+  tc->writebuf_len = 0;
+  tc->writebuf_off = 0;
+
+  if (tr->max_input > (int)sizeof(tc->readbuf.ary)) {
+    tc->readbuf.ptr = xmalloc(tr->max_input);
+  }
+  if (tr->max_output > (int)sizeof(tc->writebuf.ary)) {
+    tc->writebuf.ptr = xmalloc(tr->max_output);
+  }
+  return tc;
+}
+
+/*
+ * Convert with a single transcoding.  Forwards to transcode_restartable();
+ * only the argument order differs (input pointer and stop come first here).
+ */
+static mrb_econv_result_t
+mrb_transcoding_convert(mrb_state *mrb, mrb_transcoding *tc,
+                        const unsigned char **input_ptr, const unsigned char *input_stop,
+                        unsigned char **output_ptr, unsigned char *output_stop,
+                        int flags)
+{
+  mrb_econv_result_t outcome;
+
+  outcome = transcode_restartable(mrb, input_ptr, output_ptr,
+                                  input_stop, output_stop, tc, flags);
+  return outcome;
+}
+
+/*
+ * Tear down a transcoding: run the transcoder's state_fini_func (if any),
+ * release the out-of-line state/read/write buffers that were allocated for
+ * oversized requirements, then free the struct itself.
+ */
+static void
+mrb_transcoding_close(mrb_transcoding *tc)
+{
+  const mrb_transcoder *tr = tc->transcoder;
+
+  if (tr->state_fini_func != NULL) {
+    (tr->state_fini_func)(TRANSCODING_STATE(tc)); /* check return value? */
+  }
+
+  /* same size tests as in mrb_transcoding_open_by_transcoder */
+  if (tr->state_size > TRANSCODING_STATE_EMBED_MAX) {
+    xfree(tc->state.ptr);
+  }
+  if (tr->max_input > (int)sizeof(tc->readbuf.ary)) {
+    xfree(tc->readbuf.ptr);
+  }
+  if (tr->max_output > (int)sizeof(tc->writebuf.ary)) {
+    xfree(tc->writebuf.ptr);
+  }
+  xfree(tc);
+}
+
+/*
+ * Number of bytes attributed to a transcoding: the struct itself plus any
+ * state, read buffer or write buffer that is too large to be stored inline.
+ */
+static size_t
+mrb_transcoding_memsize(mrb_transcoding *tc)
+{
+  const mrb_transcoder *tr = tc->transcoder;
+  size_t total = sizeof(mrb_transcoding);
+
+  if (tr->state_size > TRANSCODING_STATE_EMBED_MAX)
+    total += tr->state_size;
+  if (tr->max_input > (int)sizeof(tc->readbuf.ary))
+    total += tr->max_input;
+  if (tr->max_output > (int)sizeof(tc->writebuf.ary))
+    total += tr->max_output;
+
+  return total;
+}
+
+/*
+ * Allocate an empty converter with room for `n_hint` pipeline stages
+ * (at least one).  All pointers start out NULL, all counters 0, and the
+ * last-error record starts as econv_source_buffer_empty.
+ */
+static mrb_econv_t *
+mrb_econv_alloc(int n_hint)
+{
+  const int capacity = (n_hint <= 0) ? 1 : n_hint;
+  mrb_econv_t *conv = malloc(sizeof(mrb_econv_t));
+
+  /* identity */
+  conv->flags = 0;
+  conv->source_encoding_name = NULL;
+  conv->destination_encoding_name = NULL;
+  conv->started = 0;
+
+  /* replacement string */
+  conv->replacement_str = NULL;
+  conv->replacement_len = 0;
+  conv->replacement_enc = NULL;
+  conv->replacement_allocated = 0;
+
+  /* input buffer */
+  conv->in_buf_start = NULL;
+  conv->in_data_start = NULL;
+  conv->in_data_end = NULL;
+  conv->in_buf_end = NULL;
+
+  /* pipeline stages */
+  conv->num_allocated = capacity;
+  conv->num_trans = 0;
+  conv->elems = malloc(sizeof(mrb_econv_elem_t)*conv->num_allocated);
+  conv->num_finished = 0;
+  conv->last_tc = NULL;
+
+  /* last error */
+  conv->last_error.result = econv_source_buffer_empty;
+  conv->last_error.error_tc = NULL;
+  conv->last_error.source_encoding = NULL;
+  conv->last_error.destination_encoding = NULL;
+  conv->last_error.error_bytes_start = NULL;
+  conv->last_error.error_bytes_len = 0;
+  conv->last_error.readagain_len = 0;
+
+  /* only used by Encoding::Converter */
+  conv->source_encoding = NULL;
+  conv->destination_encoding = NULL;
+  return conv;
+}
+
+/*
+ * Insert a new pipeline stage for transcoder `tr` at index `i` of `ec`,
+ * shifting later stages up by one.  The stage gets a fresh transcoding and
+ * a 4096-byte output buffer.
+ *
+ * The stage array is doubled when it is full.  The pointer returned by
+ * mrb_realloc must be stored back into ec->elems: reallocation may move the
+ * block, and dropping the result left ec->elems pointing at the old,
+ * too-small array.
+ *
+ * When `tr` is not a decorator, ec->last_tc is updated to the last
+ * non-decorator stage at or after index `i`.
+ *
+ * Always returns 0.
+ */
+static int
+mrb_econv_add_transcoder_at(mrb_state *mrb, mrb_econv_t *ec, const mrb_transcoder *tr, int i)
+{
+  int n, j;
+  int bufsize = 4096;
+  unsigned char *p;
+
+  if (ec->num_trans == ec->num_allocated) {
+    n = ec->num_allocated * 2;
+    ec->elems = mrb_realloc(mrb, ec->elems, sizeof(mrb_econv_elem_t)*n);
+    ec->num_allocated = n;
+  }
+
+  p = xmalloc(bufsize);
+
+  /* open a gap at index i */
+  memmove(ec->elems+i+1, ec->elems+i, sizeof(mrb_econv_elem_t)*(ec->num_trans-i));
+
+  ec->elems[i].tc = mrb_transcoding_open_by_transcoder(tr, 0);
+  ec->elems[i].out_buf_start = p;
+  ec->elems[i].out_buf_end = p + bufsize;
+  ec->elems[i].out_data_start = p;
+  ec->elems[i].out_data_end = p;
+  ec->elems[i].last_result = econv_source_buffer_empty;
+
+  ec->num_trans++;
+
+  if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding)) {
+    /* scan from the end for the last stage that is a real transcoder */
+    for (j = ec->num_trans-1; i <= j; j--) {
+      mrb_transcoding *tc = ec->elems[j].tc;
+      const mrb_transcoder *tr2 = tc->transcoder;
+      if (!DECORATOR_P(tr2->src_encoding, tr2->dst_encoding)) {
+        ec->last_tc = tc;
+        break;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Build a converter whose pipeline consists of the `n` given registry
+ * entries, in order.  Every entry must yield a transcoder; if any does not,
+ * NULL is returned before anything is allocated.  A failure while adding a
+ * stage closes the partially built converter and also returns NULL.
+ */
+static mrb_econv_t *
+mrb_econv_open_by_transcoder_entries(mrb_state *mrb, int n, transcoder_entry_t **entries)
+{
+  mrb_econv_t *conv;
+  int k;
+
+  /* pass 1: make sure every transcoder is available */
+  for (k = 0; k < n; k++) {
+    if (load_transcoder_entry(mrb, entries[k]) == NULL) {
+      return NULL;
+    }
+  }
+
+  conv = mrb_econv_alloc(n);
+
+  /* pass 2: append the stages one after another */
+  for (k = 0; k < n; k++) {
+    const mrb_transcoder *tr = load_transcoder_entry(mrb, entries[k]);
+    int status = mrb_econv_add_transcoder_at(mrb, conv, tr, conv->num_trans);
+
+    if (status == -1) {
+      mrb_econv_close(conv);
+      return NULL;
+    }
+  }
+
+  return conv;
+}
+
+/* Accumulator handed to trans_open_i() through transcode_search_path(). */
+struct trans_open_t {
+ transcoder_entry_t **entries; /* allocated by the first callback invocation */
+ int num_additional; /* extra slots to reserve beyond the path length */
+};
+
+/*
+ * transcode_search_path() callback: store the registry entry for the hop
+ * sname -> dname at index `depth` of toarg->entries.
+ *
+ * The array is allocated on the first invocation.  The search reports the
+ * last hop first, so that first `depth` is the highest index and
+ * depth + 1 + num_additional elements are enough for the whole path.
+ * The element count must be parenthesised before multiplying by the element
+ * size; without the parentheses only `depth` elements plus a few bytes were
+ * allocated and entries[depth] was written past the end of the block.
+ */
+static void
+trans_open_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg)
+{
+  struct trans_open_t *toarg = arg;
+
+  if (!toarg->entries) {
+    toarg->entries = malloc(sizeof(transcoder_entry_t*)*(depth+1+toarg->num_additional));
+  }
+  toarg->entries[depth] = get_transcoder_entry(sname, dname);
+}
+
+/*
+ * Open a converter from `sname` to `dname` without decorators.
+ *
+ * Both names are first looked up in the encoding table (the results are not
+ * used further here).  When both names are empty the converter has no
+ * stages; otherwise the registry is searched for a conversion path and NULL
+ * is returned when none exists.  On success the flags and the two name
+ * pointers are stored in the converter.
+ */
+static mrb_econv_t *
+mrb_econv_open0(mrb_state *mrb, const char *sname, const char *dname, int ecflags)
+{
+  struct trans_open_t toarg;
+  mrb_encoding *senc = NULL;
+  mrb_encoding *denc = NULL;
+  mrb_econv_t *conv;
+  int sidx, didx;
+  int hops = 0;
+
+  if (*sname) {
+    sidx = mrb_enc_find_index(mrb, sname);
+    if (0 <= sidx) {
+      senc = mrb_enc_from_index(mrb, sidx);
+    }
+  }
+
+  if (*dname) {
+    didx = mrb_enc_find_index(mrb, dname);
+    if (0 <= didx) {
+      denc = mrb_enc_from_index(mrb, didx);
+    }
+  }
+
+  toarg.entries = NULL;
+  toarg.num_additional = 0;
+
+  /* an empty/empty pair needs no path: zero stages, no entries */
+  if (*sname != '\0' || *dname != '\0') {
+    hops = transcode_search_path(mrb, sname, dname, trans_open_i, (void *)&toarg);
+    if (hops < 0) {
+      xfree(toarg.entries);
+      return NULL;
+    }
+  }
+
+  conv = mrb_econv_open_by_transcoder_entries(mrb, hops, toarg.entries);
+  xfree(toarg.entries);
+  if (conv == NULL) {
+    return NULL;
+  }
+
+  conv->flags = ecflags;
+  conv->source_encoding_name = sname;
+  conv->destination_encoding_name = dname;
+
+  return conv;
+}
+
+#define MAX_ECFLAGS_DECORATORS 32
+
+/*
+ * Translate the decorator bits of `ecflags` into decorator names, written
+ * to decorators_ret in a fixed order (XML decorators first, then newline
+ * decorators).  Returns how many names were stored, or -1 for conflicting
+ * combinations: CRLF with CR, either of those with universal newline, or
+ * XML text with XML attribute content.
+ */
+static int
+decorator_names(int ecflags, const char **decorators_ret)
+{
+  const int want_crlf = (ecflags & ECONV_CRLF_NEWLINE_DECORATOR) != 0;
+  const int want_cr = (ecflags & ECONV_CR_NEWLINE_DECORATOR) != 0;
+  const int want_universal = (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) != 0;
+  const int want_xml_text = (ecflags & ECONV_XML_TEXT_DECORATOR) != 0;
+  const int want_xml_attr_content = (ecflags & ECONV_XML_ATTR_CONTENT_DECORATOR) != 0;
+  const int want_xml_attr_quote = (ecflags & ECONV_XML_ATTR_QUOTE_DECORATOR) != 0;
+  int count = 0;
+
+  if (want_crlf && want_cr)
+    return -1;
+  if ((want_crlf || want_cr) && want_universal)
+    return -1;
+  if (want_xml_text && want_xml_attr_content)
+    return -1;
+
+  if (want_xml_text)
+    decorators_ret[count++] = "xml_text_escape";
+  if (want_xml_attr_content)
+    decorators_ret[count++] = "xml_attr_content_escape";
+  if (want_xml_attr_quote)
+    decorators_ret[count++] = "xml_attr_quote";
+
+  if (want_crlf)
+    decorators_ret[count++] = "crlf_newline";
+  if (want_cr)
+    decorators_ret[count++] = "cr_newline";
+  if (want_universal)
+    decorators_ret[count++] = "universal_newline";
+
+  return count;
+}
+
+/*
+ * Open a converter from `sname` to `dname` and append the decorators
+ * requested in `ecflags`.  The base converter is opened with only the
+ * error-handler bits; the remaining bits are OR-ed into ec->flags after all
+ * decorators have been attached.  Returns NULL when the decorator flags
+ * conflict, no conversion path exists, or a decorator cannot be added.
+ */
+mrb_econv_t *
+mrb_econv_open(mrb_state *mrb, const char *sname, const char *dname, int ecflags)
+{
+  const char *decorators[MAX_ECFLAGS_DECORATORS];
+  mrb_econv_t *conv;
+  int count;
+  int k;
+
+  count = decorator_names(ecflags, decorators);
+  if (count == -1) {
+    return NULL;
+  }
+
+  conv = mrb_econv_open0(mrb, sname, dname, ecflags & ECONV_ERROR_HANDLER_MASK);
+  if (conv == NULL) {
+    return NULL;
+  }
+
+  k = 0;
+  while (k < count) {
+    if (mrb_econv_decorate_at_last(mrb, conv, decorators[k]) == -1) {
+      mrb_econv_close(conv);
+      return NULL;
+    }
+    k++;
+  }
+
+  conv->flags |= ecflags & ~ECONV_ERROR_HANDLER_MASK;
+
+  return conv;
+}
+
+/*
+ * Run the pipeline stages start..num_trans-1 of `ec` over and over until a
+ * complete pass moves no data.
+ *
+ * Stage 0 reads from the caller's input; every later stage reads from the
+ * output buffer of the stage before it.  The last stage writes to the
+ * caller's output; every earlier stage writes to its own buffer.
+ *
+ * Returns the index of the first stage that reported an invalid byte
+ * sequence, incomplete input, undefined conversion or after-output, or -1
+ * when the sweep ran out of progress without such a report.
+ */
+static int
+trans_sweep(mrb_state *mrb, mrb_econv_t *ec,
+ const unsigned char **input_ptr, const unsigned char *input_stop,
+ unsigned char **output_ptr, unsigned char *output_stop,
+ int flags,
+ int start)
+{
+ int try;
+ int i, f;
+
+ const unsigned char **ipp, *is, *iold;
+ unsigned char **opp, *os, *oold;
+ mrb_econv_result_t res;
+
+ try = 1;
+ while (try) {
+ try = 0;
+ for (i = start; i < ec->num_trans; i++) {
+ mrb_econv_elem_t *te = &ec->elems[i];
+
+ /* choose this stage's input */
+ if (i == 0) {
+ ipp = input_ptr;
+ is = input_stop;
+ }
+ else {
+ mrb_econv_elem_t *prev_te = &ec->elems[i-1];
+ ipp = (const unsigned char **)&prev_te->out_data_start;
+ is = prev_te->out_data_end;
+ }
+
+ /* choose this stage's output */
+ if (i == ec->num_trans-1) {
+ opp = output_ptr;
+ os = output_stop;
+ }
+ else {
+ /* slide pending data to the front of the buffer to maximise free space */
+ if (te->out_buf_start != te->out_data_start) {
+ ssize_t len = te->out_data_end - te->out_data_start;
+ ssize_t off = te->out_data_start - te->out_buf_start;
+ memmove(te->out_buf_start, te->out_data_start, len);
+ te->out_data_start = te->out_buf_start;
+ te->out_data_end -= off;
+ }
+ opp = &te->out_data_end;
+ os = te->out_buf_end;
+ }
+
+ f = flags;
+ /* stages other than the first unfinished one may still receive more input */
+ if (ec->num_finished != i)
+ f |= ECONV_PARTIAL_INPUT;
+ /* AFTER_OUTPUT is honoured once, by stage 0 only; later passes skip stage 0 */
+ if (i == 0 && (flags & ECONV_AFTER_OUTPUT)) {
+ start = 1;
+ flags &= ~ECONV_AFTER_OUTPUT;
+ }
+ if (i != 0)
+ f &= ~ECONV_AFTER_OUTPUT;
+ iold = *ipp;
+ oold = *opp;
+ te->last_result = res = mrb_transcoding_convert(mrb, te->tc, ipp, is, opp, os, f);
+ /* any movement of input or output means another pass is worthwhile */
+ if (iold != *ipp || oold != *opp)
+ try = 1;
+
+ switch (res) {
+ case econv_invalid_byte_sequence:
+ case econv_incomplete_input:
+ case econv_undefined_conversion:
+ case econv_after_output:
+ return i;
+
+ case econv_destination_buffer_full:
+ case econv_source_buffer_empty:
+ break;
+
+ case econv_finished:
+ ec->num_finished = i+1;
+ break;
+ }
+ }
+ }
+ return -1;
+}
+
+static mrb_econv_result_t /* Drive the whole converter chain once and report a single result.
+ *
+ * A NULL input_ptr or output_ptr is replaced by an empty range.  The stored
+ * last_result of each element is inspected (last element first) to decide
+ * where sweeping has to resume; trans_sweep() is then called until it reports
+ * nothing (-1) or reports the last element.
+ *
+ * The returned value is the last_result of the last-most element whose result
+ * is not econv_source_buffer_empty; its index is stored through
+ * result_position_ptr when that pointer is non-NULL (-1 when every element is
+ * source-buffer-empty).
+ */
+mrb_trans_conv(mrb_state *mrb, mrb_econv_t *ec,
+ const unsigned char **input_ptr, const unsigned char *input_stop,
+ unsigned char **output_ptr, unsigned char *output_stop,
+ int flags,
+ int *result_position_ptr)
+{
+ int i;
+ int needreport_index;
+ int sweep_start;
+
+ unsigned char empty_buf;
+ unsigned char *empty_ptr = &empty_buf;
+
+ if (!input_ptr) { /* no input supplied: use an empty range */
+ input_ptr = (const unsigned char **)&empty_ptr;
+ input_stop = empty_ptr;
+ }
+
+ if (!output_ptr) { /* no output supplied: use an empty range */
+ output_ptr = &empty_ptr;
+ output_stop = empty_ptr;
+ }
+
+ if (ec->elems[0].last_result == econv_after_output) /* a stored after_output on element 0 is cleared before scanning */
+ ec->elems[0].last_result = econv_source_buffer_empty;
+
+ needreport_index = -1;
+ for (i = ec->num_trans-1; 0 <= i; i--) { /* scan from the last element backwards for a stored reportable result */
+ switch (ec->elems[i].last_result) {
+ case econv_invalid_byte_sequence:
+ case econv_incomplete_input:
+ case econv_undefined_conversion:
+ case econv_after_output:
+ case econv_finished:
+ sweep_start = i+1; /* resume sweeping right after this element */
+ needreport_index = i;
+ goto found_needreport;
+
+ case econv_destination_buffer_full:
+ case econv_source_buffer_empty:
+ break;
+
+ default:
+ mrb_bug("unexpected transcode last result");
+ }
+ }
+
+ /* /^[sd]+$/ is confirmed. but actually /^s*d*$/. */
+
+ if (ec->elems[ec->num_trans-1].last_result == econv_destination_buffer_full &&
+ (flags & ECONV_AFTER_OUTPUT)) { /* last element was full and AFTER_OUTPUT is requested: first convert with no new input */
+ mrb_econv_result_t res;
+
+ res = mrb_trans_conv(mrb, ec, NULL, NULL, output_ptr, output_stop,
+ (flags & ~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT,
+ result_position_ptr);
+
+ if (res == econv_source_buffer_empty)
+ return econv_after_output;
+ return res;
+ }
+
+ sweep_start = 0;
+
+ found_needreport:
+
+ do { /* keep sweeping past reporting elements until nothing reports or the last element reports */
+ needreport_index = trans_sweep(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, sweep_start);
+ sweep_start = needreport_index + 1;
+ } while (needreport_index != -1 && needreport_index != ec->num_trans-1);
+
+ for (i = ec->num_trans-1; 0 <= i; i--) { /* pick the last-most element that has something other than source-buffer-empty */
+ if (ec->elems[i].last_result != econv_source_buffer_empty) {
+ mrb_econv_result_t res = ec->elems[i].last_result;
+ if (res == econv_invalid_byte_sequence ||
+ res == econv_incomplete_input ||
+ res == econv_undefined_conversion ||
+ res == econv_after_output) { /* these results are reported once and then cleared */
+ ec->elems[i].last_result = econv_source_buffer_empty;
+ }
+ if (result_position_ptr)
+ *result_position_ptr = i;
+ return res;
+ }
+ }
+ if (result_position_ptr)
+ *result_position_ptr = -1;
+ return econv_source_buffer_empty;
+}
+
+static mrb_econv_result_t /* Perform one conversion step on ec and record the outcome in ec->last_error.
+ *
+ * Order of work:
+ *  1. With no transcoders (num_trans == 0) bytes are copied verbatim, first
+ *     from ec's in_buf and then from the caller's input.
+ *  2. Otherwise data pending in the last element's out buffer is copied to
+ *     the output first.
+ *  3. Then data pending in ec's in_buf is converted.
+ *  4. Finally the caller's input is converted through mrb_trans_conv().
+ *
+ * Returns the conversion result.  For invalid-byte-sequence, incomplete-input
+ * and undefined-conversion results the failing transcoding's details are
+ * copied into ec->last_error.
+ */
+mrb_econv_convert0(mrb_state *mrb, mrb_econv_t *ec,
+ const unsigned char **input_ptr, const unsigned char *input_stop,
+ unsigned char **output_ptr, unsigned char *output_stop,
+ int flags)
+{
+ mrb_econv_result_t res;
+ int result_position; /* index of the element that produced res; only set by mrb_trans_conv() */
+ int has_output = 0; /* set when step 2 copied buffered data to the output */
+
+ memset(&ec->last_error, 0, sizeof(ec->last_error));
+
+ if (ec->num_trans == 0) { /* no transcoder: plain byte copy */
+ size_t len;
+ if (ec->in_buf_start && ec->in_data_start != ec->in_data_end) { /* bytes pending in ec's in_buf go out first */
+ if (output_stop - *output_ptr < ec->in_data_end - ec->in_data_start) { /* not enough room: copy what fits */
+ len = output_stop - *output_ptr;
+ memcpy(*output_ptr, ec->in_data_start, len);
+ *output_ptr = output_stop;
+ ec->in_data_start += len;
+ res = econv_destination_buffer_full;
+ goto gotresult;
+ }
+ len = ec->in_data_end - ec->in_data_start;
+ memcpy(*output_ptr, ec->in_data_start, len);
+ *output_ptr += len;
+ ec->in_data_start = ec->in_data_end = ec->in_buf_start;
+ if (flags & ECONV_AFTER_OUTPUT) {
+ res = econv_after_output;
+ goto gotresult;
+ }
+ }
+ if (output_stop - *output_ptr < input_stop - *input_ptr) { /* len = min(output room, input size) */
+ len = output_stop - *output_ptr;
+ }
+ else {
+ len = input_stop - *input_ptr;
+ }
+ if (0 < len && (flags & ECONV_AFTER_OUTPUT)) { /* AFTER_OUTPUT: copy a single byte and stop */
+ *(*output_ptr)++ = *(*input_ptr)++;
+ res = econv_after_output;
+ goto gotresult;
+ }
+ memcpy(*output_ptr, *input_ptr, len);
+ *output_ptr += len;
+ *input_ptr += len;
+ if (*input_ptr != input_stop)
+ res = econv_destination_buffer_full;
+ else if (flags & ECONV_PARTIAL_INPUT)
+ res = econv_source_buffer_empty;
+ else
+ res = econv_finished;
+ goto gotresult;
+ }
+
+ if (ec->elems[ec->num_trans-1].out_data_start) { /* the last element has an out buffer: copy any pending data to the output */
+ unsigned char *data_start = ec->elems[ec->num_trans-1].out_data_start;
+ unsigned char *data_end = ec->elems[ec->num_trans-1].out_data_end;
+ if (data_start != data_end) {
+ size_t len;
+ if (output_stop - *output_ptr < data_end - data_start) { /* not enough room: copy what fits */
+ len = output_stop - *output_ptr;
+ memcpy(*output_ptr, data_start, len);
+ *output_ptr = output_stop;
+ ec->elems[ec->num_trans-1].out_data_start += len;
+ res = econv_destination_buffer_full;
+ goto gotresult;
+ }
+ len = data_end - data_start;
+ memcpy(*output_ptr, data_start, len);
+ *output_ptr += len;
+ ec->elems[ec->num_trans-1].out_data_start =
+ ec->elems[ec->num_trans-1].out_data_end =
+ ec->elems[ec->num_trans-1].out_buf_start;
+ has_output = 1;
+ }
+ }
+
+ if (ec->in_buf_start &&
+ ec->in_data_start != ec->in_data_end) { /* convert bytes pending in ec's in_buf before the caller's input */
+ res = mrb_trans_conv(mrb, ec, (const unsigned char **)&ec->in_data_start, ec->in_data_end, output_ptr, output_stop,
+ (flags&~ECONV_AFTER_OUTPUT)|ECONV_PARTIAL_INPUT, &result_position);
+ if (res != econv_source_buffer_empty)
+ goto gotresult;
+ }
+
+ if (has_output &&
+ (flags & ECONV_AFTER_OUTPUT) &&
+ *input_ptr != input_stop) { /* output already produced: convert with an empty input range so no caller input is consumed */
+ input_stop = *input_ptr;
+ res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
+ if (res == econv_source_buffer_empty)
+ res = econv_after_output;
+ }
+ else if ((flags & ECONV_AFTER_OUTPUT) ||
+ ec->num_trans == 1) {
+ res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
+ }
+ else { /* caller did not ask for AFTER_OUTPUT: request it internally and loop until another result appears */
+ flags |= ECONV_AFTER_OUTPUT;
+ do {
+ res = mrb_trans_conv(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags, &result_position);
+ } while (res == econv_after_output);
+ }
+
+ gotresult:
+ ec->last_error.result = res;
+ if (res == econv_invalid_byte_sequence ||
+ res == econv_incomplete_input ||
+ res == econv_undefined_conversion) { /* error results only come from mrb_trans_conv(), which set result_position */
+ mrb_transcoding *error_tc = ec->elems[result_position].tc;
+ ec->last_error.error_tc = error_tc;
+ ec->last_error.source_encoding = error_tc->transcoder->src_encoding;
+ ec->last_error.destination_encoding = error_tc->transcoder->dst_encoding;
+ ec->last_error.error_bytes_start = TRANSCODING_READBUF(error_tc);
+ ec->last_error.error_bytes_len = error_tc->recognized_len;
+ ec->last_error.readagain_len = error_tc->readagain_len;
+ }
+
+ return res;
+}
+
+static int output_replacement_character(mrb_state *mrb, mrb_econv_t *ec);
+
+/*
+ * Replace the bytes recorded in ec->last_error with XML hexadecimal
+ * character references ("&#xHHHH;"), one per character.
+ *
+ * The error bytes are interpreted as UTF-32BE; when the failing source
+ * encoding is something else they are first converted to UTF-32BE with
+ * allocate_converted_string().  Each 4-byte unit is formatted and handed to
+ * mrb_econv_insert_output() as US-ASCII.
+ *
+ * Returns 0 on success and -1 on failure (conversion failed, the UTF-32BE
+ * length is not a multiple of 4, or insertion failed).
+ */
+static int
+output_hex_charref(mrb_state *mrb, mrb_econv_t *ec)
+{
+  int status = -1;
+  unsigned char utfbuf[1024];
+  const unsigned char *utf;
+  size_t utf_len;
+  int utf_allocated = 0;
+  char charef_buf[16];
+  const unsigned char *p;
+
+  if (encoding_equal(ec->last_error.source_encoding, "UTF-32BE")) {
+    utf = ec->last_error.error_bytes_start;
+    utf_len = ec->last_error.error_bytes_len;
+  }
+  else {
+    utf = allocate_converted_string(mrb,
+                                    ec->last_error.source_encoding, "UTF-32BE",
+                                    ec->last_error.error_bytes_start, ec->last_error.error_bytes_len,
+                                    utfbuf, sizeof(utfbuf),
+                                    &utf_len);
+    if (!utf)
+      return -1;
+    /* only a buffer that is neither ours nor the error bytes must be freed */
+    if (utf != utfbuf && utf != ec->last_error.error_bytes_start)
+      utf_allocated = 1;
+  }
+
+  if (utf_len % 4 != 0)
+    goto done;
+
+  for (p = utf; 4 <= utf_len; p += 4, utf_len -= 4) {
+    /*
+     * Widen each byte to unsigned int before shifting: an unsigned char is
+     * promoted to (signed) int, and shifting a value >= 0x80 left by 24
+     * overflows int, which is undefined behavior.
+     */
+    unsigned int u = ((unsigned int)p[0] << 24) |
+                     ((unsigned int)p[1] << 16) |
+                     ((unsigned int)p[2] << 8) |
+                      (unsigned int)p[3];
+
+    snprintf(charef_buf, sizeof(charef_buf), "&#x%X;", u);
+
+    if (mrb_econv_insert_output(mrb, ec, (unsigned char *)charef_buf, strlen(charef_buf), "US-ASCII") == -1)
+      goto done;
+  }
+
+  status = 0;
+
+ done:
+  if (utf_allocated)
+    xfree((void *)utf);
+  return status;
+}
+
+/*
+ * Public conversion entry point.
+ *
+ * Marks the converter as started, substitutes an empty range for a NULL
+ * input_ptr / output_ptr, and then runs mrb_econv_convert0().  When the result
+ * is an error for which ec->flags selects a replacement policy, the
+ * replacement is inserted into the output and the conversion is retried;
+ * otherwise the result is returned unchanged.
+ */
+mrb_econv_result_t
+mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec,
+                  const unsigned char **input_ptr, const unsigned char *input_stop,
+                  unsigned char **output_ptr, unsigned char *output_stop,
+                  int flags)
+{
+  unsigned char dummy;
+  unsigned char *dummy_ptr = &dummy;
+  mrb_econv_result_t result;
+
+  ec->started = 1;
+
+  if (input_ptr == NULL) {
+    input_ptr = (const unsigned char **)&dummy_ptr;
+    input_stop = dummy_ptr;
+  }
+  if (output_ptr == NULL) {
+    output_ptr = &dummy_ptr;
+    output_stop = dummy_ptr;
+  }
+
+  for (;;) {
+    result = mrb_econv_convert0(mrb, ec, input_ptr, input_stop, output_ptr, output_stop, flags);
+
+    /* invalid byte sequence */
+    /* todo: add more alternative behaviors */
+    if (result == econv_invalid_byte_sequence || result == econv_incomplete_input) {
+      if ((ec->flags & ECONV_INVALID_MASK) == ECONV_INVALID_REPLACE &&
+          output_replacement_character(mrb, ec) == 0) {
+        continue;
+      }
+    }
+
+    /* character is valid in the source encoding but has no counterpart
+     * in the destination encoding */
+    /* todo: add more alternative behaviors */
+    if (result == econv_undefined_conversion) {
+      int undef_policy = ec->flags & ECONV_UNDEF_MASK;
+
+      if (undef_policy == ECONV_UNDEF_REPLACE) {
+        if (output_replacement_character(mrb, ec) == 0)
+          continue;
+      }
+      else if (undef_policy == ECONV_UNDEF_HEX_CHARREF) {
+        if (output_hex_charref(mrb, ec) == 0)
+          continue;
+      }
+    }
+
+    return result;
+  }
+}
+
+/*
+ * Name of the encoding that strings passed to mrb_econv_insert_output()
+ * should be in.  Returns "" when the converter has no last transcoding; the
+ * source encoding of the last transcoder when it is an ASCII-compatible
+ * encoder; its destination encoding otherwise.
+ */
+const char *
+mrb_econv_encoding_to_insert_output(mrb_econv_t *ec)
+{
+  const mrb_transcoder *last;
+
+  if (ec->last_tc == NULL)
+    return "";
+
+  last = ec->last_tc->transcoder;
+  return (last->asciicompat_type == asciicompat_encoder) ? last->src_encoding
+                                                         : last->dst_encoding;
+}
+
+/*
+ * Convert str[0..len) from encoding sname to encoding dname.
+ *
+ * The result is written into caller_dst_buf when one is given and large
+ * enough; otherwise a buffer is obtained with xmalloc/xrealloc and doubled
+ * until the converted text fits.  On success the buffer is returned and its
+ * length stored in *dst_len_ptr; the caller frees the result when it is not
+ * caller_dst_buf.  Returns NULL when no converter exists, the conversion does
+ * not finish cleanly, or the buffer size would overflow.
+ */
+static unsigned char *
+allocate_converted_string(mrb_state *mrb,
+                          const char *sname, const char *dname,
+                          const unsigned char *str, size_t len,
+                          unsigned char *caller_dst_buf, size_t caller_dst_bufsize,
+                          size_t *dst_len_ptr)
+{
+  unsigned char *dst_str;
+  size_t dst_len = 0;
+  size_t dst_bufsize;
+  mrb_econv_t *ec;
+  mrb_econv_result_t res;
+  const unsigned char *sp = str;
+  unsigned char *dp;
+
+  /* initial capacity: the caller's buffer, or the source length (at least 1) */
+  if (caller_dst_buf)
+    dst_bufsize = caller_dst_bufsize;
+  else
+    dst_bufsize = (len == 0) ? 1 : len;
+
+  ec = mrb_econv_open(mrb, sname, dname, 0);
+  if (ec == NULL)
+    return NULL;
+
+  dst_str = caller_dst_buf ? caller_dst_buf : xmalloc(dst_bufsize);
+
+  for (;;) {
+    dp = dst_str + dst_len;
+    res = mrb_econv_convert(mrb, ec, &sp, str+len, &dp, dst_str+dst_bufsize, 0);
+    dst_len = dp - dst_str;
+    if (res != econv_destination_buffer_full)
+      break;
+
+    /* grow: double the capacity, refusing to overflow size_t */
+    if (SIZE_MAX/2 < dst_bufsize)
+      goto fail;
+    dst_bufsize *= 2;
+
+    if (dst_str == caller_dst_buf) {
+      /* the caller's buffer cannot be reallocated; move to the heap */
+      unsigned char *heap_buf = xmalloc(dst_bufsize);
+      memcpy(heap_buf, dst_str, dst_bufsize/2);
+      dst_str = heap_buf;
+    }
+    else {
+      dst_str = xrealloc(dst_str, dst_bufsize);
+    }
+  }
+
+  if (res != econv_finished)
+    goto fail;
+
+  mrb_econv_close(ec);
+  *dst_len_ptr = dst_len;
+  return dst_str;
+
+ fail:
+  if (dst_str != caller_dst_buf)
+    xfree(dst_str);
+  mrb_econv_close(ec);
+  return NULL;
+}
+
+/*
+ * Insert str[0..len) into the converter's output stream.
+ *
+ * The string is first converted from str_encoding to the encoding reported by
+ * mrb_econv_encoding_to_insert_output() when the two differ.  It is then
+ * appended to a buffer chosen as follows:
+ *   - no transcoders:                      ec's in_buf;
+ *   - last transcoder is an ASCII-compatible encoder: the buffer feeding that
+ *     transcoder (ec's in_buf, or the previous element's out buffer), followed
+ *     by the transcoder's read-again bytes;
+ *   - otherwise:                           the last element's out buffer.
+ * The buffer is allocated or enlarged as needed.
+ *
+ * result: 0:success -1:failure
+ */
+int
+mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec,
+                        const unsigned char *str, size_t len, const char *str_encoding)
+{
+  const char *insert_encoding = mrb_econv_encoding_to_insert_output(ec);
+  unsigned char insert_buf[4096];
+  const unsigned char *insert_str = NULL;
+  size_t insert_len;
+
+  int last_trans_index;
+  mrb_transcoding *tc;
+
+  unsigned char **buf_start_p;
+  unsigned char **data_start_p;
+  unsigned char **data_end_p;
+  unsigned char **buf_end_p;
+
+  size_t need;
+  int status = -1;
+
+  ec->started = 1;
+
+  if (len == 0)
+    return 0;
+
+  if (encoding_equal(insert_encoding, str_encoding)) {
+    insert_str = str;
+    insert_len = len;
+  }
+  else {
+    insert_str = allocate_converted_string(mrb, str_encoding, insert_encoding,
+                                           str, len, insert_buf, sizeof(insert_buf), &insert_len);
+    if (insert_str == NULL)
+      return -1;
+  }
+
+  need = insert_len;
+
+  last_trans_index = ec->num_trans-1;
+  if (ec->num_trans == 0) {
+    tc = NULL;
+    buf_start_p = &ec->in_buf_start;
+    data_start_p = &ec->in_data_start;
+    data_end_p = &ec->in_data_end;
+    buf_end_p = &ec->in_buf_end;
+  }
+  else if (ec->elems[last_trans_index].tc->transcoder->asciicompat_type == asciicompat_encoder) {
+    tc = ec->elems[last_trans_index].tc;
+    /* the read-again bytes are appended after the inserted string */
+    need += tc->readagain_len;
+    if (need < insert_len)       /* size_t overflow */
+      goto done;
+    if (last_trans_index == 0) {
+      buf_start_p = &ec->in_buf_start;
+      data_start_p = &ec->in_data_start;
+      data_end_p = &ec->in_data_end;
+      buf_end_p = &ec->in_buf_end;
+    }
+    else {
+      mrb_econv_elem_t *ee = &ec->elems[last_trans_index-1];
+      buf_start_p = &ee->out_buf_start;
+      data_start_p = &ee->out_data_start;
+      data_end_p = &ee->out_data_end;
+      buf_end_p = &ee->out_buf_end;
+    }
+  }
+  else {
+    mrb_econv_elem_t *ee = &ec->elems[last_trans_index];
+    buf_start_p = &ee->out_buf_start;
+    data_start_p = &ee->out_data_start;
+    data_end_p = &ee->out_data_end;
+    buf_end_p = &ee->out_buf_end;
+    tc = ec->elems[last_trans_index].tc;
+  }
+
+  if (*buf_start_p == NULL) {
+    unsigned char *buf = xmalloc(need);
+    *buf_start_p = buf;
+    *data_start_p = buf;
+    *data_end_p = buf;
+    *buf_end_p = buf+need;
+  }
+  else if ((size_t)(*buf_end_p - *data_end_p) < need) {
+    /* not enough room at the tail: slide pending data to the buffer start */
+    size_t pending = *data_end_p - *data_start_p;
+
+    memmove(*buf_start_p, *data_start_p, pending);
+    *data_start_p = *buf_start_p;
+    *data_end_p = *buf_start_p + pending;
+
+    if ((size_t)(*buf_end_p - *data_end_p) < need) {
+      /* still too small: enlarge the buffer */
+      unsigned char *buf;
+      size_t s = pending + need;
+      if (s < need)              /* size_t overflow */
+        goto done;
+      /*
+       * The pending length was captured before xrealloc(); the old buffer
+       * pointers must not be used for arithmetic once the block may have
+       * been moved or released.
+       */
+      buf = xrealloc(*buf_start_p, s);
+      *buf_start_p = buf;
+      *data_start_p = buf;
+      *data_end_p = buf + pending;
+      *buf_end_p = buf + s;
+    }
+  }
+
+  memcpy(*data_end_p, insert_str, insert_len);
+  *data_end_p += insert_len;
+  if (tc && tc->transcoder->asciicompat_type == asciicompat_encoder) {
+    memcpy(*data_end_p, TRANSCODING_READBUF(tc)+tc->recognized_len, tc->readagain_len);
+    *data_end_p += tc->readagain_len;
+    tc->readagain_len = 0;
+  }
+
+  status = 0;
+
+ done:
+  /* free the converted copy unless it is the caller's string or our stack buffer */
+  if (insert_str != str && insert_str != insert_buf)
+    xfree((void*)insert_str);
+  return status;
+}
+
+/*
+ * Release a converter: its owned replacement string (if allocated), every
+ * transcoding together with that element's output buffer, the input buffer,
+ * the element array and finally the converter itself.
+ */
+void
+mrb_econv_close(mrb_econv_t *ec)
+{
+  int idx = 0;
+
+  if (ec->replacement_allocated)
+    xfree((void *)ec->replacement_str);
+
+  while (idx < ec->num_trans) {
+    mrb_econv_elem_t *elem = &ec->elems[idx];
+
+    mrb_transcoding_close(elem->tc);
+    if (elem->out_buf_start)
+      xfree(elem->out_buf_start);
+    idx++;
+  }
+
+  xfree(ec->in_buf_start);
+  xfree(ec->elems);
+  xfree(ec);
+}
+
+/*
+ * Total number of bytes attributed to a converter: the struct itself, an
+ * allocated replacement string, each transcoding and its output buffer, the
+ * input buffer and the allocated element array.
+ */
+size_t
+mrb_econv_memsize(mrb_econv_t *ec)
+{
+  size_t total = sizeof(mrb_econv_t);
+  int idx;
+
+  if (ec->replacement_allocated)
+    total += ec->replacement_len;
+
+  for (idx = 0; idx < ec->num_trans; idx++) {
+    const mrb_econv_elem_t *elem = &ec->elems[idx];
+
+    total += mrb_transcoding_memsize(elem->tc);
+    if (elem->out_buf_start)
+      total += elem->out_buf_end - elem->out_buf_start;
+  }
+
+  total += ec->in_buf_end - ec->in_buf_start;
+  total += sizeof(mrb_econv_elem_t) * ec->num_allocated;
+
+  return total;
+}
+
+/*
+ * Number of bytes that mrb_econv_putback() can hand back: the read-again
+ * length of the first transcoding, clamped to INT_MAX, or 0 when the
+ * converter has no transcoders.
+ */
+int
+mrb_econv_putbackable(mrb_econv_t *ec)
+{
+  if (ec->num_trans != 0) {
+#if SIZEOF_SIZE_T > SIZEOF_INT
+    if (ec->elems[0].tc->readagain_len > INT_MAX) return INT_MAX;
+#endif
+    return (int)ec->elems[0].tc->readagain_len;
+  }
+  return 0;
+}
+
+/*
+ * Copy the last n read-again bytes of the first transcoding into p and
+ * shorten the read-again region by n.  Does nothing when the converter has
+ * no transcoders or n is 0.
+ */
+void
+mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n)
+{
+  if (ec->num_trans != 0 && n != 0) {
+    mrb_transcoding *first = ec->elems[0].tc;
+
+    memcpy(p, TRANSCODING_READBUF(first) + first->recognized_len + first->readagain_len - n, n);
+    first->readagain_len -= n;
+  }
+}
+
+struct asciicompat_encoding_t { /* argument block handed to asciicompat_encoding_i() through st_foreach() */
+ const char *ascii_compat_name; /* out: dst_encoding of the matching decoder; stays NULL when none matches */
+ const char *ascii_incompat_name; /* in: the encoding name being looked up */
+};
+
+/*
+ * st_foreach() callback used by mrb_econv_asciicompat_encoding().
+ * Skips decorator entries; for the first entry whose loaded transcoder is an
+ * ASCII-compatible decoder, stores its destination encoding in the
+ * asciicompat_encoding_t passed as arg and stops the iteration.
+ */
+static int
+asciicompat_encoding_i(mrb_state *mrb, st_data_t key, st_data_t val, st_data_t arg)
+{
+  struct asciicompat_encoding_t *result = (struct asciicompat_encoding_t *)arg;
+  transcoder_entry_t *entry = (transcoder_entry_t *)val;
+
+  if (!DECORATOR_P(entry->sname, entry->dname)) {
+    const mrb_transcoder *tr = load_transcoder_entry(mrb, entry);
+
+    if (tr != NULL && tr->asciicompat_type == asciicompat_decoder) {
+      result->ascii_compat_name = tr->dst_encoding;
+      return ST_STOP;
+    }
+  }
+  return ST_CONTINUE;
+}
+
+/*
+ * Return the name of the ASCII-compatible encoding paired with the given
+ * ASCII-incompatible encoding, or NULL when none is registered.
+ */
+const char *
+mrb_econv_asciicompat_encoding(const char *ascii_incompat_name)
+{
+  struct asciicompat_encoding_t data;
+  st_data_t found;
+  st_table *by_destination;
+
+  if (st_lookup(transcoder_table, (st_data_t)ascii_incompat_name, &found)) {
+    by_destination = (st_table *)found;
+
+    /*
+     * Assumption:
+     * There is at most one transcoder for
+     * converting from ASCII incompatible encoding.
+     *
+     * For ISO-2022-JP, there is ISO-2022-JP -> stateless-ISO-2022-JP and no others.
+     */
+    if (by_destination->num_entries == 1) {
+      data.ascii_incompat_name = ascii_incompat_name;
+      data.ascii_compat_name = NULL;
+      st_foreach(by_destination, asciicompat_encoding_i, (st_data_t)&data);
+      return data.ascii_compat_name;
+    }
+  }
+
+  return NULL;
+}
+
+/*
+ * Convert len bytes of src starting at byte offset off and append the result
+ * to dst.  A nil dst is replaced by a fresh string buffer (associated with
+ * the converter's destination encoding when it has one).  dst is enlarged and
+ * the conversion repeated for as long as the converter reports a full
+ * destination buffer; mrb_econv_check_error() is called after every round.
+ * Returns dst.
+ */
+mrb_value
+mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long off, long len, mrb_value dst, int flags)
+{
+  unsigned const char *ss, *sp, *se;
+  unsigned char *ds, *dp, *de;
+  mrb_econv_result_t res;
+  int max_output;
+
+  if (mrb_nil_p(dst)) {
+    dst = mrb_str_buf_new(mrb, len);
+    if (ec->destination_encoding)
+      mrb_enc_associate(mrb, dst, ec->destination_encoding);
+  }
+
+  max_output = ec->last_tc ? ec->last_tc->transcoder->max_output : 1;
+
+  do {
+    long dlen = RSTRING_LEN(dst);
+
+    /* make sure the remaining input plus one maximal output unit fits */
+    if (mrb_str_capacity(dst) - dlen < (size_t)len + max_output) {
+      unsigned long new_capa = (unsigned long)dlen + len + max_output;
+      if (LONG_MAX < new_capa)
+        mrb_raise(mrb, E_ARGUMENT_ERROR, "too long string");
+      mrb_str_resize(mrb, dst, new_capa);
+      mrb_str_set_len(mrb, dst, dlen);
+    }
+
+    ss = (const unsigned char *)RSTRING_PTR(src) + off;
+    sp = ss;
+    se = ss + len;
+
+    ds = (unsigned char *)RSTRING_PTR(dst);
+    de = ds + mrb_str_capacity(dst);
+    ds += dlen;
+    dp = ds;
+
+    res = mrb_econv_convert(mrb, ec, &sp, se, &dp, de, flags);
+
+    /* advance past the consumed input and commit the produced output */
+    off += sp - ss;
+    len -= sp - ss;
+    mrb_str_set_len(mrb, dst, dlen + (dp - ds));
+    mrb_econv_check_error(mrb, ec);
+  } while (res == econv_destination_buffer_full);
+
+  return dst;
+}
+
+/* Convert all of src and append the result to dst; returns the destination. */
+mrb_value
+mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags)
+{
+  mrb_value appended = mrb_econv_substr_append(mrb, ec, src, 0, RSTRING_LEN(src), dst, flags);
+
+  return appended;
+}
+
+/* Convert bytesize bytes of src starting at byteoff into a newly created string. */
+mrb_value
+mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags)
+{
+  mrb_value converted = mrb_econv_substr_append(mrb, ec, src, byteoff, bytesize, mrb_nil_value(), flags);
+
+  return converted;
+}
+
+/* Convert all of src into a newly created string. */
+mrb_value
+mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags)
+{
+  mrb_value converted = mrb_econv_substr_append(mrb, ec, src, 0, RSTRING_LEN(src), mrb_nil_value(), flags);
+
+  return converted;
+}
+
+/*
+ * Insert the transcoder registered for sname -> dname at position n of the
+ * converter chain.
+ *
+ * Returns -1 when the converter has already been started, when no such
+ * transcoder entry is registered, or when the entry's transcoder cannot be
+ * loaded; otherwise returns the result of mrb_econv_add_transcoder_at().
+ */
+static int
+mrb_econv_add_converter(mrb_state *mrb, mrb_econv_t *ec, const char *sname, const char *dname, int n)
+{
+  transcoder_entry_t *entry;
+  const mrb_transcoder *tr;
+
+  if (ec->started != 0)
+    return -1;
+
+  entry = get_transcoder_entry(sname, dname);
+  if (!entry)
+    return -1;
+
+  /* loading can fail and yield NULL; do not hand a NULL transcoder on */
+  tr = load_transcoder_entry(mrb, entry);
+  if (!tr)
+    return -1;
+
+  return mrb_econv_add_transcoder_at(mrb, ec, tr, n);
+}
+
+/*
+ * Insert the decorator named decorator_name at position n.  Decorators are
+ * looked up as converters with an empty source name.
+ */
+static int
+mrb_econv_decorate_at(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name, int n)
+{
+  static const char no_source[] = "";
+
+  return mrb_econv_add_converter(mrb, ec, no_source, decorator_name, n);
+}
+
+/*
+ * Add a decorator at the front of the chain.  When the first transcoder is a
+ * non-decorator ASCII-compatible decoder, the decorator goes right after it
+ * (position 1) instead of position 0.
+ */
+int
+mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name)
+{
+  int position = 0;
+
+  if (ec->num_trans != 0) {
+    const mrb_transcoder *first = ec->elems[0].tc->transcoder;
+
+    if (!DECORATOR_P(first->src_encoding, first->dst_encoding) &&
+        first->asciicompat_type == asciicompat_decoder) {
+      position = 1;
+    }
+  }
+
+  return mrb_econv_decorate_at(mrb, ec, decorator_name, position);
+}
+
+/*
+ * Add a decorator at the end of the chain.  When the last transcoder is a
+ * non-decorator ASCII-compatible encoder, the decorator goes right before it
+ * (position num_trans-1) instead of after it.
+ */
+int
+mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name)
+{
+  int position = ec->num_trans;
+
+  if (ec->num_trans != 0) {
+    const mrb_transcoder *last = ec->elems[ec->num_trans-1].tc->transcoder;
+
+    if (!DECORATOR_P(last->src_encoding, last->dst_encoding) &&
+        last->asciicompat_type == asciicompat_encoder) {
+      position = ec->num_trans-1;
+    }
+  }
+
+  return mrb_econv_decorate_at(mrb, ec, decorator_name, position);
+}
+
+/*
+ * Switch the converter to binary mode: remove every newline decorator
+ * (universal_newline, crlf_newline, cr_newline) selected in ec->flags from
+ * the chain, closing its transcoding and freeing its output buffer, and
+ * clear the three newline flags.
+ */
+void
+mrb_econv_binmode(mrb_econv_t *ec)
+{
+  const mrb_transcoder *trs[3];
+  int n, i, j;
+  transcoder_entry_t *entry;
+  int num_trans;
+
+  /*
+   * Collect the transcoders to drop.  get_transcoder_entry() may return
+   * NULL when a decorator is not registered, so guard the dereference.
+   */
+  n = 0;
+  if (ec->flags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) {
+    entry = get_transcoder_entry("", "universal_newline");
+    if (entry && entry->transcoder)
+      trs[n++] = entry->transcoder;
+  }
+  if (ec->flags & ECONV_CRLF_NEWLINE_DECORATOR) {
+    entry = get_transcoder_entry("", "crlf_newline");
+    if (entry && entry->transcoder)
+      trs[n++] = entry->transcoder;
+  }
+  if (ec->flags & ECONV_CR_NEWLINE_DECORATOR) {
+    entry = get_transcoder_entry("", "cr_newline");
+    if (entry && entry->transcoder)
+      trs[n++] = entry->transcoder;
+  }
+
+  /* compact the chain in place, keeping elements that are not in trs[] */
+  num_trans = ec->num_trans;
+  j = 0;
+  for (i = 0; i < num_trans; i++) {
+    int k;
+    for (k = 0; k < n; k++)
+      if (trs[k] == ec->elems[i].tc->transcoder)
+        break;
+    if (k == n) {
+      ec->elems[j] = ec->elems[i];
+      j++;
+    }
+    else {
+      mrb_transcoding_close(ec->elems[i].tc);
+      xfree(ec->elems[i].out_buf_start);
+      ec->num_trans--;
+    }
+  }
+
+  ec->flags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR);
+
+}
+
+/*
+ * Append a human-readable description of a conversion to mesg (a new empty
+ * string is created when mesg is nil) and return it.
+ *
+ * The text is "<sname> to <dname>" (or just the non-empty name), followed by
+ * " with " and a comma-separated list of the decorators selected in ecflags.
+ * When neither part applies the text is "no-conversion".
+ */
+static mrb_value
+econv_description(mrb_state *mrb, const char *sname, const char *dname, int ecflags, mrb_value mesg)
+{
+  /* decorator flags and their labels, in output order */
+  const int decorator_flags[] = {
+    ECONV_UNIVERSAL_NEWLINE_DECORATOR,
+    ECONV_CRLF_NEWLINE_DECORATOR,
+    ECONV_CR_NEWLINE_DECORATOR,
+    ECONV_XML_TEXT_DECORATOR,
+    ECONV_XML_ATTR_CONTENT_DECORATOR,
+    ECONV_XML_ATTR_QUOTE_DECORATOR
+  };
+  const char *const decorator_labels[] = {
+    "universal_newline",
+    "crlf_newline",
+    "cr_newline",
+    "xml_text",
+    "xml_attr_content",
+    "xml_attr_quote"
+  };
+  const int decorator_count = (int)(sizeof(decorator_flags) / sizeof(decorator_flags[0]));
+  int any_decorator_mask = 0;
+  int described = 0;
+  int k;
+
+  if (mrb_nil_p(mesg))
+    mesg = mrb_str_new(mrb, NULL, 0);
+
+  /* encoding part */
+  if (*sname != '\0' && *dname != '\0') {
+    mrb_str_catf(mrb, mesg, "%s to %s", sname, dname);
+    described = 1;
+  }
+  else if (*sname != '\0') {
+    mrb_str_cat2(mrb, mesg, sname);
+    described = 1;
+  }
+  else if (*dname != '\0') {
+    mrb_str_cat2(mrb, mesg, dname);
+    described = 1;
+  }
+
+  /* decorator part */
+  for (k = 0; k < decorator_count; k++)
+    any_decorator_mask |= decorator_flags[k];
+
+  if (ecflags & any_decorator_mask) {
+    const char *separator = "";
+
+    if (described)
+      mrb_str_cat2(mrb, mesg, " with ");
+
+    for (k = 0; k < decorator_count; k++) {
+      if (ecflags & decorator_flags[k]) {
+        mrb_str_cat2(mrb, mesg, separator);
+        separator = ",";
+        mrb_str_cat2(mrb, mesg, decorator_labels[k]);
+      }
+    }
+    described = 1;
+  }
+
+  if (!described)
+    mrb_str_cat2(mrb, mesg, "no-conversion");
+
+  return mesg;
+}
+
+/*
+ * Build (without raising) the "converter not found" exception for a failed
+ * attempt to open a converter from sname to dname with the given flags.
+ */
+mrb_value
+mrb_econv_open_exc(mrb_state *mrb, const char *sname, const char *dname, int ecflags)
+{
+  mrb_value text = mrb_str_new_cstr(mrb, "code converter not found (");
+
+  econv_description(mrb, sname, dname, ecflags, text);
+  mrb_str_cat2(mrb, text, ")");
+
+  return mrb_exc_new3(mrb, E_CONVERTERNOTFOUND_ERROR, text);
+}
+
+static mrb_value /* Build an exception object describing ec->last_error.
+ *
+ * - invalid byte sequence / incomplete input: an E_INVALIDBYTESEQUENCE_ERROR
+ *   carrying the instance variables error_bytes, readagain_bytes and
+ *   incomplete_input.
+ * - undefined conversion: an E_UNDEFINEDCONVERSION_ERROR carrying the
+ *   instance variable error_char.
+ * Both kinds additionally get the source/destination encoding names and,
+ * when the encodings can be found, the encoding objects (label set_encs).
+ *
+ * Returns nil for any other last_error result.
+ */
+make_econv_exception(mrb_state *mrb, mrb_econv_t *ec)
+{
+ mrb_value mesg, exc;
+ if (ec->last_error.result == econv_invalid_byte_sequence ||
+ ec->last_error.result == econv_incomplete_input) {
+ const char *err = (const char *)ec->last_error.error_bytes_start;
+ size_t error_len = ec->last_error.error_bytes_len;
+ mrb_value bytes = mrb_str_new(mrb, err, error_len);
+ mrb_value dumped = mrb_str_dump(mrb, bytes);
+ size_t readagain_len = ec->last_error.readagain_len;
+ mrb_value bytes2 = mrb_nil_value();
+ mrb_value dumped2;
+ int idx;
+ if (ec->last_error.result == econv_incomplete_input) {
+ mesg = mrb_sprintf(mrb, "incomplete %s on %s",
+ //StringValueCStr(dumped),
+ mrb_string_value_cstr(mrb, &dumped),
+ ec->last_error.source_encoding);
+ }
+ else if (readagain_len) { /* read-again bytes directly follow the error bytes in the same buffer */
+ bytes2 = mrb_str_new(mrb, err+error_len, readagain_len);
+ dumped2 = mrb_str_dump(mrb, bytes2);
+ mesg = mrb_sprintf(mrb, "%s followed by %s on %s",
+ //StringValueCStr(dumped),
+ mrb_string_value_cstr(mrb, &dumped),
+ //StringValueCStr(dumped2),
+ mrb_string_value_cstr(mrb, &dumped2),
+ ec->last_error.source_encoding);
+ }
+ else {
+ mesg = mrb_sprintf(mrb, "%s on %s",
+ //StringValueCStr(dumped),
+ mrb_string_value_cstr(mrb, &dumped),
+ ec->last_error.source_encoding);
+ }
+
+ exc = mrb_exc_new3(mrb, E_INVALIDBYTESEQUENCE_ERROR, mesg);
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "error_bytes"), bytes);
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "readagain_bytes"), bytes2);
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "incomplete_input"), ec->last_error.result == econv_incomplete_input ? mrb_true_value() : mrb_false_value());
+
+set_encs: /* shared tail; the undefined-conversion branch below jumps here with exc already built */
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "source_encoding_name"), mrb_str_new2(mrb, ec->last_error.source_encoding));
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "destination_encoding_name"), mrb_str_new2(mrb, ec->last_error.destination_encoding));
+ idx = mrb_enc_find_index(mrb, ec->last_error.source_encoding);
+ if (0 <= idx)
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "source_encoding"), mrb_enc_from_encoding(mrb, mrb_enc_from_index(mrb, idx)));
+ idx = mrb_enc_find_index(mrb, ec->last_error.destination_encoding);
+ if (0 <= idx)
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "destination_encoding"), mrb_enc_from_encoding(mrb, mrb_enc_from_index(mrb, idx)));
+ return exc;
+ }
+ if (ec->last_error.result == econv_undefined_conversion) {
+ mrb_value bytes = mrb_str_new(mrb, (const char *)ec->last_error.error_bytes_start,
+ ec->last_error.error_bytes_len);
+ mrb_value dumped = mrb_nil_value();
+ int idx;
+ if (strcmp(ec->last_error.source_encoding, "UTF-8") == 0) { /* a single well-formed UTF-8 character is shown as U+XXXX */
+ mrb_encoding *utf8 = mrb_utf8_encoding(mrb);
+ const char *start, *end;
+ int n;
+ start = (const char *)ec->last_error.error_bytes_start;
+ end = start + ec->last_error.error_bytes_len;
+ n = mrb_enc_precise_mbclen(start, end, utf8);
+ if (MBCLEN_CHARFOUND_P(n) &&
+ (size_t)MBCLEN_CHARFOUND_LEN(n) == ec->last_error.error_bytes_len) {
+ unsigned int cc = mrb_enc_mbc_to_codepoint(start, end, utf8);
+ dumped = mrb_sprintf(mrb, "U+%04X", cc);
+ }
+ }
+ if (mrb_obj_equal(mrb, dumped, mrb_nil_value())) /* no U+XXXX form was produced: fall back to the dumped bytes */
+ dumped = mrb_str_dump(mrb, bytes);
+ if (strcmp(ec->last_error.source_encoding,
+ ec->source_encoding_name) == 0 &&
+ strcmp(ec->last_error.destination_encoding,
+ ec->destination_encoding_name) == 0) { /* the failing step spans the whole conversion: short message */
+ mesg = mrb_sprintf(mrb, "%s from %s to %s",
+ //StringValueCStr(dumped),
+ mrb_string_value_cstr(mrb, &dumped),
+ ec->last_error.source_encoding,
+ ec->last_error.destination_encoding);
+ }
+ else { /* otherwise list the destination of every non-decorator step */
+ int i;
+ mesg = mrb_sprintf(mrb, "%s to %s in conversion from %s",
+ //StringValueCStr(dumped),
+ mrb_string_value_cstr(mrb, &dumped),
+ ec->last_error.destination_encoding,
+ ec->source_encoding_name);
+ for (i = 0; i < ec->num_trans; i++) {
+ const mrb_transcoder *tr = ec->elems[i].tc->transcoder;
+ if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding))
+ mrb_str_catf(mrb, mesg, " to %s",
+ ec->elems[i].tc->transcoder->dst_encoding);
+ }
+ }
+ exc = mrb_exc_new3(mrb, E_UNDEFINEDCONVERSION_ERROR, mesg);
+ idx = mrb_enc_find_index(mrb, ec->last_error.source_encoding);
+ if (0 <= idx)
+ mrb_enc_associate_index(mrb, bytes, idx);
+ mrb_iv_set(mrb, exc, mrb_intern(mrb, "error_char"), bytes);
+ goto set_encs;
+ }
+ return mrb_nil_value();
+}
+
+/*
+ * Enlarge the output area through resize_destination().  The new capacity is
+ * (bytes written so far + max_output) * 2.  The start, position and stop
+ * pointers are re-based onto the buffer returned by the callback.
+ */
+static void
+more_output_buffer(mrb_state *mrb,
+                   mrb_value destination,
+                   unsigned char *(*resize_destination)(mrb_state *, mrb_value, size_t, size_t),
+                   int max_output,
+                   unsigned char **out_start_ptr,
+                   unsigned char **out_pos,
+                   unsigned char **out_stop_ptr)
+{
+  size_t written = (*out_pos - *out_start_ptr);
+  size_t capacity = (written + max_output) * 2;
+  unsigned char *base = resize_destination(mrb, destination, written, capacity);
+
+  *out_start_ptr = base;
+  *out_pos = base + written;
+  *out_stop_ptr = base + capacity;
+}
+
+/*
+ * Ensure the converter has a replacement string.  Does nothing when one is
+ * already set.  Otherwise the default is "?" (with encoding "") when the
+ * insert-output encoding is empty, or the replacement character reported by
+ * get_replacement_character() for that encoding.  The default is not owned
+ * by the converter (replacement_allocated = 0).  Always returns 0.
+ */
+static int
+make_replacement(mrb_state *mrb, mrb_econv_t *ec)
+{
+  const unsigned char *replacement;
+  const char *repl_enc;
+  const char *ins_enc;
+  size_t len;
+
+  if (ec->replacement_str)
+    return 0;
+
+  ins_enc = mrb_econv_encoding_to_insert_output(ec);
+
+  if (*ins_enc == '\0') {
+    replacement = (unsigned char *)"?";
+    len = 1;
+    repl_enc = "";
+  }
+  else {
+    /* the destination encoding is looked up as before; the result is not used */
+    (void)mrb_enc_find(mrb, ec->last_tc->transcoder->dst_encoding);
+    replacement = (const unsigned char *)get_replacement_character(ins_enc, &len, &repl_enc);
+  }
+
+  ec->replacement_str = replacement;
+  ec->replacement_len = len;
+  ec->replacement_enc = repl_enc;
+  ec->replacement_allocated = 0;
+  return 0;
+}
+
+/*
+ * Set the converter's replacement string to a private copy of str[0..len),
+ * converted to the insert-output encoding when encname differs from it.
+ * A previously allocated replacement is freed.
+ * Returns 0 on success and -1 when the conversion fails.
+ */
+int
+mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec,
+                          const unsigned char *str, size_t len, const char *encname)
+{
+  const char *target_enc = mrb_econv_encoding_to_insert_output(ec);
+  unsigned char *copy;
+  size_t copy_len;
+
+  if (!encoding_equal(encname, target_enc)) {
+    copy = allocate_converted_string(mrb, encname, target_enc, str, len, NULL, 0, &copy_len);
+    if (!copy)
+      return -1;
+  }
+  else {
+    copy = xmalloc(len);
+    memcpy(copy, str, len); /* xxx: str may be invalid */
+    copy_len = len;
+    target_enc = encname;
+  }
+
+  if (ec->replacement_allocated)
+    xfree((void *)ec->replacement_str);
+
+  ec->replacement_allocated = 1;
+  ec->replacement_str = copy;
+  ec->replacement_len = copy_len;
+  ec->replacement_enc = target_enc;
+  return 0;
+}
+
+/*
+ * Insert the converter's replacement string into the output, creating the
+ * default replacement first when none is set.
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+output_replacement_character(mrb_state *mrb, mrb_econv_t *ec)
+{
+  if (make_replacement(mrb, ec) == -1)
+    return -1;
+
+  if (mrb_econv_insert_output(mrb, ec, ec->replacement_str, ec->replacement_len, ec->replacement_enc) == -1)
+    return -1;
+
+  return 0;
+}
+
+/*
+ * Convert *in_pos .. in_stop from src_encoding to dst_encoding, writing to
+ * *out_pos .. out_stop and growing the destination through
+ * resize_destination() whenever the output buffer fills up.
+ *
+ * When ecopts is a hash, its fallback entry (looked up with sym_fallback) is
+ * consulted for characters that have no conversion: the replacement found
+ * there is inserted into the output and the conversion resumes.
+ *
+ * Raises the "converter not found" exception when no converter can be
+ * opened, E_ARGUMENT_ERROR when a fallback string cannot be inserted, and
+ * the exception built by make_econv_exception() for conversion errors.  The
+ * converter is closed before each raise issued after it was opened.
+ */
+static void
+transcode_loop(mrb_state *mrb,
+               const unsigned char **in_pos, unsigned char **out_pos,
+               const unsigned char *in_stop, unsigned char *out_stop,
+               mrb_value destination,
+               unsigned char *(*resize_destination)(mrb_state *, mrb_value, size_t, size_t),
+               const char *src_encoding,
+               const char *dst_encoding,
+               int ecflags,
+               mrb_value ecopts)
+{
+  mrb_econv_t *ec;
+  mrb_transcoding *last_tc;
+  mrb_econv_result_t ret;
+  unsigned char *out_start = *out_pos;
+  int max_output;
+  mrb_value exc;
+  mrb_value fallback = mrb_nil_value();
+  mrb_value Qundef;
+  Qundef.tt = 0;
+
+  ec = mrb_econv_open_opts(mrb, src_encoding, dst_encoding, ecflags, ecopts);
+  if (!ec)
+    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, src_encoding, dst_encoding, ecflags));
+
+  if (!mrb_nil_p(ecopts) && TYPE(ecopts) == MRB_TT_HASH)
+    fallback = mrb_hash_get(mrb, ecopts, sym_fallback);
+  last_tc = ec->last_tc;
+  max_output = last_tc ? last_tc->transcoder->max_output : 1;
+
+ resume:
+  ret = mrb_econv_convert(mrb, ec, in_pos, in_stop, out_pos, out_stop, 0);
+
+  if (!mrb_nil_p(fallback) && ret == econv_undefined_conversion) {
+    /* look the offending character up in the fallback table */
+    mrb_value rep = mrb_enc_str_new(mrb,
+                                    (const char *)ec->last_error.error_bytes_start,
+                                    ec->last_error.error_bytes_len,
+                                    mrb_enc_find(mrb, ec->last_error.source_encoding));
+    rep = mrb_hash_getWithDef(mrb, fallback, rep, Qundef);//mrb_hash_lookup2(fallback, rep, Qundef);
+    if (!mrb_obj_equal(mrb, rep, Qundef)) {
+      //StringValue(rep);
+      mrb_string_value(mrb, &rep);
+      ret = mrb_econv_insert_output(mrb, ec, (const unsigned char *)RSTRING_PTR(rep),
+                                    RSTRING_LEN(rep), mrb_enc_name(mrb_enc_get(mrb, rep)));
+      if ((int)ret == -1) {
+        /* release the converter before raising, like the error path below */
+        mrb_econv_close(ec);
+        mrb_raise(mrb, E_ARGUMENT_ERROR, "too big fallback string");
+      }
+      goto resume;
+    }
+  }
+
+  if (ret == econv_invalid_byte_sequence ||
+      ret == econv_incomplete_input ||
+      ret == econv_undefined_conversion) {
+    exc = make_econv_exception(mrb, ec);
+    mrb_econv_close(ec);
+    mrb_exc_raise(mrb, exc);
+  }
+
+  if (ret == econv_destination_buffer_full) {
+    more_output_buffer(mrb, destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
+    goto resume;
+  }
+
+  mrb_econv_close(ec);
+  return;
+}
+
+/*
+ * String-specific code
+ */
+
+/*
+ * Resize callback handed to transcode_loop() for String destinations:
+ * resizes `destination` to new_len bytes and returns its buffer pointer
+ * as read after the resize.
+ */
+static unsigned char *
+str_transcoding_resize(mrb_state *mrb, mrb_value destination, size_t len, size_t new_len)
+{
+  unsigned char *base;
+
+  /* `len` (the previous size) is part of the callback signature but unused. */
+  mrb_str_resize(mrb, destination, new_len);
+  base = (unsigned char *)RSTRING_PTR(destination);
+  return base;
+}
+
+/*
+ * Translate the entries of the option hash `opt` into ECONV_* flag bits.
+ *
+ * Looks at :invalid, :undef, :replace, :xml, :universal_newline,
+ * :crlf_newline and :cr_newline.  Raises ArgumentError for values of
+ * :invalid, :undef or :xml that are not recognised.
+ */
+static int
+econv_opts(mrb_state *mrb, mrb_value opt)
+{
+  int flags = 0;
+  mrb_value val;
+
+  /* :invalid => :replace */
+  val = mrb_hash_get(mrb, opt, sym_invalid);
+  if (!mrb_nil_p(val)) {
+    if (mrb_obj_equal(mrb, val, sym_replace)) {
+      flags |= ECONV_INVALID_REPLACE;
+    }
+    else {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown value for invalid character option");
+    }
+  }
+
+  /* :undef => :replace */
+  val = mrb_hash_get(mrb, opt, sym_undef);
+  if (!mrb_nil_p(val)) {
+    if (mrb_obj_equal(mrb, val, sym_replace)) {
+      flags |= ECONV_UNDEF_REPLACE;
+    }
+    else {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "unknown value for undefined character option");
+    }
+  }
+
+  /* A :replace entry turns on undef replacement unless invalid replacement
+     was already requested. */
+  val = mrb_hash_get(mrb, opt, sym_replace);
+  if (!(flags & ECONV_INVALID_REPLACE) && !mrb_nil_p(val)) {
+    flags |= ECONV_UNDEF_REPLACE;
+  }
+
+  /* :xml => :text | :attr */
+  val = mrb_hash_get(mrb, opt, sym_xml);
+  if (!mrb_nil_p(val)) {
+    if (mrb_obj_equal(mrb, val, sym_text)) {
+      flags |= ECONV_XML_TEXT_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
+    }
+    else if (mrb_obj_equal(mrb, val, sym_attr)) {
+      flags |= ECONV_XML_ATTR_CONTENT_DECORATOR|ECONV_XML_ATTR_QUOTE_DECORATOR|ECONV_UNDEF_HEX_CHARREF;
+    }
+    else if (TYPE(val) == MRB_TT_SYMBOL) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "unexpected value for xml option: %s", mrb_sym2name(mrb, SYM2ID(val)));
+    }
+    else {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "unexpected value for xml option");
+    }
+  }
+
+  /* Newline decorators: any truthy value enables the flag. */
+  val = mrb_hash_get(mrb, opt, sym_universal_newline);
+  if (RTEST(val)) {
+    flags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;
+  }
+
+  val = mrb_hash_get(mrb, opt, sym_crlf_newline);
+  if (RTEST(val)) {
+    flags |= ECONV_CRLF_NEWLINE_DECORATOR;
+  }
+
+  val = mrb_hash_get(mrb, opt, sym_cr_newline);
+  if (RTEST(val)) {
+    flags |= ECONV_CR_NEWLINE_DECORATOR;
+  }
+
+  return flags;
+}
+
+/*
+ * Split a user supplied option hash into conversion flags and a reduced
+ * option hash.
+ *
+ * Returns the ECONV_* flags computed by econv_opts().  *opts receives nil,
+ * or a new hash carrying only :replace (a frozen string copy) and/or
+ * :fallback (converted to a Hash).  A nil opthash yields flags 0 and
+ * *opts = nil.  Raises ArgumentError when the replacement string is broken
+ * in its own encoding.
+ */
+int
+mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *opts)
+{
+  int flags;
+  mrb_value prepared = mrb_nil_value();
+  mrb_value entry;
+
+  if (mrb_nil_p(opthash)) {
+    *opts = mrb_nil_value();
+    return 0;
+  }
+  flags = econv_opts(mrb, opthash);
+
+  /* :replace -- validated and stored as a frozen string */
+  entry = mrb_hash_get(mrb, opthash, sym_replace);
+  if (!mrb_nil_p(entry)) {
+    mrb_string_value(mrb, &entry);
+    if (mrb_enc_str_coderange(mrb, entry) == ENC_CODERANGE_BROKEN) {
+      mrb_value dumped = mrb_str_dump(mrb, entry);
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "replacement string is broken: %s as %s",
+                mrb_string_value_cstr(mrb, &dumped),
+                mrb_enc_name(mrb_enc_get(mrb, entry)));
+    }
+    entry = mrb_str_new_frozen(mrb, entry);
+    prepared = mrb_hash_new_capa(mrb, 0);
+    mrb_hash_set(mrb, prepared, sym_replace, entry);
+  }
+
+  /* :fallback -- must be convertible to a Hash */
+  entry = mrb_hash_get(mrb, opthash, sym_fallback);
+  if (!mrb_nil_p(entry)) {
+    entry = mrb_convert_type(mrb, entry, MRB_TT_HASH, "Hash", "to_hash");
+    if (!mrb_nil_p(entry)) {
+      if (mrb_nil_p(prepared)) {
+        prepared = mrb_hash_new_capa(mrb, 0);
+      }
+      mrb_hash_set(mrb, prepared, sym_fallback, entry);
+    }
+  }
+
+  /* The resulting hash is not frozen here (the freeze call is disabled). */
+  *opts = prepared;
+
+  return flags;
+}
+
+/*
+ * Open a converter and, when opthash carries a :replace string, install it
+ * as the converter's replacement.
+ *
+ * opthash must be nil or a Hash (anything else triggers mrb_bug()).
+ * Returns NULL when the converter cannot be opened or when the replacement
+ * cannot be set (the converter is closed again in that case).
+ */
+mrb_econv_t *
+mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value opthash)
+{
+  mrb_econv_t *conv;
+  mrb_value repl = mrb_nil_value();
+
+  if (!mrb_nil_p(opthash)) {
+    if (TYPE(opthash) != MRB_TT_HASH)
+      mrb_bug("mrb_econv_open_opts called with invalid opthash");
+    repl = mrb_hash_get(mrb, opthash, sym_replace);
+  }
+
+  conv = mrb_econv_open(mrb, source_encoding, destination_encoding, ecflags);
+  if (!conv)
+    return conv;
+
+  if (mrb_nil_p(repl))
+    return conv;
+
+  {
+    mrb_encoding *repl_enc = mrb_enc_get(mrb, repl);
+    int rc = mrb_econv_set_replacement(mrb, conv,
+                                       (const unsigned char *)RSTRING_PTR(repl),
+                                       RSTRING_LEN(repl),
+                                       mrb_enc_name(repl_enc));
+    if (rc == -1) {
+      mrb_econv_close(conv);
+      return NULL;
+    }
+  }
+  return conv;
+}
+
+/*
+ * Resolve *arg to an encoding.
+ *
+ * When *arg maps to a known encoding, *enc_p receives it, *name_p its name,
+ * and the encoding index is returned.  Otherwise *arg is taken as a string,
+ * *name_p points at its C string, *enc_p is NULL and 0 is returned.
+ */
+static int
+enc_arg(mrb_state *mrb, mrb_value *arg, const char **name_p, mrb_encoding **enc_p)
+{
+  mrb_encoding *found = NULL;
+  const char *name;
+  int idx;
+
+  idx = mrb_to_encoding_index(mrb, *arg);
+  if (idx >= 0)
+    found = mrb_enc_from_index(mrb, idx);
+
+  if (found) {
+    name = mrb_enc_name(found);
+  }
+  else {
+    /* Unknown encoding: fall back to the raw name given by the caller. */
+    idx = 0;
+    name = mrb_string_value_cstr(mrb, arg);
+  }
+
+  *name_p = name;
+  *enc_p = found;
+
+  return idx;
+}
+
+/*
+ * Resolve the destination (*arg1) and source (*arg2) encodings for a
+ * transcode of `str`.  A nil *arg2 means "use the encoding of str".
+ * Names and encodings are handed back through the out parameters; the
+ * return value is the destination encoding index from enc_arg().
+ */
+static int
+str_transcode_enc_args(mrb_state *mrb,
+                       mrb_value str, mrb_value *arg1, mrb_value *arg2,
+                       const char **sname_p, mrb_encoding **senc_p,
+                       const char **dname_p, mrb_encoding **denc_p)
+{
+  mrb_encoding *src_enc, *dst_enc;
+  const char *src_name, *dst_name;
+  int dst_idx;
+
+  dst_idx = enc_arg(mrb, arg1, &dst_name, &dst_enc);
+
+  if (!mrb_nil_p(*arg2)) {
+    enc_arg(mrb, arg2, &src_name, &src_enc);
+  }
+  else {
+    int src_idx = mrb_enc_get_index(mrb, str);
+    src_enc = mrb_enc_from_index(mrb, src_idx);
+    src_name = mrb_enc_name(src_enc);
+  }
+
+  *sname_p = src_name;
+  *senc_p = src_enc;
+  *dname_p = dst_name;
+  *denc_p = dst_enc;
+  return dst_idx;
+}
+
+/* Create a string of `len` bytes by calling mrb_str_new() with a null source pointer. */
+mrb_value
+mrb_str_tmp_new(mrb_state *mrb, long len)
+{
+  mrb_value tmp = mrb_str_new(mrb, NULL, len);
+  return tmp;
+}
+
+/*
+ * Core of the String encode methods.
+ *
+ * argv holds up to two encoding arguments (destination, then source).  With
+ * no arguments the default internal encoding is the target and invalid/undef
+ * replacement is switched on.
+ *
+ * Returns -1 when nothing has to be done, otherwise the destination encoding
+ * index.  *self is replaced by the converted string only when an actual
+ * conversion ran; on the early-return paths it is left untouched.
+ * Raises ArgumentError on a bad argument count or an incomplete conversion.
+ */
+static int
+str_transcode0(mrb_state *mrb, int argc, mrb_value *argv, mrb_value *self, int ecflags, mrb_value ecopts)
+{
+  mrb_value src = *self;
+  mrb_value out;
+  mrb_value to_arg, from_arg;
+  long out_cap, src_len;
+  unsigned char *out_base, *out_cur, *src_base;
+  const unsigned char *src_cur;
+  mrb_encoding *senc, *denc;
+  const char *sname, *dname;
+  int dencidx;
+
+  if (argc <0 || argc > 2) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 0..2)", argc);
+  }
+
+  if (argc != 0) {
+    to_arg = argv[0];
+  }
+  else {
+    to_arg = mrb_enc_default_internal(mrb);
+    if (mrb_nil_p(to_arg)) {
+      if (!ecflags) return -1;
+      to_arg = mrb_obj_encoding(mrb, src);
+    }
+    ecflags |= ECONV_INVALID_REPLACE | ECONV_UNDEF_REPLACE;
+  }
+
+  if (argc <= 1) {
+    from_arg = mrb_nil_value();
+  }
+  else {
+    from_arg = argv[1];
+  }
+  dencidx = str_transcode_enc_args(mrb, src, &to_arg, &from_arg, &sname, &senc, &dname, &denc);
+
+  if ((ecflags & (ECONV_UNIVERSAL_NEWLINE_DECORATOR|
+                  ECONV_CRLF_NEWLINE_DECORATOR|
+                  ECONV_CR_NEWLINE_DECORATOR|
+                  ECONV_XML_TEXT_DECORATOR|
+                  ECONV_XML_ATTR_CONTENT_DECORATOR|
+                  ECONV_XML_ATTR_QUOTE_DECORATOR)) != 0) {
+    /* Decorators requested: with identical encodings run them alone. */
+    if (encoding_equal(sname, dname)) {
+      sname = "";
+      dname = "";
+    }
+  }
+  else {
+    /* No decorators: look for shortcuts that avoid converting at all. */
+    if (senc && senc == denc) {
+      return mrb_nil_p(from_arg) ? -1 : dencidx;
+    }
+    if (senc && denc && mrb_enc_asciicompat(mrb, senc) && mrb_enc_asciicompat(mrb, denc) &&
+        mrb_enc_str_coderange(mrb, src) == ENC_CODERANGE_7BIT) {
+      return dencidx;
+    }
+    if (encoding_equal(sname, dname)) {
+      return mrb_nil_p(from_arg) ? -1 : dencidx;
+    }
+  }
+
+  src_base = (unsigned char *)RSTRING_PTR(src);
+  src_cur = src_base;
+  src_len = RSTRING_LEN(src);
+  out_cap = src_len + 30; /* len + margin */
+  out = mrb_str_tmp_new(mrb, out_cap);
+  out_cur = (unsigned char *)RSTRING_PTR(out);
+
+  transcode_loop(mrb, &src_cur, &out_cur, (src_base+src_len), (out_cur+out_cap), out, str_transcoding_resize, sname, dname, ecflags, ecopts);
+  if (src_cur != src_base+src_len) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "not fully converted, %"PRIdPTRDIFF" bytes left", src_base+src_len-src_cur);
+  }
+
+  /* Re-read the buffer start: the loop may have resized the destination. */
+  out_base = (unsigned char *)RSTRING_PTR(out);
+  *out_cur = '\0';
+  mrb_str_set_len(mrb, out, out_cur - out_base);
+
+  /* set encoding */
+  if (!denc) {
+    dencidx = mrb_define_dummy_encoding(mrb, dname);
+  }
+  *self = out;
+
+  return dencidx;
+}
+
+/*
+ * Front end of str_transcode0(): when the last argument converts to a Hash
+ * it is consumed as the option hash and turned into flags plus prepared
+ * options; the remaining arguments are passed through unchanged.
+ */
+static int
+str_transcode(mrb_state *mrb, int argc, mrb_value *argv, mrb_value *self)
+{
+  int flags = 0;
+  mrb_value prepared = mrb_nil_value();
+
+  if (argc > 0) {
+    mrb_value last = mrb_check_convert_type(mrb, argv[argc-1], MRB_TT_HASH, "Hash", "to_hash");
+    if (!mrb_nil_p(last)) {
+      argc--;
+      flags = mrb_econv_prepare_opts(mrb, last, &prepared);
+    }
+  }
+  return str_transcode0(mrb, argc, argv, self, flags, prepared);
+}
+
+/*
+ * Attach encoding index `encidx` to `str` and set its code range.
+ * For ASCII compatible encodings the range is obtained by scanning the
+ * string; otherwise it is set to ENC_CODERANGE_VALID.  Returns str.
+ */
+static inline mrb_value
+str_encode_associate(mrb_state *mrb, mrb_value str, int encidx)
+{
+  int range = 0;
+
+  mrb_enc_associate_index(mrb, str, encidx);
+
+  /* A transcoded string is never broken. */
+  if (!mrb_enc_asciicompat(mrb, mrb_enc_from_index(mrb, encidx))) {
+    range = ENC_CODERANGE_VALID;
+  }
+  else {
+    mrb_str_coderange_scan_restartable(RSTRING_PTR(str), RSTRING_END(str), 0, &range);
+  }
+  ENC_CODERANGE_SET(str, range);
+  return str;
+}
+
+/*
+ * call-seq:
+ * str.encode!(encoding [, options] ) -> str
+ * str.encode!(dst_encoding, src_encoding [, options] ) -> str
+ *
+ * The first form transcodes the contents of <i>str</i> from
+ * str.encoding to +encoding+.
+ * The second form transcodes the contents of <i>str</i> from
+ * src_encoding to dst_encoding.
+ * The options Hash gives details for conversion. See String#encode
+ * for details.
+ * Returns the string even if no changes were made.
+ */
+
+/*
+ * Method body of the in-place encode: transcodes the receiver according to
+ * the method arguments and returns the receiver.  When str_transcode()
+ * reports that nothing needs to change (negative index) the receiver is
+ * returned untouched.
+ */
+static mrb_value
+str_encode_bang(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value str)
+{
+  /* The "*" specifier of mrb_get_args() stores a pointer to the rest
+     arguments, so the receiving variable has to be a pointer.  Passing the
+     address of a local array would overwrite the array's first bytes. */
+  mrb_value *argv = NULL;
+  int argc = 0;
+  mrb_value newstr;
+  int encidx;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  /* TODO: reject frozen receivers once a frozen check is usable from here
+     (str_frozen_check in string.c is currently static). */
+
+  newstr = str;
+  encidx = str_transcode(mrb, argc, argv, &newstr);
+
+  if (encidx < 0) return str;
+  mrb_str_shared_replace(mrb, str, newstr);
+  return str_encode_associate(mrb, str, encidx);
+}
+
+/*
+ * call-seq:
+ * str.encode(encoding [, options] ) -> str
+ * str.encode(dst_encoding, src_encoding [, options] ) -> str
+ * str.encode([options]) -> str
+ *
+ * The first form returns a copy of <i>str</i> transcoded
+ * to encoding +encoding+.
+ * The second form returns a copy of <i>str</i> transcoded
+ * from src_encoding to dst_encoding.
+ * The last form returns a copy of <i>str</i> transcoded to
+ * <code>Encoding.default_internal</code>.
+ * By default, the first and second form raise
+ * Encoding::UndefinedConversionError for characters that are
+ * undefined in the destination encoding, and
+ * Encoding::InvalidByteSequenceError for invalid byte sequences
+ * in the source encoding. The last form by default does not raise
+ * exceptions but uses replacement strings.
+ * The <code>options</code> Hash gives details for conversion.
+ *
+ * === options
+ * The hash <code>options</code> can have the following keys:
+ * :invalid ::
+ * If the value is <code>:replace</code>, <code>#encode</code> replaces
+ * invalid byte sequences in <code>str</code> with the replacement character.
+ * The default is to raise the exception
+ * :undef ::
+ * If the value is <code>:replace</code>, <code>#encode</code> replaces
+ * characters which are undefined in the destination encoding with
+ * the replacement character.
+ * :replace ::
+ * Sets the replacement string to the value. The default replacement
+ * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
+ * :fallback ::
+ * Sets the replacement strings for undefined characters from the given hash.
+ * Each key is such an undefined character, encoded in the source encoding
+ * of the current transcoder. Each value may be in any encoding as long as it
+ * can be converted into the destination encoding of the transcoder.
+ * :xml ::
+ * The value must be <code>:text</code> or <code>:attr</code>.
+ * If the value is <code>:text</code> <code>#encode</code> replaces
+ * undefined characters with their (upper-case hexadecimal) numeric
+ * character references. '&', '<', and '>' are converted to "&amp;",
+ * "&lt;", and "&gt;", respectively.
+ * If the value is <code>:attr</code>, <code>#encode</code> also quotes
+ * the replacement result (using '"'), and replaces '"' with "&quot;".
+ * :cr_newline ::
+ * Replaces LF ("\n") with CR ("\r") if value is true.
+ * :crlf_newline ::
+ * Replaces LF ("\n") with CRLF ("\r\n") if value is true.
+ * :universal_newline ::
+ * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
+ */
+
+/*
+ * Method body of the copying encode: returns a transcoded copy of the
+ * receiver.  When no conversion is needed a plain duplicate is returned.
+ * A freshly converted string is given the class of the receiver.
+ */
+static mrb_value
+str_encode(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value str)
+{
+  /* The "*" specifier of mrb_get_args() stores a pointer to the rest
+     arguments, so the receiving variable has to be a pointer.  Passing the
+     address of a local array would overwrite the array's first bytes. */
+  mrb_value *argv = NULL;
+  int argc = 0;
+  mrb_value newstr = str;
+  int encidx;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  encidx = str_transcode(mrb, argc, argv, &newstr);
+
+  if (encidx < 0) return mrb_str_dup(mrb, str);
+  if (mrb_obj_equal(mrb, newstr, str)) {
+    /* Only the encoding label changes: work on a copy, not the receiver. */
+    newstr = mrb_str_dup(mrb, str);
+  }
+  else {
+    RBASIC(newstr)->c = mrb_obj_class(mrb, str);
+  }
+  return str_encode_associate(mrb, newstr, encidx);
+}
+
+/*
+ * C level encode: returns a copy of `str` transcoded to the encoding `to`,
+ * using already prepared flags and options.  The receiver is never
+ * modified; a duplicate is returned when no conversion is necessary.
+ */
+mrb_value
+mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts)
+{
+  mrb_value result = str;
+  int idx;
+
+  /* `to` acts as a one element argument vector. */
+  idx = str_transcode0(mrb, 1, &to, &result, ecflags, ecopts);
+  if (idx < 0)
+    return mrb_str_dup(mrb, str);
+
+  if (!mrb_obj_equal(mrb, result, str)) {
+    RBASIC(result)->c = mrb_obj_class(mrb, str);
+  }
+  else {
+    result = mrb_str_dup(mrb, str);
+  }
+  return str_encode_associate(mrb, result, idx);
+}
+
+/* Free function listed in econv_data_type: closes the converter behind `ptr`. */
+static void
+econv_free(mrb_state *mrb, void *ptr)
+{
+  mrb_econv_close((mrb_econv_t *)ptr);
+}
+
+static const struct mrb_data_type econv_data_type = { /* descriptor used with mrb_check_datatype()/Data_Get_Struct() for converter objects */
+ "econv", econv_free, /* type name; econv_free is presumably the release hook -- confirm against struct mrb_data_type */
+};
+
+/* Define a dummy encoding called `name` and return the encoding for its index. */
+static mrb_encoding *
+make_dummy_encoding(mrb_state *mrb, const char *name)
+{
+  int dummy_idx = mrb_define_dummy_encoding(mrb, name);
+
+  return mrb_enc_from_index(mrb, dummy_idx);
+}
+
+/* Look `name` up; when no such encoding is found, create a dummy one. */
+static mrb_encoding *
+make_encoding(mrb_state *mrb, const char *name)
+{
+  mrb_encoding *found = mrb_enc_find(mrb, name);
+
+  if (found)
+    return found;
+  return make_dummy_encoding(mrb, name);
+}
+
+/* Return the encoding object for `name`, via make_encoding(). */
+static mrb_value
+make_encobj(mrb_state *mrb, const char *name)
+{
+  mrb_encoding *enc = make_encoding(mrb, name);
+
+  return mrb_enc_from_encoding(mrb, enc);
+}
+
+/*
+ * call-seq:
+ * Encoding::Converter.asciicompat_encoding(string) -> encoding or nil
+ * Encoding::Converter.asciicompat_encoding(encoding) -> encoding or nil
+ *
+ * Returns the corresponding ASCII compatible encoding.
+ *
+ * Returns nil if the argument is an ASCII compatible encoding.
+ *
+ * "corresponding ASCII compatible encoding" is an ASCII compatible encoding which
+ * can represent exactly the same characters as the given ASCII incompatible encoding.
+ * So, no undefined conversion error occurs when converting between the two encodings.
+ *
+ * Encoding::Converter.asciicompat_encoding("ISO-2022-JP") #=> #<Encoding:stateless-ISO-2022-JP>
+ * Encoding::Converter.asciicompat_encoding("UTF-16BE") #=> #<Encoding:UTF-8>
+ * Encoding::Converter.asciicompat_encoding("UTF-8") #=> nil
+ *
+ */
+static mrb_value
+econv_s_asciicompat_encoding(mrb_state *mrb, mrb_value klass)
+{
+  mrb_value given;
+  const char *given_name;
+  const char *compat_name;
+  mrb_encoding *given_enc;
+
+  mrb_get_args(mrb, "o", &given);
+  /* Only the name is needed; the encoding and index are ignored. */
+  enc_arg(mrb, &given, &given_name, &given_enc);
+
+  compat_name = mrb_econv_asciicompat_encoding(given_name);
+  if (compat_name != NULL) {
+    return mrb_enc_from_encoding(mrb, make_encoding(mrb, compat_name));
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * Decode the (source, destination [, opt]) arguments shared by
+ * Encoding::Converter.new and .search_convpath.
+ *
+ * argv[0] / argv[1] are the source and destination encodings (encoding
+ * objects or names); the optional argv[2] is either an integer flag word or
+ * an option hash.  Results are returned through the out parameters:
+ * the (possibly string-converted) argument values, the encoding names, the
+ * encodings (NULL when unknown), the flags and the prepared option hash.
+ *
+ * Raises ArgumentError when fewer than two arguments are supplied.
+ */
+static void
+econv_args(mrb_state *mrb,
+           int argc, mrb_value *argv,
+           mrb_value *snamev_p, mrb_value *dnamev_p,
+           const char **sname_p, const char **dname_p,
+           mrb_encoding **senc_p, mrb_encoding **denc_p,
+           int *ecflags_p,
+           mrb_value *ecopts_p)
+{
+  mrb_value opt, opthash, flags_v, ecopts;
+  int sidx, didx;
+  const char *sname, *dname;
+  mrb_encoding *senc, *denc;
+  int ecflags;
+
+  /* Both encodings are mandatory; check before touching argv so that a
+     short argument list is reported instead of being read out of bounds. */
+  if (argc < 2) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2..3)", argc);
+  }
+  *snamev_p = argv[0];
+  *dnamev_p = argv[1];
+
+  if (argc < 3) {
+    ecflags = 0;
+    ecopts = mrb_nil_value();
+  }
+  else {
+    /* Only read the third slot when it was actually passed. */
+    opt = argv[2];
+    flags_v = mrb_check_to_integer(mrb, opt, "to_int");
+    if (!mrb_nil_p(flags_v)) {
+      ecflags = mrb_fixnum(flags_v);
+      ecopts = mrb_nil_value();
+    }
+    else {
+      opthash = mrb_convert_type(mrb, opt, MRB_TT_HASH, "Hash", "to_hash");
+      ecflags = mrb_econv_prepare_opts(mrb, opthash, &ecopts);
+    }
+  }
+
+  senc = NULL;
+  sidx = mrb_to_encoding_index(mrb, *snamev_p);
+  if (0 <= sidx) {
+    senc = mrb_enc_from_index(mrb, sidx);
+  }
+  else {
+    mrb_string_value(mrb, snamev_p);
+  }
+
+  denc = NULL;
+  didx = mrb_to_encoding_index(mrb, *dnamev_p);
+  if (0 <= didx) {
+    denc = mrb_enc_from_index(mrb, didx);
+  }
+  else {
+    mrb_string_value(mrb, dnamev_p);
+  }
+
+  /* Known encodings contribute their own name; otherwise use the string
+     that was passed in. */
+  sname = senc ? mrb_enc_name(senc) : mrb_string_value_cstr(mrb, snamev_p);
+  dname = denc ? mrb_enc_name(denc) : mrb_string_value_cstr(mrb, dnamev_p);
+
+  *sname_p = sname;
+  *dname_p = dname;
+  *senc_p = senc;
+  *denc_p = denc;
+  *ecflags_p = ecflags;
+  *ecopts_p = ecopts;
+}
+
+/*
+ * Add the decorator names selected by ecflags to the conversion path array.
+ *
+ * The names are written starting at the end of the path, or one slot
+ * earlier when the last step is a non-decorator transcoder whose
+ * asciicompat_type is asciicompat_encoder; in that case the last step is
+ * stored again behind the decorators.
+ *
+ * Returns 0 on success, -1 when the decorator names cannot be determined or
+ * the transcoder for the last step cannot be loaded.
+ */
+static int
+decorate_convpath(mrb_state *mrb, mrb_value convpath, int ecflags)
+{
+  const char *names[MAX_ECFLAGS_DECORATORS];
+  int name_count;
+  int orig_len, insert_at;
+  int k;
+
+  name_count = decorator_names(ecflags, names);
+  if (name_count == -1)
+    return -1;
+
+  orig_len = insert_at = RARRAY_LEN(convpath);
+  if (insert_at != 0) {
+    mrb_value last = RARRAY_PTR(convpath)[insert_at-1];
+
+    if (TYPE(last) != MRB_TT_ARRAY) {
+      mrb_ary_set(mrb, convpath, orig_len + name_count - 1, last);
+    }
+    else {
+      const char *sname = mrb_enc_name(mrb_to_encoding(mrb, RARRAY_PTR(last)[0]));
+      const char *dname = mrb_enc_name(mrb_to_encoding(mrb, RARRAY_PTR(last)[1]));
+      transcoder_entry_t *entry = get_transcoder_entry(sname, dname);
+      const mrb_transcoder *tr = load_transcoder_entry(mrb, entry);
+
+      if (!tr)
+        return -1;
+      if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding) &&
+          tr->asciicompat_type == asciicompat_encoder) {
+        /* Decorators go in front of this last conversion step. */
+        insert_at--;
+        mrb_ary_set(mrb, convpath, orig_len + name_count - 1, last);
+      }
+    }
+  }
+
+  for (k = 0; k < name_count; k++) {
+    mrb_ary_set(mrb, convpath, insert_at + k, mrb_str_new_cstr(mrb, names[k]));
+  }
+
+  return 0;
+}
+
+/*
+ * transcode_search_path() callback that records one step of a conversion
+ * path.  `arg` points at an mrb_value that is nil until the first step and
+ * is replaced by a new array then.  The step is stored at index `depth`:
+ * the name string for a decorator, otherwise a [source, destination] pair
+ * of encoding objects.
+ */
+static void
+search_convpath_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg)
+{
+  mrb_value *path_p = arg;
+  mrb_value step;
+
+  if (mrb_obj_equal(mrb, *path_p, mrb_nil_value())) {
+    *path_p = mrb_ary_new(mrb);
+  }
+
+  if (!DECORATOR_P(sname, dname)) {
+    step = mrb_assoc_new(mrb, make_encobj(mrb, sname), make_encobj(mrb, dname));
+  }
+  else {
+    step = mrb_str_new_cstr(mrb, dname);
+  }
+  mrb_ary_set(mrb, *path_p, depth, step);
+}
+
+/*
+ * call-seq:
+ * Encoding::Converter.search_convpath(source_encoding, destination_encoding) -> ary
+ * Encoding::Converter.search_convpath(source_encoding, destination_encoding, opt) -> ary
+ *
+ * Returns a conversion path.
+ *
+ * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP")
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>]]
+ *
+ * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true)
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
+ * # "universal_newline"]
+ *
+ * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true)
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # "universal_newline",
+ * # [#<Encoding:UTF-8>, #<Encoding:UTF-32BE>]]
+ */
+static mrb_value
+econv_s_search_convpath(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value klass)
+{
+  mrb_value snamev, dnamev;
+  const char *sname, *dname;
+  mrb_encoding *senc, *denc;
+  int ecflags;
+  mrb_value ecopts;
+  mrb_value convpath;
+  /* The "*" specifier of mrb_get_args() stores a pointer to the rest
+     arguments, so the receiving variable has to be a pointer.  Passing the
+     address of a local array would overwrite the array's first bytes. */
+  mrb_value *argv = NULL;
+  int argc = 0;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  /* econv_args() indexes argv[0] and argv[1]; refuse short calls first. */
+  if (argc < 2) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2..3)", argc);
+  }
+  econv_args(mrb, argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
+
+  /* The callback turns the nil into an array on the first path step, so a
+     nil afterwards means that no path was found. */
+  convpath = mrb_nil_value();
+  transcode_search_path(mrb, sname, dname, search_convpath_i, &convpath);
+
+  if (mrb_nil_p(convpath))
+    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags));
+
+  if (decorate_convpath(mrb, convpath, ecflags) == -1)
+    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags));
+
+  return convpath;
+}
+
+/*
+ * Check the existence of a conversion path.
+ * Returns the truth value of the path collected during the search.
+ * result: non-zero: a path was found  0: no path
+ */
+int
+mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding)
+{
+  mrb_value found_path = mrb_nil_value();
+
+  /* search_convpath_i replaces the nil by an array on the first step it is given. */
+  transcode_search_path(mrb, from_encoding, to_encoding, search_convpath_i, &found_path);
+
+  return RTEST(found_path);
+}
+
+struct mrb_econv_init_by_convpath_t { /* state handed to mrb_econv_init_by_convpath_i through the search callback argument */
+ mrb_econv_t *ec; /* converter that receives the steps */
+ int index; /* position passed to mrb_econv_add_converter() for each step */
+ int ret; /* result of the latest add; once it is -1 the callback does nothing */
+};
+
+/*
+ * transcode_search_path() callback: adds the step sname -> dname to the
+ * converter described by `arg` (a struct mrb_econv_init_by_convpath_t).
+ * After a failed add (ret == -1) all later calls are ignored.
+ */
+static void
+mrb_econv_init_by_convpath_i(mrb_state *mrb, const char *sname, const char *dname, int depth, void *arg)
+{
+  struct mrb_econv_init_by_convpath_t *state = (struct mrb_econv_init_by_convpath_t *)arg;
+
+  if (state->ret != -1) {
+    state->ret = mrb_econv_add_converter(mrb, state->ec, sname, dname, state->index);
+  }
+}
+
+/*
+ * Build a converter from an explicit conversion path array.
+ *
+ * Each element of convpath is either a two element array (source and
+ * destination encoding) or a string naming a decorator.  The new converter
+ * is stored in DATA_PTR(self) right after allocation.
+ *
+ * Through the out parameters the caller receives the source of the first
+ * real conversion and the destination of the last one; when the path holds
+ * decorators only, the encodings are NULL and the names are "".
+ * Raises ArgumentError for malformed pairs and for steps that cannot be
+ * added.
+ */
+static mrb_econv_t *
+mrb_econv_init_by_convpath(mrb_state *mrb, mrb_value self, mrb_value convpath,
+                           const char **sname_p, const char **dname_p,
+                           mrb_encoding **senc_p, mrb_encoding**denc_p)
+{
+  mrb_econv_t *conv;
+  long pos;
+  int rc;
+  int no_conversion_yet = 1;
+  mrb_value item;
+  mrb_encoding *senc = 0, *denc = 0;
+  const char *sname, *dname;
+
+  conv = mrb_econv_alloc(RARRAY_LEN(convpath));
+  DATA_PTR(self) = conv;
+
+  for (pos = 0; pos < RARRAY_LEN(convpath); pos++) {
+    mrb_value from_v, to_v;
+    mrb_value pair;
+
+    item = mrb_ary_ref(mrb, convpath, pos);
+    pair = mrb_check_array_type(mrb, item);
+    if (mrb_nil_p(pair)) {
+      /* Not an array: treat the element as a decorator name. */
+      sname = "";
+      dname = mrb_string_value_cstr(mrb, &item);
+    }
+    else {
+      if (RARRAY_LEN(pair) != 2)
+        mrb_raise(mrb, E_ARGUMENT_ERROR, "not a 2-element array in convpath");
+      from_v = mrb_ary_ref(mrb, pair, 0);
+      enc_arg(mrb, &from_v, &sname, &senc);
+      to_v = mrb_ary_ref(mrb, pair, 1);
+      enc_arg(mrb, &to_v, &dname, &denc);
+    }
+
+    if (DECORATOR_P(sname, dname)) {
+      rc = mrb_econv_add_converter(mrb, conv, sname, dname, conv->num_trans);
+      if (rc == -1)
+        mrb_raise(mrb, E_ARGUMENT_ERROR, "decoration failed: %s", dname);
+    }
+    else {
+      int first_new = conv->num_trans;
+      struct mrb_econv_init_by_convpath_t state;
+
+      state.ec = conv;
+      state.index = conv->num_trans;
+      state.ret = 0;
+      rc = transcode_search_path(mrb, sname, dname, mrb_econv_init_by_convpath_i, &state);
+      if (rc == -1 || state.ret == -1)
+        mrb_raise(mrb, E_ARGUMENT_ERROR, "adding conversion failed: %s to %s", sname, dname);
+      if (no_conversion_yet) {
+        /* Remember where the whole path starts. */
+        no_conversion_yet = 0;
+        *senc_p = senc;
+        *sname_p = conv->elems[first_new].tc->transcoder->src_encoding;
+      }
+      /* The end of the path moves with every conversion that is added. */
+      *denc_p = denc;
+      *dname_p = conv->elems[conv->num_trans-1].tc->transcoder->dst_encoding;
+    }
+  }
+
+  if (no_conversion_yet) {
+    *senc_p = NULL;
+    *denc_p = NULL;
+    *sname_p = "";
+    *dname_p = "";
+  }
+
+  conv->source_encoding_name = *sname_p;
+  conv->destination_encoding_name = *dname_p;
+
+  return conv;
+}
+
+/*
+ * call-seq:
+ * Encoding::Converter.new(source_encoding, destination_encoding)
+ * Encoding::Converter.new(source_encoding, destination_encoding, opt)
+ * Encoding::Converter.new(convpath)
+ *
+ * possible options elements:
+ * hash form:
+ * :invalid => nil # raise error on invalid byte sequence (default)
+ * :invalid => :replace # replace invalid byte sequence
+ * :undef => nil # raise error on undefined conversion (default)
+ * :undef => :replace # replace undefined conversion
+ * :replace => string # replacement string ("?" or "\uFFFD" if not specified)
+ * :universal_newline => true # decorator for converting CRLF and CR to LF
+ * :crlf_newline => true # decorator for converting LF to CRLF
+ * :cr_newline => true # decorator for converting LF to CR
+ * :xml => :text # escape as XML CharData.
+ * :xml => :attr # escape as XML AttValue
+ * integer form:
+ * Encoding::Converter::INVALID_REPLACE
+ * Encoding::Converter::UNDEF_REPLACE
+ * Encoding::Converter::UNDEF_HEX_CHARREF
+ * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
+ * Encoding::Converter::CRLF_NEWLINE_DECORATOR
+ * Encoding::Converter::CR_NEWLINE_DECORATOR
+ * Encoding::Converter::XML_TEXT_DECORATOR
+ * Encoding::Converter::XML_ATTR_CONTENT_DECORATOR
+ * Encoding::Converter::XML_ATTR_QUOTE_DECORATOR
+ *
+ * Encoding::Converter.new creates an instance of Encoding::Converter.
+ *
+ * Source_encoding and destination_encoding should be a string or
+ * Encoding object.
+ *
+ * opt should be nil, a hash or an integer.
+ *
+ * convpath should be an array.
+ * convpath may contain
+ * - two-element arrays which contain encodings or encoding names, or
+ * - strings representing decorator names.
+ *
+ * Encoding::Converter.new optionally takes an option.
+ * The option should be a hash or an integer.
+ * The option hash can contain :invalid => nil, etc.
+ * The option integer should be logical-or of constants such as
+ * Encoding::Converter::INVALID_REPLACE, etc.
+ *
+ * [:invalid => nil]
+ * Raise error on invalid byte sequence. This is a default behavior.
+ * [:invalid => :replace]
+ * Replace invalid byte sequence by replacement string.
+ * [:undef => nil]
+ * Raise an error if a character in source_encoding is not defined in destination_encoding.
+ * This is a default behavior.
+ * [:undef => :replace]
+ * Replace undefined character in destination_encoding with replacement string.
+ * [:replace => string]
+ * Specify the replacement string.
+ * If not specified, "\uFFFD" is used for Unicode encodings and "?" for others.
+ * [:universal_newline => true]
+ * Convert CRLF and CR to LF.
+ * [:crlf_newline => true]
+ * Convert LF to CRLF.
+ * [:cr_newline => true]
+ * Convert LF to CR.
+ * [:xml => :text]
+ * Escape as XML CharData.
+ * This form can be used as a HTML 4.0 #PCDATA.
+ * - '&' -> '&amp;'
+ * - '<' -> '&lt;'
+ * - '>' -> '&gt;'
+ * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
+ * [:xml => :attr]
+ * Escape as XML AttValue.
+ * The converted result is quoted as "...".
+ * This form can be used as a HTML 4.0 attribute value.
+ * - '&' -> '&amp;'
+ * - '<' -> '&lt;'
+ * - '>' -> '&gt;'
+ * - '"' -> '&quot;'
+ * - undefined characters in destination_encoding -> hexadecimal CharRef such as &#xHH;
+ *
+ * Examples:
+ * # UTF-16BE to UTF-8
+ * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
+ *
+ * # Usually, decorators such as newline conversion are inserted last.
+ * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", :universal_newline => true)
+ * p ec.convpath #=> [[#<Encoding:UTF-16BE>, #<Encoding:UTF-8>],
+ * # "universal_newline"]
+ *
+ * # But, if the last encoding is ASCII incompatible,
+ * # decorators are inserted before the last conversion.
+ * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", :crlf_newline => true)
+ * p ec.convpath #=> ["crlf_newline",
+ * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
+ *
+ * # Conversion path can be specified directly.
+ * ec = Encoding::Converter.new(["universal_newline", ["EUC-JP", "UTF-8"], ["UTF-8", "UTF-16BE"]])
+ * p ec.convpath #=> ["universal_newline",
+ * # [#<Encoding:EUC-JP>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:UTF-16BE>]]
+ */
+static mrb_value
+econv_init(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
+{
+  mrb_value ecopts;
+  mrb_value snamev, dnamev;
+  const char *sname, *dname;
+  mrb_encoding *senc, *denc;
+  mrb_econv_t *ec;
+  int ecflags;
+  mrb_value convpath;
+  /* The "*" specifier of mrb_get_args() stores a pointer to the rest
+     arguments, so the receiving variable has to be a pointer.  Passing the
+     address of a local array would overwrite the array's first bytes. */
+  mrb_value *argv = NULL;
+  int argc = 0;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  if (mrb_check_datatype(mrb, self, &econv_data_type)) {
+    mrb_raise(mrb, E_TYPE_ERROR, "already initialized");
+  }
+
+  if (argc == 1 && !mrb_nil_p(convpath = mrb_check_array_type(mrb, argv[0]))) {
+    /* Converter.new(convpath) */
+    ec = mrb_econv_init_by_convpath(mrb, self, convpath, &sname, &dname, &senc, &denc);
+    ecflags = 0;
+    ecopts = mrb_nil_value();
+  }
+  else {
+    /* Converter.new(source, destination [, opt]); econv_args() indexes
+       argv[0] and argv[1], so refuse short calls first. */
+    if (argc < 2) {
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2..3)", argc);
+    }
+    econv_args(mrb, argc, argv, &snamev, &dnamev, &sname, &dname, &senc, &denc, &ecflags, &ecopts);
+    ec = mrb_econv_open_opts(mrb, sname, dname, ecflags, ecopts);
+  }
+
+  if (!ec) {
+    mrb_exc_raise(mrb, mrb_econv_open_exc(mrb, sname, dname, ecflags));
+  }
+
+  /* Names without a known encoding get a dummy encoding, unless the
+     converter consists of a decorator only. */
+  if (!DECORATOR_P(sname, dname)) {
+    if (!senc)
+      senc = make_dummy_encoding(mrb, sname);
+    if (!denc)
+      denc = make_dummy_encoding(mrb, dname);
+  }
+
+  ec->source_encoding = senc;
+  ec->destination_encoding = denc;
+
+  DATA_PTR(self) = ec;
+
+  return self;
+}
+
+/*
+ * call-seq:
+ * ec.inspect -> string
+ *
+ * Returns a printable version of <i>ec</i>
+ *
+ * ec = Encoding::Converter.new("iso-8859-1", "utf-8")
+ * puts ec.inspect #=> #<Encoding::Converter: ISO-8859-1 to UTF-8>
+ *
+ */
+static mrb_value
+econv_inspect(mrb_state *mrb, mrb_value self)
+{
+  const char *class_name = mrb_obj_classname(mrb, self);
+  mrb_econv_t *conv;
+  mrb_value text;
+
+  Data_Get_Struct(mrb, self, &econv_data_type, conv);
+  if (!conv)
+    return mrb_sprintf(mrb, "#<%s: uninitialized>", class_name);
+
+  /* "#<Class: " + description of the conversion + ">" */
+  text = mrb_sprintf(mrb, "#<%s: ", class_name);
+  econv_description(mrb, conv->source_encoding_name, conv->destination_encoding_name,
+                    conv->flags, text);
+  mrb_str_cat2(mrb, text, ">");
+  return text;
+}
+
+/*
+ * Fetch the converter stored in `self`; raises TypeError when the object
+ * has no converter attached yet.
+ */
+static mrb_econv_t *
+check_econv(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *conv;
+
+  Data_Get_Struct(mrb, self, &econv_data_type, conv);
+  if (conv)
+    return conv;
+
+  mrb_raise(mrb, E_TYPE_ERROR, "uninitialized encoding converter");
+  return conv;
+}
+
+/*
+ * call-seq:
+ * ec.source_encoding -> encoding
+ *
+ * Returns the source encoding as an Encoding object.
+ */
+static mrb_value
+econv_source_encoding(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *conv = check_econv(mrb, self);
+
+  /* nil when the converter has no source encoding recorded */
+  if (conv->source_encoding) {
+    return mrb_enc_from_encoding(mrb, conv->source_encoding);
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * ec.destination_encoding -> encoding
+ *
+ * Returns the destination encoding as an Encoding object.
+ */
+static mrb_value
+econv_destination_encoding(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *conv = check_econv(mrb, self);
+
+  /* nil when the converter has no destination encoding recorded */
+  if (conv->destination_encoding) {
+    return mrb_enc_from_encoding(mrb, conv->destination_encoding);
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * ec.convpath -> ary
+ *
+ * Returns the conversion path of ec.
+ *
+ * The result is an array of conversions.
+ *
+ * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP", crlf_newline: true)
+ * p ec.convpath
+ * #=> [[#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>],
+ * # [#<Encoding:UTF-8>, #<Encoding:EUC-JP>],
+ * # "crlf_newline"]
+ *
+ * Each element of the array is a pair of encodings or a string.
+ * A pair means an encoding conversion.
+ * A string means a decorator.
+ *
+ * In the above example, [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>] means
+ * a converter from ISO-8859-1 to UTF-8.
+ * "crlf_newline" means newline converter from LF to CRLF.
+ */
+static mrb_value
+econv_convpath(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *conv = check_econv(mrb, self);
+  mrb_value path = mrb_ary_new(mrb);
+  int step;
+
+  for (step = 0; step < conv->num_trans; step++) {
+    const mrb_transcoder *tr = conv->elems[step].tc->transcoder;
+    mrb_value entry;
+
+    if (!DECORATOR_P(tr->src_encoding, tr->dst_encoding)) {
+      /* a real conversion: [source encoding, destination encoding] */
+      entry = mrb_assoc_new(mrb, make_encobj(mrb, tr->src_encoding), make_encobj(mrb, tr->dst_encoding));
+    }
+    else {
+      /* a decorator: just its name */
+      entry = mrb_str_new_cstr(mrb, tr->dst_encoding);
+    }
+    mrb_ary_push(mrb, path, entry);
+  }
+  return path;
+}
+
+/*
+ * Map a converter result code to its sym_* value.  Codes without a
+ * matching value are returned as a fixnum.
+ */
+static mrb_value
+econv_result_to_symbol(mrb_econv_result_t res)
+{
+  if (res == econv_invalid_byte_sequence)
+    return sym_invalid_byte_sequence;
+  if (res == econv_incomplete_input)
+    return sym_incomplete_input;
+  if (res == econv_undefined_conversion)
+    return sym_undefined_conversion;
+  if (res == econv_destination_buffer_full)
+    return sym_destination_buffer_full;
+  if (res == econv_source_buffer_empty)
+    return sym_source_buffer_empty;
+  if (res == econv_finished)
+    return sym_finished;
+  if (res == econv_after_output)
+    return sym_after_output;
+
+  return mrb_fixnum_value(res); /* should not be reached */
+}
+
+/*
+ * Core of primitive_convert; also called by convert and finish.
+ *
+ * argv holds, in order: input, output, output_byteoffset, output_bytesize
+ * and opt.  Only the first two are required; a trailing argument that is
+ * omitted (index >= argc) is treated exactly like an explicit nil, so
+ * argv is never read past argc.
+ *
+ * input (when not nil) has its converted bytes dropped in place, and
+ * output is resized and filled in place.  The return value is
+ * econv_result_to_symbol() applied to the result of mrb_econv_convert().
+ *
+ * Raises ArgumentError when fewer than two arguments are supplied, or
+ * when the output offset/size is negative or too big.
+ */
+mrb_value econv_primitive_cnvproc(mrb_state *mrb, int argc, mrb_value *argv, mrb_value self)
+{
+  mrb_value input, output, output_byteoffset_v, output_bytesize_v, opt, flags_v;
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_econv_result_t res;
+  const unsigned char *ip, *is;
+  unsigned char *op, *os;
+  long output_byteoffset, output_bytesize;
+  unsigned long output_byteend;
+  int flags;
+
+  /* input and output are mandatory; reading them unchecked would run off argv */
+  if (argc < 2)
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments");
+
+  input = argv[0];
+  output = argv[1];
+  /* normalize omitted optional arguments to nil instead of reading argv[argc..] */
+  output_byteoffset_v = (argc > 2) ? argv[2] : mrb_nil_value();
+  output_bytesize_v = (argc > 3) ? argv[3] : mrb_nil_value();
+  opt = (argc > 4) ? argv[4] : mrb_nil_value();
+
+  if (mrb_nil_p(output_byteoffset_v))
+    output_byteoffset = 0; /* dummy */
+  else
+    output_byteoffset = mrb_fixnum(output_byteoffset_v);
+
+  if (mrb_nil_p(output_bytesize_v))
+    output_bytesize = 0; /* dummy */
+  else
+    output_bytesize = mrb_fixnum(output_bytesize_v);
+
+  if (mrb_nil_p(opt)) {
+    /* nil (or omitted) means no flags */
+    flags = 0;
+  }
+  else if (!mrb_nil_p(flags_v = mrb_check_to_integer(mrb, opt, "to_int"))) {
+    /* integer form */
+    flags = mrb_fixnum(flags_v);
+  }
+  else {
+    /* hash form: :partial_input / :after_output */
+    mrb_value v;
+    opt = mrb_convert_type(mrb, opt, MRB_TT_HASH, "Hash", "to_hash");
+    flags = 0;
+    v = mrb_hash_get(mrb, opt, sym_partial_input);
+    if (RTEST(v))
+      flags |= ECONV_PARTIAL_INPUT;
+    v = mrb_hash_get(mrb, opt, sym_after_output);
+    if (RTEST(v))
+      flags |= ECONV_AFTER_OUTPUT;
+  }
+
+  mrb_string_value(mrb, &output);
+  if (!mrb_nil_p(input))
+    mrb_string_value(mrb, &input);
+  mrb_str_modify(mrb, output);
+
+  /* nil size: start from a minimum buffer and grow on demand (see retry) */
+  if (mrb_nil_p(output_bytesize_v)) {
+    output_bytesize = STR_BUF_MIN_SIZE;
+    if (!mrb_nil_p(input) && output_bytesize < RSTRING_LEN(input))
+      output_bytesize = RSTRING_LEN(input);
+  }
+
+ retry:
+
+  /* nil offset: append at the current end of output */
+  if (mrb_nil_p(output_byteoffset_v))
+    output_byteoffset = RSTRING_LEN(output);
+
+  if (output_byteoffset < 0)
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative output_byteoffset");
+
+  if (RSTRING_LEN(output) < output_byteoffset)
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "output_byteoffset too big");
+
+  if (output_bytesize < 0)
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "negative output_bytesize");
+
+  /* the sum is computed unsigned so that wrap-around can be detected */
+  output_byteend = (unsigned long)output_byteoffset +
+                   (unsigned long)output_bytesize;
+
+  if (output_byteend < (unsigned long)output_byteoffset ||
+      LONG_MAX < output_byteend)
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "output_byteoffset+output_bytesize too big");
+
+  if (mrb_str_capacity(output) < output_byteend)
+    mrb_str_resize(mrb, output, output_byteend);
+
+  if (mrb_nil_p(input)) {
+    ip = is = NULL;
+  }
+  else {
+    ip = (const unsigned char *)RSTRING_PTR(input);
+    is = ip + RSTRING_LEN(input);
+  }
+
+  op = (unsigned char *)RSTRING_PTR(output) + output_byteoffset;
+  os = op + output_bytesize;
+
+  res = mrb_econv_convert(mrb, ec, &ip, is, &op, os, flags);
+  /* output ends where the converter stopped writing */
+  mrb_str_set_len(mrb, output, op-(unsigned char *)RSTRING_PTR(output));
+  /* drop from input everything the converter consumed */
+  if (!mrb_nil_p(input))
+    mrb_str_drop_bytes(mrb, input, ip - (unsigned char *)RSTRING_PTR(input));
+
+  /* unlimited size requested but the buffer filled up: double it and continue */
+  if (mrb_nil_p(output_bytesize_v) && res == econv_destination_buffer_full) {
+    if (LONG_MAX / 2 < output_bytesize)
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "too long conversion result");
+    output_bytesize *= 2;
+    output_byteoffset_v = mrb_nil_value();
+    goto retry;
+  }
+
+  if (ec->destination_encoding) {
+    mrb_enc_associate(mrb, output, ec->destination_encoding);
+  }
+
+  return econv_result_to_symbol(res);
+}
+
+/*
+ * call-seq:
+ * ec.primitive_convert(source_buffer, destination_buffer) -> symbol
+ * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset) -> symbol
+ * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
+ * ec.primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, opt) -> symbol
+ *
+ * possible opt elements:
+ * hash form:
+ * :partial_input => true # source buffer may be part of larger source
+ * :after_output => true # stop conversion after output before input
+ * integer form:
+ * Encoding::Converter::PARTIAL_INPUT
+ * Encoding::Converter::AFTER_OUTPUT
+ *
+ * possible results:
+ * :invalid_byte_sequence
+ * :incomplete_input
+ * :undefined_conversion
+ * :after_output
+ * :destination_buffer_full
+ * :source_buffer_empty
+ * :finished
+ *
+ * primitive_convert converts source_buffer into destination_buffer.
+ *
+ * source_buffer should be a string or nil.
+ * nil means an empty string.
+ *
+ * destination_buffer should be a string.
+ *
+ * destination_byteoffset should be an integer or nil.
+ * nil means the end of destination_buffer.
+ * If it is omitted, nil is assumed.
+ *
+ * destination_bytesize should be an integer or nil.
+ * nil means unlimited.
+ * If it is omitted, nil is assumed.
+ *
+ * opt should be nil, a hash or an integer.
+ * nil means no flags.
+ * If it is omitted, nil is assumed.
+ *
+ * primitive_convert converts the content of source_buffer from beginning
+ * and store the result into destination_buffer.
+ *
+ * destination_byteoffset and destination_bytesize specify the region which
+ * the converted result is stored.
+ * destination_byteoffset specifies the start position in destination_buffer in bytes.
+ * If destination_byteoffset is nil,
+ * destination_buffer.bytesize is used for appending the result.
+ * destination_bytesize specifies maximum number of bytes.
+ * If destination_bytesize is nil,
+ * destination size is unlimited.
+ * After conversion, destination_buffer is resized to
+ * destination_byteoffset + actually produced number of bytes.
+ * Also destination_buffer's encoding is set to destination_encoding.
+ *
+ * primitive_convert drops the converted part of source_buffer.
+ * the dropped part is converted in destination_buffer or
+ * buffered in Encoding::Converter object.
+ *
+ * primitive_convert stops conversion when one of the following conditions is met.
+ * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
+ * - unexpected end of source buffer (:incomplete_input)
+ * this occurs only when :partial_input is not specified.
+ * - character not representable in output encoding (:undefined_conversion)
+ * - after some output is generated, before input is done (:after_output)
+ * this occurs only when :after_output is specified.
+ * - destination buffer is full (:destination_buffer_full)
+ * this occurs only when destination_bytesize is non-nil.
+ * - source buffer is empty (:source_buffer_empty)
+ * this occurs only when :partial_input is specified.
+ * - conversion is finished (:finished)
+ *
+ * example:
+ * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
+ * ret = ec.primitive_convert(src="pi", dst="", nil, 100)
+ * p [ret, src, dst] #=> [:finished, "", "\x00p\x00i"]
+ *
+ * ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
+ * ret = ec.primitive_convert(src="pi", dst="", nil, 1)
+ * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
+ * ret = ec.primitive_convert(src, dst="", nil, 1)
+ * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
+ * ret = ec.primitive_convert(src, dst="", nil, 1)
+ * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
+ * ret = ec.primitive_convert(src, dst="", nil, 1)
+ * p [ret, src, dst] #=> [:finished, "", "i"]
+ *
+ */
+static mrb_value
+econv_primitive_convert(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
+{
+  /*
+   * Method entry point: collect the call's arguments and forward them to
+   * econv_primitive_cnvproc().
+   *
+   * The "*" specifier stores a pointer to the argument vector, so argv
+   * must be a plain pointer; passing the address of a local array would
+   * have the pointer written over the array's first bytes.
+   */
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+  return econv_primitive_cnvproc(mrb, argc, argv, self);
+}
+
+/*
+ * call-seq:
+ * ec.convert(source_string) -> destination_string
+ *
+ * Convert source_string and return destination_string.
+ *
+ * source_string is assumed as a part of source.
+ * i.e. :partial_input=>true is specified internally.
+ * finish method should be used last.
+ *
+ * ec = Encoding::Converter.new("utf-8", "euc-jp")
+ * puts ec.convert("\u3042").dump #=> "\xA4\xA2"
+ * puts ec.finish.dump #=> ""
+ *
+ * ec = Encoding::Converter.new("euc-jp", "utf-8")
+ * puts ec.convert("\xA4").dump #=> ""
+ * puts ec.convert("\xA2").dump #=> "\xE3\x81\x82"
+ * puts ec.finish.dump #=> ""
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
+ * puts ec.convert("\xE3").dump #=> "".force_encoding("ISO-2022-JP")
+ * puts ec.convert("\x81").dump #=> "".force_encoding("ISO-2022-JP")
+ * puts ec.convert("\x82").dump #=> "\e$B$\"".force_encoding("ISO-2022-JP")
+ * puts ec.finish.dump #=> "\e(B".force_encoding("ISO-2022-JP")
+ *
+ * If a conversion error occurs,
+ * Encoding::UndefinedConversionError or
+ * Encoding::InvalidByteSequenceError is raised.
+ * Encoding::Converter#convert doesn't supply methods to recover or restart
+ * from these exceptions.
+ * When you want to handle these conversion errors,
+ * use Encoding::Converter#primitive_convert.
+ *
+ */
+static mrb_value
+econv_convert(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_value source_string;
+  mrb_value status, converted;
+  mrb_value args[5];
+
+  mrb_get_args(mrb, "o", &source_string);
+  mrb_string_value(mrb, &source_string);
+
+  converted = mrb_str_new(mrb, NULL, 0);
+
+  /* (input, output, byteoffset, bytesize, flags); the input is a copy */
+  args[0] = mrb_str_dup(mrb, source_string);
+  args[1] = converted;
+  args[2] = mrb_nil_value();
+  args[3] = mrb_nil_value();
+  args[4] = mrb_fixnum_value(ECONV_PARTIAL_INPUT);
+
+  status = econv_primitive_cnvproc(mrb, 5, args, self);
+
+  /* conversion errors are turned into exceptions */
+  if (mrb_obj_equal(mrb, status, sym_invalid_byte_sequence) ||
+      mrb_obj_equal(mrb, status, sym_undefined_conversion) ||
+      mrb_obj_equal(mrb, status, sym_incomplete_input)) {
+    mrb_exc_raise(mrb, make_econv_exception(mrb, ec));
+  }
+
+  if (mrb_obj_equal(mrb, status, sym_finished)) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "converter already finished");
+  }
+
+  /* with partial input the only remaining legal outcome is "source buffer empty" */
+  if (!mrb_obj_equal(mrb, status, sym_source_buffer_empty)) {
+    mrb_bug("unexpected result of econv_primitive_convert");
+  }
+
+  return converted;
+}
+
+/*
+ * call-seq:
+ * ec.finish -> string
+ *
+ * Finishes the converter.
+ * It returns the last part of the converted string.
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
+ * p ec.convert("\u3042") #=> "\e$B$\""
+ * p ec.finish #=> "\e(B"
+ */
+static mrb_value
+econv_finish(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_value status;
+  mrb_value tail = mrb_str_new(mrb, NULL, 0);
+  mrb_value args[5];
+
+  /* (input, output, byteoffset, bytesize, flags): nil input, no flags */
+  args[0] = mrb_nil_value();
+  args[1] = tail;
+  args[2] = mrb_nil_value();
+  args[3] = mrb_nil_value();
+  args[4] = mrb_fixnum_value(0);
+
+  status = econv_primitive_cnvproc(mrb, 5, args, self);
+
+  /* conversion errors are turned into exceptions */
+  if (mrb_obj_equal(mrb, status, sym_invalid_byte_sequence) ||
+      mrb_obj_equal(mrb, status, sym_undefined_conversion) ||
+      mrb_obj_equal(mrb, status, sym_incomplete_input)) {
+    mrb_exc_raise(mrb, make_econv_exception(mrb, ec));
+  }
+
+  /* anything other than "finished" at this point is an internal error */
+  if (!mrb_obj_equal(mrb, status, sym_finished)) {
+    mrb_bug("unexpected result of econv_primitive_convert");
+  }
+
+  return tail;
+}
+
+/*
+ * call-seq:
+ * ec.primitive_errinfo -> array
+ *
+ * primitive_errinfo returns important information regarding the last error
+ * as a 5-element array:
+ *
+ * [result, enc1, enc2, error_bytes, readagain_bytes]
+ *
+ * result is the last result of primitive_convert.
+ *
+ * Other elements are only meaningful when result is
+ * :invalid_byte_sequence, :incomplete_input or :undefined_conversion.
+ *
+ * enc1 and enc2 indicate a conversion step as a pair of strings.
+ * For example, a converter from EUC-JP to ISO-8859-1 converts
+ * a string as follows: EUC-JP -> UTF-8 -> ISO-8859-1.
+ * So [enc1, enc2] is either ["EUC-JP", "UTF-8"] or ["UTF-8", "ISO-8859-1"].
+ *
+ * error_bytes and readagain_bytes indicate the byte sequences which caused the error.
+ * error_bytes is discarded portion.
+ * readagain_bytes is buffered portion which is read again on next conversion.
+ *
+ * Example:
+ *
+ * # \xff is invalid as EUC-JP.
+ * ec = Encoding::Converter.new("EUC-JP", "Shift_JIS")
+ * ec.primitive_convert(src="\xff", dst="", nil, 10)
+ * p ec.primitive_errinfo
+ * #=> [:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", ""]
+ *
+ * # HIRAGANA LETTER A (\xa4\xa2 in EUC-JP) is not representable in ISO-8859-1.
+ * # Since this error occurs in the UTF-8 to ISO-8859-1 conversion,
+ * # error_bytes is HIRAGANA LETTER A in UTF-8 (\xE3\x81\x82).
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ * ec.primitive_convert(src="\xa4\xa2", dst="", nil, 10)
+ * p ec.primitive_errinfo
+ * #=> [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", ""]
+ *
+ * # partial character is invalid
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ * ec.primitive_convert(src="\xa4", dst="", nil, 10)
+ * p ec.primitive_errinfo
+ * #=> [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""]
+ *
+ * # Encoding::Converter::PARTIAL_INPUT prevents invalid errors by
+ * # partial characters.
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ * ec.primitive_convert(src="\xa4", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
+ * p ec.primitive_errinfo
+ * #=> [:source_buffer_empty, nil, nil, nil, nil]
+ *
+ * # \xd8\x00\x00@ is invalid as UTF-16BE because
+ * # no low surrogate after high surrogate (\xd8\x00).
+ * # It is detected by 3rd byte (\x00) which is part of next character.
+ * # So the high surrogate (\xd8\x00) is discarded and
+ * # the 3rd byte is read again later.
+ * # Since the byte is buffered in ec, it is dropped from src.
+ * ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
+ * ec.primitive_convert(src="\xd8\x00\x00@", dst="", nil, 10)
+ * p ec.primitive_errinfo
+ * #=> [:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00"]
+ * p src
+ * #=> "@"
+ *
+ * # Similar to UTF-16BE, \x00\xd8@\x00 is invalid as UTF-16LE.
+ * # The problem is detected by 4th byte.
+ * ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
+ * ec.primitive_convert(src="\x00\xd8@\x00", dst="", nil, 10)
+ * p ec.primitive_errinfo
+ * #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00"]
+ * p src
+ * #=> ""
+ *
+ */
+static mrb_value
+econv_primitive_errinfo(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_value info = mrb_ary_new_capa(mrb, 5);
+
+  /* slot 0: last result; slot 4 is set to nil first */
+  mrb_ary_set(mrb, info, 0, econv_result_to_symbol(ec->last_error.result));
+  mrb_ary_set(mrb, info, 4, mrb_nil_value());
+
+  /* slots 1 and 2: encoding names of the failing step, when recorded */
+  if (ec->last_error.source_encoding) {
+    mrb_value src_name = mrb_str_new2(mrb, ec->last_error.source_encoding);
+
+    mrb_ary_set(mrb, info, 1, src_name);
+  }
+
+  if (ec->last_error.destination_encoding) {
+    mrb_value dst_name = mrb_str_new2(mrb, ec->last_error.destination_encoding);
+
+    mrb_ary_set(mrb, info, 2, dst_name);
+  }
+
+  /* slots 3 and 4: error bytes, then the read-again bytes that follow them */
+  if (ec->last_error.error_bytes_start) {
+    const char *bytes = (const char *)ec->last_error.error_bytes_start;
+    mrb_value chunk;
+
+    chunk = mrb_str_new(mrb, bytes, ec->last_error.error_bytes_len);
+    mrb_ary_set(mrb, info, 3, chunk);
+
+    chunk = mrb_str_new(mrb, bytes + ec->last_error.error_bytes_len, ec->last_error.readagain_len);
+    mrb_ary_set(mrb, info, 4, chunk);
+  }
+
+  return info;
+}
+
+/*
+ * call-seq:
+ * ec.insert_output(string) -> nil
+ *
+ * Inserts string into the encoding converter.
+ * The string will be converted to the destination encoding and
+ * output on later conversions.
+ *
+ * If the destination encoding is stateful,
+ * string is converted according to the state and the state is updated.
+ *
+ * This method should be used only when a conversion error occurs.
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
+ * src = "HIRAGANA LETTER A is \u{3042}."
+ * dst = ""
+ * p ec.primitive_convert(src, dst) #=> :undefined_conversion
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is ", "."]
+ * ec.insert_output("<err>")
+ * p ec.primitive_convert(src, dst) #=> :finished
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["HIRAGANA LETTER A is <err>.", ""]
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-2022-jp")
+ * src = "\u{306F 3041 3068 2661 3002}" # U+2661 is not representable in iso-2022-jp
+ * dst = ""
+ * p ec.primitive_convert(src, dst) #=> :undefined_conversion
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H".force_encoding("ISO-2022-JP"), "\xE3\x80\x82"]
+ * ec.insert_output "?" # state change required to output "?".
+ * p ec.primitive_convert(src, dst) #=> :finished
+ * puts "[#{dst.dump}, #{src.dump}]" #=> ["\e$B$O$!$H\e(B?\e$B!#\e(B".force_encoding("ISO-2022-JP"), ""]
+ *
+ */
+static mrb_value
+econv_insert_output(mrb_state *mrb, mrb_value self)
+{
+  mrb_value string;
+  mrb_value target_enc;
+  mrb_econv_t *ec;
+  const char *insert_enc;
+
+  mrb_get_args(mrb, "o", &string);
+  ec = check_econv(mrb, self);
+
+  mrb_string_value(mrb, &string);
+
+  /* re-encode the text into the encoding the converter wants for insertion */
+  insert_enc = mrb_econv_encoding_to_insert_output(ec);
+  target_enc = mrb_enc_from_encoding(mrb, mrb_enc_find(mrb, insert_enc));
+  string = mrb_str_encode(mrb, string, target_enc, 0, mrb_nil_value());
+
+  if (mrb_econv_insert_output(mrb, ec,
+                              (const unsigned char *)RSTRING_PTR(string),
+                              RSTRING_LEN(string),
+                              insert_enc) == -1) {
+    mrb_raise(mrb, E_ARGUMENT_ERROR, "too big string");
+  }
+
+  return mrb_nil_value();
+}
+
+/*
+ * call-seq:
+ * ec.putback -> string
+ * ec.putback(max_numbytes) -> string
+ *
+ * Put back the bytes which will be converted.
+ *
+ * The bytes are caused by invalid_byte_sequence error.
+ * When invalid_byte_sequence error, some bytes are discarded and
+ * some bytes are buffered to be converted later.
+ * The latter bytes can be put back.
+ * It can be observed by
+ * Encoding::InvalidByteSequenceError#readagain_bytes and
+ * Encoding::Converter#primitive_errinfo.
+ *
+ * ec = Encoding::Converter.new("utf-16le", "iso-8859-1")
+ * src = "\x00\xd8\x61\x00"
+ * dst = ""
+ * p ec.primitive_convert(src, dst) #=> :invalid_byte_sequence
+ * p ec.primitive_errinfo #=> [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"]
+ * p ec.putback #=> "a\x00"
+ * p ec.putback #=> "" # no more bytes to put back
+ *
+ */
+static mrb_value
+econv_putback(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
+{
+  /*
+   * Returns up to max_numbytes (argv[0], optional) of the bytes that
+   * mrb_econv_putbackable() reports, as a new string associated with the
+   * source encoding when one is set.  An omitted or nil max_numbytes means
+   * "everything that can be put back".
+   *
+   * Raises ArgumentError for a negative max_numbytes.
+   */
+  mrb_econv_t *ec = check_econv(mrb, self);
+  int n;
+  int putbackable;
+  mrb_value str;
+  /* "*" stores a pointer to the argument vector, so this must be a pointer, not an array */
+  mrb_value *argv;
+  int argc;
+
+  mrb_get_args(mrb, "*", &argv, &argc);
+
+  putbackable = mrb_econv_putbackable(ec);
+  if (argc == 0 || mrb_nil_p(argv[0])) {
+    n = putbackable;
+  }
+  else {
+    n = mrb_fixnum(argv[0]);
+    /* a negative count would reach mrb_str_new()/mrb_econv_putback() as a length */
+    if (n < 0)
+      mrb_raise(mrb, E_ARGUMENT_ERROR, "negative max_numbytes");
+    if (putbackable < n)
+      n = putbackable;
+  }
+
+  str = mrb_str_new(mrb, NULL, n);
+  mrb_econv_putback(ec, (unsigned char *)RSTRING_PTR(str), n);
+
+  if (ec->source_encoding) {
+    mrb_enc_associate(mrb, str, ec->source_encoding);
+  }
+
+  return str;
+}
+
+/*
+ * call-seq:
+ * ec.last_error -> exception or nil
+ *
+ * Returns an exception object for the last conversion.
+ * Returns nil if the last conversion did not produce an error.
+ *
+ * "error" means that
+ * Encoding::InvalidByteSequenceError and Encoding::UndefinedConversionError for
+ * Encoding::Converter#convert and
+ * :invalid_byte_sequence, :incomplete_input and :undefined_conversion for
+ * Encoding::Converter#primitive_convert.
+ *
+ * ec = Encoding::Converter.new("utf-8", "iso-8859-1")
+ * p ec.primitive_convert(src="\xf1abcd", dst="") #=> :invalid_byte_sequence
+ * p ec.last_error #=> #<Encoding::InvalidByteSequenceError: "\xF1" followed by "a" on UTF-8>
+ * p ec.primitive_convert(src, dst, nil, 1) #=> :destination_buffer_full
+ * p ec.last_error #=> nil
+ *
+ */
+static mrb_value
+econv_last_error(mrb_state *mrb, mrb_value self)
+{
+  /* Build the exception for the converter's last error; nil when there is none. */
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_value exc = make_econv_exception(mrb, ec);
+
+  return mrb_nil_p(exc) ? mrb_nil_value() : exc;
+}
+
+/*
+ * call-seq:
+ * ec.replacement -> string
+ *
+ * Returns the replacement string.
+ *
+ * ec = Encoding::Converter.new("euc-jp", "us-ascii")
+ * p ec.replacement #=> "?"
+ *
+ * ec = Encoding::Converter.new("euc-jp", "utf-8")
+ * p ec.replacement #=> "\uFFFD"
+ */
+static mrb_value
+econv_get_replacement(mrb_state *mrb, mrb_value self)
+{
+  mrb_econv_t *ec = check_econv(mrb, self);
+
+  /* make_replacement() reports failure with -1 */
+  if (make_replacement(mrb, ec) == -1) {
+    mrb_raise(mrb, E_UNDEFINEDCONVERSION_ERROR, "replacement character setup failed");
+  }
+
+  /* wrap the converter's replacement bytes in a string tagged with replacement_enc */
+  return mrb_enc_str_new(mrb,
+                         (const char *)ec->replacement_str,
+                         (long)ec->replacement_len,
+                         mrb_enc_find(mrb, ec->replacement_enc));
+}
+
+/*
+ * call-seq:
+ * ec.replacement = string
+ *
+ * Sets the replacement string.
+ *
+ * ec = Encoding::Converter.new("utf-8", "us-ascii", :undef => :replace)
+ * ec.replacement = "<undef>"
+ * p ec.convert("a \u3042 b") #=> "a <undef> b"
+ */
+static mrb_value
+econv_set_replacement(mrb_state *mrb, mrb_value self)
+{
+  /*
+   * Installs the method's single argument as the converter's replacement
+   * string, using that string's own encoding name.
+   *
+   * Returns the argument as passed in.  Raises UndefinedConversionError
+   * when mrb_econv_set_replacement() reports failure (-1).
+   */
+  mrb_value arg;
+  mrb_econv_t *ec = check_econv(mrb, self);
+  mrb_value string;
+  int ret;
+  mrb_encoding *enc;
+
+  mrb_get_args(mrb, "o", &arg);
+  /* copy only after mrb_get_args() has filled arg in; copying earlier reads an uninitialized value */
+  string = arg;
+
+  mrb_string_value(mrb, &string);
+  enc = mrb_enc_get(mrb, string);
+
+  ret = mrb_econv_set_replacement(mrb, ec,
+                                  (const unsigned char *)RSTRING_PTR(string),
+                                  RSTRING_LEN(string),
+                                  mrb_enc_name(enc));
+
+  if (ret == -1) {
+    /* xxx: mrb_eInvalidByteSequenceError? */
+    mrb_raise(mrb, E_UNDEFINEDCONVERSION_ERROR, "replacement character setup failed");
+  }
+
+  return arg;
+}
+
+mrb_value
+mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec)
+{
+  /* Thin wrapper that forwards to make_econv_exception(). */
+  mrb_value exc = make_econv_exception(mrb, ec);
+
+  return exc;
+}
+
+void
+mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec)
+{
+  /* Raise the converter's pending exception, if make_econv_exception() yields one. */
+  mrb_value exc = make_econv_exception(mrb, ec);
+
+  if (!mrb_nil_p(exc)) {
+    mrb_exc_raise(mrb, exc);
+  }
+}
+
+/*
+ * call-seq:
+ * ecerr.source_encoding_name -> string
+ *
+ * Returns the source encoding name as a string.
+ */
+static mrb_value
+ecerr_source_encoding_name(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "source_encoding_name" attribute of the exception */
+  mrb_value name = mrb_attr_get(mrb, self, mrb_intern(mrb, "source_encoding_name"));
+
+  return name;
+}
+
+/*
+ * call-seq:
+ * ecerr.source_encoding -> encoding
+ *
+ * Returns the source encoding as an encoding object.
+ *
+ * Note that the result may not be equal to the source encoding of
+ * the encoding converter if the conversion has multiple steps.
+ *
+ * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") # ISO-8859-1 -> UTF-8 -> EUC-JP
+ * begin
+ * ec.convert("\xa0") # NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP.
+ * rescue Encoding::UndefinedConversionError
+ * p $!.source_encoding #=> #<Encoding:UTF-8>
+ * p $!.destination_encoding #=> #<Encoding:EUC-JP>
+ * p $!.source_encoding_name #=> "UTF-8"
+ * p $!.destination_encoding_name #=> "EUC-JP"
+ * end
+ *
+ */
+static mrb_value
+ecerr_source_encoding(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "source_encoding" attribute of the exception */
+  mrb_value encoding = mrb_attr_get(mrb, self, mrb_intern(mrb, "source_encoding"));
+
+  return encoding;
+}
+
+/*
+ * call-seq:
+ * ecerr.destination_encoding_name -> string
+ *
+ * Returns the destination encoding name as a string.
+ */
+static mrb_value
+ecerr_destination_encoding_name(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "destination_encoding_name" attribute of the exception */
+  mrb_value name = mrb_attr_get(mrb, self, mrb_intern(mrb, "destination_encoding_name"));
+
+  return name;
+}
+
+/*
+ * call-seq:
+ * ecerr.destination_encoding -> encoding
+ *
+ * Returns the destination encoding as an encoding object.
+ */
+static mrb_value
+ecerr_destination_encoding(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "destination_encoding" attribute of the exception */
+  mrb_value encoding = mrb_attr_get(mrb, self, mrb_intern(mrb, "destination_encoding"));
+
+  return encoding;
+}
+
+/*
+ * call-seq:
+ * ecerr.error_char -> string
+ *
+ * Returns the one-character string which caused Encoding::UndefinedConversionError.
+ *
+ * ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP")
+ * begin
+ * ec.convert("\xa0")
+ * rescue Encoding::UndefinedConversionError
+ * puts $!.error_char.dump #=> "\xC2\xA0"
+ * p $!.error_char.encoding #=> #<Encoding:UTF-8>
+ * end
+ *
+ */
+static mrb_value
+ecerr_error_char(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "error_char" attribute of the exception */
+  mrb_value ch = mrb_attr_get(mrb, self, mrb_intern(mrb, "error_char"));
+
+  return ch;
+}
+
+/*
+ * call-seq:
+ * ecerr.error_bytes -> string
+ *
+ * Returns the discarded bytes when Encoding::InvalidByteSequenceError occurs.
+ *
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ * begin
+ * ec.convert("abc\xA1\xFFdef")
+ * rescue Encoding::InvalidByteSequenceError
+ * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "\xFF" on EUC-JP>
+ * puts $!.error_bytes.dump #=> "\xA1"
+ * puts $!.readagain_bytes.dump #=> "\xFF"
+ * end
+ */
+static mrb_value
+ecerr_error_bytes(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "error_bytes" attribute of the exception */
+  mrb_value bytes = mrb_attr_get(mrb, self, mrb_intern(mrb, "error_bytes"));
+
+  return bytes;
+}
+
+/*
+ * call-seq:
+ * ecerr.readagain_bytes -> string
+ *
+ * Returns the bytes to be read again when Encoding::InvalidByteSequenceError occurs.
+ */
+static mrb_value
+ecerr_readagain_bytes(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "readagain_bytes" attribute of the exception */
+  mrb_value bytes = mrb_attr_get(mrb, self, mrb_intern(mrb, "readagain_bytes"));
+
+  return bytes;
+}
+
+/*
+ * call-seq:
+ * ecerr.incomplete_input? -> true or false
+ *
+ * Returns true if the invalid byte sequence error is caused by
+ * premature end of string.
+ *
+ * ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
+ *
+ * begin
+ * ec.convert("abc\xA1z")
+ * rescue Encoding::InvalidByteSequenceError
+ * p $! #=> #<Encoding::InvalidByteSequenceError: "\xA1" followed by "z" on EUC-JP>
+ * p $!.incomplete_input? #=> false
+ * end
+ *
+ * begin
+ * ec.convert("abc\xA1")
+ * ec.finish
+ * rescue Encoding::InvalidByteSequenceError
+ * p $! #=> #<Encoding::InvalidByteSequenceError: incomplete "\xA1" on EUC-JP>
+ * p $!.incomplete_input? #=> true
+ * end
+ */
+static mrb_value
+ecerr_incomplete_input(mrb_state *mrb, mrb_value self)
+{
+  /* reader for the "incomplete_input" attribute of the exception */
+  mrb_value flag = mrb_attr_get(mrb, self, mrb_intern(mrb, "incomplete_input"));
+
+  return flag;
+}
+
+extern void Init_newline(void);
+
+/*
+ * Document-class: Encoding::UndefinedConversionError
+ *
+ * Raised by Encoding and String methods when a transcoding operation
+ * fails.
+ */
+
+/*
+ * Document-class: Encoding::InvalidByteSequenceError
+ *
+ * Raised by Encoding and String methods when the string being
+ * transcoded contains a byte invalid for either the source or
+ * target encoding.
+ */
+
+/*
+ * Document-class: Encoding::ConverterNotFoundError
+ *
+ * Raised by transcoding methods when a named encoding does not
+ * correspond with a known converter.
+ */
+
+/*
+ * Registers the transcoding API with the interpreter:
+ *  - the UndefinedConversionError, InvalidByteSequenceError and
+ *    ConverterNotFoundError classes (subclasses of E_ENCODING_ERROR),
+ *  - String#encode and String#encode!,
+ *  - the Converter class with its class methods, instance methods and
+ *    flag constants,
+ *  - the attribute readers of the two conversion error classes.
+ * It also creates the transcoder table.
+ */
+void
+mrb_init_transcode(mrb_state *mrb)
+{
+  struct RClass *s;
+  struct RClass *c;
+  struct RClass *u;
+  struct RClass *i;
+
+  /* only the side effect (class definition) is needed; the results were never used */
+  mrb_define_class(mrb, "UndefinedConversionError", E_ENCODING_ERROR);
+  mrb_define_class(mrb, "InvalidByteSequenceError", E_ENCODING_ERROR);
+  mrb_define_class(mrb, "ConverterNotFoundError", E_ENCODING_ERROR);
+
+  transcoder_table = st_init_strcasetable();
+
+  /* NOTE(review): the sym_* initializations below are disabled here; confirm
+   * the sym_* values used throughout this file are set up somewhere else. */
+  //sym_invalid = ID2SYM(mrb_intern("invalid"));
+  //sym_undef = ID2SYM(mrb_intern("undef"));
+  //sym_replace = ID2SYM(mrb_intern("replace"));
+  //sym_fallback = ID2SYM(mrb_intern("fallback"));
+  //sym_xml = ID2SYM(mrb_intern("xml"));
+  //sym_text = ID2SYM(mrb_intern("text"));
+  //sym_attr = ID2SYM(mrb_intern("attr"));
+
+  //sym_invalid_byte_sequence = ID2SYM(mrb_intern("invalid_byte_sequence"));
+  //sym_undefined_conversion = ID2SYM(mrb_intern("undefined_conversion"));
+  //sym_destination_buffer_full = ID2SYM(mrb_intern("destination_buffer_full"));
+  //sym_source_buffer_empty = ID2SYM(mrb_intern("source_buffer_empty"));
+  //sym_finished = ID2SYM(mrb_intern("finished"));
+  //sym_after_output = ID2SYM(mrb_intern("after_output"));
+  //sym_incomplete_input = ID2SYM(mrb_intern("incomplete_input"));
+  //sym_universal_newline = ID2SYM(mrb_intern("universal_newline"));
+  //sym_crlf_newline = ID2SYM(mrb_intern("crlf_newline"));
+  //sym_cr_newline = ID2SYM(mrb_intern("cr_newline"));
+  //sym_partial_input = ID2SYM(mrb_intern("partial_input"));
+
+  s = mrb->string_class;
+  mrb_define_method(mrb, s, "encode", str_encode, ARGS_ANY());
+  mrb_define_method(mrb, s, "encode!", str_encode_bang, ARGS_ANY());
+
+  c = mrb->converter_class = mrb_define_class(mrb, "Converter", mrb->encode_class);
+  //mrb_cEncodingConverter = rb_define_class_under(mrb_cEncoding, "Converter", rb_cData);
+  //mrb_define_alloc_func(mrb_cEncodingConverter, econv_s_allocate);
+  mrb_define_class_method(mrb, c, "asciicompat_encoding", econv_s_asciicompat_encoding, ARGS_REQ(1)); /* 1 */
+  mrb_define_class_method(mrb, c, "search_convpath", econv_s_search_convpath, ARGS_ANY()); /* 2 */
+
+  /* Converter instance methods belong on the Converter class (c); registering
+   * them on the String class would replace String#initialize and String#inspect. */
+  mrb_define_method(mrb, c, "initialize", econv_init, ARGS_ANY());
+  mrb_define_method(mrb, c, "inspect", econv_inspect, ARGS_NONE());
+  mrb_define_method(mrb, c, "convpath", econv_convpath, ARGS_NONE());
+  mrb_define_method(mrb, c, "source_encoding", econv_source_encoding, ARGS_NONE());
+  mrb_define_method(mrb, c, "destination_encoding", econv_destination_encoding, ARGS_NONE());
+  mrb_define_method(mrb, c, "primitive_convert", econv_primitive_convert, ARGS_ANY());
+  mrb_define_method(mrb, c, "convert", econv_convert, ARGS_REQ(1));
+  mrb_define_method(mrb, c, "finish", econv_finish, ARGS_NONE());
+  mrb_define_method(mrb, c, "primitive_errinfo", econv_primitive_errinfo, ARGS_NONE());
+  mrb_define_method(mrb, c, "insert_output", econv_insert_output, ARGS_REQ(1));
+  mrb_define_method(mrb, c, "putback", econv_putback, ARGS_ANY());
+  mrb_define_method(mrb, c, "last_error", econv_last_error, ARGS_NONE());
+  mrb_define_method(mrb, c, "replacement", econv_get_replacement, ARGS_NONE());
+  mrb_define_method(mrb, c, "replacement=", econv_set_replacement, ARGS_REQ(1));
+
+  /* flag constants, likewise defined on Converter rather than String */
+  mrb_define_const(mrb, c, "INVALID_MASK", mrb_fixnum_value(ECONV_INVALID_MASK));
+  mrb_define_const(mrb, c, "INVALID_REPLACE", mrb_fixnum_value(ECONV_INVALID_REPLACE));
+  mrb_define_const(mrb, c, "UNDEF_MASK", mrb_fixnum_value(ECONV_UNDEF_MASK));
+  mrb_define_const(mrb, c, "UNDEF_REPLACE", mrb_fixnum_value(ECONV_UNDEF_REPLACE));
+  mrb_define_const(mrb, c, "UNDEF_HEX_CHARREF", mrb_fixnum_value(ECONV_UNDEF_HEX_CHARREF));
+  mrb_define_const(mrb, c, "PARTIAL_INPUT", mrb_fixnum_value(ECONV_PARTIAL_INPUT));
+  mrb_define_const(mrb, c, "AFTER_OUTPUT", mrb_fixnum_value(ECONV_AFTER_OUTPUT));
+  mrb_define_const(mrb, c, "UNIVERSAL_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
+  mrb_define_const(mrb, c, "CRLF_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_CRLF_NEWLINE_DECORATOR));
+  mrb_define_const(mrb, c, "CR_NEWLINE_DECORATOR", mrb_fixnum_value(ECONV_CR_NEWLINE_DECORATOR));
+  mrb_define_const(mrb, c, "XML_TEXT_DECORATOR", mrb_fixnum_value(ECONV_XML_TEXT_DECORATOR));
+  mrb_define_const(mrb, c, "XML_ATTR_CONTENT_DECORATOR", mrb_fixnum_value(ECONV_XML_ATTR_CONTENT_DECORATOR));
+  mrb_define_const(mrb, c, "XML_ATTR_QUOTE_DECORATOR", mrb_fixnum_value(ECONV_XML_ATTR_QUOTE_DECORATOR));
+
+  u = E_UNDEFINEDCONVERSION_ERROR;
+  mrb_define_method(mrb, u, "source_encoding_name", ecerr_source_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, u, "destination_encoding_name", ecerr_destination_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, u, "source_encoding", ecerr_source_encoding, ARGS_NONE());
+  mrb_define_method(mrb, u, "destination_encoding", ecerr_destination_encoding, ARGS_NONE());
+  mrb_define_method(mrb, u, "error_char", ecerr_error_char, ARGS_NONE());
+
+  i = E_INVALIDBYTESEQUENCE_ERROR;
+  mrb_define_method(mrb, i, "source_encoding_name", ecerr_source_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, i, "destination_encoding_name", ecerr_destination_encoding_name, ARGS_NONE());
+  mrb_define_method(mrb, i, "source_encoding", ecerr_source_encoding, ARGS_NONE());
+  mrb_define_method(mrb, i, "destination_encoding", ecerr_destination_encoding, ARGS_NONE());
+  mrb_define_method(mrb, i, "error_bytes", ecerr_error_bytes, ARGS_NONE());
+  mrb_define_method(mrb, i, "readagain_bytes", ecerr_readagain_bytes, ARGS_NONE());
+  mrb_define_method(mrb, i, "incomplete_input?", ecerr_incomplete_input, ARGS_NONE());
+
+  //Init_newline();
+}
+#endif //INCLUDE_ENCODING
diff --git a/src/transcode_data.h b/src/transcode_data.h
new file mode 100644
index 000000000..7ff540120
--- /dev/null
+++ b/src/transcode_data.h
@@ -0,0 +1,109 @@
+/**********************************************************************
+
+ transcode_data.h -
+
+ $Author: duerst $
+ created at: Mon 10 Dec 2007 14:01:47 JST 2007
+
+ Copyright (C) 2007 Martin Duerst
+
+**********************************************************************/
+
+//#include "ruby/ruby.h"
+
+#ifndef RUBY_TRANSCODE_DATA_H
+#define RUBY_TRANSCODE_DATA_H 1
+
+#define WORDINDEX_SHIFT_BITS 2 /* an info value holds a word index shifted left by this many bits */
+#define WORDINDEX2INFO(widx) ((widx) << WORDINDEX_SHIFT_BITS) /* word index -> info value */
+#define INFO2WORDINDEX(info) ((info) >> WORDINDEX_SHIFT_BITS) /* info value -> word index (inverse of WORDINDEX2INFO) */
+#define BYTE_LOOKUP_BASE(bl) ((bl)[0]) /* element 0 of bl (the BASE slot) */
+#define BYTE_LOOKUP_INFO(bl) ((bl)[1]) /* element 1 of bl (the INFO slot) */
+
+#define PType (unsigned int) /* expands to a cast prefix: "PType x" becomes "(unsigned int) x" */
+/* Type tags for info words; the constructor macros in this header OR one of these into the low bits. */
+#define NOMAP (PType 0x01) /* direct map */
+#define ONEbt (0x02) /* one byte payload (this and the next three tags are plain int constants, without the PType cast) */
+#define TWObt (0x03) /* two bytes payload */
+#define THREEbt (0x05) /* three bytes payload */
+#define FOURbt (0x06) /* four bytes payload, UTF-8 only, macros start at getBT0 */
+#define INVALID (PType 0x07) /* invalid byte sequence */
+#define UNDEF (PType 0x09) /* legal but undefined */
+#define ZERObt (PType 0x0A) /* zero bytes of payload, i.e. remove */
+#define FUNii (PType 0x0B) /* function from info to info */
+#define FUNsi (PType 0x0D) /* function from start to info */
+#define FUNio (PType 0x0E) /* function from info to output */
+#define FUNso (PType 0x0F) /* function from start to output */
+#define STR1 (PType 0x11) /* string 4 <= len <= 259 bytes: 1byte length + content */
+#define GB4bt (PType 0x12) /* GB18030 four bytes payload */
+#define FUNsio (PType 0x13) /* function from start and info to output */
+
+#define STR1_LENGTH(byte_addr) (unsigned int)(*(byte_addr) + 4) /* stored length byte + 4; undoes the bias applied by makeSTR1LEN */
+#define STR1_BYTEINDEX(w) ((w) >> 6) /* recovers the byte index packed by makeSTR1 */
+#define makeSTR1(bi) (((bi) << 6) | STR1) /* byte index in bits 6 and up, STR1 tag in the low bits */
+#define makeSTR1LEN(len) ((len)-4) /* length is stored biased by -4 */
+
+#define o1(b1) (PType((((unsigned char)(b1))<<8)|ONEbt))
+#define o2(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
+#define o3(b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned int)(unsigned char)(b3))<<24)|THREEbt)&0xffffffffU))
+#define o4(b0,b1,b2,b3) (PType(((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt)&0xffffffffU))
+#define g4(b0,b1,b2,b3) (PType(((((unsigned char)(b0))<<8)|(((unsigned char)(b2))<<16)|((((unsigned char)(b1))&0x0f)<<24)|((((unsigned int)(unsigned char)(b3))&0x0f)<<28)|GB4bt)&0xffffffffU))
+#define funsio(diff) (PType((((unsigned int)(diff))<<8)|FUNsio))
+
+#define getBT1(a) ((unsigned char)((a)>> 8)) /* bits 8-15: the byte packed as b1 by o1..o4 */
+#define getBT2(a) ((unsigned char)((a)>>16)) /* bits 16-23: the byte packed as b2 */
+#define getBT3(a) ((unsigned char)((a)>>24)) /* bits 24-31: the byte packed as b3 */
+#define getBT0(a) (((unsigned char)((a)>> 5)&0x07)|0xF0) /* for UTF-8 only!!! rebuilds the lead byte: the 3 bits o4 stored in bits 5-7, OR'ed with 0xF0 */
+
+#define getGB4bt0(a) ((unsigned char)((a)>> 8)) /* bits 8-15: b0 as packed by g4 */
+#define getGB4bt1(a) ((((unsigned char)((a)>>24))&0x0F)|0x30) /* nibble in bits 24-27, OR'ed with 0x30 to rebuild b1 */
+#define getGB4bt2(a) ((unsigned char)((a)>>16)) /* bits 16-23: b2 as packed by g4 */
+#define getGB4bt3(a) ((((unsigned char)((a)>>28))&0x0F)|0x30) /* nibble in bits 28-31, OR'ed with 0x30 to rebuild b3 */
+
+#define o2FUNii(b1,b2) (PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii)) /* same byte layout as o2 (b1 in bits 8-15, b2 in bits 16-23) but tagged FUNii instead of TWObt */
+
+/* Marker macros that expand to nothing. Do we need these??? Maybe not, can be done with simple tables. */
+#define ONETRAIL /* legal but undefined if one more trailing UTF-8 */
+#define TWOTRAIL /* legal but undefined if two more trailing UTF-8 */
+#define THREETRAIL /* legal but undefined if three more trailing UTF-8 */
+
+typedef enum { /* ASCII-compatibility class of a transcoder; this is the type of mrb_transcoder.asciicompat_type */
+ asciicompat_converter, /* ASCII-compatible -> ASCII-compatible */
+ asciicompat_decoder, /* ASCII-incompatible -> ASCII-compatible */
+ asciicompat_encoder /* ASCII-compatible -> ASCII-incompatible */
+ /* ASCII-incompatible -> ASCII-incompatible is intentionally omitted. */
+} mrb_transcoder_asciicompat_type_t;
+
+typedef struct mrb_transcoder mrb_transcoder;
+/* NOTE(review): size_t, ssize_t and mrb_value are used below, but this header has no active #include; the includer must provide them first. */
+/* static structure, one per supported encoding pair */
+struct mrb_transcoder {
+ const char *src_encoding; /* name of the source encoding of the pair */
+ const char *dst_encoding; /* name of the destination encoding of the pair */
+ unsigned int conv_tree_start;
+ const unsigned char *byte_array; /* byte table; element count in byte_array_length */
+ unsigned int byte_array_length;
+ const unsigned int *word_array; /* word table; element count in word_array_length */
+ unsigned int word_array_length;
+ int word_size;
+ int input_unit_length;
+ int max_input;
+ int max_output;
+ mrb_transcoder_asciicompat_type_t asciicompat_type;
+ size_t state_size; /* presumably the size of the state block handed to the callbacks as void* - TODO confirm */
+ int (*state_init_func)(void*); /* ret==0:success ret!=0:failure(errno) */
+ int (*state_fini_func)(void*); /* ret==0:success ret!=0:failure(errno) */
+ mrb_value (*func_ii)(void*, mrb_value); /* info -> info */
+ mrb_value (*func_si)(void*, const unsigned char*, size_t); /* start -> info */
+ ssize_t (*func_io)(void*, mrb_value, const unsigned char*, size_t); /* info -> output */
+ ssize_t (*func_so)(void*, const unsigned char*, size_t, unsigned char*, size_t); /* start -> output */
+ ssize_t (*finish_func)(void*, unsigned char*, size_t); /* -> output */
+ ssize_t (*resetsize_func)(void*); /* -> len */
+ ssize_t (*resetstate_func)(void*, unsigned char*, size_t); /* -> output */
+ ssize_t (*func_sio)(void*, const unsigned char*, size_t, mrb_value, unsigned char*, size_t); /* start and info -> output (takes both the input bytes and an info value) */
+};
+
+void mrb_declare_transcoder(mrb_state *mrb, const char *enc1, const char *enc2, const char *lib); /* prototype only; presumably announces an enc1 -> enc2 transcoder provided by lib - confirm at the definition */
+void mrb_register_transcoder(mrb_state *mrb, const mrb_transcoder *); /* prototype only; takes a read-only transcoder descriptor (parameter left unnamed) */
+
+#endif /* RUBY_TRANSCODE_DATA_H */
diff --git a/src/unicode.c b/src/unicode.c
new file mode 100644
index 000000000..0753fe62a
--- /dev/null
+++ b/src/unicode.c
@@ -0,0 +1,2607 @@
+/**********************************************************************
+ unicode.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include <string.h>
+#include "regint.h"
+
+#include "encoding.h" //#define TOLOWER(c)
+
+#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
+ ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
+/* One ctype bit mask per code 0x00-0xFF (256 entries). The macro above indexes it directly with no range check, so callers must pass code < 256. Bit meanings come from CTYPE_TO_BIT, whose definition is not visible here. */
+static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+ 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+ 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+ 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+ 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+ 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+ 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
+ 0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+ 0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+ 0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+ 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+ 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+
+#include "name2ctype.h"
+
+typedef struct { /* list of up to three code points */
+ int n; /* how many leading entries of code[] are in use (1 to 3 in the CaseFold initializers) */
+ OnigCodePoint code[3]; /* the code points; unused trailing slots are left zero-initialized */
+} CodePointList3;
+
+typedef struct { /* element type of CaseFold[]: one code point and the sequence it folds to */
+ OnigCodePoint from; /* code point being folded */
+ CodePointList3 to; /* folded result, one to three code points */
+} CaseFold_11_Type;
+
+typedef struct { /* same layout as CaseFold_11_Type; by its name presumably used for the reverse (unfold) mapping - TODO confirm at its use */
+ OnigCodePoint from; /* single source code point */
+ CodePointList3 to; /* up to three target code points */
+} CaseUnfold_11_Type;
+
+typedef struct { /* list of up to two code points (two-slot counterpart of CodePointList3) */
+ int n; /* presumably the number of code[] entries in use, as in CodePointList3 - TODO confirm */
+ OnigCodePoint code[2];
+} CodePointList2;
+
+typedef struct { /* mapping keyed by a pair of code points; usage is not visible here */
+ OnigCodePoint from[2]; /* two-code-point source sequence */
+ CodePointList2 to; /* up to two target code points */
+} CaseUnfold_12_Type;
+
+typedef struct { /* mapping keyed by a triple of code points; usage is not visible here */
+ OnigCodePoint from[3]; /* three-code-point source sequence */
+ CodePointList2 to; /* up to two target code points */
+} CaseUnfold_13_Type;
+
+static const CaseFold_11_Type CaseFold[] = {
+ { 0x0041, {1, {0x0061}}},
+ { 0x0042, {1, {0x0062}}},
+ { 0x0043, {1, {0x0063}}},
+ { 0x0044, {1, {0x0064}}},
+ { 0x0045, {1, {0x0065}}},
+ { 0x0046, {1, {0x0066}}},
+ { 0x0047, {1, {0x0067}}},
+ { 0x0048, {1, {0x0068}}},
+ { 0x004a, {1, {0x006a}}},
+ { 0x004b, {1, {0x006b}}},
+ { 0x004c, {1, {0x006c}}},
+ { 0x004d, {1, {0x006d}}},
+ { 0x004e, {1, {0x006e}}},
+ { 0x004f, {1, {0x006f}}},
+ { 0x0050, {1, {0x0070}}},
+ { 0x0051, {1, {0x0071}}},
+ { 0x0052, {1, {0x0072}}},
+ { 0x0053, {1, {0x0073}}},
+ { 0x0054, {1, {0x0074}}},
+ { 0x0055, {1, {0x0075}}},
+ { 0x0056, {1, {0x0076}}},
+ { 0x0057, {1, {0x0077}}},
+ { 0x0058, {1, {0x0078}}},
+ { 0x0059, {1, {0x0079}}},
+ { 0x005a, {1, {0x007a}}},
+ { 0x00b5, {1, {0x03bc}}},
+ { 0x00c0, {1, {0x00e0}}},
+ { 0x00c1, {1, {0x00e1}}},
+ { 0x00c2, {1, {0x00e2}}},
+ { 0x00c3, {1, {0x00e3}}},
+ { 0x00c4, {1, {0x00e4}}},
+ { 0x00c5, {1, {0x00e5}}},
+ { 0x00c6, {1, {0x00e6}}},
+ { 0x00c7, {1, {0x00e7}}},
+ { 0x00c8, {1, {0x00e8}}},
+ { 0x00c9, {1, {0x00e9}}},
+ { 0x00ca, {1, {0x00ea}}},
+ { 0x00cb, {1, {0x00eb}}},
+ { 0x00cc, {1, {0x00ec}}},
+ { 0x00cd, {1, {0x00ed}}},
+ { 0x00ce, {1, {0x00ee}}},
+ { 0x00cf, {1, {0x00ef}}},
+ { 0x00d0, {1, {0x00f0}}},
+ { 0x00d1, {1, {0x00f1}}},
+ { 0x00d2, {1, {0x00f2}}},
+ { 0x00d3, {1, {0x00f3}}},
+ { 0x00d4, {1, {0x00f4}}},
+ { 0x00d5, {1, {0x00f5}}},
+ { 0x00d6, {1, {0x00f6}}},
+ { 0x00d8, {1, {0x00f8}}},
+ { 0x00d9, {1, {0x00f9}}},
+ { 0x00da, {1, {0x00fa}}},
+ { 0x00db, {1, {0x00fb}}},
+ { 0x00dc, {1, {0x00fc}}},
+ { 0x00dd, {1, {0x00fd}}},
+ { 0x00de, {1, {0x00fe}}},
+ { 0x00df, {2, {0x0073, 0x0073}}},
+ { 0x0100, {1, {0x0101}}},
+ { 0x0102, {1, {0x0103}}},
+ { 0x0104, {1, {0x0105}}},
+ { 0x0106, {1, {0x0107}}},
+ { 0x0108, {1, {0x0109}}},
+ { 0x010a, {1, {0x010b}}},
+ { 0x010c, {1, {0x010d}}},
+ { 0x010e, {1, {0x010f}}},
+ { 0x0110, {1, {0x0111}}},
+ { 0x0112, {1, {0x0113}}},
+ { 0x0114, {1, {0x0115}}},
+ { 0x0116, {1, {0x0117}}},
+ { 0x0118, {1, {0x0119}}},
+ { 0x011a, {1, {0x011b}}},
+ { 0x011c, {1, {0x011d}}},
+ { 0x011e, {1, {0x011f}}},
+ { 0x0120, {1, {0x0121}}},
+ { 0x0122, {1, {0x0123}}},
+ { 0x0124, {1, {0x0125}}},
+ { 0x0126, {1, {0x0127}}},
+ { 0x0128, {1, {0x0129}}},
+ { 0x012a, {1, {0x012b}}},
+ { 0x012c, {1, {0x012d}}},
+ { 0x012e, {1, {0x012f}}},
+ { 0x0132, {1, {0x0133}}},
+ { 0x0134, {1, {0x0135}}},
+ { 0x0136, {1, {0x0137}}},
+ { 0x0139, {1, {0x013a}}},
+ { 0x013b, {1, {0x013c}}},
+ { 0x013d, {1, {0x013e}}},
+ { 0x013f, {1, {0x0140}}},
+ { 0x0141, {1, {0x0142}}},
+ { 0x0143, {1, {0x0144}}},
+ { 0x0145, {1, {0x0146}}},
+ { 0x0147, {1, {0x0148}}},
+ { 0x0149, {2, {0x02bc, 0x006e}}},
+ { 0x014a, {1, {0x014b}}},
+ { 0x014c, {1, {0x014d}}},
+ { 0x014e, {1, {0x014f}}},
+ { 0x0150, {1, {0x0151}}},
+ { 0x0152, {1, {0x0153}}},
+ { 0x0154, {1, {0x0155}}},
+ { 0x0156, {1, {0x0157}}},
+ { 0x0158, {1, {0x0159}}},
+ { 0x015a, {1, {0x015b}}},
+ { 0x015c, {1, {0x015d}}},
+ { 0x015e, {1, {0x015f}}},
+ { 0x0160, {1, {0x0161}}},
+ { 0x0162, {1, {0x0163}}},
+ { 0x0164, {1, {0x0165}}},
+ { 0x0166, {1, {0x0167}}},
+ { 0x0168, {1, {0x0169}}},
+ { 0x016a, {1, {0x016b}}},
+ { 0x016c, {1, {0x016d}}},
+ { 0x016e, {1, {0x016f}}},
+ { 0x0170, {1, {0x0171}}},
+ { 0x0172, {1, {0x0173}}},
+ { 0x0174, {1, {0x0175}}},
+ { 0x0176, {1, {0x0177}}},
+ { 0x0178, {1, {0x00ff}}},
+ { 0x0179, {1, {0x017a}}},
+ { 0x017b, {1, {0x017c}}},
+ { 0x017d, {1, {0x017e}}},
+ { 0x017f, {1, {0x0073}}},
+ { 0x0181, {1, {0x0253}}},
+ { 0x0182, {1, {0x0183}}},
+ { 0x0184, {1, {0x0185}}},
+ { 0x0186, {1, {0x0254}}},
+ { 0x0187, {1, {0x0188}}},
+ { 0x0189, {1, {0x0256}}},
+ { 0x018a, {1, {0x0257}}},
+ { 0x018b, {1, {0x018c}}},
+ { 0x018e, {1, {0x01dd}}},
+ { 0x018f, {1, {0x0259}}},
+ { 0x0190, {1, {0x025b}}},
+ { 0x0191, {1, {0x0192}}},
+ { 0x0193, {1, {0x0260}}},
+ { 0x0194, {1, {0x0263}}},
+ { 0x0196, {1, {0x0269}}},
+ { 0x0197, {1, {0x0268}}},
+ { 0x0198, {1, {0x0199}}},
+ { 0x019c, {1, {0x026f}}},
+ { 0x019d, {1, {0x0272}}},
+ { 0x019f, {1, {0x0275}}},
+ { 0x01a0, {1, {0x01a1}}},
+ { 0x01a2, {1, {0x01a3}}},
+ { 0x01a4, {1, {0x01a5}}},
+ { 0x01a6, {1, {0x0280}}},
+ { 0x01a7, {1, {0x01a8}}},
+ { 0x01a9, {1, {0x0283}}},
+ { 0x01ac, {1, {0x01ad}}},
+ { 0x01ae, {1, {0x0288}}},
+ { 0x01af, {1, {0x01b0}}},
+ { 0x01b1, {1, {0x028a}}},
+ { 0x01b2, {1, {0x028b}}},
+ { 0x01b3, {1, {0x01b4}}},
+ { 0x01b5, {1, {0x01b6}}},
+ { 0x01b7, {1, {0x0292}}},
+ { 0x01b8, {1, {0x01b9}}},
+ { 0x01bc, {1, {0x01bd}}},
+ { 0x01c4, {1, {0x01c6}}},
+ { 0x01c5, {1, {0x01c6}}},
+ { 0x01c7, {1, {0x01c9}}},
+ { 0x01c8, {1, {0x01c9}}},
+ { 0x01ca, {1, {0x01cc}}},
+ { 0x01cb, {1, {0x01cc}}},
+ { 0x01cd, {1, {0x01ce}}},
+ { 0x01cf, {1, {0x01d0}}},
+ { 0x01d1, {1, {0x01d2}}},
+ { 0x01d3, {1, {0x01d4}}},
+ { 0x01d5, {1, {0x01d6}}},
+ { 0x01d7, {1, {0x01d8}}},
+ { 0x01d9, {1, {0x01da}}},
+ { 0x01db, {1, {0x01dc}}},
+ { 0x01de, {1, {0x01df}}},
+ { 0x01e0, {1, {0x01e1}}},
+ { 0x01e2, {1, {0x01e3}}},
+ { 0x01e4, {1, {0x01e5}}},
+ { 0x01e6, {1, {0x01e7}}},
+ { 0x01e8, {1, {0x01e9}}},
+ { 0x01ea, {1, {0x01eb}}},
+ { 0x01ec, {1, {0x01ed}}},
+ { 0x01ee, {1, {0x01ef}}},
+ { 0x01f0, {2, {0x006a, 0x030c}}},
+ { 0x01f1, {1, {0x01f3}}},
+ { 0x01f2, {1, {0x01f3}}},
+ { 0x01f4, {1, {0x01f5}}},
+ { 0x01f6, {1, {0x0195}}},
+ { 0x01f7, {1, {0x01bf}}},
+ { 0x01f8, {1, {0x01f9}}},
+ { 0x01fa, {1, {0x01fb}}},
+ { 0x01fc, {1, {0x01fd}}},
+ { 0x01fe, {1, {0x01ff}}},
+ { 0x0200, {1, {0x0201}}},
+ { 0x0202, {1, {0x0203}}},
+ { 0x0204, {1, {0x0205}}},
+ { 0x0206, {1, {0x0207}}},
+ { 0x0208, {1, {0x0209}}},
+ { 0x020a, {1, {0x020b}}},
+ { 0x020c, {1, {0x020d}}},
+ { 0x020e, {1, {0x020f}}},
+ { 0x0210, {1, {0x0211}}},
+ { 0x0212, {1, {0x0213}}},
+ { 0x0214, {1, {0x0215}}},
+ { 0x0216, {1, {0x0217}}},
+ { 0x0218, {1, {0x0219}}},
+ { 0x021a, {1, {0x021b}}},
+ { 0x021c, {1, {0x021d}}},
+ { 0x021e, {1, {0x021f}}},
+ { 0x0220, {1, {0x019e}}},
+ { 0x0222, {1, {0x0223}}},
+ { 0x0224, {1, {0x0225}}},
+ { 0x0226, {1, {0x0227}}},
+ { 0x0228, {1, {0x0229}}},
+ { 0x022a, {1, {0x022b}}},
+ { 0x022c, {1, {0x022d}}},
+ { 0x022e, {1, {0x022f}}},
+ { 0x0230, {1, {0x0231}}},
+ { 0x0232, {1, {0x0233}}},
+ { 0x023b, {1, {0x023c}}},
+ { 0x023d, {1, {0x019a}}},
+ { 0x0241, {1, {0x0294}}},
+ { 0x0345, {1, {0x03b9}}},
+ { 0x0386, {1, {0x03ac}}},
+ { 0x0388, {1, {0x03ad}}},
+ { 0x0389, {1, {0x03ae}}},
+ { 0x038a, {1, {0x03af}}},
+ { 0x038c, {1, {0x03cc}}},
+ { 0x038e, {1, {0x03cd}}},
+ { 0x038f, {1, {0x03ce}}},
+ { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}},
+ { 0x0391, {1, {0x03b1}}},
+ { 0x0392, {1, {0x03b2}}},
+ { 0x0393, {1, {0x03b3}}},
+ { 0x0394, {1, {0x03b4}}},
+ { 0x0395, {1, {0x03b5}}},
+ { 0x0396, {1, {0x03b6}}},
+ { 0x0397, {1, {0x03b7}}},
+ { 0x0398, {1, {0x03b8}}},
+ { 0x0399, {1, {0x03b9}}},
+ { 0x039a, {1, {0x03ba}}},
+ { 0x039b, {1, {0x03bb}}},
+ { 0x039c, {1, {0x03bc}}},
+ { 0x039d, {1, {0x03bd}}},
+ { 0x039e, {1, {0x03be}}},
+ { 0x039f, {1, {0x03bf}}},
+ { 0x03a0, {1, {0x03c0}}},
+ { 0x03a1, {1, {0x03c1}}},
+ { 0x03a3, {1, {0x03c3}}},
+ { 0x03a4, {1, {0x03c4}}},
+ { 0x03a5, {1, {0x03c5}}},
+ { 0x03a6, {1, {0x03c6}}},
+ { 0x03a7, {1, {0x03c7}}},
+ { 0x03a8, {1, {0x03c8}}},
+ { 0x03a9, {1, {0x03c9}}},
+ { 0x03aa, {1, {0x03ca}}},
+ { 0x03ab, {1, {0x03cb}}},
+ { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}},
+ { 0x03c2, {1, {0x03c3}}},
+ { 0x03d0, {1, {0x03b2}}},
+ { 0x03d1, {1, {0x03b8}}},
+ { 0x03d5, {1, {0x03c6}}},
+ { 0x03d6, {1, {0x03c0}}},
+ { 0x03d8, {1, {0x03d9}}},
+ { 0x03da, {1, {0x03db}}},
+ { 0x03dc, {1, {0x03dd}}},
+ { 0x03de, {1, {0x03df}}},
+ { 0x03e0, {1, {0x03e1}}},
+ { 0x03e2, {1, {0x03e3}}},
+ { 0x03e4, {1, {0x03e5}}},
+ { 0x03e6, {1, {0x03e7}}},
+ { 0x03e8, {1, {0x03e9}}},
+ { 0x03ea, {1, {0x03eb}}},
+ { 0x03ec, {1, {0x03ed}}},
+ { 0x03ee, {1, {0x03ef}}},
+ { 0x03f0, {1, {0x03ba}}},
+ { 0x03f1, {1, {0x03c1}}},
+ { 0x03f4, {1, {0x03b8}}},
+ { 0x03f5, {1, {0x03b5}}},
+ { 0x03f7, {1, {0x03f8}}},
+ { 0x03f9, {1, {0x03f2}}},
+ { 0x03fa, {1, {0x03fb}}},
+ { 0x0400, {1, {0x0450}}},
+ { 0x0401, {1, {0x0451}}},
+ { 0x0402, {1, {0x0452}}},
+ { 0x0403, {1, {0x0453}}},
+ { 0x0404, {1, {0x0454}}},
+ { 0x0405, {1, {0x0455}}},
+ { 0x0406, {1, {0x0456}}},
+ { 0x0407, {1, {0x0457}}},
+ { 0x0408, {1, {0x0458}}},
+ { 0x0409, {1, {0x0459}}},
+ { 0x040a, {1, {0x045a}}},
+ { 0x040b, {1, {0x045b}}},
+ { 0x040c, {1, {0x045c}}},
+ { 0x040d, {1, {0x045d}}},
+ { 0x040e, {1, {0x045e}}},
+ { 0x040f, {1, {0x045f}}},
+ { 0x0410, {1, {0x0430}}},
+ { 0x0411, {1, {0x0431}}},
+ { 0x0412, {1, {0x0432}}},
+ { 0x0413, {1, {0x0433}}},
+ { 0x0414, {1, {0x0434}}},
+ { 0x0415, {1, {0x0435}}},
+ { 0x0416, {1, {0x0436}}},
+ { 0x0417, {1, {0x0437}}},
+ { 0x0418, {1, {0x0438}}},
+ { 0x0419, {1, {0x0439}}},
+ { 0x041a, {1, {0x043a}}},
+ { 0x041b, {1, {0x043b}}},
+ { 0x041c, {1, {0x043c}}},
+ { 0x041d, {1, {0x043d}}},
+ { 0x041e, {1, {0x043e}}},
+ { 0x041f, {1, {0x043f}}},
+ { 0x0420, {1, {0x0440}}},
+ { 0x0421, {1, {0x0441}}},
+ { 0x0422, {1, {0x0442}}},
+ { 0x0423, {1, {0x0443}}},
+ { 0x0424, {1, {0x0444}}},
+ { 0x0425, {1, {0x0445}}},
+ { 0x0426, {1, {0x0446}}},
+ { 0x0427, {1, {0x0447}}},
+ { 0x0428, {1, {0x0448}}},
+ { 0x0429, {1, {0x0449}}},
+ { 0x042a, {1, {0x044a}}},
+ { 0x042b, {1, {0x044b}}},
+ { 0x042c, {1, {0x044c}}},
+ { 0x042d, {1, {0x044d}}},
+ { 0x042e, {1, {0x044e}}},
+ { 0x042f, {1, {0x044f}}},
+ { 0x0460, {1, {0x0461}}},
+ { 0x0462, {1, {0x0463}}},
+ { 0x0464, {1, {0x0465}}},
+ { 0x0466, {1, {0x0467}}},
+ { 0x0468, {1, {0x0469}}},
+ { 0x046a, {1, {0x046b}}},
+ { 0x046c, {1, {0x046d}}},
+ { 0x046e, {1, {0x046f}}},
+ { 0x0470, {1, {0x0471}}},
+ { 0x0472, {1, {0x0473}}},
+ { 0x0474, {1, {0x0475}}},
+ { 0x0476, {1, {0x0477}}},
+ { 0x0478, {1, {0x0479}}},
+ { 0x047a, {1, {0x047b}}},
+ { 0x047c, {1, {0x047d}}},
+ { 0x047e, {1, {0x047f}}},
+ { 0x0480, {1, {0x0481}}},
+ { 0x048a, {1, {0x048b}}},
+ { 0x048c, {1, {0x048d}}},
+ { 0x048e, {1, {0x048f}}},
+ { 0x0490, {1, {0x0491}}},
+ { 0x0492, {1, {0x0493}}},
+ { 0x0494, {1, {0x0495}}},
+ { 0x0496, {1, {0x0497}}},
+ { 0x0498, {1, {0x0499}}},
+ { 0x049a, {1, {0x049b}}},
+ { 0x049c, {1, {0x049d}}},
+ { 0x049e, {1, {0x049f}}},
+ { 0x04a0, {1, {0x04a1}}},
+ { 0x04a2, {1, {0x04a3}}},
+ { 0x04a4, {1, {0x04a5}}},
+ { 0x04a6, {1, {0x04a7}}},
+ { 0x04a8, {1, {0x04a9}}},
+ { 0x04aa, {1, {0x04ab}}},
+ { 0x04ac, {1, {0x04ad}}},
+ { 0x04ae, {1, {0x04af}}},
+ { 0x04b0, {1, {0x04b1}}},
+ { 0x04b2, {1, {0x04b3}}},
+ { 0x04b4, {1, {0x04b5}}},
+ { 0x04b6, {1, {0x04b7}}},
+ { 0x04b8, {1, {0x04b9}}},
+ { 0x04ba, {1, {0x04bb}}},
+ { 0x04bc, {1, {0x04bd}}},
+ { 0x04be, {1, {0x04bf}}},
+ { 0x04c1, {1, {0x04c2}}},
+ { 0x04c3, {1, {0x04c4}}},
+ { 0x04c5, {1, {0x04c6}}},
+ { 0x04c7, {1, {0x04c8}}},
+ { 0x04c9, {1, {0x04ca}}},
+ { 0x04cb, {1, {0x04cc}}},
+ { 0x04cd, {1, {0x04ce}}},
+ { 0x04d0, {1, {0x04d1}}},
+ { 0x04d2, {1, {0x04d3}}},
+ { 0x04d4, {1, {0x04d5}}},
+ { 0x04d6, {1, {0x04d7}}},
+ { 0x04d8, {1, {0x04d9}}},
+ { 0x04da, {1, {0x04db}}},
+ { 0x04dc, {1, {0x04dd}}},
+ { 0x04de, {1, {0x04df}}},
+ { 0x04e0, {1, {0x04e1}}},
+ { 0x04e2, {1, {0x04e3}}},
+ { 0x04e4, {1, {0x04e5}}},
+ { 0x04e6, {1, {0x04e7}}},
+ { 0x04e8, {1, {0x04e9}}},
+ { 0x04ea, {1, {0x04eb}}},
+ { 0x04ec, {1, {0x04ed}}},
+ { 0x04ee, {1, {0x04ef}}},
+ { 0x04f0, {1, {0x04f1}}},
+ { 0x04f2, {1, {0x04f3}}},
+ { 0x04f4, {1, {0x04f5}}},
+ { 0x04f6, {1, {0x04f7}}},
+ { 0x04f8, {1, {0x04f9}}},
+ { 0x0500, {1, {0x0501}}},
+ { 0x0502, {1, {0x0503}}},
+ { 0x0504, {1, {0x0505}}},
+ { 0x0506, {1, {0x0507}}},
+ { 0x0508, {1, {0x0509}}},
+ { 0x050a, {1, {0x050b}}},
+ { 0x050c, {1, {0x050d}}},
+ { 0x050e, {1, {0x050f}}},
+ { 0x0531, {1, {0x0561}}},
+ { 0x0532, {1, {0x0562}}},
+ { 0x0533, {1, {0x0563}}},
+ { 0x0534, {1, {0x0564}}},
+ { 0x0535, {1, {0x0565}}},
+ { 0x0536, {1, {0x0566}}},
+ { 0x0537, {1, {0x0567}}},
+ { 0x0538, {1, {0x0568}}},
+ { 0x0539, {1, {0x0569}}},
+ { 0x053a, {1, {0x056a}}},
+ { 0x053b, {1, {0x056b}}},
+ { 0x053c, {1, {0x056c}}},
+ { 0x053d, {1, {0x056d}}},
+ { 0x053e, {1, {0x056e}}},
+ { 0x053f, {1, {0x056f}}},
+ { 0x0540, {1, {0x0570}}},
+ { 0x0541, {1, {0x0571}}},
+ { 0x0542, {1, {0x0572}}},
+ { 0x0543, {1, {0x0573}}},
+ { 0x0544, {1, {0x0574}}},
+ { 0x0545, {1, {0x0575}}},
+ { 0x0546, {1, {0x0576}}},
+ { 0x0547, {1, {0x0577}}},
+ { 0x0548, {1, {0x0578}}},
+ { 0x0549, {1, {0x0579}}},
+ { 0x054a, {1, {0x057a}}},
+ { 0x054b, {1, {0x057b}}},
+ { 0x054c, {1, {0x057c}}},
+ { 0x054d, {1, {0x057d}}},
+ { 0x054e, {1, {0x057e}}},
+ { 0x054f, {1, {0x057f}}},
+ { 0x0550, {1, {0x0580}}},
+ { 0x0551, {1, {0x0581}}},
+ { 0x0552, {1, {0x0582}}},
+ { 0x0553, {1, {0x0583}}},
+ { 0x0554, {1, {0x0584}}},
+ { 0x0555, {1, {0x0585}}},
+ { 0x0556, {1, {0x0586}}},
+ { 0x0587, {2, {0x0565, 0x0582}}},
+ { 0x10a0, {1, {0x2d00}}},
+ { 0x10a1, {1, {0x2d01}}},
+ { 0x10a2, {1, {0x2d02}}},
+ { 0x10a3, {1, {0x2d03}}},
+ { 0x10a4, {1, {0x2d04}}},
+ { 0x10a5, {1, {0x2d05}}},
+ { 0x10a6, {1, {0x2d06}}},
+ { 0x10a7, {1, {0x2d07}}},
+ { 0x10a8, {1, {0x2d08}}},
+ { 0x10a9, {1, {0x2d09}}},
+ { 0x10aa, {1, {0x2d0a}}},
+ { 0x10ab, {1, {0x2d0b}}},
+ { 0x10ac, {1, {0x2d0c}}},
+ { 0x10ad, {1, {0x2d0d}}},
+ { 0x10ae, {1, {0x2d0e}}},
+ { 0x10af, {1, {0x2d0f}}},
+ { 0x10b0, {1, {0x2d10}}},
+ { 0x10b1, {1, {0x2d11}}},
+ { 0x10b2, {1, {0x2d12}}},
+ { 0x10b3, {1, {0x2d13}}},
+ { 0x10b4, {1, {0x2d14}}},
+ { 0x10b5, {1, {0x2d15}}},
+ { 0x10b6, {1, {0x2d16}}},
+ { 0x10b7, {1, {0x2d17}}},
+ { 0x10b8, {1, {0x2d18}}},
+ { 0x10b9, {1, {0x2d19}}},
+ { 0x10ba, {1, {0x2d1a}}},
+ { 0x10bb, {1, {0x2d1b}}},
+ { 0x10bc, {1, {0x2d1c}}},
+ { 0x10bd, {1, {0x2d1d}}},
+ { 0x10be, {1, {0x2d1e}}},
+ { 0x10bf, {1, {0x2d1f}}},
+ { 0x10c0, {1, {0x2d20}}},
+ { 0x10c1, {1, {0x2d21}}},
+ { 0x10c2, {1, {0x2d22}}},
+ { 0x10c3, {1, {0x2d23}}},
+ { 0x10c4, {1, {0x2d24}}},
+ { 0x10c5, {1, {0x2d25}}},
+ { 0x1e00, {1, {0x1e01}}},
+ { 0x1e02, {1, {0x1e03}}},
+ { 0x1e04, {1, {0x1e05}}},
+ { 0x1e06, {1, {0x1e07}}},
+ { 0x1e08, {1, {0x1e09}}},
+ { 0x1e0a, {1, {0x1e0b}}},
+ { 0x1e0c, {1, {0x1e0d}}},
+ { 0x1e0e, {1, {0x1e0f}}},
+ { 0x1e10, {1, {0x1e11}}},
+ { 0x1e12, {1, {0x1e13}}},
+ { 0x1e14, {1, {0x1e15}}},
+ { 0x1e16, {1, {0x1e17}}},
+ { 0x1e18, {1, {0x1e19}}},
+ { 0x1e1a, {1, {0x1e1b}}},
+ { 0x1e1c, {1, {0x1e1d}}},
+ { 0x1e1e, {1, {0x1e1f}}},
+ { 0x1e20, {1, {0x1e21}}},
+ { 0x1e22, {1, {0x1e23}}},
+ { 0x1e24, {1, {0x1e25}}},
+ { 0x1e26, {1, {0x1e27}}},
+ { 0x1e28, {1, {0x1e29}}},
+ { 0x1e2a, {1, {0x1e2b}}},
+ { 0x1e2c, {1, {0x1e2d}}},
+ { 0x1e2e, {1, {0x1e2f}}},
+ { 0x1e30, {1, {0x1e31}}},
+ { 0x1e32, {1, {0x1e33}}},
+ { 0x1e34, {1, {0x1e35}}},
+ { 0x1e36, {1, {0x1e37}}},
+ { 0x1e38, {1, {0x1e39}}},
+ { 0x1e3a, {1, {0x1e3b}}},
+ { 0x1e3c, {1, {0x1e3d}}},
+ { 0x1e3e, {1, {0x1e3f}}},
+ { 0x1e40, {1, {0x1e41}}},
+ { 0x1e42, {1, {0x1e43}}},
+ { 0x1e44, {1, {0x1e45}}},
+ { 0x1e46, {1, {0x1e47}}},
+ { 0x1e48, {1, {0x1e49}}},
+ { 0x1e4a, {1, {0x1e4b}}},
+ { 0x1e4c, {1, {0x1e4d}}},
+ { 0x1e4e, {1, {0x1e4f}}},
+ { 0x1e50, {1, {0x1e51}}},
+ { 0x1e52, {1, {0x1e53}}},
+ { 0x1e54, {1, {0x1e55}}},
+ { 0x1e56, {1, {0x1e57}}},
+ { 0x1e58, {1, {0x1e59}}},
+ { 0x1e5a, {1, {0x1e5b}}},
+ { 0x1e5c, {1, {0x1e5d}}},
+ { 0x1e5e, {1, {0x1e5f}}},
+ { 0x1e60, {1, {0x1e61}}},
+ { 0x1e62, {1, {0x1e63}}},
+ { 0x1e64, {1, {0x1e65}}},
+ { 0x1e66, {1, {0x1e67}}},
+ { 0x1e68, {1, {0x1e69}}},
+ { 0x1e6a, {1, {0x1e6b}}},
+ { 0x1e6c, {1, {0x1e6d}}},
+ { 0x1e6e, {1, {0x1e6f}}},
+ { 0x1e70, {1, {0x1e71}}},
+ { 0x1e72, {1, {0x1e73}}},
+ { 0x1e74, {1, {0x1e75}}},
+ { 0x1e76, {1, {0x1e77}}},
+ { 0x1e78, {1, {0x1e79}}},
+ { 0x1e7a, {1, {0x1e7b}}},
+ { 0x1e7c, {1, {0x1e7d}}},
+ { 0x1e7e, {1, {0x1e7f}}},
+ { 0x1e80, {1, {0x1e81}}},
+ { 0x1e82, {1, {0x1e83}}},
+ { 0x1e84, {1, {0x1e85}}},
+ { 0x1e86, {1, {0x1e87}}},
+ { 0x1e88, {1, {0x1e89}}},
+ { 0x1e8a, {1, {0x1e8b}}},
+ { 0x1e8c, {1, {0x1e8d}}},
+ { 0x1e8e, {1, {0x1e8f}}},
+ { 0x1e90, {1, {0x1e91}}},
+ { 0x1e92, {1, {0x1e93}}},
+ { 0x1e94, {1, {0x1e95}}},
+ { 0x1e96, {2, {0x0068, 0x0331}}},
+ { 0x1e97, {2, {0x0074, 0x0308}}},
+ { 0x1e98, {2, {0x0077, 0x030a}}},
+ { 0x1e99, {2, {0x0079, 0x030a}}},
+ { 0x1e9a, {2, {0x0061, 0x02be}}},
+ { 0x1e9b, {1, {0x1e61}}},
+ { 0x1ea0, {1, {0x1ea1}}},
+ { 0x1ea2, {1, {0x1ea3}}},
+ { 0x1ea4, {1, {0x1ea5}}},
+ { 0x1ea6, {1, {0x1ea7}}},
+ { 0x1ea8, {1, {0x1ea9}}},
+ { 0x1eaa, {1, {0x1eab}}},
+ { 0x1eac, {1, {0x1ead}}},
+ { 0x1eae, {1, {0x1eaf}}},
+ { 0x1eb0, {1, {0x1eb1}}},
+ { 0x1eb2, {1, {0x1eb3}}},
+ { 0x1eb4, {1, {0x1eb5}}},
+ { 0x1eb6, {1, {0x1eb7}}},
+ { 0x1eb8, {1, {0x1eb9}}},
+ { 0x1eba, {1, {0x1ebb}}},
+ { 0x1ebc, {1, {0x1ebd}}},
+ { 0x1ebe, {1, {0x1ebf}}},
+ { 0x1ec0, {1, {0x1ec1}}},
+ { 0x1ec2, {1, {0x1ec3}}},
+ { 0x1ec4, {1, {0x1ec5}}},
+ { 0x1ec6, {1, {0x1ec7}}},
+ { 0x1ec8, {1, {0x1ec9}}},
+ { 0x1eca, {1, {0x1ecb}}},
+ { 0x1ecc, {1, {0x1ecd}}},
+ { 0x1ece, {1, {0x1ecf}}},
+ { 0x1ed0, {1, {0x1ed1}}},
+ { 0x1ed2, {1, {0x1ed3}}},
+ { 0x1ed4, {1, {0x1ed5}}},
+ { 0x1ed6, {1, {0x1ed7}}},
+ { 0x1ed8, {1, {0x1ed9}}},
+ { 0x1eda, {1, {0x1edb}}},
+ { 0x1edc, {1, {0x1edd}}},
+ { 0x1ede, {1, {0x1edf}}},
+ { 0x1ee0, {1, {0x1ee1}}},
+ { 0x1ee2, {1, {0x1ee3}}},
+ { 0x1ee4, {1, {0x1ee5}}},
+ { 0x1ee6, {1, {0x1ee7}}},
+ { 0x1ee8, {1, {0x1ee9}}},
+ { 0x1eea, {1, {0x1eeb}}},
+ { 0x1eec, {1, {0x1eed}}},
+ { 0x1eee, {1, {0x1eef}}},
+ { 0x1ef0, {1, {0x1ef1}}},
+ { 0x1ef2, {1, {0x1ef3}}},
+ { 0x1ef4, {1, {0x1ef5}}},
+ { 0x1ef6, {1, {0x1ef7}}},
+ { 0x1ef8, {1, {0x1ef9}}},
+ { 0x1f08, {1, {0x1f00}}},
+ { 0x1f09, {1, {0x1f01}}},
+ { 0x1f0a, {1, {0x1f02}}},
+ { 0x1f0b, {1, {0x1f03}}},
+ { 0x1f0c, {1, {0x1f04}}},
+ { 0x1f0d, {1, {0x1f05}}},
+ { 0x1f0e, {1, {0x1f06}}},
+ { 0x1f0f, {1, {0x1f07}}},
+ { 0x1f18, {1, {0x1f10}}},
+ { 0x1f19, {1, {0x1f11}}},
+ { 0x1f1a, {1, {0x1f12}}},
+ { 0x1f1b, {1, {0x1f13}}},
+ { 0x1f1c, {1, {0x1f14}}},
+ { 0x1f1d, {1, {0x1f15}}},
+ { 0x1f28, {1, {0x1f20}}},
+ { 0x1f29, {1, {0x1f21}}},
+ { 0x1f2a, {1, {0x1f22}}},
+ { 0x1f2b, {1, {0x1f23}}},
+ { 0x1f2c, {1, {0x1f24}}},
+ { 0x1f2d, {1, {0x1f25}}},
+ { 0x1f2e, {1, {0x1f26}}},
+ { 0x1f2f, {1, {0x1f27}}},
+ { 0x1f38, {1, {0x1f30}}},
+ { 0x1f39, {1, {0x1f31}}},
+ { 0x1f3a, {1, {0x1f32}}},
+ { 0x1f3b, {1, {0x1f33}}},
+ { 0x1f3c, {1, {0x1f34}}},
+ { 0x1f3d, {1, {0x1f35}}},
+ { 0x1f3e, {1, {0x1f36}}},
+ { 0x1f3f, {1, {0x1f37}}},
+ { 0x1f48, {1, {0x1f40}}},
+ { 0x1f49, {1, {0x1f41}}},
+ { 0x1f4a, {1, {0x1f42}}},
+ { 0x1f4b, {1, {0x1f43}}},
+ { 0x1f4c, {1, {0x1f44}}},
+ { 0x1f4d, {1, {0x1f45}}},
+ { 0x1f50, {2, {0x03c5, 0x0313}}},
+ { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}},
+ { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}},
+ { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}},
+ { 0x1f59, {1, {0x1f51}}},
+ { 0x1f5b, {1, {0x1f53}}},
+ { 0x1f5d, {1, {0x1f55}}},
+ { 0x1f5f, {1, {0x1f57}}},
+ { 0x1f68, {1, {0x1f60}}},
+ { 0x1f69, {1, {0x1f61}}},
+ { 0x1f6a, {1, {0x1f62}}},
+ { 0x1f6b, {1, {0x1f63}}},
+ { 0x1f6c, {1, {0x1f64}}},
+ { 0x1f6d, {1, {0x1f65}}},
+ { 0x1f6e, {1, {0x1f66}}},
+ { 0x1f6f, {1, {0x1f67}}},
+ { 0x1f80, {2, {0x1f00, 0x03b9}}},
+ { 0x1f81, {2, {0x1f01, 0x03b9}}},
+ { 0x1f82, {2, {0x1f02, 0x03b9}}},
+ { 0x1f83, {2, {0x1f03, 0x03b9}}},
+ { 0x1f84, {2, {0x1f04, 0x03b9}}},
+ { 0x1f85, {2, {0x1f05, 0x03b9}}},
+ { 0x1f86, {2, {0x1f06, 0x03b9}}},
+ { 0x1f87, {2, {0x1f07, 0x03b9}}},
+ { 0x1f88, {2, {0x1f00, 0x03b9}}},
+ { 0x1f89, {2, {0x1f01, 0x03b9}}},
+ { 0x1f8a, {2, {0x1f02, 0x03b9}}},
+ { 0x1f8b, {2, {0x1f03, 0x03b9}}},
+ { 0x1f8c, {2, {0x1f04, 0x03b9}}},
+ { 0x1f8d, {2, {0x1f05, 0x03b9}}},
+ { 0x1f8e, {2, {0x1f06, 0x03b9}}},
+ { 0x1f8f, {2, {0x1f07, 0x03b9}}},
+ { 0x1f90, {2, {0x1f20, 0x03b9}}},
+ { 0x1f91, {2, {0x1f21, 0x03b9}}},
+ { 0x1f92, {2, {0x1f22, 0x03b9}}},
+ { 0x1f93, {2, {0x1f23, 0x03b9}}},
+ { 0x1f94, {2, {0x1f24, 0x03b9}}},
+ { 0x1f95, {2, {0x1f25, 0x03b9}}},
+ { 0x1f96, {2, {0x1f26, 0x03b9}}},
+ { 0x1f97, {2, {0x1f27, 0x03b9}}},
+ { 0x1f98, {2, {0x1f20, 0x03b9}}},
+ { 0x1f99, {2, {0x1f21, 0x03b9}}},
+ { 0x1f9a, {2, {0x1f22, 0x03b9}}},
+ { 0x1f9b, {2, {0x1f23, 0x03b9}}},
+ { 0x1f9c, {2, {0x1f24, 0x03b9}}},
+ { 0x1f9d, {2, {0x1f25, 0x03b9}}},
+ { 0x1f9e, {2, {0x1f26, 0x03b9}}},
+ { 0x1f9f, {2, {0x1f27, 0x03b9}}},
+ { 0x1fa0, {2, {0x1f60, 0x03b9}}},
+ { 0x1fa1, {2, {0x1f61, 0x03b9}}},
+ { 0x1fa2, {2, {0x1f62, 0x03b9}}},
+ { 0x1fa3, {2, {0x1f63, 0x03b9}}},
+ { 0x1fa4, {2, {0x1f64, 0x03b9}}},
+ { 0x1fa5, {2, {0x1f65, 0x03b9}}},
+ { 0x1fa6, {2, {0x1f66, 0x03b9}}},
+ { 0x1fa7, {2, {0x1f67, 0x03b9}}},
+ { 0x1fa8, {2, {0x1f60, 0x03b9}}},
+ { 0x1fa9, {2, {0x1f61, 0x03b9}}},
+ { 0x1faa, {2, {0x1f62, 0x03b9}}},
+ { 0x1fab, {2, {0x1f63, 0x03b9}}},
+ { 0x1fac, {2, {0x1f64, 0x03b9}}},
+ { 0x1fad, {2, {0x1f65, 0x03b9}}},
+ { 0x1fae, {2, {0x1f66, 0x03b9}}},
+ { 0x1faf, {2, {0x1f67, 0x03b9}}},
+ { 0x1fb2, {2, {0x1f70, 0x03b9}}},
+ { 0x1fb3, {2, {0x03b1, 0x03b9}}},
+ { 0x1fb4, {2, {0x03ac, 0x03b9}}},
+ { 0x1fb6, {2, {0x03b1, 0x0342}}},
+ { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}},
+ { 0x1fb8, {1, {0x1fb0}}},
+ { 0x1fb9, {1, {0x1fb1}}},
+ { 0x1fba, {1, {0x1f70}}},
+ { 0x1fbb, {1, {0x1f71}}},
+ { 0x1fbc, {2, {0x03b1, 0x03b9}}},
+ { 0x1fbe, {1, {0x03b9}}},
+ { 0x1fc2, {2, {0x1f74, 0x03b9}}},
+ { 0x1fc3, {2, {0x03b7, 0x03b9}}},
+ { 0x1fc4, {2, {0x03ae, 0x03b9}}},
+ { 0x1fc6, {2, {0x03b7, 0x0342}}},
+ { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}},
+ { 0x1fc8, {1, {0x1f72}}},
+ { 0x1fc9, {1, {0x1f73}}},
+ { 0x1fca, {1, {0x1f74}}},
+ { 0x1fcb, {1, {0x1f75}}},
+ { 0x1fcc, {2, {0x03b7, 0x03b9}}},
+ { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}},
+ { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}},
+ { 0x1fd6, {2, {0x03b9, 0x0342}}},
+ { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}},
+ { 0x1fd8, {1, {0x1fd0}}},
+ { 0x1fd9, {1, {0x1fd1}}},
+ { 0x1fda, {1, {0x1f76}}},
+ { 0x1fdb, {1, {0x1f77}}},
+ { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}},
+ { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}},
+ { 0x1fe4, {2, {0x03c1, 0x0313}}},
+ { 0x1fe6, {2, {0x03c5, 0x0342}}},
+ { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}},
+ { 0x1fe8, {1, {0x1fe0}}},
+ { 0x1fe9, {1, {0x1fe1}}},
+ { 0x1fea, {1, {0x1f7a}}},
+ { 0x1feb, {1, {0x1f7b}}},
+ { 0x1fec, {1, {0x1fe5}}},
+ { 0x1ff2, {2, {0x1f7c, 0x03b9}}},
+ { 0x1ff3, {2, {0x03c9, 0x03b9}}},
+ { 0x1ff4, {2, {0x03ce, 0x03b9}}},
+ { 0x1ff6, {2, {0x03c9, 0x0342}}},
+ { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}},
+ { 0x1ff8, {1, {0x1f78}}},
+ { 0x1ff9, {1, {0x1f79}}},
+ { 0x1ffa, {1, {0x1f7c}}},
+ { 0x1ffb, {1, {0x1f7d}}},
+ { 0x1ffc, {2, {0x03c9, 0x03b9}}},
+ { 0x2126, {1, {0x03c9}}},
+ { 0x212a, {1, {0x006b}}},
+ { 0x212b, {1, {0x00e5}}},
+ { 0x2160, {1, {0x2170}}},
+ { 0x2161, {1, {0x2171}}},
+ { 0x2162, {1, {0x2172}}},
+ { 0x2163, {1, {0x2173}}},
+ { 0x2164, {1, {0x2174}}},
+ { 0x2165, {1, {0x2175}}},
+ { 0x2166, {1, {0x2176}}},
+ { 0x2167, {1, {0x2177}}},
+ { 0x2168, {1, {0x2178}}},
+ { 0x2169, {1, {0x2179}}},
+ { 0x216a, {1, {0x217a}}},
+ { 0x216b, {1, {0x217b}}},
+ { 0x216c, {1, {0x217c}}},
+ { 0x216d, {1, {0x217d}}},
+ { 0x216e, {1, {0x217e}}},
+ { 0x216f, {1, {0x217f}}},
+ { 0x24b6, {1, {0x24d0}}},
+ { 0x24b7, {1, {0x24d1}}},
+ { 0x24b8, {1, {0x24d2}}},
+ { 0x24b9, {1, {0x24d3}}},
+ { 0x24ba, {1, {0x24d4}}},
+ { 0x24bb, {1, {0x24d5}}},
+ { 0x24bc, {1, {0x24d6}}},
+ { 0x24bd, {1, {0x24d7}}},
+ { 0x24be, {1, {0x24d8}}},
+ { 0x24bf, {1, {0x24d9}}},
+ { 0x24c0, {1, {0x24da}}},
+ { 0x24c1, {1, {0x24db}}},
+ { 0x24c2, {1, {0x24dc}}},
+ { 0x24c3, {1, {0x24dd}}},
+ { 0x24c4, {1, {0x24de}}},
+ { 0x24c5, {1, {0x24df}}},
+ { 0x24c6, {1, {0x24e0}}},
+ { 0x24c7, {1, {0x24e1}}},
+ { 0x24c8, {1, {0x24e2}}},
+ { 0x24c9, {1, {0x24e3}}},
+ { 0x24ca, {1, {0x24e4}}},
+ { 0x24cb, {1, {0x24e5}}},
+ { 0x24cc, {1, {0x24e6}}},
+ { 0x24cd, {1, {0x24e7}}},
+ { 0x24ce, {1, {0x24e8}}},
+ { 0x24cf, {1, {0x24e9}}},
+ { 0x2c00, {1, {0x2c30}}},
+ { 0x2c01, {1, {0x2c31}}},
+ { 0x2c02, {1, {0x2c32}}},
+ { 0x2c03, {1, {0x2c33}}},
+ { 0x2c04, {1, {0x2c34}}},
+ { 0x2c05, {1, {0x2c35}}},
+ { 0x2c06, {1, {0x2c36}}},
+ { 0x2c07, {1, {0x2c37}}},
+ { 0x2c08, {1, {0x2c38}}},
+ { 0x2c09, {1, {0x2c39}}},
+ { 0x2c0a, {1, {0x2c3a}}},
+ { 0x2c0b, {1, {0x2c3b}}},
+ { 0x2c0c, {1, {0x2c3c}}},
+ { 0x2c0d, {1, {0x2c3d}}},
+ { 0x2c0e, {1, {0x2c3e}}},
+ { 0x2c0f, {1, {0x2c3f}}},
+ { 0x2c10, {1, {0x2c40}}},
+ { 0x2c11, {1, {0x2c41}}},
+ { 0x2c12, {1, {0x2c42}}},
+ { 0x2c13, {1, {0x2c43}}},
+ { 0x2c14, {1, {0x2c44}}},
+ { 0x2c15, {1, {0x2c45}}},
+ { 0x2c16, {1, {0x2c46}}},
+ { 0x2c17, {1, {0x2c47}}},
+ { 0x2c18, {1, {0x2c48}}},
+ { 0x2c19, {1, {0x2c49}}},
+ { 0x2c1a, {1, {0x2c4a}}},
+ { 0x2c1b, {1, {0x2c4b}}},
+ { 0x2c1c, {1, {0x2c4c}}},
+ { 0x2c1d, {1, {0x2c4d}}},
+ { 0x2c1e, {1, {0x2c4e}}},
+ { 0x2c1f, {1, {0x2c4f}}},
+ { 0x2c20, {1, {0x2c50}}},
+ { 0x2c21, {1, {0x2c51}}},
+ { 0x2c22, {1, {0x2c52}}},
+ { 0x2c23, {1, {0x2c53}}},
+ { 0x2c24, {1, {0x2c54}}},
+ { 0x2c25, {1, {0x2c55}}},
+ { 0x2c26, {1, {0x2c56}}},
+ { 0x2c27, {1, {0x2c57}}},
+ { 0x2c28, {1, {0x2c58}}},
+ { 0x2c29, {1, {0x2c59}}},
+ { 0x2c2a, {1, {0x2c5a}}},
+ { 0x2c2b, {1, {0x2c5b}}},
+ { 0x2c2c, {1, {0x2c5c}}},
+ { 0x2c2d, {1, {0x2c5d}}},
+ { 0x2c2e, {1, {0x2c5e}}},
+ { 0x2c80, {1, {0x2c81}}},
+ { 0x2c82, {1, {0x2c83}}},
+ { 0x2c84, {1, {0x2c85}}},
+ { 0x2c86, {1, {0x2c87}}},
+ { 0x2c88, {1, {0x2c89}}},
+ { 0x2c8a, {1, {0x2c8b}}},
+ { 0x2c8c, {1, {0x2c8d}}},
+ { 0x2c8e, {1, {0x2c8f}}},
+ { 0x2c90, {1, {0x2c91}}},
+ { 0x2c92, {1, {0x2c93}}},
+ { 0x2c94, {1, {0x2c95}}},
+ { 0x2c96, {1, {0x2c97}}},
+ { 0x2c98, {1, {0x2c99}}},
+ { 0x2c9a, {1, {0x2c9b}}},
+ { 0x2c9c, {1, {0x2c9d}}},
+ { 0x2c9e, {1, {0x2c9f}}},
+ { 0x2ca0, {1, {0x2ca1}}},
+ { 0x2ca2, {1, {0x2ca3}}},
+ { 0x2ca4, {1, {0x2ca5}}},
+ { 0x2ca6, {1, {0x2ca7}}},
+ { 0x2ca8, {1, {0x2ca9}}},
+ { 0x2caa, {1, {0x2cab}}},
+ { 0x2cac, {1, {0x2cad}}},
+ { 0x2cae, {1, {0x2caf}}},
+ { 0x2cb0, {1, {0x2cb1}}},
+ { 0x2cb2, {1, {0x2cb3}}},
+ { 0x2cb4, {1, {0x2cb5}}},
+ { 0x2cb6, {1, {0x2cb7}}},
+ { 0x2cb8, {1, {0x2cb9}}},
+ { 0x2cba, {1, {0x2cbb}}},
+ { 0x2cbc, {1, {0x2cbd}}},
+ { 0x2cbe, {1, {0x2cbf}}},
+ { 0x2cc0, {1, {0x2cc1}}},
+ { 0x2cc2, {1, {0x2cc3}}},
+ { 0x2cc4, {1, {0x2cc5}}},
+ { 0x2cc6, {1, {0x2cc7}}},
+ { 0x2cc8, {1, {0x2cc9}}},
+ { 0x2cca, {1, {0x2ccb}}},
+ { 0x2ccc, {1, {0x2ccd}}},
+ { 0x2cce, {1, {0x2ccf}}},
+ { 0x2cd0, {1, {0x2cd1}}},
+ { 0x2cd2, {1, {0x2cd3}}},
+ { 0x2cd4, {1, {0x2cd5}}},
+ { 0x2cd6, {1, {0x2cd7}}},
+ { 0x2cd8, {1, {0x2cd9}}},
+ { 0x2cda, {1, {0x2cdb}}},
+ { 0x2cdc, {1, {0x2cdd}}},
+ { 0x2cde, {1, {0x2cdf}}},
+ { 0x2ce0, {1, {0x2ce1}}},
+ { 0x2ce2, {1, {0x2ce3}}},
+ { 0xfb00, {2, {0x0066, 0x0066}}},
+ { 0xfb01, {2, {0x0066, 0x0069}}},
+ { 0xfb02, {2, {0x0066, 0x006c}}},
+ { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}},
+ { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}},
+ { 0xfb05, {2, {0x0073, 0x0074}}},
+ { 0xfb06, {2, {0x0073, 0x0074}}},
+ { 0xfb13, {2, {0x0574, 0x0576}}},
+ { 0xfb14, {2, {0x0574, 0x0565}}},
+ { 0xfb15, {2, {0x0574, 0x056b}}},
+ { 0xfb16, {2, {0x057e, 0x0576}}},
+ { 0xfb17, {2, {0x0574, 0x056d}}},
+ { 0xff21, {1, {0xff41}}},
+ { 0xff22, {1, {0xff42}}},
+ { 0xff23, {1, {0xff43}}},
+ { 0xff24, {1, {0xff44}}},
+ { 0xff25, {1, {0xff45}}},
+ { 0xff26, {1, {0xff46}}},
+ { 0xff27, {1, {0xff47}}},
+ { 0xff28, {1, {0xff48}}},
+ { 0xff29, {1, {0xff49}}},
+ { 0xff2a, {1, {0xff4a}}},
+ { 0xff2b, {1, {0xff4b}}},
+ { 0xff2c, {1, {0xff4c}}},
+ { 0xff2d, {1, {0xff4d}}},
+ { 0xff2e, {1, {0xff4e}}},
+ { 0xff2f, {1, {0xff4f}}},
+ { 0xff30, {1, {0xff50}}},
+ { 0xff31, {1, {0xff51}}},
+ { 0xff32, {1, {0xff52}}},
+ { 0xff33, {1, {0xff53}}},
+ { 0xff34, {1, {0xff54}}},
+ { 0xff35, {1, {0xff55}}},
+ { 0xff36, {1, {0xff56}}},
+ { 0xff37, {1, {0xff57}}},
+ { 0xff38, {1, {0xff58}}},
+ { 0xff39, {1, {0xff59}}},
+ { 0xff3a, {1, {0xff5a}}},
+ { 0x10400, {1, {0x10428}}},
+ { 0x10401, {1, {0x10429}}},
+ { 0x10402, {1, {0x1042a}}},
+ { 0x10403, {1, {0x1042b}}},
+ { 0x10404, {1, {0x1042c}}},
+ { 0x10405, {1, {0x1042d}}},
+ { 0x10406, {1, {0x1042e}}},
+ { 0x10407, {1, {0x1042f}}},
+ { 0x10408, {1, {0x10430}}},
+ { 0x10409, {1, {0x10431}}},
+ { 0x1040a, {1, {0x10432}}},
+ { 0x1040b, {1, {0x10433}}},
+ { 0x1040c, {1, {0x10434}}},
+ { 0x1040d, {1, {0x10435}}},
+ { 0x1040e, {1, {0x10436}}},
+ { 0x1040f, {1, {0x10437}}},
+ { 0x10410, {1, {0x10438}}},
+ { 0x10411, {1, {0x10439}}},
+ { 0x10412, {1, {0x1043a}}},
+ { 0x10413, {1, {0x1043b}}},
+ { 0x10414, {1, {0x1043c}}},
+ { 0x10415, {1, {0x1043d}}},
+ { 0x10416, {1, {0x1043e}}},
+ { 0x10417, {1, {0x1043f}}},
+ { 0x10418, {1, {0x10440}}},
+ { 0x10419, {1, {0x10441}}},
+ { 0x1041a, {1, {0x10442}}},
+ { 0x1041b, {1, {0x10443}}},
+ { 0x1041c, {1, {0x10444}}},
+ { 0x1041d, {1, {0x10445}}},
+ { 0x1041e, {1, {0x10446}}},
+ { 0x1041f, {1, {0x10447}}},
+ { 0x10420, {1, {0x10448}}},
+ { 0x10421, {1, {0x10449}}},
+ { 0x10422, {1, {0x1044a}}},
+ { 0x10423, {1, {0x1044b}}},
+ { 0x10424, {1, {0x1044c}}},
+ { 0x10425, {1, {0x1044d}}},
+ { 0x10426, {1, {0x1044e}}},
+ { 0x10427, {1, {0x1044f}}}
+};
+
+static const CaseFold_11_Type CaseFold_Locale[] = {
+ { 0x0049, {1, {0x0069}}},
+ { 0x0130, {2, {0x0069, 0x0307}}}
+};
+
+static const CaseUnfold_11_Type CaseUnfold_11[] = {
+ { 0x0061, {1, {0x0041 }}},
+ { 0x0062, {1, {0x0042 }}},
+ { 0x0063, {1, {0x0043 }}},
+ { 0x0064, {1, {0x0044 }}},
+ { 0x0065, {1, {0x0045 }}},
+ { 0x0066, {1, {0x0046 }}},
+ { 0x0067, {1, {0x0047 }}},
+ { 0x0068, {1, {0x0048 }}},
+ { 0x006a, {1, {0x004a }}},
+ { 0x006b, {2, {0x212a, 0x004b }}},
+ { 0x006c, {1, {0x004c }}},
+ { 0x006d, {1, {0x004d }}},
+ { 0x006e, {1, {0x004e }}},
+ { 0x006f, {1, {0x004f }}},
+ { 0x0070, {1, {0x0050 }}},
+ { 0x0071, {1, {0x0051 }}},
+ { 0x0072, {1, {0x0052 }}},
+ { 0x0073, {2, {0x0053, 0x017f }}},
+ { 0x0074, {1, {0x0054 }}},
+ { 0x0075, {1, {0x0055 }}},
+ { 0x0076, {1, {0x0056 }}},
+ { 0x0077, {1, {0x0057 }}},
+ { 0x0078, {1, {0x0058 }}},
+ { 0x0079, {1, {0x0059 }}},
+ { 0x007a, {1, {0x005a }}},
+ { 0x00e0, {1, {0x00c0 }}},
+ { 0x00e1, {1, {0x00c1 }}},
+ { 0x00e2, {1, {0x00c2 }}},
+ { 0x00e3, {1, {0x00c3 }}},
+ { 0x00e4, {1, {0x00c4 }}},
+ { 0x00e5, {2, {0x212b, 0x00c5 }}},
+ { 0x00e6, {1, {0x00c6 }}},
+ { 0x00e7, {1, {0x00c7 }}},
+ { 0x00e8, {1, {0x00c8 }}},
+ { 0x00e9, {1, {0x00c9 }}},
+ { 0x00ea, {1, {0x00ca }}},
+ { 0x00eb, {1, {0x00cb }}},
+ { 0x00ec, {1, {0x00cc }}},
+ { 0x00ed, {1, {0x00cd }}},
+ { 0x00ee, {1, {0x00ce }}},
+ { 0x00ef, {1, {0x00cf }}},
+ { 0x00f0, {1, {0x00d0 }}},
+ { 0x00f1, {1, {0x00d1 }}},
+ { 0x00f2, {1, {0x00d2 }}},
+ { 0x00f3, {1, {0x00d3 }}},
+ { 0x00f4, {1, {0x00d4 }}},
+ { 0x00f5, {1, {0x00d5 }}},
+ { 0x00f6, {1, {0x00d6 }}},
+ { 0x00f8, {1, {0x00d8 }}},
+ { 0x00f9, {1, {0x00d9 }}},
+ { 0x00fa, {1, {0x00da }}},
+ { 0x00fb, {1, {0x00db }}},
+ { 0x00fc, {1, {0x00dc }}},
+ { 0x00fd, {1, {0x00dd }}},
+ { 0x00fe, {1, {0x00de }}},
+ { 0x00ff, {1, {0x0178 }}},
+ { 0x0101, {1, {0x0100 }}},
+ { 0x0103, {1, {0x0102 }}},
+ { 0x0105, {1, {0x0104 }}},
+ { 0x0107, {1, {0x0106 }}},
+ { 0x0109, {1, {0x0108 }}},
+ { 0x010b, {1, {0x010a }}},
+ { 0x010d, {1, {0x010c }}},
+ { 0x010f, {1, {0x010e }}},
+ { 0x0111, {1, {0x0110 }}},
+ { 0x0113, {1, {0x0112 }}},
+ { 0x0115, {1, {0x0114 }}},
+ { 0x0117, {1, {0x0116 }}},
+ { 0x0119, {1, {0x0118 }}},
+ { 0x011b, {1, {0x011a }}},
+ { 0x011d, {1, {0x011c }}},
+ { 0x011f, {1, {0x011e }}},
+ { 0x0121, {1, {0x0120 }}},
+ { 0x0123, {1, {0x0122 }}},
+ { 0x0125, {1, {0x0124 }}},
+ { 0x0127, {1, {0x0126 }}},
+ { 0x0129, {1, {0x0128 }}},
+ { 0x012b, {1, {0x012a }}},
+ { 0x012d, {1, {0x012c }}},
+ { 0x012f, {1, {0x012e }}},
+ { 0x0133, {1, {0x0132 }}},
+ { 0x0135, {1, {0x0134 }}},
+ { 0x0137, {1, {0x0136 }}},
+ { 0x013a, {1, {0x0139 }}},
+ { 0x013c, {1, {0x013b }}},
+ { 0x013e, {1, {0x013d }}},
+ { 0x0140, {1, {0x013f }}},
+ { 0x0142, {1, {0x0141 }}},
+ { 0x0144, {1, {0x0143 }}},
+ { 0x0146, {1, {0x0145 }}},
+ { 0x0148, {1, {0x0147 }}},
+ { 0x014b, {1, {0x014a }}},
+ { 0x014d, {1, {0x014c }}},
+ { 0x014f, {1, {0x014e }}},
+ { 0x0151, {1, {0x0150 }}},
+ { 0x0153, {1, {0x0152 }}},
+ { 0x0155, {1, {0x0154 }}},
+ { 0x0157, {1, {0x0156 }}},
+ { 0x0159, {1, {0x0158 }}},
+ { 0x015b, {1, {0x015a }}},
+ { 0x015d, {1, {0x015c }}},
+ { 0x015f, {1, {0x015e }}},
+ { 0x0161, {1, {0x0160 }}},
+ { 0x0163, {1, {0x0162 }}},
+ { 0x0165, {1, {0x0164 }}},
+ { 0x0167, {1, {0x0166 }}},
+ { 0x0169, {1, {0x0168 }}},
+ { 0x016b, {1, {0x016a }}},
+ { 0x016d, {1, {0x016c }}},
+ { 0x016f, {1, {0x016e }}},
+ { 0x0171, {1, {0x0170 }}},
+ { 0x0173, {1, {0x0172 }}},
+ { 0x0175, {1, {0x0174 }}},
+ { 0x0177, {1, {0x0176 }}},
+ { 0x017a, {1, {0x0179 }}},
+ { 0x017c, {1, {0x017b }}},
+ { 0x017e, {1, {0x017d }}},
+ { 0x0183, {1, {0x0182 }}},
+ { 0x0185, {1, {0x0184 }}},
+ { 0x0188, {1, {0x0187 }}},
+ { 0x018c, {1, {0x018b }}},
+ { 0x0192, {1, {0x0191 }}},
+ { 0x0195, {1, {0x01f6 }}},
+ { 0x0199, {1, {0x0198 }}},
+ { 0x019a, {1, {0x023d }}},
+ { 0x019e, {1, {0x0220 }}},
+ { 0x01a1, {1, {0x01a0 }}},
+ { 0x01a3, {1, {0x01a2 }}},
+ { 0x01a5, {1, {0x01a4 }}},
+ { 0x01a8, {1, {0x01a7 }}},
+ { 0x01ad, {1, {0x01ac }}},
+ { 0x01b0, {1, {0x01af }}},
+ { 0x01b4, {1, {0x01b3 }}},
+ { 0x01b6, {1, {0x01b5 }}},
+ { 0x01b9, {1, {0x01b8 }}},
+ { 0x01bd, {1, {0x01bc }}},
+ { 0x01bf, {1, {0x01f7 }}},
+ { 0x01c6, {2, {0x01c4, 0x01c5 }}},
+ { 0x01c9, {2, {0x01c7, 0x01c8 }}},
+ { 0x01cc, {2, {0x01ca, 0x01cb }}},
+ { 0x01ce, {1, {0x01cd }}},
+ { 0x01d0, {1, {0x01cf }}},
+ { 0x01d2, {1, {0x01d1 }}},
+ { 0x01d4, {1, {0x01d3 }}},
+ { 0x01d6, {1, {0x01d5 }}},
+ { 0x01d8, {1, {0x01d7 }}},
+ { 0x01da, {1, {0x01d9 }}},
+ { 0x01dc, {1, {0x01db }}},
+ { 0x01dd, {1, {0x018e }}},
+ { 0x01df, {1, {0x01de }}},
+ { 0x01e1, {1, {0x01e0 }}},
+ { 0x01e3, {1, {0x01e2 }}},
+ { 0x01e5, {1, {0x01e4 }}},
+ { 0x01e7, {1, {0x01e6 }}},
+ { 0x01e9, {1, {0x01e8 }}},
+ { 0x01eb, {1, {0x01ea }}},
+ { 0x01ed, {1, {0x01ec }}},
+ { 0x01ef, {1, {0x01ee }}},
+ { 0x01f3, {2, {0x01f1, 0x01f2 }}},
+ { 0x01f5, {1, {0x01f4 }}},
+ { 0x01f9, {1, {0x01f8 }}},
+ { 0x01fb, {1, {0x01fa }}},
+ { 0x01fd, {1, {0x01fc }}},
+ { 0x01ff, {1, {0x01fe }}},
+ { 0x0201, {1, {0x0200 }}},
+ { 0x0203, {1, {0x0202 }}},
+ { 0x0205, {1, {0x0204 }}},
+ { 0x0207, {1, {0x0206 }}},
+ { 0x0209, {1, {0x0208 }}},
+ { 0x020b, {1, {0x020a }}},
+ { 0x020d, {1, {0x020c }}},
+ { 0x020f, {1, {0x020e }}},
+ { 0x0211, {1, {0x0210 }}},
+ { 0x0213, {1, {0x0212 }}},
+ { 0x0215, {1, {0x0214 }}},
+ { 0x0217, {1, {0x0216 }}},
+ { 0x0219, {1, {0x0218 }}},
+ { 0x021b, {1, {0x021a }}},
+ { 0x021d, {1, {0x021c }}},
+ { 0x021f, {1, {0x021e }}},
+ { 0x0223, {1, {0x0222 }}},
+ { 0x0225, {1, {0x0224 }}},
+ { 0x0227, {1, {0x0226 }}},
+ { 0x0229, {1, {0x0228 }}},
+ { 0x022b, {1, {0x022a }}},
+ { 0x022d, {1, {0x022c }}},
+ { 0x022f, {1, {0x022e }}},
+ { 0x0231, {1, {0x0230 }}},
+ { 0x0233, {1, {0x0232 }}},
+ { 0x023c, {1, {0x023b }}},
+ { 0x0253, {1, {0x0181 }}},
+ { 0x0254, {1, {0x0186 }}},
+ { 0x0256, {1, {0x0189 }}},
+ { 0x0257, {1, {0x018a }}},
+ { 0x0259, {1, {0x018f }}},
+ { 0x025b, {1, {0x0190 }}},
+ { 0x0260, {1, {0x0193 }}},
+ { 0x0263, {1, {0x0194 }}},
+ { 0x0268, {1, {0x0197 }}},
+ { 0x0269, {1, {0x0196 }}},
+ { 0x026f, {1, {0x019c }}},
+ { 0x0272, {1, {0x019d }}},
+ { 0x0275, {1, {0x019f }}},
+ { 0x0280, {1, {0x01a6 }}},
+ { 0x0283, {1, {0x01a9 }}},
+ { 0x0288, {1, {0x01ae }}},
+ { 0x028a, {1, {0x01b1 }}},
+ { 0x028b, {1, {0x01b2 }}},
+ { 0x0292, {1, {0x01b7 }}},
+ { 0x0294, {1, {0x0241 }}},
+ { 0x03ac, {1, {0x0386 }}},
+ { 0x03ad, {1, {0x0388 }}},
+ { 0x03ae, {1, {0x0389 }}},
+ { 0x03af, {1, {0x038a }}},
+ { 0x03b1, {1, {0x0391 }}},
+ { 0x03b2, {2, {0x0392, 0x03d0 }}},
+ { 0x03b3, {1, {0x0393 }}},
+ { 0x03b4, {1, {0x0394 }}},
+ { 0x03b5, {2, {0x03f5, 0x0395 }}},
+ { 0x03b6, {1, {0x0396 }}},
+ { 0x03b7, {1, {0x0397 }}},
+ { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}},
+ { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}},
+ { 0x03ba, {2, {0x03f0, 0x039a }}},
+ { 0x03bb, {1, {0x039b }}},
+ { 0x03bc, {2, {0x00b5, 0x039c }}},
+ { 0x03bd, {1, {0x039d }}},
+ { 0x03be, {1, {0x039e }}},
+ { 0x03bf, {1, {0x039f }}},
+ { 0x03c0, {2, {0x03a0, 0x03d6 }}},
+ { 0x03c1, {2, {0x03f1, 0x03a1 }}},
+ { 0x03c3, {2, {0x03a3, 0x03c2 }}},
+ { 0x03c4, {1, {0x03a4 }}},
+ { 0x03c5, {1, {0x03a5 }}},
+ { 0x03c6, {2, {0x03a6, 0x03d5 }}},
+ { 0x03c7, {1, {0x03a7 }}},
+ { 0x03c8, {1, {0x03a8 }}},
+ { 0x03c9, {2, {0x03a9, 0x2126 }}},
+ { 0x03ca, {1, {0x03aa }}},
+ { 0x03cb, {1, {0x03ab }}},
+ { 0x03cc, {1, {0x038c }}},
+ { 0x03cd, {1, {0x038e }}},
+ { 0x03ce, {1, {0x038f }}},
+ { 0x03d9, {1, {0x03d8 }}},
+ { 0x03db, {1, {0x03da }}},
+ { 0x03dd, {1, {0x03dc }}},
+ { 0x03df, {1, {0x03de }}},
+ { 0x03e1, {1, {0x03e0 }}},
+ { 0x03e3, {1, {0x03e2 }}},
+ { 0x03e5, {1, {0x03e4 }}},
+ { 0x03e7, {1, {0x03e6 }}},
+ { 0x03e9, {1, {0x03e8 }}},
+ { 0x03eb, {1, {0x03ea }}},
+ { 0x03ed, {1, {0x03ec }}},
+ { 0x03ef, {1, {0x03ee }}},
+ { 0x03f2, {1, {0x03f9 }}},
+ { 0x03f8, {1, {0x03f7 }}},
+ { 0x03fb, {1, {0x03fa }}},
+ { 0x0430, {1, {0x0410 }}},
+ { 0x0431, {1, {0x0411 }}},
+ { 0x0432, {1, {0x0412 }}},
+ { 0x0433, {1, {0x0413 }}},
+ { 0x0434, {1, {0x0414 }}},
+ { 0x0435, {1, {0x0415 }}},
+ { 0x0436, {1, {0x0416 }}},
+ { 0x0437, {1, {0x0417 }}},
+ { 0x0438, {1, {0x0418 }}},
+ { 0x0439, {1, {0x0419 }}},
+ { 0x043a, {1, {0x041a }}},
+ { 0x043b, {1, {0x041b }}},
+ { 0x043c, {1, {0x041c }}},
+ { 0x043d, {1, {0x041d }}},
+ { 0x043e, {1, {0x041e }}},
+ { 0x043f, {1, {0x041f }}},
+ { 0x0440, {1, {0x0420 }}},
+ { 0x0441, {1, {0x0421 }}},
+ { 0x0442, {1, {0x0422 }}},
+ { 0x0443, {1, {0x0423 }}},
+ { 0x0444, {1, {0x0424 }}},
+ { 0x0445, {1, {0x0425 }}},
+ { 0x0446, {1, {0x0426 }}},
+ { 0x0447, {1, {0x0427 }}},
+ { 0x0448, {1, {0x0428 }}},
+ { 0x0449, {1, {0x0429 }}},
+ { 0x044a, {1, {0x042a }}},
+ { 0x044b, {1, {0x042b }}},
+ { 0x044c, {1, {0x042c }}},
+ { 0x044d, {1, {0x042d }}},
+ { 0x044e, {1, {0x042e }}},
+ { 0x044f, {1, {0x042f }}},
+ { 0x0450, {1, {0x0400 }}},
+ { 0x0451, {1, {0x0401 }}},
+ { 0x0452, {1, {0x0402 }}},
+ { 0x0453, {1, {0x0403 }}},
+ { 0x0454, {1, {0x0404 }}},
+ { 0x0455, {1, {0x0405 }}},
+ { 0x0456, {1, {0x0406 }}},
+ { 0x0457, {1, {0x0407 }}},
+ { 0x0458, {1, {0x0408 }}},
+ { 0x0459, {1, {0x0409 }}},
+ { 0x045a, {1, {0x040a }}},
+ { 0x045b, {1, {0x040b }}},
+ { 0x045c, {1, {0x040c }}},
+ { 0x045d, {1, {0x040d }}},
+ { 0x045e, {1, {0x040e }}},
+ { 0x045f, {1, {0x040f }}},
+ { 0x0461, {1, {0x0460 }}},
+ { 0x0463, {1, {0x0462 }}},
+ { 0x0465, {1, {0x0464 }}},
+ { 0x0467, {1, {0x0466 }}},
+ { 0x0469, {1, {0x0468 }}},
+ { 0x046b, {1, {0x046a }}},
+ { 0x046d, {1, {0x046c }}},
+ { 0x046f, {1, {0x046e }}},
+ { 0x0471, {1, {0x0470 }}},
+ { 0x0473, {1, {0x0472 }}},
+ { 0x0475, {1, {0x0474 }}},
+ { 0x0477, {1, {0x0476 }}},
+ { 0x0479, {1, {0x0478 }}},
+ { 0x047b, {1, {0x047a }}},
+ { 0x047d, {1, {0x047c }}},
+ { 0x047f, {1, {0x047e }}},
+ { 0x0481, {1, {0x0480 }}},
+ { 0x048b, {1, {0x048a }}},
+ { 0x048d, {1, {0x048c }}},
+ { 0x048f, {1, {0x048e }}},
+ { 0x0491, {1, {0x0490 }}},
+ { 0x0493, {1, {0x0492 }}},
+ { 0x0495, {1, {0x0494 }}},
+ { 0x0497, {1, {0x0496 }}},
+ { 0x0499, {1, {0x0498 }}},
+ { 0x049b, {1, {0x049a }}},
+ { 0x049d, {1, {0x049c }}},
+ { 0x049f, {1, {0x049e }}},
+ { 0x04a1, {1, {0x04a0 }}},
+ { 0x04a3, {1, {0x04a2 }}},
+ { 0x04a5, {1, {0x04a4 }}},
+ { 0x04a7, {1, {0x04a6 }}},
+ { 0x04a9, {1, {0x04a8 }}},
+ { 0x04ab, {1, {0x04aa }}},
+ { 0x04ad, {1, {0x04ac }}},
+ { 0x04af, {1, {0x04ae }}},
+ { 0x04b1, {1, {0x04b0 }}},
+ { 0x04b3, {1, {0x04b2 }}},
+ { 0x04b5, {1, {0x04b4 }}},
+ { 0x04b7, {1, {0x04b6 }}},
+ { 0x04b9, {1, {0x04b8 }}},
+ { 0x04bb, {1, {0x04ba }}},
+ { 0x04bd, {1, {0x04bc }}},
+ { 0x04bf, {1, {0x04be }}},
+ { 0x04c2, {1, {0x04c1 }}},
+ { 0x04c4, {1, {0x04c3 }}},
+ { 0x04c6, {1, {0x04c5 }}},
+ { 0x04c8, {1, {0x04c7 }}},
+ { 0x04ca, {1, {0x04c9 }}},
+ { 0x04cc, {1, {0x04cb }}},
+ { 0x04ce, {1, {0x04cd }}},
+ { 0x04d1, {1, {0x04d0 }}},
+ { 0x04d3, {1, {0x04d2 }}},
+ { 0x04d5, {1, {0x04d4 }}},
+ { 0x04d7, {1, {0x04d6 }}},
+ { 0x04d9, {1, {0x04d8 }}},
+ { 0x04db, {1, {0x04da }}},
+ { 0x04dd, {1, {0x04dc }}},
+ { 0x04df, {1, {0x04de }}},
+ { 0x04e1, {1, {0x04e0 }}},
+ { 0x04e3, {1, {0x04e2 }}},
+ { 0x04e5, {1, {0x04e4 }}},
+ { 0x04e7, {1, {0x04e6 }}},
+ { 0x04e9, {1, {0x04e8 }}},
+ { 0x04eb, {1, {0x04ea }}},
+ { 0x04ed, {1, {0x04ec }}},
+ { 0x04ef, {1, {0x04ee }}},
+ { 0x04f1, {1, {0x04f0 }}},
+ { 0x04f3, {1, {0x04f2 }}},
+ { 0x04f5, {1, {0x04f4 }}},
+ { 0x04f7, {1, {0x04f6 }}},
+ { 0x04f9, {1, {0x04f8 }}},
+ { 0x0501, {1, {0x0500 }}},
+ { 0x0503, {1, {0x0502 }}},
+ { 0x0505, {1, {0x0504 }}},
+ { 0x0507, {1, {0x0506 }}},
+ { 0x0509, {1, {0x0508 }}},
+ { 0x050b, {1, {0x050a }}},
+ { 0x050d, {1, {0x050c }}},
+ { 0x050f, {1, {0x050e }}},
+ { 0x0561, {1, {0x0531 }}},
+ { 0x0562, {1, {0x0532 }}},
+ { 0x0563, {1, {0x0533 }}},
+ { 0x0564, {1, {0x0534 }}},
+ { 0x0565, {1, {0x0535 }}},
+ { 0x0566, {1, {0x0536 }}},
+ { 0x0567, {1, {0x0537 }}},
+ { 0x0568, {1, {0x0538 }}},
+ { 0x0569, {1, {0x0539 }}},
+ { 0x056a, {1, {0x053a }}},
+ { 0x056b, {1, {0x053b }}},
+ { 0x056c, {1, {0x053c }}},
+ { 0x056d, {1, {0x053d }}},
+ { 0x056e, {1, {0x053e }}},
+ { 0x056f, {1, {0x053f }}},
+ { 0x0570, {1, {0x0540 }}},
+ { 0x0571, {1, {0x0541 }}},
+ { 0x0572, {1, {0x0542 }}},
+ { 0x0573, {1, {0x0543 }}},
+ { 0x0574, {1, {0x0544 }}},
+ { 0x0575, {1, {0x0545 }}},
+ { 0x0576, {1, {0x0546 }}},
+ { 0x0577, {1, {0x0547 }}},
+ { 0x0578, {1, {0x0548 }}},
+ { 0x0579, {1, {0x0549 }}},
+ { 0x057a, {1, {0x054a }}},
+ { 0x057b, {1, {0x054b }}},
+ { 0x057c, {1, {0x054c }}},
+ { 0x057d, {1, {0x054d }}},
+ { 0x057e, {1, {0x054e }}},
+ { 0x057f, {1, {0x054f }}},
+ { 0x0580, {1, {0x0550 }}},
+ { 0x0581, {1, {0x0551 }}},
+ { 0x0582, {1, {0x0552 }}},
+ { 0x0583, {1, {0x0553 }}},
+ { 0x0584, {1, {0x0554 }}},
+ { 0x0585, {1, {0x0555 }}},
+ { 0x0586, {1, {0x0556 }}},
+ { 0x1e01, {1, {0x1e00 }}},
+ { 0x1e03, {1, {0x1e02 }}},
+ { 0x1e05, {1, {0x1e04 }}},
+ { 0x1e07, {1, {0x1e06 }}},
+ { 0x1e09, {1, {0x1e08 }}},
+ { 0x1e0b, {1, {0x1e0a }}},
+ { 0x1e0d, {1, {0x1e0c }}},
+ { 0x1e0f, {1, {0x1e0e }}},
+ { 0x1e11, {1, {0x1e10 }}},
+ { 0x1e13, {1, {0x1e12 }}},
+ { 0x1e15, {1, {0x1e14 }}},
+ { 0x1e17, {1, {0x1e16 }}},
+ { 0x1e19, {1, {0x1e18 }}},
+ { 0x1e1b, {1, {0x1e1a }}},
+ { 0x1e1d, {1, {0x1e1c }}},
+ { 0x1e1f, {1, {0x1e1e }}},
+ { 0x1e21, {1, {0x1e20 }}},
+ { 0x1e23, {1, {0x1e22 }}},
+ { 0x1e25, {1, {0x1e24 }}},
+ { 0x1e27, {1, {0x1e26 }}},
+ { 0x1e29, {1, {0x1e28 }}},
+ { 0x1e2b, {1, {0x1e2a }}},
+ { 0x1e2d, {1, {0x1e2c }}},
+ { 0x1e2f, {1, {0x1e2e }}},
+ { 0x1e31, {1, {0x1e30 }}},
+ { 0x1e33, {1, {0x1e32 }}},
+ { 0x1e35, {1, {0x1e34 }}},
+ { 0x1e37, {1, {0x1e36 }}},
+ { 0x1e39, {1, {0x1e38 }}},
+ { 0x1e3b, {1, {0x1e3a }}},
+ { 0x1e3d, {1, {0x1e3c }}},
+ { 0x1e3f, {1, {0x1e3e }}},
+ { 0x1e41, {1, {0x1e40 }}},
+ { 0x1e43, {1, {0x1e42 }}},
+ { 0x1e45, {1, {0x1e44 }}},
+ { 0x1e47, {1, {0x1e46 }}},
+ { 0x1e49, {1, {0x1e48 }}},
+ { 0x1e4b, {1, {0x1e4a }}},
+ { 0x1e4d, {1, {0x1e4c }}},
+ { 0x1e4f, {1, {0x1e4e }}},
+ { 0x1e51, {1, {0x1e50 }}},
+ { 0x1e53, {1, {0x1e52 }}},
+ { 0x1e55, {1, {0x1e54 }}},
+ { 0x1e57, {1, {0x1e56 }}},
+ { 0x1e59, {1, {0x1e58 }}},
+ { 0x1e5b, {1, {0x1e5a }}},
+ { 0x1e5d, {1, {0x1e5c }}},
+ { 0x1e5f, {1, {0x1e5e }}},
+ { 0x1e61, {2, {0x1e9b, 0x1e60 }}},
+ { 0x1e63, {1, {0x1e62 }}},
+ { 0x1e65, {1, {0x1e64 }}},
+ { 0x1e67, {1, {0x1e66 }}},
+ { 0x1e69, {1, {0x1e68 }}},
+ { 0x1e6b, {1, {0x1e6a }}},
+ { 0x1e6d, {1, {0x1e6c }}},
+ { 0x1e6f, {1, {0x1e6e }}},
+ { 0x1e71, {1, {0x1e70 }}},
+ { 0x1e73, {1, {0x1e72 }}},
+ { 0x1e75, {1, {0x1e74 }}},
+ { 0x1e77, {1, {0x1e76 }}},
+ { 0x1e79, {1, {0x1e78 }}},
+ { 0x1e7b, {1, {0x1e7a }}},
+ { 0x1e7d, {1, {0x1e7c }}},
+ { 0x1e7f, {1, {0x1e7e }}},
+ { 0x1e81, {1, {0x1e80 }}},
+ { 0x1e83, {1, {0x1e82 }}},
+ { 0x1e85, {1, {0x1e84 }}},
+ { 0x1e87, {1, {0x1e86 }}},
+ { 0x1e89, {1, {0x1e88 }}},
+ { 0x1e8b, {1, {0x1e8a }}},
+ { 0x1e8d, {1, {0x1e8c }}},
+ { 0x1e8f, {1, {0x1e8e }}},
+ { 0x1e91, {1, {0x1e90 }}},
+ { 0x1e93, {1, {0x1e92 }}},
+ { 0x1e95, {1, {0x1e94 }}},
+ { 0x1ea1, {1, {0x1ea0 }}},
+ { 0x1ea3, {1, {0x1ea2 }}},
+ { 0x1ea5, {1, {0x1ea4 }}},
+ { 0x1ea7, {1, {0x1ea6 }}},
+ { 0x1ea9, {1, {0x1ea8 }}},
+ { 0x1eab, {1, {0x1eaa }}},
+ { 0x1ead, {1, {0x1eac }}},
+ { 0x1eaf, {1, {0x1eae }}},
+ { 0x1eb1, {1, {0x1eb0 }}},
+ { 0x1eb3, {1, {0x1eb2 }}},
+ { 0x1eb5, {1, {0x1eb4 }}},
+ { 0x1eb7, {1, {0x1eb6 }}},
+ { 0x1eb9, {1, {0x1eb8 }}},
+ { 0x1ebb, {1, {0x1eba }}},
+ { 0x1ebd, {1, {0x1ebc }}},
+ { 0x1ebf, {1, {0x1ebe }}},
+ { 0x1ec1, {1, {0x1ec0 }}},
+ { 0x1ec3, {1, {0x1ec2 }}},
+ { 0x1ec5, {1, {0x1ec4 }}},
+ { 0x1ec7, {1, {0x1ec6 }}},
+ { 0x1ec9, {1, {0x1ec8 }}},
+ { 0x1ecb, {1, {0x1eca }}},
+ { 0x1ecd, {1, {0x1ecc }}},
+ { 0x1ecf, {1, {0x1ece }}},
+ { 0x1ed1, {1, {0x1ed0 }}},
+ { 0x1ed3, {1, {0x1ed2 }}},
+ { 0x1ed5, {1, {0x1ed4 }}},
+ { 0x1ed7, {1, {0x1ed6 }}},
+ { 0x1ed9, {1, {0x1ed8 }}},
+ { 0x1edb, {1, {0x1eda }}},
+ { 0x1edd, {1, {0x1edc }}},
+ { 0x1edf, {1, {0x1ede }}},
+ { 0x1ee1, {1, {0x1ee0 }}},
+ { 0x1ee3, {1, {0x1ee2 }}},
+ { 0x1ee5, {1, {0x1ee4 }}},
+ { 0x1ee7, {1, {0x1ee6 }}},
+ { 0x1ee9, {1, {0x1ee8 }}},
+ { 0x1eeb, {1, {0x1eea }}},
+ { 0x1eed, {1, {0x1eec }}},
+ { 0x1eef, {1, {0x1eee }}},
+ { 0x1ef1, {1, {0x1ef0 }}},
+ { 0x1ef3, {1, {0x1ef2 }}},
+ { 0x1ef5, {1, {0x1ef4 }}},
+ { 0x1ef7, {1, {0x1ef6 }}},
+ { 0x1ef9, {1, {0x1ef8 }}},
+ { 0x1f00, {1, {0x1f08 }}},
+ { 0x1f01, {1, {0x1f09 }}},
+ { 0x1f02, {1, {0x1f0a }}},
+ { 0x1f03, {1, {0x1f0b }}},
+ { 0x1f04, {1, {0x1f0c }}},
+ { 0x1f05, {1, {0x1f0d }}},
+ { 0x1f06, {1, {0x1f0e }}},
+ { 0x1f07, {1, {0x1f0f }}},
+ { 0x1f10, {1, {0x1f18 }}},
+ { 0x1f11, {1, {0x1f19 }}},
+ { 0x1f12, {1, {0x1f1a }}},
+ { 0x1f13, {1, {0x1f1b }}},
+ { 0x1f14, {1, {0x1f1c }}},
+ { 0x1f15, {1, {0x1f1d }}},
+ { 0x1f20, {1, {0x1f28 }}},
+ { 0x1f21, {1, {0x1f29 }}},
+ { 0x1f22, {1, {0x1f2a }}},
+ { 0x1f23, {1, {0x1f2b }}},
+ { 0x1f24, {1, {0x1f2c }}},
+ { 0x1f25, {1, {0x1f2d }}},
+ { 0x1f26, {1, {0x1f2e }}},
+ { 0x1f27, {1, {0x1f2f }}},
+ { 0x1f30, {1, {0x1f38 }}},
+ { 0x1f31, {1, {0x1f39 }}},
+ { 0x1f32, {1, {0x1f3a }}},
+ { 0x1f33, {1, {0x1f3b }}},
+ { 0x1f34, {1, {0x1f3c }}},
+ { 0x1f35, {1, {0x1f3d }}},
+ { 0x1f36, {1, {0x1f3e }}},
+ { 0x1f37, {1, {0x1f3f }}},
+ { 0x1f40, {1, {0x1f48 }}},
+ { 0x1f41, {1, {0x1f49 }}},
+ { 0x1f42, {1, {0x1f4a }}},
+ { 0x1f43, {1, {0x1f4b }}},
+ { 0x1f44, {1, {0x1f4c }}},
+ { 0x1f45, {1, {0x1f4d }}},
+ { 0x1f51, {1, {0x1f59 }}},
+ { 0x1f53, {1, {0x1f5b }}},
+ { 0x1f55, {1, {0x1f5d }}},
+ { 0x1f57, {1, {0x1f5f }}},
+ { 0x1f60, {1, {0x1f68 }}},
+ { 0x1f61, {1, {0x1f69 }}},
+ { 0x1f62, {1, {0x1f6a }}},
+ { 0x1f63, {1, {0x1f6b }}},
+ { 0x1f64, {1, {0x1f6c }}},
+ { 0x1f65, {1, {0x1f6d }}},
+ { 0x1f66, {1, {0x1f6e }}},
+ { 0x1f67, {1, {0x1f6f }}},
+ { 0x1f70, {1, {0x1fba }}},
+ { 0x1f71, {1, {0x1fbb }}},
+ { 0x1f72, {1, {0x1fc8 }}},
+ { 0x1f73, {1, {0x1fc9 }}},
+ { 0x1f74, {1, {0x1fca }}},
+ { 0x1f75, {1, {0x1fcb }}},
+ { 0x1f76, {1, {0x1fda }}},
+ { 0x1f77, {1, {0x1fdb }}},
+ { 0x1f78, {1, {0x1ff8 }}},
+ { 0x1f79, {1, {0x1ff9 }}},
+ { 0x1f7a, {1, {0x1fea }}},
+ { 0x1f7b, {1, {0x1feb }}},
+ { 0x1f7c, {1, {0x1ffa }}},
+ { 0x1f7d, {1, {0x1ffb }}},
+ { 0x1fb0, {1, {0x1fb8 }}},
+ { 0x1fb1, {1, {0x1fb9 }}},
+ { 0x1fd0, {1, {0x1fd8 }}},
+ { 0x1fd1, {1, {0x1fd9 }}},
+ { 0x1fe0, {1, {0x1fe8 }}},
+ { 0x1fe1, {1, {0x1fe9 }}},
+ { 0x1fe5, {1, {0x1fec }}},
+ { 0x2170, {1, {0x2160 }}},
+ { 0x2171, {1, {0x2161 }}},
+ { 0x2172, {1, {0x2162 }}},
+ { 0x2173, {1, {0x2163 }}},
+ { 0x2174, {1, {0x2164 }}},
+ { 0x2175, {1, {0x2165 }}},
+ { 0x2176, {1, {0x2166 }}},
+ { 0x2177, {1, {0x2167 }}},
+ { 0x2178, {1, {0x2168 }}},
+ { 0x2179, {1, {0x2169 }}},
+ { 0x217a, {1, {0x216a }}},
+ { 0x217b, {1, {0x216b }}},
+ { 0x217c, {1, {0x216c }}},
+ { 0x217d, {1, {0x216d }}},
+ { 0x217e, {1, {0x216e }}},
+ { 0x217f, {1, {0x216f }}},
+ { 0x24d0, {1, {0x24b6 }}},
+ { 0x24d1, {1, {0x24b7 }}},
+ { 0x24d2, {1, {0x24b8 }}},
+ { 0x24d3, {1, {0x24b9 }}},
+ { 0x24d4, {1, {0x24ba }}},
+ { 0x24d5, {1, {0x24bb }}},
+ { 0x24d6, {1, {0x24bc }}},
+ { 0x24d7, {1, {0x24bd }}},
+ { 0x24d8, {1, {0x24be }}},
+ { 0x24d9, {1, {0x24bf }}},
+ { 0x24da, {1, {0x24c0 }}},
+ { 0x24db, {1, {0x24c1 }}},
+ { 0x24dc, {1, {0x24c2 }}},
+ { 0x24dd, {1, {0x24c3 }}},
+ { 0x24de, {1, {0x24c4 }}},
+ { 0x24df, {1, {0x24c5 }}},
+ { 0x24e0, {1, {0x24c6 }}},
+ { 0x24e1, {1, {0x24c7 }}},
+ { 0x24e2, {1, {0x24c8 }}},
+ { 0x24e3, {1, {0x24c9 }}},
+ { 0x24e4, {1, {0x24ca }}},
+ { 0x24e5, {1, {0x24cb }}},
+ { 0x24e6, {1, {0x24cc }}},
+ { 0x24e7, {1, {0x24cd }}},
+ { 0x24e8, {1, {0x24ce }}},
+ { 0x24e9, {1, {0x24cf }}},
+ { 0x2c30, {1, {0x2c00 }}},
+ { 0x2c31, {1, {0x2c01 }}},
+ { 0x2c32, {1, {0x2c02 }}},
+ { 0x2c33, {1, {0x2c03 }}},
+ { 0x2c34, {1, {0x2c04 }}},
+ { 0x2c35, {1, {0x2c05 }}},
+ { 0x2c36, {1, {0x2c06 }}},
+ { 0x2c37, {1, {0x2c07 }}},
+ { 0x2c38, {1, {0x2c08 }}},
+ { 0x2c39, {1, {0x2c09 }}},
+ { 0x2c3a, {1, {0x2c0a }}},
+ { 0x2c3b, {1, {0x2c0b }}},
+ { 0x2c3c, {1, {0x2c0c }}},
+ { 0x2c3d, {1, {0x2c0d }}},
+ { 0x2c3e, {1, {0x2c0e }}},
+ { 0x2c3f, {1, {0x2c0f }}},
+ { 0x2c40, {1, {0x2c10 }}},
+ { 0x2c41, {1, {0x2c11 }}},
+ { 0x2c42, {1, {0x2c12 }}},
+ { 0x2c43, {1, {0x2c13 }}},
+ { 0x2c44, {1, {0x2c14 }}},
+ { 0x2c45, {1, {0x2c15 }}},
+ { 0x2c46, {1, {0x2c16 }}},
+ { 0x2c47, {1, {0x2c17 }}},
+ { 0x2c48, {1, {0x2c18 }}},
+ { 0x2c49, {1, {0x2c19 }}},
+ { 0x2c4a, {1, {0x2c1a }}},
+ { 0x2c4b, {1, {0x2c1b }}},
+ { 0x2c4c, {1, {0x2c1c }}},
+ { 0x2c4d, {1, {0x2c1d }}},
+ { 0x2c4e, {1, {0x2c1e }}},
+ { 0x2c4f, {1, {0x2c1f }}},
+ { 0x2c50, {1, {0x2c20 }}},
+ { 0x2c51, {1, {0x2c21 }}},
+ { 0x2c52, {1, {0x2c22 }}},
+ { 0x2c53, {1, {0x2c23 }}},
+ { 0x2c54, {1, {0x2c24 }}},
+ { 0x2c55, {1, {0x2c25 }}},
+ { 0x2c56, {1, {0x2c26 }}},
+ { 0x2c57, {1, {0x2c27 }}},
+ { 0x2c58, {1, {0x2c28 }}},
+ { 0x2c59, {1, {0x2c29 }}},
+ { 0x2c5a, {1, {0x2c2a }}},
+ { 0x2c5b, {1, {0x2c2b }}},
+ { 0x2c5c, {1, {0x2c2c }}},
+ { 0x2c5d, {1, {0x2c2d }}},
+ { 0x2c5e, {1, {0x2c2e }}},
+ { 0x2c81, {1, {0x2c80 }}},
+ { 0x2c83, {1, {0x2c82 }}},
+ { 0x2c85, {1, {0x2c84 }}},
+ { 0x2c87, {1, {0x2c86 }}},
+ { 0x2c89, {1, {0x2c88 }}},
+ { 0x2c8b, {1, {0x2c8a }}},
+ { 0x2c8d, {1, {0x2c8c }}},
+ { 0x2c8f, {1, {0x2c8e }}},
+ { 0x2c91, {1, {0x2c90 }}},
+ { 0x2c93, {1, {0x2c92 }}},
+ { 0x2c95, {1, {0x2c94 }}},
+ { 0x2c97, {1, {0x2c96 }}},
+ { 0x2c99, {1, {0x2c98 }}},
+ { 0x2c9b, {1, {0x2c9a }}},
+ { 0x2c9d, {1, {0x2c9c }}},
+ { 0x2c9f, {1, {0x2c9e }}},
+ { 0x2ca1, {1, {0x2ca0 }}},
+ { 0x2ca3, {1, {0x2ca2 }}},
+ { 0x2ca5, {1, {0x2ca4 }}},
+ { 0x2ca7, {1, {0x2ca6 }}},
+ { 0x2ca9, {1, {0x2ca8 }}},
+ { 0x2cab, {1, {0x2caa }}},
+ { 0x2cad, {1, {0x2cac }}},
+ { 0x2caf, {1, {0x2cae }}},
+ { 0x2cb1, {1, {0x2cb0 }}},
+ { 0x2cb3, {1, {0x2cb2 }}},
+ { 0x2cb5, {1, {0x2cb4 }}},
+ { 0x2cb7, {1, {0x2cb6 }}},
+ { 0x2cb9, {1, {0x2cb8 }}},
+ { 0x2cbb, {1, {0x2cba }}},
+ { 0x2cbd, {1, {0x2cbc }}},
+ { 0x2cbf, {1, {0x2cbe }}},
+ { 0x2cc1, {1, {0x2cc0 }}},
+ { 0x2cc3, {1, {0x2cc2 }}},
+ { 0x2cc5, {1, {0x2cc4 }}},
+ { 0x2cc7, {1, {0x2cc6 }}},
+ { 0x2cc9, {1, {0x2cc8 }}},
+ { 0x2ccb, {1, {0x2cca }}},
+ { 0x2ccd, {1, {0x2ccc }}},
+ { 0x2ccf, {1, {0x2cce }}},
+ { 0x2cd1, {1, {0x2cd0 }}},
+ { 0x2cd3, {1, {0x2cd2 }}},
+ { 0x2cd5, {1, {0x2cd4 }}},
+ { 0x2cd7, {1, {0x2cd6 }}},
+ { 0x2cd9, {1, {0x2cd8 }}},
+ { 0x2cdb, {1, {0x2cda }}},
+ { 0x2cdd, {1, {0x2cdc }}},
+ { 0x2cdf, {1, {0x2cde }}},
+ { 0x2ce1, {1, {0x2ce0 }}},
+ { 0x2ce3, {1, {0x2ce2 }}},
+ { 0x2d00, {1, {0x10a0 }}},
+ { 0x2d01, {1, {0x10a1 }}},
+ { 0x2d02, {1, {0x10a2 }}},
+ { 0x2d03, {1, {0x10a3 }}},
+ { 0x2d04, {1, {0x10a4 }}},
+ { 0x2d05, {1, {0x10a5 }}},
+ { 0x2d06, {1, {0x10a6 }}},
+ { 0x2d07, {1, {0x10a7 }}},
+ { 0x2d08, {1, {0x10a8 }}},
+ { 0x2d09, {1, {0x10a9 }}},
+ { 0x2d0a, {1, {0x10aa }}},
+ { 0x2d0b, {1, {0x10ab }}},
+ { 0x2d0c, {1, {0x10ac }}},
+ { 0x2d0d, {1, {0x10ad }}},
+ { 0x2d0e, {1, {0x10ae }}},
+ { 0x2d0f, {1, {0x10af }}},
+ { 0x2d10, {1, {0x10b0 }}},
+ { 0x2d11, {1, {0x10b1 }}},
+ { 0x2d12, {1, {0x10b2 }}},
+ { 0x2d13, {1, {0x10b3 }}},
+ { 0x2d14, {1, {0x10b4 }}},
+ { 0x2d15, {1, {0x10b5 }}},
+ { 0x2d16, {1, {0x10b6 }}},
+ { 0x2d17, {1, {0x10b7 }}},
+ { 0x2d18, {1, {0x10b8 }}},
+ { 0x2d19, {1, {0x10b9 }}},
+ { 0x2d1a, {1, {0x10ba }}},
+ { 0x2d1b, {1, {0x10bb }}},
+ { 0x2d1c, {1, {0x10bc }}},
+ { 0x2d1d, {1, {0x10bd }}},
+ { 0x2d1e, {1, {0x10be }}},
+ { 0x2d1f, {1, {0x10bf }}},
+ { 0x2d20, {1, {0x10c0 }}},
+ { 0x2d21, {1, {0x10c1 }}},
+ { 0x2d22, {1, {0x10c2 }}},
+ { 0x2d23, {1, {0x10c3 }}},
+ { 0x2d24, {1, {0x10c4 }}},
+ { 0x2d25, {1, {0x10c5 }}},
+ { 0xff41, {1, {0xff21 }}},
+ { 0xff42, {1, {0xff22 }}},
+ { 0xff43, {1, {0xff23 }}},
+ { 0xff44, {1, {0xff24 }}},
+ { 0xff45, {1, {0xff25 }}},
+ { 0xff46, {1, {0xff26 }}},
+ { 0xff47, {1, {0xff27 }}},
+ { 0xff48, {1, {0xff28 }}},
+ { 0xff49, {1, {0xff29 }}},
+ { 0xff4a, {1, {0xff2a }}},
+ { 0xff4b, {1, {0xff2b }}},
+ { 0xff4c, {1, {0xff2c }}},
+ { 0xff4d, {1, {0xff2d }}},
+ { 0xff4e, {1, {0xff2e }}},
+ { 0xff4f, {1, {0xff2f }}},
+ { 0xff50, {1, {0xff30 }}},
+ { 0xff51, {1, {0xff31 }}},
+ { 0xff52, {1, {0xff32 }}},
+ { 0xff53, {1, {0xff33 }}},
+ { 0xff54, {1, {0xff34 }}},
+ { 0xff55, {1, {0xff35 }}},
+ { 0xff56, {1, {0xff36 }}},
+ { 0xff57, {1, {0xff37 }}},
+ { 0xff58, {1, {0xff38 }}},
+ { 0xff59, {1, {0xff39 }}},
+ { 0xff5a, {1, {0xff3a }}},
+ { 0x10428, {1, {0x10400 }}},
+ { 0x10429, {1, {0x10401 }}},
+ { 0x1042a, {1, {0x10402 }}},
+ { 0x1042b, {1, {0x10403 }}},
+ { 0x1042c, {1, {0x10404 }}},
+ { 0x1042d, {1, {0x10405 }}},
+ { 0x1042e, {1, {0x10406 }}},
+ { 0x1042f, {1, {0x10407 }}},
+ { 0x10430, {1, {0x10408 }}},
+ { 0x10431, {1, {0x10409 }}},
+ { 0x10432, {1, {0x1040a }}},
+ { 0x10433, {1, {0x1040b }}},
+ { 0x10434, {1, {0x1040c }}},
+ { 0x10435, {1, {0x1040d }}},
+ { 0x10436, {1, {0x1040e }}},
+ { 0x10437, {1, {0x1040f }}},
+ { 0x10438, {1, {0x10410 }}},
+ { 0x10439, {1, {0x10411 }}},
+ { 0x1043a, {1, {0x10412 }}},
+ { 0x1043b, {1, {0x10413 }}},
+ { 0x1043c, {1, {0x10414 }}},
+ { 0x1043d, {1, {0x10415 }}},
+ { 0x1043e, {1, {0x10416 }}},
+ { 0x1043f, {1, {0x10417 }}},
+ { 0x10440, {1, {0x10418 }}},
+ { 0x10441, {1, {0x10419 }}},
+ { 0x10442, {1, {0x1041a }}},
+ { 0x10443, {1, {0x1041b }}},
+ { 0x10444, {1, {0x1041c }}},
+ { 0x10445, {1, {0x1041d }}},
+ { 0x10446, {1, {0x1041e }}},
+ { 0x10447, {1, {0x1041f }}},
+ { 0x10448, {1, {0x10420 }}},
+ { 0x10449, {1, {0x10421 }}},
+ { 0x1044a, {1, {0x10422 }}},
+ { 0x1044b, {1, {0x10423 }}},
+ { 0x1044c, {1, {0x10424 }}},
+ { 0x1044d, {1, {0x10425 }}},
+ { 0x1044e, {1, {0x10426 }}},
+ { 0x1044f, {1, {0x10427 }}}
+};
+
+static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = {
+ { 0x0069, {1, {0x0049 }}}
+};
+
+/* Two-code-point unfold table.  Each entry is
+ *   { {from0, from1}, {n, {to...}} }
+ * i.e. a sequence of two code points followed by the count and list of
+ * single code points associated with it.  init_case_fold_table() loads
+ * these entries into Unfold2Table keyed by the two-element `from` array. */
+static const CaseUnfold_12_Type CaseUnfold_12[] = {
+ { {0x0061, 0x02be}, {1, {0x1e9a }}},
+ { {0x0066, 0x0066}, {1, {0xfb00 }}},
+ { {0x0066, 0x0069}, {1, {0xfb01 }}},
+ { {0x0066, 0x006c}, {1, {0xfb02 }}},
+ { {0x0068, 0x0331}, {1, {0x1e96 }}},
+ { {0x006a, 0x030c}, {1, {0x01f0 }}},
+ { {0x0073, 0x0073}, {1, {0x00df }}},
+ { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}},
+ { {0x0074, 0x0308}, {1, {0x1e97 }}},
+ { {0x0077, 0x030a}, {1, {0x1e98 }}},
+ { {0x0079, 0x030a}, {1, {0x1e99 }}},
+ { {0x02bc, 0x006e}, {1, {0x0149 }}},
+ { {0x03ac, 0x03b9}, {1, {0x1fb4 }}},
+ { {0x03ae, 0x03b9}, {1, {0x1fc4 }}},
+ { {0x03b1, 0x0342}, {1, {0x1fb6 }}},
+ { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}},
+ { {0x03b7, 0x0342}, {1, {0x1fc6 }}},
+ { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}},
+ { {0x03b9, 0x0342}, {1, {0x1fd6 }}},
+ { {0x03c1, 0x0313}, {1, {0x1fe4 }}},
+ { {0x03c5, 0x0313}, {1, {0x1f50 }}},
+ { {0x03c5, 0x0342}, {1, {0x1fe6 }}},
+ { {0x03c9, 0x0342}, {1, {0x1ff6 }}},
+ { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}},
+ { {0x03ce, 0x03b9}, {1, {0x1ff4 }}},
+ { {0x0565, 0x0582}, {1, {0x0587 }}},
+ { {0x0574, 0x0565}, {1, {0xfb14 }}},
+ { {0x0574, 0x056b}, {1, {0xfb15 }}},
+ { {0x0574, 0x056d}, {1, {0xfb17 }}},
+ { {0x0574, 0x0576}, {1, {0xfb13 }}},
+ { {0x057e, 0x0576}, {1, {0xfb16 }}},
+ { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}},
+ { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}},
+ { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}},
+ { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}},
+ { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}},
+ { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}},
+ { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}},
+ { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}},
+ { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}},
+ { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}},
+ { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}},
+ { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}},
+ { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}},
+ { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}},
+ { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}},
+ { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}},
+ { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}},
+ { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}},
+ { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}},
+ { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}},
+ { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}},
+ { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}},
+ { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}},
+ { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}},
+ { {0x1f70, 0x03b9}, {1, {0x1fb2 }}},
+ { {0x1f74, 0x03b9}, {1, {0x1fc2 }}},
+ { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}}
+};
+
+/* Locale-dependent two-code-point unfold entry ({0x0069, 0x0307} -> 0x0130).
+ * Always loaded into Unfold2Table; onigenc_unicode_apply_all_case_fold()
+ * reports it only when the Turkish/Azeri flag is not set. */
+static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = {
+ { {0x0069, 0x0307}, {1, {0x0130 }}}
+};
+
+/* Three-code-point unfold table: { {from0, from1, from2}, {n, {to...}} }.
+ * init_case_fold_table() loads these entries into Unfold3Table keyed by
+ * the three-element `from` array. */
+static const CaseUnfold_13_Type CaseUnfold_13[] = {
+ { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}},
+ { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}},
+ { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}},
+ { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}},
+ { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}},
+ { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}},
+ { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}},
+ { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}},
+ { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}},
+ { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}},
+ { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}},
+ { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}},
+ { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}},
+ { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}}
+};
+
+/* Element count of a real array (must not be used on a pointer); the result
+ * is cast to int so it can be compared with signed loop counters. */
+#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
+/* Number of entries in CodeRanges; ctype values at or above it are rejected. */
+#define CODE_RANGES_NUM numberof(CodeRanges)
+
+/*
+ * Test whether code point `code` belongs to character type `ctype`.
+ *
+ * Code points below 256 are answered by the ISO-8859-1 ctype macro (when
+ * USE_UNICODE_PROPERTIES is defined, only for ctype values up to
+ * ONIGENC_MAX_STD_CTYPE).  Everything else is looked up in
+ * CodeRanges[ctype]; a ctype outside that table yields ONIGERR_TYPE_BUG.
+ */
+extern int
+onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
+{
+#ifdef USE_UNICODE_PROPERTIES
+  if (code < 256 && ctype <= ONIGENC_MAX_STD_CTYPE)
+#else
+  if (code < 256)
+#endif
+    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+
+  if (ctype >= CODE_RANGES_NUM)
+    return ONIGERR_TYPE_BUG;
+
+  return onig_is_in_code_range((UChar* )CodeRanges[ctype], code);
+}
+
+
+/*
+ * Store in *ranges the code-range table for character type `ctype`.
+ *
+ * Returns 0 on success, or ONIGERR_TYPE_BUG when `ctype` does not index
+ * CodeRanges.  `ctype` is a signed int, so negative values are rejected
+ * explicitly; checking only the upper bound would let them read before
+ * the start of the table.
+ */
+extern int
+onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
+{
+  if (ctype < 0 || ctype >= CODE_RANGES_NUM) {
+    return ONIGERR_TYPE_BUG;
+  }
+
+  *ranges = CodeRanges[ctype];
+
+  return 0;
+}
+
+/*
+ * Code-range lookup wrapper: sets *sb_out to 0x00 and forwards the lookup
+ * to onigenc_unicode_ctype_code_range(), returning its result.
+ */
+extern int
+onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
+                                      const OnigCodePoint* ranges[],
+                                      struct OnigEncodingTypeST* enc ARG_UNUSED)
+{
+  int status;
+
+  *sb_out = 0x00;
+  status = onigenc_unicode_ctype_code_range(ctype, ranges);
+  return status;
+}
+
+#include "st.h"
+
+#define PROPERTY_NAME_MAX_SIZE MAX_WORD_LENGTH
+
+/*
+ * Translate a property name in [name, end) into a ctype number.
+ *
+ * The name is normalized before lookup: ' ', '-' and '_' are dropped and
+ * the remaining characters are lower-cased.  Any code point >= 0x80, a
+ * normalized name that does not fit in PROPERTY_NAME_MAX_SIZE (including
+ * the terminating 0), or a name unknown to uniname2ctype() yields
+ * ONIGERR_INVALID_CHAR_PROPERTY_NAME.
+ */
+extern int
+onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
+{
+  UChar normalized[PROPERTY_NAME_MAX_SIZE];
+  UChar *cur = name;
+  OnigCodePoint code;
+  int used = 0;
+  int ctype;
+
+  while (cur < end) {
+    code = ONIGENC_MBC_TO_CODE(enc, cur, end);
+
+    /* separators are ignored; everything else must be ASCII */
+    if (code != ' ' && code != '-' && code != '_') {
+      if (code >= 0x80)
+        return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+      normalized[used++] = (UChar )TOLOWER((unsigned char)code);
+      /* keep one slot free for the terminator written below */
+      if (used >= PROPERTY_NAME_MAX_SIZE)
+        return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+    }
+
+    cur += enclen(enc, cur, end);
+  }
+
+  normalized[used] = 0;
+
+  ctype = uniname2ctype(normalized, used);
+  if (ctype < 0)
+    return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+  return ctype;
+}
+
+
+/* st comparison callback for two-code-point keys: 0 when both elements
+ * match, 1 otherwise. */
+static int
+code2_cmp(OnigCodePoint* x, OnigCodePoint* y)
+{
+  return (x[0] != y[0] || x[1] != y[1]);
+}
+
+/* st hash callback for two-code-point keys: the sum of both elements,
+ * computed in code-point arithmetic and then converted to st_index_t. */
+static st_index_t
+code2_hash(OnigCodePoint* x)
+{
+  const OnigCodePoint first = x[0];
+  const OnigCodePoint second = x[1];
+
+  return (st_index_t )(first + second);
+}
+
+/* st hash-type descriptor (compare + hash callbacks) used for Unfold2Table,
+ * whose keys are arrays of two code points. */
+static const struct st_hash_type type_code2_hash = {
+ code2_cmp,
+ code2_hash,
+};
+
+/* st comparison callback for three-code-point keys: 0 when all three
+ * elements match, 1 otherwise. */
+static int
+code3_cmp(OnigCodePoint* x, OnigCodePoint* y)
+{
+  return (x[0] != y[0] || x[1] != y[1] || x[2] != y[2]);
+}
+
+/* st hash callback for three-code-point keys: the sum of the three
+ * elements, computed in code-point arithmetic and then converted to
+ * st_index_t. */
+static st_index_t
+code3_hash(OnigCodePoint* x)
+{
+  const OnigCodePoint first = x[0];
+  const OnigCodePoint second = x[1];
+  const OnigCodePoint third = x[2];
+
+  return (st_index_t )(first + second + third);
+}
+
+/* st hash-type descriptor (compare + hash callbacks) used for Unfold3Table,
+ * whose keys are arrays of three code points. */
+static const struct st_hash_type type_code3_hash = {
+ code3_cmp,
+ code3_hash,
+};
+
+
+/* Lookup tables built once by init_case_fold_table(). */
+static st_table* FoldTable; /* fold-1, fold-2, fold-3 */
+static st_table* Unfold1Table; /* keyed by one code point (CaseUnfold_11*) */
+static st_table* Unfold2Table; /* keyed by two code points (CaseUnfold_12*) */
+static st_table* Unfold3Table; /* keyed by three code points (CaseUnfold_13) */
+static int CaseFoldInited = 0; /* set to 1 once all four tables are filled */
+
+/*
+ * Build FoldTable and Unfold1Table..Unfold3Table from the static case
+ * folding arrays and set CaseFoldInited.
+ *
+ * Returns 0 on success or ONIGERR_MEMORY when a table cannot be allocated.
+ * Every exit path goes through THREAD_ATOMIC_END so the atomic section
+ * opened at the top is always closed, including on allocation failure.
+ * Tables allocated before a failure are left in place and CaseFoldInited
+ * stays 0.
+ */
+static int init_case_fold_table(void)
+{
+  const CaseFold_11_Type *p;
+  const CaseUnfold_11_Type *p1;
+  const CaseUnfold_12_Type *p2;
+  const CaseUnfold_13_Type *p3;
+  int i;
+  int result = ONIGERR_MEMORY;  /* assume failure until everything is built */
+
+  THREAD_ATOMIC_START;
+
+  FoldTable = st_init_numtable_with_size(1200);
+  if (ONIG_IS_NULL(FoldTable)) goto done;
+  for (i = 0; i < numberof(CaseFold); i++) {
+    p = &CaseFold[i];
+    st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
+  }
+  for (i = 0; i < numberof(CaseFold_Locale); i++) {
+    p = &CaseFold_Locale[i];
+    st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
+  }
+
+  Unfold1Table = st_init_numtable_with_size(1000);
+  if (ONIG_IS_NULL(Unfold1Table)) goto done;
+
+  for (i = 0; i < numberof(CaseUnfold_11); i++) {
+    p1 = &CaseUnfold_11[i];
+    st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
+  }
+  for (i = 0; i < numberof(CaseUnfold_11_Locale); i++) {
+    p1 = &CaseUnfold_11_Locale[i];
+    st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
+  }
+
+  Unfold2Table = st_init_table_with_size(&type_code2_hash, 200);
+  if (ONIG_IS_NULL(Unfold2Table)) goto done;
+
+  for (i = 0; i < numberof(CaseUnfold_12); i++) {
+    p2 = &CaseUnfold_12[i];
+    st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
+  }
+  for (i = 0; i < numberof(CaseUnfold_12_Locale); i++) {
+    p2 = &CaseUnfold_12_Locale[i];
+    st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
+  }
+
+  Unfold3Table = st_init_table_with_size(&type_code3_hash, 30);
+  if (ONIG_IS_NULL(Unfold3Table)) goto done;
+
+  for (i = 0; i < numberof(CaseUnfold_13); i++) {
+    p3 = &CaseUnfold_13[i];
+    st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
+  }
+
+  CaseFoldInited = 1;
+  result = 0;
+
+ done:
+  THREAD_ATOMIC_END;
+  return result;
+}
+
+/*
+ * Case-fold the character at *pp into `fold` and advance *pp past it.
+ *
+ * Returns the number of bytes written to `fold`.  When the code point has
+ * an entry in FoldTable, each folded code point is encoded in turn;
+ * otherwise the character's bytes are copied unchanged.  With
+ * USE_UNICODE_CASE_FOLD_TURKISH_AZERI and the Turkish/Azeri flag set,
+ * U+0049 folds to U+0131 and U+0130 folds to U+0069.
+ *
+ * NOTE(review): the result of init_case_fold_table() is not checked here.
+ */
+extern int
+onigenc_unicode_mbc_case_fold(OnigEncoding enc,
+    OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
+    UChar* fold)
+{
+  const UChar *src = *pp;
+  CodePointList3 *folded;
+  OnigCodePoint code;
+  int char_len, written, k;
+
+  if (CaseFoldInited == 0) init_case_fold_table();
+
+  code = ONIGENC_MBC_TO_CODE(enc, src, end);
+  char_len = enclen(enc, src, end);
+  *pp += char_len;
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+    if (code == 0x0049)
+      return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
+    if (code == 0x0130)
+      return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
+  }
+#endif
+
+  if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&folded) == 0) {
+    /* no folding known: pass the original bytes through */
+    for (k = 0; k < char_len; k++)
+      fold[k] = src[k];
+    return char_len;
+  }
+
+  /* encode each folded code point back to back */
+  written = 0;
+  for (k = 0; k < folded->n; k++) {
+    int w = ONIGENC_CODE_TO_MBC(enc, folded->code[k], fold);
+    fold += w;
+    written += w;
+  }
+  return written;
+}
+
+/* Report, through `f`, every pairing described by a single-code-point
+ * unfold table: from <-> to[j] in both directions, and each pair of
+ * distinct `to` members in both directions.  Stops at and returns the
+ * first non-zero callback result; returns 0 otherwise. */
+static int
+apply_case_unfold_11(const CaseUnfold_11_Type* table, int count,
+                     OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  OnigCodePoint code;
+  int i, j, k, r;
+
+  for (i = 0; i < count; i++) {
+    const CaseUnfold_11_Type* e = &table[i];
+
+    for (j = 0; j < e->to.n; j++) {
+      code = e->from;
+      r = (*f)(e->to.code[j], &code, 1, arg);
+      if (r != 0) return r;
+
+      code = e->to.code[j];
+      r = (*f)(e->from, &code, 1, arg);
+      if (r != 0) return r;
+
+      for (k = 0; k < j; k++) {
+        r = (*f)(e->to.code[j], (OnigCodePoint* )(&e->to.code[k]), 1, arg);
+        if (r != 0) return r;
+
+        r = (*f)(e->to.code[k], (OnigCodePoint* )(&e->to.code[j]), 1, arg);
+        if (r != 0) return r;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/* Report, through `f`, every pairing described by a two-code-point unfold
+ * table: to[j] -> the two-element `from` sequence, and to[j] -> every other
+ * `to` member.  Stops at and returns the first non-zero callback result;
+ * returns 0 otherwise. */
+static int
+apply_case_unfold_12(const CaseUnfold_12_Type* table, int count,
+                     OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  int i, j, k, r;
+
+  for (i = 0; i < count; i++) {
+    const CaseUnfold_12_Type* e = &table[i];
+
+    for (j = 0; j < e->to.n; j++) {
+      r = (*f)(e->to.code[j], (OnigCodePoint* )e->from, 2, arg);
+      if (r != 0) return r;
+
+      for (k = 0; k < e->to.n; k++) {
+        if (k == j) continue;
+
+        r = (*f)(e->to.code[j], (OnigCodePoint* )(&e->to.code[k]), 1, arg);
+        if (r != 0) return r;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Invoke `f` for every case-fold pairing known to the Unicode tables.
+ *
+ * Single-code-point pairings are always reported.  Multi-code-point
+ * pairings (CaseUnfold_12*, CaseUnfold_13) are reported only when `flag`
+ * contains INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR.  When
+ * USE_UNICODE_CASE_FOLD_TURKISH_AZERI is defined and the Turkish/Azeri
+ * flag is set, the locale tables are replaced by the fixed pairs
+ * 0x0049 <-> 0x0131 and 0x0069 <-> 0x0130.
+ *
+ * Returns 0 after all callbacks succeeded, or the first non-zero value
+ * returned by `f`.
+ */
+extern int
+onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
+                                    OnigApplyAllCaseFoldFunc f, void* arg,
+                                    OnigEncoding enc ARG_UNUSED)
+{
+  int i, j, k, r;
+
+  /* if (CaseFoldInited == 0) init_case_fold_table(); */
+
+  r = apply_case_unfold_11(CaseUnfold_11, numberof(CaseUnfold_11), f, arg);
+  if (r != 0) return r;
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+    OnigCodePoint code;
+
+    code = 0x0131;
+    r = (*f)(0x0049, &code, 1, arg);
+    if (r != 0) return r;
+    code = 0x0049;
+    r = (*f)(0x0131, &code, 1, arg);
+    if (r != 0) return r;
+
+    code = 0x0130;
+    r = (*f)(0x0069, &code, 1, arg);
+    if (r != 0) return r;
+    code = 0x0069;
+    r = (*f)(0x0130, &code, 1, arg);
+    if (r != 0) return r;
+  }
+  else {
+#endif
+    r = apply_case_unfold_11(CaseUnfold_11_Locale,
+                             numberof(CaseUnfold_11_Locale), f, arg);
+    if (r != 0) return r;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  }
+#endif
+
+  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    r = apply_case_unfold_12(CaseUnfold_12, numberof(CaseUnfold_12), f, arg);
+    if (r != 0) return r;
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
+#endif
+      r = apply_case_unfold_12(CaseUnfold_12_Locale,
+                               numberof(CaseUnfold_12_Locale), f, arg);
+      if (r != 0) return r;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    }
+#endif
+
+    for (i = 0; i < numberof(CaseUnfold_13); i++) {
+      for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
+        r = (*f)(CaseUnfold_13[i].to.code[j],
+                 (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
+        if (r != 0) return r;
+
+        for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
+          if (k == j) continue;
+
+          r = (*f)(CaseUnfold_13[i].to.code[j],
+                   (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg);
+          if (r != 0) return r;
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Collect into items[] the case-fold alternatives for the text starting
+ * at p, and return how many items were written.
+ *
+ * Each item records the number of input bytes it stands for (byte_len),
+ * the number of code points in the alternative (code_len) and the code
+ * points themselves.  Multi-character alternatives are produced only when
+ * `flag` contains INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR.
+ *
+ * NOTE(review): items[] is written without any capacity check, and the
+ * result of init_case_fold_table() is ignored -- confirm callers size the
+ * array accordingly.
+ */
+extern int
+onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
+ OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[])
+{
+ int n, i, j, k, len;
+ OnigCodePoint code, codes[3];
+ CodePointList3 *to, *z3;
+ CodePointList2 *z2;
+
+ if (CaseFoldInited == 0) init_case_fold_table();
+
+ n = 0;
+
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ len = enclen(enc, p, end);
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ /* Turkish/Azeri: the four dotted/dotless i code points each have exactly
+ one fixed partner and bypass the tables entirely. */
+ if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+ if (code == 0x0049) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0131;
+ return 1;
+ }
+ else if (code == 0x0130) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0069;
+ return 1;
+ }
+ else if (code == 0x0131) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0049;
+ return 1;
+ }
+ else if (code == 0x0069) {
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = 0x0130;
+ return 1;
+ }
+ }
+#endif
+
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
+ if (to->n == 1) {
+ /* code folds to one code point: emit the folded form, then every other
+ code point that unfolds from it (excluding the input itself) */
+ OnigCodePoint orig_code = code;
+
+ items[0].byte_len = len;
+ items[0].code_len = 1;
+ items[0].code[0] = to->code[0];
+ n++;
+
+ code = to->code[0];
+ if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
+ for (i = 0; i < to->n; i++) {
+ if (to->code[i] != orig_code) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = to->code[i];
+ n++;
+ }
+ }
+ }
+ }
+ else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ /* code folds to several code points: cs[fn] holds folded code point fn
+ followed by its Unfold1Table entries, ncs[fn] is how many are stored */
+ OnigCodePoint cs[3][4];
+ int fn, ncs[3];
+
+ for (fn = 0; fn < to->n; fn++) {
+ cs[fn][0] = to->code[fn];
+ if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0],
+ (void* )&z3) != 0) {
+ for (i = 0; i < z3->n; i++) {
+ cs[fn][i+1] = z3->code[i];
+ }
+ ncs[fn] = z3->n + 1;
+ }
+ else
+ ncs[fn] = 1;
+ }
+
+ if (fn == 2) {
+ /* every combination of the two positions */
+ for (i = 0; i < ncs[0]; i++) {
+ for (j = 0; j < ncs[1]; j++) {
+ items[n].byte_len = len;
+ items[n].code_len = 2;
+ items[n].code[0] = cs[0][i];
+ items[n].code[1] = cs[1][j];
+ n++;
+ }
+ }
+
+ /* other single code points tied to the same two-code-point sequence */
+ if (onig_st_lookup(Unfold2Table, (st_data_t )to->code,
+ (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ if (z2->code[i] == code) continue;
+
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+ }
+ else {
+ /* every combination of the three positions */
+ for (i = 0; i < ncs[0]; i++) {
+ for (j = 0; j < ncs[1]; j++) {
+ for (k = 0; k < ncs[2]; k++) {
+ items[n].byte_len = len;
+ items[n].code_len = 3;
+ items[n].code[0] = cs[0][i];
+ items[n].code[1] = cs[1][j];
+ items[n].code[2] = cs[2][k];
+ n++;
+ }
+ }
+ }
+
+ /* other single code points tied to the same three-code-point sequence */
+ if (onig_st_lookup(Unfold3Table, (st_data_t )to->code,
+ (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ if (z2->code[i] == code) continue;
+
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+ }
+
+ /* multi char folded code is not head of another folded multi char */
+ flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
+ }
+ }
+ else {
+ /* code has no FoldTable entry: list whatever unfolds from it directly */
+ if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
+ for (i = 0; i < to->n; i++) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = to->code[i];
+ n++;
+ }
+ }
+ }
+
+
+ /* Look ahead: does the (folded) first character plus the next one or two
+ folded characters form a sequence known to Unfold2Table / Unfold3Table?
+ byte_len of such items covers all the characters consumed. */
+ if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+ p += len;
+ if (p < end) {
+ int clen;
+
+ codes[0] = code;
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
+ && to->n == 1) {
+ codes[1] = to->code[0];
+ }
+ else
+ codes[1] = code;
+
+ clen = enclen(enc, p, end);
+ len += clen;
+ if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+
+ p += clen;
+ if (p < end) {
+ code = ONIGENC_MBC_TO_CODE(enc, p, end);
+ if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
+ && to->n == 1) {
+ codes[2] = to->code[0];
+ }
+ else
+ codes[2] = code;
+
+ clen = enclen(enc, p, end);
+ len += clen;
+ if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
+ (void* )&z2) != 0) {
+ for (i = 0; i < z2->n; i++) {
+ items[n].byte_len = len;
+ items[n].code_len = 1;
+ items[n].code[0] = z2->code[i];
+ n++;
+ }
+ }
+ }
+ }
+ }
+
+ return n;
+}
+#endif //INCLUDE_ENCODING
diff --git a/src/us_ascii.c b/src/us_ascii.c
new file mode 100644
index 000000000..b6e3f50cf
--- /dev/null
+++ b/src/us_ascii.c
@@ -0,0 +1,34 @@
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include "regenc.h"
+
+/* Length callback for US-ASCII: a byte with the high bit set is reported
+ * as invalid, any other byte is a complete one-byte character. */
+static int
+us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
+{
+  return ((*p & 0x80) == 0) ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
+                              ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+}
+
+/* Encoding descriptor for US-ASCII: a single-byte encoding that uses the
+ * shared single-byte/ASCII helpers for everything except the length
+ * callback, which rejects bytes >= 0x80.  The ENC_ALIAS lines register
+ * alternative names for the same encoding. */
+OnigEncodingDefine(us_ascii, US_ASCII) = {
+ us_ascii_mbc_enc_len,
+ "US-ASCII",/* name */
+ 1, /* max byte length */
+ 1, /* min byte length */
+ onigenc_is_mbc_newline_0x0a,
+ onigenc_single_byte_mbc_to_code,
+ onigenc_single_byte_code_to_mbclen,
+ onigenc_single_byte_code_to_mbc,
+ onigenc_ascii_mbc_case_fold,
+ onigenc_ascii_apply_all_case_fold,
+ onigenc_ascii_get_case_fold_codes_by_str,
+ onigenc_minimum_property_name_to_ctype,
+ onigenc_ascii_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ onigenc_single_byte_left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
+ENC_ALIAS("ASCII", "US-ASCII")
+ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII")
+ENC_ALIAS("646", "US-ASCII")
+#endif //INCLUDE_ENCODING
diff --git a/src/utf_8.c b/src/utf_8.c
new file mode 100644
index 000000000..9af010d4a
--- /dev/null
+++ b/src/utf_8.c
@@ -0,0 +1,460 @@
+/**********************************************************************
+ utf_8.c - Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "mruby.h"
+#ifdef INCLUDE_ENCODING
+#include "regenc.h"
+
+#define USE_INVALID_CODE_SCHEME
+
+#ifdef USE_INVALID_CODE_SCHEME
+/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
+#define INVALID_CODE_FE 0xfffffffe
+#define INVALID_CODE_FF 0xffffffff
+#define VALID_CODE_LIMIT 0x7fffffff
+#endif
+
+#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
+
+/* Sequence length indexed by the first byte: 1 for 0x00-0xbf, 2 for
+ * 0xc0-0xdf, 3 for 0xe0-0xef, 4 for 0xf0-0xf4 and 1 for 0xf5-0xff.
+ * mbc_enc_len() uses it to compute how many more bytes are needed when
+ * the input ends in the middle of a sequence. */
+static const int EncLen_UTF8[] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+/* States of the byte-level automaton that mbc_enc_len() runs over a UTF-8
+ * sequence.  FAILURE (-2) and ACCEPT (-1) are terminal; S0..S7 (0..7) are
+ * live states and double as row indexes into trans[]. */
+typedef enum {
+ FAILURE = -2,
+ ACCEPT,
+ S0, S1, S2, S3,
+ S4, S5, S6, S7
+} state_t;
+/* Short aliases used only to keep the table below readable. */
+#define A ACCEPT
+#define F FAILURE
+/* trans[state][byte] is the state reached after reading `byte` while in
+ * `state`; S0 is the row for the first byte of a sequence.  Plain numbers
+ * in the table are the S<n> state indexes. */
+static const signed char trans[][0x100] = {
+ { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* c */ F, F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* e */ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
+ /* f */ 5, 6, 6, 6, 7, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S4 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S5 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S6 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+ { /* S7 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* 8 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+ /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+ },
+};
+#undef A
+#undef F
+
+/*
+ * Classify the UTF-8 sequence starting at p (at most four bytes).
+ *
+ * Walks the trans[] automaton one byte at a time and returns
+ *   - CHARFOUND(k)  when the automaton accepts after k bytes,
+ *   - INVALID       when it fails (or has not accepted after four bytes),
+ *   - NEEDMORE(m)   when `e` is reached first; m is the expected length for
+ *                   the first byte minus the bytes already consumed.
+ * The first byte is read without comparing p against e.
+ */
+static int
+mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
+{
+  const int lead = *p++;
+  state_t st = trans[0][lead];
+  int consumed;
+
+  for (consumed = 1; ; consumed++) {
+    if (st < 0) {
+      /* terminal state: either accepted or rejected */
+      return st == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(consumed) :
+                            ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+    }
+    if (consumed == 4) break;
+
+    if (p == e)
+      return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[lead]-consumed);
+    st = trans[st][*p++];
+  }
+
+  /* still in a live state after four bytes */
+  return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+}
+
+/*
+ * Return 1 when the bytes at p start a line terminator, 0 otherwise.
+ *
+ * 0x0a always counts.  With USE_UNICODE_ALL_LINE_TERMINATORS the encoded
+ * forms of U+0085, U+2028 and U+2029 count as well, and so does 0x0d
+ * unless USE_CRNL_AS_LINE_TERMINATOR is defined.
+ */
+static int
+is_mbc_newline(const UChar* p, const UChar* end, OnigEncoding enc)
+{
+  if (!(p < end)) return 0;
+
+  if (*p == 0x0a) return 1;
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+  if (*p == 0x0d) return 1;
+#endif
+  if (p + 1 < end) {
+    if (p[0] == 0xc2 && p[1] == 0x85) /* U+0085 */
+      return 1;
+    if (p + 2 < end
+        && p[0] == 0xe2 && p[1] == 0x80
+        && (p[2] == 0xa8 || p[2] == 0xa9)) /* U+2028, U+2029 */
+      return 1;
+  }
+#endif
+
+  return 0;
+}
+
+/*
+ * Decode the character at p into a code point.
+ *
+ * The length comes from enclen().  For a multi-byte character the payload
+ * bits of the first byte are combined with the low six bits of each
+ * following byte.  For a length of one (or less) the byte itself is
+ * returned, except that with USE_INVALID_CODE_SCHEME the bytes 0xfe and
+ * 0xff map to INVALID_CODE_FE / INVALID_CODE_FF.
+ */
+static OnigCodePoint
+mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
+{
+  int total, lead, tail;
+  OnigCodePoint cp;
+
+  total = enclen(enc, p, end);
+  lead = *p++;
+
+  if (!(total > 1)) {
+#ifdef USE_INVALID_CODE_SCHEME
+    if (lead > 0xfd) {
+      return ((lead == 0xfe) ? INVALID_CODE_FE : INVALID_CODE_FF);
+    }
+#endif
+    return (OnigCodePoint )lead;
+  }
+
+  tail = total - 1;                      /* continuation bytes to read */
+  cp = lead & ((1 << (6 - tail)) - 1);   /* payload bits of the first byte */
+  for (; tail != 0; tail--) {
+    int c = *p++;
+    cp = (cp << 6) | (c & ((1 << 6) - 1));
+  }
+  return cp;
+}
+
+/*
+ * Number of bytes code_to_mbc() emits for `code`: 1..6 depending on the
+ * highest set bit, 1 for the two virtual invalid code points (when
+ * USE_INVALID_CODE_SCHEME is defined), and ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
+ * for any other value with bit 31 set.
+ */
+static int
+code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
+{
+  if ((code & 0xffffff80) == 0) return 1;
+  if ((code & 0xfffff800) == 0) return 2;
+  if ((code & 0xffff0000) == 0) return 3;
+  if ((code & 0xffe00000) == 0) return 4;
+  if ((code & 0xfc000000) == 0) return 5;
+  if ((code & 0x80000000) == 0) return 6;
+
+#ifdef USE_INVALID_CODE_SCHEME
+  if (code == INVALID_CODE_FE || code == INVALID_CODE_FF) return 1;
+#endif
+
+  return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+}
+
+/*
+ * Encode `code` into buf and return the number of bytes written (1..6).
+ *
+ * With USE_INVALID_CODE_SCHEME the virtual code points INVALID_CODE_FE and
+ * INVALID_CODE_FF are written back as the single bytes 0xfe and 0xff.
+ * Any other value with bit 31 set yields ONIGERR_TOO_BIG_WIDE_CHAR_VALUE.
+ */
+static int
+code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
+{
+#define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
+#define UTF8_TRAIL0(code) (UChar )(((code) & 0x3f) | 0x80)
+
+  UChar *out = buf;
+  int tail;   /* number of continuation bytes that follow the first byte */
+  int shift;
+
+  if ((code & 0xffffff80) == 0) {
+    *buf = (UChar )code;
+    return 1;
+  }
+
+  /* first byte: length marker plus the topmost payload bits */
+  if ((code & 0xfffff800) == 0) {
+    *out++ = (UChar )(((code>>6)& 0x1f) | 0xc0);
+    tail = 1;
+  }
+  else if ((code & 0xffff0000) == 0) {
+    *out++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
+    tail = 2;
+  }
+  else if ((code & 0xffe00000) == 0) {
+    *out++ = (UChar )(((code>>18) & 0x07) | 0xf0);
+    tail = 3;
+  }
+  else if ((code & 0xfc000000) == 0) {
+    *out++ = (UChar )(((code>>24) & 0x03) | 0xf8);
+    tail = 4;
+  }
+  else if ((code & 0x80000000) == 0) {
+    *out++ = (UChar )(((code>>30) & 0x01) | 0xfc);
+    tail = 5;
+  }
+#ifdef USE_INVALID_CODE_SCHEME
+  else if (code == INVALID_CODE_FE) {
+    *out = 0xfe;
+    return 1;
+  }
+  else if (code == INVALID_CODE_FF) {
+    *out = 0xff;
+    return 1;
+  }
+#endif
+  else {
+    return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+  }
+
+  /* middle continuation bytes, most significant group first */
+  for (shift = 6 * (tail - 1); shift > 0; shift -= 6) {
+    *out++ = UTF8_TRAILS(code, shift);
+  }
+
+  /* last continuation byte carries the low six bits */
+  *out++ = UTF8_TRAIL0(code);
+  return (int)(out - buf);
+}
+
+/*
+ * Case-fold the character at *pp into `fold`, advance *pp and return the
+ * number of bytes written.
+ *
+ * Non-ASCII input is delegated to onigenc_unicode_mbc_case_fold().  ASCII
+ * input is lower-cased in place; with USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+ * and the Turkish/Azeri flag set, 0x49 folds to the two bytes 0xc4 0xb1.
+ */
+static int
+mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
+              const UChar* end, UChar* fold, OnigEncoding enc)
+{
+  const UChar* src = *pp;
+
+  if (!ONIGENC_IS_MBC_ASCII(src))
+    return onigenc_unicode_mbc_case_fold(enc, flag, pp, end, fold);
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0 && *src == 0x49) {
+    fold[0] = 0xc4;
+    fold[1] = 0xb1;
+    (*pp)++;
+    return 2;
+  }
+#endif
+
+  *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*src);
+  (*pp)++;
+  return 1; /* return byte length of converted char to lower */
+}
+
+
+/* UTF-8 code-range callback: sets *sb_out to 0x80 and forwards the lookup
+ * to onigenc_unicode_ctype_code_range(), returning its result. */
+static int
+get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
+                     const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
+{
+  int status;
+
+  *sb_out = 0x80;
+  status = onigenc_unicode_ctype_code_range(ctype, ranges);
+  return status;
+}
+
+
+/*
+ * Move s back to the first byte of the character it points into.
+ *
+ * Steps backwards over continuation bytes (10xxxxxx) but never past
+ * `start`; when s is already at or before `start` it is returned as is.
+ */
+static UChar*
+left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED)
+{
+  const UChar *cur = s;
+
+  if (s <= start) return (UChar* )s;
+
+  for (;;) {
+    if (utf8_islead(*cur)) break;
+    if (!(cur > start)) break;
+    cur--;
+  }
+  return (UChar* )cur;
+}
+
+/* UTF-8 callback that adapts the argument order and defers to
+ * onigenc_unicode_get_case_fold_codes_by_str(); returns its item count. */
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+                           const OnigUChar* p, const OnigUChar* end,
+                           OnigCaseFoldCodeItem items[], OnigEncoding enc)
+{
+  int count;
+
+  count = onigenc_unicode_get_case_fold_codes_by_str(enc, flag, p, end, items);
+  return count;
+}
+
+/* Encoding descriptor for UTF-8, wiring the static callbacks of this file
+ * together with the shared Unicode helpers.
+ * NOTE(review): the maximum byte length is declared as 6 (code_to_mbc() can
+ * emit up to 6 bytes), while mbc_enc_len() never reports a character longer
+ * than 4 bytes -- confirm this asymmetry is intended. */
+OnigEncodingDefine(utf_8, UTF_8) = {
+ mbc_enc_len,
+ "UTF-8", /* name */
+ 6, /* max byte length */
+ 1, /* min byte length */
+ is_mbc_newline,
+ mbc_to_code,
+ code_to_mbclen,
+ code_to_mbc,
+ mbc_case_fold,
+ onigenc_unicode_apply_all_case_fold,
+ get_case_fold_codes_by_str,
+ onigenc_unicode_property_name_to_ctype,
+ onigenc_unicode_is_code_ctype,
+ get_ctype_code_range,
+ left_adjust_char_head,
+ onigenc_always_true_is_allowed_reverse_match
+};
+ENC_ALIAS("CP65001", "UTF-8")
+
+/*
+ * Name: UTF8-MAC
+ * Link: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/BPFileSystem.html
+ * Link: http://developer.apple.com/qa/qa2001/qa1235.html
+ * Link: http://developer.apple.com/jp/qa/qa2001/qa1235.html
+ * Link: http://www.gnu.org/software/emacs/NEWS.23.2
+ */
+ENC_REPLICATE("UTF8-MAC", "UTF-8")
+ENC_ALIAS("UTF-8-MAC", "UTF8-MAC")
+ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */
+
+#endif //INCLUDE_ENCODING
diff --git a/src/variable.c b/src/variable.c
new file mode 100644
index 000000000..0bc1f0de1
--- /dev/null
+++ b/src/variable.c
@@ -0,0 +1,453 @@
+#include "mruby.h"
+#include "mruby/class.h"
+#include "ritehash.h"
+#include "variable.h"
+#include "mruby/string.h"
+#include "mruby/range.h"
+#include "error.h"
+#include "mruby/array.h"
+
+#ifdef INCLUDE_REGEXP
+#include "re.h"
+#include "st.h"
+#endif
+
+KHASH_MAP_INIT_INT(iv, mrb_value);
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+/* Pass every value stored in table h to mrb_gc_mark_value(); a NULL table is a no-op. */
+static void
+mark_tbl(mrb_state *mrb, struct kh_iv *h)
+{
+  khiter_t it;
+
+  if (h) {
+    for (it = kh_begin(h); it != kh_end(h); it++) {
+      if (!kh_exist(h, it)) continue;  /* empty bucket */
+      mrb_gc_mark_value(mrb, kh_value(h, it));
+    }
+  }
+}
+
+/* Mark every value held in the interpreter's globals table. */
+void
+mrb_gc_mark_gv(mrb_state *mrb)
+{
+  struct kh_iv *globals = mrb->globals;
+
+  mark_tbl(mrb, globals);
+}
+
+/* Release the globals table with kh_destroy(). */
+void
+mrb_gc_free_gv(mrb_state *mrb)
+{
+  struct kh_iv *globals = mrb->globals;
+
+  kh_destroy(iv, globals);
+}
+
+/* Mark every value stored in obj's instance-variable table. */
+void
+mrb_gc_mark_iv(mrb_state *mrb, struct RObject *obj)
+{
+  struct kh_iv *tbl = obj->iv;
+
+  mark_tbl(mrb, tbl);
+}
+
+/*
+ * Return the number of entries in obj's instance-variable table,
+ * or 0 when the object has no table yet.  mrb is unused.
+ */
+size_t
+mrb_gc_mark_iv_size(mrb_state *mrb, struct RObject *obj)
+{
+  struct kh_iv *h = obj->iv;
+
+  if (!h) return 0;
+  return kh_size(h);
+}
+
+/* Release obj's instance-variable table with kh_destroy(). */
+void
+mrb_gc_free_iv(mrb_state *mrb, struct RObject *obj)
+{
+  struct kh_iv *tbl = obj->iv;
+
+  kh_destroy(iv, tbl);
+}
+
+/* Stub: special variables are not implemented; always yields Fixnum 0. */
+mrb_value
+mrb_vm_special_get(mrb_state *mrb, mrb_sym i)
+{
+  mrb_value placeholder = mrb_fixnum_value(0);
+
+  (void)mrb;
+  (void)i;
+  return placeholder;
+}
+
+/* Stub: special variables are not implemented; the value is discarded. */
+void
+mrb_vm_special_set(mrb_state *mrb, mrb_sym i, mrb_value v)
+{
+  (void)mrb;
+  (void)i;
+  (void)v;
+}
+
+/* Look sym up in table h; nil when absent.  h must not be NULL. */
+static mrb_value
+ivget(mrb_state *mrb, struct kh_iv *h, mrb_sym sym)
+{
+  khiter_t pos = kh_get(iv, h, sym);
+
+  if (pos == kh_end(h)) {
+    return mrb_nil_value();
+  }
+  return kh_value(h, pos);
+}
+
+/* Read instance variable sym from obj; nil when obj has no table or no such entry. */
+mrb_value
+mrb_obj_iv_get(mrb_state *mrb, struct RObject *obj, mrb_sym sym)
+{
+  if (obj->iv) {
+    return ivget(mrb, obj->iv, sym);
+  }
+  return mrb_nil_value();
+}
+
+/* mrb_value front end for mrb_obj_iv_get(). */
+mrb_value
+mrb_iv_get(mrb_state *mrb, mrb_value obj, mrb_sym sym)
+{
+  struct RObject *target = mrb_obj_ptr(obj);
+
+  return mrb_obj_iv_get(mrb, target, sym);
+}
+
+/* Insert or overwrite sym in table h with v.  The kh_put status is not inspected. */
+static void
+ivset(mrb_state *mrb, struct kh_iv *h, mrb_sym sym, mrb_value v)
+{
+  int status;
+  khiter_t slot = kh_put(iv, h, sym, &status);
+
+  kh_value(h, slot) = v;
+}
+
+/*
+ * Store v as instance variable sym of obj.  The table is created on first
+ * use, and the write barrier is issued before the entry is written.
+ */
+void
+mrb_obj_iv_set(mrb_state *mrb, struct RObject *obj, mrb_sym sym, mrb_value v)
+{
+  if (!obj->iv) {
+    obj->iv = kh_init(iv, mrb);
+  }
+  mrb_write_barrier(mrb, (struct RBasic*)obj);
+  ivset(mrb, obj->iv, sym, v);
+}
+
+/* mrb_value front end for mrb_obj_iv_set(). */
+void
+mrb_iv_set(mrb_state *mrb, mrb_value obj, mrb_sym sym, mrb_value v)
+{
+  struct RObject *target = mrb_obj_ptr(obj);
+
+  mrb_obj_iv_set(mrb, target, sym, v);
+}
+
+/* Read instance variable sym of the current self, taken from stack slot 0. */
+mrb_value
+mrb_vm_iv_get(mrb_state *mrb, mrb_sym sym)
+{
+  mrb_value self = mrb->stack[0];
+
+  return mrb_iv_get(mrb, self, sym);
+}
+
+/* Write instance variable sym of the current self, taken from stack slot 0. */
+void
+mrb_vm_iv_set(mrb_state *mrb, mrb_sym sym, mrb_value v)
+{
+  mrb_value self = mrb->stack[0];
+
+  mrb_iv_set(mrb, self, sym, v);
+}
+
+/*
+ * Look up class variable sym, starting at the current frame's target class
+ * and following the super chain.  Returns nil when no class defines it.
+ */
+mrb_value
+mrb_vm_cv_get(mrb_state *mrb, mrb_sym sym)
+{
+  struct RClass *cls;
+
+  for (cls = mrb->ci->target_class; cls; cls = cls->super) {
+    khash_t(iv) *tbl = cls->iv;
+    khiter_t pos;
+
+    if (!tbl) continue;
+    pos = kh_get(iv, tbl, sym);
+    if (pos != kh_end(tbl)) {
+      return kh_value(tbl, pos);
+    }
+  }
+  return mrb_nil_value();
+}
+
+/*
+ * Assign class variable sym.
+ *
+ * The super chain is searched from the current frame's target class; the
+ * first class that already holds sym is updated in place.  If no class
+ * holds it, the variable is created on the target class, reusing that
+ * class's existing table when there is one.
+ */
+void
+mrb_vm_cv_set(mrb_state *mrb, mrb_sym sym, mrb_value v)
+{
+  struct RClass *c = mrb->ci->target_class;
+  khash_t(iv) *h;
+  khiter_t k;
+  int r;
+
+  while (c) {
+    if (c->iv) {
+      h = c->iv;
+      k = kh_get(iv, h, sym);
+      if (k != kh_end(h)) {
+        /* same barrier-then-store order as mrb_obj_iv_set() */
+        mrb_write_barrier(mrb, (struct RBasic*)c);
+        kh_value(h, k) = v;
+        return;   /* must stop here: falling through would redefine on the target */
+      }
+    }
+    c = c->super;
+  }
+
+  /* not defined anywhere: create it on the target class */
+  c = mrb->ci->target_class;
+  h = c->iv;
+  if (!h) {
+    /* allocate only when missing; replacing a live table loses its entries */
+    h = c->iv = kh_init(iv, mrb);
+  }
+  mrb_write_barrier(mrb, (struct RBasic*)c);
+  k = kh_put(iv, h, sym, &r);
+  kh_value(h, k) = v;
+}
+
+/* Return 1 if mod's own table holds sym, else 0.  Ancestors are not searched. */
+int
+mrb_const_defined(mrb_state *mrb, mrb_value mod, mrb_sym sym)
+{
+  struct RClass *cls = mrb_class_ptr(mod);
+  struct kh_iv *tbl = cls->iv;
+
+  if (tbl && kh_get(iv, tbl, sym) != kh_end(tbl)) {
+    return 1;
+  }
+  return 0;
+}
+
+/* Raise TypeError unless mod is tagged as a class or a module. */
+static void
+mod_const_check(mrb_state *mrb, mrb_value mod)
+{
+  if (mod.tt == MRB_TT_CLASS || mod.tt == MRB_TT_MODULE) {
+    return;
+  }
+  mrb_raise(mrb, E_TYPE_ERROR, "constant look-up for non class/module");
+}
+
+/*
+ * Resolve constant sym relative to base.  Search order:
+ *   1. base, then each module returned by repeated
+ *      mrb_class_outer_module() calls;
+ *   2. base's superclass chain.
+ * Raises NameError when nothing matches.
+ */
+static mrb_value
+const_get(mrb_state *mrb, struct RClass *base, mrb_sym sym)
+{
+  struct RClass *cls;
+  khash_t(iv) *tbl;
+  khiter_t pos;
+
+  for (cls = base; cls; cls = mrb_class_outer_module(mrb, cls)) {
+    tbl = cls->iv;
+    if (!tbl) continue;
+    pos = kh_get(iv, tbl, sym);
+    if (pos != kh_end(tbl)) {
+      return kh_value(tbl, pos);
+    }
+  }
+  for (cls = base->super; cls; cls = cls->super) {
+    tbl = cls->iv;
+    if (!tbl) continue;
+    pos = kh_get(iv, tbl, sym);
+    if (pos != kh_end(tbl)) {
+      return kh_value(tbl, pos);
+    }
+  }
+  mrb_raise(mrb, E_NAME_ERROR, "uninitialized constant %s",
+            mrb_sym2name(mrb, sym));
+  /* not reached */
+  return mrb_nil_value();
+}
+
+/* Resolve constant sym relative to mod; mod must be a class or module. */
+mrb_value
+mrb_const_get(mrb_state *mrb, mrb_value mod, mrb_sym sym)
+{
+  struct RClass *base;
+
+  mod_const_check(mrb, mod);
+  base = mrb_class_ptr(mod);
+  return const_get(mrb, base, sym);
+}
+
+/* Resolve constant sym relative to the current frame's target class. */
+mrb_value
+mrb_vm_const_get(mrb_state *mrb, mrb_sym sym)
+{
+  struct RClass *scope = mrb->ci->target_class;
+
+  return const_get(mrb, scope, sym);
+}
+
+/*
+ * Define constant sym on mod.  Constants are stored through mrb_iv_set(),
+ * i.e. in the same table as instance variables.
+ */
+void
+mrb_const_set(mrb_state *mrb, mrb_value mod, mrb_sym sym, mrb_value v)
+{
+  mod_const_check(mrb, mod);   /* raises for non class/module */
+  mrb_iv_set(mrb, mod, sym, v);
+}
+
+/* Define constant sym on the current frame's target class. */
+void
+mrb_vm_const_set(mrb_state *mrb, mrb_sym sym, mrb_value v)
+{
+  struct RObject *scope = (struct RObject*)mrb->ci->target_class;
+
+  mrb_obj_iv_set(mrb, scope, sym, v);
+}
+
+/* Define a constant on mod from a C string name (interned first). */
+void
+mrb_define_const(mrb_state *mrb, struct RClass *mod, const char *name, mrb_value v)
+{
+  mrb_sym sym = mrb_intern(mrb, name);
+
+  mrb_obj_iv_set(mrb, (struct RObject*)mod, sym, v);
+}
+
+/* Define a constant on the Object class. */
+void
+mrb_define_global_const(mrb_state *mrb, const char *name, mrb_value val)
+{
+  struct RClass *object = mrb->object_class;
+
+  mrb_define_const(mrb, object, name, val);
+}
+
+/* Read global variable sym; nil when unset or when no globals table exists yet. */
+mrb_value
+mrb_gv_get(mrb_state *mrb, mrb_sym sym)
+{
+  if (mrb->globals) {
+    return ivget(mrb, mrb->globals, sym);
+  }
+  return mrb_nil_value();
+}
+
+/* Write global variable sym, creating the globals table on first use. */
+void
+mrb_gv_set(mrb_state *mrb, mrb_sym sym, mrb_value v)
+{
+  if (!mrb->globals) {
+    mrb->globals = kh_init(iv, mrb);
+  }
+  ivset(mrb, mrb->globals, sym, v);
+}
+
+/* 15.3.1.2.4 */
+/* 15.3.1.3.14 */
+/*
+ *  call-seq:
+ *     global_variables    -> array
+ *
+ *  Returns an array of the names of global variables.
+ *
+ *     global_variables.grep /std/   #=> [:$stdin, :$stdout, :$stderr]
+ *
+ *  The result holds every key of the globals table followed by the
+ *  symbols :$1 .. :$9, which are always appended.  The globals table is
+ *  created lazily by mrb_gv_set(), so it may still be NULL here; in that
+ *  case only :$1 .. :$9 are returned.
+ */
+mrb_value
+mrb_f_global_variables(mrb_state *mrb, mrb_value self)
+{
+  char buf[3];
+  int i;
+  khiter_t k;
+  struct kh_iv *h = mrb->globals;
+  mrb_value ary = mrb_ary_new(mrb);
+
+  if (h) {
+    for (k = kh_begin(h); k != kh_end(h); k++) {
+      if (kh_exist(h, k)) {
+        mrb_ary_push(mrb, ary, mrb_symbol_value(kh_key(h, k)));
+      }
+    }
+  }
+  buf[0] = '$';
+  buf[2] = 0;
+  for (i = 1; i <= 9; ++i) {
+    buf[1] = (char)(i + '0');
+    mrb_ary_push(mrb, ary, mrb_symbol_value(mrb_intern(mrb, buf)));
+  }
+  return ary;
+}
+
+/*
+ * Find id in table.  Returns 1 and, when value is non-NULL, stores the
+ * bucket position (not the stored mrb_value) in *value.  Returns 0 when
+ * table is NULL or id is absent.
+ */
+int
+mrb_st_lookup(struct kh_iv *table, mrb_sym id, khiter_t *value)
+{
+  khiter_t pos;
+
+  if (!table) return 0;
+  pos = kh_get(iv, table, id);
+  if (pos == kh_end(table)) return 0;
+  if (value != 0) *value = pos;
+  return 1;
+}
+
+/*
+ * Find key in table and copy the stored value to *value.
+ * Returns 1 on a hit, 0 on a miss (*value untouched).
+ * table is dereferenced unconditionally: the caller must rule out NULL.
+ */
+int
+kiv_lookup(khash_t(iv)* table, mrb_sym key, mrb_value *value)
+{
+  khiter_t pos = kh_get(iv, table, key);
+
+  if (pos == kh_end(table)) {
+    return 0;
+  }
+  *value = kh_value(table, pos);
+  return 1;
+}
+
+/*
+ * Test whether constant id is visible from klass.
+ *
+ * recurse - when zero, only klass itself is examined (unless klass is
+ *           Object, in which case the super chain is still followed).
+ * exclude - when zero and klass is a module, a second pass starting at
+ *           Object is made after the first pass fails.
+ * Returns 1 when found, 0 otherwise.
+ */
+static int
+mrb_const_defined_0(mrb_state *mrb, struct RClass *klass, mrb_sym id, int exclude, int recurse)
+{
+  mrb_value found;
+  struct RClass *cur = klass;
+  int retried = 0;
+
+  for (;;) {
+    for (; cur; cur = cur->super) {
+      if (cur->iv && kiv_lookup(cur->iv, id, &found)) {
+        return (int)1/*Qtrue*/;
+      }
+      if (!recurse && (klass != mrb->object_class)) break;
+    }
+    if (exclude || retried || (klass->tt != MRB_TT_MODULE)) {
+      return (int)0/*Qfalse*/;
+    }
+    /* one extra pass for modules, starting from Object */
+    retried = 1;
+    cur = mrb->object_class;
+  }
+}
+
+/* Non-recursive check with the Object fallback disabled (exclude=TRUE, recurse=FALSE). */
+int
+mrb_const_defined_at(mrb_state *mrb, struct RClass *klass, mrb_sym id)
+{
+  int defined = mrb_const_defined_0(mrb, klass, id, TRUE, FALSE);
+
+  return defined;
+}
+
+/*
+ * Resolve constant id from klass and return it as a class pointer.
+ * The value is not checked to actually be a class or module.
+ */
+struct RClass *
+mrb_class_from_sym(mrb_state *mrb, struct RClass *klass, mrb_sym id)
+{
+  mrb_value resolved;
+
+  resolved = const_get(mrb, klass, id);
+  return mrb_class_ptr(resolved);
+}
+
+/* Look up a class by C string name, starting from Object. */
+struct RClass *
+mrb_class_get(mrb_state *mrb, char *name)
+{
+  mrb_sym id = mrb_intern(mrb, name);
+
+  return mrb_class_from_sym(mrb, mrb->object_class, id);
+}
+
+/* Same as mrb_iv_get(): read instance variable id of obj. */
+mrb_value
+mrb_attr_get(mrb_state *mrb, mrb_value obj, mrb_sym id)
+{
+  mrb_value attr = mrb_iv_get(mrb, obj, id);
+
+  return attr;
+}
+
+/* Look up constant `name` through mrb_const_get() on Object and return it as a class pointer. */
+struct RClass *
+mrb_class_obj_get(mrb_state *mrb, char *name)
+{
+  mrb_value object = mrb_obj_value(mrb->object_class);
+  mrb_sym id = mrb_intern(mrb, name);
+  mrb_value found = mrb_const_get(mrb, object, id);
+
+  return mrb_class_ptr(found);
+}
+
diff --git a/src/variable.h b/src/variable.h
new file mode 100644
index 000000000..494099c2c
--- /dev/null
+++ b/src/variable.h
@@ -0,0 +1,42 @@
+#ifndef MRUBY_VARIABLE_H
+#define MRUBY_VARIABLE_H
+
+typedef struct global_variable { /* NOTE(review): variable.c in this commit keeps globals in a khash table and never names this type -- confirm it is still needed */
+ int counter;
+ mrb_value *data;
+ mrb_value (*getter)(); /* empty parentheses: parameter list is unspecified, calls are not type-checked */
+ void (*setter)(); /* empty parentheses: parameter list is unspecified, calls are not type-checked */
+ //void (*marker)();
+ //int block_trace;
+ //struct trace_var *trace;
+} global_variable;
+struct global_entry { /* pairs a symbol id with a global_variable record */
+ global_variable *var;
+ mrb_sym id;
+};
+
+mrb_value mrb_vm_special_get(mrb_state*, mrb_sym);
+void mrb_vm_special_set(mrb_state*, mrb_sym, mrb_value);
+mrb_value mrb_vm_iv_get(mrb_state*, mrb_sym);
+void mrb_vm_iv_set(mrb_state*, mrb_sym, mrb_value);
+mrb_value mrb_vm_cv_get(mrb_state*, mrb_sym);
+void mrb_vm_cv_set(mrb_state*, mrb_sym, mrb_value);
+mrb_value mrb_vm_const_get(mrb_state*, mrb_sym);
+void mrb_vm_const_set(mrb_state*, mrb_sym, mrb_value);
+mrb_value mrb_const_get(mrb_state*, mrb_value, mrb_sym);
+void mrb_const_set(mrb_state*, mrb_value, mrb_sym, mrb_value);
+int mrb_const_defined(mrb_state*, mrb_value, mrb_sym);
+
+mrb_value mrb_obj_iv_get(mrb_state*, struct RObject*, mrb_sym);
+void mrb_obj_iv_set(mrb_state*, struct RObject*, mrb_sym, mrb_value);
+const char * mrb_class2name(mrb_state *mrb, struct RClass* klass);
+void mrb_define_variable(mrb_state *mrb, const char *name, mrb_value *var);
+mrb_value mrb_iv_get(mrb_state *mrb, mrb_value obj, mrb_sym sym);
+void mrb_iv_set(mrb_state *mrb, mrb_value obj, mrb_sym sym, mrb_value v); /* mrb_iv_set */
+void mrb_copy_generic_ivar(mrb_value clone, mrb_value obj);
+int mrb_const_defined_at(mrb_state *mrb, struct RClass *klass, mrb_sym id);
+mrb_value mrb_f_global_variables(mrb_state *mrb, mrb_value self);
+mrb_value mrb_gv_get(mrb_state *mrb, mrb_sym sym);
+void mrb_gv_set(mrb_state *mrb, mrb_sym sym, mrb_value val);
+
+#endif /* MRUBY_VARIABLE_H */
diff --git a/src/version.c b/src/version.c
new file mode 100644
index 000000000..d69c6941c
--- /dev/null
+++ b/src/version.c
@@ -0,0 +1,87 @@
+/**********************************************************************
+
+ version.c -
+
+ $Author: knu $
+ $Date: 2008-05-31 22:37:06 +0900 (Sat, 31 May 2008) $
+ created at: Thu Sep 30 20:08:01 JST 1993
+
+ Copyright (C) 1993-2003 Yukihiro Matsumoto
+
+**********************************************************************/
+
+#include "mruby.h"
+#include "version.h"
+#include <stdio.h>
+#include "mruby/string.h"
+#include "variable.h"
+
+#define PRINT(type) puts(ruby_##type)
+//#define MKSTR(type) mrb_obj_freeze(mrb_str_new(ruby_##type, sizeof(ruby_##type)-1))
+#define MKSTR(type) mrb_str_new(mrb, ruby_##type, sizeof(ruby_##type)-1)
+
+const char ruby_version[] = RUBY_VERSION;
+const char ruby_release_date[] = RUBY_RELEASE_DATE;
+const char ruby_platform[] = RUBY_PLATFORM;
+const int ruby_patchlevel = RUBY_PATCHLEVEL;
+const char ruby_engine[] = RUBY_ENGINE;
+
+/*
+ * Register the version constants on Object: RUBY_VERSION,
+ * RUBY_RELEASE_DATE, RUBY_PLATFORM, RUBY_PATCHLEVEL, RUBY_ENGINE,
+ * RUBY_DESCRIPTION and RUBY_COPYRIGHT, plus the obsolete VERSION,
+ * RELEASE_DATE and PLATFORM names, which share the same string objects.
+ * The strings are not frozen.
+ */
+void
+Init_version(mrb_state *mrb)
+{
+  char description[128];
+  char copyright[128];
+  mrb_value version = MKSTR(version);
+  mrb_value release_date = MKSTR(release_date);
+  mrb_value platform = MKSTR(platform);
+  mrb_value engine = MKSTR(engine);
+  mrb_value text;
+
+  mrb_define_global_const(mrb, "RUBY_VERSION", version);
+  mrb_define_global_const(mrb, "RUBY_RELEASE_DATE", release_date);
+  mrb_define_global_const(mrb, "RUBY_PLATFORM", platform);
+  mrb_define_global_const(mrb, "RUBY_PATCHLEVEL", mrb_fixnum_value(RUBY_PATCHLEVEL));
+  mrb_define_global_const(mrb, "RUBY_ENGINE", engine);
+
+  /* "ruby <version> (<date> patchlevel <n>) [<platform>]" */
+  snprintf(description, sizeof(description), "ruby %s (%s %s %d) [%s]",
+           RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_RELEASE_STR,
+           RUBY_RELEASE_NUM, RUBY_PLATFORM);
+  text = mrb_str_new2(mrb, description);
+  mrb_define_global_const(mrb, "RUBY_DESCRIPTION", text);
+
+  snprintf(copyright, sizeof(copyright), "ruby - Copyright (C) %d-%d %s",
+           RUBY_BIRTH_YEAR, RUBY_RELEASE_YEAR, RUBY_AUTHOR);
+  text = mrb_str_new2(mrb, copyright);
+  mrb_define_global_const(mrb, "RUBY_COPYRIGHT", text);
+
+  /* obsolete constants */
+  mrb_define_global_const(mrb, "VERSION", version);
+  mrb_define_global_const(mrb, "RELEASE_DATE", release_date);
+  mrb_define_global_const(mrb, "PLATFORM", platform);
+}
+
+/* Print RUBY_DESCRIPTION to stdout and flush; silently does nothing if it is not a String. */
+void
+ruby_show_version(mrb_state *mrb)
+{
+  mrb_sym name = mrb_intern(mrb, "RUBY_DESCRIPTION");
+  mrb_value text = mrb_const_get(mrb, mrb_obj_value(mrb->object_class), name);
+
+  if (mrb_type(text) == MRB_TT_STRING) {
+    puts(RSTRING_PTR(text));
+    fflush(stdout);
+  }
+}
+
+/*
+ * Print RUBY_COPYRIGHT to stdout and terminate the process with status 0.
+ * If the constant is not a String the function returns without exiting.
+ */
+void
+ruby_show_copyright(mrb_state *mrb)
+{
+  mrb_sym name = mrb_intern(mrb, "RUBY_COPYRIGHT");
+  mrb_value text = mrb_const_get(mrb, mrb_obj_value(mrb->object_class), name);
+
+  if (mrb_type(text) == MRB_TT_STRING) {
+    puts(RSTRING_PTR(text));
+    exit(0);
+  }
+}
diff --git a/src/version.h b/src/version.h
new file mode 100644
index 000000000..e132efdf4
--- /dev/null
+++ b/src/version.h
@@ -0,0 +1,32 @@
+#define RUBY_VERSION "1.8.7"
+#define RUBY_RELEASE_DATE "2010-08-16"
+#define RUBY_VERSION_CODE 187 /* RUBY_VERSION with the dots removed */
+#define RUBY_RELEASE_CODE 20100816 /* RUBY_RELEASE_DATE as yyyymmdd */
+#define RUBY_PATCHLEVEL 302
+
+#define RUBY_VERSION_MAJOR 1
+#define RUBY_VERSION_MINOR 8
+#define RUBY_VERSION_TEENY 7
+#define RUBY_RELEASE_YEAR 2010
+#define RUBY_RELEASE_MONTH 8
+#define RUBY_RELEASE_DAY 16
+
+#ifdef RUBY_EXTERN /* declarations are emitted only if the includer defines RUBY_EXTERN */
+RUBY_EXTERN const char ruby_version[];
+RUBY_EXTERN const char ruby_release_date[];
+RUBY_EXTERN const char ruby_platform[];
+RUBY_EXTERN const int ruby_patchlevel;
+RUBY_EXTERN const char *ruby_description; /* NOTE(review): version.c in this commit does not define this -- confirm */
+RUBY_EXTERN const char *ruby_copyright; /* NOTE(review): version.c in this commit does not define this -- confirm */
+#endif
+
+#define RUBY_AUTHOR "Yukihiro Matsumoto"
+#define RUBY_BIRTH_YEAR 1993
+#define RUBY_BIRTH_MONTH 2
+#define RUBY_BIRTH_DAY 24
+
+#define RUBY_RELEASE_STR "patchlevel"
+#define RUBY_RELEASE_NUM RUBY_PATCHLEVEL
+
+#define RUBY_PLATFORM "i386-mingw32" /* fixed string, not derived from the build host */
+#define RUBY_ENGINE "ruby"
diff --git a/src/vm.c b/src/vm.c
new file mode 100644
index 000000000..6983fd86f
--- /dev/null
+++ b/src/vm.c
@@ -0,0 +1,1544 @@
+#include "mruby.h"
+#include "opcode.h"
+#include "irep.h"
+#include "variable.h"
+#include "mruby/proc.h"
+#include "mruby/array.h"
+#include "mruby/string.h"
+#include "mruby/hash.h"
+#include "mruby/range.h"
+#include "mruby/class.h"
+#include "mruby/numeric.h"
+#include "error.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <setjmp.h>
+
+#define STACK_INIT_SIZE 128
+#define CALLINFO_INIT_SIZE 32
+
+/*
+ * Allocate the initial value stack (zero-filled, STACK_INIT_SIZE slots) and
+ * the initial callinfo array (CALLINFO_INIT_SIZE entries, first one zeroed),
+ * and point the first callinfo's target class at Object.
+ */
+static void
+stack_init(mrb_state *mrb)
+{
+  size_t stack_bytes = sizeof(mrb_value) * STACK_INIT_SIZE;
+
+  /* assert(mrb->stack == NULL); */
+  mrb->stbase = mrb_malloc(mrb, stack_bytes);
+  memset(mrb->stbase, 0, stack_bytes);
+  mrb->stend = mrb->stbase + STACK_INIT_SIZE;
+  mrb->stack = mrb->stbase;
+
+  /* assert(mrb->ci == NULL); */
+  mrb->cibase = mrb_malloc(mrb, sizeof(mrb_callinfo)*CALLINFO_INIT_SIZE);
+  mrb->ciend = mrb->cibase + CALLINFO_INIT_SIZE;
+  mrb->ci = mrb->cibase;
+  memset(mrb->ci, 0, sizeof(mrb_callinfo));   /* only the first entry is cleared */
+  mrb->ci->target_class = mrb->object_class;
+}
+
+/*
+ * Make sure `room` slots are available from mrb->stack, growing the value
+ * stack if needed, then zero the slots in [keep, room).
+ * Growth doubles the capacity when that covers `room`, otherwise adds
+ * `room` slots.  The stack may move, so pointers into it are invalidated.
+ */
+static void
+stack_extend(mrb_state *mrb, int room, int keep)
+{
+  if (mrb->stack + room > mrb->stend) {
+    size_t capacity = mrb->stend - mrb->stbase;
+    size_t offset = mrb->stack - mrb->stbase;
+
+    capacity = (room <= capacity) ? capacity * 2 : capacity + room;
+    mrb->stbase = mrb_realloc(mrb, mrb->stbase, sizeof(mrb_value) * capacity);
+    mrb->stack = mrb->stbase + offset;
+    mrb->stend = mrb->stbase + capacity;
+  }
+  if (keep < room) {
+    memset(mrb->stack+keep, 0, sizeof(mrb_value) * (room-keep));
+  }
+}
+
+/* Reserve size+1 stack slots, preserving slot 0 and zeroing the rest.  Always returns 0. */
+int
+mrb_checkstack(mrb_state *mrb, int size)
+{
+  int room = size + 1;
+
+  stack_extend(mrb, room, 1);
+  return 0;
+}
+
+/*
+ * Return the environment `up` levels above the current proc's own
+ * environment, following each REnv's `c` link.
+ * Returns 0 when the proc has no environment or when the chain is
+ * shorter than `up`, rather than reading through a null link.
+ */
+struct REnv*
+uvenv(mrb_state *mrb, int up)
+{
+  struct REnv *e = mrb->ci->proc->env;
+
+  while (e && up--) {
+    e = (struct REnv*)e->c;
+  }
+  return e;
+}
+
+/* Read slot idx of the environment `up` levels out; nil when uvenv() finds none. */
+static mrb_value
+uvget(mrb_state *mrb, int up, int idx)
+{
+  struct REnv *env = uvenv(mrb, up);
+
+  if (env) {
+    return env->stack[idx];
+  }
+  return mrb_nil_value();
+}
+
+/* Write slot idx of the environment `up` levels out; a missing environment is ignored. */
+static void
+uvset(mrb_state *mrb, int up, int idx, mrb_value v)
+{
+  struct REnv *env = uvenv(mrb, up);
+
+  if (env) {
+    env->stack[idx] = v;
+    mrb_write_barrier(mrb, (struct RBasic*)env);
+  }
+}
+
+/*
+ * Push a new callinfo and return it.  nregs, eidx and ridx are copied from
+ * the previous top and env is cleared; all other fields are left for the
+ * caller to fill in.  The array is reallocated when the next entry would
+ * reach ciend, so callinfo pointers taken earlier may be stale afterwards.
+ */
+static mrb_callinfo*
+cipush(mrb_state *mrb)
+{
+  mrb_callinfo *top = mrb->ci;
+  size_t saved_nregs = top->nregs;
+  int saved_eidx = top->eidx;
+  int saved_ridx = top->ridx;
+
+  if (top + 1 == mrb->ciend) {
+    size_t depth = top - mrb->cibase;
+
+    mrb->cibase = mrb_realloc(mrb, mrb->cibase, sizeof(mrb_callinfo)*depth*2);
+    mrb->ci = mrb->cibase + depth;
+    mrb->ciend = mrb->cibase + depth * 2;
+  }
+  top = ++mrb->ci;
+  top->nregs = saved_nregs;
+  top->eidx = saved_eidx;
+  top->ridx = saved_ridx;
+  top->env = 0;
+  return top;
+}
+
+/* Drop the top callinfo.  No bounds check is made against cibase. */
+static void
+cipop(mrb_state *mrb)
+{
+  mrb->ci -= 1;
+}
+
+/*
+ * Run ensure handler number i in a fresh frame placed just past the
+ * caller's registers, with the caller's self (stack slot 0) as receiver.
+ */
+static void
+ecall(mrb_state *mrb, int i)
+{
+  mrb_value *base = mrb->stack;
+  struct RProc *handler = mrb->ensure[i];
+  mrb_callinfo *frame = cipush(mrb);
+
+  frame->stackidx = mrb->stack - mrb->stbase;
+  frame->mid = (frame - 1)->mid;
+  frame->acc = -1;
+  frame->argc = 0;
+  frame->proc = handler;
+  frame->nregs = handler->body.irep->nregs;
+  frame->target_class = handler->target_class;
+  mrb->stack += (frame - 1)->nregs;
+  mrb_run(mrb, handler, *base);
+}
+
+/*
+ * Call method `name` on self from C with argc arguments and an optional
+ * block (blk may be 0).
+ *
+ * If the method is not found, method_missing is called instead with the
+ * original method name inserted as the first argument.  C-implemented
+ * methods are invoked directly and their frame popped here; otherwise the
+ * call goes through mrb_run().
+ */
+mrb_value
+mrb_funcall_with_block(mrb_state *mrb, mrb_value self, const char *name, int argc, mrb_value *argv, struct RProc *blk)
+{
+  mrb_sym mid = mrb_intern(mrb, name);
+  mrb_sym missing = 0;
+  int shift = mrb->ci->nregs;
+  struct RClass *klass = mrb_class(mrb, self);
+  struct RProc *method = mrb_method_search_vm(mrb, &klass, mid);
+  mrb_callinfo *frame;
+  mrb_value result;
+
+  if (!method) {
+    missing = mid;
+    mid = mrb_intern(mrb, "method_missing");
+    method = mrb_method_search_vm(mrb, &klass, mid);
+    shift++;
+    argc++;   /* one extra slot for the missing method's name */
+  }
+
+  frame = cipush(mrb);
+  frame->mid = mid;
+  frame->proc = method;
+  frame->stackidx = mrb->stack - mrb->stbase;
+  frame->argc = argc;
+  frame->target_class = method->target_class;
+  frame->nregs = argc + 2;
+  frame->acc = -1;
+  mrb->stack += shift;
+
+  /* frame layout: [0]=self, [1..argc]=arguments, [argc+1]=block */
+  stack_extend(mrb, frame->nregs, 0);
+  mrb->stack[0] = self;
+  if (missing) {
+    mrb->stack[1] = mrb_symbol_value(missing);
+    memcpy(mrb->stack+2, argv, sizeof(mrb_value)*(argc-1));
+  }
+  else if (argc > 0) {
+    memcpy(mrb->stack+1, argv, sizeof(mrb_value)*argc);
+  }
+  if (blk) {
+    mrb->stack[argc+1] = mrb_obj_value(blk);
+  }
+  else {
+    mrb->stack[argc+1] = mrb_nil_value();
+  }
+
+  if (!MRB_PROC_CFUNC_P(method)) {
+    return mrb_run(mrb, method, self);
+  }
+  result = method->body.func(mrb, self);
+  mrb->stack = mrb->stbase + frame->stackidx;
+  cipop(mrb);
+  return result;
+}
+
+/* Call method `name` on self without a block. */
+mrb_value
+mrb_funcall_argv(mrb_state *mrb, mrb_value self, const char *name, int argc, mrb_value *argv)
+{
+  struct RProc *no_block = 0;
+
+  return mrb_funcall_with_block(mrb, self, name, argc, argv, no_block);
+}
+
+/*
+ * Invoke block b with argc arguments and an explicit self.  The new frame
+ * inherits the calling frame's method id, and its block slot is set to nil.
+ */
+mrb_value
+mrb_yield_with_self(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv, mrb_value self)
+{
+  mrb_sym mid = mrb->ci->mid;
+  int shift = mrb->ci->nregs;
+  struct RProc *block = mrb_proc_ptr(b);
+  mrb_callinfo *frame = cipush(mrb);
+  mrb_value result;
+
+  frame->mid = mid;
+  frame->proc = block;
+  frame->stackidx = mrb->stack - mrb->stbase;
+  frame->argc = argc;
+  frame->target_class = block->target_class;
+  frame->nregs = argc + 2;
+  frame->acc = -1;
+  mrb->stack += shift;
+
+  /* frame layout: [0]=self, [1..argc]=arguments, [argc+1]=nil block */
+  stack_extend(mrb, frame->nregs, 0);
+  mrb->stack[0] = self;
+  if (argc > 0) {
+    memcpy(mrb->stack+1, argv, sizeof(mrb_value)*argc);
+  }
+  mrb->stack[argc+1] = mrb_nil_value();
+
+  if (!MRB_PROC_CFUNC_P(block)) {
+    return mrb_run(mrb, block, self);
+  }
+  result = block->body.func(mrb, self);
+  mrb->stack = mrb->stbase + frame->stackidx;
+  cipop(mrb);
+  return result;
+}
+
+/* Invoke block b with argc arguments, using the current self (stack slot 0). */
+mrb_value
+mrb_yield_argv(mrb_state *mrb, mrb_value b, int argc, mrb_value *argv)
+{
+  mrb_value self = mrb->stack[0];
+
+  return mrb_yield_with_self(mrb, b, argc, argv, self);
+}
+
+/* Invoke block b with the single argument v, using the current self (stack slot 0). */
+mrb_value
+mrb_yield(mrb_state *mrb, mrb_value b, mrb_value v)
+{
+  mrb_value self = mrb->stack[0];
+
+  return mrb_yield_with_self(mrb, b, 1, &v, self);
+}
+
+/*
+ * Record a LocalJumpError with the message "unexpected <kind>" as the
+ * pending exception (mrb->exc).  It does not itself unwind.
+ *
+ * The message length handed to mrb_exc_new() is the formatted length,
+ * clamped to the buffer, not the buffer size.
+ */
+void
+localjump_error(mrb_state *mrb, const char *kind)
+{
+  char buf[256];
+  int len;
+  mrb_value exc;
+
+  len = snprintf(buf, sizeof(buf), "unexpected %s", kind);
+  if (len < 0) {
+    len = 0;                      /* formatting failed: use an empty message */
+  }
+  else if ((size_t)len >= sizeof(buf)) {
+    len = (int)sizeof(buf) - 1;   /* output was truncated */
+  }
+  exc = mrb_exc_new(mrb, E_LOCALJUMP_ERROR, buf, len);
+  mrb->exc = mrb_object(exc);
+}
+
+#define SET_TRUE_VALUE(r) { /* r := true.  All SET_* macros expand to a bare { } block and evaluate (r) twice, so pass a side-effect-free lvalue */ \
+ (r).tt = MRB_TT_TRUE;\
+ (r).value.i = 1;\
+}
+
+#define SET_FALSE_VALUE(r) { /* r := false: tag MRB_TT_FALSE with payload i = 1 */ \
+ (r).tt = MRB_TT_FALSE;\
+ (r).value.i = 1;\
+}
+
+#define SET_NIL_VALUE(r) { /* r := nil: same tag as false, but the payload is zeroed through value.p */ \
+ (r).tt = MRB_TT_FALSE;\
+ (r).value.p = 0;\
+}
+
+#define SET_INT_VALUE(r,n) { /* r := Fixnum n */ \
+ (r).tt = MRB_TT_FIXNUM;\
+ (r).value.i = (n);\
+}
+
+#define SET_FLOAT_VALUE(r,v) { /* r := Float v */ \
+ (r).tt = MRB_TT_FLOAT;\
+ (r).value.f = (v);\
+}
+
+#define SET_SYM_VALUE(r,v) { /* r := Symbol v; the symbol id is stored in value.i */ \
+ (r).tt = MRB_TT_SYMBOL;\
+ (r).value.i = (v);\
+}
+
+#define SET_OBJ_VALUE(r,v) { /* r := object v; the tag is read from v's own tt field, so v must be a non-null object pointer */ \
+ (r).tt = (((struct RObject*)(v))->tt);\
+ (r).value.p = (void*)(v);\
+}
+
+#define DIRECT_THREADED /* defined unconditionally, so the #else branch below (computed goto) is the one compiled */
+#ifndef DIRECT_THREADED
+
+#define INIT_DISPACTH for (;;) { i = *pc; switch (GET_OPCODE(i)) { /* loop + switch dispatcher: fetch, then branch on the opcode */
+#define CASE(op) case op:
+#define NEXT mrb->arena_idx = ai; pc++; break /* reset arena_idx to the value saved in ai, step to the next instruction */
+#define JUMP break /* pc was already set by the opcode body; re-enter the loop */
+#define END_DISPACTH }}
+
+#else
+
+#define INIT_DISPACTH JUMP; return mrb_nil_value(); /* jump straight to the first opcode's label; the return is not reached sequentially */
+#define CASE(op) L_ ## op: /* each opcode body is a label named L_<op>, matching the optable[] entries in mrb_run */
+#define NEXT mrb->arena_idx = ai; i=*++pc; goto *optable[GET_OPCODE(i)] /* advance pc, then computed goto (GCC labels-as-values extension) */
+#define JUMP i=*pc; goto *optable[GET_OPCODE(i)] /* dispatch at the current pc without advancing */
+
+#define END_DISPACTH
+
+#endif
+
+mrb_value mrb_gv_val_get(mrb_state *mrb, mrb_sym sym);
+void mrb_gv_val_set(mrb_state *mrb, mrb_sym sym, mrb_value val);
+
+#define CALL_MAXARGS 127
+
+mrb_value
+mrb_run(mrb_state *mrb, struct RProc *proc, mrb_value self)
+{
+ /* assert(mrb_proc_cfunc_p(proc)) */
+ mrb_irep *irep = proc->body.irep;
+ mrb_code *pc = irep->iseq;
+ mrb_value *pool = irep->pool;
+ mrb_sym *syms = irep->syms;
+ mrb_value *regs;
+ mrb_code i;
+ int ai = mrb->arena_idx;
+ jmp_buf c_jmp;
+ jmp_buf *prev_jmp;
+
+#ifdef DIRECT_THREADED
+ static void *optable[] = {
+ &&L_OP_NOP, &&L_OP_MOVE,
+ &&L_OP_LOADL, &&L_OP_LOADI, &&L_OP_LOADSYM, &&L_OP_LOADNIL,
+ &&L_OP_LOADSELF, &&L_OP_LOADT, &&L_OP_LOADF,
+ &&L_OP_GETGLOBAL, &&L_OP_SETGLOBAL, &&L_OP_GETSPECIAL, &&L_OP_SETSPECIAL,
+ &&L_OP_GETIV, &&L_OP_SETIV, &&L_OP_GETCV, &&L_OP_SETCV,
+ &&L_OP_GETCONST, &&L_OP_SETCONST, &&L_OP_GETMCNST, &&L_OP_SETMCNST,
+ &&L_OP_GETUPVAR, &&L_OP_SETUPVAR,
+ &&L_OP_JMP, &&L_OP_JMPIF, &&L_OP_JMPNOT,
+ &&L_OP_ONERR, &&L_OP_RESCUE, &&L_OP_POPERR, &&L_OP_RAISE, &&L_OP_EPUSH, &&L_OP_EPOP,
+ &&L_OP_SEND, &&L_OP_FSEND, &&L_OP_VSEND,
+ &&L_OP_CALL, &&L_OP_SUPER, &&L_OP_ARGARY, &&L_OP_ENTER,
+ &&L_OP_KARG, &&L_OP_KDICT, &&L_OP_RETURN, &&L_OP_TAILCALL, &&L_OP_BLKPUSH,
+ &&L_OP_ADD, &&L_OP_ADDI, &&L_OP_SUB, &&L_OP_SUBI, &&L_OP_MUL, &&L_OP_DIV,
+ &&L_OP_EQ, &&L_OP_LT, &&L_OP_LE, &&L_OP_GT, &&L_OP_GE,
+ &&L_OP_ARRAY, &&L_OP_ARYCAT, &&L_OP_ARYPUSH, &&L_OP_AREF, &&L_OP_ASET, &&L_OP_APOST,
+ &&L_OP_STRING, &&L_OP_STRCAT, &&L_OP_HASH,
+ &&L_OP_LAMBDA, &&L_OP_RANGE, &&L_OP_OCLASS,
+ &&L_OP_CLASS, &&L_OP_MODULE, &&L_OP_EXEC,
+ &&L_OP_METHOD, &&L_OP_SCLASS, &&L_OP_TCLASS,
+ &&L_OP_DEBUG, &&L_OP_STOP, &&L_OP_ERR,
+ };
+#endif
+
+
+ if (setjmp(c_jmp) == 0) {
+ prev_jmp = mrb->jmp;
+ mrb->jmp = &c_jmp;
+ }
+ else {
+ goto L_RAISE;
+ }
+ if (!mrb->stack) {
+ stack_init(mrb);
+ }
+ mrb->ci->proc = proc;
+ mrb->ci->nregs = irep->nregs + 2;
+ regs = mrb->stack;
+
+ INIT_DISPACTH {
+ CASE(OP_NOP) {
+ /* do nothing */
+ NEXT;
+ }
+
+ CASE(OP_MOVE) {
+ /* A B R(A) := R(B) */
+#if 0
+ regs[GETARG_A(i)] = regs[GETARG_B(i)];
+#elif 1
+ int a = GETARG_A(i);
+ int b = GETARG_B(i);
+
+ regs[a].tt = regs[b].tt;
+ regs[a].value = regs[b].value;
+#else
+ memcpy(regs+GETARG_A(i), regs+GETARG_B(i), sizeof(mrb_value));
+#endif
+ NEXT;
+ }
+
+ CASE(OP_LOADL) {
+ /* A Bx R(A) := Pool(Bx) */
+ regs[GETARG_A(i)] = pool[GETARG_Bx(i)];
+ NEXT;
+ }
+
+ CASE(OP_LOADI) {
+ /* A Bx R(A) := sBx */
+ SET_INT_VALUE(regs[GETARG_A(i)], GETARG_sBx(i));
+ NEXT;
+ }
+
+ CASE(OP_LOADSYM) {
+ /* A B R(A) := Sym(B) */
+ SET_SYM_VALUE(regs[GETARG_A(i)], syms[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_LOADNIL) {
+ /* A B R(A) := nil */
+ int a = GETARG_A(i);
+
+ SET_NIL_VALUE(regs[a]);
+ NEXT;
+ }
+
+ CASE(OP_LOADSELF) {
+ /* A R(A) := self */
+ regs[GETARG_A(i)] = mrb->stack[0];
+ NEXT;
+ }
+
+ CASE(OP_LOADT) {
+ /* A R(A) := true */
+ regs[GETARG_A(i)] = mrb_true_value();
+ NEXT;
+ }
+
+ CASE(OP_LOADF) {
+ /* A R(A) := false */
+ regs[GETARG_A(i)] = mrb_false_value();
+ NEXT;
+ }
+
+ CASE(OP_GETGLOBAL) {
+ /* A B R(A) := getglobal(Sym(B)) */
+ regs[GETARG_A(i)] = mrb_gv_get(mrb, syms[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_SETGLOBAL) {
+ /* setglobal(Sym(b), R(A)) */
+ mrb_gv_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_GETSPECIAL) {
+ /* A Bx R(A) := Special[Bx] */
+ regs[GETARG_A(i)] = mrb_vm_special_get(mrb, GETARG_Bx(i));
+ NEXT;
+ }
+
+ CASE(OP_SETSPECIAL) {
+ /* A Bx Special[Bx] := R(A) */
+ mrb_vm_special_set(mrb, GETARG_Bx(i), regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_GETIV) {
+ /* A Bx R(A) := ivget(Bx) */
+ regs[GETARG_A(i)] = mrb_vm_iv_get(mrb, syms[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_SETIV) {
+ /* ivset(Sym(B),R(A)) */
+ mrb_vm_iv_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_GETCV) {
+ /* A B R(A) := ivget(Sym(B)) */
+ regs[GETARG_A(i)] = mrb_vm_cv_get(mrb, syms[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_SETCV) {
+ /* ivset(Sym(B),R(A)) */
+ mrb_vm_cv_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_GETCONST) {
+ /* A B R(A) := constget(Sym(B)) */
+ regs[GETARG_A(i)] = mrb_vm_const_get(mrb, syms[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_SETCONST) {
+ /* A B constset(Sym(B),R(A)) */
+ mrb_vm_const_set(mrb, syms[GETARG_Bx(i)], regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_GETMCNST) {
+ /* A B C R(A) := R(C)::Sym(B) */
+ int a = GETARG_A(i);
+
+ regs[a] = mrb_const_get(mrb, regs[a], syms[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_SETMCNST) {
+ /* A B C R(A+1)::Sym(B) := R(A) */
+ int a = GETARG_A(i);
+
+ mrb_const_set(mrb, regs[a+1], syms[GETARG_Bx(i)], regs[a]);
+ NEXT;
+ }
+
+ CASE(OP_GETUPVAR) {
+ /* A B C R(A) := uvget(B,C) */
+ regs[GETARG_A(i)] = uvget(mrb, GETARG_C(i), GETARG_B(i));
+ NEXT;
+ }
+
+ CASE(OP_SETUPVAR) {
+ /* A B C uvset(B,C,R(A)) */
+ uvset(mrb, GETARG_C(i), GETARG_B(i), regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_JMP) {
+ /* sBx pc+=sBx */
+ pc += GETARG_sBx(i);
+ JUMP;
+ }
+
+ CASE(OP_JMPIF) {
+ /* A sBx if R(A) pc+=sBx */
+ if (mrb_test(regs[GETARG_A(i)])) {
+ pc += GETARG_sBx(i);
+ JUMP;
+ }
+ NEXT;
+ }
+
+ CASE(OP_JMPNOT) {
+ /* A sBx if R(A) pc+=sBx */
+ if (!mrb_test(regs[GETARG_A(i)])) {
+ pc += GETARG_sBx(i);
+ JUMP;
+ }
+ NEXT;
+ }
+
+ CASE(OP_ONERR) {
+ /* sBx pc+=sBx on exception */
+ if (mrb->rsize <= mrb->ci->ridx) {
+ if (mrb->rsize == 0) mrb->rsize = 16;
+ else mrb->rsize *= 2;
+ mrb->rescue = mrb_realloc(mrb, mrb->rescue, sizeof(mrb_code*) * mrb->rsize);
+ }
+ mrb->rescue[mrb->ci->ridx++] = pc + GETARG_sBx(i);
+ NEXT;
+ }
+
+ CASE(OP_RESCUE) {
+ /* A R(A) := exc; clear(exc) */
+ SET_OBJ_VALUE(regs[GETARG_A(i)],mrb->exc);
+ mrb->exc = 0;
+ NEXT;
+ }
+
+ CASE(OP_POPERR) {
+ int a = GETARG_A(i);
+
+ while (a--) {
+ mrb->ci->ridx--;
+ }
+ NEXT;
+ }
+
+ CASE(OP_RAISE) {
+ /* A raise(R(A)) */
+ mrb->exc = mrb_object(regs[GETARG_A(i)]);
+ goto L_RAISE;
+ }
+
+ CASE(OP_EPUSH) {
+ /* Bx ensure_push(SEQ[Bx]) */
+ struct RProc *p;
+
+ p = mrb_closure_new(mrb, mrb->irep[irep->idx+GETARG_Bx(i)]);
+ /* push ensure_stack */
+ if (mrb->esize <= mrb->ci->eidx) {
+ if (mrb->esize == 0) mrb->esize = 16;
+ else mrb->esize *= 2;
+ mrb->ensure = mrb_realloc(mrb, mrb->ensure, sizeof(struct RProc*) * mrb->esize);
+ }
+ mrb->ensure[mrb->ci->eidx++] = p;
+ NEXT;
+ }
+
+ CASE(OP_EPOP) {
+ /* A A.times{ensure_pop().call} */
+ int n;
+ int a = GETARG_A(i);
+
+ for (n=0; n<a; n++) {
+ ecall(mrb, --mrb->ci->eidx);
+ }
+ NEXT;
+ }
+
+ L_SEND:
+ CASE(OP_SEND) {
+ /* A B C R(A) := call(R(A),Sym(B),R(A+1),... ,R(A+C-1)) */
+ int a = GETARG_A(i);
+ int n = GETARG_C(i);
+ struct RProc *m;
+ struct RClass *c;
+ mrb_callinfo *ci;
+ mrb_value recv;
+ mrb_sym mid = syms[GETARG_B(i)];
+
+ recv = regs[a];
+ c = mrb_class(mrb, recv);
+ m = mrb_method_search_vm(mrb, &c, mid);
+ if (!m) {
+ mrb_value sym = mrb_symbol_value(mid);
+
+ mid = mrb_intern(mrb, "method_missing");
+ m = mrb_method_search_vm(mrb, &c, mid);
+ if (n == CALL_MAXARGS) {
+ mrb_ary_unshift(mrb, regs[a+1], sym);
+ }
+ else {
+ memmove(regs+a+2, regs+a+1, sizeof(mrb_value)*(n+1));
+ regs[a+1] = sym;
+ n++;
+ }
+ }
+
+ /* push callinfo */
+ ci = cipush(mrb);
+ ci->mid = mid;
+ ci->proc = m;
+ ci->stackidx = mrb->stack - mrb->stbase;
+ ci->argc = n;
+ if (ci->argc == CALL_MAXARGS) ci->argc = -1;
+ ci->target_class = m->target_class;
+ ci->pc = pc + 1;
+
+ /* prepare stack */
+ mrb->stack += a;
+
+ if (MRB_PROC_CFUNC_P(m)) {
+ mrb->stack[0] = m->body.func(mrb, recv);
+ mrb->arena_idx = ai;
+ if (mrb->exc) goto L_RAISE;
+ /* pop stackpos */
+ mrb->stack = mrb->stbase + ci->stackidx;
+ cipop(mrb);
+ NEXT;
+ }
+ else {
+ /* fill callinfo */
+ ci->acc = a;
+
+ /* setup environment for calling method */
+ proc = mrb->ci->proc = m;
+ irep = m->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+ ci->nregs = irep->nregs;
+ if (ci->argc < 0) {
+ stack_extend(mrb, (irep->nregs < 3) ? 3 : irep->nregs, 3);
+ }
+ else {
+ stack_extend(mrb, irep->nregs, ci->argc+2);
+ }
+ regs = mrb->stack;
+ pc = irep->iseq;
+ JUMP;
+ }
+ }
+
+ CASE(OP_FSEND) {
+ /* A B C R(A) := fcall(R(A),Sym(B),R(A+1),... ,R(A+C)) */
+ NEXT;
+ }
+
+ CASE(OP_VSEND) {
+ /* A B R(A) := vcall(R(A),Sym(B)) */
+ NEXT;
+ }
+
+ CASE(OP_CALL) {
+ /* A R(A) := self.call(frame.argc, frame.argv) */
+ mrb_callinfo *ci;
+ mrb_value recv = mrb->stack[0];
+ struct RProc *m = mrb_proc_ptr(recv);
+
+ /* replace callinfo */
+ ci = mrb->ci;
+ ci->target_class = m->target_class;
+ ci->proc = m;
+ if (m->env) {
+ ci->mid = m->env->mid;
+ if (!m->env->stack) {
+ m->env->stack = mrb->stack;
+ }
+ }
+
+ /* prepare stack */
+ if (MRB_PROC_CFUNC_P(m)) {
+ mrb->stack[0] = m->body.func(mrb, recv);
+ mrb->arena_idx = ai;
+ if (mrb->exc) goto L_RAISE;
+ /* pop stackpos */
+ regs = mrb->stack = mrb->stbase + ci->stackidx;
+ cipop(mrb);
+ NEXT;
+ }
+ else {
+ /* setup environment for calling method */
+ proc = m;
+ irep = m->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+ ci->nregs = irep->nregs;
+ if (ci->argc < 0) {
+ stack_extend(mrb, (irep->nregs < 3) ? 3 : irep->nregs, 3);
+ }
+ else {
+ stack_extend(mrb, irep->nregs, ci->argc+2);
+ }
+ regs = mrb->stack;
+ regs[0] = m->env->stack[0];
+ pc = m->body.irep->iseq;
+ JUMP;
+ }
+ }
+
+ CASE(OP_SUPER) {
+ /* A B C R(A) := super(R(A+1),... ,R(A+C-1)) */
+ mrb_value recv;
+ mrb_callinfo *ci = mrb->ci;
+ struct RProc *m;
+ struct RClass *c;
+ mrb_sym mid = ci->mid;
+ int a = GETARG_A(i);
+ int n = GETARG_C(i);
+
+ recv = regs[0];
+ c = mrb->ci->proc->target_class->super;
+ m = mrb_method_search_vm(mrb, &c, mid);
+ if (!m) {
+ c = mrb->ci->proc->target_class;
+ mid = mrb_intern(mrb, "method_missing");
+ m = mrb_method_search_vm(mrb, &c, mid);
+ if (n == CALL_MAXARGS) {
+ mrb_ary_unshift(mrb, regs[a+1], mrb_symbol_value(ci->mid));
+ }
+ else {
+ memmove(regs+a+2, regs+a+1, sizeof(mrb_value)*(n+1));
+ regs[a+1] = mrb_symbol_value(ci->mid);
+ n++;
+ }
+ }
+
+ /* push callinfo */
+ ci = cipush(mrb);
+ ci->mid = mid;
+ ci->proc = m;
+ ci->stackidx = mrb->stack - mrb->stbase;
+ ci->argc = n;
+ if (ci->argc == CALL_MAXARGS) ci->argc = -1;
+ ci->target_class = m->target_class;
+ ci->pc = pc + 1;
+
+ /* prepare stack */
+ mrb->stack += a;
+ mrb->stack[0] = recv;
+
+ if (MRB_PROC_CFUNC_P(m)) {
+ mrb->stack[0] = m->body.func(mrb, recv);
+ mrb->arena_idx = ai;
+ if (mrb->exc) goto L_RAISE;
+ /* pop stackpos */
+ mrb->stack = mrb->stbase + ci->stackidx;
+ cipop(mrb);
+ NEXT;
+ }
+ else {
+ /* fill callinfo */
+ ci->acc = a;
+
+ /* setup environment for calling method */
+ ci->proc = m;
+ irep = m->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+ ci->nregs = irep->nregs;
+ if (ci->argc < 0) {
+ stack_extend(mrb, (irep->nregs < 3) ? 3 : irep->nregs, 3);
+ }
+ else {
+ stack_extend(mrb, irep->nregs, ci->argc+2);
+ }
+ regs = mrb->stack;
+ pc = irep->iseq;
+ JUMP;
+ }
+ }
+
+ CASE(OP_ARGARY) {
+ /* A Bx R(A) := argument array (16=6:1:5:4) */
+ int a = GETARG_A(i);
+ int bx = GETARG_Bx(i);
+ int m1 = (bx>>10)&0x3f;
+ int r = (bx>>9)&0x1;
+ int m2 = (bx>>4)&0x1f;
+ int lv = (bx>>0)&0xf;
+ mrb_value *stack;
+
+ if (lv == 0) stack = regs + 1;
+ else {
+ struct REnv *e = uvenv(mrb, lv-1);
+ stack = e->stack + 1;
+ }
+ if (r == 0) {
+ regs[a] = mrb_ary_new_elts(mrb, m1+m2, stack);
+ }
+ else {
+ mrb_value *pp;
+ struct RArray *rest;
+ int len = 0;
+
+ if (stack[m1].tt == MRB_TT_ARRAY) {
+ struct RArray *ary = mrb_ary_ptr(stack[m1]);
+
+ pp = ary->buf;
+ len = ary->len;
+ }
+ regs[a] = mrb_ary_new_capa(mrb, m1+len+m2);
+ rest = mrb_ary_ptr(regs[a]);
+ memcpy(rest->buf, stack, sizeof(mrb_value)*m1);
+ if (len > 0) {
+ memcpy(rest->buf+m1, pp, sizeof(mrb_value)*len);
+ }
+ if (m2 > 0) {
+ memcpy(rest->buf+m1+len, stack+m1+1, sizeof(mrb_value)*m2);
+ }
+ rest->len = m1+len+m2;
+ }
+ regs[a+1] = stack[m1+r+m2];
+ NEXT;
+ }
+
+ CASE(OP_ENTER) {
+ /* Ax arg setup according to flags (24=5:5:1:5:5:1:1) */
+ /* number of optional arguments times OP_JMP should follow */
+ int ax = GETARG_Ax(i);
+ int m1 = (ax>>18)&0x1f;
+ int o = (ax>>13)&0x1f;
+ int r = (ax>>12)&0x1;
+ int m2 = (ax>>7)&0x1f;
+ int k = (ax>>2)&0x1f;
+ int kd = (ax>>1)&0x1;
+ int b = (ax>>0)& 0x1;
+ int argc = mrb->ci->argc;
+ mrb_value *argv = regs+1;
+ int len = m1 + o + r + m2;
+
+ if (argc < 0) {
+ struct RArray *ary = mrb_ary_ptr(regs[1]);
+ argv = ary->buf;
+ argc = ary->len;
+ regs[len+2] = regs[1]; /* save argary in register */
+ }
+ if (mrb->ci->proc && MRB_PROC_STRICT_P(mrb->ci->proc)) {
+ if (argc >= 0) {
+ if (argc < m1 + m2 || (r == 0 && argc > len)) {
+ fprintf(stderr, "'%s': wrong number of arguments (%d for %d)\n",
+ mrb_sym2name(mrb, mrb->ci->mid),
+ mrb->ci->argc, m1+m2);
+ exit(1);
+ }
+ }
+ }
+ else if (len > 1 && argc == 1 && argv[0].tt == MRB_TT_ARRAY) {
+ argc = mrb_ary_ptr(argv[0])->len;
+ argv = mrb_ary_ptr(argv[0])->buf;
+ }
+ mrb->ci->argc = len;
+ if (argc < len) {
+ regs[len+1] = argv[argc]; /* move block */
+ memmove(&regs[1], argv, sizeof(mrb_value)*(argc-m2)); /* m1 + o */
+ memmove(&regs[len-m2+1], &argv[argc-m2], sizeof(mrb_value)*m2); /* m2 */
+ if (r) { /* r */
+ regs[m1+o+1] = mrb_ary_new_capa(mrb, 0);
+ }
+ pc += argc - m1 - m2 + 1;
+ }
+ else {
+ memmove(&regs[1], argv, sizeof(mrb_value)*(m1+o)); /* m1 + o */
+ if (r) { /* r */
+ regs[m1+o+1] = mrb_ary_new_elts(mrb, argc-m1-o-m2, argv+m1+o);
+ }
+ memmove(&regs[m1+o+r+1], &argv[argc-m2], sizeof(mrb_value)*m2);
+ regs[len+1] = argv[argc]; /* move block */
+ pc += o + 1;
+ }
+ JUMP;
+ }
+
+ CASE(OP_KARG) {
+ /* A B C R(A) := kdict[Sym(B)]; if C kdict.rm(Sym(B)) */
+ /* if C == 2; raise unless kdict.empty? */
+ /* OP_JMP should follow to skip init code */
+ NEXT;
+ }
+
+ CASE(OP_KDICT) {
+ /* A C R(A) := kdict */
+ NEXT;
+ }
+
+ CASE(OP_RETURN) {
+ /* A return R(A) */
+ L_RETURN:
+ if (mrb->ci->env) {
+ struct REnv *e = mrb->ci->env;
+ int len = (int)e->flags;
+ mrb_value *p = mrb_malloc(mrb, sizeof(mrb_value)*len);
+
+ e->cioff = -1;
+ memcpy(p, e->stack, sizeof(mrb_value)*len);
+ e->stack = p;
+ }
+
+ if (mrb->exc) {
+ mrb_callinfo *ci;
+ int ridx;
+
+ L_RAISE:
+ ci = mrb->ci;
+ if (ci == mrb->cibase) goto L_STOP;
+ while (ci[0].ridx == ci[-1].ridx) {
+ cipop(mrb);
+ ci = mrb->ci;
+ if (ci == mrb->cibase) {
+ if (ci->ridx == 0) goto L_STOP;
+ break;
+ }
+ }
+ irep = ci->proc->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+ regs = mrb->stack = mrb->stbase + ci->stackidx;
+ pc = mrb->rescue[--ci->ridx];
+ }
+ else {
+ mrb_callinfo *ci = mrb->ci;
+ int acc, eidx = mrb->ci->eidx;
+ mrb_value v = regs[GETARG_A(i)];
+
+ switch (GETARG_B(i)) {
+ case OP_R_NORMAL:
+ ci = mrb->ci;
+ break;
+ case OP_R_BREAK:
+ if (proc->env->cioff < 0) {
+ localjump_error(mrb, "break");
+ goto L_RAISE;
+ }
+ ci = mrb->ci = mrb->cibase + proc->env->cioff + 1;
+ break;
+ case OP_R_RETURN:
+ if (proc->env->cioff < 0) {
+ localjump_error(mrb, "return");
+ }
+ ci = mrb->ci = mrb->cibase + proc->env->cioff;
+ break;
+ default:
+ /* cannot happen */
+ break;
+ }
+ cipop(mrb);
+ acc = ci->acc;
+ pc = ci->pc;
+ regs = mrb->stack = mrb->stbase + ci->stackidx;
+ while (eidx > mrb->ci->eidx) {
+ ecall(mrb, --eidx);
+ }
+ if (acc < 0) {
+ mrb->jmp = prev_jmp;
+ return v;
+ }
+ DEBUG(printf("from :%s\n", mrb_sym2name(mrb, ci->mid)));
+ proc = mrb->ci->proc;
+ irep = proc->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+
+ regs[acc] = v;
+ }
+ JUMP;
+ }
+
+ CASE(OP_TAILCALL) {
+ /* A B C return call(R(A),Sym(B),R(A+1),... ,R(A+C-1)) */
+ int a = GETARG_A(i);
+ int n = GETARG_C(i);
+ struct RProc *m;
+ struct RClass *c;
+ mrb_callinfo *ci;
+ mrb_value recv;
+ mrb_sym mid = syms[GETARG_B(i)];
+
+ recv = regs[a];
+ c = mrb_class(mrb, recv);
+ m = mrb_method_search_vm(mrb, &c, mid);
+ if (!m) {
+ mrb_value sym = mrb_symbol_value(mid);
+
+ mid = mrb_intern(mrb, "method_missing");
+ m = mrb_method_search_vm(mrb, &c, mid);
+ if (n == CALL_MAXARGS) {
+ mrb_ary_unshift(mrb, regs[a+1], sym);
+ }
+ else {
+ memmove(regs+a+2, regs+a+1, sizeof(mrb_value)*(n+1));
+ regs[a+1] = sym;
+ n++;
+ }
+ }
+
+
+ /* replace callinfo */
+ mrb->ci = ci = &mrb->ci[-1];
+ ci->mid = mid;
+ ci->target_class = m->target_class;
+ ci->argc = n;
+ if (ci->argc == CALL_MAXARGS) ci->argc = -1;
+
+ /* move stack */
+ memmove(mrb->stack, &regs[a], (ci->argc+1)*sizeof(mrb_value));
+
+ if (MRB_PROC_CFUNC_P(m)) {
+ mrb->stack[0] = m->body.func(mrb, recv);
+ mrb->arena_idx = ai;
+ goto L_RETURN;
+ }
+ else {
+ /* setup environment for calling method */
+ irep = m->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+ if (ci->argc < 0) {
+ stack_extend(mrb, (irep->nregs < 3) ? 3 : irep->nregs, 3);
+ }
+ else {
+ stack_extend(mrb, irep->nregs, ci->argc+2);
+ }
+ regs = mrb->stack;
+ pc = irep->iseq;
+ }
+ JUMP;
+ }
+
+ CASE(OP_BLKPUSH) {
+ /* A Bx R(A) := block (16=6:1:5:4) */
+ int a = GETARG_A(i);
+ int bx = GETARG_Bx(i);
+ int m1 = (bx>>10)&0x3f;
+ int r = (bx>>9)&0x1;
+ int m2 = (bx>>4)&0x1f;
+ int lv = (bx>>0)&0xf;
+ mrb_value *stack;
+
+ if (lv == 0) stack = regs + 1;
+ else {
+ struct REnv *e = uvenv(mrb, lv-1);
+ stack = e->stack + 1;
+ }
+ regs[a] = stack[m1+r+m2];
+ NEXT;
+ }
+
+#define TYPES2(a,b) (((((int)(a))<<8)|((int)(b)))&0xffff)
+#define OP_MATH_BODY(op,v1,v2) do {\
+ regs[a].value.v1 = regs[a].value.v1 op regs[a+1].value.v2;\
+} while(0)
+
+#define OP_MATH(op) do {\
+ int a = GETARG_A(i);\
+ /* need to check if - is overridden */\
+ switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {\
+ case TYPES2(MRB_TT_FIXNUM,MRB_TT_FIXNUM):\
+ OP_MATH_BODY(op,i,i); \
+ break;\
+ case TYPES2(MRB_TT_FIXNUM,MRB_TT_FLOAT):\
+ {\
+ mrb_int x = regs[a].value.i;\
+ mrb_float y = regs[a+1].value.f;\
+ SET_FLOAT_VALUE(regs[a], (mrb_float)x op y);\
+ }\
+ break;\
+ case TYPES2(MRB_TT_FLOAT,MRB_TT_FIXNUM):\
+ OP_MATH_BODY(op,f,i);\
+ break;\
+ case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT):\
+ OP_MATH_BODY(op,f,f);\
+ break;\
+ default:\
+ i = MKOP_ABC(OP_SEND, a, GETARG_B(i), GETARG_C(i));\
+ goto L_SEND;\
+ }\
+} while (0)
+
+ CASE(OP_ADD) {
+ /* A B C R(A) := R(A)+R(A+1) (Syms[B]=:+,C=1)*/
+ int a = GETARG_A(i);
+
+ switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {
+ case TYPES2(MRB_TT_FIXNUM,MRB_TT_FIXNUM):
+ OP_MATH_BODY(+,i,i);
+ break;
+ case TYPES2(MRB_TT_FIXNUM,MRB_TT_FLOAT):
+ {
+ mrb_int x = regs[a].value.i;
+ mrb_float y = regs[a+1].value.f;
+ SET_FLOAT_VALUE(regs[a], (mrb_float)x + y);
+ }
+ break;
+ case TYPES2(MRB_TT_FLOAT,MRB_TT_FIXNUM):
+ OP_MATH_BODY(+,f,i);
+ break;
+ case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT):
+ OP_MATH_BODY(+,f,f);
+ break;
+ case TYPES2(MRB_TT_STRING,MRB_TT_STRING):
+ regs[a] = mrb_str_plus(mrb, regs[a], regs[a+1]);
+ break;
+ default:
+ i = MKOP_ABC(OP_SEND, a, GETARG_B(i), GETARG_C(i));
+ goto L_SEND;
+ }
+ NEXT;
+ }
+
+ CASE(OP_SUB) {
+ /* A B C R(A) := R(A)-R(A+1) (Syms[B]=:-,C=1)*/
+ OP_MATH(-);
+ NEXT;
+ }
+
+ CASE(OP_MUL) {
+ /* A B C R(A) := R(A)*R(A+1) (Syms[B]=:*,C=1)*/
+ OP_MATH(*);
+ NEXT;
+ }
+
+ CASE(OP_DIV) {
+ /* A B C R(A) := R(A)/R(A+1) (Syms[B]=:/,C=1)*/
+ OP_MATH(/);
+ NEXT;
+ }
+
+ CASE(OP_ADDI) {
+ /* A B C R(A) := R(A)+C (Syms[B]=:+)*/
+ int a = GETARG_A(i);
+
+ /* need to check if + is overridden */
+ switch (mrb_type(regs[a])) {
+ case MRB_TT_FIXNUM:
+ regs[a].value.i += GETARG_C(i);
+ break;
+ case MRB_TT_FLOAT:
+ regs[a].value.f += GETARG_C(i);
+ break;
+ default:
+ SET_INT_VALUE(regs[a+1], GETARG_C(i));
+ i = MKOP_ABC(OP_SEND, a, GETARG_B(i), 1);
+ goto L_SEND;
+ }
+ NEXT;
+ }
+
+ CASE(OP_SUBI) {
+ /* A B C R(A) := R(A)-C (Syms[B]=:-)*/
+ int a = GETARG_A(i);
+
+ /* need to check if - is overridden */
+ switch (mrb_type(regs[a])) {
+ case MRB_TT_FIXNUM:
+ regs[a].value.i -= GETARG_C(i);
+ break;
+ case MRB_TT_FLOAT:
+ regs[a].value.f -= GETARG_C(i);
+ break;
+ default:
+ SET_INT_VALUE(regs[a+1], GETARG_C(i));
+ i = MKOP_ABC(OP_SEND, a, GETARG_B(i), 1);
+ goto L_SEND;
+ }
+ NEXT;
+ }
+
+#define OP_CMP_BODY(op,v1,v2) do {\
+ if (regs[a].value.v1 op regs[a+1].value.v2) {\
+ SET_TRUE_VALUE(regs[a]);\
+ }\
+ else {\
+ SET_FALSE_VALUE(regs[a]);\
+ }\
+} while(0)
+
+#define OP_CMP(op) do {\
+ int a = GETARG_A(i);\
+ /* need to check if - is overridden */\
+ switch (TYPES2(mrb_type(regs[a]),mrb_type(regs[a+1]))) {\
+ case TYPES2(MRB_TT_FIXNUM,MRB_TT_FIXNUM):\
+ OP_CMP_BODY(op,i,i); \
+ break;\
+ case TYPES2(MRB_TT_FIXNUM,MRB_TT_FLOAT):\
+ OP_CMP_BODY(op,i,f);\
+ break;\
+ case TYPES2(MRB_TT_FLOAT,MRB_TT_FIXNUM):\
+ OP_CMP_BODY(op,f,i);\
+ break;\
+ case TYPES2(MRB_TT_FLOAT,MRB_TT_FLOAT):\
+ OP_CMP_BODY(op,f,f);\
+ break;\
+ default:\
+ i = MKOP_ABC(OP_SEND, a, GETARG_B(i), GETARG_C(i));\
+ goto L_SEND;\
+ }\
+} while (0)
+
+ CASE(OP_EQ) {
+ /* A B C R(A) := R(A)==R(A+1) (Syms[B]=:==,C=1)*/
+ OP_CMP(==);
+ NEXT;
+ }
+
+ CASE(OP_LT) {
+ /* A B C R(A) := R(A)<R(A+1) (Syms[B]=:<,C=1)*/
+ OP_CMP(<);
+ NEXT;
+ }
+
+ CASE(OP_LE) {
+ /* A B C R(A) := R(A)<=R(A+1) (Syms[B]=:<=,C=1)*/
+ OP_CMP(<=);
+ NEXT;
+ }
+
+ CASE(OP_GT) {
+ /* A B C R(A) := R(A)>R(A+1) (Syms[B]=:>,C=1)*/
+ OP_CMP(>);
+ NEXT;
+ }
+
+ CASE(OP_GE) {
+ /* A B C R(A) := R(A)>=R(A+1) (Syms[B]=:>=,C=1)*/
+ OP_CMP(>=);
+ NEXT;
+ }
+
+ CASE(OP_ARRAY) {
+ /* A B C R(A) := ary_new(R(B),R(B+1)..R(B+C)) */
+ int b = GETARG_B(i);
+ int lim = b+GETARG_C(i);
+ mrb_value ary = mrb_ary_new_capa(mrb, GETARG_C(i));
+
+ while (b < lim) {
+ mrb_ary_push(mrb, ary, regs[b++]);
+ }
+ regs[GETARG_A(i)] = ary;
+ NEXT;
+ }
+
+ CASE(OP_ARYCAT) {
+ /* A B mrb_ary_concat(R(A),R(B)) */
+ mrb_ary_concat(mrb, regs[GETARG_A(i)],
+ mrb_ary_splat(mrb, regs[GETARG_B(i)]));
+ NEXT;
+ }
+
+ CASE(OP_ARYPUSH) {
+ /* A B R(A).push(R(B)) */
+ mrb_ary_push(mrb, regs[GETARG_A(i)], regs[GETARG_B(i)]);
+ NEXT;
+ }
+
+ CASE(OP_AREF) {
+ /* A B C R(A) := R(B)[C] */
+ int a = GETARG_A(i);
+ int c = GETARG_C(i);
+ mrb_value v = regs[GETARG_B(i)];
+
+ if (v.tt != MRB_TT_ARRAY) {
+ if (c == 0) {
+ regs[GETARG_A(i)] = v;
+ }
+ else {
+ SET_NIL_VALUE(regs[a]);
+ }
+ }
+ else {
+ regs[GETARG_A(i)] = mrb_ary_ref(mrb, v, c);
+ }
+ NEXT;
+ }
+
+ CASE(OP_ASET) {
+ /* A B C R(B)[C] := R(A) */
+ mrb_ary_set(mrb, regs[GETARG_B(i)], GETARG_C(i), regs[GETARG_A(i)]);
+ NEXT;
+ }
+
+ CASE(OP_APOST) {
+ /* A B C *R(A),R(A+1)..R(A+C) := R(A) */
+ int a = GETARG_A(i);
+ mrb_value v = regs[a];
+ int pre = GETARG_B(i);
+ int post = GETARG_C(i);
+
+ if (v.tt != MRB_TT_ARRAY) {
+ regs[a++] = mrb_ary_new_capa(mrb, 0);
+ while (post--) {
+ SET_NIL_VALUE(regs[a]);
+ a++;
+ }
+ }
+ else {
+ struct RArray *ary = mrb_ary_ptr(v);
+ size_t len = ary->len;
+ int i;
+
+ if (len > pre + post) {
+ regs[a++] = mrb_ary_new_elts(mrb, len - pre - post, ary->buf+pre);
+ while (post--) {
+ regs[a++] = ary->buf[len-post-1];
+ }
+ }
+ else {
+ regs[a++] = mrb_ary_new_capa(mrb, 0);
+ for (i=0; i+pre<len; i++) {
+ regs[a+i] = ary->buf[pre+i];
+ }
+ while (i < post) {
+ SET_NIL_VALUE(regs[a+i]);
+ i++;
+ }
+ }
+ }
+ NEXT;
+ }
+
+ CASE(OP_STRING) {
+ /* A Bx R(A) := str_new(Lit(Bx)) */
+ regs[GETARG_A(i)] = mrb_str_literal(mrb, pool[GETARG_Bx(i)]);
+ NEXT;
+ }
+
+ CASE(OP_STRCAT) {
+ /* A B R(A).concat(R(B)) */
+ mrb_str_concat(mrb, regs[GETARG_A(i)], regs[GETARG_B(i)]);
+ NEXT;
+ }
+
+ CASE(OP_HASH) {
+ /* A B C R(A) := hash_new(R(B),R(B+1)..R(B+C)) */
+ int b = GETARG_B(i);
+ int c = GETARG_C(i);
+ int lim = b+c*2;
+ mrb_value hash = mrb_hash_new_capa(mrb, c);
+
+ while (b < lim) {
+ mrb_hash_set(mrb, hash, regs[b], regs[b+1]);
+ b+=2;
+ }
+ regs[GETARG_A(i)] = hash;
+ NEXT;
+ }
+
+ CASE(OP_LAMBDA) {
+ /* A b c R(A) := lambda(SEQ[b],c) (b:c = 14:2) */
+ struct RProc *p;
+ int c = GETARG_c(i);
+
+ if (c & OP_L_CAPTURE) {
+ p = mrb_closure_new(mrb, mrb->irep[irep->idx+GETARG_b(i)]);
+ }
+ else {
+ p = mrb_proc_new(mrb, mrb->irep[irep->idx+GETARG_b(i)]);
+ }
+ if (c & OP_L_STRICT) p->flags |= MRB_PROC_STRICT;
+ regs[GETARG_A(i)] = mrb_obj_value(p);
+ NEXT;
+ }
+
+ CASE(OP_OCLASS) {
+ /* A R(A) := ::Object */
+ regs[GETARG_A(i)] = mrb_obj_value(mrb->object_class);
+ NEXT;
+ }
+
+ CASE(OP_CLASS) {
+ /* A B R(A) := newclass(R(A),Sym(B),R(A+1)) */
+ struct RClass *c = 0;
+ int a = GETARG_A(i);
+ mrb_value base, super;
+ mrb_sym id = syms[GETARG_B(i)];
+
+ base = regs[a];
+ super = regs[a+1];
+ if (mrb_nil_p(base)) {
+ base = mrb_obj_value(mrb->ci->target_class);
+ }
+ c = mrb_vm_define_class(mrb, base, super, id);
+ regs[a] = mrb_obj_value(c);
+ NEXT;
+ }
+
+ CASE(OP_MODULE) {
+ /* A B R(A) := newmodule(R(A),Sym(B)) */
+ struct RClass *c = 0;
+ int a = GETARG_A(i);
+ mrb_value base;
+ mrb_sym id = syms[GETARG_B(i)];
+
+ base = regs[a];
+ if (mrb_nil_p(base)) {
+ base = mrb_obj_value(mrb->ci->target_class);
+ }
+ c = mrb_vm_define_module(mrb, base, id);
+ regs[a] = mrb_obj_value(c);
+ NEXT;
+ }
+
+ CASE(OP_EXEC) {
+ /* A Bx R(A) := blockexec(R(A),SEQ[Bx]) */
+ int a = GETARG_A(i);
+ mrb_callinfo *ci;
+ mrb_value recv = regs[a];
+ struct RProc *p;
+
+ /* prepare stack */
+ ci = cipush(mrb);
+ ci->pc = pc + 1;
+ ci->acc = a;
+ ci->mid = 0;
+ ci->stackidx = mrb->stack - mrb->stbase;
+ ci->argc = 0;
+ ci->target_class = mrb_class_ptr(regs[GETARG_A(i)]);
+
+ p = mrb_proc_new(mrb, mrb->irep[irep->idx+GETARG_Bx(i)]);
+ p->target_class = ci->target_class;
+ ci->proc = p;
+
+ if (MRB_PROC_CFUNC_P(p)) {
+ mrb->stack[0] = p->body.func(mrb, recv);
+ mrb->arena_idx = ai;
+ if (mrb->exc) goto L_RAISE;
+ /* pop stackpos */
+ regs = mrb->stack = mrb->stbase + ci->stackidx;
+ cipop(mrb);
+ NEXT;
+ }
+ else {
+ /* setup environment for calling method */
+ irep = p->body.irep;
+ pool = irep->pool;
+ syms = irep->syms;
+ mrb->stack += a;
+ stack_extend(mrb, irep->nregs, 1);
+ regs = mrb->stack;
+ pc = irep->iseq;
+ JUMP;
+ }
+ }
+
+ CASE(OP_METHOD) {
+ /* A B R(A).newmethod(Sym(B),R(A+1)) */
+ int a = GETARG_A(i);
+ struct RClass *c = mrb_class_ptr(regs[a]);
+
+ mrb_define_method_vm(mrb, c, syms[GETARG_B(i)], regs[a+1]);
+ NEXT;
+ }
+
+ CASE(OP_SCLASS) {
+ /* A B R(A) := R(B).singleton_class */
+ regs[GETARG_A(i)] = mrb_singleton_class(mrb, regs[GETARG_B(i)]);
+ NEXT;
+ }
+
+ CASE(OP_TCLASS) {
+ /* A B R(A) := target_class */
+ regs[GETARG_A(i)] = mrb_obj_value(mrb->ci->target_class);
+ NEXT;
+ }
+
+ CASE(OP_RANGE) {
+ /* A B C R(A) := range_new(R(B),R(B+1),C) */
+ int b = GETARG_B(i);
+ regs[GETARG_A(i)] = mrb_range_new(mrb, regs[b], regs[b+1], GETARG_C(i));
+ NEXT;
+ }
+
+ CASE(OP_DEBUG) {
+ /* A debug print R(A),R(B),R(C) */
+ printf("OP_DEBUG %d %d %d\n", GETARG_A(i), GETARG_B(i), GETARG_C(i));
+ NEXT;
+ }
+
+ CASE(OP_STOP) {
+ /* stop VM */
+ L_STOP:
+ mrb->jmp = prev_jmp;
+ return mrb_nil_value();
+ }
+
+ CASE(OP_ERR) {
+ /* Bx raise RuntimeError with message Lit(Bx) */
+ mrb_value msg = pool[GETARG_Bx(i)];
+ mrb_value exc = mrb_exc_new3(mrb, mrb->eRuntimeError_class, msg);
+
+ mrb->exc = mrb_object(exc);
+ goto L_RAISE;
+ }
+ }
+ END_DISPACTH;
+}
diff --git a/src/vm_core.h b/src/vm_core.h
new file mode 100644
index 000000000..98da043a2
--- /dev/null
+++ b/src/vm_core.h
@@ -0,0 +1,414 @@
+/**********************************************************************
+
+ vm_core.h -
+
+ $Author: yugui $
+ created at: 04/01/01 19:41:38 JST
+
+ Copyright (C) 2004-2007 Koichi Sasada
+
+**********************************************************************/
+
+#ifndef RUBY_VM_CORE_H
+#define RUBY_VM_CORE_H
+
+#define RUBY_VM_THREAD_MODEL 2
+
+//#include "ruby/ruby.h"
+#include "st.h" /* define ANYARGS */
+
+//#include "node.h"
+//#include "debug.h"
+//#include "vm_opts.h"
+//#include "id.h"
+#include "method.h"
+
+#if defined(_WIN32)
+#include "thread_win32.h"
+#elif defined(HAVE_PTHREAD_H)
+#include "thread_pthread.h"
+#else
+#error "unsupported thread type"
+#endif
+
+#ifndef ENABLE_VM_OBJSPACE
+#ifdef _WIN32
+/*
+ * TODO: object space independent st_table.
+ * socklist needs st_table in mrb_w32_sysinit(), before object space
+ * initialization.
+ * It is too early now to change st_hash_type, since it breaks binary
+ * compatibility.
+ */
+#define ENABLE_VM_OBJSPACE 0
+#else
+#define ENABLE_VM_OBJSPACE 1
+#endif
+#endif
+
+#include <setjmp.h>
+#include <signal.h>
+
+//#ifndef NSIG
+//# define NSIG (_SIGMAX + 1) /* For QNX */
+//#endif
+
+//#define RUBY_NSIG NSIG
+
+#ifdef HAVE_STDARG_PROTOTYPES /* va_init_list(a,b): forwards both arguments to the <stdarg.h> va_start */
+#include <stdarg.h>
+#define va_init_list(a,b) va_start(a,b)
+#else /* <varargs.h> variant: va_start receives only the list, so b is dropped */
+#include <varargs.h>
+#define va_init_list(a,b) va_start(a)
+#endif
+
+#if defined(SIGSEGV) && defined(HAVE_SIGALTSTACK) && defined(SA_SIGINFO) && !defined(__NetBSD__)
+#define USE_SIGALTSTACK
+#endif
+
+/*****************/
+/* configuration */
+/*****************/
+
+/* gcc ver. check
+ * Normalises the OPT_*_THREADED_CODE switches so that conflicting
+ * combinations cannot survive past this point. On GCC 2 or later, enabling
+ * token-threaded code undefines OPT_DIRECT_THREADED_CODE when both are on. */
+#if defined(__GNUC__) && __GNUC__ >= 2
+
+#if OPT_TOKEN_THREADED_CODE
+#if OPT_DIRECT_THREADED_CODE
+#undef OPT_DIRECT_THREADED_CODE
+#endif
+#endif
+
+#else /* defined(__GNUC__) && __GNUC__ >= 2 */
+
+/* disable threaded code options: without GCC 2 or later both the direct-
+ * and the token-threaded switch are undefined when they were enabled */
+#if OPT_DIRECT_THREADED_CODE
+#undef OPT_DIRECT_THREADED_CODE
+#endif
+#if OPT_TOKEN_THREADED_CODE
+#undef OPT_TOKEN_THREADED_CODE
+#endif
+#endif
+
+/* call threaded code: when enabled, direct-threaded code and stack caching
+ * are both undefined, independently of the compiler check above */
+#if OPT_CALL_THREADED_CODE
+#if OPT_DIRECT_THREADED_CODE
+#undef OPT_DIRECT_THREADED_CODE
+#endif /* OPT_DIRECT_THREADED_CODE */
+#if OPT_STACK_CACHING
+#undef OPT_STACK_CACHING
+#endif /* OPT_STACK_CACHING */
+#endif /* OPT_CALL_THREADED_CODE */
+
+/* likely
+ * LIKELY(x) / UNLIKELY(x) pass a condition through __builtin_expect with an
+ * expected value of 1 or 0 respectively on GCC 3 or later; on any other
+ * compiler they expand to the bare, parenthesized expression. */
+#if __GNUC__ >= 3
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+#else /* __GNUC__ >= 3 */
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif /* __GNUC__ >= 3 */
+/* UNINITIALIZED_VAR(x) expands to the self-assignment `x = x` on GCC 3 or
+ * later and to plain `x` elsewhere. NOTE(review): presumably meant to be
+ * used in declarations to silence "may be used uninitialized" warnings --
+ * confirm at the use sites. */
+#if __GNUC__ >= 3
+#define UNINITIALIZED_VAR(x) x = x
+#else
+#define UNINITIALIZED_VAR(x) x
+#endif
+
+typedef unsigned long mrb_num_t;
+
+/* iseq data type */
+
+struct iseq_compile_data_ensure_node_stack;
+
+typedef struct mrb_compile_option_struct { /* option set taken (by const pointer) by mrb_iseq_new_with_opt(); every field is a plain int */
+ int inline_const_cache;
+ int peephole_optimization;
+ int tailcall_optimization;
+ int specialized_instruction;
+ int operands_unification;
+ int instructions_unification;
+ int stack_caching;
+ int trace_instruction;
+ int debug_level;
+} mrb_compile_option_t; /* NOTE(review): all fields except debug_level look like on/off switches -- confirm against the compiler */
+
+struct iseq_inline_cache_entry { /* one inline-cache slot; the IC typedef in this header is a pointer to this struct */
+ mrb_value ic_vmstat; /* NOTE(review): presumably the VM state version the slot was filled under -- confirm */
+ mrb_value ic_class;
+ union { /* cached payload; the three members share storage */
+ mrb_value value;
+ mrb_method_entry_t *method;
+ long index;
+ } ic_value;
+};
+
+#if 1 /* active variant: assigns DATA_PTR(obj), cast to type*, to ptr */
+#define GetCoreDataFromValue(obj, type, ptr) do { \
+ ptr = (type*)DATA_PTR(obj); \
+} while (0)
+#else /* disabled variant: would delegate to Data_Get_Struct instead */
+#define GetCoreDataFromValue(obj, type, ptr) Data_Get_Struct(obj, type, ptr)
+#endif
+/* GetISeqPtr(obj, ptr): GetCoreDataFromValue with the type fixed to mrb_iseq_t */
+#define GetISeqPtr(obj, ptr) \
+ GetCoreDataFromValue(obj, mrb_iseq_t, ptr)
+
+struct mrb_iseq_struct;
+
+//enum ruby_special_exceptions {
+// ruby_error_reenter,
+// ruby_error_nomemory,
+// ruby_error_sysstack,
+// ruby_special_error_count
+//};
+
+#define GetVMPtr(obj, ptr) \
+ GetCoreDataFromValue(obj, mrb_vm_t, ptr)
+
+#if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
+struct mrb_objspace;
+void mrb_objspace_free(struct mrb_objspace *);
+#endif
+
+typedef struct mrb_block_struct { /* block record; RUBY_VM_GET_BLOCK_PTR_IN_CFP() obtains one by casting the address of a control frame's self member */
+ mrb_value self; /* share with method frame if it's only block */
+ mrb_value *lfp; /* share with method frame if it's only block */
+ mrb_value *dfp; /* share with method frame if it's only block */
+ mrb_iseq_t *iseq;
+ mrb_value proc;
+} mrb_block_t; /* NOTE(review): RUBY_VM_GET_CFP_FROM_BLOCK_PTR() steps back 5 mrb_value slots from a block pointer, so the member layout appears load-bearing -- confirm before reordering */
+
+#define GetThreadPtr(obj, ptr) \
+ GetCoreDataFromValue(obj, mrb_thread_t, ptr)
+
+//typedef RUBY_JMP_BUF mrb_jmpbuf_t; /* kusuda */
+#define mrb_jmpbuf_t void* /* kusuda */
+
+struct mrb_vm_protect_tag {
+ struct mrb_vm_protect_tag *prev;
+};
+
+#define RUBY_VM_VALUE_CACHE_SIZE 0x1000
+#define USE_VALUE_CACHE 0
+
+struct mrb_mutex_struct;
+
+
+/* iseq.c */
+mrb_value mrb_iseq_new(NODE*, mrb_value, mrb_value, mrb_value, mrb_value, mrb_value);
+mrb_value mrb_iseq_new_top(NODE *node, mrb_value name, mrb_value filename, mrb_value filepath, mrb_value parent);
+mrb_value mrb_iseq_new_main(NODE *node, mrb_value filename, mrb_value filepath);
+mrb_value mrb_iseq_new_with_bopt(NODE*, mrb_value, mrb_value, mrb_value, mrb_value, mrb_value, mrb_value, mrb_value);
+mrb_value mrb_iseq_new_with_opt(NODE*, mrb_value, mrb_value, mrb_value, mrb_value, mrb_value, mrb_value, const mrb_compile_option_t*);
+mrb_value mrb_iseq_compile(mrb_value src, mrb_value file, mrb_value line);
+mrb_value mrb_iseq_disasm(mrb_value self);
+int mrb_iseq_disasm_insn(mrb_value str, mrb_value *iseqval, size_t pos, mrb_iseq_t *iseq, mrb_value child);
+const char *ruby_node_name(int node);
+int mrb_iseq_first_lineno(mrb_iseq_t *iseq);
+
+RUBY_EXTERN mrb_value mrb_cISeq;
+RUBY_EXTERN mrb_value mrb_cRubyVM;
+RUBY_EXTERN mrb_value mrb_cEnv;
+RUBY_EXTERN mrb_value mrb_mRubyVMFrozenCore;
+
+/* each thread has this size stack : 128KB */
+#define RUBY_VM_THREAD_STACK_SIZE (128 * 1024)
+
+#define GetProcPtr(obj, ptr) \
+ GetCoreDataFromValue(obj, mrb_proc_t, ptr)
+/* Record type that GetProcPtr() stores into ptr: an embedded block plus
+ * proc-level bookkeeping. */
+typedef struct {
+ mrb_block_t block;
+
+ mrb_value envval; /* for GC mark */
+ mrb_value blockprocval;
+ int safe_level;
+ int is_from_method; /* NOTE(review): looks like a boolean flag -- confirm */
+ int is_lambda; /* NOTE(review): looks like a boolean flag -- confirm */
+} mrb_proc_t;
+
+#define GetEnvPtr(obj, ptr) \
+ GetCoreDataFromValue(obj, mrb_env_t, ptr)
+/* Record type that GetEnvPtr() stores into ptr: a value array with its
+ * sizes, a reference to a previous environment, and an embedded block. */
+typedef struct {
+ mrb_value *env; /* NOTE(review): presumably env_size entries long -- confirm */
+ int env_size;
+ int local_size;
+ mrb_value prev_envval; /* for GC mark */
+ mrb_block_t block;
+} mrb_env_t;
+
+//#define GetBindingPtr(obj, ptr)
+// GetCoreDataFromValue(obj, mrb_binding_t, ptr)
+
+//typedef struct {
+// mrb_value env;
+// mrb_value filename;
+// unsigned short line_no;
+//} mrb_binding_t;
+
+/* used by compile time and send insn */
+#define VM_CALL_ARGS_SPLAT_BIT (0x01 << 1)
+#define VM_CALL_ARGS_BLOCKARG_BIT (0x01 << 2)
+#define VM_CALL_FCALL_BIT (0x01 << 3)
+#define VM_CALL_VCALL_BIT (0x01 << 4)
+#define VM_CALL_TAILCALL_BIT (0x01 << 5)
+#define VM_CALL_TAILRECURSION_BIT (0x01 << 6)
+#define VM_CALL_SUPER_BIT (0x01 << 7)
+#define VM_CALL_OPT_SEND_BIT (0x01 << 8)
+
+#define VM_SPECIAL_OBJECT_VMCORE 0x01
+#define VM_SPECIAL_OBJECT_CBASE 0x02
+#define VM_SPECIAL_OBJECT_CONST_BASE 0x03
+
+/*
+ * Frame type tags. A tag lives in the low VM_FRAME_MAGIC_MASK_BITS bits of
+ * a control frame's flag member; VM_FRAME_TYPE() extracts it by masking.
+ */
+#define VM_FRAME_MAGIC_METHOD 0x11
+#define VM_FRAME_MAGIC_BLOCK 0x21
+#define VM_FRAME_MAGIC_CLASS 0x31
+#define VM_FRAME_MAGIC_TOP 0x41
+#define VM_FRAME_MAGIC_FINISH 0x51
+#define VM_FRAME_MAGIC_CFUNC 0x61
+#define VM_FRAME_MAGIC_PROC 0x71
+#define VM_FRAME_MAGIC_IFUNC 0x81
+#define VM_FRAME_MAGIC_EVAL 0x91
+#define VM_FRAME_MAGIC_LAMBDA 0xa1
+#define VM_FRAME_MAGIC_MASK_BITS 8
+/* Built from an unsigned all-ones value: left-shifting the negative int ~0
+ * is undefined behaviour in C, whereas ~0U shifts with defined wrap-around.
+ * The resulting value (0xff) is unchanged. */
+#define VM_FRAME_MAGIC_MASK (~(~0U<<VM_FRAME_MAGIC_MASK_BITS))
+
+/* Frame type tag of the control frame pointed to by cfp. */
+#define VM_FRAME_TYPE(cfp) ((cfp)->flag & VM_FRAME_MAGIC_MASK)
+
+/* other frame flag */
+#define VM_FRAME_FLAG_PASSED 0x0100
+
+/* Non-zero when the frame type of cfp is VM_FRAME_MAGIC_CFUNC. */
+#define RUBYVM_CFUNC_FRAME_P(cfp) \
+  (VM_FRAME_TYPE(cfp) == VM_FRAME_MAGIC_CFUNC)
+
+/* inline cache */
+typedef struct iseq_inline_cache_entry *IC;
+
+extern mrb_value ruby_vm_global_state_version;
+
+#define GET_VM_STATE_VERSION() (ruby_vm_global_state_version)
+#define INC_VM_STATE_VERSION() \
+ (ruby_vm_global_state_version = (ruby_vm_global_state_version+1) & 0x8fffffff)
+void mrb_vm_change_state(void);
+
+typedef mrb_value CDHASH;
+
+/* Tag / untag helpers that keep a flag in the low bits of a pointer-sized
+ * value. GC_GUARDED_PTR sets bit 0, GC_GUARDED_PTR_P tests bit 0, and
+ * GC_GUARDED_PTR_REF clears the two low bits (mask ~0x03) before casting
+ * back to void *. The argument is parenthesized in every macro so that an
+ * expression argument such as `base + 1` is cast as a whole.
+ * NOTE(review): the casts and bitwise operators require mrb_value to be an
+ * integer type wide enough to hold a pointer -- confirm before use. */
+#define GC_GUARDED_PTR(p) ((mrb_value)((mrb_value)(p) | 0x01))
+#define GC_GUARDED_PTR_REF(p) ((void *)(((mrb_value)(p)) & ~0x03))
+#define GC_GUARDED_PTR_P(p) (((mrb_value)(p)) & 0x01)
+
+#define RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp) (cfp+1)
+#define RUBY_VM_NEXT_CONTROL_FRAME(cfp) (cfp-1)
+#define RUBY_VM_END_CONTROL_FRAME(th) \
+ ((mrb_control_frame_t *)((th)->stack + (th)->stack_size))
+#define RUBY_VM_VALID_CONTROL_FRAME_P(cfp, ecfp) \
+ ((void *)(ecfp) > (void *)(cfp))
+#define RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(th, cfp) \
+ (!RUBY_VM_VALID_CONTROL_FRAME_P((cfp), RUBY_VM_END_CONTROL_FRAME(th)))
+
+#define RUBY_VM_IFUNC_P(ptr) (BUILTIN_TYPE(ptr) == T_NODE)
+#define RUBY_VM_NORMAL_ISEQ_P(ptr) \
+ (ptr && !RUBY_VM_IFUNC_P(ptr))
+
+#define RUBY_VM_GET_BLOCK_PTR_IN_CFP(cfp) ((mrb_block_t *)(&(cfp)->self))
+#define RUBY_VM_GET_CFP_FROM_BLOCK_PTR(b) \
+ ((mrb_control_frame_t *)((mrb_value *)(b) - 5))
+
+/* VM related object allocate functions */
+//mrb_value mrb_thread_alloc(mrb_value klass);
+mrb_value mrb_proc_alloc(mrb_value klass);
+
+/* for debug */
+extern void mrb_vmdebug_stack_dump_raw(mrb_thread_t *, mrb_control_frame_t *);
+#define SDR() mrb_vmdebug_stack_dump_raw(GET_THREAD(), GET_THREAD()->cfp)
+#define SDR2(cfp) mrb_vmdebug_stack_dump_raw(GET_THREAD(), (cfp))
+void mrb_vm_bugreport(void);
+
+/* functions about thread/vm execution */
+mrb_value mrb_iseq_eval(mrb_value iseqval);
+mrb_value mrb_iseq_eval_main(mrb_value iseqval);
+void mrb_enable_interrupt(void);
+void mrb_disable_interrupt(void);
+//int mrb_thread_method_id_and_class(mrb_thread_t *th, mrb_sym *idp, mrb_value *klassp);
+
+mrb_value mrb_vm_invoke_proc(mrb_thread_t *th, mrb_proc_t *proc, mrb_value self,
+ int argc, const mrb_value *argv, const mrb_block_t *blockptr);
+mrb_value mrb_vm_make_proc(mrb_thread_t *th, const mrb_block_t *block, mrb_value klass);
+mrb_value mrb_vm_make_env_object(mrb_thread_t *th, mrb_control_frame_t *cfp);
+
+//void mrb_thread_start_timer_thread(void);
+//void mrb_thread_stop_timer_thread(void);
+//void mrb_thread_reset_timer_thread(void);
+//void *mrb_thread_call_with_gvl(void *(*func)(void *), void *data1);
+int ruby_thread_has_gvl_p(void);
+mrb_value mrb_make_backtrace(void);
+typedef int mrb_backtrace_iter_func(void *, mrb_value, int, mrb_value);
+int mrb_backtrace_each(mrb_backtrace_iter_func *iter, void *arg);
+//mrb_control_frame_t *mrb_vm_get_ruby_level_next_cfp(mrb_thread_t *th, mrb_control_frame_t *cfp);
+int mrb_vm_get_sourceline(const mrb_control_frame_t *);
+mrb_value mrb_name_err_mesg_new(mrb_value obj, mrb_value mesg, mrb_value recv, mrb_value method);
+
+NOINLINE(void mrb_gc_save_machine_context(mrb_thread_t *));
+
+//#define sysstack_error GET_VM()->special_exceptions[ruby_error_sysstack]
+
+mrb_value mrb_str_resurrect(mrb_value str);
+mrb_value mrb_ary_resurrect(mrb_value ary);
+
+/* for thread */
+
+#if RUBY_VM_THREAD_MODEL == 2
+RUBY_EXTERN mrb_thread_t *ruby_current_thread;
+extern mrb_vm_t *ruby_current_vm;
+
+/* Accessors for the current VM and current thread pointers declared above. */
+#define GET_VM() ruby_current_vm
+#define GET_THREAD() ruby_current_thread
+/* Store th in ruby_current_thread; the result is discarded. */
+#define mrb_thread_set_current_raw(th) (void)(ruby_current_thread = (th))
+/* Make th the current thread and record it as its VM's running thread.
+ * th is parenthesized so that expression arguments (e.g. `base + 1`) bind
+ * as a whole; it is expanded more than once, so it must have no side
+ * effects. */
+#define mrb_thread_set_current(th) do { \
+  mrb_thread_set_current_raw(th); \
+  (th)->vm->running_thread = (th); \
+} while (0)
+
+#else
+#error "unsupported thread model"
+#endif
+
+#define RUBY_VM_SET_INTERRUPT(th) ((th)->interrupt_flag |= 0x02) /* sets bit 0x02 of th->interrupt_flag */
+#define RUBY_VM_SET_TIMER_INTERRUPT(th) ((th)->interrupt_flag |= 0x01) /* sets bit 0x01 */
+#define RUBY_VM_SET_FINALIZER_INTERRUPT(th) ((th)->interrupt_flag |= 0x04) /* sets bit 0x04 */
+#define RUBY_VM_INTERRUPTED(th) ((th)->interrupt_flag & 0x02) /* tests only bit 0x02; the timer and finalizer bits alone leave it zero */
+
+void mrb_threadptr_check_signal(mrb_thread_t *mth);
+//void mrb_threadptr_signal_raise(mrb_thread_t *th, int sig);
+void mrb_threadptr_signal_exit(mrb_state *mrb, mrb_thread_t *th);
+//void mrb_threadptr_execute_interrupts(mrb_thread_t *);
+
+void mrb_thread_lock_unlock(mrb_thread_lock_t *);
+void mrb_thread_lock_destroy(mrb_thread_lock_t *);
+
+//#define RUBY_VM_CHECK_INTS_TH(th) do { \
+// if (UNLIKELY(th->interrupt_flag)) { \
+// mrb_threadptr_execute_interrupts(th); \
+// } \
+//} while (0)
+
+//#define RUBY_VM_CHECK_INTS() \
+// RUBY_VM_CHECK_INTS_TH(GET_THREAD())
+
+/* tracer */
+//void
+//mrb_threadptr_exec_event_hooks(mrb_thread_t *th, mrb_event_flag_t flag, mrb_value self, mrb_sym id, mrb_value klass);
+#if 0
+#define EXEC_EVENT_HOOK(th, flag, self, id, klass) do { \
+ mrb_event_flag_t wait_event__ = th->event_flags; \
+ if (UNLIKELY(wait_event__)) { \
+ if (wait_event__ & (flag | RUBY_EVENT_VM)) { \
+ mrb_threadptr_exec_event_hooks(th, flag, self, id, klass); \
+ } \
+ } \
+} while (0)
+#endif
+#endif /* RUBY_VM_CORE_H */
diff --git a/tools/mrbc/Makefile b/tools/mrbc/Makefile
new file mode 100644
index 000000000..767f5c074
--- /dev/null
+++ b/tools/mrbc/Makefile
@@ -0,0 +1,73 @@
+# makefile description.
+# basic build file for Rite-Compiler
+# 11.Apr.2011 coded by Kenji Yoshimoto.
+# 31.Aug.2011 coded by Hiroshi Mimaki.
+
+# project-specific macros
+# extension of the executable-file is modifiable(.exe .out ...)
+BASEDIR := ../../src
+TARGET := ../../bin/mrbc
+ifeq ($(OS),Windows_NT)
+EXE := $(TARGET).exe
+else
+EXE := $(TARGET)
+endif
+YSRC := $(BASEDIR)/parse.y
+YC := $(BASEDIR)/y.tab.c
+EXCEPT1 := $(YC) $(BASEDIR)/minimain.c $(BASEDIR)/load.c $(BASEDIR)/init_ext.c
+OBJY := $(patsubst %.c,%.o,$(YC))
+OBJ0 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/../tools/mrbc/*.c))
+OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard $(BASEDIR)/*.c)))
+#OBJ2 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/regex/*.c))
+#OBJ3 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/enc/*.c))
+OBJS := $(OBJ0) $(OBJ1) $(OBJ2) $(OBJ3)
+
+# libraries, includes
+LIBS = -lm
+INCLUDES = -I$(BASEDIR) -I$(BASEDIR)/../include
+
+# compiler, linker (gcc)
+CC = gcc
+LL = gcc
+YACC = bison
+DEBUG_MODE = 1
+ifeq ($(DEBUG_MODE),1)
+CFLAGS = -g
+else
+CFLAGS = -O3
+endif
+ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS)
+MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)"
+
+##############################
+# generic build targets, rules
+
+.PHONY : all
+all : $(EXE)
+ @echo "make: built targets of `pwd`"
+
+# executable constructed using linker from object files
+$(EXE) : $(OBJS) $(OBJY)
+ $(LL) -o $@ $(OBJS) $(OBJY) $(LIBS)
+
+-include $(OBJS:.o=.d) $(OBJY:.o=.d)
+
+# objects compiled from source
+$(OBJS) : %.o : %.c
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@
+
+# parser compile
+$(OBJY) : $(YC)
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(YC) -o $(OBJY)
+
+# yacc compile
+$(YC) : $(YSRC)
+ $(YACC) -o $(YC) $(YSRC)
+
+# clean up
+.PHONY : clean
+clean :
+ -rm -f $(EXE) $(OBJS) $(OBJY) $(YC)
+ -rm -f $(OBJS:.o=.d) $(OBJY:.o=.d)
+ @echo "make: removing targets, objects and depend files of `pwd`"
+
diff --git a/tools/mrbc/mrbc.c b/tools/mrbc/mrbc.c
new file mode 100644
index 000000000..1e54edbdf
--- /dev/null
+++ b/tools/mrbc/mrbc.c
@@ -0,0 +1,210 @@
+#include "mruby.h"
+#include "mruby/proc.h"
+#include "compile.h"
+#include "dump.h"
+#include "cdump.h"
+#include "stdio.h"
+#include "memory.h"
+#include "stdlib.h"
+
+#define RITEBIN_EXT ".mrb"
+#define C_EXT ".c"
+void ruby_show_version(mrb_state *);
+void ruby_show_copyright(mrb_state *);
+void parser_dump(mrb_state*, struct mrb_ast_node*, int);
+void codedump_all(mrb_state*, int);
+
+/*
+ * Command-line state of mrbc, cleared and filled in by parse_args().
+ */
+struct _args {
+  FILE *rfp;       /* program file opened for reading */
+  FILE *wfp;       /* output file opened for writing */
+  char *initname;  /* name given with -B/-C; left zeroed otherwise */
+  char *ext;       /* extension used when the output name is derived */
+  /*
+   * The flags are unsigned on purpose: a plain int bit-field may be signed,
+   * and a signed 1-bit field cannot portably hold the value 1 stored here.
+   */
+  unsigned int check_syntax : 1;  /* -c */
+  unsigned int dump_type : 2;     /* DUMP_TYPE_BIN or DUMP_TYPE_CODE */
+  unsigned int verbose : 1;       /* -v / --verbose */
+};
+
+/*
+ * Print the command synopsis followed by the list of supported switches
+ * to standard output.
+ *
+ * name: program name shown in the "Usage:" line.
+ */
+static void
+usage(const char *name)
+{
+  static const char *const usage_msg[] = {
+  "switches:",
+  "-c check syntax only",
+  "-o<outfile> place the output into <outfile>",
+  "-v print version number, then turn on verbose mode",
+  "-B<symbol> binary <symbol> output in C language format",
+  "-C<func> function <func> output in C language format",
+  "--verbose run at verbose mode",
+  "--version print the version",
+  "--copyright print the copyright",
+  NULL  /* sentinel that ends the list */
+  };
+  const char *const *line;
+
+  printf("Usage: %s [switches] programfile\n", name);
+  for (line = usage_msg; *line != NULL; line++)
+    printf(" %s\n", *line);
+}
+
+static char *
+get_outfilename(char *infile, char *ext)
+{
+ char *outfile;
+ char *p;
+
+ outfile = (char*)malloc(strlen(infile) + strlen(ext) + 1);
+ strcpy(outfile, infile);
+ if (*ext) {
+ if ((p = strrchr(outfile, '.')) == NULL)
+ p = &outfile[strlen(outfile)];
+ strcpy(p, ext);
+ }
+
+ return outfile;
+}
+
+/*
+ * Parse the mrbc command line into *args and open the input/output files.
+ *
+ * mrb:        state handed to the version/copyright printers.
+ * argc, argv: arguments as received by main(); argv[0] is used in messages.
+ * args:       cleared first, then filled in.  Streams left open in
+ *             args->rfp / args->wfp are released by the caller's cleanup().
+ *
+ * Returns 0 when parsing finished; the caller must still check args->rfp,
+ * which is NULL after --version/--copyright or when the program file could
+ * not be opened.  A negative value reports an error:
+ *   -1  a lone "-" argument
+ *   -2  -B/-C without a name
+ *   -3  unknown "--" option
+ *   -4  no program file given
+ *   -5  the output file could not be opened
+ */
+static int
+parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args)
+{
+  char *infile = NULL;
+  char *outfile = NULL;   /* allocated by get_outfilename(), freed at done */
+  char **origargv = argv;
+  int rc = 0;
+
+  memset(args, 0, sizeof(*args));
+  args->ext = RITEBIN_EXT;
+
+  for (argc--,argv++; argc > 0; argc--,argv++) {
+    if (**argv == '-') {
+      if ((*argv)[1] == '\0') {
+        rc = -1;
+        goto done;
+      }
+
+      switch ((*argv)[1]) {
+      case 'o':
+        free(outfile);  /* a repeated -o replaces the earlier name */
+        outfile = get_outfilename((*argv) + 2, "");
+        break;
+      case 'B':
+      case 'C':
+        args->ext = C_EXT;
+        args->initname = (*argv) + 2;
+        if (*args->initname == '\0') {
+          printf("%s: Function name is not specified.\n", *origargv);
+          rc = -2;
+          goto done;
+        }
+        args->dump_type = ((*argv)[1] == 'B') ? DUMP_TYPE_BIN : DUMP_TYPE_CODE;
+        break;
+      case 'c':
+        args->check_syntax = 1;
+        break;
+      case 'v':
+        ruby_show_version(mrb);
+        args->verbose = 1;
+        break;
+      case '-':
+        if (strcmp((*argv) + 2, "verbose") == 0) {
+          args->verbose = 1;
+          break;
+        }
+        if (strcmp((*argv) + 2, "version") == 0) {
+          ruby_show_version(mrb);
+        }
+        else if (strcmp((*argv) + 2, "copyright") == 0) {
+          ruby_show_copyright(mrb);
+        }
+        else {
+          rc = -3;
+          goto done;
+        }
+        /*
+         * Parsing stops here and no output file is opened, so release an
+         * input file opened by an earlier argument; otherwise the caller
+         * would go on to compile with args->wfp still NULL.
+         */
+        if (args->rfp != NULL) {
+          fclose(args->rfp);
+          args->rfp = NULL;
+        }
+        goto done;
+      default:
+        /* unrecognized single-letter switches are ignored */
+        break;
+      }
+    }
+    else if (args->rfp == NULL) {
+      infile = *argv;
+      if ((args->rfp = fopen(infile, "r")) == NULL) {
+        printf("%s: Cannot open program file. (%s)\n", *origargv, infile);
+        goto done;  /* rc stays 0; the caller notices the NULL rfp */
+      }
+    }
+  }
+
+  if (infile == NULL) {
+    rc = -4;
+    goto done;
+  }
+  if (args->check_syntax)
+    goto done;  /* syntax check only: no output file is needed */
+
+  if (outfile == NULL)
+    outfile = get_outfilename(infile, args->ext);
+
+  if ((args->wfp = fopen(outfile, "wb")) == NULL) {
+    printf("%s: Cannot open output file. (%s)\n", *origargv, outfile);
+    rc = -5;  /* report the failure instead of returning 0 with a NULL wfp */
+  }
+
+done:
+  free(outfile);
+  return rc;
+}
+
+/*
+ * Close whichever of the input and output streams in *args are open.
+ */
+static void
+cleanup(struct _args *args)
+{
+  FILE *in = args->rfp;
+  FILE *out = args->wfp;
+
+  if (in != NULL) {
+    fclose(in);
+  }
+  if (out != NULL) {
+    fclose(out);
+  }
+}
+
+/*
+ * mrbc entry point: parse the program file named on the command line,
+ * generate code for it and write the result to the output file.
+ *
+ * Returns the parse_args() result (after printing the usage text) when the
+ * arguments are rejected or no program file is open, -1 on a parse error
+ * or when no output stream is available, the mrb_generate_code() result
+ * when it is negative or when only a syntax check was requested, and
+ * otherwise the value returned by the selected dump routine.
+ */
+int
+main(int argc, char **argv)
+{
+  mrb_state *mrb = mrb_open();
+  int n;
+  struct _args args;
+  struct mrb_parser_state *p;
+
+  n = parse_args(mrb, argc, argv, &args);
+
+  if (n < 0 || args.rfp == NULL) {
+    cleanup(&args);
+    usage(argv[0]);
+    return n;
+  }
+
+  p = mrb_parse_file(mrb, args.rfp);
+  if (!p || !p->tree || p->nerr) {
+    cleanup(&args);
+    return -1;
+  }
+
+  if (args.verbose)
+    parser_dump(mrb, p->tree, 0);
+
+  n = mrb_generate_code(mrb, p->tree);
+  mrb_pool_close(p->pool);
+
+  /* a negative n is a failure code, so it must not reach codedump_all() */
+  if (n < 0) {
+    cleanup(&args);
+    return n;
+  }
+
+  if (args.verbose)
+    codedump_all(mrb, n);
+
+  if (args.check_syntax) {
+    cleanup(&args);
+    return n;
+  }
+
+  /* never hand a NULL stream to the dump routines */
+  if (args.wfp == NULL) {
+    cleanup(&args);
+    return -1;
+  }
+
+  if (args.initname) {
+    /* -B/-C: emit C source under the given name */
+    if (args.dump_type == DUMP_TYPE_BIN)
+      n = mrb_bdump_irep(mrb, n, args.wfp, args.initname);
+    else
+      n = mrb_cdump_irep(mrb, n, args.wfp, args.initname);
+  }
+  else {
+    n = mrb_dump_irep(mrb, n, args.wfp);
+  }
+
+  cleanup(&args);
+
+  return n;
+}
+
+/*
+ * Intentionally empty definition of mrb_init_ext() for the mrbc tool.
+ * The mrbc Makefile leaves src/init_ext.c out of the build (it is listed
+ * in EXCEPT1), so this stub supplies the symbol instead.
+ * NOTE(review): presumably called while the interpreter state is set up --
+ * confirm against the core initialization code.
+ */
+void
+mrb_init_ext(mrb_state *mrb)
+{
+}
+
+/*
+ * Intentionally empty definition of mrb_init_mrblib() for the mrbc tool.
+ * The mrbc Makefile does not link the mrblib object that the mruby tool's
+ * Makefile builds and links.
+ * NOTE(review): presumably that object normally provides this function and
+ * the core initialization code calls it -- confirm.
+ */
+void
+mrb_init_mrblib(mrb_state *mrb)
+{
+}
+
diff --git a/tools/mruby/Makefile b/tools/mruby/Makefile
new file mode 100644
index 000000000..18882e4ee
--- /dev/null
+++ b/tools/mruby/Makefile
@@ -0,0 +1,89 @@
+# makefile description.
+# basic build file for Rite-Interpreter
+# 11.Apr.2011 coded by Kenji Yoshimoto.
+# 31.Aug.2011 coded by Hiroshi Mimaki.
+
+# project-specific macros
+# extension of the executable-file is modifiable(.exe .out ...)
+BASEDIR = ../../src
+TARGET := ../../bin/mruby
+ifeq ($(OS),Windows_NT)
+EXE := $(TARGET).exe
+else
+EXE := $(TARGET)
+endif
+# parser source and the C file generated from it
+YSRC := $(BASEDIR)/parse.y
+YC := $(BASEDIR)/y.tab.c
+# sources under $(BASEDIR) that are filtered out of OBJ1
+EXCEPT1 := $(YC) $(BASEDIR)/minimain.c $(BASEDIR)/dump.c $(BASEDIR)/cdump.c
+OBJY := $(patsubst %.c,%.o,$(YC))
+# OBJ0: objects of this tool's own sources, OBJ1: objects of the core sources
+OBJ0 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/../tools/mruby/*.c))
+OBJ1 := $(patsubst %.c,%.o,$(filter-out $(EXCEPT1),$(wildcard $(BASEDIR)/*.c)))
+#OBJ2 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/regex/*.c))
+#OBJ3 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/ext/enc/*.c))
+OBJS := $(OBJ0) $(OBJ1) $(OBJ2) $(OBJ3)
+# mruby libraries
+EXTC := $(BASEDIR)/../mrblib/mrblib.c
+EXTRB := $(wildcard $(BASEDIR)/../mrblib/*.rb)
+EXTM := $(patsubst %.c,%.o,$(EXTC))
+# ext libraries
+#EXT1 := $(patsubst %.c,%.o,$(wildcard $(BASEDIR)/../ext/socket/*.c))
+EXTS := $(EXT1)
+
+# libraries, includes
+LIBS = -lm
+INCLUDES = -I$(BASEDIR) -I$(BASEDIR)/../include
+#INCLUDES = -I$(RITEVM_ROOT)
+
+# compiler, linker (gcc)
+CC = gcc
+LL = gcc
+YACC = bison
+# DEBUG_MODE = 1 builds with -g, any other value builds with -O3
+DEBUG_MODE = 1
+ifeq ($(DEBUG_MODE),1)
+CFLAGS = -g
+else
+CFLAGS = -O3
+endif
+ALL_CFLAGS = -Wall -Werror-implicit-function-declaration $(CFLAGS)
+# flags handed to the sub-make that is run in ../../mrblib
+MAKE_FLAGS = --no-print-directory CC="$(CC)" LL="$(LL)"
+
+##############################
+# generic build targets, rules
+
+.PHONY : all
+all : $(EXTM) $(EXE)
+ @echo "make: built targets of `pwd`"
+
+# executable constructed using linker from object files
+$(EXE) : $(OBJS) $(OBJY) $(EXTM) $(EXTS)
+ $(LL) -o $@ $(OBJS) $(OBJY) $(EXTM) $(EXTS) $(LIBS)
+
+# dependency files written by the -MMD compiles below; missing ones are ignored
+-include $(OBJS:.o=.d) $(OBJY:.o=.d)
+
+# objects compiled from source
+$(OBJS) : %.o : %.c
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@
+
+# mruby library compile (delegated to the Makefile in ../../mrblib)
+$(EXTM) : $(EXTRB) $(OBJS) $(OBJY)
+ $(MAKE) -C ../../mrblib $(MAKE_FLAGS)
+
+# extend libraries compile
+$(EXTS) : %.o : %.c
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $< -o $@
+
+# parser compile
+$(OBJY) : $(YC)
+ $(CC) $(ALL_CFLAGS) -MMD $(INCLUDES) -c $(YC) -o $(OBJY)
+
+# yacc compile
+$(YC) : $(YSRC)
+ $(YACC) -o $(YC) $(YSRC)
+
+# clean up
+.PHONY : clean #cleandep
+clean :
+ $(MAKE) clean -C ../../mrblib $(MAKE_FLAGS)
+ -rm -f $(EXE) $(OBJS) $(OBJY) $(YC) $(EXTS)
+ -rm -f $(OBJS:.o=.d) $(OBJY:.o=.d) $(EXTS:.o=.d)
+ @echo "make: removing targets, objects and depend files of `pwd`"
diff --git a/tools/mruby/mruby.c b/tools/mruby/mruby.c
new file mode 100644
index 000000000..4e84b3c7d
--- /dev/null
+++ b/tools/mruby/mruby.c
@@ -0,0 +1,143 @@
+#include "mruby.h"
+#include "mruby/proc.h"
+#include "compile.h"
+#include "dump.h"
+#include "stdio.h"
+#include "string.h"
+
+void ruby_show_version(mrb_state *);
+void ruby_show_copyright(mrb_state *);
+void parser_dump(mrb_state*, struct mrb_ast_node*, int);
+void codedump_all(mrb_state*, int);
+
+/*
+ * Command-line state of the mruby tool, cleared and filled in by
+ * parse_args().
+ */
+struct _args {
+  FILE *rfp;  /* program file opened for reading */
+  /*
+   * The flags are unsigned on purpose: a plain int bit-field may be signed,
+   * and a signed 1-bit field cannot portably hold the value 1 stored here.
+   */
+  unsigned int mrbfile : 1;       /* -b: input is a RiteBinary (mrb) file */
+  unsigned int check_syntax : 1;  /* -c */
+  unsigned int verbose : 1;       /* -v / --verbose */
+};
+
+/*
+ * Print the command synopsis followed by the list of supported switches
+ * to standard output.
+ *
+ * name: program name shown in the "Usage:" line.
+ */
+static void
+usage(const char *name)
+{
+  static const char *const usage_msg[] = {
+  "switches:",
+  "-b load and execute RiteBinary(mrb) file",
+  "-c check syntax only",
+  "-v print version number, then turn on verbose mode",
+  "--verbose run at verbose mode",
+  "--version print the version",
+  "--copyright print the copyright",
+  NULL  /* sentinel that ends the list */
+  };
+  const char *const *line;
+
+  printf("Usage: %s [switches] programfile\n", name);
+  for (line = usage_msg; *line != NULL; line++)
+    printf(" %s\n", *line);
+}
+
+/*
+ * Parse the mruby command line into *args and open the program file.
+ *
+ * mrb:        state handed to the version/copyright printers.
+ * argc, argv: arguments as received by main(); argv[0] is used in messages.
+ * args:       cleared first, then filled in; the first non-switch argument
+ *             is opened into args->rfp ("rb" if -b was already seen).
+ *
+ * Returns -1 for a lone "-", -3 for an unknown "--" option and 0 otherwise
+ * (including after --version/--copyright and after a failed open, where
+ * args->rfp is NULL).
+ */
+static int
+parse_args(mrb_state *mrb, int argc, char **argv, struct _args *args)
+{
+  int i;
+
+  memset(args, 0, sizeof(*args));
+
+  for (i = 1; i < argc; i++) {
+    char *arg = argv[i];
+
+    if (arg[0] != '-') {
+      /* only the first file name is opened; later ones are skipped */
+      if (args->rfp != NULL)
+        continue;
+      args->rfp = fopen(arg, args->mrbfile ? "rb" : "r");
+      if (args->rfp == NULL) {
+        printf("%s: Cannot open program file. (%s)\n", argv[0], arg);
+        return 0;
+      }
+      continue;
+    }
+
+    if (arg[1] == '\0')
+      return -1;
+
+    switch (arg[1]) {
+    case 'b':
+      args->mrbfile = 1;
+      break;
+    case 'c':
+      args->check_syntax = 1;
+      break;
+    case 'v':
+      ruby_show_version(mrb);
+      args->verbose = 1;
+      break;
+    case '-': {
+      const char *longopt = arg + 2;
+
+      if (strcmp(longopt, "verbose") == 0) {
+        args->verbose = 1;
+        break;
+      }
+      if (strcmp(longopt, "version") == 0)
+        ruby_show_version(mrb);
+      else if (strcmp(longopt, "copyright") == 0)
+        ruby_show_copyright(mrb);
+      else
+        return -3;
+      /* --version and --copyright end the parsing */
+      return 0;
+    }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Close the program file in *args if one is open.
+ */
+static void
+cleanup(struct _args *args)
+{
+  FILE *in = args->rfp;
+
+  if (in != NULL) {
+    fclose(in);
+  }
+}
+
+/*
+ * mruby entry point: load a RiteBinary file (-b) or parse and compile a
+ * program file, then run the result unless only a syntax check (-c) was
+ * requested.
+ *
+ * Returns the parse_args() result (after printing the usage text) when the
+ * arguments are rejected or no program file is open, -1 on a parse error,
+ * and otherwise the value obtained from mrb_load_irep() or
+ * mrb_generate_code().
+ */
+int
+main(int argc, char **argv)
+{
+  mrb_state *mrb = mrb_open();
+  struct _args args;
+  int n = parse_args(mrb, argc, argv, &args);
+
+  if (n < 0 || args.rfp == NULL) {
+    cleanup(&args);
+    usage(argv[0]);
+    return n;
+  }
+
+  if (!args.mrbfile) {
+    struct mrb_parser_state *p = mrb_parse_file(mrb, args.rfp);
+
+    if (p == NULL || !p->tree || p->nerr) {
+      cleanup(&args);
+      return -1;
+    }
+
+    if (args.verbose) {
+      parser_dump(mrb, p->tree, 0);
+    }
+
+    n = mrb_generate_code(mrb, p->tree);
+    mrb_pool_close(p->pool);
+  }
+  else {
+    n = mrb_load_irep(mrb, args.rfp);
+  }
+
+  /* a negative n means loading or code generation failed */
+  if (n < 0) {
+    cleanup(&args);
+    return n;
+  }
+
+  if (args.verbose) {
+    codedump_all(mrb, n);
+  }
+
+  if (!args.check_syntax) {
+    mrb_run(mrb, mrb_proc_new(mrb, mrb->irep[n]), mrb_nil_value());
+    /* an exception left in mrb->exc after the run is printed via "p" */
+    if (mrb->exc) {
+      mrb_funcall(mrb, mrb_nil_value(), "p", 1, mrb_obj_value(mrb->exc));
+    }
+  }
+
+  cleanup(&args);
+  return n;
+}