summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorYukihiro "Matz" Matsumoto <[email protected]>2013-02-17 23:08:58 -0800
committerYukihiro "Matz" Matsumoto <[email protected]>2013-02-17 23:08:58 -0800
commit9b06a4506ed2ae222b19cafd184c66521b720581 (patch)
tree7284c80d736823c33978dce1f217973d108a905f
parent7ba3275bf70fa62a34ea9015e2b03eefd180afe9 (diff)
parente0f25b1fda0c9c67526885fafdabf35d4d4039b7 (diff)
downloadmruby-9b06a4506ed2ae222b19cafd184c66521b720581.tar.gz
mruby-9b06a4506ed2ae222b19cafd184c66521b720581.zip
Merge pull request #850 from mattn/pluggable_regexp
Pluggable regexp
-rw-r--r--include/mrbconf.h2
-rw-r--r--include/mruby/class.h4
-rw-r--r--include/mruby/compile.h1
-rw-r--r--include/mruby/string.h4
-rw-r--r--src/codegen.c20
-rw-r--r--src/dump.c27
-rw-r--r--src/encoding.h345
-rw-r--r--src/etc.c1
-rw-r--r--src/gc.c35
-rw-r--r--src/init.c4
-rw-r--r--src/load.c10
-rw-r--r--src/node.h1
-rw-r--r--src/oniguruma.h771
-rw-r--r--src/parse.y83
-rw-r--r--src/re.c2565
-rw-r--r--src/re.h72
-rw-r--r--src/regcomp.c6288
-rw-r--r--src/regenc.c901
-rw-r--r--src/regenc.h203
-rw-r--r--src/regerror.c375
-rw-r--r--src/regex.h26
-rw-r--r--src/regexec.c3757
-rw-r--r--src/regint.h838
-rw-r--r--src/regparse.c5600
-rw-r--r--src/regparse.h354
-rw-r--r--src/sprintf.c1
-rw-r--r--src/string.c397
-rw-r--r--src/struct.c4
-rw-r--r--src/variable.c4
29 files changed, 156 insertions, 22537 deletions
diff --git a/include/mrbconf.h b/include/mrbconf.h
index 17f6a0fc3..da60086bf 100644
--- a/include/mrbconf.h
+++ b/include/mrbconf.h
@@ -44,7 +44,7 @@
//#define POOL_PAGE_SIZE 16000
/* -DDISABLE_XXXX to drop the feature */
-#define DISABLE_REGEXP /* regular expression classes */
+//#define DISABLE_REGEXP /* regular expression classes */
//#define DISABLE_SPRINTF /* Kernel.sprintf method */
//#define DISABLE_MATH /* Math functions */
//#define DISABLE_TIME /* Time class */
diff --git a/include/mruby/class.h b/include/mruby/class.h
index eda54dff9..ca05c0d9f 100644
--- a/include/mruby/class.h
+++ b/include/mruby/class.h
@@ -42,13 +42,11 @@ mrb_class(mrb_state *mrb, mrb_value v)
case MRB_TT_MAIN:
return mrb->object_class;
-#ifdef ENABLE_REGEXP
case MRB_TT_REGEX:
case MRB_TT_MATCH:
- mrb_raise(mrb, E_TYPE_ERROR, "type mismatch: %s given",
+ mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %s given",
mrb_obj_classname(mrb, v));
return mrb->nil_class; /* not reach */
-#endif
default:
return mrb_object(v)->c;
}
diff --git a/include/mruby/compile.h b/include/mruby/compile.h
index 5bdc7ccea..d0de0153b 100644
--- a/include/mruby/compile.h
+++ b/include/mruby/compile.h
@@ -72,6 +72,7 @@ struct mrb_parser_state {
enum mrb_lex_state_enum lstate;
int sterm;
+ int regexp;
unsigned int cond_stack;
unsigned int cmdarg_stack;
diff --git a/include/mruby/string.h b/include/mruby/string.h
index fca8b5b47..426cbe9ff 100644
--- a/include/mruby/string.h
+++ b/include/mruby/string.h
@@ -11,10 +11,6 @@
extern "C" {
#endif
-#ifdef INCLUDE_ENCODING
-#include "encoding.h"
-#endif
-
#ifndef RB_GC_GUARD
#define RB_GC_GUARD(v) v
#endif
diff --git a/src/codegen.c b/src/codegen.c
index 1f6d16477..86cb5eb87 100644
--- a/src/codegen.c
+++ b/src/codegen.c
@@ -16,6 +16,7 @@
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
+#include "re.h"
typedef mrb_ast_node node;
typedef struct mrb_parser_state parser_state;
@@ -1910,6 +1911,25 @@ codegen(codegen_scope *s, node *tree, int val)
}
break;
+ case NODE_REGX:
+ if (val) {
+ char *p1 = (char*)tree->car;
+ //char *p2 = (char*)tree->cdr;
+ int ai = mrb_gc_arena_save(s->mrb);
+ struct RClass* c = mrb_class_get(s->mrb, REGEXP_CLASS);
+ mrb_value args[2];
+ args[0] = mrb_str_new(s->mrb, p1, strlen(p1));
+ // TODO: Some regexp implementation does not have second argument
+ //args[1] = mrb_str_new(s->mrb, p2, strlen(p2));
+ int off = new_lit(s,
+ mrb_class_new_instance(s->mrb, 1, args, c));
+
+ mrb_gc_arena_restore(s->mrb, ai);
+ genop(s, MKOP_ABx(OP_LOADL, cursp(), off));
+ push();
+ }
+ break;
+
case NODE_SYM:
if (val) {
int sym = new_sym(s, sym(tree));
diff --git a/src/dump.c b/src/dump.c
index e0d755c8c..e8d96c570 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -9,9 +9,6 @@
#include <ctype.h>
#include "mruby/string.h"
-#ifdef ENABLE_REGEXP
-#include "re.h"
-#endif
#include "mruby/irep.h"
static const unsigned char def_rite_binary_header[] =
@@ -256,13 +253,6 @@ get_pool_block_size(mrb_state *mrb, mrb_irep *irep, int type)
nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
size += nlen;
break;
-#ifdef ENABLE_REGEXP
- case MRB_TT_REGEX:
- str = mrb_reg_to_s(mrb, irep->pool[pool_no]);
- nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
- size += nlen;
- break;
-#endif
default:
break;
}
@@ -390,23 +380,6 @@ write_pool_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type)
str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type);
break;
-#ifdef ENABLE_REGEXP
- case MRB_TT_REGEX:
- str = mrb_reg_to_s(mrb, irep->pool[pool_no]);
- len = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type);
- if ( len > buf_size - 1) {
- buf_size = len + 1;
- char_buf = mrb_realloc(mrb, char_buf, buf_size);
- if (char_buf == NULL) {
- result = MRB_DUMP_GENERAL_FAILURE;
- goto error_exit;
- }
- memset(char_buf, 0, buf_size);
- }
- str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type);
- break;
-#endif
-
default:
buf += uint16_dump(0, buf, type); /* data length = 0 */
continue;
diff --git a/src/encoding.h b/src/encoding.h
deleted file mode 100644
index 7bc0d9ef0..000000000
--- a/src/encoding.h
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
-** encoding.h - Encoding class
-**
-** See Copyright Notice in mruby.h
-*/
-
-#ifndef RUBY_ENCODING_H
-#define RUBY_ENCODING_H 1
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#include <stdarg.h>
-#include "oniguruma.h"
-#include "mruby/data.h"
-
-#define FL_USHIFT 12
-
-#define FL_USER0 (((int)1)<<(FL_USHIFT+0))
-#define FL_USER1 (((int)1)<<(FL_USHIFT+1))
-#define FL_USER2 (((int)1)<<(FL_USHIFT+2))
-#define FL_USER3 (((int)1)<<(FL_USHIFT+3))
-#define FL_USER4 (((int)1)<<(FL_USHIFT+4))
-#define FL_USER5 (((int)1)<<(FL_USHIFT+5))
-#define FL_USER6 (((int)1)<<(FL_USHIFT+6))
-#define FL_USER7 (((int)1)<<(FL_USHIFT+7))
-#define FL_USER8 (((int)1)<<(FL_USHIFT+8))
-#define FL_USER9 (((int)1)<<(FL_USHIFT+9))
-
-#define ENCODING_INLINE_MAX 1023
-/* 1023 = 0x03FF */
-/*#define ENCODING_SHIFT (FL_USHIFT+10)*/
-#define ENCODING_SHIFT (10)
-#define ENCODING_MASK (((unsigned int)ENCODING_INLINE_MAX)<<ENCODING_SHIFT)
-
-#define ENCODING_SET_INLINED(obj,i) do {\
- mrb_obj_ptr(obj)->flags &= ~ENCODING_MASK;\
- mrb_obj_ptr(obj)->flags |= (unsigned int)(i) << ENCODING_SHIFT;\
-} while (0)
-#define ENCODING_SET(mrb, obj,i) do {\
- mrb_value mrb_encoding_set_obj = (obj); \
- int encoding_set_enc_index = (i); \
- if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
- ENCODING_SET_INLINED(mrb_encoding_set_obj, encoding_set_enc_index); \
- else \
- mrb_enc_set_index(mrb, mrb_encoding_set_obj, encoding_set_enc_index); \
-} while (0)
-
-#define ENCODING_GET_INLINED(obj) (unsigned int)((RSTRING(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT)
-#define ENCODING_GET(mrb, obj) \
- (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
- ENCODING_GET_INLINED(obj) : \
- mrb_enc_get_index(mrb, obj))
-
-#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
-
-#define ENCODING_MAXNAMELEN 42
-
-#define ENC_CODERANGE_MASK ((int)(FL_USER8|FL_USER9))
-#define ENC_CODERANGE_UNKNOWN 0
-#define ENC_CODERANGE_7BIT ((int)FL_USER8)
-#define ENC_CODERANGE_VALID ((int)FL_USER9)
-#define ENC_CODERANGE_BROKEN ((int)(FL_USER8|FL_USER9))
-#define ENC_CODERANGE(obj) ((int)(RSTRING(obj)->flags & ENC_CODERANGE_MASK))
-#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
-#ifdef INCLUDE_ENCODING
-#define ENC_CODERANGE_SET(obj,cr) (RSTRING(obj)->flags = \
- (RSTRING(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
-#else
-#define ENC_CODERANGE_SET(obj,cr)
-#endif //INCLUDE_ENCODING
-#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0)
-
-/* assumed ASCII compatibility */
-#define ENC_CODERANGE_AND(a, b) \
- (a == ENC_CODERANGE_7BIT ? b : \
- a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \
- ENC_CODERANGE_UNKNOWN)
-
-#define ENCODING_CODERANGE_SET(mrb, obj, encindex, cr) \
- do { \
- mrb_value mrb_encoding_coderange_obj = (obj); \
- ENCODING_SET(mrb, mrb_encoding_coderange_obj, (encindex)); \
- ENC_CODERANGE_SET(mrb_encoding_coderange_obj, (cr)); \
- } while (0)
-
-typedef OnigEncodingType mrb_encoding;
-
-/* mrb_encoding * -> name */
-#define mrb_enc_name(enc) (enc)->name
-int mrb_enc_get_index(mrb_state *mrb, mrb_value obj);
-
-int mrb_enc_replicate(mrb_state *, const char *, mrb_encoding *);
-int mrb_define_dummy_encoding(mrb_state *mrb, const char *);
-#define mrb_enc_to_index(enc) ((enc) ? ENC_TO_ENCINDEX(enc) : 0)
-void mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int encindex);
-int mrb_enc_find_index(mrb_state *mrb, const char *name);
-int mrb_to_encoding_index(mrb_state *mrb, mrb_value);
-mrb_encoding* mrb_to_encoding(mrb_state *mrb, mrb_value);
-mrb_encoding* mrb_enc_get(mrb_state *, mrb_value);
-mrb_encoding* mrb_enc_compatible(mrb_state *, mrb_value, mrb_value);
-mrb_encoding* mrb_enc_check(mrb_state *, mrb_value, mrb_value);
-mrb_value mrb_enc_associate_index(mrb_state *mrb, mrb_value, int);
-#ifdef INCLUDE_ENCODING
-mrb_value mrb_enc_associate(mrb_state *mrb, mrb_value, mrb_encoding*);
-#else
-#define mrb_enc_associate(mrb,value,enc)
-#endif //INCLUDE_ENCODING
-void mrb_enc_copy(mrb_state *mrb, mrb_value dst, mrb_value src);
-
-mrb_value mrb_enc_reg_new(const char*, long, mrb_encoding*, int);
-//PRINTF_ARGS(mrb_value rb_enc_sprintf(mrb_encoding *, const char*, ...), 2, 3);
-mrb_value mrb_enc_vsprintf(mrb_encoding *, const char*, va_list);
-long mrb_enc_strlen(const char*, const char*, mrb_encoding*);
-char* mrb_enc_nth(mrb_state *, const char*, const char*, long, mrb_encoding*);
-mrb_value mrb_obj_encoding(mrb_state *, mrb_value);
-mrb_value mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *enc);
-mrb_value rb_enc_uint_chr(mrb_state *mrb, unsigned int code, mrb_encoding *enc);
-
-mrb_value mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *);
-mrb_value mrb_str_export_to_enc(mrb_value, mrb_encoding *);
-
-/* index -> mrb_encoding */
-mrb_encoding* mrb_enc_from_index(mrb_state *mrb, int idx);
-
-/* name -> mrb_encoding */
-mrb_encoding * mrb_enc_find(mrb_state *mrb, const char *name);
-
-/* mrb_encoding * -> name */
-#define mrb_enc_name(enc) (enc)->name
-
-/* mrb_encoding * -> minlen/maxlen */
-#define mrb_enc_mbminlen(enc) (enc)->min_enc_len
-#define mrb_enc_mbmaxlen(enc) (enc)->max_enc_len
-
-/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
-int mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc);
-
-/* -> mbclen (only for valid encoding) */
-int mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc);
-
-/* -> chlen, invalid or needmore */
-int mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc);
-#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret)
-#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
-#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret)
-#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret)
-#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
-
-/* -> 0x00..0x7f, -1 */
-int mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc);
-
-
-/* -> code (and len) or raise exception */
-unsigned int mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc);
-
-/* prototype for obsolete function */
-unsigned int mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc);
-/* overriding macro */
-#define mrb_enc_codepoint(mrb,p,e,enc) mrb_enc_codepoint_len((mrb),(p),(e),0,(enc))
-#define mrb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e))
-
-/* -> codelen>0 or raise exception */
-#ifdef INCLUDE_ENCODING
-int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc);
-#else
-#define mrb_enc_codelen(mrb,code,enc) 1
-#endif //INCLUDE_ENCODING
-
-/* code,ptr,encoding -> write buf */
-#define mrb_enc_mbcput(c,buf,enc) ((*(buf) = (char)(c)),1)
-
-/* start, ptr, end, encoding -> prev_char */
-#define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
-/* start, ptr, end, encoding -> next_char */
-#define mrb_enc_left_char_head(s,p,e,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
-#define mrb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e))
-
-/* ptr, ptr, encoding -> newline_or_not */
-#define mrb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end))
-
-#define mrb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t)
-#define mrb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
-#define mrb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c)
-#define mrb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c)
-#define mrb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c)
-#define mrb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT(enc,c)
-#define mrb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c)
-#define mrb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c)
-#define mrb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c)
-#define mrb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c)
-
-#define mrb_enc_asciicompat(mrb, enc) (mrb_enc_mbminlen(enc)==1 && !mrb_enc_dummy_p(enc))
-
-int mrb_enc_casefold(char *to, const char *p, const char *e, mrb_encoding *enc);
-int mrb_enc_toupper(int c, mrb_encoding *enc);
-int mrb_enc_tolower(int c, mrb_encoding *enc);
-//ID mrb_intern3(const char*, long, mrb_encoding*);
-//ID mrb_interned_id_p(const char *, long, mrb_encoding *);
-int mrb_enc_symname_p(const char*, mrb_encoding*);
-int mrb_enc_symname2_p(const char*, long, mrb_encoding*);
-int mrb_enc_str_coderange(mrb_state *mrb, mrb_value);
-long mrb_str_coderange_scan_restartable(const char*, const char*, mrb_encoding*, int*);
-int mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value);
-#define mrb_enc_str_asciicompat_p(mrb, str) mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, str))
-mrb_value mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *enc);
-int mrb_enc_unicode_p(mrb_encoding *enc);
-mrb_encoding *mrb_ascii8bit_encoding(mrb_state *mrb);
-mrb_encoding *mrb_utf8_encoding(mrb_state *mrb);
-mrb_encoding *mrb_usascii_encoding(mrb_state *mrb);
-mrb_encoding *mrb_locale_encoding(mrb_state *mrb);
-mrb_encoding *mrb_filesystem_encoding(mrb_state *mrb);
-mrb_encoding *mrb_default_external_encoding(mrb_state *mrb);
-mrb_encoding *mrb_default_internal_encoding(mrb_state *mrb);
-int mrb_ascii8bit_encindex(void);
-int mrb_utf8_encindex(void);
-int mrb_usascii_encindex(void);
-int mrb_locale_encindex(mrb_state *mrb);
-int mrb_filesystem_encindex(void);
-mrb_value mrb_enc_default_external(mrb_state *mrb);
-mrb_value mrb_enc_default_internal(mrb_state *mrb);
-void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding);
-void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding);
-mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass);
-mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr);
-int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p);
-
-#define ENC_DUMMY_FLAG (1<<24)
-#define ENC_INDEX_MASK (~(~0U<<24))
-
-#define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
-
-#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
-#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
-
-static inline int
-mrb_enc_dummy_p(mrb_encoding *enc)
-{
- return ENC_DUMMY_P(enc) != 0;
-}
-
-/* econv stuff */
-
-typedef enum {
- econv_invalid_byte_sequence,
- econv_undefined_conversion,
- econv_destination_buffer_full,
- econv_source_buffer_empty,
- econv_finished,
- econv_after_output,
- econv_incomplete_input
-} mrb_econv_result_t;
-
-typedef struct mrb_econv_t mrb_econv_t;
-
-mrb_value mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts);
-int mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding);
-
-int mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *ecopts);
-
-mrb_econv_t *mrb_econv_open(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags);
-mrb_econv_t *mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value ecopts);
-
-mrb_econv_result_t mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
- unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
- int flags);
-void mrb_econv_close(mrb_econv_t *ec);
-
-/* result: 0:success -1:failure */
-int mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
-
-/* result: 0:success -1:failure */
-int mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name);
-int mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name);
-
-mrb_value mrb_econv_open_exc(mrb_state *mrb, const char *senc, const char *denc, int ecflags);
-
-/* result: 0:success -1:failure */
-int mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec,
- const unsigned char *str, size_t len, const char *str_encoding);
-
-/* encoding that mrb_econv_insert_output doesn't need conversion */
-const char *mrb_econv_encoding_to_insert_output(mrb_econv_t *ec);
-
-/* raise an error if the last mrb_econv_convert is error */
-void mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec);
-
-/* returns an exception object or nil */
-mrb_value mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec);
-
-int mrb_econv_putbackable(mrb_econv_t *ec);
-void mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n);
-
-/* returns the corresponding ASCII compatible encoding for encname,
- * or NULL if encname is not ASCII incompatible encoding. */
-const char *mrb_econv_asciicompat_encoding(const char *encname);
-
-mrb_value mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags);
-mrb_value mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags);
-mrb_value mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags);
-mrb_value mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, mrb_value dst, int flags);
-
-void mrb_econv_binmode(mrb_econv_t *ec);
-
-/* flags for mrb_econv_open */
-
-#define ECONV_ERROR_HANDLER_MASK 0x000000ff
-
-#define ECONV_INVALID_MASK 0x0000000f
-#define ECONV_INVALID_REPLACE 0x00000002
-
-#define ECONV_UNDEF_MASK 0x000000f0
-#define ECONV_UNDEF_REPLACE 0x00000020
-#define ECONV_UNDEF_HEX_CHARREF 0x00000030
-
-#define ECONV_DECORATOR_MASK 0x0000ff00
-
-#define ECONV_UNIVERSAL_NEWLINE_DECORATOR 0x00000100
-#define ECONV_CRLF_NEWLINE_DECORATOR 0x00001000
-#define ECONV_CR_NEWLINE_DECORATOR 0x00002000
-#define ECONV_XML_TEXT_DECORATOR 0x00004000
-#define ECONV_XML_ATTR_CONTENT_DECORATOR 0x00008000
-
-#define ECONV_STATEFUL_DECORATOR_MASK 0x00f00000
-#define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000
-
-/* end of flags for mrb_econv_open */
-
-/* flags for mrb_econv_convert */
-#define ECONV_PARTIAL_INPUT 0x00010000
-#define ECONV_AFTER_OUTPUT 0x00020000
-/* end of flags for mrb_econv_convert */
-
-int mrb_isspace(int c);
-
-#define ENCODE_CLASS (mrb_class_obj_get(mrb, "Encoding"))
-#define CONVERTER_CLASS (mrb_class_obj_get(mrb, "Converter"))
-
-#if defined(__cplusplus)
-} /* extern "C" { */
-#endif
-
-#endif /* RUBY_ENCODING_H */
diff --git a/src/etc.c b/src/etc.c
index 4542c4abd..da4411b04 100644
--- a/src/etc.c
+++ b/src/etc.c
@@ -171,7 +171,6 @@ mrb_obj_id(mrb_value obj)
case MRB_TT_ARRAY:
case MRB_TT_HASH:
case MRB_TT_RANGE:
- case MRB_TT_REGEX:
case MRB_TT_STRUCT:
case MRB_TT_EXCEPTION:
case MRB_TT_MATCH:
diff --git a/src/gc.c b/src/gc.c
index 1833bf984..79e7ba8fc 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -71,10 +71,6 @@
*/
-#ifdef ENABLE_REGEXP
-#include "re.h"
-#endif
-
struct free_obj {
MRB_OBJECT_HEADER;
struct RBasic *next;
@@ -94,10 +90,6 @@ typedef struct {
struct RStruct structdata;
#endif
struct RProc procdata;
-#ifdef ENABLE_REGEXP
- struct RMatch match;
- struct RRegexp regexp;
-#endif
} as;
} RVALUE;
@@ -462,24 +454,6 @@ gc_mark_children(mrb_state *mrb, struct RBasic *obj)
}
break;
-#ifdef ENABLE_REGEXP
- case MRB_TT_MATCH:
- {
- struct RMatch *m = (struct RMatch*)obj;
-
- mrb_gc_mark(mrb, (struct RBasic*)m->str);
- mrb_gc_mark(mrb, (struct RBasic*)m->regexp);
- }
- break;
- case MRB_TT_REGEX:
- {
- struct RRegexp *r = (struct RRegexp*)obj;
-
- mrb_gc_mark(mrb, (struct RBasic*)r->src);
- }
- break;
-#endif
-
#ifdef ENABLE_STRUCT
case MRB_TT_STRUCT:
{
@@ -689,15 +663,6 @@ gc_gray_mark(mrb_state *mrb, struct RBasic *obj)
children+=2;
break;
-#ifdef ENABLE_REGEXP
- case MRB_TT_MATCH:
- children+=2;
- break;
- case MRB_TT_REGEX:
- children+=1;
- break;
-#endif
-
#ifdef ENABLE_STRUCT
case MRB_TT_STRUCT:
{
diff --git a/src/init.c b/src/init.c
index 73ff8fce2..a85d0483d 100644
--- a/src/init.c
+++ b/src/init.c
@@ -54,9 +54,7 @@ mrb_init_core(mrb_state *mrb)
mrb_init_struct(mrb); DONE;
#endif
mrb_init_gc(mrb); DONE;
-#ifdef ENABLE_REGEXP
mrb_init_regexp(mrb); DONE;
-#endif
#ifdef ENABLE_STDIO
mrb_init_print(mrb); DONE;
#endif
@@ -78,4 +76,4 @@ mrb_final_core(mrb_state *mrb)
#ifndef DISABLE_GEMS
mrb_final_mrbgems(mrb); DONE;
#endif
-} \ No newline at end of file
+}
diff --git a/src/load.c b/src/load.c
index 78ece114f..a61af509c 100644
--- a/src/load.c
+++ b/src/load.c
@@ -8,9 +8,6 @@
#include "mruby/dump.h"
#include "mruby/string.h"
-#ifdef ENABLE_REGEXP
-#include "re.h"
-#endif
#include "mruby/proc.h"
#include "mruby/irep.h"
@@ -435,13 +432,6 @@ read_rite_irep_record(mrb_state *mrb, unsigned char *src, uint32_t* len)
irep->pool[i] = mrb_str_new(mrb, buf, pdl);
break;
-#ifdef ENABLE_REGEXP
- case MRB_TT_REGEX:
- str = mrb_str_new(mrb, buf, pdl);
- irep->pool[i] = mrb_reg_quote(mrb, str);
- break;
-#endif
-
default:
irep->pool[i] = mrb_nil_value();
break;
diff --git a/src/node.h b/src/node.h
index 29edd6cc5..284105023 100644
--- a/src/node.h
+++ b/src/node.h
@@ -62,6 +62,7 @@ enum node_type {
NODE_SYM,
NODE_STR,
NODE_DSTR,
+ NODE_REGX,
NODE_DREGX,
NODE_DREGX_ONCE,
NODE_LIST,
diff --git a/src/oniguruma.h b/src/oniguruma.h
deleted file mode 100644
index 3332df023..000000000
--- a/src/oniguruma.h
+++ /dev/null
@@ -1,771 +0,0 @@
-#ifndef ONIGURUMA_H
-#define ONIGURUMA_H
-/**********************************************************************
- oniguruma.h - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 5
-#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 2
-
-#ifdef __cplusplus
-# ifndef HAVE_PROTOTYPES
-# define HAVE_PROTOTYPES 1
-# endif
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
-#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifndef ONIG_EXTERN
-#ifdef RUBY_EXTERN
-#define ONIG_EXTERN RUBY_EXTERN
-#else
-#if defined(_WIN32) && !defined(__GNUC__)
-#if defined(EXPORT) || defined(RUBY_EXPORT)
-#define ONIG_EXTERN extern __declspec(dllexport)
-#else
-#define ONIG_EXTERN extern __declspec(dllimport)
-#endif
-#endif
-#endif
-#endif
-
-#ifndef ONIG_EXTERN
-#define ONIG_EXTERN extern
-#endif
-
-/* PART: character encoding */
-
-#ifndef ONIG_ESCAPE_UCHAR_COLLISION
-#define UChar OnigUChar
-#endif
-
-typedef unsigned char OnigUChar;
-typedef unsigned int OnigCodePoint;
-typedef unsigned int OnigCtype;
-typedef size_t OnigDistance;
-
-#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
-
-typedef unsigned int OnigCaseFoldType; /* case fold flag */
-
-ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
-
-/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
-/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
-#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
-#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
-
-#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
-#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
-
-
-#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
-#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
-/* 13 => Unicode:0x1ffc */
-
-/* code range */
-#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
-#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
-#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
-
-typedef struct {
- int byte_len; /* argument(original) character(s) byte length */
- int code_len; /* number of code */
- OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
-} OnigCaseFoldCodeItem;
-
-typedef struct {
- OnigCodePoint esc;
- OnigCodePoint anychar;
- OnigCodePoint anytime;
- OnigCodePoint zero_or_one_time;
- OnigCodePoint one_or_more_time;
- OnigCodePoint anychar_anytime;
-} OnigMetaCharTableType;
-
-typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
-
-typedef struct OnigEncodingTypeST {
- int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
- const char* name;
- int max_enc_len;
- int min_enc_len;
- int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
- OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
- int (*code_to_mbclen)(OnigCodePoint code, struct OnigEncodingTypeST* enc);
- int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, struct OnigEncodingTypeST* enc);
- int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, struct OnigEncodingTypeST* enc);
- int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc);
- int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc);
- int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
- int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc);
- int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc);
- OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
- int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
- int ruby_encoding_index;
-} OnigEncodingType;
-
-typedef OnigEncodingType* OnigEncoding;
-
-ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
-
-#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
-
-#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
-
-
-/* work size */
-#define ONIGENC_CODE_TO_MBC_MAXLEN 7
-#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
-/* 18: 6(max-byte) * 3(case-fold chars) */
-
-/* character types */
-#define ONIGENC_CTYPE_NEWLINE 0
-#define ONIGENC_CTYPE_ALPHA 1
-#define ONIGENC_CTYPE_BLANK 2
-#define ONIGENC_CTYPE_CNTRL 3
-#define ONIGENC_CTYPE_DIGIT 4
-#define ONIGENC_CTYPE_GRAPH 5
-#define ONIGENC_CTYPE_LOWER 6
-#define ONIGENC_CTYPE_PRINT 7
-#define ONIGENC_CTYPE_PUNCT 8
-#define ONIGENC_CTYPE_SPACE 9
-#define ONIGENC_CTYPE_UPPER 10
-#define ONIGENC_CTYPE_XDIGIT 11
-#define ONIGENC_CTYPE_WORD 12
-#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
-#define ONIGENC_CTYPE_ASCII 14
-#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
-#define ONIGENC_CTYPE_SPECIAL_MASK 128
-#define ONIGENC_CTYPE_S /* [\t\n\v\f\r\s] */ \
- ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE
-#define ONIGENC_CTYPE_D /* [0-9] */ \
- ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT
-#define ONIGENC_CTYPE_W /* [0-9A-Za-z_] */ \
- ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD
-#define ONIGENC_CTYPE_SPECIAL_P(ctype) ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK)
-
-
-#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
-
-#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
-#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
-#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
-#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
-#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
-#define ONIGENC_IS_MBC_WORD(enc,s,end) \
- ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
-
-
-#define ONIGENC_NAME(enc) ((enc)->name)
-
-#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
- (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
-#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
- (enc)->is_allowed_reverse_match(s,end,enc)
-#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
- (enc)->left_adjust_char_head(start, s, end, enc)
-#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
- (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
-#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
- (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
-#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
- onigenc_step_back((enc),(start),(s),(end),(n))
-
-#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
-#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
-#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
-
-#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
-#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
-
-#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
-#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
-#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
-
-#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
-
-ONIG_EXTERN
-int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc);
-
-#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
-#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
-#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
-#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
-#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
-#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
-#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
-#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
-#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
- (enc)->property_name_to_ctype(enc,p,end)
-
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
-
-#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
-#define ONIGENC_IS_CODE_GRAPH(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
-#define ONIGENC_IS_CODE_PRINT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
-#define ONIGENC_IS_CODE_ALNUM(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
-#define ONIGENC_IS_CODE_ALPHA(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
-#define ONIGENC_IS_CODE_LOWER(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
-#define ONIGENC_IS_CODE_UPPER(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
-#define ONIGENC_IS_CODE_CNTRL(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
-#define ONIGENC_IS_CODE_PUNCT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
-#define ONIGENC_IS_CODE_SPACE(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
-#define ONIGENC_IS_CODE_BLANK(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
-#define ONIGENC_IS_CODE_DIGIT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
-#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
-#define ONIGENC_IS_CODE_WORD(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
-
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
- (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
-
-ONIG_EXTERN
-OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n);
-
-
-/* encoding API */
-ONIG_EXTERN
-int onigenc_init(void);
-ONIG_EXTERN
-int onigenc_set_default_encoding(OnigEncoding enc);
-ONIG_EXTERN
-OnigEncoding onigenc_get_default_encoding(void);
-ONIG_EXTERN
-void onigenc_set_default_caseconv_table(const OnigUChar* table);
-ONIG_EXTERN
-OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev);
-ONIG_EXTERN
-OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
-ONIG_EXTERN
-OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
-ONIG_EXTERN
-OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
-ONIG_EXTERN
-int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end);
-ONIG_EXTERN
-int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p);
-ONIG_EXTERN
-int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p);
-
-
-
-/* PART: regular expression */
-
-/* config parameters */
-#define ONIG_NREGION 10
-#define ONIG_MAX_BACKREF_NUM 1000
-#define ONIG_MAX_REPEAT_NUM 100000
-#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
-/* constants */
-#define ONIG_MAX_ERROR_MESSAGE_LEN 90
-
-typedef unsigned int OnigOptionType;
-
-#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
-
-/* options */
-#define ONIG_OPTION_NONE 0U
-#define ONIG_OPTION_IGNORECASE 1U
-#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
-#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
-#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
-#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
-#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
-#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
-#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
-#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
-/* options (search time) */
-#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
-#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
-#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
-#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
-
-#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
-#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
-#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
-
-/* syntax */
-typedef struct {
- unsigned int op;
- unsigned int op2;
- unsigned int behavior;
- OnigOptionType options; /* default option */
- OnigMetaCharTableType meta_char_table;
-} OnigSyntaxType;
-
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl_NG;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
-
-/* predefined syntaxes (see regsyntax.c) */
-#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
-#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
-#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
-#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
-#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
-#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
-#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
-#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
-#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG)
-#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
-
-/* default syntax */
-ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
-#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
-
-/* syntax (operators) */
-#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
-#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
-#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
-#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
-#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
-#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
-#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
-#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
-#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
-#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
-#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
-#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
-#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
-#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
-#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
-#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
-#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
-#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
-#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
-#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */
-#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
-#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
-#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
-#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
-#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
-#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
-#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
-#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
-#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
-#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
-#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
-
-#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
-#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
-#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */
-#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
-#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
-#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
-#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
-#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
-#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
-#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
-#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
-#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
-#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
-#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
-/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
-#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
-#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
-
-/* syntax (behavior) */
-#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
-#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
-#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
-#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
-#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
-#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
-#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
-#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
-#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
-#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
-#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
-
-/* syntax (behavior) in char class [...] */
-#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
-#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
-#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
-#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
-/* syntax (behavior) warning */
-#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
-#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
-#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
-
-/* meta character specifiers (onig_set_meta_char()) */
-#define ONIG_META_CHAR_ESCAPE 0
-#define ONIG_META_CHAR_ANYCHAR 1
-#define ONIG_META_CHAR_ANYTIME 2
-#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
-#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
-#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
-
-#define ONIG_INEFFECTIVE_META_CHAR 0
-
-/* error codes */
-#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
-/* normal return */
-#define ONIG_NORMAL 0
-#define ONIG_MISMATCH -1
-#define ONIG_NO_SUPPORT_CONFIG -2
-
-/* internal error */
-#define ONIGERR_MEMORY -5
-#define ONIGERR_TYPE_BUG -6
-#define ONIGERR_PARSER_BUG -11
-#define ONIGERR_STACK_BUG -12
-#define ONIGERR_UNDEFINED_BYTECODE -13
-#define ONIGERR_UNEXPECTED_BYTECODE -14
-#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
-#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
-#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
-/* general error */
-#define ONIGERR_INVALID_ARGUMENT -30
-/* syntax error */
-#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
-#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
-#define ONIGERR_EMPTY_CHAR_CLASS -102
-#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
-#define ONIGERR_END_PATTERN_AT_ESCAPE -104
-#define ONIGERR_END_PATTERN_AT_META -105
-#define ONIGERR_END_PATTERN_AT_CONTROL -106
-#define ONIGERR_META_CODE_SYNTAX -108
-#define ONIGERR_CONTROL_CODE_SYNTAX -109
-#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
-#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
-#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
-#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
-#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
-#define ONIGERR_NESTED_REPEAT_OPERATOR -115
-#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
-#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
-#define ONIGERR_END_PATTERN_IN_GROUP -118
-#define ONIGERR_UNDEFINED_GROUP_OPTION -119
-#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
-#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
-#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
-/* values error (syntax error) */
-#define ONIGERR_TOO_BIG_NUMBER -200
-#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
-#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
-#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
-#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
-#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
-#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
-#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
-#define ONIGERR_INVALID_BACKREF -208
-#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
-#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
-#define ONIGERR_EMPTY_GROUP_NAME -214
-#define ONIGERR_INVALID_GROUP_NAME -215
-#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
-#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
-#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
-#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
-#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
-#define ONIGERR_NEVER_ENDING_RECURSION -221
-#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
-#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
-#define ONIGERR_INVALID_CODE_POINT_VALUE -400
-#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
-#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
-#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
-#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
-
-/* errors related to thread */
-#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
-
-
-/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
-#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
-#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
- ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
-
-typedef struct OnigCaptureTreeNodeStruct {
- int group; /* group number */
- int beg;
- int end;
- int allocated;
- int num_childs;
- struct OnigCaptureTreeNodeStruct** childs;
-} OnigCaptureTreeNode;
-
-/* match result region type */
-struct re_registers {
- int allocated;
- int num_regs;
- int* beg;
- int* end;
- /* extended */
- OnigCaptureTreeNode* history_root; /* capture history tree root */
-};
-
-/* capture tree traverse */
-#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
-#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
-#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
- ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
-
-
-#define ONIG_REGION_NOTPOS -1
-
-typedef struct re_registers OnigRegion;
-
-typedef struct {
- OnigEncoding enc;
- OnigUChar* par;
- OnigUChar* par_end;
-} OnigErrorInfo;
-
-typedef struct {
- int lower;
- int upper;
-} OnigRepeatRange;
-
-typedef void (*OnigWarnFunc)(const char* s);
-extern void onig_null_warn(const char* s);
-#define ONIG_NULL_WARN onig_null_warn
-
-#define ONIG_CHAR_TABLE_SIZE 256
-
-/* regex_t state */
-#define ONIG_STATE_NORMAL 0
-#define ONIG_STATE_SEARCHING 1
-#define ONIG_STATE_COMPILING -1
-#define ONIG_STATE_MODIFY -2
-
-#define ONIG_STATE(reg) \
- ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
-
-typedef struct re_pattern_buffer {
- /* common members of BBuf(bytes-buffer) */
- unsigned char* p; /* compiled pattern */
- unsigned int used; /* used space for p */
- unsigned int alloc; /* allocated space for p */
-
- int state; /* normal, searching, compiling */
- int num_mem; /* used memory(...) num counted from 1 */
- int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
- int num_null_check; /* OP_NULL_CHECK_START/END id counter */
- int num_comb_exp_check; /* combination explosion check */
- int num_call; /* number of subexp call */
- unsigned int capture_history; /* (?@...) flag (1-31) */
- unsigned int bt_mem_start; /* need backtrack flag */
- unsigned int bt_mem_end; /* need backtrack flag */
- int stack_pop_level;
- int repeat_range_alloc;
- OnigRepeatRange* repeat_range;
-
- OnigEncoding enc;
- OnigOptionType options;
- const OnigSyntaxType* syntax;
- OnigCaseFoldType case_fold_flag;
- void* name_table;
-
- /* optimization info (string search, char-map and anchors) */
- int optimize; /* optimize flag */
- int threshold_len; /* search str-length for apply optimize */
- int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
- OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
- OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
- int sub_anchor; /* start-anchor for exact or map */
- unsigned char *exact;
- unsigned char *exact_end;
- unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
- int *int_map; /* BM skip for exact_len > 255 */
- int *int_map_backward; /* BM skip for backward search */
- OnigDistance dmin; /* min-distance of exact or map */
- OnigDistance dmax; /* max-distance of exact or map */
-
- /* regex_t link chain */
- struct re_pattern_buffer* chain; /* escape compile-conflict */
-} OnigRegexType;
-
-typedef OnigRegexType* OnigRegex;
-
-#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
- typedef OnigRegexType regex_t;
-#endif
-
-
-typedef struct {
- int num_of_elements;
- OnigEncoding pattern_enc;
- OnigEncoding target_enc;
- OnigSyntaxType* syntax;
- OnigOptionType option;
- OnigCaseFoldType case_fold_flag;
-} OnigCompileInfo;
-
-/* Oniguruma Native API */
-ONIG_EXTERN
-int onig_init(void);
-ONIG_EXTERN
-int onig_error_code_to_str(OnigUChar* s, int err_code, ...);
-ONIG_EXTERN
-void onig_set_warn_func(OnigWarnFunc f);
-ONIG_EXTERN
-void onig_set_verb_warn_func(OnigWarnFunc f);
-ONIG_EXTERN
-int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo);
-ONIG_EXTERN
-int onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax);
-ONIG_EXTERN
-int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
-ONIG_EXTERN
-int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
-ONIG_EXTERN
-void onig_free(OnigRegex);
-ONIG_EXTERN
-void onig_free_body(OnigRegex);
-ONIG_EXTERN
-int onig_recompile(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
-ONIG_EXTERN
-int onig_recompile_deluxe(OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
-ONIG_EXTERN
-long onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
-ONIG_EXTERN
-long onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option);
-ONIG_EXTERN
-OnigRegion* onig_region_new(void);
-ONIG_EXTERN
-void onig_region_init(OnigRegion* region);
-ONIG_EXTERN
-void onig_region_free(OnigRegion* region, int free_self);
-ONIG_EXTERN
-void onig_region_copy(OnigRegion* to, OnigRegion* from);
-ONIG_EXTERN
-void onig_region_clear(OnigRegion* region);
-ONIG_EXTERN
-int onig_region_resize(OnigRegion* region, int n);
-ONIG_EXTERN
-int onig_region_set(OnigRegion* region, int at, int beg, int end);
-ONIG_EXTERN
-int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums);
-ONIG_EXTERN
-int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region);
-ONIG_EXTERN
-int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg);
-ONIG_EXTERN
-int onig_number_of_names(OnigRegex reg);
-ONIG_EXTERN
-int onig_number_of_captures(OnigRegex reg);
-ONIG_EXTERN
-int onig_number_of_capture_histories(OnigRegex reg);
-ONIG_EXTERN
-OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region);
-ONIG_EXTERN
-int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg);
-ONIG_EXTERN
-int onig_noname_group_capture_is_active(OnigRegex reg);
-ONIG_EXTERN
-OnigEncoding onig_get_encoding(OnigRegex reg);
-ONIG_EXTERN
-OnigOptionType onig_get_options(OnigRegex reg);
-ONIG_EXTERN
-OnigCaseFoldType onig_get_case_fold_flag(OnigRegex reg);
-ONIG_EXTERN
-const OnigSyntaxType* onig_get_syntax(OnigRegex reg);
-ONIG_EXTERN
-int onig_set_default_syntax(const OnigSyntaxType* syntax);
-ONIG_EXTERN
-void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from);
-ONIG_EXTERN
-unsigned int onig_get_syntax_op(OnigSyntaxType* syntax);
-ONIG_EXTERN
-unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax);
-ONIG_EXTERN
-unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax);
-ONIG_EXTERN
-OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax);
-ONIG_EXTERN
-void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op);
-ONIG_EXTERN
-void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2);
-ONIG_EXTERN
-void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior);
-ONIG_EXTERN
-void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options);
-ONIG_EXTERN
-int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code);
-ONIG_EXTERN
-void onig_copy_encoding(OnigEncoding to, OnigEncoding from);
-ONIG_EXTERN
-OnigCaseFoldType onig_get_default_case_fold_flag(void);
-ONIG_EXTERN
-int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag);
-ONIG_EXTERN
-unsigned int onig_get_match_stack_limit_size(void);
-ONIG_EXTERN
-int onig_set_match_stack_limit_size(unsigned int size);
-ONIG_EXTERN
-int onig_end(void);
-ONIG_EXTERN
-const char* onig_version(void);
-ONIG_EXTERN
-const char* onig_copyright(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ONIGURUMA_H */
diff --git a/src/parse.y b/src/parse.y
index 91fb1a8e8..9af8682c1 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -708,6 +708,13 @@ new_dsym(parser_state *p, node *a)
return cons((node*)NODE_DSYM, new_dstr(p, a));
}
+// (:str . (a . a))
+static node*
+new_regx(parser_state *p, const char *p1, const char* p2)
+{
+ return cons((node*)NODE_REGX, cons((node*)p1, (node*)p2));
+}
+
// (:backref . n)
static node*
new_back_ref(parser_state *p, int n)
@@ -743,13 +750,14 @@ call_bin_op(parser_state *p, node *recv, char *m, node *arg1)
return new_call(p, recv, intern(m), list1(list1(arg1)));
}
+/*
// (:match (a . b))
static node*
match_op(parser_state *p, node *a, node *b)
{
return cons((node*)NODE_MATCH, cons((node*)a, (node*)b));
}
-
+*/
static void
args_with_block(parser_state *p, node *a, node *b)
@@ -1679,7 +1687,7 @@ arg : lhs '=' arg
}
| arg tMATCH arg
{
- $$ = match_op(p, $1, $3);
+ $$ = call_bin_op(p, $1, "=~", $3);
#if 0
if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) {
$$ = reg_named_capture_assign($1->nd_lit, $$);
@@ -2498,7 +2506,10 @@ string_interp : tSTRING_PART
}
;
-regexp : tREGEXP
+regexp : tREGEXP_BEG tREGEXP
+ {
+ $$ = $2;
+ }
;
symbol : basic_symbol
@@ -3335,9 +3346,17 @@ read_escape(parser_state *p)
return c;
case 'b': /* backspace */
+ if (p->regexp) {
+ tokadd(p, '\\');
+ return 'b';
+ }
return '\010';
case 's': /* space */
+ if (p->regexp) {
+ tokadd(p, '\\');
+ return 's';
+ }
return ' ';
case 'M':
@@ -3375,6 +3394,9 @@ read_escape(parser_state *p)
return '\0';
default:
+ if (p->regexp) {
+ tokadd(p, '\\');
+ }
return c;
}
}
@@ -3385,7 +3407,6 @@ parse_string(parser_state *p, int term)
int c;
newtok(p);
-
while ((c = nextc(p)) != term) {
if (c == -1) {
yyerror(p, "unterminated string meets end of file");
@@ -3422,6 +3443,40 @@ parse_string(parser_state *p, int term)
tokfix(p);
p->lstate = EXPR_END;
p->sterm = 0;
+
+ if (p->regexp) {
+ int f = 0;
+ int c;
+ char* s;
+ s = strndup(tok(p), toklen(p));
+ newtok(p);
+ while (c = nextc(p), ISALPHA(c)) {
+ switch (c) {
+ case 'i': f |= 1; break;
+ case 'x': f |= 2; break;
+ case 'm': f |= 4; break;
+ default: tokadd(p, c); break;
+ }
+ }
+ pushback(p, c);
+ if (toklen(p)) {
+ char msg[128];
+ free(s);
+ tokfix(p);
+ snprintf(msg, sizeof(msg), "unknown regexp option %s - %s",
+ toklen(p) > 1 ? "s" : "", tok(p));
+ yyerror(p, msg);
+ }
+ char flag[4] = {0};
+ if (f & 1) strcat(flag, "i");
+ if (f & 2) strcat(flag, "x");
+ if (f & 4) strcat(flag, "m");
+ yylval.nd = new_regx(p, s, strdup(flag));
+ p->regexp = 0;
+
+ return tREGEXP;
+ }
+
yylval.nd = new_str(p, tok(p), toklen(p));
return tSTRING;
}
@@ -4186,6 +4241,8 @@ parser_yylex(parser_state *p)
#if 0
p->lex_strterm = new_strterm(p, str_regexp, '/', 0);
#endif
+ p->regexp = 1;
+ p->sterm = '/';
return tREGEXP_BEG;
}
if ((c = nextc(p)) == '=') {
@@ -4199,6 +4256,8 @@ parser_yylex(parser_state *p)
#if 0
p->lex_strterm = new_strterm(p, str_regexp, '/', 0);
#endif
+ p->regexp = 1;
+ p->sterm = '/';
return tREGEXP_BEG;
}
if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
@@ -4381,6 +4440,8 @@ parser_yylex(parser_state *p)
#if 0
p->lex_strterm = new_strterm(p, str_regexp, term, paren);
#endif
+ p->regexp = 1;
+ p->sterm = '/';
return tREGEXP_BEG;
case 's':
@@ -5389,6 +5450,16 @@ parser_dump(mrb_state *mrb, node *tree, int offset)
printf("NODE_CONST %s\n", mrb_sym2name(mrb, sym(tree)));
break;
+ case NODE_MATCH:
+ printf("NODE_MATCH:\n");
+ dump_prefix(offset + 1);
+ printf("lhs:\n");
+ parser_dump(mrb, tree->car, offset + 2);
+ dump_prefix(offset + 1);
+ printf("rhs:\n");
+ parser_dump(mrb, tree->cdr, offset + 2);
+ break;
+
case NODE_BACK_REF:
printf("NODE_BACK_REF: $%c\n", (int)(intptr_t)tree);
break;
@@ -5428,6 +5499,10 @@ parser_dump(mrb_state *mrb, node *tree, int offset)
dump_recur(mrb, tree, offset+1);
break;
+ case NODE_REGX:
+ printf("NODE_REGX /%s/\n", (char*)tree->car->cdr->car);
+ break;
+
case NODE_SYM:
printf("NODE_SYM :%s\n", mrb_sym2name(mrb, sym(tree)));
break;
diff --git a/src/re.c b/src/re.c
index 091c35f61..744732e58 100644
--- a/src/re.c
+++ b/src/re.c
@@ -7,1971 +7,10 @@
#include "mruby.h"
#include <string.h>
#include "mruby/string.h"
-#include "encoding.h"
#include "re.h"
#include "mruby/array.h"
-#include "regint.h"
#include "mruby/class.h"
#include "error.h"
-#ifdef ENABLE_REGEXP
-
-#define REGEX_CLASS (mrb_class_obj_get(mrb, "Regexp"))
-#define MATCH_CLASS (mrb_class_obj_get(mrb, "MatchData"))
-
-//from opcode.h
-#define GETARG_A(i) ((((mrb_code)(i)) >> 24) & 0xff)
-#define GETARG_B(i) ((((mrb_code)(i)) >> 16) & 0xff)
-#define GETARG_C(i) ((((mrb_code)(i)) >> 8) & 0xff)
-#define MKARG_A(c) (((c) & 0xff) << 24)
-#define MKARG_B(c) (((c) & 0xff) << 16)
-#define MKARG_C(c) (((c) & 0xff) << 8)
-
-#define ARG_REG_OPTION_MASK \
- (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
-#define ARG_ENCODING_FIXED 16
-#define ARG_ENCODING_NONE 32
-#define REG_LITERAL FL_USER5
-#define REG_ENCODING_NONE FL_USER6
-typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
-#define mrb_bug printf
-#define KCODE_FIXED FL_USER4
-#define scan_oct(s,l,e) (int)ruby_scan_oct(s,l,e)
-unsigned long ruby_scan_oct(const char*, size_t, size_t*);
-#define scan_hex(s,l,e) (int)ruby_scan_hex(s,l,e)
-unsigned long ruby_scan_hex(const char*, size_t, size_t*);
-
-static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match);
-static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err);
-static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len);
-static char * option_to_str(char str[4], int options);
-
-//static int may_need_recompile;
-//static int reg_kcode = DEFAULT_KCODE;
-/* ------------------------------------------------------------------------- */
-/* RegExp Class */
-/* ------------------------------------------------------------------------- */
-/* 15.2.15.6.1 */
-/*
- * call-seq:
- * class.new(args, ...) -> obj
- *
- * Calls <code>allocate</code> to create a new object of
- * <i>class</i>'s class, then invokes that object's
- * <code>initialize</code> method, passing it <i>args</i>.
- * This is the method that ends up getting called whenever
- * an object is constructed using .new.
- *
- */
-mrb_value
-mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value self)
-{
- mrb_value argv[16];
- int argc;
- struct RRegexp *re;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- re = (struct RRegexp*)mrb_obj_alloc(mrb, MRB_TT_REGEX, REGEX_CLASS);
- re->ptr = 0;
- re->src = 0;
- re->usecnt = 0;
- return mrb_funcall_argv(mrb, mrb_obj_value(re), mrb->init_sym, argc, argv);
-}
-
-mrb_value
-mrb_reg_quote(mrb_state *mrb, mrb_value str)
-{
- char *s, *send, *t;
- mrb_value tmp;
- int c;
-
- s = RSTRING_PTR(str);
- send = s + RSTRING_LEN(str);
- while (s < send) {
- c = *s;
- if (c == -1) {
- s += send - s;
- continue;
- }
- switch (c) {
- case '[': case ']': case '{': case '}':
- case '(': case ')': case '|': case '-':
- case '*': case '.': case '\\':
- case '?': case '+': case '^': case '$':
- case ' ': case '#':
- case '\t': case '\f': case '\n': case '\r':
- goto meta_found;
- }
- s++;
- }
- tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
- return tmp;
-
-meta_found:
- tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2);
- t = RSTRING_PTR(tmp);
- /* copy upto metacharacter */
- memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str));
- t += s - RSTRING_PTR(str);
-
- while (s < send) {
- c = *s;
- if (c == -1) {
- int n = send - s;
-
- while (n--)
- *t++ = *s++;
- continue;
- }
- s++;
- switch (c) {
- case '[': case ']': case '{': case '}':
- case '(': case ')': case '|': case '-':
- case '*': case '.': case '\\':
- case '?': case '+': case '^': case '$':
- case '#':
- t += mrb_enc_mbcput('\\', t, enc);
- break;
- case ' ':
- t += mrb_enc_mbcput('\\', t, enc);
- t += mrb_enc_mbcput(' ', t, enc);
- continue;
- case '\t':
- t += mrb_enc_mbcput('\\', t, enc);
- t += mrb_enc_mbcput('t', t, enc);
- continue;
- case '\n':
- t += mrb_enc_mbcput('\\', t, enc);
- t += mrb_enc_mbcput('n', t, enc);
- continue;
- case '\r':
- t += mrb_enc_mbcput('\\', t, enc);
- t += mrb_enc_mbcput('r', t, enc);
- continue;
- case '\f':
- t += mrb_enc_mbcput('\\', t, enc);
- t += mrb_enc_mbcput('f', t, enc);
- continue;
- case '\v':
- t += mrb_enc_mbcput('\\', t, enc);
- t += mrb_enc_mbcput('v', t, enc);
- continue;
- }
- t += mrb_enc_mbcput(c, t, enc);
- }
- mrb_str_resize(mrb, tmp, t - RSTRING_PTR(tmp));
-
- return tmp;
-}
-
-static mrb_value
-reg_operand(mrb_state *mrb, mrb_value s, int check)
-{
- if (mrb_type(s) == MRB_TT_SYMBOL) {
- //return mrb_sym_to_s(s);
- return mrb_obj_inspect(mrb, s);
- }
- else {
- mrb_value tmp = mrb_check_string_type(mrb, s);
- if (check && mrb_nil_p(tmp)) {
- mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s to String",
- mrb_obj_classname(mrb, s));
- }
- return tmp;
- }
-}
-/* 15.2.15.6.2 */
-/* 15.2.15.6.4 */
-/*
- * call-seq:
- * Regexp.escape(str) -> string
- * Regexp.quote(str) -> string
- *
- * Escapes any characters that would have special meaning in a regular
- * expression. Returns a new escaped string, or self if no characters are
- * escaped. For any string,
- * <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true.
- *
- * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\.
- *
- */
-
-static mrb_value
-mrb_reg_s_quote(mrb_state *mrb, mrb_value c/*, mrb_value str*/)
-{
- mrb_value str;
-
- mrb_get_args(mrb, "o", &str);
- return mrb_reg_quote(mrb, reg_operand(mrb, str, 1/*TRUE*/));
-}
-
-static void
-match_check(mrb_state *mrb, mrb_value match)
-{
- struct RMatch *m = mrb_match_ptr(match);
- if (!m->str) {
- mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Match");
- }
-}
-
-mrb_value
-mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match)
-{
- mrb_value str;
- long start, end, len;
- struct RMatch *m = mrb_match_ptr(match);
-
- if (mrb_nil_p(match)) return mrb_nil_value();
- match_check(mrb, match);
- if (nth >= m->rmatch->regs.num_regs) {
- return mrb_nil_value();
- }
- if (nth < 0) {
- nth += m->rmatch->regs.num_regs;
- if (nth <= 0) return mrb_nil_value();
- }
- start = m->rmatch->regs.beg[nth];
- if (start == -1) return mrb_nil_value();
- end = m->rmatch->regs.end[nth];
- len = end - start;
- str = mrb_str_subseq(mrb, mrb_obj_value(m->str), start, len);
-
- return str;
-}
-
-mrb_value
-mrb_reg_last_match(mrb_state *mrb, mrb_value match)
-{
- return mrb_reg_nth_match(mrb, 0, match);
-}
-
-
-static int
-match_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref)
-{
- const char *name;
- int num;
-
- struct re_registers *regs = RMATCH_REGS(match);
- struct RRegexp *regexp = RMATCH(match)->regexp;
-
- match_check(mrb, match);
- switch(mrb_type(backref)) {
- default:
- return mrb_fixnum(backref);
-
- case MRB_TT_SYMBOL:
- name = mrb_sym2name(mrb, mrb_symbol(backref));
- break;
-
- case MRB_TT_STRING:
- //name = StringValueCStr(backref);
- name = mrb_string_value_cstr(mrb, &backref);
- break;
- }
- num = onig_name_to_backref_number(regexp->ptr,
- (const unsigned char*)name,
- (const unsigned char*)name + strlen(name),
- regs);
- if (num < 1) {
- mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s", name);
- }
-
- return num;
-}
-/* 15.2.15.6.3 */
-/*
- * call-seq:
- * Regexp.last_match -> matchdata
- * Regexp.last_match(n) -> str
- *
- * The first form returns the <code>MatchData</code> object generated by the
- * last successful pattern match. Equivalent to reading the global variable
- * <code>$~</code>. The second form returns the <i>n</i>th field in this
- * <code>MatchData</code> object.
- * <em>n</em> can be a string or symbol to reference a named capture.
- *
- * Note that the <code>last_match</code> is local to the thread and method scope
- * of the method that did the pattern match.
- *
- * /c(.)t/ =~ 'cat' #=> 0
- * Regexp.last_match #=> #<MatchData "cat" 1:"a">
- * Regexp.last_match(0) #=> "cat"
- * Regexp.last_match(1) #=> "a"
- * Regexp.last_match(2) #=> nil
- *
- * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val"
- * Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val">
- * Regexp.last_match(:lhs) #=> "var"
- * Regexp.last_match(:rhs) #=> "val"
- */
-static mrb_value
-mrb_reg_s_last_match(mrb_state *mrb, mrb_value self/*int argc, mrb_value *argv*/)
-{
- //mrb_value nth;
- mrb_value argv[16];
- int argc;
- mrb_value match = mrb_backref_get(mrb);
-
- //if (argc > 0 && mrb_scan_args(argc, argv, "01", &nth) == 1) {
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc != 0) {
- int n;
- if (mrb_nil_p(match)) return mrb_nil_value();
- n = match_backref_number(mrb, match, argv[0]);
- return mrb_reg_nth_match(mrb, n, match);
- }
- return match;//match_getter();
-}
-
-static void
-mrb_reg_check(mrb_state *mrb, mrb_value re)
-{
- //struct RRegexp *r = mrb_regex_ptr(re);
-
- //if (!(RREGEXP(re)->ptr) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
- if (!(RREGEXP(re)->ptr)) {
- mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp");
- }
- if (RREGEXP(re)->src == 0) {
- mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp");
- }
-}
-
-int
-mrb_reg_options(mrb_state *mrb, mrb_value re)
-{
- int options;
-
- mrb_reg_check(mrb, re);
- options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK;
- if (mrb_basic(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
- if (mrb_basic(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
- return options;
-}
-
-static mrb_value
-mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re)
-{
- mrb_value str = mrb_str_new(mrb, "/", 1);
-
- mrb_reg_expr_str(mrb, str, s, len);
- mrb_str_buf_cat(mrb, str, "/", 1);
- if (re.tt) {
- char opts[4];
- mrb_reg_check(mrb, re);
- if (*option_to_str(opts, RREGEXP(re)->ptr->options))
- mrb_str_buf_cat(mrb, str, opts, strlen(opts));//mrb_str_buf_cat2(str, opts);
- if (mrb_basic(re)->flags & REG_ENCODING_NONE)
- mrb_str_buf_cat(mrb, str, "n", 1);
- }
-
- return str;
-}
-static void
-mrb_reg_raise(mrb_state *mrb, const char *s, long len, const char *err, mrb_value re)
-{
- mrb_value desc = mrb_reg_desc(mrb, s, len, re);
-
- mrb_raise(mrb, E_REGEXP_ERROR, "%s: %s", err, RSTRING_PTR(desc));
-}
-
-regex_t *
-mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str)
-{
- regex_t *reg = RREGEXP(re)->ptr;
- onig_errmsg_buffer err = "";
- int r;
- OnigErrorInfo einfo;
- const char *pattern;
- mrb_value unescaped;
- mrb_encoding *enc = mrb_ascii8bit_encoding(mrb);
-
- mrb_reg_check(mrb, re);
- reg = RREGEXP(re)->ptr;
- pattern = RREGEXP_SRC_PTR(re);
-
- unescaped = mrb_reg_preprocess(mrb,
- pattern, pattern + RREGEXP(re)->src->len, err);
-
- if (mrb_nil_p(unescaped)) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err);
- }
-
- r = onig_new(&reg, (UChar* )RSTRING_PTR(unescaped),
- (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)),
- reg->options, enc,
- OnigDefaultSyntax, &einfo);
- if (r) {
- onig_error_code_to_str((UChar*)err, r, &einfo);
- mrb_reg_raise(mrb, pattern, RREGEXP_SRC_LEN(re), err, re);
- }
-
- //RB_GC_GUARD(unescaped);
- return reg;
-}
-
-
-mrb_int
-mrb_reg_search(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse)
-{
- long result;
- mrb_value match;
- struct re_registers regi, *regs = &regi;
- char *range = RSTRING_PTR(str);
- regex_t *reg;
- int tmpreg;
-
- if (pos > RSTRING_LEN(str) || pos < 0) {
- mrb_backref_set(mrb, mrb_nil_value());
- return -1;
- }
-
- reg = mrb_reg_prepare_re(mrb, re, str);
- tmpreg = reg != RREGEXP(re)->ptr;
- if (!tmpreg) RREGEXP(re)->usecnt++;
-
- match = mrb_backref_get(mrb);
- if (!mrb_nil_p(match)) {
- /*if (FL_TEST(match, MATCH_BUSY)) {
- match = Qnil;
- }
- else {
- regs = RMATCH_REGS(match);
- }*/
- regs = RMATCH_REGS(match);
- }
- if (mrb_nil_p(match)) {
- memset(regs, 0, sizeof(struct re_registers));
- }
-//-->
- if (!reverse) {
- range += RSTRING_LEN(str);
- }
- result = onig_search(reg,
- (UChar*)(RSTRING_PTR(str)),
- ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)),
- ((UChar*)(RSTRING_PTR(str)) + pos),
- ((UChar*)range),
- regs, ONIG_OPTION_NONE);
- if (!tmpreg) RREGEXP(re)->usecnt--;
- if (tmpreg) {
- if (RREGEXP(re)->usecnt) {
- onig_free(reg);
- }
- else {
- onig_free(RREGEXP(re)->ptr);
- RREGEXP(re)->ptr = reg;
- }
- }
- if (result < 0) {
- if (regs == &regi)
- onig_region_free(regs, 0);
- if (result == ONIG_MISMATCH) {
- mrb_backref_set(mrb, mrb_nil_value());
- return result;
- }
- else {
- onig_errmsg_buffer err = "";
- onig_error_code_to_str((UChar*)err, (int)result);
- mrb_reg_raise(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
- }
- }
-//--<
- if (mrb_nil_p(match) ) {
- match = match_alloc(mrb);
- onig_region_copy(RMATCH_REGS(match), regs);
- onig_region_free(regs, 0);
- }
-
- RMATCH(match)->str = mrb_str_ptr(str);
- RMATCH(match)->regexp = mrb_regex_ptr(re);
- RMATCH(match)->rmatch->char_offset_updated = 0;
- mrb_backref_set(mrb, match);
-
- return result;
-}
-
-mrb_int
-mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse)
-{
- mrb_int range;
- struct RString *s = mrb_str_ptr(str);
- struct RRegexp *r = mrb_regex_ptr(re);
-
- mrb_reg_check(mrb, re);
- /*if (may_need_recompile) mrb_reg_prepare_re(re);*/
-
- /* if (FL_TEST(re, KCODE_FIXED))
- mrb_kcode_set_option(re);
- else if (reg_kcode != curr_kcode)
- mrb_kcode_reset_option(); */
-
- if (reverse) {
- range = -pos;
- }
- else {
- range = s->len - pos;
- }
- return re_adjust_startpos(r->ptr,
- s->buf, s->len,
- pos, range);
-}
-
-static int
-onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
- OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
-{
- int r;
-
- *reg = (regex_t* )malloc/*xmalloc*/(sizeof(regex_t));
- if ((void*)(*reg) == (void*)0) return ONIGERR_MEMORY;
-
- r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r) goto err;
- r = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
- if (r) {
- err:
- onig_free(*reg);
- *reg = 0/*NULL*/;
- }
- return r;
-}
-
-static Regexp*
-make_regexp(const char *s, long len, mrb_encoding *enc, int flags, onig_errmsg_buffer err,
- const char *sourcefile, int sourceline)
-{
- Regexp *rp;
- int r;
- OnigErrorInfo einfo;
-
- /* Handle escaped characters first. */
-
- /* Build a copy of the string (in dest) with the
- escaped characters translated, and generate the regex
- from that.
- */
-
- r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags,
- enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
- if (r) {
- onig_error_code_to_str((UChar*)err, r, &einfo);
- return 0;
- }
- return rp;
-}
-
-unsigned long
-ruby_scan_hex(const char *start, size_t len, size_t *retlen)
-{
- static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF";
- register const char *s = start;
- register unsigned long retval = 0;
- const char *tmp;
-
- while (len-- && *s && (tmp = strchr(hexdigit, *s))) {
- retval <<= 4;
- retval |= (tmp - hexdigit) & 15;
- s++;
- }
- *retlen = (int)(s - start); /* less than len */
- return retval;
-}
-
-#define BYTEWIDTH 8
-
-int
-mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv)
-{
- if (uv <= 0x7f) {
- buf[0] = (char)uv;
- return 1;
- }
- if (uv <= 0x7ff) {
- buf[0] = (char)((uv>>6)&0xff)|0xc0;
- buf[1] = (char)(uv&0x3f)|0x80;
- return 2;
- }
- if (uv <= 0xffff) {
- buf[0] = (char)((uv>>12)&0xff)|0xe0;
- buf[1] = (char)((uv>>6)&0x3f)|0x80;
- buf[2] = (char)(uv&0x3f)|0x80;
- return 3;
- }
- if (uv <= 0x1fffff) {
- buf[0] = (char)((uv>>18)&0xff)|0xf0;
- buf[1] = (char)((uv>>12)&0x3f)|0x80;
- buf[2] = (char)((uv>>6)&0x3f)|0x80;
- buf[3] = (char)(uv&0x3f)|0x80;
- return 4;
- }
- if (uv <= 0x3ffffff) {
- buf[0] = (char)((uv>>24)&0xff)|0xf8;
- buf[1] = (char)((uv>>18)&0x3f)|0x80;
- buf[2] = (char)((uv>>12)&0x3f)|0x80;
- buf[3] = (char)((uv>>6)&0x3f)|0x80;
- buf[4] = (char)(uv&0x3f)|0x80;
- return 5;
- }
- if (uv <= 0x7fffffff) {
- buf[0] = (char)((uv>>30)&0xff)|0xfc;
- buf[1] = (char)((uv>>24)&0x3f)|0x80;
- buf[2] = (char)((uv>>18)&0x3f)|0x80;
- buf[3] = (char)((uv>>12)&0x3f)|0x80;
- buf[4] = (char)((uv>>6)&0x3f)|0x80;
- buf[5] = (char)(uv&0x3f)|0x80;
- return 6;
- }
- mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range");
- return 0;
-}
-
-unsigned long
-ruby_scan_oct(const char *start, size_t len, size_t *retlen)
-{
- register const char *s = start;
- register unsigned long retval = 0;
-
- while (len-- && *s >= '0' && *s <= '7') {
- retval <<= 3;
- retval |= *s++ - '0';
- }
- *retlen = (int)(s - start); /* less than len */
- return retval;
-}
-
-static mrb_value
-mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err)
-{
- return mrb_nil_value();
-}
-
-static int
-mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len,
- int options, onig_errmsg_buffer err,
- const char *sourcefile, int sourceline)
-{
- struct RRegexp *re = RREGEXP(obj);
- mrb_value unescaped;
- mrb_encoding *enc = mrb_ascii8bit_encoding(mrb);
- if (re->ptr)
- mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp");
- re->ptr = 0;
-
- unescaped = mrb_reg_preprocess(mrb, s, s+len, err);
- if (mrb_nil_p(unescaped))
- return -1;
-
- if ((options & ARG_ENCODING_FIXED)) {
- //re->basic.flags |= KCODE_FIXED;
- re->flags|= KCODE_FIXED;
- }
- if (options & ARG_ENCODING_NONE) {
- re->flags |= REG_ENCODING_NONE;
- }
-
- re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
- options & ARG_REG_OPTION_MASK, err,
- sourcefile, sourceline);
- if (!re->ptr) return -1;
- re->src = mrb_str_ptr(mrb_str_new(mrb, s, len));
-
- return 0;
-}
-
-static int
-mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options, onig_errmsg_buffer err,
- const char *sourcefile, int sourceline)
-{
- int ret;
-
-#if 0
- if (options & ARG_ENCODING_NONE) {
- mrb_encoding *ascii8bit = mrb_ascii8bit_encoding(mrb);
- if (enc != ascii8bit) {
- if (mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) {
- //errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
- printf("/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
- return -1;
- }
- enc = ascii8bit;
- }
- }
-#endif
-
- ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str),
- options, err, sourcefile, sourceline);
-
- return ret;
-}
-
-/* 15.2.15.7.1 */
-/*
- * call-seq:
- * Regexp.initialize(string, [options [, lang]]) -> regexp
- * Regexp.initialize(regexp) -> regexp
- *
- * Constructs a new regular expression from <i>pattern</i>, which can be either
- * a <code>String</code> or a <code>Regexp</code> (in which case that regexp's
- * options are propagated, and new options may not be specified (a change as of
- * Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or
- * more of the constants <code>Regexp::EXTENDED</code>,
- * <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>,
- * <em>or</em>-ed together. Otherwise, if <i>options</i> is not
- * <code>nil</code>, the regexp will be case insensitive.
- * When the <i>lang</i> parameter is `n' or `N' sets the regexp no encoding.
- *
- * r1 = Regexp.initialize('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/
- * r2 = Regexp.initialize('cat', true) #=> /cat/i
- * r3 = Regexp.initialize('dog', Regexp::EXTENDED) #=> /dog/x
- * r4 = Regexp.initialize(r2) #=> /cat/i
- */
-
-static mrb_value
-mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self)
-{
- mrb_value argv[16];
- int argc;
- onig_errmsg_buffer err = "";
- int flags = 0;
- mrb_value str;
- const char *ptr;
- long len;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- if (argc == 0 || argc > 3) {
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..3)", argc);
- }
- if (mrb_type(argv[0]) == MRB_TT_REGEX) {
- mrb_value re = argv[0];
-
- if (argc > 1) {
- /* mrb_warn("flags ignored"); */
- printf("flags ignored");
- }
- mrb_reg_check(mrb, re);
- flags = mrb_reg_options(mrb, re);
- ptr = RREGEXP_SRC_PTR(re);
- len = RREGEXP_SRC_LEN(re);
- if (mrb_reg_initialize(mrb, self, ptr, len, flags, err, NULL, 0)) {
- printf("mrb_reg_raise_str(str, flags, err);");
- }
- }
- else {
- if (argc >= 2) {
- if (mrb_type(argv[1]) == MRB_TT_FIXNUM) flags = mrb_fixnum(argv[1]);
- else if (mrb_test(argv[1])) flags = ONIG_OPTION_IGNORECASE;
- }
- if (argc == 3 && !mrb_nil_p(argv[2])) {
- //char *kcode = StringValuePtr(argv[2]);
- char *kcode = mrb_string_value_ptr(mrb, argv[2]);
- if (kcode[0] == 'n' || kcode[0] == 'N') {
- flags |= ARG_ENCODING_NONE;
- }
- else {
- /*mrb_warn("encoding option is ignored - %s", kcode); */
- printf("mrb_warn:encoding option is ignored - %s", kcode);
- }
- }
- str = argv[0];
- //ptr = StringValuePtr(str);
- ptr = mrb_string_value_ptr(mrb, str);
- if (mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) {
- //mrb_reg_raise_str(str, flags, err);
- }
- }
- return self;
-}
-
-/* 15.2.15.7.2 */
-/* :nodoc: */
-static mrb_value
-mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/)
-{
- mrb_value argv[16];
- int argc;
- onig_errmsg_buffer err = "";
- const char *s;
- long len;
- mrb_value copy;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- copy = argv[0];
- if (mrb_obj_equal(mrb, copy, re)) return copy;
- /*mrb_check_frozen(copy);*/
- /* need better argument type check */
- if (!mrb_obj_is_instance_of(mrb, re, mrb_obj_class(mrb, copy))) {
- mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type");
- }
- mrb_reg_check(mrb, copy);
- s = RREGEXP_SRC_PTR(copy);
- len = RREGEXP_SRC_LEN(copy);
- if (mrb_reg_initialize(mrb, re, s, len, mrb_reg_options(mrb, copy),
- err, 0/*NULL*/, 0) != 0) {
- mrb_reg_raise(mrb, s, len, err, re);
- }
- return re;
-}
-
-static int
-reg_equal(mrb_state *mrb, struct RRegexp *re1, struct RRegexp *re2)
-{
- if (re1->ptr->options != re2->ptr->options) return FALSE;
- if (!mrb_equal(mrb, mrb_obj_value(re1->src), mrb_obj_value(re2->src)))
- return FALSE;
- return TRUE;
-}
-
-static int
-mrb_reg_equal(mrb_state *mrb, mrb_value re1, mrb_value re2)
-{
- if (mrb_obj_equal(mrb, re1, re2)) return TRUE;
-
- if (mrb_type(re2) != MRB_TT_REGEX) return FALSE;
- mrb_reg_check(mrb, re1);
- mrb_reg_check(mrb, re2);
- return reg_equal(mrb, RREGEXP(re1), RREGEXP(re2));
-}
-
-/* 15.2.15.7.3 */
-/*
- * call-seq:
- * rxp == other_rxp -> true or false
- * rxp.eql?(other_rxp) -> true or false
- *
- * Equality---Two regexps are equal if their patterns are identical, they have
- * the same character set code, and their <code>casefold?</code> values are the
- * same.
- *
- * /abc/ == /abc/x #=> false
- * /abc/ == /abc/i #=> false
- * /abc/ == /abc/n #=> false
- * /abc/u == /abc/n #=> false
- */
-
-static mrb_value
-mrb_reg_equal_m(mrb_state *mrb, mrb_value re1/*, mrb_value re2*/)
-{
- mrb_value re2;
-
- mrb_get_args(mrb, "o", &re2);
- if (mrb_reg_equal(mrb, re1, re2))
- return mrb_true_value();
- return mrb_false_value();
-}
-
-/* 15.2.15.7.4 */
-/*
- * call-seq:
- * rxp === str -> true or false
- *
- * Case Equality---Synonym for <code>Regexp#=~</code> used in case statements.
- *
- * a = "HELLO"
- * case a
- * when /^[a-z]*$/; print "Lower case\n"
- * when /^[A-Z]*$/; print "Upper case\n"
- * else; print "Mixed case\n"
- * end
- *
- * <em>produces:</em>
- *
- * Upper case
- */
-
-mrb_value
-mrb_reg_eqq(mrb_state *mrb, mrb_value re/*, mrb_value str*/)
-{
- long start;
- mrb_value str;
-
- mrb_get_args(mrb, "o", &str);
- str = reg_operand(mrb, str, 0/*FALSE*/);
- if (mrb_nil_p(str)) {
- mrb_backref_set(mrb, mrb_nil_value());
- return mrb_false_value();
- }
- start = mrb_reg_search(mrb, re, str, 0, 0);
- if (start < 0) {
- return mrb_false_value();
- }
- return mrb_true_value();
-}
-
-static long
-reg_match_pos(mrb_state *mrb, mrb_value re, mrb_value *strp, long pos)
-{
- mrb_value str = *strp;
-
- if (mrb_nil_p(str)) {
- mrb_backref_set(mrb, mrb_nil_value());
- return -1;
- }
- *strp = str = reg_operand(mrb, str, 1/*TRUE*/);
- if (pos != 0) {
- if (pos < 0) {
- mrb_value l = mrb_str_size(mrb, str);
- pos += mrb_fixnum(l);
- if (pos < 0) {
- return pos;
- }
- }
- pos = mrb_str_offset(mrb, str, pos);
- }
- return mrb_reg_search(mrb, re, str, pos, 0);
-}
-
-mrb_value
-mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str)
-{
- mrb_int pos = reg_match_pos(mrb, re, &str, 0);
- if (pos < 0) return mrb_nil_value();
- pos = mrb_str_sublen(mrb, str, pos);
- return mrb_fixnum_value(pos);
-}
-/* 15.2.15.7.5 */
-/*
- * call-seq:
- * rxp =~ str -> integer or nil
- *
- * Match---Matches <i>rxp</i> against <i>str</i>.
- *
- * /at/ =~ "input data" #=> 7
- * /ax/ =~ "input data" #=> nil
- *
- * If <code>=~</code> is used with a regexp literal with named captures,
- * captured strings (or nil) is assigned to local variables named by
- * the capture names.
- *
- * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y "
- * p lhs #=> "x"
- * p rhs #=> "y"
- *
- * If it is not matched, nil is assigned for the variables.
- *
- * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = "
- * p lhs #=> nil
- * p rhs #=> nil
- *
- * This assignment is implemented in the Ruby parser.
- * The parser detects 'regexp-literal =~ expression' for the assignment.
- * The regexp must be a literal without interpolation and placed at left hand side.
- *
- * The assignment is not occur if the regexp is not a literal.
- *
- * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
- * re =~ " x = y "
- * p lhs # undefined local variable
- * p rhs # undefined local variable
- *
- * A regexp interpolation, <code>#{}</code>, also disables
- * the assignment.
- *
- * rhs_pat = /(?<rhs>\w+)/
- * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y"
- * p lhs # undefined local variable
- *
- * The assignment is not occur if the regexp is placed at right hand side.
- *
- * " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/
- * p lhs, rhs # undefined local variable
- *
- */
-mrb_value
-mrb_reg_match(mrb_state *mrb, mrb_value re/*, mrb_value str*/)
-{
- mrb_value str;
-
- mrb_get_args(mrb, "o", &str);
- return mrb_reg_match_str(mrb, re, str);
-}
-
-/* 15.2.15.7.6 */
-/*
- * call-seq:
- * rxp.casefold? -> true or false
- *
- * Returns the value of the case-insensitive flag.
- *
- * /a/.casefold? #=> false
- * /a/i.casefold? #=> true
- * /(?i:a)/.casefold? #=> false
- */
-
-static mrb_value
-mrb_reg_casefold_p(mrb_state *mrb, mrb_value re)
-{
- mrb_reg_check(mrb, re);
- if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return mrb_true_value();
- return mrb_false_value();
-}
-
-/* 15.2.15.7.7 */
-/*
- * call-seq:
- * rxp.match(str) -> matchdata or nil
- * rxp.match(str,pos) -> matchdata or nil
- *
- * Returns a <code>MatchData</code> object describing the match, or
- * <code>nil</code> if there was no match. This is equivalent to retrieving the
- * value of the special variable <code>$~</code> following a normal match.
- * If the second parameter is present, it specifies the position in the string
- * to begin the search.
- *
- * /(.)(.)(.)/.match("abc")[2] #=> "b"
- * /(.)(.)/.match("abc", 1)[2] #=> "c"
- *
- * If a block is given, invoke the block with MatchData if match succeed, so
- * that you can write
- *
- * pat.match(str) {|m| ...}
- *
- * instead of
- *
- * if m = pat.match(str)
- * ...
- * end
- *
- * The return value is a value from block execution in this case.
- */
-
-static mrb_value
-mrb_reg_match_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value re)
-{
- mrb_value argv[16];
- int argc;
- mrb_value result, str, initpos, b;
- long pos;
-
- //if (mrb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
- mrb_get_args(mrb, "*&", &argv, &argc, &b);
- if (argc == 2) {
- initpos = argv[1];
- pos = mrb_fixnum(initpos);
- }
- else {
- pos = 0;
- }
- str = argv[0];
- pos = reg_match_pos(mrb, re, &str, pos);
- if (pos < 0) {
- mrb_backref_set(mrb, mrb_nil_value());
- return mrb_nil_value();
- }
- result = mrb_backref_get(mrb);
- /*mrb_match_busy(result);*/
- if (!mrb_nil_p(result) && mrb_block_given_p()) {
- return mrb_yield(mrb, result, b);
- }
- return result;
-}
-
-/* 15.2.15.7.8 */
-
-/*
- * call-seq:
- * rxp.source -> str
- *
- * Returns the original string of the pattern.
- *
- * /ab+c/ix.source #=> "ab+c"
- *
- * Note that escape sequences are retained as is.
- *
- * /\x20\+/.source #=> "\\x20\\+"
- *
- */
-
-static mrb_value
-mrb_reg_source(mrb_state *mrb, mrb_value re)
-{
- mrb_value str;
-
- mrb_reg_check(mrb, re);
- str = mrb_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re));
- return str;
-}
-
-static int
-name_to_backref_number(mrb_state *mrb, struct re_registers *regs, struct RRegexp*regexp, const char* name, const char* name_end)
-{
- int num;
-
- num = onig_name_to_backref_number(regexp->ptr,
- (const unsigned char* )name, (const unsigned char* )name_end, regs);
- if (num >= 1) {
- return num;
- }
- else {
- mrb_value s = mrb_str_new(mrb, name, (long )(name_end - name));//mrb_str_new(name, (long )(name_end - name));
- mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s",
- mrb_string_value_ptr(mrb, s));
- return num; /* not reach */
- }
-}
-
-/*
- * Document-class: MatchData
- *
- * <code>MatchData</code> is the type of the special variable <code>$~</code>,
- * and is the type of the object returned by <code>Regexp#match</code> and
- * <code>Regexp.last_match</code>. It encapsulates all the results of a pattern
- * match, results normally accessed through the special variables
- * <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>,
- * <code>$2</code>, and so on.
- *
- */
-
-mrb_value
-match_alloc(mrb_state *mrb)
-{
- struct RMatch* m;
-
- m = (struct RMatch*)mrb_obj_alloc(mrb, MRB_TT_MATCH, MATCH_CLASS);
-
- m->str = 0;
- m->rmatch = 0;
- m->regexp = 0;
- m->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch);
- memset(m->rmatch, 0, sizeof(struct rmatch));
-
- return mrb_obj_value(m);
-}
-
-/* ------------------------------------------------------------------------- */
-/* MatchData Class */
-/* ------------------------------------------------------------------------- */
-/* 15.2.16.3.1 */
-/*
- * call-seq:
- * mtch[i] -> str or nil
- * mtch[start, length] -> array
- * mtch[range] -> array
- * mtch[name] -> str or nil
- *
- * Match Reference---<code>MatchData</code> acts as an array, and may be
- * accessed using the normal array indexing techniques. <i>mtch</i>[0] is
- * equivalent to the special variable <code>$&</code>, and returns the entire
- * matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values
- * of the matched backreferences (portions of the pattern between parentheses).
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
- * m[0] #=> "HX1138"
- * m[1, 2] #=> ["H", "X"]
- * m[1..3] #=> ["H", "X", "113"]
- * m[-3, 2] #=> ["X", "113"]
- *
- * m = /(?<foo>a+)b/.match("ccaaab")
- * m #=> #<MatchData "aaab" foo:"aaa">
- * m["foo"] #=> "aaa"
- * m[:foo] #=> "aaa"
- */
-
-static mrb_value
-mrb_match_aref(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value match)
-{
- mrb_value argv[16];
- int argc;
- mrb_value idx;
-
- match_check(mrb, match);
- //mrb_scan_args(argc, argv, "11", &idx, &rest);
- mrb_get_args(mrb, "*", &argv, &argc);
- idx = argv[0];
- if (argc<2) {
- if (mrb_type(idx) == MRB_TT_FIXNUM) {
- if (mrb_fixnum(idx) >= 0) {
- return mrb_reg_nth_match(mrb, mrb_fixnum(idx), match);
- }
- }
- else {
- const char *p;
- int num;
-
- switch (mrb_type(idx)) {
- case MRB_TT_SYMBOL:
- p = mrb_sym2name(mrb, mrb_symbol(idx));
- goto name_to_backref;
- break;
- case MRB_TT_STRING:
- //p = StringValuePtr(idx);
- p = mrb_string_value_ptr(mrb, idx);
-name_to_backref:
- num = name_to_backref_number(mrb, RMATCH_REGS(match),
- RMATCH(match)->regexp, p, p + strlen(p));
- return mrb_reg_nth_match(mrb, num, match);
- break;
- default:
- break;
- }
- }
- }
-
- return mrb_ary_aget(mrb, /*argc, argv,*/ mrb_match_to_a(mrb, match));
-}
-
-typedef struct {
- long byte_pos;
- long char_pos;
-} pair_t;
-
-static void
-update_char_offset(mrb_state *mrb, mrb_value match)
-{
- struct rmatch *rm = RMATCH(match)->rmatch;
- struct re_registers *regs;
- int i, num_regs;
-
- if (rm->char_offset_updated)
- return;
-
- regs = &rm->regs;
- num_regs = rm->regs.num_regs;
-
- if (rm->char_offset_num_allocated < num_regs) {
- //REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs);
- rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)*num_regs);
- rm->char_offset_num_allocated = num_regs;
- }
-
- for (i = 0; i < num_regs; i++) {
- rm->char_offset[i].beg = BEG(i);
- rm->char_offset[i].end = END(i);
- }
- rm->char_offset_updated = 1;
- return;
-}
-
-/* 15.2.16.3.2 */
-/*
- * call-seq:
- * mtch.begin(n) -> integer
- *
- * Returns the offset of the start of the <em>n</em>th element of the match
- * array in the string.
- * <em>n</em> can be a string or symbol to reference a named capture.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.begin(0) #=> 1
- * m.begin(2) #=> 2
- *
- * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
- * p m.begin(:foo) #=> 0
- * p m.begin(:bar) #=> 2
- */
-
-static mrb_value
-mrb_match_begin(mrb_state *mrb, mrb_value match/*, mrb_value n*/)
-{
- mrb_value argv[16];
- int argc;
- mrb_value n = argv[0];
- int i;
- struct re_registers *regs;
-
- match_check(mrb, match);
- mrb_get_args(mrb, "*", &argv, &argc);
- n = argv[0];
- i = match_backref_number(mrb, match, n);
- regs = RMATCH_REGS(match);
-
- if (i < 0 || regs->num_regs <= i)
- mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i);
-
- if (BEG(i) < 0)
- return mrb_nil_value();
-
- update_char_offset(mrb, match);
- return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg);
-}
-
-static mrb_value
-match_array(mrb_state *mrb, mrb_value match, int start)
-{
- struct re_registers *regs;
- mrb_value ary;
- struct RString *target;
- int i;
-
- match_check(mrb, match);
- regs = RMATCH_REGS(match);
- ary = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs);
- target = RMATCH(match)->str;
-
- for (i=start; i<regs->num_regs; i++) {
- if (regs->beg[i] == -1) {
- mrb_ary_push(mrb, ary, mrb_nil_value());
- }
- else {
- mrb_value str = mrb_str_subseq(mrb, mrb_obj_value(target), regs->beg[i], regs->end[i]-regs->beg[i]);
- mrb_ary_push(mrb, ary, str);
- }
- }
- return ary;
-}
-
-/* 15.2.16.3.3 */
-/*
- * call-seq:
- * mtch.captures -> array
- *
- * Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>.
- *
- * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
- * f1 #=> "H"
- * f2 #=> "X"
- * f3 #=> "113"
- * f4 #=> "8"
- */
-static mrb_value
-mrb_match_captures(mrb_state *mrb, mrb_value match)
-{
- return match_array(mrb, match, 1);
-}
-
-/* 15.2.16.3.4 */
-/*
- * call-seq:
- * mtch.end(n) -> integer
- *
- * Returns the offset of the character immediately following the end of the
- * <em>n</em>th element of the match array in the string.
- * <em>n</em> can be a string or symbol to reference a named capture.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.end(0) #=> 7
- * m.end(2) #=> 3
- *
- * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
- * p m.end(:foo) #=> 1
- * p m.end(:bar) #=> 3
- */
-
-static mrb_value
-mrb_match_end(mrb_state *mrb, mrb_value match/*, mrb_value n*/)
-{
- mrb_value argv[16];
- int argc;
- mrb_value n;
- int i;
- struct re_registers *regs;
-
- match_check(mrb, match);
- mrb_get_args(mrb, "*", &argv, &argc);
- n = argv[0];
- i = match_backref_number(mrb, match, n);
- regs = RMATCH_REGS(match);
-
- if (i < 0 || regs->num_regs <= i)
- mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i);
-
- if (BEG(i) < 0)
- return mrb_nil_value();
-
- update_char_offset(mrb, match);
- return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end);
-}
-
-/* 15.2.16.3.5 */
-/* :nodoc: */
-static mrb_value
-mrb_match_init_copy(mrb_state *mrb, mrb_value obj/*, mrb_value orig*/)
-{
- mrb_value argv[16];
- int argc;
- struct rmatch *rm;
- mrb_value orig;
-
- mrb_get_args(mrb, "*", &argv, &argc);
- orig = argv[0];
-
- if (mrb_obj_equal(mrb, obj, orig)) return obj;
-
- if (!mrb_obj_is_instance_of(mrb, orig, mrb_obj_class(mrb, obj))) {
- mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class");
- }
-
- RMATCH(obj)->str = RMATCH(orig)->str;
- RMATCH(obj)->regexp = RMATCH(orig)->regexp;
-
- if (RMATCH(obj)->rmatch == 0) {
- RMATCH(obj)->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch);
- memset(RMATCH(obj)->rmatch, 0, sizeof(struct rmatch));
- }
- rm = RMATCH(obj)->rmatch;
- onig_region_copy(&rm->regs, RMATCH_REGS(orig));
-
- if (!RMATCH(orig)->rmatch->char_offset_updated) {
- rm->char_offset_updated = 0;
- }
- else {
- if (rm->char_offset_num_allocated < rm->regs.num_regs) {
- //REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
- rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)* rm->regs.num_regs);
- rm->char_offset_num_allocated = rm->regs.num_regs;
- }
- memcpy(rm->char_offset, RMATCH(orig)->rmatch->char_offset,
- sizeof(struct rmatch_offset) * rm->regs.num_regs);
- rm->char_offset_updated = 1;
- }
-
- return obj;
-}
-
-/* 15.2.16.3.6 */
-/* 15.2.16.3.10 */
-/*
- * call-seq:
- * mtch.length -> integer
- * mtch.size -> integer
- *
- * Returns the number of elements in the match array.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.length #=> 5
- * m.size #=> 5
- */
-
-static mrb_value
-mrb_match_size(mrb_state *mrb, mrb_value match)
-{
- match_check(mrb, match);
- return mrb_fixnum_value(RMATCH_REGS(match)->num_regs);
-}
-
-/* 15.2.16.3.7 */
-/*
- * call-seq:
- * mtch.offset(n) -> array
- *
- * Returns a two-element array containing the beginning and ending offsets of
- * the <em>n</em>th match.
- * <em>n</em> can be a string or symbol to reference a named capture.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.offset(0) #=> [1, 7]
- * m.offset(4) #=> [6, 7]
- *
- * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
- * p m.offset(:foo) #=> [0, 1]
- * p m.offset(:bar) #=> [2, 3]
- *
- */
-
-static mrb_value
-mrb_match_offset(mrb_state *mrb, mrb_value match/*, mrb_value n*/)
-{
- mrb_value n;
- struct re_registers *regs = RMATCH_REGS(match);
- int i;
-
- match_check(mrb, match);
- mrb_get_args(mrb, "o", &n);
- i = match_backref_number(mrb, match, n);
-
- if (i < 0 || regs->num_regs <= i)
- mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i);
-
- if (BEG(i) < 0)
- return mrb_assoc_new(mrb, mrb_nil_value(), mrb_nil_value());
-
- update_char_offset(mrb, match);
- return mrb_assoc_new(mrb, mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg),
- mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end));
-}
-
-/* 15.2.16.3.8 */
-/*
- * call-seq:
- * mtch.post_match -> str
- *
- * Returns the portion of the original string after the current match.
- * Equivalent to the special variable <code>$'</code>.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
- * m.post_match #=> ": The Movie"
- */
-mrb_value
-mrb_reg_match_post(mrb_state *mrb, mrb_value match)
-{
- struct RString *str;
- long pos;
- struct re_registers *regs;
-
- if (mrb_nil_p(match)) return mrb_nil_value();
- match_check(mrb, match);
- regs = RMATCH_REGS(match);
- if (BEG(0) == -1) return mrb_nil_value();
- str = RMATCH(match)->str;
- pos = END(0);
- return mrb_str_subseq(mrb, mrb_obj_value(str), pos, str->len - pos);
-}
-
-/* 15.2.16.3.9 */
-/*
- * call-seq:
- * mtch.pre_match -> str
- *
- * Returns the portion of the original string before the current match.
- * Equivalent to the special variable <code>$`</code>.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.pre_match #=> "T"
- */
-
-mrb_value
-mrb_reg_match_pre(mrb_state *mrb, mrb_value match)
-{
- mrb_value str;
- struct re_registers *regs;
-
- if (mrb_nil_p(match)) return mrb_nil_value();
- match_check(mrb, match);
- regs = RMATCH_REGS(match);
- if (BEG(0) == -1) return mrb_nil_value();
- str = mrb_str_subseq(mrb, mrb_obj_value(RMATCH(match)->str), 0, BEG(0));
-
- return str;
-}
-
-/* 15.2.16.3.11 */
-/*
- * call-seq:
- * mtch.string -> str
- *
- * Returns a frozen copy of the string passed in to <code>match</code>.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.string #=> "THX1138."
- */
-
-static mrb_value
-mrb_match_string(mrb_state *mrb, mrb_value match)
-{
- match_check(mrb, match);
- return mrb_obj_value(RMATCH(match)->str);
-}
-
-/* 15.2.16.3.12 */
-/*
- * call-seq:
- * mtch.to_a -> anArray
- *
- * Returns the array of matches.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.to_a #=> ["HX1138", "H", "X", "113", "8"]
- *
- * Because <code>to_a</code> is called when expanding
- * <code>*</code><em>variable</em>, there's a useful assignment
- * shortcut for extracting matched fields. This is slightly slower than
- * accessing the fields directly (as an intermediate array is
- * generated).
- *
- * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
- * all #=> "HX1138"
- * f1 #=> "H"
- * f2 #=> "X"
- * f3 #=> "113"
- */
-
-static mrb_value
-mrb_match_to_a(mrb_state *mrb, mrb_value match)
-{
- return match_array(mrb, match, 0);
-}
-
-/* 15.2.16.3.13 */
-/*
- * call-seq:
- * mtch.to_s -> str
- *
- * Returns the entire matched string.
- *
- * m = /(.)(.)(\d+)(\d)/.match("THX1138.")
- * m.to_s #=> "HX1138"
- */
-
-static mrb_value
-mrb_match_to_s(mrb_state *mrb, mrb_value match)
-{
- mrb_value str = mrb_reg_last_match(mrb, match);
-
- match_check(mrb, match);
- if (mrb_nil_p(str)) str = mrb_str_new(mrb, 0, 0);//mrb_str_new(0,0);
-
- return str;
-}
-
-static int
-char_to_option(int c)
-{
- int val;
-
- switch (c) {
- case 'i':
- val = ONIG_OPTION_IGNORECASE;
- break;
- case 'x':
- val = ONIG_OPTION_EXTEND;
- break;
- case 'm':
- val = ONIG_OPTION_MULTILINE;
- break;
- default:
- val = 0;
- break;
- }
- return val;
-}
-
-static char *
-option_to_str(char str[4], int options)
-{
- char *p = str;
- if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
- if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
- if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
- *p = 0;
- return str;
-}
-
-#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
-
-static void
-mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len)
-{
- const char *p, *pend;
- int need_escape = 0;
- int c;
-
- p = s; pend = p + len;
- while (p < pend) {
- c = *p;
- if (c == -1) {
- p += pend - p;
- }
- else if (c != '/' && ISPRINT(c)) {
- p++;
- }
- else {
- need_escape = 1;
- break;
- }
- }
-
- if (!need_escape) {
- mrb_str_buf_cat(mrb, str, s, len);
- }
- else {
- p = s;
- while (p<pend) {
- c = *p;
- if (c == '\\' && p+1 < pend) {
- int n = 1 + pend - (p+1);
- mrb_str_buf_cat(mrb, str, p, n);
- p += n;
- continue;
- }
- else if (c == '/') {
- char c = '\\';
- mrb_str_buf_cat(mrb, str, &c, 1);
- mrb_str_buf_cat(mrb, str, p, 1);
- }
- else if (ISPRINT(c)) {
- mrb_str_buf_cat(mrb, str, p, 1);
- }
- else if (!ISSPACE(c)) {
- char b[8];
- int n;
-
- n = snprintf(b, sizeof(b), "\\x%02X", c);
- mrb_str_buf_cat(mrb, str, b, n);
- }
- else {
- mrb_str_buf_cat(mrb, str, p, 1);
- }
- p++;
- }
- }
-}
-
-/* 15.2.15.7.9 (x) */
-/*
- * call-seq:
- * rxp.to_s -> str
- *
- * Returns a string containing the regular expression and its options (using the
- * <code>(?opts:source)</code> notation. This string can be fed back in to
- * <code>Regexp::new</code> to a regular expression with the same semantics as
- * the original. (However, <code>Regexp#==</code> may not return true when
- * comparing the two, as the source of the regular expression itself may
- * differ, as the example shows). <code>Regexp#inspect</code> produces a
- * generally more readable version of <i>rxp</i>.
- *
- * r1 = /ab+c/ix #=> /ab+c/ix
- * s1 = r1.to_s #=> "(?ix-m:ab+c)"
- * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/
- * r1 == r2 #=> false
- * r1.source #=> "ab+c"
- * r2.source #=> "(?ix-m:ab+c)"
- */
-
-mrb_value
-mrb_reg_to_s(mrb_state *mrb, mrb_value re)
-{
- int options, opt;
- const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
- long len;
- const UChar* ptr;
- mrb_value str = mrb_str_new(mrb, "(?", 2);
- char optbuf[5];
- mrb_encoding *enc = mrb_enc_get(mrb, re);
-
- mrb_reg_check(mrb, re);
- memset(optbuf, 0, 5);
- options = RREGEXP(re)->ptr->options;
- ptr = (UChar*)RREGEXP_SRC_PTR(re);
- len = RREGEXP_SRC_LEN(re);
-again:
- if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
- int err = 1;
- ptr += 2;
- if ((len -= 2) > 0) {
- do {
- opt = char_to_option((int )*ptr);
- if (opt != 0) {
- options |= opt;
- }
- else {
- break;
- }
- ++ptr;
- } while (--len > 0);
- }
- if (len > 1 && *ptr == '-') {
- ++ptr;
- --len;
- do {
- opt = char_to_option((int )*ptr);
- if (opt != 0) {
- options &= ~opt;
- }
- else {
- break;
- }
- ++ptr;
- } while (--len > 0);
- }
- if (*ptr == ')') {
- --len;
- ++ptr;
- goto again;
- }
- if (*ptr == ':' && ptr[len-1] == ')') {
- Regexp *rp;
-
- ++ptr;
- len -= 2;
- err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT,
- enc, OnigDefaultSyntax, NULL);
- onig_free(rp);
- }
- if (err) {
- options = RREGEXP(re)->ptr->options;
- ptr = (UChar*)RREGEXP_SRC_PTR(re);
- len = RREGEXP_SRC_LEN(re);
- }
- }
-
- if (*option_to_str(optbuf, options)) mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf));
-
- if ((options & embeddable) != embeddable) {
- optbuf[0] = '-';
- option_to_str(optbuf + 1, ~options);
- mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf));
- }
-
- mrb_str_buf_cat(mrb, str, ":", 1);
- mrb_reg_expr_str(mrb, str, (char*)ptr, len);
- mrb_str_buf_cat(mrb, str, ")", 1);
-
- return str;
-}
-
-/* 15.2.15.7.10(x) */
-/*
- * call-seq:
- * rxp.inspect -> string
- *
- * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly,
- * <code>#inspect</code> actually produces the more natural version of
- * the string than <code>#to_s</code>.
- *
- * /ab+c/ix.inspect #=> "/ab+c/ix"
- *
- */
-
-static mrb_value
-mrb_reg_inspect(mrb_state *mrb, mrb_value re)
-{
- if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
- return mrb_any_to_s(mrb, re);
- }
- return mrb_reg_desc(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
-}
-
-static mrb_value
-mrb_reg_s_alloc(mrb_state *mrb, mrb_value dummy)
-{
- struct RRegexp* re;
-
- //NEWOBJ(re, struct RRegexp);
- //OBJSETUP(re, klass, T_REGEXP);
- re = (struct RRegexp*)mrb_obj_alloc(mrb, MRB_TT_REGEX, REGEX_CLASS);
-
- re->ptr = 0;
- re->src = 0;
- re->usecnt = 0;
-
- return mrb_obj_value(re);
-}
-
-mrb_value
-mrb_reg_match_last(mrb_state *mrb, mrb_value match)
-{
- int i;
-
- if (mrb_nil_p(match)) return mrb_nil_value();
- match_check(mrb, match);
- if (RMATCH(match)->rmatch->char_offset[0].beg == -1) return mrb_nil_value();
-
- for (i=RMATCH(match)->rmatch->regs.num_regs-1; RMATCH(match)->rmatch->char_offset[i].beg == -1 && i > 0; i--)
- ;
- if (i == 0) return mrb_nil_value();
- return mrb_reg_nth_match(mrb, i, match);
-}
-
-/* 15.2.16.3.14(x) */
-/*
- * call-seq:
- * mtch.inspect -> str
- *
- * Returns a printable version of <i>mtch</i>.
- *
- * puts /.$/.match("foo").inspect
- * #=> #<MatchData "o">
- *
- * puts /(.)(.)(.)/.match("foo").inspect
- * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
- *
- * puts /(.)(.)?(.)/.match("fo").inspect
- * #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
- *
- * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect
- * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g">
- *
- */
-struct backref_name_tag {
- const UChar *name;
- long len;
-};
-
-static int
-match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end,
- int back_num, int *back_refs, OnigRegex regex, void *arg0)
-{
- struct backref_name_tag *arg = (struct backref_name_tag*)arg0;
- int i;
-
- for (i = 0; i < back_num; i++) {
- arg[back_refs[i]].name = name;
- arg[back_refs[i]].len = name_end - name;
- }
- return 0;
-}
-
-static mrb_value
-mrb_match_inspect(mrb_state *mrb, mrb_value match)
-{
- const char *cname = mrb_obj_classname(mrb, match);
- mrb_value str;
- int i;
- struct re_registers *regs = RMATCH_REGS(match);
- int num_regs = regs->num_regs;
- struct backref_name_tag *names;
- struct RRegexp *regexp = RMATCH(match)->regexp;
-
- if (!regexp) {
- return mrb_sprintf(mrb, "#<%s:%p>", cname, (void*)&match);
- }
-
- //names = ALLOCA_N(struct backref_name_tag, num_regs);
- //MEMZERO(names, struct backref_name_tag, num_regs);
- names = mrb_malloc(mrb, sizeof(struct backref_name_tag)*num_regs);
- memset(names, 0, sizeof(struct backref_name_tag)*num_regs);
-
- onig_foreach_name(regexp->ptr,
- match_inspect_name_iter, names);
-
- str = mrb_str_new(mrb, "#<", 2);
- mrb_str_buf_cat(mrb, str, cname, strlen(cname));
-
- for (i = 0; i < num_regs; i++) {
- char buf[sizeof(num_regs)*3+1];
- mrb_value v;
- mrb_str_buf_cat(mrb, str, " ", 1);
- if (0 < i) {
- if (names[i].name)
- mrb_str_buf_cat(mrb, str, (const char*)names[i].name, names[i].len);
- else {
- int n = sprintf(buf, "%d", i);
- mrb_str_buf_cat(mrb, str, (const char*)buf, n);
- }
- mrb_str_buf_cat(mrb, str, ":", 1);
- }
- v = mrb_reg_nth_match(mrb, i, match);
- if (mrb_nil_p(v))
- mrb_str_buf_cat(mrb, str, "nil", 3);
- else
- mrb_str_buf_append(mrb, str, mrb_str_inspect(mrb, v));
- }
- mrb_str_buf_cat(mrb, str, ">", 1);
-
- return str;
-}
-
-/* 15.2.16.3.15(x) */
-/* 15.2.16.3.16(x) */
-/*
- * call-seq:
- * mtch == mtch2 -> true or false
- *
- * Equality---Two matchdata are equal if their target strings,
- * patterns, and matched positions are identical.
- */
-
-static mrb_value
-mrb_match_equal(mrb_state *mrb, mrb_value match1)
-{
- const struct re_registers *regs1, *regs2;
- mrb_value match2;
-
- mrb_get_args(mrb, "o", &match2);
- if (mrb_obj_equal(mrb, match1, match2)) return mrb_true_value();
- if (mrb_type(match2) != MRB_TT_MATCH) return mrb_false_value();
- if (!mrb_str_equal(mrb, mrb_obj_value(RMATCH(match1)->str), mrb_obj_value(RMATCH(match2)->str)))
- return mrb_false_value();
- if (!reg_equal(mrb, RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return mrb_false_value();
- regs1 = RMATCH_REGS(match1);
- regs2 = RMATCH_REGS(match2);
- if (regs1->num_regs != regs2->num_regs) return mrb_false_value();
- if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return mrb_false_value();
- if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return mrb_false_value();
- return mrb_true_value();
-}
/*
* Document-class: RegexpError
@@ -1999,607 +38,5 @@ mrb_match_equal(mrb_state *mrb, mrb_value match1)
void
mrb_init_regexp(mrb_state *mrb)
{
- struct RClass *s;
- s = mrb_define_class(mrb, "Regexp", mrb->object_class);
-
- mrb_define_class_method(mrb, s, "compile", mrb_reg_s_new_instance, ARGS_ANY()); /* 15.2.15.6.1 */
- mrb_define_class_method(mrb, s, "escape", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.2 */
- mrb_define_class_method(mrb, s, "last_match", mrb_reg_s_last_match, ARGS_ANY()); /* 15.2.15.6.3 */
- mrb_define_class_method(mrb, s, "quote", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.4 */
- //mrb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
- //mrb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
-
- mrb_define_method(mrb, s, "initialize", mrb_reg_initialize_m, ARGS_ANY()); /* 15.2.15.7.1 */
- mrb_define_method(mrb, s, "initialize_copy", mrb_reg_init_copy, ARGS_REQ(1)); /* 15.2.15.7.2 */
- mrb_define_method(mrb, s, "==", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.3 */
- mrb_define_method(mrb, s, "===", mrb_reg_eqq, ARGS_REQ(1)); /* 15.2.15.7.4 */
- mrb_define_method(mrb, s, "=~", mrb_reg_match, ARGS_REQ(1)); /* 15.2.15.7.5 */
- mrb_define_method(mrb, s, "casefold?", mrb_reg_casefold_p, ARGS_NONE()); /* 15.2.15.7.6 */
- mrb_define_method(mrb, s, "match", mrb_reg_match_m, ARGS_ANY()); /* 15.2.15.7.7 */
- mrb_define_method(mrb, s, "source", mrb_reg_source, ARGS_NONE()); /* 15.2.15.7.8 */
- //mrb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
- //mrb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
- mrb_define_method(mrb, s, "to_s", mrb_reg_to_s, ARGS_NONE()); /* 15.2.15.7.9 (x) */
- mrb_define_method(mrb, s, "inspect", mrb_reg_inspect, ARGS_NONE()); /* 15.2.15.7.10(x) */
- mrb_define_method(mrb, s, "eql?", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.11(x) */
- //mrb_define_method(rb_cRegexp, "options", mrb_reg_options_m, 0);
- //mrb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
- //mrb_define_method(rb_cRegexp, "fixed_encoding?", mrb_reg_fixed_encoding_p, 0);
- //mrb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
- //mrb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
-
- //mrb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
- //mrb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
- //mrb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
- //mrb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
- mrb_define_const(mrb, s, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE));
- mrb_define_const(mrb, s, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND));
- mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
- mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED));
-
- s = mrb_define_class(mrb, "MatchData", mrb->object_class);
- //mrb_undef_class_method(CLASS_OF(rb_cMatch), "new");
-
- mrb_define_method(mrb, s, "[]", mrb_match_aref, ARGS_ANY()); /* 15.2.16.3.1 */
- mrb_define_method(mrb, s, "begin", mrb_match_begin, ARGS_REQ(1)); /* 15.2.16.3.2 */
- mrb_define_method(mrb, s, "captures", mrb_match_captures, ARGS_NONE()); /* 15.2.16.3.3 */
- mrb_define_method(mrb, s, "end", mrb_match_end, ARGS_REQ(1)); /* 15.2.16.3.4 */
- mrb_define_method(mrb, s, "initialize_copy", mrb_match_init_copy, ARGS_REQ(1)); /* 15.2.16.3.5 */
- mrb_define_method(mrb, s, "length", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.6 */
- mrb_define_method(mrb, s, "offset", mrb_match_offset, ARGS_REQ(1)); /* 15.2.16.3.7 */
- mrb_define_method(mrb, s, "post_match", mrb_reg_match_post, ARGS_NONE()); /* 15.2.16.3.8 */
- mrb_define_method(mrb, s, "pre_match", mrb_reg_match_pre, ARGS_NONE()); /* 15.2.16.3.9 */
- mrb_define_method(mrb, s, "size", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.10 */
- mrb_define_method(mrb, s, "string", mrb_match_string, ARGS_NONE()); /* 15.2.16.3.11 */
- mrb_define_method(mrb, s, "to_a", mrb_match_to_a, ARGS_NONE()); /* 15.2.16.3.12 */
- mrb_define_method(mrb, s, "to_s", mrb_match_to_s, ARGS_NONE()); /* 15.2.16.3.13 */
- mrb_define_method(mrb, s, "inspect", mrb_match_inspect, ARGS_NONE()); /* 15.2.16.3.14(x) */
- mrb_define_method(mrb, s, "==", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.15(x) */
- mrb_define_method(mrb, s, "eql?", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.16(x) */
- //mrb_define_method(rb_cMatch, "regexp", match_regexp, 0);
- //mrb_define_method(rb_cMatch, "names", match_names, 0);
- //mrb_define_method(rb_cMatch, "values_at", match_values_at, -1);
- //mrb_define_method(rb_cMatch, "hash", match_hash, 0);
- //mrb_define_method(rb_cMatch, "==", match_equal, 1);
-}
-/* ----------------1_8_7---------------------------------------- */
-//`mrb_check_type'
-//`mrb_reg_regsub'
-//`mrb_backref_get'
-//`mrb_memsearch'
-//`mrb_reg_mbclen2'
-//`mrb_reg_regcomp'
-//`mrb_yield'
-
-
-mrb_value
-mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers *regs, mrb_value regexp)
-{
- mrb_value val;
- char *p, *s, *e;
- struct RString *ps = mrb_str_ptr(str);
- int no;
-
- val.tt = 0;
- p = s = ps->buf;
- e = s + ps->len;
-
- while (s < e) {
- int c = *s;
- char *ss;
-
- if (c == -1) {
- s += e - s;
- continue;
- }
- ss = s;
- s++;
-
- if (c != '\\' || s == e) continue;
-
- //if (!val) {
- if (!val.tt) {
- val = mrb_str_buf_new(mrb, ss-p);
- }
- mrb_str_buf_cat(mrb, val, p, ss-p);
-
- c = *s;
- if (c == -1) {
- s += e - s;
- mrb_str_buf_cat(mrb, val, ss, s-ss);
- p = s;
- continue;
- }
- s++;
-
- p = s;
- switch (c) {
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) {
- no = c - '0';
- }
- else {
- continue;
- }
- break;
-
- case 'k':
- if (s < e && *s == '<') {
- char *name, *name_end;
-
- name_end = name = s + 1;
- while (name_end < e) {
- c = *name_end;
- if (c == '>') break;
- name_end += c == -1 ? e - name_end : 1;
- }
- if (name_end < e) {
- no = name_to_backref_number(mrb, regs, RREGEXP(regexp), name, name_end);
- p = s = name_end + 1;
- break;
- }
- else {
- mrb_raise(mrb, E_RUNTIME_ERROR, "invalid group name reference format");
- }
- }
-
- mrb_str_buf_cat(mrb, val, ss, s-ss);
- continue;
-
- case '0':
- case '&':
- no = 0;
- break;
-
- case '`':
- mrb_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0));
- continue;
-
- case '\'':
- mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0));
- continue;
-
- case '+':
- no = regs->num_regs-1;
- while (BEG(no) == -1 && no > 0) no--;
- if (no == 0) continue;
- break;
-
- case '\\':
- mrb_str_buf_cat(mrb, val, s-1, 1);
- continue;
-
- default:
- mrb_str_buf_cat(mrb, val, ss, s-ss);
- continue;
- }
-
- if (no >= 0) {
- if (no >= regs->num_regs) continue;
- if (BEG(no) == -1) continue;
- mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no));
- }
- } /* while (s < e) { */
-
-
- if (!val.tt) return str;
- if (p < e) {
- mrb_str_buf_cat(mrb, val, p, e-p);
- }
- return val;
-}
-
-static inline NODE *
-lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp)
-{
- NODE *svar;
-
- /*if (lfp && th->local_lfp != lfp) {
- svar = &lfp[-1];
- }
- else {
- svar = mrb->&th->local_svar;
- }*/
- svar = mrb->local_svar;
- /*if (mrb_nil_p(*svar)) {
- *svar = mrb_obj_value(NEW_IF(0, 0, 0));
- }*/
- return svar;//(NODE *)((*svar).value.p);
-}
-
-static mrb_value
-lfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key)
-{
- //mrb_value *regs;
- NODE *svar = lfp_svar_place(mrb, /*th,*/ lfp);
- //regs = mrb->stack;
-
- switch (key) {
- case 0:
- return svar->u1.value;
- case 1:
- return svar->u2.value;
- default: {
- return svar->u3.value;
- /*const mrb_value hash = regs[GETARG_C(*svar)];//svar->u3.value;
-
- if (mrb_nil_p(hash)) {
- return mrb_nil_value();
- }
- else {
- return mrb_hash_get(mrb, hash, mrb_fixnum_value(key));//mrb_hash_lookup(hash, key);
- }*/
- }
- }
-}
-
-static void
-lfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key, mrb_value val)
-{
- //mrb_value *regs;
- NODE *svar = lfp_svar_place(mrb, /*th,*/ lfp);
- //regs = mrb->stack;
-
- switch (key) {
- case 0:
- svar->u1.value = val;
- return;
- case 1:
- svar->u2.value = val;
- return;
- default: {
- svar->u3.value = val;
- //mrb_value hash = *svar;//svar->u3.value;
-
- //if (mrb_nil_p(hash)) {
- // svar->u3.value = hash = mrb_hash_new(mrb, 0);
- //}
- //mrb_hash_aset(hash, key, val);
- //mrb_hash_set(mrb, hash, mrb_fixnum_value(key), val);
- }
- }
-}
-
-static mrb_value
-vm_cfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key)
-{
- //cfp = vm_normal_frame(th, cfp);
- return lfp_svar_get(mrb, /*th, cfp ? cfp->lfp :*/ 0, key);
-}
-
-static void
-vm_cfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key, const mrb_value val)
-{
- //cfp = vm_normal_frame(th, cfp);
- lfp_svar_set(mrb, /*th, cfp ? cfp->lfp : */0, key, val);
-}
-
-static mrb_value
-vm_svar_get(mrb_state *mrb, mrb_int key)
-{
- //mrb_thread_t *th = GET_THREAD();
- return vm_cfp_svar_get(mrb,/*th, th->cfp,*/ key);
-}
-
-static void
-vm_svar_set(mrb_state *mrb, mrb_int key, mrb_value val)
-{
- //mrb_thread_t *th = GET_THREAD();
- vm_cfp_svar_set(mrb,/*th, th->cfp,*/ key, val);
-}
-
-
-int
-mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref)
-{
- return match_backref_number(mrb, match, backref);
-}
-
-mrb_value
-mrb_backref_get(mrb_state *mrb)
-{
- return vm_svar_get(mrb, 1);
-}
-
-void
-mrb_backref_set(mrb_state *mrb, mrb_value val)
-{
- vm_svar_set(mrb, 1, val);
-}
-#endif //ENABLE_REGEXP
-
-#ifdef INCLUDE_ENCODING
-static inline long
-mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
-{
- const unsigned char *x = xs, *xe = xs + m;
- const unsigned char *y = ys;
- int i, qstable[256];
-
- /* Preprocessing */
- for (i = 0; i < 256; ++i)
- qstable[i] = m + 1;
- for (; x < xe; ++x)
- qstable[*x] = xe - x;
- /* Searching */
- for (; y + m <= ys + n; y += *(qstable + y[m])) {
- if (*xs == *y && memcmp(xs, y, m) == 0)
- return y - ys;
- }
- return -1;
-}
-
-static inline unsigned int
-mrb_memsearch_qs_utf8_hash(const unsigned char *x)
-{
- register const unsigned int mix = 8353;
- register unsigned int h = *x;
- if (h < 0xC0) {
- return h + 256;
- }
- else if (h < 0xE0) {
- h *= mix;
- h += x[1];
- }
- else if (h < 0xF0) {
- h *= mix;
- h += x[1];
- h *= mix;
- h += x[2];
- }
- else if (h < 0xF5) {
- h *= mix;
- h += x[1];
- h *= mix;
- h += x[2];
- h *= mix;
- h += x[3];
- }
- else {
- return h + 256;
- }
- return (unsigned char)h;
-}
-
-static inline long
-mrb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
-{
- const unsigned char *x = xs, *xe = xs + m;
- const unsigned char *y = ys;
- int i, qstable[512];
-
- /* Preprocessing */
- for (i = 0; i < 512; ++i) {
- qstable[i] = m + 1;
- }
- for (; x < xe; ++x) {
- qstable[mrb_memsearch_qs_utf8_hash(x)] = xe - x;
- }
- /* Searching */
- for (; y + m <= ys + n; y += qstable[mrb_memsearch_qs_utf8_hash(y+m)]) {
- if (*xs == *y && memcmp(xs, y, m) == 0)
- return y - ys;
- }
- return -1;
-}
-
-int
-mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_encoding *enc)
-{
- const unsigned char *x = x0, *y = y0;
-
- if (m > n) return -1;
- else if (m == n) {
- return memcmp(x0, y0, m) == 0 ? 0 : -1;
- }
- else if (m < 1) {
- return 0;
- }
- else if (m == 1) {
- const unsigned char *ys = y, *ye = ys + n;
- for (; y < ye; ++y) {
- if (*x == *y)
- return y - ys;
- }
- return -1;
- }
- else {
- return mrb_memsearch_qs(x0, m, y0, n);
- }
-}
-#endif //INCLUDE_ENCODING
-
-#ifdef ENABLE_REGEXP
-mrb_value
-mrb_reg_init_str(mrb_state *mrb, mrb_value re, mrb_value s, int options)
-{
- onig_errmsg_buffer err = "";
-
- if (mrb_reg_initialize_str(mrb, re, s, options, err, NULL, 0) != 0) {
- //mrb_reg_raise_str(s, options, err);
- printf("mrb_reg_raise_str(s, options, err);");
- }
-
- return re;
-}
-
-mrb_value
-mrb_reg_alloc(mrb_state *mrb)
-{
- mrb_value dummy = mrb_nil_value();
- return mrb_reg_s_alloc(mrb, dummy);
-}
-
-mrb_value
-mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options)
-{
- return mrb_reg_init_str(mrb, mrb_reg_alloc(mrb), s, options);
-}
-
-mrb_value
-mrb_reg_regcomp(mrb_state *mrb, mrb_value str)
-{
- return mrb_reg_new_str(mrb, str, 0);
-}
-
-int
-re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range)
-{
- /* Update the fastmap now if not correct already. */
- /*if (!bufp->fastmap_accurate) {
- int ret = re_compile_fastmap0(bufp);
- if (ret) return ret;
- }*/
-
- /* Adjust startpos for mbc string */
- /*if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) {
- startpos = re_mbc_startpos(string, size, startpos, range);
- }*/
- return startpos;
-}
-#endif //ENABLE_REGEXP
-
-#ifdef INCLUDE_ENCODING
-static const unsigned char mbctab_ascii[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-const unsigned char *re_mbctab = mbctab_ascii;
-
-#define is_identchar(p,e,enc) (mrb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p))
-
-static int
-is_special_global_name(const char *m, const char *e, mrb_encoding *enc)
-{
- int mb = 0;
-
- if (m >= e) return 0;
- switch (*m) {
- case '~': case '*': case '$': case '?': case '!': case '@':
- case '/': case '\\': case ';': case ',': case '.': case '=':
- case ':': case '<': case '>': case '\"':
- case '&': case '`': case '\'': case '+':
- case '0':
- ++m;
- break;
- case '-':
- ++m;
- if (m < e && is_identchar(m, e, enc)) {
- if (!ISASCII(*m)) mb = 1;
- m += e - m;
- }
- break;
- default:
- if (!mrb_enc_isdigit(*m, enc)) return 0;
- do {
- if (!ISASCII(*m)) mb = 1;
- ++m;
- } while (m < e && mrb_enc_isdigit(*m, enc));
- }
- return m == e ? mb + 1 : 0;
-}
-
-int
-mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc)
-{
- const char *m = name;
- const char *e = m + len;
- int localid = FALSE;
-
- if (!m) return FALSE;
- switch (*m) {
- case '\0':
- return FALSE;
-
- case '$':
- if (is_special_global_name(++m, e, enc)) return TRUE;
- goto id;
-
- case '@':
- if (*++m == '@') ++m;
- goto id;
-
- case '<':
- switch (*++m) {
- case '<': ++m; break;
- case '=': if (*++m == '>') ++m; break;
- default: break;
- }
- break;
-
- case '>':
- switch (*++m) {
- case '>': case '=': ++m; break;
- }
- break;
-
- case '=':
- switch (*++m) {
- case '~': ++m; break;
- case '=': if (*++m == '=') ++m; break;
- default: return FALSE;
- }
- break;
-
- case '*':
- if (*++m == '*') ++m;
- break;
-
- case '+': case '-':
- if (*++m == '@') ++m;
- break;
-
- case '|': case '^': case '&': case '/': case '%': case '~': case '`':
- ++m;
- break;
-
- case '[':
- if (*++m != ']') return FALSE;
- if (*++m == '=') ++m;
- break;
-
- case '!':
- switch (*++m) {
- case '\0': return TRUE;
- case '=': case '~': ++m; break;
- default: return FALSE;
- }
- break;
-
- default:
- localid = !mrb_enc_isupper(*m, enc);
-id:
- if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m)))
- return FALSE;
- while (m < e && is_identchar(m, e, enc)) m += e - m;
- if (localid) {
- switch (*m) {
- case '!': case '?': case '=': ++m;
- }
- }
- break;
- }
- return m == e;
-}
-
-int
-mrb_enc_symname_p(const char *name, mrb_encoding *enc)
-{
- return mrb_enc_symname2_p(name, strlen(name), enc);
+ //mrb_define_class(mrb, REGEXP_CLASS, mrb->object_class);
}
-#endif //INCLUDE_ENCODING
diff --git a/src/re.h b/src/re.h
index 65ca0e6ae..64dbd60dc 100644
--- a/src/re.h
+++ b/src/re.h
@@ -7,75 +7,7 @@
#ifndef RE_H
#define RE_H
-//#include <sys/types.h>
-#include <stdio.h>
-
-#include "node.h"
-#include "regex.h"
-#include "encoding.h"
-#include "st.h"
-
-#define BEG(no) regs->beg[no]
-#define END(no) regs->end[no]
-
-struct rmatch_offset {
- long beg;
- long end;
-};
-
-struct rmatch {
- struct re_registers regs;
-
- int char_offset_updated;
- int char_offset_num_allocated;
- struct rmatch_offset *char_offset;
-};
-
-struct RMatch {
- MRB_OBJECT_HEADER;
- struct RString *str;
- struct rmatch *rmatch;
- struct RRegexp *regexp;
-};
-
-struct RRegexp {
- MRB_OBJECT_HEADER;
- struct re_pattern_buffer *ptr;
- struct RString *src;
- unsigned long usecnt;
-};
-
-#define mrb_regex_ptr(r) ((struct RRegexp*)((r).value.p))
-#define RREGEXP(r) ((struct RRegexp*)((r).value.p))
-#define RREGEXP_SRC(r) (RREGEXP(r)->src)
-#define RREGEXP_SRC_PTR(r) (RREGEXP_SRC(r)->buf)
-#define RREGEXP_SRC_LEN(r) (RREGEXP_SRC(r)->len)
-int re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range);
-
-typedef struct re_pattern_buffer Regexp;
-
-//#define RMATCH(obj) (R_CAST(RMatch)(obj))
-#define RMATCH_REGS(v) (&((struct RMatch*)((v).value.p))->rmatch->regs)
-#define RMATCH(v) ((struct RMatch*)((v).value.p))
-#define mrb_match_ptr(v) ((struct RMatch*)((v).value.p))
-
-int mrb_memcmp(const void *p1, const void *p2, int len);
-
-mrb_int mrb_reg_search (mrb_state *mrb, mrb_value, mrb_value, mrb_int, mrb_int);
-mrb_value mrb_reg_regsub (mrb_state *mrb, mrb_value, mrb_value, struct re_registers *, mrb_value);
-//mrb_value mrb_reg_regsub(mrb_value, mrb_value, struct re_registers *, mrb_value);
-mrb_int mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse);
-void mrb_match_busy (mrb_value);
-
-mrb_value mrb_reg_quote(mrb_state *mrb, mrb_value str);
-mrb_value mrb_reg_regcomp(mrb_state *mrb, mrb_value str);
-mrb_value mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str);
-mrb_value mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match);
-mrb_value mrb_backref_get(mrb_state *mrb);
-//mrb_int mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n);
-mrb_value mrb_reg_to_s(mrb_state *mrb, mrb_value re);
-void mrb_backref_set(mrb_state *mrb, mrb_value val);
-mrb_value match_alloc(mrb_state *mrb);
-int mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref);
+//#define REGEXP_CLASS "HsRegexp"
+#define REGEXP_CLASS "Regexp"
#endif
diff --git a/src/regcomp.c b/src/regcomp.c
deleted file mode 100644
index b8c652999..000000000
--- a/src/regcomp.c
+++ /dev/null
@@ -1,6288 +0,0 @@
-/**********************************************************************
- regcomp.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#include <string.h>
-#include "regparse.h"
-#ifdef ENABLE_REGEXP
-
-OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
-
-extern OnigCaseFoldType
-onig_get_default_case_fold_flag(void)
-{
- return OnigDefaultCaseFoldFlag;
-}
-
-extern int
-onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
-{
- OnigDefaultCaseFoldFlag = case_fold_flag;
- return 0;
-}
-
-
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
-static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
-#endif
-
-static UChar*
-str_dup(UChar* s, UChar* end)
-{
- ptrdiff_t len = end - s;
-
- if (len > 0) {
- UChar* r = (UChar* )xmalloc(len + 1);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, len);
- r[len] = (UChar )0;
- return r;
- }
- else return NULL;
-}
-
-static void
-swap_node(Node* a, Node* b)
-{
- Node c;
- c = *a; *a = *b; *b = c;
-
- if (NTYPE(a) == NT_STR) {
- StrNode* sn = NSTR(a);
- if (sn->capa == 0) {
- size_t len = sn->end - sn->s;
- sn->s = sn->buf;
- sn->end = sn->s + len;
- }
- }
-
- if (NTYPE(b) == NT_STR) {
- StrNode* sn = NSTR(b);
- if (sn->capa == 0) {
- size_t len = sn->end - sn->s;
- sn->s = sn->buf;
- sn->end = sn->s + len;
- }
- }
-}
-
-static OnigDistance
-distance_add(OnigDistance d1, OnigDistance d2)
-{
- if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
- return ONIG_INFINITE_DISTANCE;
- else {
- if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
- else return ONIG_INFINITE_DISTANCE;
- }
-}
-
-static OnigDistance
-distance_multiply(OnigDistance d, int m)
-{
- if (m == 0) return 0;
-
- if (d < ONIG_INFINITE_DISTANCE / m)
- return d * m;
- else
- return ONIG_INFINITE_DISTANCE;
-}
-
-static int
-bitset_is_empty(BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) {
- if (bs[i] != 0) return 0;
- }
- return 1;
-}
-
-#ifdef ONIG_DEBUG
-static int
-bitset_on_num(BitSetRef bs)
-{
- int i, n;
-
- n = 0;
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(bs, i)) n++;
- }
- return n;
-}
-#endif
-
-extern int
-onig_bbuf_init(BBuf* buf, int size)
-{
- if (size <= 0) {
- size = 0;
- buf->p = NULL;
- }
- else {
- buf->p = (UChar* )xmalloc(size);
- if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
- }
-
- buf->alloc = size;
- buf->used = 0;
- return 0;
-}
-
-
-#ifdef USE_SUBEXP_CALL
-
-static int
-unset_addr_list_init(UnsetAddrList* uslist, int size)
-{
- UnsetAddr* p;
-
- p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
- CHECK_NULL_RETURN_MEMERR(p);
- uslist->num = 0;
- uslist->alloc = size;
- uslist->us = p;
- return 0;
-}
-
-static void
-unset_addr_list_end(UnsetAddrList* uslist)
-{
- if (IS_NOT_NULL(uslist->us))
- xfree(uslist->us);
-}
-
-static int
-unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
-{
- UnsetAddr* p;
- int size;
-
- if (uslist->num >= uslist->alloc) {
- size = uslist->alloc * 2;
- p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
- CHECK_NULL_RETURN_MEMERR(p);
- uslist->alloc = size;
- uslist->us = p;
- }
-
- uslist->us[uslist->num].offset = offset;
- uslist->us[uslist->num].target = node;
- uslist->num++;
- return 0;
-}
-#endif /* USE_SUBEXP_CALL */
-
-
-static int
-add_opcode(regex_t* reg, int opcode)
-{
- BBUF_ADD1(reg, opcode);
- return 0;
-}
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-static int
-add_state_check_num(regex_t* reg, int num)
-{
- StateCheckNumType n = (StateCheckNumType )num;
-
- BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
- return 0;
-}
-#endif
-
-static int
-add_rel_addr(regex_t* reg, int addr)
-{
- RelAddrType ra = (RelAddrType )addr;
-
- BBUF_ADD(reg, &ra, SIZE_RELADDR);
- return 0;
-}
-
-static int
-add_abs_addr(regex_t* reg, int addr)
-{
- AbsAddrType ra = (AbsAddrType )addr;
-
- BBUF_ADD(reg, &ra, SIZE_ABSADDR);
- return 0;
-}
-
-static int
-add_length(regex_t* reg, int len)
-{
- LengthType l = (LengthType )len;
-
- BBUF_ADD(reg, &l, SIZE_LENGTH);
- return 0;
-}
-
-static int
-add_mem_num(regex_t* reg, int num)
-{
- MemNumType n = (MemNumType )num;
-
- BBUF_ADD(reg, &n, SIZE_MEMNUM);
- return 0;
-}
-
-static int
-add_pointer(regex_t* reg, void* addr)
-{
- PointerType ptr = (PointerType )addr;
-
- BBUF_ADD(reg, &ptr, SIZE_POINTER);
- return 0;
-}
-
-static int
-add_option(regex_t* reg, OnigOptionType option)
-{
- BBUF_ADD(reg, &option, SIZE_OPTION);
- return 0;
-}
-
-static int
-add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
-{
- int r;
-
- r = add_opcode(reg, opcode);
- if (r) return r;
- r = add_rel_addr(reg, addr);
- return r;
-}
-
-static int
-add_bytes(regex_t* reg, UChar* bytes, int len)
-{
- BBUF_ADD(reg, bytes, len);
- return 0;
-}
-
-static int
-add_bitset(regex_t* reg, BitSetRef bs)
-{
- BBUF_ADD(reg, bs, SIZE_BITSET);
- return 0;
-}
-
-static int
-add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
-{
- int r;
-
- r = add_opcode(reg, opcode);
- if (r) return r;
- r = add_option(reg, option);
- return r;
-}
-
-static int compile_length_tree(Node* node, regex_t* reg);
-static int compile_tree(Node* node, regex_t* reg);
-
-
-#define IS_NEED_STR_LEN_OP_EXACT(op) \
- ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
- (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
-
-static int
-select_str_opcode(int mb_len, int str_len, int ignore_case)
-{
- int op;
-
- if (ignore_case) {
- switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
- }
- }
- else {
- switch (mb_len) {
- case 1:
- switch (str_len) {
- case 1: op = OP_EXACT1; break;
- case 2: op = OP_EXACT2; break;
- case 3: op = OP_EXACT3; break;
- case 4: op = OP_EXACT4; break;
- case 5: op = OP_EXACT5; break;
- default: op = OP_EXACTN; break;
- }
- break;
-
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
-
- case 3:
- op = OP_EXACTMB3N;
- break;
-
- default:
- op = OP_EXACTMBN;
- break;
- }
- }
- return op;
-}
-
-static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
-{
- int r;
- int saved_num_null_check = reg->num_null_check;
-
- if (empty_info != 0) {
- r = add_opcode(reg, OP_NULL_CHECK_START);
- if (r) return r;
- r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
- if (r) return r;
- reg->num_null_check++;
- }
-
- r = compile_tree(node, reg);
- if (r) return r;
-
- if (empty_info != 0) {
- if (empty_info == NQ_TARGET_IS_EMPTY)
- r = add_opcode(reg, OP_NULL_CHECK_END);
- else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
- r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
- else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
- r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
-
- if (r) return r;
- r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
- }
- return r;
-}
-
-#ifdef USE_SUBEXP_CALL
-static int
-compile_call(CallNode* node, regex_t* reg)
-{
- int r;
-
- r = add_opcode(reg, OP_CALL);
- if (r) return r;
- r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
- node->target);
- if (r) return r;
- r = add_abs_addr(reg, 0 /*dummy addr.*/);
- return r;
-}
-#endif
-
-static int
-compile_tree_n_times(Node* node, int n, regex_t* reg)
-{
- int i, r;
-
- for (i = 0; i < n; i++) {
- r = compile_tree(node, reg);
- if (r) return r;
- }
- return 0;
-}
-
-static int
-add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
- regex_t* reg ARG_UNUSED, int ignore_case)
-{
- int len;
- int op = select_str_opcode(mb_len, str_len, ignore_case);
-
- len = SIZE_OPCODE;
-
- if (op == OP_EXACTMBN) len += SIZE_LENGTH;
- if (IS_NEED_STR_LEN_OP_EXACT(op))
- len += SIZE_LENGTH;
-
- len += mb_len * str_len;
- return len;
-}
-
-static int
-add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
-{
- int op = select_str_opcode(mb_len, str_len, ignore_case);
- add_opcode(reg, op);
-
- if (op == OP_EXACTMBN)
- add_length(reg, mb_len);
-
- if (IS_NEED_STR_LEN_OP_EXACT(op)) {
- if (op == OP_EXACTN_IC)
- add_length(reg, mb_len * str_len);
- else
- add_length(reg, str_len);
- }
-
- add_bytes(reg, s, mb_len * str_len);
- return 0;
-}
-
-
-static int
-compile_length_string_node(Node* node, regex_t* reg)
-{
- int rlen, r, len, prev_len, slen, ambig;
- OnigEncoding enc = reg->enc;
- UChar *p, *prev;
- StrNode* sn;
-
- sn = NSTR(node);
- if (sn->end <= sn->s)
- return 0;
-
- ambig = NSTRING_IS_AMBIG(node);
-
- p = prev = sn->s;
- prev_len = enclen(enc, p, sn->end);
- p += prev_len;
- slen = 1;
- rlen = 0;
-
- for (; p < sn->end; ) {
- len = enclen(enc, p, sn->end);
- if (len == prev_len) {
- slen++;
- }
- else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
- rlen += r;
- prev = p;
- slen = 1;
- prev_len = len;
- }
- p += len;
- }
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
- rlen += r;
- return rlen;
-}
-
-static int
-compile_length_string_raw_node(StrNode* sn, regex_t* reg)
-{
- if (sn->end <= sn->s)
- return 0;
-
- return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
-}
-
-static int
-compile_string_node(Node* node, regex_t* reg)
-{
- int r, len, prev_len, slen, ambig;
- OnigEncoding enc = reg->enc;
- UChar *p, *prev, *end;
- StrNode* sn;
-
- sn = NSTR(node);
- if (sn->end <= sn->s)
- return 0;
-
- end = sn->end;
- ambig = NSTRING_IS_AMBIG(node);
-
- p = prev = sn->s;
- prev_len = enclen(enc, p, end);
- p += prev_len;
- slen = 1;
-
- for (; p < end; ) {
- len = enclen(enc, p, end);
- if (len == prev_len) {
- slen++;
- }
- else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
- if (r) return r;
-
- prev = p;
- slen = 1;
- prev_len = len;
- }
-
- p += len;
- }
- return add_compile_string(prev, prev_len, slen, reg, ambig);
-}
-
-static int
-compile_string_raw_node(StrNode* sn, regex_t* reg)
-{
- if (sn->end <= sn->s)
- return 0;
-
- return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
-}
-
-static int
-add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
-{
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- add_length(reg, mbuf->used);
- return add_bytes(reg, mbuf->p, mbuf->used);
-#else
- int r, pad_size;
- UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
-
- GET_ALIGNMENT_PAD_SIZE(p, pad_size);
- add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
-
- r = add_bytes(reg, mbuf->p, mbuf->used);
-
- /* padding for return value from compile_length_cclass_node() to be fix. */
- pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
- return r;
-#endif
-}
-
-static int
-compile_length_cclass_node(CClassNode* cc, regex_t* reg)
-{
- int len;
-
- if (IS_NCCLASS_SHARE(cc)) {
- len = SIZE_OPCODE + SIZE_POINTER;
- return len;
- }
-
- if (IS_NULL(cc->mbuf)) {
- len = SIZE_OPCODE + SIZE_BITSET;
- }
- else {
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- len = SIZE_OPCODE;
- }
- else {
- len = SIZE_OPCODE + SIZE_BITSET;
- }
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- len += SIZE_LENGTH + cc->mbuf->used;
-#else
- len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
-#endif
- }
-
- return len;
-}
-
-static int
-compile_cclass_node(CClassNode* cc, regex_t* reg)
-{
- int r;
-
- if (IS_NCCLASS_SHARE(cc)) {
- add_opcode(reg, OP_CCLASS_NODE);
- r = add_pointer(reg, cc);
- return r;
- }
-
- if (IS_NULL(cc->mbuf)) {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_NOT);
- else
- add_opcode(reg, OP_CCLASS);
-
- r = add_bitset(reg, cc->bs);
- }
- else {
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MB_NOT);
- else
- add_opcode(reg, OP_CCLASS_MB);
-
- r = add_multi_byte_cclass(cc->mbuf, reg);
- }
- else {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MIX_NOT);
- else
- add_opcode(reg, OP_CCLASS_MIX);
-
- r = add_bitset(reg, cc->bs);
- if (r) return r;
- r = add_multi_byte_cclass(cc->mbuf, reg);
- }
- }
-
- return r;
-}
-
-static int
-entry_repeat_range(regex_t* reg, int id, int lower, int upper)
-{
-#define REPEAT_RANGE_ALLOC 4
-
- OnigRepeatRange* p;
-
- if (reg->repeat_range_alloc == 0) {
- p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
- CHECK_NULL_RETURN_MEMERR(p);
- reg->repeat_range = p;
- reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
- }
- else if (reg->repeat_range_alloc <= id) {
- int n;
- n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(OnigRepeatRange) * n);
- CHECK_NULL_RETURN_MEMERR(p);
- reg->repeat_range = p;
- reg->repeat_range_alloc = n;
- }
- else {
- p = reg->repeat_range;
- }
-
- p[id].lower = lower;
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
- return 0;
-}
-
-static int
-compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
- regex_t* reg)
-{
- int r;
- int num_repeat = reg->num_repeat;
-
- r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
- if (r) return r;
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
- reg->num_repeat++;
- if (r) return r;
- r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
- if (r) return r;
-
- r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
- if (r) return r;
-
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
-
- if (
-#ifdef USE_SUBEXP_CALL
- reg->num_call > 0 ||
-#endif
- IS_QUANTIFIER_IN_REPEAT(qn)) {
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
- }
- else {
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
- }
- if (r) return r;
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
- return r;
-}
-
-static int
-is_anychar_star_quantifier(QtfrNode* qn)
-{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == NT_CANY)
- return 1;
- else
- return 0;
-}
-
-#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
-#define CKN_ON (ckn > 0)
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-static int
-compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
-{
- int len, mod_tlen, cklen;
- int ckn;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
-
- if (tlen < 0) return tlen;
-
- ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
-
- cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
-
- /* anychar repeat */
- if (NTYPE(qn->target) == NT_CANY) {
- if (qn->greedy && infinite) {
- if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
- else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
- }
- }
-
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
-
- if (infinite && qn->lower <= 1) {
- if (qn->greedy) {
- if (qn->lower == 1)
- len = SIZE_OP_JUMP;
- else
- len = 0;
-
- len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
- }
- else {
- if (qn->lower == 0)
- len = SIZE_OP_JUMP;
- else
- len = 0;
-
- len += mod_tlen + SIZE_OP_PUSH + cklen;
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
- else
- len = 0;
- }
- else if (qn->upper == 1 && qn->greedy) {
- if (qn->lower == 0) {
- if (CKN_ON) {
- len = SIZE_OP_STATE_CHECK_PUSH + tlen;
- }
- else {
- len = SIZE_OP_PUSH + tlen;
- }
- }
- else {
- len = tlen;
- }
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
- }
- else {
- len = SIZE_OP_REPEAT_INC
- + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
- if (CKN_ON)
- len += SIZE_OP_STATE_CHECK;
- }
-
- return len;
-}
-
-static int
-compile_quantifier_node(QtfrNode* qn, regex_t* reg)
-{
- int r, mod_tlen;
- int ckn;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
-
- if (tlen < 0) return tlen;
-
- ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
-
- if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r) return r;
- if (CKN_ON) {
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- }
-
- return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- }
- else {
- if (IS_MULTILINE(reg->options)) {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_ML_STAR
- : OP_ANYCHAR_ML_STAR));
- }
- else {
- r = add_opcode(reg, (CKN_ON ?
- OP_STATE_CHECK_ANYCHAR_STAR
- : OP_ANYCHAR_STAR));
- }
- if (r) return r;
- if (CKN_ON)
- r = add_state_check_num(reg, ckn);
-
- return r;
- }
- }
-
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
-
- if (infinite && qn->lower <= 1) {
- if (qn->greedy) {
- if (qn->lower == 1) {
- r = add_opcode_rel_addr(reg, OP_JUMP,
- (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
- if (r) return r;
- }
-
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- }
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP
- + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
- }
- else {
- if (qn->lower == 0) {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
- }
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg,
- -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
- }
- else
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
- }
- }
- else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else
- r = 0;
- }
- else if (qn->upper == 1 && qn->greedy) {
- if (qn->lower == 0) {
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, tlen);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
- }
- if (r) return r;
- }
-
- r = compile_tree(qn->target, reg);
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- if (CKN_ON) {
- r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- if (r) return r;
- r = add_rel_addr(reg, SIZE_OP_JUMP);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
- }
-
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
- if (CKN_ON) {
- if (r) return r;
- r = add_opcode(reg, OP_STATE_CHECK);
- if (r) return r;
- r = add_state_check_num(reg, ckn);
- }
- }
- return r;
-}
-
-#else /* USE_COMBINATION_EXPLOSION_CHECK */
-
-static int
-compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
-{
- int len, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
-
- if (tlen < 0) return tlen;
-
- /* anychar repeat */
- if (NTYPE(qn->target) == NT_CANY) {
- if (qn->greedy && infinite) {
- if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
- else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
- }
- }
-
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
-
- if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- len = SIZE_OP_JUMP;
- }
- else {
- len = tlen * qn->lower;
- }
-
- if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
- else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
- }
- else
- len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
- }
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
- }
- else if (!infinite && qn->greedy &&
- (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- len = tlen * qn->lower;
- len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
- }
- else {
- len = SIZE_OP_REPEAT_INC
- + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
- }
-
- return len;
-}
-
-static int
-compile_quantifier_node(QtfrNode* qn, regex_t* reg)
-{
- int i, r, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
-
- if (tlen < 0) return tlen;
-
- if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- if (IS_NOT_NULL(qn->next_head_exact)) {
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r) return r;
- return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- }
- else {
- if (IS_MULTILINE(reg->options))
- return add_opcode(reg, OP_ANYCHAR_ML_STAR);
- else
- return add_opcode(reg, OP_ANYCHAR_STAR);
- }
- }
-
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
- mod_tlen = tlen;
-
- if (infinite &&
- (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
- else if (IS_NOT_NULL(qn->next_head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
- else
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
- }
- else {
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
- }
- if (r) return r;
- }
- else {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
- }
-
- if (qn->greedy) {
- if (IS_NOT_NULL(qn->head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
- }
- else if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
- mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
- }
- else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
- }
- }
- else {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
- }
- }
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else if (!infinite && qn->greedy &&
- (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
- <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
- int n = qn->upper - qn->lower;
-
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
-
- for (i = 0; i < n; i++) {
- r = add_opcode_rel_addr(reg, OP_PUSH,
- (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- if (r) return r;
- }
- }
- else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- }
- else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
- }
- return r;
-}
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-
-static int
-compile_length_option_node(EncloseNode* node, regex_t* reg)
-{
- int tlen;
- OnigOptionType prev = reg->options;
-
- reg->options = node->option;
- tlen = compile_length_tree(node->target, reg);
- reg->options = prev;
-
- if (tlen < 0) return tlen;
-
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
- + tlen + SIZE_OP_SET_OPTION;
- }
- else
- return tlen;
-}
-
-static int
-compile_option_node(EncloseNode* node, regex_t* reg)
-{
- int r;
- OnigOptionType prev = reg->options;
-
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
- if (r) return r;
- r = add_opcode_option(reg, OP_SET_OPTION, prev);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL);
- if (r) return r;
- }
-
- reg->options = node->option;
- r = compile_tree(node->target, reg);
- reg->options = prev;
-
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- if (r) return r;
- r = add_opcode_option(reg, OP_SET_OPTION, prev);
- }
- return r;
-}
-
-static int
-compile_length_enclose_node(EncloseNode* node, regex_t* reg)
-{
- int len;
- int tlen;
-
- if (node->type == ENCLOSE_OPTION)
- return compile_length_option_node(node, reg);
-
- if (node->target) {
- tlen = compile_length_tree(node->target, reg);
- if (tlen < 0) return tlen;
- }
- else
- tlen = 0;
-
- switch (node->type) {
- case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
- }
- else
-#endif
- {
- if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
- else
- len = SIZE_OP_MEMORY_START;
-
- len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
- }
- break;
-
- case ENCLOSE_STOP_BACKTRACK:
- if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
- QtfrNode* qn = NQTFR(node->target);
- tlen = compile_length_tree(qn->target, reg);
- if (tlen < 0) return tlen;
-
- len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
- }
- else {
- len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
- }
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
-
- return len;
-}
-
-static int get_char_length_tree(Node* node, regex_t* reg, int* len);
-
-static int
-compile_enclose_node(EncloseNode* node, regex_t* reg)
-{
- int r, len;
-
- if (node->type == ENCLOSE_OPTION)
- return compile_option_node(node, reg);
-
- switch (node->type) {
- case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- r = add_opcode(reg, OP_CALL);
- if (r) return r;
- node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
- node->state |= NST_ADDR_FIXED;
- r = add_abs_addr(reg, (int )node->call_addr);
- if (r) return r;
- len = compile_length_tree(node->target, reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
-
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) return r;
- }
-#endif
- if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- r = add_opcode(reg, OP_MEMORY_START_PUSH);
- else
- r = add_opcode(reg, OP_MEMORY_START);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
- else
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_REC : OP_MEMORY_END));
-
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- if (r) return r;
- r = add_opcode(reg, OP_RETURN);
- }
- else
-#endif
- {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH);
- else
- r = add_opcode(reg, OP_MEMORY_END);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- }
- break;
-
- case ENCLOSE_STOP_BACKTRACK:
- if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
- QtfrNode* qn = NQTFR(node->target);
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
-
- len = compile_length_tree(qn->target, reg);
- if (len < 0) return len;
-
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_POP);
- if (r) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
- }
- else {
- r = add_opcode(reg, OP_PUSH_STOP_BT);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_POP_STOP_BT);
- }
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-compile_length_anchor_node(AnchorNode* node, regex_t* reg)
-{
- int len;
- int tlen = 0;
-
- if (node->target) {
- tlen = compile_length_tree(node->target, reg);
- if (tlen < 0) return tlen;
- }
-
- switch (node->type) {
- case ANCHOR_PREC_READ:
- len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
- break;
- case ANCHOR_PREC_READ_NOT:
- len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
- break;
- case ANCHOR_LOOK_BEHIND:
- len = SIZE_OP_LOOK_BEHIND + tlen;
- break;
- case ANCHOR_LOOK_BEHIND_NOT:
- len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
- break;
-
- default:
- len = SIZE_OPCODE;
- break;
- }
-
- return len;
-}
-
-static int
-compile_anchor_node(AnchorNode* node, regex_t* reg)
-{
- int r, len;
-
- switch (node->type) {
- case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
- case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
- case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
- case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
- case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
- case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
-
- case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
- case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
-#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
- case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
-#endif
-
- case ANCHOR_PREC_READ:
- r = add_opcode(reg, OP_PUSH_POS);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_POP_POS);
- break;
-
- case ANCHOR_PREC_READ_NOT:
- len = compile_length_tree(node->target, reg);
- if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL_POS);
- break;
-
- case ANCHOR_LOOK_BEHIND:
- {
- int n;
- r = add_opcode(reg, OP_LOOK_BEHIND);
- if (r) return r;
- if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
- else
- n = node->char_len;
- r = add_length(reg, n);
- if (r) return r;
- r = compile_tree(node->target, reg);
- }
- break;
-
- case ANCHOR_LOOK_BEHIND_NOT:
- {
- int n;
- len = compile_length_tree(node->target, reg);
- r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
- len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
- if (r) return r;
- if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
- else
- n = node->char_len;
- r = add_length(reg, n);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
- r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
- }
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-compile_length_tree(Node* node, regex_t* reg)
-{
- int len, type, r;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- len = 0;
- do {
- r = compile_length_tree(NCAR(node), reg);
- if (r < 0) return r;
- len += r;
- } while (IS_NOT_NULL(node = NCDR(node)));
- r = len;
- break;
-
- case NT_ALT:
- {
- int n;
-
- n = r = 0;
- do {
- r += compile_length_tree(NCAR(node), reg);
- n++;
- } while (IS_NOT_NULL(node = NCDR(node)));
- r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
- }
- break;
-
- case NT_STR:
- if (NSTRING_IS_RAW(node))
- r = compile_length_string_raw_node(NSTR(node), reg);
- else
- r = compile_length_string_node(node, reg);
- break;
-
- case NT_CCLASS:
- r = compile_length_cclass_node(NCCLASS(node), reg);
- break;
-
- case NT_CTYPE:
- case NT_CANY:
- r = SIZE_OPCODE;
- break;
-
- case NT_BREF:
- {
- BRefNode* br = NBREF(node);
-
-#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
- SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- else
-#endif
- if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
- }
- else {
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- r = SIZE_OP_CALL;
- break;
-#endif
-
- case NT_QTFR:
- r = compile_length_quantifier_node(NQTFR(node), reg);
- break;
-
- case NT_ENCLOSE:
- r = compile_length_enclose_node(NENCLOSE(node), reg);
- break;
-
- case NT_ANCHOR:
- r = compile_length_anchor_node(NANCHOR(node), reg);
- break;
-
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-compile_tree(Node* node, regex_t* reg)
-{
- int n, type, len, pos, r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- do {
- r = compile_tree(NCAR(node), reg);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_ALT:
- {
- Node* x = node;
- len = 0;
- do {
- len += compile_length_tree(NCAR(x), reg);
- if (NCDR(x) != NULL) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
- }
- } while (IS_NOT_NULL(x = NCDR(x)));
- pos = reg->used + len; /* goal position */
-
- do {
- len = compile_length_tree(NCAR(node), reg);
- if (IS_NOT_NULL(NCDR(node))) {
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
- if (r) break;
- }
- r = compile_tree(NCAR(node), reg);
- if (r) break;
- if (IS_NOT_NULL(NCDR(node))) {
- len = pos - (reg->used + SIZE_OP_JUMP);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) break;
- }
- } while (IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
- case NT_STR:
- if (NSTRING_IS_RAW(node))
- r = compile_string_raw_node(NSTR(node), reg);
- else
- r = compile_string_node(node, reg);
- break;
-
- case NT_CCLASS:
- r = compile_cclass_node(NCCLASS(node), reg);
- break;
-
- case NT_CTYPE:
- {
- int op;
-
- switch (NCTYPE(node)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->is_not != 0) op = OP_NOT_WORD;
- else op = OP_WORD;
- break;
- default:
- return ONIGERR_TYPE_BUG;
- break;
- }
- r = add_opcode(reg, op);
- }
- break;
-
- case NT_CANY:
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML);
- else
- r = add_opcode(reg, OP_ANYCHAR);
- break;
-
- case NT_BREF:
- {
- BRefNode* br = NBREF(node);
-
-#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
- if (r) return r;
- r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
- if (r) return r;
- r = add_length(reg, br->nest_level);
- if (r) return r;
-
- goto add_bacref_mems;
- }
- else
-#endif
- if (br->back_num == 1) {
- n = br->back_static[0];
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREFN_IC);
- if (r) return r;
- r = add_mem_num(reg, n);
- }
- else {
- switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
- default:
- r = add_opcode(reg, OP_BACKREFN);
- if (r) return r;
- r = add_mem_num(reg, n);
- break;
- }
- }
- }
- else {
- int i;
- int* p;
-
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREF_MULTI_IC);
- }
- else {
- r = add_opcode(reg, OP_BACKREF_MULTI);
- }
- if (r) return r;
-
-#ifdef USE_BACKREF_WITH_LEVEL
- add_bacref_mems:
-#endif
- r = add_length(reg, br->back_num);
- if (r) return r;
- p = BACKREFS_P(br);
- for (i = br->back_num - 1; i >= 0; i--) {
- r = add_mem_num(reg, p[i]);
- if (r) return r;
- }
- }
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- r = compile_call(NCALL(node), reg);
- break;
-#endif
-
- case NT_QTFR:
- r = compile_quantifier_node(NQTFR(node), reg);
- break;
-
- case NT_ENCLOSE:
- r = compile_enclose_node(NENCLOSE(node), reg);
- break;
-
- case NT_ANCHOR:
- r = compile_anchor_node(NANCHOR(node), reg);
- break;
-
- default:
-#ifdef ONIG_DEBUG
- fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
-#endif
- break;
- }
-
- return r;
-}
-
-#ifdef USE_NAMED_GROUP
-
-static int
-noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
-{
- int r = 0;
- Node* node = *plink;
-
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = noname_disable_map(&(NCAR(node)), map, counter);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_QTFR:
- {
- Node** ptarget = &(NQTFR(node)->target);
- Node* old = *ptarget;
- r = noname_disable_map(ptarget, map, counter);
- if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
- onig_reduce_nested_quantifier(node, *ptarget);
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- if (en->type == ENCLOSE_MEMORY) {
- if (IS_ENCLOSE_NAMED_GROUP(en)) {
- (*counter)++;
- map[en->regnum].new_val = *counter;
- en->regnum = *counter;
- r = noname_disable_map(&(en->target), map, counter);
- }
- else {
- *plink = en->target;
- en->target = NULL_NODE;
- onig_node_free(node);
- r = noname_disable_map(plink, map, counter);
- }
- }
- else
- r = noname_disable_map(&(en->target), map, counter);
- }
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = noname_disable_map(&(an->target), map, counter);
- break;
- }
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-renumber_node_backref(Node* node, GroupNumRemap* map)
-{
- int i, pos, n, old_num;
- int *backs;
- BRefNode* bn = NBREF(node);
-
- if (! IS_BACKREF_NAME_REF(bn))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
-
- old_num = bn->back_num;
- if (IS_NULL(bn->back_dynamic))
- backs = bn->back_static;
- else
- backs = bn->back_dynamic;
-
- for (i = 0, pos = 0; i < old_num; i++) {
- n = map[backs[i]].new_val;
- if (n > 0) {
- backs[pos] = n;
- pos++;
- }
- }
-
- bn->back_num = pos;
- return 0;
-}
-
-static int
-renumber_by_map(Node* node, GroupNumRemap* map)
-{
- int r = 0;
-
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = renumber_by_map(NCAR(node), map);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_QTFR:
- r = renumber_by_map(NQTFR(node)->target, map);
- break;
- case NT_ENCLOSE:
- r = renumber_by_map(NENCLOSE(node)->target, map);
- break;
-
- case NT_BREF:
- r = renumber_node_backref(node, map);
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = renumber_by_map(an->target, map);
- break;
- }
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-numbered_ref_check(Node* node)
-{
- int r = 0;
-
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = numbered_ref_check(NCAR(node));
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_QTFR:
- r = numbered_ref_check(NQTFR(node)->target);
- break;
- case NT_ENCLOSE:
- r = numbered_ref_check(NENCLOSE(node)->target);
- break;
-
- case NT_BREF:
- if (! IS_BACKREF_NAME_REF(NBREF(node)))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
-{
- int r, i, pos, counter;
- BitStatusType loc;
- GroupNumRemap* map;
-
- map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
- CHECK_NULL_RETURN_MEMERR(map);
- for (i = 1; i <= env->num_mem; i++) {
- map[i].new_val = 0;
- }
- counter = 0;
- r = noname_disable_map(root, map, &counter);
- if (r != 0) return r;
-
- r = renumber_by_map(*root, map);
- if (r != 0) return r;
-
- for (i = 1, pos = 1; i <= env->num_mem; i++) {
- if (map[i].new_val > 0) {
- SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
- pos++;
- }
- }
-
- loc = env->capture_history;
- BIT_STATUS_CLEAR(env->capture_history);
- for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(loc, i)) {
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
- }
- }
-
- env->num_mem = env->num_named;
- reg->num_mem = env->num_named;
-
- return onig_renumber_name_table(reg, map);
-}
-#endif /* USE_NAMED_GROUP */
-
-#ifdef USE_SUBEXP_CALL
-static int
-unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
-{
- int i, offset;
- EncloseNode* en;
- AbsAddrType addr;
-
- for (i = 0; i < uslist->num; i++) {
- en = NENCLOSE(uslist->us[i].target);
- if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
- addr = en->call_addr;
- offset = uslist->us[i].offset;
-
- BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
- }
- return 0;
-}
-#endif
-
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
-static int
-quantifiers_memory_node_info(Node* node)
-{
- int r = 0;
-
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- {
- int v;
- do {
- v = quantifiers_memory_node_info(NCAR(node));
- if (v > r) r = v;
- } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node))) {
- return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
- }
- else
- r = quantifiers_memory_node_info(NCALL(node)->target);
- break;
-#endif
-
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
- if (qn->upper != 0) {
- r = quantifiers_memory_node_info(qn->target);
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
- return NQ_TARGET_IS_EMPTY_MEM;
- break;
-
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = quantifiers_memory_node_info(en->target);
- break;
- default:
- break;
- }
- }
- break;
-
- case NT_BREF:
- case NT_STR:
- case NT_CTYPE:
- case NT_CCLASS:
- case NT_CANY:
- case NT_ANCHOR:
- default:
- break;
- }
-
- return r;
-}
-#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
-
-static int
-get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
-{
- OnigDistance tmin;
- int r = 0;
-
- *min = 0;
- switch (NTYPE(node)) {
- case NT_BREF:
- {
- int i;
- int* backs;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- if (br->state & NST_RECURSION) break;
-
- backs = BACKREFS_P(br);
- if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_match_length(nodes[backs[0]], min, env);
- if (r != 0) break;
- for (i = 1; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_match_length(nodes[backs[i]], &tmin, env);
- if (r != 0) break;
- if (*min > tmin) *min = tmin;
- }
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node))) {
- EncloseNode* en = NENCLOSE(NCALL(node)->target);
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
- }
- else
- r = get_min_match_length(NCALL(node)->target, min, env);
- break;
-#endif
-
- case NT_LIST:
- do {
- r = get_min_match_length(NCAR(node), &tmin, env);
- if (r == 0) *min += tmin;
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_ALT:
- {
- Node *x, *y;
- y = node;
- do {
- x = NCAR(y);
- r = get_min_match_length(x, &tmin, env);
- if (r != 0) break;
- if (y == node) *min = tmin;
- else if (*min > tmin) *min = tmin;
- } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
- }
- break;
-
- case NT_STR:
- {
- StrNode* sn = NSTR(node);
- *min = sn->end - sn->s;
- }
- break;
-
- case NT_CTYPE:
- *min = 1;
- break;
-
- case NT_CCLASS:
- case NT_CANY:
- *min = 1;
- break;
-
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
-
- if (qn->lower > 0) {
- r = get_min_match_length(qn->target, min, env);
- if (r == 0)
- *min = distance_multiply(*min, qn->lower);
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
- else {
- r = get_min_match_length(en->target, min, env);
- if (r == 0) {
- en->min_len = *min;
- SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
- }
- }
- break;
-#endif
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = get_min_match_length(en->target, min, env);
- break;
- }
- }
- break;
-
- case NT_ANCHOR:
- default:
- break;
- }
-
- return r;
-}
-
-static int
-get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
-{
- OnigDistance tmax;
- int r = 0;
-
- *max = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- do {
- r = get_max_match_length(NCAR(node), &tmax, env);
- if (r == 0)
- *max = distance_add(*max, tmax);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_ALT:
- do {
- r = get_max_match_length(NCAR(node), &tmax, env);
- if (r == 0 && *max < tmax) *max = tmax;
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_STR:
- {
- StrNode* sn = NSTR(node);
- *max = sn->end - sn->s;
- }
- break;
-
- case NT_CTYPE:
- *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- break;
-
- case NT_CCLASS:
- case NT_CANY:
- *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- break;
-
- case NT_BREF:
- {
- int i;
- int* backs;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- if (br->state & NST_RECURSION) {
- *max = ONIG_INFINITE_DISTANCE;
- break;
- }
- backs = BACKREFS_P(br);
- for (i = 0; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_max_match_length(nodes[backs[i]], &tmax, env);
- if (r != 0) break;
- if (*max < tmax) *max = tmax;
- }
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (! IS_CALL_RECURSION(NCALL(node)))
- r = get_max_match_length(NCALL(node)->target, max, env);
- else
- *max = ONIG_INFINITE_DISTANCE;
- break;
-#endif
-
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
-
- if (qn->upper != 0) {
- r = get_max_match_length(qn->target, max, env);
- if (r == 0 && *max != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
- *max = distance_multiply(*max, qn->upper);
- else
- *max = ONIG_INFINITE_DISTANCE;
- }
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_MAX_FIXED(en))
- *max = en->max_len;
- else {
- r = get_max_match_length(en->target, max, env);
- if (r == 0) {
- en->max_len = *max;
- SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
- }
- }
- break;
-#endif
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = get_max_match_length(en->target, max, env);
- break;
- }
- }
- break;
-
- case NT_ANCHOR:
- default:
- break;
- }
-
- return r;
-}
-
-#define GET_CHAR_LEN_VARLEN -1
-#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
-
-/* fixed size pattern node only */
-static int
-get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
-{
- int tlen;
- int r = 0;
-
- level++;
- *len = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- do {
- r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
- if (r == 0)
- *len = distance_add(*len, tlen);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_ALT:
- {
- int tlen2;
- int varlen = 0;
-
- r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
- while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
- r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
- if (r == 0) {
- if (tlen != tlen2)
- varlen = 1;
- }
- }
- if (r == 0) {
- if (varlen != 0) {
- if (level == 1)
- r = GET_CHAR_LEN_TOP_ALT_VARLEN;
- else
- r = GET_CHAR_LEN_VARLEN;
- }
- else
- *len = tlen;
- }
- }
- break;
-
- case NT_STR:
- {
- StrNode* sn = NSTR(node);
- UChar *s = sn->s;
- while (s < sn->end) {
- s += enclen(reg->enc, s, sn->end);
- (*len)++;
- }
- }
- break;
-
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
- if (qn->lower == qn->upper) {
- r = get_char_length_tree1(qn->target, reg, &tlen, level);
- if (r == 0)
- *len = distance_multiply(tlen, qn->lower);
- }
- else
- r = GET_CHAR_LEN_VARLEN;
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (! IS_CALL_RECURSION(NCALL(node)))
- r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
- else
- r = GET_CHAR_LEN_VARLEN;
- break;
-#endif
-
- case NT_CTYPE:
- *len = 1;
- break;
-
- case NT_CCLASS:
- case NT_CANY:
- *len = 1;
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CLEN_FIXED(en))
- *len = en->char_len;
- else {
- r = get_char_length_tree1(en->target, reg, len, level);
- if (r == 0) {
- en->char_len = *len;
- SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
- }
- }
- break;
-#endif
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = get_char_length_tree1(en->target, reg, len, level);
- break;
- default:
- break;
- }
- }
- break;
-
- case NT_ANCHOR:
- break;
-
- default:
- r = GET_CHAR_LEN_VARLEN;
- break;
- }
-
- return r;
-}
-
-static int
-get_char_length_tree(Node* node, regex_t* reg, int* len)
-{
- return get_char_length_tree1(node, reg, len, 0);
-}
-
-/* x is not included y ==> 1 : 0 */
-static int
-is_not_included(Node* x, Node* y, regex_t* reg)
-{
- int i, len;
- OnigCodePoint code;
- UChar *p, c;
- int ytype;
-
- retry:
- ytype = NTYPE(y);
- switch (NTYPE(x)) {
- case NT_CTYPE:
- {
- switch (ytype) {
- case NT_CTYPE:
- if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
- NCTYPE(y)->is_not != NCTYPE(x)->is_not)
- return 1;
- else
- return 0;
- break;
-
- case NT_CCLASS:
- swap:
- {
- Node* tmp;
- tmp = x; x = y; y = tmp;
- goto retry;
- }
- break;
-
- case NT_STR:
- goto swap;
- break;
-
- default:
- break;
- }
- }
- break;
-
- case NT_CCLASS:
- {
- CClassNode* xc = NCCLASS(x);
- switch (ytype) {
- case NT_CTYPE:
- switch (NCTYPE(y)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(y)->is_not == 0) {
- if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (BITSET_AT(xc->bs, i)) {
- if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
- }
- }
- return 1;
- }
- return 0;
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! IS_CODE_SB_WORD(reg->enc, i)) {
- if (!IS_NCCLASS_NOT(xc)) {
- if (BITSET_AT(xc->bs, i))
- return 0;
- }
- else {
- if (! BITSET_AT(xc->bs, i))
- return 0;
- }
- }
- }
- return 1;
- }
- break;
-
- default:
- break;
- }
- break;
-
- case NT_CCLASS:
- {
- int v;
- CClassNode* yc = NCCLASS(y);
-
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- v = BITSET_AT(xc->bs, i);
- if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
- (v == 0 && IS_NCCLASS_NOT(xc))) {
- v = BITSET_AT(yc->bs, i);
- if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
- (v == 0 && IS_NCCLASS_NOT(yc)))
- return 0;
- }
- }
- if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
- (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
- return 1;
- return 0;
- }
- break;
-
- case NT_STR:
- goto swap;
- break;
-
- default:
- break;
- }
- }
- break;
-
- case NT_STR:
- {
- StrNode* xs = NSTR(x);
- if (NSTRING_LEN(x) == 0)
- break;
-
- c = *(xs->s);
- switch (ytype) {
- case NT_CTYPE:
- switch (NCTYPE(y)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
- return NCTYPE(y)->is_not;
- else
- return !(NCTYPE(y)->is_not);
- break;
- default:
- break;
- }
- break;
-
- case NT_CCLASS:
- {
- CClassNode* cc = NCCLASS(y);
-
- code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
- xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
- return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
- }
- break;
-
- case NT_STR:
- {
- UChar *q;
- StrNode* ys = NSTR(y);
- len = NSTRING_LEN(x);
- if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
- if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
- /* tiny version */
- return 0;
- }
- else {
- for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
- if (*p != *q) return 1;
- }
- }
- }
- break;
-
- default:
- break;
- }
- }
- break;
-
- default:
- break;
- }
-
- return 0;
-}
-
-static Node*
-get_head_value_node(Node* node, int exact, regex_t* reg)
-{
- Node* n = NULL_NODE;
-
- switch (NTYPE(node)) {
- case NT_BREF:
- case NT_ALT:
- case NT_CANY:
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
-#endif
- break;
-
- case NT_CTYPE:
- case NT_CCLASS:
- if (exact == 0) {
- n = node;
- }
- break;
-
- case NT_LIST:
- n = get_head_value_node(NCAR(node), exact, reg);
- break;
-
- case NT_STR:
- {
- StrNode* sn = NSTR(node);
-
- if (sn->end <= sn->s)
- break;
-
- if (exact != 0 &&
- !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
- }
- else {
- n = node;
- }
- }
- break;
-
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
- if (qn->lower > 0) {
- if (IS_NOT_NULL(qn->head_exact))
- n = qn->head_exact;
- else
- n = get_head_value_node(qn->target, exact, reg);
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_OPTION:
- {
- OnigOptionType options = reg->options;
-
- reg->options = NENCLOSE(node)->option;
- n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
- reg->options = options;
- }
- break;
-
- case ENCLOSE_MEMORY:
- case ENCLOSE_STOP_BACKTRACK:
- n = get_head_value_node(en->target, exact, reg);
- break;
- }
- }
- break;
-
- case NT_ANCHOR:
- if (NANCHOR(node)->type == ANCHOR_PREC_READ)
- n = get_head_value_node(NANCHOR(node)->target, exact, reg);
- break;
-
- default:
- break;
- }
-
- return n;
-}
-
-static int
-check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
-{
- int type, r = 0;
-
- type = NTYPE(node);
- if ((NTYPE2BIT(type) & type_mask) == 0)
- return 1;
-
- switch (type) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = check_type_tree(NCAR(node), type_mask, enclose_mask,
- anchor_mask);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_QTFR:
- r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
- anchor_mask);
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- if ((en->type & enclose_mask) == 0)
- return 1;
-
- r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
- }
- break;
-
- case NT_ANCHOR:
- type = NANCHOR(node)->type;
- if ((type & anchor_mask) == 0)
- return 1;
-
- if (NANCHOR(node)->target)
- r = check_type_tree(NANCHOR(node)->target,
- type_mask, enclose_mask, anchor_mask);
- break;
-
- default:
- break;
- }
- return r;
-}
-
-#ifdef USE_SUBEXP_CALL
-
-#define RECURSION_EXIST 1
-#define RECURSION_INFINITE 2
-
-static int
-subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
-{
- int type;
- int r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- {
- Node *x;
- OnigDistance min;
- int ret;
-
- x = node;
- do {
- ret = subexp_inf_recursive_check(NCAR(x), env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r |= ret;
- if (head) {
- ret = get_min_match_length(NCAR(x), &min, env);
- if (ret != 0) return ret;
- if (min != 0) head = 0;
- }
- } while (IS_NOT_NULL(x = NCDR(x)));
- }
- break;
-
- case NT_ALT:
- {
- int ret;
- r = RECURSION_EXIST;
- do {
- ret = subexp_inf_recursive_check(NCAR(node), env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r &= ret;
- } while (IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
- case NT_QTFR:
- r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
- if (r == RECURSION_EXIST) {
- if (NQTFR(node)->lower == 0) r = 0;
- }
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check(an->target, env, head);
- break;
- }
- }
- break;
-
- case NT_CALL:
- r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
- break;
-
- case NT_ENCLOSE:
- if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
- return 0;
- else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
- else {
- SET_ENCLOSE_STATUS(node, NST_MARK2);
- r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
-{
- int type;
- int r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- case NT_ALT:
- do {
- r = subexp_inf_recursive_check_trav(NCAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_QTFR:
- r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check_trav(an->target, env);
- break;
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
-
- if (IS_ENCLOSE_RECURSION(en)) {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = subexp_inf_recursive_check(en->target, env, 1);
- if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- }
- r = subexp_inf_recursive_check_trav(en->target, env);
- }
-
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-subexp_recursive_check(Node* node)
-{
- int r = 0;
-
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- do {
- r |= subexp_recursive_check(NCAR(node));
- } while (IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_QTFR:
- r = subexp_recursive_check(NQTFR(node)->target);
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check(an->target);
- break;
- }
- }
- break;
-
- case NT_CALL:
- r = subexp_recursive_check(NCALL(node)->target);
- if (r != 0) SET_CALL_RECURSION(node);
- break;
-
- case NT_ENCLOSE:
- if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
- return 0;
- else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- return 1; /* recursion */
- else {
- SET_ENCLOSE_STATUS(node, NST_MARK2);
- r = subexp_recursive_check(NENCLOSE(node)->target);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-
-static int
-subexp_recursive_check_trav(Node* node, ScanEnv* env)
-{
-#define FOUND_CALLED_NODE 1
-
- int type;
- int r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- case NT_ALT:
- {
- int ret;
- do {
- ret = subexp_recursive_check_trav(NCAR(node), env);
- if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
- else if (ret < 0) return ret;
- } while (IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
- case NT_QTFR:
- r = subexp_recursive_check_trav(NQTFR(node)->target, env);
- if (NQTFR(node)->upper == 0) {
- if (r == FOUND_CALLED_NODE)
- NQTFR(node)->is_refered = 1;
- }
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check_trav(an->target, env);
- break;
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
-
- if (! IS_ENCLOSE_RECURSION(en)) {
- if (IS_ENCLOSE_CALLED(en)) {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = subexp_recursive_check(en->target);
- if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- }
- }
- r = subexp_recursive_check_trav(en->target, env);
- if (IS_ENCLOSE_CALLED(en))
- r |= FOUND_CALLED_NODE;
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-setup_subexp_call(Node* node, ScanEnv* env)
-{
- int type;
- int r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- do {
- r = setup_subexp_call(NCAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_ALT:
- do {
- r = setup_subexp_call(NCAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_QTFR:
- r = setup_subexp_call(NQTFR(node)->target, env);
- break;
- case NT_ENCLOSE:
- r = setup_subexp_call(NENCLOSE(node)->target, env);
- break;
-
- case NT_CALL:
- {
- CallNode* cn = NCALL(node);
- Node** nodes = SCANENV_MEM_NODES(env);
-
- if (cn->group_num != 0) {
- {
- int gnum = cn->group_num;
-
-#ifdef USE_NAMED_GROUP
- if (env->num_named > 0 &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- }
-#endif
- if (gnum > env->num_mem) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_GROUP_REFERENCE;
- }
- }
-
-#ifdef USE_NAMED_GROUP
- set_call_attr:
-#endif
- cn->target = nodes[cn->group_num];
- if (IS_NULL(cn->target)) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
- BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
- cn->unset_addr_list = env->unset_addr_list;
- }
-#ifdef USE_NAMED_GROUP
- else {
- int *refs;
-
- int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
- &refs);
- if (n <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- else if (n > 1) {
- onig_scan_env_set_error_string(env,
- ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
- }
- else {
- cn->group_num = refs[0];
- goto set_call_attr;
- }
- }
-#endif
- }
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
-
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = setup_subexp_call(an->target, env);
- break;
- }
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-#endif
-
-/* divide different length alternatives in look-behind.
- (?<=A|B) ==> (?<=A)|(?<=B)
- (?<!A|B) ==> (?<!A)(?<!B)
-*/
-static int
-divide_look_behind_alternatives(Node* node)
-{
- Node *head, *np, *insert_node;
- AnchorNode* an = NANCHOR(node);
- int anc_type = an->type;
-
- head = an->target;
- np = NCAR(head);
- swap_node(node, head);
- NCAR(node) = head;
- NANCHOR(head)->target = np;
-
- np = node;
- while ((np = NCDR(np)) != NULL_NODE) {
- insert_node = onig_node_new_anchor(anc_type);
- CHECK_NULL_RETURN_MEMERR(insert_node);
- NANCHOR(insert_node)->target = NCAR(np);
- NCAR(np) = insert_node;
- }
-
- if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
- np = node;
- do {
- SET_NTYPE(np, NT_LIST); /* alt -> list */
- } while ((np = NCDR(np)) != NULL_NODE);
- }
- return 0;
-}
-
-static int
-setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
-{
- int r, len;
- AnchorNode* an = NANCHOR(node);
-
- r = get_char_length_tree(an->target, reg, &len);
- if (r == 0)
- an->char_len = len;
- else if (r == GET_CHAR_LEN_VARLEN)
- r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
- r = divide_look_behind_alternatives(node);
- else
- r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- }
-
- return r;
-}
-
-static int
-next_setup(Node* node, Node* next_node, regex_t* reg)
-{
- int type;
-
- retry:
- type = NTYPE(node);
- if (type == NT_QTFR) {
- QtfrNode* qn = NQTFR(node);
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
-#ifdef USE_QTFR_PEEK_NEXT
- Node* n = get_head_value_node(next_node, 1, reg);
- /* '\0': for UTF-16BE etc... */
- if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
- qn->next_head_exact = n;
- }
-#endif
- /* automatic posseivation a*b ==> (?>a*)b */
- if (qn->lower <= 1) {
- int ttype = NTYPE(qn->target);
- if (IS_NODE_TYPE_SIMPLE(ttype)) {
- Node *x, *y;
- x = get_head_value_node(qn->target, 0, reg);
- if (IS_NOT_NULL(x)) {
- y = get_head_value_node(next_node, 0, reg);
- if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
- Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
- CHECK_NULL_RETURN_MEMERR(en);
- SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
- swap_node(node, en);
- NENCLOSE(node)->target = en;
- }
- }
- }
- }
- }
- }
- else if (type == NT_ENCLOSE) {
- EncloseNode* en = NENCLOSE(node);
- if (en->type == ENCLOSE_MEMORY) {
- node = en->target;
- goto retry;
- }
- }
- return 0;
-}
-
-
-static int
-update_string_node_case_fold(regex_t* reg, Node *node)
-{
- UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- UChar *sbuf, *ebuf, *sp;
- int r, i, len, sbuf_size;
- StrNode* sn = NSTR(node);
-
- end = sn->end;
- sbuf_size = (end - sn->s) * 2;
- sbuf = (UChar* )xmalloc(sbuf_size);
- CHECK_NULL_RETURN_MEMERR(sbuf);
- ebuf = sbuf + sbuf_size;
-
- sp = sbuf;
- p = sn->s;
- while (p < end) {
- len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
- q = buf;
- for (i = 0; i < len; i++) {
- if (sp >= ebuf) {
- sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
- CHECK_NULL_RETURN_MEMERR(sbuf);
- sp = sbuf + sbuf_size;
- sbuf_size *= 2;
- ebuf = sbuf + sbuf_size;
- }
-
- *sp++ = buf[i];
- }
- }
-
- r = onig_node_str_set(node, sbuf, sp);
- if (r != 0) {
- xfree(sbuf);
- return r;
- }
-
- xfree(sbuf);
- return 0;
-}
-
-static int
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
- regex_t* reg)
-{
- int r;
- Node *node;
-
- node = onig_node_new_str(s, end);
- if (IS_NULL(node)) return ONIGERR_MEMORY;
-
- r = update_string_node_case_fold(reg, node);
- if (r != 0) {
- onig_node_free(node);
- return r;
- }
-
- NSTRING_SET_AMBIG(node);
- NSTRING_SET_DONT_GET_OPT_INFO(node);
- *rnode = node;
- return 0;
-}
-
-static int
-expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
- UChar *p, int slen, UChar *end,
- regex_t* reg, Node **rnode)
-{
- int r, i, j, len, varlen;
- Node *anode, *var_anode, *snode, *xnode, *an;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
-
- *rnode = var_anode = NULL_NODE;
-
- varlen = 0;
- for (i = 0; i < item_num; i++) {
- if (items[i].byte_len != slen) {
- varlen = 1;
- break;
- }
- }
-
- if (varlen != 0) {
- *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
-
- xnode = onig_node_new_list(NULL, NULL);
- if (IS_NULL(xnode)) goto mem_err;
- NCAR(var_anode) = xnode;
-
- anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) goto mem_err;
- NCAR(xnode) = anode;
- }
- else {
- *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) return ONIGERR_MEMORY;
- }
-
- snode = onig_node_new_str(p, p + slen);
- if (IS_NULL(snode)) goto mem_err;
-
- NCAR(anode) = snode;
-
- for (i = 0; i < item_num; i++) {
- snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
-
- for (j = 0; j < items[i].code_len; j++) {
- len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
- if (len < 0) {
- r = len;
- goto mem_err2;
- }
-
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r != 0) goto mem_err2;
- }
-
- an = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(an)) {
- goto mem_err2;
- }
-
- if (items[i].byte_len != slen) {
- Node *rem;
- UChar *q = p + items[i].byte_len;
-
- if (q < end) {
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);
- if (r != 0) {
- onig_node_free(an);
- goto mem_err2;
- }
-
- xnode = onig_node_list_add(NULL_NODE, snode);
- if (IS_NULL(xnode)) {
- onig_node_free(an);
- onig_node_free(rem);
- goto mem_err2;
- }
- if (IS_NULL(onig_node_list_add(xnode, rem))) {
- onig_node_free(an);
- onig_node_free(xnode);
- onig_node_free(rem);
- goto mem_err;
- }
-
- NCAR(an) = xnode;
- }
- else {
- NCAR(an) = snode;
- }
-
- NCDR(var_anode) = an;
- var_anode = an;
- }
- else {
- NCAR(an) = snode;
- NCDR(anode) = an;
- anode = an;
- }
- }
-
- return varlen;
-
- mem_err2:
- onig_node_free(snode);
-
- mem_err:
- onig_node_free(*rnode);
-
- return ONIGERR_MEMORY;
-}
-
-static int
-expand_case_fold_string(Node* node, regex_t* reg)
-{
-#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
-
- int r, n, len, alt_num;
- UChar *start, *end, *p;
- Node *top_root, *root, *snode, *prev_node;
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- StrNode* sn = NSTR(node);
-
- if (NSTRING_IS_AMBIG(node)) return 0;
-
- start = sn->s;
- end = sn->end;
- if (start >= end) return 0;
-
- r = 0;
- top_root = root = prev_node = snode = NULL_NODE;
- alt_num = 1;
- p = start;
- while (p < end) {
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
- p, end, items);
- if (n < 0) {
- r = n;
- goto err;
- }
-
- len = enclen(reg->enc, p, end);
-
- if (n == 0) {
- if (IS_NULL(snode)) {
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
- }
-
- r = onig_node_str_cat(snode, p, p + len);
- if (r != 0) goto err;
- }
- else {
- alt_num *= (n + 1);
- if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
-
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
- if (r < 0) goto mem_err;
- if (r == 1) {
- if (IS_NULL(root)) {
- top_root = prev_node;
- }
- else {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- root = NCAR(prev_node);
- }
- else { /* r == 0 */
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
- }
-
- snode = NULL_NODE;
- }
-
- p += len;
- }
-
- if (p < end) {
- Node *srem;
-
- r = expand_case_fold_make_rem_string(&srem, p, end, reg);
- if (r != 0) goto mem_err;
-
- if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(srem);
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- if (IS_NULL(root)) {
- prev_node = srem;
- }
- else {
- if (IS_NULL(onig_node_list_add(root, srem))) {
- onig_node_free(srem);
- goto mem_err;
- }
- }
- }
-
- /* ending */
- top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
- swap_node(node, top_root);
- onig_node_free(top_root);
- return 0;
-
- mem_err:
- r = ONIGERR_MEMORY;
-
- err:
- onig_node_free(top_root);
- return r;
-}
-
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-#define CEC_THRES_NUM_BIG_REPEAT 512
-#define CEC_INFINITE_NUM 0x7fffffff
-
-#define CEC_IN_INFINITE_REPEAT (1<<0)
-#define CEC_IN_FINITE_REPEAT (1<<1)
-#define CEC_CONT_BIG_REPEAT (1<<2)
-
-static int
-setup_comb_exp_check(Node* node, int state, ScanEnv* env)
-{
- int type;
- int r = state;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- {
- Node* prev = NULL_NODE;
- do {
- r = setup_comb_exp_check(NCAR(node), r, env);
- prev = NCAR(node);
- } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
- case NT_ALT:
- {
- int ret;
- do {
- ret = setup_comb_exp_check(NCAR(node), state, env);
- r |= ret;
- } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
- case NT_QTFR:
- {
- int child_state = state;
- int add_state = 0;
- QtfrNode* qn = NQTFR(node);
- Node* target = qn->target;
- int var_num;
-
- if (! IS_REPEAT_INFINITE(qn->upper)) {
- if (qn->upper > 1) {
- /* {0,1}, {1,1} are allowed */
- child_state |= CEC_IN_FINITE_REPEAT;
-
- /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
- if (env->backrefed_mem == 0) {
- if (NTYPE(qn->target) == NT_ENCLOSE) {
- EncloseNode* en = NENCLOSE(qn->target);
- if (en->type == ENCLOSE_MEMORY) {
- if (NTYPE(en->target) == NT_QTFR) {
- QtfrNode* q = NQTFR(en->target);
- if (IS_REPEAT_INFINITE(q->upper)
- && q->greedy == qn->greedy) {
- qn->upper = (qn->lower == 0 ? 1 : qn->lower);
- if (qn->upper == 1)
- child_state = state;
- }
- }
- }
- }
- }
- }
- }
-
- if (state & CEC_IN_FINITE_REPEAT) {
- qn->comb_exp_check_num = -1;
- }
- else {
- if (IS_REPEAT_INFINITE(qn->upper)) {
- var_num = CEC_INFINITE_NUM;
- child_state |= CEC_IN_INFINITE_REPEAT;
- }
- else {
- var_num = qn->upper - qn->lower;
- }
-
- if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
- add_state |= CEC_CONT_BIG_REPEAT;
-
- if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
- ((state & CEC_CONT_BIG_REPEAT) != 0 &&
- var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
- if (qn->comb_exp_check_num == 0) {
- env->num_comb_exp_check++;
- qn->comb_exp_check_num = env->num_comb_exp_check;
- if (env->curr_max_regnum > env->comb_exp_max_regnum)
- env->comb_exp_max_regnum = env->curr_max_regnum;
- }
- }
- }
-
- r = setup_comb_exp_check(target, child_state, env);
- r |= add_state;
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
-
- switch (en->type) {
- case ENCLOSE_MEMORY:
- {
- if (env->curr_max_regnum < en->regnum)
- env->curr_max_regnum = en->regnum;
-
- r = setup_comb_exp_check(en->target, state, env);
- }
- break;
-
- default:
- r = setup_comb_exp_check(en->target, state, env);
- break;
- }
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node)))
- env->has_recursion = 1;
- else
- r = setup_comb_exp_check(NCALL(node)->target, state, env);
- break;
-#endif
-
- default:
- break;
- }
-
- return r;
-}
-#endif
-
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-
-/* setup_tree does the following work.
- 1. check empty loop. (set qn->target_empty_info)
- 2. expand ignore-case in char class.
- 3. set memory status bit flags. (reg->mem_stats)
- 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
- 5. find invalid patterns in look-behind.
- 6. expand repeated string.
- */
-static int
-setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
-{
- int type;
- int r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- {
- Node* prev = NULL_NODE;
- do {
- r = setup_tree(NCAR(node), reg, state, env);
- if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NCAR(node), reg);
- }
- prev = NCAR(node);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
- case NT_ALT:
- do {
- r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_CCLASS:
- break;
-
- case NT_STR:
- if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg);
- }
- break;
-
- case NT_CTYPE:
- case NT_CANY:
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- break;
-#endif
-
- case NT_BREF:
- {
- int i;
- int* p;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- p = BACKREFS_P(br);
- for (i = 0; i < br->back_num; i++) {
- if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
- BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
-#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
- }
-#endif
- SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
- }
- }
- break;
-
- case NT_QTFR:
- {
- OnigDistance d;
- QtfrNode* qn = NQTFR(node);
- Node* target = qn->target;
-
- if ((state & IN_REPEAT) != 0) {
- qn->state |= NST_IN_REPEAT;
- }
-
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
- r = get_min_match_length(target, &d, env);
- if (r) break;
- if (d == 0) {
- qn->target_empty_info = NQ_TARGET_IS_EMPTY;
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- r = quantifiers_memory_node_info(target);
- if (r < 0) break;
- if (r > 0) {
- qn->target_empty_info = r;
- }
-#endif
- }
- }
-
- state |= IN_REPEAT;
- if (qn->lower != qn->upper)
- state |= IN_VAR_REPEAT;
- r = setup_tree(target, reg, state, env);
- if (r) break;
-
- /* expand string */
-#define EXPAND_STRING_MAX_LENGTH 100
- if (NTYPE(target) == NT_STR) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
- qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int len = NSTRING_LEN(target);
- StrNode* sn = NSTR(target);
-
- if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int i, n = qn->lower;
- onig_node_conv_to_str_node(node, NSTR(target)->flag);
- for (i = 0; i < n; i++) {
- r = onig_node_str_cat(node, sn->s, sn->end);
- if (r) break;
- }
- onig_node_free(target);
- break; /* break case NT_QTFR: */
- }
- }
- }
-
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (qn->greedy && (qn->target_empty_info != 0)) {
- if (NTYPE(target) == NT_QTFR) {
- QtfrNode* tqn = NQTFR(target);
- if (IS_NOT_NULL(tqn->head_exact)) {
- qn->head_exact = tqn->head_exact;
- tqn->head_exact = NULL;
- }
- }
- else {
- qn->head_exact = get_head_value_node(qn->target, 1, reg);
- }
- }
-#endif
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
-
- switch (en->type) {
- case ENCLOSE_OPTION:
- {
- OnigOptionType options = reg->options;
- reg->options = NENCLOSE(node)->option;
- r = setup_tree(NENCLOSE(node)->target, reg, state, env);
- reg->options = options;
- }
- break;
-
- case ENCLOSE_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
- BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
- /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
- }
- r = setup_tree(en->target, reg, state, env);
- break;
-
- case ENCLOSE_STOP_BACKTRACK:
- {
- Node* target = en->target;
- r = setup_tree(target, reg, state, env);
- if (NTYPE(target) == NT_QTFR) {
- QtfrNode* tqn = NQTFR(target);
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
- tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- int qtype = NTYPE(tqn->target);
- if (IS_NODE_TYPE_SIMPLE(qtype))
- SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
- }
- }
- }
- break;
- }
- }
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
-
- switch (an->type) {
- case ANCHOR_PREC_READ:
- r = setup_tree(an->target, reg, state, env);
- break;
- case ANCHOR_PREC_READ_NOT:
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- break;
-
-/* allowed node types in look-behind */
-#define ALLOWED_TYPE_IN_LB \
- ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
- BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
-
-#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
-#define ALLOWED_ENCLOSE_IN_LB_NOT 0
-
-#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
-#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
-
- case ANCHOR_LOOK_BEHIND:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
- r = setup_tree(an->target, reg, state, env);
- }
- break;
-
- case ANCHOR_LOOK_BEHIND_NOT:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- }
- break;
- }
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-/* set skip map for Boyer-Moor search */
-static int
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
- UChar skip[], int** int_skip)
-{
- int i, len;
-
- len = end - s;
- if (len < ONIG_CHAR_TABLE_SIZE) {
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
-
- for (i = 0; i < len - 1; i++)
- skip[s[i]] = len - 1 - i;
- }
- else {
- if (IS_NULL(*int_skip)) {
- *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
- }
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
-
- for (i = 0; i < len - 1; i++)
- (*int_skip)[s[i]] = len - 1 - i;
- }
- return 0;
-}
-
-#define OPT_EXACT_MAXLEN 24
-
-typedef struct {
- OnigDistance min; /* min byte length */
- OnigDistance max; /* max byte length */
-} MinMaxLen;
-
-typedef struct {
- MinMaxLen mmd;
- OnigEncoding enc;
- OnigOptionType options;
- OnigCaseFoldType case_fold_flag;
- ScanEnv* scan_env;
-} OptEnv;
-
-typedef struct {
- int left_anchor;
- int right_anchor;
-} OptAncInfo;
-
-typedef struct {
- MinMaxLen mmd; /* info position */
- OptAncInfo anc;
-
- int reach_end;
- int ignore_case;
- int len;
- UChar s[OPT_EXACT_MAXLEN];
-} OptExactInfo;
-
-typedef struct {
- MinMaxLen mmd; /* info position */
- OptAncInfo anc;
-
- int value; /* weighted value */
- UChar map[ONIG_CHAR_TABLE_SIZE];
-} OptMapInfo;
-
-typedef struct {
- MinMaxLen len;
-
- OptAncInfo anc;
- OptExactInfo exb; /* boundary */
- OptExactInfo exm; /* middle */
- OptExactInfo expr; /* prec read (?=...) */
-
- OptMapInfo map; /* boundary */
-} NodeOptInfo;
-
-
-static int
-map_position_value(OnigEncoding enc, int i)
-{
- static const short int ByteValTable[] = {
- 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
- };
-
- if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
- if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
- return 20;
- else
- return (int )ByteValTable[i];
- }
- else
- return 4; /* Take it easy. */
-}
-
-static int
-distance_value(MinMaxLen* mm)
-{
- /* 1000 / (min-max-dist + 1) */
- static const short int dist_vals[] = {
- 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
- 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
- 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
- 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
- 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
- 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
- 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
- 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
- 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
- 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
- };
-
- int d;
-
- if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
-
- d = mm->max - mm->min;
- if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
- /* return dist_vals[d] * 16 / (mm->min + 12); */
- return (int )dist_vals[d];
- else
- return 1;
-}
-
-static int
-comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
-{
- if (v2 <= 0) return -1;
- if (v1 <= 0) return 1;
-
- v1 *= distance_value(d1);
- v2 *= distance_value(d2);
-
- if (v2 > v1) return 1;
- if (v2 < v1) return -1;
-
- if (d2->min < d1->min) return 1;
- if (d2->min > d1->min) return -1;
- return 0;
-}
-
-static int
-is_equal_mml(MinMaxLen* a, MinMaxLen* b)
-{
- return (a->min == b->min && a->max == b->max) ? 1 : 0;
-}
-
-
-static void
-set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
-{
- mml->min = min;
- mml->max = max;
-}
-
-static void
-clear_mml(MinMaxLen* mml)
-{
- mml->min = mml->max = 0;
-}
-
-static void
-copy_mml(MinMaxLen* to, MinMaxLen* from)
-{
- to->min = from->min;
- to->max = from->max;
-}
-
-static void
-add_mml(MinMaxLen* to, MinMaxLen* from)
-{
- to->min = distance_add(to->min, from->min);
- to->max = distance_add(to->max, from->max);
-}
-
-static void
-alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
-{
- if (to->min > from->min) to->min = from->min;
- if (to->max < from->max) to->max = from->max;
-}
-
-static void
-copy_opt_env(OptEnv* to, OptEnv* from)
-{
- *to = *from;
-}
-
-static void
-clear_opt_anc_info(OptAncInfo* anc)
-{
- anc->left_anchor = 0;
- anc->right_anchor = 0;
-}
-
-static void
-copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
-{
- *to = *from;
-}
-
-static void
-concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
- OnigDistance left_len, OnigDistance right_len)
-{
- clear_opt_anc_info(to);
-
- to->left_anchor = left->left_anchor;
- if (left_len == 0) {
- to->left_anchor |= right->left_anchor;
- }
-
- to->right_anchor = right->right_anchor;
- if (right_len == 0) {
- to->right_anchor |= left->right_anchor;
- }
-}
-
-static int
-is_left_anchor(int anc)
-{
- if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
- anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
- anc == ANCHOR_PREC_READ_NOT)
- return 0;
-
- return 1;
-}
-
-static int
-is_set_opt_anc_info(OptAncInfo* to, int anc)
-{
- if ((to->left_anchor & anc) != 0) return 1;
-
- return ((to->right_anchor & anc) != 0 ? 1 : 0);
-}
-
-static void
-add_opt_anc_info(OptAncInfo* to, int anc)
-{
- if (is_left_anchor(anc))
- to->left_anchor |= anc;
- else
- to->right_anchor |= anc;
-}
-
-static void
-remove_opt_anc_info(OptAncInfo* to, int anc)
-{
- if (is_left_anchor(anc))
- to->left_anchor &= ~anc;
- else
- to->right_anchor &= ~anc;
-}
-
-static void
-alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
-{
- to->left_anchor &= add->left_anchor;
- to->right_anchor &= add->right_anchor;
-}
-
-static int
-is_full_opt_exact_info(OptExactInfo* ex)
-{
- return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
-}
-
-static void
-clear_opt_exact_info(OptExactInfo* ex)
-{
- clear_mml(&ex->mmd);
- clear_opt_anc_info(&ex->anc);
- ex->reach_end = 0;
- ex->ignore_case = 0;
- ex->len = 0;
- ex->s[0] = '\0';
-}
-
-static void
-copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
-{
- *to = *from;
-}
-
-static void
-concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
-{
- int i, j, len;
- UChar *p, *end;
- OptAncInfo tanc;
-
- if (! to->ignore_case && add->ignore_case) {
- if (to->len >= add->len) return ; /* avoid */
-
- to->ignore_case = 1;
- }
-
- p = add->s;
- end = p + add->len;
- for (i = to->len; p < end; ) {
- len = enclen(enc, p, end);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len && p < end; j++)
- to->s[i++] = *p++;
- }
-
- to->len = i;
- to->reach_end = (p == end ? add->reach_end : 0);
-
- concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
- if (! to->reach_end) tanc.right_anchor = 0;
- copy_opt_anc_info(&to->anc, &tanc);
-}
-
-static void
-concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
- int raw ARG_UNUSED, OnigEncoding enc)
-{
- int i, j, len;
- UChar *p;
-
- for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- len = enclen(enc, p, end);
- if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len && p < end; j++)
- to->s[i++] = *p++;
- }
-
- to->len = i;
-}
-
-static void
-alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
-{
- int i, j, len;
-
- if (add->len == 0 || to->len == 0) {
- clear_opt_exact_info(to);
- return ;
- }
-
- if (! is_equal_mml(&to->mmd, &add->mmd)) {
- clear_opt_exact_info(to);
- return ;
- }
-
- for (i = 0; i < to->len && i < add->len; ) {
- if (to->s[i] != add->s[i]) break;
- len = enclen(env->enc, to->s + i, to->s + to->len);
-
- for (j = 1; j < len; j++) {
- if (to->s[i+j] != add->s[i+j]) break;
- }
- if (j < len) break;
- i += len;
- }
-
- if (! add->reach_end || i < add->len || i < to->len) {
- to->reach_end = 0;
- }
- to->len = i;
- to->ignore_case |= add->ignore_case;
-
- alt_merge_opt_anc_info(&to->anc, &add->anc);
- if (! to->reach_end) to->anc.right_anchor = 0;
-}
-
-static void
-select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
-{
- int v1, v2;
-
- v1 = now->len;
- v2 = alt->len;
-
- if (v2 == 0) {
- return ;
- }
- else if (v1 == 0) {
- copy_opt_exact_info(now, alt);
- return ;
- }
- else if (v1 <= 2 && v2 <= 2) {
- /* ByteValTable[x] is big value --> low price */
- v2 = map_position_value(enc, now->s[0]);
- v1 = map_position_value(enc, alt->s[0]);
-
- if (now->len > 1) v1 += 5;
- if (alt->len > 1) v2 += 5;
- }
-
- if (now->ignore_case == 0) v1 *= 2;
- if (alt->ignore_case == 0) v2 *= 2;
-
- if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
- copy_opt_exact_info(now, alt);
-}
-
-static void
-clear_opt_map_info(OptMapInfo* map)
-{
- static const OptMapInfo clean_info = {
- {0, 0}, {0, 0}, 0,
- {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- }
- };
-
- xmemcpy(map, &clean_info, sizeof(OptMapInfo));
-}
-
-static void
-copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
-{
- *to = *from;
-}
-
-static void
-add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
-{
- if (map->map[c] == 0) {
- map->map[c] = 1;
- map->value += map_position_value(enc, c);
- }
-}
-
-static int
-add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
- OnigEncoding enc, OnigCaseFoldType case_fold_flag)
-{
- OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- int i, n;
-
- add_char_opt_map_info(map, p[0], enc);
-
- case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
- if (n < 0) return n;
-
- for (i = 0; i < n; i++) {
- ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
- add_char_opt_map_info(map, buf[0], enc);
- }
-
- return 0;
-}
-
-static void
-select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
-{
- const int z = 1<<15; /* 32768: something big value */
-
- int v1, v2;
-
- if (alt->value == 0) return ;
- if (now->value == 0) {
- copy_opt_map_info(now, alt);
- return ;
- }
-
- v1 = z / now->value;
- v2 = z / alt->value;
- if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
- copy_opt_map_info(now, alt);
-}
-
-static int
-comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
-{
-#define COMP_EM_BASE 20
- int ve, vm;
-
- if (m->value <= 0) return -1;
-
- ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
- vm = COMP_EM_BASE * 5 * 2 / m->value;
- return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
-}
-
-static void
-alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
-{
- int i, val;
-
- /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
- if (to->value == 0) return ;
- if (add->value == 0 || to->mmd.max < add->mmd.min) {
- clear_opt_map_info(to);
- return ;
- }
-
- alt_merge_mml(&to->mmd, &add->mmd);
-
- val = 0;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
- if (add->map[i])
- to->map[i] = 1;
-
- if (to->map[i])
- val += map_position_value(enc, i);
- }
- to->value = val;
-
- alt_merge_opt_anc_info(&to->anc, &add->anc);
-}
-
-static void
-set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
-{
- copy_mml(&(opt->exb.mmd), mmd);
- copy_mml(&(opt->expr.mmd), mmd);
- copy_mml(&(opt->map.mmd), mmd);
-}
-
-static void
-clear_node_opt_info(NodeOptInfo* opt)
-{
- clear_mml(&opt->len);
- clear_opt_anc_info(&opt->anc);
- clear_opt_exact_info(&opt->exb);
- clear_opt_exact_info(&opt->exm);
- clear_opt_exact_info(&opt->expr);
- clear_opt_map_info(&opt->map);
-}
-
-static void
-copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
-{
- *to = *from;
-}
-
-static void
-concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
-{
- int exb_reach, exm_reach;
- OptAncInfo tanc;
-
- concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
- copy_opt_anc_info(&to->anc, &tanc);
-
- if (add->exb.len > 0 && to->len.max == 0) {
- concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
- to->len.max, add->len.max);
- copy_opt_anc_info(&add->exb.anc, &tanc);
- }
-
- if (add->map.value > 0 && to->len.max == 0) {
- if (add->map.mmd.max == 0)
- add->map.anc.left_anchor |= to->anc.left_anchor;
- }
-
- exb_reach = to->exb.reach_end;
- exm_reach = to->exm.reach_end;
-
- if (add->len.max != 0)
- to->exb.reach_end = to->exm.reach_end = 0;
-
- if (add->exb.len > 0) {
- if (exb_reach) {
- concat_opt_exact_info(&to->exb, &add->exb, enc);
- clear_opt_exact_info(&add->exb);
- }
- else if (exm_reach) {
- concat_opt_exact_info(&to->exm, &add->exb, enc);
- clear_opt_exact_info(&add->exb);
- }
- }
- select_opt_exact_info(enc, &to->exm, &add->exb);
- select_opt_exact_info(enc, &to->exm, &add->exm);
-
- if (to->expr.len > 0) {
- if (add->len.max > 0) {
- if (to->expr.len > (int )add->len.max)
- to->expr.len = add->len.max;
-
- if (to->expr.mmd.max == 0)
- select_opt_exact_info(enc, &to->exb, &to->expr);
- else
- select_opt_exact_info(enc, &to->exm, &to->expr);
- }
- }
- else if (add->expr.len > 0) {
- copy_opt_exact_info(&to->expr, &add->expr);
- }
-
- select_opt_map_info(&to->map, &add->map);
-
- add_mml(&to->len, &add->len);
-}
-
-static void
-alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
-{
- alt_merge_opt_anc_info (&to->anc, &add->anc);
- alt_merge_opt_exact_info(&to->exb, &add->exb, env);
- alt_merge_opt_exact_info(&to->exm, &add->exm, env);
- alt_merge_opt_exact_info(&to->expr, &add->expr, env);
- alt_merge_opt_map_info(env->enc, &to->map, &add->map);
-
- alt_merge_mml(&to->len, &add->len);
-}
-
-
-#define MAX_NODE_OPT_INFO_REF_COUNT 5
-
-static int
-optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
-{
- int type;
- int r = 0;
-
- clear_node_opt_info(opt);
- set_bound_node_opt_info(opt, &env->mmd);
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- {
- OptEnv nenv;
- NodeOptInfo nopt;
- Node* nd = node;
-
- copy_opt_env(&nenv, env);
- do {
- r = optimize_node_left(NCAR(nd), &nopt, &nenv);
- if (r == 0) {
- add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(env->enc, opt, &nopt);
- }
- } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
- }
- break;
-
- case NT_ALT:
- {
- NodeOptInfo nopt;
- Node* nd = node;
-
- do {
- r = optimize_node_left(NCAR(nd), &nopt, env);
- if (r == 0) {
- if (nd == node) copy_node_opt_info(opt, &nopt);
- else alt_merge_node_opt_info(opt, &nopt, env);
- }
- } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
- }
- break;
-
- case NT_STR:
- {
- StrNode* sn = NSTR(node);
- int slen = sn->end - sn->s;
- int is_raw = NSTRING_IS_RAW(node);
-
- if (! NSTRING_IS_AMBIG(node)) {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- NSTRING_IS_RAW(node), env->enc);
- if (slen > 0) {
- add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
- }
- set_mml(&opt->len, slen, slen);
- }
- else {
- int max;
-
- if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
- int n = onigenc_strlen(env->enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
- }
- else {
- concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
- is_raw, env->enc);
- opt->exb.ignore_case = 1;
-
- if (slen > 0) {
- r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
- env->enc, env->case_fold_flag);
- if (r != 0) break;
- }
-
- max = slen;
- }
-
- set_mml(&opt->len, slen, max);
- }
-
- if (opt->exb.len == slen)
- opt->exb.reach_end = 1;
- }
- break;
-
- case NT_CCLASS:
- {
- int i, z;
- CClassNode* cc = NCCLASS(node);
-
- /* no need to check ignore case. (setted in setup_tree()) */
-
- if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
- OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
- OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
-
- set_mml(&opt->len, min, max);
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = BITSET_AT(cc->bs, i);
- if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
- set_mml(&opt->len, 1, 1);
- }
- }
- break;
-
- case NT_CTYPE:
- {
- int i, min, max;
-
- max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
-
- if (max == 1) {
- min = 1;
-
- switch (NCTYPE(node)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->is_not != 0) {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
- }
- else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
- }
- break;
- }
- }
- else {
- min = ONIGENC_MBC_MINLEN(env->enc);
- }
- set_mml(&opt->len, min, max);
- }
- break;
-
- case NT_CANY:
- {
- OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
- OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, min, max);
- }
- break;
-
- case NT_ANCHOR:
- switch (NANCHOR(node)->type) {
- case ANCHOR_BEGIN_BUF:
- case ANCHOR_BEGIN_POSITION:
- case ANCHOR_BEGIN_LINE:
- case ANCHOR_END_BUF:
- case ANCHOR_SEMI_END_BUF:
- case ANCHOR_END_LINE:
- add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
- break;
-
- case ANCHOR_PREC_READ:
- {
- NodeOptInfo nopt;
-
- r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
- if (r == 0) {
- if (nopt.exb.len > 0)
- copy_opt_exact_info(&opt->expr, &nopt.exb);
- else if (nopt.exm.len > 0)
- copy_opt_exact_info(&opt->expr, &nopt.exm);
-
- opt->expr.reach_end = 0;
-
- if (nopt.map.value > 0)
- copy_opt_map_info(&opt->map, &nopt.map);
- }
- }
- break;
-
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
- case ANCHOR_LOOK_BEHIND_NOT:
- break;
- }
- break;
-
- case NT_BREF:
- {
- int i;
- int* backs;
- OnigDistance min, max, tmin, tmax;
- Node** nodes = SCANENV_MEM_NODES(env->scan_env);
- BRefNode* br = NBREF(node);
-
- if (br->state & NST_RECURSION) {
- set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
- break;
- }
- backs = BACKREFS_P(br);
- r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
- if (r != 0) break;
- r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
- if (r != 0) break;
- for (i = 1; i < br->back_num; i++) {
- r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
- if (r != 0) break;
- r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
- if (r != 0) break;
- if (min > tmin) min = tmin;
- if (max < tmax) max = tmax;
- }
- if (r == 0) set_mml(&opt->len, min, max);
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node)))
- set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
- else {
- OnigOptionType save = env->options;
- env->options = NENCLOSE(NCALL(node)->target)->option;
- r = optimize_node_left(NCALL(node)->target, opt, env);
- env->options = save;
- }
- break;
-#endif
-
- case NT_QTFR:
- {
- int i;
- OnigDistance min, max;
- NodeOptInfo nopt;
- QtfrNode* qn = NQTFR(node);
-
- r = optimize_node_left(qn->target, &nopt, env);
- if (r) break;
-
- if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
- if (env->mmd.max == 0 &&
- NTYPE(qn->target) == NT_CANY && qn->greedy) {
- if (IS_MULTILINE(env->options))
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
- else
- add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
- }
- }
- else {
- if (qn->lower > 0) {
- copy_node_opt_info(opt, &nopt);
- if (nopt.exb.len > 0) {
- if (nopt.exb.reach_end) {
- for (i = 2; i <= qn->lower &&
- ! is_full_opt_exact_info(&opt->exb); i++) {
- concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
- }
- if (i < qn->lower) {
- opt->exb.reach_end = 0;
- }
- }
- }
-
- if (qn->lower != qn->upper) {
- opt->exb.reach_end = 0;
- opt->exm.reach_end = 0;
- }
- if (qn->lower > 1)
- opt->exm.reach_end = 0;
- }
- }
-
- min = distance_multiply(nopt.len.min, qn->lower);
- if (IS_REPEAT_INFINITE(qn->upper))
- max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
- else
- max = distance_multiply(nopt.len.max, qn->upper);
-
- set_mml(&opt->len, min, max);
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
-
- switch (en->type) {
- case ENCLOSE_OPTION:
- {
- OnigOptionType save = env->options;
-
- env->options = en->option;
- r = optimize_node_left(en->target, opt, env);
- env->options = save;
- }
- break;
-
- case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- en->opt_count++;
- if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
- OnigDistance min, max;
-
- min = 0;
- max = ONIG_INFINITE_DISTANCE;
- if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
- if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
- set_mml(&opt->len, min, max);
- }
- else
-#endif
- {
- r = optimize_node_left(en->target, opt, env);
-
- if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
- if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
- remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
- }
- }
- break;
-
- case ENCLOSE_STOP_BACKTRACK:
- r = optimize_node_left(en->target, opt, env);
- break;
- }
- }
- break;
-
- default:
-#ifdef ONIG_DEBUG
- fprintf(stderr, "optimize_node_left: undefined node type %d\n",
- NTYPE(node));
-#endif
- r = ONIGERR_TYPE_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
-{
- int r;
-
- if (e->len == 0) return 0;
-
- if (e->ignore_case) {
- reg->exact = (UChar* )xmalloc(e->len);
- CHECK_NULL_RETURN_MEMERR(reg->exact);
- xmemcpy(reg->exact, e->s, e->len);
- reg->exact_end = reg->exact + e->len;
- reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
- }
- else {
- int allow_reverse;
-
- reg->exact = str_dup(e->s, e->s + e->len);
- CHECK_NULL_RETURN_MEMERR(reg->exact);
- reg->exact_end = reg->exact + e->len;
-
- allow_reverse =
- ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
-
- if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
- r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
- reg->map, &(reg->int_map));
- if (r) return r;
-
- reg->optimize = (allow_reverse != 0
- ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
- }
- else {
- reg->optimize = ONIG_OPTIMIZE_EXACT;
- }
- }
-
- reg->dmin = e->mmd.min;
- reg->dmax = e->mmd.max;
-
- if (reg->dmin != ONIG_INFINITE_DISTANCE) {
- reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
- }
-
- return 0;
-}
-
-static void
-set_optimize_map_info(regex_t* reg, OptMapInfo* m)
-{
- int i;
-
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- reg->map[i] = m->map[i];
-
- reg->optimize = ONIG_OPTIMIZE_MAP;
- reg->dmin = m->mmd.min;
- reg->dmax = m->mmd.max;
-
- if (reg->dmin != ONIG_INFINITE_DISTANCE) {
- reg->threshold_len = reg->dmin + 1;
- }
-}
-
-static void
-set_sub_anchor(regex_t* reg, OptAncInfo* anc)
-{
- reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
- reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
-}
-
-#ifdef ONIG_DEBUG
-static void print_optimize_info(FILE* f, regex_t* reg);
-#endif
-
-static int
-set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
-{
-
- int r;
- NodeOptInfo opt;
- OptEnv env;
-
- env.enc = reg->enc;
- env.options = reg->options;
- env.case_fold_flag = reg->case_fold_flag;
- env.scan_env = scan_env;
- clear_mml(&env.mmd);
-
- r = optimize_node_left(node, &opt, &env);
- if (r) return r;
-
- reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
- ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
-
- reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
-
- if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
- reg->anchor_dmin = opt.len.min;
- reg->anchor_dmax = opt.len.max;
- }
-
- if (opt.exb.len > 0 || opt.exm.len > 0) {
- select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
- if (opt.map.value > 0 &&
- comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
- goto set_map;
- }
- else {
- r = set_optimize_exact_info(reg, &opt.exb);
- set_sub_anchor(reg, &opt.exb.anc);
- }
- }
- else if (opt.map.value > 0) {
- set_map:
- set_optimize_map_info(reg, &opt.map);
- set_sub_anchor(reg, &opt.map.anc);
- }
- else {
- reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
- if (opt.len.max == 0)
- reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
- }
-
-#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
- print_optimize_info(stderr, reg);
-#endif
- return r;
-}
-
-static void
-clear_optimize_info(regex_t* reg)
-{
- reg->optimize = ONIG_OPTIMIZE_NONE;
- reg->anchor = 0;
- reg->anchor_dmin = 0;
- reg->anchor_dmax = 0;
- reg->sub_anchor = 0;
- reg->exact_end = (UChar* )NULL;
- reg->threshold_len = 0;
- if (IS_NOT_NULL(reg->exact)) {
- xfree(reg->exact);
- reg->exact = (UChar* )NULL;
- }
-}
-
-#ifdef ONIG_DEBUG
-
-static void print_enc_string(FILE* fp, OnigEncoding enc,
- const UChar *s, const UChar *end)
-{
- fprintf(fp, "\nPATTERN: /");
-
- if (ONIGENC_MBC_MINLEN(enc) > 1) {
- const UChar *p;
- OnigCodePoint code;
-
- p = s;
- while (p < end) {
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (code >= 0x80) {
- fprintf(fp, " 0x%04x ", (int )code);
- }
- else {
- fputc((int )code, fp);
- }
-
- p += enclen(enc, p, end);
- }
- }
- else {
- while (s < end) {
- fputc((int )*s, fp);
- s++;
- }
- }
-
- fprintf(fp, "/\n");
-}
-
-static void
-print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
-{
- if (a == ONIG_INFINITE_DISTANCE)
- fputs("inf", f);
- else
- fprintf(f, "(%u)", a);
-
- fputs("-", f);
-
- if (b == ONIG_INFINITE_DISTANCE)
- fputs("inf", f);
- else
- fprintf(f, "(%u)", b);
-}
-
-static void
-print_anchor(FILE* f, int anchor)
-{
- int q = 0;
-
- fprintf(f, "[");
-
- if (anchor & ANCHOR_BEGIN_BUF) {
- fprintf(f, "begin-buf");
- q = 1;
- }
- if (anchor & ANCHOR_BEGIN_LINE) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "begin-line");
- }
- if (anchor & ANCHOR_BEGIN_POSITION) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "begin-pos");
- }
- if (anchor & ANCHOR_END_BUF) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "end-buf");
- }
- if (anchor & ANCHOR_SEMI_END_BUF) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "semi-end-buf");
- }
- if (anchor & ANCHOR_END_LINE) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "end-line");
- }
- if (anchor & ANCHOR_ANYCHAR_STAR) {
- if (q) fprintf(f, ", ");
- q = 1;
- fprintf(f, "anychar-star");
- }
- if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
- if (q) fprintf(f, ", ");
- fprintf(f, "anychar-star-pl");
- }
-
- fprintf(f, "]");
-}
-
-static void
-print_optimize_info(FILE* f, regex_t* reg)
-{
- static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
- "EXACT_IC", "MAP" };
-
- fprintf(f, "optimize: %s\n", on[reg->optimize]);
- fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
- if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
- print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
- fprintf(f, "\n");
-
- if (reg->optimize) {
- fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
- fprintf(f, "\n");
- }
- fprintf(f, "\n");
-
- if (reg->exact) {
- UChar *p;
- fprintf(f, "exact: [");
- for (p = reg->exact; p < reg->exact_end; p++) {
- fputc(*p, f);
- }
- fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
- }
- else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
- int c, i, n = 0;
-
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- if (reg->map[i]) n++;
-
- fprintf(f, "map: n=%d\n", n);
- if (n > 0) {
- c = 0;
- fputc('[', f);
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
- if (reg->map[i] != 0) {
- if (c > 0) fputs(", ", f);
- c++;
- if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
- ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
- fputc(i, f);
- else
- fprintf(f, "%d", i);
- }
- }
- fprintf(f, "]\n");
- }
- }
-}
-#endif /* ONIG_DEBUG */
-
-
-extern void
-onig_free_body(regex_t* reg)
-{
- if (IS_NOT_NULL(reg)) {
- if (IS_NOT_NULL(reg->p)) xfree(reg->p);
- if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
- if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
- if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
- if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
-
-#ifdef USE_NAMED_GROUP
- onig_names_free(reg);
-#endif
- }
-}
-
-extern void
-onig_free(regex_t* reg)
-{
- if (IS_NOT_NULL(reg)) {
- onig_free_body(reg);
- xfree(reg);
- }
-}
-
-size_t
-onig_memsize(regex_t *reg)
-{
- size_t size = sizeof(regex_t);
- if (IS_NOT_NULL(reg->p)) size += reg->alloc;
- if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
- if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
- if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
- if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
- if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
-
- return size;
-}
-
-#define REGEX_TRANSFER(to,from) do {\
- (to)->state = ONIG_STATE_MODIFY;\
- onig_free_body(to);\
- xmemcpy(to, from, sizeof(regex_t));\
- xfree(from);\
-} while (0)
-
-extern void
-onig_transfer(regex_t* to, regex_t* from)
-{
- THREAD_ATOMIC_START;
- REGEX_TRANSFER(to, from);
- THREAD_ATOMIC_END;
-}
-
-#define REGEX_CHAIN_HEAD(reg) do {\
- while (IS_NOT_NULL((reg)->chain)) {\
- (reg) = (reg)->chain;\
- }\
-} while (0)
-
-extern void
-onig_chain_link_add(regex_t* to, regex_t* add)
-{
- THREAD_ATOMIC_START;
- REGEX_CHAIN_HEAD(to);
- to->chain = add;
- THREAD_ATOMIC_END;
-}
-
-extern void
-onig_chain_reduce(regex_t* reg)
-{
- regex_t *head, *prev;
-
- prev = reg;
- head = prev->chain;
- if (IS_NOT_NULL(head)) {
- reg->state = ONIG_STATE_MODIFY;
- while (IS_NOT_NULL(head->chain)) {
- prev = head;
- head = head->chain;
- }
- prev->chain = (regex_t* )NULL;
- REGEX_TRANSFER(reg, head);
- }
-}
-
-#ifdef ONIG_DEBUG
-static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
-#endif
-#ifdef ONIG_DEBUG_PARSE_TREE
-static void print_tree P_((FILE* f, Node* node));
-#endif
-
-extern int
-onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
- OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
-{
-#define COMPILE_INIT_SIZE 20
-
- int r, init_size;
- Node* root;
- ScanEnv scan_env = {0};
-#ifdef USE_SUBEXP_CALL
- UnsetAddrList uslist;
-#endif
-
- if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
-
- scan_env.sourcefile = sourcefile;
- scan_env.sourceline = sourceline;
- reg->state = ONIG_STATE_COMPILING;
-
-#ifdef ONIG_DEBUG
- print_enc_string(stderr, reg->enc, pattern, pattern_end);
-#endif
-
- if (reg->alloc == 0) {
- init_size = (pattern_end - pattern) * 2;
- if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
- r = BBUF_INIT(reg, init_size);
- if (r != 0) goto end;
- }
- else
- reg->used = 0;
-
- reg->num_mem = 0;
- reg->num_repeat = 0;
- reg->num_null_check = 0;
- reg->repeat_range_alloc = 0;
- reg->repeat_range = (OnigRepeatRange* )NULL;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- reg->num_comb_exp_check = 0;
-#endif
-
- r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
- if (r != 0) goto err;
-
-#ifdef USE_NAMED_GROUP
- /* mixed use named group and no-named group */
- if (scan_env.num_named > 0 &&
- IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
- if (scan_env.num_named != scan_env.num_mem)
- r = disable_noname_group_capture(&root, reg, &scan_env);
- else
- r = numbered_ref_check(root);
-
- if (r != 0) goto err;
- }
-#endif
-
-#ifdef USE_SUBEXP_CALL
- if (scan_env.num_call > 0) {
- r = unset_addr_list_init(&uslist, scan_env.num_call);
- if (r != 0) goto err;
- scan_env.unset_addr_list = &uslist;
- r = setup_subexp_call(root, &scan_env);
- if (r != 0) goto err_unset;
- r = subexp_recursive_check_trav(root, &scan_env);
- if (r < 0) goto err_unset;
- r = subexp_inf_recursive_check_trav(root, &scan_env);
- if (r != 0) goto err_unset;
-
- reg->num_call = scan_env.num_call;
- }
- else
- reg->num_call = 0;
-#endif
-
- r = setup_tree(root, reg, 0, &scan_env);
- if (r != 0) goto err_unset;
-
-#ifdef ONIG_DEBUG_PARSE_TREE
- print_tree(stderr, root);
-#endif
-
- reg->capture_history = scan_env.capture_history;
- reg->bt_mem_start = scan_env.bt_mem_start;
- reg->bt_mem_start |= reg->capture_history;
- if (IS_FIND_CONDITION(reg->options))
- BIT_STATUS_ON_ALL(reg->bt_mem_end);
- else {
- reg->bt_mem_end = scan_env.bt_mem_end;
- reg->bt_mem_end |= reg->capture_history;
- }
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- if (scan_env.backrefed_mem == 0
-#ifdef USE_SUBEXP_CALL
- || scan_env.num_call == 0
-#endif
- ) {
- setup_comb_exp_check(root, 0, &scan_env);
-#ifdef USE_SUBEXP_CALL
- if (scan_env.has_recursion != 0) {
- scan_env.num_comb_exp_check = 0;
- }
- else
-#endif
- if (scan_env.comb_exp_max_regnum > 0) {
- int i;
- for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
- if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
- scan_env.num_comb_exp_check = 0;
- break;
- }
- }
- }
- }
-
- reg->num_comb_exp_check = scan_env.num_comb_exp_check;
-#endif
-
- clear_optimize_info(reg);
-#ifndef ONIG_DONT_OPTIMIZE
- r = set_optimize_info_from_tree(root, reg, &scan_env);
- if (r != 0) goto err_unset;
-#endif
-
- if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
- xfree(scan_env.mem_nodes_dynamic);
- scan_env.mem_nodes_dynamic = (Node** )NULL;
- }
-
- r = compile_tree(root, reg);
- if (r == 0) {
- r = add_opcode(reg, OP_END);
-#ifdef USE_SUBEXP_CALL
- if (scan_env.num_call > 0) {
- r = unset_addr_list_fix(&uslist, reg);
- unset_addr_list_end(&uslist);
- if (r) goto err;
- }
-#endif
-
- if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
- reg->stack_pop_level = STACK_POP_LEVEL_ALL;
- else {
- if (reg->bt_mem_start != 0)
- reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
- else
- reg->stack_pop_level = STACK_POP_LEVEL_FREE;
- }
- }
-#ifdef USE_SUBEXP_CALL
- else if (scan_env.num_call > 0) {
- unset_addr_list_end(&uslist);
- }
-#endif
- onig_node_free(root);
-
-#ifdef ONIG_DEBUG_COMPILE
-#ifdef USE_NAMED_GROUP
- onig_print_names(stderr, reg);
-#endif
- print_compiled_byte_code_list(stderr, reg);
-#endif
-
- end:
- reg->state = ONIG_STATE_NORMAL;
- return r;
-
- err_unset:
-#ifdef USE_SUBEXP_CALL
- if (scan_env.num_call > 0) {
- unset_addr_list_end(&uslist);
- }
-#endif
- err:
- if (IS_NOT_NULL(scan_env.error)) {
- if (IS_NOT_NULL(einfo)) {
- einfo->enc = scan_env.enc;
- einfo->par = scan_env.error;
- einfo->par_end = scan_env.error_end;
- }
- }
-
- onig_node_free(root);
- if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
- xfree(scan_env.mem_nodes_dynamic);
- return r;
-}
-
-#ifdef USE_RECOMPILE_API
-extern int
-onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
-{
- int r;
- regex_t *new_reg;
-
- r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
- if (r) return r;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_transfer(reg, new_reg);
- }
- else {
- onig_chain_link_add(reg, new_reg);
- }
- return 0;
-}
-#endif
-
-static int onig_inited = 0;
-
-extern int
-onig_reg_init(regex_t* reg, OnigOptionType option,
- OnigCaseFoldType case_fold_flag,
- OnigEncoding enc, const OnigSyntaxType* syntax)
-{
- if (! onig_inited)
- onig_init();
-
- if (IS_NULL(reg))
- return ONIGERR_INVALID_ARGUMENT;
-
- if (ONIGENC_IS_UNDEF(enc))
- return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
-
- if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
- == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
- }
-
- (reg)->state = ONIG_STATE_MODIFY;
-
- if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
- option |= syntax->options;
- option &= ~ONIG_OPTION_SINGLELINE;
- }
- else
- option |= syntax->options;
-
- (reg)->enc = enc;
- (reg)->options = option;
- (reg)->syntax = syntax;
- (reg)->optimize = 0;
- (reg)->exact = (UChar* )NULL;
- (reg)->int_map = (int* )NULL;
- (reg)->int_map_backward = (int* )NULL;
- (reg)->chain = (regex_t* )NULL;
-
- (reg)->p = (UChar* )NULL;
- (reg)->alloc = 0;
- (reg)->used = 0;
- (reg)->name_table = (void* )NULL;
-
- (reg)->case_fold_flag = case_fold_flag;
- return 0;
-}
-
-extern int
-onig_new_without_alloc(regex_t* reg, const UChar* pattern,
- const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
- OnigSyntaxType* syntax, OnigErrorInfo* einfo)
-{
- int r;
-
- r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r) return r;
-
- r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
- return r;
-}
-
-extern int
-onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
-{
- int r;
-
- *reg = (regex_t* )xmalloc(sizeof(regex_t));
- if (IS_NULL(*reg)) return ONIGERR_MEMORY;
-
- r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r) goto err;
-
- r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
- if (r) {
- err:
- onig_free(*reg);
- *reg = NULL;
- }
- return r;
-}
-
-
-extern int
-onig_init(void)
-{
- if (onig_inited != 0)
- return 0;
-
- THREAD_SYSTEM_INIT;
- THREAD_ATOMIC_START;
-
- onig_inited = 1;
-
- onigenc_init();
- /* onigenc_set_default_caseconv_table((UChar* )0); */
-
-#ifdef ONIG_DEBUG_STATISTICS
- onig_statistics_init();
-#endif
-
- THREAD_ATOMIC_END;
- return 0;
-}
-
-
-extern int
-onig_end(void)
-{
- THREAD_ATOMIC_START;
-
-#ifdef ONIG_DEBUG_STATISTICS
- onig_print_statistics(stderr);
-#endif
-
-#ifdef USE_SHARED_CCLASS_TABLE
- onig_free_shared_cclass_table();
-#endif
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- onig_free_node_list();
-#endif
-
- onig_inited = 0;
-
- THREAD_ATOMIC_END;
- THREAD_SYSTEM_END;
- return 0;
-}
-#endif //ENABLE_REGEXP
-
-#ifdef INCLUDE_ENCODING
-extern int
-onig_is_in_code_range(const UChar* p, OnigCodePoint code)
-{
- OnigCodePoint n, *data;
- OnigCodePoint low, high, x;
-
- GET_CODE_POINT(n, p);
- data = (OnigCodePoint* )p;
- data++;
-
- for (low = 0, high = n; low < high; ) {
- x = (low + high) >> 1;
- if (code > data[x * 2 + 1])
- low = x + 1;
- else
- high = x;
- }
-
- return ((low < n && code >= data[low * 2]) ? 1 : 0);
-}
-#endif //INCLUDE_ENCODING
-
-#ifdef ENABLE_REGEXP
-extern int
-onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
-{
- int found;
-
- if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
- if (IS_NULL(cc->mbuf)) {
- found = 0;
- }
- else {
- found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
- }
- }
- else {
- found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
- }
-
- if (IS_NCCLASS_NOT(cc))
- return !found;
- else
- return found;
-}
-
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
- int len;
-
- if (ONIGENC_MBC_MINLEN(enc) > 1) {
- len = 2;
- }
- else {
- len = ONIGENC_CODE_TO_MBCLEN(enc, code);
- }
- return onig_is_code_in_cc_len(len, code, cc);
-}
-
-
-#ifdef ONIG_DEBUG
-
-/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
-#define ARG_STATE_CHECK 6
-
-OnigOpInfoType OnigOpInfo[] = {
- { OP_FINISH, "finish", ARG_NON },
- { OP_END, "end", ARG_NON },
- { OP_EXACT1, "exact1", ARG_SPECIAL },
- { OP_EXACT2, "exact2", ARG_SPECIAL },
- { OP_EXACT3, "exact3", ARG_SPECIAL },
- { OP_EXACT4, "exact4", ARG_SPECIAL },
- { OP_EXACT5, "exact5", ARG_SPECIAL },
- { OP_EXACTN, "exactn", ARG_SPECIAL },
- { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
- { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
- { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
- { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
- { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
- { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
- { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
- { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
- { OP_CCLASS, "cclass", ARG_SPECIAL },
- { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
- { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
- { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
- { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
- { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
- { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
- { OP_ANYCHAR, "anychar", ARG_NON },
- { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
- { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
- { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
- { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
- { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
- { OP_WORD, "word", ARG_NON },
- { OP_NOT_WORD, "not-word", ARG_NON },
- { OP_WORD_BOUND, "word-bound", ARG_NON },
- { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
- { OP_WORD_BEGIN, "word-begin", ARG_NON },
- { OP_WORD_END, "word-end", ARG_NON },
- { OP_BEGIN_BUF, "begin-buf", ARG_NON },
- { OP_END_BUF, "end-buf", ARG_NON },
- { OP_BEGIN_LINE, "begin-line", ARG_NON },
- { OP_END_LINE, "end-line", ARG_NON },
- { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
- { OP_BEGIN_POSITION, "begin-position", ARG_NON },
- { OP_BACKREF1, "backref1", ARG_NON },
- { OP_BACKREF2, "backref2", ARG_NON },
- { OP_BACKREFN, "backrefn", ARG_MEMNUM },
- { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
- { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
- { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
- { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
- { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
- { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
- { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
- { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
- { OP_SET_OPTION, "set-option", ARG_OPTION },
- { OP_FAIL, "fail", ARG_NON },
- { OP_JUMP, "jump", ARG_RELADDR },
- { OP_PUSH, "push", ARG_RELADDR },
- { OP_POP, "pop", ARG_NON },
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
- { OP_REPEAT, "repeat", ARG_SPECIAL },
- { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
- { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
- { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
- { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
- { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
- { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
- { OP_PUSH_POS, "push-pos", ARG_NON },
- { OP_POP_POS, "pop-pos", ARG_NON },
- { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
- { OP_FAIL_POS, "fail-pos", ARG_NON },
- { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
- { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
- { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
- { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
- { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
- { OP_CALL, "call", ARG_ABSADDR },
- { OP_RETURN, "return", ARG_NON },
- { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
- { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
- { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
- { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
- { OP_STATE_CHECK_ANYCHAR_ML_STAR,
- "state-check-anychar-ml*", ARG_STATE_CHECK },
- { -1, "", ARG_NON }
-};
-
-static char*
-op2name(int opcode)
-{
- int i;
-
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
- if (opcode == OnigOpInfo[i].opcode)
- return OnigOpInfo[i].name;
- }
- return "";
-}
-
-static int
-op2arg_type(int opcode)
-{
- int i;
-
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
- if (opcode == OnigOpInfo[i].opcode)
- return OnigOpInfo[i].arg_type;
- }
- return ARG_SPECIAL;
-}
-
-static void
-Indent(FILE* f, int indent)
-{
- int i;
- for (i = 0; i < indent; i++) putc(' ', f);
-}
-
-static void
-p_string(FILE* f, int len, UChar* s)
-{
- fputs(":", f);
- while (len-- > 0) { fputc(*s++, f); }
-}
-
-static void
-p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
-{
- int x = len * mb_len;
-
- fprintf(f, ":%d:", len);
- while (x-- > 0) { fputc(*s++, f); }
-}
-
-extern void
-onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
- OnigEncoding enc)
-{
- int i, n, arg_type;
- RelAddrType addr;
- LengthType len;
- MemNumType mem;
- StateCheckNumType scn;
- OnigCodePoint code;
- UChar *q;
-
- fprintf(f, "[%s", op2name(*bp));
- arg_type = op2arg_type(*bp);
- if (arg_type != ARG_SPECIAL) {
- bp++;
- switch (arg_type) {
- case ARG_NON:
- break;
- case ARG_RELADDR:
- GET_RELADDR_INC(addr, bp);
- fprintf(f, ":(%d)", addr);
- break;
- case ARG_ABSADDR:
- GET_ABSADDR_INC(addr, bp);
- fprintf(f, ":(%d)", addr);
- break;
- case ARG_LENGTH:
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d", len);
- break;
- case ARG_MEMNUM:
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- fprintf(f, ":%d", mem);
- break;
- case ARG_OPTION:
- {
- OnigOptionType option = *((OnigOptionType* )bp);
- bp += SIZE_OPTION;
- fprintf(f, ":%d", option);
- }
- break;
-
- case ARG_STATE_CHECK:
- scn = *((StateCheckNumType* )bp);
- bp += SIZE_STATE_CHECK_NUM;
- fprintf(f, ":%d", scn);
- break;
- }
- }
- else {
- switch (*bp++) {
- case OP_EXACT1:
- case OP_ANYCHAR_STAR_PEEK_NEXT:
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
- p_string(f, 1, bp++); break;
- case OP_EXACT2:
- p_string(f, 2, bp); bp += 2; break;
- case OP_EXACT3:
- p_string(f, 3, bp); bp += 3; break;
- case OP_EXACT4:
- p_string(f, 4, bp); bp += 4; break;
- case OP_EXACT5:
- p_string(f, 5, bp); bp += 5; break;
- case OP_EXACTN:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 1, bp);
- bp += len;
- break;
-
- case OP_EXACTMB2N1:
- p_string(f, 2, bp); bp += 2; break;
- case OP_EXACTMB2N2:
- p_string(f, 4, bp); bp += 4; break;
- case OP_EXACTMB2N3:
- p_string(f, 6, bp); bp += 6; break;
- case OP_EXACTMB2N:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 2, bp);
- bp += len * 2;
- break;
- case OP_EXACTMB3N:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 3, bp);
- bp += len * 3;
- break;
- case OP_EXACTMBN:
- {
- int mb_len;
-
- GET_LENGTH_INC(mb_len, bp);
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:%d:", mb_len, len);
- n = len * mb_len;
- while (n-- > 0) { fputc(*bp++, f); }
- }
- break;
-
- case OP_EXACT1_IC:
- len = enclen(enc, bp, bpend);
- p_string(f, len, bp);
- bp += len;
- break;
- case OP_EXACTN_IC:
- GET_LENGTH_INC(len, bp);
- p_len_string(f, len, 1, bp);
- bp += len;
- break;
-
- case OP_CCLASS:
- n = bitset_on_num((BitSetRef )bp);
- bp += SIZE_BITSET;
- fprintf(f, ":%d", n);
- break;
-
- case OP_CCLASS_NOT:
- n = bitset_on_num((BitSetRef )bp);
- bp += SIZE_BITSET;
- fprintf(f, ":%d", n);
- break;
-
- case OP_CCLASS_MB:
- case OP_CCLASS_MB_NOT:
- GET_LENGTH_INC(len, bp);
- q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- ALIGNMENT_RIGHT(q);
-#endif
- GET_CODE_POINT(code, q);
- bp += len;
- fprintf(f, ":%d:%d", (int )code, len);
- break;
-
- case OP_CCLASS_MIX:
- case OP_CCLASS_MIX_NOT:
- n = bitset_on_num((BitSetRef )bp);
- bp += SIZE_BITSET;
- GET_LENGTH_INC(len, bp);
- q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
- ALIGNMENT_RIGHT(q);
-#endif
- GET_CODE_POINT(code, q);
- bp += len;
- fprintf(f, ":%d:%d:%d", n, (int )code, len);
- break;
-
- case OP_CCLASS_NODE:
- {
- CClassNode *cc;
-
- GET_POINTER_INC(cc, bp);
- n = bitset_on_num(cc->bs);
- fprintf(f, ":%u:%d", (unsigned int )cc, n);
- }
- break;
-
- case OP_BACKREFN_IC:
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- fprintf(f, ":%d", mem);
- break;
-
- case OP_BACKREF_MULTI_IC:
- case OP_BACKREF_MULTI:
- fputs(" ", f);
- GET_LENGTH_INC(len, bp);
- for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
- break;
-
- case OP_BACKREF_WITH_LEVEL:
- {
- OnigOptionType option;
- LengthType level;
-
- GET_OPTION_INC(option, bp);
- fprintf(f, ":%d", option);
- GET_LENGTH_INC(level, bp);
- fprintf(f, ":%d", level);
-
- fputs(" ", f);
- GET_LENGTH_INC(len, bp);
- for (i = 0; i < len; i++) {
- GET_MEMNUM_INC(mem, bp);
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", mem);
- }
- }
- break;
-
- case OP_REPEAT:
- case OP_REPEAT_NG:
- {
- mem = *((MemNumType* )bp);
- bp += SIZE_MEMNUM;
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":%d:%d", mem, addr);
- }
- break;
-
- case OP_PUSH_OR_JUMP_EXACT1:
- case OP_PUSH_IF_PEEK_NEXT:
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":(%d)", addr);
- p_string(f, 1, bp);
- bp += 1;
- break;
-
- case OP_LOOK_BEHIND:
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d", len);
- break;
-
- case OP_PUSH_LOOK_BEHIND_NOT:
- GET_RELADDR_INC(addr, bp);
- GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:(%d)", len, addr);
- break;
-
- case OP_STATE_CHECK_PUSH:
- case OP_STATE_CHECK_PUSH_OR_JUMP:
- scn = *((StateCheckNumType* )bp);
- bp += SIZE_STATE_CHECK_NUM;
- addr = *((RelAddrType* )bp);
- bp += SIZE_RELADDR;
- fprintf(f, ":%d:(%d)", scn, addr);
- break;
-
- default:
- fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
- *--bp);
- }
- }
- fputs("]", f);
- if (nextp) *nextp = bp;
-}
-
-static void
-print_compiled_byte_code_list(FILE* f, regex_t* reg)
-{
- int ncode;
- UChar* bp = reg->p;
- UChar* end = reg->p + reg->used;
-
- fprintf(f, "code length: %d\n", reg->used);
-
- ncode = 0;
- while (bp < end) {
- ncode++;
- if (bp > reg->p) {
- if (ncode % 5 == 0)
- fprintf(f, "\n");
- else
- fputs(" ", f);
- }
- onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
- }
-
- fprintf(f, "\n");
-}
-
-static void
-print_indent_tree(FILE* f, Node* node, int indent)
-{
- int i, type;
- int add = 3;
- UChar* p;
-
- Indent(f, indent);
- if (IS_NULL(node)) {
- fprintf(f, "ERROR: null node!!!\n");
- exit (0);
- }
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- case NT_ALT:
- if (NTYPE(node) == NT_LIST)
- fprintf(f, "<list:%x>\n", (int )node);
- else
- fprintf(f, "<alt:%x>\n", (int )node);
-
- print_indent_tree(f, NCAR(node), indent + add);
- while (IS_NOT_NULL(node = NCDR(node))) {
- if (NTYPE(node) != type) {
- fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
- exit(0);
- }
- print_indent_tree(f, NCAR(node), indent + add);
- }
- break;
-
- case NT_STR:
- fprintf(f, "<string%s:%x>",
- (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
- for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
- if (*p >= 0x20 && *p < 0x7f)
- fputc(*p, f);
- else {
- fprintf(f, " 0x%02x", *p);
- }
- }
- break;
-
- case NT_CCLASS:
- fprintf(f, "<cclass:%x>", (int )node);
- if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
- if (NCCLASS(node)->mbuf) {
- BBuf* bbuf = NCCLASS(node)->mbuf;
- for (i = 0; i < bbuf->used; i++) {
- if (i > 0) fprintf(f, ",");
- fprintf(f, "%0x", bbuf->p[i]);
- }
- }
- break;
-
- case NT_CTYPE:
- fprintf(f, "<ctype:%x> ", (int )node);
- switch (NCTYPE(node)->ctype) {
- case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->is_not != 0)
- fputs("not word", f);
- else
- fputs("word", f);
- break;
-
- default:
- fprintf(f, "ERROR: undefined ctype.\n");
- exit(0);
- }
- break;
-
- case NT_CANY:
- fprintf(f, "<anychar:%x>", (int )node);
- break;
-
- case NT_ANCHOR:
- fprintf(f, "<anchor:%x> ", (int )node);
- switch (NANCHOR(node)->type) {
- case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
- case ANCHOR_END_BUF: fputs("end buf", f); break;
- case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
- case ANCHOR_END_LINE: fputs("end line", f); break;
- case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
- case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
-
- case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
- case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
-#ifdef USE_WORD_BEGIN_END
- case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
- case ANCHOR_WORD_END: fputs("word end", f); break;
-#endif
- case ANCHOR_PREC_READ: fputs("prec read", f); break;
- case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
- case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
- case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
-
- default:
- fprintf(f, "ERROR: undefined anchor type.\n");
- break;
- }
- break;
-
- case NT_BREF:
- {
- int* p;
- BRefNode* br = NBREF(node);
- p = BACKREFS_P(br);
- fprintf(f, "<backref:%x>", (int )node);
- for (i = 0; i < br->back_num; i++) {
- if (i > 0) fputs(", ", f);
- fprintf(f, "%d", p[i]);
- }
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- {
- CallNode* cn = NCALL(node);
- fprintf(f, "<call:%x>", (int )node);
- p_string(f, cn->name_end - cn->name, cn->name);
- }
- break;
-#endif
-
- case NT_QTFR:
- fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
- NQTFR(node)->lower, NQTFR(node)->upper,
- (NQTFR(node)->greedy ? "" : "?"));
- print_indent_tree(f, NQTFR(node)->target, indent + add);
- break;
-
- case NT_ENCLOSE:
- fprintf(f, "<enclose:%x> ", (int )node);
- switch (NENCLOSE(node)->type) {
- case ENCLOSE_OPTION:
- fprintf(f, "option:%d\n", NENCLOSE(node)->option);
- print_indent_tree(f, NENCLOSE(node)->target, indent + add);
- break;
- case ENCLOSE_MEMORY:
- fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
- break;
- case ENCLOSE_STOP_BACKTRACK:
- fprintf(f, "stop-bt");
- break;
-
- default:
- break;
- }
- fprintf(f, "\n");
- print_indent_tree(f, NENCLOSE(node)->target, indent + add);
- break;
-
- default:
- fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
- break;
- }
-
- if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
- type != NT_ENCLOSE)
- fprintf(f, "\n");
- fflush(f);
-}
-#endif /* ONIG_DEBUG */
-
-#ifdef ONIG_DEBUG_PARSE_TREE
-static void
-print_tree(FILE* f, Node* node)
-{
- print_indent_tree(f, node, 0);
-}
-#endif
-#endif //ENABLE_REGEXP
diff --git a/src/regenc.c b/src/regenc.c
deleted file mode 100644
index 4cc496782..000000000
--- a/src/regenc.c
+++ /dev/null
@@ -1,901 +0,0 @@
-/**********************************************************************
- regenc.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#ifdef INCLUDE_ENCODING
-#include <string.h>
-#include "regint.h"
-
-OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
-
-extern int
-onigenc_init(void)
-{
- return 0;
-}
-
-extern OnigEncoding
-onigenc_get_default_encoding(void)
-{
- return OnigEncDefaultCharEncoding;
-}
-
-extern int
-onigenc_set_default_encoding(OnigEncoding enc)
-{
- OnigEncDefaultCharEncoding = enc;
- return 0;
-}
-
-extern int
-onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
-{
- int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
- if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
- return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
- else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
- return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
- return 1;
-}
-
-extern UChar*
-onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
-{
- UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
- if (p < s) {
- p += enclen(enc, p, end);
- }
- return p;
-}
-
-extern UChar*
-onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
- const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
-{
- UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
-
- if (p < s) {
- if (prev) *prev = (const UChar* )p;
- p += enclen(enc, p, end);
- }
- else {
- if (prev) *prev = (const UChar* )NULL; /* Sorry */
- }
- return p;
-}
-
-extern UChar*
-onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
-{
- if (s <= start)
- return (UChar* )NULL;
-
- return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
-}
-
-extern UChar*
-onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
-{
- while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
- if (s <= start)
- return (UChar* )NULL;
-
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
- }
- return (UChar* )s;
-}
-
-extern UChar*
-onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
-{
- UChar* q = (UChar* )p;
- while (n-- > 0) {
- q += ONIGENC_MBC_ENC_LEN(enc, q, end);
- }
- return (q <= end ? q : NULL);
-}
-
-extern int
-onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- int n = 0;
- UChar* q = (UChar* )p;
-
- while (q < end) {
- q += ONIGENC_MBC_ENC_LEN(enc, q, end);
- n++;
- }
- return n;
-}
-
-extern int
-onigenc_strlen_null(OnigEncoding enc, const UChar* s)
-{
- int n = 0;
- UChar* p = (UChar* )s;
- UChar* e;
-
- while (1) {
- if (*p == '\0') {
- UChar* q;
- int len = ONIGENC_MBC_MINLEN(enc);
-
- if (len == 1) return n;
- q = p + 1;
- while (len > 1) {
- if (*q != '\0') break;
- q++;
- len--;
- }
- if (len == 1) return n;
- }
- e = p + ONIGENC_MBC_MAXLEN(enc);
- p += ONIGENC_MBC_ENC_LEN(enc, p, e);
- n++;
- }
-}
-
-extern int
-onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
-{
- UChar* start = (UChar* )s;
- UChar* p = (UChar* )s;
- UChar* e;
-
- while (1) {
- if (*p == '\0') {
- UChar* q;
- int len = ONIGENC_MBC_MINLEN(enc);
-
- if (len == 1) return (int )(p - start);
- q = p + 1;
- while (len > 1) {
- if (*q != '\0') break;
- q++;
- len--;
- }
- if (len == 1) return (int )(p - start);
- }
- e = p + ONIGENC_MBC_MAXLEN(enc);
- p += ONIGENC_MBC_ENC_LEN(enc, p, e);
- }
-}
-
-const UChar OnigEncAsciiToLowerCaseTable[] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
- '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-
-#ifdef USE_UPPER_CASE_TABLE
-const UChar OnigEncAsciiToUpperCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
- '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
-};
-#endif
-
-const unsigned short OnigEncAsciiCtypeTable[256] = {
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
- 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
- 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
- 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
- 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
- 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
- 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
-};
-
-const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
- '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
- '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
- '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
- '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
- '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
- '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
- '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
-};
-
-#ifdef USE_UPPER_CASE_TABLE
-const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
- '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
- '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
- '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
- '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
- '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
- '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
- '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
- '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
- '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
- '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
- '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
- '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
- '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
- '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
- '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
- '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
- '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
- '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
- '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
- '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
- '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
- '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
-};
-#endif
-
-extern void
-onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
-{
- /* nothing */
- /* obsoleted. */
-}
-
-extern UChar*
-onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
-{
- return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
-}
-
-const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
- { 0x41, 0x61 },
- { 0x42, 0x62 },
- { 0x43, 0x63 },
- { 0x44, 0x64 },
- { 0x45, 0x65 },
- { 0x46, 0x66 },
- { 0x47, 0x67 },
- { 0x48, 0x68 },
- { 0x49, 0x69 },
- { 0x4a, 0x6a },
- { 0x4b, 0x6b },
- { 0x4c, 0x6c },
- { 0x4d, 0x6d },
- { 0x4e, 0x6e },
- { 0x4f, 0x6f },
- { 0x50, 0x70 },
- { 0x51, 0x71 },
- { 0x52, 0x72 },
- { 0x53, 0x73 },
- { 0x54, 0x74 },
- { 0x55, 0x75 },
- { 0x56, 0x76 },
- { 0x57, 0x77 },
- { 0x58, 0x78 },
- { 0x59, 0x79 },
- { 0x5a, 0x7a }
-};
-
-extern int
-onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
- OnigApplyAllCaseFoldFunc f, void* arg,
- OnigEncoding enc ARG_UNUSED)
-{
- OnigCodePoint code;
- int i, r;
-
- for (i = 0;
- i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
- i++) {
- code = OnigAsciiLowerMap[i].to;
- r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
- if (r != 0) return r;
-
- code = OnigAsciiLowerMap[i].from;
- r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
- if (r != 0) return r;
- }
-
- return 0;
-}
-
-extern int
-onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
- const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
-{
- if (0x41 <= *p && *p <= 0x5a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p + 0x20);
- return 1;
- }
- else if (0x61 <= *p && *p <= 0x7a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p - 0x20);
- return 1;
- }
- else
- return 0;
-}
-
-static int
-ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
- OnigApplyAllCaseFoldFunc f, void* arg)
-{
- OnigCodePoint ss[] = { 0x73, 0x73 };
-
- return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
-}
-
-extern int
-onigenc_apply_all_case_fold_with_map(int map_size,
- const OnigPairCaseFoldCodes map[],
- int ess_tsett_flag, OnigCaseFoldType flag,
- OnigApplyAllCaseFoldFunc f, void* arg)
-{
- OnigCodePoint code;
- int i, r;
-
- r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
- if (r != 0) return r;
-
- for (i = 0; i < map_size; i++) {
- code = map[i].to;
- r = (*f)(map[i].from, &code, 1, arg);
- if (r != 0) return r;
-
- code = map[i].from;
- r = (*f)(map[i].to, &code, 1, arg);
- if (r != 0) return r;
- }
-
- if (ess_tsett_flag != 0)
- return ss_apply_all_case_fold(flag, f, arg);
-
- return 0;
-}
-
-extern int
-onigenc_get_case_fold_codes_by_str_with_map(int map_size,
- const OnigPairCaseFoldCodes map[],
- int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
- const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
-{
- if (0x41 <= *p && *p <= 0x5a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p + 0x20);
- if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
- /* SS */
- items[1].byte_len = 2;
- items[1].code_len = 1;
- items[1].code[0] = (OnigCodePoint )0xdf;
- return 2;
- }
- else
- return 1;
- }
- else if (0x61 <= *p && *p <= 0x7a) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = (OnigCodePoint )(*p - 0x20);
- if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
- && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
- /* ss */
- items[1].byte_len = 2;
- items[1].code_len = 1;
- items[1].code[0] = (OnigCodePoint )0xdf;
- return 2;
- }
- else
- return 1;
- }
- else if (*p == 0xdf && ess_tsett_flag != 0) {
- items[0].byte_len = 1;
- items[0].code_len = 2;
- items[0].code[0] = (OnigCodePoint )'s';
- items[0].code[1] = (OnigCodePoint )'s';
-
- items[1].byte_len = 1;
- items[1].code_len = 2;
- items[1].code[0] = (OnigCodePoint )'S';
- items[1].code[1] = (OnigCodePoint )'S';
-
- items[2].byte_len = 1;
- items[2].code_len = 2;
- items[2].code[0] = (OnigCodePoint )'s';
- items[2].code[1] = (OnigCodePoint )'S';
-
- items[3].byte_len = 1;
- items[3].code_len = 2;
- items[3].code[0] = (OnigCodePoint )'S';
- items[3].code[1] = (OnigCodePoint )'s';
-
- return 4;
- }
- else {
- int i;
-
- for (i = 0; i < map_size; i++) {
- if (*p == map[i].from) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = map[i].to;
- return 1;
- }
- else if (*p == map[i].to) {
- items[0].byte_len = 1;
- items[0].code_len = 1;
- items[0].code[0] = map[i].from;
- return 1;
- }
- }
- }
-
- return 0;
-}
-
-
-extern int
-onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
- OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
- OnigEncoding enc)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
-{
- if (p < end) {
- if (*p == 0x0a) return 1;
- }
- return 0;
-}
-
-/* for single byte encodings */
-extern int
-onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
- const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
-{
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
-
- (*p)++;
- return 1; /* return byte length of converted char to lower */
-}
-
-extern int
-onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
-{
- return 1;
-}
-
-extern OnigCodePoint
-onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
-{
- return (OnigCodePoint )(*p);
-}
-
-extern int
-onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
-{
- return 1;
-}
-
-extern int
-onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
-{
- *buf = (UChar )(code & 0xff);
- return 1;
-}
-
-extern UChar*
-onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
- const UChar* end,
- OnigEncoding enc ARG_UNUSED)
-{
- return (UChar* )s;
-}
-
-extern int
-onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
-{
- return TRUE;
-}
-
-extern int
-onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
- OnigEncoding enc ARG_UNUSED)
-{
- return FALSE;
-}
-
-extern int
-onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
- OnigEncoding enc ARG_UNUSED)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
-}
-
-extern OnigCodePoint
-onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- int c, i, len;
- OnigCodePoint n;
-
- len = enclen(enc, p, end);
- n = (OnigCodePoint )(*p++);
- if (len == 1) return n;
-
- for (i = 1; i < len; i++) {
- if (p >= end) break;
- c = *p++;
- n <<= 8; n += c;
- }
- return n;
-}
-
-extern int
-onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
- const UChar** pp, const UChar* end ARG_UNUSED,
- UChar* lower)
-{
- int len;
- const UChar *p = *pp;
-
- if (ONIGENC_IS_MBC_ASCII(p)) {
- *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
- (*pp)++;
- return 1;
- }
- else {
- int i;
-
- len = enclen(enc, p, end);
- for (i = 0; i < len; i++) {
- *lower++ = *p++;
- }
- (*pp) += len;
- return len; /* return byte length of converted to lower char */
- }
-}
-
-extern int
-onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
-{
- if ((code & 0xff00) != 0) return 2;
- else return 1;
-}
-
-extern int
-onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
-{
- if ((code & 0xff000000) != 0) return 4;
- else if ((code & 0xff0000) != 0) return 3;
- else if ((code & 0xff00) != 0) return 2;
- else return 1;
-}
-
-extern int
-onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
-{
- UChar *p = buf;
-
- if ((code & 0xff00) != 0) {
- *p++ = (UChar )((code >> 8) & 0xff);
- }
- *p++ = (UChar )(code & 0xff);
-
- if (enclen(enc, buf, p) != (p - buf))
- return ONIGERR_INVALID_CODE_POINT_VALUE;
- return (int)(p - buf);
-}
-
-extern int
-onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
-{
- UChar *p = buf;
-
- if ((code & 0xff000000) != 0) {
- *p++ = (UChar )((code >> 24) & 0xff);
- }
- if ((code & 0xff0000) != 0 || p != buf) {
- *p++ = (UChar )((code >> 16) & 0xff);
- }
- if ((code & 0xff00) != 0 || p != buf) {
- *p++ = (UChar )((code >> 8) & 0xff);
- }
- *p++ = (UChar )(code & 0xff);
-
- if (enclen(enc, buf, p) != (p - buf))
- return ONIGERR_INVALID_CODE_POINT_VALUE;
- return (int)(p - buf);
-}
-
-extern int
-onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
-{
- static const PosixBracketEntryType PBS[] = {
- PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
- PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
- PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
- PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
- PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
- PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
- PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
- PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
- PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
- PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
- PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
- PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
- PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
- PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
- };
-
- const PosixBracketEntryType *pb, *pbe;
- int len;
-
- len = onigenc_strlen(enc, p, end);
- for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
- if (len == pb->len &&
- onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
- return pb->ctype;
- }
-
- return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
-}
-
-extern int
-onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
- unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
- return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
- }
- }
-
- return FALSE;
-}
-
-extern int
-onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
- unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
- return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
- }
- }
-
- return FALSE;
-}
-
-extern int
-onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
- const UChar* sascii /* ascii */, int n)
-{
- int x, c;
-
- while (n-- > 0) {
- if (p >= end) return (int )(*sascii);
-
- c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
- x = *sascii - c;
- if (x) return x;
-
- sascii++;
- p += enclen(enc, p, end);
- }
- return 0;
-}
-
-/* Property management */
-static int
-resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
-{
- size_t size;
- const OnigCodePoint **list = *plist;
-
- size = sizeof(OnigCodePoint*) * new_size;
- if (IS_NULL(list)) {
- list = (const OnigCodePoint** )xmalloc(size);
- }
- else {
- list = (const OnigCodePoint** )xrealloc((void* )list, size);
- }
-
- if (IS_NULL(list)) return ONIGERR_MEMORY;
-
- *plist = list;
- *psize = new_size;
-
- return 0;
-}
-
-extern int
-onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
- hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
- int *psize)
-{
-#define PROP_INIT_SIZE 16
-
- int r;
-
- if (*psize <= *pnum) {
- int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
- r = resize_property_list(new_size, plist, psize);
- if (r != 0) return r;
- }
-
- (*plist)[*pnum] = prop;
-
- if (ONIG_IS_NULL(*table)) {
- *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
- if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
- }
-
- *pnum = *pnum + 1;
- onig_st_insert_strend(*table, name, name + strlen((char* )name),
- (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
- return 0;
-}
-
-extern int
-onigenc_property_list_init(int (*f)(void))
-{
- int r;
-
- THREAD_ATOMIC_START;
-
- r = f();
-
- THREAD_ATOMIC_END;
- return r;
-}
-#endif //INCLUDE_ENCODING
diff --git a/src/regenc.h b/src/regenc.h
deleted file mode 100644
index 4e827d181..000000000
--- a/src/regenc.h
+++ /dev/null
@@ -1,203 +0,0 @@
-#ifndef ONIGURUMA_REGENC_H
-#define ONIGURUMA_REGENC_H
-/**********************************************************************
- regenc.h - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <stdio.h>
-#include <stdarg.h>
-#define RUBY
-
-#ifndef mrb_compile_warn
-#define mrb_compile_warn(a,b,c,d) printf(c,d)
-#endif
-
-#ifndef REGINT_H
-#ifdef ONIG_ESCAPE_UCHAR_COLLISION
-#undef ONIG_ESCAPE_UCHAR_COLLISION
-#endif
-#endif
-#include "oniguruma.h"
-
-typedef struct {
- OnigCodePoint from;
- OnigCodePoint to;
-} OnigPairCaseFoldCodes;
-
-
-#ifndef ARG_UNUSED
-#if defined(__GNUC__)
-# define ARG_UNUSED __attribute__ ((unused))
-#else
-# define ARG_UNUSED
-#endif
-#endif
-
-#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
-#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
-#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
-#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
-
-#define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e))
-
-/* character types bit flag */
-#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
-#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
-#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK)
-#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL)
-#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT)
-#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH)
-#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER)
-#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT)
-#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT)
-#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE)
-#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER)
-#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT)
-#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD)
-#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM)
-#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII)
-
-#define CTYPE_TO_BIT(ctype) (1<<(ctype))
-#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
- ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
- (ctype) == ONIGENC_CTYPE_PRINT)
-
-
-typedef struct {
- const UChar *name;
- int ctype;
- short int len;
-} PosixBracketEntryType;
-
-#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
-
-/* #define USE_CRNL_AS_LINE_TERMINATOR */
-#define USE_UNICODE_PROPERTIES
-/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
-/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
-
-
-#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
-
-/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
-ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc);
-ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
-ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
-ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc);
-
-/* methods for single byte encoding */
-ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc);
-ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc);
-ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
-ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
-ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc);
-ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc);
-ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
-ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
-ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
-
-/* methods for multi byte encoding */
-ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end);
-ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower);
-ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
-ONIG_EXTERN int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
-ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end);
-ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end);
-ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
-ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
-ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
-ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
-
-
-/* in enc/unicode.c */
-ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
-ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
-ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]);
-ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
-ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold);
-ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
-
-
-#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
-#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
-
-#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
- OnigEncISO_8859_1_ToLowerCaseTable[c]
-#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
- OnigEncISO_8859_1_ToUpperCaseTable[c]
-
-ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
-ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-
-ONIG_EXTERN int
-onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
-ONIG_EXTERN UChar*
-onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n);
-
-/* defined in regexec.c, but used in enc/xxx.c */
-extern int onig_is_in_code_range (const UChar* p, OnigCodePoint code);
-
-ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
-ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
-ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[];
-ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
-
-#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80)
-#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
-#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
-#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
- ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
-#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
- (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
- ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
-
-#ifdef ONIG_ENC_REGISTER
-extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*);
-#define OnigEncodingName(n) encoding_##n
-#define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n)
-#define OnigEncodingDefine(f,n) \
- OnigEncodingDeclare(n); \
- void Init_##f(void) { \
- ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
- &OnigEncodingName(n)); \
- } \
- OnigEncodingDeclare(n)
-#else
-#define OnigEncodingName(n) OnigEncoding##n
-#define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n)
-#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
-#endif
-
-/* macros for define replica encoding and encoding alias */
-#define ENC_REPLICATE(name, orig)
-#define ENC_ALIAS(name, orig)
-#define ENC_DUMMY(name)
-
-#endif /* ONIGURUMA_REGENC_H */
diff --git a/src/regerror.c b/src/regerror.c
deleted file mode 100644
index df60b49cc..000000000
--- a/src/regerror.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/**********************************************************************
- regerror.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#ifdef ENABLE_REGEXP
-#include <string.h>
-#include "regint.h"
-#include <stdio.h> /* for vsnprintf() */
-#include <stdarg.h>
-
-extern UChar*
-onig_error_code_to_format(int code)
-{
- const char *p;
-
- if (code >= 0) return (UChar* )0;
-
- switch (code) {
- case ONIG_MISMATCH:
- p = "mismatch"; break;
- case ONIG_NO_SUPPORT_CONFIG:
- p = "no support in this configuration"; break;
- case ONIGERR_MEMORY:
- p = "failed to allocate memory"; break;
- case ONIGERR_MATCH_STACK_LIMIT_OVER:
- p = "match-stack limit over"; break;
- case ONIGERR_TYPE_BUG:
- p = "undefined type (bug)"; break;
- case ONIGERR_PARSER_BUG:
- p = "internal parser error (bug)"; break;
- case ONIGERR_STACK_BUG:
- p = "stack error (bug)"; break;
- case ONIGERR_UNDEFINED_BYTECODE:
- p = "undefined bytecode (bug)"; break;
- case ONIGERR_UNEXPECTED_BYTECODE:
- p = "unexpected bytecode (bug)"; break;
- case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED:
- p = "default multibyte-encoding is not setted"; break;
- case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
- p = "can't convert to wide-char on specified multibyte-encoding"; break;
- case ONIGERR_INVALID_ARGUMENT:
- p = "invalid argument"; break;
- case ONIGERR_END_PATTERN_AT_LEFT_BRACE:
- p = "end pattern at left brace"; break;
- case ONIGERR_END_PATTERN_AT_LEFT_BRACKET:
- p = "end pattern at left bracket"; break;
- case ONIGERR_EMPTY_CHAR_CLASS:
- p = "empty char-class"; break;
- case ONIGERR_PREMATURE_END_OF_CHAR_CLASS:
- p = "premature end of char-class"; break;
- case ONIGERR_END_PATTERN_AT_ESCAPE:
- p = "end pattern at escape"; break;
- case ONIGERR_END_PATTERN_AT_META:
- p = "end pattern at meta"; break;
- case ONIGERR_END_PATTERN_AT_CONTROL:
- p = "end pattern at control"; break;
- case ONIGERR_META_CODE_SYNTAX:
- p = "invalid meta-code syntax"; break;
- case ONIGERR_CONTROL_CODE_SYNTAX:
- p = "invalid control-code syntax"; break;
- case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
- p = "char-class value at end of range"; break;
- case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
- p = "char-class value at start of range"; break;
- case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS:
- p = "unmatched range specifier in char-class"; break;
- case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED:
- p = "target of repeat operator is not specified"; break;
- case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID:
- p = "target of repeat operator is invalid"; break;
- case ONIGERR_NESTED_REPEAT_OPERATOR:
- p = "nested repeat operator"; break;
- case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS:
- p = "unmatched close parenthesis"; break;
- case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS:
- p = "end pattern with unmatched parenthesis"; break;
- case ONIGERR_END_PATTERN_IN_GROUP:
- p = "end pattern in group"; break;
- case ONIGERR_UNDEFINED_GROUP_OPTION:
- p = "undefined group option"; break;
- case ONIGERR_INVALID_POSIX_BRACKET_TYPE:
- p = "invalid POSIX bracket type"; break;
- case ONIGERR_INVALID_LOOK_BEHIND_PATTERN:
- p = "invalid pattern in look-behind"; break;
- case ONIGERR_INVALID_REPEAT_RANGE_PATTERN:
- p = "invalid repeat range {lower,upper}"; break;
- case ONIGERR_TOO_BIG_NUMBER:
- p = "too big number"; break;
- case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE:
- p = "too big number for repeat range"; break;
- case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE:
- p = "upper is smaller than lower in repeat range"; break;
- case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS:
- p = "empty range in char class"; break;
- case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE:
- p = "mismatch multibyte code length in char-class range"; break;
- case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES:
- p = "too many multibyte code ranges are specified"; break;
- case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING:
- p = "too short multibyte code string"; break;
- case ONIGERR_TOO_BIG_BACKREF_NUMBER:
- p = "too big backref number"; break;
- case ONIGERR_INVALID_BACKREF:
-#ifdef USE_NAMED_GROUP
- p = "invalid backref number/name"; break;
-#else
- p = "invalid backref number"; break;
-#endif
- case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED:
- p = "numbered backref/call is not allowed. (use name)"; break;
- case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
- p = "too big wide-char value"; break;
- case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
- p = "too long wide-char value"; break;
- case ONIGERR_INVALID_CODE_POINT_VALUE:
- p = "invalid code point value"; break;
- case ONIGERR_EMPTY_GROUP_NAME:
- p = "group name is empty"; break;
- case ONIGERR_INVALID_GROUP_NAME:
- p = "invalid group name <%n>"; break;
- case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
-#ifdef USE_NAMED_GROUP
- p = "invalid char in group name <%n>"; break;
-#else
- p = "invalid char in group number <%n>"; break;
-#endif
- case ONIGERR_UNDEFINED_NAME_REFERENCE:
- p = "undefined name <%n> reference"; break;
- case ONIGERR_UNDEFINED_GROUP_REFERENCE:
- p = "undefined group <%n> reference"; break;
- case ONIGERR_MULTIPLEX_DEFINED_NAME:
- p = "multiplex defined name <%n>"; break;
- case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
- p = "multiplex definition name <%n> call"; break;
- case ONIGERR_NEVER_ENDING_RECURSION:
- p = "never ending recursion"; break;
- case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY:
- p = "group number is too big for capture history"; break;
- case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
- p = "invalid character property name {%n}"; break;
- case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
- p = "not supported encoding combination"; break;
- case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
- p = "invalid combination of options"; break;
- case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
- p = "over thread pass limit count"; break;
-
- default:
- p = "undefined error code"; break;
- }
-
- return (UChar* )p;
-}
-
-static void sprint_byte(char* s, unsigned int v)
-{
- sprintf(s, "%02x", (v & 0377));
-}
-
-static void sprint_byte_with_x(char* s, unsigned int v)
-{
- sprintf(s, "\\x%02x", (v & 0377));
-}
-
-static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
- UChar buf[], int buf_size, int *is_over)
-{
- int len;
- UChar *p;
- OnigCodePoint code;
-
- if (ONIGENC_MBC_MINLEN(enc) > 1) {
- p = s;
- len = 0;
- while (p < end) {
- code = ONIGENC_MBC_TO_CODE(enc, p, end);
- if (code >= 0x80) {
- if (code > 0xffff && len + 10 <= buf_size) {
- sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
- sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
- sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
- sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
- len += 10;
- }
- else if (len + 6 <= buf_size) {
- sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
- sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
- len += 6;
- }
- else {
- break;
- }
- }
- else {
- buf[len++] = (UChar )code;
- }
-
- p += enclen(enc, p, end);
- if (len >= buf_size) break;
- }
-
- *is_over = ((p < end) ? 1 : 0);
- }
- else {
- len = (int)MIN((end - s), buf_size);
- xmemcpy(buf, s, (size_t )len);
- *is_over = ((buf_size < (end - s)) ? 1 : 0);
- }
-
- return len;
-}
-
-
-/* for ONIG_MAX_ERROR_MESSAGE_LEN */
-#define MAX_ERROR_PAR_LEN 30
-
-extern int
-onig_error_code_to_str(UChar* s, int code, ...)
-{
- UChar *p, *q;
- OnigErrorInfo* einfo;
- size_t len;
- int is_over;
- UChar parbuf[MAX_ERROR_PAR_LEN];
- va_list vargs;
-
- va_start(vargs, code);
-
- switch (code) {
- case ONIGERR_UNDEFINED_NAME_REFERENCE:
- case ONIGERR_UNDEFINED_GROUP_REFERENCE:
- case ONIGERR_MULTIPLEX_DEFINED_NAME:
- case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
- case ONIGERR_INVALID_GROUP_NAME:
- case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
- case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
- einfo = va_arg(vargs, OnigErrorInfo*);
- len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
- parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
- q = onig_error_code_to_format(code);
- p = s;
- while (*q != '\0') {
- if (*q == '%') {
- q++;
- if (*q == 'n') { /* '%n': name */
- xmemcpy(p, parbuf, len);
- p += len;
- if (is_over != 0) {
- xmemcpy(p, "...", 3);
- p += 3;
- }
- q++;
- }
- else
- goto normal_char;
- }
- else {
- normal_char:
- *p++ = *q++;
- }
- }
- *p = '\0';
- len = p - s;
- break;
-
- default:
- q = onig_error_code_to_format(code);
- len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q);
- xmemcpy(s, q, len);
- s[len] = '\0';
- break;
- }
-
- va_end(vargs);
- return (int)len;
-}
-
-void
-onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
- UChar* pat, UChar* pat_end, const UChar *fmt, va_list args)
-{
- size_t need;
- int n, len;
- UChar *p, *s, *bp;
- UChar bs[6];
-
- n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
-
- need = (pat_end - pat) * 4 + 4;
-
- if (n + need < (size_t)bufsize) {
- strcat((char* )buf, ": /");
- s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
-
- p = pat;
- while (p < pat_end) {
- if (*p == '\\') {
- *s++ = *p++;
- len = enclen(enc, p, pat_end);
- while (len-- > 0) *s++ = *p++;
- }
- else if (*p == '/') {
- *s++ = (unsigned char )'\\';
- *s++ = *p++;
- }
- else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
- len = enclen(enc, p, pat_end);
- if (ONIGENC_MBC_MINLEN(enc) == 1) {
- while (len-- > 0) *s++ = *p++;
- }
- else { /* for UTF16 */
- int blen;
-
- while (len-- > 0) {
- sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
- blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
- bp = bs;
- while (blen-- > 0) *s++ = *bp++;
- }
- }
- }
- else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
- !ONIGENC_IS_CODE_SPACE(enc, *p)) {
- sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
- len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
- bp = bs;
- while (len-- > 0) *s++ = *bp++;
- }
- else {
- *s++ = *p++;
- }
- }
-
- *s++ = '/';
- *s = '\0';
- }
-}
-
-void
-onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
- UChar* pat, UChar* pat_end, const UChar *fmt, ...)
-{
- va_list args;
- va_start(args, fmt);
- onig_vsnprintf_with_pattern(buf, bufsize, enc,
- pat, pat_end, fmt, args);
- va_end(args);
-}
-#endif //ENABLE_REGEXP
diff --git a/src/regex.h b/src/regex.h
deleted file mode 100644
index f3f797b00..000000000
--- a/src/regex.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
-** regex.h - Regexp class
-**
-** See Copyright Notice in mruby.h
-*/
-
-#ifndef ONIGURUMA_REGEX_H
-#define ONIGURUMA_REGEX_H 1
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-#include "oniguruma.h"
-
-#ifndef ONIG_RUBY_M17N
-
-#define mbclen(p,e,enc) mrb_enc_mbclen((p),(e),(enc))
-
-#endif /* ifndef ONIG_RUBY_M17N */
-
-#if defined(__cplusplus)
-} /* extern "C" { */
-#endif
-
-#endif /* ONIGURUMA_REGEX_H */
diff --git a/src/regexec.c b/src/regexec.c
deleted file mode 100644
index d265cc803..000000000
--- a/src/regexec.c
+++ /dev/null
@@ -1,3757 +0,0 @@
-/**********************************************************************
- regexec.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#ifdef ENABLE_REGEXP
-#include <string.h>
-#include "regint.h"
-
-/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
-
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
-#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
- (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
- ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
-#endif
-
-#ifdef USE_CAPTURE_HISTORY
-static void history_tree_free(OnigCaptureTreeNode* node);
-
-static void
-history_tree_clear(OnigCaptureTreeNode* node)
-{
- int i;
-
- if (IS_NOT_NULL(node)) {
- for (i = 0; i < node->num_childs; i++) {
- if (IS_NOT_NULL(node->childs[i])) {
- history_tree_free(node->childs[i]);
- }
- }
- for (i = 0; i < node->allocated; i++) {
- node->childs[i] = (OnigCaptureTreeNode* )0;
- }
- node->num_childs = 0;
- node->beg = ONIG_REGION_NOTPOS;
- node->end = ONIG_REGION_NOTPOS;
- node->group = -1;
- }
-}
-
-static void
-history_tree_free(OnigCaptureTreeNode* node)
-{
- history_tree_clear(node);
- xfree(node);
-}
-
-static void
-history_root_free(OnigRegion* r)
-{
- if (IS_NOT_NULL(r->history_root)) {
- history_tree_free(r->history_root);
- r->history_root = (OnigCaptureTreeNode* )0;
- }
-}
-
-static OnigCaptureTreeNode*
-history_node_new(void)
-{
- OnigCaptureTreeNode* node;
-
- node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
- CHECK_NULL_RETURN(node);
- node->childs = (OnigCaptureTreeNode** )0;
- node->allocated = 0;
- node->num_childs = 0;
- node->group = -1;
- node->beg = ONIG_REGION_NOTPOS;
- node->end = ONIG_REGION_NOTPOS;
-
- return node;
-}
-
-static int
-history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
-{
-#define HISTORY_TREE_INIT_ALLOC_SIZE 8
-
- if (parent->num_childs >= parent->allocated) {
- int n, i;
-
- if (IS_NULL(parent->childs)) {
- n = HISTORY_TREE_INIT_ALLOC_SIZE;
- parent->childs =
- (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
- }
- else {
- n = parent->allocated * 2;
- parent->childs =
- (OnigCaptureTreeNode** )xrealloc(parent->childs,
- sizeof(OnigCaptureTreeNode*) * n);
- }
- CHECK_NULL_RETURN_MEMERR(parent->childs);
- for (i = parent->allocated; i < n; i++) {
- parent->childs[i] = (OnigCaptureTreeNode* )0;
- }
- parent->allocated = n;
- }
-
- parent->childs[parent->num_childs] = child;
- parent->num_childs++;
- return 0;
-}
-
-static OnigCaptureTreeNode*
-history_tree_clone(OnigCaptureTreeNode* node)
-{
- int i;
- OnigCaptureTreeNode *clone, *child;
-
- clone = history_node_new();
- CHECK_NULL_RETURN(clone);
-
- clone->beg = node->beg;
- clone->end = node->end;
- for (i = 0; i < node->num_childs; i++) {
- child = history_tree_clone(node->childs[i]);
- if (IS_NULL(child)) {
- history_tree_free(clone);
- return (OnigCaptureTreeNode* )0;
- }
- history_tree_add_child(clone, child);
- }
-
- return clone;
-}
-
-extern OnigCaptureTreeNode*
-onig_get_capture_tree(OnigRegion* region)
-{
- return region->history_root;
-}
-#endif /* USE_CAPTURE_HISTORY */
-
-extern void
-onig_region_clear(OnigRegion* region)
-{
- int i;
-
- for (i = 0; i < region->num_regs; i++) {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
-#ifdef USE_CAPTURE_HISTORY
- history_root_free(region);
-#endif
-}
-
-extern int
-onig_region_resize(OnigRegion* region, int n)
-{
- region->num_regs = n;
-
- if (n < ONIG_NREGION)
- n = ONIG_NREGION;
-
- if (region->allocated == 0) {
- region->beg = (int* )xmalloc(n * sizeof(int));
- if (region->beg == 0)
- return ONIGERR_MEMORY;
-
- region->end = (int* )xmalloc(n * sizeof(int));
- if (region->end == 0) {
- xfree(region->beg);
- return ONIGERR_MEMORY;
- }
-
- region->allocated = n;
- }
- else if (region->allocated < n) {
- int *tmp;
-
- region->allocated = 0;
- tmp = (int* )xrealloc(region->beg, n * sizeof(int));
- if (tmp == 0) {
- xfree(region->beg);
- xfree(region->end);
- return ONIGERR_MEMORY;
- }
- region->beg = tmp;
- tmp = (int* )xrealloc(region->end, n * sizeof(int));
- if (tmp == 0) {
- xfree(region->beg);
- return ONIGERR_MEMORY;
- }
- region->end = tmp;
-
- if (region->beg == 0 || region->end == 0)
- return ONIGERR_MEMORY;
-
- region->allocated = n;
- }
-
- return 0;
-}
-
-static int
-onig_region_resize_clear(OnigRegion* region, int n)
-{
- int r;
-
- r = onig_region_resize(region, n);
- if (r != 0) return r;
- onig_region_clear(region);
- return 0;
-}
-
-extern int
-onig_region_set(OnigRegion* region, int at, int beg, int end)
-{
- if (at < 0) return ONIGERR_INVALID_ARGUMENT;
-
- if (at >= region->allocated) {
- int r = onig_region_resize(region, at + 1);
- if (r < 0) return r;
- }
-
- region->beg[at] = beg;
- region->end[at] = end;
- return 0;
-}
-
-extern void
-onig_region_init(OnigRegion* region)
-{
- region->num_regs = 0;
- region->allocated = 0;
- region->beg = (int* )0;
- region->end = (int* )0;
- region->history_root = (OnigCaptureTreeNode* )0;
-}
-
-extern OnigRegion*
-onig_region_new(void)
-{
- OnigRegion* r;
-
- r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
- if (r)
- onig_region_init(r);
- return r;
-}
-
-extern void
-onig_region_free(OnigRegion* r, int free_self)
-{
- if (r) {
- if (r->allocated > 0) {
- if (r->beg) xfree(r->beg);
- if (r->end) xfree(r->end);
- r->allocated = 0;
- }
-#ifdef USE_CAPTURE_HISTORY
- history_root_free(r);
-#endif
- if (free_self) xfree(r);
- }
-}
-
-extern void
-onig_region_copy(OnigRegion* to, OnigRegion* from)
-{
-#define RREGC_SIZE (sizeof(int) * from->num_regs)
- int i;
-
- if (to == from) return;
-
- onig_region_resize(to, from->num_regs);
- for (i = 0; i < from->num_regs; i++) {
- to->beg[i] = from->beg[i];
- to->end[i] = from->end[i];
- }
- to->num_regs = from->num_regs;
-
-#ifdef USE_CAPTURE_HISTORY
- history_root_free(to);
-
- if (IS_NOT_NULL(from->history_root)) {
- to->history_root = history_tree_clone(from->history_root);
- }
-#endif
-}
-
-
-/** stack **/
-#define INVALID_STACK_INDEX -1
-
-/* stack type */
-/* used by normal-POP */
-#define STK_ALT 0x0001
-#define STK_LOOK_BEHIND_NOT 0x0002
-#define STK_POS_NOT 0x0003
-/* handled by normal-POP */
-#define STK_MEM_START 0x0100
-#define STK_MEM_END 0x8200
-#define STK_REPEAT_INC 0x0300
-#define STK_STATE_CHECK_MARK 0x1000
-/* avoided by normal-POP */
-#define STK_NULL_CHECK_START 0x3000
-#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
-#define STK_MEM_END_MARK 0x8400
-#define STK_POS 0x0500 /* used when POP-POS */
-#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0700
-#define STK_CALL_FRAME 0x0800
-#define STK_RETURN 0x0900
-#define STK_VOID 0x0a00 /* for fill a blank */
-
-/* stack type check mask */
-#define STK_MASK_POP_USED 0x00ff
-#define STK_MASK_TO_VOID_TARGET 0x10ff
-#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
-
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
- (msa).stack_p = (void* )0;\
- (msa).options = (arg_option);\
- (msa).region = (arg_region);\
- (msa).start = (arg_start);\
- (msa).best_len = ONIG_MISMATCH;\
-} while(0)
-#else
-#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
- (msa).stack_p = (void* )0;\
- (msa).options = (arg_option);\
- (msa).region = (arg_region);\
- (msa).start = (arg_start);\
-} while(0)
-#endif
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-
-#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
-
-#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
- if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
- unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
- offset = ((offset) * (state_num)) >> 3;\
- if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
- if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
- (msa).state_check_buff = (void* )xmalloc(size);\
- CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
- }\
- else \
- (msa).state_check_buff = (void* )xalloca(size);\
- xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
- (size_t )(size - (offset))); \
- (msa).state_check_buff_size = size;\
- }\
- else {\
- (msa).state_check_buff = (void* )0;\
- (msa).state_check_buff_size = 0;\
- }\
- }\
- else {\
- (msa).state_check_buff = (void* )0;\
- (msa).state_check_buff_size = 0;\
- }\
- } while(0)
-
-#define MATCH_ARG_FREE(msa) do {\
- if ((msa).stack_p) xfree((msa).stack_p);\
- if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
- if ((msa).state_check_buff) xfree((msa).state_check_buff);\
- }\
-} while(0)
-#else
-#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
-#endif
-
-
-
-#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
- if (msa->stack_p) {\
- alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
- stk_alloc = (OnigStackType* )(msa->stack_p);\
- stk_base = stk_alloc;\
- stk = stk_base;\
- stk_end = stk_base + msa->stack_n;\
- }\
- else {\
- alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
- + sizeof(OnigStackType) * (stack_num));\
- stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
- stk_base = stk_alloc;\
- stk = stk_base;\
- stk_end = stk_base + (stack_num);\
- }\
-} while(0)
-
-#define STACK_SAVE do{\
- if (stk_base != stk_alloc) {\
- msa->stack_p = stk_base;\
- msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
- };\
-} while(0)
-
-static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
-
-extern unsigned int
-onig_get_match_stack_limit_size(void)
-{
- return MatchStackLimitSize;
-}
-
-extern int
-onig_set_match_stack_limit_size(unsigned int size)
-{
- MatchStackLimitSize = size;
- return 0;
-}
-
-static int
-stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
- OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
-{
- size_t n;
- OnigStackType *x, *stk_base, *stk_end, *stk;
-
- stk_base = *arg_stk_base;
- stk_end = *arg_stk_end;
- stk = *arg_stk;
-
- n = stk_end - stk_base;
- if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
- x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
- if (IS_NULL(x)) {
- STACK_SAVE;
- return ONIGERR_MEMORY;
- }
- xmemcpy(x, stk_base, n * sizeof(OnigStackType));
- n *= 2;
- }
- else {
- unsigned int limit_size = MatchStackLimitSize;
- n *= 2;
- if (limit_size != 0 && n > limit_size) {
- if ((unsigned int )(stk_end - stk_base) == limit_size)
- return ONIGERR_MATCH_STACK_LIMIT_OVER;
- else
- n = limit_size;
- }
- x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
- if (IS_NULL(x)) {
- STACK_SAVE;
- return ONIGERR_MEMORY;
- }
- }
- *arg_stk = x + (stk - stk_base);
- *arg_stk_base = x;
- *arg_stk_end = x + n;
- return 0;
-}
-
-#define STACK_ENSURE(n) do {\
- if (stk_end - stk < (n)) {\
- int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
- if (r != 0) { STACK_SAVE; return r; } \
- }\
-} while(0)
-
-#define STACK_AT(index) (stk_base + (index))
-#define GET_STACK_INDEX(stk) ((stk) - stk_base)
-
-#define STACK_PUSH_TYPE(stack_type) do {\
- STACK_ENSURE(1);\
- stk->type = (stack_type);\
- STACK_INC;\
-} while(0)
-
-#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define STATE_CHECK_POS(s,snum) \
- (((s) - str) * num_comb_exp_check + ((snum) - 1))
-#define STATE_CHECK_VAL(v,snum) do {\
- if (state_check_buff != NULL) {\
- int x = STATE_CHECK_POS(s,snum);\
- (v) = state_check_buff[x/8] & (1<<(x%8));\
- }\
- else (v) = 0;\
-} while(0)
-
-
-#define ELSE_IF_STATE_CHECK_MARK(stk) \
- else if ((stk)->type == STK_STATE_CHECK_MARK) { \
- int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
- state_check_buff[x/8] |= (1<<(x%8)); \
- }
-
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
- STACK_ENSURE(1);\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = 0;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.state_check = 0;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_ALT;\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_STATE_CHECK(s,snum) do {\
- if (state_check_buff != NULL) {\
- STACK_ENSURE(1);\
- stk->type = STK_STATE_CHECK_MARK;\
- stk->u.state.pstr = (s);\
- stk->u.state.state_check = (snum);\
- STACK_INC;\
- }\
-} while(0)
-
-#else /* USE_COMBINATION_EXPLOSION_CHECK */
-
-#define ELSE_IF_STATE_CHECK_MARK(stk)
-
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
- STACK_ENSURE(1);\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
- stk->type = (stack_type);\
- stk->u.state.pcode = (pat);\
- STACK_INC;\
-} while(0)
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-
-#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
-#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
-#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
-#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT)
-#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \
- STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev)
-
-#define STACK_PUSH_REPEAT(id, pat) do {\
- STACK_ENSURE(1);\
- stk->type = STK_REPEAT;\
- stk->u.repeat.num = (id);\
- stk->u.repeat.pcode = (pat);\
- stk->u.repeat.count = 0;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_REPEAT_INC(sindex) do {\
- STACK_ENSURE(1);\
- stk->type = STK_REPEAT_INC;\
- stk->u.repeat_inc.si = (sindex);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_MEM_START(mnum, s) do {\
- STACK_ENSURE(1);\
- stk->type = STK_MEM_START;\
- stk->u.mem.num = (mnum);\
- stk->u.mem.pstr = (s);\
- stk->u.mem.start = mem_start_stk[mnum];\
- stk->u.mem.end = mem_end_stk[mnum];\
- mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
- mem_end_stk[mnum] = INVALID_STACK_INDEX;\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_MEM_END(mnum, s) do {\
- STACK_ENSURE(1);\
- stk->type = STK_MEM_END;\
- stk->u.mem.num = (mnum);\
- stk->u.mem.pstr = (s);\
- stk->u.mem.start = mem_start_stk[mnum];\
- stk->u.mem.end = mem_end_stk[mnum];\
- mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_MEM_END_MARK(mnum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_MEM_END_MARK;\
- stk->u.mem.num = (mnum);\
- STACK_INC;\
-} while(0)
-
-#define STACK_GET_MEM_START(mnum, k) do {\
- int level = 0;\
- k = stk;\
- while (k > stk_base) {\
- k--;\
- if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
- && k->u.mem.num == (mnum)) {\
- level++;\
- }\
- else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
- if (level == 0) break;\
- level--;\
- }\
- }\
-} while(0)
-
-#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
- int level = 0;\
- while (k < stk) {\
- if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
- if (level == 0) (start) = k->u.mem.pstr;\
- level++;\
- }\
- else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
- level--;\
- if (level == 0) {\
- (end) = k->u.mem.pstr;\
- break;\
- }\
- }\
- k++;\
- }\
-} while(0)
-
-#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
- STACK_ENSURE(1);\
- stk->type = STK_NULL_CHECK_START;\
- stk->u.null_check.num = (cnum);\
- stk->u.null_check.pstr = (s);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
- STACK_ENSURE(1);\
- stk->type = STK_NULL_CHECK_END;\
- stk->u.null_check.num = (cnum);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_CALL_FRAME(pat) do {\
- STACK_ENSURE(1);\
- stk->type = STK_CALL_FRAME;\
- stk->u.call_frame.ret_addr = (pat);\
- STACK_INC;\
-} while(0)
-
-#define STACK_PUSH_RETURN do {\
- STACK_ENSURE(1);\
- stk->type = STK_RETURN;\
- STACK_INC;\
-} while(0)
-
-
-#ifdef ONIG_DEBUG
-#define STACK_BASE_CHECK(p, at) \
- if ((p) < stk_base) {\
- fprintf(stderr, "at %s\n", at);\
- goto stack_error;\
- }
-#else
-#define STACK_BASE_CHECK(p, at)
-#endif
-
-#define STACK_POP_ONE do {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
-} while(0)
-
-#define STACK_POP do {\
- switch (pop_level) {\
- case STACK_POP_LEVEL_FREE:\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP"); \
- if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
- break;\
- case STACK_POP_LEVEL_MEM_START:\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP 2"); \
- if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
- break;\
- default:\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP 3"); \
- if ((stk->type & STK_MASK_POP_USED) != 0) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
- break;\
- }\
-} while(0)
-
-#define STACK_POP_TIL_POS_NOT do {\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
- if (stk->type == STK_POS_NOT) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
-} while(0)
-
-#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\
- while (1) {\
- stk--;\
- STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
- if (stk->type == STK_LOOK_BEHIND_NOT) break;\
- else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
- else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
- mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\
- }\
- ELSE_IF_STATE_CHECK_MARK(stk);\
- }\
-} while(0)
-
-#define STACK_POS_END(k) do {\
- k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_POS_END"); \
- if (IS_TO_VOID_TARGET(k)) {\
- k->type = STK_VOID;\
- }\
- else if (k->type == STK_POS) {\
- k->type = STK_VOID;\
- break;\
- }\
- }\
-} while(0)
-
-#define STACK_STOP_BT_END do {\
- OnigStackType *k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
- if (IS_TO_VOID_TARGET(k)) {\
- k->type = STK_VOID;\
- }\
- else if (k->type == STK_STOP_BT) {\
- k->type = STK_VOID;\
- break;\
- }\
- }\
-} while(0)
-
-#define STACK_NULL_CHECK(isnull,id,s) do {\
- OnigStackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- (isnull) = (k->u.null_check.pstr == (s));\
- break;\
- }\
- }\
- }\
-} while(0)
-
-#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
- int level = 0;\
- OnigStackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- if (level == 0) {\
- (isnull) = (k->u.null_check.pstr == (s));\
- break;\
- }\
- else level--;\
- }\
- }\
- else if (k->type == STK_NULL_CHECK_END) {\
- level++;\
- }\
- }\
-} while(0)
-
-#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
- OnigStackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- if (k->u.null_check.pstr != (s)) {\
- (isnull) = 0;\
- break;\
- }\
- else {\
- UChar* endp;\
- (isnull) = 1;\
- while (k < stk) {\
- if (k->type == STK_MEM_START) {\
- if (k->u.mem.end == INVALID_STACK_INDEX) {\
- (isnull) = 0; break;\
- }\
- if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
- endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
- else\
- endp = (UChar* )k->u.mem.end;\
- if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */ \
- }\
- }\
- k++;\
- }\
- break;\
- }\
- }\
- }\
- }\
-} while(0)
-
-#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
- int level = 0;\
- OnigStackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- if (level == 0) {\
- if (k->u.null_check.pstr != (s)) {\
- (isnull) = 0;\
- break;\
- }\
- else {\
- UChar* endp;\
- (isnull) = 1;\
- while (k < stk) {\
- if (k->type == STK_MEM_START) {\
- if (k->u.mem.end == INVALID_STACK_INDEX) {\
- (isnull) = 0; break;\
- }\
- if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
- endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
- else\
- endp = (UChar* )k->u.mem.end;\
- if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */ \
- }\
- }\
- k++;\
- }\
- break;\
- }\
- }\
- else {\
- level--;\
- }\
- }\
- }\
- else if (k->type == STK_NULL_CHECK_END) {\
- if (k->u.null_check.num == (id)) level++;\
- }\
- }\
-} while(0)
-
-#define STACK_GET_REPEAT(id, k) do {\
- int level = 0;\
- k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
- if (k->type == STK_REPEAT) {\
- if (level == 0) {\
- if (k->u.repeat.num == (id)) {\
- break;\
- }\
- }\
- }\
- else if (k->type == STK_CALL_FRAME) level--;\
- else if (k->type == STK_RETURN) level++;\
- }\
-} while(0)
-
-#define STACK_RETURN(addr) do {\
- int level = 0;\
- OnigStackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_RETURN"); \
- if (k->type == STK_CALL_FRAME) {\
- if (level == 0) {\
- (addr) = k->u.call_frame.ret_addr;\
- break;\
- }\
- else level--;\
- }\
- else if (k->type == STK_RETURN)\
- level++;\
- }\
-} while(0)
-
-
-#define STRING_CMP(s1,s2,len) do {\
- while (len-- > 0) {\
- if (*s1++ != *s2++) goto fail;\
- }\
-} while(0)
-
-#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
- if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
- goto fail; \
-} while(0)
-
-static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
- UChar* s1, UChar** ps2, int mblen, const UChar* text_end)
-{
- UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- UChar *p1, *p2, *end1, *s2;
- int len1, len2;
-
- s2 = *ps2;
- end1 = s1 + mblen;
- while (s1 < end1) {
- len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
- len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
- if (len1 != len2) return 0;
- p1 = buf1;
- p2 = buf2;
- while (len1-- > 0) {
- if (*p1 != *p2) return 0;
- p1++;
- p2++;
- }
- }
-
- *ps2 = s2;
- return 1;
-}
-
-#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
- is_fail = 0;\
- while (len-- > 0) {\
- if (*s1++ != *s2++) {\
- is_fail = 1; break;\
- }\
- }\
-} while(0)
-
-#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
- if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
- is_fail = 1; \
- else \
- is_fail = 0; \
-} while(0)
-
-
-#define IS_EMPTY_STR (str == end)
-#define ON_STR_BEGIN(s) ((s) == str)
-#define ON_STR_END(s) ((s) == end)
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
-#define DATA_ENSURE_CHECK1 (s < right_range)
-#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
-#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
-#else
-#define DATA_ENSURE_CHECK1 (s < end)
-#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
-#define DATA_ENSURE(n) if (s + (n) > end) goto fail
-#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
-
-
-#ifdef USE_CAPTURE_HISTORY
-static int
-make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
- OnigStackType* stk_top, UChar* str, regex_t* reg)
-{
- int n, r;
- OnigCaptureTreeNode* child;
- OnigStackType* k = *kp;
-
- while (k < stk_top) {
- if (k->type == STK_MEM_START) {
- n = k->u.mem.num;
- if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
- BIT_STATUS_AT(reg->capture_history, n) != 0) {
- child = history_node_new();
- CHECK_NULL_RETURN_MEMERR(child);
- child->group = n;
- child->beg = (int )(k->u.mem.pstr - str);
- r = history_tree_add_child(node, child);
- if (r != 0) return r;
- *kp = (k + 1);
- r = make_capture_history_tree(child, kp, stk_top, str, reg);
- if (r != 0) return r;
-
- k = *kp;
- child->end = (int )(k->u.mem.pstr - str);
- }
- }
- else if (k->type == STK_MEM_END) {
- if (k->u.mem.num == node->group) {
- node->end = (int )(k->u.mem.pstr - str);
- *kp = k;
- return 0;
- }
- }
- k++;
- }
-
- return 1; /* 1: root node ending. */
-}
-#endif
-
-#ifdef USE_BACKREF_WITH_LEVEL
-static int mem_is_in_memp(int mem, int num, UChar* memp)
-{
- int i;
- MemNumType m;
-
- for (i = 0; i < num; i++) {
- GET_MEMNUM_INC(m, memp);
- if (mem == (int )m) return 1;
- }
- return 0;
-}
-
-static int backref_match_at_nested_level(regex_t* reg
- , OnigStackType* top, OnigStackType* stk_base
- , int ignore_case, int case_fold_flag
- , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
-{
- UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
- int level;
- OnigStackType* k;
-
- level = 0;
- k = top;
- k--;
- while (k >= stk_base) {
- if (k->type == STK_CALL_FRAME) {
- level--;
- }
- else if (k->type == STK_RETURN) {
- level++;
- }
- else if (level == nest) {
- if (k->type == STK_MEM_START) {
- if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
- pstart = k->u.mem.pstr;
- if (pend != NULL_UCHARP) {
- if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
- p = pstart;
- ss = *s;
-
- if (ignore_case != 0) {
- if (string_cmp_ic(reg->enc, case_fold_flag,
- pstart, &ss, (int )(pend - pstart), send) == 0)
- return 0; /* or goto next_mem; */
- }
- else {
- while (p < pend) {
- if (*p++ != *ss++) return 0; /* or goto next_mem; */
- }
- }
-
- *s = ss;
- return 1;
- }
- }
- }
- else if (k->type == STK_MEM_END) {
- if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
- pend = k->u.mem.pstr;
- }
- }
- }
- k--;
- }
-
- return 0;
-}
-#endif /* USE_BACKREF_WITH_LEVEL */
-
-
-#ifdef ONIG_DEBUG_STATISTICS
-
-#define USE_TIMEOFDAY
-
-#ifdef USE_TIMEOFDAY
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-static struct timeval ts, te;
-#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
-#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
- (((te).tv_sec - (ts).tv_sec)*1000000))
-#else
-#ifdef HAVE_SYS_TIMES_H
-#include <sys/times.h>
-#endif
-static struct tms ts, te;
-#define GETTIME(t) times(&(t))
-#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
-#endif
-
-static int OpCounter[256];
-static int OpPrevCounter[256];
-static unsigned long OpTime[256];
-static int OpCurr = OP_FINISH;
-static int OpPrevTarget = OP_FAIL;
-static int MaxStackDepth = 0;
-
-#define MOP_IN(opcode) do {\
- if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
- OpCurr = opcode;\
- OpCounter[opcode]++;\
- GETTIME(ts);\
-} while(0)
-
-#define MOP_OUT do {\
- GETTIME(te);\
- OpTime[OpCurr] += TIMEDIFF(te, ts);\
-} while(0)
-
-extern void
-onig_statistics_init(void)
-{
- int i;
- for (i = 0; i < 256; i++) {
- OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
- }
- MaxStackDepth = 0;
-}
-
-extern void
-onig_print_statistics(FILE* f)
-{
- int i;
- fprintf(f, " count prev time\n");
- for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
- fprintf(f, "%8d: %8d: %10ld: %s\n",
- OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
- }
- fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
-}
-
-#define STACK_INC do {\
- stk++;\
- if (stk - stk_base > MaxStackDepth) \
- MaxStackDepth = stk - stk_base;\
-} while(0)
-
-#else
-#define STACK_INC stk++
-
-#define MOP_IN(opcode)
-#define MOP_OUT
-#endif
-
-
-/* matching region of POSIX API */
-typedef int regoff_t;
-
-typedef struct {
- regoff_t rm_so;
- regoff_t rm_eo;
-} posix_regmatch_t;
-
-/* match data(str - end) from position (sstart). */
-/* if sstart == str then set sprev to NULL. */
-static long
-match_at(regex_t* reg, const UChar* str, const UChar* end,
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- const UChar* right_range,
-#endif
- const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
-{
- static const UChar FinishCode[] = { OP_FINISH };
-
- int i, n, num_mem, best_len, pop_level;
- LengthType tlen, tlen2;
- MemNumType mem;
- RelAddrType addr;
- OnigOptionType option = reg->options;
- OnigEncoding encode = reg->enc;
- OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
- UChar *s, *q, *sbegin;
- UChar *p = reg->p;
- char *alloca_base;
- OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
- OnigStackType *stkp; /* used as any purpose. */
- OnigStackIndex si;
- OnigStackIndex *repeat_stk;
- OnigStackIndex *mem_start_stk, *mem_end_stk;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int scv;
- unsigned char* state_check_buff = msa->state_check_buff;
- int num_comb_exp_check = reg->num_comb_exp_check;
-#endif
- n = reg->num_repeat + reg->num_mem * 2;
-
- STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
- pop_level = reg->stack_pop_level;
- num_mem = reg->num_mem;
- repeat_stk = (OnigStackIndex* )alloca_base;
-
- mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
- mem_end_stk = mem_start_stk + num_mem;
- mem_start_stk--; /* for index start from 1,
- mem_start_stk[1]..mem_start_stk[num_mem] */
- mem_end_stk--; /* for index start from 1,
- mem_end_stk[1]..mem_end_stk[num_mem] */
- for (i = 1; i <= num_mem; i++) {
- mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
- }
-
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
- (int )str, (int )end, (int )sstart, (int )sprev);
- fprintf(stderr, "size: %d, start offset: %d\n",
- (int )(end - str), (int )(sstart - str));
-#endif
-
- STACK_PUSH_ENSURED(STK_ALT, (UChar*)FinishCode); /* bottom stack */
- best_len = ONIG_MISMATCH;
- s = (UChar*)sstart;
- while (1) {
-#ifdef ONIG_DEBUG_MATCH
- if (s) {
- UChar *q, *bp, buf[50];
- int len;
- fprintf(stderr, "%4d> \"", (int )(s - str));
- bp = buf;
- for (i = 0, q = s; i < 7 && q < end; i++) {
- len = enclen(encode, q, end);
- while (len-- > 0) *bp++ = *q++;
- }
- if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
- else { xmemcpy(bp, "\"", 1); bp += 1; }
- *bp = 0;
- fputs((char* )buf, stderr);
- for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- onig_print_compiled_byte_code(stderr, p, NULL, encode);
- fprintf(stderr, "\n");
- }
-#endif
-
- sbegin = s;
- switch (*p++) {
- case OP_END: MOP_IN(OP_END);
- n = s - sstart;
- if (n > best_len) {
- OnigRegion* region;
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- if (IS_FIND_LONGEST(option)) {
- if (n > msa->best_len) {
- msa->best_len = n;
- msa->best_s = (UChar* )sstart;
- }
- else
- goto end_best_len;
- }
-#endif
- best_len = n;
- region = msa->region;
- if (region) {
- region->beg[0] = sstart - str;
- region->end[0] = s - str;
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
- else
- region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
-
- region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str;
- }
- else {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
- }
-
-#ifdef USE_CAPTURE_HISTORY
- if (reg->capture_history != 0) {
- int r;
- OnigCaptureTreeNode* node;
-
- if (IS_NULL(region->history_root)) {
- region->history_root = node = history_node_new();
- CHECK_NULL_RETURN_MEMERR(node);
- }
- else {
- node = region->history_root;
- history_tree_clear(node);
- }
-
- node->group = 0;
- node->beg = sstart - str;
- node->end = s - str;
-
- stkp = stk_base;
- r = make_capture_history_tree(region->history_root, &stkp,
- stk, (UChar* )str, reg);
- if (r < 0) {
- best_len = r; /* error code */
- goto finish;
- }
- }
-#endif /* USE_CAPTURE_HISTORY */
- } /* if (region) */
- } /* n > best_len */
-
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- end_best_len:
-#endif
- MOP_OUT;
-
- if (IS_FIND_CONDITION(option)) {
- if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
- best_len = ONIG_MISMATCH;
- goto fail; /* for retry */
- }
- if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
- goto fail; /* for retry */
- }
- }
-
- /* default behavior: return first-matching result. */
- goto finish;
- break;
-
- case OP_EXACT1: MOP_IN(OP_EXACT1);
- if (*p != *s++) goto fail;
- DATA_ENSURE(0);
- p++;
- MOP_OUT;
- break;
-
- case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC);
- {
- int len;
- UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*p != *q) {
- goto fail;
- }
- p++; q++;
- }
- }
- MOP_OUT;
- break;
-
- case OP_EXACT2: MOP_IN(OP_EXACT2);
- DATA_ENSURE(2);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- sprev = s;
- p++; s++;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACT3: MOP_IN(OP_EXACT3);
- DATA_ENSURE(3);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- sprev = s;
- p++; s++;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACT4: MOP_IN(OP_EXACT4);
- DATA_ENSURE(4);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- sprev = s;
- p++; s++;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACT5: MOP_IN(OP_EXACT5);
- DATA_ENSURE(5);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- sprev = s;
- p++; s++;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTN: MOP_IN(OP_EXACTN);
- GET_LENGTH_INC(tlen, p);
- DATA_ENSURE(tlen);
- while (tlen-- > 0) {
- if (*p++ != *s++) goto fail;
- }
- sprev = s - 1;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC);
- {
- int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- GET_LENGTH_INC(tlen, p);
- endp = p + tlen;
-
- while (p < endp) {
- sprev = s;
- DATA_ENSURE(1);
- len = ONIGENC_MBC_CASE_FOLD(encode,
- /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
- case_fold_flag,
- &s, end, lowbuf);
- DATA_ENSURE(0);
- q = lowbuf;
- while (len-- > 0) {
- if (*p != *q) goto fail;
- p++; q++;
- }
- }
- }
-
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1);
- DATA_ENSURE(2);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- MOP_OUT;
- break;
-
- case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2);
- DATA_ENSURE(4);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- sprev = s;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3);
- DATA_ENSURE(6);
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- sprev = s;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N);
- GET_LENGTH_INC(tlen, p);
- DATA_ENSURE(tlen * 2);
- while (tlen-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- }
- sprev = s - 2;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N);
- GET_LENGTH_INC(tlen, p);
- DATA_ENSURE(tlen * 3);
- while (tlen-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- if (*p != *s) goto fail;
- p++; s++;
- }
- sprev = s - 3;
- MOP_OUT;
- continue;
- break;
-
- case OP_EXACTMBN: MOP_IN(OP_EXACTMBN);
- GET_LENGTH_INC(tlen, p); /* mb-len */
- GET_LENGTH_INC(tlen2, p); /* string len */
- tlen2 *= tlen;
- DATA_ENSURE(tlen2);
- while (tlen2-- > 0) {
- if (*p != *s) goto fail;
- p++; s++;
- }
- sprev = s - tlen;
- MOP_OUT;
- continue;
- break;
-
- case OP_CCLASS: MOP_IN(OP_CCLASS);
- DATA_ENSURE(1);
- if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
- p += SIZE_BITSET;
- s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
- MOP_OUT;
- break;
-
- case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB);
- if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
-
- cclass_mb:
- GET_LENGTH_INC(tlen, p);
- {
- OnigCodePoint code;
- UChar *ss;
- int mb_len;
-
- DATA_ENSURE(1);
- mb_len = enclen(encode, s, end);
- DATA_ENSURE(mb_len);
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- if (! onig_is_in_code_range(p, code)) goto fail;
-#else
- q = p;
- ALIGNMENT_RIGHT(q);
- if (! onig_is_in_code_range(q, code)) goto fail;
-#endif
- }
- p += tlen;
- MOP_OUT;
- break;
-
- case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX);
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
- p += SIZE_BITSET;
- goto cclass_mb;
- }
- else {
- if (BITSET_AT(((BitSetRef )p), *s) == 0)
- goto fail;
-
- p += SIZE_BITSET;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- s++;
- }
- MOP_OUT;
- break;
-
- case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT);
- DATA_ENSURE(1);
- if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
- p += SIZE_BITSET;
- s += enclen(encode, s, end);
- MOP_OUT;
- break;
-
- case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT);
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
- s++;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- goto cc_mb_not_success;
- }
-
- cclass_mb_not:
- GET_LENGTH_INC(tlen, p);
- {
- OnigCodePoint code;
- UChar *ss;
- int mb_len = enclen(encode, s, end);
-
- if (! DATA_ENSURE_CHECK(mb_len)) {
- DATA_ENSURE(1);
- s = (UChar* )end;
- p += tlen;
- goto cc_mb_not_success;
- }
-
- ss = s;
- s += mb_len;
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- if (onig_is_in_code_range(p, code)) goto fail;
-#else
- q = p;
- ALIGNMENT_RIGHT(q);
- if (onig_is_in_code_range(q, code)) goto fail;
-#endif
- }
- p += tlen;
-
- cc_mb_not_success:
- MOP_OUT;
- break;
-
- case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT);
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
- p += SIZE_BITSET;
- goto cclass_mb_not;
- }
- else {
- if (BITSET_AT(((BitSetRef )p), *s) != 0)
- goto fail;
-
- p += SIZE_BITSET;
- GET_LENGTH_INC(tlen, p);
- p += tlen;
- s++;
- }
- MOP_OUT;
- break;
-
- case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE);
- {
- OnigCodePoint code;
- void *node;
- int mb_len;
- UChar *ss;
-
- DATA_ENSURE(1);
- GET_POINTER_INC(node, p);
- mb_len = enclen(encode, s, end);
- ss = s;
- s += mb_len;
- DATA_ENSURE(0);
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
- }
- MOP_OUT;
- break;
-
- case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
- DATA_ENSURE(1);
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- s += n;
- MOP_OUT;
- break;
-
- case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
- DATA_ENSURE(1);
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- s += n;
- MOP_OUT;
- break;
-
- case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
- while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- MOP_OUT;
- break;
-
- case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
- while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
- n = enclen(encode, s, end);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- MOP_OUT;
- break;
-
- case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
- while (DATA_ENSURE_CHECK1) {
- if (*p == *s) {
- STACK_PUSH_ALT(p + 1, s, sprev);
- }
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- p++;
- MOP_OUT;
- break;
-
- case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- while (DATA_ENSURE_CHECK1) {
- if (*p == *s) {
- STACK_PUSH_ALT(p + 1, s, sprev);
- }
- n = enclen(encode, s, end);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- p++;
- MOP_OUT;
- break;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
- s += n;
- }
- MOP_OUT;
- break;
-
- case OP_STATE_CHECK_ANYCHAR_ML_STAR:
- MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
-
- GET_STATE_CHECK_NUM_INC(mem, p);
- while (DATA_ENSURE_CHECK1) {
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enclen(encode, s, end);
- if (n > 1) {
- DATA_ENSURE(n);
- sprev = s;
- s += n;
- }
- else {
- sprev = s;
- s++;
- }
- }
- MOP_OUT;
- break;
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-
- case OP_WORD: MOP_IN(OP_WORD);
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
-
- s += enclen(encode, s, end);
- MOP_OUT;
- break;
-
- case OP_NOT_WORD: MOP_IN(OP_NOT_WORD);
- DATA_ENSURE(1);
- if (ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
-
- s += enclen(encode, s, end);
- MOP_OUT;
- break;
-
- case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND);
- if (ON_STR_BEGIN(s)) {
- DATA_ENSURE(1);
- if (! ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
- }
- else if (ON_STR_END(s)) {
- if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
- }
- else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- == ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND);
- if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
- goto fail;
- }
- else if (ON_STR_END(s)) {
- if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
- }
- else {
- if (ONIGENC_IS_MBC_WORD(encode, s, end)
- != ONIGENC_IS_MBC_WORD(encode, sprev, end))
- goto fail;
- }
- MOP_OUT;
- continue;
- break;
-
-#ifdef USE_WORD_BEGIN_END
- case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN);
- if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
- if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- MOP_OUT;
- continue;
- }
- }
- goto fail;
- break;
-
- case OP_WORD_END: MOP_IN(OP_WORD_END);
- if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
- if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
- MOP_OUT;
- continue;
- }
- }
- goto fail;
- break;
-#endif
-
- case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF);
- if (! ON_STR_BEGIN(s)) goto fail;
-
- MOP_OUT;
- continue;
- break;
-
- case OP_END_BUF: MOP_IN(OP_END_BUF);
- if (! ON_STR_END(s)) goto fail;
-
- MOP_OUT;
- continue;
- break;
-
- case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE);
- if (ON_STR_BEGIN(s)) {
- if (IS_NOTBOL(msa->options)) goto fail;
- MOP_OUT;
- continue;
- }
- else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- MOP_OUT;
- continue;
- }
- goto fail;
- break;
-
- case OP_END_LINE: MOP_IN(OP_END_LINE);
- if (ON_STR_END(s)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
-#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
- continue;
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
-#endif
- }
- else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
- MOP_OUT;
- continue;
- }
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- MOP_OUT;
- continue;
- }
-#endif
- goto fail;
- break;
-
- case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF);
- if (ON_STR_END(s)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
-#endif
- if (IS_NOTEOL(msa->options)) goto fail;
- MOP_OUT;
- continue;
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- }
-#endif
- }
- else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
- ON_STR_END(s + enclen(encode, s, end))) {
- MOP_OUT;
- continue;
- }
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- UChar* ss = s + enclen(encode, s);
- ss += enclen(encode, ss);
- if (ON_STR_END(ss)) {
- MOP_OUT;
- continue;
- }
- }
-#endif
- goto fail;
- break;
-
- case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION);
- if (s != msa->start)
- goto fail;
-
- MOP_OUT;
- continue;
- break;
-
- case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH);
- GET_MEMNUM_INC(mem, p);
- STACK_PUSH_MEM_START(mem, s);
- MOP_OUT;
- continue;
- break;
-
- case OP_MEMORY_START: MOP_IN(OP_MEMORY_START);
- GET_MEMNUM_INC(mem, p);
- mem_start_stk[mem] = (OnigStackIndex )((void* )s);
- MOP_OUT;
- continue;
- break;
-
- case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH);
- GET_MEMNUM_INC(mem, p);
- STACK_PUSH_MEM_END(mem, s);
- MOP_OUT;
- continue;
- break;
-
- case OP_MEMORY_END: MOP_IN(OP_MEMORY_END);
- GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (OnigStackIndex )((void* )s);
- MOP_OUT;
- continue;
- break;
-
-#ifdef USE_SUBEXP_CALL
- case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC);
- GET_MEMNUM_INC(mem, p);
- STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
- STACK_PUSH_MEM_END(mem, s);
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- MOP_OUT;
- continue;
- break;
-
- case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC);
- GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (OnigStackIndex )((void* )s);
- STACK_GET_MEM_START(mem, stkp);
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
- else
- mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
-
- STACK_PUSH_MEM_END_MARK(mem);
- MOP_OUT;
- continue;
- break;
-#endif
-
- case OP_BACKREF1: MOP_IN(OP_BACKREF1);
- mem = 1;
- goto backref;
- break;
-
- case OP_BACKREF2: MOP_IN(OP_BACKREF2);
- mem = 2;
- goto backref;
- break;
-
- case OP_BACKREFN: MOP_IN(OP_BACKREFN);
- GET_MEMNUM_INC(mem, p);
- backref:
- {
- int len;
- UChar *pstart, *pend;
-
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- MOP_OUT;
- continue;
- }
- break;
-
- case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC);
- GET_MEMNUM_INC(mem, p);
- {
- int len;
- UChar *pstart, *pend;
-
- /* if you want to remove following line,
- you should check in parse and compile time. */
- if (mem > num_mem) goto fail;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n, end);
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- MOP_OUT;
- continue;
- }
- break;
-
- case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI);
- {
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- GET_LENGTH_INC(tlen, p);
- for (i = 0; i < tlen; i++) {
- GET_MEMNUM_INC(mem, p);
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * (tlen - i - 1));
- break; /* success */
- }
- if (i == tlen) goto fail;
- MOP_OUT;
- continue;
- }
- break;
-
- case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC);
- {
- int len, is_fail;
- UChar *pstart, *pend, *swork;
-
- GET_LENGTH_INC(tlen, p);
- for (i = 0; i < tlen; i++) {
- GET_MEMNUM_INC(mem, p);
-
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
-
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
- n = pend - pstart;
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * (tlen - i - 1));
- break; /* success */
- }
- if (i == tlen) goto fail;
- MOP_OUT;
- continue;
- }
- break;
-
-#ifdef USE_BACKREF_WITH_LEVEL
- case OP_BACKREF_WITH_LEVEL:
- {
- int len;
- OnigOptionType ic;
- LengthType level;
-
- GET_OPTION_INC(ic, p);
- GET_LENGTH_INC(level, p);
- GET_LENGTH_INC(tlen, p);
-
- sprev = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, ic
- , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
- while (sprev + (len = enclen(encode, sprev, end)) < s)
- sprev += len;
-
- p += (SIZE_MEMNUM * tlen);
- }
- else
- goto fail;
-
- MOP_OUT;
- continue;
- }
-
- break;
-#endif
-
- case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_PUSH_NULL_CHECK_START(mem, s);
- MOP_OUT;
- continue;
- break;
-
- case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
- {
- int isnull;
-
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK(isnull, mem, s);
- if (isnull) {
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
- (int )mem, (int )s);
-#endif
- null_check_found:
- /* empty loop founded, skip next instruction */
- switch (*p++) {
- case OP_JUMP:
- case OP_PUSH:
- p += SIZE_RELADDR;
- break;
- case OP_REPEAT_INC:
- case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
- p += SIZE_MEMNUM;
- break;
- default:
- goto unexpected_bytecode_error;
- break;
- }
- }
- }
- MOP_OUT;
- continue;
- break;
-
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
- {
- int isnull;
-
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
- if (isnull) {
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
- (int )mem, (int )s);
-#endif
- if (isnull == -1) goto fail;
- goto null_check_found;
- }
- }
- MOP_OUT;
- continue;
- break;
-#endif
-
-#ifdef USE_SUBEXP_CALL
- case OP_NULL_CHECK_END_MEMST_PUSH:
- MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
- {
- int isnull;
-
- GET_MEMNUM_INC(mem, p); /* mem: null check id */
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
-#else
- STACK_NULL_CHECK_REC(isnull, mem, s);
-#endif
- if (isnull) {
-#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
- (int )mem, (int )s);
-#endif
- if (isnull == -1) goto fail;
- goto null_check_found;
- }
- else {
- STACK_PUSH_NULL_CHECK_END(mem);
- }
- }
- MOP_OUT;
- continue;
- break;
-#endif
-
- case OP_JUMP: MOP_IN(OP_JUMP);
- GET_RELADDR_INC(addr, p);
- p += addr;
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
- continue;
- break;
-
- case OP_PUSH: MOP_IN(OP_PUSH);
- GET_RELADDR_INC(addr, p);
- STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
- break;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH);
- GET_STATE_CHECK_NUM_INC(mem, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- GET_RELADDR_INC(addr, p);
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
- MOP_OUT;
- continue;
- break;
-
- case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
- GET_STATE_CHECK_NUM_INC(mem, p);
- GET_RELADDR_INC(addr, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) {
- p += addr;
- }
- else {
- STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK);
- GET_STATE_CHECK_NUM_INC(mem, p);
- STATE_CHECK_VAL(scv, mem);
- if (scv) goto fail;
-
- STACK_PUSH_STATE_CHECK(s, mem);
- MOP_OUT;
- continue;
- break;
-#endif /* USE_COMBINATION_EXPLOSION_CHECK */
-
- case OP_POP: MOP_IN(OP_POP);
- STACK_POP_ONE;
- MOP_OUT;
- continue;
- break;
-
- case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
- GET_RELADDR_INC(addr, p);
- if (*p == *s && DATA_ENSURE_CHECK1) {
- p++;
- STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
- }
- p += (addr + 1);
- MOP_OUT;
- continue;
- break;
-
- case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT);
- GET_RELADDR_INC(addr, p);
- if (*p == *s) {
- p++;
- STACK_PUSH_ALT(p + addr, s, sprev);
- MOP_OUT;
- continue;
- }
- p++;
- MOP_OUT;
- continue;
- break;
-
- case OP_REPEAT: MOP_IN(OP_REPEAT);
- {
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- GET_RELADDR_INC(addr, p);
-
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p);
-
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- }
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG);
- {
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- GET_RELADDR_INC(addr, p);
-
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p);
-
- if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p, s, sprev);
- p += addr;
- }
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
- }
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(p, s, sprev);
- p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
- }
- else {
- p = stkp->u.repeat.pcode;
- }
- STACK_PUSH_REPEAT_INC(si);
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
- continue;
- break;
-
- case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc;
- break;
-
- case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc_ng:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- UChar* pcode = stkp->u.repeat.pcode;
-
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev);
- }
- else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- }
- }
- else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
- }
- MOP_OUT;
- CHECK_INTERRUPT_IN_MATCH_AT;
- continue;
- break;
-
- case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG);
- GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc_ng;
- break;
-
- case OP_PUSH_POS: MOP_IN(OP_PUSH_POS);
- STACK_PUSH_POS(s, sprev);
- MOP_OUT;
- continue;
- break;
-
- case OP_POP_POS: MOP_IN(OP_POP_POS);
- {
- STACK_POS_END(stkp);
- s = stkp->u.state.pstr;
- sprev = stkp->u.state.pstr_prev;
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT);
- GET_RELADDR_INC(addr, p);
- STACK_PUSH_POS_NOT(p + addr, s, sprev);
- MOP_OUT;
- continue;
- break;
-
- case OP_FAIL_POS: MOP_IN(OP_FAIL_POS);
- STACK_POP_TIL_POS_NOT;
- goto fail;
- break;
-
- case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT);
- STACK_PUSH_STOP_BT;
- MOP_OUT;
- continue;
- break;
-
- case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT);
- STACK_STOP_BT_END;
- MOP_OUT;
- continue;
- break;
-
- case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND);
- GET_LENGTH_INC(tlen, p);
- s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
- if (IS_NULL(s)) goto fail;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
- MOP_OUT;
- continue;
- break;
-
- case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
- GET_RELADDR_INC(addr, p);
- GET_LENGTH_INC(tlen, p);
- q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
- if (IS_NULL(q)) {
- /* too short case -> success. ex. /(?<!XXX)a/.match("a")
- If you want to change to fail, replace following line. */
- p += addr;
- /* goto fail; */
- }
- else {
- STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev);
- s = q;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
- }
- MOP_OUT;
- continue;
- break;
-
- case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
- STACK_POP_TIL_LOOK_BEHIND_NOT;
- goto fail;
- break;
-
-#ifdef USE_SUBEXP_CALL
- case OP_CALL: MOP_IN(OP_CALL);
- GET_ABSADDR_INC(addr, p);
- STACK_PUSH_CALL_FRAME(p);
- p = reg->p + addr;
- MOP_OUT;
- continue;
- break;
-
- case OP_RETURN: MOP_IN(OP_RETURN);
- STACK_RETURN(p);
- STACK_PUSH_RETURN;
- MOP_OUT;
- continue;
- break;
-#endif
-
- case OP_FINISH:
- goto finish;
- break;
-
- fail:
- MOP_OUT;
- /* fall */
- case OP_FAIL: MOP_IN(OP_FAIL);
- STACK_POP;
- p = stk->u.state.pcode;
- s = stk->u.state.pstr;
- sprev = stk->u.state.pstr_prev;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- if (stk->u.state.state_check != 0) {
- stk->type = STK_STATE_CHECK_MARK;
- stk++;
- }
-#endif
-
- MOP_OUT;
- continue;
- break;
-
- default:
- goto bytecode_error;
-
- } /* end of switch */
- sprev = sbegin;
- } /* end of while(1) */
-
- finish:
- STACK_SAVE;
- return best_len;
-
-#ifdef ONIG_DEBUG
- stack_error:
- STACK_SAVE;
- return ONIGERR_STACK_BUG;
-#endif
-
- bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNDEFINED_BYTECODE;
-
- unexpected_bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNEXPECTED_BYTECODE;
-}
-
-
-static UChar*
-slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
- const UChar* text, const UChar* text_end, UChar* text_range)
-{
- UChar *t, *p, *s, *end;
-
- end = (UChar* )text_end;
- end -= target_end - target - 1;
- if (end > text_range)
- end = text_range;
-
- s = (UChar* )text;
-
- if (enc->max_enc_len == enc->min_enc_len) {
- int n = enc->max_enc_len;
-
- while (s < end) {
- if (*s == *target) {
- p = s + 1;
- t = target + 1;
- if (target_end == t || memcmp(t, p, target_end - t) == 0)
- return s;
- }
- s += n;
- }
- return (UChar*)NULL;
- }
- while (s < end) {
- if (*s == *target) {
- p = s + 1;
- t = target + 1;
- if (target_end == t || memcmp(t, p, target_end - t) == 0)
- return s;
- }
- s += enclen(enc, s, text_end);
- }
-
- return (UChar* )NULL;
-}
-
-static int
-str_lower_case_match(OnigEncoding enc, int case_fold_flag,
- const UChar* t, const UChar* tend,
- const UChar* p, const UChar* end)
-{
- int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
-
- while (t < tend) {
- lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
- q = lowbuf;
- while (lowlen > 0) {
- if (*t++ != *q++) return 0;
- lowlen--;
- }
- }
-
- return 1;
-}
-
-static UChar*
-slow_search_ic(OnigEncoding enc, int case_fold_flag,
- UChar* target, UChar* target_end,
- const UChar* text, const UChar* text_end, UChar* text_range)
-{
- UChar *s, *end;
-
- end = (UChar* )text_end;
- end -= target_end - target - 1;
- if (end > text_range)
- end = text_range;
-
- s = (UChar* )text;
-
- while (s < end) {
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, text_end))
- return s;
-
- s += enclen(enc, s, text_end);
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- UChar *t, *p, *s;
-
- s = (UChar* )text_end;
- s -= (target_end - target);
- if (s > text_start)
- s = (UChar* )text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
-
- while (s >= text) {
- if (*s == *target) {
- p = s + 1;
- t = target + 1;
- while (t < target_end) {
- if (*t != *p++)
- break;
- t++;
- }
- if (t == target_end)
- return s;
- }
- s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
- UChar* target, UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- UChar *s;
-
- s = (UChar* )text_end;
- s -= (target_end - target);
- if (s > text_start)
- s = (UChar* )text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
-
- while (s >= text) {
- if (str_lower_case_match(enc, case_fold_flag,
- target, target_end, s, text_end))
- return s;
-
- s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *t, *p, *end;
- const UChar *tail;
- int skip, tlen1;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
- (int )text, (int )text_end, (int )text_range);
-#endif
-
- tail = target_end - 1;
- tlen1 = tail - target;
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- s = text;
-
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
- else {
- while (s < end) {
- p = se = s + tlen1;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )s;
- p--; t--;
- }
- skip = reg->int_map[*se];
- t = s;
- do {
- s += enclen(reg->enc, s, end);
- } while ((s - t) < skip && s < end);
- }
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end, const UChar* text_range)
-{
- const UChar *s, *t, *p, *end;
- const UChar *tail;
-
- end = text_range + (target_end - target) - 1;
- if (end > text_end)
- end = text_end;
-
- tail = target_end - 1;
- s = text + (target_end - target) - 1;
- if (IS_NULL(reg->int_map)) {
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->map[*s];
- }
- }
- else { /* see int_map[] */
- while (s < end) {
- p = s;
- t = tail;
- while (*p == *t) {
- if (t == target) return (UChar* )p;
- p--; t--;
- }
- s += reg->int_map[*s];
- }
- }
- return (UChar* )NULL;
-}
-
-static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
- int** skip)
-{
- int i, len;
-
- if (IS_NULL(*skip)) {
- *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
- if (IS_NULL(*skip)) return ONIGERR_MEMORY;
- }
-
- len = end - s;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- (*skip)[i] = len;
-
- for (i = len - 1; i > 0; i--)
- (*skip)[s[i]] = i;
-
- return 0;
-}
-
-static UChar*
-bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* adjust_text,
- const UChar* text_end, const UChar* text_start)
-{
- const UChar *s, *t, *p;
-
- s = text_end - (target_end - target);
- if (text_start < s)
- s = text_start;
- else
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
-
- while (s >= text) {
- p = s;
- t = target;
- while (t < target_end && *p == *t) {
- p++; t++;
- }
- if (t == target_end)
- return (UChar* )s;
-
- s -= reg->int_map_backward[*s];
- s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
-map_search(OnigEncoding enc, UChar map[],
- const UChar* text, const UChar* text_range, const UChar* text_end)
-{
- const UChar *s = text;
-
- while (s < text_range) {
- if (map[*s]) return (UChar* )s;
-
- s += enclen(enc, s, text_end);
- }
- return (UChar* )NULL;
-}
-
-static UChar*
-map_search_backward(OnigEncoding enc, UChar map[],
- const UChar* text, const UChar* adjust_text,
- const UChar* text_start, const UChar* text_end)
-{
- const UChar *s = text_start;
-
- while (s >= text) {
- if (map[*s]) return (UChar* )s;
-
- s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
- }
- return (UChar* )NULL;
-}
-
-extern long
-onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
- OnigOptionType option)
-{
- long r;
- UChar *prev;
- OnigMatchArg msa;
-
-#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
- start:
- THREAD_ATOMIC_START;
- if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
- ONIG_STATE_INC(reg);
- if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_chain_reduce(reg);
- ONIG_STATE_INC(reg);
- }
- }
- else {
- int n;
-
- THREAD_ATOMIC_END;
- n = 0;
- while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
- if (++n > THREAD_PASS_LIMIT_COUNT)
- return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
- THREAD_PASS;
- }
- goto start;
- }
- THREAD_ATOMIC_END;
-#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
- MATCH_ARG_INIT(msa, option, region, at);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- {
- int offset = at - str;
- STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
- }
-#endif
-
- if (region
- ) {
- r = onig_region_resize_clear(region, reg->num_mem + 1);
- }
- else
- r = 0;
-
- if (r == 0) {
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
- r = match_at(reg, str, end,
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- end,
-#endif
- at, prev, &msa);
- }
-
- MATCH_ARG_FREE(msa);
- ONIG_STATE_DEC_THREAD(reg);
- return r;
-}
-
-static int
-forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
-{
- UChar *p, *pprev = (UChar* )NULL;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
- (int )str, (int )end, (int )s, (int )range);
-#endif
-
- p = s;
- if (reg->dmin > 0) {
- if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
- p += reg->dmin;
- }
- else {
- UChar *q = p + reg->dmin;
- while (p < q) p += enclen(reg->enc, p, end);
- }
- }
-
- retry:
- switch (reg->optimize) {
- case ONIG_OPTIMIZE_EXACT:
- p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
- break;
- case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end, p, end, range);
- break;
-
- case ONIG_OPTIMIZE_EXACT_BM:
- p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
- break;
-
- case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
- p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
- break;
-
- case ONIG_OPTIMIZE_MAP:
- p = map_search(reg->enc, reg->map, p, range, end);
- break;
- }
-
- if (p && p < range) {
- if (p - reg->dmin < s) {
- retry_gate:
- pprev = p;
- p += enclen(reg->enc, p, end);
- goto retry;
- }
-
- if (reg->sub_anchor) {
- UChar* prev;
-
- switch (reg->sub_anchor) {
- case ANCHOR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p, end);
- if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
- goto retry_gate;
- }
- break;
-
- case ANCHOR_END_LINE:
- if (ON_STR_END(p)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
- goto retry_gate;
-#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
-#endif
- )
- goto retry_gate;
- break;
- }
- }
-
- if (reg->dmax == 0) {
- *low = p;
- if (low_prev) {
- if (*low > s)
- *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
- else
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p, end);
- }
- }
- else {
- if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, end, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low, end);
- }
- else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low, end);
- }
- }
- }
- /* no needs to adjust *high, *high is used as range check only */
- *high = p - reg->dmin;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
- (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
-#endif
- return 1; /* success */
- }
-
- return 0; /* fail */
-}
-
-#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
-
-static long
-backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
- UChar* s, const UChar* range, UChar* adjrange,
- UChar** low, UChar** high)
-{
- int r;
- UChar *p;
-
- range += reg->dmin;
- p = s;
-
- retry:
- switch (reg->optimize) {
- case ONIG_OPTIMIZE_EXACT:
- exact_method:
- p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
- range, adjrange, end, p);
- break;
-
- case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end,
- range, adjrange, end, p);
- break;
-
- case ONIG_OPTIMIZE_EXACT_BM:
- case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
- if (IS_NULL(reg->int_map_backward)) {
- if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
- goto exact_method;
-
- r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
- &(reg->int_map_backward));
- if (r) return r;
- }
- p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
- end, p);
- break;
-
- case ONIG_OPTIMIZE_MAP:
- p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
- break;
- }
-
- if (p) {
- if (reg->sub_anchor) {
- UChar* prev;
-
- switch (reg->sub_anchor) {
- case ANCHOR_BEGIN_LINE:
- if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
- if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
- p = prev;
- goto retry;
- }
- }
- break;
-
- case ANCHOR_END_LINE:
- if (ON_STR_END(p)) {
-#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
- prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
- if (IS_NULL(prev)) goto fail;
- if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
- p = prev;
- goto retry;
- }
-#endif
- }
- else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
-#endif
- ) {
- p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
- if (IS_NULL(p)) goto fail;
- goto retry;
- }
- break;
- }
- }
-
- /* no needs to adjust *high, *high is used as range check only */
- if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- *low = p - reg->dmax;
- *high = p - reg->dmin;
- *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
- }
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
- (int )(*low - str), (int )(*high - str));
-#endif
- return 1; /* success */
- }
-
- fail:
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: fail.\n");
-#endif
- return 0; /* fail */
-}
-
-
-extern long
-onig_search(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
-{
- int r;
- UChar *s, *prev;
- OnigMatchArg msa;
- const UChar *orig_start = start;
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- const UChar *orig_range = range;
-#endif
-
-#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
- start:
- THREAD_ATOMIC_START;
- if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
- ONIG_STATE_INC(reg);
- if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_chain_reduce(reg);
- ONIG_STATE_INC(reg);
- }
- }
- else {
- int n;
-
- THREAD_ATOMIC_END;
- n = 0;
- while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
- if (++n > THREAD_PASS_LIMIT_COUNT)
- return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
- THREAD_PASS;
- }
- goto start;
- }
- THREAD_ATOMIC_END;
-#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
- (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
-#endif
-
- if (region
- ) {
- r = onig_region_resize_clear(region, reg->num_mem + 1);
- if (r) goto finish_no_msa;
- }
-
- if (start > end || start < str) goto mismatch_no_msa;
-
-
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
- }
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#else
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(none) \
- r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! IS_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(none) \
- r = match_at(reg, str, end, s, prev, &msa);\
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- goto match;\
- }\
- else goto finish; /* error */ \
- }
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
-
-
- /* anchor optimize: resume search range */
- if (reg->anchor != 0 && str < end) {
- UChar *min_semi_end, *max_semi_end;
-
- if (reg->anchor & ANCHOR_BEGIN_POSITION) {
- /* search start-position only */
- begin_position:
- if (range > start)
- range = start + 1;
- else
- range = start;
- }
- else if (reg->anchor & ANCHOR_BEGIN_BUF) {
- /* search str-position only */
- if (range > start) {
- if (start != str) goto mismatch_no_msa;
- range = str + 1;
- }
- else {
- if (range <= str) {
- start = str;
- range = str;
- }
- else
- goto mismatch_no_msa;
- }
- }
- else if (reg->anchor & ANCHOR_END_BUF) {
- min_semi_end = max_semi_end = (UChar* )end;
-
- end_buf:
- if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
- goto mismatch_no_msa;
-
- if (range > start) {
- if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
- start = min_semi_end - reg->anchor_dmax;
- if (start < end)
- start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
- else { /* match with empty at end */
- start = onigenc_get_prev_char_head(reg->enc, str, end, end);
- }
- }
- if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
- range = max_semi_end - reg->anchor_dmin + 1;
- }
-
- if (start >= range) goto mismatch_no_msa;
- }
- else {
- if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
- range = min_semi_end - reg->anchor_dmax;
- }
- if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
- start = max_semi_end - reg->anchor_dmin;
- start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
- }
- if (range > start) goto mismatch_no_msa;
- }
- }
- else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
- UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
-
- max_semi_end = (UChar* )end;
- if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
-
-#ifdef USE_CRNL_AS_LINE_TERMINATOR
- pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
- if (IS_NOT_NULL(pre_end) &&
- ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
- min_semi_end = pre_end;
- }
-#endif
- if (min_semi_end > str && start <= min_semi_end) {
- goto end_buf;
- }
- }
- else {
- min_semi_end = (UChar* )end;
- goto end_buf;
- }
- }
- else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
- goto begin_position;
- }
- }
- else if (str == end) { /* empty string */
- static const UChar address_for_empty_string[] = "";
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "onig_search: empty string.\n");
-#endif
-
- if (reg->threshold_len == 0) {
- start = end = str = address_for_empty_string;
- s = (UChar* )start;
- prev = (UChar* )NULL;
-
- MATCH_ARG_INIT(msa, option, region, start);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- msa.state_check_buff = (void* )0;
- msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
-#endif
- MATCH_AND_RETURN_CHECK(end);
- goto mismatch;
- }
- goto mismatch_no_msa;
- }
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
- (int )(end - str), (int )(start - str), (int )(range - str));
-#endif
-
- MATCH_ARG_INIT(msa, option, region, orig_start);
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- {
- int offset = (MIN(start, range) - str);
- STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
- }
-#endif
-
- s = (UChar* )start;
- if (range > start) { /* forward search */
- if (s > str)
- prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
- else
- prev = (UChar* )NULL;
-
- if (reg->optimize != ONIG_OPTIMIZE_NONE) {
- UChar *sch_range, *low, *high, *low_prev;
-
- sch_range = (UChar* )range;
- if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_range = (UChar* )end;
- else {
- sch_range += reg->dmax;
- if (sch_range > end) sch_range = (UChar* )end;
- }
- }
-
- if ((end - start) < reg->threshold_len)
- goto mismatch;
-
- if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- do {
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, &low_prev)) goto mismatch;
- if (s < low) {
- s = low;
- prev = low_prev;
- }
- while (s <= high) {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s, end);
- }
- } while (s < range);
- goto mismatch;
- }
- else { /* check only. */
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) goto mismatch;
-
- if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
- do {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s, end);
- } while (s < range);
- goto mismatch;
- }
- }
- }
-
- do {
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s, end);
- } while (s < range);
-
- if (s == range) { /* because empty match with /$/. */
- MATCH_AND_RETURN_CHECK(orig_range);
- }
- }
- else { /* backward search */
-#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
- if (orig_start < end)
- orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
-#endif
-
- if (reg->optimize != ONIG_OPTIMIZE_NONE) {
- UChar *low, *high, *adjrange, *sch_start;
-
- if (range < end)
- adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
- else
- adjrange = (UChar* )end;
-
- if (reg->dmax != ONIG_INFINITE_DISTANCE &&
- (end - range) >= reg->threshold_len) {
- do {
- sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0)
- goto mismatch;
-
- if (s > high)
- s = high;
-
- while (s >= low) {
- prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
- MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- }
- } while (s >= range);
- goto mismatch;
- }
- else { /* check only. */
- if ((end - range) < reg->threshold_len) goto mismatch;
-
- sch_start = s;
- if (reg->dmax != 0) {
- if (reg->dmax == ONIG_INFINITE_DISTANCE)
- sch_start = (UChar* )end;
- else {
- sch_start += reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- else
- sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
- start, sch_start, end);
- }
- }
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0) goto mismatch;
- }
- }
-
- do {
- prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
- MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- } while (s >= range);
- }
-
- mismatch:
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- if (IS_FIND_LONGEST(reg->options)) {
- if (msa.best_len >= 0) {
- s = msa.best_s;
- goto match;
- }
- }
-#endif
- r = ONIG_MISMATCH;
-
- finish:
- MATCH_ARG_FREE(msa);
- ONIG_STATE_DEC_THREAD(reg);
-
- /* If result is mismatch and no FIND_NOT_EMPTY option,
- then the region is not setted in match_at(). */
- if (IS_FIND_NOT_EMPTY(reg->options) && region
- ) {
- onig_region_clear(region);
- }
-
-#ifdef ONIG_DEBUG
- if (r != ONIG_MISMATCH)
- fprintf(stderr, "onig_search: error %d\n", r);
-#endif
- return r;
-
- mismatch_no_msa:
- r = ONIG_MISMATCH;
- finish_no_msa:
- ONIG_STATE_DEC_THREAD(reg);
-#ifdef ONIG_DEBUG
- if (r != ONIG_MISMATCH)
- fprintf(stderr, "onig_search: error %d\n", r);
-#endif
- return r;
-
- match:
- ONIG_STATE_DEC_THREAD(reg);
- MATCH_ARG_FREE(msa);
- return s - str;
-}
-
-extern OnigEncoding
-onig_get_encoding(regex_t* reg)
-{
- return reg->enc;
-}
-
-extern OnigOptionType
-onig_get_options(regex_t* reg)
-{
- return reg->options;
-}
-
-extern OnigCaseFoldType
-onig_get_case_fold_flag(regex_t* reg)
-{
- return reg->case_fold_flag;
-}
-
-extern const OnigSyntaxType*
-onig_get_syntax(regex_t* reg)
-{
- return reg->syntax;
-}
-
-extern int
-onig_number_of_captures(regex_t* reg)
-{
- return reg->num_mem;
-}
-
-extern int
-onig_number_of_capture_histories(regex_t* reg)
-{
-#ifdef USE_CAPTURE_HISTORY
- int i, n;
-
- n = 0;
- for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(reg->capture_history, i) != 0)
- n++;
- }
- return n;
-#else
- return 0;
-#endif
-}
-
-extern void
-onig_copy_encoding(OnigEncoding to, OnigEncoding from)
-{
- *to = *from;
-}
-#endif //ENABLE_REGEXP
diff --git a/src/regint.h b/src/regint.h
deleted file mode 100644
index aa4871594..000000000
--- a/src/regint.h
+++ /dev/null
@@ -1,838 +0,0 @@
-#ifndef ONIGURUMA_REGINT_H
-#define ONIGURUMA_REGINT_H
-/**********************************************************************
- regint.h - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/* for debug */
-/* #define ONIG_DEBUG_PARSE_TREE */
-/* #define ONIG_DEBUG_COMPILE */
-/* #define ONIG_DEBUG_SEARCH */
-/* #define ONIG_DEBUG_MATCH */
-/* #define ONIG_DONT_OPTIMIZE */
-
-/* for byte-code statistical data. */
-/* #define ONIG_DEBUG_STATISTICS */
-
-#ifndef RUBY
-#define RUBY
-#endif
-
-#include <stddef.h> //typedef unsigned int ptrdiff_t;
-
-#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
- defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
- defined(ONIG_DEBUG_STATISTICS)
-#ifndef ONIG_DEBUG
-#define ONIG_DEBUG
-#endif
-#endif
-
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
- (defined(__ppc__) && defined(__APPLE__)) || \
- defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \
- defined(__mc68020__)
-#define PLATFORM_UNALIGNED_WORD_ACCESS
-#endif
-
-/* config */
-/* spec. config */
-#define USE_NAMED_GROUP
-#define USE_SUBEXP_CALL
-#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
-#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
-#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
-/* #define USE_RECOMPILE_API */
-/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
-
-/* internal config */
-#define USE_PARSE_TREE_NODE_RECYCLE
-#define USE_OP_PUSH_OR_JUMP_EXACT
-#define USE_QTFR_PEEK_NEXT
-#define USE_ST_LIBRARY
-#define USE_SHARED_CCLASS_TABLE
-
-#define INIT_MATCH_STACK_SIZE 160
-#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
-
-#if defined(__GNUC__)
-# define ARG_UNUSED __attribute__ ((unused))
-#else
-# define ARG_UNUSED
-#endif
-
-/* */
-/* escape other system UChar definition */
-#ifndef RUBY_DEFINES_H
-#include "mruby.h"
-#endif
-#ifdef ONIG_ESCAPE_UCHAR_COLLISION
-#undef ONIG_ESCAPE_UCHAR_COLLISION
-#endif
-#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE
-#undef USE_CAPTURE_HISTORY
-#define USE_VARIABLE_META_CHARS
-#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
-#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
-#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
-/* #define USE_MULTI_THREAD_SYSTEM */
-#define THREAD_SYSTEM_INIT /* depend on thread system */
-#define THREAD_SYSTEM_END /* depend on thread system */
-#define THREAD_ATOMIC_START /* depend on thread system */
-#define THREAD_ATOMIC_END /* depend on thread system */
-#define THREAD_PASS /* depend on thread system */
-
-#ifdef RUBY
-
-//#define CHECK_INTERRUPT_IN_MATCH_AT mrb_thread_check_ints()
-#define CHECK_INTERRUPT_IN_MATCH_AT
-#define onig_st_init_table st_init_table
-#define onig_st_init_table_with_size st_init_table_with_size
-#define onig_st_init_numtable st_init_numtable
-#define onig_st_init_numtable_with_size st_init_numtable_with_size
-#define onig_st_init_strtable st_init_strtable
-#define onig_st_init_strtable_with_size st_init_strtable_with_size
-#define onig_st_delete st_delete
-#define onig_st_delete_safe st_delete_safe
-#define onig_st_insert st_insert
-#define onig_st_lookup st_lookup
-#define onig_st_foreach st_foreach
-#define onig_st_add_direct st_add_direct
-#define onig_st_free_table st_free_table
-#define onig_st_cleanup_safe st_cleanup_safe
-#define onig_st_copy st_copy
-#define onig_st_nothing_key_clone st_nothing_key_clone
-#define onig_st_nothing_key_free st_nothing_key_free
-#define onig_st_is_member st_is_member
-
-#define USE_UPPER_CASE_TABLE
-#else
-
-#define st_init_table onig_st_init_table
-#define st_init_table_with_size onig_st_init_table_with_size
-#define st_init_numtable onig_st_init_numtable
-#define st_init_numtable_with_size onig_st_init_numtable_with_size
-#define st_init_strtable onig_st_init_strtable
-#define st_init_strtable_with_size onig_st_init_strtable_with_size
-#define st_delete onig_st_delete
-#define st_delete_safe onig_st_delete_safe
-#define st_insert onig_st_insert
-#define st_lookup onig_st_lookup
-#define st_foreach onig_st_foreach
-#define st_add_direct onig_st_add_direct
-#define st_free_table onig_st_free_table
-#define st_cleanup_safe onig_st_cleanup_safe
-#define st_copy onig_st_copy
-#define st_nothing_key_clone onig_st_nothing_key_clone
-#define st_nothing_key_free onig_st_nothing_key_free
-/* */
-#define onig_st_is_member st_is_member
-
-#define CHECK_INTERRUPT_IN_MATCH_AT
-
-#endif
-
-#define STATE_CHECK_STRING_THRESHOLD_LEN 7
-#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
-
-#define THREAD_PASS_LIMIT_COUNT 8
-#define xmemset memset
-#define xmemcpy memcpy
-#define xmemmove memmove
-
-#if defined(_WIN32) && !defined(__GNUC__)
-#define xalloca _alloca
-#define xvsnprintf _vsnprintf
-#else
-#define xalloca malloc
-#define xvsnprintf vsnprintf
-#endif
-
-
-#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
-#define ONIG_STATE_INC(reg) (reg)->state++
-#define ONIG_STATE_DEC(reg) (reg)->state--
-
-#define ONIG_STATE_INC_THREAD(reg) do {\
- THREAD_ATOMIC_START;\
- (reg)->state++;\
- THREAD_ATOMIC_END;\
-} while(0)
-#define ONIG_STATE_DEC_THREAD(reg) do {\
- THREAD_ATOMIC_START;\
- (reg)->state--;\
- THREAD_ATOMIC_END;\
-} while(0)
-#else
-#define ONIG_STATE_INC(reg) /* Nothing */
-#define ONIG_STATE_DEC(reg) /* Nothing */
-#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
-#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
-#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-
-#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
-#include <alloca.h>
-#endif
-
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
-
-#include <ctype.h>
-#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
-#ifdef ONIG_DEBUG
-# include <stdio.h>
-#endif
-
-#include "regenc.h"
-
-#ifndef MIN
-#define MIN(a,b) (((a)>(b))?(b):(a))
-#endif
-
-#ifndef MAX
-#define MAX(a,b) (((a)<(b))?(b):(a))
-#endif
-
-#define IS_NULL(p) (((void*)(p)) == (void*)0)
-#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
-#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL
-#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
-#define NULL_UCHARP ((UChar* )0)
-
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-
-#define PLATFORM_GET_INC(val,p,type) do{\
- val = *(type* )p;\
- (p) += sizeof(type);\
-} while(0)
-
-#else
-
-#define PLATFORM_GET_INC(val,p,type) do{\
- xmemcpy(&val, (p), sizeof(type));\
- (p) += sizeof(type);\
-} while(0)
-
-/* sizeof(OnigCodePoint) */
-#define WORD_ALIGNMENT_SIZE sizeof(uintptr_t)
-
-#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
- (pad_size) = WORD_ALIGNMENT_SIZE \
- - ((uintptr_t)(addr) % WORD_ALIGNMENT_SIZE);\
- if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
-} while (0)
-
-#define ALIGNMENT_RIGHT(addr) do {\
- (addr) += (WORD_ALIGNMENT_SIZE - 1);\
- (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
-} while (0)
-
-#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
-
-/* stack pop level */
-#define STACK_POP_LEVEL_FREE 0
-#define STACK_POP_LEVEL_MEM_START 1
-#define STACK_POP_LEVEL_ALL 2
-
-/* optimize flags */
-#define ONIG_OPTIMIZE_NONE 0
-#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
-#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
-#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
-#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
-#define ONIG_OPTIMIZE_MAP 5 /* char map */
-
-/* bit status */
-typedef unsigned int BitStatusType;
-
-#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8)
-#define BIT_STATUS_CLEAR(stats) (stats) = 0
-#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
-#define BIT_STATUS_AT(stats,n) \
- ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1))
-
-#define BIT_STATUS_ON_AT(stats,n) do {\
- if ((n) < (int )BIT_STATUS_BITS_NUM) \
- (stats) |= (1 << (n));\
- else\
- (stats) |= 1;\
-} while (0)
-
-#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
- if ((n) < (int )BIT_STATUS_BITS_NUM)\
- (stats) |= (1 << (n));\
-} while (0)
-
-
-#define INT_MAX_LIMIT ((1UL << (sizeof(int) * 8 - 1)) - 1)
-
-#define DIGITVAL(code) ((code) - '0')
-#define ODIGITVAL(code) DIGITVAL(code)
-#define XDIGITVAL(enc,code) \
- (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \
- : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10))
-
-#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE)
-#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE)
-#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE)
-#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND)
-#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST)
-#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
-#define IS_FIND_CONDITION(option) ((option) & \
- (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
-#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL)
-#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
-#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
-
-/* OP_SET_OPTION is required for these options.
-#define IS_DYNAMIC_OPTION(option) \
- (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0)
-*/
-/* ignore-case and multibyte status are included in compiled code. */
-#define IS_DYNAMIC_OPTION(option) 0
-
-#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
- ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
-
-#define REPEAT_INFINITE -1
-#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
-
-/* bitset */
-#define BITS_PER_BYTE 8
-#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
-#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
-#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
-
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-typedef unsigned int Bits;
-#else
-typedef unsigned char Bits;
-#endif
-typedef Bits BitSet[BITSET_SIZE];
-typedef Bits* BitSetRef;
-
-#define SIZE_BITSET (int)sizeof(BitSet)
-
-#define BITSET_CLEAR(bs) do {\
- int i;\
- for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
-} while (0)
-
-#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM]
-#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM))
-
-#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
-#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
-#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos))
-#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos)
-
-/* bytes buffer */
-typedef struct _BBuf {
- UChar* p;
- unsigned int used;
- unsigned int alloc;
-} BBuf;
-
-#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size))
-
-#define BBUF_SIZE_INC(buf,inc) do{\
- (buf)->alloc += (inc);\
- (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
- if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
-} while (0)
-
-#define BBUF_EXPAND(buf,low) do{\
- do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\
- (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
- if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
-} while (0)
-
-#define BBUF_ENSURE_SIZE(buf,size) do{\
- unsigned int new_alloc = (buf)->alloc;\
- while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\
- if ((buf)->alloc != new_alloc) {\
- (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\
- if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
- (buf)->alloc = new_alloc;\
- }\
-} while (0)
-
-#define BBUF_WRITE(buf,pos,bytes,n) do{\
- int used = (pos) + (n);\
- if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
- xmemcpy((buf)->p + (pos), (bytes), (n));\
- if ((buf)->used < (unsigned int )used) (buf)->used = used;\
-} while (0)
-
-#define BBUF_WRITE1(buf,pos,byte) do{\
- int used = (pos) + 1;\
- if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\
- (buf)->p[(pos)] = (byte);\
- if ((buf)->used < (unsigned int )used) (buf)->used = used;\
-} while (0)
-
-#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n))
-#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte))
-#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used)
-#define BBUF_GET_OFFSET_POS(buf) ((buf)->used)
-
-/* from < to */
-#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\
- if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\
- xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
- if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\
-} while (0)
-
-/* from > to */
-#define BBUF_MOVE_LEFT(buf,from,to,n) do {\
- xmemmove((buf)->p + (to), (buf)->p + (from), (n));\
-} while (0)
-
-/* from > to */
-#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\
- xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\
- (buf)->used -= (from - to);\
-} while (0)
-
-#define BBUF_INSERT(buf,pos,bytes,n) do {\
- if (pos >= (buf)->used) {\
- BBUF_WRITE(buf,pos,bytes,n);\
- }\
- else {\
- BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\
- xmemcpy((buf)->p + (pos), (bytes), (n));\
- }\
-} while (0)
-
-#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
-
-
-#define ANCHOR_BEGIN_BUF (1<<0)
-#define ANCHOR_BEGIN_LINE (1<<1)
-#define ANCHOR_BEGIN_POSITION (1<<2)
-#define ANCHOR_END_BUF (1<<3)
-#define ANCHOR_SEMI_END_BUF (1<<4)
-#define ANCHOR_END_LINE (1<<5)
-
-#define ANCHOR_WORD_BOUND (1<<6)
-#define ANCHOR_NOT_WORD_BOUND (1<<7)
-#define ANCHOR_WORD_BEGIN (1<<8)
-#define ANCHOR_WORD_END (1<<9)
-#define ANCHOR_PREC_READ (1<<10)
-#define ANCHOR_PREC_READ_NOT (1<<11)
-#define ANCHOR_LOOK_BEHIND (1<<12)
-#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
-
-#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
-#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
-
-/* operation code */
-enum OpCode {
- OP_FINISH = 0, /* matching process terminator (no more alternative) */
- OP_END = 1, /* pattern code terminator (success end) */
-
- OP_EXACT1 = 2, /* single byte, N = 1 */
- OP_EXACT2, /* single byte, N = 2 */
- OP_EXACT3, /* single byte, N = 3 */
- OP_EXACT4, /* single byte, N = 4 */
- OP_EXACT5, /* single byte, N = 5 */
- OP_EXACTN, /* single byte */
- OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
- OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
- OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
- OP_EXACTMB2N, /* mb-length = 2 */
- OP_EXACTMB3N, /* mb-length = 3 */
- OP_EXACTMBN, /* other length */
-
- OP_EXACT1_IC, /* single byte, N = 1, ignore case */
- OP_EXACTN_IC, /* single byte, ignore case */
-
- OP_CCLASS,
- OP_CCLASS_MB,
- OP_CCLASS_MIX,
- OP_CCLASS_NOT,
- OP_CCLASS_MB_NOT,
- OP_CCLASS_MIX_NOT,
- OP_CCLASS_NODE, /* pointer to CClassNode node */
-
- OP_ANYCHAR, /* "." */
- OP_ANYCHAR_ML, /* "." multi-line */
- OP_ANYCHAR_STAR, /* ".*" */
- OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
- OP_ANYCHAR_STAR_PEEK_NEXT,
- OP_ANYCHAR_ML_STAR_PEEK_NEXT,
-
- OP_WORD,
- OP_NOT_WORD,
- OP_WORD_BOUND,
- OP_NOT_WORD_BOUND,
- OP_WORD_BEGIN,
- OP_WORD_END,
-
- OP_BEGIN_BUF,
- OP_END_BUF,
- OP_BEGIN_LINE,
- OP_END_LINE,
- OP_SEMI_END_BUF,
- OP_BEGIN_POSITION,
-
- OP_BACKREF1,
- OP_BACKREF2,
- OP_BACKREFN,
- OP_BACKREFN_IC,
- OP_BACKREF_MULTI,
- OP_BACKREF_MULTI_IC,
- OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
-
- OP_MEMORY_START,
- OP_MEMORY_START_PUSH, /* push back-tracker to stack */
- OP_MEMORY_END_PUSH, /* push back-tracker to stack */
- OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
- OP_MEMORY_END,
- OP_MEMORY_END_REC, /* push marker to stack */
-
- OP_FAIL, /* pop stack and move */
- OP_JUMP,
- OP_PUSH,
- OP_POP,
- OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
- OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
- OP_REPEAT, /* {n,m} */
- OP_REPEAT_NG, /* {n,m}? (non greedy) */
- OP_REPEAT_INC,
- OP_REPEAT_INC_NG, /* non greedy */
- OP_REPEAT_INC_SG, /* search and get in stack */
- OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
- OP_NULL_CHECK_START, /* null loop checker start */
- OP_NULL_CHECK_END, /* null loop checker end */
- OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
- OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
-
- OP_PUSH_POS, /* (?=...) start */
- OP_POP_POS, /* (?=...) end */
- OP_PUSH_POS_NOT, /* (?!...) start */
- OP_FAIL_POS, /* (?!...) end */
- OP_PUSH_STOP_BT, /* (?>...) start */
- OP_POP_STOP_BT, /* (?>...) end */
- OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
- OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
- OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
-
- OP_CALL, /* \g<name> */
- OP_RETURN,
-
- OP_STATE_CHECK_PUSH, /* combination explosion check and push */
- OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
- OP_STATE_CHECK, /* check only */
- OP_STATE_CHECK_ANYCHAR_STAR,
- OP_STATE_CHECK_ANYCHAR_ML_STAR,
-
- /* no need: IS_DYNAMIC_OPTION() == 0 */
- OP_SET_OPTION_PUSH, /* set option and push recover option */
- OP_SET_OPTION /* set option */
-};
-
-typedef int RelAddrType;
-typedef int AbsAddrType;
-typedef int LengthType;
-typedef int RepeatNumType;
-typedef short int MemNumType;
-typedef short int StateCheckNumType;
-typedef void* PointerType;
-
-#define SIZE_OPCODE 1
-#define SIZE_RELADDR (int)sizeof(RelAddrType)
-#define SIZE_ABSADDR (int)sizeof(AbsAddrType)
-#define SIZE_LENGTH (int)sizeof(LengthType)
-#define SIZE_MEMNUM (int)sizeof(MemNumType)
-#define SIZE_STATE_CHECK_NUM (int)sizeof(StateCheckNumType)
-#define SIZE_REPEATNUM (int)sizeof(RepeatNumType)
-#define SIZE_OPTION (int)sizeof(OnigOptionType)
-#define SIZE_CODE_POINT (int)sizeof(OnigCodePoint)
-#define SIZE_POINTER (int)sizeof(PointerType)
-
-
-#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
-#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
-#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
-#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
-#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
-#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
-#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
-#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
-
-/* code point's address must be aligned address. */
-#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
-#define GET_BYTE_INC(byte,p) do{\
- byte = *(p);\
- (p)++;\
-} while(0)
-
-
-/* op-code + arg size */
-#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
-#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
-#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_POP SIZE_OPCODE
-#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
-#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
-#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_PUSH_POS SIZE_OPCODE
-#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_POP_POS SIZE_OPCODE
-#define SIZE_OP_FAIL_POS SIZE_OPCODE
-#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION)
-#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION)
-#define SIZE_OP_FAIL SIZE_OPCODE
-#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
-#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
-#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
-#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
-#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
-#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
-#define SIZE_OP_RETURN SIZE_OPCODE
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
-#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
-#endif
-
-#define MC_ESC(syn) (syn)->meta_char_table.esc
-#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar
-#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime
-#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time
-#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time
-#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime
-
-#define IS_MC_ESC_CODE(code, syn) \
- ((code) == MC_ESC(syn) && \
- !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
-
-
-#define SYN_POSIX_COMMON_OP \
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
- ONIG_SYN_OP_DECIMAL_BACKREF | \
- ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \
- ONIG_SYN_OP_LINE_ANCHOR | \
- ONIG_SYN_OP_ESC_CONTROL_CHARS )
-
-#define SYN_GNU_REGEX_OP \
- ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \
- ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \
- ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \
- ONIG_SYN_OP_VBAR_ALT | \
- ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \
- ONIG_SYN_OP_QMARK_ZERO_ONE | \
- ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \
- ONIG_SYN_OP_ESC_W_WORD | \
- ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \
- ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \
- ONIG_SYN_OP_LINE_ANCHOR )
-
-#define SYN_GNU_REGEX_BV \
- ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \
- ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
- ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
-
-
-#define NCCLASS_FLAGS(cc) ((cc)->flags)
-#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag))
-#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag))
-#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
-
-/* cclass node */
-#define FLAG_NCCLASS_NOT (1<<0)
-#define FLAG_NCCLASS_SHARE (1<<1)
-
-#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
-#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
-#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
-#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
-
-typedef struct {
- int type;
- /* struct _Node* next; */
- /* unsigned int flags; */
-} NodeBase;
-
-typedef struct {
- NodeBase base;
- unsigned int flags;
- BitSet bs;
- BBuf* mbuf; /* multi-byte info or NULL */
-} CClassNode;
-
-typedef intptr_t OnigStackIndex;
-
-typedef struct _OnigStackType {
- unsigned int type;
- union {
- struct {
- UChar *pcode; /* byte code position */
- UChar *pstr; /* string position */
- UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
- } state;
- struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- UChar *pcode; /* byte code position (head of repeated target) */
- int num; /* repeat id */
- } repeat;
- struct {
- OnigStackIndex si; /* index of stack */
- } repeat_inc;
- struct {
- int num; /* memory num */
- UChar *pstr; /* start/end position */
- /* Following information is setted, if this stack type is MEM-START */
- OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
- OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
- } mem;
- struct {
- int num; /* null check id */
- UChar *pstr; /* start position */
- } null_check;
-#ifdef USE_SUBEXP_CALL
- struct {
- UChar *ret_addr; /* byte code position */
- int num; /* null check id */
- UChar *pstr; /* string position */
- } call_frame;
-#endif
- } u;
-} OnigStackType;
-
-typedef struct {
- void* stack_p;
- size_t stack_n;
- OnigOptionType options;
- OnigRegion* region;
- const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- int best_len; /* for ONIG_OPTION_FIND_LONGEST */
- UChar* best_s;
-#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- void* state_check_buff;
- int state_check_buff_size;
-#endif
-} OnigMatchArg;
-
-
-#define IS_CODE_SB_WORD(enc,code) \
- (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
-
-#ifdef ONIG_DEBUG
-
-typedef struct {
- short int opcode;
- char* name;
- short int arg_type;
-} OnigOpInfoType;
-
-extern OnigOpInfoType OnigOpInfo[];
-
-/* extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc); */
-
-#ifdef ONIG_DEBUG_STATISTICS
-extern void onig_statistics_init(void);
-extern void onig_print_statistics(FILE* f);
-#endif
-#endif
-
-extern UChar* onig_error_code_to_format(int code);
-extern void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...);
-extern int onig_bbuf_init(BBuf* buf, int size);
-extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline);
-extern void onig_chain_reduce(regex_t* reg);
-extern void onig_chain_link_add(regex_t* to, regex_t* add);
-extern void onig_transfer(regex_t* to, regex_t* from);
-extern int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc);
-extern int onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc);
-
-/* strend hash */
-typedef void hash_table_type;
-#ifdef RUBY
-#include "st.h"
-
-typedef st_data_t hash_data_type;
-#else
-typedef unsigned long hash_data_type;
-#endif
-
-extern hash_table_type* onig_st_init_strend_table_with_size(st_index_t size);
-extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value);
-extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value);
-
-/* encoding property management */
-#define PROPERTY_LIST_ADD_PROP(Name, CR) \
- r = onigenc_property_list_add_property((UChar* )Name, CR,\
- &PropertyNameTable, &PropertyList, &PropertyListNum,\
- &PropertyListSize);\
- if (r != 0) goto end
-
-#define PROPERTY_LIST_INIT_CHECK \
- if (PropertyInited == 0) {\
- int r = onigenc_property_list_init(init_property_list);\
- if (r != 0) return r;\
- }
-
-extern int onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize);
-
-typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
-
-extern int onigenc_property_list_init(ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE);
-
-#endif /* ONIGURUMA_REGINT_H */
diff --git a/src/regparse.c b/src/regparse.c
deleted file mode 100644
index 0ecb01018..000000000
--- a/src/regparse.c
+++ /dev/null
@@ -1,5600 +0,0 @@
-/* -*- mode:c; c-file-style:"gnu" -*- */
-/**********************************************************************
- regparse.c - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "mruby.h"
-#include <string.h>
-#include "regparse.h"
-#include <stdarg.h>
-#ifdef ENABLE_REGEXP
-
-#define WARN_BUFSIZE 256
-
-#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
-
-
-const OnigSyntaxType OnigSyntaxRuby = {
- (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
- ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
- ONIG_SYN_OP_ESC_C_CONTROL )
- & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
- , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
- ONIG_SYN_OP2_OPTION_RUBY |
- ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
- ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
- ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
- ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
- ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
- ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
- ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
- ONIG_SYN_OP2_ESC_H_XDIGIT )
- , ( SYN_GNU_REGEX_BV |
- ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
- ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
- ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
- ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
- ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
- ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
- ONIG_SYN_WARN_CC_DUP |
- ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
- , ONIG_OPTION_NONE
- ,
- {
- (OnigCodePoint )'\\' /* esc */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
- , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
- }
-};
-
-const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
-
-extern void onig_null_warn(const char* s ARG_UNUSED) { }
-
-#ifdef DEFAULT_WARN_FUNCTION
-static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
-#else
-static OnigWarnFunc onig_warn = onig_null_warn;
-#endif
-
-#ifdef DEFAULT_VERB_WARN_FUNCTION
-static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
-#else
-static OnigWarnFunc onig_verb_warn = onig_null_warn;
-#endif
-
-extern void onig_set_warn_func(OnigWarnFunc f)
-{
- onig_warn = f;
-}
-
-extern void onig_set_verb_warn_func(OnigWarnFunc f)
-{
- onig_verb_warn = f;
-}
-
-static void CC_DUP_WARN(ScanEnv *env);
-
-static void
-bbuf_free(BBuf* bbuf)
-{
- if (IS_NOT_NULL(bbuf)) {
- if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
- xfree(bbuf);
- }
-}
-
-static int
-bbuf_clone(BBuf** rto, BBuf* from)
-{
- int r;
- BBuf *to;
-
- *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_MEMERR(to);
- r = BBUF_INIT(to, from->alloc);
- if (r != 0) return r;
- to->used = from->used;
- xmemcpy(to->p, from->p, from->used);
- return 0;
-}
-
-#define BACKREF_REL_TO_ABS(rel_no, env) \
- ((env)->num_mem + 1 + (rel_no))
-
-#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
-
-#define MBCODE_START_POS(enc) \
- (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
-
-#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
- add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))
-
-#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
- r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
- if (r) return r;\
- }\
-} while (0)
-
-
-#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
- if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
- BS_ROOM(bs, pos) |= BS_BIT(pos); \
-} while (0)
-
-#define BITSET_IS_EMPTY(bs,empty) do {\
- int i;\
- empty = 1;\
- for (i = 0; i < (int )BITSET_SIZE; i++) {\
- if ((bs)[i] != 0) {\
- empty = 0; break;\
- }\
- }\
-} while (0)
-
-static void
-bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
-{
- int i;
- for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
- BITSET_SET_BIT_CHKDUP(bs, i);
- }
-}
-
-static void
-bitset_invert(BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
-}
-
-static void
-bitset_invert_to(BitSetRef from, BitSetRef to)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
-}
-
-static void
-bitset_and(BitSetRef dest, BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
-}
-
-static void
-bitset_or(BitSetRef dest, BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
-}
-
-static void
-bitset_copy(BitSetRef dest, BitSetRef bs)
-{
- int i;
- for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
-}
-
-extern int
-onig_strncmp(const UChar* s1, const UChar* s2, int n)
-{
- int x;
-
- while (n-- > 0) {
- x = *s2++ - *s1++;
- if (x) return x;
- }
- return 0;
-}
-
-extern void
-onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
-{
- ptrdiff_t len = end - src;
- if (len > 0) {
- xmemcpy(dest, src, len);
- dest[len] = (UChar )0;
- }
-}
-
-#ifdef USE_NAMED_GROUP
-static UChar*
-strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
-{
- ptrdiff_t slen;
- int term_len, i;
- UChar *r;
-
- slen = end - s;
- term_len = ONIGENC_MBC_MINLEN(enc);
-
- r = (UChar* )xmalloc(slen + term_len);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, slen);
-
- for (i = 0; i < term_len; i++)
- r[slen + i] = (UChar )0;
-
- return r;
-}
-#endif
-
-/* scan pattern methods */
-#define PEND_VALUE 0
-
-#define PFETCH_READY UChar* pfetch_prev
-#define PEND (p < end ? 0 : 1)
-#define PUNFETCH p = pfetch_prev
-#define PINC do { \
- pfetch_prev = p; \
- p += enclen(enc, p, end); \
-} while (0)
-#define PFETCH(c) do { \
- c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
- pfetch_prev = p; \
- p += enclen(enc, p, end); \
-} while (0)
-
-#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
-#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
-
-static UChar*
-strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
- int capa)
-{
- UChar* r;
-
- if (dest)
- r = (UChar* )xrealloc(dest, capa + 1);
- else
- r = (UChar* )xmalloc(capa + 1);
-
- CHECK_NULL_RETURN(r);
- onig_strcpy(r + (dest_end - dest), src, src_end);
- return r;
-}
-
-/* dest on static area */
-static UChar*
-strcat_capa_from_static(UChar* dest, UChar* dest_end,
- const UChar* src, const UChar* src_end, int capa)
-{
- UChar* r;
-
- r = (UChar* )xmalloc(capa + 1);
- CHECK_NULL_RETURN(r);
- onig_strcpy(r, dest, dest_end);
- onig_strcpy(r + (dest_end - dest), src, src_end);
- return r;
-}
-#endif //ENABLE_REGEXP
-
-#ifdef INCLUDE_ENCODING
-#ifdef USE_ST_LIBRARY
-
-//#include "st.h"
-
-typedef struct {
- const UChar* s;
- const UChar* end;
-} st_str_end_key;
-
-static int
-str_end_cmp(st_data_t xp, st_data_t yp)
-{
- const st_str_end_key *x, *y;
- const UChar *p, *q;
- int c;
-
- x = (const st_str_end_key*)xp;
- y = (const st_str_end_key*)yp;
- if ((x->end - x->s) != (y->end - y->s))
- return 1;
-
- p = x->s;
- q = y->s;
- while (p < x->end) {
- c = (int )*p - (int )*q;
- if (c != 0) return c;
-
- p++; q++;
- }
-
- return 0;
-}
-
-static st_index_t
-str_end_hash(st_data_t xp)
-{
- const st_str_end_key *x = (const st_str_end_key*)xp;
- const UChar *p;
- st_index_t val = 0;
-
- p = x->s;
- while (p < x->end) {
- val = val * 997 + (int )*p++;
- }
-
- return val + (val >> 5);
-}
-
-extern hash_table_type*
-onig_st_init_strend_table_with_size(st_index_t size)
-{
- static const struct st_hash_type hashType = {
- str_end_cmp,
- str_end_hash,
- };
-
- return (hash_table_type* )
- onig_st_init_table_with_size(&hashType, size);
-}
-
-extern int
-onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
- const UChar* end_key, hash_data_type *value)
-{
- st_str_end_key key;
-
- key.s = (UChar* )str_key;
- key.end = (UChar* )end_key;
-
- return onig_st_lookup(table, (st_data_t )(&key), value);
-}
-
-extern int
-onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
- const UChar* end_key, hash_data_type value)
-{
- st_str_end_key* key;
- int result;
-
- key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
- key->s = (UChar* )str_key;
- key->end = (UChar* )end_key;
- result = onig_st_insert(table, (st_data_t )key, value);
- if (result) {
- xfree(key);
- }
- return result;
-}
-
-#endif /* USE_ST_LIBRARY */
-#endif //INCLUDE_ENCODING
-
-#ifdef ENABLE_REGEXP
-#ifdef USE_NAMED_GROUP
-
-#define INIT_NAME_BACKREFS_ALLOC_NUM 8
-
-typedef struct {
- UChar* name;
- size_t name_len; /* byte length */
- int back_num; /* number of backrefs */
- int back_alloc;
- int back_ref1;
- int* back_refs;
-} NameEntry;
-
-#ifdef USE_ST_LIBRARY
-
-typedef st_table NameTable;
-typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
-
-#define NAMEBUF_SIZE 24
-#define NAMEBUF_SIZE_1 25
-
-#ifdef ONIG_DEBUG
-static enum st_retval
-i_print_name_entry(UChar* key, NameEntry* e, void* arg)
-{
- int i;
- FILE* fp = (FILE* )arg;
-
- fprintf(fp, "%s: ", e->name);
- if (e->back_num == 0)
- fputs("-", fp);
- else if (e->back_num == 1)
- fprintf(fp, "%d", e->back_ref1);
- else {
- for (i = 0; i < e->back_num; i++) {
- if (i > 0) fprintf(fp, ", ");
- fprintf(fp, "%d", e->back_refs[i]);
- }
- }
- fputs("\n", fp);
- return ST_CONTINUE;
-}
-
-extern int
-onig_print_names(FILE* fp, regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- fprintf(fp, "name table\n");
- onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
- fputs("\n", fp);
- }
- return 0;
-}
-#endif /* ONIG_DEBUG */
-
-static enum st_retval
-i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
-{
- xfree(e->name);
- if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
- xfree(key);
- xfree(e);
- return ST_DELETE;
-}
-
-static int
-names_clear(regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- onig_st_foreach(t, i_free_name_entry, 0);
- }
- return 0;
-}
-
-extern int
-onig_names_free(regex_t* reg)
-{
- int r;
- NameTable* t;
-
- r = names_clear(reg);
- if (r) return r;
-
- t = (NameTable* )reg->name_table;
- if (IS_NOT_NULL(t)) onig_st_free_table(t);
- reg->name_table = (void* )NULL;
- return 0;
-}
-
-static NameEntry*
-name_find(regex_t* reg, const UChar* name, const UChar* name_end)
-{
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- e = (NameEntry* )NULL;
- if (IS_NOT_NULL(t)) {
- onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
- }
- return e;
-}
-
-typedef struct {
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
- regex_t* reg;
- void* arg;
- int ret;
- OnigEncoding enc;
-} INamesArg;
-
-static enum st_retval
-i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
-{
- int r = (*(arg->func))(e->name,
- e->name + e->name_len,
- e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
- arg->reg, arg->arg);
- if (r != 0) {
- arg->ret = r;
- return ST_STOP;
- }
- return ST_CONTINUE;
-}
-
-extern int
-onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
-{
- INamesArg narg;
- NameTable* t = (NameTable* )reg->name_table;
-
- narg.ret = 0;
- if (IS_NOT_NULL(t)) {
- narg.func = func;
- narg.reg = reg;
- narg.arg = arg;
- narg.enc = reg->enc; /* should be pattern encoding. */
- onig_st_foreach(t, i_names, (HashDataType )&narg);
- }
- return narg.ret;
-}
-
-static enum st_retval
-i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
-{
- int i;
-
- if (e->back_num > 1) {
- for (i = 0; i < e->back_num; i++) {
- e->back_refs[i] = map[e->back_refs[i]].new_val;
- }
- }
- else if (e->back_num == 1) {
- e->back_ref1 = map[e->back_ref1].new_val;
- }
-
- return ST_CONTINUE;
-}
-
-extern int
-onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- onig_st_foreach(t, i_renumber_name, (HashDataType )map);
- }
- return 0;
-}
-
-
-extern int
-onig_number_of_names(regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t))
- return t->num_entries;
- else
- return 0;
-}
-
-#else /* USE_ST_LIBRARY */
-
-#define INIT_NAMES_ALLOC_NUM 8
-
-typedef struct {
- NameEntry* e;
- int num;
- int alloc;
-} NameTable;
-
-#ifdef ONIG_DEBUG
-extern int
-onig_print_names(FILE* fp, regex_t* reg)
-{
- int i, j;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t) && t->num > 0) {
- fprintf(fp, "name table\n");
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- fprintf(fp, "%s: ", e->name);
- if (e->back_num == 0) {
- fputs("-", fp);
- }
- else if (e->back_num == 1) {
- fprintf(fp, "%d", e->back_ref1);
- }
- else {
- for (j = 0; j < e->back_num; j++) {
- if (j > 0) fprintf(fp, ", ");
- fprintf(fp, "%d", e->back_refs[j]);
- }
- }
- fputs("\n", fp);
- }
- fputs("\n", fp);
- }
- return 0;
-}
-#endif
-
-static int
-names_clear(regex_t* reg)
-{
- int i;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- if (IS_NOT_NULL(e->name)) {
- xfree(e->name);
- e->name = NULL;
- e->name_len = 0;
- e->back_num = 0;
- e->back_alloc = 0;
- if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
- e->back_refs = (int* )NULL;
- }
- }
- if (IS_NOT_NULL(t->e)) {
- xfree(t->e);
- t->e = NULL;
- }
- t->num = 0;
- }
- return 0;
-}
-
-extern int
-onig_names_free(regex_t* reg)
-{
- int r;
- NameTable* t;
-
- r = names_clear(reg);
- if (r) return r;
-
- t = (NameTable* )reg->name_table;
- if (IS_NOT_NULL(t)) xfree(t);
- reg->name_table = NULL;
- return 0;
-}
-
-static NameEntry*
-name_find(regex_t* reg, UChar* name, UChar* name_end)
-{
- int i, len;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- len = name_end - name;
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
- return e;
- }
- }
- return (NameEntry* )NULL;
-}
-
-extern int
-onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
-{
- int i, r;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t)) {
- for (i = 0; i < t->num; i++) {
- e = &(t->e[i]);
- r = (*func)(e->name, e->name + e->name_len, e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
- reg, arg);
- if (r != 0) return r;
- }
- }
- return 0;
-}
-
-extern int
-onig_number_of_names(regex_t* reg)
-{
- NameTable* t = (NameTable* )reg->name_table;
-
- if (IS_NOT_NULL(t))
- return t->num;
- else
- return 0;
-}
-
-#endif /* else USE_ST_LIBRARY */
-
-static int
-name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
-{
- int alloc;
- NameEntry* e;
- NameTable* t = (NameTable* )reg->name_table;
-
- if (name_end - name <= 0)
- return ONIGERR_EMPTY_GROUP_NAME;
-
- e = name_find(reg, name, name_end);
- if (IS_NULL(e)) {
-#ifdef USE_ST_LIBRARY
- if (IS_NULL(t)) {
- t = onig_st_init_strend_table_with_size(5);
- reg->name_table = (void* )t;
- }
- e = (NameEntry* )xmalloc(sizeof(NameEntry));
- CHECK_NULL_RETURN_MEMERR(e);
-
- e->name = strdup_with_null(reg->enc, name, name_end);
- if (IS_NULL(e->name)) {
- xfree(e);
- return ONIGERR_MEMORY;
- }
- onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
- (HashDataType )e);
-
- e->name_len = name_end - name;
- e->back_num = 0;
- e->back_alloc = 0;
- e->back_refs = (int* )NULL;
-
-#else
-
- if (IS_NULL(t)) {
- alloc = INIT_NAMES_ALLOC_NUM;
- t = (NameTable* )xmalloc(sizeof(NameTable));
- CHECK_NULL_RETURN_MEMERR(t);
- t->e = NULL;
- t->alloc = 0;
- t->num = 0;
-
- t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
- if (IS_NULL(t->e)) {
- xfree(t);
- return ONIGERR_MEMORY;
- }
- t->alloc = alloc;
- reg->name_table = t;
- goto clear;
- }
- else if (t->num == t->alloc) {
- int i;
-
- alloc = t->alloc * 2;
- t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
- CHECK_NULL_RETURN_MEMERR(t->e);
- t->alloc = alloc;
-
- clear:
- for (i = t->num; i < t->alloc; i++) {
- t->e[i].name = NULL;
- t->e[i].name_len = 0;
- t->e[i].back_num = 0;
- t->e[i].back_alloc = 0;
- t->e[i].back_refs = (int* )NULL;
- }
- }
- e = &(t->e[t->num]);
- t->num++;
- e->name = strdup_with_null(reg->enc, name, name_end);
- if (IS_NULL(e->name)) return ONIGERR_MEMORY;
- e->name_len = name_end - name;
-#endif
- }
-
- if (e->back_num >= 1 &&
- ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
- onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
- name, name_end);
- return ONIGERR_MULTIPLEX_DEFINED_NAME;
- }
-
- e->back_num++;
- if (e->back_num == 1) {
- e->back_ref1 = backref;
- }
- else {
- if (e->back_num == 2) {
- alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
- e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
- CHECK_NULL_RETURN_MEMERR(e->back_refs);
- e->back_alloc = alloc;
- e->back_refs[0] = e->back_ref1;
- e->back_refs[1] = backref;
- }
- else {
- if (e->back_num > e->back_alloc) {
- alloc = e->back_alloc * 2;
- e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
- CHECK_NULL_RETURN_MEMERR(e->back_refs);
- e->back_alloc = alloc;
- }
- e->back_refs[e->back_num - 1] = backref;
- }
- }
-
- return 0;
-}
-
-extern int
-onig_name_to_group_numbers(regex_t* reg, const UChar* name,
- const UChar* name_end, int** nums)
-{
- NameEntry* e = name_find(reg, name, name_end);
-
- if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
-
- switch (e->back_num) {
- case 0:
- *nums = 0;
- break;
- case 1:
- *nums = &(e->back_ref1);
- break;
- default:
- *nums = e->back_refs;
- break;
- }
- return e->back_num;
-}
-
-extern int
-onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion *region)
-{
- int i, n, *nums;
-
- n = onig_name_to_group_numbers(reg, name, name_end, &nums);
- if (n < 0)
- return n;
- else if (n == 0)
- return ONIGERR_PARSER_BUG;
- else if (n == 1)
- return nums[0];
- else {
- if (IS_NOT_NULL(region)) {
- for (i = n - 1; i >= 0; i--) {
- if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
- return nums[i];
- }
- }
- return nums[n - 1];
- }
-}
-
-#else /* USE_NAMED_GROUP */
-
-extern int
-onig_name_to_group_numbers(regex_t* reg, const UChar* name,
- const UChar* name_end, int** nums)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion* region)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onig_number_of_names(regex_t* reg)
-{
- return 0;
-}
-#endif /* else USE_NAMED_GROUP */
-
-extern int
-onig_noname_group_capture_is_active(regex_t* reg)
-{
- if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
- return 0;
-
-#ifdef USE_NAMED_GROUP
- if (onig_number_of_names(reg) > 0 &&
- IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
- return 0;
- }
-#endif
-
- return 1;
-}
-
-
-#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
-
-static void
-scan_env_clear(ScanEnv* env)
-{
- int i;
-
- BIT_STATUS_CLEAR(env->capture_history);
- BIT_STATUS_CLEAR(env->bt_mem_start);
- BIT_STATUS_CLEAR(env->bt_mem_end);
- BIT_STATUS_CLEAR(env->backrefed_mem);
- env->error = (UChar* )NULL;
- env->error_end = (UChar* )NULL;
- env->num_call = 0;
- env->num_mem = 0;
-#ifdef USE_NAMED_GROUP
- env->num_named = 0;
-#endif
- env->mem_alloc = 0;
- env->mem_nodes_dynamic = (Node** )NULL;
-
- for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
- env->mem_nodes_static[i] = NULL_NODE;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- env->num_comb_exp_check = 0;
- env->comb_exp_max_regnum = 0;
- env->curr_max_regnum = 0;
- env->has_recursion = 0;
-#endif
- env->warnings_flag = 0;
-}
-
-static int
-scan_env_add_mem_entry(ScanEnv* env)
-{
- int i, need, alloc;
- Node** p;
-
- need = env->num_mem + 1;
- if (need >= SCANENV_MEMNODES_SIZE) {
- if (env->mem_alloc <= need) {
- if (IS_NULL(env->mem_nodes_dynamic)) {
- alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
- p = (Node** )xmalloc(sizeof(Node*) * alloc);
- xmemcpy(p, env->mem_nodes_static,
- sizeof(Node*) * SCANENV_MEMNODES_SIZE);
- }
- else {
- alloc = env->mem_alloc * 2;
- p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
- }
- CHECK_NULL_RETURN_MEMERR(p);
-
- for (i = env->num_mem + 1; i < alloc; i++)
- p[i] = NULL_NODE;
-
- env->mem_nodes_dynamic = p;
- env->mem_alloc = alloc;
- }
- }
-
- env->num_mem++;
- return env->num_mem;
-}
-
-static int
-scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
-{
- if (env->num_mem >= num)
- SCANENV_MEM_NODES(env)[num] = node;
- else
- return ONIGERR_PARSER_BUG;
- return 0;
-}
-
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
-typedef struct _FreeNode {
- struct _FreeNode* next;
-} FreeNode;
-
-static FreeNode* FreeNodeList = (FreeNode* )NULL;
-#endif
-
-extern void
-onig_node_free(Node* node)
-{
- start:
- if (IS_NULL(node)) return ;
-
- switch (NTYPE(node)) {
- case NT_STR:
- if (NSTR(node)->capa != 0 &&
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
- xfree(NSTR(node)->s);
- }
- break;
-
- case NT_LIST:
- case NT_ALT:
- onig_node_free(NCAR(node));
- {
- Node* next_node = NCDR(node);
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- {
- FreeNode* n = (FreeNode* )node;
-
- THREAD_ATOMIC_START;
- n->next = FreeNodeList;
- FreeNodeList = n;
- THREAD_ATOMIC_END;
- }
-#else
- xfree(node);
-#endif
- node = next_node;
- goto start;
- }
- break;
-
- case NT_CCLASS:
- {
- CClassNode* cc = NCCLASS(node);
-
- if (IS_NCCLASS_SHARE(cc)) return ;
- if (cc->mbuf)
- bbuf_free(cc->mbuf);
- }
- break;
-
- case NT_QTFR:
- if (NQTFR(node)->target)
- onig_node_free(NQTFR(node)->target);
- break;
-
- case NT_ENCLOSE:
- if (NENCLOSE(node)->target)
- onig_node_free(NENCLOSE(node)->target);
- break;
-
- case NT_BREF:
- if (IS_NOT_NULL(NBREF(node)->back_dynamic))
- xfree(NBREF(node)->back_dynamic);
- break;
-
- case NT_ANCHOR:
- if (NANCHOR(node)->target)
- onig_node_free(NANCHOR(node)->target);
- break;
- }
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- {
- FreeNode* n = (FreeNode* )node;
-
- THREAD_ATOMIC_START;
- n->next = FreeNodeList;
- FreeNodeList = n;
- THREAD_ATOMIC_END;
- }
-#else
- xfree(node);
-#endif
-}
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
-extern int
-onig_free_node_list(void)
-{
- FreeNode* n;
-
- /* THREAD_ATOMIC_START; */
- while (IS_NOT_NULL(FreeNodeList)) {
- n = FreeNodeList;
- FreeNodeList = FreeNodeList->next;
- xfree(n);
- }
- /* THREAD_ATOMIC_END; */
- return 0;
-}
-#endif
-
-static Node*
-node_new(void)
-{
- Node* node;
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- THREAD_ATOMIC_START;
- if (IS_NOT_NULL(FreeNodeList)) {
- node = (Node* )FreeNodeList;
- FreeNodeList = FreeNodeList->next;
- THREAD_ATOMIC_END;
- return node;
- }
- THREAD_ATOMIC_END;
-#endif
-
- node = (Node* )xmalloc(sizeof(Node));
- /* xmemset(node, 0, sizeof(Node)); */
- return node;
-}
-
-
-static void
-initialize_cclass(CClassNode* cc)
-{
- BITSET_CLEAR(cc->bs);
- /* cc->base.flags = 0; */
- cc->flags = 0;
- cc->mbuf = NULL;
-}
-
-static Node*
-node_new_cclass(void)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_CCLASS);
- initialize_cclass(NCCLASS(node));
- return node;
-}
-
-static Node*
-node_new_cclass_by_codepoint_range(int is_not, OnigCodePoint sb_out,
- const OnigCodePoint ranges[])
-{
- int n, i;
- CClassNode* cc;
- OnigCodePoint j;
-
- Node* node = node_new_cclass();
- CHECK_NULL_RETURN(node);
-
- cc = NCCLASS(node);
- if (is_not != 0) NCCLASS_SET_NOT(cc);
-
- BITSET_CLEAR(cc->bs);
- if (sb_out > 0 && IS_NOT_NULL(ranges)) {
- n = ONIGENC_CODE_RANGE_NUM(ranges);
- for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
- j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
- if (j >= sb_out) goto sb_end;
-
- BITSET_SET_BIT(cc->bs, j);
- }
- }
- }
-
- sb_end:
- if (IS_NULL(ranges)) {
- is_null:
- cc->mbuf = NULL;
- }
- else {
- BBuf* bbuf;
-
- n = ONIGENC_CODE_RANGE_NUM(ranges);
- if (n == 0) goto is_null;
-
- bbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN(bbuf);
- bbuf->alloc = n + 1;
- bbuf->used = n + 1;
- bbuf->p = (UChar* )((void* )ranges);
-
- cc->mbuf = bbuf;
- }
-
- return node;
-}
-
-static Node*
-node_new_ctype(int type, int is_not)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_CTYPE);
- NCTYPE(node)->ctype = type;
- NCTYPE(node)->is_not = is_not;
- return node;
-}
-
-static Node*
-node_new_anychar(void)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_CANY);
- return node;
-}
-
-static Node*
-node_new_list(Node* left, Node* right)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_LIST);
- NCAR(node) = left;
- NCDR(node) = right;
- return node;
-}
-
-extern Node*
-onig_node_new_list(Node* left, Node* right)
-{
- return node_new_list(left, right);
-}
-
-extern Node*
-onig_node_list_add(Node* list, Node* x)
-{
- Node *n;
-
- n = onig_node_new_list(x, NULL);
- if (IS_NULL(n)) return NULL_NODE;
-
- if (IS_NOT_NULL(list)) {
- while (IS_NOT_NULL(NCDR(list)))
- list = NCDR(list);
-
- NCDR(list) = n;
- }
-
- return n;
-}
-
-extern Node*
-onig_node_new_alt(Node* left, Node* right)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_ALT);
- NCAR(node) = left;
- NCDR(node) = right;
- return node;
-}
-
-extern Node*
-onig_node_new_anchor(int type)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_ANCHOR);
- NANCHOR(node)->type = type;
- NANCHOR(node)->target = NULL;
- NANCHOR(node)->char_len = -1;
- return node;
-}
-
-static Node*
-node_new_backref(int back_num, int* backrefs, int by_name,
-#ifdef USE_BACKREF_WITH_LEVEL
- int exist_level, int nest_level,
-#endif
- ScanEnv* env)
-{
- int i;
- Node* node = node_new();
-
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_BREF);
- NBREF(node)->state = 0;
- NBREF(node)->back_num = back_num;
- NBREF(node)->back_dynamic = (int* )NULL;
- if (by_name != 0)
- NBREF(node)->state |= NST_NAME_REF;
-
-#ifdef USE_BACKREF_WITH_LEVEL
- if (exist_level != 0) {
- NBREF(node)->state |= NST_NEST_LEVEL;
- NBREF(node)->nest_level = nest_level;
- }
-#endif
-
- for (i = 0; i < back_num; i++) {
- if (backrefs[i] <= env->num_mem &&
- IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
- NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
- break;
- }
- }
-
- if (back_num <= NODE_BACKREFS_SIZE) {
- for (i = 0; i < back_num; i++)
- NBREF(node)->back_static[i] = backrefs[i];
- }
- else {
- int* p = (int* )xmalloc(sizeof(int) * back_num);
- if (IS_NULL(p)) {
- onig_node_free(node);
- return NULL;
- }
- NBREF(node)->back_dynamic = p;
- for (i = 0; i < back_num; i++)
- p[i] = backrefs[i];
- }
- return node;
-}
-
-#ifdef USE_SUBEXP_CALL
-static Node*
-node_new_call(UChar* name, UChar* name_end, int gnum)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_CALL);
- NCALL(node)->state = 0;
- NCALL(node)->target = NULL_NODE;
- NCALL(node)->name = name;
- NCALL(node)->name_end = name_end;
- NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
- return node;
-}
-#endif
-
-static Node*
-node_new_quantifier(int lower, int upper, int by_number)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_QTFR);
- NQTFR(node)->state = 0;
- NQTFR(node)->target = NULL;
- NQTFR(node)->lower = lower;
- NQTFR(node)->upper = upper;
- NQTFR(node)->greedy = 1;
- NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
- NQTFR(node)->head_exact = NULL_NODE;
- NQTFR(node)->next_head_exact = NULL_NODE;
- NQTFR(node)->is_refered = 0;
- if (by_number != 0)
- NQTFR(node)->state |= NST_BY_NUMBER;
-
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- NQTFR(node)->comb_exp_check_num = 0;
-#endif
-
- return node;
-}
-
-static Node*
-node_new_enclose(int type)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_ENCLOSE);
- NENCLOSE(node)->type = type;
- NENCLOSE(node)->state = 0;
- NENCLOSE(node)->regnum = 0;
- NENCLOSE(node)->option = 0;
- NENCLOSE(node)->target = NULL;
- NENCLOSE(node)->call_addr = -1;
- NENCLOSE(node)->opt_count = 0;
- return node;
-}
-
-extern Node*
-onig_node_new_enclose(int type)
-{
- return node_new_enclose(type);
-}
-
-static Node*
-node_new_enclose_memory(OnigOptionType option, int is_named)
-{
- Node* node = node_new_enclose(ENCLOSE_MEMORY);
- CHECK_NULL_RETURN(node);
- if (is_named != 0)
- SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
-
-#ifdef USE_SUBEXP_CALL
- NENCLOSE(node)->option = option;
-#endif
- return node;
-}
-
-static Node*
-node_new_option(OnigOptionType option)
-{
- Node* node = node_new_enclose(ENCLOSE_OPTION);
- CHECK_NULL_RETURN(node);
- NENCLOSE(node)->option = option;
- return node;
-}
-
-extern int
-onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
-{
- ptrdiff_t addlen = end - s;
-
- if (addlen > 0) {
- ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
-
- if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
- UChar* p;
- ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
-
- if (capa <= NSTR(node)->capa) {
- onig_strcpy(NSTR(node)->s + len, s, end);
- }
- else {
- if (NSTR(node)->s == NSTR(node)->buf)
- p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
- s, end, capa);
- else
- p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
-
- CHECK_NULL_RETURN_MEMERR(p);
- NSTR(node)->s = p;
- NSTR(node)->capa = capa;
- }
- }
- else {
- onig_strcpy(NSTR(node)->s + len, s, end);
- }
- NSTR(node)->end = NSTR(node)->s + len + addlen;
- }
-
- return 0;
-}
-
-extern int
-onig_node_str_set(Node* node, const UChar* s, const UChar* end)
-{
- onig_node_str_clear(node);
- return onig_node_str_cat(node, s, end);
-}
-
-static int
-node_str_cat_char(Node* node, UChar c)
-{
- UChar s[1];
-
- s[0] = c;
- return onig_node_str_cat(node, s, s + 1);
-}
-
-extern void
-onig_node_conv_to_str_node(Node* node, int flag)
-{
- SET_NTYPE(node, NT_STR);
- NSTR(node)->flag = flag;
- NSTR(node)->capa = 0;
- NSTR(node)->s = NSTR(node)->buf;
- NSTR(node)->end = NSTR(node)->buf;
-}
-
-extern void
-onig_node_str_clear(Node* node)
-{
- if (NSTR(node)->capa != 0 &&
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
- xfree(NSTR(node)->s);
- }
-
- NSTR(node)->capa = 0;
- NSTR(node)->flag = 0;
- NSTR(node)->s = NSTR(node)->buf;
- NSTR(node)->end = NSTR(node)->buf;
-}
-
-static Node*
-node_new_str(const UChar* s, const UChar* end)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_STR);
- NSTR(node)->capa = 0;
- NSTR(node)->flag = 0;
- NSTR(node)->s = NSTR(node)->buf;
- NSTR(node)->end = NSTR(node)->buf;
- if (onig_node_str_cat(node, s, end)) {
- onig_node_free(node);
- return NULL;
- }
- return node;
-}
-
-extern Node*
-onig_node_new_str(const UChar* s, const UChar* end)
-{
- return node_new_str(s, end);
-}
-
-static Node*
-node_new_str_raw(UChar* s, UChar* end)
-{
- Node* node = node_new_str(s, end);
- NSTRING_SET_RAW(node);
- return node;
-}
-
-static Node*
-node_new_empty(void)
-{
- return node_new_str(NULL, NULL);
-}
-
-static Node*
-node_new_str_raw_char(UChar c)
-{
- UChar p[1];
-
- p[0] = c;
- return node_new_str_raw(p, p + 1);
-}
-
-static Node*
-str_node_split_last_char(StrNode* sn, OnigEncoding enc)
-{
- const UChar *p;
- Node* n = NULL_NODE;
-
- if (sn->end > sn->s) {
- p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
- if (p && p > sn->s) { /* can be splitted. */
- n = node_new_str(p, sn->end);
- if ((sn->flag & NSTR_RAW) != 0)
- NSTRING_SET_RAW(n);
- sn->end = (UChar* )p;
- }
- }
- return n;
-}
-
-static int
-str_node_can_be_split(StrNode* sn, OnigEncoding enc)
-{
- if (sn->end > sn->s) {
- return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
- }
- return 0;
-}
-
-extern int
-onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
-{
- unsigned int num, val;
- OnigCodePoint c;
- UChar* p = *src;
- PFETCH_READY;
-
- num = 0;
- while (!PEND) {
- PFETCH(c);
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- val = (unsigned int )DIGITVAL(c);
- if ((INT_MAX_LIMIT - val) / 10UL < num)
- return -1; /* overflow */
-
- num = num * 10 + val;
- }
- else {
- PUNFETCH;
- break;
- }
- }
- *src = p;
- return num;
-}
-
-static int
-scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
- OnigEncoding enc)
-{
- OnigCodePoint c;
- unsigned int num, val;
- UChar* p = *src;
- PFETCH_READY;
-
- num = 0;
- while (!PEND && maxlen-- != 0) {
- PFETCH(c);
- if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
- val = (unsigned int )XDIGITVAL(enc,c);
- if ((INT_MAX_LIMIT - val) / 16UL < num)
- return -1; /* overflow */
-
- num = (num << 4) + XDIGITVAL(enc,c);
- }
- else {
- PUNFETCH;
- break;
- }
- }
- *src = p;
- return num;
-}
-
-static int
-scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
- OnigEncoding enc)
-{
- OnigCodePoint c;
- unsigned int num, val;
- UChar* p = *src;
- PFETCH_READY;
-
- num = 0;
- while (!PEND && maxlen-- != 0) {
- PFETCH(c);
- if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
- val = ODIGITVAL(c);
- if ((INT_MAX_LIMIT - val) / 8UL < num)
- return -1; /* overflow */
-
- num = (num << 3) + val;
- }
- else {
- PUNFETCH;
- break;
- }
- }
- *src = p;
- return num;
-}
-
-
-#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
- BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
-
-/* data format:
- [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
- (all data size is OnigCodePoint)
- */
-static int
-new_code_range(BBuf** pbuf)
-{
-#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
- int r;
- OnigCodePoint n;
- BBuf* bbuf;
-
- bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_MEMERR(*pbuf);
- r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
- if (r) return r;
-
- n = 0;
- BBUF_WRITE_CODE_POINT(bbuf, 0, n);
- return 0;
-}
-
-static int
-add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
- int checkdup)
-{
- int r, inc_n, pos;
- int low, high, bound, x;
- OnigCodePoint n, *data;
- BBuf* bbuf;
-
- if (from > to) {
- n = from; from = to; to = n;
- }
-
- if (IS_NULL(*pbuf)) {
- r = new_code_range(pbuf);
- if (r) return r;
- bbuf = *pbuf;
- n = 0;
- }
- else {
- bbuf = *pbuf;
- GET_CODE_POINT(n, bbuf->p);
- }
- data = (OnigCodePoint* )(bbuf->p);
- data++;
-
- for (low = 0, bound = n; low < bound; ) {
- x = (low + bound) >> 1;
- if (from > data[x*2 + 1])
- low = x + 1;
- else
- bound = x;
- }
-
- for (high = low, bound = n; high < bound; ) {
- x = (high + bound) >> 1;
- if (to >= data[x*2] - 1)
- high = x + 1;
- else
- bound = x;
- }
-
- inc_n = low + 1 - high;
- if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
- return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
-
- if (inc_n != 1) {
- if (checkdup && to >= data[low*2]) CC_DUP_WARN(env);
- if (from > data[low*2])
- from = data[low*2];
- if (to < data[(high - 1)*2 + 1])
- to = data[(high - 1)*2 + 1];
- }
-
- if (inc_n != 0 && (OnigCodePoint )high < n) {
- int from_pos = SIZE_CODE_POINT * (1 + high * 2);
- int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
- int size = (n - high) * 2 * SIZE_CODE_POINT;
-
- if (inc_n > 0) {
- BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
- }
- else {
- BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
- }
- }
-
- pos = SIZE_CODE_POINT * (1 + low * 2);
- BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
- BBUF_WRITE_CODE_POINT(bbuf, pos, from);
- BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
- n += inc_n;
- BBUF_WRITE_CODE_POINT(bbuf, 0, n);
-
- return 0;
-}
-
-static int
-add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
-{
- return add_code_range_to_buf0(pbuf, env, from, to, 1);
-}
-
-static int
-add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
-{
- if (from > to) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- return 0;
- else
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
-
- return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
-}
-
-static int
-add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
-{
- return add_code_range0(pbuf, env, from, to, 1);
-}
-
-static int
-not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
-{
- int r, i, n;
- OnigCodePoint pre, from, *data, to = 0;
-
- *pbuf = (BBuf* )NULL;
- if (IS_NULL(bbuf)) {
- set_all:
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
- }
-
- data = (OnigCodePoint* )(bbuf->p);
- GET_CODE_POINT(n, data);
- data++;
- if (n <= 0) goto set_all;
-
- r = 0;
- pre = MBCODE_START_POS(enc);
- for (i = 0; i < n; i++) {
- from = data[i*2];
- to = data[i*2+1];
- if (pre <= from - 1) {
- r = add_code_range_to_buf(pbuf, env, pre, from - 1);
- if (r != 0) return r;
- }
- if (to == ~((OnigCodePoint )0)) break;
- pre = to + 1;
- }
- if (to < ~((OnigCodePoint )0)) {
- r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
- }
- return r;
-}
-
-#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
- BBuf *tbuf; \
- int tnot; \
- tnot = not1; not1 = not2; not2 = tnot; \
- tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
-} while (0)
-
-static int
-or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
- BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
-{
- int r;
- OnigCodePoint i, n1, *data1;
- OnigCodePoint from, to;
-
- *pbuf = (BBuf* )NULL;
- if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
- if (not1 != 0 || not2 != 0)
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
- return 0;
- }
-
- r = 0;
- if (IS_NULL(bbuf2))
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
-
- if (IS_NULL(bbuf1)) {
- if (not1 != 0) {
- return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
- }
- else {
- if (not2 == 0) {
- return bbuf_clone(pbuf, bbuf2);
- }
- else {
- return not_code_range_buf(enc, bbuf2, pbuf, env);
- }
- }
- }
-
- if (not1 != 0)
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
-
- data1 = (OnigCodePoint* )(bbuf1->p);
- GET_CODE_POINT(n1, data1);
- data1++;
-
- if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
- r = bbuf_clone(pbuf, bbuf2);
- }
- else if (not1 == 0) { /* 1 OR (not 2) */
- r = not_code_range_buf(enc, bbuf2, pbuf, env);
- }
- if (r != 0) return r;
-
- for (i = 0; i < n1; i++) {
- from = data1[i*2];
- to = data1[i*2+1];
- r = add_code_range_to_buf(pbuf, env, from, to);
- if (r != 0) return r;
- }
- return 0;
-}
-
-static int
-and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
- OnigCodePoint* data, int n)
-{
- int i, r;
- OnigCodePoint from2, to2;
-
- for (i = 0; i < n; i++) {
- from2 = data[i*2];
- to2 = data[i*2+1];
- if (from2 < from1) {
- if (to2 < from1) continue;
- else {
- from1 = to2 + 1;
- }
- }
- else if (from2 <= to1) {
- if (to2 < to1) {
- if (from1 <= from2 - 1) {
- r = add_code_range_to_buf(pbuf, env, from1, from2-1);
- if (r != 0) return r;
- }
- from1 = to2 + 1;
- }
- else {
- to1 = from2 - 1;
- }
- }
- else {
- from1 = from2;
- }
- if (from1 > to1) break;
- }
- if (from1 <= to1) {
- r = add_code_range_to_buf(pbuf, env, from1, to1);
- if (r != 0) return r;
- }
- return 0;
-}
-
-static int
-and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
-{
- int r;
- OnigCodePoint i, j, n1, n2, *data1, *data2;
- OnigCodePoint from, to, from1, to1, from2, to2;
-
- *pbuf = (BBuf* )NULL;
- if (IS_NULL(bbuf1)) {
- if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
- return bbuf_clone(pbuf, bbuf2);
- return 0;
- }
- else if (IS_NULL(bbuf2)) {
- if (not2 != 0)
- return bbuf_clone(pbuf, bbuf1);
- return 0;
- }
-
- if (not1 != 0)
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
-
- data1 = (OnigCodePoint* )(bbuf1->p);
- data2 = (OnigCodePoint* )(bbuf2->p);
- GET_CODE_POINT(n1, data1);
- GET_CODE_POINT(n2, data2);
- data1++;
- data2++;
-
- if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
- for (i = 0; i < n1; i++) {
- from1 = data1[i*2];
- to1 = data1[i*2+1];
- for (j = 0; j < n2; j++) {
- from2 = data2[j*2];
- to2 = data2[j*2+1];
- if (from2 > to1) break;
- if (to2 < from1) continue;
- from = MAX(from1, from2);
- to = MIN(to1, to2);
- r = add_code_range_to_buf(pbuf, env, from, to);
- if (r != 0) return r;
- }
- }
- }
- else if (not1 == 0) { /* 1 AND (not 2) */
- for (i = 0; i < n1; i++) {
- from1 = data1[i*2];
- to1 = data1[i*2+1];
- r = and_code_range1(pbuf, env, from1, to1, data2, n2);
- if (r != 0) return r;
- }
- }
-
- return 0;
-}
-
-static int
-and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
-{
- OnigEncoding enc = env->enc;
- int r, not1, not2;
- BBuf *buf1, *buf2, *pbuf = 0;
- BitSetRef bsr1, bsr2;
- BitSet bs1, bs2;
-
- not1 = IS_NCCLASS_NOT(dest);
- bsr1 = dest->bs;
- buf1 = dest->mbuf;
- not2 = IS_NCCLASS_NOT(cc);
- bsr2 = cc->bs;
- buf2 = cc->mbuf;
-
- if (not1 != 0) {
- bitset_invert_to(bsr1, bs1);
- bsr1 = bs1;
- }
- if (not2 != 0) {
- bitset_invert_to(bsr2, bs2);
- bsr2 = bs2;
- }
- bitset_and(bsr1, bsr2);
- if (bsr1 != dest->bs) {
- bitset_copy(dest->bs, bsr1);
- bsr1 = dest->bs;
- }
- if (not1 != 0) {
- bitset_invert(dest->bs);
- }
-
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {
- if (not1 != 0 && not2 != 0) {
- r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
- }
- else {
- r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
- if (r == 0 && not1 != 0) {
- BBuf *tbuf = 0;
- r = not_code_range_buf(enc, pbuf, &tbuf, env);
- bbuf_free(pbuf);
- pbuf = tbuf;
- }
- }
- if (r != 0) {
- bbuf_free(pbuf);
- return r;
- }
-
- dest->mbuf = pbuf;
- bbuf_free(buf1);
- return r;
- }
- return 0;
-}
-
-static int
-or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
-{
- OnigEncoding enc = env->enc;
- int r, not1, not2;
- BBuf *buf1, *buf2, *pbuf = 0;
- BitSetRef bsr1, bsr2;
- BitSet bs1, bs2;
-
- not1 = IS_NCCLASS_NOT(dest);
- bsr1 = dest->bs;
- buf1 = dest->mbuf;
- not2 = IS_NCCLASS_NOT(cc);
- bsr2 = cc->bs;
- buf2 = cc->mbuf;
-
- if (not1 != 0) {
- bitset_invert_to(bsr1, bs1);
- bsr1 = bs1;
- }
- if (not2 != 0) {
- bitset_invert_to(bsr2, bs2);
- bsr2 = bs2;
- }
- bitset_or(bsr1, bsr2);
- if (bsr1 != dest->bs) {
- bitset_copy(dest->bs, bsr1);
- bsr1 = dest->bs;
- }
- if (not1 != 0) {
- bitset_invert(dest->bs);
- }
-
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {
- if (not1 != 0 && not2 != 0) {
- r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
- }
- else {
- r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
- if (r == 0 && not1 != 0) {
- BBuf *tbuf = 0;
- r = not_code_range_buf(enc, pbuf, &tbuf, env);
- bbuf_free(pbuf);
- pbuf = tbuf;
- }
- }
- if (r != 0) {
- bbuf_free(pbuf);
- return r;
- }
-
- dest->mbuf = pbuf;
- bbuf_free(buf1);
- return r;
- }
- else
- return 0;
-}
-
-static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
-
-static int
-conv_backslash_value(int c, ScanEnv* env)
-{
- if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
- switch (c) {
- case 'n': return '\n';
- case 't': return '\t';
- case 'r': return '\r';
- case 'f': return '\f';
- case 'a': return '\007';
- case 'b': return '\010';
- case 'e': return '\033';
- case 'v':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
- return '\v';
- break;
-
- default:
- if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
- UNKNOWN_ESC_WARN(env, c);
- break;
- }
- }
- return c;
-}
-
-#define is_invalid_quantifier_target(node) 0
-
-/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
-static int
-popular_quantifier_num(QtfrNode* q)
-{
- if (q->greedy) {
- if (q->lower == 0) {
- if (q->upper == 1) return 0;
- else if (IS_REPEAT_INFINITE(q->upper)) return 1;
- }
- else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 2;
- }
- }
- else {
- if (q->lower == 0) {
- if (q->upper == 1) return 3;
- else if (IS_REPEAT_INFINITE(q->upper)) return 4;
- }
- else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 5;
- }
- }
- return -1;
-}
-
-
-enum ReduceType {
- RQ_ASIS = 0, /* as is */
- RQ_DEL = 1, /* delete parent */
- RQ_A, /* to '*' */
- RQ_AQ, /* to '*?' */
- RQ_QQ, /* to '??' */
- RQ_P_QQ, /* to '+)??' */
- RQ_PQ_Q /* to '+?)?' */
-};
-
-static enum ReduceType const ReduceTypeTable[6][6] = {
- {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
- {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
- {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
- {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
- {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
- {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
-};
-
-extern void
-onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
-{
- int pnum, cnum;
- QtfrNode *p, *c;
-
- p = NQTFR(pnode);
- c = NQTFR(cnode);
- pnum = popular_quantifier_num(p);
- cnum = popular_quantifier_num(c);
- if (pnum < 0 || cnum < 0) return ;
-
- switch(ReduceTypeTable[cnum][pnum]) {
- case RQ_DEL:
- *pnode = *cnode;
- break;
- case RQ_A:
- p->target = c->target;
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
- break;
- case RQ_AQ:
- p->target = c->target;
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
- break;
- case RQ_QQ:
- p->target = c->target;
- p->lower = 0; p->upper = 1; p->greedy = 0;
- break;
- case RQ_P_QQ:
- p->target = cnode;
- p->lower = 0; p->upper = 1; p->greedy = 0;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
- return ;
- break;
- case RQ_PQ_Q:
- p->target = cnode;
- p->lower = 0; p->upper = 1; p->greedy = 1;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
- return ;
- break;
- case RQ_ASIS:
- p->target = cnode;
- return ;
- break;
- }
-
- c->target = NULL_NODE;
- onig_node_free(cnode);
-}
-
-
-enum TokenSyms {
- TK_EOT = 0, /* end of token */
- TK_RAW_BYTE = 1,
- TK_CHAR,
- TK_STRING,
- TK_CODE_POINT,
- TK_ANYCHAR,
- TK_CHAR_TYPE,
- TK_BACKREF,
- TK_CALL,
- TK_ANCHOR,
- TK_OP_REPEAT,
- TK_INTERVAL,
- TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
- TK_ALT,
- TK_SUBEXP_OPEN,
- TK_SUBEXP_CLOSE,
- TK_CC_OPEN,
- TK_QUOTE_OPEN,
- TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
- /* in cc */
- TK_CC_CLOSE,
- TK_CC_RANGE,
- TK_POSIX_BRACKET_OPEN,
- TK_CC_AND, /* && */
- TK_CC_CC_OPEN /* [ */
-};
-
-typedef struct {
- enum TokenSyms type;
- int escaped;
- int base; /* is number: 8, 16 (used in [....]) */
- UChar* backp;
- union {
- UChar* s;
- int c;
- OnigCodePoint code;
- int anchor;
- int subtype;
- struct {
- int lower;
- int upper;
- int greedy;
- int possessive;
- } repeat;
- struct {
- int num;
- int ref1;
- int* refs;
- int by_name;
-#ifdef USE_BACKREF_WITH_LEVEL
- int exist_level;
- int level; /* \k<name+n> */
-#endif
- } backref;
- struct {
- UChar* name;
- UChar* name_end;
- int gnum;
- } call;
- struct {
- int ctype;
- int is_not;
- } prop;
- } u;
-} OnigToken;
-
-
-static int
-fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
-{
- int low, up, syn_allow, non_low = 0;
- int r = 0;
- OnigCodePoint c;
- OnigEncoding enc = env->enc;
- UChar* p = *src;
- PFETCH_READY;
-
- syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
-
- if (PEND) {
- if (syn_allow)
- return 1; /* "....{" : OK! */
- else
- return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
- }
-
- if (! syn_allow) {
- c = PPEEK;
- if (c == ')' || c == '(' || c == '|') {
- return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
- }
- }
-
- low = onig_scan_unsigned_number(&p, end, env->enc);
- if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- if (low > ONIG_MAX_REPEAT_NUM)
- return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
-
- if (p == *src) { /* can't read low */
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
- /* allow {,n} as {0,n} */
- low = 0;
- non_low = 1;
- }
- else
- goto invalid;
- }
-
- if (PEND) goto invalid;
- PFETCH(c);
- if (c == ',') {
- UChar* prev = p;
- up = onig_scan_unsigned_number(&p, end, env->enc);
- if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- if (up > ONIG_MAX_REPEAT_NUM)
- return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
-
- if (p == prev) {
- if (non_low != 0)
- goto invalid;
- up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
- }
- }
- else {
- if (non_low != 0)
- goto invalid;
-
- PUNFETCH;
- up = low; /* {n} : exact n times */
- r = 2; /* fixed */
- }
-
- if (PEND) goto invalid;
- PFETCH(c);
- if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
- if (c != MC_ESC(env->syntax)) goto invalid;
- PFETCH(c);
- }
- if (c != '}') goto invalid;
-
- if (!IS_REPEAT_INFINITE(up) && low > up) {
- return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
- }
-
- tok->type = TK_INTERVAL;
- tok->u.repeat.lower = low;
- tok->u.repeat.upper = up;
- *src = p;
- return r; /* 0: normal {n,m}, 2: fixed {n} */
-
- invalid:
- if (syn_allow)
- return 1; /* OK */
- else
- return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
-}
-
-/* \M-, \C-, \c, or \... */
-static int
-fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
-{
- int v;
- OnigCodePoint c;
- OnigEncoding enc = env->enc;
- UChar* p = *src;
- PFETCH_READY;
-
- if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
-
- PFETCH(c);
- switch (c) {
- case 'M':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
- if (PEND) return ONIGERR_END_PATTERN_AT_META;
- PFETCH(c);
- if (c != '-') return ONIGERR_META_CODE_SYNTAX;
- if (PEND) return ONIGERR_END_PATTERN_AT_META;
- PFETCH(c);
- if (c == MC_ESC(env->syntax)) {
- v = fetch_escaped_value(&p, end, env);
- if (v < 0) return v;
- c = (OnigCodePoint )v;
- }
- c = ((c & 0xff) | 0x80);
- }
- else
- goto backslash;
- break;
-
- case 'C':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
- if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
- PFETCH(c);
- if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
- goto control;
- }
- else
- goto backslash;
-
- case 'c':
- if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
- control:
- if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
- PFETCH(c);
- if (c == '?') {
- c = 0177;
- }
- else {
- if (c == MC_ESC(env->syntax)) {
- v = fetch_escaped_value(&p, end, env);
- if (v < 0) return v;
- c = (OnigCodePoint )v;
- }
- c &= 0x9f;
- }
- break;
- }
- /* fall through */
-
- default:
- {
- backslash:
- c = conv_backslash_value(c, env);
- }
- break;
- }
-
- *src = p;
- return c;
-}
-
-static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
-
-static OnigCodePoint
-get_name_end_code_point(OnigCodePoint start)
-{
- switch (start) {
- case '<': return (OnigCodePoint )'>'; break;
- case '\'': return (OnigCodePoint )'\''; break;
- default:
- break;
- }
-
- return (OnigCodePoint )0;
-}
-
-#ifdef USE_NAMED_GROUP
-#ifdef USE_BACKREF_WITH_LEVEL
-/*
- \k<name+n>, \k<name-n>
- \k<num+n>, \k<num-n>
- \k<-num+n>, \k<-num-n>
-*/
-static int
-fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env,
- int* rback_num, int* rlevel)
-{
- int r, sign, is_num, exist_level;
- OnigCodePoint end_code;
- OnigCodePoint c = 0;
- OnigEncoding enc = env->enc;
- UChar *name_end;
- UChar *pnum_head;
- UChar *p = *src;
- PFETCH_READY;
-
- *rback_num = 0;
- is_num = exist_level = 0;
- sign = 1;
- pnum_head = *src;
-
- end_code = get_name_end_code_point(start_code);
-
- name_end = end;
- r = 0;
- if (PEND) {
- return ONIGERR_EMPTY_GROUP_NAME;
- }
- else {
- PFETCH(c);
- if (c == end_code)
- return ONIGERR_EMPTY_GROUP_NAME;
-
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
- }
- else if (c == '-') {
- is_num = 2;
- sign = -1;
- pnum_head = p;
- }
- else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- }
-
- while (!PEND) {
- name_end = p;
- PFETCH(c);
- if (c == end_code || c == ')' || c == '+' || c == '-') {
- if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
- break;
- }
-
- if (is_num != 0) {
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
- }
- else {
- r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
- }
- }
- else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- }
-
- if (r == 0 && c != end_code) {
- if (c == '+' || c == '-') {
- int level;
- int flag = (c == '-' ? -1 : 1);
-
- PFETCH(c);
- if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
- PUNFETCH;
- level = onig_scan_unsigned_number(&p, end, enc);
- if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
- *rlevel = (level * flag);
- exist_level = 1;
-
- PFETCH(c);
- if (c == end_code)
- goto end;
- }
-
- err:
- r = ONIGERR_INVALID_GROUP_NAME;
- name_end = end;
- }
-
- end:
- if (r == 0) {
- if (is_num != 0) {
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
- if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
- else if (*rback_num == 0) goto err;
-
- *rback_num *= sign;
- }
-
- *rname_end = name_end;
- *src = p;
- return (exist_level ? 1 : 0);
- }
- else {
- onig_scan_env_set_error_string(env, r, *src, name_end);
- return r;
- }
-}
-#endif /* USE_BACKREF_WITH_LEVEL */
-
-/*
- def: 0 -> define name (don't allow number name)
- 1 -> reference name (allow number name)
-*/
-static int
-fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
-{
- int r, is_num, sign;
- OnigCodePoint end_code;
- OnigCodePoint c = 0;
- OnigEncoding enc = env->enc;
- UChar *name_end;
- UChar *pnum_head;
- UChar *p = *src;
- PFETCH_READY;
-
- *rback_num = 0;
-
- end_code = get_name_end_code_point(start_code);
-
- name_end = end;
- pnum_head = *src;
- r = 0;
- is_num = 0;
- sign = 1;
- if (PEND) {
- return ONIGERR_EMPTY_GROUP_NAME;
- }
- else {
- PFETCH(c);
- if (c == end_code)
- return ONIGERR_EMPTY_GROUP_NAME;
-
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- if (ref == 1)
- is_num = 1;
- else {
- r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
- }
- }
- else if (c == '-') {
- if (ref == 1) {
- is_num = 2;
- sign = -1;
- pnum_head = p;
- }
- else {
- r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
- }
- }
- else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- }
-
- if (r == 0) {
- while (!PEND) {
- name_end = p;
- PFETCH(c);
- if (c == end_code || c == ')') {
- if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
- break;
- }
-
- if (is_num != 0) {
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
- }
- else {
- if (!ONIGENC_IS_CODE_WORD(enc, c))
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- else
- r = ONIGERR_INVALID_GROUP_NAME;
-
- is_num = 0;
- }
- }
- else {
- if (!ONIGENC_IS_CODE_WORD(enc, c)) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- }
- }
-
- if (c != end_code) {
- r = ONIGERR_INVALID_GROUP_NAME;
- name_end = end;
- }
-
- if (is_num != 0) {
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
- if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
- else if (*rback_num == 0) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto err;
- }
-
- *rback_num *= sign;
- }
-
- *rname_end = name_end;
- *src = p;
- return 0;
- }
- else {
- while (!PEND) {
- name_end = p;
- PFETCH(c);
- if (c == end_code || c == ')')
- break;
- }
- if (PEND)
- name_end = end;
-
- err:
- onig_scan_env_set_error_string(env, r, *src, name_end);
- return r;
- }
-}
-#else
-static int
-fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
-{
- int r, is_num, sign;
- OnigCodePoint end_code;
- OnigCodePoint c = 0;
- UChar *name_end;
- OnigEncoding enc = env->enc;
- UChar *pnum_head;
- UChar *p = *src;
- PFETCH_READY;
-
- *rback_num = 0;
-
- end_code = get_name_end_code_point(start_code);
-
- *rname_end = name_end = end;
- r = 0;
- pnum_head = *src;
- is_num = 0;
- sign = 1;
-
- if (PEND) {
- return ONIGERR_EMPTY_GROUP_NAME;
- }
- else {
- PFETCH(c);
- if (c == end_code)
- return ONIGERR_EMPTY_GROUP_NAME;
-
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
- }
- else if (c == '-') {
- is_num = 2;
- sign = -1;
- pnum_head = p;
- }
- else {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- }
-
- while (!PEND) {
- name_end = p;
-
- PFETCH(c);
- if (c == end_code || c == ')') break;
- if (! ONIGENC_IS_CODE_DIGIT(enc, c))
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- if (r == 0 && c != end_code) {
- r = ONIGERR_INVALID_GROUP_NAME;
- name_end = end;
- }
-
- if (r == 0) {
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
- if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
- else if (*rback_num == 0) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto err;
- }
- *rback_num *= sign;
-
- *rname_end = name_end;
- *src = p;
- return 0;
- }
- else {
- err:
- onig_scan_env_set_error_string(env, r, *src, name_end);
- return r;
- }
-}
-#endif /* USE_NAMED_GROUP */
-
-void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
- UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
-
-static void
-onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
-{
- va_list args;
- UChar buf[WARN_BUFSIZE];
- va_start(args, fmt);
- onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- (const UChar*)fmt, args);
- va_end(args);
- if (env->sourcefile == NULL)
- mrb_warn("%s", (char*)buf);
- else
- mrb_compile_warn(env->sourcefile, env->sourceline, "%s", (char*)buf);
-}
-
-static void
-CC_ESC_WARN(ScanEnv *env, UChar *c)
-{
- if (onig_warn == onig_null_warn) return ;
-
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
- onig_syntax_warn(env, "character class has '%s' without escape", c);
- }
-}
-
-static void
-CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
-{
- if (onig_warn == onig_null_warn) return ;
-
- if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
- onig_syntax_warn(env, "regular expression has '%s' without escape", c);
- }
-}
-
-static void
-CC_DUP_WARN(ScanEnv *env)
-{
- if (onig_warn == onig_null_warn /*|| !mrb_test(ruby_verbose)*/) return ;
-
- if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) &&
- !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
- (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
- onig_syntax_warn(env, "character class has duplicated range");
- }
-}
-
-static void
-UNKNOWN_ESC_WARN(ScanEnv *env, int c)
-{
- if (onig_warn == onig_null_warn /*|| !mrb_test(ruby_verbose)*/) return ;
- onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
-}
-
-static UChar*
-find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
- UChar **next, OnigEncoding enc)
-{
- int i;
- OnigCodePoint x;
- UChar *q;
- UChar *p = from;
-
- while (p < to) {
- x = ONIGENC_MBC_TO_CODE(enc, p, to);
- q = p + enclen(enc, p, to);
- if (x == s[0]) {
- for (i = 1; i < n && q < to; i++) {
- x = ONIGENC_MBC_TO_CODE(enc, q, to);
- if (x != s[i]) break;
- q += enclen(enc, q, to);
- }
- if (i >= n) {
- if (IS_NOT_NULL(next))
- *next = q;
- return p;
- }
- }
- p = q;
- }
- return NULL_UCHARP;
-}
-
-static int
-str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
- OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
-{
- int i, in_esc;
- OnigCodePoint x;
- UChar *q;
- UChar *p = from;
-
- in_esc = 0;
- while (p < to) {
- if (in_esc) {
- in_esc = 0;
- p += enclen(enc, p, to);
- }
- else {
- x = ONIGENC_MBC_TO_CODE(enc, p, to);
- q = p + enclen(enc, p, to);
- if (x == s[0]) {
- for (i = 1; i < n && q < to; i++) {
- x = ONIGENC_MBC_TO_CODE(enc, q, to);
- if (x != s[i]) break;
- q += enclen(enc, q, to);
- }
- if (i >= n) return 1;
- p += enclen(enc, p, to);
- }
- else {
- x = ONIGENC_MBC_TO_CODE(enc, p, to);
- if (x == bad) return 0;
- else if (x == MC_ESC(syn)) in_esc = 1;
- p = q;
- }
- }
- }
- return 0;
-}
-
-static int
-fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
-{
- int num;
- OnigCodePoint c, c2;
- const OnigSyntaxType* syn = env->syntax;
- OnigEncoding enc = env->enc;
- UChar* prev;
- UChar* p = *src;
- PFETCH_READY;
-
- if (PEND) {
- tok->type = TK_EOT;
- return tok->type;
- }
-
- PFETCH(c);
- tok->type = TK_CHAR;
- tok->base = 0;
- tok->u.c = c;
- tok->escaped = 0;
-
- if (c == ']') {
- tok->type = TK_CC_CLOSE;
- }
- else if (c == '-') {
- tok->type = TK_CC_RANGE;
- }
- else if (c == MC_ESC(syn)) {
- if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
- goto end;
-
- if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
-
- PFETCH(c);
- tok->escaped = 1;
- tok->u.c = c;
- switch (c) {
- case 'w':
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_W;
- tok->u.prop.is_not = 0;
- break;
- case 'W':
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_W;
- tok->u.prop.is_not = 1;
- break;
- case 'd':
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_D;
- tok->u.prop.is_not = 0;
- break;
- case 'D':
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_D;
- tok->u.prop.is_not = 1;
- break;
- case 's':
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_S;
- tok->u.prop.is_not = 0;
- break;
- case 'S':
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_S;
- tok->u.prop.is_not = 1;
- break;
- case 'h':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
- tok->u.prop.is_not = 0;
- break;
- case 'H':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
- tok->u.prop.is_not = 1;
- break;
-
- case 'p':
- case 'P':
- c2 = PPEEK;
- if (c2 == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
- PINC;
- tok->type = TK_CHAR_PROPERTY;
- tok->u.prop.is_not = (c == 'P' ? 1 : 0);
-
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
- PFETCH(c2);
- if (c2 == '^') {
- tok->u.prop.is_not = (tok->u.prop.is_not == 0 ? 1 : 0);
- }
- else
- PUNFETCH;
- }
- }
- else {
- onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
- }
- break;
-
- case 'x':
- if (PEND) break;
-
- prev = p;
- if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
- PINC;
- num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
- if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- if (!PEND) {
- c2 = PPEEK;
- if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
- return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
- }
-
- if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
- PINC;
- tok->type = TK_CODE_POINT;
- tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
- }
- else {
- /* can't read nothing or invalid format */
- p = prev;
- }
- }
- else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
- }
- tok->type = TK_RAW_BYTE;
- tok->base = 16;
- tok->u.c = num;
- }
- break;
-
- case 'u':
- if (PEND) break;
-
- prev = p;
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
- }
- tok->type = TK_CODE_POINT;
- tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
- }
- break;
-
- case '0':
- case '1': case '2': case '3': case '4': case '5': case '6': case '7':
- if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
- PUNFETCH;
- prev = p;
- num = scan_unsigned_octal_number(&p, end, 3, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
- }
- tok->type = TK_RAW_BYTE;
- tok->base = 8;
- tok->u.c = num;
- }
- break;
-
- default:
- PUNFETCH;
- num = fetch_escaped_value(&p, end, env);
- if (num < 0) return num;
- if (tok->u.c != num) {
- tok->u.code = (OnigCodePoint )num;
- tok->type = TK_CODE_POINT;
- }
- break;
- }
- }
- else if (c == '[') {
- if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
- OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
- tok->backp = p; /* point at '[' is readed */
- PINC;
- if (str_exist_check_with_esc(send, 2, p, end,
- (OnigCodePoint )']', enc, syn)) {
- tok->type = TK_POSIX_BRACKET_OPEN;
- }
- else {
- PUNFETCH;
- goto cc_in_cc;
- }
- }
- else {
- cc_in_cc:
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
- tok->type = TK_CC_CC_OPEN;
- }
- else {
- CC_ESC_WARN(env, (UChar* )"[");
- }
- }
- }
- else if (c == '&') {
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
- !PEND && (PPEEK_IS('&'))) {
- PINC;
- tok->type = TK_CC_AND;
- }
- }
-
- end:
- *src = p;
- return tok->type;
-}
-
-static int
-fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
-{
- int r, num;
- OnigCodePoint c;
- OnigEncoding enc = env->enc;
- const OnigSyntaxType* syn = env->syntax;
- UChar* prev;
- UChar* p = *src;
- PFETCH_READY;
-
- start:
- if (PEND) {
- tok->type = TK_EOT;
- return tok->type;
- }
-
- tok->type = TK_STRING;
- tok->base = 0;
- tok->backp = p;
-
- PFETCH(c);
- if (IS_MC_ESC_CODE(c, syn)) {
- if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
-
- tok->backp = p;
- PFETCH(c);
-
- tok->u.c = c;
- tok->escaped = 1;
- switch (c) {
- case '*':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
- tok->type = TK_OP_REPEAT;
- tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
- goto greedy_check;
- break;
-
- case '+':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
- tok->type = TK_OP_REPEAT;
- tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
- goto greedy_check;
- break;
-
- case '?':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
- tok->type = TK_OP_REPEAT;
- tok->u.repeat.lower = 0;
- tok->u.repeat.upper = 1;
- greedy_check:
- if (!PEND && PPEEK_IS('?') &&
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
- PFETCH(c);
- tok->u.repeat.greedy = 0;
- tok->u.repeat.possessive = 0;
- }
- else {
- possessive_check:
- if (!PEND && PPEEK_IS('+') &&
- ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
- tok->type != TK_INTERVAL) ||
- (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
- tok->type == TK_INTERVAL))) {
- PFETCH(c);
- tok->u.repeat.greedy = 1;
- tok->u.repeat.possessive = 1;
- }
- else {
- tok->u.repeat.greedy = 1;
- tok->u.repeat.possessive = 0;
- }
- }
- break;
-
- case '{':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
- r = fetch_range_quantifier(&p, end, tok, env);
- if (r < 0) return r; /* error */
- if (r == 0) goto greedy_check;
- else if (r == 2) { /* {n} */
- if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
- goto possessive_check;
-
- goto greedy_check;
- }
- /* r == 1 : normal char */
- break;
-
- case '|':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
- tok->type = TK_ALT;
- break;
-
- case '(':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
- tok->type = TK_SUBEXP_OPEN;
- break;
-
- case ')':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
- tok->type = TK_SUBEXP_CLOSE;
- break;
-
- case 'w':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_W;
- tok->u.prop.is_not = 0;
- break;
-
- case 'W':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_W;
- tok->u.prop.is_not = 1;
- break;
-
- case 'b':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
- tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_BOUND;
- break;
-
- case 'B':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
- tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
- break;
-
-#ifdef USE_WORD_BEGIN_END
- case '<':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
- tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_BEGIN;
- break;
-
- case '>':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
- tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_END;
- break;
-#endif
-
- case 's':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_S;
- tok->u.prop.is_not = 0;
- break;
-
- case 'S':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_S;
- tok->u.prop.is_not = 1;
- break;
-
- case 'd':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_D;
- tok->u.prop.is_not = 0;
- break;
-
- case 'D':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_D;
- tok->u.prop.is_not = 1;
- break;
-
- case 'h':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
- tok->u.prop.is_not = 0;
- break;
-
- case 'H':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
- tok->type = TK_CHAR_TYPE;
- tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
- tok->u.prop.is_not = 1;
- break;
-
- case 'A':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
- begin_buf:
- tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_BEGIN_BUF;
- break;
-
- case 'Z':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
- tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_SEMI_END_BUF;
- break;
-
- case 'z':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
- end_buf:
- tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_END_BUF;
- break;
-
- case 'G':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
- tok->type = TK_ANCHOR;
- tok->u.subtype = ANCHOR_BEGIN_POSITION;
- break;
-
- case '`':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
- goto begin_buf;
- break;
-
- case '\'':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
- goto end_buf;
- break;
-
- case 'x':
- if (PEND) break;
-
- prev = p;
- if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
- PINC;
- num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
- if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- if (!PEND) {
- if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
- return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
- }
-
- if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
- PINC;
- tok->type = TK_CODE_POINT;
- tok->u.code = (OnigCodePoint )num;
- }
- else {
- /* can't read nothing or invalid format */
- p = prev;
- }
- }
- else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
- }
- tok->type = TK_RAW_BYTE;
- tok->base = 16;
- tok->u.c = num;
- }
- break;
-
- case 'u':
- if (PEND) break;
-
- prev = p;
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
- }
- tok->type = TK_CODE_POINT;
- tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
- }
- break;
-
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- PUNFETCH;
- prev = p;
- num = onig_scan_unsigned_number(&p, end, enc);
- if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
- goto skip_backref;
- }
-
- if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
- (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
- return ONIGERR_INVALID_BACKREF;
- }
-
- tok->type = TK_BACKREF;
- tok->u.backref.num = 1;
- tok->u.backref.ref1 = num;
- tok->u.backref.by_name = 0;
-#ifdef USE_BACKREF_WITH_LEVEL
- tok->u.backref.exist_level = 0;
-#endif
- break;
- }
-
- skip_backref:
- if (c == '8' || c == '9') {
- /* normal char */
- p = prev; PINC;
- break;
- }
-
- p = prev;
- /* fall through */
- case '0':
- if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
- prev = p;
- num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
- }
- tok->type = TK_RAW_BYTE;
- tok->base = 8;
- tok->u.c = num;
- }
- else if (c != '0') {
- PINC;
- }
- break;
-
-#ifdef USE_NAMED_GROUP
- case 'k':
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
- PFETCH(c);
- if (c == '<' || c == '\'') {
- UChar* name_end;
- int* backs;
- int back_num;
-
- prev = p;
-
-#ifdef USE_BACKREF_WITH_LEVEL
- name_end = NULL_UCHARP; /* no need. escape gcc warning. */
- r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
- env, &back_num, &tok->u.backref.level);
- if (r == 1) tok->u.backref.exist_level = 1;
- else tok->u.backref.exist_level = 0;
-#else
- r = fetch_name(&p, end, &name_end, env, &back_num, 1);
-#endif
- if (r < 0) return r;
-
- if (back_num != 0) {
- if (back_num < 0) {
- back_num = BACKREF_REL_TO_ABS(back_num, env);
- if (back_num <= 0)
- return ONIGERR_INVALID_BACKREF;
- }
-
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- if (back_num > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
- return ONIGERR_INVALID_BACKREF;
- }
- tok->type = TK_BACKREF;
- tok->u.backref.by_name = 0;
- tok->u.backref.num = 1;
- tok->u.backref.ref1 = back_num;
- }
- else {
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
- if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- int i;
- for (i = 0; i < num; i++) {
- if (backs[i] > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
- return ONIGERR_INVALID_BACKREF;
- }
- }
-
- tok->type = TK_BACKREF;
- tok->u.backref.by_name = 1;
- if (num == 1) {
- tok->u.backref.num = 1;
- tok->u.backref.ref1 = backs[0];
- }
- else {
- tok->u.backref.num = num;
- tok->u.backref.refs = backs;
- }
- }
- }
- else {
- PUNFETCH;
- onig_syntax_warn(env, "invalid back reference");
- }
- }
- break;
-#endif
-
-#ifdef USE_SUBEXP_CALL
- case 'g':
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
- PFETCH(c);
- if (c == '<' || c == '\'') {
- int gnum;
- UChar* name_end;
-
- prev = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
- if (r < 0) return r;
-
- tok->type = TK_CALL;
- tok->u.call.name = prev;
- tok->u.call.name_end = name_end;
- tok->u.call.gnum = gnum;
- }
- else {
- onig_syntax_warn(env, "invalid subexp call");
- PUNFETCH;
- }
- }
- break;
-#endif
-
- case 'Q':
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
- tok->type = TK_QUOTE_OPEN;
- }
- break;
-
- case 'p':
- case 'P':
- if (PPEEK_IS('{') &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
- PINC;
- tok->type = TK_CHAR_PROPERTY;
- tok->u.prop.is_not = (c == 'P' ? 1 : 0);
-
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
- PFETCH(c);
- if (c == '^') {
- tok->u.prop.is_not = (tok->u.prop.is_not == 0 ? 1 : 0);
- }
- else
- PUNFETCH;
- }
- }
- else {
- onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
- }
- break;
-
- default:
- PUNFETCH;
- num = fetch_escaped_value(&p, end, env);
- if (num < 0) return num;
- /* set_raw: */
- if (tok->u.c != num) {
- tok->type = TK_CODE_POINT;
- tok->u.code = (OnigCodePoint )num;
- }
- else { /* string */
- p = tok->backp + enclen(enc, tok->backp, end);
- }
- break;
- }
- }
- else {
- tok->u.c = c;
- tok->escaped = 0;
-
-#ifdef USE_VARIABLE_META_CHARS
- if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
- if (c == MC_ANYCHAR(syn))
- goto any_char;
- else if (c == MC_ANYTIME(syn))
- goto anytime;
- else if (c == MC_ZERO_OR_ONE_TIME(syn))
- goto zero_or_one_time;
- else if (c == MC_ONE_OR_MORE_TIME(syn))
- goto one_or_more_time;
- else if (c == MC_ANYCHAR_ANYTIME(syn)) {
- tok->type = TK_ANYCHAR_ANYTIME;
- goto out;
- }
- }
-#endif
-
- switch (c) {
- case '.':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
-#ifdef USE_VARIABLE_META_CHARS
- any_char:
-#endif
- tok->type = TK_ANYCHAR;
- break;
-
- case '*':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
-#ifdef USE_VARIABLE_META_CHARS
- anytime:
-#endif
- tok->type = TK_OP_REPEAT;
- tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
- goto greedy_check;
- break;
-
- case '+':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
-#ifdef USE_VARIABLE_META_CHARS
- one_or_more_time:
-#endif
- tok->type = TK_OP_REPEAT;
- tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
- goto greedy_check;
- break;
-
- case '?':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
-#ifdef USE_VARIABLE_META_CHARS
- zero_or_one_time:
-#endif
- tok->type = TK_OP_REPEAT;
- tok->u.repeat.lower = 0;
- tok->u.repeat.upper = 1;
- goto greedy_check;
- break;
-
- case '{':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
- r = fetch_range_quantifier(&p, end, tok, env);
- if (r < 0) return r; /* error */
- if (r == 0) goto greedy_check;
- else if (r == 2) { /* {n} */
- if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
- goto possessive_check;
-
- goto greedy_check;
- }
- /* r == 1 : normal char */
- break;
-
- case '|':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
- tok->type = TK_ALT;
- break;
-
- case '(':
- if (PPEEK_IS('?') &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
- PINC;
- if (PPEEK_IS('#')) {
- PFETCH(c);
- while (1) {
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- PFETCH(c);
- if (c == MC_ESC(syn)) {
- if (!PEND) PFETCH(c);
- }
- else {
- if (c == ')') break;
- }
- }
- goto start;
- }
- PUNFETCH;
- }
-
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
- tok->type = TK_SUBEXP_OPEN;
- break;
-
- case ')':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
- tok->type = TK_SUBEXP_CLOSE;
- break;
-
- case '^':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
- tok->type = TK_ANCHOR;
- tok->u.subtype = (IS_SINGLELINE(env->option)
- ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
- break;
-
- case '$':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
- tok->type = TK_ANCHOR;
- tok->u.subtype = (IS_SINGLELINE(env->option)
- ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
- break;
-
- case '[':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
- tok->type = TK_CC_OPEN;
- break;
-
- case ']':
- if (*src > env->pattern) /* /].../ is allowed. */
- CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
- break;
-
- case '#':
- if (IS_EXTEND(env->option)) {
- while (!PEND) {
- PFETCH(c);
- if (ONIGENC_IS_CODE_NEWLINE(enc, c))
- break;
- }
- goto start;
- break;
- }
- break;
-
- case ' ': case '\t': case '\n': case '\r': case '\f':
- if (IS_EXTEND(env->option))
- goto start;
- break;
-
- default:
- /* string */
- break;
- }
- }
-
-#ifdef USE_VARIABLE_META_CHARS
- out:
-#endif
- *src = p;
- return tok->type;
-}
-
-static int
-add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int is_not,
- ScanEnv* env,
- OnigCodePoint sb_out, const OnigCodePoint mbr[])
-{
- int i, r;
- OnigCodePoint j;
-
- int n = ONIGENC_CODE_RANGE_NUM(mbr);
-
- if (is_not == 0) {
- for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
- j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
- if (j >= sb_out) {
- if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
- r = add_code_range_to_buf(&(cc->mbuf), env, j,
- ONIGENC_CODE_RANGE_TO(mbr, i));
- if (r != 0) return r;
- i++;
- }
-
- goto sb_end;
- }
- BITSET_SET_BIT_CHKDUP(cc->bs, j);
- }
- }
-
- sb_end:
- for ( ; i < n; i++) {
- r = add_code_range_to_buf(&(cc->mbuf), env,
- ONIGENC_CODE_RANGE_FROM(mbr, i),
- ONIGENC_CODE_RANGE_TO(mbr, i));
- if (r != 0) return r;
- }
- }
- else {
- OnigCodePoint prev = 0;
-
- for (i = 0; i < n; i++) {
- for (j = prev;
- j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
- if (j >= sb_out) {
- goto sb_end2;
- }
- BITSET_SET_BIT_CHKDUP(cc->bs, j);
- }
- prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
- }
- for (j = prev; j < sb_out; j++) {
- BITSET_SET_BIT_CHKDUP(cc->bs, j);
- }
-
- sb_end2:
- prev = sb_out;
-
- for (i = 0; i < n; i++) {
- if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
- r = add_code_range_to_buf(&(cc->mbuf), env, prev,
- ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
- if (r != 0) return r;
- }
- prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
- }
- if (prev < 0x7fffffff) {
- r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
- if (r != 0) return r;
- }
- }
-
- return 0;
-}
-
-static int
-add_ctype_to_cc(CClassNode* cc, int ctype, int is_not, ScanEnv* env)
-{
- int c, r;
- const OnigCodePoint *ranges;
- OnigCodePoint sb_out;
- OnigEncoding enc = env->enc;
-
- switch (ctype) {
- case ONIGENC_CTYPE_D:
- case ONIGENC_CTYPE_S:
- case ONIGENC_CTYPE_W:
- ctype ^= ONIGENC_CTYPE_SPECIAL_MASK;
- if (is_not != 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
- }
- else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- }
- return 0;
- break;
- }
-
- r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
- if (r == 0) {
- return add_ctype_to_cc_by_range(cc, ctype, is_not, env, sb_out, ranges);
- }
- else if (r != ONIG_NO_SUPPORT_CONFIG) {
- return r;
- }
-
- r = 0;
- switch (ctype) {
- case ONIGENC_CTYPE_ALPHA:
- case ONIGENC_CTYPE_BLANK:
- case ONIGENC_CTYPE_CNTRL:
- case ONIGENC_CTYPE_DIGIT:
- case ONIGENC_CTYPE_LOWER:
- case ONIGENC_CTYPE_PUNCT:
- case ONIGENC_CTYPE_SPACE:
- case ONIGENC_CTYPE_UPPER:
- case ONIGENC_CTYPE_XDIGIT:
- case ONIGENC_CTYPE_ASCII:
- case ONIGENC_CTYPE_ALNUM:
- if (is_not != 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
- }
- else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- }
- break;
-
- case ONIGENC_CTYPE_GRAPH:
- case ONIGENC_CTYPE_PRINT:
- if (is_not != 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- }
- else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
- }
- break;
-
- case ONIGENC_CTYPE_WORD:
- if (is_not == 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
- }
- else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
- && ! ONIGENC_IS_CODE_WORD(enc, c))
- BITSET_SET_BIT_CHKDUP(cc->bs, c);
- }
- }
- break;
-
- default:
- return ONIGERR_PARSER_BUG;
- break;
- }
-
- return r;
-}
-
-static int
-parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
-{
-#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
-#define POSIX_BRACKET_NAME_MIN_LEN 4
-
- static const PosixBracketEntryType PBS[] = {
- { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
- { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
- { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
- { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
- { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
- { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
- { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
- { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
- { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
- { (UChar* )NULL, -1, 0 }
- };
-
- const PosixBracketEntryType *pb;
- int is_not, i, r;
- OnigCodePoint c;
- OnigEncoding enc = env->enc;
- UChar *p = *src;
- PFETCH_READY;
-
- if (PPEEK_IS('^')) {
- PINC;
- is_not = 1;
- }
- else
- is_not = 0;
-
- if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
- goto not_posix_bracket;
-
- for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
- if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
- p = (UChar* )onigenc_step(enc, p, end, pb->len);
- if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
- return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
-
- r = add_ctype_to_cc(cc, pb->ctype, is_not, env);
- if (r != 0) return r;
-
- PINC; PINC;
- *src = p;
- return 0;
- }
- }
-
- not_posix_bracket:
- c = 0;
- i = 0;
- while (!PEND && ((c = PPEEK) != ':') && c != ']') {
- PINC;
- if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
- }
- if (c == ':' && ! PEND) {
- PINC;
- if (! PEND) {
- PFETCH(c);
- if (c == ']')
- return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
- }
- }
-
- return 1; /* 1: is not POSIX bracket, but no error. */
-}
-
-static int
-fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
-{
- int r;
- OnigCodePoint c;
- OnigEncoding enc = env->enc;
- UChar *prev, *start, *p = *src;
- PFETCH_READY;
-
- r = 0;
- start = prev = p;
-
- while (!PEND) {
- prev = p;
- PFETCH(c);
- if (c == '}') {
- r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
- if (r < 0) break;
-
- *src = p;
- return r;
- }
- else if (c == '(' || c == ')' || c == '{' || c == '|') {
- r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
- break;
- }
- }
-
- onig_scan_env_set_error_string(env, r, *src, prev);
- return r;
-}
-
-static int
-parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
- ScanEnv* env)
-{
- int r, ctype;
- CClassNode* cc;
-
- ctype = fetch_char_property_to_ctype(src, end, env);
- if (ctype < 0) return ctype;
-
- *np = node_new_cclass();
- CHECK_NULL_RETURN_MEMERR(*np);
- cc = NCCLASS(*np);
- r = add_ctype_to_cc(cc, ctype, 0, env);
- if (r != 0) return r;
- if (tok->u.prop.is_not != 0) NCCLASS_SET_NOT(cc);
-
- return 0;
-}
-
-
-enum CCSTATE {
- CCS_VALUE,
- CCS_RANGE,
- CCS_COMPLETE,
- CCS_START
-};
-
-enum CCVALTYPE {
- CCV_SB,
- CCV_CODE_POINT,
- CCV_CLASS
-};
-
-static int
-next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
- enum CCSTATE* state, ScanEnv* env)
-{
- int r;
-
- if (*state == CCS_RANGE)
- return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
-
- if (*state == CCS_VALUE && *type != CCV_CLASS) {
- if (*type == CCV_SB)
- BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
- else if (*type == CCV_CODE_POINT) {
- r = add_code_range(&(cc->mbuf), env, *vs, *vs);
- if (r < 0) return r;
- }
- }
-
- *state = CCS_VALUE;
- *type = CCV_CLASS;
- return 0;
-}
-
-static int
-next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
- int* vs_israw, int v_israw,
- enum CCVALTYPE intype, enum CCVALTYPE* type,
- enum CCSTATE* state, ScanEnv* env)
-{
- int r;
-
- switch (*state) {
- case CCS_VALUE:
- if (*type == CCV_SB)
- BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
- else if (*type == CCV_CODE_POINT) {
- r = add_code_range(&(cc->mbuf), env, *vs, *vs);
- if (r < 0) return r;
- }
- break;
-
- case CCS_RANGE:
- if (intype == *type) {
- if (intype == CCV_SB) {
- if (*vs > 0xff || v > 0xff)
- return ONIGERR_INVALID_CODE_POINT_VALUE;
-
- if (*vs > v) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- goto ccs_range_end;
- else
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
- bitset_set_range(env, cc->bs, (int )*vs, (int )v);
- }
- else {
- r = add_code_range(&(cc->mbuf), env, *vs, v);
- if (r < 0) return r;
- }
- }
- else {
- if (*vs > v) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- goto ccs_range_end;
- else
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
- bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
- r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
- if (r < 0) return r;
- }
- ccs_range_end:
- *state = CCS_COMPLETE;
- break;
-
- case CCS_COMPLETE:
- case CCS_START:
- *state = CCS_VALUE;
- break;
-
- default:
- break;
- }
-
- *vs_israw = v_israw;
- *vs = v;
- *type = intype;
- return 0;
-}
-
-static int
-code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
- ScanEnv* env)
-{
- int in_esc;
- OnigCodePoint code;
- OnigEncoding enc = env->enc;
- UChar* p = from;
- PFETCH_READY;
-
- in_esc = 0;
- while (! PEND) {
- if (ignore_escaped && in_esc) {
- in_esc = 0;
- }
- else {
- PFETCH(code);
- if (code == c) return 1;
- if (code == MC_ESC(env->syntax)) in_esc = 1;
- }
- }
- return 0;
-}
-
-static int
-parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
- ScanEnv* env)
-{
- int r, neg, len, fetched, and_start;
- OnigCodePoint v, vs;
- UChar *p;
- Node* node;
- CClassNode *cc, *prev_cc;
- CClassNode work_cc;
-
- enum CCSTATE state;
- enum CCVALTYPE val_type, in_type;
- int val_israw, in_israw;
-
- prev_cc = (CClassNode* )NULL;
- *np = NULL_NODE;
- r = fetch_token_in_cc(tok, src, end, env);
- if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
- neg = 1;
- r = fetch_token_in_cc(tok, src, end, env);
- }
- else {
- neg = 0;
- }
-
- if (r < 0) return r;
- if (r == TK_CC_CLOSE) {
- if (! code_exist_check((OnigCodePoint )']',
- *src, env->pattern_end, 1, env))
- return ONIGERR_EMPTY_CHAR_CLASS;
-
- CC_ESC_WARN(env, (UChar* )"]");
- r = tok->type = TK_CHAR; /* allow []...] */
- }
-
- *np = node = node_new_cclass();
- CHECK_NULL_RETURN_MEMERR(node);
- cc = NCCLASS(node);
-
- and_start = 0;
- state = CCS_START;
- p = *src;
- while (r != TK_CC_CLOSE) {
- fetched = 0;
- switch (r) {
- case TK_CHAR:
- if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
- (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
- in_type = CCV_CODE_POINT;
- }
- else if (len < 0) {
- r = len;
- goto err;
- }
- else {
- sb_char:
- in_type = CCV_SB;
- }
- v = (OnigCodePoint )tok->u.c;
- in_israw = 0;
- goto val_entry2;
- break;
-
- case TK_RAW_BYTE:
- /* tok->base != 0 : octal or hexadec. */
- if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
- UChar* psave = p;
- int i, base = tok->base;
-
- buf[0] = tok->u.c;
- for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
- r = fetch_token_in_cc(tok, &p, end, env);
- if (r < 0) goto err;
- if (r != TK_RAW_BYTE || tok->base != base) {
- fetched = 1;
- break;
- }
- buf[i] = tok->u.c;
- }
-
- if (i < ONIGENC_MBC_MINLEN(env->enc)) {
- r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- goto err;
- }
-
- len = enclen(env->enc, buf, buf+i);
- if (i < len) {
- r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- goto err;
- }
- else if (i > len) { /* fetch back */
- p = psave;
- for (i = 1; i < len; i++) {
- r = fetch_token_in_cc(tok, &p, end, env);
- }
- fetched = 0;
- }
-
- if (i == 1) {
- v = (OnigCodePoint )buf[0];
- goto raw_single;
- }
- else {
- v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
- in_type = CCV_CODE_POINT;
- }
- }
- else {
- v = (OnigCodePoint )tok->u.c;
- raw_single:
- in_type = CCV_SB;
- }
- in_israw = 1;
- goto val_entry2;
- break;
-
- case TK_CODE_POINT:
- v = tok->u.code;
- in_israw = 1;
- val_entry:
- len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
- if (len < 0) {
- r = len;
- goto err;
- }
- in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
- val_entry2:
- r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
- &state, env);
- if (r != 0) goto err;
- break;
-
- case TK_POSIX_BRACKET_OPEN:
- r = parse_posix_bracket(cc, &p, end, env);
- if (r < 0) goto err;
- if (r == 1) { /* is not POSIX bracket */
- CC_ESC_WARN(env, (UChar* )"[");
- p = tok->backp;
- v = (OnigCodePoint )tok->u.c;
- in_israw = 0;
- goto val_entry;
- }
- goto next_class;
- break;
-
- case TK_CHAR_TYPE:
- r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.is_not, env);
- if (r != 0) return r;
-
- next_class:
- r = next_state_class(cc, &vs, &val_type, &state, env);
- if (r != 0) goto err;
- break;
-
- case TK_CHAR_PROPERTY:
- {
- int ctype;
-
- ctype = fetch_char_property_to_ctype(&p, end, env);
- if (ctype < 0) return ctype;
- r = add_ctype_to_cc(cc, ctype, tok->u.prop.is_not, env);
- if (r != 0) return r;
- goto next_class;
- }
- break;
-
- case TK_CC_RANGE:
- if (state == CCS_VALUE) {
- r = fetch_token_in_cc(tok, &p, end, env);
- if (r < 0) goto err;
- fetched = 1;
- if (r == TK_CC_CLOSE) { /* allow [x-] */
- range_end_val:
- v = (OnigCodePoint )'-';
- in_israw = 0;
- goto val_entry;
- }
- else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, (UChar* )"-");
- goto range_end_val;
- }
- state = CCS_RANGE;
- }
- else if (state == CCS_START) {
- /* [-xa] is allowed */
- v = (OnigCodePoint )tok->u.c;
- in_israw = 0;
-
- r = fetch_token_in_cc(tok, &p, end, env);
- if (r < 0) goto err;
- fetched = 1;
- /* [--x] or [a&&-x] is warned. */
- if (r == TK_CC_RANGE || and_start != 0)
- CC_ESC_WARN(env, (UChar* )"-");
-
- goto val_entry;
- }
- else if (state == CCS_RANGE) {
- CC_ESC_WARN(env, (UChar* )"-");
- goto sb_char; /* [!--x] is allowed */
- }
- else { /* CCS_COMPLETE */
- r = fetch_token_in_cc(tok, &p, end, env);
- if (r < 0) goto err;
- fetched = 1;
- if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
- else if (r == TK_CC_AND) {
- CC_ESC_WARN(env, (UChar* )"-");
- goto range_end_val;
- }
-
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
- CC_ESC_WARN(env, (UChar* )"-");
- goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */
- }
- r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
- goto err;
- }
- break;
-
- case TK_CC_CC_OPEN: /* [ */
- {
- Node *anode;
- CClassNode* acc;
-
- r = parse_char_class(&anode, tok, &p, end, env);
- if (r == 0) {
- acc = NCCLASS(anode);
- r = or_cclass(cc, acc, env);
- }
- onig_node_free(anode);
- if (r != 0) goto err;
- }
- break;
-
- case TK_CC_AND: /* && */
- {
- if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
- &val_type, &state, env);
- if (r != 0) goto err;
- }
- /* initialize local variables */
- and_start = 1;
- state = CCS_START;
-
- if (IS_NOT_NULL(prev_cc)) {
- r = and_cclass(prev_cc, cc, env);
- if (r != 0) goto err;
- bbuf_free(cc->mbuf);
- }
- else {
- prev_cc = cc;
- cc = &work_cc;
- }
- initialize_cclass(cc);
- }
- break;
-
- case TK_EOT:
- r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
- goto err;
- break;
- default:
- r = ONIGERR_PARSER_BUG;
- goto err;
- break;
- }
-
- if (fetched)
- r = tok->type;
- else {
- r = fetch_token_in_cc(tok, &p, end, env);
- if (r < 0) goto err;
- }
- }
-
- if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
- &val_type, &state, env);
- if (r != 0) goto err;
- }
-
- if (IS_NOT_NULL(prev_cc)) {
- r = and_cclass(prev_cc, cc, env);
- if (r != 0) goto err;
- bbuf_free(cc->mbuf);
- cc = prev_cc;
- }
-
- if (neg != 0)
- NCCLASS_SET_NOT(cc);
- else
- NCCLASS_CLEAR_NOT(cc);
- if (IS_NCCLASS_NOT(cc) &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
- int is_empty;
-
- is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
- if (is_empty != 0)
- BITSET_IS_EMPTY(cc->bs, is_empty);
-
- if (is_empty == 0) {
-#define NEWLINE_CODE 0x0a
-
- if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
- if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
- BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
- else
- add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
- }
- }
- }
- *src = p;
- return 0;
-
- err:
- if (cc != NCCLASS(*np))
- bbuf_free(cc->mbuf);
- return r;
-}
-
-static int parse_subexp(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env);
-
-static int
-parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
-{
- int r, num;
- Node *target;
- OnigOptionType option;
- OnigCodePoint c;
- OnigEncoding enc = env->enc;
-
-#ifdef USE_NAMED_GROUP
- int list_capture;
-#endif
-
- UChar* p = *src;
- PFETCH_READY;
-
- *np = NULL;
- if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
-
- option = env->option;
- if (PPEEK_IS('?') &&
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
- PINC;
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
-
- PFETCH(c);
- switch (c) {
- case ':': /* (?:...) grouping only */
- group:
- r = fetch_token(tok, &p, end, env);
- if (r < 0) return r;
- r = parse_subexp(np, tok, term, &p, end, env);
- if (r < 0) return r;
- *src = p;
- return 1; /* group */
- break;
-
- case '=':
- *np = onig_node_new_anchor(ANCHOR_PREC_READ);
- break;
- case '!': /* preceding read */
- *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
- break;
- case '>': /* (?>...) stop backtrack */
- *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
- break;
-
-#ifdef USE_NAMED_GROUP
- case '\'':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
- goto named_group1;
- }
- else
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- break;
-#endif
-
- case '<': /* look behind (?<=...), (?<!...) */
- PFETCH(c);
- if (c == '=')
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
- else if (c == '!')
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
-#ifdef USE_NAMED_GROUP
- else {
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
- UChar *name;
- UChar *name_end;
-
- PUNFETCH;
- c = '<';
-
- named_group1:
- list_capture = 0;
-
- named_group2:
- name = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
- if (r < 0) return r;
-
- num = scan_env_add_mem_entry(env);
- if (num < 0) return num;
- if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
-
- r = name_add(env->reg, name, name_end, num, env);
- if (r != 0) return r;
- *np = node_new_enclose_memory(env->option, 1);
- CHECK_NULL_RETURN_MEMERR(*np);
- NENCLOSE(*np)->regnum = num;
- if (list_capture != 0)
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
- env->num_named++;
- }
- else {
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- }
- }
-#else
- else {
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- }
-#endif
- break;
-
- case '@':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
-#ifdef USE_NAMED_GROUP
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
- PFETCH(c);
- if (c == '<' || c == '\'') {
- list_capture = 1;
- goto named_group2; /* (?@<name>...) */
- }
- PUNFETCH;
- }
-#endif
- *np = node_new_enclose_memory(env->option, 0);
- CHECK_NULL_RETURN_MEMERR(*np);
- num = scan_env_add_mem_entry(env);
- if (num < 0) {
- onig_node_free(*np);
- return num;
- }
- else if (num >= (int )BIT_STATUS_BITS_NUM) {
- onig_node_free(*np);
- return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
- }
- NENCLOSE(*np)->regnum = num;
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
- }
- else {
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- }
- break;
-
- case '-': case 'i': case 'm': case 's': case 'x':
- {
- int neg = 0;
-
- while (1) {
- switch (c) {
- case ':':
- case ')':
- break;
-
- case '-': neg = 1; break;
- case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
- case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
- case 's':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
- ONOFF(option, ONIG_OPTION_MULTILINE, neg);
- }
- else
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- break;
-
- case 'm':
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
- ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
- }
- else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
- ONOFF(option, ONIG_OPTION_MULTILINE, neg);
- }
- else
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- break;
- default:
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- }
-
- if (c == ')') {
- *np = node_new_option(option);
- CHECK_NULL_RETURN_MEMERR(*np);
- *src = p;
- return 2; /* option only */
- }
- else if (c == ':') {
- OnigOptionType prev = env->option;
-
- env->option = option;
- r = fetch_token(tok, &p, end, env);
- if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env);
- env->option = prev;
- if (r < 0) return r;
- *np = node_new_option(option);
- CHECK_NULL_RETURN_MEMERR(*np);
- NENCLOSE(*np)->target = target;
- *src = p;
- return 0;
- }
-
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- PFETCH(c);
- }
- }
- break;
-
- default:
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- }
- }
- else {
- if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
- goto group;
-
- *np = node_new_enclose_memory(env->option, 0);
- CHECK_NULL_RETURN_MEMERR(*np);
- num = scan_env_add_mem_entry(env);
- if (num < 0) return num;
- NENCLOSE(*np)->regnum = num;
- }
-
- CHECK_NULL_RETURN_MEMERR(*np);
- r = fetch_token(tok, &p, end, env);
- if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env);
- if (r < 0) {
- onig_node_free(target);
- return r;
- }
-
- if (NTYPE(*np) == NT_ANCHOR)
- NANCHOR(*np)->target = target;
- else {
- NENCLOSE(*np)->target = target;
- if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
- /* Don't move this to previous of parse_subexp() */
- r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
- if (r != 0) return r;
- }
- }
-
- *src = p;
- return 0;
-}
-
-static const char* const PopularQStr[] = {
- "?", "*", "+", "??", "*?", "+?"
-};
-
-static const char* const ReduceQStr[] = {
- "", "", "*", "*?", "??", "+ and ??", "+? and ?"
-};
-
-static int
-set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
-{
- QtfrNode* qn;
-
- qn = NQTFR(qnode);
- if (qn->lower == 1 && qn->upper == 1) {
- return 1;
- }
-
- switch (NTYPE(target)) {
- case NT_STR:
- if (! group) {
- StrNode* sn = NSTR(target);
- if (str_node_can_be_split(sn, env->enc)) {
- Node* n = str_node_split_last_char(sn, env->enc);
- if (IS_NOT_NULL(n)) {
- qn->target = n;
- return 2;
- }
- }
- }
- break;
-
- case NT_QTFR:
- { /* check redundant double repeat. */
- /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
- QtfrNode* qnt = NQTFR(target);
- int nestq_num = popular_quantifier_num(qn);
- int targetq_num = popular_quantifier_num(qnt);
-
-#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
- UChar buf[WARN_BUFSIZE];
-
- switch(ReduceTypeTable[targetq_num][nestq_num]) {
- case RQ_ASIS:
- break;
-
- case RQ_DEL:
- if (onig_verb_warn != onig_null_warn) {
- onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- (UChar* )"redundant nested repeat operator");
- (*onig_verb_warn)((char* )buf);
- }
- goto warn_exit;
- break;
-
- default:
- if (onig_verb_warn != onig_null_warn) {
- onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
- env->pattern, env->pattern_end,
- (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
- PopularQStr[targetq_num], PopularQStr[nestq_num],
- ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
- (*onig_verb_warn)((char* )buf);
- }
- goto warn_exit;
- break;
- }
- }
-
- warn_exit:
-#endif
- if (targetq_num >= 0) {
- if (nestq_num >= 0) {
- onig_reduce_nested_quantifier(qnode, target);
- goto q_exit;
- }
- else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
- /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
- if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
- qn->upper = (qn->lower == 0 ? 1 : qn->lower);
- }
- }
- }
- }
- break;
-
- default:
- break;
- }
-
- qn->target = target;
- q_exit:
- return 0;
-}
-
-
-#ifdef USE_SHARED_CCLASS_TABLE
-
-#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
-
-/* for ctype node hash table */
-
-typedef struct {
- OnigEncoding enc;
- int is_not;
- int type;
-} type_cclass_key;
-
-static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
-{
- if (x->type != y->type) return 1;
- if (x->enc != y->enc) return 1;
- if (x->is_not != y->is_not) return 1;
- return 0;
-}
-
-static st_index_t type_cclass_hash(type_cclass_key* key)
-{
- int i, val;
- UChar *p;
-
- val = 0;
-
- p = (UChar* )&(key->enc);
- for (i = 0; i < (int )sizeof(key->enc); i++) {
- val = val * 997 + (int )*p++;
- }
-
- p = (UChar* )(&key->type);
- for (i = 0; i < (int )sizeof(key->type); i++) {
- val = val * 997 + (int )*p++;
- }
-
- val += key->is_not;
- return val + (val >> 5);
-}
-
-static const struct st_hash_type type_type_cclass_hash = {
- type_cclass_cmp,
- type_cclass_hash,
-};
-
-static st_table* OnigTypeCClassTable;
-
-
-static enum st_retval
-i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
-{
- if (IS_NOT_NULL(node)) {
- CClassNode* cc = NCCLASS(node);
- if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
- xfree(node);
- }
-
- if (IS_NOT_NULL(key)) xfree(key);
- return ST_DELETE;
-}
-
-extern int
-onig_free_shared_cclass_table(void)
-{
- THREAD_ATOMIC_START;
- if (IS_NOT_NULL(OnigTypeCClassTable)) {
- onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
- onig_st_free_table(OnigTypeCClassTable);
- OnigTypeCClassTable = NULL;
- }
- THREAD_ATOMIC_END;
-
- return 0;
-}
-
-#endif /* USE_SHARED_CCLASS_TABLE */
-
-
-#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
-static int
-clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
-{
- BBuf *tbuf;
- int r;
-
- if (IS_NCCLASS_NOT(cc)) {
- bitset_invert(cc->bs);
-
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {
- r = not_code_range_buf(enc, cc->mbuf, &tbuf);
- if (r != 0) return r;
-
- bbuf_free(cc->mbuf);
- cc->mbuf = tbuf;
- }
-
- NCCLASS_CLEAR_NOT(cc);
- }
-
- return 0;
-}
-#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
-
-typedef struct {
- ScanEnv* env;
- CClassNode* cc;
- Node* alt_root;
- Node** ptail;
-} IApplyCaseFoldArg;
-
-static int
-i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
- int to_len, void* arg)
-{
- IApplyCaseFoldArg* iarg;
- ScanEnv* env;
- CClassNode* cc;
- BitSetRef bs;
-
- iarg = (IApplyCaseFoldArg* )arg;
- env = iarg->env;
- cc = iarg->cc;
- bs = cc->bs;
-
- if (to_len == 1) {
- int is_in = onig_is_code_in_cc(env->enc, from, cc);
-#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
- if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
- (is_in == 0 && IS_NCCLASS_NOT(cc))) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- add_code_range0(&(cc->mbuf), env, *to, *to, 0);
- }
- else {
- BITSET_SET_BIT(bs, *to);
- }
- }
-#else
- if (is_in != 0) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
- add_code_range0(&(cc->mbuf), env, *to, *to, 0);
- }
- else {
- if (IS_NCCLASS_NOT(cc)) {
- BITSET_CLEAR_BIT(bs, *to);
- }
- else
- BITSET_SET_BIT(bs, *to);
- }
- }
-#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
- }
- else {
- int r, i, len;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- Node *snode = NULL_NODE;
-
- if (onig_is_code_in_cc(env->enc, from, cc)
-#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
- && !IS_NCCLASS_NOT(cc)
-#endif
- ) {
- for (i = 0; i < to_len; i++) {
- len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
- if (i == 0) {
- snode = onig_node_new_str(buf, buf + len);
- CHECK_NULL_RETURN_MEMERR(snode);
-
- /* char-class expanded multi-char only
- compare with string folded at match time. */
- NSTRING_SET_AMBIG(snode);
- }
- else {
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r < 0) {
- onig_node_free(snode);
- return r;
- }
- }
- }
-
- *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
- CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
- iarg->ptail = &(NCDR((*(iarg->ptail))));
- }
- }
-
- return 0;
-}
-
-static int
-parse_exp(Node** np, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env)
-{
- int r, len, group = 0;
- Node* qn;
- Node** targetp;
-
- *np = NULL;
- if (tok->type == (enum TokenSyms )term)
- goto end_of_token;
-
- switch (tok->type) {
- case TK_ALT:
- case TK_EOT:
- end_of_token:
- *np = node_new_empty();
- return tok->type;
-
- case TK_SUBEXP_OPEN:
- r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
- if (r < 0) return r;
- if (r == 1) group = 1;
- else if (r == 2) { /* option only */
- Node* target;
- OnigOptionType prev = env->option;
-
- env->option = NENCLOSE(*np)->option;
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- r = parse_subexp(&target, tok, term, src, end, env);
- env->option = prev;
- if (r < 0) {
- onig_node_free(target);
- return r;
- }
- NENCLOSE(*np)->target = target;
- return tok->type;
- }
- break;
-
- case TK_SUBEXP_CLOSE:
- if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
- return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
-
- if (tok->escaped) goto tk_raw_byte;
- else goto tk_byte;
- break;
-
- case TK_STRING:
- tk_byte:
- {
- *np = node_new_str(tok->backp, *src);
- CHECK_NULL_RETURN_MEMERR(*np);
-
- while (1) {
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- if (r != TK_STRING) break;
-
- r = onig_node_str_cat(*np, tok->backp, *src);
- if (r < 0) return r;
- }
-
- string_end:
- targetp = np;
- goto repeat;
- }
- break;
-
- case TK_RAW_BYTE:
- tk_raw_byte:
- {
- *np = node_new_str_raw_char((UChar )tok->u.c);
- CHECK_NULL_RETURN_MEMERR(*np);
- len = 1;
- while (1) {
- if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
- r = fetch_token(tok, src, end, env);
- NSTRING_CLEAR_RAW(*np);
- goto string_end;
- }
- }
-
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- if (r != TK_RAW_BYTE) {
- /* Don't use this, it is wrong for little endian encodings. */
- return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- }
-
- r = node_str_cat_char(*np, (UChar )tok->u.c);
- if (r < 0) return r;
-
- len++;
- }
- }
- break;
-
- case TK_CODE_POINT:
- {
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
- if (num < 0) return num;
-#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
- *np = node_new_str_raw(buf, buf + num);
-#else
- *np = node_new_str(buf, buf + num);
-#endif
- CHECK_NULL_RETURN_MEMERR(*np);
- }
- break;
-
- case TK_QUOTE_OPEN:
- {
- OnigCodePoint end_op[2];
- UChar *qstart, *qend, *nextp;
-
- end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
- end_op[1] = (OnigCodePoint )'E';
- qstart = *src;
- qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
- if (IS_NULL(qend)) {
- nextp = qend = end;
- }
- *np = node_new_str(qstart, qend);
- CHECK_NULL_RETURN_MEMERR(*np);
- *src = nextp;
- }
- break;
-
- case TK_CHAR_TYPE:
- {
- switch (tok->u.prop.ctype) {
- case ONIGENC_CTYPE_D:
- case ONIGENC_CTYPE_S:
- case ONIGENC_CTYPE_W:
- {
- CClassNode* cc;
- *np = node_new_cclass();
- CHECK_NULL_RETURN_MEMERR(*np);
- cc = NCCLASS(*np);
- add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
- if (tok->u.prop.is_not != 0) NCCLASS_SET_NOT(cc);
- }
- break;
-
- case ONIGENC_CTYPE_WORD:
- *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.is_not);
- CHECK_NULL_RETURN_MEMERR(*np);
- break;
-
- case ONIGENC_CTYPE_SPACE:
- case ONIGENC_CTYPE_DIGIT:
- case ONIGENC_CTYPE_XDIGIT:
- {
- CClassNode* cc;
-
-#ifdef USE_SHARED_CCLASS_TABLE
- const OnigCodePoint *mbr;
- OnigCodePoint sb_out;
-
- r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
- &sb_out, &mbr);
- if (r == 0 &&
- ONIGENC_CODE_RANGE_NUM(mbr)
- >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
- type_cclass_key key;
- type_cclass_key* new_key;
-
- key.enc = env->enc;
- key.is_not = tok->u.prop.is_not;
- key.type = tok->u.prop.ctype;
-
- THREAD_ATOMIC_START;
-
- if (IS_NULL(OnigTypeCClassTable)) {
- OnigTypeCClassTable
- = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
- if (IS_NULL(OnigTypeCClassTable)) {
- THREAD_ATOMIC_END;
- return ONIGERR_MEMORY;
- }
- }
- else {
- if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
- (st_data_t* )np)) {
- THREAD_ATOMIC_END;
- break;
- }
- }
-
- *np = node_new_cclass_by_codepoint_range(tok->u.prop.is_not,
- sb_out, mbr);
- if (IS_NULL(*np)) {
- THREAD_ATOMIC_END;
- return ONIGERR_MEMORY;
- }
-
- cc = NCCLASS(*np);
- NCCLASS_SET_SHARE(cc);
- new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
- xmemcpy(new_key, &key, sizeof(type_cclass_key));
- onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
- (st_data_t )*np);
-
- THREAD_ATOMIC_END;
- }
- else {
-#endif
- *np = node_new_cclass();
- CHECK_NULL_RETURN_MEMERR(*np);
- cc = NCCLASS(*np);
- add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
- if (tok->u.prop.is_not != 0) NCCLASS_SET_NOT(cc);
-#ifdef USE_SHARED_CCLASS_TABLE
- }
-#endif
- }
- break;
-
- default:
- return ONIGERR_PARSER_BUG;
- break;
- }
- }
- break;
-
- case TK_CHAR_PROPERTY:
- r = parse_char_property(np, tok, src, end, env);
- if (r != 0) return r;
- break;
-
- case TK_CC_OPEN:
- {
- CClassNode* cc;
-
- r = parse_char_class(np, tok, src, end, env);
- if (r != 0) return r;
-
- cc = NCCLASS(*np);
- if (IS_IGNORECASE(env->option)) {
- IApplyCaseFoldArg iarg;
-
- iarg.env = env;
- iarg.cc = cc;
- iarg.alt_root = NULL_NODE;
- iarg.ptail = &(iarg.alt_root);
-
- r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
- i_apply_case_fold, &iarg);
- if (r != 0) {
- onig_node_free(iarg.alt_root);
- return r;
- }
- if (IS_NOT_NULL(iarg.alt_root)) {
- Node* work = onig_node_new_alt(*np, iarg.alt_root);
- if (IS_NULL(work)) {
- onig_node_free(iarg.alt_root);
- return ONIGERR_MEMORY;
- }
- *np = work;
- }
- }
- }
- break;
-
- case TK_ANYCHAR:
- *np = node_new_anychar();
- CHECK_NULL_RETURN_MEMERR(*np);
- break;
-
- case TK_ANYCHAR_ANYTIME:
- *np = node_new_anychar();
- CHECK_NULL_RETURN_MEMERR(*np);
- qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
- CHECK_NULL_RETURN_MEMERR(qn);
- NQTFR(qn)->target = *np;
- *np = qn;
- break;
-
- case TK_BACKREF:
- len = tok->u.backref.num;
- *np = node_new_backref(len,
- (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
- tok->u.backref.by_name,
-#ifdef USE_BACKREF_WITH_LEVEL
- tok->u.backref.exist_level,
- tok->u.backref.level,
-#endif
- env);
- CHECK_NULL_RETURN_MEMERR(*np);
- break;
-
-#ifdef USE_SUBEXP_CALL
- case TK_CALL:
- {
- int gnum = tok->u.call.gnum;
-
- if (gnum < 0) {
- gnum = BACKREF_REL_TO_ABS(gnum, env);
- if (gnum <= 0)
- return ONIGERR_INVALID_BACKREF;
- }
- *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
- CHECK_NULL_RETURN_MEMERR(*np);
- env->num_call++;
- }
- break;
-#endif
-
- case TK_ANCHOR:
- *np = onig_node_new_anchor(tok->u.anchor);
- break;
-
- case TK_OP_REPEAT:
- case TK_INTERVAL:
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
- return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
- else
- *np = node_new_empty();
- }
- else {
- goto tk_byte;
- }
- break;
-
- default:
- return ONIGERR_PARSER_BUG;
- break;
- }
-
- {
- targetp = np;
-
- re_entry:
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
-
- repeat:
- if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
- if (is_invalid_quantifier_target(*targetp))
- return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
-
- qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
- (r == TK_INTERVAL ? 1 : 0));
- CHECK_NULL_RETURN_MEMERR(qn);
- NQTFR(qn)->greedy = tok->u.repeat.greedy;
- r = set_quantifier(qn, *targetp, group, env);
- if (r < 0) {
- onig_node_free(qn);
- return r;
- }
-
- if (tok->u.repeat.possessive != 0) {
- Node* en;
- en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
- if (IS_NULL(en)) {
- onig_node_free(qn);
- return ONIGERR_MEMORY;
- }
- NENCLOSE(en)->target = qn;
- qn = en;
- }
-
- if (r == 0) {
- *targetp = qn;
- }
- else if (r == 1) {
- onig_node_free(qn);
- }
- else if (r == 2) { /* split case: /abc+/ */
- Node *tmp;
-
- *targetp = node_new_list(*targetp, NULL);
- if (IS_NULL(*targetp)) {
- onig_node_free(qn);
- return ONIGERR_MEMORY;
- }
- tmp = NCDR(*targetp) = node_new_list(qn, NULL);
- if (IS_NULL(tmp)) {
- onig_node_free(qn);
- return ONIGERR_MEMORY;
- }
- targetp = &(NCAR(tmp));
- }
- goto re_entry;
- }
- }
-
- return r;
-}
-
-static int
-parse_branch(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env)
-{
- int r;
- Node *node, **headp;
-
- *top = NULL;
- r = parse_exp(&node, tok, term, src, end, env);
- if (r < 0) {
- onig_node_free(node);
- return r;
- }
-
- if (r == TK_EOT || r == term || r == TK_ALT) {
- *top = node;
- }
- else {
- *top = node_new_list(node, NULL);
- headp = &(NCDR(*top));
- while (r != TK_EOT && r != term && r != TK_ALT) {
- r = parse_exp(&node, tok, term, src, end, env);
- if (r < 0) {
- onig_node_free(node);
- return r;
- }
-
- if (NTYPE(node) == NT_LIST) {
- *headp = node;
- while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
- headp = &(NCDR(node));
- }
- else {
- *headp = node_new_list(node, NULL);
- headp = &(NCDR(*headp));
- }
- }
- }
-
- return r;
-}
-
-/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
-static int
-parse_subexp(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env)
-{
- int r;
- Node *node, **headp;
-
- *top = NULL;
- r = parse_branch(&node, tok, term, src, end, env);
- if (r < 0) {
- onig_node_free(node);
- return r;
- }
-
- if (r == term) {
- *top = node;
- }
- else if (r == TK_ALT) {
- *top = onig_node_new_alt(node, NULL);
- headp = &(NCDR(*top));
- while (r == TK_ALT) {
- r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
- r = parse_branch(&node, tok, term, src, end, env);
- if (r < 0) {
- onig_node_free(node);
- return r;
- }
-
- *headp = onig_node_new_alt(node, NULL);
- headp = &(NCDR(*headp));
- }
-
- if (tok->type != (enum TokenSyms )term)
- goto err;
- }
- else {
- onig_node_free(node);
- err:
- if (term == TK_SUBEXP_CLOSE)
- return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
- else
- return ONIGERR_PARSER_BUG;
- }
-
- return r;
-}
-
-static int
-parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
-{
- int r;
- OnigToken tok;
-
- r = fetch_token(&tok, src, end, env);
- if (r < 0) return r;
- r = parse_subexp(top, &tok, TK_EOT, src, end, env);
- if (r < 0) return r;
- return 0;
-}
-
-extern int
-onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
- regex_t* reg, ScanEnv* env)
-{
- int r;
- UChar* p;
-
-#ifdef USE_NAMED_GROUP
- names_clear(reg);
-#endif
-
- scan_env_clear(env);
- env->option = reg->options;
- env->case_fold_flag = reg->case_fold_flag;
- env->enc = reg->enc;
- env->syntax = reg->syntax;
- env->pattern = (UChar* )pattern;
- env->pattern_end = (UChar* )end;
- env->reg = reg;
-
- *root = NULL;
- p = (UChar* )pattern;
- r = parse_regexp(root, &p, (UChar* )end, env);
- reg->num_mem = env->num_mem;
- return r;
-}
-
-extern void
-onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
- UChar* arg, UChar* arg_end)
-{
- env->error = arg;
- env->error_end = arg_end;
-}
-#endif //ENABLE_REGEXP
diff --git a/src/regparse.h b/src/regparse.h
deleted file mode 100644
index 1f7855df8..000000000
--- a/src/regparse.h
+++ /dev/null
@@ -1,354 +0,0 @@
-#ifndef ONIGURUMA_REGPARSE_H
-#define ONIGURUMA_REGPARSE_H
-/**********************************************************************
- regparse.h - Oniguruma (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "regint.h"
-
-/* node type */
-#define NT_STR 0
-#define NT_CCLASS 1
-#define NT_CTYPE 2
-#define NT_CANY 3
-#define NT_BREF 4
-#define NT_QTFR 5
-#define NT_ENCLOSE 6
-#define NT_ANCHOR 7
-#define NT_LIST 8
-#define NT_ALT 9
-#define NT_CALL 10
-
-/* node type bit */
-#define NTYPE2BIT(type) (1<<(type))
-
-#define BIT_NT_STR NTYPE2BIT(NT_STR)
-#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
-#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
-#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
-#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
-#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
-#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
-#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
-#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
-#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
-#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
-
-#define IS_NODE_TYPE_SIMPLE(type) \
- ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
- BIT_NT_CANY | BIT_NT_BREF)) != 0)
-
-#define NTYPE(node) ((node)->u.base.type)
-#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
-
-#define NSTR(node) (&((node)->u.str))
-#define NCCLASS(node) (&((node)->u.cclass))
-#define NCTYPE(node) (&((node)->u.ctype))
-#define NBREF(node) (&((node)->u.bref))
-#define NQTFR(node) (&((node)->u.qtfr))
-#define NENCLOSE(node) (&((node)->u.enclose))
-#define NANCHOR(node) (&((node)->u.anchor))
-#define NCONS(node) (&((node)->u.cons))
-#define NCALL(node) (&((node)->u.call))
-
-#define NCAR(node) (NCONS(node)->car)
-#define NCDR(node) (NCONS(node)->cdr)
-
-
-
-#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
-#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
-
-#define ENCLOSE_MEMORY (1<<0)
-#define ENCLOSE_OPTION (1<<1)
-#define ENCLOSE_STOP_BACKTRACK (1<<2)
-
-#define NODE_STR_MARGIN 16
-#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
-#define NODE_BACKREFS_SIZE 6
-
-#define NSTR_RAW (1<<0) /* by backslashed number */
-#define NSTR_AMBIG (1<<1)
-#define NSTR_DONT_GET_OPT_INFO (1<<2)
-
-#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
-#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
-#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
-#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
-#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
-#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
-#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
- (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
-
-#define BACKREFS_P(br) \
- (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
-
-#define NQ_TARGET_ISNOT_EMPTY 0
-#define NQ_TARGET_IS_EMPTY 1
-#define NQ_TARGET_IS_EMPTY_MEM 2
-#define NQ_TARGET_IS_EMPTY_REC 3
-
-/* status bits */
-#define NST_MIN_FIXED (1<<0)
-#define NST_MAX_FIXED (1<<1)
-#define NST_CLEN_FIXED (1<<2)
-#define NST_MARK1 (1<<3)
-#define NST_MARK2 (1<<4)
-#define NST_MEM_BACKREFED (1<<5)
-#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
-#define NST_RECURSION (1<<7)
-#define NST_CALLED (1<<8)
-#define NST_ADDR_FIXED (1<<9)
-#define NST_NAMED_GROUP (1<<10)
-#define NST_NAME_REF (1<<11)
-#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
-#define NST_NEST_LEVEL (1<<13)
-#define NST_BY_NUMBER (1<<14) /* {n,m} */
-
-#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
-#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
-
-#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
-#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
-#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
-#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
-#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
-#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
-#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
-#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
-#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
- (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
-#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
-
-#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
-#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
-#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
-#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
-#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
-#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
-#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
-
-#define CALLNODE_REFNUM_UNDEF -1
-
-typedef struct {
- NodeBase base;
- UChar* s;
- UChar* end;
- unsigned int flag;
- int capa; /* (allocated size - 1) or 0: use buf[] */
- UChar buf[NODE_STR_BUF_SIZE];
-} StrNode;
-
-typedef struct {
- NodeBase base;
- int state;
- struct _Node* target;
- int lower;
- int upper;
- int greedy;
- int target_empty_info;
- struct _Node* head_exact;
- struct _Node* next_head_exact;
- int is_refered; /* include called node. don't eliminate even if {0} */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
-#endif
-} QtfrNode;
-
-typedef struct {
- NodeBase base;
- int state;
- int type;
- int regnum;
- OnigOptionType option;
- struct _Node* target;
- AbsAddrType call_addr;
- /* for multiple call reference */
- OnigDistance min_len; /* min length (byte) */
- OnigDistance max_len; /* max length (byte) */
- int char_len; /* character length */
- int opt_count; /* referenced count in optimize_node_left() */
-} EncloseNode;
-
-#ifdef USE_SUBEXP_CALL
-
-typedef struct {
- int offset;
- struct _Node* target;
-} UnsetAddr;
-
-typedef struct {
- int num;
- int alloc;
- UnsetAddr* us;
-} UnsetAddrList;
-
-typedef struct {
- NodeBase base;
- int state;
- int group_num;
- UChar* name;
- UChar* name_end;
- struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
- UnsetAddrList* unset_addr_list;
-} CallNode;
-
-#endif
-
-typedef struct {
- NodeBase base;
- int state;
- int back_num;
- int back_static[NODE_BACKREFS_SIZE];
- int* back_dynamic;
- int nest_level;
-} BRefNode;
-
-typedef struct {
- NodeBase base;
- int type;
- struct _Node* target;
- int char_len;
-} AnchorNode;
-
-typedef struct {
- NodeBase base;
- struct _Node* car;
- struct _Node* cdr;
-} ConsAltNode;
-
-typedef struct {
- NodeBase base;
- int ctype;
- int is_not;
-} CtypeNode;
-
-typedef struct _Node {
- union {
- NodeBase base;
- StrNode str;
- CClassNode cclass;
- QtfrNode qtfr;
- EncloseNode enclose;
- BRefNode bref;
- AnchorNode anchor;
- ConsAltNode cons;
- CtypeNode ctype;
-#ifdef USE_SUBEXP_CALL
- CallNode call;
-#endif
- } u;
-} Node;
-
-
-#define NULL_NODE ((Node* )0)
-
-#define SCANENV_MEMNODES_SIZE 8
-#define SCANENV_MEM_NODES(senv) \
- (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
- (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
-
-typedef struct {
- OnigOptionType option;
- OnigCaseFoldType case_fold_flag;
- OnigEncoding enc;
- const OnigSyntaxType* syntax;
- BitStatusType capture_history;
- BitStatusType bt_mem_start;
- BitStatusType bt_mem_end;
- BitStatusType backrefed_mem;
- UChar* pattern;
- UChar* pattern_end;
- UChar* error;
- UChar* error_end;
- regex_t* reg; /* for reg->names only */
- int num_call;
-#ifdef USE_SUBEXP_CALL
- UnsetAddrList* unset_addr_list;
-#endif
- int num_mem;
-#ifdef USE_NAMED_GROUP
- int num_named;
-#endif
- int mem_alloc;
- Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
- Node** mem_nodes_dynamic;
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- int num_comb_exp_check;
- int comb_exp_max_regnum;
- int curr_max_regnum;
- int has_recursion;
-#endif
- int warnings_flag;
- const char* sourcefile;
- int sourceline;
-} ScanEnv;
-
-
-#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
-#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
-#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
-
-#ifdef USE_NAMED_GROUP
-typedef struct {
- int new_val;
-} GroupNumRemap;
-
-extern int onig_renumber_name_table(regex_t* reg, GroupNumRemap* map);
-#endif
-
-extern int onig_strncmp(const UChar* s1, const UChar* s2, int n);
-extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end);
-extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end);
-extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc);
-extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode);
-extern void onig_node_conv_to_str_node(Node* node, int raw);
-extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end);
-extern int onig_node_str_set(Node* node, const UChar* s, const UChar* end);
-extern void onig_node_free(Node* node);
-extern Node* onig_node_new_enclose(int type);
-extern Node* onig_node_new_anchor(int type);
-extern Node* onig_node_new_str(const UChar* s, const UChar* end);
-extern Node* onig_node_new_list(Node* left, Node* right);
-extern Node* onig_node_list_add(Node* list, Node* x);
-extern Node* onig_node_new_alt(Node* left, Node* right);
-extern void onig_node_str_clear(Node* node);
-extern int onig_free_node_list(void);
-extern int onig_names_free(regex_t* reg);
-extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env);
-extern int onig_free_shared_cclass_table(void);
-
-#ifdef ONIG_DEBUG
-#ifdef USE_NAMED_GROUP
-extern int onig_print_names(FILE*, regex_t*);
-#endif
-#endif
-
-#endif /* ONIGURUMA_REGPARSE_H */
diff --git a/src/sprintf.c b/src/sprintf.c
index 750b17d25..905182147 100644
--- a/src/sprintf.c
+++ b/src/sprintf.c
@@ -10,7 +10,6 @@
#include <stdio.h>
#include <string.h>
-#include "encoding.h"
#include "mruby/string.h"
#include "mruby/hash.h"
#include "mruby/numeric.h"
diff --git a/src/string.c b/src/string.c
index 00b08fc36..332d79376 100644
--- a/src/string.c
+++ b/src/string.c
@@ -8,22 +8,17 @@
#include <string.h>
#include "mruby/string.h"
+#include "mruby/class.h"
#include <ctype.h>
#include <limits.h>
#include "mruby/range.h"
#include "mruby/array.h"
#include "mruby/class.h"
#include <stdio.h>
-#ifdef ENABLE_REGEXP
#include "re.h"
-#include "regex.h"
-#endif //ENABLE_REGEXP
const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
-#ifdef ENABLE_REGEXP
-static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote);
-#endif //ENABLE_REGEXP
static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2);
static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len);
@@ -32,6 +27,12 @@ static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len)
s->aux.capa = capacity;\
} while (0)
+static const char*
+_obj_classname(mrb_state *mrb, mrb_value obj)
+{
+ return mrb_class_name(mrb, mrb_obj_class(mrb, obj));
+}
+
void
mrb_str_decref(mrb_state *mrb, struct mrb_shared_string *shared)
{
@@ -652,6 +653,7 @@ mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
static mrb_value
mrb_str_match(mrb_state *mrb, mrb_value self/* x */)
{
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
return mrb_nil_value();
}
@@ -739,6 +741,9 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
long idx;
+ if (!strcmp(_obj_classname(mrb, indx), REGEXP_CLASS)) {
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ }
switch (mrb_type(indx)) {
case MRB_TT_FIXNUM:
idx = mrb_fixnum(indx);
@@ -748,14 +753,6 @@ num_index:
if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
return str;
- case MRB_TT_REGEX:
-#ifdef ENABLE_REGEXP
- return mrb_str_subpat(mrb, str, indx, 0); //mrb_str_subpat(str, indx, INT2FIX(0));
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
- return mrb_nil_value();
-#endif //ENABLE_REGEXP
-
case MRB_TT_STRING:
if (mrb_str_index(mrb, str, indx, 0) != -1)
return mrb_str_dup(mrb, indx);
@@ -840,13 +837,8 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
argc = mrb_get_args(mrb, "o|o", &a1, &a2);
if (argc == 2) {
- if (mrb_type(a1) == MRB_TT_REGEX) {
-#ifdef ENABLE_REGEXP
- return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1]));
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
- return mrb_nil_value();
-#endif //ENABLE_REGEXP
+ if (!strcmp(mrb_obj_classname(mrb, a1), REGEXP_CLASS)) {
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
}
return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2));
}
@@ -1215,86 +1207,6 @@ mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2)
return str;
}
-#ifdef ENABLE_REGEXP
-static mrb_value
-str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
-{
- mrb_value *argv;
- int argc;
- mrb_value pat, val, repl, match, dest = mrb_nil_value();
- struct re_registers *regs;
- mrb_int beg, n;
- mrb_int beg0, end0;
- mrb_int offset, blen, len, last;
- char *sp, *cp;
-
- if (bang) str_modify(mrb, mrb_str_ptr(self));
- mrb_get_args(mrb, "*", &argv, &argc);
- switch (argc) {
- case 1:
- /*RETURN_ENUMERATOR(str, argc, argv);*/
- break;
- case 2:
- repl = argv[1];
- mrb_string_value(mrb, &repl);
- break;
- default:
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc);
- }
-
- pat = get_pat(mrb, argv[0], 1);
- beg = mrb_reg_search(mrb, pat, str, 0, 0);
- if (beg < 0) {
- if (bang) return mrb_nil_value(); /* no match, no substitution */
- return mrb_str_dup(mrb, str);
- }
-
- offset = 0;
- n = 0;
- blen = RSTRING_LEN(str) + 30;
- dest = mrb_str_buf_new(mrb, blen);
- sp = RSTRING_PTR(str);
- cp = sp;
-
- do {
- n++;
- match = mrb_backref_get(mrb);
- regs = RMATCH_REGS(match);
- beg0 = BEG(0);
- end0 = END(0);
- val = mrb_reg_regsub(mrb, repl, str, regs, pat);
-
- len = beg - offset; /* copy pre-match substr */
- if (len) {
- mrb_str_buf_cat(mrb, dest, cp, len);
- }
-
- mrb_str_buf_append(mrb, dest, val);
-
- last = offset;
- offset = end0;
- if (beg0 == end0) {
- /*
- * Always consume at least one character of the input string
- * in order to prevent infinite loops.
- */
- if (RSTRING_LEN(str) <= end0) break;
- len = RSTRING_LEN(str)-end0;
- mrb_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len);
- offset = end0 + len;
- }
- cp = RSTRING_PTR(str) + offset;
- if (offset > RSTRING_LEN(str)) break;
- beg = mrb_reg_search(mrb, pat, str, offset, 0);
- } while (beg >= 0);
- if (RSTRING_LEN(str) > offset) {
- mrb_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset);
- }
- mrb_reg_search(mrb, pat, str, last, 0);
- mrb_basic(dest)->c = mrb_obj_class(mrb, str);
- return str;
-}
-
/* 15.2.10.5.18 */
/*
* call-seq:
@@ -1331,7 +1243,8 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang)
static mrb_value
mrb_str_gsub(mrb_state *mrb, mrb_value self)
{
- return str_gsub(mrb, self, 0);
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ return mrb_nil_value();
}
/* 15.2.10.5.19 */
@@ -1346,12 +1259,9 @@ mrb_str_gsub(mrb_state *mrb, mrb_value self)
static mrb_value
mrb_str_gsub_bang(mrb_state *mrb, mrb_value self)
{
- striuct RString *s = mrb_str_ptr(self);
-
- str_modify(mrb, s);
- return str_gsub(mrb, s, 1);
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ return mrb_nil_value();
}
-#endif //ENABLE_REGEXP
mrb_int
mrb_str_hash(mrb_state *mrb, mrb_value str)
@@ -1460,29 +1370,17 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
sub = mrb_nil_value();
}
+ if (!strcmp(mrb_obj_classname(mrb, sub), REGEXP_CLASS)) {
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ }
if (pos < 0) {
pos += RSTRING_LEN(str);
if (pos < 0) {
- if (mrb_type(sub) == MRB_TT_REGEX) {
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp class not supported");
- }
return mrb_nil_value();
}
}
switch (mrb_type(sub)) {
- case MRB_TT_REGEX:
-#ifdef ENABLE_REGEXP
- if (pos > RSTRING_LEN(str))
- return mrb_nil_value();
- pos = mrb_str_offset(mrb, str, pos);
- pos = mrb_reg_search(mrb, sub, str, pos, 0);
- pos = mrb_str_sublen(mrb, str, pos);
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //ENABLE_REGEXP
- break;
-
case MRB_TT_FIXNUM: {
int c = mrb_fixnum(sub);
long len = RSTRING_LEN(str);
@@ -1500,7 +1398,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
tmp = mrb_check_string_type(mrb, sub);
if (mrb_nil_p(tmp)) {
mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %s given",
- mrb_obj_classname(mrb, sub));
+ _obj_classname(mrb, sub));
}
sub = tmp;
}
@@ -1642,36 +1540,6 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str)
return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
}
-#ifdef ENABLE_REGEXP
-static mrb_value
-get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote)
-{
- mrb_value val;
-
- switch (mrb_type(pat)) {
- case MRB_TT_REGEX:
- return pat;
-
- case MRB_TT_STRING:
- break;
-
- default:
- val = mrb_check_string_type(mrb, pat);
- if (mrb_nil_p(val)) {
- //Check_Type(pat, T_REGEXP);
- mrb_check_type(mrb, pat, MRB_TT_REGEX);
- }
- pat = val;
- }
-
- if (quote) {
- pat = mrb_reg_quote(mrb, pat);
- }
-
- return mrb_reg_regcomp(mrb, pat);
-}
-#endif //ENABLE_REGEXP
-
/* 15.2.10.5.27 */
/*
* call-seq:
@@ -1685,26 +1553,12 @@ get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote)
* 'hello'.match(/(.)\1/)[0] #=> "ll"
* 'hello'.match('xx') #=> nil
*/
-#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_match_m(mrb_state *mrb, mrb_value self)
{
- mrb_value *argv;
- int argc;
- mrb_value re, result, b;
-
- mrb_get_args(mrb, "*&", &argv, &argc, &b);
- if (argc < 1)
- mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..2)", argc);
- re = argv[0];
- argv[0] = self;
- result = mrb_funcall(mrb, get_pat(mrb, re, 0), "match", 1, self);
- if (!mrb_nil_p(result) && mrb_block_given_p()) {
- return mrb_yield(mrb, b, result);
- }
- return result;
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ return mrb_nil_value();
}
-#endif //ENABLE_REGEXP
/* ---------------------------------- */
/* 15.2.10.5.29 */
@@ -1855,12 +1709,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
if (pos < 0) {
pos += len;
if (pos < 0) {
- if (mrb_type(sub) == MRB_TT_REGEX) {
-#ifdef ENABLE_REGEXP
- mrb_backref_set(mrb, mrb_nil_value());
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //ENABLE_REGEXP
+ if (!strcmp(mrb_obj_classname(mrb, sub), REGEXP_CLASS)) {
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
}
return mrb_nil_value();
}
@@ -1874,21 +1724,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
else
sub = mrb_nil_value();
}
+ if (!strcmp(mrb_obj_classname(mrb, sub), REGEXP_CLASS)) {
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ }
switch (mrb_type(sub)) {
- case MRB_TT_REGEX:
-#ifdef ENABLE_REGEXP
- pos = mrb_str_offset(mrb, str, pos);
- if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
- pos = mrb_reg_search(mrb, sub, str, pos, 1);
- pos = mrb_str_sublen(mrb, str, pos);
- }
- if (pos >= 0) return mrb_fixnum_value(pos);
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //ENABLE_REGEXP
- break;
-
case MRB_TT_FIXNUM: {
int c = mrb_fixnum(sub);
long len = RSTRING_LEN(str);
@@ -1920,46 +1760,6 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
return mrb_nil_value();
}
-#ifdef ENABLE_REGEXP
-static mrb_value
-scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
-{
- mrb_value result, match;
- struct re_registers *regs;
- long i;
- struct RString *ps = mrb_str_ptr(str);
- struct RMatch *pmatch;
-
- if (mrb_reg_search(mrb, pat, str, *start, 0) >= 0) {
- match = mrb_backref_get(mrb);
- pmatch = mrb_match_ptr(match);
- regs = &pmatch->rmatch->regs;
- if (regs->beg[0] == regs->end[0]) {
- /*
- * Always consume at least one character of the input string
- */
- if (ps->len > regs->end[0])
- *start = regs->end[0] + RSTRING_LEN(str)-regs->end[0];
- else
- *start = regs->end[0] + 1;
- }
- else {
- *start = regs->end[0];
- }
- if (regs->num_regs == 1) {
- return mrb_reg_nth_match(mrb, 0, match);
- }
- result = mrb_ary_new_capa(mrb, regs->num_regs);
- for (i=1; i < regs->num_regs; i++) {
- mrb_ary_push(mrb, result, mrb_reg_nth_match(mrb, i, match));
- }
-
- return result;
- }
- return mrb_nil_value();
-}
-#endif //ENABLE_REGEXP
-
/* 15.2.10.5.32 */
/*
* call-seq:
@@ -1991,41 +1791,12 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start)
* <<cruel>> <<world>>
* rceu lowlr
*/
-#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_scan(mrb_state *mrb, mrb_value str)
{
- mrb_value result;
- mrb_value pat, b;
- mrb_int start = 0;
- mrb_value match = mrb_nil_value();
- struct RString *ps = mrb_str_ptr(str);
- char *p = ps->ptr;
- long len = ps->len;
-
- mrb_get_args(mrb, "o&", &pat, &b);
- pat = get_pat(mrb, pat, 1);
- if (!mrb_block_given_p()) {
- mrb_value ary = mrb_ary_new(mrb);
-
- while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) {
- match = mrb_backref_get(mrb);
- mrb_ary_push(mrb, ary, result);
- }
- mrb_backref_set(mrb, match);
- return ary;
- }
-
- while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) {
- match = mrb_backref_get(mrb);
- mrb_yield(mrb, b, result);
- str_mod_check(mrb, str, p, len);
- mrb_backref_set(mrb, match); /* restore $~ value */
- }
- mrb_backref_set(mrb, match);
- return str;
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ return mrb_nil_value();
}
-#endif //ENABLE_REGEXP
static const char isspacetable[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
@@ -2117,28 +1888,12 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else {
if (mrb_string_p(spat)) {
split_type = string;
-#ifdef ENABLE_REGEXP
- if (RSTRING_LEN(spat) == 0) {
- /* Special case - split into chars */
- spat = mrb_reg_regcomp(mrb, spat);
- split_type = regexp;
- }
- else {
-#endif //ENABLE_REGEXP
- if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
- split_type = awk;
- }
-#ifdef ENABLE_REGEXP
+ if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
+ split_type = awk;
}
-#endif //ENABLE_REGEXP
}
else {
-#ifdef ENABLE_REGEXP
- spat = get_pat(mrb, spat, 1);
- split_type = regexp;
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //ENABLE_REGEXP
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
}
}
@@ -2207,59 +1962,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
beg = ptr - temp;
}
else {
-#ifdef ENABLE_REGEXP
- char *ptr = RSTRING_PTR(str);
- long len = RSTRING_LEN(str);
- long start = beg;
- long idx;
- int last_null = 0;
- struct re_registers *regs;
-
- while ((end = mrb_reg_search(mrb, spat, str, start, 0)) >= 0) {
- int ai;
- regs = RMATCH_REGS(mrb_backref_get(mrb));
- ai = mrb_gc_arena_save(mrb);
- if (start == end && BEG(0) == END(0)) {
- if (!ptr) {
- mrb_ary_push(mrb, result, mrb_str_new_empty(mrb, str));
- mrb_gc_arena_restore(mrb, ai);
- break;
- }
- else if (last_null == 1) {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, len));
- mrb_gc_arena_restore(mrb, ai);
- beg = start;
- }
- else {
- if (ptr+start == ptr+len)
- start++;
- else
- start += len;
- last_null = 1;
- continue;
- }
- }
- else {
- mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg));
- mrb_gc_arena_restore(mrb, ai);
- beg = start = END(0);
- }
- last_null = 0;
-
- for (idx=1; idx < regs->num_regs; idx++) {
- if (BEG(idx) == -1) continue;
- if (BEG(idx) == END(idx))
- tmp = mrb_str_new_empty(mrb, str);
- else
- tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx));
- mrb_ary_push(mrb, result, tmp);
- mrb_gc_arena_restore(mrb, ai);
- }
- if (lim >= 0 && lim <= ++i) break;
- }
-#else
- mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported");
-#endif //ENABLE_REGEXP
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
}
if (RSTRING_LEN(str) > 0 && (lim >= 0 || RSTRING_LEN(str) > beg || lim < 0)) {
if (RSTRING_LEN(str) == beg)
@@ -2297,14 +2000,12 @@ mrb_block_given_p()
* returning <i>str</i>, or <code>nil</code> if no substitutions were
* performed.
*/
-#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
{
- str_modify(mrb, str);
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
return mrb_nil_value();
}
-#endif //ENABLE_REGEXP
/* 15.2.10.5.36 */
@@ -2345,16 +2046,12 @@ mrb_str_sub_bang(mrb_state *mrb, mrb_value str)
* #=> "Is /bin/bash your preferred shell?"
*/
-#ifdef ENABLE_REGEXP
static mrb_value
mrb_str_sub(mrb_state *mrb, mrb_value self)
{
- mrb_value str = mrb_str_dup(mrb, self);
-
- mrb_str_sub_bang(mrb, str);
- return str;
+ mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented");
+ return mrb_nil_value();
}
-#endif //ENABLE_REGEXP
mrb_value
mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
@@ -2999,32 +2696,36 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */
mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */
mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */
-#ifdef ENABLE_REGEXP
+
+ // NOTE: Regexp not implemented
mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */
mrb_define_method(mrb, s, "gsub!", mrb_str_gsub_bang, ARGS_REQ(1)); /* 15.2.10.5.19 */
-#endif
+
mrb_define_method(mrb, s, "hash", mrb_str_hash_m, ARGS_REQ(1)); /* 15.2.10.5.20 */
mrb_define_method(mrb, s, "include?", mrb_str_include, ARGS_REQ(1)); /* 15.2.10.5.21 */
mrb_define_method(mrb, s, "index", mrb_str_index_m, ARGS_ANY()); /* 15.2.10.5.22 */
mrb_define_method(mrb, s, "initialize", mrb_str_init, ARGS_REQ(1)); /* 15.2.10.5.23 */
mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.24 */
mrb_define_method(mrb, s, "intern", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.25 */
-#ifdef ENABLE_REGEXP
+
+ // NOTE: Regexp not implemented
mrb_define_method(mrb, s, "match", mrb_str_match_m, ARGS_REQ(1)); /* 15.2.10.5.27 */
-#endif
+
mrb_define_method(mrb, s, "replace", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.28 */
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, ARGS_NONE()); /* 15.2.10.5.29 */
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, ARGS_NONE()); /* 15.2.10.5.30 */
mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, ARGS_ANY()); /* 15.2.10.5.31 */
-#ifdef ENABLE_REGEXP
+
+ // NOTE: Regexp not implemented
mrb_define_method(mrb, s, "scan", mrb_str_scan, ARGS_REQ(1)); /* 15.2.10.5.32 */
-#endif
+
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.34 */
mrb_define_method(mrb, s, "split", mrb_str_split_m, ARGS_ANY()); /* 15.2.10.5.35 */
-#ifdef ENABLE_REGEXP
+
+ // NOTE: Regexp not implemented
mrb_define_method(mrb, s, "sub", mrb_str_sub, ARGS_REQ(1)); /* 15.2.10.5.36 */
mrb_define_method(mrb, s, "sub!", mrb_str_sub_bang, ARGS_REQ(1)); /* 15.2.10.5.37 */
-#endif
+
mrb_define_method(mrb, s, "to_i", mrb_str_to_i, ARGS_ANY()); /* 15.2.10.5.38 */
mrb_define_method(mrb, s, "to_f", mrb_str_to_f, ARGS_NONE()); /* 15.2.10.5.39 */
mrb_define_method(mrb, s, "to_s", mrb_str_to_s, ARGS_NONE()); /* 15.2.10.5.40 */
diff --git a/src/struct.c b/src/struct.c
index 1396cd728..1efdca66f 100644
--- a/src/struct.c
+++ b/src/struct.c
@@ -12,10 +12,6 @@
#include "mruby/array.h"
#include <stdarg.h>
-#ifdef ENABLE_REGEXP
-#include "encoding.h"
-#endif
-
#include "mruby/string.h"
#include "mruby/class.h"
#include "mruby/variable.h"
diff --git a/src/variable.c b/src/variable.c
index a5ff36f6f..a2dca3cc6 100644
--- a/src/variable.c
+++ b/src/variable.c
@@ -12,10 +12,6 @@
#include "mruby/string.h"
#include "mruby/proc.h"
-#ifdef ENABLE_REGEXP
-#include "re.h"
-#endif
-
typedef int (iv_foreach_func)(mrb_state*,mrb_sym,mrb_value,void*);
#ifdef MRB_USE_IV_SEGLIST