diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/codegen.c | 17 | ||||
| -rw-r--r-- | src/dump.c | 27 | ||||
| -rw-r--r-- | src/encoding.h | 345 | ||||
| -rw-r--r-- | src/etc.c | 1 | ||||
| -rw-r--r-- | src/gc.c | 35 | ||||
| -rw-r--r-- | src/init.c | 4 | ||||
| -rw-r--r-- | src/load.c | 10 | ||||
| -rw-r--r-- | src/node.h | 1 | ||||
| -rw-r--r-- | src/oniguruma.h | 771 | ||||
| -rw-r--r-- | src/parse.y | 78 | ||||
| -rw-r--r-- | src/re.c | 2563 | ||||
| -rw-r--r-- | src/re.h | 53 | ||||
| -rw-r--r-- | src/regcomp.c | 6288 | ||||
| -rw-r--r-- | src/regenc.c | 901 | ||||
| -rw-r--r-- | src/regenc.h | 203 | ||||
| -rw-r--r-- | src/regerror.c | 375 | ||||
| -rw-r--r-- | src/regex.h | 26 | ||||
| -rw-r--r-- | src/regexec.c | 3757 | ||||
| -rw-r--r-- | src/regint.h | 838 | ||||
| -rw-r--r-- | src/regparse.c | 5600 | ||||
| -rw-r--r-- | src/regparse.h | 354 | ||||
| -rw-r--r-- | src/sprintf.c | 1 | ||||
| -rw-r--r-- | src/string.c | 397 | ||||
| -rw-r--r-- | src/struct.c | 4 | ||||
| -rw-r--r-- | src/variable.c | 4 |
25 files changed, 144 insertions, 22509 deletions
diff --git a/src/codegen.c b/src/codegen.c index 1f6d16477..a3e2995a0 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1910,6 +1910,23 @@ codegen(codegen_scope *s, node *tree, int val) } break; + case NODE_REGX: + if (val) { + char *p = (char*)tree->car; + size_t len = (intptr_t)tree->cdr; + int ai = mrb_gc_arena_save(s->mrb); + struct RClass* c = mrb_class_get(s->mrb, "Regexp"); + mrb_value args[1]; + args[0] = mrb_str_new(s->mrb, p, len); + int off = new_lit(s, + mrb_class_new_instance(s->mrb, 1, args, c)); + + mrb_gc_arena_restore(s->mrb, ai); + genop(s, MKOP_ABx(OP_STRING, cursp(), off)); + push(); + } + break; + case NODE_SYM: if (val) { int sym = new_sym(s, sym(tree)); diff --git a/src/dump.c b/src/dump.c index e0d755c8c..e8d96c570 100644 --- a/src/dump.c +++ b/src/dump.c @@ -9,9 +9,6 @@ #include <ctype.h> #include "mruby/string.h" -#ifdef ENABLE_REGEXP -#include "re.h" -#endif #include "mruby/irep.h" static const unsigned char def_rite_binary_header[] = @@ -256,13 +253,6 @@ get_pool_block_size(mrb_state *mrb, mrb_irep *irep, int type) nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); size += nlen; break; -#ifdef ENABLE_REGEXP - case MRB_TT_REGEX: - str = mrb_reg_to_s(mrb, irep->pool[pool_no]); - nlen = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); - size += nlen; - break; -#endif default: break; } @@ -390,23 +380,6 @@ write_pool_block(mrb_state *mrb, mrb_irep *irep, char *buf, int type) str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type); break; -#ifdef ENABLE_REGEXP - case MRB_TT_REGEX: - str = mrb_reg_to_s(mrb, irep->pool[pool_no]); - len = str_dump_len(RSTRING_PTR(str), RSTRING_LEN(str), type); - if ( len > buf_size - 1) { - buf_size = len + 1; - char_buf = mrb_realloc(mrb, char_buf, buf_size); - if (char_buf == NULL) { - result = MRB_DUMP_GENERAL_FAILURE; - goto error_exit; - } - memset(char_buf, 0, buf_size); - } - str_dump(RSTRING_PTR(str), char_buf, RSTRING_LEN(str), type); - break; -#endif - default: buf += uint16_dump(0, buf, type); /* data length = 0 */ continue; diff --git a/src/encoding.h b/src/encoding.h deleted file mode 100644 index 7bc0d9ef0..000000000 --- a/src/encoding.h +++ /dev/null @@ -1,345 +0,0 @@ -/* -** encoding.h - Encoding class -** -** See Copyright Notice in mruby.h -*/ - -#ifndef RUBY_ENCODING_H -#define RUBY_ENCODING_H 1 - -#if defined(__cplusplus) -extern "C" { -#endif - -#include <stdarg.h> -#include "oniguruma.h" -#include "mruby/data.h" - -#define FL_USHIFT 12 - -#define FL_USER0 (((int)1)<<(FL_USHIFT+0)) -#define FL_USER1 (((int)1)<<(FL_USHIFT+1)) -#define FL_USER2 (((int)1)<<(FL_USHIFT+2)) -#define FL_USER3 (((int)1)<<(FL_USHIFT+3)) -#define FL_USER4 (((int)1)<<(FL_USHIFT+4)) -#define FL_USER5 (((int)1)<<(FL_USHIFT+5)) -#define FL_USER6 (((int)1)<<(FL_USHIFT+6)) -#define FL_USER7 (((int)1)<<(FL_USHIFT+7)) -#define FL_USER8 (((int)1)<<(FL_USHIFT+8)) -#define FL_USER9 (((int)1)<<(FL_USHIFT+9)) - -#define ENCODING_INLINE_MAX 1023 -/* 1023 = 0x03FF */ -/*#define ENCODING_SHIFT (FL_USHIFT+10)*/ -#define ENCODING_SHIFT (10) -#define ENCODING_MASK (((unsigned int)ENCODING_INLINE_MAX)<<ENCODING_SHIFT) - -#define ENCODING_SET_INLINED(obj,i) do {\ - mrb_obj_ptr(obj)->flags &= ~ENCODING_MASK;\ - mrb_obj_ptr(obj)->flags |= (unsigned int)(i) << ENCODING_SHIFT;\ -} while (0) -#define ENCODING_SET(mrb, obj,i) do {\ - mrb_value mrb_encoding_set_obj = (obj); \ - int encoding_set_enc_index = (i); \ - if (encoding_set_enc_index < ENCODING_INLINE_MAX) \ - ENCODING_SET_INLINED(mrb_encoding_set_obj, encoding_set_enc_index); \ - else \ - mrb_enc_set_index(mrb, mrb_encoding_set_obj, encoding_set_enc_index); \ -} while (0) - -#define ENCODING_GET_INLINED(obj) (unsigned int)((RSTRING(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT) -#define ENCODING_GET(mrb, obj) \ - (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \ - ENCODING_GET_INLINED(obj) : \ - mrb_enc_get_index(mrb, obj)) - -#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0) - -#define ENCODING_MAXNAMELEN 42 - -#define ENC_CODERANGE_MASK ((int)(FL_USER8|FL_USER9)) -#define ENC_CODERANGE_UNKNOWN 0 -#define ENC_CODERANGE_7BIT ((int)FL_USER8) -#define ENC_CODERANGE_VALID ((int)FL_USER9) -#define ENC_CODERANGE_BROKEN ((int)(FL_USER8|FL_USER9)) -#define ENC_CODERANGE(obj) ((int)(RSTRING(obj)->flags & ENC_CODERANGE_MASK)) -#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT) -#ifdef INCLUDE_ENCODING -#define ENC_CODERANGE_SET(obj,cr) (RSTRING(obj)->flags = \ - (RSTRING(obj)->flags & ~ENC_CODERANGE_MASK) | (cr)) -#else -#define ENC_CODERANGE_SET(obj,cr) -#endif //INCLUDE_ENCODING -#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET(obj,0) - -/* assumed ASCII compatibility */ -#define ENC_CODERANGE_AND(a, b) \ - (a == ENC_CODERANGE_7BIT ? b : \ - a == ENC_CODERANGE_VALID ? (b == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : b) : \ - ENC_CODERANGE_UNKNOWN) - -#define ENCODING_CODERANGE_SET(mrb, obj, encindex, cr) \ - do { \ - mrb_value mrb_encoding_coderange_obj = (obj); \ - ENCODING_SET(mrb, mrb_encoding_coderange_obj, (encindex)); \ - ENC_CODERANGE_SET(mrb_encoding_coderange_obj, (cr)); \ - } while (0) - -typedef OnigEncodingType mrb_encoding; - -/* mrb_encoding * -> name */ -#define mrb_enc_name(enc) (enc)->name -int mrb_enc_get_index(mrb_state *mrb, mrb_value obj); - -int mrb_enc_replicate(mrb_state *, const char *, mrb_encoding *); -int mrb_define_dummy_encoding(mrb_state *mrb, const char *); -#define mrb_enc_to_index(enc) ((enc) ? ENC_TO_ENCINDEX(enc) : 0) -void mrb_enc_set_index(mrb_state *mrb, mrb_value obj, int encindex); -int mrb_enc_find_index(mrb_state *mrb, const char *name); -int mrb_to_encoding_index(mrb_state *mrb, mrb_value); -mrb_encoding* mrb_to_encoding(mrb_state *mrb, mrb_value); -mrb_encoding* mrb_enc_get(mrb_state *, mrb_value); -mrb_encoding* mrb_enc_compatible(mrb_state *, mrb_value, mrb_value); -mrb_encoding* mrb_enc_check(mrb_state *, mrb_value, mrb_value); -mrb_value mrb_enc_associate_index(mrb_state *mrb, mrb_value, int); -#ifdef INCLUDE_ENCODING -mrb_value mrb_enc_associate(mrb_state *mrb, mrb_value, mrb_encoding*); -#else -#define mrb_enc_associate(mrb,value,enc) -#endif //INCLUDE_ENCODING -void mrb_enc_copy(mrb_state *mrb, mrb_value dst, mrb_value src); - -mrb_value mrb_enc_reg_new(const char*, long, mrb_encoding*, int); -//PRINTF_ARGS(mrb_value rb_enc_sprintf(mrb_encoding *, const char*, ...), 2, 3); -mrb_value mrb_enc_vsprintf(mrb_encoding *, const char*, va_list); -long mrb_enc_strlen(const char*, const char*, mrb_encoding*); -char* mrb_enc_nth(mrb_state *, const char*, const char*, long, mrb_encoding*); -mrb_value mrb_obj_encoding(mrb_state *, mrb_value); -mrb_value mrb_enc_str_buf_cat(mrb_state *mrb, mrb_value str, const char *ptr, long len, mrb_encoding *enc); -mrb_value rb_enc_uint_chr(mrb_state *mrb, unsigned int code, mrb_encoding *enc); - -mrb_value mrb_external_str_new_with_enc(mrb_state *mrb, const char *ptr, long len, mrb_encoding *); -mrb_value mrb_str_export_to_enc(mrb_value, mrb_encoding *); - -/* index -> mrb_encoding */ -mrb_encoding* mrb_enc_from_index(mrb_state *mrb, int idx); - -/* name -> mrb_encoding */ -mrb_encoding * mrb_enc_find(mrb_state *mrb, const char *name); - -/* mrb_encoding * -> name */ -#define mrb_enc_name(enc) (enc)->name - -/* mrb_encoding * -> minlen/maxlen */ -#define mrb_enc_mbminlen(enc) (enc)->min_enc_len -#define mrb_enc_mbmaxlen(enc) (enc)->max_enc_len - -/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */ -int mrb_enc_mbclen(const char *p, const char *e, mrb_encoding *enc); - -/* -> mbclen (only for valid encoding) */ -int mrb_enc_fast_mbclen(const char *p, const char *e, mrb_encoding *enc); - -/* -> chlen, invalid or needmore */ -int mrb_enc_precise_mbclen(const char *p, const char *e, mrb_encoding *enc); -#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret) -#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret) -#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret) -#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret) -#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret) - -/* -> 0x00..0x7f, -1 */ -int mrb_enc_ascget(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc); - - -/* -> code (and len) or raise exception */ -unsigned int mrb_enc_codepoint_len(mrb_state *mrb, const char *p, const char *e, int *len, mrb_encoding *enc); - -/* prototype for obsolete function */ -unsigned int mrb_enc_codepoint(mrb_state *mrb, const char *p, const char *e, mrb_encoding *enc); -/* overriding macro */ -#define mrb_enc_codepoint(mrb,p,e,enc) mrb_enc_codepoint_len((mrb),(p),(e),0,(enc)) -#define mrb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE(enc,(UChar*)(p),(UChar*)(e)) - -/* -> codelen>0 or raise exception */ -#ifdef INCLUDE_ENCODING -int mrb_enc_codelen(mrb_state *mrb, int code, mrb_encoding *enc); -#else -#define mrb_enc_codelen(mrb,code,enc) 1 -#endif //INCLUDE_ENCODING - -/* code,ptr,encoding -> write buf */ -#define mrb_enc_mbcput(c,buf,enc) ((*(buf) = (char)(c)),1) - -/* start, ptr, end, encoding -> prev_char */ -#define mrb_enc_prev_char(s,p,e,enc) (char *)onigenc_get_prev_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) -/* start, ptr, end, encoding -> next_char */ -#define mrb_enc_left_char_head(s,p,e,enc) (char *)onigenc_get_left_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) -#define mrb_enc_right_char_head(s,p,e,enc) (char *)onigenc_get_right_adjust_char_head(enc,(UChar*)(s),(UChar*)(p),(UChar*)(e)) - -/* ptr, ptr, encoding -> newline_or_not */ -#define mrb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE(enc,(UChar*)(p),(UChar*)(end)) - -#define mrb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE(enc,c,t) -#define mrb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c) -#define mrb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA(enc,c) -#define mrb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER(enc,c) -#define mrb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER(enc,c) -#define mrb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT(enc,c) -#define mrb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM(enc,c) -#define mrb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT(enc,c) -#define mrb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE(enc,c) -#define mrb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT(enc,c) - -#define mrb_enc_asciicompat(mrb, enc) (mrb_enc_mbminlen(enc)==1 && !mrb_enc_dummy_p(enc)) - -int mrb_enc_casefold(char *to, const char *p, const char *e, mrb_encoding *enc); -int mrb_enc_toupper(int c, mrb_encoding *enc); -int mrb_enc_tolower(int c, mrb_encoding *enc); -//ID mrb_intern3(const char*, long, mrb_encoding*); -//ID mrb_interned_id_p(const char *, long, mrb_encoding *); -int mrb_enc_symname_p(const char*, mrb_encoding*); -int mrb_enc_symname2_p(const char*, long, mrb_encoding*); -int mrb_enc_str_coderange(mrb_state *mrb, mrb_value); -long mrb_str_coderange_scan_restartable(const char*, const char*, mrb_encoding*, int*); -int mrb_enc_str_asciionly_p(mrb_state *mrb, mrb_value); -#define mrb_enc_str_asciicompat_p(mrb, str) mrb_enc_asciicompat(mrb, mrb_enc_get(mrb, str)) -mrb_value mrb_enc_from_encoding(mrb_state *mrb, mrb_encoding *enc); -int mrb_enc_unicode_p(mrb_encoding *enc); -mrb_encoding *mrb_ascii8bit_encoding(mrb_state *mrb); -mrb_encoding *mrb_utf8_encoding(mrb_state *mrb); -mrb_encoding *mrb_usascii_encoding(mrb_state *mrb); -mrb_encoding *mrb_locale_encoding(mrb_state *mrb); -mrb_encoding *mrb_filesystem_encoding(mrb_state *mrb); -mrb_encoding *mrb_default_external_encoding(mrb_state *mrb); -mrb_encoding *mrb_default_internal_encoding(mrb_state *mrb); -int mrb_ascii8bit_encindex(void); -int mrb_utf8_encindex(void); -int mrb_usascii_encindex(void); -int mrb_locale_encindex(mrb_state *mrb); -int mrb_filesystem_encindex(void); -mrb_value mrb_enc_default_external(mrb_state *mrb); -mrb_value mrb_enc_default_internal(mrb_state *mrb); -void mrb_enc_set_default_external(mrb_state *mrb, mrb_value encoding); -void mrb_enc_set_default_internal(mrb_state *mrb, mrb_value encoding); -mrb_value mrb_locale_charmap(mrb_state *mrb, mrb_value klass); -mrb_value mrb_usascii_str_new_cstr(mrb_state *mrb, const char *ptr); -int mrb_str_buf_cat_escaped_char(mrb_state *mrb, mrb_value result, unsigned int c, int unicode_p); - -#define ENC_DUMMY_FLAG (1<<24) -#define ENC_INDEX_MASK (~(~0U<<24)) - -#define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK) - -#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG) -#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG) - -static inline int -mrb_enc_dummy_p(mrb_encoding *enc) -{ - return ENC_DUMMY_P(enc) != 0; -} - -/* econv stuff */ - -typedef enum { - econv_invalid_byte_sequence, - econv_undefined_conversion, - econv_destination_buffer_full, - econv_source_buffer_empty, - econv_finished, - econv_after_output, - econv_incomplete_input -} mrb_econv_result_t; - -typedef struct mrb_econv_t mrb_econv_t; - -mrb_value mrb_str_encode(mrb_state *mrb, mrb_value str, mrb_value to, int ecflags, mrb_value ecopts); -int mrb_econv_has_convpath_p(mrb_state *mrb, const char* from_encoding, const char* to_encoding); - -int mrb_econv_prepare_opts(mrb_state *mrb, mrb_value opthash, mrb_value *ecopts); - -mrb_econv_t *mrb_econv_open(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags); -mrb_econv_t *mrb_econv_open_opts(mrb_state *mrb, const char *source_encoding, const char *destination_encoding, int ecflags, mrb_value ecopts); - -mrb_econv_result_t mrb_econv_convert(mrb_state *mrb, mrb_econv_t *ec, - const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, - unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, - int flags); -void mrb_econv_close(mrb_econv_t *ec); - -/* result: 0:success -1:failure */ -int mrb_econv_set_replacement(mrb_state *mrb, mrb_econv_t *ec, const unsigned char *str, size_t len, const char *encname); - -/* result: 0:success -1:failure */ -int mrb_econv_decorate_at_first(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name); -int mrb_econv_decorate_at_last(mrb_state *mrb, mrb_econv_t *ec, const char *decorator_name); - -mrb_value mrb_econv_open_exc(mrb_state *mrb, const char *senc, const char *denc, int ecflags); - -/* result: 0:success -1:failure */ -int mrb_econv_insert_output(mrb_state *mrb, mrb_econv_t *ec, - const unsigned char *str, size_t len, const char *str_encoding); - -/* encoding that mrb_econv_insert_output doesn't need conversion */ -const char *mrb_econv_encoding_to_insert_output(mrb_econv_t *ec); - -/* raise an error if the last mrb_econv_convert is error */ -void mrb_econv_check_error(mrb_state *mrb, mrb_econv_t *ec); - -/* returns an exception object or nil */ -mrb_value mrb_econv_make_exception(mrb_state *mrb, mrb_econv_t *ec); - -int mrb_econv_putbackable(mrb_econv_t *ec); -void mrb_econv_putback(mrb_econv_t *ec, unsigned char *p, int n); - -/* returns the corresponding ASCII compatible encoding for encname, - * or NULL if encname is not ASCII incompatible encoding. */ -const char *mrb_econv_asciicompat_encoding(const char *encname); - -mrb_value mrb_econv_str_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, int flags); -mrb_value mrb_econv_substr_convert(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, int flags); -mrb_value mrb_econv_str_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, mrb_value dst, int flags); -mrb_value mrb_econv_substr_append(mrb_state *mrb, mrb_econv_t *ec, mrb_value src, long byteoff, long bytesize, mrb_value dst, int flags); - -void mrb_econv_binmode(mrb_econv_t *ec); - -/* flags for mrb_econv_open */ - -#define ECONV_ERROR_HANDLER_MASK 0x000000ff - -#define ECONV_INVALID_MASK 0x0000000f -#define ECONV_INVALID_REPLACE 0x00000002 - -#define ECONV_UNDEF_MASK 0x000000f0 -#define ECONV_UNDEF_REPLACE 0x00000020 -#define ECONV_UNDEF_HEX_CHARREF 0x00000030 - -#define ECONV_DECORATOR_MASK 0x0000ff00 - -#define ECONV_UNIVERSAL_NEWLINE_DECORATOR 0x00000100 -#define ECONV_CRLF_NEWLINE_DECORATOR 0x00001000 -#define ECONV_CR_NEWLINE_DECORATOR 0x00002000 -#define ECONV_XML_TEXT_DECORATOR 0x00004000 -#define ECONV_XML_ATTR_CONTENT_DECORATOR 0x00008000 - -#define ECONV_STATEFUL_DECORATOR_MASK 0x00f00000 -#define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 - -/* end of flags for mrb_econv_open */ - -/* flags for mrb_econv_convert */ -#define ECONV_PARTIAL_INPUT 0x00010000 -#define ECONV_AFTER_OUTPUT 0x00020000 -/* end of flags for mrb_econv_convert */ - -int mrb_isspace(int c); - -#define ENCODE_CLASS (mrb_class_obj_get(mrb, "Encoding")) -#define CONVERTER_CLASS (mrb_class_obj_get(mrb, "Converter")) - -#if defined(__cplusplus) -} /* extern "C" { */ -#endif - -#endif /* RUBY_ENCODING_H */ @@ -171,7 +171,6 @@ mrb_obj_id(mrb_value obj) case MRB_TT_ARRAY: case MRB_TT_HASH: case MRB_TT_RANGE: - case MRB_TT_REGEX: case MRB_TT_STRUCT: case MRB_TT_EXCEPTION: case MRB_TT_MATCH: @@ -71,10 +71,6 @@ */ -#ifdef ENABLE_REGEXP -#include "re.h" -#endif - struct free_obj { MRB_OBJECT_HEADER; struct RBasic *next; @@ -94,10 +90,6 @@ typedef struct { struct RStruct structdata; #endif struct RProc procdata; -#ifdef ENABLE_REGEXP - struct RMatch match; - struct RRegexp regexp; -#endif } as; } RVALUE; @@ -462,24 +454,6 @@ gc_mark_children(mrb_state *mrb, struct RBasic *obj) } break; -#ifdef ENABLE_REGEXP - case MRB_TT_MATCH: - { - struct RMatch *m = (struct RMatch*)obj; - - mrb_gc_mark(mrb, (struct RBasic*)m->str); - mrb_gc_mark(mrb, (struct RBasic*)m->regexp); - } - break; - case MRB_TT_REGEX: - { - struct RRegexp *r = (struct RRegexp*)obj; - - mrb_gc_mark(mrb, (struct RBasic*)r->src); - } - break; -#endif - #ifdef ENABLE_STRUCT case MRB_TT_STRUCT: { @@ -689,15 +663,6 @@ gc_gray_mark(mrb_state *mrb, struct RBasic *obj) children+=2; break; -#ifdef ENABLE_REGEXP - case MRB_TT_MATCH: - children+=2; - break; - case MRB_TT_REGEX: - children+=1; - break; -#endif - #ifdef ENABLE_STRUCT case MRB_TT_STRUCT: { diff --git a/src/init.c b/src/init.c index 73ff8fce2..a85d0483d 100644 --- a/src/init.c +++ b/src/init.c @@ -54,9 +54,7 @@ mrb_init_core(mrb_state *mrb) mrb_init_struct(mrb); DONE; #endif mrb_init_gc(mrb); DONE; -#ifdef ENABLE_REGEXP mrb_init_regexp(mrb); DONE; -#endif #ifdef ENABLE_STDIO mrb_init_print(mrb); DONE; #endif @@ -78,4 +76,4 @@ mrb_final_core(mrb_state *mrb) #ifndef DISABLE_GEMS mrb_final_mrbgems(mrb); DONE; #endif -}
\ No newline at end of file +} diff --git a/src/load.c b/src/load.c index 78ece114f..a61af509c 100644 --- a/src/load.c +++ b/src/load.c @@ -8,9 +8,6 @@ #include "mruby/dump.h" #include "mruby/string.h" -#ifdef ENABLE_REGEXP -#include "re.h" -#endif #include "mruby/proc.h" #include "mruby/irep.h" @@ -435,13 +432,6 @@ read_rite_irep_record(mrb_state *mrb, unsigned char *src, uint32_t* len) irep->pool[i] = mrb_str_new(mrb, buf, pdl); break; -#ifdef ENABLE_REGEXP - case MRB_TT_REGEX: - str = mrb_str_new(mrb, buf, pdl); - irep->pool[i] = mrb_reg_quote(mrb, str); - break; -#endif - default: irep->pool[i] = mrb_nil_value(); break; diff --git a/src/node.h b/src/node.h index 29edd6cc5..284105023 100644 --- a/src/node.h +++ b/src/node.h @@ -62,6 +62,7 @@ enum node_type { NODE_SYM, NODE_STR, NODE_DSTR, + NODE_REGX, NODE_DREGX, NODE_DREGX_ONCE, NODE_LIST, diff --git a/src/oniguruma.h b/src/oniguruma.h deleted file mode 100644 index 3332df023..000000000 --- a/src/oniguruma.h +++ /dev/null @@ -1,771 +0,0 @@ -#ifndef ONIGURUMA_H -#define ONIGURUMA_H -/********************************************************************** - oniguruma.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 5 -#define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 2 - -#ifdef __cplusplus -# ifndef HAVE_PROTOTYPES -# define HAVE_PROTOTYPES 1 -# endif -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifndef ONIG_EXTERN -#ifdef RUBY_EXTERN -#define ONIG_EXTERN RUBY_EXTERN -#else -#if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) -#define ONIG_EXTERN extern __declspec(dllexport) -#else -#define ONIG_EXTERN extern __declspec(dllimport) -#endif -#endif -#endif -#endif - -#ifndef ONIG_EXTERN -#define ONIG_EXTERN extern -#endif - -/* PART: character encoding */ - -#ifndef ONIG_ESCAPE_UCHAR_COLLISION -#define UChar OnigUChar -#endif - -typedef unsigned char OnigUChar; -typedef unsigned int OnigCodePoint; -typedef unsigned int OnigCtype; -typedef size_t OnigDistance; - -#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) - -typedef unsigned int OnigCaseFoldType; /* case fold flag */ - -ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; - -/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ -/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ -#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) -#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) - -#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR -#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag - - -#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3 -#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13 -/* 13 => Unicode:0x1ffc */ - -/* code range */ -#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0]) -#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1] -#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2] - -typedef struct { - int byte_len; /* argument(original) character(s) byte length */ - int code_len; /* number of code */ - OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]; -} OnigCaseFoldCodeItem; - -typedef struct { - OnigCodePoint esc; - OnigCodePoint anychar; - OnigCodePoint anytime; - OnigCodePoint zero_or_one_time; - OnigCodePoint one_or_more_time; - OnigCodePoint anychar_anytime; -} OnigMetaCharTableType; - -typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); - -typedef struct OnigEncodingTypeST { - int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc); - const char* name; - int max_enc_len; - int min_enc_len; - int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); - OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); - int (*code_to_mbclen)(OnigCodePoint code, struct OnigEncodingTypeST* enc); - int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, struct OnigEncodingTypeST* enc); - int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, struct OnigEncodingTypeST* enc); - int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc); - int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc); - int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); - int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc); - int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc); - OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); - int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); - int ruby_encoding_index; -} OnigEncodingType; - -typedef OnigEncodingType* OnigEncoding; - -ONIG_EXTERN OnigEncodingType OnigEncodingASCII; - -#define ONIG_ENCODING_ASCII (&OnigEncodingASCII) - -#define ONIG_ENCODING_UNDEF ((OnigEncoding )0) - - -/* work size */ -#define ONIGENC_CODE_TO_MBC_MAXLEN 7 -#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18 -/* 18: 6(max-byte) * 3(case-fold chars) */ - -/* character types */ -#define ONIGENC_CTYPE_NEWLINE 0 -#define ONIGENC_CTYPE_ALPHA 1 -#define ONIGENC_CTYPE_BLANK 2 -#define ONIGENC_CTYPE_CNTRL 3 -#define ONIGENC_CTYPE_DIGIT 4 -#define ONIGENC_CTYPE_GRAPH 5 -#define ONIGENC_CTYPE_LOWER 6 -#define ONIGENC_CTYPE_PRINT 7 -#define ONIGENC_CTYPE_PUNCT 8 -#define ONIGENC_CTYPE_SPACE 9 -#define ONIGENC_CTYPE_UPPER 10 -#define ONIGENC_CTYPE_XDIGIT 11 -#define ONIGENC_CTYPE_WORD 12 -#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ -#define ONIGENC_CTYPE_ASCII 14 -#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII -#define ONIGENC_CTYPE_SPECIAL_MASK 128 -#define ONIGENC_CTYPE_S /* [\t\n\v\f\r\s] */ \ - ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_SPACE -#define ONIGENC_CTYPE_D /* [0-9] */ \ - ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_DIGIT -#define ONIGENC_CTYPE_W /* [0-9A-Za-z_] */ \ - ONIGENC_CTYPE_SPECIAL_MASK | ONIGENC_CTYPE_WORD -#define ONIGENC_CTYPE_SPECIAL_P(ctype) ((ctype) & ONIGENC_CTYPE_SPECIAL_MASK) - - -#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e) - -#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) -#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) -#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1) -#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) -#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) -#define ONIGENC_IS_MBC_WORD(enc,s,end) \ - ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end)) - - -#define ONIGENC_NAME(enc) ((enc)->name) - -#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \ - (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc) -#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ - (enc)->is_allowed_reverse_match(s,end,enc) -#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \ - (enc)->left_adjust_char_head(start, s, end, enc) -#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \ - (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc) -#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \ - (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc) -#define ONIGENC_STEP_BACK(enc,start,s,end,n) \ - onigenc_step_back((enc),(start),(s),(end),(n)) - -#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n) -#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r)) -#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r) - -#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1) -#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1) - -#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n)) -#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1) -#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r)) - -#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) - -ONIG_EXTERN -int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc); - -#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) -#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) -#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) -#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) -#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc) -#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc) -#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc) -#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc) -#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \ - (enc)->property_name_to_ctype(enc,p,end) - -#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc) - -#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) -#define ONIGENC_IS_CODE_GRAPH(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) -#define ONIGENC_IS_CODE_PRINT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT) -#define ONIGENC_IS_CODE_ALNUM(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM) -#define ONIGENC_IS_CODE_ALPHA(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA) -#define ONIGENC_IS_CODE_LOWER(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER) -#define ONIGENC_IS_CODE_UPPER(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER) -#define ONIGENC_IS_CODE_CNTRL(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL) -#define ONIGENC_IS_CODE_PUNCT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT) -#define ONIGENC_IS_CODE_SPACE(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE) -#define ONIGENC_IS_CODE_BLANK(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK) -#define ONIGENC_IS_CODE_DIGIT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT) -#define ONIGENC_IS_CODE_XDIGIT(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT) -#define ONIGENC_IS_CODE_WORD(enc,code) \ - ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD) - -#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \ - (enc)->get_ctype_code_range(ctype,sbout,ranges,enc) - -ONIG_EXTERN -OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n); - - -/* encoding API */ -ONIG_EXTERN -int onigenc_init(void); -ONIG_EXTERN -int onigenc_set_default_encoding(OnigEncoding enc); -ONIG_EXTERN -OnigEncoding onigenc_get_default_encoding(void); -ONIG_EXTERN -void onigenc_set_default_caseconv_table(const OnigUChar* table); -ONIG_EXTERN -OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev); -ONIG_EXTERN -OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end); -ONIG_EXTERN -OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end); -ONIG_EXTERN -OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end); -ONIG_EXTERN -int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end); -ONIG_EXTERN -int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p); -ONIG_EXTERN -int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p); - - - -/* PART: regular expression */ - -/* config parameters */ -#define ONIG_NREGION 10 -#define ONIG_MAX_BACKREF_NUM 1000 -#define ONIG_MAX_REPEAT_NUM 100000 -#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 -/* constants */ -#define ONIG_MAX_ERROR_MESSAGE_LEN 90 - -typedef unsigned int OnigOptionType; - -#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE - -/* options */ -#define ONIG_OPTION_NONE 0U -#define ONIG_OPTION_IGNORECASE 1U -#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) -#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) -#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) -#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) -#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) -#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) -#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) -#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) -/* options (search time) */ -#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) -#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) -#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ - -#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) -#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) -#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) - -/* syntax */ -typedef struct { - unsigned int op; - unsigned int op2; - unsigned int behavior; - OnigOptionType options; /* default option */ - OnigMetaCharTableType meta_char_table; -} OnigSyntaxType; - -ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl_NG; -ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby; - -/* predefined syntaxes (see regsyntax.c) */ -#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) -#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) -#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) -#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) -#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep) -#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) -#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) -#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) -#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) -#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) - -/* default syntax */ -ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax; -#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax - -/* syntax (operators) */ -#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) -#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ -#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ -#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) -#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ -#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) -#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ -#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) -#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ -#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ -#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ -#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ -#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */ -#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ -#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ -#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ -#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ -#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ -#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ -#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ -#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ -#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ -#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ -#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ -#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ -#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ -#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ -#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ -#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ -#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ -#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ - -#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ -#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ -#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ -#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ -#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ -#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */ -#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */ -#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */ -#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */ -#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ -#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ -#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ -#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ -#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ -/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ -#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ -#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ - -/* syntax (behavior) */ -#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ -#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ -#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ -#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ -#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */ -#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ -#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ -#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ -#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ -#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */ -#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ - -/* syntax (behavior) in char class [...] */ -#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ -#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */ -#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) -#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ -/* syntax (behavior) warning */ -#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ -#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ -#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */ - -/* meta character specifiers (onig_set_meta_char()) */ -#define ONIG_META_CHAR_ESCAPE 0 -#define ONIG_META_CHAR_ANYCHAR 1 -#define ONIG_META_CHAR_ANYTIME 2 -#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3 -#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4 -#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5 - -#define ONIG_INEFFECTIVE_META_CHAR 0 - -/* error codes */ -#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) -/* normal return */ -#define ONIG_NORMAL 0 -#define ONIG_MISMATCH -1 -#define ONIG_NO_SUPPORT_CONFIG -2 - -/* internal error */ -#define ONIGERR_MEMORY -5 -#define ONIGERR_TYPE_BUG -6 -#define ONIGERR_PARSER_BUG -11 -#define ONIGERR_STACK_BUG -12 -#define ONIGERR_UNDEFINED_BYTECODE -13 -#define ONIGERR_UNEXPECTED_BYTECODE -14 -#define ONIGERR_MATCH_STACK_LIMIT_OVER -15 -#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 -#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 -/* general error */ -#define ONIGERR_INVALID_ARGUMENT -30 -/* syntax error */ -#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 -#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 -#define ONIGERR_EMPTY_CHAR_CLASS -102 -#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103 -#define ONIGERR_END_PATTERN_AT_ESCAPE -104 -#define ONIGERR_END_PATTERN_AT_META -105 -#define ONIGERR_END_PATTERN_AT_CONTROL -106 -#define ONIGERR_META_CODE_SYNTAX -108 -#define ONIGERR_CONTROL_CODE_SYNTAX -109 -#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110 -#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111 -#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112 -#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113 -#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114 -#define ONIGERR_NESTED_REPEAT_OPERATOR -115 -#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116 -#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117 -#define ONIGERR_END_PATTERN_IN_GROUP -118 -#define ONIGERR_UNDEFINED_GROUP_OPTION -119 -#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121 -#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122 -#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123 -/* values error (syntax error) */ -#define ONIGERR_TOO_BIG_NUMBER -200 -#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201 -#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202 -#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203 -#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204 -#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205 -#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206 -#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207 -#define ONIGERR_INVALID_BACKREF -208 -#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209 -#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212 -#define ONIGERR_EMPTY_GROUP_NAME -214 -#define ONIGERR_INVALID_GROUP_NAME -215 -#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216 -#define ONIGERR_UNDEFINED_NAME_REFERENCE -217 -#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218 -#define ONIGERR_MULTIPLEX_DEFINED_NAME -219 -#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220 -#define ONIGERR_NEVER_ENDING_RECURSION -221 -#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 -#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 -#define ONIGERR_INVALID_CODE_POINT_VALUE -400 -#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 -#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 -#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 -#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 - -/* errors related to thread */ -#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 - - -/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ -#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 -#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ - ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) - -typedef struct OnigCaptureTreeNodeStruct { - int group; /* group number */ - int beg; - int end; - int allocated; - int num_childs; - struct OnigCaptureTreeNodeStruct** childs; -} OnigCaptureTreeNode; - -/* match result region type */ -struct re_registers { - int allocated; - int num_regs; - int* beg; - int* end; - /* extended */ - OnigCaptureTreeNode* history_root; /* capture history tree root */ -}; - -/* capture tree traverse */ -#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 -#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 -#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ - ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) - - -#define ONIG_REGION_NOTPOS -1 - -typedef struct re_registers OnigRegion; - -typedef struct { - OnigEncoding enc; - OnigUChar* par; - OnigUChar* par_end; -} OnigErrorInfo; - -typedef struct { - int lower; - int upper; -} OnigRepeatRange; - -typedef void (*OnigWarnFunc)(const char* s); -extern void onig_null_warn(const char* s); -#define ONIG_NULL_WARN onig_null_warn - -#define ONIG_CHAR_TABLE_SIZE 256 - -/* regex_t state */ -#define ONIG_STATE_NORMAL 0 -#define ONIG_STATE_SEARCHING 1 -#define ONIG_STATE_COMPILING -1 -#define ONIG_STATE_MODIFY -2 - -#define ONIG_STATE(reg) \ - ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state) - -typedef struct re_pattern_buffer { - /* common members of BBuf(bytes-buffer) */ - unsigned char* p; /* compiled pattern */ - unsigned int used; /* used space for p */ - unsigned int alloc; /* allocated space for p */ - - int state; /* normal, searching, compiling */ - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_NULL_CHECK_START/END id counter */ - int num_comb_exp_check; /* combination explosion check */ - int num_call; /* number of subexp call */ - unsigned int capture_history; /* (?@...) flag (1-31) */ - unsigned int bt_mem_start; /* need backtrack flag */ - unsigned int bt_mem_end; /* need backtrack flag */ - int stack_pop_level; - int repeat_range_alloc; - OnigRepeatRange* repeat_range; - - OnigEncoding enc; - OnigOptionType options; - const OnigSyntaxType* syntax; - OnigCaseFoldType case_fold_flag; - void* name_table; - - /* optimization info (string search, char-map and anchors) */ - int optimize; /* optimize flag */ - int threshold_len; /* search str-length for apply optimize */ - int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */ - int sub_anchor; /* start-anchor for exact or map */ - unsigned char *exact; - unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ - OnigDistance dmin; /* min-distance of exact or map */ - OnigDistance dmax; /* max-distance of exact or map */ - - /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict */ -} OnigRegexType; - -typedef OnigRegexType* OnigRegex; - -#ifndef ONIG_ESCAPE_REGEX_T_COLLISION - typedef OnigRegexType regex_t; -#endif - - -typedef struct { - int num_of_elements; - OnigEncoding pattern_enc; - OnigEncoding target_enc; - OnigSyntaxType* syntax; - OnigOptionType option; - OnigCaseFoldType case_fold_flag; -} OnigCompileInfo; - -/* Oniguruma Native API */ -ONIG_EXTERN -int onig_init(void); -ONIG_EXTERN -int onig_error_code_to_str(OnigUChar* s, int err_code, ...); -ONIG_EXTERN -void onig_set_warn_func(OnigWarnFunc f); -ONIG_EXTERN -void onig_set_verb_warn_func(OnigWarnFunc f); -ONIG_EXTERN -int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo); -ONIG_EXTERN -int onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax); -ONIG_EXTERN -int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo); -ONIG_EXTERN -int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo); -ONIG_EXTERN -void onig_free(OnigRegex); -ONIG_EXTERN -void onig_free_body(OnigRegex); -ONIG_EXTERN -int onig_recompile(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo); -ONIG_EXTERN -int onig_recompile_deluxe(OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo); -ONIG_EXTERN -long onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option); -ONIG_EXTERN -long onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option); -ONIG_EXTERN -OnigRegion* onig_region_new(void); -ONIG_EXTERN -void onig_region_init(OnigRegion* region); -ONIG_EXTERN -void onig_region_free(OnigRegion* region, int free_self); -ONIG_EXTERN -void onig_region_copy(OnigRegion* to, OnigRegion* from); -ONIG_EXTERN -void onig_region_clear(OnigRegion* region); -ONIG_EXTERN -int onig_region_resize(OnigRegion* region, int n); -ONIG_EXTERN -int onig_region_set(OnigRegion* region, int at, int beg, int end); -ONIG_EXTERN -int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums); -ONIG_EXTERN -int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region); -ONIG_EXTERN -int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg); -ONIG_EXTERN -int onig_number_of_names(OnigRegex reg); -ONIG_EXTERN -int onig_number_of_captures(OnigRegex reg); -ONIG_EXTERN -int onig_number_of_capture_histories(OnigRegex reg); -ONIG_EXTERN -OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region); -ONIG_EXTERN -int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg); -ONIG_EXTERN -int onig_noname_group_capture_is_active(OnigRegex reg); -ONIG_EXTERN -OnigEncoding onig_get_encoding(OnigRegex reg); -ONIG_EXTERN -OnigOptionType onig_get_options(OnigRegex reg); -ONIG_EXTERN -OnigCaseFoldType onig_get_case_fold_flag(OnigRegex reg); -ONIG_EXTERN -const OnigSyntaxType* onig_get_syntax(OnigRegex reg); -ONIG_EXTERN -int onig_set_default_syntax(const OnigSyntaxType* syntax); -ONIG_EXTERN -void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from); -ONIG_EXTERN -unsigned int onig_get_syntax_op(OnigSyntaxType* syntax); -ONIG_EXTERN -unsigned int onig_get_syntax_op2(OnigSyntaxType* syntax); -ONIG_EXTERN -unsigned int onig_get_syntax_behavior(OnigSyntaxType* syntax); -ONIG_EXTERN -OnigOptionType onig_get_syntax_options(OnigSyntaxType* syntax); -ONIG_EXTERN -void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op); -ONIG_EXTERN -void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2); -ONIG_EXTERN -void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior); -ONIG_EXTERN -void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options); -ONIG_EXTERN -int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code); -ONIG_EXTERN -void onig_copy_encoding(OnigEncoding to, OnigEncoding from); -ONIG_EXTERN -OnigCaseFoldType onig_get_default_case_fold_flag(void); -ONIG_EXTERN -int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag); -ONIG_EXTERN -unsigned int onig_get_match_stack_limit_size(void); -ONIG_EXTERN -int onig_set_match_stack_limit_size(unsigned int size); -ONIG_EXTERN -int onig_end(void); -ONIG_EXTERN -const char* onig_version(void); -ONIG_EXTERN -const char* onig_copyright(void); - -#ifdef __cplusplus -} -#endif - -#endif /* ONIGURUMA_H */ diff --git a/src/parse.y b/src/parse.y index 91fb1a8e8..518985ea6 100644 --- a/src/parse.y +++ b/src/parse.y @@ -708,6 +708,13 @@ new_dsym(parser_state *p, node *a) return cons((node*)NODE_DSYM, new_dstr(p, a)); } +// (:str . (s . len)) +static node* +new_regx(parser_state *p, const char *s, int len) +{ + return cons((node*)NODE_REGX, cons((node*)strndup(s, len), (node*)(intptr_t)len)); +} + // (:backref . n) static node* new_back_ref(parser_state *p, int n) @@ -743,13 +750,14 @@ call_bin_op(parser_state *p, node *recv, char *m, node *arg1) return new_call(p, recv, intern(m), list1(list1(arg1))); } +/* // (:match (a . b)) static node* match_op(parser_state *p, node *a, node *b) { return cons((node*)NODE_MATCH, cons((node*)a, (node*)b)); } - +*/ static void args_with_block(parser_state *p, node *a, node *b) @@ -1679,7 +1687,7 @@ arg : lhs '=' arg } | arg tMATCH arg { - $$ = match_op(p, $1, $3); + $$ = call_bin_op(p, $1, "=~", $3); #if 0 if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { $$ = reg_named_capture_assign($1->nd_lit, $$); @@ -2498,7 +2506,10 @@ string_interp : tSTRING_PART } ; -regexp : tREGEXP +regexp : tREGEXP_BEG tREGEXP + { + $$ = $2; + } ; symbol : basic_symbol @@ -3335,9 +3346,17 @@ read_escape(parser_state *p) return c; case 'b': /* backspace */ + if (p->regexp) { + tokadd(p, '\\'); + return 'b'; + } return '\010'; case 's': /* space */ + if (p->regexp) { + tokadd(p, '\\'); + return 's'; + } return ' '; case 'M': @@ -3375,17 +3394,39 @@ read_escape(parser_state *p) return '\0'; default: + if (p->regexp) { + tokadd(p, '\\'); + } return c; } } +static void +regx_options(parser_state *p) +{ + int c; + + newtok(p); + while (c = nextc(p), ISALPHA(c)) { + tokadd(p, c); + } + + pushback(p, c); + if (toklen(p)) { + char msg[128]; + tokfix(p); + snprintf(msg, sizeof(msg), "unknown regexp option %s - %s", + toklen(p) > 1 ? "s" : "", tok(p)); + yyerror(p, msg); + } +} + static int parse_string(parser_state *p, int term) { int c; newtok(p); - while ((c = nextc(p)) != term) { if (c == -1) { yyerror(p, "unterminated string meets end of file"); @@ -3422,6 +3463,15 @@ parse_string(parser_state *p, int term) tokfix(p); p->lstate = EXPR_END; p->sterm = 0; + + if (p->regexp) { + //regx_options(p); + yylval.nd = new_regx(p, tok(p), toklen(p)); + p->regexp = 0; + + return tREGEXP; + } + yylval.nd = new_str(p, tok(p), toklen(p)); return tSTRING; } @@ -4186,6 +4236,8 @@ parser_yylex(parser_state *p) #if 0 p->lex_strterm = new_strterm(p, str_regexp, '/', 0); #endif + p->regexp = 1; + p->sterm = '/'; return tREGEXP_BEG; } if ((c = nextc(p)) == '=') { @@ -4199,6 +4251,8 @@ parser_yylex(parser_state *p) #if 0 p->lex_strterm = new_strterm(p, str_regexp, '/', 0); #endif + p->regexp = 1; + p->sterm = '/'; return tREGEXP_BEG; } if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { @@ -4381,6 +4435,8 @@ parser_yylex(parser_state *p) #if 0 p->lex_strterm = new_strterm(p, str_regexp, term, paren); #endif + p->regexp = 1; + p->sterm = '/'; return tREGEXP_BEG; case 's': @@ -5389,6 +5445,16 @@ parser_dump(mrb_state *mrb, node *tree, int offset) printf("NODE_CONST %s\n", mrb_sym2name(mrb, sym(tree))); break; + case NODE_MATCH: + printf("NODE_MATCH:\n"); + dump_prefix(offset + 1); + printf("lhs:\n"); + parser_dump(mrb, tree->car, offset + 2); + dump_prefix(offset + 1); + printf("rhs:\n"); + parser_dump(mrb, tree->cdr, offset + 2); + break; + case NODE_BACK_REF: printf("NODE_BACK_REF: $%c\n", (int)(intptr_t)tree); break; @@ -5428,6 +5494,10 @@ parser_dump(mrb_state *mrb, node *tree, int offset) dump_recur(mrb, tree, offset+1); break; + case NODE_REGX: + printf("NODE_REGX /%s/\n", (char*)tree->car->cdr->car); + break; + case NODE_SYM: printf("NODE_SYM :%s\n", mrb_sym2name(mrb, sym(tree))); break; @@ -7,1971 +7,10 @@ #include "mruby.h" #include <string.h> #include "mruby/string.h" -#include "encoding.h" #include "re.h" #include "mruby/array.h" -#include "regint.h" #include "mruby/class.h" #include "error.h" -#ifdef ENABLE_REGEXP - -#define REGEX_CLASS (mrb_class_obj_get(mrb, "Regexp")) -#define MATCH_CLASS (mrb_class_obj_get(mrb, "MatchData")) - -//from opcode.h -#define GETARG_A(i) ((((mrb_code)(i)) >> 24) & 0xff) -#define GETARG_B(i) ((((mrb_code)(i)) >> 16) & 0xff) -#define GETARG_C(i) ((((mrb_code)(i)) >> 8) & 0xff) -#define MKARG_A(c) (((c) & 0xff) << 24) -#define MKARG_B(c) (((c) & 0xff) << 16) -#define MKARG_C(c) (((c) & 0xff) << 8) - -#define ARG_REG_OPTION_MASK \ - (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) -#define ARG_ENCODING_FIXED 16 -#define ARG_ENCODING_NONE 32 -#define REG_LITERAL FL_USER5 -#define REG_ENCODING_NONE FL_USER6 -typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN]; -#define mrb_bug printf -#define KCODE_FIXED FL_USER4 -#define scan_oct(s,l,e) (int)ruby_scan_oct(s,l,e) -unsigned long ruby_scan_oct(const char*, size_t, size_t*); -#define scan_hex(s,l,e) (int)ruby_scan_hex(s,l,e) -unsigned long ruby_scan_hex(const char*, size_t, size_t*); - -static mrb_value mrb_match_to_a(mrb_state *mrb, mrb_value match); -static mrb_value mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err); -static void mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len); -static char * option_to_str(char str[4], int options); - -//static int may_need_recompile; -//static int reg_kcode = DEFAULT_KCODE; -/* ------------------------------------------------------------------------- */ -/* RegExp Class */ -/* ------------------------------------------------------------------------- */ -/* 15.2.15.6.1 */ -/* - * call-seq: - * class.new(args, ...) -> obj - * - * Calls <code>allocate</code> to create a new object of - * <i>class</i>'s class, then invokes that object's - * <code>initialize</code> method, passing it <i>args</i>. - * This is the method that ends up getting called whenever - * an object is constructed using .new. - * - */ -mrb_value -mrb_reg_s_new_instance(mrb_state *mrb, /*int argc, mrb_value *argv, */mrb_value self) -{ - mrb_value argv[16]; - int argc; - struct RRegexp *re; - - mrb_get_args(mrb, "*", &argv, &argc); - re = (struct RRegexp*)mrb_obj_alloc(mrb, MRB_TT_REGEX, REGEX_CLASS); - re->ptr = 0; - re->src = 0; - re->usecnt = 0; - return mrb_funcall_argv(mrb, mrb_obj_value(re), mrb->init_sym, argc, argv); -} - -mrb_value -mrb_reg_quote(mrb_state *mrb, mrb_value str) -{ - char *s, *send, *t; - mrb_value tmp; - int c; - - s = RSTRING_PTR(str); - send = s + RSTRING_LEN(str); - while (s < send) { - c = *s; - if (c == -1) { - s += send - s; - continue; - } - switch (c) { - case '[': case ']': case '{': case '}': - case '(': case ')': case '|': case '-': - case '*': case '.': case '\\': - case '?': case '+': case '^': case '$': - case ' ': case '#': - case '\t': case '\f': case '\n': case '\r': - goto meta_found; - } - s++; - } - tmp = mrb_str_new(mrb, RSTRING_PTR(str), RSTRING_LEN(str)); - return tmp; - -meta_found: - tmp = mrb_str_new(mrb, 0, RSTRING_LEN(str)*2); - t = RSTRING_PTR(tmp); - /* copy upto metacharacter */ - memcpy(t, RSTRING_PTR(str), s - RSTRING_PTR(str)); - t += s - RSTRING_PTR(str); - - while (s < send) { - c = *s; - if (c == -1) { - int n = send - s; - - while (n--) - *t++ = *s++; - continue; - } - s++; - switch (c) { - case '[': case ']': case '{': case '}': - case '(': case ')': case '|': case '-': - case '*': case '.': case '\\': - case '?': case '+': case '^': case '$': - case '#': - t += mrb_enc_mbcput('\\', t, enc); - break; - case ' ': - t += mrb_enc_mbcput('\\', t, enc); - t += mrb_enc_mbcput(' ', t, enc); - continue; - case '\t': - t += mrb_enc_mbcput('\\', t, enc); - t += mrb_enc_mbcput('t', t, enc); - continue; - case '\n': - t += mrb_enc_mbcput('\\', t, enc); - t += mrb_enc_mbcput('n', t, enc); - continue; - case '\r': - t += mrb_enc_mbcput('\\', t, enc); - t += mrb_enc_mbcput('r', t, enc); - continue; - case '\f': - t += mrb_enc_mbcput('\\', t, enc); - t += mrb_enc_mbcput('f', t, enc); - continue; - case '\v': - t += mrb_enc_mbcput('\\', t, enc); - t += mrb_enc_mbcput('v', t, enc); - continue; - } - t += mrb_enc_mbcput(c, t, enc); - } - mrb_str_resize(mrb, tmp, t - RSTRING_PTR(tmp)); - - return tmp; -} - -static mrb_value -reg_operand(mrb_state *mrb, mrb_value s, int check) -{ - if (mrb_type(s) == MRB_TT_SYMBOL) { - //return mrb_sym_to_s(s); - return mrb_obj_inspect(mrb, s); - } - else { - mrb_value tmp = mrb_check_string_type(mrb, s); - if (check && mrb_nil_p(tmp)) { - mrb_raise(mrb, E_TYPE_ERROR, "can't convert %s to String", - mrb_obj_classname(mrb, s)); - } - return tmp; - } -} -/* 15.2.15.6.2 */ -/* 15.2.15.6.4 */ -/* - * call-seq: - * Regexp.escape(str) -> string - * Regexp.quote(str) -> string - * - * Escapes any characters that would have special meaning in a regular - * expression. Returns a new escaped string, or self if no characters are - * escaped. For any string, - * <code>Regexp.new(Regexp.escape(<i>str</i>))=~<i>str</i></code> will be true. - * - * Regexp.escape('\*?{}.') #=> \\\*\?\{\}\. - * - */ - -static mrb_value -mrb_reg_s_quote(mrb_state *mrb, mrb_value c/*, mrb_value str*/) -{ - mrb_value str; - - mrb_get_args(mrb, "o", &str); - return mrb_reg_quote(mrb, reg_operand(mrb, str, 1/*TRUE*/)); -} - -static void -match_check(mrb_state *mrb, mrb_value match) -{ - struct RMatch *m = mrb_match_ptr(match); - if (!m->str) { - mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Match"); - } -} - -mrb_value -mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match) -{ - mrb_value str; - long start, end, len; - struct RMatch *m = mrb_match_ptr(match); - - if (mrb_nil_p(match)) return mrb_nil_value(); - match_check(mrb, match); - if (nth >= m->rmatch->regs.num_regs) { - return mrb_nil_value(); - } - if (nth < 0) { - nth += m->rmatch->regs.num_regs; - if (nth <= 0) return mrb_nil_value(); - } - start = m->rmatch->regs.beg[nth]; - if (start == -1) return mrb_nil_value(); - end = m->rmatch->regs.end[nth]; - len = end - start; - str = mrb_str_subseq(mrb, mrb_obj_value(m->str), start, len); - - return str; -} - -mrb_value -mrb_reg_last_match(mrb_state *mrb, mrb_value match) -{ - return mrb_reg_nth_match(mrb, 0, match); -} - - -static int -match_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref) -{ - const char *name; - int num; - - struct re_registers *regs = RMATCH_REGS(match); - struct RRegexp *regexp = RMATCH(match)->regexp; - - match_check(mrb, match); - switch(mrb_type(backref)) { - default: - return mrb_fixnum(backref); - - case MRB_TT_SYMBOL: - name = mrb_sym2name(mrb, mrb_symbol(backref)); - break; - - case MRB_TT_STRING: - //name = StringValueCStr(backref); - name = mrb_string_value_cstr(mrb, &backref); - break; - } - num = onig_name_to_backref_number(regexp->ptr, - (const unsigned char*)name, - (const unsigned char*)name + strlen(name), - regs); - if (num < 1) { - mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s", name); - } - - return num; -} -/* 15.2.15.6.3 */ -/* - * call-seq: - * Regexp.last_match -> matchdata - * Regexp.last_match(n) -> str - * - * The first form returns the <code>MatchData</code> object generated by the - * last successful pattern match. Equivalent to reading the global variable - * <code>$~</code>. The second form returns the <i>n</i>th field in this - * <code>MatchData</code> object. - * <em>n</em> can be a string or symbol to reference a named capture. - * - * Note that the <code>last_match</code> is local to the thread and method scope - * of the method that did the pattern match. - * - * /c(.)t/ =~ 'cat' #=> 0 - * Regexp.last_match #=> #<MatchData "cat" 1:"a"> - * Regexp.last_match(0) #=> "cat" - * Regexp.last_match(1) #=> "a" - * Regexp.last_match(2) #=> nil - * - * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ "var = val" - * Regexp.last_match #=> #<MatchData "var = val" lhs:"var" rhs:"val"> - * Regexp.last_match(:lhs) #=> "var" - * Regexp.last_match(:rhs) #=> "val" - */ -static mrb_value -mrb_reg_s_last_match(mrb_state *mrb, mrb_value self/*int argc, mrb_value *argv*/) -{ - //mrb_value nth; - mrb_value argv[16]; - int argc; - mrb_value match = mrb_backref_get(mrb); - - //if (argc > 0 && mrb_scan_args(argc, argv, "01", &nth) == 1) { - mrb_get_args(mrb, "*", &argv, &argc); - if (argc != 0) { - int n; - if (mrb_nil_p(match)) return mrb_nil_value(); - n = match_backref_number(mrb, match, argv[0]); - return mrb_reg_nth_match(mrb, n, match); - } - return match;//match_getter(); -} - -static void -mrb_reg_check(mrb_state *mrb, mrb_value re) -{ - //struct RRegexp *r = mrb_regex_ptr(re); - - //if (!(RREGEXP(re)->ptr) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) { - if (!(RREGEXP(re)->ptr)) { - mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp"); - } - if (RREGEXP(re)->src == 0) { - mrb_raise(mrb, E_TYPE_ERROR, "uninitialized Regexp"); - } -} - -int -mrb_reg_options(mrb_state *mrb, mrb_value re) -{ - int options; - - mrb_reg_check(mrb, re); - options = RREGEXP(re)->ptr->options & ARG_REG_OPTION_MASK; - if (mrb_basic(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED; - if (mrb_basic(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE; - return options; -} - -static mrb_value -mrb_reg_desc(mrb_state *mrb, const char *s, long len, mrb_value re) -{ - mrb_value str = mrb_str_new(mrb, "/", 1); - - mrb_reg_expr_str(mrb, str, s, len); - mrb_str_buf_cat(mrb, str, "/", 1); - if (re.tt) { - char opts[4]; - mrb_reg_check(mrb, re); - if (*option_to_str(opts, RREGEXP(re)->ptr->options)) - mrb_str_buf_cat(mrb, str, opts, strlen(opts));//mrb_str_buf_cat2(str, opts); - if (mrb_basic(re)->flags & REG_ENCODING_NONE) - mrb_str_buf_cat(mrb, str, "n", 1); - } - - return str; -} -static void -mrb_reg_raise(mrb_state *mrb, const char *s, long len, const char *err, mrb_value re) -{ - mrb_value desc = mrb_reg_desc(mrb, s, len, re); - - mrb_raise(mrb, E_REGEXP_ERROR, "%s: %s", err, RSTRING_PTR(desc)); -} - -regex_t * -mrb_reg_prepare_re(mrb_state *mrb, mrb_value re, mrb_value str) -{ - regex_t *reg = RREGEXP(re)->ptr; - onig_errmsg_buffer err = ""; - int r; - OnigErrorInfo einfo; - const char *pattern; - mrb_value unescaped; - mrb_encoding *enc = mrb_ascii8bit_encoding(mrb); - - mrb_reg_check(mrb, re); - reg = RREGEXP(re)->ptr; - pattern = RREGEXP_SRC_PTR(re); - - unescaped = mrb_reg_preprocess(mrb, - pattern, pattern + RREGEXP(re)->src->len, err); - - if (mrb_nil_p(unescaped)) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "regexp preprocess failed: %s", err); - } - - r = onig_new(®, (UChar* )RSTRING_PTR(unescaped), - (UChar* )(RSTRING_PTR(unescaped) + RSTRING_LEN(unescaped)), - reg->options, enc, - OnigDefaultSyntax, &einfo); - if (r) { - onig_error_code_to_str((UChar*)err, r, &einfo); - mrb_reg_raise(mrb, pattern, RREGEXP_SRC_LEN(re), err, re); - } - - //RB_GC_GUARD(unescaped); - return reg; -} - - -mrb_int -mrb_reg_search(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse) -{ - long result; - mrb_value match; - struct re_registers regi, *regs = ®i; - char *range = RSTRING_PTR(str); - regex_t *reg; - int tmpreg; - - if (pos > RSTRING_LEN(str) || pos < 0) { - mrb_backref_set(mrb, mrb_nil_value()); - return -1; - } - - reg = mrb_reg_prepare_re(mrb, re, str); - tmpreg = reg != RREGEXP(re)->ptr; - if (!tmpreg) RREGEXP(re)->usecnt++; - - match = mrb_backref_get(mrb); - if (!mrb_nil_p(match)) { - /*if (FL_TEST(match, MATCH_BUSY)) { - match = Qnil; - } - else { - regs = RMATCH_REGS(match); - }*/ - regs = RMATCH_REGS(match); - } - if (mrb_nil_p(match)) { - memset(regs, 0, sizeof(struct re_registers)); - } -//--> - if (!reverse) { - range += RSTRING_LEN(str); - } - result = onig_search(reg, - (UChar*)(RSTRING_PTR(str)), - ((UChar*)(RSTRING_PTR(str)) + RSTRING_LEN(str)), - ((UChar*)(RSTRING_PTR(str)) + pos), - ((UChar*)range), - regs, ONIG_OPTION_NONE); - if (!tmpreg) RREGEXP(re)->usecnt--; - if (tmpreg) { - if (RREGEXP(re)->usecnt) { - onig_free(reg); - } - else { - onig_free(RREGEXP(re)->ptr); - RREGEXP(re)->ptr = reg; - } - } - if (result < 0) { - if (regs == ®i) - onig_region_free(regs, 0); - if (result == ONIG_MISMATCH) { - mrb_backref_set(mrb, mrb_nil_value()); - return result; - } - else { - onig_errmsg_buffer err = ""; - onig_error_code_to_str((UChar*)err, (int)result); - mrb_reg_raise(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re); - } - } -//--< - if (mrb_nil_p(match) ) { - match = match_alloc(mrb); - onig_region_copy(RMATCH_REGS(match), regs); - onig_region_free(regs, 0); - } - - RMATCH(match)->str = mrb_str_ptr(str); - RMATCH(match)->regexp = mrb_regex_ptr(re); - RMATCH(match)->rmatch->char_offset_updated = 0; - mrb_backref_set(mrb, match); - - return result; -} - -mrb_int -mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse) -{ - mrb_int range; - struct RString *s = mrb_str_ptr(str); - struct RRegexp *r = mrb_regex_ptr(re); - - mrb_reg_check(mrb, re); - /*if (may_need_recompile) mrb_reg_prepare_re(re);*/ - - /* if (FL_TEST(re, KCODE_FIXED)) - mrb_kcode_set_option(re); - else if (reg_kcode != curr_kcode) - mrb_kcode_reset_option(); */ - - if (reverse) { - range = -pos; - } - else { - range = s->len - pos; - } - return re_adjust_startpos(r->ptr, - s->buf, s->len, - pos, range); -} - -static int -onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, - OnigErrorInfo* einfo, const char *sourcefile, int sourceline) -{ - int r; - - *reg = (regex_t* )malloc/*xmalloc*/(sizeof(regex_t)); - if ((void*)(*reg) == (void*)0) return ONIGERR_MEMORY; - - r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) goto err; - r = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline); - if (r) { - err: - onig_free(*reg); - *reg = 0/*NULL*/; - } - return r; -} - -static Regexp* -make_regexp(const char *s, long len, mrb_encoding *enc, int flags, onig_errmsg_buffer err, - const char *sourcefile, int sourceline) -{ - Regexp *rp; - int r; - OnigErrorInfo einfo; - - /* Handle escaped characters first. */ - - /* Build a copy of the string (in dest) with the - escaped characters translated, and generate the regex - from that. - */ - - r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags, - enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline); - if (r) { - onig_error_code_to_str((UChar*)err, r, &einfo); - return 0; - } - return rp; -} - -unsigned long -ruby_scan_hex(const char *start, size_t len, size_t *retlen) -{ - static const char hexdigit[] = "0123456789abcdef0123456789ABCDEF"; - register const char *s = start; - register unsigned long retval = 0; - const char *tmp; - - while (len-- && *s && (tmp = strchr(hexdigit, *s))) { - retval <<= 4; - retval |= (tmp - hexdigit) & 15; - s++; - } - *retlen = (int)(s - start); /* less than len */ - return retval; -} - -#define BYTEWIDTH 8 - -int -mrb_uv_to_utf8(mrb_state *mrb, char buf[6], unsigned long uv) -{ - if (uv <= 0x7f) { - buf[0] = (char)uv; - return 1; - } - if (uv <= 0x7ff) { - buf[0] = (char)((uv>>6)&0xff)|0xc0; - buf[1] = (char)(uv&0x3f)|0x80; - return 2; - } - if (uv <= 0xffff) { - buf[0] = (char)((uv>>12)&0xff)|0xe0; - buf[1] = (char)((uv>>6)&0x3f)|0x80; - buf[2] = (char)(uv&0x3f)|0x80; - return 3; - } - if (uv <= 0x1fffff) { - buf[0] = (char)((uv>>18)&0xff)|0xf0; - buf[1] = (char)((uv>>12)&0x3f)|0x80; - buf[2] = (char)((uv>>6)&0x3f)|0x80; - buf[3] = (char)(uv&0x3f)|0x80; - return 4; - } - if (uv <= 0x3ffffff) { - buf[0] = (char)((uv>>24)&0xff)|0xf8; - buf[1] = (char)((uv>>18)&0x3f)|0x80; - buf[2] = (char)((uv>>12)&0x3f)|0x80; - buf[3] = (char)((uv>>6)&0x3f)|0x80; - buf[4] = (char)(uv&0x3f)|0x80; - return 5; - } - if (uv <= 0x7fffffff) { - buf[0] = (char)((uv>>30)&0xff)|0xfc; - buf[1] = (char)((uv>>24)&0x3f)|0x80; - buf[2] = (char)((uv>>18)&0x3f)|0x80; - buf[3] = (char)((uv>>12)&0x3f)|0x80; - buf[4] = (char)((uv>>6)&0x3f)|0x80; - buf[5] = (char)(uv&0x3f)|0x80; - return 6; - } - mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range"); - return 0; -} - -unsigned long -ruby_scan_oct(const char *start, size_t len, size_t *retlen) -{ - register const char *s = start; - register unsigned long retval = 0; - - while (len-- && *s >= '0' && *s <= '7') { - retval <<= 3; - retval |= *s++ - '0'; - } - *retlen = (int)(s - start); /* less than len */ - return retval; -} - -static mrb_value -mrb_reg_preprocess(mrb_state *mrb, const char *p, const char *end, onig_errmsg_buffer err) -{ - return mrb_nil_value(); -} - -static int -mrb_reg_initialize(mrb_state *mrb, mrb_value obj, const char *s, long len, - int options, onig_errmsg_buffer err, - const char *sourcefile, int sourceline) -{ - struct RRegexp *re = RREGEXP(obj); - mrb_value unescaped; - mrb_encoding *enc = mrb_ascii8bit_encoding(mrb); - if (re->ptr) - mrb_raise(mrb, E_TYPE_ERROR, "already initialized regexp"); - re->ptr = 0; - - unescaped = mrb_reg_preprocess(mrb, s, s+len, err); - if (mrb_nil_p(unescaped)) - return -1; - - if ((options & ARG_ENCODING_FIXED)) { - //re->basic.flags |= KCODE_FIXED; - re->flags|= KCODE_FIXED; - } - if (options & ARG_ENCODING_NONE) { - re->flags |= REG_ENCODING_NONE; - } - - re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc, - options & ARG_REG_OPTION_MASK, err, - sourcefile, sourceline); - if (!re->ptr) return -1; - re->src = mrb_str_ptr(mrb_str_new(mrb, s, len)); - - return 0; -} - -static int -mrb_reg_initialize_str(mrb_state *mrb, mrb_value obj, mrb_value str, int options, onig_errmsg_buffer err, - const char *sourcefile, int sourceline) -{ - int ret; - -#if 0 - if (options & ARG_ENCODING_NONE) { - mrb_encoding *ascii8bit = mrb_ascii8bit_encoding(mrb); - if (enc != ascii8bit) { - if (mrb_enc_str_coderange(mrb, str) != ENC_CODERANGE_7BIT) { - //errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); - printf("/.../n has a non escaped non ASCII character in non ASCII-8BIT script"); - return -1; - } - enc = ascii8bit; - } - } -#endif - - ret = mrb_reg_initialize(mrb, obj, RSTRING_PTR(str), RSTRING_LEN(str), - options, err, sourcefile, sourceline); - - return ret; -} - -/* 15.2.15.7.1 */ -/* - * call-seq: - * Regexp.initialize(string, [options [, lang]]) -> regexp - * Regexp.initialize(regexp) -> regexp - * - * Constructs a new regular expression from <i>pattern</i>, which can be either - * a <code>String</code> or a <code>Regexp</code> (in which case that regexp's - * options are propagated, and new options may not be specified (a change as of - * Ruby 1.8). If <i>options</i> is a <code>Fixnum</code>, it should be one or - * more of the constants <code>Regexp::EXTENDED</code>, - * <code>Regexp::IGNORECASE</code>, and <code>Regexp::MULTILINE</code>, - * <em>or</em>-ed together. Otherwise, if <i>options</i> is not - * <code>nil</code>, the regexp will be case insensitive. - * When the <i>lang</i> parameter is `n' or `N' sets the regexp no encoding. - * - * r1 = Regexp.initialize('^a-z+:\\s+\w+') #=> /^a-z+:\s+\w+/ - * r2 = Regexp.initialize('cat', true) #=> /cat/i - * r3 = Regexp.initialize('dog', Regexp::EXTENDED) #=> /dog/x - * r4 = Regexp.initialize(r2) #=> /cat/i - */ - -static mrb_value -mrb_reg_initialize_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value self) -{ - mrb_value argv[16]; - int argc; - onig_errmsg_buffer err = ""; - int flags = 0; - mrb_value str; - const char *ptr; - long len; - - mrb_get_args(mrb, "*", &argv, &argc); - if (argc == 0 || argc > 3) { - mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..3)", argc); - } - if (mrb_type(argv[0]) == MRB_TT_REGEX) { - mrb_value re = argv[0]; - - if (argc > 1) { - /* mrb_warn("flags ignored"); */ - printf("flags ignored"); - } - mrb_reg_check(mrb, re); - flags = mrb_reg_options(mrb, re); - ptr = RREGEXP_SRC_PTR(re); - len = RREGEXP_SRC_LEN(re); - if (mrb_reg_initialize(mrb, self, ptr, len, flags, err, NULL, 0)) { - printf("mrb_reg_raise_str(str, flags, err);"); - } - } - else { - if (argc >= 2) { - if (mrb_type(argv[1]) == MRB_TT_FIXNUM) flags = mrb_fixnum(argv[1]); - else if (mrb_test(argv[1])) flags = ONIG_OPTION_IGNORECASE; - } - if (argc == 3 && !mrb_nil_p(argv[2])) { - //char *kcode = StringValuePtr(argv[2]); - char *kcode = mrb_string_value_ptr(mrb, argv[2]); - if (kcode[0] == 'n' || kcode[0] == 'N') { - flags |= ARG_ENCODING_NONE; - } - else { - /*mrb_warn("encoding option is ignored - %s", kcode); */ - printf("mrb_warn:encoding option is ignored - %s", kcode); - } - } - str = argv[0]; - //ptr = StringValuePtr(str); - ptr = mrb_string_value_ptr(mrb, str); - if (mrb_reg_initialize_str(mrb, self, str, flags, err, NULL, 0)) { - //mrb_reg_raise_str(str, flags, err); - } - } - return self; -} - -/* 15.2.15.7.2 */ -/* :nodoc: */ -static mrb_value -mrb_reg_init_copy(mrb_state *mrb, mrb_value re/*, mrb_value copy*/) -{ - mrb_value argv[16]; - int argc; - onig_errmsg_buffer err = ""; - const char *s; - long len; - mrb_value copy; - - mrb_get_args(mrb, "*", &argv, &argc); - copy = argv[0]; - if (mrb_obj_equal(mrb, copy, re)) return copy; - /*mrb_check_frozen(copy);*/ - /* need better argument type check */ - if (!mrb_obj_is_instance_of(mrb, re, mrb_obj_class(mrb, copy))) { - mrb_raise(mrb, E_TYPE_ERROR, "wrong argument type"); - } - mrb_reg_check(mrb, copy); - s = RREGEXP_SRC_PTR(copy); - len = RREGEXP_SRC_LEN(copy); - if (mrb_reg_initialize(mrb, re, s, len, mrb_reg_options(mrb, copy), - err, 0/*NULL*/, 0) != 0) { - mrb_reg_raise(mrb, s, len, err, re); - } - return re; -} - -static int -reg_equal(mrb_state *mrb, struct RRegexp *re1, struct RRegexp *re2) -{ - if (re1->ptr->options != re2->ptr->options) return FALSE; - if (!mrb_equal(mrb, mrb_obj_value(re1->src), mrb_obj_value(re2->src))) - return FALSE; - return TRUE; -} - -static int -mrb_reg_equal(mrb_state *mrb, mrb_value re1, mrb_value re2) -{ - if (mrb_obj_equal(mrb, re1, re2)) return TRUE; - - if (mrb_type(re2) != MRB_TT_REGEX) return FALSE; - mrb_reg_check(mrb, re1); - mrb_reg_check(mrb, re2); - return reg_equal(mrb, RREGEXP(re1), RREGEXP(re2)); -} - -/* 15.2.15.7.3 */ -/* - * call-seq: - * rxp == other_rxp -> true or false - * rxp.eql?(other_rxp) -> true or false - * - * Equality---Two regexps are equal if their patterns are identical, they have - * the same character set code, and their <code>casefold?</code> values are the - * same. - * - * /abc/ == /abc/x #=> false - * /abc/ == /abc/i #=> false - * /abc/ == /abc/n #=> false - * /abc/u == /abc/n #=> false - */ - -static mrb_value -mrb_reg_equal_m(mrb_state *mrb, mrb_value re1/*, mrb_value re2*/) -{ - mrb_value re2; - - mrb_get_args(mrb, "o", &re2); - if (mrb_reg_equal(mrb, re1, re2)) - return mrb_true_value(); - return mrb_false_value(); -} - -/* 15.2.15.7.4 */ -/* - * call-seq: - * rxp === str -> true or false - * - * Case Equality---Synonym for <code>Regexp#=~</code> used in case statements. - * - * a = "HELLO" - * case a - * when /^[a-z]*$/; print "Lower case\n" - * when /^[A-Z]*$/; print "Upper case\n" - * else; print "Mixed case\n" - * end - * - * <em>produces:</em> - * - * Upper case - */ - -mrb_value -mrb_reg_eqq(mrb_state *mrb, mrb_value re/*, mrb_value str*/) -{ - long start; - mrb_value str; - - mrb_get_args(mrb, "o", &str); - str = reg_operand(mrb, str, 0/*FALSE*/); - if (mrb_nil_p(str)) { - mrb_backref_set(mrb, mrb_nil_value()); - return mrb_false_value(); - } - start = mrb_reg_search(mrb, re, str, 0, 0); - if (start < 0) { - return mrb_false_value(); - } - return mrb_true_value(); -} - -static long -reg_match_pos(mrb_state *mrb, mrb_value re, mrb_value *strp, long pos) -{ - mrb_value str = *strp; - - if (mrb_nil_p(str)) { - mrb_backref_set(mrb, mrb_nil_value()); - return -1; - } - *strp = str = reg_operand(mrb, str, 1/*TRUE*/); - if (pos != 0) { - if (pos < 0) { - mrb_value l = mrb_str_size(mrb, str); - pos += mrb_fixnum(l); - if (pos < 0) { - return pos; - } - } - pos = mrb_str_offset(mrb, str, pos); - } - return mrb_reg_search(mrb, re, str, pos, 0); -} - -mrb_value -mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str) -{ - mrb_int pos = reg_match_pos(mrb, re, &str, 0); - if (pos < 0) return mrb_nil_value(); - pos = mrb_str_sublen(mrb, str, pos); - return mrb_fixnum_value(pos); -} -/* 15.2.15.7.5 */ -/* - * call-seq: - * rxp =~ str -> integer or nil - * - * Match---Matches <i>rxp</i> against <i>str</i>. - * - * /at/ =~ "input data" #=> 7 - * /ax/ =~ "input data" #=> nil - * - * If <code>=~</code> is used with a regexp literal with named captures, - * captured strings (or nil) is assigned to local variables named by - * the capture names. - * - * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = y " - * p lhs #=> "x" - * p rhs #=> "y" - * - * If it is not matched, nil is assigned for the variables. - * - * /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ " x = " - * p lhs #=> nil - * p rhs #=> nil - * - * This assignment is implemented in the Ruby parser. - * The parser detects 'regexp-literal =~ expression' for the assignment. - * The regexp must be a literal without interpolation and placed at left hand side. - * - * The assignment is not occur if the regexp is not a literal. - * - * re = /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ - * re =~ " x = y " - * p lhs # undefined local variable - * p rhs # undefined local variable - * - * A regexp interpolation, <code>#{}</code>, also disables - * the assignment. - * - * rhs_pat = /(?<rhs>\w+)/ - * /(?<lhs>\w+)\s*=\s*#{rhs_pat}/ =~ "x = y" - * p lhs # undefined local variable - * - * The assignment is not occur if the regexp is placed at right hand side. - * - * " x = y " =~ /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ - * p lhs, rhs # undefined local variable - * - */ -mrb_value -mrb_reg_match(mrb_state *mrb, mrb_value re/*, mrb_value str*/) -{ - mrb_value str; - - mrb_get_args(mrb, "o", &str); - return mrb_reg_match_str(mrb, re, str); -} - -/* 15.2.15.7.6 */ -/* - * call-seq: - * rxp.casefold? -> true or false - * - * Returns the value of the case-insensitive flag. - * - * /a/.casefold? #=> false - * /a/i.casefold? #=> true - * /(?i:a)/.casefold? #=> false - */ - -static mrb_value -mrb_reg_casefold_p(mrb_state *mrb, mrb_value re) -{ - mrb_reg_check(mrb, re); - if (RREGEXP(re)->ptr->options & ONIG_OPTION_IGNORECASE) return mrb_true_value(); - return mrb_false_value(); -} - -/* 15.2.15.7.7 */ -/* - * call-seq: - * rxp.match(str) -> matchdata or nil - * rxp.match(str,pos) -> matchdata or nil - * - * Returns a <code>MatchData</code> object describing the match, or - * <code>nil</code> if there was no match. This is equivalent to retrieving the - * value of the special variable <code>$~</code> following a normal match. - * If the second parameter is present, it specifies the position in the string - * to begin the search. - * - * /(.)(.)(.)/.match("abc")[2] #=> "b" - * /(.)(.)/.match("abc", 1)[2] #=> "c" - * - * If a block is given, invoke the block with MatchData if match succeed, so - * that you can write - * - * pat.match(str) {|m| ...} - * - * instead of - * - * if m = pat.match(str) - * ... - * end - * - * The return value is a value from block execution in this case. - */ - -static mrb_value -mrb_reg_match_m(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value re) -{ - mrb_value argv[16]; - int argc; - mrb_value result, str, initpos, b; - long pos; - - //if (mrb_scan_args(argc, argv, "11", &str, &initpos) == 2) { - mrb_get_args(mrb, "*&", &argv, &argc, &b); - if (argc == 2) { - initpos = argv[1]; - pos = mrb_fixnum(initpos); - } - else { - pos = 0; - } - str = argv[0]; - pos = reg_match_pos(mrb, re, &str, pos); - if (pos < 0) { - mrb_backref_set(mrb, mrb_nil_value()); - return mrb_nil_value(); - } - result = mrb_backref_get(mrb); - /*mrb_match_busy(result);*/ - if (!mrb_nil_p(result) && mrb_block_given_p()) { - return mrb_yield(mrb, result, b); - } - return result; -} - -/* 15.2.15.7.8 */ - -/* - * call-seq: - * rxp.source -> str - * - * Returns the original string of the pattern. - * - * /ab+c/ix.source #=> "ab+c" - * - * Note that escape sequences are retained as is. - * - * /\x20\+/.source #=> "\\x20\\+" - * - */ - -static mrb_value -mrb_reg_source(mrb_state *mrb, mrb_value re) -{ - mrb_value str; - - mrb_reg_check(mrb, re); - str = mrb_str_new(mrb, RREGEXP_SRC_PTR(re),RREGEXP_SRC_LEN(re)); - return str; -} - -static int -name_to_backref_number(mrb_state *mrb, struct re_registers *regs, struct RRegexp*regexp, const char* name, const char* name_end) -{ - int num; - - num = onig_name_to_backref_number(regexp->ptr, - (const unsigned char* )name, (const unsigned char* )name_end, regs); - if (num >= 1) { - return num; - } - else { - mrb_value s = mrb_str_new(mrb, name, (long )(name_end - name));//mrb_str_new(name, (long )(name_end - name)); - mrb_raise(mrb, E_INDEX_ERROR, "undefined group name reference: %s", - mrb_string_value_ptr(mrb, s)); - return num; /* not reach */ - } -} - -/* - * Document-class: MatchData - * - * <code>MatchData</code> is the type of the special variable <code>$~</code>, - * and is the type of the object returned by <code>Regexp#match</code> and - * <code>Regexp.last_match</code>. It encapsulates all the results of a pattern - * match, results normally accessed through the special variables - * <code>$&</code>, <code>$'</code>, <code>$`</code>, <code>$1</code>, - * <code>$2</code>, and so on. - * - */ - -mrb_value -match_alloc(mrb_state *mrb) -{ - struct RMatch* m; - - m = (struct RMatch*)mrb_obj_alloc(mrb, MRB_TT_MATCH, MATCH_CLASS); - - m->str = 0; - m->rmatch = 0; - m->regexp = 0; - m->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch); - memset(m->rmatch, 0, sizeof(struct rmatch)); - - return mrb_obj_value(m); -} - -/* ------------------------------------------------------------------------- */ -/* MatchData Class */ -/* ------------------------------------------------------------------------- */ -/* 15.2.16.3.1 */ -/* - * call-seq: - * mtch[i] -> str or nil - * mtch[start, length] -> array - * mtch[range] -> array - * mtch[name] -> str or nil - * - * Match Reference---<code>MatchData</code> acts as an array, and may be - * accessed using the normal array indexing techniques. <i>mtch</i>[0] is - * equivalent to the special variable <code>$&</code>, and returns the entire - * matched string. <i>mtch</i>[1], <i>mtch</i>[2], and so on return the values - * of the matched backreferences (portions of the pattern between parentheses). - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m #=> #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> - * m[0] #=> "HX1138" - * m[1, 2] #=> ["H", "X"] - * m[1..3] #=> ["H", "X", "113"] - * m[-3, 2] #=> ["X", "113"] - * - * m = /(?<foo>a+)b/.match("ccaaab") - * m #=> #<MatchData "aaab" foo:"aaa"> - * m["foo"] #=> "aaa" - * m[:foo] #=> "aaa" - */ - -static mrb_value -mrb_match_aref(mrb_state *mrb, /*int argc, mrb_value *argv,*/ mrb_value match) -{ - mrb_value argv[16]; - int argc; - mrb_value idx; - - match_check(mrb, match); - //mrb_scan_args(argc, argv, "11", &idx, &rest); - mrb_get_args(mrb, "*", &argv, &argc); - idx = argv[0]; - if (argc<2) { - if (mrb_type(idx) == MRB_TT_FIXNUM) { - if (mrb_fixnum(idx) >= 0) { - return mrb_reg_nth_match(mrb, mrb_fixnum(idx), match); - } - } - else { - const char *p; - int num; - - switch (mrb_type(idx)) { - case MRB_TT_SYMBOL: - p = mrb_sym2name(mrb, mrb_symbol(idx)); - goto name_to_backref; - break; - case MRB_TT_STRING: - //p = StringValuePtr(idx); - p = mrb_string_value_ptr(mrb, idx); -name_to_backref: - num = name_to_backref_number(mrb, RMATCH_REGS(match), - RMATCH(match)->regexp, p, p + strlen(p)); - return mrb_reg_nth_match(mrb, num, match); - break; - default: - break; - } - } - } - - return mrb_ary_aget(mrb, /*argc, argv,*/ mrb_match_to_a(mrb, match)); -} - -typedef struct { - long byte_pos; - long char_pos; -} pair_t; - -static void -update_char_offset(mrb_state *mrb, mrb_value match) -{ - struct rmatch *rm = RMATCH(match)->rmatch; - struct re_registers *regs; - int i, num_regs; - - if (rm->char_offset_updated) - return; - - regs = &rm->regs; - num_regs = rm->regs.num_regs; - - if (rm->char_offset_num_allocated < num_regs) { - //REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs); - rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)*num_regs); - rm->char_offset_num_allocated = num_regs; - } - - for (i = 0; i < num_regs; i++) { - rm->char_offset[i].beg = BEG(i); - rm->char_offset[i].end = END(i); - } - rm->char_offset_updated = 1; - return; -} - -/* 15.2.16.3.2 */ -/* - * call-seq: - * mtch.begin(n) -> integer - * - * Returns the offset of the start of the <em>n</em>th element of the match - * array in the string. - * <em>n</em> can be a string or symbol to reference a named capture. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.begin(0) #=> 1 - * m.begin(2) #=> 2 - * - * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") - * p m.begin(:foo) #=> 0 - * p m.begin(:bar) #=> 2 - */ - -static mrb_value -mrb_match_begin(mrb_state *mrb, mrb_value match/*, mrb_value n*/) -{ - mrb_value argv[16]; - int argc; - mrb_value n = argv[0]; - int i; - struct re_registers *regs; - - match_check(mrb, match); - mrb_get_args(mrb, "*", &argv, &argc); - n = argv[0]; - i = match_backref_number(mrb, match, n); - regs = RMATCH_REGS(match); - - if (i < 0 || regs->num_regs <= i) - mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i); - - if (BEG(i) < 0) - return mrb_nil_value(); - - update_char_offset(mrb, match); - return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg); -} - -static mrb_value -match_array(mrb_state *mrb, mrb_value match, int start) -{ - struct re_registers *regs; - mrb_value ary; - struct RString *target; - int i; - - match_check(mrb, match); - regs = RMATCH_REGS(match); - ary = mrb_ary_new_capa(mrb, regs->num_regs);//mrb_ary_new2(regs->num_regs); - target = RMATCH(match)->str; - - for (i=start; i<regs->num_regs; i++) { - if (regs->beg[i] == -1) { - mrb_ary_push(mrb, ary, mrb_nil_value()); - } - else { - mrb_value str = mrb_str_subseq(mrb, mrb_obj_value(target), regs->beg[i], regs->end[i]-regs->beg[i]); - mrb_ary_push(mrb, ary, str); - } - } - return ary; -} - -/* 15.2.16.3.3 */ -/* - * call-seq: - * mtch.captures -> array - * - * Returns the array of captures; equivalent to <code>mtch.to_a[1..-1]</code>. - * - * f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures - * f1 #=> "H" - * f2 #=> "X" - * f3 #=> "113" - * f4 #=> "8" - */ -static mrb_value -mrb_match_captures(mrb_state *mrb, mrb_value match) -{ - return match_array(mrb, match, 1); -} - -/* 15.2.16.3.4 */ -/* - * call-seq: - * mtch.end(n) -> integer - * - * Returns the offset of the character immediately following the end of the - * <em>n</em>th element of the match array in the string. - * <em>n</em> can be a string or symbol to reference a named capture. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.end(0) #=> 7 - * m.end(2) #=> 3 - * - * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") - * p m.end(:foo) #=> 1 - * p m.end(:bar) #=> 3 - */ - -static mrb_value -mrb_match_end(mrb_state *mrb, mrb_value match/*, mrb_value n*/) -{ - mrb_value argv[16]; - int argc; - mrb_value n; - int i; - struct re_registers *regs; - - match_check(mrb, match); - mrb_get_args(mrb, "*", &argv, &argc); - n = argv[0]; - i = match_backref_number(mrb, match, n); - regs = RMATCH_REGS(match); - - if (i < 0 || regs->num_regs <= i) - mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i); - - if (BEG(i) < 0) - return mrb_nil_value(); - - update_char_offset(mrb, match); - return mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end); -} - -/* 15.2.16.3.5 */ -/* :nodoc: */ -static mrb_value -mrb_match_init_copy(mrb_state *mrb, mrb_value obj/*, mrb_value orig*/) -{ - mrb_value argv[16]; - int argc; - struct rmatch *rm; - mrb_value orig; - - mrb_get_args(mrb, "*", &argv, &argc); - orig = argv[0]; - - if (mrb_obj_equal(mrb, obj, orig)) return obj; - - if (!mrb_obj_is_instance_of(mrb, orig, mrb_obj_class(mrb, obj))) { - mrb_raise(mrb, E_TYPE_ERROR, "wrong argument class"); - } - - RMATCH(obj)->str = RMATCH(orig)->str; - RMATCH(obj)->regexp = RMATCH(orig)->regexp; - - if (RMATCH(obj)->rmatch == 0) { - RMATCH(obj)->rmatch = mrb_malloc(mrb, sizeof(struct rmatch));//ALLOC(struct rmatch); - memset(RMATCH(obj)->rmatch, 0, sizeof(struct rmatch)); - } - rm = RMATCH(obj)->rmatch; - onig_region_copy(&rm->regs, RMATCH_REGS(orig)); - - if (!RMATCH(orig)->rmatch->char_offset_updated) { - rm->char_offset_updated = 0; - } - else { - if (rm->char_offset_num_allocated < rm->regs.num_regs) { - //REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs); - rm->char_offset = mrb_realloc(mrb, rm->char_offset, sizeof(struct rmatch_offset)* rm->regs.num_regs); - rm->char_offset_num_allocated = rm->regs.num_regs; - } - memcpy(rm->char_offset, RMATCH(orig)->rmatch->char_offset, - sizeof(struct rmatch_offset) * rm->regs.num_regs); - rm->char_offset_updated = 1; - } - - return obj; -} - -/* 15.2.16.3.6 */ -/* 15.2.16.3.10 */ -/* - * call-seq: - * mtch.length -> integer - * mtch.size -> integer - * - * Returns the number of elements in the match array. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.length #=> 5 - * m.size #=> 5 - */ - -static mrb_value -mrb_match_size(mrb_state *mrb, mrb_value match) -{ - match_check(mrb, match); - return mrb_fixnum_value(RMATCH_REGS(match)->num_regs); -} - -/* 15.2.16.3.7 */ -/* - * call-seq: - * mtch.offset(n) -> array - * - * Returns a two-element array containing the beginning and ending offsets of - * the <em>n</em>th match. - * <em>n</em> can be a string or symbol to reference a named capture. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.offset(0) #=> [1, 7] - * m.offset(4) #=> [6, 7] - * - * m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") - * p m.offset(:foo) #=> [0, 1] - * p m.offset(:bar) #=> [2, 3] - * - */ - -static mrb_value -mrb_match_offset(mrb_state *mrb, mrb_value match/*, mrb_value n*/) -{ - mrb_value n; - struct re_registers *regs = RMATCH_REGS(match); - int i; - - match_check(mrb, match); - mrb_get_args(mrb, "o", &n); - i = match_backref_number(mrb, match, n); - - if (i < 0 || regs->num_regs <= i) - mrb_raise(mrb, E_INDEX_ERROR, "index %d out of matches", i); - - if (BEG(i) < 0) - return mrb_assoc_new(mrb, mrb_nil_value(), mrb_nil_value()); - - update_char_offset(mrb, match); - return mrb_assoc_new(mrb, mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].beg), - mrb_fixnum_value(RMATCH(match)->rmatch->char_offset[i].end)); -} - -/* 15.2.16.3.8 */ -/* - * call-seq: - * mtch.post_match -> str - * - * Returns the portion of the original string after the current match. - * Equivalent to the special variable <code>$'</code>. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") - * m.post_match #=> ": The Movie" - */ -mrb_value -mrb_reg_match_post(mrb_state *mrb, mrb_value match) -{ - struct RString *str; - long pos; - struct re_registers *regs; - - if (mrb_nil_p(match)) return mrb_nil_value(); - match_check(mrb, match); - regs = RMATCH_REGS(match); - if (BEG(0) == -1) return mrb_nil_value(); - str = RMATCH(match)->str; - pos = END(0); - return mrb_str_subseq(mrb, mrb_obj_value(str), pos, str->len - pos); -} - -/* 15.2.16.3.9 */ -/* - * call-seq: - * mtch.pre_match -> str - * - * Returns the portion of the original string before the current match. - * Equivalent to the special variable <code>$`</code>. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.pre_match #=> "T" - */ - -mrb_value -mrb_reg_match_pre(mrb_state *mrb, mrb_value match) -{ - mrb_value str; - struct re_registers *regs; - - if (mrb_nil_p(match)) return mrb_nil_value(); - match_check(mrb, match); - regs = RMATCH_REGS(match); - if (BEG(0) == -1) return mrb_nil_value(); - str = mrb_str_subseq(mrb, mrb_obj_value(RMATCH(match)->str), 0, BEG(0)); - - return str; -} - -/* 15.2.16.3.11 */ -/* - * call-seq: - * mtch.string -> str - * - * Returns a frozen copy of the string passed in to <code>match</code>. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.string #=> "THX1138." - */ - -static mrb_value -mrb_match_string(mrb_state *mrb, mrb_value match) -{ - match_check(mrb, match); - return mrb_obj_value(RMATCH(match)->str); -} - -/* 15.2.16.3.12 */ -/* - * call-seq: - * mtch.to_a -> anArray - * - * Returns the array of matches. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.to_a #=> ["HX1138", "H", "X", "113", "8"] - * - * Because <code>to_a</code> is called when expanding - * <code>*</code><em>variable</em>, there's a useful assignment - * shortcut for extracting matched fields. This is slightly slower than - * accessing the fields directly (as an intermediate array is - * generated). - * - * all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) - * all #=> "HX1138" - * f1 #=> "H" - * f2 #=> "X" - * f3 #=> "113" - */ - -static mrb_value -mrb_match_to_a(mrb_state *mrb, mrb_value match) -{ - return match_array(mrb, match, 0); -} - -/* 15.2.16.3.13 */ -/* - * call-seq: - * mtch.to_s -> str - * - * Returns the entire matched string. - * - * m = /(.)(.)(\d+)(\d)/.match("THX1138.") - * m.to_s #=> "HX1138" - */ - -static mrb_value -mrb_match_to_s(mrb_state *mrb, mrb_value match) -{ - mrb_value str = mrb_reg_last_match(mrb, match); - - match_check(mrb, match); - if (mrb_nil_p(str)) str = mrb_str_new(mrb, 0, 0);//mrb_str_new(0,0); - - return str; -} - -static int -char_to_option(int c) -{ - int val; - - switch (c) { - case 'i': - val = ONIG_OPTION_IGNORECASE; - break; - case 'x': - val = ONIG_OPTION_EXTEND; - break; - case 'm': - val = ONIG_OPTION_MULTILINE; - break; - default: - val = 0; - break; - } - return val; -} - -static char * -option_to_str(char str[4], int options) -{ - char *p = str; - if (options & ONIG_OPTION_MULTILINE) *p++ = 'm'; - if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i'; - if (options & ONIG_OPTION_EXTEND) *p++ = 'x'; - *p = 0; - return str; -} - -#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */ - -static void -mrb_reg_expr_str(mrb_state *mrb, mrb_value str, const char *s, long len) -{ - const char *p, *pend; - int need_escape = 0; - int c; - - p = s; pend = p + len; - while (p < pend) { - c = *p; - if (c == -1) { - p += pend - p; - } - else if (c != '/' && ISPRINT(c)) { - p++; - } - else { - need_escape = 1; - break; - } - } - - if (!need_escape) { - mrb_str_buf_cat(mrb, str, s, len); - } - else { - p = s; - while (p<pend) { - c = *p; - if (c == '\\' && p+1 < pend) { - int n = 1 + pend - (p+1); - mrb_str_buf_cat(mrb, str, p, n); - p += n; - continue; - } - else if (c == '/') { - char c = '\\'; - mrb_str_buf_cat(mrb, str, &c, 1); - mrb_str_buf_cat(mrb, str, p, 1); - } - else if (ISPRINT(c)) { - mrb_str_buf_cat(mrb, str, p, 1); - } - else if (!ISSPACE(c)) { - char b[8]; - int n; - - n = snprintf(b, sizeof(b), "\\x%02X", c); - mrb_str_buf_cat(mrb, str, b, n); - } - else { - mrb_str_buf_cat(mrb, str, p, 1); - } - p++; - } - } -} - -/* 15.2.15.7.9 (x) */ -/* - * call-seq: - * rxp.to_s -> str - * - * Returns a string containing the regular expression and its options (using the - * <code>(?opts:source)</code> notation. This string can be fed back in to - * <code>Regexp::new</code> to a regular expression with the same semantics as - * the original. (However, <code>Regexp#==</code> may not return true when - * comparing the two, as the source of the regular expression itself may - * differ, as the example shows). <code>Regexp#inspect</code> produces a - * generally more readable version of <i>rxp</i>. - * - * r1 = /ab+c/ix #=> /ab+c/ix - * s1 = r1.to_s #=> "(?ix-m:ab+c)" - * r2 = Regexp.new(s1) #=> /(?ix-m:ab+c)/ - * r1 == r2 #=> false - * r1.source #=> "ab+c" - * r2.source #=> "(?ix-m:ab+c)" - */ - -mrb_value -mrb_reg_to_s(mrb_state *mrb, mrb_value re) -{ - int options, opt; - const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND; - long len; - const UChar* ptr; - mrb_value str = mrb_str_new(mrb, "(?", 2); - char optbuf[5]; - mrb_encoding *enc = mrb_enc_get(mrb, re); - - mrb_reg_check(mrb, re); - memset(optbuf, 0, 5); - options = RREGEXP(re)->ptr->options; - ptr = (UChar*)RREGEXP_SRC_PTR(re); - len = RREGEXP_SRC_LEN(re); -again: - if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') { - int err = 1; - ptr += 2; - if ((len -= 2) > 0) { - do { - opt = char_to_option((int )*ptr); - if (opt != 0) { - options |= opt; - } - else { - break; - } - ++ptr; - } while (--len > 0); - } - if (len > 1 && *ptr == '-') { - ++ptr; - --len; - do { - opt = char_to_option((int )*ptr); - if (opt != 0) { - options &= ~opt; - } - else { - break; - } - ++ptr; - } while (--len > 0); - } - if (*ptr == ')') { - --len; - ++ptr; - goto again; - } - if (*ptr == ':' && ptr[len-1] == ')') { - Regexp *rp; - - ++ptr; - len -= 2; - err = onig_new(&rp, ptr, ptr + len, ONIG_OPTION_DEFAULT, - enc, OnigDefaultSyntax, NULL); - onig_free(rp); - } - if (err) { - options = RREGEXP(re)->ptr->options; - ptr = (UChar*)RREGEXP_SRC_PTR(re); - len = RREGEXP_SRC_LEN(re); - } - } - - if (*option_to_str(optbuf, options)) mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf)); - - if ((options & embeddable) != embeddable) { - optbuf[0] = '-'; - option_to_str(optbuf + 1, ~options); - mrb_str_buf_cat(mrb, str, optbuf, strlen(optbuf)); - } - - mrb_str_buf_cat(mrb, str, ":", 1); - mrb_reg_expr_str(mrb, str, (char*)ptr, len); - mrb_str_buf_cat(mrb, str, ")", 1); - - return str; -} - -/* 15.2.15.7.10(x) */ -/* - * call-seq: - * rxp.inspect -> string - * - * Produce a nicely formatted string-version of _rxp_. Perhaps surprisingly, - * <code>#inspect</code> actually produces the more natural version of - * the string than <code>#to_s</code>. - * - * /ab+c/ix.inspect #=> "/ab+c/ix" - * - */ - -static mrb_value -mrb_reg_inspect(mrb_state *mrb, mrb_value re) -{ - if (!RREGEXP(re)->ptr || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) { - return mrb_any_to_s(mrb, re); - } - return mrb_reg_desc(mrb, RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re); -} - -static mrb_value -mrb_reg_s_alloc(mrb_state *mrb, mrb_value dummy) -{ - struct RRegexp* re; - - //NEWOBJ(re, struct RRegexp); - //OBJSETUP(re, klass, T_REGEXP); - re = (struct RRegexp*)mrb_obj_alloc(mrb, MRB_TT_REGEX, REGEX_CLASS); - - re->ptr = 0; - re->src = 0; - re->usecnt = 0; - - return mrb_obj_value(re); -} - -mrb_value -mrb_reg_match_last(mrb_state *mrb, mrb_value match) -{ - int i; - - if (mrb_nil_p(match)) return mrb_nil_value(); - match_check(mrb, match); - if (RMATCH(match)->rmatch->char_offset[0].beg == -1) return mrb_nil_value(); - - for (i=RMATCH(match)->rmatch->regs.num_regs-1; RMATCH(match)->rmatch->char_offset[i].beg == -1 && i > 0; i--) - ; - if (i == 0) return mrb_nil_value(); - return mrb_reg_nth_match(mrb, i, match); -} - -/* 15.2.16.3.14(x) */ -/* - * call-seq: - * mtch.inspect -> str - * - * Returns a printable version of <i>mtch</i>. - * - * puts /.$/.match("foo").inspect - * #=> #<MatchData "o"> - * - * puts /(.)(.)(.)/.match("foo").inspect - * #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> - * - * puts /(.)(.)?(.)/.match("fo").inspect - * #=> #<MatchData "fo" 1:"f" 2:nil 3:"o"> - * - * puts /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge").inspect - * #=> #<MatchData "hog" foo:"h" bar:"o" baz:"g"> - * - */ -struct backref_name_tag { - const UChar *name; - long len; -}; - -static int -match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end, - int back_num, int *back_refs, OnigRegex regex, void *arg0) -{ - struct backref_name_tag *arg = (struct backref_name_tag*)arg0; - int i; - - for (i = 0; i < back_num; i++) { - arg[back_refs[i]].name = name; - arg[back_refs[i]].len = name_end - name; - } - return 0; -} - -static mrb_value -mrb_match_inspect(mrb_state *mrb, mrb_value match) -{ - const char *cname = mrb_obj_classname(mrb, match); - mrb_value str; - int i; - struct re_registers *regs = RMATCH_REGS(match); - int num_regs = regs->num_regs; - struct backref_name_tag *names; - struct RRegexp *regexp = RMATCH(match)->regexp; - - if (!regexp) { - return mrb_sprintf(mrb, "#<%s:%p>", cname, (void*)&match); - } - - //names = ALLOCA_N(struct backref_name_tag, num_regs); - //MEMZERO(names, struct backref_name_tag, num_regs); - names = mrb_malloc(mrb, sizeof(struct backref_name_tag)*num_regs); - memset(names, 0, sizeof(struct backref_name_tag)*num_regs); - - onig_foreach_name(regexp->ptr, - match_inspect_name_iter, names); - - str = mrb_str_new(mrb, "#<", 2); - mrb_str_buf_cat(mrb, str, cname, strlen(cname)); - - for (i = 0; i < num_regs; i++) { - char buf[sizeof(num_regs)*3+1]; - mrb_value v; - mrb_str_buf_cat(mrb, str, " ", 1); - if (0 < i) { - if (names[i].name) - mrb_str_buf_cat(mrb, str, (const char*)names[i].name, names[i].len); - else { - int n = sprintf(buf, "%d", i); - mrb_str_buf_cat(mrb, str, (const char*)buf, n); - } - mrb_str_buf_cat(mrb, str, ":", 1); - } - v = mrb_reg_nth_match(mrb, i, match); - if (mrb_nil_p(v)) - mrb_str_buf_cat(mrb, str, "nil", 3); - else - mrb_str_buf_append(mrb, str, mrb_str_inspect(mrb, v)); - } - mrb_str_buf_cat(mrb, str, ">", 1); - - return str; -} - -/* 15.2.16.3.15(x) */ -/* 15.2.16.3.16(x) */ -/* - * call-seq: - * mtch == mtch2 -> true or false - * - * Equality---Two matchdata are equal if their target strings, - * patterns, and matched positions are identical. - */ - -static mrb_value -mrb_match_equal(mrb_state *mrb, mrb_value match1) -{ - const struct re_registers *regs1, *regs2; - mrb_value match2; - - mrb_get_args(mrb, "o", &match2); - if (mrb_obj_equal(mrb, match1, match2)) return mrb_true_value(); - if (mrb_type(match2) != MRB_TT_MATCH) return mrb_false_value(); - if (!mrb_str_equal(mrb, mrb_obj_value(RMATCH(match1)->str), mrb_obj_value(RMATCH(match2)->str))) - return mrb_false_value(); - if (!reg_equal(mrb, RMATCH(match1)->regexp, RMATCH(match2)->regexp)) return mrb_false_value(); - regs1 = RMATCH_REGS(match1); - regs2 = RMATCH_REGS(match2); - if (regs1->num_regs != regs2->num_regs) return mrb_false_value(); - if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return mrb_false_value(); - if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return mrb_false_value(); - return mrb_true_value(); -} /* * Document-class: RegexpError @@ -2001,605 +40,5 @@ mrb_init_regexp(mrb_state *mrb) { struct RClass *s; s = mrb_define_class(mrb, "Regexp", mrb->object_class); - - mrb_define_class_method(mrb, s, "compile", mrb_reg_s_new_instance, ARGS_ANY()); /* 15.2.15.6.1 */ - mrb_define_class_method(mrb, s, "escape", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.2 */ - mrb_define_class_method(mrb, s, "last_match", mrb_reg_s_last_match, ARGS_ANY()); /* 15.2.15.6.3 */ - mrb_define_class_method(mrb, s, "quote", mrb_reg_s_quote, ARGS_REQ(1)); /* 15.2.15.6.4 */ - //mrb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2); - //mrb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1); - - mrb_define_method(mrb, s, "initialize", mrb_reg_initialize_m, ARGS_ANY()); /* 15.2.15.7.1 */ - mrb_define_method(mrb, s, "initialize_copy", mrb_reg_init_copy, ARGS_REQ(1)); /* 15.2.15.7.2 */ - mrb_define_method(mrb, s, "==", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.3 */ - mrb_define_method(mrb, s, "===", mrb_reg_eqq, ARGS_REQ(1)); /* 15.2.15.7.4 */ - mrb_define_method(mrb, s, "=~", mrb_reg_match, ARGS_REQ(1)); /* 15.2.15.7.5 */ - mrb_define_method(mrb, s, "casefold?", mrb_reg_casefold_p, ARGS_NONE()); /* 15.2.15.7.6 */ - mrb_define_method(mrb, s, "match", mrb_reg_match_m, ARGS_ANY()); /* 15.2.15.7.7 */ - mrb_define_method(mrb, s, "source", mrb_reg_source, ARGS_NONE()); /* 15.2.15.7.8 */ - //mrb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0); - //mrb_define_method(rb_cRegexp, "~", rb_reg_match2, 0); - mrb_define_method(mrb, s, "to_s", mrb_reg_to_s, ARGS_NONE()); /* 15.2.15.7.9 (x) */ - mrb_define_method(mrb, s, "inspect", mrb_reg_inspect, ARGS_NONE()); /* 15.2.15.7.10(x) */ - mrb_define_method(mrb, s, "eql?", mrb_reg_equal_m, ARGS_REQ(1)); /* 15.2.15.7.11(x) */ - //mrb_define_method(rb_cRegexp, "options", mrb_reg_options_m, 0); - //mrb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */ - //mrb_define_method(rb_cRegexp, "fixed_encoding?", mrb_reg_fixed_encoding_p, 0); - //mrb_define_method(rb_cRegexp, "names", rb_reg_names, 0); - //mrb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0); - - //mrb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE)); - //mrb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND)); - //mrb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE)); - //mrb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED)); - mrb_define_const(mrb, s, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE)); - mrb_define_const(mrb, s, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND)); - mrb_define_const(mrb, s, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE)); - mrb_define_const(mrb, s, "FIXEDENCODING", mrb_fixnum_value(ARG_ENCODING_FIXED)); - - s = mrb_define_class(mrb, "MatchData", mrb->object_class); - //mrb_undef_class_method(CLASS_OF(rb_cMatch), "new"); - - mrb_define_method(mrb, s, "[]", mrb_match_aref, ARGS_ANY()); /* 15.2.16.3.1 */ - mrb_define_method(mrb, s, "begin", mrb_match_begin, ARGS_REQ(1)); /* 15.2.16.3.2 */ - mrb_define_method(mrb, s, "captures", mrb_match_captures, ARGS_NONE()); /* 15.2.16.3.3 */ - mrb_define_method(mrb, s, "end", mrb_match_end, ARGS_REQ(1)); /* 15.2.16.3.4 */ - mrb_define_method(mrb, s, "initialize_copy", mrb_match_init_copy, ARGS_REQ(1)); /* 15.2.16.3.5 */ - mrb_define_method(mrb, s, "length", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.6 */ - mrb_define_method(mrb, s, "offset", mrb_match_offset, ARGS_REQ(1)); /* 15.2.16.3.7 */ - mrb_define_method(mrb, s, "post_match", mrb_reg_match_post, ARGS_NONE()); /* 15.2.16.3.8 */ - mrb_define_method(mrb, s, "pre_match", mrb_reg_match_pre, ARGS_NONE()); /* 15.2.16.3.9 */ - mrb_define_method(mrb, s, "size", mrb_match_size, ARGS_NONE()); /* 15.2.16.3.10 */ - mrb_define_method(mrb, s, "string", mrb_match_string, ARGS_NONE()); /* 15.2.16.3.11 */ - mrb_define_method(mrb, s, "to_a", mrb_match_to_a, ARGS_NONE()); /* 15.2.16.3.12 */ - mrb_define_method(mrb, s, "to_s", mrb_match_to_s, ARGS_NONE()); /* 15.2.16.3.13 */ - mrb_define_method(mrb, s, "inspect", mrb_match_inspect, ARGS_NONE()); /* 15.2.16.3.14(x) */ - mrb_define_method(mrb, s, "==", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.15(x) */ - mrb_define_method(mrb, s, "eql?", mrb_match_equal, ARGS_REQ(1)); /* 15.2.16.3.16(x) */ - //mrb_define_method(rb_cMatch, "regexp", match_regexp, 0); - //mrb_define_method(rb_cMatch, "names", match_names, 0); - //mrb_define_method(rb_cMatch, "values_at", match_values_at, -1); - //mrb_define_method(rb_cMatch, "hash", match_hash, 0); - //mrb_define_method(rb_cMatch, "==", match_equal, 1); -} -/* ----------------1_8_7---------------------------------------- */ -//`mrb_check_type' -//`mrb_reg_regsub' -//`mrb_backref_get' -//`mrb_memsearch' -//`mrb_reg_mbclen2' -//`mrb_reg_regcomp' -//`mrb_yield' - - -mrb_value -mrb_reg_regsub(mrb_state *mrb, mrb_value str, mrb_value src, struct re_registers *regs, mrb_value regexp) -{ - mrb_value val; - char *p, *s, *e; - struct RString *ps = mrb_str_ptr(str); - int no; - - val.tt = 0; - p = s = ps->buf; - e = s + ps->len; - - while (s < e) { - int c = *s; - char *ss; - - if (c == -1) { - s += e - s; - continue; - } - ss = s; - s++; - - if (c != '\\' || s == e) continue; - - //if (!val) { - if (!val.tt) { - val = mrb_str_buf_new(mrb, ss-p); - } - mrb_str_buf_cat(mrb, val, p, ss-p); - - c = *s; - if (c == -1) { - s += e - s; - mrb_str_buf_cat(mrb, val, ss, s-ss); - p = s; - continue; - } - s++; - - p = s; - switch (c) { - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if (onig_noname_group_capture_is_active(RREGEXP(regexp)->ptr)) { - no = c - '0'; - } - else { - continue; - } - break; - - case 'k': - if (s < e && *s == '<') { - char *name, *name_end; - - name_end = name = s + 1; - while (name_end < e) { - c = *name_end; - if (c == '>') break; - name_end += c == -1 ? e - name_end : 1; - } - if (name_end < e) { - no = name_to_backref_number(mrb, regs, RREGEXP(regexp), name, name_end); - p = s = name_end + 1; - break; - } - else { - mrb_raise(mrb, E_RUNTIME_ERROR, "invalid group name reference format"); - } - } - - mrb_str_buf_cat(mrb, val, ss, s-ss); - continue; - - case '0': - case '&': - no = 0; - break; - - case '`': - mrb_str_buf_cat(mrb, val, RSTRING_PTR(src), BEG(0)); - continue; - - case '\'': - mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0)); - continue; - - case '+': - no = regs->num_regs-1; - while (BEG(no) == -1 && no > 0) no--; - if (no == 0) continue; - break; - - case '\\': - mrb_str_buf_cat(mrb, val, s-1, 1); - continue; - - default: - mrb_str_buf_cat(mrb, val, ss, s-ss); - continue; - } - - if (no >= 0) { - if (no >= regs->num_regs) continue; - if (BEG(no) == -1) continue; - mrb_str_buf_cat(mrb, val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no)); - } - } /* while (s < e) { */ - - - if (!val.tt) return str; - if (p < e) { - mrb_str_buf_cat(mrb, val, p, e-p); - } - return val; -} - -static inline NODE * -lfp_svar_place(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp) -{ - NODE *svar; - - /*if (lfp && th->local_lfp != lfp) { - svar = &lfp[-1]; - } - else { - svar = mrb->&th->local_svar; - }*/ - svar = mrb->local_svar; - /*if (mrb_nil_p(*svar)) { - *svar = mrb_obj_value(NEW_IF(0, 0, 0)); - }*/ - return svar;//(NODE *)((*svar).value.p); -} - -static mrb_value -lfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key) -{ - //mrb_value *regs; - NODE *svar = lfp_svar_place(mrb, /*th,*/ lfp); - //regs = mrb->stack; - - switch (key) { - case 0: - return svar->u1.value; - case 1: - return svar->u2.value; - default: { - return svar->u3.value; - /*const mrb_value hash = regs[GETARG_C(*svar)];//svar->u3.value; - - if (mrb_nil_p(hash)) { - return mrb_nil_value(); - } - else { - return mrb_hash_get(mrb, hash, mrb_fixnum_value(key));//mrb_hash_lookup(hash, key); - }*/ - } - } -} - -static void -lfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th,*/ mrb_value *lfp, mrb_int key, mrb_value val) -{ - //mrb_value *regs; - NODE *svar = lfp_svar_place(mrb, /*th,*/ lfp); - //regs = mrb->stack; - - switch (key) { - case 0: - svar->u1.value = val; - return; - case 1: - svar->u2.value = val; - return; - default: { - svar->u3.value = val; - //mrb_value hash = *svar;//svar->u3.value; - - //if (mrb_nil_p(hash)) { - // svar->u3.value = hash = mrb_hash_new(mrb, 0); - //} - //mrb_hash_aset(hash, key, val); - //mrb_hash_set(mrb, hash, mrb_fixnum_value(key), val); - } - } -} - -static mrb_value -vm_cfp_svar_get(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key) -{ - //cfp = vm_normal_frame(th, cfp); - return lfp_svar_get(mrb, /*th, cfp ? cfp->lfp :*/ 0, key); -} - -static void -vm_cfp_svar_set(mrb_state *mrb, /*mrb_thread_t *th, mrb_control_frame_t *cfp,*/ mrb_int key, const mrb_value val) -{ - //cfp = vm_normal_frame(th, cfp); - lfp_svar_set(mrb, /*th, cfp ? cfp->lfp : */0, key, val); -} - -static mrb_value -vm_svar_get(mrb_state *mrb, mrb_int key) -{ - //mrb_thread_t *th = GET_THREAD(); - return vm_cfp_svar_get(mrb,/*th, th->cfp,*/ key); -} - -static void -vm_svar_set(mrb_state *mrb, mrb_int key, mrb_value val) -{ - //mrb_thread_t *th = GET_THREAD(); - vm_cfp_svar_set(mrb,/*th, th->cfp,*/ key, val); -} - - -int -mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref) -{ - return match_backref_number(mrb, match, backref); -} - -mrb_value -mrb_backref_get(mrb_state *mrb) -{ - return vm_svar_get(mrb, 1); -} - -void -mrb_backref_set(mrb_state *mrb, mrb_value val) -{ - vm_svar_set(mrb, 1, val); -} -#endif //ENABLE_REGEXP - -#ifdef INCLUDE_ENCODING -static inline long -mrb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n) -{ - const unsigned char *x = xs, *xe = xs + m; - const unsigned char *y = ys; - int i, qstable[256]; - - /* Preprocessing */ - for (i = 0; i < 256; ++i) - qstable[i] = m + 1; - for (; x < xe; ++x) - qstable[*x] = xe - x; - /* Searching */ - for (; y + m <= ys + n; y += *(qstable + y[m])) { - if (*xs == *y && memcmp(xs, y, m) == 0) - return y - ys; - } - return -1; -} - -static inline unsigned int -mrb_memsearch_qs_utf8_hash(const unsigned char *x) -{ - register const unsigned int mix = 8353; - register unsigned int h = *x; - if (h < 0xC0) { - return h + 256; - } - else if (h < 0xE0) { - h *= mix; - h += x[1]; - } - else if (h < 0xF0) { - h *= mix; - h += x[1]; - h *= mix; - h += x[2]; - } - else if (h < 0xF5) { - h *= mix; - h += x[1]; - h *= mix; - h += x[2]; - h *= mix; - h += x[3]; - } - else { - return h + 256; - } - return (unsigned char)h; -} - -static inline long -mrb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n) -{ - const unsigned char *x = xs, *xe = xs + m; - const unsigned char *y = ys; - int i, qstable[512]; - - /* Preprocessing */ - for (i = 0; i < 512; ++i) { - qstable[i] = m + 1; - } - for (; x < xe; ++x) { - qstable[mrb_memsearch_qs_utf8_hash(x)] = xe - x; - } - /* Searching */ - for (; y + m <= ys + n; y += qstable[mrb_memsearch_qs_utf8_hash(y+m)]) { - if (*xs == *y && memcmp(xs, y, m) == 0) - return y - ys; - } - return -1; -} - -int -mrb_memsearch(mrb_state *mrb, const void *x0, int m, const void *y0, int n, mrb_encoding *enc) -{ - const unsigned char *x = x0, *y = y0; - - if (m > n) return -1; - else if (m == n) { - return memcmp(x0, y0, m) == 0 ? 0 : -1; - } - else if (m < 1) { - return 0; - } - else if (m == 1) { - const unsigned char *ys = y, *ye = ys + n; - for (; y < ye; ++y) { - if (*x == *y) - return y - ys; - } - return -1; - } - else { - return mrb_memsearch_qs(x0, m, y0, n); - } -} -#endif //INCLUDE_ENCODING - -#ifdef ENABLE_REGEXP -mrb_value -mrb_reg_init_str(mrb_state *mrb, mrb_value re, mrb_value s, int options) -{ - onig_errmsg_buffer err = ""; - - if (mrb_reg_initialize_str(mrb, re, s, options, err, NULL, 0) != 0) { - //mrb_reg_raise_str(s, options, err); - printf("mrb_reg_raise_str(s, options, err);"); - } - - return re; -} - -mrb_value -mrb_reg_alloc(mrb_state *mrb) -{ - mrb_value dummy = mrb_nil_value(); - return mrb_reg_s_alloc(mrb, dummy); -} - -mrb_value -mrb_reg_new_str(mrb_state *mrb, mrb_value s, int options) -{ - return mrb_reg_init_str(mrb, mrb_reg_alloc(mrb), s, options); -} - -mrb_value -mrb_reg_regcomp(mrb_state *mrb, mrb_value str) -{ - return mrb_reg_new_str(mrb, str, 0); -} - -int -re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range) -{ - /* Update the fastmap now if not correct already. */ - /*if (!bufp->fastmap_accurate) { - int ret = re_compile_fastmap0(bufp); - if (ret) return ret; - }*/ - - /* Adjust startpos for mbc string */ - /*if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) { - startpos = re_mbc_startpos(string, size, startpos, range); - }*/ - return startpos; -} -#endif //ENABLE_REGEXP - -#ifdef INCLUDE_ENCODING -static const unsigned char mbctab_ascii[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; -const unsigned char *re_mbctab = mbctab_ascii; - -#define is_identchar(p,e,enc) (mrb_enc_isalnum(*p,enc) || (*p) == '_' || !ISASCII(*p)) - -static int -is_special_global_name(const char *m, const char *e, mrb_encoding *enc) -{ - int mb = 0; - - if (m >= e) return 0; - switch (*m) { - case '~': case '*': case '$': case '?': case '!': case '@': - case '/': case '\\': case ';': case ',': case '.': case '=': - case ':': case '<': case '>': case '\"': - case '&': case '`': case '\'': case '+': - case '0': - ++m; - break; - case '-': - ++m; - if (m < e && is_identchar(m, e, enc)) { - if (!ISASCII(*m)) mb = 1; - m += e - m; - } - break; - default: - if (!mrb_enc_isdigit(*m, enc)) return 0; - do { - if (!ISASCII(*m)) mb = 1; - ++m; - } while (m < e && mrb_enc_isdigit(*m, enc)); - } - return m == e ? mb + 1 : 0; -} - -int -mrb_enc_symname2_p(const char *name, long len, mrb_encoding *enc) -{ - const char *m = name; - const char *e = m + len; - int localid = FALSE; - - if (!m) return FALSE; - switch (*m) { - case '\0': - return FALSE; - - case '$': - if (is_special_global_name(++m, e, enc)) return TRUE; - goto id; - - case '@': - if (*++m == '@') ++m; - goto id; - - case '<': - switch (*++m) { - case '<': ++m; break; - case '=': if (*++m == '>') ++m; break; - default: break; - } - break; - - case '>': - switch (*++m) { - case '>': case '=': ++m; break; - } - break; - - case '=': - switch (*++m) { - case '~': ++m; break; - case '=': if (*++m == '=') ++m; break; - default: return FALSE; - } - break; - - case '*': - if (*++m == '*') ++m; - break; - - case '+': case '-': - if (*++m == '@') ++m; - break; - - case '|': case '^': case '&': case '/': case '%': case '~': case '`': - ++m; - break; - - case '[': - if (*++m != ']') return FALSE; - if (*++m == '=') ++m; - break; - - case '!': - switch (*++m) { - case '\0': return TRUE; - case '=': case '~': ++m; break; - default: return FALSE; - } - break; - - default: - localid = !mrb_enc_isupper(*m, enc); -id: - if (m >= e || (*m != '_' && !mrb_enc_isalpha(*m, enc) && ISASCII(*m))) - return FALSE; - while (m < e && is_identchar(m, e, enc)) m += e - m; - if (localid) { - switch (*m) { - case '!': case '?': case '=': ++m; - } - } - break; - } - return m == e; -} - -int -mrb_enc_symname_p(const char *name, mrb_encoding *enc) -{ - return mrb_enc_symname2_p(name, strlen(name), enc); + //MRB_SET_INSTANCE_TT(s, MRB_TT_REGEX); } -#endif //INCLUDE_ENCODING @@ -11,26 +11,9 @@ #include <stdio.h> #include "node.h" -#include "regex.h" -#include "encoding.h" #include "st.h" -#define BEG(no) regs->beg[no] -#define END(no) regs->end[no] - -struct rmatch_offset { - long beg; - long end; -}; - -struct rmatch { - struct re_registers regs; - - int char_offset_updated; - int char_offset_num_allocated; - struct rmatch_offset *char_offset; -}; - +/* mattn struct RMatch { MRB_OBJECT_HEADER; struct RString *str; @@ -44,38 +27,6 @@ struct RRegexp { struct RString *src; unsigned long usecnt; }; - -#define mrb_regex_ptr(r) ((struct RRegexp*)((r).value.p)) -#define RREGEXP(r) ((struct RRegexp*)((r).value.p)) -#define RREGEXP_SRC(r) (RREGEXP(r)->src) -#define RREGEXP_SRC_PTR(r) (RREGEXP_SRC(r)->buf) -#define RREGEXP_SRC_LEN(r) (RREGEXP_SRC(r)->len) -int re_adjust_startpos(struct re_pattern_buffer *bufp, const char *string, int size, int startpos, int range); - -typedef struct re_pattern_buffer Regexp; - -//#define RMATCH(obj) (R_CAST(RMatch)(obj)) -#define RMATCH_REGS(v) (&((struct RMatch*)((v).value.p))->rmatch->regs) -#define RMATCH(v) ((struct RMatch*)((v).value.p)) -#define mrb_match_ptr(v) ((struct RMatch*)((v).value.p)) - -int mrb_memcmp(const void *p1, const void *p2, int len); - -mrb_int mrb_reg_search (mrb_state *mrb, mrb_value, mrb_value, mrb_int, mrb_int); -mrb_value mrb_reg_regsub (mrb_state *mrb, mrb_value, mrb_value, struct re_registers *, mrb_value); -//mrb_value mrb_reg_regsub(mrb_value, mrb_value, struct re_registers *, mrb_value); -mrb_int mrb_reg_adjust_startpos(mrb_state *mrb, mrb_value re, mrb_value str, mrb_int pos, mrb_int reverse); -void mrb_match_busy (mrb_value); - -mrb_value mrb_reg_quote(mrb_state *mrb, mrb_value str); -mrb_value mrb_reg_regcomp(mrb_state *mrb, mrb_value str); -mrb_value mrb_reg_match_str(mrb_state *mrb, mrb_value re, mrb_value str); -mrb_value mrb_reg_nth_match(mrb_state *mrb, mrb_int nth, mrb_value match); -mrb_value mrb_backref_get(mrb_state *mrb); -//mrb_int mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n); -mrb_value mrb_reg_to_s(mrb_state *mrb, mrb_value re); -void mrb_backref_set(mrb_state *mrb, mrb_value val); -mrb_value match_alloc(mrb_state *mrb); -int mrb_reg_backref_number(mrb_state *mrb, mrb_value match, mrb_value backref); +*/ #endif diff --git a/src/regcomp.c b/src/regcomp.c deleted file mode 100644 index b8c652999..000000000 --- a/src/regcomp.c +++ /dev/null @@ -1,6288 +0,0 @@ -/********************************************************************** - regcomp.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "mruby.h" -#include <string.h> -#include "regparse.h" -#ifdef ENABLE_REGEXP - -OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; - -extern OnigCaseFoldType -onig_get_default_case_fold_flag(void) -{ - return OnigDefaultCaseFoldFlag; -} - -extern int -onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) -{ - OnigDefaultCaseFoldFlag = case_fold_flag; - return 0; -} - - -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - -static UChar* -str_dup(UChar* s, UChar* end) -{ - ptrdiff_t len = end - s; - - if (len > 0) { - UChar* r = (UChar* )xmalloc(len + 1); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, len); - r[len] = (UChar )0; - return r; - } - else return NULL; -} - -static void -swap_node(Node* a, Node* b) -{ - Node c; - c = *a; *a = *b; *b = c; - - if (NTYPE(a) == NT_STR) { - StrNode* sn = NSTR(a); - if (sn->capa == 0) { - size_t len = sn->end - sn->s; - sn->s = sn->buf; - sn->end = sn->s + len; - } - } - - if (NTYPE(b) == NT_STR) { - StrNode* sn = NSTR(b); - if (sn->capa == 0) { - size_t len = sn->end - sn->s; - sn->s = sn->buf; - sn->end = sn->s + len; - } - } -} - -static OnigDistance -distance_add(OnigDistance d1, OnigDistance d2) -{ - if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) - return ONIG_INFINITE_DISTANCE; - else { - if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; - else return ONIG_INFINITE_DISTANCE; - } -} - -static OnigDistance -distance_multiply(OnigDistance d, int m) -{ - if (m == 0) return 0; - - if (d < ONIG_INFINITE_DISTANCE / m) - return d * m; - else - return ONIG_INFINITE_DISTANCE; -} - -static int -bitset_is_empty(BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { - if (bs[i] != 0) return 0; - } - return 1; -} - -#ifdef ONIG_DEBUG -static int -bitset_on_num(BitSetRef bs) -{ - int i, n; - - n = 0; - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(bs, i)) n++; - } - return n; -} -#endif - -extern int -onig_bbuf_init(BBuf* buf, int size) -{ - if (size <= 0) { - size = 0; - buf->p = NULL; - } - else { - buf->p = (UChar* )xmalloc(size); - if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); - } - - buf->alloc = size; - buf->used = 0; - return 0; -} - - -#ifdef USE_SUBEXP_CALL - -static int -unset_addr_list_init(UnsetAddrList* uslist, int size) -{ - UnsetAddr* p; - - p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); - CHECK_NULL_RETURN_MEMERR(p); - uslist->num = 0; - uslist->alloc = size; - uslist->us = p; - return 0; -} - -static void -unset_addr_list_end(UnsetAddrList* uslist) -{ - if (IS_NOT_NULL(uslist->us)) - xfree(uslist->us); -} - -static int -unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) -{ - UnsetAddr* p; - int size; - - if (uslist->num >= uslist->alloc) { - size = uslist->alloc * 2; - p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); - CHECK_NULL_RETURN_MEMERR(p); - uslist->alloc = size; - uslist->us = p; - } - - uslist->us[uslist->num].offset = offset; - uslist->us[uslist->num].target = node; - uslist->num++; - return 0; -} -#endif /* USE_SUBEXP_CALL */ - - -static int -add_opcode(regex_t* reg, int opcode) -{ - BBUF_ADD1(reg, opcode); - return 0; -} - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -static int -add_state_check_num(regex_t* reg, int num) -{ - StateCheckNumType n = (StateCheckNumType )num; - - BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); - return 0; -} -#endif - -static int -add_rel_addr(regex_t* reg, int addr) -{ - RelAddrType ra = (RelAddrType )addr; - - BBUF_ADD(reg, &ra, SIZE_RELADDR); - return 0; -} - -static int -add_abs_addr(regex_t* reg, int addr) -{ - AbsAddrType ra = (AbsAddrType )addr; - - BBUF_ADD(reg, &ra, SIZE_ABSADDR); - return 0; -} - -static int -add_length(regex_t* reg, int len) -{ - LengthType l = (LengthType )len; - - BBUF_ADD(reg, &l, SIZE_LENGTH); - return 0; -} - -static int -add_mem_num(regex_t* reg, int num) -{ - MemNumType n = (MemNumType )num; - - BBUF_ADD(reg, &n, SIZE_MEMNUM); - return 0; -} - -static int -add_pointer(regex_t* reg, void* addr) -{ - PointerType ptr = (PointerType )addr; - - BBUF_ADD(reg, &ptr, SIZE_POINTER); - return 0; -} - -static int -add_option(regex_t* reg, OnigOptionType option) -{ - BBUF_ADD(reg, &option, SIZE_OPTION); - return 0; -} - -static int -add_opcode_rel_addr(regex_t* reg, int opcode, int addr) -{ - int r; - - r = add_opcode(reg, opcode); - if (r) return r; - r = add_rel_addr(reg, addr); - return r; -} - -static int -add_bytes(regex_t* reg, UChar* bytes, int len) -{ - BBUF_ADD(reg, bytes, len); - return 0; -} - -static int -add_bitset(regex_t* reg, BitSetRef bs) -{ - BBUF_ADD(reg, bs, SIZE_BITSET); - return 0; -} - -static int -add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) -{ - int r; - - r = add_opcode(reg, opcode); - if (r) return r; - r = add_option(reg, option); - return r; -} - -static int compile_length_tree(Node* node, regex_t* reg); -static int compile_tree(Node* node, regex_t* reg); - - -#define IS_NEED_STR_LEN_OP_EXACT(op) \ - ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\ - (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC) - -static int -select_str_opcode(int mb_len, int str_len, int ignore_case) -{ - int op; - - if (ignore_case) { - switch (str_len) { - case 1: op = OP_EXACT1_IC; break; - default: op = OP_EXACTN_IC; break; - } - } - else { - switch (mb_len) { - case 1: - switch (str_len) { - case 1: op = OP_EXACT1; break; - case 2: op = OP_EXACT2; break; - case 3: op = OP_EXACT3; break; - case 4: op = OP_EXACT4; break; - case 5: op = OP_EXACT5; break; - default: op = OP_EXACTN; break; - } - break; - - case 2: - switch (str_len) { - case 1: op = OP_EXACTMB2N1; break; - case 2: op = OP_EXACTMB2N2; break; - case 3: op = OP_EXACTMB2N3; break; - default: op = OP_EXACTMB2N; break; - } - break; - - case 3: - op = OP_EXACTMB3N; - break; - - default: - op = OP_EXACTMBN; - break; - } - } - return op; -} - -static int -compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) -{ - int r; - int saved_num_null_check = reg->num_null_check; - - if (empty_info != 0) { - r = add_opcode(reg, OP_NULL_CHECK_START); - if (r) return r; - r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ - if (r) return r; - reg->num_null_check++; - } - - r = compile_tree(node, reg); - if (r) return r; - - if (empty_info != 0) { - if (empty_info == NQ_TARGET_IS_EMPTY) - r = add_opcode(reg, OP_NULL_CHECK_END); - else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); - else if (empty_info == NQ_TARGET_IS_EMPTY_REC) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); - - if (r) return r; - r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ - } - return r; -} - -#ifdef USE_SUBEXP_CALL -static int -compile_call(CallNode* node, regex_t* reg) -{ - int r; - - r = add_opcode(reg, OP_CALL); - if (r) return r; - r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), - node->target); - if (r) return r; - r = add_abs_addr(reg, 0 /*dummy addr.*/); - return r; -} -#endif - -static int -compile_tree_n_times(Node* node, int n, regex_t* reg) -{ - int i, r; - - for (i = 0; i < n; i++) { - r = compile_tree(node, reg); - if (r) return r; - } - return 0; -} - -static int -add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len, - regex_t* reg ARG_UNUSED, int ignore_case) -{ - int len; - int op = select_str_opcode(mb_len, str_len, ignore_case); - - len = SIZE_OPCODE; - - if (op == OP_EXACTMBN) len += SIZE_LENGTH; - if (IS_NEED_STR_LEN_OP_EXACT(op)) - len += SIZE_LENGTH; - - len += mb_len * str_len; - return len; -} - -static int -add_compile_string(UChar* s, int mb_len, int str_len, - regex_t* reg, int ignore_case) -{ - int op = select_str_opcode(mb_len, str_len, ignore_case); - add_opcode(reg, op); - - if (op == OP_EXACTMBN) - add_length(reg, mb_len); - - if (IS_NEED_STR_LEN_OP_EXACT(op)) { - if (op == OP_EXACTN_IC) - add_length(reg, mb_len * str_len); - else - add_length(reg, str_len); - } - - add_bytes(reg, s, mb_len * str_len); - return 0; -} - - -static int -compile_length_string_node(Node* node, regex_t* reg) -{ - int rlen, r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; - UChar *p, *prev; - StrNode* sn; - - sn = NSTR(node); - if (sn->end <= sn->s) - return 0; - - ambig = NSTRING_IS_AMBIG(node); - - p = prev = sn->s; - prev_len = enclen(enc, p, sn->end); - p += prev_len; - slen = 1; - rlen = 0; - - for (; p < sn->end; ) { - len = enclen(enc, p, sn->end); - if (len == prev_len) { - slen++; - } - else { - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); - rlen += r; - prev = p; - slen = 1; - prev_len = len; - } - p += len; - } - r = add_compile_string_length(prev, prev_len, slen, reg, ambig); - rlen += r; - return rlen; -} - -static int -compile_length_string_raw_node(StrNode* sn, regex_t* reg) -{ - if (sn->end <= sn->s) - return 0; - - return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); -} - -static int -compile_string_node(Node* node, regex_t* reg) -{ - int r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; - UChar *p, *prev, *end; - StrNode* sn; - - sn = NSTR(node); - if (sn->end <= sn->s) - return 0; - - end = sn->end; - ambig = NSTRING_IS_AMBIG(node); - - p = prev = sn->s; - prev_len = enclen(enc, p, end); - p += prev_len; - slen = 1; - - for (; p < end; ) { - len = enclen(enc, p, end); - if (len == prev_len) { - slen++; - } - else { - r = add_compile_string(prev, prev_len, slen, reg, ambig); - if (r) return r; - - prev = p; - slen = 1; - prev_len = len; - } - - p += len; - } - return add_compile_string(prev, prev_len, slen, reg, ambig); -} - -static int -compile_string_raw_node(StrNode* sn, regex_t* reg) -{ - if (sn->end <= sn->s) - return 0; - - return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); -} - -static int -add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) -{ -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - add_length(reg, mbuf->used); - return add_bytes(reg, mbuf->p, mbuf->used); -#else - int r, pad_size; - UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; - - GET_ALIGNMENT_PAD_SIZE(p, pad_size); - add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1)); - if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - - r = add_bytes(reg, mbuf->p, mbuf->used); - - /* padding for return value from compile_length_cclass_node() to be fix. */ - pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size; - if (pad_size != 0) add_bytes(reg, PadBuf, pad_size); - return r; -#endif -} - -static int -compile_length_cclass_node(CClassNode* cc, regex_t* reg) -{ - int len; - - if (IS_NCCLASS_SHARE(cc)) { - len = SIZE_OPCODE + SIZE_POINTER; - return len; - } - - if (IS_NULL(cc->mbuf)) { - len = SIZE_OPCODE + SIZE_BITSET; - } - else { - if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - len = SIZE_OPCODE; - } - else { - len = SIZE_OPCODE + SIZE_BITSET; - } -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - len += SIZE_LENGTH + cc->mbuf->used; -#else - len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1); -#endif - } - - return len; -} - -static int -compile_cclass_node(CClassNode* cc, regex_t* reg) -{ - int r; - - if (IS_NCCLASS_SHARE(cc)) { - add_opcode(reg, OP_CCLASS_NODE); - r = add_pointer(reg, cc); - return r; - } - - if (IS_NULL(cc->mbuf)) { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_NOT); - else - add_opcode(reg, OP_CCLASS); - - r = add_bitset(reg, cc->bs); - } - else { - if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MB_NOT); - else - add_opcode(reg, OP_CCLASS_MB); - - r = add_multi_byte_cclass(cc->mbuf, reg); - } - else { - if (IS_NCCLASS_NOT(cc)) - add_opcode(reg, OP_CCLASS_MIX_NOT); - else - add_opcode(reg, OP_CCLASS_MIX); - - r = add_bitset(reg, cc->bs); - if (r) return r; - r = add_multi_byte_cclass(cc->mbuf, reg); - } - } - - return r; -} - -static int -entry_repeat_range(regex_t* reg, int id, int lower, int upper) -{ -#define REPEAT_RANGE_ALLOC 4 - - OnigRepeatRange* p; - - if (reg->repeat_range_alloc == 0) { - p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC); - CHECK_NULL_RETURN_MEMERR(p); - reg->repeat_range = p; - reg->repeat_range_alloc = REPEAT_RANGE_ALLOC; - } - else if (reg->repeat_range_alloc <= id) { - int n; - n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC; - p = (OnigRepeatRange* )xrealloc(reg->repeat_range, - sizeof(OnigRepeatRange) * n); - CHECK_NULL_RETURN_MEMERR(p); - reg->repeat_range = p; - reg->repeat_range_alloc = n; - } - else { - p = reg->repeat_range; - } - - p[id].lower = lower; - p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); - return 0; -} - -static int -compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, - regex_t* reg) -{ - int r; - int num_repeat = reg->num_repeat; - - r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); - if (r) return r; - r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ - reg->num_repeat++; - if (r) return r; - r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); - if (r) return r; - - r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); - if (r) return r; - - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - - if ( -#ifdef USE_SUBEXP_CALL - reg->num_call > 0 || -#endif - IS_QUANTIFIER_IN_REPEAT(qn)) { - r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); - } - else { - r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); - } - if (r) return r; - r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ - return r; -} - -static int -is_anychar_star_quantifier(QtfrNode* qn) -{ - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == NT_CANY) - return 1; - else - return 0; -} - -#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 -#define CKN_ON (ckn > 0) - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int len, mod_tlen, cklen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); - - /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) - len = SIZE_OP_JUMP; - else - len = 0; - - len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; - } - else { - if (qn->lower == 0) - len = SIZE_OP_JUMP; - else - len = 0; - - len += mod_tlen + SIZE_OP_PUSH + cklen; - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) /* /(?<n>..){0}/ */ - len = SIZE_OP_JUMP + tlen; - else - len = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; - } - else { - len = SIZE_OP_PUSH + tlen; - } - } - else { - len = tlen; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - if (CKN_ON) - len += SIZE_OP_STATE_CHECK; - } - - return len; -} - -static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int r, mod_tlen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - if (CKN_ON) { - r = add_state_check_num(reg, ckn); - if (r) return r; - } - - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(reg->options)) { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR - : OP_ANYCHAR_ML_STAR)); - } - else { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_STAR - : OP_ANYCHAR_STAR)); - } - if (r) return r; - if (CKN_ON) - r = add_state_check_num(reg, ckn); - - return r; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) { - r = add_opcode_rel_addr(reg, OP_JUMP, - (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r) return r; - } - - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - } - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP - + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); - } - else { - if (qn->lower == 0) { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - } - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, - -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); - } - else - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else - r = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, tlen); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, tlen); - } - if (r) return r; - } - - r = compile_tree(qn->target, reg); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; - r = add_state_check_num(reg, ckn); - if (r) return r; - r = add_rel_addr(reg, SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - } - - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); - if (CKN_ON) { - if (r) return r; - r = add_opcode(reg, OP_STATE_CHECK); - if (r) return r; - r = add_state_check_num(reg, ckn); - } - } - return r; -} - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int len, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact)) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower; - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { - len = SIZE_OP_JUMP; - } - else { - len = tlen * qn->lower; - } - - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) - len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP; - else if (IS_NOT_NULL(qn->next_head_exact)) - len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP; - else - len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP; - } - else - len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH; - } - else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ - len = SIZE_OP_JUMP + tlen; - } - else if (!infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - len = tlen * qn->lower; - len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - } - - return len; -} - -static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) -{ - int i, r, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); - - if (tlen < 0) return tlen; - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - if (IS_NOT_NULL(qn->next_head_exact)) { - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(reg->options)) - return add_opcode(reg, OP_ANYCHAR_ML_STAR); - else - return add_opcode(reg, OP_ANYCHAR_STAR); - } - } - - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else - mod_tlen = tlen; - - if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); - else if (IS_NOT_NULL(qn->next_head_exact)) - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT); - else - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH); - } - else { - r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); - } - if (r) return r; - } - else { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - } - - if (qn->greedy) { - if (IS_NOT_NULL(qn->head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); - } - else if (IS_NOT_NULL(qn->next_head_exact)) { - r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, - mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); - } - } - else { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else if (!infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { - int n = qn->upper - qn->lower; - - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - - for (i = 0; i < n; i++) { - r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); - } - return r; -} -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -static int -compile_length_option_node(EncloseNode* node, regex_t* reg) -{ - int tlen; - OnigOptionType prev = reg->options; - - reg->options = node->option; - tlen = compile_length_tree(node->target, reg); - reg->options = prev; - - if (tlen < 0) return tlen; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL - + tlen + SIZE_OP_SET_OPTION; - } - else - return tlen; -} - -static int -compile_option_node(EncloseNode* node, regex_t* reg) -{ - int r; - OnigOptionType prev = reg->options; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - if (r) return r; - r = add_opcode(reg, OP_FAIL); - if (r) return r; - } - - reg->options = node->option; - r = compile_tree(node->target, reg); - reg->options = prev; - - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - if (r) return r; - r = add_opcode_option(reg, OP_SET_OPTION, prev); - } - return r; -} - -static int -compile_length_enclose_node(EncloseNode* node, regex_t* reg) -{ - int len; - int tlen; - - if (node->type == ENCLOSE_OPTION) - return compile_length_option_node(node, reg); - - if (node->target) { - tlen = compile_length_tree(node->target, reg); - if (tlen < 0) return tlen; - } - else - tlen = 0; - - switch (node->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - len = SIZE_OP_MEMORY_START_PUSH + tlen - + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - } - else -#endif - { - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - len = SIZE_OP_MEMORY_START_PUSH; - else - len = SIZE_OP_MEMORY_START; - - len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) - ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); - } - break; - - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - tlen = compile_length_tree(qn->target, reg); - if (tlen < 0) return tlen; - - len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; - } - else { - len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return len; -} - -static int get_char_length_tree(Node* node, regex_t* reg, int* len); - -static int -compile_enclose_node(EncloseNode* node, regex_t* reg) -{ - int r, len; - - if (node->type == ENCLOSE_OPTION) - return compile_option_node(node, reg); - - switch (node->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - r = add_opcode(reg, OP_CALL); - if (r) return r; - node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; - node->state |= NST_ADDR_FIXED; - r = add_abs_addr(reg, (int )node->call_addr); - if (r) return r; - len = compile_length_tree(node->target, reg); - len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) return r; - } -#endif - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - r = add_opcode(reg, OP_MEMORY_START_PUSH); - else - r = add_opcode(reg, OP_MEMORY_START); - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); - else - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_REC : OP_MEMORY_END)); - - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = add_opcode(reg, OP_RETURN); - } - else -#endif - { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH); - else - r = add_opcode(reg, OP_MEMORY_END); - if (r) return r; - r = add_mem_num(reg, node->regnum); - } - break; - - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; - - len = compile_length_tree(qn->target, reg); - if (len < 0) return len; - - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP); - if (r) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); - } - else { - r = add_opcode(reg, OP_PUSH_STOP_BT); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP_STOP_BT); - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_length_anchor_node(AnchorNode* node, regex_t* reg) -{ - int len; - int tlen = 0; - - if (node->target) { - tlen = compile_length_tree(node->target, reg); - if (tlen < 0) return tlen; - } - - switch (node->type) { - case ANCHOR_PREC_READ: - len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; - break; - case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; - break; - case ANCHOR_LOOK_BEHIND: - len = SIZE_OP_LOOK_BEHIND + tlen; - break; - case ANCHOR_LOOK_BEHIND_NOT: - len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; - break; - - default: - len = SIZE_OPCODE; - break; - } - - return len; -} - -static int -compile_anchor_node(AnchorNode* node, regex_t* reg) -{ - int r, len; - - switch (node->type) { - case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break; - case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break; - case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break; - case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break; - case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break; - case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break; - - case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break; - case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break; -#ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break; - case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break; -#endif - - case ANCHOR_PREC_READ: - r = add_opcode(reg, OP_PUSH_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_POP_POS); - break; - - case ANCHOR_PREC_READ_NOT: - len = compile_length_tree(node->target, reg); - if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_FAIL_POS); - break; - - case ANCHOR_LOOK_BEHIND: - { - int n; - r = add_opcode(reg, OP_LOOK_BEHIND); - if (r) return r; - if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; - r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - } - break; - - case ANCHOR_LOOK_BEHIND_NOT: - { - int n; - len = compile_length_tree(node->target, reg); - r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, - len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); - if (r) return r; - if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - else - n = node->char_len; - r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; - r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); - } - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_length_tree(Node* node, regex_t* reg) -{ - int len, type, r; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - len = 0; - do { - r = compile_length_tree(NCAR(node), reg); - if (r < 0) return r; - len += r; - } while (IS_NOT_NULL(node = NCDR(node))); - r = len; - break; - - case NT_ALT: - { - int n; - - n = r = 0; - do { - r += compile_length_tree(NCAR(node), reg); - n++; - } while (IS_NOT_NULL(node = NCDR(node))); - r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); - } - break; - - case NT_STR: - if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(NSTR(node), reg); - else - r = compile_length_string_node(node, reg); - break; - - case NT_CCLASS: - r = compile_length_cclass_node(NCCLASS(node), reg); - break; - - case NT_CTYPE: - case NT_CANY: - r = SIZE_OPCODE; - break; - - case NT_BREF: - { - BRefNode* br = NBREF(node); - -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + - SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - else -#endif - if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) - ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); - } - else { - r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - r = SIZE_OP_CALL; - break; -#endif - - case NT_QTFR: - r = compile_length_quantifier_node(NQTFR(node), reg); - break; - - case NT_ENCLOSE: - r = compile_length_enclose_node(NENCLOSE(node), reg); - break; - - case NT_ANCHOR: - r = compile_length_anchor_node(NANCHOR(node), reg); - break; - - default: - return ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -compile_tree(Node* node, regex_t* reg) -{ - int n, type, len, pos, r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = compile_tree(NCAR(node), reg); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - Node* x = node; - len = 0; - do { - len += compile_length_tree(NCAR(x), reg); - if (NCDR(x) != NULL) { - len += SIZE_OP_PUSH + SIZE_OP_JUMP; - } - } while (IS_NOT_NULL(x = NCDR(x))); - pos = reg->used + len; /* goal position */ - - do { - len = compile_length_tree(NCAR(node), reg); - if (IS_NOT_NULL(NCDR(node))) { - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); - if (r) break; - } - r = compile_tree(NCAR(node), reg); - if (r) break; - if (IS_NOT_NULL(NCDR(node))) { - len = pos - (reg->used + SIZE_OP_JUMP); - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) break; - } - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_STR: - if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(NSTR(node), reg); - else - r = compile_string_node(node, reg); - break; - - case NT_CCLASS: - r = compile_cclass_node(NCCLASS(node), reg); - break; - - case NT_CTYPE: - { - int op; - - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->is_not != 0) op = OP_NOT_WORD; - else op = OP_WORD; - break; - default: - return ONIGERR_TYPE_BUG; - break; - } - r = add_opcode(reg, op); - } - break; - - case NT_CANY: - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML); - else - r = add_opcode(reg, OP_ANYCHAR); - break; - - case NT_BREF: - { - BRefNode* br = NBREF(node); - -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r) return r; - r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r) return r; - r = add_length(reg, br->nest_level); - if (r) return r; - - goto add_bacref_mems; - } - else -#endif - if (br->back_num == 1) { - n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREFN_IC); - if (r) return r; - r = add_mem_num(reg, n); - } - else { - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - default: - r = add_opcode(reg, OP_BACKREFN); - if (r) return r; - r = add_mem_num(reg, n); - break; - } - } - } - else { - int i; - int* p; - - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREF_MULTI_IC); - } - else { - r = add_opcode(reg, OP_BACKREF_MULTI); - } - if (r) return r; - -#ifdef USE_BACKREF_WITH_LEVEL - add_bacref_mems: -#endif - r = add_length(reg, br->back_num); - if (r) return r; - p = BACKREFS_P(br); - for (i = br->back_num - 1; i >= 0; i--) { - r = add_mem_num(reg, p[i]); - if (r) return r; - } - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - r = compile_call(NCALL(node), reg); - break; -#endif - - case NT_QTFR: - r = compile_quantifier_node(NQTFR(node), reg); - break; - - case NT_ENCLOSE: - r = compile_enclose_node(NENCLOSE(node), reg); - break; - - case NT_ANCHOR: - r = compile_anchor_node(NANCHOR(node), reg); - break; - - default: -#ifdef ONIG_DEBUG - fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); -#endif - break; - } - - return r; -} - -#ifdef USE_NAMED_GROUP - -static int -noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) -{ - int r = 0; - Node* node = *plink; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = noname_disable_map(&(NCAR(node)), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - { - Node** ptarget = &(NQTFR(node)->target); - Node* old = *ptarget; - r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { - onig_reduce_nested_quantifier(node, *ptarget); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - if (IS_ENCLOSE_NAMED_GROUP(en)) { - (*counter)++; - map[en->regnum].new_val = *counter; - en->regnum = *counter; - r = noname_disable_map(&(en->target), map, counter); - } - else { - *plink = en->target; - en->target = NULL_NODE; - onig_node_free(node); - r = noname_disable_map(plink, map, counter); - } - } - else - r = noname_disable_map(&(en->target), map, counter); - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = noname_disable_map(&(an->target), map, counter); - break; - } - } - break; - - default: - break; - } - - return r; -} - -static int -renumber_node_backref(Node* node, GroupNumRemap* map) -{ - int i, pos, n, old_num; - int *backs; - BRefNode* bn = NBREF(node); - - if (! IS_BACKREF_NAME_REF(bn)) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - - old_num = bn->back_num; - if (IS_NULL(bn->back_dynamic)) - backs = bn->back_static; - else - backs = bn->back_dynamic; - - for (i = 0, pos = 0; i < old_num; i++) { - n = map[backs[i]].new_val; - if (n > 0) { - backs[pos] = n; - pos++; - } - } - - bn->back_num = pos; - return 0; -} - -static int -renumber_by_map(Node* node, GroupNumRemap* map) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = renumber_by_map(NCAR(node), map); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = renumber_by_map(NQTFR(node)->target, map); - break; - case NT_ENCLOSE: - r = renumber_by_map(NENCLOSE(node)->target, map); - break; - - case NT_BREF: - r = renumber_node_backref(node, map); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = renumber_by_map(an->target, map); - break; - } - } - break; - - default: - break; - } - - return r; -} - -static int -numbered_ref_check(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r = numbered_ref_check(NCAR(node)); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = numbered_ref_check(NQTFR(node)->target); - break; - case NT_ENCLOSE: - r = numbered_ref_check(NENCLOSE(node)->target); - break; - - case NT_BREF: - if (! IS_BACKREF_NAME_REF(NBREF(node))) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - break; - - default: - break; - } - - return r; -} - -static int -disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) -{ - int r, i, pos, counter; - BitStatusType loc; - GroupNumRemap* map; - - map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); - CHECK_NULL_RETURN_MEMERR(map); - for (i = 1; i <= env->num_mem; i++) { - map[i].new_val = 0; - } - counter = 0; - r = noname_disable_map(root, map, &counter); - if (r != 0) return r; - - r = renumber_by_map(*root, map); - if (r != 0) return r; - - for (i = 1, pos = 1; i <= env->num_mem; i++) { - if (map[i].new_val > 0) { - SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; - pos++; - } - } - - loc = env->capture_history; - BIT_STATUS_CLEAR(env->capture_history); - for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(loc, i)) { - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); - } - } - - env->num_mem = env->num_named; - reg->num_mem = env->num_named; - - return onig_renumber_name_table(reg, map); -} -#endif /* USE_NAMED_GROUP */ - -#ifdef USE_SUBEXP_CALL -static int -unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) -{ - int i, offset; - EncloseNode* en; - AbsAddrType addr; - - for (i = 0; i < uslist->num; i++) { - en = NENCLOSE(uslist->us[i].target); - if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; - addr = en->call_addr; - offset = uslist->us[i].offset; - - BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); - } - return 0; -} -#endif - -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT -static int -quantifiers_memory_node_info(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - { - int v; - do { - v = quantifiers_memory_node_info(NCAR(node)); - if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ - } - else - r = quantifiers_memory_node_info(NCALL(node)->target); - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->upper != 0) { - r = quantifiers_memory_node_info(qn->target); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: - return NQ_TARGET_IS_EMPTY_MEM; - break; - - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = quantifiers_memory_node_info(en->target); - break; - default: - break; - } - } - break; - - case NT_BREF: - case NT_STR: - case NT_CTYPE: - case NT_CCLASS: - case NT_CANY: - case NT_ANCHOR: - default: - break; - } - - return r; -} -#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ - -static int -get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) -{ - OnigDistance tmin; - int r = 0; - - *min = 0; - switch (NTYPE(node)) { - case NT_BREF: - { - int i; - int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) break; - - backs = BACKREFS_P(br); - if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_match_length(nodes[backs[0]], min, env); - if (r != 0) break; - for (i = 1; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_match_length(nodes[backs[i]], &tmin, env); - if (r != 0) break; - if (*min > tmin) *min = tmin; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - EncloseNode* en = NENCLOSE(NCALL(node)->target); - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; - } - else - r = get_min_match_length(NCALL(node)->target, min, env); - break; -#endif - - case NT_LIST: - do { - r = get_min_match_length(NCAR(node), &tmin, env); - if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - Node *x, *y; - y = node; - do { - x = NCAR(y); - r = get_min_match_length(x, &tmin, env); - if (r != 0) break; - if (y == node) *min = tmin; - else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - *min = sn->end - sn->s; - } - break; - - case NT_CTYPE: - *min = 1; - break; - - case NT_CCLASS: - case NT_CANY: - *min = 1; - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - - if (qn->lower > 0) { - r = get_min_match_length(qn->target, min, env); - if (r == 0) - *min = distance_multiply(*min, qn->lower); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; - else { - r = get_min_match_length(en->target, min, env); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_min_match_length(en->target, min, env); - break; - } - } - break; - - case NT_ANCHOR: - default: - break; - } - - return r; -} - -static int -get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) -{ - OnigDistance tmax; - int r = 0; - - *max = 0; - switch (NTYPE(node)) { - case NT_LIST: - do { - r = get_max_match_length(NCAR(node), &tmax, env); - if (r == 0) - *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = get_max_match_length(NCAR(node), &tmax, env); - if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - *max = sn->end - sn->s; - } - break; - - case NT_CTYPE: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_CCLASS: - case NT_CANY: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_BREF: - { - int i; - int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) { - *max = ONIG_INFINITE_DISTANCE; - break; - } - backs = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_max_match_length(nodes[backs[i]], &tmax, env); - if (r != 0) break; - if (*max < tmax) *max = tmax; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_max_match_length(NCALL(node)->target, max, env); - else - *max = ONIG_INFINITE_DISTANCE; - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - - if (qn->upper != 0) { - r = get_max_match_length(qn->target, max, env); - if (r == 0 && *max != 0) { - if (! IS_REPEAT_INFINITE(qn->upper)) - *max = distance_multiply(*max, qn->upper); - else - *max = ONIG_INFINITE_DISTANCE; - } - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; - else { - r = get_max_match_length(en->target, max, env); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_max_match_length(en->target, max, env); - break; - } - } - break; - - case NT_ANCHOR: - default: - break; - } - - return r; -} - -#define GET_CHAR_LEN_VARLEN -1 -#define GET_CHAR_LEN_TOP_ALT_VARLEN -2 - -/* fixed size pattern node only */ -static int -get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) -{ - int tlen; - int r = 0; - - level++; - *len = 0; - switch (NTYPE(node)) { - case NT_LIST: - do { - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - if (r == 0) - *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - { - int tlen2; - int varlen = 0; - - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { - r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); - if (r == 0) { - if (tlen != tlen2) - varlen = 1; - } - } - if (r == 0) { - if (varlen != 0) { - if (level == 1) - r = GET_CHAR_LEN_TOP_ALT_VARLEN; - else - r = GET_CHAR_LEN_VARLEN; - } - else - *len = tlen; - } - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - UChar *s = sn->s; - while (s < sn->end) { - s += enclen(reg->enc, s, sn->end); - (*len)++; - } - } - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->lower == qn->upper) { - r = get_char_length_tree1(qn->target, reg, &tlen, level); - if (r == 0) - *len = distance_multiply(tlen, qn->lower); - } - else - r = GET_CHAR_LEN_VARLEN; - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_char_length_tree1(NCALL(node)->target, reg, len, level); - else - r = GET_CHAR_LEN_VARLEN; - break; -#endif - - case NT_CTYPE: - *len = 1; - break; - - case NT_CCLASS: - case NT_CANY: - *len = 1; - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) - *len = en->char_len; - else { - r = get_char_length_tree1(en->target, reg, len, level); - if (r == 0) { - en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); - } - } - break; -#endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_char_length_tree1(en->target, reg, len, level); - break; - default: - break; - } - } - break; - - case NT_ANCHOR: - break; - - default: - r = GET_CHAR_LEN_VARLEN; - break; - } - - return r; -} - -static int -get_char_length_tree(Node* node, regex_t* reg, int* len) -{ - return get_char_length_tree1(node, reg, len, 0); -} - -/* x is not included y ==> 1 : 0 */ -static int -is_not_included(Node* x, Node* y, regex_t* reg) -{ - int i, len; - OnigCodePoint code; - UChar *p, c; - int ytype; - - retry: - ytype = NTYPE(y); - switch (NTYPE(x)) { - case NT_CTYPE: - { - switch (ytype) { - case NT_CTYPE: - if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && - NCTYPE(y)->is_not != NCTYPE(x)->is_not) - return 1; - else - return 0; - break; - - case NT_CCLASS: - swap: - { - Node* tmp; - tmp = x; x = y; y = tmp; - goto retry; - } - break; - - case NT_STR: - goto swap; - break; - - default: - break; - } - } - break; - - case NT_CCLASS: - { - CClassNode* xc = NCCLASS(x); - switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(y)->is_not == 0) { - if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(xc->bs, i)) { - if (IS_CODE_SB_WORD(reg->enc, i)) return 0; - } - } - return 1; - } - return 0; - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! IS_CODE_SB_WORD(reg->enc, i)) { - if (!IS_NCCLASS_NOT(xc)) { - if (BITSET_AT(xc->bs, i)) - return 0; - } - else { - if (! BITSET_AT(xc->bs, i)) - return 0; - } - } - } - return 1; - } - break; - - default: - break; - } - break; - - case NT_CCLASS: - { - int v; - CClassNode* yc = NCCLASS(y); - - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(xc)) || - (v == 0 && IS_NCCLASS_NOT(xc))) { - v = BITSET_AT(yc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(yc)) || - (v == 0 && IS_NCCLASS_NOT(yc))) - return 0; - } - } - if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) || - (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc))) - return 1; - return 0; - } - break; - - case NT_STR: - goto swap; - break; - - default: - break; - } - } - break; - - case NT_STR: - { - StrNode* xs = NSTR(x); - if (NSTRING_LEN(x) == 0) - break; - - c = *(xs->s); - switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { - case ONIGENC_CTYPE_WORD: - if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y)->is_not; - else - return !(NCTYPE(y)->is_not); - break; - default: - break; - } - break; - - case NT_CCLASS: - { - CClassNode* cc = NCCLASS(y); - - code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, - xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); - return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1); - } - break; - - case NT_STR: - { - UChar *q; - StrNode* ys = NSTR(y); - len = NSTRING_LEN(x); - if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); - if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { - /* tiny version */ - return 0; - } - else { - for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) { - if (*p != *q) return 1; - } - } - } - break; - - default: - break; - } - } - break; - - default: - break; - } - - return 0; -} - -static Node* -get_head_value_node(Node* node, int exact, regex_t* reg) -{ - Node* n = NULL_NODE; - - switch (NTYPE(node)) { - case NT_BREF: - case NT_ALT: - case NT_CANY: -#ifdef USE_SUBEXP_CALL - case NT_CALL: -#endif - break; - - case NT_CTYPE: - case NT_CCLASS: - if (exact == 0) { - n = node; - } - break; - - case NT_LIST: - n = get_head_value_node(NCAR(node), exact, reg); - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - - if (sn->end <= sn->s) - break; - - if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { - } - else { - n = node; - } - } - break; - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->lower > 0) { - if (IS_NOT_NULL(qn->head_exact)) - n = qn->head_exact; - else - n = get_head_value_node(qn->target, exact, reg); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - - reg->options = NENCLOSE(node)->option; - n = get_head_value_node(NENCLOSE(node)->target, exact, reg); - reg->options = options; - } - break; - - case ENCLOSE_MEMORY: - case ENCLOSE_STOP_BACKTRACK: - n = get_head_value_node(en->target, exact, reg); - break; - } - } - break; - - case NT_ANCHOR: - if (NANCHOR(node)->type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node)->target, exact, reg); - break; - - default: - break; - } - - return n; -} - -static int -check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) -{ - int type, r = 0; - - type = NTYPE(node); - if ((NTYPE2BIT(type) & type_mask) == 0) - return 1; - - switch (type) { - case NT_LIST: - case NT_ALT: - do { - r = check_type_tree(NCAR(node), type_mask, enclose_mask, - anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, - anchor_mask); - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - if ((en->type & enclose_mask) == 0) - return 1; - - r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); - } - break; - - case NT_ANCHOR: - type = NANCHOR(node)->type; - if ((type & anchor_mask) == 0) - return 1; - - if (NANCHOR(node)->target) - r = check_type_tree(NANCHOR(node)->target, - type_mask, enclose_mask, anchor_mask); - break; - - default: - break; - } - return r; -} - -#ifdef USE_SUBEXP_CALL - -#define RECURSION_EXIST 1 -#define RECURSION_INFINITE 2 - -static int -subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node *x; - OnigDistance min; - int ret; - - x = node; - do { - ret = subexp_inf_recursive_check(NCAR(x), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r |= ret; - if (head) { - ret = get_min_match_length(NCAR(x), &min, env); - if (ret != 0) return ret; - if (min != 0) head = 0; - } - } while (IS_NOT_NULL(x = NCDR(x))); - } - break; - - case NT_ALT: - { - int ret; - r = RECURSION_EXIST; - do { - ret = subexp_inf_recursive_check(NCAR(node), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r &= ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); - if (r == RECURSION_EXIST) { - if (NQTFR(node)->lower == 0) r = 0; - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check(an->target, env, head); - break; - } - } - break; - - case NT_CALL: - r = subexp_inf_recursive_check(NCALL(node)->target, env, head); - break; - - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); - } - break; - - default: - break; - } - - return r; -} - -static int -subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - do { - r = subexp_inf_recursive_check_trav(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check_trav(an->target, env); - break; - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - if (IS_ENCLOSE_RECURSION(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_inf_recursive_check(en->target, env, 1); - if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - r = subexp_inf_recursive_check_trav(en->target, env); - } - - break; - - default: - break; - } - - return r; -} - -static int -subexp_recursive_check(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - do { - r |= subexp_recursive_check(NCAR(node)); - } while (IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = subexp_recursive_check(NQTFR(node)->target); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check(an->target); - break; - } - } - break; - - case NT_CALL: - r = subexp_recursive_check(NCALL(node)->target); - if (r != 0) SET_CALL_RECURSION(node); - break; - - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return 1; /* recursion */ - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NENCLOSE(node)->target); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); - } - break; - - default: - break; - } - - return r; -} - - -static int -subexp_recursive_check_trav(Node* node, ScanEnv* env) -{ -#define FOUND_CALLED_NODE 1 - - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - { - int ret; - do { - ret = subexp_recursive_check_trav(NCAR(node), env); - if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; - else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - r = subexp_recursive_check_trav(NQTFR(node)->target, env); - if (NQTFR(node)->upper == 0) { - if (r == FOUND_CALLED_NODE) - NQTFR(node)->is_refered = 1; - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check_trav(an->target, env); - break; - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - if (! IS_ENCLOSE_RECURSION(en)) { - if (IS_ENCLOSE_CALLED(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_recursive_check(en->target); - if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - } - r = subexp_recursive_check_trav(en->target, env); - if (IS_ENCLOSE_CALLED(en)) - r |= FOUND_CALLED_NODE; - } - break; - - default: - break; - } - - return r; -} - -static int -setup_subexp_call(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = setup_subexp_call(NQTFR(node)->target, env); - break; - case NT_ENCLOSE: - r = setup_subexp_call(NENCLOSE(node)->target, env); - break; - - case NT_CALL: - { - CallNode* cn = NCALL(node); - Node** nodes = SCANENV_MEM_NODES(env); - - if (cn->group_num != 0) { - { - int gnum = cn->group_num; - -#ifdef USE_NAMED_GROUP - if (env->num_named > 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - } -#endif - if (gnum > env->num_mem) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_GROUP_REFERENCE; - } - } - -#ifdef USE_NAMED_GROUP - set_call_attr: -#endif - cn->target = nodes[cn->group_num]; - if (IS_NULL(cn->target)) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - SET_ENCLOSE_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); - cn->unset_addr_list = env->unset_addr_list; - } -#ifdef USE_NAMED_GROUP - else { - int *refs; - - int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, - &refs); - if (n <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->group_num = refs[0]; - goto set_call_attr; - } - } -#endif - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = setup_subexp_call(an->target, env); - break; - } - } - break; - - default: - break; - } - - return r; -} -#endif - -/* divide different length alternatives in look-behind. - (?<=A|B) ==> (?<=A)|(?<=B) - (?<!A|B) ==> (?<!A)(?<!B) -*/ -static int -divide_look_behind_alternatives(Node* node) -{ - Node *head, *np, *insert_node; - AnchorNode* an = NANCHOR(node); - int anc_type = an->type; - - head = an->target; - np = NCAR(head); - swap_node(node, head); - NCAR(node) = head; - NANCHOR(head)->target = np; - - np = node; - while ((np = NCDR(np)) != NULL_NODE) { - insert_node = onig_node_new_anchor(anc_type); - CHECK_NULL_RETURN_MEMERR(insert_node); - NANCHOR(insert_node)->target = NCAR(np); - NCAR(np) = insert_node; - } - - if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { - np = node; - do { - SET_NTYPE(np, NT_LIST); /* alt -> list */ - } while ((np = NCDR(np)) != NULL_NODE); - } - return 0; -} - -static int -setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) -{ - int r, len; - AnchorNode* an = NANCHOR(node); - - r = get_char_length_tree(an->target, reg, &len); - if (r == 0) - an->char_len = len; - else if (r == GET_CHAR_LEN_VARLEN) - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND)) - r = divide_look_behind_alternatives(node); - else - r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - } - - return r; -} - -static int -next_setup(Node* node, Node* next_node, regex_t* reg) -{ - int type; - - retry: - type = NTYPE(node); - if (type == NT_QTFR) { - QtfrNode* qn = NQTFR(node); - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QTFR_PEEK_NEXT - Node* n = get_head_value_node(next_node, 1, reg); - /* '\0': for UTF-16BE etc... */ - if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { - qn->next_head_exact = n; - } -#endif - /* automatic posseivation a*b ==> (?>a*)b */ - if (qn->lower <= 1) { - int ttype = NTYPE(qn->target); - if (IS_NODE_TYPE_SIMPLE(ttype)) { - Node *x, *y; - x = get_head_value_node(qn->target, 0, reg); - if (IS_NOT_NULL(x)) { - y = get_head_value_node(next_node, 0, reg); - if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); - CHECK_NULL_RETURN_MEMERR(en); - SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); - swap_node(node, en); - NENCLOSE(node)->target = en; - } - } - } - } - } - } - else if (type == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - node = en->target; - goto retry; - } - } - return 0; -} - - -static int -update_string_node_case_fold(regex_t* reg, Node *node) -{ - UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *sbuf, *ebuf, *sp; - int r, i, len, sbuf_size; - StrNode* sn = NSTR(node); - - end = sn->end; - sbuf_size = (end - sn->s) * 2; - sbuf = (UChar* )xmalloc(sbuf_size); - CHECK_NULL_RETURN_MEMERR(sbuf); - ebuf = sbuf + sbuf_size; - - sp = sbuf; - p = sn->s; - while (p < end) { - len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf); - q = buf; - for (i = 0; i < len; i++) { - if (sp >= ebuf) { - sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2); - CHECK_NULL_RETURN_MEMERR(sbuf); - sp = sbuf + sbuf_size; - sbuf_size *= 2; - ebuf = sbuf + sbuf_size; - } - - *sp++ = buf[i]; - } - } - - r = onig_node_str_set(node, sbuf, sp); - if (r != 0) { - xfree(sbuf); - return r; - } - - xfree(sbuf); - return 0; -} - -static int -expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, - regex_t* reg) -{ - int r; - Node *node; - - node = onig_node_new_str(s, end); - if (IS_NULL(node)) return ONIGERR_MEMORY; - - r = update_string_node_case_fold(reg, node); - if (r != 0) { - onig_node_free(node); - return r; - } - - NSTRING_SET_AMBIG(node); - NSTRING_SET_DONT_GET_OPT_INFO(node); - *rnode = node; - return 0; -} - -static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, - regex_t* reg, Node **rnode) -{ - int r, i, j, len, varlen; - Node *anode, *var_anode, *snode, *xnode, *an; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - - *rnode = var_anode = NULL_NODE; - - varlen = 0; - for (i = 0; i < item_num; i++) { - if (items[i].byte_len != slen) { - varlen = 1; - break; - } - } - - if (varlen != 0) { - *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(var_anode)) return ONIGERR_MEMORY; - - xnode = onig_node_new_list(NULL, NULL); - if (IS_NULL(xnode)) goto mem_err; - NCAR(var_anode) = xnode; - - anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(anode)) goto mem_err; - NCAR(xnode) = anode; - } - else { - *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(anode)) return ONIGERR_MEMORY; - } - - snode = onig_node_new_str(p, p + slen); - if (IS_NULL(snode)) goto mem_err; - - NCAR(anode) = snode; - - for (i = 0; i < item_num; i++) { - snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - - for (j = 0; j < items[i].code_len; j++) { - len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf); - if (len < 0) { - r = len; - goto mem_err2; - } - - r = onig_node_str_cat(snode, buf, buf + len); - if (r != 0) goto mem_err2; - } - - an = onig_node_new_alt(NULL_NODE, NULL_NODE); - if (IS_NULL(an)) { - goto mem_err2; - } - - if (items[i].byte_len != slen) { - Node *rem; - UChar *q = p + items[i].byte_len; - - if (q < end) { - r = expand_case_fold_make_rem_string(&rem, q, end, reg); - if (r != 0) { - onig_node_free(an); - goto mem_err2; - } - - xnode = onig_node_list_add(NULL_NODE, snode); - if (IS_NULL(xnode)) { - onig_node_free(an); - onig_node_free(rem); - goto mem_err2; - } - if (IS_NULL(onig_node_list_add(xnode, rem))) { - onig_node_free(an); - onig_node_free(xnode); - onig_node_free(rem); - goto mem_err; - } - - NCAR(an) = xnode; - } - else { - NCAR(an) = snode; - } - - NCDR(var_anode) = an; - var_anode = an; - } - else { - NCAR(an) = snode; - NCDR(anode) = an; - anode = an; - } - } - - return varlen; - - mem_err2: - onig_node_free(snode); - - mem_err: - onig_node_free(*rnode); - - return ONIGERR_MEMORY; -} - -static int -expand_case_fold_string(Node* node, regex_t* reg) -{ -#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8 - - int r, n, len, alt_num; - UChar *start, *end, *p; - Node *top_root, *root, *snode, *prev_node; - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - StrNode* sn = NSTR(node); - - if (NSTRING_IS_AMBIG(node)) return 0; - - start = sn->s; - end = sn->end; - if (start >= end) return 0; - - r = 0; - top_root = root = prev_node = snode = NULL_NODE; - alt_num = 1; - p = start; - while (p < end) { - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, - p, end, items); - if (n < 0) { - r = n; - goto err; - } - - len = enclen(reg->enc, p, end); - - if (n == 0) { - if (IS_NULL(snode)) { - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - prev_node = snode = onig_node_new_str(NULL, NULL); - if (IS_NULL(snode)) goto mem_err; - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, snode))) { - onig_node_free(snode); - goto mem_err; - } - } - } - - r = onig_node_str_cat(snode, p, p + len); - if (r != 0) goto err; - } - else { - alt_num *= (n + 1); - if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break; - - if (IS_NULL(root) && IS_NOT_NULL(prev_node)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(prev_node); - goto mem_err; - } - } - - r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node); - if (r < 0) goto mem_err; - if (r == 1) { - if (IS_NULL(root)) { - top_root = prev_node; - } - else { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - - root = NCAR(prev_node); - } - else { /* r == 0 */ - if (IS_NOT_NULL(root)) { - if (IS_NULL(onig_node_list_add(root, prev_node))) { - onig_node_free(prev_node); - goto mem_err; - } - } - } - - snode = NULL_NODE; - } - - p += len; - } - - if (p < end) { - Node *srem; - - r = expand_case_fold_make_rem_string(&srem, p, end, reg); - if (r != 0) goto mem_err; - - if (IS_NOT_NULL(prev_node) && IS_NULL(root)) { - top_root = root = onig_node_list_add(NULL_NODE, prev_node); - if (IS_NULL(root)) { - onig_node_free(srem); - onig_node_free(prev_node); - goto mem_err; - } - } - - if (IS_NULL(root)) { - prev_node = srem; - } - else { - if (IS_NULL(onig_node_list_add(root, srem))) { - onig_node_free(srem); - goto mem_err; - } - } - } - - /* ending */ - top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node); - swap_node(node, top_root); - onig_node_free(top_root); - return 0; - - mem_err: - r = ONIGERR_MEMORY; - - err: - onig_node_free(top_root); - return r; -} - - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define CEC_THRES_NUM_BIG_REPEAT 512 -#define CEC_INFINITE_NUM 0x7fffffff - -#define CEC_IN_INFINITE_REPEAT (1<<0) -#define CEC_IN_FINITE_REPEAT (1<<1) -#define CEC_CONT_BIG_REPEAT (1<<2) - -static int -setup_comb_exp_check(Node* node, int state, ScanEnv* env) -{ - int type; - int r = state; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node* prev = NULL_NODE; - do { - r = setup_comb_exp_check(NCAR(node), r, env); - prev = NCAR(node); - } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_ALT: - { - int ret; - do { - ret = setup_comb_exp_check(NCAR(node), state, env); - r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_QTFR: - { - int child_state = state; - int add_state = 0; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; - int var_num; - - if (! IS_REPEAT_INFINITE(qn->upper)) { - if (qn->upper > 1) { - /* {0,1}, {1,1} are allowed */ - child_state |= CEC_IN_FINITE_REPEAT; - - /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ - if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(qn->target); - if (en->type == ENCLOSE_MEMORY) { - if (NTYPE(en->target) == NT_QTFR) { - QtfrNode* q = NQTFR(en->target); - if (IS_REPEAT_INFINITE(q->upper) - && q->greedy == qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - if (qn->upper == 1) - child_state = state; - } - } - } - } - } - } - } - - if (state & CEC_IN_FINITE_REPEAT) { - qn->comb_exp_check_num = -1; - } - else { - if (IS_REPEAT_INFINITE(qn->upper)) { - var_num = CEC_INFINITE_NUM; - child_state |= CEC_IN_INFINITE_REPEAT; - } - else { - var_num = qn->upper - qn->lower; - } - - if (var_num >= CEC_THRES_NUM_BIG_REPEAT) - add_state |= CEC_CONT_BIG_REPEAT; - - if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || - ((state & CEC_CONT_BIG_REPEAT) != 0 && - var_num >= CEC_THRES_NUM_BIG_REPEAT)) { - if (qn->comb_exp_check_num == 0) { - env->num_comb_exp_check++; - qn->comb_exp_check_num = env->num_comb_exp_check; - if (env->curr_max_regnum > env->comb_exp_max_regnum) - env->comb_exp_max_regnum = env->curr_max_regnum; - } - } - } - - r = setup_comb_exp_check(target, child_state, env); - r |= add_state; - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_MEMORY: - { - if (env->curr_max_regnum < en->regnum) - env->curr_max_regnum = en->regnum; - - r = setup_comb_exp_check(en->target, state, env); - } - break; - - default: - r = setup_comb_exp_check(en->target, state, env); - break; - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) - env->has_recursion = 1; - else - r = setup_comb_exp_check(NCALL(node)->target, state, env); - break; -#endif - - default: - break; - } - - return r; -} -#endif - -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) - -/* setup_tree does the following work. - 1. check empty loop. (set qn->target_empty_info) - 2. expand ignore-case in char class. - 3. set memory status bit flags. (reg->mem_stats) - 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. - 5. find invalid patterns in look-behind. - 6. expand repeated string. - */ -static int -setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - Node* prev = NULL_NODE; - do { - r = setup_tree(NCAR(node), reg, state, env); - if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCAR(node), reg); - } - prev = NCAR(node); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - - case NT_ALT: - do { - r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_CCLASS: - break; - - case NT_STR: - if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); - } - break; - - case NT_CTYPE: - case NT_CANY: - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - break; -#endif - - case NT_BREF: - { - int i; - int* p; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); - BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); - } -#endif - SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); - } - } - break; - - case NT_QTFR: - { - OnigDistance d; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; - - if ((state & IN_REPEAT) != 0) { - qn->state |= NST_IN_REPEAT; - } - - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - r = get_min_match_length(target, &d, env); - if (r) break; - if (d == 0) { - qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - r = quantifiers_memory_node_info(target); - if (r < 0) break; - if (r > 0) { - qn->target_empty_info = r; - } -#endif - } - } - - state |= IN_REPEAT; - if (qn->lower != qn->upper) - state |= IN_VAR_REPEAT; - r = setup_tree(target, reg, state, env); - if (r) break; - - /* expand string */ -#define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == NT_STR) { - if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && - qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int len = NSTRING_LEN(target); - StrNode* sn = NSTR(target); - - if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTR(target)->flag); - for (i = 0; i < n; i++) { - r = onig_node_str_cat(node, sn->s, sn->end); - if (r) break; - } - onig_node_free(target); - break; /* break case NT_QTFR: */ - } - } - } - -#ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_NOT_NULL(tqn->head_exact)) { - qn->head_exact = tqn->head_exact; - tqn->head_exact = NULL; - } - } - else { - qn->head_exact = get_head_value_node(qn->target, 1, reg); - } - } -#endif - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - r = setup_tree(NENCLOSE(node)->target, reg, state, env); - reg->options = options; - } - break; - - case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { - BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ - } - r = setup_tree(en->target, reg, state, env); - break; - - case ENCLOSE_STOP_BACKTRACK: - { - Node* target = en->target; - r = setup_tree(target, reg, state, env); - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && - tqn->greedy != 0) { /* (?>a*), a*+ etc... */ - int qtype = NTYPE(tqn->target); - if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); - } - } - } - break; - } - } - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - r = setup_tree(an->target, reg, state, env); - break; - case ANCHOR_PREC_READ_NOT: - r = setup_tree(an->target, reg, (state | IN_NOT), env); - break; - -/* allowed node types in look-behind */ -#define ALLOWED_TYPE_IN_LB \ - ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ - BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) - -#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY ) -#define ALLOWED_ENCLOSE_IN_LB_NOT 0 - -#define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) -#define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) - - case ANCHOR_LOOK_BEHIND: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_look_behind(node, reg, env); - if (r != 0) return r; - r = setup_tree(an->target, reg, state, env); - } - break; - - case ANCHOR_LOOK_BEHIND_NOT: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_look_behind(node, reg, env); - if (r != 0) return r; - r = setup_tree(an->target, reg, (state | IN_NOT), env); - } - break; - } - } - break; - - default: - break; - } - - return r; -} - -/* set skip map for Boyer-Moor search */ -static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - UChar skip[], int** int_skip) -{ - int i, len; - - len = end - s; - if (len < ONIG_CHAR_TABLE_SIZE) { - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; - - for (i = 0; i < len - 1; i++) - skip[s[i]] = len - 1 - i; - } - else { - if (IS_NULL(*int_skip)) { - *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*int_skip)) return ONIGERR_MEMORY; - } - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len; - - for (i = 0; i < len - 1; i++) - (*int_skip)[s[i]] = len - 1 - i; - } - return 0; -} - -#define OPT_EXACT_MAXLEN 24 - -typedef struct { - OnigDistance min; /* min byte length */ - OnigDistance max; /* max byte length */ -} MinMaxLen; - -typedef struct { - MinMaxLen mmd; - OnigEncoding enc; - OnigOptionType options; - OnigCaseFoldType case_fold_flag; - ScanEnv* scan_env; -} OptEnv; - -typedef struct { - int left_anchor; - int right_anchor; -} OptAncInfo; - -typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - - int reach_end; - int ignore_case; - int len; - UChar s[OPT_EXACT_MAXLEN]; -} OptExactInfo; - -typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - - int value; /* weighted value */ - UChar map[ONIG_CHAR_TABLE_SIZE]; -} OptMapInfo; - -typedef struct { - MinMaxLen len; - - OptAncInfo anc; - OptExactInfo exb; /* boundary */ - OptExactInfo exm; /* middle */ - OptExactInfo expr; /* prec read (?=...) */ - - OptMapInfo map; /* boundary */ -} NodeOptInfo; - - -static int -map_position_value(OnigEncoding enc, int i) -{ - static const short int ByteValTable[] = { - 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5, - 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 - }; - - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { - if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) - return 20; - else - return (int )ByteValTable[i]; - } - else - return 4; /* Take it easy. */ -} - -static int -distance_value(MinMaxLen* mm) -{ - /* 1000 / (min-max-dist + 1) */ - static const short int dist_vals[] = { - 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, - 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, - 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, - 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, - 24, 24, 23, 23, 22, 22, 21, 21, 20, 20, - 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, - 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, - 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, - 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 10, 10, 10, 10, 10 - }; - - int d; - - if (mm->max == ONIG_INFINITE_DISTANCE) return 0; - - d = mm->max - mm->min; - if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) - /* return dist_vals[d] * 16 / (mm->min + 12); */ - return (int )dist_vals[d]; - else - return 1; -} - -static int -comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) -{ - if (v2 <= 0) return -1; - if (v1 <= 0) return 1; - - v1 *= distance_value(d1); - v2 *= distance_value(d2); - - if (v2 > v1) return 1; - if (v2 < v1) return -1; - - if (d2->min < d1->min) return 1; - if (d2->min > d1->min) return -1; - return 0; -} - -static int -is_equal_mml(MinMaxLen* a, MinMaxLen* b) -{ - return (a->min == b->min && a->max == b->max) ? 1 : 0; -} - - -static void -set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max) -{ - mml->min = min; - mml->max = max; -} - -static void -clear_mml(MinMaxLen* mml) -{ - mml->min = mml->max = 0; -} - -static void -copy_mml(MinMaxLen* to, MinMaxLen* from) -{ - to->min = from->min; - to->max = from->max; -} - -static void -add_mml(MinMaxLen* to, MinMaxLen* from) -{ - to->min = distance_add(to->min, from->min); - to->max = distance_add(to->max, from->max); -} - -static void -alt_merge_mml(MinMaxLen* to, MinMaxLen* from) -{ - if (to->min > from->min) to->min = from->min; - if (to->max < from->max) to->max = from->max; -} - -static void -copy_opt_env(OptEnv* to, OptEnv* from) -{ - *to = *from; -} - -static void -clear_opt_anc_info(OptAncInfo* anc) -{ - anc->left_anchor = 0; - anc->right_anchor = 0; -} - -static void -copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) -{ - *to = *from; -} - -static void -concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - OnigDistance left_len, OnigDistance right_len) -{ - clear_opt_anc_info(to); - - to->left_anchor = left->left_anchor; - if (left_len == 0) { - to->left_anchor |= right->left_anchor; - } - - to->right_anchor = right->right_anchor; - if (right_len == 0) { - to->right_anchor |= left->right_anchor; - } -} - -static int -is_left_anchor(int anc) -{ - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || - anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || - anc == ANCHOR_PREC_READ_NOT) - return 0; - - return 1; -} - -static int -is_set_opt_anc_info(OptAncInfo* to, int anc) -{ - if ((to->left_anchor & anc) != 0) return 1; - - return ((to->right_anchor & anc) != 0 ? 1 : 0); -} - -static void -add_opt_anc_info(OptAncInfo* to, int anc) -{ - if (is_left_anchor(anc)) - to->left_anchor |= anc; - else - to->right_anchor |= anc; -} - -static void -remove_opt_anc_info(OptAncInfo* to, int anc) -{ - if (is_left_anchor(anc)) - to->left_anchor &= ~anc; - else - to->right_anchor &= ~anc; -} - -static void -alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) -{ - to->left_anchor &= add->left_anchor; - to->right_anchor &= add->right_anchor; -} - -static int -is_full_opt_exact_info(OptExactInfo* ex) -{ - return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); -} - -static void -clear_opt_exact_info(OptExactInfo* ex) -{ - clear_mml(&ex->mmd); - clear_opt_anc_info(&ex->anc); - ex->reach_end = 0; - ex->ignore_case = 0; - ex->len = 0; - ex->s[0] = '\0'; -} - -static void -copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) -{ - *to = *from; -} - -static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) -{ - int i, j, len; - UChar *p, *end; - OptAncInfo tanc; - - if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ - - to->ignore_case = 1; - } - - p = add->s; - end = p + add->len; - for (i = to->len; p < end; ) { - len = enclen(enc, p, end); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) - to->s[i++] = *p++; - } - - to->len = i; - to->reach_end = (p == end ? add->reach_end : 0); - - concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); - if (! to->reach_end) tanc.right_anchor = 0; - copy_opt_anc_info(&to->anc, &tanc); -} - -static void -concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) -{ - int i, j, len; - UChar *p; - - for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enclen(enc, p, end); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len && p < end; j++) - to->s[i++] = *p++; - } - - to->len = i; -} - -static void -alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) -{ - int i, j, len; - - if (add->len == 0 || to->len == 0) { - clear_opt_exact_info(to); - return ; - } - - if (! is_equal_mml(&to->mmd, &add->mmd)) { - clear_opt_exact_info(to); - return ; - } - - for (i = 0; i < to->len && i < add->len; ) { - if (to->s[i] != add->s[i]) break; - len = enclen(env->enc, to->s + i, to->s + to->len); - - for (j = 1; j < len; j++) { - if (to->s[i+j] != add->s[i+j]) break; - } - if (j < len) break; - i += len; - } - - if (! add->reach_end || i < add->len || i < to->len) { - to->reach_end = 0; - } - to->len = i; - to->ignore_case |= add->ignore_case; - - alt_merge_opt_anc_info(&to->anc, &add->anc); - if (! to->reach_end) to->anc.right_anchor = 0; -} - -static void -select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) -{ - int v1, v2; - - v1 = now->len; - v2 = alt->len; - - if (v2 == 0) { - return ; - } - else if (v1 == 0) { - copy_opt_exact_info(now, alt); - return ; - } - else if (v1 <= 2 && v2 <= 2) { - /* ByteValTable[x] is big value --> low price */ - v2 = map_position_value(enc, now->s[0]); - v1 = map_position_value(enc, alt->s[0]); - - if (now->len > 1) v1 += 5; - if (alt->len > 1) v2 += 5; - } - - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; - - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_exact_info(now, alt); -} - -static void -clear_opt_map_info(OptMapInfo* map) -{ - static const OptMapInfo clean_info = { - {0, 0}, {0, 0}, 0, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - } - }; - - xmemcpy(map, &clean_info, sizeof(OptMapInfo)); -} - -static void -copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) -{ - *to = *from; -} - -static void -add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) -{ - if (map->map[c] == 0) { - map->map[c] = 1; - map->value += map_position_value(enc, c); - } -} - -static int -add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) -{ - OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int i, n; - - add_char_opt_map_info(map, p[0], enc); - - case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); - if (n < 0) return n; - - for (i = 0; i < n; i++) { - ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); - add_char_opt_map_info(map, buf[0], enc); - } - - return 0; -} - -static void -select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) -{ - const int z = 1<<15; /* 32768: something big value */ - - int v1, v2; - - if (alt->value == 0) return ; - if (now->value == 0) { - copy_opt_map_info(now, alt); - return ; - } - - v1 = z / now->value; - v2 = z / alt->value; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_map_info(now, alt); -} - -static int -comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) -{ -#define COMP_EM_BASE 20 - int ve, vm; - - if (m->value <= 0) return -1; - - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); - vm = COMP_EM_BASE * 5 * 2 / m->value; - return comp_distance_value(&e->mmd, &m->mmd, ve, vm); -} - -static void -alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) -{ - int i, val; - - /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ - if (to->value == 0) return ; - if (add->value == 0 || to->mmd.max < add->mmd.min) { - clear_opt_map_info(to); - return ; - } - - alt_merge_mml(&to->mmd, &add->mmd); - - val = 0; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (add->map[i]) - to->map[i] = 1; - - if (to->map[i]) - val += map_position_value(enc, i); - } - to->value = val; - - alt_merge_opt_anc_info(&to->anc, &add->anc); -} - -static void -set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) -{ - copy_mml(&(opt->exb.mmd), mmd); - copy_mml(&(opt->expr.mmd), mmd); - copy_mml(&(opt->map.mmd), mmd); -} - -static void -clear_node_opt_info(NodeOptInfo* opt) -{ - clear_mml(&opt->len); - clear_opt_anc_info(&opt->anc); - clear_opt_exact_info(&opt->exb); - clear_opt_exact_info(&opt->exm); - clear_opt_exact_info(&opt->expr); - clear_opt_map_info(&opt->map); -} - -static void -copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) -{ - *to = *from; -} - -static void -concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) -{ - int exb_reach, exm_reach; - OptAncInfo tanc; - - concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); - copy_opt_anc_info(&to->anc, &tanc); - - if (add->exb.len > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, - to->len.max, add->len.max); - copy_opt_anc_info(&add->exb.anc, &tanc); - } - - if (add->map.value > 0 && to->len.max == 0) { - if (add->map.mmd.max == 0) - add->map.anc.left_anchor |= to->anc.left_anchor; - } - - exb_reach = to->exb.reach_end; - exm_reach = to->exm.reach_end; - - if (add->len.max != 0) - to->exb.reach_end = to->exm.reach_end = 0; - - if (add->exb.len > 0) { - if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb, enc); - clear_opt_exact_info(&add->exb); - } - else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb, enc); - clear_opt_exact_info(&add->exb); - } - } - select_opt_exact_info(enc, &to->exm, &add->exb); - select_opt_exact_info(enc, &to->exm, &add->exm); - - if (to->expr.len > 0) { - if (add->len.max > 0) { - if (to->expr.len > (int )add->len.max) - to->expr.len = add->len.max; - - if (to->expr.mmd.max == 0) - select_opt_exact_info(enc, &to->exb, &to->expr); - else - select_opt_exact_info(enc, &to->exm, &to->expr); - } - } - else if (add->expr.len > 0) { - copy_opt_exact_info(&to->expr, &add->expr); - } - - select_opt_map_info(&to->map, &add->map); - - add_mml(&to->len, &add->len); -} - -static void -alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) -{ - alt_merge_opt_anc_info (&to->anc, &add->anc); - alt_merge_opt_exact_info(&to->exb, &add->exb, env); - alt_merge_opt_exact_info(&to->exm, &add->exm, env); - alt_merge_opt_exact_info(&to->expr, &add->expr, env); - alt_merge_opt_map_info(env->enc, &to->map, &add->map); - - alt_merge_mml(&to->len, &add->len); -} - - -#define MAX_NODE_OPT_INFO_REF_COUNT 5 - -static int -optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) -{ - int type; - int r = 0; - - clear_node_opt_info(opt); - set_bound_node_opt_info(opt, &env->mmd); - - type = NTYPE(node); - switch (type) { - case NT_LIST: - { - OptEnv nenv; - NodeOptInfo nopt; - Node* nd = node; - - copy_opt_env(&nenv, env); - do { - r = optimize_node_left(NCAR(nd), &nopt, &nenv); - if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); - } - } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); - } - break; - - case NT_ALT: - { - NodeOptInfo nopt; - Node* nd = node; - - do { - r = optimize_node_left(NCAR(nd), &nopt, env); - if (r == 0) { - if (nd == node) copy_node_opt_info(opt, &nopt); - else alt_merge_node_opt_info(opt, &nopt, env); - } - } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); - } - break; - - case NT_STR: - { - StrNode* sn = NSTR(node); - int slen = sn->end - sn->s; - int is_raw = NSTRING_IS_RAW(node); - - if (! NSTRING_IS_AMBIG(node)) { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); - if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s), env->enc); - } - set_mml(&opt->len, slen, slen); - } - else { - int max; - - if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { - int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; - } - else { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); - opt->exb.ignore_case = 1; - - if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->case_fold_flag); - if (r != 0) break; - } - - max = slen; - } - - set_mml(&opt->len, slen, max); - } - - if (opt->exb.len == slen) - opt->exb.reach_end = 1; - } - break; - - case NT_CCLASS: - { - int i, z; - CClassNode* cc = NCCLASS(node); - - /* no need to check ignore case. (setted in setup_tree()) */ - - if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - - set_mml(&opt->len, min, max); - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - z = BITSET_AT(cc->bs, i); - if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - set_mml(&opt->len, 1, 1); - } - } - break; - - case NT_CTYPE: - { - int i, min, max; - - max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - - if (max == 1) { - min = 1; - - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->is_not != 0) { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - else { - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); - } - } - } - break; - } - } - else { - min = ONIGENC_MBC_MINLEN(env->enc); - } - set_mml(&opt->len, min, max); - } - break; - - case NT_CANY: - { - OnigDistance min = ONIGENC_MBC_MINLEN(env->enc); - OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - set_mml(&opt->len, min, max); - } - break; - - case NT_ANCHOR: - switch (NANCHOR(node)->type) { - case ANCHOR_BEGIN_BUF: - case ANCHOR_BEGIN_POSITION: - case ANCHOR_BEGIN_LINE: - case ANCHOR_END_BUF: - case ANCHOR_SEMI_END_BUF: - case ANCHOR_END_LINE: - add_opt_anc_info(&opt->anc, NANCHOR(node)->type); - break; - - case ANCHOR_PREC_READ: - { - NodeOptInfo nopt; - - r = optimize_node_left(NANCHOR(node)->target, &nopt, env); - if (r == 0) { - if (nopt.exb.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exb); - else if (nopt.exm.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exm); - - opt->expr.reach_end = 0; - - if (nopt.map.value > 0) - copy_opt_map_info(&opt->map, &nopt.map); - } - } - break; - - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */ - case ANCHOR_LOOK_BEHIND_NOT: - break; - } - break; - - case NT_BREF: - { - int i; - int* backs; - OnigDistance min, max, tmin, tmax; - Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BRefNode* br = NBREF(node); - - if (br->state & NST_RECURSION) { - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - break; - } - backs = BACKREFS_P(br); - r = get_min_match_length(nodes[backs[0]], &min, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[0]], &max, env->scan_env); - if (r != 0) break; - for (i = 1; i < br->back_num; i++) { - r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env); - if (r != 0) break; - r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env); - if (r != 0) break; - if (min > tmin) min = tmin; - if (max < tmax) max = tmax; - } - if (r == 0) set_mml(&opt->len, min, max); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); - else { - OnigOptionType save = env->options; - env->options = NENCLOSE(NCALL(node)->target)->option; - r = optimize_node_left(NCALL(node)->target, opt, env); - env->options = save; - } - break; -#endif - - case NT_QTFR: - { - int i; - OnigDistance min, max; - NodeOptInfo nopt; - QtfrNode* qn = NQTFR(node); - - r = optimize_node_left(qn->target, &nopt, env); - if (r) break; - - if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { - if (env->mmd.max == 0 && - NTYPE(qn->target) == NT_CANY && qn->greedy) { - if (IS_MULTILINE(env->options)) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); - else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); - } - } - else { - if (qn->lower > 0) { - copy_node_opt_info(opt, &nopt); - if (nopt.exb.len > 0) { - if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && - ! is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); - } - if (i < qn->lower) { - opt->exb.reach_end = 0; - } - } - } - - if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; - } - if (qn->lower > 1) - opt->exm.reach_end = 0; - } - } - - min = distance_multiply(nopt.len.min, qn->lower); - if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); - else - max = distance_multiply(nopt.len.max, qn->upper); - - set_mml(&opt->len, min, max); - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - - switch (en->type) { - case ENCLOSE_OPTION: - { - OnigOptionType save = env->options; - - env->options = en->option; - r = optimize_node_left(en->target, opt, env); - env->options = save; - } - break; - - case ENCLOSE_MEMORY: -#ifdef USE_SUBEXP_CALL - en->opt_count++; - if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { - OnigDistance min, max; - - min = 0; - max = ONIG_INFINITE_DISTANCE; - if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; - if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; - set_mml(&opt->len, min, max); - } - else -#endif - { - r = optimize_node_left(en->target, opt, env); - - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { - if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); - } - } - break; - - case ENCLOSE_STOP_BACKTRACK: - r = optimize_node_left(en->target, opt, env); - break; - } - } - break; - - default: -#ifdef ONIG_DEBUG - fprintf(stderr, "optimize_node_left: undefined node type %d\n", - NTYPE(node)); -#endif - r = ONIGERR_TYPE_BUG; - break; - } - - return r; -} - -static int -set_optimize_exact_info(regex_t* reg, OptExactInfo* e) -{ - int r; - - if (e->len == 0) return 0; - - if (e->ignore_case) { - reg->exact = (UChar* )xmalloc(e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - xmemcpy(reg->exact, e->s, e->len); - reg->exact_end = reg->exact + e->len; - reg->optimize = ONIG_OPTIMIZE_EXACT_IC; - } - else { - int allow_reverse; - - reg->exact = str_dup(e->s, e->s + e->len); - CHECK_NULL_RETURN_MEMERR(reg->exact); - reg->exact_end = reg->exact + e->len; - - allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); - - if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { - r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, - reg->map, &(reg->int_map)); - if (r) return r; - - reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); - } - else { - reg->optimize = ONIG_OPTIMIZE_EXACT; - } - } - - reg->dmin = e->mmd.min; - reg->dmax = e->mmd.max; - - if (reg->dmin != ONIG_INFINITE_DISTANCE) { - reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); - } - - return 0; -} - -static void -set_optimize_map_info(regex_t* reg, OptMapInfo* m) -{ - int i; - - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - reg->map[i] = m->map[i]; - - reg->optimize = ONIG_OPTIMIZE_MAP; - reg->dmin = m->mmd.min; - reg->dmax = m->mmd.max; - - if (reg->dmin != ONIG_INFINITE_DISTANCE) { - reg->threshold_len = reg->dmin + 1; - } -} - -static void -set_sub_anchor(regex_t* reg, OptAncInfo* anc) -{ - reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; - reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; -} - -#ifdef ONIG_DEBUG -static void print_optimize_info(FILE* f, regex_t* reg); -#endif - -static int -set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) -{ - - int r; - NodeOptInfo opt; - OptEnv env; - - env.enc = reg->enc; - env.options = reg->options; - env.case_fold_flag = reg->case_fold_flag; - env.scan_env = scan_env; - clear_mml(&env.mmd); - - r = optimize_node_left(node, &opt, &env); - if (r) return r; - - reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); - - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); - - if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { - reg->anchor_dmin = opt.len.min; - reg->anchor_dmax = opt.len.max; - } - - if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); - if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { - goto set_map; - } - else { - r = set_optimize_exact_info(reg, &opt.exb); - set_sub_anchor(reg, &opt.exb.anc); - } - } - else if (opt.map.value > 0) { - set_map: - set_optimize_map_info(reg, &opt.map); - set_sub_anchor(reg, &opt.map.anc); - } - else { - reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; - if (opt.len.max == 0) - reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; - } - -#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) - print_optimize_info(stderr, reg); -#endif - return r; -} - -static void -clear_optimize_info(regex_t* reg) -{ - reg->optimize = ONIG_OPTIMIZE_NONE; - reg->anchor = 0; - reg->anchor_dmin = 0; - reg->anchor_dmax = 0; - reg->sub_anchor = 0; - reg->exact_end = (UChar* )NULL; - reg->threshold_len = 0; - if (IS_NOT_NULL(reg->exact)) { - xfree(reg->exact); - reg->exact = (UChar* )NULL; - } -} - -#ifdef ONIG_DEBUG - -static void print_enc_string(FILE* fp, OnigEncoding enc, - const UChar *s, const UChar *end) -{ - fprintf(fp, "\nPATTERN: /"); - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - const UChar *p; - OnigCodePoint code; - - p = s; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) { - fprintf(fp, " 0x%04x ", (int )code); - } - else { - fputc((int )code, fp); - } - - p += enclen(enc, p, end); - } - } - else { - while (s < end) { - fputc((int )*s, fp); - s++; - } - } - - fprintf(fp, "/\n"); -} - -static void -print_distance_range(FILE* f, OnigDistance a, OnigDistance b) -{ - if (a == ONIG_INFINITE_DISTANCE) - fputs("inf", f); - else - fprintf(f, "(%u)", a); - - fputs("-", f); - - if (b == ONIG_INFINITE_DISTANCE) - fputs("inf", f); - else - fprintf(f, "(%u)", b); -} - -static void -print_anchor(FILE* f, int anchor) -{ - int q = 0; - - fprintf(f, "["); - - if (anchor & ANCHOR_BEGIN_BUF) { - fprintf(f, "begin-buf"); - q = 1; - } - if (anchor & ANCHOR_BEGIN_LINE) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "begin-line"); - } - if (anchor & ANCHOR_BEGIN_POSITION) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "begin-pos"); - } - if (anchor & ANCHOR_END_BUF) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "end-buf"); - } - if (anchor & ANCHOR_SEMI_END_BUF) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "semi-end-buf"); - } - if (anchor & ANCHOR_END_LINE) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "end-line"); - } - if (anchor & ANCHOR_ANYCHAR_STAR) { - if (q) fprintf(f, ", "); - q = 1; - fprintf(f, "anychar-star"); - } - if (anchor & ANCHOR_ANYCHAR_STAR_ML) { - if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-pl"); - } - - fprintf(f, "]"); -} - -static void -print_optimize_info(FILE* f, regex_t* reg) -{ - static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", - "EXACT_IC", "MAP" }; - - fprintf(f, "optimize: %s\n", on[reg->optimize]); - fprintf(f, " anchor: "); print_anchor(f, reg->anchor); - if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0) - print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax); - fprintf(f, "\n"); - - if (reg->optimize) { - fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor); - fprintf(f, "\n"); - } - fprintf(f, "\n"); - - if (reg->exact) { - UChar *p; - fprintf(f, "exact: ["); - for (p = reg->exact; p < reg->exact_end; p++) { - fputc(*p, f); - } - fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact)); - } - else if (reg->optimize & ONIG_OPTIMIZE_MAP) { - int c, i, n = 0; - - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - if (reg->map[i]) n++; - - fprintf(f, "map: n=%d\n", n); - if (n > 0) { - c = 0; - fputc('[', f); - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) { - if (reg->map[i] != 0) { - if (c > 0) fputs(", ", f); - c++; - if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 && - ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i)) - fputc(i, f); - else - fprintf(f, "%d", i); - } - } - fprintf(f, "]\n"); - } - } -} -#endif /* ONIG_DEBUG */ - - -extern void -onig_free_body(regex_t* reg) -{ - if (IS_NOT_NULL(reg)) { - if (IS_NOT_NULL(reg->p)) xfree(reg->p); - if (IS_NOT_NULL(reg->exact)) xfree(reg->exact); - if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); - if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); - if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); - -#ifdef USE_NAMED_GROUP - onig_names_free(reg); -#endif - } -} - -extern void -onig_free(regex_t* reg) -{ - if (IS_NOT_NULL(reg)) { - onig_free_body(reg); - xfree(reg); - } -} - -size_t -onig_memsize(regex_t *reg) -{ - size_t size = sizeof(regex_t); - if (IS_NOT_NULL(reg->p)) size += reg->alloc; - if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact; - if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; - if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE; - if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange); - if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain); - - return size; -} - -#define REGEX_TRANSFER(to,from) do {\ - (to)->state = ONIG_STATE_MODIFY;\ - onig_free_body(to);\ - xmemcpy(to, from, sizeof(regex_t));\ - xfree(from);\ -} while (0) - -extern void -onig_transfer(regex_t* to, regex_t* from) -{ - THREAD_ATOMIC_START; - REGEX_TRANSFER(to, from); - THREAD_ATOMIC_END; -} - -#define REGEX_CHAIN_HEAD(reg) do {\ - while (IS_NOT_NULL((reg)->chain)) {\ - (reg) = (reg)->chain;\ - }\ -} while (0) - -extern void -onig_chain_link_add(regex_t* to, regex_t* add) -{ - THREAD_ATOMIC_START; - REGEX_CHAIN_HEAD(to); - to->chain = add; - THREAD_ATOMIC_END; -} - -extern void -onig_chain_reduce(regex_t* reg) -{ - regex_t *head, *prev; - - prev = reg; - head = prev->chain; - if (IS_NOT_NULL(head)) { - reg->state = ONIG_STATE_MODIFY; - while (IS_NOT_NULL(head->chain)) { - prev = head; - head = head->chain; - } - prev->chain = (regex_t* )NULL; - REGEX_TRANSFER(reg, head); - } -} - -#ifdef ONIG_DEBUG -static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); -#endif -#ifdef ONIG_DEBUG_PARSE_TREE -static void print_tree P_((FILE* f, Node* node)); -#endif - -extern int -onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo, const char *sourcefile, int sourceline) -{ -#define COMPILE_INIT_SIZE 20 - - int r, init_size; - Node* root; - ScanEnv scan_env = {0}; -#ifdef USE_SUBEXP_CALL - UnsetAddrList uslist; -#endif - - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; - - scan_env.sourcefile = sourcefile; - scan_env.sourceline = sourceline; - reg->state = ONIG_STATE_COMPILING; - -#ifdef ONIG_DEBUG - print_enc_string(stderr, reg->enc, pattern, pattern_end); -#endif - - if (reg->alloc == 0) { - init_size = (pattern_end - pattern) * 2; - if (init_size <= 0) init_size = COMPILE_INIT_SIZE; - r = BBUF_INIT(reg, init_size); - if (r != 0) goto end; - } - else - reg->used = 0; - - reg->num_mem = 0; - reg->num_repeat = 0; - reg->num_null_check = 0; - reg->repeat_range_alloc = 0; - reg->repeat_range = (OnigRepeatRange* )NULL; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - reg->num_comb_exp_check = 0; -#endif - - r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); - if (r != 0) goto err; - -#ifdef USE_NAMED_GROUP - /* mixed use named group and no-named group */ - if (scan_env.num_named > 0 && - IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { - if (scan_env.num_named != scan_env.num_mem) - r = disable_noname_group_capture(&root, reg, &scan_env); - else - r = numbered_ref_check(root); - - if (r != 0) goto err; - } -#endif - -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - r = unset_addr_list_init(&uslist, scan_env.num_call); - if (r != 0) goto err; - scan_env.unset_addr_list = &uslist; - r = setup_subexp_call(root, &scan_env); - if (r != 0) goto err_unset; - r = subexp_recursive_check_trav(root, &scan_env); - if (r < 0) goto err_unset; - r = subexp_inf_recursive_check_trav(root, &scan_env); - if (r != 0) goto err_unset; - - reg->num_call = scan_env.num_call; - } - else - reg->num_call = 0; -#endif - - r = setup_tree(root, reg, 0, &scan_env); - if (r != 0) goto err_unset; - -#ifdef ONIG_DEBUG_PARSE_TREE - print_tree(stderr, root); -#endif - - reg->capture_history = scan_env.capture_history; - reg->bt_mem_start = scan_env.bt_mem_start; - reg->bt_mem_start |= reg->capture_history; - if (IS_FIND_CONDITION(reg->options)) - BIT_STATUS_ON_ALL(reg->bt_mem_end); - else { - reg->bt_mem_end = scan_env.bt_mem_end; - reg->bt_mem_end |= reg->capture_history; - } - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (scan_env.backrefed_mem == 0 -#ifdef USE_SUBEXP_CALL - || scan_env.num_call == 0 -#endif - ) { - setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_SUBEXP_CALL - if (scan_env.has_recursion != 0) { - scan_env.num_comb_exp_check = 0; - } - else -#endif - if (scan_env.comb_exp_max_regnum > 0) { - int i; - for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { - scan_env.num_comb_exp_check = 0; - break; - } - } - } - } - - reg->num_comb_exp_check = scan_env.num_comb_exp_check; -#endif - - clear_optimize_info(reg); -#ifndef ONIG_DONT_OPTIMIZE - r = set_optimize_info_from_tree(root, reg, &scan_env); - if (r != 0) goto err_unset; -#endif - - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { - xfree(scan_env.mem_nodes_dynamic); - scan_env.mem_nodes_dynamic = (Node** )NULL; - } - - r = compile_tree(root, reg); - if (r == 0) { - r = add_opcode(reg, OP_END); -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - r = unset_addr_list_fix(&uslist, reg); - unset_addr_list_end(&uslist); - if (r) goto err; - } -#endif - - if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) - reg->stack_pop_level = STACK_POP_LEVEL_ALL; - else { - if (reg->bt_mem_start != 0) - reg->stack_pop_level = STACK_POP_LEVEL_MEM_START; - else - reg->stack_pop_level = STACK_POP_LEVEL_FREE; - } - } -#ifdef USE_SUBEXP_CALL - else if (scan_env.num_call > 0) { - unset_addr_list_end(&uslist); - } -#endif - onig_node_free(root); - -#ifdef ONIG_DEBUG_COMPILE -#ifdef USE_NAMED_GROUP - onig_print_names(stderr, reg); -#endif - print_compiled_byte_code_list(stderr, reg); -#endif - - end: - reg->state = ONIG_STATE_NORMAL; - return r; - - err_unset: -#ifdef USE_SUBEXP_CALL - if (scan_env.num_call > 0) { - unset_addr_list_end(&uslist); - } -#endif - err: - if (IS_NOT_NULL(scan_env.error)) { - if (IS_NOT_NULL(einfo)) { - einfo->enc = scan_env.enc; - einfo->par = scan_env.error; - einfo->par_end = scan_env.error_end; - } - } - - onig_node_free(root); - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) - xfree(scan_env.mem_nodes_dynamic); - return r; -} - -#ifdef USE_RECOMPILE_API -extern int -onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) -{ - int r; - regex_t *new_reg; - - r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo); - if (r) return r; - if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_transfer(reg, new_reg); - } - else { - onig_chain_link_add(reg, new_reg); - } - return 0; -} -#endif - -static int onig_inited = 0; - -extern int -onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, const OnigSyntaxType* syntax) -{ - if (! onig_inited) - onig_init(); - - if (IS_NULL(reg)) - return ONIGERR_INVALID_ARGUMENT; - - if (ONIGENC_IS_UNDEF(enc)) - return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; - - if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) - == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; - } - - (reg)->state = ONIG_STATE_MODIFY; - - if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) { - option |= syntax->options; - option &= ~ONIG_OPTION_SINGLELINE; - } - else - option |= syntax->options; - - (reg)->enc = enc; - (reg)->options = option; - (reg)->syntax = syntax; - (reg)->optimize = 0; - (reg)->exact = (UChar* )NULL; - (reg)->int_map = (int* )NULL; - (reg)->int_map_backward = (int* )NULL; - (reg)->chain = (regex_t* )NULL; - - (reg)->p = (UChar* )NULL; - (reg)->alloc = 0; - (reg)->used = 0; - (reg)->name_table = (void* )NULL; - - (reg)->case_fold_flag = case_fold_flag; - return 0; -} - -extern int -onig_new_without_alloc(regex_t* reg, const UChar* pattern, - const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, - OnigSyntaxType* syntax, OnigErrorInfo* einfo) -{ - int r; - - r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) return r; - - r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0); - return r; -} - -extern int -onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, - OnigErrorInfo* einfo) -{ - int r; - - *reg = (regex_t* )xmalloc(sizeof(regex_t)); - if (IS_NULL(*reg)) return ONIGERR_MEMORY; - - r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) goto err; - - r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0); - if (r) { - err: - onig_free(*reg); - *reg = NULL; - } - return r; -} - - -extern int -onig_init(void) -{ - if (onig_inited != 0) - return 0; - - THREAD_SYSTEM_INIT; - THREAD_ATOMIC_START; - - onig_inited = 1; - - onigenc_init(); - /* onigenc_set_default_caseconv_table((UChar* )0); */ - -#ifdef ONIG_DEBUG_STATISTICS - onig_statistics_init(); -#endif - - THREAD_ATOMIC_END; - return 0; -} - - -extern int -onig_end(void) -{ - THREAD_ATOMIC_START; - -#ifdef ONIG_DEBUG_STATISTICS - onig_print_statistics(stderr); -#endif - -#ifdef USE_SHARED_CCLASS_TABLE - onig_free_shared_cclass_table(); -#endif - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - onig_free_node_list(); -#endif - - onig_inited = 0; - - THREAD_ATOMIC_END; - THREAD_SYSTEM_END; - return 0; -} -#endif //ENABLE_REGEXP - -#ifdef INCLUDE_ENCODING -extern int -onig_is_in_code_range(const UChar* p, OnigCodePoint code) -{ - OnigCodePoint n, *data; - OnigCodePoint low, high, x; - - GET_CODE_POINT(n, p); - data = (OnigCodePoint* )p; - data++; - - for (low = 0, high = n; low < high; ) { - x = (low + high) >> 1; - if (code > data[x * 2 + 1]) - low = x + 1; - else - high = x; - } - - return ((low < n && code >= data[low * 2]) ? 1 : 0); -} -#endif //INCLUDE_ENCODING - -#ifdef ENABLE_REGEXP -extern int -onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); - } - - if (IS_NCCLASS_NOT(cc)) - return !found; - else - return found; -} - -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int len; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - len = 2; - } - else { - len = ONIGENC_CODE_TO_MBCLEN(enc, code); - } - return onig_is_code_in_cc_len(len, code, cc); -} - - -#ifdef ONIG_DEBUG - -/* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 -#define ARG_STATE_CHECK 6 - -OnigOpInfoType OnigOpInfo[] = { - { OP_FINISH, "finish", ARG_NON }, - { OP_END, "end", ARG_NON }, - { OP_EXACT1, "exact1", ARG_SPECIAL }, - { OP_EXACT2, "exact2", ARG_SPECIAL }, - { OP_EXACT3, "exact3", ARG_SPECIAL }, - { OP_EXACT4, "exact4", ARG_SPECIAL }, - { OP_EXACT5, "exact5", ARG_SPECIAL }, - { OP_EXACTN, "exactn", ARG_SPECIAL }, - { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, - { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, - { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, - { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, - { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, - { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, - { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, - { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, - { OP_CCLASS, "cclass", ARG_SPECIAL }, - { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, - { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, - { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, - { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, - { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, - { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, - { OP_ANYCHAR, "anychar", ARG_NON }, - { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, - { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, - { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, - { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, - { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, - { OP_WORD, "word", ARG_NON }, - { OP_NOT_WORD, "not-word", ARG_NON }, - { OP_WORD_BOUND, "word-bound", ARG_NON }, - { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, - { OP_WORD_BEGIN, "word-begin", ARG_NON }, - { OP_WORD_END, "word-end", ARG_NON }, - { OP_BEGIN_BUF, "begin-buf", ARG_NON }, - { OP_END_BUF, "end-buf", ARG_NON }, - { OP_BEGIN_LINE, "begin-line", ARG_NON }, - { OP_END_LINE, "end-line", ARG_NON }, - { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, - { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, - { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, - { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, - { OP_PUSH_POS, "push-pos", ARG_NON }, - { OP_POP_POS, "pop-pos", ARG_NON }, - { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, - { OP_FAIL_POS, "fail-pos", ARG_NON }, - { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, - { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, - { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, - { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, - { OP_CALL, "call", ARG_ABSADDR }, - { OP_RETURN, "return", ARG_NON }, - { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, - { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_ML_STAR, - "state-check-anychar-ml*", ARG_STATE_CHECK }, - { -1, "", ARG_NON } -}; - -static char* -op2name(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].name; - } - return ""; -} - -static int -op2arg_type(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].arg_type; - } - return ARG_SPECIAL; -} - -static void -Indent(FILE* f, int indent) -{ - int i; - for (i = 0; i < indent; i++) putc(' ', f); -} - -static void -p_string(FILE* f, int len, UChar* s) -{ - fputs(":", f); - while (len-- > 0) { fputc(*s++, f); } -} - -static void -p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) -{ - int x = len * mb_len; - - fprintf(f, ":%d:", len); - while (x-- > 0) { fputc(*s++, f); } -} - -extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, - OnigEncoding enc) -{ - int i, n, arg_type; - RelAddrType addr; - LengthType len; - MemNumType mem; - StateCheckNumType scn; - OnigCodePoint code; - UChar *q; - - fprintf(f, "[%s", op2name(*bp)); - arg_type = op2arg_type(*bp); - if (arg_type != ARG_SPECIAL) { - bp++; - switch (arg_type) { - case ARG_NON: - break; - case ARG_RELADDR: - GET_RELADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); - break; - case ARG_ABSADDR: - GET_ABSADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); - break; - case ARG_LENGTH: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - case ARG_MEMNUM: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - case ARG_OPTION: - { - OnigOptionType option = *((OnigOptionType* )bp); - bp += SIZE_OPTION; - fprintf(f, ":%d", option); - } - break; - - case ARG_STATE_CHECK: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - fprintf(f, ":%d", scn); - break; - } - } - else { - switch (*bp++) { - case OP_EXACT1: - case OP_ANYCHAR_STAR_PEEK_NEXT: - case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p_string(f, 1, bp++); break; - case OP_EXACT2: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACT3: - p_string(f, 3, bp); bp += 3; break; - case OP_EXACT4: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACT5: - p_string(f, 5, bp); bp += 5; break; - case OP_EXACTN: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_EXACTMB2N1: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACTMB2N2: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACTMB2N3: - p_string(f, 6, bp); bp += 6; break; - case OP_EXACTMB2N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 2, bp); - bp += len * 2; - break; - case OP_EXACTMB3N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 3, bp); - bp += len * 3; - break; - case OP_EXACTMBN: - { - int mb_len; - - GET_LENGTH_INC(mb_len, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:%d:", mb_len, len); - n = len * mb_len; - while (n-- > 0) { fputc(*bp++, f); } - } - break; - - case OP_EXACT1_IC: - len = enclen(enc, bp, bpend); - p_string(f, len, bp); - bp += len; - break; - case OP_EXACTN_IC: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_CCLASS: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_MB: - case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d", (int )code, len); - break; - - case OP_CCLASS_MIX: - case OP_CCLASS_MIX_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d:%d", n, (int )code, len); - break; - - case OP_CCLASS_NODE: - { - CClassNode *cc; - - GET_POINTER_INC(cc, bp); - n = bitset_on_num(cc->bs); - fprintf(f, ":%u:%d", (unsigned int )cc, n); - } - break; - - case OP_BACKREFN_IC: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - - case OP_BACKREF_MULTI_IC: - case OP_BACKREF_MULTI: - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - break; - - case OP_BACKREF_WITH_LEVEL: - { - OnigOptionType option; - LengthType level; - - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); - GET_LENGTH_INC(level, bp); - fprintf(f, ":%d", level); - - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - } - break; - - case OP_REPEAT: - case OP_REPEAT_NG: - { - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:%d", mem, addr); - } - break; - - case OP_PUSH_OR_JUMP_EXACT1: - case OP_PUSH_IF_PEEK_NEXT: - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":(%d)", addr); - p_string(f, 1, bp); - bp += 1; - break; - - case OP_LOOK_BEHIND: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - - case OP_PUSH_LOOK_BEHIND_NOT: - GET_RELADDR_INC(addr, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:(%d)", len, addr); - break; - - case OP_STATE_CHECK_PUSH: - case OP_STATE_CHECK_PUSH_OR_JUMP: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:(%d)", scn, addr); - break; - - default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", - *--bp); - } - } - fputs("]", f); - if (nextp) *nextp = bp; -} - -static void -print_compiled_byte_code_list(FILE* f, regex_t* reg) -{ - int ncode; - UChar* bp = reg->p; - UChar* end = reg->p + reg->used; - - fprintf(f, "code length: %d\n", reg->used); - - ncode = 0; - while (bp < end) { - ncode++; - if (bp > reg->p) { - if (ncode % 5 == 0) - fprintf(f, "\n"); - else - fputs(" ", f); - } - onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc); - } - - fprintf(f, "\n"); -} - -static void -print_indent_tree(FILE* f, Node* node, int indent) -{ - int i, type; - int add = 3; - UChar* p; - - Indent(f, indent); - if (IS_NULL(node)) { - fprintf(f, "ERROR: null node!!!\n"); - exit (0); - } - - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: - if (NTYPE(node) == NT_LIST) - fprintf(f, "<list:%x>\n", (int )node); - else - fprintf(f, "<alt:%x>\n", (int )node); - - print_indent_tree(f, NCAR(node), indent + add); - while (IS_NOT_NULL(node = NCDR(node))) { - if (NTYPE(node) != type) { - fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); - exit(0); - } - print_indent_tree(f, NCAR(node), indent + add); - } - break; - - case NT_STR: - fprintf(f, "<string%s:%x>", - (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node); - for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { - if (*p >= 0x20 && *p < 0x7f) - fputc(*p, f); - else { - fprintf(f, " 0x%02x", *p); - } - } - break; - - case NT_CCLASS: - fprintf(f, "<cclass:%x>", (int )node); - if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node)->mbuf) { - BBuf* bbuf = NCCLASS(node)->mbuf; - for (i = 0; i < bbuf->used; i++) { - if (i > 0) fprintf(f, ","); - fprintf(f, "%0x", bbuf->p[i]); - } - } - break; - - case NT_CTYPE: - fprintf(f, "<ctype:%x> ", (int )node); - switch (NCTYPE(node)->ctype) { - case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->is_not != 0) - fputs("not word", f); - else - fputs("word", f); - break; - - default: - fprintf(f, "ERROR: undefined ctype.\n"); - exit(0); - } - break; - - case NT_CANY: - fprintf(f, "<anychar:%x>", (int )node); - break; - - case NT_ANCHOR: - fprintf(f, "<anchor:%x> ", (int )node); - switch (NANCHOR(node)->type) { - case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; - case ANCHOR_END_BUF: fputs("end buf", f); break; - case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; - case ANCHOR_END_LINE: fputs("end line", f); break; - case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break; - case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break; - - case ANCHOR_WORD_BOUND: fputs("word bound", f); break; - case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break; -#ifdef USE_WORD_BEGIN_END - case ANCHOR_WORD_BEGIN: fputs("word begin", f); break; - case ANCHOR_WORD_END: fputs("word end", f); break; -#endif - case ANCHOR_PREC_READ: fputs("prec read", f); break; - case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break; - case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break; - case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break; - - default: - fprintf(f, "ERROR: undefined anchor type.\n"); - break; - } - break; - - case NT_BREF: - { - int* p; - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - fprintf(f, "<backref:%x>", (int )node); - for (i = 0; i < br->back_num; i++) { - if (i > 0) fputs(", ", f); - fprintf(f, "%d", p[i]); - } - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - { - CallNode* cn = NCALL(node); - fprintf(f, "<call:%x>", (int )node); - p_string(f, cn->name_end - cn->name, cn->name); - } - break; -#endif - - case NT_QTFR: - fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node, - NQTFR(node)->lower, NQTFR(node)->upper, - (NQTFR(node)->greedy ? "" : "?")); - print_indent_tree(f, NQTFR(node)->target, indent + add); - break; - - case NT_ENCLOSE: - fprintf(f, "<enclose:%x> ", (int )node); - switch (NENCLOSE(node)->type) { - case ENCLOSE_OPTION: - fprintf(f, "option:%d\n", NENCLOSE(node)->option); - print_indent_tree(f, NENCLOSE(node)->target, indent + add); - break; - case ENCLOSE_MEMORY: - fprintf(f, "memory:%d", NENCLOSE(node)->regnum); - break; - case ENCLOSE_STOP_BACKTRACK: - fprintf(f, "stop-bt"); - break; - - default: - break; - } - fprintf(f, "\n"); - print_indent_tree(f, NENCLOSE(node)->target, indent + add); - break; - - default: - fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); - break; - } - - if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && - type != NT_ENCLOSE) - fprintf(f, "\n"); - fflush(f); -} -#endif /* ONIG_DEBUG */ - -#ifdef ONIG_DEBUG_PARSE_TREE -static void -print_tree(FILE* f, Node* node) -{ - print_indent_tree(f, node, 0); -} -#endif -#endif //ENABLE_REGEXP diff --git a/src/regenc.c b/src/regenc.c deleted file mode 100644 index 4cc496782..000000000 --- a/src/regenc.c +++ /dev/null @@ -1,901 +0,0 @@ -/********************************************************************** - regenc.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "mruby.h" -#ifdef INCLUDE_ENCODING -#include <string.h> -#include "regint.h" - -OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; - -extern int -onigenc_init(void) -{ - return 0; -} - -extern OnigEncoding -onigenc_get_default_encoding(void) -{ - return OnigEncDefaultCharEncoding; -} - -extern int -onigenc_set_default_encoding(OnigEncoding enc) -{ - OnigEncDefaultCharEncoding = enc; - return 0; -} - -extern int -onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) -{ - int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); - if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) - return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); - else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) - return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); - return 1; -} - -extern UChar* -onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) -{ - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); - if (p < s) { - p += enclen(enc, p, end); - } - return p; -} - -extern UChar* -onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, - const UChar* start, const UChar* s, const UChar* end, const UChar** prev) -{ - UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); - - if (p < s) { - if (prev) *prev = (const UChar* )p; - p += enclen(enc, p, end); - } - else { - if (prev) *prev = (const UChar* )NULL; /* Sorry */ - } - return p; -} - -extern UChar* -onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) -{ - if (s <= start) - return (UChar* )NULL; - - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); -} - -extern UChar* -onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n) -{ - while (ONIG_IS_NOT_NULL(s) && n-- > 0) { - if (s <= start) - return (UChar* )NULL; - - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); - } - return (UChar* )s; -} - -extern UChar* -onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) -{ - UChar* q = (UChar* )p; - while (n-- > 0) { - q += ONIGENC_MBC_ENC_LEN(enc, q, end); - } - return (q <= end ? q : NULL); -} - -extern int -onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) -{ - int n = 0; - UChar* q = (UChar* )p; - - while (q < end) { - q += ONIGENC_MBC_ENC_LEN(enc, q, end); - n++; - } - return n; -} - -extern int -onigenc_strlen_null(OnigEncoding enc, const UChar* s) -{ - int n = 0; - UChar* p = (UChar* )s; - UChar* e; - - while (1) { - if (*p == '\0') { - UChar* q; - int len = ONIGENC_MBC_MINLEN(enc); - - if (len == 1) return n; - q = p + 1; - while (len > 1) { - if (*q != '\0') break; - q++; - len--; - } - if (len == 1) return n; - } - e = p + ONIGENC_MBC_MAXLEN(enc); - p += ONIGENC_MBC_ENC_LEN(enc, p, e); - n++; - } -} - -extern int -onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) -{ - UChar* start = (UChar* )s; - UChar* p = (UChar* )s; - UChar* e; - - while (1) { - if (*p == '\0') { - UChar* q; - int len = ONIGENC_MBC_MINLEN(enc); - - if (len == 1) return (int )(p - start); - q = p + 1; - while (len > 1) { - if (*q != '\0') break; - q++; - len--; - } - if (len == 1) return (int )(p - start); - } - e = p + ONIGENC_MBC_MAXLEN(enc); - p += ONIGENC_MBC_ENC_LEN(enc, p, e); - } -} - -const UChar OnigEncAsciiToLowerCaseTable[] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; - -#ifdef USE_UPPER_CASE_TABLE -const UChar OnigEncAsciiToUpperCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', - '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', -}; -#endif - -const unsigned short OnigEncAsciiCtypeTable[256] = { - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, - 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, - 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, - 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, - 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, - 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, - 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 -}; - -const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337', - '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', - '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', - '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', - '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' -}; - -#ifdef USE_UPPER_CASE_TABLE -const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137', - '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107', - '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117', - '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127', - '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177', - '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', - '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', - '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', - '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', - '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', - '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', - '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', - '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', - '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', - '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', - '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367', - '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377', -}; -#endif - -extern void -onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) -{ - /* nothing */ - /* obsoleted. */ -} - -extern UChar* -onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) -{ - return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); -} - -const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { - { 0x41, 0x61 }, - { 0x42, 0x62 }, - { 0x43, 0x63 }, - { 0x44, 0x64 }, - { 0x45, 0x65 }, - { 0x46, 0x66 }, - { 0x47, 0x67 }, - { 0x48, 0x68 }, - { 0x49, 0x69 }, - { 0x4a, 0x6a }, - { 0x4b, 0x6b }, - { 0x4c, 0x6c }, - { 0x4d, 0x6d }, - { 0x4e, 0x6e }, - { 0x4f, 0x6f }, - { 0x50, 0x70 }, - { 0x51, 0x71 }, - { 0x52, 0x72 }, - { 0x53, 0x73 }, - { 0x54, 0x74 }, - { 0x55, 0x75 }, - { 0x56, 0x76 }, - { 0x57, 0x77 }, - { 0x58, 0x78 }, - { 0x59, 0x79 }, - { 0x5a, 0x7a } -}; - -extern int -onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg, - OnigEncoding enc ARG_UNUSED) -{ - OnigCodePoint code; - int i, r; - - for (i = 0; - i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); - i++) { - code = OnigAsciiLowerMap[i].to; - r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); - if (r != 0) return r; - - code = OnigAsciiLowerMap[i].from; - r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg); - if (r != 0) return r; - } - - return 0; -} - -extern int -onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) -{ - if (0x41 <= *p && *p <= 0x5a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p + 0x20); - return 1; - } - else if (0x61 <= *p && *p <= 0x7a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p - 0x20); - return 1; - } - else - return 0; -} - -static int -ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - OnigCodePoint ss[] = { 0x73, 0x73 }; - - return (*f)((OnigCodePoint )0xdf, ss, 2, arg); -} - -extern int -onigenc_apply_all_case_fold_with_map(int map_size, - const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag, - OnigApplyAllCaseFoldFunc f, void* arg) -{ - OnigCodePoint code; - int i, r; - - r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0); - if (r != 0) return r; - - for (i = 0; i < map_size; i++) { - code = map[i].to; - r = (*f)(map[i].from, &code, 1, arg); - if (r != 0) return r; - - code = map[i].from; - r = (*f)(map[i].to, &code, 1, arg); - if (r != 0) return r; - } - - if (ess_tsett_flag != 0) - return ss_apply_all_case_fold(flag, f, arg); - - return 0; -} - -extern int -onigenc_get_case_fold_codes_by_str_with_map(int map_size, - const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, - const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) -{ - if (0x41 <= *p && *p <= 0x5a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p + 0x20); - if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { - /* SS */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; - } - else if (0x61 <= *p && *p <= 0x7a) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = (OnigCodePoint )(*p - 0x20); - if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { - /* ss */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; - } - else if (*p == 0xdf && ess_tsett_flag != 0) { - items[0].byte_len = 1; - items[0].code_len = 2; - items[0].code[0] = (OnigCodePoint )'s'; - items[0].code[1] = (OnigCodePoint )'s'; - - items[1].byte_len = 1; - items[1].code_len = 2; - items[1].code[0] = (OnigCodePoint )'S'; - items[1].code[1] = (OnigCodePoint )'S'; - - items[2].byte_len = 1; - items[2].code_len = 2; - items[2].code[0] = (OnigCodePoint )'s'; - items[2].code[1] = (OnigCodePoint )'S'; - - items[3].byte_len = 1; - items[3].code_len = 2; - items[3].code[0] = (OnigCodePoint )'S'; - items[3].code[1] = (OnigCodePoint )'s'; - - return 4; - } - else { - int i; - - for (i = 0; i < map_size; i++) { - if (*p == map[i].from) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].to; - return 1; - } - else if (*p == map[i].to) { - items[0].byte_len = 1; - items[0].code_len = 1; - items[0].code[0] = map[i].from; - return 1; - } - } - } - - return 0; -} - - -extern int -onigenc_not_support_get_ctype_code_range(OnigCtype ctype, - OnigCodePoint* sb_out, const OnigCodePoint* ranges[], - OnigEncoding enc) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) -{ - if (p < end) { - if (*p == 0x0a) return 1; - } - return 0; -} - -/* for single byte encodings */ -extern int -onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, - const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED) -{ - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); - - (*p)++; - return 1; /* return byte length of converted char to lower */ -} - -extern int -onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) -{ - return 1; -} - -extern OnigCodePoint -onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) -{ - return (OnigCodePoint )(*p); -} - -extern int -onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) -{ - return 1; -} - -extern int -onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) -{ - *buf = (UChar )(code & 0xff); - return 1; -} - -extern UChar* -onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s, - const UChar* end, - OnigEncoding enc ARG_UNUSED) -{ - return (UChar* )s; -} - -extern int -onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) -{ - return TRUE; -} - -extern int -onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED, - OnigEncoding enc ARG_UNUSED) -{ - return FALSE; -} - -extern int -onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, - OnigEncoding enc ARG_UNUSED) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; -} - -extern OnigCodePoint -onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) -{ - int c, i, len; - OnigCodePoint n; - - len = enclen(enc, p, end); - n = (OnigCodePoint )(*p++); - if (len == 1) return n; - - for (i = 1; i < len; i++) { - if (p >= end) break; - c = *p++; - n <<= 8; n += c; - } - return n; -} - -extern int -onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, - const UChar** pp, const UChar* end ARG_UNUSED, - UChar* lower) -{ - int len; - const UChar *p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); - (*pp)++; - return 1; - } - else { - int i; - - len = enclen(enc, p, end); - for (i = 0; i < len; i++) { - *lower++ = *p++; - } - (*pp) += len; - return len; /* return byte length of converted to lower char */ - } -} - -extern int -onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) -{ - if ((code & 0xff00) != 0) return 2; - else return 1; -} - -extern int -onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) -{ - if ((code & 0xff000000) != 0) return 4; - else if ((code & 0xff0000) != 0) return 3; - else if ((code & 0xff00) != 0) return 2; - else return 1; -} - -extern int -onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff00) != 0) { - *p++ = (UChar )((code >> 8) & 0xff); - } - *p++ = (UChar )(code & 0xff); - - if (enclen(enc, buf, p) != (p - buf)) - return ONIGERR_INVALID_CODE_POINT_VALUE; - return (int)(p - buf); -} - -extern int -onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) -{ - UChar *p = buf; - - if ((code & 0xff000000) != 0) { - *p++ = (UChar )((code >> 24) & 0xff); - } - if ((code & 0xff0000) != 0 || p != buf) { - *p++ = (UChar )((code >> 16) & 0xff); - } - if ((code & 0xff00) != 0 || p != buf) { - *p++ = (UChar )((code >> 8) & 0xff); - } - *p++ = (UChar )(code & 0xff); - - if (enclen(enc, buf, p) != (p - buf)) - return ONIGERR_INVALID_CODE_POINT_VALUE; - return (int)(p - buf); -} - -extern int -onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) -{ - static const PosixBracketEntryType PBS[] = { - PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM), - PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA), - PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK), - PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL), - PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT), - PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH), - PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER), - PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT), - PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT), - PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE), - PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER), - PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT), - PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII), - PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD), - }; - - const PosixBracketEntryType *pb, *pbe; - int len; - - len = onigenc_strlen(enc, p, end); - for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) { - if (len == pb->len && - onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) - return pb->ctype; - } - - return ONIGERR_INVALID_CHAR_PROPERTY_NAME; -} - -extern int -onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { - return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -extern int -onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, - unsigned int ctype) -{ - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { - if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { - return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - } - } - - return FALSE; -} - -extern int -onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, - const UChar* sascii /* ascii */, int n) -{ - int x, c; - - while (n-- > 0) { - if (p >= end) return (int )(*sascii); - - c = (int )ONIGENC_MBC_TO_CODE(enc, p, end); - x = *sascii - c; - if (x) return x; - - sascii++; - p += enclen(enc, p, end); - } - return 0; -} - -/* Property management */ -static int -resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize) -{ - size_t size; - const OnigCodePoint **list = *plist; - - size = sizeof(OnigCodePoint*) * new_size; - if (IS_NULL(list)) { - list = (const OnigCodePoint** )xmalloc(size); - } - else { - list = (const OnigCodePoint** )xrealloc((void* )list, size); - } - - if (IS_NULL(list)) return ONIGERR_MEMORY; - - *plist = list; - *psize = new_size; - - return 0; -} - -extern int -onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, - hash_table_type **table, const OnigCodePoint*** plist, int *pnum, - int *psize) -{ -#define PROP_INIT_SIZE 16 - - int r; - - if (*psize <= *pnum) { - int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2); - r = resize_property_list(new_size, plist, psize); - if (r != 0) return r; - } - - (*plist)[*pnum] = prop; - - if (ONIG_IS_NULL(*table)) { - *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE); - if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY; - } - - *pnum = *pnum + 1; - onig_st_insert_strend(*table, name, name + strlen((char* )name), - (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE)); - return 0; -} - -extern int -onigenc_property_list_init(int (*f)(void)) -{ - int r; - - THREAD_ATOMIC_START; - - r = f(); - - THREAD_ATOMIC_END; - return r; -} -#endif //INCLUDE_ENCODING diff --git a/src/regenc.h b/src/regenc.h deleted file mode 100644 index 4e827d181..000000000 --- a/src/regenc.h +++ /dev/null @@ -1,203 +0,0 @@ -#ifndef ONIGURUMA_REGENC_H -#define ONIGURUMA_REGENC_H -/********************************************************************** - regenc.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <stdio.h> -#include <stdarg.h> -#define RUBY - -#ifndef mrb_compile_warn -#define mrb_compile_warn(a,b,c,d) printf(c,d) -#endif - -#ifndef REGINT_H -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif -#endif -#include "oniguruma.h" - -typedef struct { - OnigCodePoint from; - OnigCodePoint to; -} OnigPairCaseFoldCodes; - - -#ifndef ARG_UNUSED -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif -#endif - -#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) -#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL -#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) - -#define enclen(enc,p,e) ((enc->max_enc_len == enc->min_enc_len) ? enc->min_enc_len : ONIGENC_MBC_ENC_LEN(enc,p,e)) - -/* character types bit flag */ -#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) -#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) -#define BIT_CTYPE_BLANK (1<< ONIGENC_CTYPE_BLANK) -#define BIT_CTYPE_CNTRL (1<< ONIGENC_CTYPE_CNTRL) -#define BIT_CTYPE_DIGIT (1<< ONIGENC_CTYPE_DIGIT) -#define BIT_CTYPE_GRAPH (1<< ONIGENC_CTYPE_GRAPH) -#define BIT_CTYPE_LOWER (1<< ONIGENC_CTYPE_LOWER) -#define BIT_CTYPE_PRINT (1<< ONIGENC_CTYPE_PRINT) -#define BIT_CTYPE_PUNCT (1<< ONIGENC_CTYPE_PUNCT) -#define BIT_CTYPE_SPACE (1<< ONIGENC_CTYPE_SPACE) -#define BIT_CTYPE_UPPER (1<< ONIGENC_CTYPE_UPPER) -#define BIT_CTYPE_XDIGIT (1<< ONIGENC_CTYPE_XDIGIT) -#define BIT_CTYPE_WORD (1<< ONIGENC_CTYPE_WORD) -#define BIT_CTYPE_ALNUM (1<< ONIGENC_CTYPE_ALNUM) -#define BIT_CTYPE_ASCII (1<< ONIGENC_CTYPE_ASCII) - -#define CTYPE_TO_BIT(ctype) (1<<(ctype)) -#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \ - ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\ - (ctype) == ONIGENC_CTYPE_PRINT) - - -typedef struct { - const UChar *name; - int ctype; - short int len; -} PosixBracketEntryType; - -#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)} - -/* #define USE_CRNL_AS_LINE_TERMINATOR */ -#define USE_UNICODE_PROPERTIES -/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */ -/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ - - -#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII - -/* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc); -ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc); -ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg); -ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc); -ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc); - -/* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc); -ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc); -ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc); -ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc); -ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc); -ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc); -ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc); -ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc); - -/* methods for multi byte encoding */ -ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end); -ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower); -ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc); -ONIG_EXTERN int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf); -ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end); -ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end); -ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype); -ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc); -ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf); -ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype); - - -/* in enc/unicode.c */ -ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc); -ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]); -ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]); -ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold); -ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc); - - -#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) -#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc) - -#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ - OnigEncISO_8859_1_ToLowerCaseTable[c] -#define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ - OnigEncISO_8859_1_ToUpperCaseTable[c] - -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; - -ONIG_EXTERN int -onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n); -ONIG_EXTERN UChar* -onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n); - -/* defined in regexec.c, but used in enc/xxx.c */ -extern int onig_is_in_code_range (const UChar* p, OnigCodePoint code); - -ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; - -#define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) -#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] -#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] -#define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \ - ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0) -#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \ - (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ - ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) - -#ifdef ONIG_ENC_REGISTER -extern int ONIG_ENC_REGISTER(const char *, OnigEncodingType*); -#define OnigEncodingName(n) encoding_##n -#define OnigEncodingDeclare(n) static OnigEncodingType OnigEncodingName(n) -#define OnigEncodingDefine(f,n) \ - OnigEncodingDeclare(n); \ - void Init_##f(void) { \ - ONIG_ENC_REGISTER(OnigEncodingName(n).name, \ - &OnigEncodingName(n)); \ - } \ - OnigEncodingDeclare(n) -#else -#define OnigEncodingName(n) OnigEncoding##n -#define OnigEncodingDeclare(n) OnigEncodingType OnigEncodingName(n) -#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n) -#endif - -/* macros for define replica encoding and encoding alias */ -#define ENC_REPLICATE(name, orig) -#define ENC_ALIAS(name, orig) -#define ENC_DUMMY(name) - -#endif /* ONIGURUMA_REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c deleted file mode 100644 index df60b49cc..000000000 --- a/src/regerror.c +++ /dev/null @@ -1,375 +0,0 @@ -/********************************************************************** - regerror.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "mruby.h" -#ifdef ENABLE_REGEXP -#include <string.h> -#include "regint.h" -#include <stdio.h> /* for vsnprintf() */ -#include <stdarg.h> - -extern UChar* -onig_error_code_to_format(int code) -{ - const char *p; - - if (code >= 0) return (UChar* )0; - - switch (code) { - case ONIG_MISMATCH: - p = "mismatch"; break; - case ONIG_NO_SUPPORT_CONFIG: - p = "no support in this configuration"; break; - case ONIGERR_MEMORY: - p = "failed to allocate memory"; break; - case ONIGERR_MATCH_STACK_LIMIT_OVER: - p = "match-stack limit over"; break; - case ONIGERR_TYPE_BUG: - p = "undefined type (bug)"; break; - case ONIGERR_PARSER_BUG: - p = "internal parser error (bug)"; break; - case ONIGERR_STACK_BUG: - p = "stack error (bug)"; break; - case ONIGERR_UNDEFINED_BYTECODE: - p = "undefined bytecode (bug)"; break; - case ONIGERR_UNEXPECTED_BYTECODE: - p = "unexpected bytecode (bug)"; break; - case ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED: - p = "default multibyte-encoding is not setted"; break; - case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR: - p = "can't convert to wide-char on specified multibyte-encoding"; break; - case ONIGERR_INVALID_ARGUMENT: - p = "invalid argument"; break; - case ONIGERR_END_PATTERN_AT_LEFT_BRACE: - p = "end pattern at left brace"; break; - case ONIGERR_END_PATTERN_AT_LEFT_BRACKET: - p = "end pattern at left bracket"; break; - case ONIGERR_EMPTY_CHAR_CLASS: - p = "empty char-class"; break; - case ONIGERR_PREMATURE_END_OF_CHAR_CLASS: - p = "premature end of char-class"; break; - case ONIGERR_END_PATTERN_AT_ESCAPE: - p = "end pattern at escape"; break; - case ONIGERR_END_PATTERN_AT_META: - p = "end pattern at meta"; break; - case ONIGERR_END_PATTERN_AT_CONTROL: - p = "end pattern at control"; break; - case ONIGERR_META_CODE_SYNTAX: - p = "invalid meta-code syntax"; break; - case ONIGERR_CONTROL_CODE_SYNTAX: - p = "invalid control-code syntax"; break; - case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE: - p = "char-class value at end of range"; break; - case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE: - p = "char-class value at start of range"; break; - case ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS: - p = "unmatched range specifier in char-class"; break; - case ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED: - p = "target of repeat operator is not specified"; break; - case ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID: - p = "target of repeat operator is invalid"; break; - case ONIGERR_NESTED_REPEAT_OPERATOR: - p = "nested repeat operator"; break; - case ONIGERR_UNMATCHED_CLOSE_PARENTHESIS: - p = "unmatched close parenthesis"; break; - case ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS: - p = "end pattern with unmatched parenthesis"; break; - case ONIGERR_END_PATTERN_IN_GROUP: - p = "end pattern in group"; break; - case ONIGERR_UNDEFINED_GROUP_OPTION: - p = "undefined group option"; break; - case ONIGERR_INVALID_POSIX_BRACKET_TYPE: - p = "invalid POSIX bracket type"; break; - case ONIGERR_INVALID_LOOK_BEHIND_PATTERN: - p = "invalid pattern in look-behind"; break; - case ONIGERR_INVALID_REPEAT_RANGE_PATTERN: - p = "invalid repeat range {lower,upper}"; break; - case ONIGERR_TOO_BIG_NUMBER: - p = "too big number"; break; - case ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE: - p = "too big number for repeat range"; break; - case ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE: - p = "upper is smaller than lower in repeat range"; break; - case ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS: - p = "empty range in char class"; break; - case ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE: - p = "mismatch multibyte code length in char-class range"; break; - case ONIGERR_TOO_MANY_MULTI_BYTE_RANGES: - p = "too many multibyte code ranges are specified"; break; - case ONIGERR_TOO_SHORT_MULTI_BYTE_STRING: - p = "too short multibyte code string"; break; - case ONIGERR_TOO_BIG_BACKREF_NUMBER: - p = "too big backref number"; break; - case ONIGERR_INVALID_BACKREF: -#ifdef USE_NAMED_GROUP - p = "invalid backref number/name"; break; -#else - p = "invalid backref number"; break; -#endif - case ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED: - p = "numbered backref/call is not allowed. (use name)"; break; - case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE: - p = "too big wide-char value"; break; - case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: - p = "too long wide-char value"; break; - case ONIGERR_INVALID_CODE_POINT_VALUE: - p = "invalid code point value"; break; - case ONIGERR_EMPTY_GROUP_NAME: - p = "group name is empty"; break; - case ONIGERR_INVALID_GROUP_NAME: - p = "invalid group name <%n>"; break; - case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: -#ifdef USE_NAMED_GROUP - p = "invalid char in group name <%n>"; break; -#else - p = "invalid char in group number <%n>"; break; -#endif - case ONIGERR_UNDEFINED_NAME_REFERENCE: - p = "undefined name <%n> reference"; break; - case ONIGERR_UNDEFINED_GROUP_REFERENCE: - p = "undefined group <%n> reference"; break; - case ONIGERR_MULTIPLEX_DEFINED_NAME: - p = "multiplex defined name <%n>"; break; - case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: - p = "multiplex definition name <%n> call"; break; - case ONIGERR_NEVER_ENDING_RECURSION: - p = "never ending recursion"; break; - case ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY: - p = "group number is too big for capture history"; break; - case ONIGERR_INVALID_CHAR_PROPERTY_NAME: - p = "invalid character property name {%n}"; break; - case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: - p = "not supported encoding combination"; break; - case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: - p = "invalid combination of options"; break; - case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: - p = "over thread pass limit count"; break; - - default: - p = "undefined error code"; break; - } - - return (UChar* )p; -} - -static void sprint_byte(char* s, unsigned int v) -{ - sprintf(s, "%02x", (v & 0377)); -} - -static void sprint_byte_with_x(char* s, unsigned int v) -{ - sprintf(s, "\\x%02x", (v & 0377)); -} - -static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, - UChar buf[], int buf_size, int *is_over) -{ - int len; - UChar *p; - OnigCodePoint code; - - if (ONIGENC_MBC_MINLEN(enc) > 1) { - p = s; - len = 0; - while (p < end) { - code = ONIGENC_MBC_TO_CODE(enc, p, end); - if (code >= 0x80) { - if (code > 0xffff && len + 10 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); - sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); - len += 10; - } - else if (len + 6 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); - len += 6; - } - else { - break; - } - } - else { - buf[len++] = (UChar )code; - } - - p += enclen(enc, p, end); - if (len >= buf_size) break; - } - - *is_over = ((p < end) ? 1 : 0); - } - else { - len = (int)MIN((end - s), buf_size); - xmemcpy(buf, s, (size_t )len); - *is_over = ((buf_size < (end - s)) ? 1 : 0); - } - - return len; -} - - -/* for ONIG_MAX_ERROR_MESSAGE_LEN */ -#define MAX_ERROR_PAR_LEN 30 - -extern int -onig_error_code_to_str(UChar* s, int code, ...) -{ - UChar *p, *q; - OnigErrorInfo* einfo; - size_t len; - int is_over; - UChar parbuf[MAX_ERROR_PAR_LEN]; - va_list vargs; - - va_start(vargs, code); - - switch (code) { - case ONIGERR_UNDEFINED_NAME_REFERENCE: - case ONIGERR_UNDEFINED_GROUP_REFERENCE: - case ONIGERR_MULTIPLEX_DEFINED_NAME: - case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL: - case ONIGERR_INVALID_GROUP_NAME: - case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: - case ONIGERR_INVALID_CHAR_PROPERTY_NAME: - einfo = va_arg(vargs, OnigErrorInfo*); - len = to_ascii(einfo->enc, einfo->par, einfo->par_end, - parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); - q = onig_error_code_to_format(code); - p = s; - while (*q != '\0') { - if (*q == '%') { - q++; - if (*q == 'n') { /* '%n': name */ - xmemcpy(p, parbuf, len); - p += len; - if (is_over != 0) { - xmemcpy(p, "...", 3); - p += 3; - } - q++; - } - else - goto normal_char; - } - else { - normal_char: - *p++ = *q++; - } - } - *p = '\0'; - len = p - s; - break; - - default: - q = onig_error_code_to_format(code); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, q); - xmemcpy(s, q, len); - s[len] = '\0'; - break; - } - - va_end(vargs); - return (int)len; -} - -void -onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, va_list args) -{ - size_t need; - int n, len; - UChar *p, *s, *bp; - UChar bs[6]; - - n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args); - - need = (pat_end - pat) * 4 + 4; - - if (n + need < (size_t)bufsize) { - strcat((char* )buf, ": /"); - s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); - - p = pat; - while (p < pat_end) { - if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p, pat_end); - while (len-- > 0) *s++ = *p++; - } - else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; - } - else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) { - len = enclen(enc, p, pat_end); - if (ONIGENC_MBC_MINLEN(enc) == 1) { - while (len-- > 0) *s++ = *p++; - } - else { /* for UTF16 */ - int blen; - - while (len-- > 0) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); - bp = bs; - while (blen-- > 0) *s++ = *bp++; - } - } - } - else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && - !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); - bp = bs; - while (len-- > 0) *s++ = *bp++; - } - else { - *s++ = *p++; - } - } - - *s++ = '/'; - *s = '\0'; - } -} - -void -onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, ...) -{ - va_list args; - va_start(args, fmt); - onig_vsnprintf_with_pattern(buf, bufsize, enc, - pat, pat_end, fmt, args); - va_end(args); -} -#endif //ENABLE_REGEXP diff --git a/src/regex.h b/src/regex.h deleted file mode 100644 index f3f797b00..000000000 --- a/src/regex.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -** regex.h - Regexp class -** -** See Copyright Notice in mruby.h -*/ - -#ifndef ONIGURUMA_REGEX_H -#define ONIGURUMA_REGEX_H 1 - -#if defined(__cplusplus) -extern "C" { -#endif - -#include "oniguruma.h" - -#ifndef ONIG_RUBY_M17N - -#define mbclen(p,e,enc) mrb_enc_mbclen((p),(e),(enc)) - -#endif /* ifndef ONIG_RUBY_M17N */ - -#if defined(__cplusplus) -} /* extern "C" { */ -#endif - -#endif /* ONIGURUMA_REGEX_H */ diff --git a/src/regexec.c b/src/regexec.c deleted file mode 100644 index d265cc803..000000000 --- a/src/regexec.c +++ /dev/null @@ -1,3757 +0,0 @@ -/********************************************************************** - regexec.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "mruby.h" -#ifdef ENABLE_REGEXP -#include <string.h> -#include "regint.h" - -/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - -#ifdef USE_CRNL_AS_LINE_TERMINATOR -#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ - (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ - ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) -#endif - -#ifdef USE_CAPTURE_HISTORY -static void history_tree_free(OnigCaptureTreeNode* node); - -static void -history_tree_clear(OnigCaptureTreeNode* node) -{ - int i; - - if (IS_NOT_NULL(node)) { - for (i = 0; i < node->num_childs; i++) { - if (IS_NOT_NULL(node->childs[i])) { - history_tree_free(node->childs[i]); - } - } - for (i = 0; i < node->allocated; i++) { - node->childs[i] = (OnigCaptureTreeNode* )0; - } - node->num_childs = 0; - node->beg = ONIG_REGION_NOTPOS; - node->end = ONIG_REGION_NOTPOS; - node->group = -1; - } -} - -static void -history_tree_free(OnigCaptureTreeNode* node) -{ - history_tree_clear(node); - xfree(node); -} - -static void -history_root_free(OnigRegion* r) -{ - if (IS_NOT_NULL(r->history_root)) { - history_tree_free(r->history_root); - r->history_root = (OnigCaptureTreeNode* )0; - } -} - -static OnigCaptureTreeNode* -history_node_new(void) -{ - OnigCaptureTreeNode* node; - - node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode)); - CHECK_NULL_RETURN(node); - node->childs = (OnigCaptureTreeNode** )0; - node->allocated = 0; - node->num_childs = 0; - node->group = -1; - node->beg = ONIG_REGION_NOTPOS; - node->end = ONIG_REGION_NOTPOS; - - return node; -} - -static int -history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child) -{ -#define HISTORY_TREE_INIT_ALLOC_SIZE 8 - - if (parent->num_childs >= parent->allocated) { - int n, i; - - if (IS_NULL(parent->childs)) { - n = HISTORY_TREE_INIT_ALLOC_SIZE; - parent->childs = - (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n); - } - else { - n = parent->allocated * 2; - parent->childs = - (OnigCaptureTreeNode** )xrealloc(parent->childs, - sizeof(OnigCaptureTreeNode*) * n); - } - CHECK_NULL_RETURN_MEMERR(parent->childs); - for (i = parent->allocated; i < n; i++) { - parent->childs[i] = (OnigCaptureTreeNode* )0; - } - parent->allocated = n; - } - - parent->childs[parent->num_childs] = child; - parent->num_childs++; - return 0; -} - -static OnigCaptureTreeNode* -history_tree_clone(OnigCaptureTreeNode* node) -{ - int i; - OnigCaptureTreeNode *clone, *child; - - clone = history_node_new(); - CHECK_NULL_RETURN(clone); - - clone->beg = node->beg; - clone->end = node->end; - for (i = 0; i < node->num_childs; i++) { - child = history_tree_clone(node->childs[i]); - if (IS_NULL(child)) { - history_tree_free(clone); - return (OnigCaptureTreeNode* )0; - } - history_tree_add_child(clone, child); - } - - return clone; -} - -extern OnigCaptureTreeNode* -onig_get_capture_tree(OnigRegion* region) -{ - return region->history_root; -} -#endif /* USE_CAPTURE_HISTORY */ - -extern void -onig_region_clear(OnigRegion* region) -{ - int i; - - for (i = 0; i < region->num_regs; i++) { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } -#ifdef USE_CAPTURE_HISTORY - history_root_free(region); -#endif -} - -extern int -onig_region_resize(OnigRegion* region, int n) -{ - region->num_regs = n; - - if (n < ONIG_NREGION) - n = ONIG_NREGION; - - if (region->allocated == 0) { - region->beg = (int* )xmalloc(n * sizeof(int)); - if (region->beg == 0) - return ONIGERR_MEMORY; - - region->end = (int* )xmalloc(n * sizeof(int)); - if (region->end == 0) { - xfree(region->beg); - return ONIGERR_MEMORY; - } - - region->allocated = n; - } - else if (region->allocated < n) { - int *tmp; - - region->allocated = 0; - tmp = (int* )xrealloc(region->beg, n * sizeof(int)); - if (tmp == 0) { - xfree(region->beg); - xfree(region->end); - return ONIGERR_MEMORY; - } - region->beg = tmp; - tmp = (int* )xrealloc(region->end, n * sizeof(int)); - if (tmp == 0) { - xfree(region->beg); - return ONIGERR_MEMORY; - } - region->end = tmp; - - if (region->beg == 0 || region->end == 0) - return ONIGERR_MEMORY; - - region->allocated = n; - } - - return 0; -} - -static int -onig_region_resize_clear(OnigRegion* region, int n) -{ - int r; - - r = onig_region_resize(region, n); - if (r != 0) return r; - onig_region_clear(region); - return 0; -} - -extern int -onig_region_set(OnigRegion* region, int at, int beg, int end) -{ - if (at < 0) return ONIGERR_INVALID_ARGUMENT; - - if (at >= region->allocated) { - int r = onig_region_resize(region, at + 1); - if (r < 0) return r; - } - - region->beg[at] = beg; - region->end[at] = end; - return 0; -} - -extern void -onig_region_init(OnigRegion* region) -{ - region->num_regs = 0; - region->allocated = 0; - region->beg = (int* )0; - region->end = (int* )0; - region->history_root = (OnigCaptureTreeNode* )0; -} - -extern OnigRegion* -onig_region_new(void) -{ - OnigRegion* r; - - r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); - if (r) - onig_region_init(r); - return r; -} - -extern void -onig_region_free(OnigRegion* r, int free_self) -{ - if (r) { - if (r->allocated > 0) { - if (r->beg) xfree(r->beg); - if (r->end) xfree(r->end); - r->allocated = 0; - } -#ifdef USE_CAPTURE_HISTORY - history_root_free(r); -#endif - if (free_self) xfree(r); - } -} - -extern void -onig_region_copy(OnigRegion* to, OnigRegion* from) -{ -#define RREGC_SIZE (sizeof(int) * from->num_regs) - int i; - - if (to == from) return; - - onig_region_resize(to, from->num_regs); - for (i = 0; i < from->num_regs; i++) { - to->beg[i] = from->beg[i]; - to->end[i] = from->end[i]; - } - to->num_regs = from->num_regs; - -#ifdef USE_CAPTURE_HISTORY - history_root_free(to); - - if (IS_NOT_NULL(from->history_root)) { - to->history_root = history_tree_clone(from->history_root); - } -#endif -} - - -/** stack **/ -#define INVALID_STACK_INDEX -1 - -/* stack type */ -/* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0002 -#define STK_POS_NOT 0x0003 -/* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 -/* avoided by normal-POP */ -#define STK_NULL_CHECK_START 0x3000 -#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_VOID 0x0a00 /* for fill a blank */ - -/* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define STK_MASK_TO_VOID_TARGET 0x10ff -#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ - -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ - (msa).best_len = ONIG_MISMATCH;\ -} while(0) -#else -#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ -} while(0) -#endif - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\ - (msa).state_check_buff = (void* )xmalloc(size);\ - CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\ - }\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ - } while(0) - -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif - - - -#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ - if (msa->stack_p) {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\ - stk_alloc = (OnigStackType* )(msa->stack_p);\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + msa->stack_n;\ - }\ - else {\ - alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\ - + sizeof(OnigStackType) * (stack_num));\ - stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\ - stk_base = stk_alloc;\ - stk = stk_base;\ - stk_end = stk_base + (stack_num);\ - }\ -} while(0) - -#define STACK_SAVE do{\ - if (stk_base != stk_alloc) {\ - msa->stack_p = stk_base;\ - msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\ - };\ -} while(0) - -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; - -extern unsigned int -onig_get_match_stack_limit_size(void) -{ - return MatchStackLimitSize; -} - -extern int -onig_set_match_stack_limit_size(unsigned int size) -{ - MatchStackLimitSize = size; - return 0; -} - -static int -stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, - OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa) -{ - size_t n; - OnigStackType *x, *stk_base, *stk_end, *stk; - - stk_base = *arg_stk_base; - stk_end = *arg_stk_end; - stk = *arg_stk; - - n = stk_end - stk_base; - if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) { - x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - xmemcpy(x, stk_base, n * sizeof(OnigStackType)); - n *= 2; - } - else { - unsigned int limit_size = MatchStackLimitSize; - n *= 2; - if (limit_size != 0 && n > limit_size) { - if ((unsigned int )(stk_end - stk_base) == limit_size) - return ONIGERR_MATCH_STACK_LIMIT_OVER; - else - n = limit_size; - } - x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n); - if (IS_NULL(x)) { - STACK_SAVE; - return ONIGERR_MEMORY; - } - } - *arg_stk = x + (stk - stk_base); - *arg_stk_base = x; - *arg_stk_end = x + n; - return 0; -} - -#define STACK_ENSURE(n) do {\ - if (stk_end - stk < (n)) {\ - int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\ - if (r != 0) { STACK_SAVE; return r; } \ - }\ -} while(0) - -#define STACK_AT(index) (stk_base + (index)) -#define GET_STACK_INDEX(stk) ((stk) - stk_base) - -#define STACK_PUSH_TYPE(stack_type) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - STACK_INC;\ -} while(0) - -#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (state_check_buff != NULL) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8)); \ - } - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (state_check_buff != NULL) {\ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ - }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ - stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) - -#define STACK_PUSH_REPEAT(id, pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT;\ - stk->u.repeat.num = (id);\ - stk->u.repeat.pcode = (pat);\ - stk->u.repeat.count = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_REPEAT_INC(sindex) do {\ - STACK_ENSURE(1);\ - stk->type = STK_REPEAT_INC;\ - stk->u.repeat_inc.si = (sindex);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_START(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_START;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END(mnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END;\ - stk->u.mem.num = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_MEM_END_MARK(mnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_MEM_END_MARK;\ - stk->u.mem.num = (mnum);\ - STACK_INC;\ -} while(0) - -#define STACK_GET_MEM_START(mnum, k) do {\ - int level = 0;\ - k = stk;\ - while (k > stk_base) {\ - k--;\ - if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->u.mem.num == (mnum)) {\ - level++;\ - }\ - else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) break;\ - level--;\ - }\ - }\ -} while(0) - -#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\ - int level = 0;\ - while (k < stk) {\ - if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\ - if (level == 0) (start) = k->u.mem.pstr;\ - level++;\ - }\ - else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\ - level--;\ - if (level == 0) {\ - (end) = k->u.mem.pstr;\ - break;\ - }\ - }\ - k++;\ - }\ -} while(0) - -#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_START;\ - stk->u.null_check.num = (cnum);\ - stk->u.null_check.pstr = (s);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_END;\ - stk->u.null_check.num = (cnum);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_CALL_FRAME(pat) do {\ - STACK_ENSURE(1);\ - stk->type = STK_CALL_FRAME;\ - stk->u.call_frame.ret_addr = (pat);\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_RETURN do {\ - STACK_ENSURE(1);\ - stk->type = STK_RETURN;\ - STACK_INC;\ -} while(0) - - -#ifdef ONIG_DEBUG -#define STACK_BASE_CHECK(p, at) \ - if ((p) < stk_base) {\ - fprintf(stderr, "at %s\n", at);\ - goto stack_error;\ - } -#else -#define STACK_BASE_CHECK(p, at) -#endif - -#define STACK_POP_ONE do {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ -} while(0) - -#define STACK_POP do {\ - switch (pop_level) {\ - case STACK_POP_LEVEL_FREE:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - case STACK_POP_LEVEL_MEM_START:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP 2"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - default:\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP 3"); \ - if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ - break;\ - }\ -} while(0) - -#define STACK_POP_TIL_POS_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ - if (stk->type == STK_POS_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ -} while(0) - -#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ - while (1) {\ - stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ - if (stk->type == STK_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ - mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ -} while(0) - -#define STACK_POS_END(k) do {\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_POS_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_POS) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_STOP_BT_END do {\ - OnigStackType *k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ - if (IS_TO_VOID_TARGET(k)) {\ - k->type = STK_VOID;\ - }\ - else if (k->type == STK_STOP_BT) {\ - k->type = STK_VOID;\ - break;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK(isnull,id,s) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - else level--;\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - level++;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (k->u.null_check.pstr != (s)) {\ - (isnull) = 0;\ - break;\ - }\ - else {\ - UChar* endp;\ - (isnull) = 1;\ - while (k < stk) {\ - if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ - }\ - }\ - k++;\ - }\ - break;\ - }\ - }\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - if (k->u.null_check.pstr != (s)) {\ - (isnull) = 0;\ - break;\ - }\ - else {\ - UChar* endp;\ - (isnull) = 1;\ - while (k < stk) {\ - if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ - }\ - }\ - k++;\ - }\ - break;\ - }\ - }\ - else {\ - level--;\ - }\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - if (k->u.null_check.num == (id)) level++;\ - }\ - }\ -} while(0) - -#define STACK_GET_REPEAT(id, k) do {\ - int level = 0;\ - k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ - if (k->type == STK_REPEAT) {\ - if (level == 0) {\ - if (k->u.repeat.num == (id)) {\ - break;\ - }\ - }\ - }\ - else if (k->type == STK_CALL_FRAME) level--;\ - else if (k->type == STK_RETURN) level++;\ - }\ -} while(0) - -#define STACK_RETURN(addr) do {\ - int level = 0;\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_RETURN"); \ - if (k->type == STK_CALL_FRAME) {\ - if (level == 0) {\ - (addr) = k->u.call_frame.ret_addr;\ - break;\ - }\ - else level--;\ - }\ - else if (k->type == STK_RETURN)\ - level++;\ - }\ -} while(0) - - -#define STRING_CMP(s1,s2,len) do {\ - while (len-- > 0) {\ - if (*s1++ != *s2++) goto fail;\ - }\ -} while(0) - -#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\ - if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ - goto fail; \ -} while(0) - -static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, - UChar* s1, UChar** ps2, int mblen, const UChar* text_end) -{ - UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - UChar *p1, *p2, *end1, *s2; - int len1, len2; - - s2 = *ps2; - end1 = s1 + mblen; - while (s1 < end1) { - len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1); - len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2); - if (len1 != len2) return 0; - p1 = buf1; - p2 = buf2; - while (len1-- > 0) { - if (*p1 != *p2) return 0; - p1++; - p2++; - } - } - - *ps2 = s2; - return 1; -} - -#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\ - is_fail = 0;\ - while (len-- > 0) {\ - if (*s1++ != *s2++) {\ - is_fail = 1; break;\ - }\ - }\ -} while(0) - -#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\ - if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \ - is_fail = 1; \ - else \ - is_fail = 0; \ -} while(0) - - -#define IS_EMPTY_STR (str == end) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#define DATA_ENSURE_CHECK1 (s < right_range) -#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) -#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#else -#define DATA_ENSURE_CHECK1 (s < end) -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#define DATA_ENSURE(n) if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - - -#ifdef USE_CAPTURE_HISTORY -static int -make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, - OnigStackType* stk_top, UChar* str, regex_t* reg) -{ - int n, r; - OnigCaptureTreeNode* child; - OnigStackType* k = *kp; - - while (k < stk_top) { - if (k->type == STK_MEM_START) { - n = k->u.mem.num; - if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { - child = history_node_new(); - CHECK_NULL_RETURN_MEMERR(child); - child->group = n; - child->beg = (int )(k->u.mem.pstr - str); - r = history_tree_add_child(node, child); - if (r != 0) return r; - *kp = (k + 1); - r = make_capture_history_tree(child, kp, stk_top, str, reg); - if (r != 0) return r; - - k = *kp; - child->end = (int )(k->u.mem.pstr - str); - } - } - else if (k->type == STK_MEM_END) { - if (k->u.mem.num == node->group) { - node->end = (int )(k->u.mem.pstr - str); - *kp = k; - return 0; - } - } - k++; - } - - return 1; /* 1: root node ending. */ -} -#endif - -#ifdef USE_BACKREF_WITH_LEVEL -static int mem_is_in_memp(int mem, int num, UChar* memp) -{ - int i; - MemNumType m; - - for (i = 0; i < num; i++) { - GET_MEMNUM_INC(m, memp); - if (mem == (int )m) return 1; - } - return 0; -} - -static int backref_match_at_nested_level(regex_t* reg - , OnigStackType* top, OnigStackType* stk_base - , int ignore_case, int case_fold_flag - , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) -{ - UChar *ss, *p, *pstart, *pend = NULL_UCHARP; - int level; - OnigStackType* k; - - level = 0; - k = top; - k--; - while (k >= stk_base) { - if (k->type == STK_CALL_FRAME) { - level--; - } - else if (k->type == STK_RETURN) { - level++; - } - else if (level == nest) { - if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { - if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ - p = pstart; - ss = *s; - - if (ignore_case != 0) { - if (string_cmp_ic(reg->enc, case_fold_flag, - pstart, &ss, (int )(pend - pstart), send) == 0) - return 0; /* or goto next_mem; */ - } - else { - while (p < pend) { - if (*p++ != *ss++) return 0; /* or goto next_mem; */ - } - } - - *s = ss; - return 1; - } - } - } - else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { - pend = k->u.mem.pstr; - } - } - } - k--; - } - - return 0; -} -#endif /* USE_BACKREF_WITH_LEVEL */ - - -#ifdef ONIG_DEBUG_STATISTICS - -#define USE_TIMEOFDAY - -#ifdef USE_TIMEOFDAY -#ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -static struct timeval ts, te; -#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0) -#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \ - (((te).tv_sec - (ts).tv_sec)*1000000)) -#else -#ifdef HAVE_SYS_TIMES_H -#include <sys/times.h> -#endif -static struct tms ts, te; -#define GETTIME(t) times(&(t)) -#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime) -#endif - -static int OpCounter[256]; -static int OpPrevCounter[256]; -static unsigned long OpTime[256]; -static int OpCurr = OP_FINISH; -static int OpPrevTarget = OP_FAIL; -static int MaxStackDepth = 0; - -#define MOP_IN(opcode) do {\ - if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ - OpCurr = opcode;\ - OpCounter[opcode]++;\ - GETTIME(ts);\ -} while(0) - -#define MOP_OUT do {\ - GETTIME(te);\ - OpTime[OpCurr] += TIMEDIFF(te, ts);\ -} while(0) - -extern void -onig_statistics_init(void) -{ - int i; - for (i = 0; i < 256; i++) { - OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0; - } - MaxStackDepth = 0; -} - -extern void -onig_print_statistics(FILE* f) -{ - int i; - fprintf(f, " count prev time\n"); - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); - } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); -} - -#define STACK_INC do {\ - stk++;\ - if (stk - stk_base > MaxStackDepth) \ - MaxStackDepth = stk - stk_base;\ -} while(0) - -#else -#define STACK_INC stk++ - -#define MOP_IN(opcode) -#define MOP_OUT -#endif - - -/* matching region of POSIX API */ -typedef int regoff_t; - -typedef struct { - regoff_t rm_so; - regoff_t rm_eo; -} posix_regmatch_t; - -/* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. */ -static long -match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) -{ - static const UChar FinishCode[] = { OP_FINISH }; - - int i, n, num_mem, best_len, pop_level; - LengthType tlen, tlen2; - MemNumType mem; - RelAddrType addr; - OnigOptionType option = reg->options; - OnigEncoding encode = reg->enc; - OnigCaseFoldType case_fold_flag = reg->case_fold_flag; - UChar *s, *q, *sbegin; - UChar *p = reg->p; - char *alloca_base; - OnigStackType *stk_alloc, *stk_base, *stk, *stk_end; - OnigStackType *stkp; /* used as any purpose. */ - OnigStackIndex si; - OnigStackIndex *repeat_stk; - OnigStackIndex *mem_start_stk, *mem_end_stk; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; -#endif - n = reg->num_repeat + reg->num_mem * 2; - - STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); - pop_level = reg->stack_pop_level; - num_mem = reg->num_mem; - repeat_stk = (OnigStackIndex* )alloca_base; - - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat); - mem_end_stk = mem_start_stk + num_mem; - mem_start_stk--; /* for index start from 1, - mem_start_stk[1]..mem_start_stk[num_mem] */ - mem_end_stk--; /* for index start from 1, - mem_end_stk[1]..mem_end_stk[num_mem] */ - for (i = 1; i <= num_mem; i++) { - mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; - } - -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", - (int )str, (int )end, (int )sstart, (int )sprev); - fprintf(stderr, "size: %d, start offset: %d\n", - (int )(end - str), (int )(sstart - str)); -#endif - - STACK_PUSH_ENSURED(STK_ALT, (UChar*)FinishCode); /* bottom stack */ - best_len = ONIG_MISMATCH; - s = (UChar*)sstart; - while (1) { -#ifdef ONIG_DEBUG_MATCH - if (s) { - UChar *q, *bp, buf[50]; - int len; - fprintf(stderr, "%4d> \"", (int )(s - str)); - bp = buf; - for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q, end); - while (len-- > 0) *bp++ = *q++; - } - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } - else { xmemcpy(bp, "\"", 1); bp += 1; } - *bp = 0; - fputs((char* )buf, stderr); - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - onig_print_compiled_byte_code(stderr, p, NULL, encode); - fprintf(stderr, "\n"); - } -#endif - - sbegin = s; - switch (*p++) { - case OP_END: MOP_IN(OP_END); - n = s - sstart; - if (n > best_len) { - OnigRegion* region; -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(option)) { - if (n > msa->best_len) { - msa->best_len = n; - msa->best_s = (UChar* )sstart; - } - else - goto end_best_len; - } -#endif - best_len = n; - region = msa->region; - if (region) { - region->beg[0] = sstart - str; - region->end[0] = s - str; - for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) - region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; - else - region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - - region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) - ? STACK_AT(mem_end_stk[i])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[i])) - str; - } - else { - region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS; - } - } - -#ifdef USE_CAPTURE_HISTORY - if (reg->capture_history != 0) { - int r; - OnigCaptureTreeNode* node; - - if (IS_NULL(region->history_root)) { - region->history_root = node = history_node_new(); - CHECK_NULL_RETURN_MEMERR(node); - } - else { - node = region->history_root; - history_tree_clear(node); - } - - node->group = 0; - node->beg = sstart - str; - node->end = s - str; - - stkp = stk_base; - r = make_capture_history_tree(region->history_root, &stkp, - stk, (UChar* )str, reg); - if (r < 0) { - best_len = r; /* error code */ - goto finish; - } - } -#endif /* USE_CAPTURE_HISTORY */ - } /* if (region) */ - } /* n > best_len */ - -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - end_best_len: -#endif - MOP_OUT; - - if (IS_FIND_CONDITION(option)) { - if (IS_FIND_NOT_EMPTY(option) && s == sstart) { - best_len = ONIG_MISMATCH; - goto fail; /* for retry */ - } - if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ - } - } - - /* default behavior: return first-matching result. */ - goto finish; - break; - - case OP_EXACT1: MOP_IN(OP_EXACT1); - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; - MOP_OUT; - break; - - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); - { - int len; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) { - goto fail; - } - p++; q++; - } - } - MOP_OUT; - break; - - case OP_EXACT2: MOP_IN(OP_EXACT2); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT3: MOP_IN(OP_EXACT3); - DATA_ENSURE(3); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT4: MOP_IN(OP_EXACT4); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACT5: MOP_IN(OP_EXACT5); - DATA_ENSURE(5); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - sprev = s; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTN: MOP_IN(OP_EXACTN); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen); - while (tlen-- > 0) { - if (*p++ != *s++) goto fail; - } - sprev = s - 1; - MOP_OUT; - continue; - break; - - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); - { - int len; - UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - GET_LENGTH_INC(tlen, p); - endp = p + tlen; - - while (p < endp) { - sprev = s; - DATA_ENSURE(1); - len = ONIGENC_MBC_CASE_FOLD(encode, - /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */ - case_fold_flag, - &s, end, lowbuf); - DATA_ENSURE(0); - q = lowbuf; - while (len-- > 0) { - if (*p != *q) goto fail; - p++; q++; - } - } - } - - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); - DATA_ENSURE(2); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - break; - - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); - DATA_ENSURE(4); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); - DATA_ENSURE(6); - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - sprev = s; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - MOP_OUT; - continue; - break; - - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 2); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 2; - MOP_OUT; - continue; - break; - - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); - GET_LENGTH_INC(tlen, p); - DATA_ENSURE(tlen * 3); - while (tlen-- > 0) { - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - 3; - MOP_OUT; - continue; - break; - - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); - GET_LENGTH_INC(tlen, p); /* mb-len */ - GET_LENGTH_INC(tlen2, p); /* string len */ - tlen2 *= tlen; - DATA_ENSURE(tlen2); - while (tlen2-- > 0) { - if (*p != *s) goto fail; - p++; s++; - } - sprev = s - tlen; - MOP_OUT; - continue; - break; - - case OP_CCLASS: MOP_IN(OP_CCLASS); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; - p += SIZE_BITSET; - s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; - break; - - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); - if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; - - cclass_mb: - GET_LENGTH_INC(tlen, p); - { - OnigCodePoint code; - UChar *ss; - int mb_len; - - DATA_ENSURE(1); - mb_len = enclen(encode, s, end); - DATA_ENSURE(mb_len); - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (! onig_is_in_code_range(p, code)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (! onig_is_in_code_range(q, code)) goto fail; -#endif - } - p += tlen; - MOP_OUT; - break; - - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { - p += SIZE_BITSET; - goto cclass_mb; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) == 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - MOP_OUT; - break; - - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); - DATA_ENSURE(1); - if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; - p += SIZE_BITSET; - s += enclen(encode, s, end); - MOP_OUT; - break; - - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { - s++; - GET_LENGTH_INC(tlen, p); - p += tlen; - goto cc_mb_not_success; - } - - cclass_mb_not: - GET_LENGTH_INC(tlen, p); - { - OnigCodePoint code; - UChar *ss; - int mb_len = enclen(encode, s, end); - - if (! DATA_ENSURE_CHECK(mb_len)) { - DATA_ENSURE(1); - s = (UChar* )end; - p += tlen; - goto cc_mb_not_success; - } - - ss = s; - s += mb_len; - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - if (onig_is_in_code_range(p, code)) goto fail; -#else - q = p; - ALIGNMENT_RIGHT(q); - if (onig_is_in_code_range(q, code)) goto fail; -#endif - } - p += tlen; - - cc_mb_not_success: - MOP_OUT; - break; - - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { - p += SIZE_BITSET; - goto cclass_mb_not; - } - else { - if (BITSET_AT(((BitSetRef )p), *s) != 0) - goto fail; - - p += SIZE_BITSET; - GET_LENGTH_INC(tlen, p); - p += tlen; - s++; - } - MOP_OUT; - break; - - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); - { - OnigCodePoint code; - void *node; - int mb_len; - UChar *ss; - - DATA_ENSURE(1); - GET_POINTER_INC(node, p); - mb_len = enclen(encode, s, end); - ss = s; - s += mb_len; - DATA_ENSURE(0); - code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; - } - MOP_OUT; - break; - - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); - DATA_ENSURE(1); - n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - s += n; - MOP_OUT; - break; - - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); - DATA_ENSURE(1); - n = enclen(encode, s, end); - DATA_ENSURE(n); - s += n; - MOP_OUT; - break; - - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); - while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); - while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); - n = enclen(encode, s, end); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; - - case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); - while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - p++; - MOP_OUT; - break; - - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); - while (DATA_ENSURE_CHECK1) { - if (*p == *s) { - STACK_PUSH_ALT(p + 1, s, sprev); - } - n = enclen(encode, s, end); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - p++; - MOP_OUT; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s, end); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s, end); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_WORD: MOP_IN(OP_WORD); - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - - s += enclen(encode, s, end); - MOP_OUT; - break; - - case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); - DATA_ENSURE(1); - if (ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - - s += enclen(encode, s, end); - MOP_OUT; - break; - - case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - MOP_OUT; - continue; - break; - - case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - MOP_OUT; - continue; - break; - -#ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - continue; - } - } - goto fail; - break; - - case OP_WORD_END: MOP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - continue; - } - } - goto fail; - break; -#endif - - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); - if (! ON_STR_BEGIN(s)) goto fail; - - MOP_OUT; - continue; - break; - - case OP_END_BUF: MOP_IN(OP_END_BUF); - if (! ON_STR_END(s)) goto fail; - - MOP_OUT; - continue; - break; - - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); - if (ON_STR_BEGIN(s)) { - if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; - continue; - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; - continue; - } - goto fail; - break; - - case OP_END_LINE: MOP_IN(OP_END_LINE); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; - continue; - } -#ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; - continue; - } -#endif - goto fail; - break; - - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); - if (ON_STR_END(s)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { -#endif - if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; - continue; -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - } -#endif - } - else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enclen(encode, s, end))) { - MOP_OUT; - continue; - } -#ifdef USE_CRNL_AS_LINE_TERMINATOR - else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - UChar* ss = s + enclen(encode, s); - ss += enclen(encode, ss); - if (ON_STR_END(ss)) { - MOP_OUT; - continue; - } - } -#endif - goto fail; - break; - - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); - if (s != msa->start) - goto fail; - - MOP_OUT; - continue; - break; - - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_START(mem, s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); - GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (OnigStackIndex )((void* )s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); - GET_MEMNUM_INC(mem, p); - STACK_PUSH_MEM_END(mem, s); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); - MOP_OUT; - continue; - break; - -#ifdef USE_SUBEXP_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); - GET_MEMNUM_INC(mem, p); - STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ - STACK_PUSH_MEM_END(mem, s); - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; - continue; - break; - - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); - GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); - STACK_GET_MEM_START(mem, stkp); - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); - else - mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); - - STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; - continue; - break; -#endif - - case OP_BACKREF1: MOP_IN(OP_BACKREF1); - mem = 1; - goto backref; - break; - - case OP_BACKREF2: MOP_IN(OP_BACKREF2); - mem = 2; - goto backref; - break; - - case OP_BACKREFN: MOP_IN(OP_BACKREFN); - GET_MEMNUM_INC(mem, p); - backref: - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP(pstart, s, n); - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - MOP_OUT; - continue; - } - break; - - case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); - GET_MEMNUM_INC(mem, p); - { - int len; - UChar *pstart, *pend; - - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - STRING_CMP_IC(case_fold_flag, pstart, &s, n, end); - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE(pstart, swork, n, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); - { - int len, is_fail; - UChar *pstart, *pend, *swork; - - GET_LENGTH_INC(tlen, p); - for (i = 0; i < tlen; i++) { - GET_MEMNUM_INC(mem, p); - - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) - pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; - else - pstart = (UChar* )((void* )mem_start_stk[mem]); - - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) - ? STACK_AT(mem_end_stk[mem])->u.mem.pstr - : (UChar* )((void* )mem_end_stk[mem])); - n = pend - pstart; - DATA_ENSURE(n); - sprev = s; - swork = s; - STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail); - if (is_fail) continue; - s = swork; - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - p += (SIZE_MEMNUM * (tlen - i - 1)); - break; /* success */ - } - if (i == tlen) goto fail; - MOP_OUT; - continue; - } - break; - -#ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_WITH_LEVEL: - { - int len; - OnigOptionType ic; - LengthType level; - - GET_OPTION_INC(ic, p); - GET_LENGTH_INC(level, p); - GET_LENGTH_INC(tlen, p); - - sprev = s; - if (backref_match_at_nested_level(reg, stk, stk_base, ic - , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev, end)) < s) - sprev += len; - - p += (SIZE_MEMNUM * tlen); - } - else - goto fail; - - MOP_OUT; - continue; - } - - break; -#endif - - case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_PUSH_NULL_CHECK_START(mem, s); - MOP_OUT; - continue; - break; - - case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - null_check_found: - /* empty loop founded, skip next instruction */ - switch (*p++) { - case OP_JUMP: - case OP_PUSH: - p += SIZE_RELADDR; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - case OP_REPEAT_INC_SG: - case OP_REPEAT_INC_NG_SG: - p += SIZE_MEMNUM; - break; - default: - goto unexpected_bytecode_error; - break; - } - } - } - MOP_OUT; - continue; - break; - -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - if (isnull == -1) goto fail; - goto null_check_found; - } - } - MOP_OUT; - continue; - break; -#endif - -#ifdef USE_SUBEXP_CALL - case OP_NULL_CHECK_END_MEMST_PUSH: - MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); - { - int isnull; - - GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); -#else - STACK_NULL_CHECK_REC(isnull, mem, s); -#endif - if (isnull) { -#ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", - (int )mem, (int )s); -#endif - if (isnull == -1) goto fail; - goto null_check_found; - } - else { - STACK_PUSH_NULL_CHECK_END(mem); - } - } - MOP_OUT; - continue; - break; -#endif - - case OP_JUMP: MOP_IN(OP_JUMP); - GET_RELADDR_INC(addr, p); - p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_PUSH: MOP_IN(OP_PUSH); - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; - continue; - break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - - case OP_POP: MOP_IN(OP_POP); - STACK_POP_ONE; - MOP_OUT; - continue; - break; - - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); - GET_RELADDR_INC(addr, p); - if (*p == *s && DATA_ENSURE_CHECK1) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - } - p += (addr + 1); - MOP_OUT; - continue; - break; - - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); - GET_RELADDR_INC(addr, p); - if (*p == *s) { - p++; - STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - } - p++; - MOP_OUT; - continue; - break; - - case OP_REPEAT: MOP_IN(OP_REPEAT); - { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); - } - } - MOP_OUT; - continue; - break; - - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); - { - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - GET_RELADDR_INC(addr, p); - - STACK_ENSURE(1); - repeat_stk[mem] = GET_STACK_INDEX(stk); - STACK_PUSH_REPEAT(mem, p); - - if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p, s, sprev); - p += addr; - } - } - MOP_OUT; - continue; - break; - - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - si = repeat_stk[mem]; - stkp = STACK_AT(si); - - repeat_inc: - stkp->u.repeat.count++; - if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { - /* end of repeat. Nothing to do. */ - } - else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(p, s, sprev); - p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */ - } - else { - p = stkp->u.repeat.pcode; - } - STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - goto repeat_inc; - break; - - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - si = repeat_stk[mem]; - stkp = STACK_AT(si); - - repeat_inc_ng: - stkp->u.repeat.count++; - if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { - if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { - UChar* pcode = stkp->u.repeat.pcode; - - STACK_PUSH_REPEAT_INC(si); - STACK_PUSH_ALT(pcode, s, sprev); - } - else { - p = stkp->u.repeat.pcode; - STACK_PUSH_REPEAT_INC(si); - } - } - else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { - STACK_PUSH_REPEAT_INC(si); - } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; - continue; - break; - - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); - GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ - STACK_GET_REPEAT(mem, stkp); - si = GET_STACK_INDEX(stkp); - goto repeat_inc_ng; - break; - - case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); - STACK_PUSH_POS(s, sprev); - MOP_OUT; - continue; - break; - - case OP_POP_POS: MOP_IN(OP_POP_POS); - { - STACK_POS_END(stkp); - s = stkp->u.state.pstr; - sprev = stkp->u.state.pstr_prev; - } - MOP_OUT; - continue; - break; - - case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); - GET_RELADDR_INC(addr, p); - STACK_PUSH_POS_NOT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - - case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); - STACK_POP_TIL_POS_NOT; - goto fail; - break; - - case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BT; - MOP_OUT; - continue; - break; - - case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); - STACK_STOP_BT_END; - MOP_OUT; - continue; - break; - - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); - GET_LENGTH_INC(tlen, p); - s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); - if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); - MOP_OUT; - continue; - break; - - case OP_PUSH_LOOK_BEHIND_NOT: MOP_IN(OP_PUSH_LOOK_BEHIND_NOT); - GET_RELADDR_INC(addr, p); - GET_LENGTH_INC(tlen, p); - q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen); - if (IS_NULL(q)) { - /* too short case -> success. ex. /(?<!XXX)a/.match("a") - If you want to change to fail, replace following line. */ - p += addr; - /* goto fail; */ - } - else { - STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); - s = q; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end); - } - MOP_OUT; - continue; - break; - - case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); - STACK_POP_TIL_LOOK_BEHIND_NOT; - goto fail; - break; - -#ifdef USE_SUBEXP_CALL - case OP_CALL: MOP_IN(OP_CALL); - GET_ABSADDR_INC(addr, p); - STACK_PUSH_CALL_FRAME(p); - p = reg->p + addr; - MOP_OUT; - continue; - break; - - case OP_RETURN: MOP_IN(OP_RETURN); - STACK_RETURN(p); - STACK_PUSH_RETURN; - MOP_OUT; - continue; - break; -#endif - - case OP_FINISH: - goto finish; - break; - - fail: - MOP_OUT; - /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); - STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; - continue; - break; - - default: - goto bytecode_error; - - } /* end of switch */ - sprev = sbegin; - } /* end of while(1) */ - - finish: - STACK_SAVE; - return best_len; - -#ifdef ONIG_DEBUG - stack_error: - STACK_SAVE; - return ONIGERR_STACK_BUG; -#endif - - bytecode_error: - STACK_SAVE; - return ONIGERR_UNDEFINED_BYTECODE; - - unexpected_bytecode_error: - STACK_SAVE; - return ONIGERR_UNEXPECTED_BYTECODE; -} - - -static UChar* -slow_search(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *t, *p, *s, *end; - - end = (UChar* )text_end; - end -= target_end - target - 1; - if (end > text_range) - end = text_range; - - s = (UChar* )text; - - if (enc->max_enc_len == enc->min_enc_len) { - int n = enc->max_enc_len; - - while (s < end) { - if (*s == *target) { - p = s + 1; - t = target + 1; - if (target_end == t || memcmp(t, p, target_end - t) == 0) - return s; - } - s += n; - } - return (UChar*)NULL; - } - while (s < end) { - if (*s == *target) { - p = s + 1; - t = target + 1; - if (target_end == t || memcmp(t, p, target_end - t) == 0) - return s; - } - s += enclen(enc, s, text_end); - } - - return (UChar* )NULL; -} - -static int -str_lower_case_match(OnigEncoding enc, int case_fold_flag, - const UChar* t, const UChar* tend, - const UChar* p, const UChar* end) -{ - int lowlen; - UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; - - while (t < tend) { - lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf); - q = lowbuf; - while (lowlen > 0) { - if (*t++ != *q++) return 0; - lowlen--; - } - } - - return 1; -} - -static UChar* -slow_search_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* text_end, UChar* text_range) -{ - UChar *s, *end; - - end = (UChar* )text_end; - end -= target_end - target - 1; - if (end > text_range) - end = text_range; - - s = (UChar* )text; - - while (s < end) { - if (str_lower_case_match(enc, case_fold_flag, target, target_end, - s, text_end)) - return s; - - s += enclen(enc, s, text_end); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *t, *p, *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); - - while (s >= text) { - if (*s == *target) { - p = s + 1; - t = target + 1; - while (t < target_end) { - if (*t != *p++) - break; - t++; - } - if (t == target_end) - return s; - } - s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); - } - - return (UChar* )NULL; -} - -static UChar* -slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, - UChar* target, UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - UChar *s; - - s = (UChar* )text_end; - s -= (target_end - target); - if (s > text_start) - s = (UChar* )text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end); - - while (s >= text) { - if (str_lower_case_match(enc, case_fold_flag, - target, target_end, s, text_end)) - return s; - - s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end); - } - - return (UChar* )NULL; -} - -static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) -{ - const UChar *s, *se, *t, *p, *end; - const UChar *tail; - int skip, tlen1; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", - (int )text, (int )text_end, (int )text_range); -#endif - - tail = target_end - 1; - tlen1 = tail - target; - end = text_range; - if (end + tlen1 > text_end) - end = text_end - tlen1; - - s = text; - - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - else { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s, end); - } while ((s - t) < skip && s < end); - } - } - - return (UChar* )NULL; -} - -static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) -{ - const UChar *s, *t, *p, *end; - const UChar *tail; - - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - - tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; - } - } - return (UChar* )NULL; -} - -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - int** skip) -{ - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; - - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; - - return 0; -} - -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; - - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) - return (UChar* )s; - - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end); - } - - return (UChar* )NULL; -} - -static UChar* -map_search(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* text_range, const UChar* text_end) -{ - const UChar *s = text; - - while (s < text_range) { - if (map[*s]) return (UChar* )s; - - s += enclen(enc, s, text_end); - } - return (UChar* )NULL; -} - -static UChar* -map_search_backward(OnigEncoding enc, UChar map[], - const UChar* text, const UChar* adjust_text, - const UChar* text_start, const UChar* text_end) -{ - const UChar *s = text_start; - - while (s >= text) { - if (map[*s]) return (UChar* )s; - - s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end); - } - return (UChar* )NULL; -} - -extern long -onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, - OnigOptionType option) -{ - long r; - UChar *prev; - OnigMatchArg msa; - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - - MATCH_ARG_INIT(msa, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif - - if (region - ) { - r = onig_region_resize_clear(region, reg->num_mem + 1); - } - else - r = 0; - - if (r == 0) { - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); - } - - MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); - return r; -} - -static int -forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, - UChar* range, UChar** low, UChar** high, UChar** low_prev) -{ - UChar *p, *pprev = (UChar* )NULL; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", - (int )str, (int )end, (int )s, (int )range); -#endif - - p = s; - if (reg->dmin > 0) { - if (ONIGENC_IS_SINGLEBYTE(reg->enc)) { - p += reg->dmin; - } - else { - UChar *q = p + reg->dmin; - while (p < q) p += enclen(reg->enc, p, end); - } - } - - retry: - switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: - p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); - break; - case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_EXACT_BM: - p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: - p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); - break; - - case ONIG_OPTIMIZE_MAP: - p = map_search(reg->enc, reg->map, p, range, end); - break; - } - - if (p && p < range) { - if (p - reg->dmin < s) { - retry_gate: - pprev = p; - p += enclen(reg->enc, p, end); - goto retry; - } - - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p, end); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) - goto retry_gate; -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) -#ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) -#endif - ) - goto retry_gate; - break; - } - } - - if (reg->dmax == 0) { - *low = p; - if (low_prev) { - if (*low > s) - *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p, end); - } - } - else { - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, end, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low, end); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low, end); - } - } - } - /* no needs to adjust *high, *high is used as range check only */ - *high = p - reg->dmin; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n", - (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax); -#endif - return 1; /* success */ - } - - return 0; /* fail */ -} - -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 - -static long -backward_search_range(regex_t* reg, const UChar* str, const UChar* end, - UChar* s, const UChar* range, UChar* adjrange, - UChar** low, UChar** high) -{ - int r; - UChar *p; - - range += reg->dmin; - p = s; - - retry: - switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: - exact_method: - p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case ONIG_OPTIMIZE_EXACT_IC: - p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, - reg->exact, reg->exact_end, - range, adjrange, end, p); - break; - - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: - if (IS_NULL(reg->int_map_backward)) { - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); - break; - - case ONIG_OPTIMIZE_MAP: - p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end); - break; - } - - if (p) { - if (reg->sub_anchor) { - UChar* prev; - - switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: - if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, str, p, end); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } - } - break; - - case ANCHOR_END_LINE: - if (ON_STR_END(p)) { -#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE - prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); - if (IS_NULL(prev)) goto fail; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { - p = prev; - goto retry; - } -#endif - } - else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) -#ifdef USE_CRNL_AS_LINE_TERMINATOR - && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) -#endif - ) { - p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end); - if (IS_NULL(p)) goto fail; - goto retry; - } - break; - } - } - - /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - *high = p - reg->dmin; - *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end); - } - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: low: %d, high: %d\n", - (int )(*low - str), (int )(*high - str)); -#endif - return 1; /* success */ - } - - fail: -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "backward_search_range: fail.\n"); -#endif - return 0; /* fail */ -} - - -extern long -onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) -{ - int r; - UChar *s, *prev; - OnigMatchArg msa; - const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar *orig_range = range; -#endif - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) - start: - THREAD_ATOMIC_START; - if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { - ONIG_STATE_INC(reg); - if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { - onig_chain_reduce(reg); - ONIG_STATE_INC(reg); - } - } - else { - int n; - - THREAD_ATOMIC_END; - n = 0; - while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { - if (++n > THREAD_PASS_LIMIT_COUNT) - return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; - THREAD_PASS; - } - goto start; - } - THREAD_ATOMIC_END; -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, - "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - if (region - ) { - r = onig_region_resize_clear(region, reg->num_mem + 1); - if (r) goto finish_no_msa; - } - - if (start > end || start < str) goto mismatch_no_msa; - - -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ - - - /* anchor optimize: resume search range */ - if (reg->anchor != 0 && str < end) { - UChar *min_semi_end, *max_semi_end; - - if (reg->anchor & ANCHOR_BEGIN_POSITION) { - /* search start-position only */ - begin_position: - if (range > start) - range = start + 1; - else - range = start; - } - else if (reg->anchor & ANCHOR_BEGIN_BUF) { - /* search str-position only */ - if (range > start) { - if (start != str) goto mismatch_no_msa; - range = str + 1; - } - else { - if (range <= str) { - start = str; - range = str; - } - else - goto mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_END_BUF) { - min_semi_end = max_semi_end = (UChar* )end; - - end_buf: - if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; - - if (range > start) { - if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { - start = min_semi_end - reg->anchor_dmax; - if (start < end) - start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end, end); - } - } - if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { - range = max_semi_end - reg->anchor_dmin + 1; - } - - if (start >= range) goto mismatch_no_msa; - } - else { - if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { - range = min_semi_end - reg->anchor_dmax; - } - if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { - start = max_semi_end - reg->anchor_dmin; - start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end); - } - if (range > start) goto mismatch_no_msa; - } - } - else if (reg->anchor & ANCHOR_SEMI_END_BUF) { - UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1); - - max_semi_end = (UChar* )end; - if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - -#ifdef USE_CRNL_AS_LINE_TERMINATOR - pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1); - if (IS_NOT_NULL(pre_end) && - ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { - min_semi_end = pre_end; - } -#endif - if (min_semi_end > str && start <= min_semi_end) { - goto end_buf; - } - } - else { - min_semi_end = (UChar* )end; - goto end_buf; - } - } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { - goto begin_position; - } - } - else if (str == end) { /* empty string */ - static const UChar address_for_empty_string[] = ""; - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search: empty string.\n"); -#endif - - if (reg->threshold_len == 0) { - start = end = str = address_for_empty_string; - s = (UChar* )start; - prev = (UChar* )NULL; - - MATCH_ARG_INIT(msa, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif - MATCH_AND_RETURN_CHECK(end); - goto mismatch; - } - goto mismatch_no_msa; - } - -#ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n", - (int )(end - str), (int )(start - str), (int )(range - str)); -#endif - - MATCH_ARG_INIT(msa, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif - - s = (UChar* )start; - if (range > start) { /* forward search */ - if (s > str) - prev = onigenc_get_prev_char_head(reg->enc, str, s, end); - else - prev = (UChar* )NULL; - - if (reg->optimize != ONIG_OPTIMIZE_NONE) { - UChar *sch_range, *low, *high, *low_prev; - - sch_range = (UChar* )range; - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_range = (UChar* )end; - else { - sch_range += reg->dmax; - if (sch_range > end) sch_range = (UChar* )end; - } - } - - if ((end - start) < reg->threshold_len) - goto mismatch; - - if (reg->dmax != ONIG_INFINITE_DISTANCE) { - do { - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, &low_prev)) goto mismatch; - if (s < low) { - s = low; - prev = low_prev; - } - while (s <= high) { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s, end); - } - } while (s < range); - goto mismatch; - } - else { /* check only. */ - if (! forward_search_range(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) goto mismatch; - - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s, end); - } while (s < range); - goto mismatch; - } - } - } - - do { - MATCH_AND_RETURN_CHECK(orig_range); - prev = s; - s += enclen(reg->enc, s, end); - } while (s < range); - - if (s == range) { /* because empty match with /$/. */ - MATCH_AND_RETURN_CHECK(orig_range); - } - } - else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - if (orig_start < end) - orig_start += enclen(reg->enc, orig_start, end); /* is upper range */ -#endif - - if (reg->optimize != ONIG_OPTIMIZE_NONE) { - UChar *low, *high, *adjrange, *sch_start; - - if (range < end) - adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end); - else - adjrange = (UChar* )end; - - if (reg->dmax != ONIG_INFINITE_DISTANCE && - (end - range) >= reg->threshold_len) { - do { - sch_start = s + reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) - goto mismatch; - - if (s > high) - s = high; - - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s, end); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } - } while (s >= range); - goto mismatch; - } - else { /* check only. */ - if ((end - range) < reg->threshold_len) goto mismatch; - - sch_start = s; - if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) - sch_start = (UChar* )end; - else { - sch_start += reg->dmax; - if (sch_start > end) sch_start = (UChar* )end; - else - sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, - start, sch_start, end); - } - } - if (backward_search_range(reg, str, end, sch_start, range, adjrange, - &low, &high) <= 0) goto mismatch; - } - } - - do { - prev = onigenc_get_prev_char_head(reg->enc, str, s, end); - MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } while (s >= range); - } - - mismatch: -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - if (IS_FIND_LONGEST(reg->options)) { - if (msa.best_len >= 0) { - s = msa.best_s; - goto match; - } - } -#endif - r = ONIG_MISMATCH; - - finish: - MATCH_ARG_FREE(msa); - ONIG_STATE_DEC_THREAD(reg); - - /* If result is mismatch and no FIND_NOT_EMPTY option, - then the region is not setted in match_at(). */ - if (IS_FIND_NOT_EMPTY(reg->options) && region - ) { - onig_region_clear(region); - } - -#ifdef ONIG_DEBUG - if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); -#endif - return r; - - mismatch_no_msa: - r = ONIG_MISMATCH; - finish_no_msa: - ONIG_STATE_DEC_THREAD(reg); -#ifdef ONIG_DEBUG - if (r != ONIG_MISMATCH) - fprintf(stderr, "onig_search: error %d\n", r); -#endif - return r; - - match: - ONIG_STATE_DEC_THREAD(reg); - MATCH_ARG_FREE(msa); - return s - str; -} - -extern OnigEncoding -onig_get_encoding(regex_t* reg) -{ - return reg->enc; -} - -extern OnigOptionType -onig_get_options(regex_t* reg) -{ - return reg->options; -} - -extern OnigCaseFoldType -onig_get_case_fold_flag(regex_t* reg) -{ - return reg->case_fold_flag; -} - -extern const OnigSyntaxType* -onig_get_syntax(regex_t* reg) -{ - return reg->syntax; -} - -extern int -onig_number_of_captures(regex_t* reg) -{ - return reg->num_mem; -} - -extern int -onig_number_of_capture_histories(regex_t* reg) -{ -#ifdef USE_CAPTURE_HISTORY - int i, n; - - n = 0; - for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(reg->capture_history, i) != 0) - n++; - } - return n; -#else - return 0; -#endif -} - -extern void -onig_copy_encoding(OnigEncoding to, OnigEncoding from) -{ - *to = *from; -} -#endif //ENABLE_REGEXP diff --git a/src/regint.h b/src/regint.h deleted file mode 100644 index aa4871594..000000000 --- a/src/regint.h +++ /dev/null @@ -1,838 +0,0 @@ -#ifndef ONIGURUMA_REGINT_H -#define ONIGURUMA_REGINT_H -/********************************************************************** - regint.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* for debug */ -/* #define ONIG_DEBUG_PARSE_TREE */ -/* #define ONIG_DEBUG_COMPILE */ -/* #define ONIG_DEBUG_SEARCH */ -/* #define ONIG_DEBUG_MATCH */ -/* #define ONIG_DONT_OPTIMIZE */ - -/* for byte-code statistical data. */ -/* #define ONIG_DEBUG_STATISTICS */ - -#ifndef RUBY -#define RUBY -#endif - -#include <stddef.h> //typedef unsigned int ptrdiff_t; - -#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ - defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ - defined(ONIG_DEBUG_STATISTICS) -#ifndef ONIG_DEBUG -#define ONIG_DEBUG -#endif -#endif - -#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - (defined(__ppc__) && defined(__APPLE__)) || \ - defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \ - defined(__mc68020__) -#define PLATFORM_UNALIGNED_WORD_ACCESS -#endif - -/* config */ -/* spec. config */ -#define USE_NAMED_GROUP -#define USE_SUBEXP_CALL -#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ -#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ -#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ -#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR -/* #define USE_RECOMPILE_API */ -/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ - -/* internal config */ -#define USE_PARSE_TREE_NODE_RECYCLE -#define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QTFR_PEEK_NEXT -#define USE_ST_LIBRARY -#define USE_SHARED_CCLASS_TABLE - -#define INIT_MATCH_STACK_SIZE 160 -#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ - -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - -/* */ -/* escape other system UChar definition */ -#ifndef RUBY_DEFINES_H -#include "mruby.h" -#endif -#ifdef ONIG_ESCAPE_UCHAR_COLLISION -#undef ONIG_ESCAPE_UCHAR_COLLISION -#endif -#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE -#undef USE_CAPTURE_HISTORY -#define USE_VARIABLE_META_CHARS -#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ -#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ -#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ -/* #define USE_MULTI_THREAD_SYSTEM */ -#define THREAD_SYSTEM_INIT /* depend on thread system */ -#define THREAD_SYSTEM_END /* depend on thread system */ -#define THREAD_ATOMIC_START /* depend on thread system */ -#define THREAD_ATOMIC_END /* depend on thread system */ -#define THREAD_PASS /* depend on thread system */ - -#ifdef RUBY - -//#define CHECK_INTERRUPT_IN_MATCH_AT mrb_thread_check_ints() -#define CHECK_INTERRUPT_IN_MATCH_AT -#define onig_st_init_table st_init_table -#define onig_st_init_table_with_size st_init_table_with_size -#define onig_st_init_numtable st_init_numtable -#define onig_st_init_numtable_with_size st_init_numtable_with_size -#define onig_st_init_strtable st_init_strtable -#define onig_st_init_strtable_with_size st_init_strtable_with_size -#define onig_st_delete st_delete -#define onig_st_delete_safe st_delete_safe -#define onig_st_insert st_insert -#define onig_st_lookup st_lookup -#define onig_st_foreach st_foreach -#define onig_st_add_direct st_add_direct -#define onig_st_free_table st_free_table -#define onig_st_cleanup_safe st_cleanup_safe -#define onig_st_copy st_copy -#define onig_st_nothing_key_clone st_nothing_key_clone -#define onig_st_nothing_key_free st_nothing_key_free -#define onig_st_is_member st_is_member - -#define USE_UPPER_CASE_TABLE -#else - -#define st_init_table onig_st_init_table -#define st_init_table_with_size onig_st_init_table_with_size -#define st_init_numtable onig_st_init_numtable -#define st_init_numtable_with_size onig_st_init_numtable_with_size -#define st_init_strtable onig_st_init_strtable -#define st_init_strtable_with_size onig_st_init_strtable_with_size -#define st_delete onig_st_delete -#define st_delete_safe onig_st_delete_safe -#define st_insert onig_st_insert -#define st_lookup onig_st_lookup -#define st_foreach onig_st_foreach -#define st_add_direct onig_st_add_direct -#define st_free_table onig_st_free_table -#define st_cleanup_safe onig_st_cleanup_safe -#define st_copy onig_st_copy -#define st_nothing_key_clone onig_st_nothing_key_clone -#define st_nothing_key_free onig_st_nothing_key_free -/* */ -#define onig_st_is_member st_is_member - -#define CHECK_INTERRUPT_IN_MATCH_AT - -#endif - -#define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 - -#define THREAD_PASS_LIMIT_COUNT 8 -#define xmemset memset -#define xmemcpy memcpy -#define xmemmove memmove - -#if defined(_WIN32) && !defined(__GNUC__) -#define xalloca _alloca -#define xvsnprintf _vsnprintf -#else -#define xalloca malloc -#define xvsnprintf vsnprintf -#endif - - -#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) -#define ONIG_STATE_INC(reg) (reg)->state++ -#define ONIG_STATE_DEC(reg) (reg)->state-- - -#define ONIG_STATE_INC_THREAD(reg) do {\ - THREAD_ATOMIC_START;\ - (reg)->state++;\ - THREAD_ATOMIC_END;\ -} while(0) -#define ONIG_STATE_DEC_THREAD(reg) do {\ - THREAD_ATOMIC_START;\ - (reg)->state--;\ - THREAD_ATOMIC_END;\ -} while(0) -#else -#define ONIG_STATE_INC(reg) /* Nothing */ -#define ONIG_STATE_DEC(reg) /* Nothing */ -#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ -#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ -#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ - -#ifdef HAVE_STDLIB_H -#include <stdlib.h> -#endif - -#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__)) -#include <alloca.h> -#endif - -#ifdef HAVE_STRING_H -# include <string.h> -#else -# include <strings.h> -#endif - -#include <ctype.h> -#ifdef HAVE_SYS_TYPES_H -#include <sys/types.h> -#endif - -#ifdef ONIG_DEBUG -# include <stdio.h> -#endif - -#include "regenc.h" - -#ifndef MIN -#define MIN(a,b) (((a)>(b))?(b):(a)) -#endif - -#ifndef MAX -#define MAX(a,b) (((a)<(b))?(b):(a)) -#endif - -#define IS_NULL(p) (((void*)(p)) == (void*)0) -#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) -#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL -#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY -#define NULL_UCHARP ((UChar* )0) - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS - -#define PLATFORM_GET_INC(val,p,type) do{\ - val = *(type* )p;\ - (p) += sizeof(type);\ -} while(0) - -#else - -#define PLATFORM_GET_INC(val,p,type) do{\ - xmemcpy(&val, (p), sizeof(type));\ - (p) += sizeof(type);\ -} while(0) - -/* sizeof(OnigCodePoint) */ -#define WORD_ALIGNMENT_SIZE sizeof(uintptr_t) - -#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ - (pad_size) = WORD_ALIGNMENT_SIZE \ - - ((uintptr_t)(addr) % WORD_ALIGNMENT_SIZE);\ - if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ -} while (0) - -#define ALIGNMENT_RIGHT(addr) do {\ - (addr) += (WORD_ALIGNMENT_SIZE - 1);\ - (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ -} while (0) - -#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ - -/* stack pop level */ -#define STACK_POP_LEVEL_FREE 0 -#define STACK_POP_LEVEL_MEM_START 1 -#define STACK_POP_LEVEL_ALL 2 - -/* optimize flags */ -#define ONIG_OPTIMIZE_NONE 0 -#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ -#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ -#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ -#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ -#define ONIG_OPTIMIZE_MAP 5 /* char map */ - -/* bit status */ -typedef unsigned int BitStatusType; - -#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) -#define BIT_STATUS_CLEAR(stats) (stats) = 0 -#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) -#define BIT_STATUS_AT(stats,n) \ - ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) - -#define BIT_STATUS_ON_AT(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM) \ - (stats) |= (1 << (n));\ - else\ - (stats) |= 1;\ -} while (0) - -#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM)\ - (stats) |= (1 << (n));\ -} while (0) - - -#define INT_MAX_LIMIT ((1UL << (sizeof(int) * 8 - 1)) - 1) - -#define DIGITVAL(code) ((code) - '0') -#define ODIGITVAL(code) DIGITVAL(code) -#define XDIGITVAL(enc,code) \ - (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ - : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) - -#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) -#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) -#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) -#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) -#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) -#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) -#define IS_FIND_CONDITION(option) ((option) & \ - (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) -#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) -#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) -#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) - -/* OP_SET_OPTION is required for these options. -#define IS_DYNAMIC_OPTION(option) \ - (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) -*/ -/* ignore-case and multibyte status are included in compiled code. */ -#define IS_DYNAMIC_OPTION(option) 0 - -#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ - ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) - -#define REPEAT_INFINITE -1 -#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) - -/* bitset */ -#define BITS_PER_BYTE 8 -#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) -#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE) -#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) - -#ifdef PLATFORM_UNALIGNED_WORD_ACCESS -typedef unsigned int Bits; -#else -typedef unsigned char Bits; -#endif -typedef Bits BitSet[BITSET_SIZE]; -typedef Bits* BitSetRef; - -#define SIZE_BITSET (int)sizeof(BitSet) - -#define BITSET_CLEAR(bs) do {\ - int i;\ - for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ -} while (0) - -#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] -#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM)) - -#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) -#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) -#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) -#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) - -/* bytes buffer */ -typedef struct _BBuf { - UChar* p; - unsigned int used; - unsigned int alloc; -} BBuf; - -#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) - -#define BBUF_SIZE_INC(buf,inc) do{\ - (buf)->alloc += (inc);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) - -#define BBUF_EXPAND(buf,low) do{\ - do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ - (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ -} while (0) - -#define BBUF_ENSURE_SIZE(buf,size) do{\ - unsigned int new_alloc = (buf)->alloc;\ - while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ - if ((buf)->alloc != new_alloc) {\ - (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ - if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ - (buf)->alloc = new_alloc;\ - }\ -} while (0) - -#define BBUF_WRITE(buf,pos,bytes,n) do{\ - int used = (pos) + (n);\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - xmemcpy((buf)->p + (pos), (bytes), (n));\ - if ((buf)->used < (unsigned int )used) (buf)->used = used;\ -} while (0) - -#define BBUF_WRITE1(buf,pos,byte) do{\ - int used = (pos) + 1;\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ - (buf)->p[(pos)] = (byte);\ - if ((buf)->used < (unsigned int )used) (buf)->used = used;\ -} while (0) - -#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) -#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) -#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) -#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) - -/* from < to */ -#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ - if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ - xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ - if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ -} while (0) - -/* from > to */ -#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ - xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ -} while (0) - -/* from > to */ -#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ - xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ - (buf)->used -= (from - to);\ -} while (0) - -#define BBUF_INSERT(buf,pos,bytes,n) do {\ - if (pos >= (buf)->used) {\ - BBUF_WRITE(buf,pos,bytes,n);\ - }\ - else {\ - BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ - xmemcpy((buf)->p + (pos), (bytes), (n));\ - }\ -} while (0) - -#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] - - -#define ANCHOR_BEGIN_BUF (1<<0) -#define ANCHOR_BEGIN_LINE (1<<1) -#define ANCHOR_BEGIN_POSITION (1<<2) -#define ANCHOR_END_BUF (1<<3) -#define ANCHOR_SEMI_END_BUF (1<<4) -#define ANCHOR_END_LINE (1<<5) - -#define ANCHOR_WORD_BOUND (1<<6) -#define ANCHOR_NOT_WORD_BOUND (1<<7) -#define ANCHOR_WORD_BEGIN (1<<8) -#define ANCHOR_WORD_END (1<<9) -#define ANCHOR_PREC_READ (1<<10) -#define ANCHOR_PREC_READ_NOT (1<<11) -#define ANCHOR_LOOK_BEHIND (1<<12) -#define ANCHOR_LOOK_BEHIND_NOT (1<<13) - -#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ - -/* operation code */ -enum OpCode { - OP_FINISH = 0, /* matching process terminator (no more alternative) */ - OP_END = 1, /* pattern code terminator (success end) */ - - OP_EXACT1 = 2, /* single byte, N = 1 */ - OP_EXACT2, /* single byte, N = 2 */ - OP_EXACT3, /* single byte, N = 3 */ - OP_EXACT4, /* single byte, N = 4 */ - OP_EXACT5, /* single byte, N = 5 */ - OP_EXACTN, /* single byte */ - OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ - OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ - OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ - OP_EXACTMB2N, /* mb-length = 2 */ - OP_EXACTMB3N, /* mb-length = 3 */ - OP_EXACTMBN, /* other length */ - - OP_EXACT1_IC, /* single byte, N = 1, ignore case */ - OP_EXACTN_IC, /* single byte, ignore case */ - - OP_CCLASS, - OP_CCLASS_MB, - OP_CCLASS_MIX, - OP_CCLASS_NOT, - OP_CCLASS_MB_NOT, - OP_CCLASS_MIX_NOT, - OP_CCLASS_NODE, /* pointer to CClassNode node */ - - OP_ANYCHAR, /* "." */ - OP_ANYCHAR_ML, /* "." multi-line */ - OP_ANYCHAR_STAR, /* ".*" */ - OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ - OP_ANYCHAR_STAR_PEEK_NEXT, - OP_ANYCHAR_ML_STAR_PEEK_NEXT, - - OP_WORD, - OP_NOT_WORD, - OP_WORD_BOUND, - OP_NOT_WORD_BOUND, - OP_WORD_BEGIN, - OP_WORD_END, - - OP_BEGIN_BUF, - OP_END_BUF, - OP_BEGIN_LINE, - OP_END_LINE, - OP_SEMI_END_BUF, - OP_BEGIN_POSITION, - - OP_BACKREF1, - OP_BACKREF2, - OP_BACKREFN, - OP_BACKREFN_IC, - OP_BACKREF_MULTI, - OP_BACKREF_MULTI_IC, - OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ - - OP_MEMORY_START, - OP_MEMORY_START_PUSH, /* push back-tracker to stack */ - OP_MEMORY_END_PUSH, /* push back-tracker to stack */ - OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ - OP_MEMORY_END, - OP_MEMORY_END_REC, /* push marker to stack */ - - OP_FAIL, /* pop stack and move */ - OP_JUMP, - OP_PUSH, - OP_POP, - OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ - OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ - OP_REPEAT, /* {n,m} */ - OP_REPEAT_NG, /* {n,m}? (non greedy) */ - OP_REPEAT_INC, - OP_REPEAT_INC_NG, /* non greedy */ - OP_REPEAT_INC_SG, /* search and get in stack */ - OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ - OP_NULL_CHECK_START, /* null loop checker start */ - OP_NULL_CHECK_END, /* null loop checker end */ - OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ - OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ - - OP_PUSH_POS, /* (?=...) start */ - OP_POP_POS, /* (?=...) end */ - OP_PUSH_POS_NOT, /* (?!...) start */ - OP_FAIL_POS, /* (?!...) end */ - OP_PUSH_STOP_BT, /* (?>...) start */ - OP_POP_STOP_BT, /* (?>...) end */ - OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ - OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ - OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ - - OP_CALL, /* \g<name> */ - OP_RETURN, - - OP_STATE_CHECK_PUSH, /* combination explosion check and push */ - OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ - OP_STATE_CHECK, /* check only */ - OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR, - - /* no need: IS_DYNAMIC_OPTION() == 0 */ - OP_SET_OPTION_PUSH, /* set option and push recover option */ - OP_SET_OPTION /* set option */ -}; - -typedef int RelAddrType; -typedef int AbsAddrType; -typedef int LengthType; -typedef int RepeatNumType; -typedef short int MemNumType; -typedef short int StateCheckNumType; -typedef void* PointerType; - -#define SIZE_OPCODE 1 -#define SIZE_RELADDR (int)sizeof(RelAddrType) -#define SIZE_ABSADDR (int)sizeof(AbsAddrType) -#define SIZE_LENGTH (int)sizeof(LengthType) -#define SIZE_MEMNUM (int)sizeof(MemNumType) -#define SIZE_STATE_CHECK_NUM (int)sizeof(StateCheckNumType) -#define SIZE_REPEATNUM (int)sizeof(RepeatNumType) -#define SIZE_OPTION (int)sizeof(OnigOptionType) -#define SIZE_CODE_POINT (int)sizeof(OnigCodePoint) -#define SIZE_POINTER (int)sizeof(PointerType) - - -#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) -#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) -#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) -#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) -#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) -#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) -#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) -#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) - -/* code point's address must be aligned address. */ -#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) -#define GET_BYTE_INC(byte,p) do{\ - byte = *(p);\ - (p)++;\ -} while(0) - - -/* op-code + arg size */ -#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE -#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) -#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP SIZE_OPCODE -#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) -#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_POS SIZE_OPCODE -#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP_POS SIZE_OPCODE -#define SIZE_OP_FAIL_POS SIZE_OPCODE -#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) -#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) -#define SIZE_OP_FAIL SIZE_OPCODE -#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE -#define SIZE_OP_POP_STOP_BT SIZE_OPCODE -#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) -#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) -#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE -#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) -#define SIZE_OP_RETURN SIZE_OPCODE - -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#endif - -#define MC_ESC(syn) (syn)->meta_char_table.esc -#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar -#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime -#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time -#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time -#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime - -#define IS_MC_ESC_CODE(code, syn) \ - ((code) == MC_ESC(syn) && \ - !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) - - -#define SYN_POSIX_COMMON_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ - ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ - ONIG_SYN_OP_LINE_ANCHOR | \ - ONIG_SYN_OP_ESC_CONTROL_CHARS ) - -#define SYN_GNU_REGEX_OP \ - ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ - ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ - ONIG_SYN_OP_VBAR_ALT | \ - ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ - ONIG_SYN_OP_QMARK_ZERO_ONE | \ - ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ - ONIG_SYN_OP_ESC_W_WORD | \ - ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ - ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ - ONIG_SYN_OP_LINE_ANCHOR ) - -#define SYN_GNU_REGEX_BV \ - ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ - ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ - ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) - - -#define NCCLASS_FLAGS(cc) ((cc)->flags) -#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) -#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) -#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) - -/* cclass node */ -#define FLAG_NCCLASS_NOT (1<<0) -#define FLAG_NCCLASS_SHARE (1<<1) - -#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) -#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) -#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) - -typedef struct { - int type; - /* struct _Node* next; */ - /* unsigned int flags; */ -} NodeBase; - -typedef struct { - NodeBase base; - unsigned int flags; - BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ -} CClassNode; - -typedef intptr_t OnigStackIndex; - -typedef struct _OnigStackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - OnigStackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is setted, if this stack type is MEM-START */ - OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ - OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } null_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} OnigStackType; - -typedef struct { - void* stack_p; - size_t stack_n; - OnigOptionType options; - OnigRegion* region; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} OnigMatchArg; - - -#define IS_CODE_SB_WORD(enc,code) \ - (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) - -#ifdef ONIG_DEBUG - -typedef struct { - short int opcode; - char* name; - short int arg_type; -} OnigOpInfoType; - -extern OnigOpInfoType OnigOpInfo[]; - -/* extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc); */ - -#ifdef ONIG_DEBUG_STATISTICS -extern void onig_statistics_init(void); -extern void onig_print_statistics(FILE* f); -#endif -#endif - -extern UChar* onig_error_code_to_format(int code); -extern void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...); -extern int onig_bbuf_init(BBuf* buf, int size); -extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline); -extern void onig_chain_reduce(regex_t* reg); -extern void onig_chain_link_add(regex_t* to, regex_t* add); -extern void onig_transfer(regex_t* to, regex_t* from); -extern int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc); -extern int onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc); - -/* strend hash */ -typedef void hash_table_type; -#ifdef RUBY -#include "st.h" - -typedef st_data_t hash_data_type; -#else -typedef unsigned long hash_data_type; -#endif - -extern hash_table_type* onig_st_init_strend_table_with_size(st_index_t size); -extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value); -extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value); - -/* encoding property management */ -#define PROPERTY_LIST_ADD_PROP(Name, CR) \ - r = onigenc_property_list_add_property((UChar* )Name, CR,\ - &PropertyNameTable, &PropertyList, &PropertyListNum,\ - &PropertyListSize);\ - if (r != 0) goto end - -#define PROPERTY_LIST_INIT_CHECK \ - if (PropertyInited == 0) {\ - int r = onigenc_property_list_init(init_property_list);\ - if (r != 0) return r;\ - } - -extern int onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize); - -typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); - -extern int onigenc_property_list_init(ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE); - -#endif /* ONIGURUMA_REGINT_H */ diff --git a/src/regparse.c b/src/regparse.c deleted file mode 100644 index 0ecb01018..000000000 --- a/src/regparse.c +++ /dev/null @@ -1,5600 +0,0 @@ -/* -*- mode:c; c-file-style:"gnu" -*- */ -/********************************************************************** - regparse.c - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "mruby.h" -#include <string.h> -#include "regparse.h" -#include <stdarg.h> -#ifdef ENABLE_REGEXP - -#define WARN_BUFSIZE 256 - -#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - - -const OnigSyntaxType OnigSyntaxRuby = { - (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | - ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | - ONIG_SYN_OP_ESC_C_CONTROL ) - & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) - , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | - ONIG_SYN_OP2_OPTION_RUBY | - ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | - ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | - ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | - ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT | - ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | - ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | - ONIG_SYN_OP2_ESC_H_XDIGIT ) - , ( SYN_GNU_REGEX_BV | - ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | - ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | - ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | - ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME | - ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | - ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | - ONIG_SYN_WARN_CC_DUP | - ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) - , ONIG_OPTION_NONE - , - { - (OnigCodePoint )'\\' /* esc */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ - , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ - } -}; - -const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; - -extern void onig_null_warn(const char* s ARG_UNUSED) { } - -#ifdef DEFAULT_WARN_FUNCTION -static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; -#else -static OnigWarnFunc onig_warn = onig_null_warn; -#endif - -#ifdef DEFAULT_VERB_WARN_FUNCTION -static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION; -#else -static OnigWarnFunc onig_verb_warn = onig_null_warn; -#endif - -extern void onig_set_warn_func(OnigWarnFunc f) -{ - onig_warn = f; -} - -extern void onig_set_verb_warn_func(OnigWarnFunc f) -{ - onig_verb_warn = f; -} - -static void CC_DUP_WARN(ScanEnv *env); - -static void -bbuf_free(BBuf* bbuf) -{ - if (IS_NOT_NULL(bbuf)) { - if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p); - xfree(bbuf); - } -} - -static int -bbuf_clone(BBuf** rto, BBuf* from) -{ - int r; - BBuf *to; - - *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(to); - r = BBUF_INIT(to, from->alloc); - if (r != 0) return r; - to->used = from->used; - xmemcpy(to->p, from->p, from->used); - return 0; -} - -#define BACKREF_REL_TO_ABS(rel_no, env) \ - ((env)->num_mem + 1 + (rel_no)) - -#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) - -#define MBCODE_START_POS(enc) \ - (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80) - -#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ - add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0)) - -#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ - if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ - r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ - if (r) return r;\ - }\ -} while (0) - - -#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \ - if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \ - BS_ROOM(bs, pos) |= BS_BIT(pos); \ -} while (0) - -#define BITSET_IS_EMPTY(bs,empty) do {\ - int i;\ - empty = 1;\ - for (i = 0; i < (int )BITSET_SIZE; i++) {\ - if ((bs)[i] != 0) {\ - empty = 0; break;\ - }\ - }\ -} while (0) - -static void -bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to) -{ - int i; - for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { - BITSET_SET_BIT_CHKDUP(bs, i); - } -} - -static void -bitset_invert(BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } -} - -static void -bitset_invert_to(BitSetRef from, BitSetRef to) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } -} - -static void -bitset_and(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } -} - -static void -bitset_or(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } -} - -static void -bitset_copy(BitSetRef dest, BitSetRef bs) -{ - int i; - for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } -} - -extern int -onig_strncmp(const UChar* s1, const UChar* s2, int n) -{ - int x; - - while (n-- > 0) { - x = *s2++ - *s1++; - if (x) return x; - } - return 0; -} - -extern void -onig_strcpy(UChar* dest, const UChar* src, const UChar* end) -{ - ptrdiff_t len = end - src; - if (len > 0) { - xmemcpy(dest, src, len); - dest[len] = (UChar )0; - } -} - -#ifdef USE_NAMED_GROUP -static UChar* -strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) -{ - ptrdiff_t slen; - int term_len, i; - UChar *r; - - slen = end - s; - term_len = ONIGENC_MBC_MINLEN(enc); - - r = (UChar* )xmalloc(slen + term_len); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, slen); - - for (i = 0; i < term_len; i++) - r[slen + i] = (UChar )0; - - return r; -} -#endif - -/* scan pattern methods */ -#define PEND_VALUE 0 - -#define PFETCH_READY UChar* pfetch_prev -#define PEND (p < end ? 0 : 1) -#define PUNFETCH p = pfetch_prev -#define PINC do { \ - pfetch_prev = p; \ - p += enclen(enc, p, end); \ -} while (0) -#define PFETCH(c) do { \ - c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \ - pfetch_prev = p; \ - p += enclen(enc, p, end); \ -} while (0) - -#define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) -#define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c) - -static UChar* -strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end, - int capa) -{ - UChar* r; - - if (dest) - r = (UChar* )xrealloc(dest, capa + 1); - else - r = (UChar* )xmalloc(capa + 1); - - CHECK_NULL_RETURN(r); - onig_strcpy(r + (dest_end - dest), src, src_end); - return r; -} - -/* dest on static area */ -static UChar* -strcat_capa_from_static(UChar* dest, UChar* dest_end, - const UChar* src, const UChar* src_end, int capa) -{ - UChar* r; - - r = (UChar* )xmalloc(capa + 1); - CHECK_NULL_RETURN(r); - onig_strcpy(r, dest, dest_end); - onig_strcpy(r + (dest_end - dest), src, src_end); - return r; -} -#endif //ENABLE_REGEXP - -#ifdef INCLUDE_ENCODING -#ifdef USE_ST_LIBRARY - -//#include "st.h" - -typedef struct { - const UChar* s; - const UChar* end; -} st_str_end_key; - -static int -str_end_cmp(st_data_t xp, st_data_t yp) -{ - const st_str_end_key *x, *y; - const UChar *p, *q; - int c; - - x = (const st_str_end_key*)xp; - y = (const st_str_end_key*)yp; - if ((x->end - x->s) != (y->end - y->s)) - return 1; - - p = x->s; - q = y->s; - while (p < x->end) { - c = (int )*p - (int )*q; - if (c != 0) return c; - - p++; q++; - } - - return 0; -} - -static st_index_t -str_end_hash(st_data_t xp) -{ - const st_str_end_key *x = (const st_str_end_key*)xp; - const UChar *p; - st_index_t val = 0; - - p = x->s; - while (p < x->end) { - val = val * 997 + (int )*p++; - } - - return val + (val >> 5); -} - -extern hash_table_type* -onig_st_init_strend_table_with_size(st_index_t size) -{ - static const struct st_hash_type hashType = { - str_end_cmp, - str_end_hash, - }; - - return (hash_table_type* ) - onig_st_init_table_with_size(&hashType, size); -} - -extern int -onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type *value) -{ - st_str_end_key key; - - key.s = (UChar* )str_key; - key.end = (UChar* )end_key; - - return onig_st_lookup(table, (st_data_t )(&key), value); -} - -extern int -onig_st_insert_strend(hash_table_type* table, const UChar* str_key, - const UChar* end_key, hash_data_type value) -{ - st_str_end_key* key; - int result; - - key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); - key->s = (UChar* )str_key; - key->end = (UChar* )end_key; - result = onig_st_insert(table, (st_data_t )key, value); - if (result) { - xfree(key); - } - return result; -} - -#endif /* USE_ST_LIBRARY */ -#endif //INCLUDE_ENCODING - -#ifdef ENABLE_REGEXP -#ifdef USE_NAMED_GROUP - -#define INIT_NAME_BACKREFS_ALLOC_NUM 8 - -typedef struct { - UChar* name; - size_t name_len; /* byte length */ - int back_num; /* number of backrefs */ - int back_alloc; - int back_ref1; - int* back_refs; -} NameEntry; - -#ifdef USE_ST_LIBRARY - -typedef st_table NameTable; -typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ - -#define NAMEBUF_SIZE 24 -#define NAMEBUF_SIZE_1 25 - -#ifdef ONIG_DEBUG -static enum st_retval -i_print_name_entry(UChar* key, NameEntry* e, void* arg) -{ - int i; - FILE* fp = (FILE* )arg; - - fprintf(fp, "%s: ", e->name); - if (e->back_num == 0) - fputs("-", fp); - else if (e->back_num == 1) - fprintf(fp, "%d", e->back_ref1); - else { - for (i = 0; i < e->back_num; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[i]); - } - } - fputs("\n", fp); - return ST_CONTINUE; -} - -extern int -onig_print_names(FILE* fp, regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - fprintf(fp, "name table\n"); - onig_st_foreach(t, i_print_name_entry, (HashDataType )fp); - fputs("\n", fp); - } - return 0; -} -#endif /* ONIG_DEBUG */ - -static enum st_retval -i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) -{ - xfree(e->name); - if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); - xfree(key); - xfree(e); - return ST_DELETE; -} - -static int -names_clear(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - onig_st_foreach(t, i_free_name_entry, 0); - } - return 0; -} - -extern int -onig_names_free(regex_t* reg) -{ - int r; - NameTable* t; - - r = names_clear(reg); - if (r) return r; - - t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) onig_st_free_table(t); - reg->name_table = (void* )NULL; - return 0; -} - -static NameEntry* -name_find(regex_t* reg, const UChar* name, const UChar* name_end) -{ - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - e = (NameEntry* )NULL; - if (IS_NOT_NULL(t)) { - onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); - } - return e; -} - -typedef struct { - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*); - regex_t* reg; - void* arg; - int ret; - OnigEncoding enc; -} INamesArg; - -static enum st_retval -i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) -{ - int r = (*(arg->func))(e->name, - e->name + e->name_len, - e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - arg->reg, arg->arg); - if (r != 0) { - arg->ret = r; - return ST_STOP; - } - return ST_CONTINUE; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - INamesArg narg; - NameTable* t = (NameTable* )reg->name_table; - - narg.ret = 0; - if (IS_NOT_NULL(t)) { - narg.func = func; - narg.reg = reg; - narg.arg = arg; - narg.enc = reg->enc; /* should be pattern encoding. */ - onig_st_foreach(t, i_names, (HashDataType )&narg); - } - return narg.ret; -} - -static enum st_retval -i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) -{ - int i; - - if (e->back_num > 1) { - for (i = 0; i < e->back_num; i++) { - e->back_refs[i] = map[e->back_refs[i]].new_val; - } - } - else if (e->back_num == 1) { - e->back_ref1 = map[e->back_ref1].new_val; - } - - return ST_CONTINUE; -} - -extern int -onig_renumber_name_table(regex_t* reg, GroupNumRemap* map) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - onig_st_foreach(t, i_renumber_name, (HashDataType )map); - } - return 0; -} - - -extern int -onig_number_of_names(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) - return t->num_entries; - else - return 0; -} - -#else /* USE_ST_LIBRARY */ - -#define INIT_NAMES_ALLOC_NUM 8 - -typedef struct { - NameEntry* e; - int num; - int alloc; -} NameTable; - -#ifdef ONIG_DEBUG -extern int -onig_print_names(FILE* fp, regex_t* reg) -{ - int i, j; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t) && t->num > 0) { - fprintf(fp, "name table\n"); - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - fprintf(fp, "%s: ", e->name); - if (e->back_num == 0) { - fputs("-", fp); - } - else if (e->back_num == 1) { - fprintf(fp, "%d", e->back_ref1); - } - else { - for (j = 0; j < e->back_num; j++) { - if (j > 0) fprintf(fp, ", "); - fprintf(fp, "%d", e->back_refs[j]); - } - } - fputs("\n", fp); - } - fputs("\n", fp); - } - return 0; -} -#endif - -static int -names_clear(regex_t* reg) -{ - int i; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - if (IS_NOT_NULL(e->name)) { - xfree(e->name); - e->name = NULL; - e->name_len = 0; - e->back_num = 0; - e->back_alloc = 0; - if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); - e->back_refs = (int* )NULL; - } - } - if (IS_NOT_NULL(t->e)) { - xfree(t->e); - t->e = NULL; - } - t->num = 0; - } - return 0; -} - -extern int -onig_names_free(regex_t* reg) -{ - int r; - NameTable* t; - - r = names_clear(reg); - if (r) return r; - - t = (NameTable* )reg->name_table; - if (IS_NOT_NULL(t)) xfree(t); - reg->name_table = NULL; - return 0; -} - -static NameEntry* -name_find(regex_t* reg, UChar* name, UChar* name_end) -{ - int i, len; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - len = name_end - name; - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) - return e; - } - } - return (NameEntry* )NULL; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - int i, r; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) { - for (i = 0; i < t->num; i++) { - e = &(t->e[i]); - r = (*func)(e->name, e->name + e->name_len, e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - reg, arg); - if (r != 0) return r; - } - } - return 0; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - NameTable* t = (NameTable* )reg->name_table; - - if (IS_NOT_NULL(t)) - return t->num; - else - return 0; -} - -#endif /* else USE_ST_LIBRARY */ - -static int -name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) -{ - int alloc; - NameEntry* e; - NameTable* t = (NameTable* )reg->name_table; - - if (name_end - name <= 0) - return ONIGERR_EMPTY_GROUP_NAME; - - e = name_find(reg, name, name_end); - if (IS_NULL(e)) { -#ifdef USE_ST_LIBRARY - if (IS_NULL(t)) { - t = onig_st_init_strend_table_with_size(5); - reg->name_table = (void* )t; - } - e = (NameEntry* )xmalloc(sizeof(NameEntry)); - CHECK_NULL_RETURN_MEMERR(e); - - e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) { - xfree(e); - return ONIGERR_MEMORY; - } - onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), - (HashDataType )e); - - e->name_len = name_end - name; - e->back_num = 0; - e->back_alloc = 0; - e->back_refs = (int* )NULL; - -#else - - if (IS_NULL(t)) { - alloc = INIT_NAMES_ALLOC_NUM; - t = (NameTable* )xmalloc(sizeof(NameTable)); - CHECK_NULL_RETURN_MEMERR(t); - t->e = NULL; - t->alloc = 0; - t->num = 0; - - t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc); - if (IS_NULL(t->e)) { - xfree(t); - return ONIGERR_MEMORY; - } - t->alloc = alloc; - reg->name_table = t; - goto clear; - } - else if (t->num == t->alloc) { - int i; - - alloc = t->alloc * 2; - t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc); - CHECK_NULL_RETURN_MEMERR(t->e); - t->alloc = alloc; - - clear: - for (i = t->num; i < t->alloc; i++) { - t->e[i].name = NULL; - t->e[i].name_len = 0; - t->e[i].back_num = 0; - t->e[i].back_alloc = 0; - t->e[i].back_refs = (int* )NULL; - } - } - e = &(t->e[t->num]); - t->num++; - e->name = strdup_with_null(reg->enc, name, name_end); - if (IS_NULL(e->name)) return ONIGERR_MEMORY; - e->name_len = name_end - name; -#endif - } - - if (e->back_num >= 1 && - ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) { - onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME, - name, name_end); - return ONIGERR_MULTIPLEX_DEFINED_NAME; - } - - e->back_num++; - if (e->back_num == 1) { - e->back_ref1 = backref; - } - else { - if (e->back_num == 2) { - alloc = INIT_NAME_BACKREFS_ALLOC_NUM; - e->back_refs = (int* )xmalloc(sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); - e->back_alloc = alloc; - e->back_refs[0] = e->back_ref1; - e->back_refs[1] = backref; - } - else { - if (e->back_num > e->back_alloc) { - alloc = e->back_alloc * 2; - e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc); - CHECK_NULL_RETURN_MEMERR(e->back_refs); - e->back_alloc = alloc; - } - e->back_refs[e->back_num - 1] = backref; - } - } - - return 0; -} - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - NameEntry* e = name_find(reg, name, name_end); - - if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE; - - switch (e->back_num) { - case 0: - *nums = 0; - break; - case 1: - *nums = &(e->back_ref1); - break; - default: - *nums = e->back_refs; - break; - } - return e->back_num; -} - -extern int -onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion *region) -{ - int i, n, *nums; - - n = onig_name_to_group_numbers(reg, name, name_end, &nums); - if (n < 0) - return n; - else if (n == 0) - return ONIGERR_PARSER_BUG; - else if (n == 1) - return nums[0]; - else { - if (IS_NOT_NULL(region)) { - for (i = n - 1; i >= 0; i--) { - if (region->beg[nums[i]] != ONIG_REGION_NOTPOS) - return nums[i]; - } - } - return nums[n - 1]; - } -} - -#else /* USE_NAMED_GROUP */ - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion* region) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - return 0; -} -#endif /* else USE_NAMED_GROUP */ - -extern int -onig_noname_group_capture_is_active(regex_t* reg) -{ - if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) - return 0; - -#ifdef USE_NAMED_GROUP - if (onig_number_of_names(reg) > 0 && - IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { - return 0; - } -#endif - - return 1; -} - - -#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 - -static void -scan_env_clear(ScanEnv* env) -{ - int i; - - BIT_STATUS_CLEAR(env->capture_history); - BIT_STATUS_CLEAR(env->bt_mem_start); - BIT_STATUS_CLEAR(env->bt_mem_end); - BIT_STATUS_CLEAR(env->backrefed_mem); - env->error = (UChar* )NULL; - env->error_end = (UChar* )NULL; - env->num_call = 0; - env->num_mem = 0; -#ifdef USE_NAMED_GROUP - env->num_named = 0; -#endif - env->mem_alloc = 0; - env->mem_nodes_dynamic = (Node** )NULL; - - for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) - env->mem_nodes_static[i] = NULL_NODE; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - env->num_comb_exp_check = 0; - env->comb_exp_max_regnum = 0; - env->curr_max_regnum = 0; - env->has_recursion = 0; -#endif - env->warnings_flag = 0; -} - -static int -scan_env_add_mem_entry(ScanEnv* env) -{ - int i, need, alloc; - Node** p; - - need = env->num_mem + 1; - if (need >= SCANENV_MEMNODES_SIZE) { - if (env->mem_alloc <= need) { - if (IS_NULL(env->mem_nodes_dynamic)) { - alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; - p = (Node** )xmalloc(sizeof(Node*) * alloc); - xmemcpy(p, env->mem_nodes_static, - sizeof(Node*) * SCANENV_MEMNODES_SIZE); - } - else { - alloc = env->mem_alloc * 2; - p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); - } - CHECK_NULL_RETURN_MEMERR(p); - - for (i = env->num_mem + 1; i < alloc; i++) - p[i] = NULL_NODE; - - env->mem_nodes_dynamic = p; - env->mem_alloc = alloc; - } - } - - env->num_mem++; - return env->num_mem; -} - -static int -scan_env_set_mem_node(ScanEnv* env, int num, Node* node) -{ - if (env->num_mem >= num) - SCANENV_MEM_NODES(env)[num] = node; - else - return ONIGERR_PARSER_BUG; - return 0; -} - - -#ifdef USE_PARSE_TREE_NODE_RECYCLE -typedef struct _FreeNode { - struct _FreeNode* next; -} FreeNode; - -static FreeNode* FreeNodeList = (FreeNode* )NULL; -#endif - -extern void -onig_node_free(Node* node) -{ - start: - if (IS_NULL(node)) return ; - - switch (NTYPE(node)) { - case NT_STR: - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); - } - break; - - case NT_LIST: - case NT_ALT: - onig_node_free(NCAR(node)); - { - Node* next_node = NCDR(node); - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - { - FreeNode* n = (FreeNode* )node; - - THREAD_ATOMIC_START; - n->next = FreeNodeList; - FreeNodeList = n; - THREAD_ATOMIC_END; - } -#else - xfree(node); -#endif - node = next_node; - goto start; - } - break; - - case NT_CCLASS: - { - CClassNode* cc = NCCLASS(node); - - if (IS_NCCLASS_SHARE(cc)) return ; - if (cc->mbuf) - bbuf_free(cc->mbuf); - } - break; - - case NT_QTFR: - if (NQTFR(node)->target) - onig_node_free(NQTFR(node)->target); - break; - - case NT_ENCLOSE: - if (NENCLOSE(node)->target) - onig_node_free(NENCLOSE(node)->target); - break; - - case NT_BREF: - if (IS_NOT_NULL(NBREF(node)->back_dynamic)) - xfree(NBREF(node)->back_dynamic); - break; - - case NT_ANCHOR: - if (NANCHOR(node)->target) - onig_node_free(NANCHOR(node)->target); - break; - } - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - { - FreeNode* n = (FreeNode* )node; - - THREAD_ATOMIC_START; - n->next = FreeNodeList; - FreeNodeList = n; - THREAD_ATOMIC_END; - } -#else - xfree(node); -#endif -} - -#ifdef USE_PARSE_TREE_NODE_RECYCLE -extern int -onig_free_node_list(void) -{ - FreeNode* n; - - /* THREAD_ATOMIC_START; */ - while (IS_NOT_NULL(FreeNodeList)) { - n = FreeNodeList; - FreeNodeList = FreeNodeList->next; - xfree(n); - } - /* THREAD_ATOMIC_END; */ - return 0; -} -#endif - -static Node* -node_new(void) -{ - Node* node; - -#ifdef USE_PARSE_TREE_NODE_RECYCLE - THREAD_ATOMIC_START; - if (IS_NOT_NULL(FreeNodeList)) { - node = (Node* )FreeNodeList; - FreeNodeList = FreeNodeList->next; - THREAD_ATOMIC_END; - return node; - } - THREAD_ATOMIC_END; -#endif - - node = (Node* )xmalloc(sizeof(Node)); - /* xmemset(node, 0, sizeof(Node)); */ - return node; -} - - -static void -initialize_cclass(CClassNode* cc) -{ - BITSET_CLEAR(cc->bs); - /* cc->base.flags = 0; */ - cc->flags = 0; - cc->mbuf = NULL; -} - -static Node* -node_new_cclass(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CCLASS); - initialize_cclass(NCCLASS(node)); - return node; -} - -static Node* -node_new_cclass_by_codepoint_range(int is_not, OnigCodePoint sb_out, - const OnigCodePoint ranges[]) -{ - int n, i; - CClassNode* cc; - OnigCodePoint j; - - Node* node = node_new_cclass(); - CHECK_NULL_RETURN(node); - - cc = NCCLASS(node); - if (is_not != 0) NCCLASS_SET_NOT(cc); - - BITSET_CLEAR(cc->bs); - if (sb_out > 0 && IS_NOT_NULL(ranges)) { - n = ONIGENC_CODE_RANGE_NUM(ranges); - for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); - j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { - if (j >= sb_out) goto sb_end; - - BITSET_SET_BIT(cc->bs, j); - } - } - } - - sb_end: - if (IS_NULL(ranges)) { - is_null: - cc->mbuf = NULL; - } - else { - BBuf* bbuf; - - n = ONIGENC_CODE_RANGE_NUM(ranges); - if (n == 0) goto is_null; - - bbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN(bbuf); - bbuf->alloc = n + 1; - bbuf->used = n + 1; - bbuf->p = (UChar* )((void* )ranges); - - cc->mbuf = bbuf; - } - - return node; -} - -static Node* -node_new_ctype(int type, int is_not) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CTYPE); - NCTYPE(node)->ctype = type; - NCTYPE(node)->is_not = is_not; - return node; -} - -static Node* -node_new_anychar(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CANY); - return node; -} - -static Node* -node_new_list(Node* left, Node* right) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_LIST); - NCAR(node) = left; - NCDR(node) = right; - return node; -} - -extern Node* -onig_node_new_list(Node* left, Node* right) -{ - return node_new_list(left, right); -} - -extern Node* -onig_node_list_add(Node* list, Node* x) -{ - Node *n; - - n = onig_node_new_list(x, NULL); - if (IS_NULL(n)) return NULL_NODE; - - if (IS_NOT_NULL(list)) { - while (IS_NOT_NULL(NCDR(list))) - list = NCDR(list); - - NCDR(list) = n; - } - - return n; -} - -extern Node* -onig_node_new_alt(Node* left, Node* right) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ALT); - NCAR(node) = left; - NCDR(node) = right; - return node; -} - -extern Node* -onig_node_new_anchor(int type) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ANCHOR); - NANCHOR(node)->type = type; - NANCHOR(node)->target = NULL; - NANCHOR(node)->char_len = -1; - return node; -} - -static Node* -node_new_backref(int back_num, int* backrefs, int by_name, -#ifdef USE_BACKREF_WITH_LEVEL - int exist_level, int nest_level, -#endif - ScanEnv* env) -{ - int i; - Node* node = node_new(); - - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_BREF); - NBREF(node)->state = 0; - NBREF(node)->back_num = back_num; - NBREF(node)->back_dynamic = (int* )NULL; - if (by_name != 0) - NBREF(node)->state |= NST_NAME_REF; - -#ifdef USE_BACKREF_WITH_LEVEL - if (exist_level != 0) { - NBREF(node)->state |= NST_NEST_LEVEL; - NBREF(node)->nest_level = nest_level; - } -#endif - - for (i = 0; i < back_num; i++) { - if (backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ - break; - } - } - - if (back_num <= NODE_BACKREFS_SIZE) { - for (i = 0; i < back_num; i++) - NBREF(node)->back_static[i] = backrefs[i]; - } - else { - int* p = (int* )xmalloc(sizeof(int) * back_num); - if (IS_NULL(p)) { - onig_node_free(node); - return NULL; - } - NBREF(node)->back_dynamic = p; - for (i = 0; i < back_num; i++) - p[i] = backrefs[i]; - } - return node; -} - -#ifdef USE_SUBEXP_CALL -static Node* -node_new_call(UChar* name, UChar* name_end, int gnum) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CALL); - NCALL(node)->state = 0; - NCALL(node)->target = NULL_NODE; - NCALL(node)->name = name; - NCALL(node)->name_end = name_end; - NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ - return node; -} -#endif - -static Node* -node_new_quantifier(int lower, int upper, int by_number) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_QTFR); - NQTFR(node)->state = 0; - NQTFR(node)->target = NULL; - NQTFR(node)->lower = lower; - NQTFR(node)->upper = upper; - NQTFR(node)->greedy = 1; - NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQTFR(node)->head_exact = NULL_NODE; - NQTFR(node)->next_head_exact = NULL_NODE; - NQTFR(node)->is_refered = 0; - if (by_number != 0) - NQTFR(node)->state |= NST_BY_NUMBER; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - NQTFR(node)->comb_exp_check_num = 0; -#endif - - return node; -} - -static Node* -node_new_enclose(int type) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_ENCLOSE); - NENCLOSE(node)->type = type; - NENCLOSE(node)->state = 0; - NENCLOSE(node)->regnum = 0; - NENCLOSE(node)->option = 0; - NENCLOSE(node)->target = NULL; - NENCLOSE(node)->call_addr = -1; - NENCLOSE(node)->opt_count = 0; - return node; -} - -extern Node* -onig_node_new_enclose(int type) -{ - return node_new_enclose(type); -} - -static Node* -node_new_enclose_memory(OnigOptionType option, int is_named) -{ - Node* node = node_new_enclose(ENCLOSE_MEMORY); - CHECK_NULL_RETURN(node); - if (is_named != 0) - SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); - -#ifdef USE_SUBEXP_CALL - NENCLOSE(node)->option = option; -#endif - return node; -} - -static Node* -node_new_option(OnigOptionType option) -{ - Node* node = node_new_enclose(ENCLOSE_OPTION); - CHECK_NULL_RETURN(node); - NENCLOSE(node)->option = option; - return node; -} - -extern int -onig_node_str_cat(Node* node, const UChar* s, const UChar* end) -{ - ptrdiff_t addlen = end - s; - - if (addlen > 0) { - ptrdiff_t len = NSTR(node)->end - NSTR(node)->s; - - if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { - UChar* p; - ptrdiff_t capa = len + addlen + NODE_STR_MARGIN; - - if (capa <= NSTR(node)->capa) { - onig_strcpy(NSTR(node)->s + len, s, end); - } - else { - if (NSTR(node)->s == NSTR(node)->buf) - p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, - s, end, capa); - else - p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); - - CHECK_NULL_RETURN_MEMERR(p); - NSTR(node)->s = p; - NSTR(node)->capa = capa; - } - } - else { - onig_strcpy(NSTR(node)->s + len, s, end); - } - NSTR(node)->end = NSTR(node)->s + len + addlen; - } - - return 0; -} - -extern int -onig_node_str_set(Node* node, const UChar* s, const UChar* end) -{ - onig_node_str_clear(node); - return onig_node_str_cat(node, s, end); -} - -static int -node_str_cat_char(Node* node, UChar c) -{ - UChar s[1]; - - s[0] = c; - return onig_node_str_cat(node, s, s + 1); -} - -extern void -onig_node_conv_to_str_node(Node* node, int flag) -{ - SET_NTYPE(node, NT_STR); - NSTR(node)->flag = flag; - NSTR(node)->capa = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; -} - -extern void -onig_node_str_clear(Node* node) -{ - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); - } - - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; -} - -static Node* -node_new_str(const UChar* s, const UChar* end) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_STR); - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; - if (onig_node_str_cat(node, s, end)) { - onig_node_free(node); - return NULL; - } - return node; -} - -extern Node* -onig_node_new_str(const UChar* s, const UChar* end) -{ - return node_new_str(s, end); -} - -static Node* -node_new_str_raw(UChar* s, UChar* end) -{ - Node* node = node_new_str(s, end); - NSTRING_SET_RAW(node); - return node; -} - -static Node* -node_new_empty(void) -{ - return node_new_str(NULL, NULL); -} - -static Node* -node_new_str_raw_char(UChar c) -{ - UChar p[1]; - - p[0] = c; - return node_new_str_raw(p, p + 1); -} - -static Node* -str_node_split_last_char(StrNode* sn, OnigEncoding enc) -{ - const UChar *p; - Node* n = NULL_NODE; - - if (sn->end > sn->s) { - p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end); - if (p && p > sn->s) { /* can be splitted. */ - n = node_new_str(p, sn->end); - if ((sn->flag & NSTR_RAW) != 0) - NSTRING_SET_RAW(n); - sn->end = (UChar* )p; - } - } - return n; -} - -static int -str_node_can_be_split(StrNode* sn, OnigEncoding enc) -{ - if (sn->end > sn->s) { - return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0); - } - return 0; -} - -extern int -onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) -{ - unsigned int num, val; - OnigCodePoint c; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - val = (unsigned int )DIGITVAL(c); - if ((INT_MAX_LIMIT - val) / 10UL < num) - return -1; /* overflow */ - - num = num * 10 + val; - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - -static int -scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) -{ - OnigCodePoint c; - unsigned int num, val; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND && maxlen-- != 0) { - PFETCH(c); - if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { - val = (unsigned int )XDIGITVAL(enc,c); - if ((INT_MAX_LIMIT - val) / 16UL < num) - return -1; /* overflow */ - - num = (num << 4) + XDIGITVAL(enc,c); - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - -static int -scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) -{ - OnigCodePoint c; - unsigned int num, val; - UChar* p = *src; - PFETCH_READY; - - num = 0; - while (!PEND && maxlen-- != 0) { - PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { - val = ODIGITVAL(c); - if ((INT_MAX_LIMIT - val) / 8UL < num) - return -1; /* overflow */ - - num = (num << 3) + val; - } - else { - PUNFETCH; - break; - } - } - *src = p; - return num; -} - - -#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ - BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) - -/* data format: - [n][from-1][to-1][from-2][to-2] ... [from-n][to-n] - (all data size is OnigCodePoint) - */ -static int -new_code_range(BBuf** pbuf) -{ -#define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5) - int r; - OnigCodePoint n; - BBuf* bbuf; - - bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(*pbuf); - r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); - if (r) return r; - - n = 0; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); - return 0; -} - -static int -add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, - int checkdup) -{ - int r, inc_n, pos; - int low, high, bound, x; - OnigCodePoint n, *data; - BBuf* bbuf; - - if (from > to) { - n = from; from = to; to = n; - } - - if (IS_NULL(*pbuf)) { - r = new_code_range(pbuf); - if (r) return r; - bbuf = *pbuf; - n = 0; - } - else { - bbuf = *pbuf; - GET_CODE_POINT(n, bbuf->p); - } - data = (OnigCodePoint* )(bbuf->p); - data++; - - for (low = 0, bound = n; low < bound; ) { - x = (low + bound) >> 1; - if (from > data[x*2 + 1]) - low = x + 1; - else - bound = x; - } - - for (high = low, bound = n; high < bound; ) { - x = (high + bound) >> 1; - if (to >= data[x*2] - 1) - high = x + 1; - else - bound = x; - } - - inc_n = low + 1 - high; - if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM) - return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES; - - if (inc_n != 1) { - if (checkdup && to >= data[low*2]) CC_DUP_WARN(env); - if (from > data[low*2]) - from = data[low*2]; - if (to < data[(high - 1)*2 + 1]) - to = data[(high - 1)*2 + 1]; - } - - if (inc_n != 0 && (OnigCodePoint )high < n) { - int from_pos = SIZE_CODE_POINT * (1 + high * 2); - int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2); - int size = (n - high) * 2 * SIZE_CODE_POINT; - - if (inc_n > 0) { - BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); - } - else { - BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); - } - } - - pos = SIZE_CODE_POINT * (1 + low * 2); - BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); - BBUF_WRITE_CODE_POINT(bbuf, pos, from); - BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); - n += inc_n; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); - - return 0; -} - -static int -add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) -{ - return add_code_range_to_buf0(pbuf, env, from, to, 1); -} - -static int -add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup) -{ - if (from > to) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - return 0; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - - return add_code_range_to_buf0(pbuf, env, from, to, checkdup); -} - -static int -add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) -{ - return add_code_range0(pbuf, env, from, to, 1); -} - -static int -not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env) -{ - int r, i, n; - OnigCodePoint pre, from, *data, to = 0; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf)) { - set_all: - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - } - - data = (OnigCodePoint* )(bbuf->p); - GET_CODE_POINT(n, data); - data++; - if (n <= 0) goto set_all; - - r = 0; - pre = MBCODE_START_POS(enc); - for (i = 0; i < n; i++) { - from = data[i*2]; - to = data[i*2+1]; - if (pre <= from - 1) { - r = add_code_range_to_buf(pbuf, env, pre, from - 1); - if (r != 0) return r; - } - if (to == ~((OnigCodePoint )0)) break; - pre = to + 1; - } - if (to < ~((OnigCodePoint )0)) { - r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0)); - } - return r; -} - -#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ - BBuf *tbuf; \ - int tnot; \ - tnot = not1; not1 = not2; not2 = tnot; \ - tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \ -} while (0) - -static int -or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, - BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) -{ - int r; - OnigCodePoint i, n1, *data1; - OnigCodePoint from, to; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) { - if (not1 != 0 || not2 != 0) - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - return 0; - } - - r = 0; - if (IS_NULL(bbuf2)) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - if (IS_NULL(bbuf1)) { - if (not1 != 0) { - return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf); - } - else { - if (not2 == 0) { - return bbuf_clone(pbuf, bbuf2); - } - else { - return not_code_range_buf(enc, bbuf2, pbuf, env); - } - } - } - - if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - data1 = (OnigCodePoint* )(bbuf1->p); - GET_CODE_POINT(n1, data1); - data1++; - - if (not2 == 0 && not1 == 0) { /* 1 OR 2 */ - r = bbuf_clone(pbuf, bbuf2); - } - else if (not1 == 0) { /* 1 OR (not 2) */ - r = not_code_range_buf(enc, bbuf2, pbuf, env); - } - if (r != 0) return r; - - for (i = 0; i < n1; i++) { - from = data1[i*2]; - to = data1[i*2+1]; - r = add_code_range_to_buf(pbuf, env, from, to); - if (r != 0) return r; - } - return 0; -} - -static int -and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1, - OnigCodePoint* data, int n) -{ - int i, r; - OnigCodePoint from2, to2; - - for (i = 0; i < n; i++) { - from2 = data[i*2]; - to2 = data[i*2+1]; - if (from2 < from1) { - if (to2 < from1) continue; - else { - from1 = to2 + 1; - } - } - else if (from2 <= to1) { - if (to2 < to1) { - if (from1 <= from2 - 1) { - r = add_code_range_to_buf(pbuf, env, from1, from2-1); - if (r != 0) return r; - } - from1 = to2 + 1; - } - else { - to1 = from2 - 1; - } - } - else { - from1 = from2; - } - if (from1 > to1) break; - } - if (from1 <= to1) { - r = add_code_range_to_buf(pbuf, env, from1, to1); - if (r != 0) return r; - } - return 0; -} - -static int -and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) -{ - int r; - OnigCodePoint i, j, n1, n2, *data1, *data2; - OnigCodePoint from, to, from1, to1, from2, to2; - - *pbuf = (BBuf* )NULL; - if (IS_NULL(bbuf1)) { - if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */ - return bbuf_clone(pbuf, bbuf2); - return 0; - } - else if (IS_NULL(bbuf2)) { - if (not2 != 0) - return bbuf_clone(pbuf, bbuf1); - return 0; - } - - if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); - - data1 = (OnigCodePoint* )(bbuf1->p); - data2 = (OnigCodePoint* )(bbuf2->p); - GET_CODE_POINT(n1, data1); - GET_CODE_POINT(n2, data2); - data1++; - data2++; - - if (not2 == 0 && not1 == 0) { /* 1 AND 2 */ - for (i = 0; i < n1; i++) { - from1 = data1[i*2]; - to1 = data1[i*2+1]; - for (j = 0; j < n2; j++) { - from2 = data2[j*2]; - to2 = data2[j*2+1]; - if (from2 > to1) break; - if (to2 < from1) continue; - from = MAX(from1, from2); - to = MIN(to1, to2); - r = add_code_range_to_buf(pbuf, env, from, to); - if (r != 0) return r; - } - } - } - else if (not1 == 0) { /* 1 AND (not 2) */ - for (i = 0; i < n1; i++) { - from1 = data1[i*2]; - to1 = data1[i*2+1]; - r = and_code_range1(pbuf, env, from1, to1, data2, n2); - if (r != 0) return r; - } - } - - return 0; -} - -static int -and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) -{ - OnigEncoding enc = env->enc; - int r, not1, not2; - BBuf *buf1, *buf2, *pbuf = 0; - BitSetRef bsr1, bsr2; - BitSet bs1, bs2; - - not1 = IS_NCCLASS_NOT(dest); - bsr1 = dest->bs; - buf1 = dest->mbuf; - not2 = IS_NCCLASS_NOT(cc); - bsr2 = cc->bs; - buf2 = cc->mbuf; - - if (not1 != 0) { - bitset_invert_to(bsr1, bs1); - bsr1 = bs1; - } - if (not2 != 0) { - bitset_invert_to(bsr2, bs2); - bsr2 = bs2; - } - bitset_and(bsr1, bsr2); - if (bsr1 != dest->bs) { - bitset_copy(dest->bs, bsr1); - bsr1 = dest->bs; - } - if (not1 != 0) { - bitset_invert(dest->bs); - } - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - if (not1 != 0 && not2 != 0) { - r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env); - } - else { - r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env); - if (r == 0 && not1 != 0) { - BBuf *tbuf = 0; - r = not_code_range_buf(enc, pbuf, &tbuf, env); - bbuf_free(pbuf); - pbuf = tbuf; - } - } - if (r != 0) { - bbuf_free(pbuf); - return r; - } - - dest->mbuf = pbuf; - bbuf_free(buf1); - return r; - } - return 0; -} - -static int -or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) -{ - OnigEncoding enc = env->enc; - int r, not1, not2; - BBuf *buf1, *buf2, *pbuf = 0; - BitSetRef bsr1, bsr2; - BitSet bs1, bs2; - - not1 = IS_NCCLASS_NOT(dest); - bsr1 = dest->bs; - buf1 = dest->mbuf; - not2 = IS_NCCLASS_NOT(cc); - bsr2 = cc->bs; - buf2 = cc->mbuf; - - if (not1 != 0) { - bitset_invert_to(bsr1, bs1); - bsr1 = bs1; - } - if (not2 != 0) { - bitset_invert_to(bsr2, bs2); - bsr2 = bs2; - } - bitset_or(bsr1, bsr2); - if (bsr1 != dest->bs) { - bitset_copy(dest->bs, bsr1); - bsr1 = dest->bs; - } - if (not1 != 0) { - bitset_invert(dest->bs); - } - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - if (not1 != 0 && not2 != 0) { - r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env); - } - else { - r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env); - if (r == 0 && not1 != 0) { - BBuf *tbuf = 0; - r = not_code_range_buf(enc, pbuf, &tbuf, env); - bbuf_free(pbuf); - pbuf = tbuf; - } - } - if (r != 0) { - bbuf_free(pbuf); - return r; - } - - dest->mbuf = pbuf; - bbuf_free(buf1); - return r; - } - else - return 0; -} - -static void UNKNOWN_ESC_WARN(ScanEnv *env, int c); - -static int -conv_backslash_value(int c, ScanEnv* env) -{ - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { - switch (c) { - case 'n': return '\n'; - case 't': return '\t'; - case 'r': return '\r'; - case 'f': return '\f'; - case 'a': return '\007'; - case 'b': return '\010'; - case 'e': return '\033'; - case 'v': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) - return '\v'; - break; - - default: - if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) - UNKNOWN_ESC_WARN(env, c); - break; - } - } - return c; -} - -#define is_invalid_quantifier_target(node) 0 - -/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ -static int -popular_quantifier_num(QtfrNode* q) -{ - if (q->greedy) { - if (q->lower == 0) { - if (q->upper == 1) return 0; - else if (IS_REPEAT_INFINITE(q->upper)) return 1; - } - else if (q->lower == 1) { - if (IS_REPEAT_INFINITE(q->upper)) return 2; - } - } - else { - if (q->lower == 0) { - if (q->upper == 1) return 3; - else if (IS_REPEAT_INFINITE(q->upper)) return 4; - } - else if (q->lower == 1) { - if (IS_REPEAT_INFINITE(q->upper)) return 5; - } - } - return -1; -} - - -enum ReduceType { - RQ_ASIS = 0, /* as is */ - RQ_DEL = 1, /* delete parent */ - RQ_A, /* to '*' */ - RQ_AQ, /* to '*?' */ - RQ_QQ, /* to '??' */ - RQ_P_QQ, /* to '+)??' */ - RQ_PQ_Q /* to '+?)?' */ -}; - -static enum ReduceType const ReduceTypeTable[6][6] = { - {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ - {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ - {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */ - {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */ - {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */ - {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */ -}; - -extern void -onig_reduce_nested_quantifier(Node* pnode, Node* cnode) -{ - int pnum, cnum; - QtfrNode *p, *c; - - p = NQTFR(pnode); - c = NQTFR(cnode); - pnum = popular_quantifier_num(p); - cnum = popular_quantifier_num(c); - if (pnum < 0 || cnum < 0) return ; - - switch(ReduceTypeTable[cnum][pnum]) { - case RQ_DEL: - *pnode = *cnode; - break; - case RQ_A: - p->target = c->target; - p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; - break; - case RQ_AQ: - p->target = c->target; - p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; - break; - case RQ_QQ: - p->target = c->target; - p->lower = 0; p->upper = 1; p->greedy = 0; - break; - case RQ_P_QQ: - p->target = cnode; - p->lower = 0; p->upper = 1; p->greedy = 0; - c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; - return ; - break; - case RQ_PQ_Q: - p->target = cnode; - p->lower = 0; p->upper = 1; p->greedy = 1; - c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; - return ; - break; - case RQ_ASIS: - p->target = cnode; - return ; - break; - } - - c->target = NULL_NODE; - onig_node_free(cnode); -} - - -enum TokenSyms { - TK_EOT = 0, /* end of token */ - TK_RAW_BYTE = 1, - TK_CHAR, - TK_STRING, - TK_CODE_POINT, - TK_ANYCHAR, - TK_CHAR_TYPE, - TK_BACKREF, - TK_CALL, - TK_ANCHOR, - TK_OP_REPEAT, - TK_INTERVAL, - TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */ - TK_ALT, - TK_SUBEXP_OPEN, - TK_SUBEXP_CLOSE, - TK_CC_OPEN, - TK_QUOTE_OPEN, - TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ - /* in cc */ - TK_CC_CLOSE, - TK_CC_RANGE, - TK_POSIX_BRACKET_OPEN, - TK_CC_AND, /* && */ - TK_CC_CC_OPEN /* [ */ -}; - -typedef struct { - enum TokenSyms type; - int escaped; - int base; /* is number: 8, 16 (used in [....]) */ - UChar* backp; - union { - UChar* s; - int c; - OnigCodePoint code; - int anchor; - int subtype; - struct { - int lower; - int upper; - int greedy; - int possessive; - } repeat; - struct { - int num; - int ref1; - int* refs; - int by_name; -#ifdef USE_BACKREF_WITH_LEVEL - int exist_level; - int level; /* \k<name+n> */ -#endif - } backref; - struct { - UChar* name; - UChar* name_end; - int gnum; - } call; - struct { - int ctype; - int is_not; - } prop; - } u; -} OnigToken; - - -static int -fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) -{ - int low, up, syn_allow, non_low = 0; - int r = 0; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar* p = *src; - PFETCH_READY; - - syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL); - - if (PEND) { - if (syn_allow) - return 1; /* "....{" : OK! */ - else - return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */ - } - - if (! syn_allow) { - c = PPEEK; - if (c == ')' || c == '(' || c == '|') { - return ONIGERR_END_PATTERN_AT_LEFT_BRACE; - } - } - - low = onig_scan_unsigned_number(&p, end, env->enc); - if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (low > ONIG_MAX_REPEAT_NUM) - return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == *src) { /* can't read low */ - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) { - /* allow {,n} as {0,n} */ - low = 0; - non_low = 1; - } - else - goto invalid; - } - - if (PEND) goto invalid; - PFETCH(c); - if (c == ',') { - UChar* prev = p; - up = onig_scan_unsigned_number(&p, end, env->enc); - if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - if (up > ONIG_MAX_REPEAT_NUM) - return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE; - - if (p == prev) { - if (non_low != 0) - goto invalid; - up = REPEAT_INFINITE; /* {n,} : {n,infinite} */ - } - } - else { - if (non_low != 0) - goto invalid; - - PUNFETCH; - up = low; /* {n} : exact n times */ - r = 2; /* fixed */ - } - - if (PEND) goto invalid; - PFETCH(c); - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { - if (c != MC_ESC(env->syntax)) goto invalid; - PFETCH(c); - } - if (c != '}') goto invalid; - - if (!IS_REPEAT_INFINITE(up) && low > up) { - return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE; - } - - tok->type = TK_INTERVAL; - tok->u.repeat.lower = low; - tok->u.repeat.upper = up; - *src = p; - return r; /* 0: normal {n,m}, 2: fixed {n} */ - - invalid: - if (syn_allow) - return 1; /* OK */ - else - return ONIGERR_INVALID_REPEAT_RANGE_PATTERN; -} - -/* \M-, \C-, \c, or \... */ -static int -fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) -{ - int v; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar* p = *src; - PFETCH_READY; - - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - PFETCH(c); - switch (c) { - case 'M': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) { - if (PEND) return ONIGERR_END_PATTERN_AT_META; - PFETCH(c); - if (c != '-') return ONIGERR_META_CODE_SYNTAX; - if (PEND) return ONIGERR_END_PATTERN_AT_META; - PFETCH(c); - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - c = ((c & 0xff) | 0x80); - } - else - goto backslash; - break; - - case 'C': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) { - if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; - PFETCH(c); - if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX; - goto control; - } - else - goto backslash; - - case 'c': - if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) { - control: - if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; - PFETCH(c); - if (c == '?') { - c = 0177; - } - else { - if (c == MC_ESC(env->syntax)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - c &= 0x9f; - } - break; - } - /* fall through */ - - default: - { - backslash: - c = conv_backslash_value(c, env); - } - break; - } - - *src = p; - return c; -} - -static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); - -static OnigCodePoint -get_name_end_code_point(OnigCodePoint start) -{ - switch (start) { - case '<': return (OnigCodePoint )'>'; break; - case '\'': return (OnigCodePoint )'\''; break; - default: - break; - } - - return (OnigCodePoint )0; -} - -#ifdef USE_NAMED_GROUP -#ifdef USE_BACKREF_WITH_LEVEL -/* - \k<name+n>, \k<name-n> - \k<num+n>, \k<num-n> - \k<-num+n>, \k<-num-n> -*/ -static int -fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, - int* rback_num, int* rlevel) -{ - int r, sign, is_num, exist_level; - OnigCodePoint end_code; - OnigCodePoint c = 0; - OnigEncoding enc = env->enc; - UChar *name_end; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - is_num = exist_level = 0; - sign = 1; - pnum_head = *src; - - end_code = get_name_end_code_point(start_code); - - name_end = end; - r = 0; - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; - break; - } - - if (is_num != 0) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - if (r == 0 && c != end_code) { - if (c == '+' || c == '-') { - int level; - int flag = (c == '-' ? -1 : 1); - - PFETCH(c); - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; - PUNFETCH; - level = onig_scan_unsigned_number(&p, end, enc); - if (level < 0) return ONIGERR_TOO_BIG_NUMBER; - *rlevel = (level * flag); - exist_level = 1; - - PFETCH(c); - if (c == end_code) - goto end; - } - - err: - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - end: - if (r == 0) { - if (is_num != 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) goto err; - - *rback_num *= sign; - } - - *rname_end = name_end; - *src = p; - return (exist_level ? 1 : 0); - } - else { - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_BACKREF_WITH_LEVEL */ - -/* - def: 0 -> define name (don't allow number name) - 1 -> reference name (allow number name) -*/ -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) -{ - int r, is_num, sign; - OnigCodePoint end_code; - OnigCodePoint c = 0; - OnigEncoding enc = env->enc; - UChar *name_end; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - name_end = end; - pnum_head = *src; - r = 0; - is_num = 0; - sign = 1; - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - if (ref == 1) - is_num = 1; - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (c == '-') { - if (ref == 1) { - is_num = 2; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; - } - } - else if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - if (r == 0) { - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; - break; - } - - if (is_num != 0) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - else - r = ONIGERR_INVALID_GROUP_NAME; - - is_num = 0; - } - } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - } - - if (c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - if (is_num != 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - - *rback_num *= sign; - } - - *rname_end = name_end; - *src = p; - return 0; - } - else { - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == end_code || c == ')') - break; - } - if (PEND) - name_end = end; - - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#else -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) -{ - int r, is_num, sign; - OnigCodePoint end_code; - OnigCodePoint c = 0; - UChar *name_end; - OnigEncoding enc = env->enc; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - *rname_end = name_end = end; - r = 0; - pnum_head = *src; - is_num = 0; - sign = 1; - - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; - } - else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (!PEND) { - name_end = p; - - PFETCH(c); - if (c == end_code || c == ')') break; - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - if (r == 0 && c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - - if (r == 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - *rback_num *= sign; - - *rname_end = name_end; - *src = p; - return 0; - } - else { - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_NAMED_GROUP */ - -void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, - UChar* pat, UChar* pat_end, const UChar *fmt, va_list args); - -static void -onig_syntax_warn(ScanEnv *env, const char *fmt, ...) -{ - va_list args; - UChar buf[WARN_BUFSIZE]; - va_start(args, fmt); - onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (const UChar*)fmt, args); - va_end(args); - if (env->sourcefile == NULL) - mrb_warn("%s", (char*)buf); - else - mrb_compile_warn(env->sourcefile, env->sourceline, "%s", (char*)buf); -} - -static void -CC_ESC_WARN(ScanEnv *env, UChar *c) -{ - if (onig_warn == onig_null_warn) return ; - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { - onig_syntax_warn(env, "character class has '%s' without escape", c); - } -} - -static void -CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) -{ - if (onig_warn == onig_null_warn) return ; - - if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - onig_syntax_warn(env, "regular expression has '%s' without escape", c); - } -} - -static void -CC_DUP_WARN(ScanEnv *env) -{ - if (onig_warn == onig_null_warn /*|| !mrb_test(ruby_verbose)*/) return ; - - if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) && - !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { - (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP; - onig_syntax_warn(env, "character class has duplicated range"); - } -} - -static void -UNKNOWN_ESC_WARN(ScanEnv *env, int c) -{ - if (onig_warn == onig_null_warn /*|| !mrb_test(ruby_verbose)*/) return ; - onig_syntax_warn(env, "Unknown escape \\%c is ignored", c); -} - -static UChar* -find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, - UChar **next, OnigEncoding enc) -{ - int i; - OnigCodePoint x; - UChar *q; - UChar *p = from; - - while (p < to) { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enclen(enc, p, to); - if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q, to); - } - if (i >= n) { - if (IS_NOT_NULL(next)) - *next = q; - return p; - } - } - p = q; - } - return NULL_UCHARP; -} - -static int -str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, - OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn) -{ - int i, in_esc; - OnigCodePoint x; - UChar *q; - UChar *p = from; - - in_esc = 0; - while (p < to) { - if (in_esc) { - in_esc = 0; - p += enclen(enc, p, to); - } - else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enclen(enc, p, to); - if (x == s[0]) { - for (i = 1; i < n && q < to; i++) { - x = ONIGENC_MBC_TO_CODE(enc, q, to); - if (x != s[i]) break; - q += enclen(enc, q, to); - } - if (i >= n) return 1; - p += enclen(enc, p, to); - } - else { - x = ONIGENC_MBC_TO_CODE(enc, p, to); - if (x == bad) return 0; - else if (x == MC_ESC(syn)) in_esc = 1; - p = q; - } - } - } - return 0; -} - -static int -fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) -{ - int num; - OnigCodePoint c, c2; - const OnigSyntaxType* syn = env->syntax; - OnigEncoding enc = env->enc; - UChar* prev; - UChar* p = *src; - PFETCH_READY; - - if (PEND) { - tok->type = TK_EOT; - return tok->type; - } - - PFETCH(c); - tok->type = TK_CHAR; - tok->base = 0; - tok->u.c = c; - tok->escaped = 0; - - if (c == ']') { - tok->type = TK_CC_CLOSE; - } - else if (c == '-') { - tok->type = TK_CC_RANGE; - } - else if (c == MC_ESC(syn)) { - if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) - goto end; - - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - PFETCH(c); - tok->escaped = 1; - tok->u.c = c; - switch (c) { - case 'w': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; - tok->u.prop.is_not = 0; - break; - case 'W': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; - tok->u.prop.is_not = 1; - break; - case 'd': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; - tok->u.prop.is_not = 0; - break; - case 'D': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; - tok->u.prop.is_not = 1; - break; - case 's': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; - tok->u.prop.is_not = 0; - break; - case 'S': - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; - tok->u.prop.is_not = 1; - break; - case 'h': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.is_not = 0; - break; - case 'H': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.is_not = 1; - break; - - case 'p': - case 'P': - c2 = PPEEK; - if (c2 == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.is_not = (c == 'P' ? 1 : 0); - - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c2); - if (c2 == '^') { - tok->u.prop.is_not = (tok->u.prop.is_not == 0 ? 1 : 0); - } - else - PUNFETCH; - } - } - else { - onig_syntax_warn(env, "invalid Unicode Property \\%c", c); - } - break; - - case 'x': - if (PEND) break; - - prev = p; - if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - c2 = PPEEK; - if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { - PINC; - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } - } - else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; - } - break; - - case 'u': - if (PEND) break; - - prev = p; - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - break; - - case '0': - case '1': case '2': case '3': case '4': case '5': case '6': case '7': - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - PUNFETCH; - prev = p; - num = scan_unsigned_octal_number(&p, end, 3, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; - } - break; - - default: - PUNFETCH; - num = fetch_escaped_value(&p, end, env); - if (num < 0) return num; - if (tok->u.c != num) { - tok->u.code = (OnigCodePoint )num; - tok->type = TK_CODE_POINT; - } - break; - } - } - else if (c == '[') { - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) { - OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' }; - tok->backp = p; /* point at '[' is readed */ - PINC; - if (str_exist_check_with_esc(send, 2, p, end, - (OnigCodePoint )']', enc, syn)) { - tok->type = TK_POSIX_BRACKET_OPEN; - } - else { - PUNFETCH; - goto cc_in_cc; - } - } - else { - cc_in_cc: - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) { - tok->type = TK_CC_CC_OPEN; - } - else { - CC_ESC_WARN(env, (UChar* )"["); - } - } - } - else if (c == '&') { - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) && - !PEND && (PPEEK_IS('&'))) { - PINC; - tok->type = TK_CC_AND; - } - } - - end: - *src = p; - return tok->type; -} - -static int -fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) -{ - int r, num; - OnigCodePoint c; - OnigEncoding enc = env->enc; - const OnigSyntaxType* syn = env->syntax; - UChar* prev; - UChar* p = *src; - PFETCH_READY; - - start: - if (PEND) { - tok->type = TK_EOT; - return tok->type; - } - - tok->type = TK_STRING; - tok->base = 0; - tok->backp = p; - - PFETCH(c); - if (IS_MC_ESC_CODE(c, syn)) { - if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; - - tok->backp = p; - PFETCH(c); - - tok->u.c = c; - tok->escaped = 1; - switch (c) { - case '*': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '+': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 1; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '?': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break; - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = 1; - greedy_check: - if (!PEND && PPEEK_IS('?') && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { - PFETCH(c); - tok->u.repeat.greedy = 0; - tok->u.repeat.possessive = 0; - } - else { - possessive_check: - if (!PEND && PPEEK_IS('+') && - ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) && - tok->type != TK_INTERVAL) || - (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) && - tok->type == TK_INTERVAL))) { - PFETCH(c); - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 1; - } - else { - tok->u.repeat.greedy = 1; - tok->u.repeat.possessive = 0; - } - } - break; - - case '{': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; - r = fetch_range_quantifier(&p, end, tok, env); - if (r < 0) return r; /* error */ - if (r == 0) goto greedy_check; - else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; - - goto greedy_check; - } - /* r == 1 : normal char */ - break; - - case '|': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break; - tok->type = TK_ALT; - break; - - case '(': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_OPEN; - break; - - case ')': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_CLOSE; - break; - - case 'w': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; - tok->u.prop.is_not = 0; - break; - - case 'W': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_W; - tok->u.prop.is_not = 1; - break; - - case 'b': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUND; - break; - - case 'B': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NOT_WORD_BOUND; - break; - -#ifdef USE_WORD_BEGIN_END - case '<': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BEGIN; - break; - - case '>': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; - tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_END; - break; -#endif - - case 's': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; - tok->u.prop.is_not = 0; - break; - - case 'S': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_S; - tok->u.prop.is_not = 1; - break; - - case 'd': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; - tok->u.prop.is_not = 0; - break; - - case 'D': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_D; - tok->u.prop.is_not = 1; - break; - - case 'h': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.is_not = 0; - break; - - case 'H': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break; - tok->type = TK_CHAR_TYPE; - tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT; - tok->u.prop.is_not = 1; - break; - - case 'A': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - begin_buf: - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_BUF; - break; - - case 'Z': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_SEMI_END_BUF; - break; - - case 'z': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; - end_buf: - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_END_BUF; - break; - - case 'G': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_POSITION; - break; - - case '`': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; - goto begin_buf; - break; - - case '\'': - if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break; - goto end_buf; - break; - - case 'x': - if (PEND) break; - - prev = p; - if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) { - PINC; - num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); - if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; - if (!PEND) { - if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) - return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; - } - - if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { - PINC; - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { - /* can't read nothing or invalid format */ - p = prev; - } - } - else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) { - num = scan_unsigned_hexadecimal_number(&p, end, 2, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 16; - tok->u.c = num; - } - break; - - case 'u': - if (PEND) break; - - prev = p; - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) { - num = scan_unsigned_hexadecimal_number(&p, end, 4, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_CODE_POINT; - tok->base = 16; - tok->u.code = (OnigCodePoint )num; - } - break; - - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - PUNFETCH; - prev = p; - num = onig_scan_unsigned_number(&p, end, enc); - if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { - goto skip_backref; - } - - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && - (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) - return ONIGERR_INVALID_BACKREF; - } - - tok->type = TK_BACKREF; - tok->u.backref.num = 1; - tok->u.backref.ref1 = num; - tok->u.backref.by_name = 0; -#ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level = 0; -#endif - break; - } - - skip_backref: - if (c == '8' || c == '9') { - /* normal char */ - p = prev; PINC; - break; - } - - p = prev; - /* fall through */ - case '0': - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - prev = p; - num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } - tok->type = TK_RAW_BYTE; - tok->base = 8; - tok->u.c = num; - } - else if (c != '0') { - PINC; - } - break; - -#ifdef USE_NAMED_GROUP - case 'k': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { - PFETCH(c); - if (c == '<' || c == '\'') { - UChar* name_end; - int* backs; - int back_num; - - prev = p; - -#ifdef USE_BACKREF_WITH_LEVEL - name_end = NULL_UCHARP; /* no need. escape gcc warning. */ - r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); - if (r == 1) tok->u.backref.exist_level = 1; - else tok->u.backref.exist_level = 0; -#else - r = fetch_name(&p, end, &name_end, env, &back_num, 1); -#endif - if (r < 0) return r; - - if (back_num != 0) { - if (back_num < 0) { - back_num = BACKREF_REL_TO_ABS(back_num, env); - if (back_num <= 0) - return ONIGERR_INVALID_BACKREF; - } - - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (back_num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[back_num])) - return ONIGERR_INVALID_BACKREF; - } - tok->type = TK_BACKREF; - tok->u.backref.by_name = 0; - tok->u.backref.num = 1; - tok->u.backref.ref1 = back_num; - } - else { - num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); - if (num <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - int i; - for (i = 0; i < num; i++) { - if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) - return ONIGERR_INVALID_BACKREF; - } - } - - tok->type = TK_BACKREF; - tok->u.backref.by_name = 1; - if (num == 1) { - tok->u.backref.num = 1; - tok->u.backref.ref1 = backs[0]; - } - else { - tok->u.backref.num = num; - tok->u.backref.refs = backs; - } - } - } - else { - PUNFETCH; - onig_syntax_warn(env, "invalid back reference"); - } - } - break; -#endif - -#ifdef USE_SUBEXP_CALL - case 'g': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { - PFETCH(c); - if (c == '<' || c == '\'') { - int gnum; - UChar* name_end; - - prev = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); - if (r < 0) return r; - - tok->type = TK_CALL; - tok->u.call.name = prev; - tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; - } - else { - onig_syntax_warn(env, "invalid subexp call"); - PUNFETCH; - } - } - break; -#endif - - case 'Q': - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) { - tok->type = TK_QUOTE_OPEN; - } - break; - - case 'p': - case 'P': - if (PPEEK_IS('{') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.is_not = (c == 'P' ? 1 : 0); - - if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c); - if (c == '^') { - tok->u.prop.is_not = (tok->u.prop.is_not == 0 ? 1 : 0); - } - else - PUNFETCH; - } - } - else { - onig_syntax_warn(env, "invalid Unicode Property \\%c", c); - } - break; - - default: - PUNFETCH; - num = fetch_escaped_value(&p, end, env); - if (num < 0) return num; - /* set_raw: */ - if (tok->u.c != num) { - tok->type = TK_CODE_POINT; - tok->u.code = (OnigCodePoint )num; - } - else { /* string */ - p = tok->backp + enclen(enc, tok->backp, end); - } - break; - } - } - else { - tok->u.c = c; - tok->escaped = 0; - -#ifdef USE_VARIABLE_META_CHARS - if ((c != ONIG_INEFFECTIVE_META_CHAR) && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) { - if (c == MC_ANYCHAR(syn)) - goto any_char; - else if (c == MC_ANYTIME(syn)) - goto anytime; - else if (c == MC_ZERO_OR_ONE_TIME(syn)) - goto zero_or_one_time; - else if (c == MC_ONE_OR_MORE_TIME(syn)) - goto one_or_more_time; - else if (c == MC_ANYCHAR_ANYTIME(syn)) { - tok->type = TK_ANYCHAR_ANYTIME; - goto out; - } - } -#endif - - switch (c) { - case '.': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; -#ifdef USE_VARIABLE_META_CHARS - any_char: -#endif - tok->type = TK_ANYCHAR; - break; - - case '*': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; -#ifdef USE_VARIABLE_META_CHARS - anytime: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '+': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; -#ifdef USE_VARIABLE_META_CHARS - one_or_more_time: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 1; - tok->u.repeat.upper = REPEAT_INFINITE; - goto greedy_check; - break; - - case '?': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; -#ifdef USE_VARIABLE_META_CHARS - zero_or_one_time: -#endif - tok->type = TK_OP_REPEAT; - tok->u.repeat.lower = 0; - tok->u.repeat.upper = 1; - goto greedy_check; - break; - - case '{': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; - r = fetch_range_quantifier(&p, end, tok, env); - if (r < 0) return r; /* error */ - if (r == 0) goto greedy_check; - else if (r == 2) { /* {n} */ - if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY)) - goto possessive_check; - - goto greedy_check; - } - /* r == 1 : normal char */ - break; - - case '|': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break; - tok->type = TK_ALT; - break; - - case '(': - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PPEEK_IS('#')) { - PFETCH(c); - while (1) { - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - if (c == MC_ESC(syn)) { - if (!PEND) PFETCH(c); - } - else { - if (c == ')') break; - } - } - goto start; - } - PUNFETCH; - } - - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_OPEN; - break; - - case ')': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break; - tok->type = TK_SUBEXP_CLOSE; - break; - - case '^': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); - break; - - case '$': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; - tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); - break; - - case '[': - if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break; - tok->type = TK_CC_OPEN; - break; - - case ']': - if (*src > env->pattern) /* /].../ is allowed. */ - CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]"); - break; - - case '#': - if (IS_EXTEND(env->option)) { - while (!PEND) { - PFETCH(c); - if (ONIGENC_IS_CODE_NEWLINE(enc, c)) - break; - } - goto start; - break; - } - break; - - case ' ': case '\t': case '\n': case '\r': case '\f': - if (IS_EXTEND(env->option)) - goto start; - break; - - default: - /* string */ - break; - } - } - -#ifdef USE_VARIABLE_META_CHARS - out: -#endif - *src = p; - return tok->type; -} - -static int -add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int is_not, - ScanEnv* env, - OnigCodePoint sb_out, const OnigCodePoint mbr[]) -{ - int i, r; - OnigCodePoint j; - - int n = ONIGENC_CODE_RANGE_NUM(mbr); - - if (is_not == 0) { - for (i = 0; i < n; i++) { - for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); - j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { - if (j >= sb_out) { - if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), env, j, - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - i++; - } - - goto sb_end; - } - BITSET_SET_BIT_CHKDUP(cc->bs, j); - } - } - - sb_end: - for ( ; i < n; i++) { - r = add_code_range_to_buf(&(cc->mbuf), env, - ONIGENC_CODE_RANGE_FROM(mbr, i), - ONIGENC_CODE_RANGE_TO(mbr, i)); - if (r != 0) return r; - } - } - else { - OnigCodePoint prev = 0; - - for (i = 0; i < n; i++) { - for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { - if (j >= sb_out) { - goto sb_end2; - } - BITSET_SET_BIT_CHKDUP(cc->bs, j); - } - prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; - } - for (j = prev; j < sb_out; j++) { - BITSET_SET_BIT_CHKDUP(cc->bs, j); - } - - sb_end2: - prev = sb_out; - - for (i = 0; i < n; i++) { - if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { - r = add_code_range_to_buf(&(cc->mbuf), env, prev, - ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); - if (r != 0) return r; - } - prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; - } - if (prev < 0x7fffffff) { - r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff); - if (r != 0) return r; - } - } - - return 0; -} - -static int -add_ctype_to_cc(CClassNode* cc, int ctype, int is_not, ScanEnv* env) -{ - int c, r; - const OnigCodePoint *ranges; - OnigCodePoint sb_out; - OnigEncoding enc = env->enc; - - switch (ctype) { - case ONIGENC_CTYPE_D: - case ONIGENC_CTYPE_S: - case ONIGENC_CTYPE_W: - ctype ^= ONIGENC_CTYPE_SPECIAL_MASK; - if (is_not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - } - return 0; - break; - } - - r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); - if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, is_not, env, sb_out, ranges); - } - else if (r != ONIG_NO_SUPPORT_CONFIG) { - return r; - } - - r = 0; - switch (ctype) { - case ONIGENC_CTYPE_ALPHA: - case ONIGENC_CTYPE_BLANK: - case ONIGENC_CTYPE_CNTRL: - case ONIGENC_CTYPE_DIGIT: - case ONIGENC_CTYPE_LOWER: - case ONIGENC_CTYPE_PUNCT: - case ONIGENC_CTYPE_SPACE: - case ONIGENC_CTYPE_UPPER: - case ONIGENC_CTYPE_XDIGIT: - case ONIGENC_CTYPE_ASCII: - case ONIGENC_CTYPE_ALNUM: - if (is_not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - } - break; - - case ONIGENC_CTYPE_GRAPH: - case ONIGENC_CTYPE_PRINT: - if (is_not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - break; - - case ONIGENC_CTYPE_WORD: - if (is_not == 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ - && ! ONIGENC_IS_CODE_WORD(enc, c)) - BITSET_SET_BIT_CHKDUP(cc->bs, c); - } - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - - return r; -} - -static int -parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) -{ -#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20 -#define POSIX_BRACKET_NAME_MIN_LEN 4 - - static const PosixBracketEntryType PBS[] = { - { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, - { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, - { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, - { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, - { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, - { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, - { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, - { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, - { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, - { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 }, - { (UChar* )NULL, -1, 0 } - }; - - const PosixBracketEntryType *pb; - int is_not, i, r; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *p = *src; - PFETCH_READY; - - if (PPEEK_IS('^')) { - PINC; - is_not = 1; - } - else - is_not = 0; - - if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3) - goto not_posix_bracket; - - for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { - if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { - p = (UChar* )onigenc_step(enc, p, end, pb->len); - if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - - r = add_ctype_to_cc(cc, pb->ctype, is_not, env); - if (r != 0) return r; - - PINC; PINC; - *src = p; - return 0; - } - } - - not_posix_bracket: - c = 0; - i = 0; - while (!PEND && ((c = PPEEK) != ':') && c != ']') { - PINC; - if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break; - } - if (c == ':' && ! PEND) { - PINC; - if (! PEND) { - PFETCH(c); - if (c == ']') - return ONIGERR_INVALID_POSIX_BRACKET_TYPE; - } - } - - return 1; /* 1: is not POSIX bracket, but no error. */ -} - -static int -fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) -{ - int r; - OnigCodePoint c; - OnigEncoding enc = env->enc; - UChar *prev, *start, *p = *src; - PFETCH_READY; - - r = 0; - start = prev = p; - - while (!PEND) { - prev = p; - PFETCH(c); - if (c == '}') { - r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev); - if (r < 0) break; - - *src = p; - return r; - } - else if (c == '(' || c == ')' || c == '{' || c == '|') { - r = ONIGERR_INVALID_CHAR_PROPERTY_NAME; - break; - } - } - - onig_scan_env_set_error_string(env, r, *src, prev); - return r; -} - -static int -parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, ctype; - CClassNode* cc; - - ctype = fetch_char_property_to_ctype(src, end, env); - if (ctype < 0) return ctype; - - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - r = add_ctype_to_cc(cc, ctype, 0, env); - if (r != 0) return r; - if (tok->u.prop.is_not != 0) NCCLASS_SET_NOT(cc); - - return 0; -} - - -enum CCSTATE { - CCS_VALUE, - CCS_RANGE, - CCS_COMPLETE, - CCS_START -}; - -enum CCVALTYPE { - CCV_SB, - CCV_CODE_POINT, - CCV_CLASS -}; - -static int -next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) -{ - int r; - - if (*state == CCS_RANGE) - return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE; - - if (*state == CCS_VALUE && *type != CCV_CLASS) { - if (*type == CCV_SB) - BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); - else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); - if (r < 0) return r; - } - } - - *state = CCS_VALUE; - *type = CCV_CLASS; - return 0; -} - -static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, - int* vs_israw, int v_israw, - enum CCVALTYPE intype, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) -{ - int r; - - switch (*state) { - case CCS_VALUE: - if (*type == CCV_SB) - BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); - else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); - if (r < 0) return r; - } - break; - - case CCS_RANGE: - if (intype == *type) { - if (intype == CCV_SB) { - if (*vs > 0xff || v > 0xff) - return ONIGERR_INVALID_CODE_POINT_VALUE; - - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(env, cc->bs, (int )*vs, (int )v); - } - else { - r = add_code_range(&(cc->mbuf), env, *vs, v); - if (r < 0) return r; - } - } - else { - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); - r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); - if (r < 0) return r; - } - ccs_range_end: - *state = CCS_COMPLETE; - break; - - case CCS_COMPLETE: - case CCS_START: - *state = CCS_VALUE; - break; - - default: - break; - } - - *vs_israw = v_israw; - *vs = v; - *type = intype; - return 0; -} - -static int -code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, - ScanEnv* env) -{ - int in_esc; - OnigCodePoint code; - OnigEncoding enc = env->enc; - UChar* p = from; - PFETCH_READY; - - in_esc = 0; - while (! PEND) { - if (ignore_escaped && in_esc) { - in_esc = 0; - } - else { - PFETCH(code); - if (code == c) return 1; - if (code == MC_ESC(env->syntax)) in_esc = 1; - } - } - return 0; -} - -static int -parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, neg, len, fetched, and_start; - OnigCodePoint v, vs; - UChar *p; - Node* node; - CClassNode *cc, *prev_cc; - CClassNode work_cc; - - enum CCSTATE state; - enum CCVALTYPE val_type, in_type; - int val_israw, in_israw; - - prev_cc = (CClassNode* )NULL; - *np = NULL_NODE; - r = fetch_token_in_cc(tok, src, end, env); - if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { - neg = 1; - r = fetch_token_in_cc(tok, src, end, env); - } - else { - neg = 0; - } - - if (r < 0) return r; - if (r == TK_CC_CLOSE) { - if (! code_exist_check((OnigCodePoint )']', - *src, env->pattern_end, 1, env)) - return ONIGERR_EMPTY_CHAR_CLASS; - - CC_ESC_WARN(env, (UChar* )"]"); - r = tok->type = TK_CHAR; /* allow []...] */ - } - - *np = node = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(node); - cc = NCCLASS(node); - - and_start = 0; - state = CCS_START; - p = *src; - while (r != TK_CC_CLOSE) { - fetched = 0; - switch (r) { - case TK_CHAR: - if ((tok->u.code >= SINGLE_BYTE_SIZE) || - (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) { - in_type = CCV_CODE_POINT; - } - else if (len < 0) { - r = len; - goto err; - } - else { - sb_char: - in_type = CCV_SB; - } - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry2; - break; - - case TK_RAW_BYTE: - /* tok->base != 0 : octal or hexadec. */ - if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN; - UChar* psave = p; - int i, base = tok->base; - - buf[0] = tok->u.c; - for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - if (r != TK_RAW_BYTE || tok->base != base) { - fetched = 1; - break; - } - buf[i] = tok->u.c; - } - - if (i < ONIGENC_MBC_MINLEN(env->enc)) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - - len = enclen(env->enc, buf, buf+i); - if (i < len) { - r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - goto err; - } - else if (i > len) { /* fetch back */ - p = psave; - for (i = 1; i < len; i++) { - r = fetch_token_in_cc(tok, &p, end, env); - } - fetched = 0; - } - - if (i == 1) { - v = (OnigCodePoint )buf[0]; - goto raw_single; - } - else { - v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe); - in_type = CCV_CODE_POINT; - } - } - else { - v = (OnigCodePoint )tok->u.c; - raw_single: - in_type = CCV_SB; - } - in_israw = 1; - goto val_entry2; - break; - - case TK_CODE_POINT: - v = tok->u.code; - in_israw = 1; - val_entry: - len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); - if (len < 0) { - r = len; - goto err; - } - in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT); - val_entry2: - r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type, - &state, env); - if (r != 0) goto err; - break; - - case TK_POSIX_BRACKET_OPEN: - r = parse_posix_bracket(cc, &p, end, env); - if (r < 0) goto err; - if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, (UChar* )"["); - p = tok->backp; - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - goto val_entry; - } - goto next_class; - break; - - case TK_CHAR_TYPE: - r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.is_not, env); - if (r != 0) return r; - - next_class: - r = next_state_class(cc, &vs, &val_type, &state, env); - if (r != 0) goto err; - break; - - case TK_CHAR_PROPERTY: - { - int ctype; - - ctype = fetch_char_property_to_ctype(&p, end, env); - if (ctype < 0) return ctype; - r = add_ctype_to_cc(cc, ctype, tok->u.prop.is_not, env); - if (r != 0) return r; - goto next_class; - } - break; - - case TK_CC_RANGE: - if (state == CCS_VALUE) { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) { /* allow [x-] */ - range_end_val: - v = (OnigCodePoint )'-'; - in_israw = 0; - goto val_entry; - } - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - state = CCS_RANGE; - } - else if (state == CCS_START) { - /* [-xa] is allowed */ - v = (OnigCodePoint )tok->u.c; - in_israw = 0; - - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - /* [--x] or [a&&-x] is warned. */ - if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, (UChar* )"-"); - - goto val_entry; - } - else if (state == CCS_RANGE) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [!--x] is allowed */ - } - else { /* CCS_COMPLETE */ - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - fetched = 1; - if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ - else if (r == TK_CC_AND) { - CC_ESC_WARN(env, (UChar* )"-"); - goto range_end_val; - } - - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, (UChar* )"-"); - goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ - } - r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; - goto err; - } - break; - - case TK_CC_CC_OPEN: /* [ */ - { - Node *anode; - CClassNode* acc; - - r = parse_char_class(&anode, tok, &p, end, env); - if (r == 0) { - acc = NCCLASS(anode); - r = or_cclass(cc, acc, env); - } - onig_node_free(anode); - if (r != 0) goto err; - } - break; - - case TK_CC_AND: /* && */ - { - if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - /* initialize local variables */ - and_start = 1; - state = CCS_START; - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - } - else { - prev_cc = cc; - cc = &work_cc; - } - initialize_cclass(cc); - } - break; - - case TK_EOT: - r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS; - goto err; - break; - default: - r = ONIGERR_PARSER_BUG; - goto err; - break; - } - - if (fetched) - r = tok->type; - else { - r = fetch_token_in_cc(tok, &p, end, env); - if (r < 0) goto err; - } - } - - if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, - &val_type, &state, env); - if (r != 0) goto err; - } - - if (IS_NOT_NULL(prev_cc)) { - r = and_cclass(prev_cc, cc, env); - if (r != 0) goto err; - bbuf_free(cc->mbuf); - cc = prev_cc; - } - - if (neg != 0) - NCCLASS_SET_NOT(cc); - else - NCCLASS_CLEAR_NOT(cc); - if (IS_NCCLASS_NOT(cc) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { - int is_empty; - - is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); - if (is_empty != 0) - BITSET_IS_EMPTY(cc->bs, is_empty); - - if (is_empty == 0) { -#define NEWLINE_CODE 0x0a - - if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { - if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) - BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE); - else - add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); - } - } - } - *src = p; - return 0; - - err: - if (cc != NCCLASS(*np)) - bbuf_free(cc->mbuf); - return r; -} - -static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); - -static int -parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) -{ - int r, num; - Node *target; - OnigOptionType option; - OnigCodePoint c; - OnigEncoding enc = env->enc; - -#ifdef USE_NAMED_GROUP - int list_capture; -#endif - - UChar* p = *src; - PFETCH_READY; - - *np = NULL; - if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - - option = env->option; - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { - PINC; - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - - PFETCH(c); - switch (c) { - case ':': /* (?:...) grouping only */ - group: - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(np, tok, term, &p, end, env); - if (r < 0) return r; - *src = p; - return 1; /* group */ - break; - - case '=': - *np = onig_node_new_anchor(ANCHOR_PREC_READ); - break; - case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); - break; - case '>': /* (?>...) stop backtrack */ - *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - break; - -#ifdef USE_NAMED_GROUP - case '\'': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - goto named_group1; - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; -#endif - - case '<': /* look behind (?<=...), (?<!...) */ - PFETCH(c); - if (c == '=') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND); - else if (c == '!') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); -#ifdef USE_NAMED_GROUP - else { - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - UChar *name; - UChar *name_end; - - PUNFETCH; - c = '<'; - - named_group1: - list_capture = 0; - - named_group2: - name = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); - if (r < 0) return r; - - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - - r = name_add(env->reg, name, name_end, num, env); - if (r != 0) return r; - *np = node_new_enclose_memory(env->option, 1); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; - if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - env->num_named++; - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - } -#else - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } -#endif - break; - - case '@': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { -#ifdef USE_NAMED_GROUP - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { - PFETCH(c); - if (c == '<' || c == '\'') { - list_capture = 1; - goto named_group2; /* (?@<name>...) */ - } - PUNFETCH; - } -#endif - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) { - onig_node_free(*np); - return num; - } - else if (num >= (int )BIT_STATUS_BITS_NUM) { - onig_node_free(*np); - return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; - } - NENCLOSE(*np)->regnum = num; - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); - } - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - break; - - case '-': case 'i': case 'm': case 's': case 'x': - { - int neg = 0; - - while (1) { - switch (c) { - case ':': - case ')': - break; - - case '-': neg = 1; break; - case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; - case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; - case 's': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - - case 'm': - if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); - } - else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); - } - else - return ONIGERR_UNDEFINED_GROUP_OPTION; - break; - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - - if (c == ')') { - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - *src = p; - return 2; /* option only */ - } - else if (c == ':') { - OnigOptionType prev = env->option; - - env->option = option; - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - env->option = prev; - if (r < 0) return r; - *np = node_new_option(option); - CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->target = target; - *src = p; - return 0; - } - - if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - PFETCH(c); - } - } - break; - - default: - return ONIGERR_UNDEFINED_GROUP_OPTION; - } - } - else { - if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) - goto group; - - *np = node_new_enclose_memory(env->option, 0); - CHECK_NULL_RETURN_MEMERR(*np); - num = scan_env_add_mem_entry(env); - if (num < 0) return num; - NENCLOSE(*np)->regnum = num; - } - - CHECK_NULL_RETURN_MEMERR(*np); - r = fetch_token(tok, &p, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); - if (r < 0) { - onig_node_free(target); - return r; - } - - if (NTYPE(*np) == NT_ANCHOR) - NANCHOR(*np)->target = target; - else { - NENCLOSE(*np)->target = target; - if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { - /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); - if (r != 0) return r; - } - } - - *src = p; - return 0; -} - -static const char* const PopularQStr[] = { - "?", "*", "+", "??", "*?", "+?" -}; - -static const char* const ReduceQStr[] = { - "", "", "*", "*?", "??", "+ and ??", "+? and ?" -}; - -static int -set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) -{ - QtfrNode* qn; - - qn = NQTFR(qnode); - if (qn->lower == 1 && qn->upper == 1) { - return 1; - } - - switch (NTYPE(target)) { - case NT_STR: - if (! group) { - StrNode* sn = NSTR(target); - if (str_node_can_be_split(sn, env->enc)) { - Node* n = str_node_split_last_char(sn, env->enc); - if (IS_NOT_NULL(n)) { - qn->target = n; - return 2; - } - } - } - break; - - case NT_QTFR: - { /* check redundant double repeat. */ - /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QtfrNode* qnt = NQTFR(target); - int nestq_num = popular_quantifier_num(qn); - int targetq_num = popular_quantifier_num(qnt); - -#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { - UChar buf[WARN_BUFSIZE]; - - switch(ReduceTypeTable[targetq_num][nestq_num]) { - case RQ_ASIS: - break; - - case RQ_DEL: - if (onig_verb_warn != onig_null_warn) { - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"redundant nested repeat operator"); - (*onig_verb_warn)((char* )buf); - } - goto warn_exit; - break; - - default: - if (onig_verb_warn != onig_null_warn) { - onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - (UChar* )"nested repeat operator %s and %s was replaced with '%s'", - PopularQStr[targetq_num], PopularQStr[nestq_num], - ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - (*onig_verb_warn)((char* )buf); - } - goto warn_exit; - break; - } - } - - warn_exit: -#endif - if (targetq_num >= 0) { - if (nestq_num >= 0) { - onig_reduce_nested_quantifier(qnode, target); - goto q_exit; - } - else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ - /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ - if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - } - } - } - } - break; - - default: - break; - } - - qn->target = target; - q_exit: - return 0; -} - - -#ifdef USE_SHARED_CCLASS_TABLE - -#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 - -/* for ctype node hash table */ - -typedef struct { - OnigEncoding enc; - int is_not; - int type; -} type_cclass_key; - -static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y) -{ - if (x->type != y->type) return 1; - if (x->enc != y->enc) return 1; - if (x->is_not != y->is_not) return 1; - return 0; -} - -static st_index_t type_cclass_hash(type_cclass_key* key) -{ - int i, val; - UChar *p; - - val = 0; - - p = (UChar* )&(key->enc); - for (i = 0; i < (int )sizeof(key->enc); i++) { - val = val * 997 + (int )*p++; - } - - p = (UChar* )(&key->type); - for (i = 0; i < (int )sizeof(key->type); i++) { - val = val * 997 + (int )*p++; - } - - val += key->is_not; - return val + (val >> 5); -} - -static const struct st_hash_type type_type_cclass_hash = { - type_cclass_cmp, - type_cclass_hash, -}; - -static st_table* OnigTypeCClassTable; - - -static enum st_retval -i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) -{ - if (IS_NOT_NULL(node)) { - CClassNode* cc = NCCLASS(node); - if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); - xfree(node); - } - - if (IS_NOT_NULL(key)) xfree(key); - return ST_DELETE; -} - -extern int -onig_free_shared_cclass_table(void) -{ - THREAD_ATOMIC_START; - if (IS_NOT_NULL(OnigTypeCClassTable)) { - onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); - onig_st_free_table(OnigTypeCClassTable); - OnigTypeCClassTable = NULL; - } - THREAD_ATOMIC_END; - - return 0; -} - -#endif /* USE_SHARED_CCLASS_TABLE */ - - -#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS -static int -clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) -{ - BBuf *tbuf; - int r; - - if (IS_NCCLASS_NOT(cc)) { - bitset_invert(cc->bs); - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - r = not_code_range_buf(enc, cc->mbuf, &tbuf); - if (r != 0) return r; - - bbuf_free(cc->mbuf); - cc->mbuf = tbuf; - } - - NCCLASS_CLEAR_NOT(cc); - } - - return 0; -} -#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ - -typedef struct { - ScanEnv* env; - CClassNode* cc; - Node* alt_root; - Node** ptail; -} IApplyCaseFoldArg; - -static int -i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], - int to_len, void* arg) -{ - IApplyCaseFoldArg* iarg; - ScanEnv* env; - CClassNode* cc; - BitSetRef bs; - - iarg = (IApplyCaseFoldArg* )arg; - env = iarg->env; - cc = iarg->cc; - bs = cc->bs; - - if (to_len == 1) { - int is_in = onig_is_code_in_cc(env->enc, from, cc); -#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || - (is_in == 0 && IS_NCCLASS_NOT(cc))) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - add_code_range0(&(cc->mbuf), env, *to, *to, 0); - } - else { - BITSET_SET_BIT(bs, *to); - } - } -#else - if (is_in != 0) { - if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); - add_code_range0(&(cc->mbuf), env, *to, *to, 0); - } - else { - if (IS_NCCLASS_NOT(cc)) { - BITSET_CLEAR_BIT(bs, *to); - } - else - BITSET_SET_BIT(bs, *to); - } - } -#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ - } - else { - int r, i, len; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - Node *snode = NULL_NODE; - - if (onig_is_code_in_cc(env->enc, from, cc) -#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS - && !IS_NCCLASS_NOT(cc) -#endif - ) { - for (i = 0; i < to_len; i++) { - len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); - if (i == 0) { - snode = onig_node_new_str(buf, buf + len); - CHECK_NULL_RETURN_MEMERR(snode); - - /* char-class expanded multi-char only - compare with string folded at match time. */ - NSTRING_SET_AMBIG(snode); - } - else { - r = onig_node_str_cat(snode, buf, buf + len); - if (r < 0) { - onig_node_free(snode); - return r; - } - } - } - - *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); - CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); - iarg->ptail = &(NCDR((*(iarg->ptail)))); - } - } - - return 0; -} - -static int -parse_exp(Node** np, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r, len, group = 0; - Node* qn; - Node** targetp; - - *np = NULL; - if (tok->type == (enum TokenSyms )term) - goto end_of_token; - - switch (tok->type) { - case TK_ALT: - case TK_EOT: - end_of_token: - *np = node_new_empty(); - return tok->type; - - case TK_SUBEXP_OPEN: - r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); - if (r < 0) return r; - if (r == 1) group = 1; - else if (r == 2) { /* option only */ - Node* target; - OnigOptionType prev = env->option; - - env->option = NENCLOSE(*np)->option; - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_subexp(&target, tok, term, src, end, env); - env->option = prev; - if (r < 0) { - onig_node_free(target); - return r; - } - NENCLOSE(*np)->target = target; - return tok->type; - } - break; - - case TK_SUBEXP_CLOSE: - if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP)) - return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS; - - if (tok->escaped) goto tk_raw_byte; - else goto tk_byte; - break; - - case TK_STRING: - tk_byte: - { - *np = node_new_str(tok->backp, *src); - CHECK_NULL_RETURN_MEMERR(*np); - - while (1) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_STRING) break; - - r = onig_node_str_cat(*np, tok->backp, *src); - if (r < 0) return r; - } - - string_end: - targetp = np; - goto repeat; - } - break; - - case TK_RAW_BYTE: - tk_raw_byte: - { - *np = node_new_str_raw_char((UChar )tok->u.c); - CHECK_NULL_RETURN_MEMERR(*np); - len = 1; - while (1) { - if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { - r = fetch_token(tok, src, end, env); - NSTRING_CLEAR_RAW(*np); - goto string_end; - } - } - - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - if (r != TK_RAW_BYTE) { - /* Don't use this, it is wrong for little endian encodings. */ - return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; - } - - r = node_str_cat_char(*np, (UChar )tok->u.c); - if (r < 0) return r; - - len++; - } - } - break; - - case TK_CODE_POINT: - { - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf); - if (num < 0) return num; -#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - *np = node_new_str_raw(buf, buf + num); -#else - *np = node_new_str(buf, buf + num); -#endif - CHECK_NULL_RETURN_MEMERR(*np); - } - break; - - case TK_QUOTE_OPEN: - { - OnigCodePoint end_op[2]; - UChar *qstart, *qend, *nextp; - - end_op[0] = (OnigCodePoint )MC_ESC(env->syntax); - end_op[1] = (OnigCodePoint )'E'; - qstart = *src; - qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc); - if (IS_NULL(qend)) { - nextp = qend = end; - } - *np = node_new_str(qstart, qend); - CHECK_NULL_RETURN_MEMERR(*np); - *src = nextp; - } - break; - - case TK_CHAR_TYPE: - { - switch (tok->u.prop.ctype) { - case ONIGENC_CTYPE_D: - case ONIGENC_CTYPE_S: - case ONIGENC_CTYPE_W: - { - CClassNode* cc; - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); - if (tok->u.prop.is_not != 0) NCCLASS_SET_NOT(cc); - } - break; - - case ONIGENC_CTYPE_WORD: - *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.is_not); - CHECK_NULL_RETURN_MEMERR(*np); - break; - - case ONIGENC_CTYPE_SPACE: - case ONIGENC_CTYPE_DIGIT: - case ONIGENC_CTYPE_XDIGIT: - { - CClassNode* cc; - -#ifdef USE_SHARED_CCLASS_TABLE - const OnigCodePoint *mbr; - OnigCodePoint sb_out; - - r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, - &sb_out, &mbr); - if (r == 0 && - ONIGENC_CODE_RANGE_NUM(mbr) - >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { - type_cclass_key key; - type_cclass_key* new_key; - - key.enc = env->enc; - key.is_not = tok->u.prop.is_not; - key.type = tok->u.prop.ctype; - - THREAD_ATOMIC_START; - - if (IS_NULL(OnigTypeCClassTable)) { - OnigTypeCClassTable - = onig_st_init_table_with_size(&type_type_cclass_hash, 10); - if (IS_NULL(OnigTypeCClassTable)) { - THREAD_ATOMIC_END; - return ONIGERR_MEMORY; - } - } - else { - if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, - (st_data_t* )np)) { - THREAD_ATOMIC_END; - break; - } - } - - *np = node_new_cclass_by_codepoint_range(tok->u.prop.is_not, - sb_out, mbr); - if (IS_NULL(*np)) { - THREAD_ATOMIC_END; - return ONIGERR_MEMORY; - } - - cc = NCCLASS(*np); - NCCLASS_SET_SHARE(cc); - new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); - xmemcpy(new_key, &key, sizeof(type_cclass_key)); - onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, - (st_data_t )*np); - - THREAD_ATOMIC_END; - } - else { -#endif - *np = node_new_cclass(); - CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); - add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); - if (tok->u.prop.is_not != 0) NCCLASS_SET_NOT(cc); -#ifdef USE_SHARED_CCLASS_TABLE - } -#endif - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - } - break; - - case TK_CHAR_PROPERTY: - r = parse_char_property(np, tok, src, end, env); - if (r != 0) return r; - break; - - case TK_CC_OPEN: - { - CClassNode* cc; - - r = parse_char_class(np, tok, src, end, env); - if (r != 0) return r; - - cc = NCCLASS(*np); - if (IS_IGNORECASE(env->option)) { - IApplyCaseFoldArg iarg; - - iarg.env = env; - iarg.cc = cc; - iarg.alt_root = NULL_NODE; - iarg.ptail = &(iarg.alt_root); - - r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag, - i_apply_case_fold, &iarg); - if (r != 0) { - onig_node_free(iarg.alt_root); - return r; - } - if (IS_NOT_NULL(iarg.alt_root)) { - Node* work = onig_node_new_alt(*np, iarg.alt_root); - if (IS_NULL(work)) { - onig_node_free(iarg.alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } - } - } - break; - - case TK_ANYCHAR: - *np = node_new_anychar(); - CHECK_NULL_RETURN_MEMERR(*np); - break; - - case TK_ANYCHAR_ANYTIME: - *np = node_new_anychar(); - CHECK_NULL_RETURN_MEMERR(*np); - qn = node_new_quantifier(0, REPEAT_INFINITE, 0); - CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->target = *np; - *np = qn; - break; - - case TK_BACKREF: - len = tok->u.backref.num; - *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, -#ifdef USE_BACKREF_WITH_LEVEL - tok->u.backref.exist_level, - tok->u.backref.level, -#endif - env); - CHECK_NULL_RETURN_MEMERR(*np); - break; - -#ifdef USE_SUBEXP_CALL - case TK_CALL: - { - int gnum = tok->u.call.gnum; - - if (gnum < 0) { - gnum = BACKREF_REL_TO_ABS(gnum, env); - if (gnum <= 0) - return ONIGERR_INVALID_BACKREF; - } - *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); - CHECK_NULL_RETURN_MEMERR(*np); - env->num_call++; - } - break; -#endif - - case TK_ANCHOR: - *np = onig_node_new_anchor(tok->u.anchor); - break; - - case TK_OP_REPEAT: - case TK_INTERVAL: - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; - else - *np = node_new_empty(); - } - else { - goto tk_byte; - } - break; - - default: - return ONIGERR_PARSER_BUG; - break; - } - - { - targetp = np; - - re_entry: - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - - repeat: - if (r == TK_OP_REPEAT || r == TK_INTERVAL) { - if (is_invalid_quantifier_target(*targetp)) - return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; - - qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 1 : 0)); - CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->greedy = tok->u.repeat.greedy; - r = set_quantifier(qn, *targetp, group, env); - if (r < 0) { - onig_node_free(qn); - return r; - } - - if (tok->u.repeat.possessive != 0) { - Node* en; - en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - if (IS_NULL(en)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - NENCLOSE(en)->target = qn; - qn = en; - } - - if (r == 0) { - *targetp = qn; - } - else if (r == 1) { - onig_node_free(qn); - } - else if (r == 2) { /* split case: /abc+/ */ - Node *tmp; - - *targetp = node_new_list(*targetp, NULL); - if (IS_NULL(*targetp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - tmp = NCDR(*targetp) = node_new_list(qn, NULL); - if (IS_NULL(tmp)) { - onig_node_free(qn); - return ONIGERR_MEMORY; - } - targetp = &(NCAR(tmp)); - } - goto re_entry; - } - } - - return r; -} - -static int -parse_branch(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r; - Node *node, **headp; - - *top = NULL; - r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) { - onig_node_free(node); - return r; - } - - if (r == TK_EOT || r == term || r == TK_ALT) { - *top = node; - } - else { - *top = node_new_list(node, NULL); - headp = &(NCDR(*top)); - while (r != TK_EOT && r != term && r != TK_ALT) { - r = parse_exp(&node, tok, term, src, end, env); - if (r < 0) { - onig_node_free(node); - return r; - } - - if (NTYPE(node) == NT_LIST) { - *headp = node; - while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); - headp = &(NCDR(node)); - } - else { - *headp = node_new_list(node, NULL); - headp = &(NCDR(*headp)); - } - } - } - - return r; -} - -/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ -static int -parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) -{ - int r; - Node *node, **headp; - - *top = NULL; - r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) { - onig_node_free(node); - return r; - } - - if (r == term) { - *top = node; - } - else if (r == TK_ALT) { - *top = onig_node_new_alt(node, NULL); - headp = &(NCDR(*top)); - while (r == TK_ALT) { - r = fetch_token(tok, src, end, env); - if (r < 0) return r; - r = parse_branch(&node, tok, term, src, end, env); - if (r < 0) { - onig_node_free(node); - return r; - } - - *headp = onig_node_new_alt(node, NULL); - headp = &(NCDR(*headp)); - } - - if (tok->type != (enum TokenSyms )term) - goto err; - } - else { - onig_node_free(node); - err: - if (term == TK_SUBEXP_CLOSE) - return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - else - return ONIGERR_PARSER_BUG; - } - - return r; -} - -static int -parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) -{ - int r; - OnigToken tok; - - r = fetch_token(&tok, src, end, env); - if (r < 0) return r; - r = parse_subexp(top, &tok, TK_EOT, src, end, env); - if (r < 0) return r; - return 0; -} - -extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, - regex_t* reg, ScanEnv* env) -{ - int r; - UChar* p; - -#ifdef USE_NAMED_GROUP - names_clear(reg); -#endif - - scan_env_clear(env); - env->option = reg->options; - env->case_fold_flag = reg->case_fold_flag; - env->enc = reg->enc; - env->syntax = reg->syntax; - env->pattern = (UChar* )pattern; - env->pattern_end = (UChar* )end; - env->reg = reg; - - *root = NULL; - p = (UChar* )pattern; - r = parse_regexp(root, &p, (UChar* )end, env); - reg->num_mem = env->num_mem; - return r; -} - -extern void -onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, - UChar* arg, UChar* arg_end) -{ - env->error = arg; - env->error_end = arg_end; -} -#endif //ENABLE_REGEXP diff --git a/src/regparse.h b/src/regparse.h deleted file mode 100644 index 1f7855df8..000000000 --- a/src/regparse.h +++ /dev/null @@ -1,354 +0,0 @@ -#ifndef ONIGURUMA_REGPARSE_H -#define ONIGURUMA_REGPARSE_H -/********************************************************************** - regparse.h - Oniguruma (regular expression library) -**********************************************************************/ -/*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include "regint.h" - -/* node type */ -#define NT_STR 0 -#define NT_CCLASS 1 -#define NT_CTYPE 2 -#define NT_CANY 3 -#define NT_BREF 4 -#define NT_QTFR 5 -#define NT_ENCLOSE 6 -#define NT_ANCHOR 7 -#define NT_LIST 8 -#define NT_ALT 9 -#define NT_CALL 10 - -/* node type bit */ -#define NTYPE2BIT(type) (1<<(type)) - -#define BIT_NT_STR NTYPE2BIT(NT_STR) -#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) -#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) -#define BIT_NT_CANY NTYPE2BIT(NT_CANY) -#define BIT_NT_BREF NTYPE2BIT(NT_BREF) -#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) -#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) -#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) -#define BIT_NT_LIST NTYPE2BIT(NT_LIST) -#define BIT_NT_ALT NTYPE2BIT(NT_ALT) -#define BIT_NT_CALL NTYPE2BIT(NT_CALL) - -#define IS_NODE_TYPE_SIMPLE(type) \ - ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ - BIT_NT_CANY | BIT_NT_BREF)) != 0) - -#define NTYPE(node) ((node)->u.base.type) -#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) - -#define NSTR(node) (&((node)->u.str)) -#define NCCLASS(node) (&((node)->u.cclass)) -#define NCTYPE(node) (&((node)->u.ctype)) -#define NBREF(node) (&((node)->u.bref)) -#define NQTFR(node) (&((node)->u.qtfr)) -#define NENCLOSE(node) (&((node)->u.enclose)) -#define NANCHOR(node) (&((node)->u.anchor)) -#define NCONS(node) (&((node)->u.cons)) -#define NCALL(node) (&((node)->u.call)) - -#define NCAR(node) (NCONS(node)->car) -#define NCDR(node) (NCONS(node)->cdr) - - - -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) -#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) - -#define ENCLOSE_MEMORY (1<<0) -#define ENCLOSE_OPTION (1<<1) -#define ENCLOSE_STOP_BACKTRACK (1<<2) - -#define NODE_STR_MARGIN 16 -#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 6 - -#define NSTR_RAW (1<<0) /* by backslashed number */ -#define NSTR_AMBIG (1<<1) -#define NSTR_DONT_GET_OPT_INFO (1<<2) - -#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW -#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW -#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ - (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) -#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ - (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) - -#define BACKREFS_P(br) \ - (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); - -#define NQ_TARGET_ISNOT_EMPTY 0 -#define NQ_TARGET_IS_EMPTY 1 -#define NQ_TARGET_IS_EMPTY_MEM 2 -#define NQ_TARGET_IS_EMPTY_REC 3 - -/* status bits */ -#define NST_MIN_FIXED (1<<0) -#define NST_MAX_FIXED (1<<1) -#define NST_CLEN_FIXED (1<<2) -#define NST_MARK1 (1<<3) -#define NST_MARK2 (1<<4) -#define NST_MEM_BACKREFED (1<<5) -#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) -#define NST_RECURSION (1<<7) -#define NST_CALLED (1<<8) -#define NST_ADDR_FIXED (1<<9) -#define NST_NAMED_GROUP (1<<10) -#define NST_NAME_REF (1<<11) -#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */ -#define NST_NEST_LEVEL (1<<13) -#define NST_BY_NUMBER (1<<14) /* {n,m} */ - -#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) -#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) - -#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) -#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) -#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) -#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) -#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) -#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) -#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) -#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) -#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ - (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) - -#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION -#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) -#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) -#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) - -#define CALLNODE_REFNUM_UNDEF -1 - -typedef struct { - NodeBase base; - UChar* s; - UChar* end; - unsigned int flag; - int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; -} StrNode; - -typedef struct { - NodeBase base; - int state; - struct _Node* target; - int lower; - int upper; - int greedy; - int target_empty_info; - struct _Node* head_exact; - struct _Node* next_head_exact; - int is_refered; /* include called node. don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif -} QtfrNode; - -typedef struct { - NodeBase base; - int state; - int type; - int regnum; - OnigOptionType option; - struct _Node* target; - AbsAddrType call_addr; - /* for multiple call reference */ - OnigDistance min_len; /* min length (byte) */ - OnigDistance max_len; /* max length (byte) */ - int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ -} EncloseNode; - -#ifdef USE_SUBEXP_CALL - -typedef struct { - int offset; - struct _Node* target; -} UnsetAddr; - -typedef struct { - int num; - int alloc; - UnsetAddr* us; -} UnsetAddrList; - -typedef struct { - NodeBase base; - int state; - int group_num; - UChar* name; - UChar* name_end; - struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ - UnsetAddrList* unset_addr_list; -} CallNode; - -#endif - -typedef struct { - NodeBase base; - int state; - int back_num; - int back_static[NODE_BACKREFS_SIZE]; - int* back_dynamic; - int nest_level; -} BRefNode; - -typedef struct { - NodeBase base; - int type; - struct _Node* target; - int char_len; -} AnchorNode; - -typedef struct { - NodeBase base; - struct _Node* car; - struct _Node* cdr; -} ConsAltNode; - -typedef struct { - NodeBase base; - int ctype; - int is_not; -} CtypeNode; - -typedef struct _Node { - union { - NodeBase base; - StrNode str; - CClassNode cclass; - QtfrNode qtfr; - EncloseNode enclose; - BRefNode bref; - AnchorNode anchor; - ConsAltNode cons; - CtypeNode ctype; -#ifdef USE_SUBEXP_CALL - CallNode call; -#endif - } u; -} Node; - - -#define NULL_NODE ((Node* )0) - -#define SCANENV_MEMNODES_SIZE 8 -#define SCANENV_MEM_NODES(senv) \ - (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \ - (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) - -typedef struct { - OnigOptionType option; - OnigCaseFoldType case_fold_flag; - OnigEncoding enc; - const OnigSyntaxType* syntax; - BitStatusType capture_history; - BitStatusType bt_mem_start; - BitStatusType bt_mem_end; - BitStatusType backrefed_mem; - UChar* pattern; - UChar* pattern_end; - UChar* error; - UChar* error_end; - regex_t* reg; /* for reg->names only */ - int num_call; -#ifdef USE_SUBEXP_CALL - UnsetAddrList* unset_addr_list; -#endif - int num_mem; -#ifdef USE_NAMED_GROUP - int num_named; -#endif - int mem_alloc; - Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; - Node** mem_nodes_dynamic; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int num_comb_exp_check; - int comb_exp_max_regnum; - int curr_max_regnum; - int has_recursion; -#endif - int warnings_flag; - const char* sourcefile; - int sourceline; -} ScanEnv; - - -#define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0) -#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0) -#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0) - -#ifdef USE_NAMED_GROUP -typedef struct { - int new_val; -} GroupNumRemap; - -extern int onig_renumber_name_table(regex_t* reg, GroupNumRemap* map); -#endif - -extern int onig_strncmp(const UChar* s1, const UChar* s2, int n); -extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end); -extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end); -extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc); -extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode); -extern void onig_node_conv_to_str_node(Node* node, int raw); -extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end); -extern int onig_node_str_set(Node* node, const UChar* s, const UChar* end); -extern void onig_node_free(Node* node); -extern Node* onig_node_new_enclose(int type); -extern Node* onig_node_new_anchor(int type); -extern Node* onig_node_new_str(const UChar* s, const UChar* end); -extern Node* onig_node_new_list(Node* left, Node* right); -extern Node* onig_node_list_add(Node* list, Node* x); -extern Node* onig_node_new_alt(Node* left, Node* right); -extern void onig_node_str_clear(Node* node); -extern int onig_free_node_list(void); -extern int onig_names_free(regex_t* reg); -extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env); -extern int onig_free_shared_cclass_table(void); - -#ifdef ONIG_DEBUG -#ifdef USE_NAMED_GROUP -extern int onig_print_names(FILE*, regex_t*); -#endif -#endif - -#endif /* ONIGURUMA_REGPARSE_H */ diff --git a/src/sprintf.c b/src/sprintf.c index 750b17d25..905182147 100644 --- a/src/sprintf.c +++ b/src/sprintf.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <string.h> -#include "encoding.h" #include "mruby/string.h" #include "mruby/hash.h" #include "mruby/numeric.h" diff --git a/src/string.c b/src/string.c index 00b08fc36..a778e4ed4 100644 --- a/src/string.c +++ b/src/string.c @@ -8,22 +8,16 @@ #include <string.h> #include "mruby/string.h" +#include "mruby/class.h" #include <ctype.h> #include <limits.h> #include "mruby/range.h" #include "mruby/array.h" #include "mruby/class.h" #include <stdio.h> -#ifdef ENABLE_REGEXP -#include "re.h" -#include "regex.h" -#endif //ENABLE_REGEXP const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz"; -#ifdef ENABLE_REGEXP -static mrb_value get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote); -#endif //ENABLE_REGEXP static mrb_value str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2); static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len); @@ -32,6 +26,12 @@ static mrb_value mrb_str_subseq(mrb_state *mrb, mrb_value str, int beg, int len) s->aux.capa = capacity;\ } while (0) +static const char* +_obj_classname(mrb_state *mrb, mrb_value obj) +{ + return mrb_class_name(mrb, mrb_obj_class(mrb, obj)); +} + void mrb_str_decref(mrb_state *mrb, struct mrb_shared_string *shared) { @@ -739,6 +739,9 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) { long idx; + if (!strcmp(_obj_classname(mrb, indx), "Regexp")) { + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + } switch (mrb_type(indx)) { case MRB_TT_FIXNUM: idx = mrb_fixnum(indx); @@ -748,14 +751,6 @@ num_index: if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); return str; - case MRB_TT_REGEX: -#ifdef ENABLE_REGEXP - return mrb_str_subpat(mrb, str, indx, 0); //mrb_str_subpat(str, indx, INT2FIX(0)); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); - return mrb_nil_value(); -#endif //ENABLE_REGEXP - case MRB_TT_STRING: if (mrb_str_index(mrb, str, indx, 0) != -1) return mrb_str_dup(mrb, indx); @@ -840,13 +835,8 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str) argc = mrb_get_args(mrb, "o|o", &a1, &a2); if (argc == 2) { - if (mrb_type(a1) == MRB_TT_REGEX) { -#ifdef ENABLE_REGEXP - return mrb_str_subpat(mrb, str, argv[0], mrb_fixnum(argv[1])); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); - return mrb_nil_value(); -#endif //ENABLE_REGEXP + if (!strcmp(mrb_obj_classname(mrb, a1), "Regexp")) { + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); } return mrb_str_substr(mrb, str, mrb_fixnum(a1), mrb_fixnum(a2)); } @@ -1215,86 +1205,6 @@ mrb_str_buf_append(mrb_state *mrb, mrb_value str, mrb_value str2) return str; } -#ifdef ENABLE_REGEXP -static mrb_value -str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) -{ - mrb_value *argv; - int argc; - mrb_value pat, val, repl, match, dest = mrb_nil_value(); - struct re_registers *regs; - mrb_int beg, n; - mrb_int beg0, end0; - mrb_int offset, blen, len, last; - char *sp, *cp; - - if (bang) str_modify(mrb, mrb_str_ptr(self)); - mrb_get_args(mrb, "*", &argv, &argc); - switch (argc) { - case 1: - /*RETURN_ENUMERATOR(str, argc, argv);*/ - break; - case 2: - repl = argv[1]; - mrb_string_value(mrb, &repl); - break; - default: - mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 2)", argc); - } - - pat = get_pat(mrb, argv[0], 1); - beg = mrb_reg_search(mrb, pat, str, 0, 0); - if (beg < 0) { - if (bang) return mrb_nil_value(); /* no match, no substitution */ - return mrb_str_dup(mrb, str); - } - - offset = 0; - n = 0; - blen = RSTRING_LEN(str) + 30; - dest = mrb_str_buf_new(mrb, blen); - sp = RSTRING_PTR(str); - cp = sp; - - do { - n++; - match = mrb_backref_get(mrb); - regs = RMATCH_REGS(match); - beg0 = BEG(0); - end0 = END(0); - val = mrb_reg_regsub(mrb, repl, str, regs, pat); - - len = beg - offset; /* copy pre-match substr */ - if (len) { - mrb_str_buf_cat(mrb, dest, cp, len); - } - - mrb_str_buf_append(mrb, dest, val); - - last = offset; - offset = end0; - if (beg0 == end0) { - /* - * Always consume at least one character of the input string - * in order to prevent infinite loops. - */ - if (RSTRING_LEN(str) <= end0) break; - len = RSTRING_LEN(str)-end0; - mrb_str_buf_cat(mrb, dest, RSTRING_PTR(str)+end0, len); - offset = end0 + len; - } - cp = RSTRING_PTR(str) + offset; - if (offset > RSTRING_LEN(str)) break; - beg = mrb_reg_search(mrb, pat, str, offset, 0); - } while (beg >= 0); - if (RSTRING_LEN(str) > offset) { - mrb_str_buf_cat(mrb, dest, cp, RSTRING_LEN(str) - offset); - } - mrb_reg_search(mrb, pat, str, last, 0); - mrb_basic(dest)->c = mrb_obj_class(mrb, str); - return str; -} - /* 15.2.10.5.18 */ /* * call-seq: @@ -1331,7 +1241,8 @@ str_gsub(mrb_state *mrb, mrb_value str, mrb_int bang) static mrb_value mrb_str_gsub(mrb_state *mrb, mrb_value self) { - return str_gsub(mrb, self, 0); + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + return mrb_nil_value(); } /* 15.2.10.5.19 */ @@ -1346,12 +1257,9 @@ mrb_str_gsub(mrb_state *mrb, mrb_value self) static mrb_value mrb_str_gsub_bang(mrb_state *mrb, mrb_value self) { - striuct RString *s = mrb_str_ptr(self); - - str_modify(mrb, s); - return str_gsub(mrb, s, 1); + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + return mrb_nil_value(); } -#endif //ENABLE_REGEXP mrb_int mrb_str_hash(mrb_state *mrb, mrb_value str) @@ -1460,29 +1368,17 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) sub = mrb_nil_value(); } + if (!strcmp(mrb_obj_classname(mrb, sub), "Regexp")) { + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + } if (pos < 0) { pos += RSTRING_LEN(str); if (pos < 0) { - if (mrb_type(sub) == MRB_TT_REGEX) { - mrb_raise(mrb, E_TYPE_ERROR, "Regexp class not supported"); - } return mrb_nil_value(); } } switch (mrb_type(sub)) { - case MRB_TT_REGEX: -#ifdef ENABLE_REGEXP - if (pos > RSTRING_LEN(str)) - return mrb_nil_value(); - pos = mrb_str_offset(mrb, str, pos); - pos = mrb_reg_search(mrb, sub, str, pos, 0); - pos = mrb_str_sublen(mrb, str, pos); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //ENABLE_REGEXP - break; - case MRB_TT_FIXNUM: { int c = mrb_fixnum(sub); long len = RSTRING_LEN(str); @@ -1500,7 +1396,7 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str) tmp = mrb_check_string_type(mrb, sub); if (mrb_nil_p(tmp)) { mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %s given", - mrb_obj_classname(mrb, sub)); + _obj_classname(mrb, sub)); } sub = tmp; } @@ -1642,36 +1538,6 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str) return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); } -#ifdef ENABLE_REGEXP -static mrb_value -get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote) -{ - mrb_value val; - - switch (mrb_type(pat)) { - case MRB_TT_REGEX: - return pat; - - case MRB_TT_STRING: - break; - - default: - val = mrb_check_string_type(mrb, pat); - if (mrb_nil_p(val)) { - //Check_Type(pat, T_REGEXP); - mrb_check_type(mrb, pat, MRB_TT_REGEX); - } - pat = val; - } - - if (quote) { - pat = mrb_reg_quote(mrb, pat); - } - - return mrb_reg_regcomp(mrb, pat); -} -#endif //ENABLE_REGEXP - /* 15.2.10.5.27 */ /* * call-seq: @@ -1685,26 +1551,12 @@ get_pat(mrb_state *mrb, mrb_value pat, mrb_int quote) * 'hello'.match(/(.)\1/)[0] #=> "ll" * 'hello'.match('xx') #=> nil */ -#ifdef ENABLE_REGEXP static mrb_value mrb_str_match_m(mrb_state *mrb, mrb_value self) { - mrb_value *argv; - int argc; - mrb_value re, result, b; - - mrb_get_args(mrb, "*&", &argv, &argc, &b); - if (argc < 1) - mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%d for 1..2)", argc); - re = argv[0]; - argv[0] = self; - result = mrb_funcall(mrb, get_pat(mrb, re, 0), "match", 1, self); - if (!mrb_nil_p(result) && mrb_block_given_p()) { - return mrb_yield(mrb, b, result); - } - return result; + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + return mrb_nil_value(); } -#endif //ENABLE_REGEXP /* ---------------------------------- */ /* 15.2.10.5.29 */ @@ -1855,12 +1707,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) if (pos < 0) { pos += len; if (pos < 0) { - if (mrb_type(sub) == MRB_TT_REGEX) { -#ifdef ENABLE_REGEXP - mrb_backref_set(mrb, mrb_nil_value()); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //ENABLE_REGEXP + if (!strcmp(mrb_obj_classname(mrb, sub), "Regexp")) { + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); } return mrb_nil_value(); } @@ -1874,21 +1722,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) else sub = mrb_nil_value(); } + if (!strcmp(mrb_obj_classname(mrb, sub), "Regexp")) { + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + } switch (mrb_type(sub)) { - case MRB_TT_REGEX: -#ifdef ENABLE_REGEXP - pos = mrb_str_offset(mrb, str, pos); - if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) { - pos = mrb_reg_search(mrb, sub, str, pos, 1); - pos = mrb_str_sublen(mrb, str, pos); - } - if (pos >= 0) return mrb_fixnum_value(pos); -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //ENABLE_REGEXP - break; - case MRB_TT_FIXNUM: { int c = mrb_fixnum(sub); long len = RSTRING_LEN(str); @@ -1920,46 +1758,6 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str) return mrb_nil_value(); } -#ifdef ENABLE_REGEXP -static mrb_value -scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) -{ - mrb_value result, match; - struct re_registers *regs; - long i; - struct RString *ps = mrb_str_ptr(str); - struct RMatch *pmatch; - - if (mrb_reg_search(mrb, pat, str, *start, 0) >= 0) { - match = mrb_backref_get(mrb); - pmatch = mrb_match_ptr(match); - regs = &pmatch->rmatch->regs; - if (regs->beg[0] == regs->end[0]) { - /* - * Always consume at least one character of the input string - */ - if (ps->len > regs->end[0]) - *start = regs->end[0] + RSTRING_LEN(str)-regs->end[0]; - else - *start = regs->end[0] + 1; - } - else { - *start = regs->end[0]; - } - if (regs->num_regs == 1) { - return mrb_reg_nth_match(mrb, 0, match); - } - result = mrb_ary_new_capa(mrb, regs->num_regs); - for (i=1; i < regs->num_regs; i++) { - mrb_ary_push(mrb, result, mrb_reg_nth_match(mrb, i, match)); - } - - return result; - } - return mrb_nil_value(); -} -#endif //ENABLE_REGEXP - /* 15.2.10.5.32 */ /* * call-seq: @@ -1991,41 +1789,12 @@ scan_once(mrb_state *mrb, mrb_value str, mrb_value pat, mrb_int *start) * <<cruel>> <<world>> * rceu lowlr */ -#ifdef ENABLE_REGEXP static mrb_value mrb_str_scan(mrb_state *mrb, mrb_value str) { - mrb_value result; - mrb_value pat, b; - mrb_int start = 0; - mrb_value match = mrb_nil_value(); - struct RString *ps = mrb_str_ptr(str); - char *p = ps->ptr; - long len = ps->len; - - mrb_get_args(mrb, "o&", &pat, &b); - pat = get_pat(mrb, pat, 1); - if (!mrb_block_given_p()) { - mrb_value ary = mrb_ary_new(mrb); - - while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) { - match = mrb_backref_get(mrb); - mrb_ary_push(mrb, ary, result); - } - mrb_backref_set(mrb, match); - return ary; - } - - while (!mrb_nil_p(result = scan_once(mrb, str, pat, &start))) { - match = mrb_backref_get(mrb); - mrb_yield(mrb, b, result); - str_mod_check(mrb, str, p, len); - mrb_backref_set(mrb, match); /* restore $~ value */ - } - mrb_backref_set(mrb, match); - return str; + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + return mrb_nil_value(); } -#endif //ENABLE_REGEXP static const char isspacetable[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, @@ -2117,28 +1886,12 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) else { if (mrb_string_p(spat)) { split_type = string; -#ifdef ENABLE_REGEXP - if (RSTRING_LEN(spat) == 0) { - /* Special case - split into chars */ - spat = mrb_reg_regcomp(mrb, spat); - split_type = regexp; - } - else { -#endif //ENABLE_REGEXP - if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ - split_type = awk; - } -#ifdef ENABLE_REGEXP + if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){ + split_type = awk; } -#endif //ENABLE_REGEXP } else { -#ifdef ENABLE_REGEXP - spat = get_pat(mrb, spat, 1); - split_type = regexp; -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //ENABLE_REGEXP + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); } } @@ -2207,59 +1960,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) beg = ptr - temp; } else { -#ifdef ENABLE_REGEXP - char *ptr = RSTRING_PTR(str); - long len = RSTRING_LEN(str); - long start = beg; - long idx; - int last_null = 0; - struct re_registers *regs; - - while ((end = mrb_reg_search(mrb, spat, str, start, 0)) >= 0) { - int ai; - regs = RMATCH_REGS(mrb_backref_get(mrb)); - ai = mrb_gc_arena_save(mrb); - if (start == end && BEG(0) == END(0)) { - if (!ptr) { - mrb_ary_push(mrb, result, mrb_str_new_empty(mrb, str)); - mrb_gc_arena_restore(mrb, ai); - break; - } - else if (last_null == 1) { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, len)); - mrb_gc_arena_restore(mrb, ai); - beg = start; - } - else { - if (ptr+start == ptr+len) - start++; - else - start += len; - last_null = 1; - continue; - } - } - else { - mrb_ary_push(mrb, result, mrb_str_subseq(mrb, str, beg, end-beg)); - mrb_gc_arena_restore(mrb, ai); - beg = start = END(0); - } - last_null = 0; - - for (idx=1; idx < regs->num_regs; idx++) { - if (BEG(idx) == -1) continue; - if (BEG(idx) == END(idx)) - tmp = mrb_str_new_empty(mrb, str); - else - tmp = mrb_str_subseq(mrb, str, BEG(idx), END(idx)-BEG(idx)); - mrb_ary_push(mrb, result, tmp); - mrb_gc_arena_restore(mrb, ai); - } - if (lim >= 0 && lim <= ++i) break; - } -#else - mrb_raise(mrb, E_TYPE_ERROR, "Regexp Class not supported"); -#endif //ENABLE_REGEXP + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); } if (RSTRING_LEN(str) > 0 && (lim >= 0 || RSTRING_LEN(str) > beg || lim < 0)) { if (RSTRING_LEN(str) == beg) @@ -2297,14 +1998,12 @@ mrb_block_given_p() * returning <i>str</i>, or <code>nil</code> if no substitutions were * performed. */ -#ifdef ENABLE_REGEXP static mrb_value mrb_str_sub_bang(mrb_state *mrb, mrb_value str) { - str_modify(mrb, str); + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); return mrb_nil_value(); } -#endif //ENABLE_REGEXP /* 15.2.10.5.36 */ @@ -2345,16 +2044,12 @@ mrb_str_sub_bang(mrb_state *mrb, mrb_value str) * #=> "Is /bin/bash your preferred shell?" */ -#ifdef ENABLE_REGEXP static mrb_value mrb_str_sub(mrb_state *mrb, mrb_value self) { - mrb_value str = mrb_str_dup(mrb, self); - - mrb_str_sub_bang(mrb, str); - return str; + mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp Class not implemented"); + return mrb_nil_value(); } -#endif //ENABLE_REGEXP mrb_value mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck) @@ -2999,32 +2694,36 @@ mrb_init_string(mrb_state *mrb) mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, ARGS_NONE()); /* 15.2.10.5.14 */ mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, ARGS_NONE()); /* 15.2.10.5.16 */ mrb_define_method(mrb, s, "eql?", mrb_str_eql, ARGS_REQ(1)); /* 15.2.10.5.17 */ -#ifdef ENABLE_REGEXP + + // NOTE: Regexp not implemented mrb_define_method(mrb, s, "gsub", mrb_str_gsub, ARGS_REQ(1)); /* 15.2.10.5.18 */ mrb_define_method(mrb, s, "gsub!", mrb_str_gsub_bang, ARGS_REQ(1)); /* 15.2.10.5.19 */ -#endif + mrb_define_method(mrb, s, "hash", mrb_str_hash_m, ARGS_REQ(1)); /* 15.2.10.5.20 */ mrb_define_method(mrb, s, "include?", mrb_str_include, ARGS_REQ(1)); /* 15.2.10.5.21 */ mrb_define_method(mrb, s, "index", mrb_str_index_m, ARGS_ANY()); /* 15.2.10.5.22 */ mrb_define_method(mrb, s, "initialize", mrb_str_init, ARGS_REQ(1)); /* 15.2.10.5.23 */ mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.24 */ mrb_define_method(mrb, s, "intern", mrb_str_intern, ARGS_NONE()); /* 15.2.10.5.25 */ -#ifdef ENABLE_REGEXP + + // NOTE: Regexp not implemented mrb_define_method(mrb, s, "match", mrb_str_match_m, ARGS_REQ(1)); /* 15.2.10.5.27 */ -#endif + mrb_define_method(mrb, s, "replace", mrb_str_replace, ARGS_REQ(1)); /* 15.2.10.5.28 */ mrb_define_method(mrb, s, "reverse", mrb_str_reverse, ARGS_NONE()); /* 15.2.10.5.29 */ mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, ARGS_NONE()); /* 15.2.10.5.30 */ mrb_define_method(mrb, s, "rindex", mrb_str_rindex_m, ARGS_ANY()); /* 15.2.10.5.31 */ -#ifdef ENABLE_REGEXP + + // NOTE: Regexp not implemented mrb_define_method(mrb, s, "scan", mrb_str_scan, ARGS_REQ(1)); /* 15.2.10.5.32 */ -#endif + mrb_define_method(mrb, s, "slice", mrb_str_aref_m, ARGS_ANY()); /* 15.2.10.5.34 */ mrb_define_method(mrb, s, "split", mrb_str_split_m, ARGS_ANY()); /* 15.2.10.5.35 */ -#ifdef ENABLE_REGEXP + + // NOTE: Regexp not implemented mrb_define_method(mrb, s, "sub", mrb_str_sub, ARGS_REQ(1)); /* 15.2.10.5.36 */ mrb_define_method(mrb, s, "sub!", mrb_str_sub_bang, ARGS_REQ(1)); /* 15.2.10.5.37 */ -#endif + mrb_define_method(mrb, s, "to_i", mrb_str_to_i, ARGS_ANY()); /* 15.2.10.5.38 */ mrb_define_method(mrb, s, "to_f", mrb_str_to_f, ARGS_NONE()); /* 15.2.10.5.39 */ mrb_define_method(mrb, s, "to_s", mrb_str_to_s, ARGS_NONE()); /* 15.2.10.5.40 */ diff --git a/src/struct.c b/src/struct.c index 1396cd728..1efdca66f 100644 --- a/src/struct.c +++ b/src/struct.c @@ -12,10 +12,6 @@ #include "mruby/array.h" #include <stdarg.h> -#ifdef ENABLE_REGEXP -#include "encoding.h" -#endif - #include "mruby/string.h" #include "mruby/class.h" #include "mruby/variable.h" diff --git a/src/variable.c b/src/variable.c index 6aa6e71bd..6661dae40 100644 --- a/src/variable.c +++ b/src/variable.c @@ -12,10 +12,6 @@ #include "mruby/string.h" #include "mruby/proc.h" -#ifdef ENABLE_REGEXP -#include "re.h" -#endif - typedef int (iv_foreach_func)(mrb_state*,mrb_sym,mrb_value,void*); #ifdef MRB_USE_IV_SEGLIST |
