From b1a5146ea8f8665df5edf2b26dcadc028d7929f7 Mon Sep 17 00:00:00 2001 From: mattn Date: Fri, 15 Feb 2013 04:38:27 +0900 Subject: Pluggable Regexp --- src/parse.y | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 4 deletions(-) (limited to 'src/parse.y') diff --git a/src/parse.y b/src/parse.y index 91fb1a8e8..518985ea6 100644 --- a/src/parse.y +++ b/src/parse.y @@ -708,6 +708,13 @@ new_dsym(parser_state *p, node *a) return cons((node*)NODE_DSYM, new_dstr(p, a)); } +// (:str . (s . len)) +static node* +new_regx(parser_state *p, const char *s, int len) +{ + return cons((node*)NODE_REGX, cons((node*)strndup(s, len), (node*)(intptr_t)len)); +} + // (:backref . n) static node* new_back_ref(parser_state *p, int n) @@ -743,13 +750,14 @@ call_bin_op(parser_state *p, node *recv, char *m, node *arg1) return new_call(p, recv, intern(m), list1(list1(arg1))); } +/* // (:match (a . b)) static node* match_op(parser_state *p, node *a, node *b) { return cons((node*)NODE_MATCH, cons((node*)a, (node*)b)); } - +*/ static void args_with_block(parser_state *p, node *a, node *b) @@ -1679,7 +1687,7 @@ arg : lhs '=' arg } | arg tMATCH arg { - $$ = match_op(p, $1, $3); + $$ = call_bin_op(p, $1, "=~", $3); #if 0 if (nd_type($1) == NODE_LIT && TYPE($1->nd_lit) == T_REGEXP) { $$ = reg_named_capture_assign($1->nd_lit, $$); @@ -2498,7 +2506,10 @@ string_interp : tSTRING_PART } ; -regexp : tREGEXP +regexp : tREGEXP_BEG tREGEXP + { + $$ = $2; + } ; symbol : basic_symbol @@ -3335,9 +3346,17 @@ read_escape(parser_state *p) return c; case 'b': /* backspace */ + if (p->regexp) { + tokadd(p, '\\'); + return 'b'; + } return '\010'; case 's': /* space */ + if (p->regexp) { + tokadd(p, '\\'); + return 's'; + } return ' '; case 'M': @@ -3375,17 +3394,39 @@ read_escape(parser_state *p) return '\0'; default: + if (p->regexp) { + tokadd(p, '\\'); + } return c; } } +static void +regx_options(parser_state *p) +{ + int c; + + newtok(p); + while (c = nextc(p), ISALPHA(c)) { + tokadd(p, c); + } + + pushback(p, c); + if (toklen(p)) { + char msg[128]; + tokfix(p); + snprintf(msg, sizeof(msg), "unknown regexp option %s - %s", + toklen(p) > 1 ? "s" : "", tok(p)); + yyerror(p, msg); + } +} + static int parse_string(parser_state *p, int term) { int c; newtok(p); - while ((c = nextc(p)) != term) { if (c == -1) { yyerror(p, "unterminated string meets end of file"); @@ -3422,6 +3463,15 @@ parse_string(parser_state *p, int term) tokfix(p); p->lstate = EXPR_END; p->sterm = 0; + + if (p->regexp) { + //regx_options(p); + yylval.nd = new_regx(p, tok(p), toklen(p)); + p->regexp = 0; + + return tREGEXP; + } + yylval.nd = new_str(p, tok(p), toklen(p)); return tSTRING; } @@ -4186,6 +4236,8 @@ parser_yylex(parser_state *p) #if 0 p->lex_strterm = new_strterm(p, str_regexp, '/', 0); #endif + p->regexp = 1; + p->sterm = '/'; return tREGEXP_BEG; } if ((c = nextc(p)) == '=') { @@ -4199,6 +4251,8 @@ parser_yylex(parser_state *p) #if 0 p->lex_strterm = new_strterm(p, str_regexp, '/', 0); #endif + p->regexp = 1; + p->sterm = '/'; return tREGEXP_BEG; } if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { @@ -4381,6 +4435,8 @@ parser_yylex(parser_state *p) #if 0 p->lex_strterm = new_strterm(p, str_regexp, term, paren); #endif + p->regexp = 1; + p->sterm = '/'; return tREGEXP_BEG; case 's': @@ -5389,6 +5445,16 @@ parser_dump(mrb_state *mrb, node *tree, int offset) printf("NODE_CONST %s\n", mrb_sym2name(mrb, sym(tree))); break; + case NODE_MATCH: + printf("NODE_MATCH:\n"); + dump_prefix(offset + 1); + printf("lhs:\n"); + parser_dump(mrb, tree->car, offset + 2); + dump_prefix(offset + 1); + printf("rhs:\n"); + parser_dump(mrb, tree->cdr, offset + 2); + break; + case NODE_BACK_REF: printf("NODE_BACK_REF: $%c\n", (int)(intptr_t)tree); break; @@ -5428,6 +5494,10 @@ parser_dump(mrb_state *mrb, node *tree, int offset) dump_recur(mrb, tree, offset+1); break; + case NODE_REGX: + printf("NODE_REGX /%s/\n", (char*)tree->car->cdr->car); + break; + case NODE_SYM: printf("NODE_SYM :%s\n", mrb_sym2name(mrb, sym(tree))); break; -- cgit v1.2.3 From e0f25b1fda0c9c67526885fafdabf35d4d4039b7 Mon Sep 17 00:00:00 2001 From: mattn Date: Fri, 15 Feb 2013 13:55:41 +0900 Subject: ready to pass second argument of Regexp.new --- src/codegen.c | 13 ++++++++----- src/parse.y | 55 ++++++++++++++++++++++++++++++------------------------- src/re.h | 7 +------ 3 files changed, 39 insertions(+), 36 deletions(-) (limited to 'src/parse.y') diff --git a/src/codegen.c b/src/codegen.c index d1242a29f..86cb5eb87 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -16,6 +16,7 @@ #include #include #include +#include "re.h" typedef mrb_ast_node node; typedef struct mrb_parser_state parser_state; @@ -1912,12 +1913,14 @@ codegen(codegen_scope *s, node *tree, int val) case NODE_REGX: if (val) { - char *p = (char*)tree->car; - size_t len = (intptr_t)tree->cdr; + char *p1 = (char*)tree->car; + //char *p2 = (char*)tree->cdr; int ai = mrb_gc_arena_save(s->mrb); - struct RClass* c = mrb_class_get(s->mrb, "Regexp"); - mrb_value args[1]; - args[0] = mrb_str_new(s->mrb, p, len); + struct RClass* c = mrb_class_get(s->mrb, REGEXP_CLASS); + mrb_value args[2]; + args[0] = mrb_str_new(s->mrb, p1, strlen(p1)); + // TODO: Some regexp implementation does not have second argument + //args[1] = mrb_str_new(s->mrb, p2, strlen(p2)); int off = new_lit(s, mrb_class_new_instance(s->mrb, 1, args, c)); diff --git a/src/parse.y b/src/parse.y index 518985ea6..9af8682c1 100644 --- a/src/parse.y +++ b/src/parse.y @@ -708,11 +708,11 @@ new_dsym(parser_state *p, node *a) return cons((node*)NODE_DSYM, new_dstr(p, a)); } -// (:str . (s . len)) +// (:str . (a . a)) static node* -new_regx(parser_state *p, const char *s, int len) +new_regx(parser_state *p, const char *p1, const char* p2) { - return cons((node*)NODE_REGX, cons((node*)strndup(s, len), (node*)(intptr_t)len)); + return cons((node*)NODE_REGX, cons((node*)p1, (node*)p2)); } // (:backref . n) @@ -3401,26 +3401,6 @@ read_escape(parser_state *p) } } -static void -regx_options(parser_state *p) -{ - int c; - - newtok(p); - while (c = nextc(p), ISALPHA(c)) { - tokadd(p, c); - } - - pushback(p, c); - if (toklen(p)) { - char msg[128]; - tokfix(p); - snprintf(msg, sizeof(msg), "unknown regexp option %s - %s", - toklen(p) > 1 ? "s" : "", tok(p)); - yyerror(p, msg); - } -} - static int parse_string(parser_state *p, int term) { @@ -3465,8 +3445,33 @@ parse_string(parser_state *p, int term) p->sterm = 0; if (p->regexp) { - //regx_options(p); - yylval.nd = new_regx(p, tok(p), toklen(p)); + int f = 0; + int c; + char* s; + s = strndup(tok(p), toklen(p)); + newtok(p); + while (c = nextc(p), ISALPHA(c)) { + switch (c) { + case 'i': f |= 1; break; + case 'x': f |= 2; break; + case 'm': f |= 4; break; + default: tokadd(p, c); break; + } + } + pushback(p, c); + if (toklen(p)) { + char msg[128]; + free(s); + tokfix(p); + snprintf(msg, sizeof(msg), "unknown regexp option %s - %s", + toklen(p) > 1 ? "s" : "", tok(p)); + yyerror(p, msg); + } + char flag[4] = {0}; + if (f & 1) strcat(flag, "i"); + if (f & 2) strcat(flag, "x"); + if (f & 4) strcat(flag, "m"); + yylval.nd = new_regx(p, s, strdup(flag)); p->regexp = 0; return tREGEXP; diff --git a/src/re.h b/src/re.h index eafe50dc8..64dbd60dc 100644 --- a/src/re.h +++ b/src/re.h @@ -7,12 +7,7 @@ #ifndef RE_H #define RE_H -//#include -#include - -#include "node.h" -#include "st.h" - +//#define REGEXP_CLASS "HsRegexp" #define REGEXP_CLASS "Regexp" #endif -- cgit v1.2.3