From 3bcaeb09b4079c0d428f4fa76838a67bb5742d7a Mon Sep 17 00:00:00 2001 From: FUKUZAWA-Tadashi Date: Wed, 13 Mar 2013 00:06:19 +0900 Subject: refactor heredoc identifier --- include/mruby/compile.h | 3 +- src/parse.y | 176 +++++++++++++++++++++--------------------------- test/t/literals.rb | 4 +- 3 files changed, 81 insertions(+), 102 deletions(-) diff --git a/include/mruby/compile.h b/include/mruby/compile.h index 45eb72403..4b12cb10c 100644 --- a/include/mruby/compile.h +++ b/include/mruby/compile.h @@ -74,8 +74,7 @@ enum mrb_string_type { str_sword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY), str_dword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY|STR_FUNC_EXPAND), str_ssym = (STR_FUNC_PARSING|STR_FUNC_SYMBOL), - str_sheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC), - str_dheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC|STR_FUNC_EXPAND), + str_heredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC), }; /* heredoc structure */ diff --git a/src/parse.y b/src/parse.y index 18f338937..21823451b 100644 --- a/src/parse.y +++ b/src/parse.y @@ -874,49 +874,6 @@ end_strterm(parser_state *p) p->lex_strterm = NULL; } -static node* -heredoc_start_sb(parser_state *p, const char* term, size_t term_len, string_type type, int allow_indent) -{ - node *newnode = new_heredoc(p); - parser_heredoc_info *inf = (parser_heredoc_info*)newnode->cdr; - inf->term = term; - inf->term_len = term_len; - inf->type = type; - inf->allow_indent = allow_indent; - inf->line_head = TRUE; - inf->doc = NULL; - p->heredocs = push(p->heredocs, newnode); - if (p->parsing_heredoc == NULL) { - node *c = p->heredocs; - while (c->cdr) - c = c->cdr; - p->parsing_heredoc = c; - } - p->heredoc_starts_nextline = TRUE; - p->lstate = EXPR_END; - return newnode; -} - -static node* -heredoc_start(parser_state *p, node *beg, node *str, string_type type) -{ - char *bs = (char*)beg->cdr->car; - int allow_indent = (bs[2] == '-'); - const char *s = (char*)str->cdr->car; - size_t len = (intptr_t)str->cdr->cdr; - return heredoc_start_sb(p, s, len, type, allow_indent); -} - -static node* -heredoc_start_sym(parser_state *p, node *beg, mrb_sym sym, string_type type) -{ - char *bs = (char*)beg->cdr->car; - int allow_indent = (bs[2] == '-'); - size_t len; - const char *s = mrb_sym2name_len(p->mrb, sym, &len); - return heredoc_start_sb(p, s, len, type, allow_indent); -} - parser_heredoc_info * parsing_heredoc_inf(parser_state *p) { @@ -947,7 +904,6 @@ heredoc_end(parser_state *p) %} -%expect 2 %pure_parser %parse-param {parser_state *p} %lex-param {parser_state *p} @@ -1064,7 +1020,7 @@ heredoc_end(parser_state *p) %token tSTAR /* * */ %token tAMPER /* & */ %token tLAMBDA /* -> */ -%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG +%token tSYMBEG tREGEXP_BEG tWORDS_BEG %token tSTRING_BEG tSTRING_DVAR tLAMBEG %token tHEREDOC_BEG /* <<, <<- */ %token tHEREDOC_END tLITERAL_DELIM @@ -2615,18 +2571,6 @@ string_interp : tSTRING_MID p->lex_strterm = $2; $$ = list2($1, $3); } - | string_interp - tSTRING_PART - { - $$ = p->lex_strterm; - p->lex_strterm = NULL; - } - compstmt - '}' - { - p->lex_strterm = $3; - $$ = push(push($1, $2), $4); - } | tLITERAL_DELIM { $$ = list1(new_literal_delim(p)); @@ -2643,25 +2587,9 @@ regexp : tREGEXP_BEG tREGEXP } ; -heredoc : tHEREDOC_BEG tSTRING_BEG tSTRING - { - $$ = heredoc_start(p, $1, $3, str_dheredoc); - } - | tHEREDOC_BEG tSTRING - { - $$ = heredoc_start(p, $1, $2, str_sheredoc); - } - | tHEREDOC_BEG tIDENTIFIER - { - $$ = heredoc_start_sym(p, $1, $2, str_dheredoc); - } - | tHEREDOC_BEG tCONSTANT - { - $$ = heredoc_start_sym(p, $1, $2, str_dheredoc); - } +heredoc : tHEREDOC_BEG ; - opt_heredoc_bodies : none | heredoc_bodies ; @@ -2690,14 +2618,6 @@ words : tWORDS_BEG tSTRING { $$ = new_words(p, push($2, $3)); } - | tQWORDS_BEG tSTRING - { - $$ = new_words(p, list1($2)); - } - | tQWORDS_BEG string_rep tSTRING - { - $$ = new_words(p, push($2, $3)); - } ; @@ -3734,7 +3654,77 @@ parse_string(parser_state *p) yylval.nd = new_str(p, tok(p), toklen(p)); return tSTRING; } - + + +static int +heredoc_identifier(parser_state *p) +{ + int c; + int type = str_heredoc; + int indent = FALSE; + int quote = FALSE; + node *newnode; + parser_heredoc_info *info; + + c = nextc(p); + if (ISSPACE(c) || c == '=') { + pushback(p, c); + return 0; + } + if (c == '-') { + indent = TRUE; + c = nextc(p); + } + if (c == '\'' || c == '"') { + int term = c; + if (c == '\'') + quote = TRUE; + newtok(p); + while ((c = nextc(p)) != -1 && c != term) { + if (c == '\n') + c = -1; + tokadd(p, c); + } + if (c == -1) { + yyerror(p, "unterminated here document identifier"); + return 0; + } + } else { + if (! identchar(c)) { + pushback(p, c); + if (indent) pushback(p, '-'); + return 0; + } + newtok(p); + do { + tokadd(p, c); + } while ((c = nextc(p)) != -1 && identchar(c)); + pushback(p, c); + } + tokfix(p); + newnode = new_heredoc(p); + info = (parser_heredoc_info*)newnode->cdr; + info->term = strndup(tok(p), toklen(p)); + info->term_len = toklen(p); + if (! quote) + type |= STR_FUNC_EXPAND; + info->type = type; + info->allow_indent = indent; + info->line_head = TRUE; + info->doc = NULL; + p->heredocs = push(p->heredocs, newnode); + if (p->parsing_heredoc == NULL) { + node *n = p->heredocs; + while (n->cdr) + n = n->cdr; + p->parsing_heredoc = n; + } + p->heredoc_starts_nextline = TRUE; + p->lstate = EXPR_END; + + yylval.nd = newnode; + return tHEREDOC_BEG; +} static int arg_ambiguous(parser_state *p) @@ -3749,7 +3739,6 @@ static int parser_yylex(parser_state *p) { register int c; - int c2; int space_seen = 0; int cmd_state; enum mrb_lex_state_enum last_state; @@ -3916,20 +3905,9 @@ parser_yylex(parser_state *p) p->lstate != EXPR_CLASS && !IS_END() && (!IS_ARG() || space_seen)) { - /* heredocument check */ - newtok(p); tokadd(p, '<'); tokadd(p, '<'); - c2 = nextc(p); - if (c2 == '-') { - tokadd(p, c2); - c2 = nextc(p); - } - pushback(p, c2); - if (!ISSPACE(c2)) { - tokfix(p); - yylval.nd = new_str(p, tok(p), toklen(p)); - p->lstate = EXPR_DOT; - return tHEREDOC_BEG; - } + int token = heredoc_identifier(p); + if (token) + return token; } if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { p->lstate = EXPR_ARG; @@ -4625,7 +4603,7 @@ parser_yylex(parser_state *p) case 'w': p->lex_strterm = new_strterm(p, str_sword, term, paren); - return tQWORDS_BEG; + return tWORDS_BEG; case 'r': p->lex_strterm = new_strterm(p, str_regexp, term, paren); diff --git a/test/t/literals.rb b/test/t/literals.rb index eb3190a07..5dc15f135 100644 --- a/test/t/literals.rb +++ b/test/t/literals.rb @@ -195,9 +195,11 @@ assert('Literals Symbol', '8.7.6.6') do f = %s[asd \[ qwe] g = %s/foo#{1+2}bar/ + h = %s{{foo bar}} a == :'asd qwe' and b == :"foo bar" and c == :a3b and d == :asd and - e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' + e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' and + h == :'{foo bar}' end # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do -- cgit v1.2.3