From 44fbfc51849e6bad4f7238ad2fd9724af331cea2 Mon Sep 17 00:00:00 2001 From: FUKUZAWA-Tadashi Date: Tue, 12 Mar 2013 21:54:17 +0900 Subject: implement literal %W %w %s refactor string parsing --- test/t/literals.rb | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) (limited to 'test/t') diff --git a/test/t/literals.rb b/test/t/literals.rb index 9a6d341d4..eb3190a07 100644 --- a/test/t/literals.rb +++ b/test/t/literals.rb @@ -128,8 +128,77 @@ ZZZ z == "" end -# Not Implemented ATM assert('Literals Array', '8.7.6.4') do +assert('Literals Array', '8.7.6.4') do + a = %W{abc#{1+2}def \}g} + b = %W(abc #{2+3} def \(g) + c = %W[#{3+4}] + d = %W< #{4+5} > + e = %W// + f = %W[[ab cd][ef]] + g = %W{ + ab + #{-1}1 + 2#{2} + } + + test1 = (a == ['abc3def', '}g'] and + b == ['abc', '5', 'def', '(g'] and + c == ['7'] and + d == ['9'] and + e == [] and + f == ['[ab', 'cd][ef]'] and + g == ['ab', '-11', '22'] + ) + + a = %w{abc#{1+2}def \}g} + b = %w(abc #{2+3} def \(g) + c = %w[#{3+4}] + d = %w< #{4+5} > + e = %w// + f = %w[[ab cd][ef]] + g = %w{ + ab + #{-1}1 + 2#{2} + } + + test2 = (a == ['abc#{1+2}def', '}g'] and + b == ['abc', '#{2+3}', 'def', '(g'] and + c == ['#{3+4}'] and + d == ['#{4+5}'] and + e == [] and + f == ['[ab', 'cd][ef]'] and + g == ['ab', '#{-1}1', '2#{2}'] + ) + + test1 and test2 +end + +assert('Literals Symbol', '8.7.6.6') do + /* do not compile error */ + :$asd + :@asd + :@@asd + :asd= + :asd! + :asd? + :+ + :+@ + :if + :BEGIN + + a = :"asd qwe" + b = :'foo bar' + c = :"a#{1+2}b" + d = %s(asd) + e = %s( foo \)) + f = %s[asd \[ +qwe] + g = %s/foo#{1+2}bar/ + + a == :'asd qwe' and b == :"foo bar" and c == :a3b and d == :asd and + e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' +end # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do -# Not Implemented ATM assert('Literals Symbol', '8.7.6.6') do -- cgit v1.2.3 From aec3e1c4ff6603350811e672d6c706294f59e44e Mon Sep 17 00:00:00 2001 From: FUKUZAWA-Tadashi Date: Wed, 13 Mar 2013 00:06:19 +0900 Subject: refactor heredoc identifier --- include/mruby/compile.h | 3 +- src/parse.y | 176 +++++++++++++++++++++--------------------------- test/t/literals.rb | 4 +- 3 files changed, 81 insertions(+), 102 deletions(-) (limited to 'test/t') diff --git a/include/mruby/compile.h b/include/mruby/compile.h index 45eb72403..4b12cb10c 100644 --- a/include/mruby/compile.h +++ b/include/mruby/compile.h @@ -74,8 +74,7 @@ enum mrb_string_type { str_sword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY), str_dword = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY|STR_FUNC_EXPAND), str_ssym = (STR_FUNC_PARSING|STR_FUNC_SYMBOL), - str_sheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC), - str_dheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC|STR_FUNC_EXPAND), + str_heredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC), }; /* heredoc structure */ diff --git a/src/parse.y b/src/parse.y index 18f338937..21823451b 100644 --- a/src/parse.y +++ b/src/parse.y @@ -874,49 +874,6 @@ end_strterm(parser_state *p) p->lex_strterm = NULL; } -static node* -heredoc_start_sb(parser_state *p, const char* term, size_t term_len, string_type type, int allow_indent) -{ - node *newnode = new_heredoc(p); - parser_heredoc_info *inf = (parser_heredoc_info*)newnode->cdr; - inf->term = term; - inf->term_len = term_len; - inf->type = type; - inf->allow_indent = allow_indent; - inf->line_head = TRUE; - inf->doc = NULL; - p->heredocs = push(p->heredocs, newnode); - if (p->parsing_heredoc == NULL) { - node *c = p->heredocs; - while (c->cdr) - c = c->cdr; - p->parsing_heredoc = c; - } - p->heredoc_starts_nextline = TRUE; - p->lstate = EXPR_END; - return newnode; -} - -static node* -heredoc_start(parser_state *p, node *beg, node *str, string_type type) -{ - char *bs = (char*)beg->cdr->car; - int allow_indent = (bs[2] == '-'); - const char *s = (char*)str->cdr->car; - size_t len = (intptr_t)str->cdr->cdr; - return heredoc_start_sb(p, s, len, type, allow_indent); -} - -static node* -heredoc_start_sym(parser_state *p, node *beg, mrb_sym sym, string_type type) -{ - char *bs = (char*)beg->cdr->car; - int allow_indent = (bs[2] == '-'); - size_t len; - const char *s = mrb_sym2name_len(p->mrb, sym, &len); - return heredoc_start_sb(p, s, len, type, allow_indent); -} - parser_heredoc_info * parsing_heredoc_inf(parser_state *p) { @@ -947,7 +904,6 @@ heredoc_end(parser_state *p) %} -%expect 2 %pure_parser %parse-param {parser_state *p} %lex-param {parser_state *p} @@ -1064,7 +1020,7 @@ heredoc_end(parser_state *p) %token tSTAR /* * */ %token tAMPER /* & */ %token tLAMBDA /* -> */ -%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG +%token tSYMBEG tREGEXP_BEG tWORDS_BEG %token tSTRING_BEG tSTRING_DVAR tLAMBEG %token tHEREDOC_BEG /* <<, <<- */ %token tHEREDOC_END tLITERAL_DELIM @@ -2615,18 +2571,6 @@ string_interp : tSTRING_MID p->lex_strterm = $2; $$ = list2($1, $3); } - | string_interp - tSTRING_PART - { - $$ = p->lex_strterm; - p->lex_strterm = NULL; - } - compstmt - '}' - { - p->lex_strterm = $3; - $$ = push(push($1, $2), $4); - } | tLITERAL_DELIM { $$ = list1(new_literal_delim(p)); @@ -2643,25 +2587,9 @@ regexp : tREGEXP_BEG tREGEXP } ; -heredoc : tHEREDOC_BEG tSTRING_BEG tSTRING - { - $$ = heredoc_start(p, $1, $3, str_dheredoc); - } - | tHEREDOC_BEG tSTRING - { - $$ = heredoc_start(p, $1, $2, str_sheredoc); - } - | tHEREDOC_BEG tIDENTIFIER - { - $$ = heredoc_start_sym(p, $1, $2, str_dheredoc); - } - | tHEREDOC_BEG tCONSTANT - { - $$ = heredoc_start_sym(p, $1, $2, str_dheredoc); - } +heredoc : tHEREDOC_BEG ; - opt_heredoc_bodies : none | heredoc_bodies ; @@ -2690,14 +2618,6 @@ words : tWORDS_BEG tSTRING { $$ = new_words(p, push($2, $3)); } - | tQWORDS_BEG tSTRING - { - $$ = new_words(p, list1($2)); - } - | tQWORDS_BEG string_rep tSTRING - { - $$ = new_words(p, push($2, $3)); - } ; @@ -3734,7 +3654,77 @@ parse_string(parser_state *p) yylval.nd = new_str(p, tok(p), toklen(p)); return tSTRING; } - + + +static int +heredoc_identifier(parser_state *p) +{ + int c; + int type = str_heredoc; + int indent = FALSE; + int quote = FALSE; + node *newnode; + parser_heredoc_info *info; + + c = nextc(p); + if (ISSPACE(c) || c == '=') { + pushback(p, c); + return 0; + } + if (c == '-') { + indent = TRUE; + c = nextc(p); + } + if (c == '\'' || c == '"') { + int term = c; + if (c == '\'') + quote = TRUE; + newtok(p); + while ((c = nextc(p)) != -1 && c != term) { + if (c == '\n') + c = -1; + tokadd(p, c); + } + if (c == -1) { + yyerror(p, "unterminated here document identifier"); + return 0; + } + } else { + if (! identchar(c)) { + pushback(p, c); + if (indent) pushback(p, '-'); + return 0; + } + newtok(p); + do { + tokadd(p, c); + } while ((c = nextc(p)) != -1 && identchar(c)); + pushback(p, c); + } + tokfix(p); + newnode = new_heredoc(p); + info = (parser_heredoc_info*)newnode->cdr; + info->term = strndup(tok(p), toklen(p)); + info->term_len = toklen(p); + if (! quote) + type |= STR_FUNC_EXPAND; + info->type = type; + info->allow_indent = indent; + info->line_head = TRUE; + info->doc = NULL; + p->heredocs = push(p->heredocs, newnode); + if (p->parsing_heredoc == NULL) { + node *n = p->heredocs; + while (n->cdr) + n = n->cdr; + p->parsing_heredoc = n; + } + p->heredoc_starts_nextline = TRUE; + p->lstate = EXPR_END; + + yylval.nd = newnode; + return tHEREDOC_BEG; +} static int arg_ambiguous(parser_state *p) @@ -3749,7 +3739,6 @@ static int parser_yylex(parser_state *p) { register int c; - int c2; int space_seen = 0; int cmd_state; enum mrb_lex_state_enum last_state; @@ -3916,20 +3905,9 @@ parser_yylex(parser_state *p) p->lstate != EXPR_CLASS && !IS_END() && (!IS_ARG() || space_seen)) { - /* heredocument check */ - newtok(p); tokadd(p, '<'); tokadd(p, '<'); - c2 = nextc(p); - if (c2 == '-') { - tokadd(p, c2); - c2 = nextc(p); - } - pushback(p, c2); - if (!ISSPACE(c2)) { - tokfix(p); - yylval.nd = new_str(p, tok(p), toklen(p)); - p->lstate = EXPR_DOT; - return tHEREDOC_BEG; - } + int token = heredoc_identifier(p); + if (token) + return token; } if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) { p->lstate = EXPR_ARG; @@ -4625,7 +4603,7 @@ parser_yylex(parser_state *p) case 'w': p->lex_strterm = new_strterm(p, str_sword, term, paren); - return tQWORDS_BEG; + return tWORDS_BEG; case 'r': p->lex_strterm = new_strterm(p, str_regexp, term, paren); diff --git a/test/t/literals.rb b/test/t/literals.rb index eb3190a07..5dc15f135 100644 --- a/test/t/literals.rb +++ b/test/t/literals.rb @@ -195,9 +195,11 @@ assert('Literals Symbol', '8.7.6.6') do f = %s[asd \[ qwe] g = %s/foo#{1+2}bar/ + h = %s{{foo bar}} a == :'asd qwe' and b == :"foo bar" and c == :a3b and d == :asd and - e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' + e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' and + h == :'{foo bar}' end # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do -- cgit v1.2.3 From 76c24894a7f859cc76b437a07030f2e2f277eab1 Mon Sep 17 00:00:00 2001 From: FUKUZAWA-Tadashi Date: Sun, 17 Mar 2013 21:22:21 +0900 Subject: bugfix about escaping '\n' --- src/parse.y | 26 +++++++++++++++++++------- test/t/literals.rb | 18 +++++++++++++++--- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'test/t') diff --git a/src/parse.y b/src/parse.y index 21823451b..bfbdd9eb1 100644 --- a/src/parse.y +++ b/src/parse.y @@ -3535,7 +3535,7 @@ parse_string(parser_state *p) char buf[256]; snprintf(buf, sizeof(buf), "can't find string \"%s\" anywhere before EOF", hinf->term); yyerror(p, buf); - return 0; + return 0; } yylval.nd = new_str(p, tok(p), toklen(p)); return tSTRING_MID; @@ -3558,6 +3558,11 @@ parse_string(parser_state *p) if (c == end || c == beg) { tokadd(p, c); } + else if ((c == '\n') && (type & STR_FUNC_ARRAY)) { + p->lineno++; + p->column = 0; + tokadd(p, '\n'); + } else { pushback(p, c); tokadd(p, read_escape(p)); @@ -3570,14 +3575,14 @@ parse_string(parser_state *p) case '\n': p->lineno++; p->column = 0; - continue; + break; case '\\': - c = '\\'; break; default: - tokadd(p, '\\'); + if (! ISSPACE(c)) + tokadd(p, '\\'); } } tokadd(p, c); @@ -3601,7 +3606,12 @@ parse_string(parser_state *p) } if ((type & STR_FUNC_ARRAY) && ISSPACE(c)) { if (toklen(p) == 0) { - do {} while (ISSPACE(c = nextc(p))); + do { + if (c == '\n') { + p->lineno++; + p->column = 0; + } + } while (ISSPACE(c = nextc(p))); pushback(p, c); return tLITERAL_DELIM; } else { @@ -3681,8 +3691,10 @@ heredoc_identifier(parser_state *p) quote = TRUE; newtok(p); while ((c = nextc(p)) != -1 && c != term) { - if (c == '\n') - c = -1; + if (c == '\n') { + c = -1; + break; + } tokadd(p, c); } if (c == -1) { diff --git a/test/t/literals.rb b/test/t/literals.rb index 5dc15f135..5a29cff0c 100644 --- a/test/t/literals.rb +++ b/test/t/literals.rb @@ -140,6 +140,11 @@ assert('Literals Array', '8.7.6.4') do #{-1}1 2#{2} } + h = %W(a\nb + test\ abc + c\ +d + x\y x\\y x\\\y) test1 = (a == ['abc3def', '}g'] and b == ['abc', '5', 'def', '(g'] and @@ -147,7 +152,8 @@ assert('Literals Array', '8.7.6.4') do d == ['9'] and e == [] and f == ['[ab', 'cd][ef]'] and - g == ['ab', '-11', '22'] + g == ['ab', '-11', '22'] and + h == ["a\nb", 'test abc', "c\nd", "xy", "x\\y", "x\\y"] ) a = %w{abc#{1+2}def \}g} @@ -161,6 +167,11 @@ assert('Literals Array', '8.7.6.4') do #{-1}1 2#{2} } + h = %w(a\nb + test\ abc + c\ +d + x\y x\\y x\\\y) test2 = (a == ['abc#{1+2}def', '}g'] and b == ['abc', '#{2+3}', 'def', '(g'] and @@ -168,8 +179,9 @@ assert('Literals Array', '8.7.6.4') do d == ['#{4+5}'] and e == [] and f == ['[ab', 'cd][ef]'] and - g == ['ab', '#{-1}1', '2#{2}'] - ) + g == ['ab', '#{-1}1', '2#{2}'] and + h == ["a\\nb", "test abc", "c\nd", "x\\y", "x\\y", "x\\\\y"] + ) test1 and test2 end -- cgit v1.2.3