From 3bcaeb09b4079c0d428f4fa76838a67bb5742d7a Mon Sep 17 00:00:00 2001
From: FUKUZAWA-Tadashi <alien@apple.nifty.jp>
Date: Wed, 13 Mar 2013 00:06:19 +0900
Subject: refactor heredoc identifier

---
 include/mruby/compile.h |   3 +-
 src/parse.y             | 176 +++++++++++++++++++++---------------------------
 test/t/literals.rb      |   4 +-
 3 files changed, 81 insertions(+), 102 deletions(-)
diff --git a/include/mruby/compile.h b/include/mruby/compile.h
index 45eb72403..4b12cb10c 100644
--- a/include/mruby/compile.h
+++ b/include/mruby/compile.h
@@ -74,8 +74,7 @@ enum mrb_string_type {
   str_sword   = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY),
   str_dword   = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY|STR_FUNC_EXPAND),
   str_ssym    = (STR_FUNC_PARSING|STR_FUNC_SYMBOL),
-  str_sheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC),
-  str_dheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC|STR_FUNC_EXPAND),
+  str_heredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC),
 };
 
 /* heredoc structure */
diff --git a/src/parse.y b/src/parse.y
index 18f338937..21823451b 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -874,49 +874,6 @@ end_strterm(parser_state *p)
   p->lex_strterm = NULL;
 }
 
-static node*
-heredoc_start_sb(parser_state *p, const char* term, size_t term_len, string_type type, int allow_indent)
-{
-  node *newnode = new_heredoc(p);
-  parser_heredoc_info *inf = (parser_heredoc_info*)newnode->cdr;
-  inf->term = term;
-  inf->term_len = term_len;
-  inf->type = type;
-  inf->allow_indent = allow_indent;
-  inf->line_head = TRUE;
-  inf->doc = NULL;
-  p->heredocs = push(p->heredocs, newnode);
-  if (p->parsing_heredoc == NULL) {
-    node *c = p->heredocs;
-    while (c->cdr)
-      c = c->cdr;
-    p->parsing_heredoc = c;
-  }
-  p->heredoc_starts_nextline = TRUE;
-  p->lstate = EXPR_END;
-  return newnode;
-}
-
-static node*
-heredoc_start(parser_state *p, node *beg, node *str, string_type type)
-{
-  char *bs = (char*)beg->cdr->car;
-  int allow_indent = (bs[2] == '-');
-  const char *s = (char*)str->cdr->car;
-  size_t len = (intptr_t)str->cdr->cdr;
-  return heredoc_start_sb(p, s, len, type, allow_indent);
-}
-
-static node*
-heredoc_start_sym(parser_state *p, node *beg, mrb_sym sym, string_type type)
-{
-  char *bs = (char*)beg->cdr->car;
-  int allow_indent = (bs[2] == '-');
-  size_t len;
-  const char *s = mrb_sym2name_len(p->mrb, sym, &len);
-  return heredoc_start_sb(p, s, len, type, allow_indent);
-}
-
 parser_heredoc_info *
 parsing_heredoc_inf(parser_state *p)
 {
@@ -947,7 +904,6 @@ heredoc_end(parser_state *p)
 
 %}
 
-%expect 2
 %pure_parser
 %parse-param {parser_state *p}
 %lex-param {parser_state *p}
@@ -1064,7 +1020,7 @@ heredoc_end(parser_state *p)
 %token tSTAR              /* * */
 %token tAMPER             /* & */
 %token tLAMBDA            /* -> */
-%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG
+%token tSYMBEG tREGEXP_BEG tWORDS_BEG
 %token tSTRING_BEG tSTRING_DVAR tLAMBEG
 %token <nd> tHEREDOC_BEG  /* <<, <<- */
 %token tHEREDOC_END tLITERAL_DELIM
@@ -2615,18 +2571,6 @@ string_interp	: tSTRING_MID
 		      p->lex_strterm = $<nd>2;
 		      $$ = list2($1, $3);
 		    }
-		| string_interp
-		  tSTRING_PART
-		    {
-		      $<nd>$ = p->lex_strterm;
-		      p->lex_strterm = NULL;
-		    }
-		  compstmt
-		  '}'
-		    {
-		      p->lex_strterm = $<nd>3;
-		      $$ = push(push($1, $2), $4);
-		    }
 		| tLITERAL_DELIM
 		    {
 		      $$ = list1(new_literal_delim(p));
@@ -2643,25 +2587,9 @@ regexp		: tREGEXP_BEG tREGEXP
 		    }
 		;
 
-heredoc		: tHEREDOC_BEG tSTRING_BEG tSTRING
-		    {
-		      $$ = heredoc_start(p, $1, $3, str_dheredoc);
-		    }
-		| tHEREDOC_BEG tSTRING
-		    {
-		      $$ = heredoc_start(p, $1, $2, str_sheredoc);
-		    }
-		| tHEREDOC_BEG tIDENTIFIER
-		    {
-		      $$ = heredoc_start_sym(p, $1, $2, str_dheredoc);
-		    }
-		| tHEREDOC_BEG tCONSTANT
-		    {
-		      $$ = heredoc_start_sym(p, $1, $2, str_dheredoc);
-		    }
+heredoc		: tHEREDOC_BEG
 		;
 
-
 opt_heredoc_bodies : none
 		   | heredoc_bodies
 		   ;
@@ -2690,14 +2618,6 @@ words		: tWORDS_BEG tSTRING
 		    {
 		      $$ = new_words(p, push($2, $3));
 		    }
-		| tQWORDS_BEG tSTRING
-		    {
-		      $$ = new_words(p, list1($2));
-		    }
-		| tQWORDS_BEG string_rep tSTRING
-		    {
-		      $$ = new_words(p, push($2, $3));
-		    }
 		;
 
 
@@ -3734,7 +3654,95 @@ parse_string(parser_state *p)
   yylval.nd = new_str(p, tok(p), toklen(p));
   return tSTRING;
 }
-  
+
+/*
+ * Lex the identifier that follows a `<<` heredoc introducer.
+ *
+ * Accepts an optional `-` (recorded in allow_indent) followed by either a
+ * bare identifier or a quoted one.  A single-quoted identifier leaves
+ * STR_FUNC_EXPAND off; every other form sets it.
+ *
+ * On success a heredoc node is pushed onto p->heredocs, yylval.nd is set to
+ * it, and tHEREDOC_BEG is returned.  Returns 0 when no usable identifier
+ * follows; the caller then falls through to its non-heredoc handling.
+ */
+static int
+heredoc_identifier(parser_state *p)
+{
+  int c;
+  int type = str_heredoc;
+  int indent = FALSE;
+  int quote = FALSE;
+  node *newnode;
+  parser_heredoc_info *info;
+
+  c = nextc(p);
+  if (ISSPACE(c) || c == '=') {
+    /* whitespace or `=` right after `<<`: not a heredoc */
+    pushback(p, c);
+    return 0;
+  }
+  if (c == '-') {
+    indent = TRUE;
+    c = nextc(p);
+  }
+  if (c == '\'' || c == '"') {
+    int term = c;
+    if (c == '\'')
+      quote = TRUE;
+    newtok(p);
+    while ((c = nextc(p)) != -1 && c != term) {
+      if (c == '\n') {
+        /* the identifier must close on the same line: stop scanning so the
+           error below fires instead of storing -1 and reading on */
+        c = -1;
+        break;
+      }
+      tokadd(p, c);
+    }
+    if (c == -1) {
+      yyerror(p, "unterminated here document identifier");
+      return 0;
+    }
+  } else {
+    /* test for EOF first so that -1 never reaches identchar() */
+    if (c == -1 || ! identchar(c)) {
+      pushback(p, c);
+      if (indent) pushback(p, '-');
+      return 0;
+    }
+    newtok(p);
+    do {
+      tokadd(p, c);
+    } while ((c = nextc(p)) != -1 && identchar(c));
+    pushback(p, c);
+  }
+  tokfix(p);
+  newnode = new_heredoc(p);
+  info = (parser_heredoc_info*)newnode->cdr;
+  /* NOTE(review): strndup() result is unchecked and no free is visible here - confirm ownership */
+  info->term = strndup(tok(p), toklen(p));
+  info->term_len = toklen(p);
+  if (! quote)
+    type |= STR_FUNC_EXPAND;
+  info->type = type;
+  info->allow_indent = indent;
+  info->line_head = TRUE;
+  info->doc = NULL;
+  p->heredocs = push(p->heredocs, newnode);
+  if (p->parsing_heredoc == NULL) {
+    /* no heredoc selected for parsing yet: select the last cell of the list */
+    node *n = p->heredocs;
+    while (n->cdr)
+      n = n->cdr;
+    p->parsing_heredoc = n;
+  }
+  p->heredoc_starts_nextline = TRUE;
+  p->lstate = EXPR_END;
+
+  yylval.nd = newnode;
+  return tHEREDOC_BEG;
+}
 
 static int
 arg_ambiguous(parser_state *p)
@@ -3749,7 +3739,6 @@ static int
 parser_yylex(parser_state *p)
 {
   register int c;
-  int c2;
   int space_seen = 0;
   int cmd_state;
   enum mrb_lex_state_enum last_state;
@@ -3916,20 +3905,9 @@ parser_yylex(parser_state *p)
 	p->lstate != EXPR_CLASS &&
 	!IS_END() &&
 	(!IS_ARG() || space_seen)) {
-      /* heredocument check */
-      newtok(p); tokadd(p, '<'); tokadd(p, '<');
-      c2 = nextc(p);
-      if (c2 == '-') {
-	tokadd(p, c2);
-	c2 = nextc(p);
-      }
-      pushback(p, c2);
-      if (!ISSPACE(c2)) {
-	tokfix(p);
-	yylval.nd = new_str(p, tok(p), toklen(p));
-	p->lstate = EXPR_DOT;
-	return tHEREDOC_BEG;
-      }
+      int token = heredoc_identifier(p);
+      if (token)
+	return token;
     }
     if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
       p->lstate = EXPR_ARG;
@@ -4625,7 +4603,7 @@ parser_yylex(parser_state *p)
 
       case 'w':
 	p->lex_strterm = new_strterm(p, str_sword, term, paren);
-	return tQWORDS_BEG;
+	return tWORDS_BEG;
 
       case 'r':
 	p->lex_strterm = new_strterm(p, str_regexp, term, paren);
diff --git a/test/t/literals.rb b/test/t/literals.rb
index eb3190a07..5dc15f135 100644
--- a/test/t/literals.rb
+++ b/test/t/literals.rb
@@ -195,9 +195,11 @@ assert('Literals Symbol', '8.7.6.6') do
   f = %s[asd \[
 qwe]
   g = %s/foo#{1+2}bar/
+  h = %s{{foo bar}}
 
   a == :'asd qwe' and b == :"foo bar" and c == :a3b and d == :asd and
-  e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar'
+  e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' and
+  h == :'{foo bar}'
 end
 
 # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do
-- 
cgit v1.2.3