From 44fbfc51849e6bad4f7238ad2fd9724af331cea2 Mon Sep 17 00:00:00 2001
From: FUKUZAWA-Tadashi <alien@apple.nifty.jp>
Date: Tue, 12 Mar 2013 21:54:17 +0900
Subject: implement literal %W %w %s; refactor string parsing

---
 test/t/literals.rb | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 71 insertions(+), 2 deletions(-)

(limited to 'test/t')

diff --git a/test/t/literals.rb b/test/t/literals.rb
index 9a6d341d4..eb3190a07 100644
--- a/test/t/literals.rb
+++ b/test/t/literals.rb
@@ -128,8 +128,77 @@ ZZZ
   z == ""
 end
 
-# Not Implemented ATM assert('Literals Array', '8.7.6.4') do
+assert('Literals Array', '8.7.6.4') do  # %W interpolates each word (test1); %w keeps words literal (test2)
+  a = %W{abc#{1+2}def \}g}      # interpolation in mid-word; \} is an escaped closing delimiter
+  b = %W(abc #{2+3} def \(g)    # an interpolation can be a whole word; \( is an escaped delimiter
+  c = %W[#{3+4}]                # the only word is an interpolation
+  d = %W< #{4+5} >              # spaces next to the delimiters produce no empty words
+  e = %W//                      # empty literal => empty array
+  f = %W[[ab cd][ef]]           # nested brackets stay in the words; only the last ] closes
+  g = %W{
+    ab
+    #{-1}1
+    2#{2}
+  }
+
+  test1 = (a == ['abc3def', '}g'] and
+           b == ['abc', '5', 'def', '(g'] and
+           c == ['7'] and
+           d == ['9'] and
+           e == [] and
+           f == ['[ab', 'cd][ef]'] and
+           g == ['ab', '-11', '22']
+          )
+
+  a = %w{abc#{1+2}def \}g}
+  b = %w(abc #{2+3} def \(g)    # no interpolation: the interpolation text is kept verbatim
+  c = %w[#{3+4}]                # likewise a single verbatim word
+  d = %w< #{4+5} >              # surrounding spaces are still dropped
+  e = %w//                      # empty literal => empty array
+  f = %w[[ab cd][ef]]           # same bracket nesting as the %W case
+  g = %w{
+    ab
+    #{-1}1
+    2#{2}
+  }
+
+  test2 = (a == ['abc#{1+2}def', '}g'] and
+           b == ['abc', '#{2+3}', 'def', '(g'] and
+           c == ['#{3+4}'] and
+           d == ['#{4+5}'] and
+           e == [] and
+           f == ['[ab', 'cd][ef]'] and
+           g == ['ab', '#{-1}1', '2#{2}']
+          )
+
+  test1 and test2
+end
+
+assert('Literals Symbol', '8.7.6.6') do
+  # The bare symbols below only have to parse without a compile error; the assigned ones are compared by value at the end.
+  :$asd
+  :@asd
+  :@@asd
+  :asd=
+  :asd!
+  :asd?
+  :+
+  :+@
+  :if
+  :BEGIN
+
+  a = :"asd qwe"        # double-quoted symbol containing a space
+  b = :'foo bar'        # single-quoted symbol containing a space
+  c = :"a#{1+2}b"       # double quotes interpolate (expected :a3b)
+  d = %s(asd)
+  e = %s( foo \))       # leading space is kept and \) is an escaped delimiter (expected :' foo )')
+  f = %s[asd \[
+qwe]
+  g = %s/foo#{1+2}bar/
+
+  a == :'asd qwe' and b == :"foo bar" and c == :a3b and d == :asd and
+  e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar'
+end
 
 # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do
 
-# Not Implemented ATM assert('Literals Symbol', '8.7.6.6') do
-- 
cgit v1.2.3


From aec3e1c4ff6603350811e672d6c706294f59e44e Mon Sep 17 00:00:00 2001
From: FUKUZAWA-Tadashi <alien@apple.nifty.jp>
Date: Wed, 13 Mar 2013 00:06:19 +0900
Subject: refactor heredoc identifier

---
 include/mruby/compile.h |   3 +-
 src/parse.y             | 176 +++++++++++++++++++++---------------------------
 test/t/literals.rb      |   4 +-
 3 files changed, 81 insertions(+), 102 deletions(-)

(limited to 'test/t')

diff --git a/include/mruby/compile.h b/include/mruby/compile.h
index 45eb72403..4b12cb10c 100644
--- a/include/mruby/compile.h
+++ b/include/mruby/compile.h
@@ -74,8 +74,7 @@ enum mrb_string_type {
   str_sword   = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY),
   str_dword   = (STR_FUNC_PARSING|STR_FUNC_WORD|STR_FUNC_ARRAY|STR_FUNC_EXPAND),
   str_ssym    = (STR_FUNC_PARSING|STR_FUNC_SYMBOL),
-  str_sheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC),
-  str_dheredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC|STR_FUNC_EXPAND),
+  str_heredoc = (STR_FUNC_PARSING|STR_FUNC_HEREDOC),
 };
 
 /* heredoc structure */
diff --git a/src/parse.y b/src/parse.y
index 18f338937..21823451b 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -874,49 +874,6 @@ end_strterm(parser_state *p)
   p->lex_strterm = NULL;
 }
 
-static node*
-heredoc_start_sb(parser_state *p, const char* term, size_t term_len, string_type type, int allow_indent)
-{
-  node *newnode = new_heredoc(p);
-  parser_heredoc_info *inf = (parser_heredoc_info*)newnode->cdr;
-  inf->term = term;
-  inf->term_len = term_len;
-  inf->type = type;
-  inf->allow_indent = allow_indent;
-  inf->line_head = TRUE;
-  inf->doc = NULL;
-  p->heredocs = push(p->heredocs, newnode);
-  if (p->parsing_heredoc == NULL) {
-    node *c = p->heredocs;
-    while (c->cdr)
-      c = c->cdr;
-    p->parsing_heredoc = c;
-  }
-  p->heredoc_starts_nextline = TRUE;
-  p->lstate = EXPR_END;
-  return newnode;
-}
-
-static node*
-heredoc_start(parser_state *p, node *beg, node *str, string_type type)
-{
-  char *bs = (char*)beg->cdr->car;
-  int allow_indent = (bs[2] == '-');
-  const char *s = (char*)str->cdr->car;
-  size_t len = (intptr_t)str->cdr->cdr;
-  return heredoc_start_sb(p, s, len, type, allow_indent);
-}
-
-static node*
-heredoc_start_sym(parser_state *p, node *beg, mrb_sym sym, string_type type)
-{
-  char *bs = (char*)beg->cdr->car;
-  int allow_indent = (bs[2] == '-');
-  size_t len;
-  const char *s = mrb_sym2name_len(p->mrb, sym, &len);
-  return heredoc_start_sb(p, s, len, type, allow_indent);
-}
-
 parser_heredoc_info *
 parsing_heredoc_inf(parser_state *p)
 {
@@ -947,7 +904,6 @@ heredoc_end(parser_state *p)
 
 %}
 
-%expect 2
 %pure_parser
 %parse-param {parser_state *p}
 %lex-param {parser_state *p}
@@ -1064,7 +1020,7 @@ heredoc_end(parser_state *p)
 %token tSTAR              /* * */
 %token tAMPER             /* & */
 %token tLAMBDA            /* -> */
-%token tSYMBEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG
+%token tSYMBEG tREGEXP_BEG tWORDS_BEG
 %token tSTRING_BEG tSTRING_DVAR tLAMBEG
 %token <nd> tHEREDOC_BEG  /* <<, <<- */
 %token tHEREDOC_END tLITERAL_DELIM
@@ -2615,18 +2571,6 @@ string_interp	: tSTRING_MID
 		      p->lex_strterm = $<nd>2;
 		      $$ = list2($1, $3);
 		    }
-		| string_interp
-		  tSTRING_PART
-		    {
-		      $<nd>$ = p->lex_strterm;
-		      p->lex_strterm = NULL;
-		    }
-		  compstmt
-		  '}'
-		    {
-		      p->lex_strterm = $<nd>3;
-		      $$ = push(push($1, $2), $4);
-		    }
 		| tLITERAL_DELIM
 		    {
 		      $$ = list1(new_literal_delim(p));
@@ -2643,25 +2587,9 @@ regexp		: tREGEXP_BEG tREGEXP
 		    }
 		;
 
-heredoc		: tHEREDOC_BEG tSTRING_BEG tSTRING
-		    {
-		      $$ = heredoc_start(p, $1, $3, str_dheredoc);
-		    }
-		| tHEREDOC_BEG tSTRING
-		    {
-		      $$ = heredoc_start(p, $1, $2, str_sheredoc);
-		    }
-		| tHEREDOC_BEG tIDENTIFIER
-		    {
-		      $$ = heredoc_start_sym(p, $1, $2, str_dheredoc);
-		    }
-		| tHEREDOC_BEG tCONSTANT
-		    {
-		      $$ = heredoc_start_sym(p, $1, $2, str_dheredoc);
-		    }
+heredoc		: tHEREDOC_BEG
 		;
 
-
 opt_heredoc_bodies : none
 		   | heredoc_bodies
 		   ;
@@ -2690,14 +2618,6 @@ words		: tWORDS_BEG tSTRING
 		    {
 		      $$ = new_words(p, push($2, $3));
 		    }
-		| tQWORDS_BEG tSTRING
-		    {
-		      $$ = new_words(p, list1($2));
-		    }
-		| tQWORDS_BEG string_rep tSTRING
-		    {
-		      $$ = new_words(p, push($2, $3));
-		    }
 		;
 
 
@@ -3734,7 +3654,77 @@ parse_string(parser_state *p)
   yylval.nd = new_str(p, tok(p), toklen(p));
   return tSTRING;
 }
-  
+ 
+/* Lex what follows "<<": on success register a heredoc and return tHEREDOC_BEG; return 0 if this is not a heredoc start or an error was reported. */
+static int
+heredoc_identifier(parser_state *p)
+{
+  int c;
+  int type = str_heredoc;
+  int indent = FALSE;  /* set to TRUE for the "<<-" form; stored in info->allow_indent */
+  int quote = FALSE;   /* set to TRUE for a single-quoted identifier; suppresses STR_FUNC_EXPAND */
+  node *newnode;
+  parser_heredoc_info *info;
+
+  c = nextc(p);
+  if (ISSPACE(c) || c == '=') {  /* "<<" followed by whitespace or '=' is not a heredoc start */
+    pushback(p, c);
+    return 0;
+  }
+  if (c == '-') {
+    indent = TRUE;
+    c = nextc(p);
+  }
+  if (c == '\'' || c == '"') {  /* quoted identifier: collect characters up to the matching quote */
+    int term = c;  /* the closing quote to look for */
+    if (c == '\'')
+      quote = TRUE;
+    newtok(p);
+    while ((c = nextc(p)) != -1 && c != term) {
+      if (c == '\n')
+        c = -1;
+      tokadd(p, c);
+    }
+    if (c == -1) {
+      yyerror(p, "unterminated here document identifier");
+      return 0;
+    }
+  } else {  /* bare identifier: a run of identchar() characters */
+    if (! identchar(c)) {
+      pushback(p, c);
+      if (indent) pushback(p, '-');  /* also give back the '-' consumed above */
+      return 0;
+    }
+    newtok(p);
+    do {
+      tokadd(p, c);
+    } while ((c = nextc(p)) != -1 && identchar(c));
+    pushback(p, c);  /* first character that is not part of the identifier */
+  }
+  tokfix(p);
+  newnode = new_heredoc(p);
+  info = (parser_heredoc_info*)newnode->cdr;
+  info->term = strndup(tok(p), toklen(p));  /* heap copy of the token; NOTE(review): no matching free is visible here - confirm ownership */
+  info->term_len = toklen(p);
+  if (! quote)
+    type |= STR_FUNC_EXPAND;  /* double-quoted and bare identifiers get the expand flag */
+  info->type = type;
+  info->allow_indent = indent;
+  info->line_head = TRUE;
+  info->doc = NULL;
+  p->heredocs = push(p->heredocs, newnode);
+  if (p->parsing_heredoc == NULL) {  /* none selected yet: use the last cell of the heredoc list */
+    node *n = p->heredocs;
+    while (n->cdr)
+      n = n->cdr;
+    p->parsing_heredoc = n;
+  }
+  p->heredoc_starts_nextline = TRUE;
+  p->lstate = EXPR_END;
+  /* hand the new heredoc node to the grammar as the value of the tHEREDOC_BEG token */
+  yylval.nd = newnode;
+  return tHEREDOC_BEG;
+}
 
 static int
 arg_ambiguous(parser_state *p)
@@ -3749,7 +3739,6 @@ static int
 parser_yylex(parser_state *p)
 {
   register int c;
-  int c2;
   int space_seen = 0;
   int cmd_state;
   enum mrb_lex_state_enum last_state;
@@ -3916,20 +3905,9 @@ parser_yylex(parser_state *p)
 	p->lstate != EXPR_CLASS &&
 	!IS_END() &&
 	(!IS_ARG() || space_seen)) {
-      /* heredocument check */
-      newtok(p); tokadd(p, '<'); tokadd(p, '<');
-      c2 = nextc(p);
-      if (c2 == '-') {
-	tokadd(p, c2);
-	c2 = nextc(p);
-      }
-      pushback(p, c2);
-      if (!ISSPACE(c2)) {
-	tokfix(p);
-	yylval.nd = new_str(p, tok(p), toklen(p));
-	p->lstate = EXPR_DOT;
-	return tHEREDOC_BEG;
-      }
+      int token = heredoc_identifier(p);
+      if (token)
+	return token;
     }
     if (p->lstate == EXPR_FNAME || p->lstate == EXPR_DOT) {
       p->lstate = EXPR_ARG;
@@ -4625,7 +4603,7 @@ parser_yylex(parser_state *p)
 
       case 'w':
 	p->lex_strterm = new_strterm(p, str_sword, term, paren);
-	return tQWORDS_BEG;
+	return tWORDS_BEG;
 
       case 'r':
 	p->lex_strterm = new_strterm(p, str_regexp, term, paren);
diff --git a/test/t/literals.rb b/test/t/literals.rb
index eb3190a07..5dc15f135 100644
--- a/test/t/literals.rb
+++ b/test/t/literals.rb
@@ -195,9 +195,11 @@ assert('Literals Symbol', '8.7.6.6') do
   f = %s[asd \[
 qwe]
   g = %s/foo#{1+2}bar/
+  h = %s{{foo bar}}
 
   a == :'asd qwe' and b == :"foo bar" and c == :a3b and d == :asd and
-  e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar'
+  e == :' foo )' and f == :"asd [\nqwe" and g == :'foo#{1+2}bar' and
+  h == :'{foo bar}'
 end
 
 # Not Implemented ATM assert('Literals Regular expression', '8.7.6.5') do
-- 
cgit v1.2.3


From 76c24894a7f859cc76b437a07030f2e2f277eab1 Mon Sep 17 00:00:00 2001
From: FUKUZAWA-Tadashi <alien@apple.nifty.jp>
Date: Sun, 17 Mar 2013 21:22:21 +0900
Subject: bugfix about escaping '\n'

---
 src/parse.y        | 26 +++++++++++++++++++-------
 test/t/literals.rb | 18 +++++++++++++++---
 2 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'test/t')

diff --git a/src/parse.y b/src/parse.y
index 21823451b..bfbdd9eb1 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -3535,7 +3535,7 @@ parse_string(parser_state *p)
 	char buf[256];
 	snprintf(buf, sizeof(buf), "can't find string \"%s\" anywhere before EOF", hinf->term);
 	yyerror(p, buf);
-        return 0;
+	return 0;
       }
       yylval.nd = new_str(p, tok(p), toklen(p));
       return tSTRING_MID;
@@ -3558,6 +3558,11 @@ parse_string(parser_state *p)
 	if (c == end || c == beg) {
 	  tokadd(p, c);
 	}
+	else if ((c == '\n') && (type & STR_FUNC_ARRAY)) {
+	  p->lineno++;
+	  p->column = 0;
+	  tokadd(p, '\n');
+	}
 	else {
 	  pushback(p, c);
 	  tokadd(p, read_escape(p));
@@ -3570,14 +3575,14 @@ parse_string(parser_state *p)
 	  case '\n':
 	    p->lineno++;
 	    p->column = 0;
-	    continue;
+	    break;
 
 	  case '\\':
-	    c = '\\';
 	    break;
 
 	  default:
-	    tokadd(p, '\\');
+	    if (! ISSPACE(c))
+	      tokadd(p, '\\');
 	  }
 	}
 	tokadd(p, c);
@@ -3601,7 +3606,12 @@ parse_string(parser_state *p)
     }
     if ((type & STR_FUNC_ARRAY) && ISSPACE(c)) {
       if (toklen(p) == 0) {
-	do {} while (ISSPACE(c = nextc(p)));
+	do {
+	  if (c == '\n') {
+	    p->lineno++;
+	    p->column = 0;
+	  }
+	} while (ISSPACE(c = nextc(p)));
 	pushback(p, c);
 	return tLITERAL_DELIM;
       } else {
@@ -3681,8 +3691,10 @@ heredoc_identifier(parser_state *p)
       quote = TRUE;
     newtok(p);
     while ((c = nextc(p)) != -1 && c != term) {
-      if (c == '\n')
-        c = -1;
+      if (c == '\n') {
+	c = -1;
+	break;
+      }
       tokadd(p, c);
     }
     if (c == -1) {
diff --git a/test/t/literals.rb b/test/t/literals.rb
index 5dc15f135..5a29cff0c 100644
--- a/test/t/literals.rb
+++ b/test/t/literals.rb
@@ -140,6 +140,11 @@ assert('Literals Array', '8.7.6.4') do
     #{-1}1
     2#{2}
   }
+  h = %W(a\nb
+         test\ abc
+         c\
+d
+         x\y x\\y x\\\y)
 
   test1 = (a == ['abc3def', '}g'] and
            b == ['abc', '5', 'def', '(g'] and
@@ -147,7 +152,8 @@ assert('Literals Array', '8.7.6.4') do
            d == ['9'] and
            e == [] and
            f == ['[ab', 'cd][ef]'] and
-           g == ['ab', '-11', '22']
+           g == ['ab', '-11', '22'] and
+           h == ["a\nb", 'test abc', "c\nd", "xy", "x\\y", "x\\y"]
           )
 
   a = %w{abc#{1+2}def \}g}
@@ -161,6 +167,11 @@ assert('Literals Array', '8.7.6.4') do
     #{-1}1
     2#{2}
   }
+  h = %w(a\nb
+         test\ abc
+         c\
+d
+         x\y x\\y x\\\y)
 
   test2 = (a == ['abc#{1+2}def', '}g'] and
            b == ['abc', '#{2+3}', 'def', '(g'] and
@@ -168,8 +179,9 @@ assert('Literals Array', '8.7.6.4') do
            d == ['#{4+5}'] and
            e == [] and
            f == ['[ab', 'cd][ef]'] and
-           g == ['ab', '#{-1}1', '2#{2}']
-          )
+           g == ['ab', '#{-1}1', '2#{2}'] and
+           h == ["a\\nb", "test abc", "c\nd", "x\\y", "x\\y", "x\\\\y"]
+          ) 
 
   test1 and test2
 end
-- 
cgit v1.2.3