diff options
| author | Yukihiro "Matz" Matsumoto <[email protected]> | 2016-12-01 18:42:40 +0900 |
|---|---|---|
| committer | GitHub <[email protected]> | 2016-12-01 18:42:40 +0900 |
| commit | f1cf6ef8179313b31a12ed4e5eba509d3b2aed0f (patch) | |
| tree | f7af4f06597947fd8e5d31307bca554414a23234 /mrbgems | |
| parent | 4d807fc619e7feccd2bc079ef76a9e817e36fa59 (diff) | |
| parent | 0f08914ac0d433545a4224ee1c3f8d3eb8d51e68 (diff) | |
| download | mruby-f1cf6ef8179313b31a12ed4e5eba509d3b2aed0f.tar.gz mruby-f1cf6ef8179313b31a12ed4e5eba509d3b2aed0f.zip | |
Merge pull request #3312 from nobu/feature/multi-unicode-escape
Feature/multi unicode escape
Diffstat (limited to 'mrbgems')
| -rw-r--r-- | mrbgems/mruby-compiler/core/parse.y | 102 |
1 files changed, 59 insertions, 43 deletions
diff --git a/mrbgems/mruby-compiler/core/parse.y b/mrbgems/mruby-compiler/core/parse.y index ef522d239..f0c45b85b 100644 --- a/mrbgems/mruby-compiler/core/parse.y +++ b/mrbgems/mruby-compiler/core/parse.y @@ -3752,6 +3752,44 @@ scan_hex(const int *start, int len, int *retlen) return retval; } +static int32_t +read_escape_unicode(parser_state *p, size_t limit) +{ + int32_t c; + int buf[9]; + int i; + + /* Look for opening brace */ + i = 0; + buf[0] = nextc(p); + if (buf[0] < 0) goto eof; + if (ISXDIGIT(buf[0])) { + /* \uxxxx form */ + for (i=1; i<limit; i++) { + buf[i] = nextc(p); + if (buf[i] < 0) goto eof; + if (!ISXDIGIT(buf[i])) { + pushback(p, buf[i]); + break; + } + } + } + else { + pushback(p, buf[0]); + } + c = scan_hex(buf, i, &i); + if (i == 0) { + eof: + yyerror(p, "Invalid escape character syntax"); + return -1; + } + if (c < 0 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800) { + yyerror(p, "Invalid Unicode code point"); + return -1; + } + return c; +} + /* Return negative to indicate Unicode code point */ static int32_t read_escape(parser_state *p) @@ -3824,53 +3862,17 @@ read_escape(parser_state *p) return c; case 'u': /* Unicode */ - { - int buf[9]; - int i; - - /* Look for opening brace */ - i = 0; - buf[0] = nextc(p); - if (buf[0] < 0) goto eof; - if (buf[0] == '{') { + if (peek(p, '{')) { /* \u{xxxxxxxx} form */ - for (i=0; i<9; i++) { - buf[i] = nextc(p); - if (buf[i] < 0) goto eof; - if (buf[i] == '}') { - break; - } - else if (!ISXDIGIT(buf[i])) { - yyerror(p, "Invalid escape character syntax"); - pushback(p, buf[i]); - return 0; - } - } - } - else if (ISXDIGIT(buf[0])) { - /* \uxxxx form */ - for (i=1; i<4; i++) { - buf[i] = nextc(p); - if (buf[i] < 0) goto eof; - if (!ISXDIGIT(buf[i])) { - pushback(p, buf[i]); - break; - } - } + nextc(p); + c = read_escape_unicode(p, 8); + if (c < 0) return 0; + if (nextc(p) != '}') goto eof; } else { - pushback(p, buf[0]); - } - c = scan_hex(buf, i, &i); - if (i == 0) { - yyerror(p, "Invalid escape character syntax"); - return 0; + c = read_escape_unicode(p, 4); + if (c < 0) return 0; } - if (c < 0 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800) { - yyerror(p, "Invalid Unicode code point"); - return 0; - } - } return -c; case 'b':/* backspace */ @@ -3993,6 +3995,20 @@ parse_string(parser_state *p) tokadd(p, '\\'); tokadd(p, c); } + else if (c == 'u' && peek(p, '{')) { + /* \u{xxxx xxxx xxxx} form */ + nextc(p); + while (1) { + do c = nextc(p); while (ISSPACE(c)); + if (c == '}') break; + pushback(p, c); + c = read_escape_unicode(p, 8); + if (c < 0) break; + tokadd(p, -c); + } + if (hinf) + hinf->line_head = FALSE; + } else { pushback(p, c); tokadd(p, read_escape(p)); |
