Implement \u notation for strings and regexes.

This change adds the \u notation for double-quoted strings and regular expressions. It does not implement the \u notation for character literals. Both the \uNNNN and \u{NNNN} notations are supported. \uNNNN is implemented by emitting equivalent UTF-8; that is, "\u4000" is equivalent to "\xE4\x80\x80". Unlike in CRuby, the \u{NNNN} notation allows only one character per pair of braces; I see no way to lift this restriction without remodeling the parser.
author: chasonr <[email protected]> 2014-03-23 21:46:00 -0400
committer: chasonr <[email protected]> 2014-03-23 21:46:00 -0400
commit: 509cbc51eb3984b4a67076ee88df871ea157c46c (patch)
tree: 232ff12b03cf271a5101e94b3b8fe8a73ddabfcc /test
parent: b8d7f1ce7aaef75d43f515231fcd62dfe34116d5 (diff)
download: mruby-509cbc51eb3984b4a67076ee88df871ea157c46c.tar.gz
mruby-509cbc51eb3984b4a67076ee88df871ea157c46c.zip
1 files changed, 60 insertions, 0 deletions
diff --git a/test/t/unicode.rb b/test/t/unicode.rb
new file mode 100644
index 000000000..8f94421e6
--- /dev/null
+++ b/test/t/unicode.rb
@@ -0,0 +1,60 @@
+# Test of the \u notation
+
+assert('bare \u notation test') do
+  # Minimum and maximum one-byte characters (U+0000 and U+007F)
+  assert_equal("\u0000", "\x00")
+  assert_equal("\u007F", "\x7F")
+
+  # Minimum and maximum two-byte characters (U+0080 and U+07FF)
+  assert_equal("\u0080", "\xC2\x80")
+  assert_equal("\u07FF", "\xDF\xBF")
+
+  # Minimum and maximum three-byte characters (U+0800 and U+FFFF)
+  assert_equal("\u0800", "\xE0\xA0\x80")
+  assert_equal("\uFFFF", "\xEF\xBF\xBF")
+
+  # Four-byte characters require the braced \u{NNNNN} notation
+end
+
+assert('braced \u notation test') do
+  # Minimum and maximum one-byte characters (U+0000 and U+007F)
+  assert_equal("\u{0000}", "\x00")
+  assert_equal("\u{007F}", "\x7F")
+
+  # Minimum and maximum two-byte characters (U+0080 and U+07FF)
+  assert_equal("\u{0080}", "\xC2\x80")
+  assert_equal("\u{07FF}", "\xDF\xBF")
+
+  # Minimum and maximum three-byte characters (U+0800 and U+FFFF)
+  assert_equal("\u{0800}", "\xE0\xA0\x80")
+  assert_equal("\u{FFFF}", "\xEF\xBF\xBF")
+
+  # Minimum and maximum four-byte characters (U+10000 and U+10FFFF)
+  assert_equal("\u{10000}",  "\xF0\x90\x80\x80")
+  assert_equal("\u{10FFFF}", "\xF4\x8F\xBF\xBF")
+end
+
+# Run the regular expression tests only if Regexp is defined in this build
+begin
+  Regexp
+  have_regexp = true
+rescue NameError
+  have_regexp = false
+end
+if have_regexp then
+  assert('Testing in regular expressions') do
+    # Each regular expression uses the unbraced notation where the string uses
+    # the braced notation, and vice versa, so these tests will fail if the \u
+    # modification is not applied to both strings and regular expressions.
+    # NOTE(review): =~ returns an index or nil, not true/false -- confirm the asserts test truthiness
+    # Unbraced \u notation in the regular expression, braced in the string
+    assert_false(/\u0300/ =~ "\u{02FF}")
+    assert_true( /\u0300/ =~ "\u{0300}")
+    assert_false(/\u0300/ =~ "\u{0301}")
+
+    # Braced \u notation in the regular expression, unbraced in the string
+    assert_false(/\u{0300}/ =~ "\u02FF")
+    assert_true( /\u{0300}/ =~ "\u0300")
+    assert_false(/\u{0300}/ =~ "\u0301")
+  end
+end
author	chasonr <[email protected]>	2014-03-23 21:46:00 -0400
committer	chasonr <[email protected]>	2014-03-23 21:46:00 -0400
commit	509cbc51eb3984b4a67076ee88df871ea157c46c (patch)
tree	232ff12b03cf271a5101e94b3b8fe8a73ddabfcc /test
parent	b8d7f1ce7aaef75d43f515231fcd62dfe34116d5 (diff)
download	mruby-509cbc51eb3984b4a67076ee88df871ea157c46c.tar.gz mruby-509cbc51eb3984b4a67076ee88df871ea157c46c.zip