summaryrefslogtreecommitdiffhomepage
path: root/test/t/unicode.rb
blob: a8e8c0e14db75e86e18cc05b92ede7908fe9d36d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Test of the \u notation

# Verifies that the four-hex-digit \uXXXX escape yields the UTF-8 byte
# sequence of its code point, probing the smallest and largest code point
# of each encoded length that this form can express.
assert('bare \u notation test') do
  # Argument order is expected-first: the raw UTF-8 bytes are the reference
  # value, the \u literal is the value under test.

  # Minimum and maximum one byte characters
  assert_equal("\x00", "\u0000")
  assert_equal("\x7F", "\u007F")

  # Minimum and maximum two byte characters
  assert_equal("\xC2\x80", "\u0080")
  assert_equal("\xDF\xBF", "\u07FF")

  # Minimum and maximum three byte characters
  assert_equal("\xE0\xA0\x80", "\u0800")
  assert_equal("\xEF\xBF\xBF", "\uFFFF")

  # Four byte characters do not fit in four hex digits; they require the
  # braced \u{...} notation
end

# Verifies that the braced \u{...} escape yields the UTF-8 byte sequence of
# its code point, probing the smallest and largest code point of each
# encoded length from one byte up to four bytes.
assert('braced \u notation test') do
  # Argument order is expected-first: the raw UTF-8 bytes are the reference
  # value, the \u{...} literal is the value under test.

  # Minimum and maximum one byte characters
  assert_equal("\x00", "\u{0000}")
  assert_equal("\x7F", "\u{007F}")

  # Minimum and maximum two byte characters
  assert_equal("\xC2\x80", "\u{0080}")
  assert_equal("\xDF\xBF", "\u{07FF}")

  # Minimum and maximum three byte characters
  assert_equal("\xE0\xA0\x80", "\u{0800}")
  assert_equal("\xEF\xBF\xBF", "\u{FFFF}")

  # Minimum and maximum four byte characters
  assert_equal("\xF0\x90\x80\x80", "\u{10000}")
  assert_equal("\xF4\x8F\xBF\xBF", "\u{10FFFF}")
end

# Test regular expressions only if implemented.
# Referencing the Regexp constant raises NameError when no such class is
# defined; in that case the regular expression test below is skipped.
have_regexp =
  begin
    Regexp
    true
  rescue NameError
    false
  end

if have_regexp
  assert('Testing \u in regular expressions') do
    # =~ yields a match index or nil, never true/false. Reduce it to a strict
    # boolean so the assertions do not depend on truthiness coercion.
    matched = lambda { |pattern, subject| !(pattern =~ subject).nil? }

    # Each pattern is written in one \u notation and its subject string in the
    # other, so a match can only succeed when both spellings are decoded to
    # the same character.

    # Unbraced \u notation in the regular expression, braced in the string
    assert_false(matched.call(/\u0300/, "\u{02FF}"))
    assert_true(matched.call(/\u0300/, "\u{0300}"))
    assert_false(matched.call(/\u0300/, "\u{0301}"))

    # Braced \u notation in the regular expression, unbraced in the string
    assert_false(matched.call(/\u{0300}/, "\u02FF"))
    assert_true(matched.call(/\u{0300}/, "\u0300"))
    assert_false(matched.call(/\u{0300}/, "\u0301"))
  end
end