diff options
Diffstat (limited to 'testes')
-rw-r--r-- | testes/utf8.lua | 44 |
1 file changed, 27 insertions, 17 deletions
diff --git a/testes/utf8.lua b/testes/utf8.lua
index efadbd5c..dc0f2f09 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua | |||
@@ -52,25 +52,35 @@ local function check (s, t, nonstrict) | |||
52 | for i = 1, #t do assert(t[i] == t1[i]) end -- 't' is equal to 't1' | 52 | for i = 1, #t do assert(t[i] == t1[i]) end -- 't' is equal to 't1' |
53 | 53 | ||
54 | for i = 1, l do -- for all codepoints | 54 | for i = 1, l do -- for all codepoints |
55 | local pi = utf8.offset(s, i) -- position of i-th char | 55 | local pi, pie = utf8.offset(s, i) -- position of i-th char |
56 | local pi1 = utf8.offset(s, 2, pi) -- position of next char | 56 | local pi1 = utf8.offset(s, 2, pi) -- position of next char |
57 | assert(pi1 == pie + 1) | ||
57 | assert(string.find(string.sub(s, pi, pi1 - 1), justone)) | 58 | assert(string.find(string.sub(s, pi, pi1 - 1), justone)) |
58 | assert(utf8.offset(s, -1, pi1) == pi) | 59 | assert(utf8.offset(s, -1, pi1) == pi) |
59 | assert(utf8.offset(s, i - l - 1) == pi) | 60 | assert(utf8.offset(s, i - l - 1) == pi) |
60 | assert(pi1 - pi == #utf8.char(utf8.codepoint(s, pi, pi, nonstrict))) | 61 | assert(pi1 - pi == #utf8.char(utf8.codepoint(s, pi, pi, nonstrict))) |
61 | for j = pi, pi1 - 1 do | 62 | for j = pi, pi1 - 1 do |
62 | assert(utf8.offset(s, 0, j) == pi) | 63 | local off1, off2 = utf8.offset(s, 0, j) |
64 | assert(off1 == pi and off2 == pi1 - 1) | ||
63 | end | 65 | end |
64 | for j = pi + 1, pi1 - 1 do | 66 | for j = pi + 1, pi1 - 1 do |
65 | assert(not utf8.len(s, j)) | 67 | assert(not utf8.len(s, j)) |
66 | end | 68 | end |
67 | assert(utf8.len(s, pi, pi, nonstrict) == 1) | 69 | assert(utf8.len(s, pi, pi, nonstrict) == 1) |
68 | assert(utf8.len(s, pi, pi1 - 1, nonstrict) == 1) | 70 | assert(utf8.len(s, pi, pi1 - 1, nonstrict) == 1) |
69 | assert(utf8.len(s, pi, -1, nonstrict) == l - i + 1) | 71 | assert(utf8.len(s, pi, -1, nonstrict) == l - i + 1) |
70 | assert(utf8.len(s, pi1, -1, nonstrict) == l - i) | 72 | assert(utf8.len(s, pi1, -1, nonstrict) == l - i) |
71 | assert(utf8.len(s, 1, pi, nonstrict) == i) | 73 | assert(utf8.len(s, 1, pi, nonstrict) == i) |
72 | end | 74 | end |
73 | 75 | ||
76 | local expected = 1 -- expected position of "current" character | ||
77 | for i = 1, l + 1 do | ||
78 | local p, e = utf8.offset(s, i) | ||
79 | assert(p == expected) | ||
80 | expected = e + 1 | ||
81 | end | ||
82 | assert(expected - 1 == #s + 1) | ||
83 | |||
74 | local i = 0 | 84 | local i = 0 |
75 | for p, c in utf8.codes(s, nonstrict) do | 85 | for p, c in utf8.codes(s, nonstrict) do |
76 | i = i + 1 | 86 | i = i + 1 |
@@ -94,20 +104,20 @@ end | |||
94 | 104 | ||
95 | 105 | ||
96 | do -- error indication in utf8.len | 106 | do -- error indication in utf8.len |
97 | local function check (s, p) | 107 | local function checklen (s, p) |
98 | local a, b = utf8.len(s) | 108 | local a, b = utf8.len(s) |
99 | assert(not a and b == p) | 109 | assert(not a and b == p) |
100 | end | 110 | end |
101 | check("abc\xE3def", 4) | 111 | checklen("abc\xE3def", 4) |
102 | check("\xF4\x9F\xBF", 1) | 112 | checklen("\xF4\x9F\xBF", 1) |
103 | check("\xF4\x9F\xBF\xBF", 1) | 113 | checklen("\xF4\x9F\xBF\xBF", 1) |
104 | -- spurious continuation bytes | 114 | -- spurious continuation bytes |
105 | check("汉字\x80", #("汉字") + 1) | 115 | checklen("汉字\x80", #("汉字") + 1) |
106 | check("\x80hello", 1) | 116 | checklen("\x80hello", 1) |
107 | check("hel\x80lo", 4) | 117 | checklen("hel\x80lo", 4) |
108 | check("汉字\xBF", #("汉字") + 1) | 118 | checklen("汉字\xBF", #("汉字") + 1) |
109 | check("\xBFhello", 1) | 119 | checklen("\xBFhello", 1) |
110 | check("hel\xBFlo", 4) | 120 | checklen("hel\xBFlo", 4) |
111 | end | 121 | end |
112 | 122 | ||
113 | -- errors in utf8.codes | 123 | -- errors in utf8.codes |