diff options
Diffstat (limited to 'testes')
| -rw-r--r-- | testes/utf8.lua | 44 |
1 file changed, 27 insertions, 17 deletions
diff --git a/testes/utf8.lua b/testes/utf8.lua
index efadbd5c..dc0f2f09 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
| @@ -52,25 +52,35 @@ local function check (s, t, nonstrict) | |||
| 52 | for i = 1, #t do assert(t[i] == t1[i]) end -- 't' is equal to 't1' | 52 | for i = 1, #t do assert(t[i] == t1[i]) end -- 't' is equal to 't1' |
| 53 | 53 | ||
| 54 | for i = 1, l do -- for all codepoints | 54 | for i = 1, l do -- for all codepoints |
| 55 | local pi = utf8.offset(s, i) -- position of i-th char | 55 | local pi, pie = utf8.offset(s, i) -- position of i-th char |
| 56 | local pi1 = utf8.offset(s, 2, pi) -- position of next char | 56 | local pi1 = utf8.offset(s, 2, pi) -- position of next char |
| 57 | assert(pi1 == pie + 1) | ||
| 57 | assert(string.find(string.sub(s, pi, pi1 - 1), justone)) | 58 | assert(string.find(string.sub(s, pi, pi1 - 1), justone)) |
| 58 | assert(utf8.offset(s, -1, pi1) == pi) | 59 | assert(utf8.offset(s, -1, pi1) == pi) |
| 59 | assert(utf8.offset(s, i - l - 1) == pi) | 60 | assert(utf8.offset(s, i - l - 1) == pi) |
| 60 | assert(pi1 - pi == #utf8.char(utf8.codepoint(s, pi, pi, nonstrict))) | 61 | assert(pi1 - pi == #utf8.char(utf8.codepoint(s, pi, pi, nonstrict))) |
| 61 | for j = pi, pi1 - 1 do | 62 | for j = pi, pi1 - 1 do |
| 62 | assert(utf8.offset(s, 0, j) == pi) | 63 | local off1, off2 = utf8.offset(s, 0, j) |
| 64 | assert(off1 == pi and off2 == pi1 - 1) | ||
| 63 | end | 65 | end |
| 64 | for j = pi + 1, pi1 - 1 do | 66 | for j = pi + 1, pi1 - 1 do |
| 65 | assert(not utf8.len(s, j)) | 67 | assert(not utf8.len(s, j)) |
| 66 | end | 68 | end |
| 67 | assert(utf8.len(s, pi, pi, nonstrict) == 1) | 69 | assert(utf8.len(s, pi, pi, nonstrict) == 1) |
| 68 | assert(utf8.len(s, pi, pi1 - 1, nonstrict) == 1) | 70 | assert(utf8.len(s, pi, pi1 - 1, nonstrict) == 1) |
| 69 | assert(utf8.len(s, pi, -1, nonstrict) == l - i + 1) | 71 | assert(utf8.len(s, pi, -1, nonstrict) == l - i + 1) |
| 70 | assert(utf8.len(s, pi1, -1, nonstrict) == l - i) | 72 | assert(utf8.len(s, pi1, -1, nonstrict) == l - i) |
| 71 | assert(utf8.len(s, 1, pi, nonstrict) == i) | 73 | assert(utf8.len(s, 1, pi, nonstrict) == i) |
| 72 | end | 74 | end |
| 73 | 75 | ||
| 76 | local expected = 1 -- expected position of "current" character | ||
| 77 | for i = 1, l + 1 do | ||
| 78 | local p, e = utf8.offset(s, i) | ||
| 79 | assert(p == expected) | ||
| 80 | expected = e + 1 | ||
| 81 | end | ||
| 82 | assert(expected - 1 == #s + 1) | ||
| 83 | |||
| 74 | local i = 0 | 84 | local i = 0 |
| 75 | for p, c in utf8.codes(s, nonstrict) do | 85 | for p, c in utf8.codes(s, nonstrict) do |
| 76 | i = i + 1 | 86 | i = i + 1 |
| @@ -94,20 +104,20 @@ end | |||
| 94 | 104 | ||
| 95 | 105 | ||
| 96 | do -- error indication in utf8.len | 106 | do -- error indication in utf8.len |
| 97 | local function check (s, p) | 107 | local function checklen (s, p) |
| 98 | local a, b = utf8.len(s) | 108 | local a, b = utf8.len(s) |
| 99 | assert(not a and b == p) | 109 | assert(not a and b == p) |
| 100 | end | 110 | end |
| 101 | check("abc\xE3def", 4) | 111 | checklen("abc\xE3def", 4) |
| 102 | check("\xF4\x9F\xBF", 1) | 112 | checklen("\xF4\x9F\xBF", 1) |
| 103 | check("\xF4\x9F\xBF\xBF", 1) | 113 | checklen("\xF4\x9F\xBF\xBF", 1) |
| 104 | -- spurious continuation bytes | 114 | -- spurious continuation bytes |
| 105 | check("汉字\x80", #("汉字") + 1) | 115 | checklen("汉字\x80", #("汉字") + 1) |
| 106 | check("\x80hello", 1) | 116 | checklen("\x80hello", 1) |
| 107 | check("hel\x80lo", 4) | 117 | checklen("hel\x80lo", 4) |
| 108 | check("汉字\xBF", #("汉字") + 1) | 118 | checklen("汉字\xBF", #("汉字") + 1) |
| 109 | check("\xBFhello", 1) | 119 | checklen("\xBFhello", 1) |
| 110 | check("hel\xBFlo", 4) | 120 | checklen("hel\xBFlo", 4) |
| 111 | end | 121 | end |
| 112 | 122 | ||
| 113 | -- errors in utf8.codes | 123 | -- errors in utf8.codes |
