about | summary | refs | log | tree | commit | diff
path: root/testes/utf8.lua
diff options
context:
space:
mode:
Diffstat (limited to 'testes/utf8.lua')
-rw-r--r-- testes/utf8.lua 14
1 file changed, 13 insertions, 1 deletion
diff --git a/testes/utf8.lua b/testes/utf8.lua
index 028995a4..8a0213d6 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
@@ -238,10 +238,18 @@ s = "\0 \x7F\z
238s = string.gsub(s, " ", "") 238s = string.gsub(s, " ", "")
239check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF}) 239check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF})
240 240
241
242-- again, without strictness
243s = "\xF0\x90\x80\x80 \xF7\xBF\xBF\xBF\z
244 \xF8\x88\x80\x80\x80 \xFB\xBF\xBF\xBF\xBF\z
245 \xFC\x84\x80\x80\x80\x80 \xFD\xBF\xBF\xBF\xBF\xBF"
246s = string.gsub(s, " ", "")
247check(s, {0x10000,0x1FFFFF, 0x200000,0x3FFFFFF, 0x4000000,0x7FFFFFFF}, true)
248
241do 249do
242 -- original UTF-8 values 250 -- original UTF-8 values
243 local s = "\u{4000000}\u{7FFFFFFF}" 251 local s = "\u{4000000}\u{7FFFFFFF}"
244 assert(#s == 12) 252 assert(s == "\xFC\x84\x80\x80\x80\x80\xFD\xBF\xBF\xBF\xBF\xBF")
245 check(s, {0x4000000, 0x7FFFFFFF}, true) 253 check(s, {0x4000000, 0x7FFFFFFF}, true)
246 254
247 s = "\u{200000}\u{3FFFFFF}" 255 s = "\u{200000}\u{3FFFFFF}"
@@ -257,6 +265,10 @@ local x = "日本語a-4\0éó"
257check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243}) 265check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243})
258 266
259 267
268-- more than 5 continuation bytes
269assert(not utf8.len("\xff\x8f\x8f\x8f\x8f\x8f\x8f\x8f"))
270
271
260-- Supplementary Characters 272-- Supplementary Characters
261check("𣲷𠜎𠱓𡁻𠵼ab𠺢", 273check("𣲷𠜎𠱓𡁻𠵼ab𠺢",
262 {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,}) 274 {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,})