diff options
Diffstat (limited to 'testes')
| -rw-r--r-- | testes/utf8.lua | 14 |
1 file changed, 13 insertions, 1 deletion
diff --git a/testes/utf8.lua b/testes/utf8.lua index 028995a4..8a0213d6 100644 --- a/testes/utf8.lua +++ b/testes/utf8.lua | |||
| @@ -238,10 +238,18 @@ s = "\0 \x7F\z | |||
| 238 | s = string.gsub(s, " ", "") | 238 | s = string.gsub(s, " ", "") |
| 239 | check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF}) | 239 | check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF}) |
| 240 | 240 | ||
| 241 | |||
| 242 | -- again, without strictness | ||
| 243 | s = "\xF0\x90\x80\x80 \xF7\xBF\xBF\xBF\z | ||
| 244 | \xF8\x88\x80\x80\x80 \xFB\xBF\xBF\xBF\xBF\z | ||
| 245 | \xFC\x84\x80\x80\x80\x80 \xFD\xBF\xBF\xBF\xBF\xBF" | ||
| 246 | s = string.gsub(s, " ", "") | ||
| 247 | check(s, {0x10000,0x1FFFFF, 0x200000,0x3FFFFFF, 0x4000000,0x7FFFFFFF}, true) | ||
| 248 | |||
| 241 | do | 249 | do |
| 242 | -- original UTF-8 values | 250 | -- original UTF-8 values |
| 243 | local s = "\u{4000000}\u{7FFFFFFF}" | 251 | local s = "\u{4000000}\u{7FFFFFFF}" |
| 244 | assert(#s == 12) | 252 | assert(s == "\xFC\x84\x80\x80\x80\x80\xFD\xBF\xBF\xBF\xBF\xBF") |
| 245 | check(s, {0x4000000, 0x7FFFFFFF}, true) | 253 | check(s, {0x4000000, 0x7FFFFFFF}, true) |
| 246 | 254 | ||
| 247 | s = "\u{200000}\u{3FFFFFF}" | 255 | s = "\u{200000}\u{3FFFFFF}" |
| @@ -257,6 +265,10 @@ local x = "日本語a-4\0éó" | |||
| 257 | check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243}) | 265 | check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243}) |
| 258 | 266 | ||
| 259 | 267 | ||
| 268 | -- more than 5 continuation bytes | ||
| 269 | assert(not utf8.len("\xff\x8f\x8f\x8f\x8f\x8f\x8f\x8f")) | ||
| 270 | |||
| 271 | |||
| 260 | -- Supplementary Characters | 272 | -- Supplementary Characters |
| 261 | check("𣲷𠜎𠱓𡁻𠵼ab𠺢", | 273 | check("𣲷𠜎𠱓𡁻𠵼ab𠺢", |
| 262 | {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,}) | 274 | {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,}) |
