aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lutf8lib.c 5
-rw-r--r-- makefile 2
-rw-r--r-- testes/utf8.lua 14
3 files changed, 18 insertions, 3 deletions
diff --git a/lutf8lib.c b/lutf8lib.c
index b7f3fe1e..73f0e49b 100644
--- a/lutf8lib.c
+++ b/lutf8lib.c
@@ -56,6 +56,8 @@ static const char *utf8_decode (const char *s, l_uint32 *val, int strict) {
56 l_uint32 res = 0; /* final result */ 56 l_uint32 res = 0; /* final result */
57 if (c < 0x80) /* ASCII? */ 57 if (c < 0x80) /* ASCII? */
58 res = c; 58 res = c;
59 else if (c >= 0xfe) /* c >= 1111 1110b ? */
60 return NULL; /* would need six or more continuation bytes */
59 else { 61 else {
60 int count = 0; /* to count number of continuation bytes */ 62 int count = 0; /* to count number of continuation bytes */
61 for (; c & 0x40; c <<= 1) { /* while it needs continuation bytes... */ 63 for (; c & 0x40; c <<= 1) { /* while it needs continuation bytes... */
@@ -64,8 +66,9 @@ static const char *utf8_decode (const char *s, l_uint32 *val, int strict) {
64 return NULL; /* invalid byte sequence */ 66 return NULL; /* invalid byte sequence */
65 res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ 67 res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
66 } 68 }
69 lua_assert(count <= 5);
67 res |= ((l_uint32)(c & 0x7F) << (count * 5)); /* add first byte */ 70 res |= ((l_uint32)(c & 0x7F) << (count * 5)); /* add first byte */
68 if (count > 5 || res > MAXUTF || res < limits[count]) 71 if (res > MAXUTF || res < limits[count])
69 return NULL; /* invalid byte sequence */ 72 return NULL; /* invalid byte sequence */
70 s += count; /* skip continuation bytes read */ 73 s += count; /* skip continuation bytes read */
71 } 74 }
diff --git a/makefile b/makefile
index 8674519f..fa165bca 100644
--- a/makefile
+++ b/makefile
@@ -60,7 +60,7 @@ CWARNS= $(CWARNSCPP) $(CWARNSC) $(CWARNGCC)
60 # create problems; some are only available in newer gcc versions. To 60 # create problems; some are only available in newer gcc versions. To
61 # use some of them, we also have to define an environment variable 61 # use some of them, we also have to define an environment variable
62 # ASAN_OPTIONS="detect_invalid_pointer_pairs=2". 62 # ASAN_OPTIONS="detect_invalid_pointer_pairs=2".
63 # -fsanitize=undefined 63 # -fsanitize=undefined (you may need to add "-lubsan" to libs)
64 # -fsanitize=pointer-subtract -fsanitize=address -fsanitize=pointer-compare 64 # -fsanitize=pointer-subtract -fsanitize=address -fsanitize=pointer-compare
65 # TESTS= -DLUA_USER_H='"ltests.h"' -Og -g 65 # TESTS= -DLUA_USER_H='"ltests.h"' -Og -g
66 66
diff --git a/testes/utf8.lua b/testes/utf8.lua
index 028995a4..8a0213d6 100644
--- a/testes/utf8.lua
+++ b/testes/utf8.lua
@@ -238,10 +238,18 @@ s = "\0 \x7F\z
238 s = string.gsub(s, " ", "") 238 s = string.gsub(s, " ", "")
239 check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF}) 239 check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF})
240 240
241
242 -- again, without strictness
243 s = "\xF0\x90\x80\x80 \xF7\xBF\xBF\xBF\z
244 \xF8\x88\x80\x80\x80 \xFB\xBF\xBF\xBF\xBF\z
245 \xFC\x84\x80\x80\x80\x80 \xFD\xBF\xBF\xBF\xBF\xBF"
246 s = string.gsub(s, " ", "")
247 check(s, {0x10000,0x1FFFFF, 0x200000,0x3FFFFFF, 0x4000000,0x7FFFFFFF}, true)
248
241 do 249 do
242 -- original UTF-8 values 250 -- original UTF-8 values
243 local s = "\u{4000000}\u{7FFFFFFF}" 251 local s = "\u{4000000}\u{7FFFFFFF}"
244 assert(#s == 12) 252 assert(s == "\xFC\x84\x80\x80\x80\x80\xFD\xBF\xBF\xBF\xBF\xBF")
245 check(s, {0x4000000, 0x7FFFFFFF}, true) 253 check(s, {0x4000000, 0x7FFFFFFF}, true)
246 254
247 s = "\u{200000}\u{3FFFFFF}" 255 s = "\u{200000}\u{3FFFFFF}"
@@ -257,6 +265,10 @@ local x = "日本語a-4\0éó"
257 check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243}) 265 check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243})
258 266
259 267
268 -- more than 5 continuation bytes
269 assert(not utf8.len("\xff\x8f\x8f\x8f\x8f\x8f\x8f\x8f"))
270
271
260 -- Supplementary Characters 272 -- Supplementary Characters
261 check("𣲷𠜎𠱓𡁻𠵼ab𠺢", 273 check("𣲷𠜎𠱓𡁻𠵼ab𠺢",
262 {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,}) 274 {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,})