diff options
Diffstat (limited to 'lutf8lib.c')
| -rw-r--r-- | lutf8lib.c | 24 |
1 file changed, 8 insertions, 16 deletions
| @@ -19,6 +19,7 @@ | |||
| 19 | 19 | ||
| 20 | #include "lauxlib.h" | 20 | #include "lauxlib.h" |
| 21 | #include "lualib.h" | 21 | #include "lualib.h" |
| 22 | #include "llimits.h" | ||
| 22 | 23 | ||
| 23 | 24 | ||
| 24 | #define MAXUNICODE 0x10FFFFu | 25 | #define MAXUNICODE 0x10FFFFu |
| @@ -28,15 +29,6 @@ | |||
| 28 | 29 | ||
| 29 | #define MSGInvalid "invalid UTF-8 code" | 30 | #define MSGInvalid "invalid UTF-8 code" |
| 30 | 31 | ||
| 31 | /* | ||
| 32 | ** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits. | ||
| 33 | */ | ||
| 34 | #if (UINT_MAX >> 30) >= 1 | ||
| 35 | typedef unsigned int utfint; | ||
| 36 | #else | ||
| 37 | typedef unsigned long utfint; | ||
| 38 | #endif | ||
| 39 | |||
| 40 | 32 | ||
| 41 | #define iscont(c) (((c) & 0xC0) == 0x80) | 33 | #define iscont(c) (((c) & 0xC0) == 0x80) |
| 42 | #define iscontp(p) iscont(*(p)) | 34 | #define iscontp(p) iscont(*(p)) |
| @@ -58,11 +50,11 @@ static lua_Integer u_posrelat (lua_Integer pos, size_t len) { | |||
| 58 | ** entry forces an error for non-ascii bytes with no continuation | 50 | ** entry forces an error for non-ascii bytes with no continuation |
| 59 | ** bytes (count == 0). | 51 | ** bytes (count == 0). |
| 60 | */ | 52 | */ |
| 61 | static const char *utf8_decode (const char *s, utfint *val, int strict) { | 53 | static const char *utf8_decode (const char *s, l_uint32 *val, int strict) { |
| 62 | static const utfint limits[] = | 54 | static const l_uint32 limits[] = |
| 63 | {~(utfint)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u}; | 55 | {~(l_uint32)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u}; |
| 64 | unsigned int c = (unsigned char)s[0]; | 56 | unsigned int c = (unsigned char)s[0]; |
| 65 | utfint res = 0; /* final result */ | 57 | l_uint32 res = 0; /* final result */ |
| 66 | if (c < 0x80) /* ascii? */ | 58 | if (c < 0x80) /* ascii? */ |
| 67 | res = c; | 59 | res = c; |
| 68 | else { | 60 | else { |
| @@ -73,7 +65,7 @@ static const char *utf8_decode (const char *s, utfint *val, int strict) { | |||
| 73 | return NULL; /* invalid byte sequence */ | 65 | return NULL; /* invalid byte sequence */ |
| 74 | res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ | 66 | res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ |
| 75 | } | 67 | } |
| 76 | res |= ((utfint)(c & 0x7F) << (count * 5)); /* add first byte */ | 68 | res |= ((l_uint32)(c & 0x7F) << (count * 5)); /* add first byte */ |
| 77 | if (count > 5 || res > MAXUTF || res < limits[count]) | 69 | if (count > 5 || res > MAXUTF || res < limits[count]) |
| 78 | return NULL; /* invalid byte sequence */ | 70 | return NULL; /* invalid byte sequence */ |
| 79 | s += count; /* skip continuation bytes read */ | 71 | s += count; /* skip continuation bytes read */ |
| @@ -141,7 +133,7 @@ static int codepoint (lua_State *L) { | |||
| 141 | n = 0; /* count the number of returns */ | 133 | n = 0; /* count the number of returns */ |
| 142 | se = s + pose; /* string end */ | 134 | se = s + pose; /* string end */ |
| 143 | for (s += posi - 1; s < se;) { | 135 | for (s += posi - 1; s < se;) { |
| 144 | utfint code; | 136 | l_uint32 code; |
| 145 | s = utf8_decode(s, &code, !lax); | 137 | s = utf8_decode(s, &code, !lax); |
| 146 | if (s == NULL) | 138 | if (s == NULL) |
| 147 | return luaL_error(L, MSGInvalid); | 139 | return luaL_error(L, MSGInvalid); |
| @@ -243,7 +235,7 @@ static int iter_aux (lua_State *L, int strict) { | |||
| 243 | if (n >= len) /* (also handles original 'n' being negative) */ | 235 | if (n >= len) /* (also handles original 'n' being negative) */ |
| 244 | return 0; /* no more codepoints */ | 236 | return 0; /* no more codepoints */ |
| 245 | else { | 237 | else { |
| 246 | utfint code; | 238 | l_uint32 code; |
| 247 | const char *next = utf8_decode(s + n, &code, strict); | 239 | const char *next = utf8_decode(s + n, &code, strict); |
| 248 | if (next == NULL || iscontp(next)) | 240 | if (next == NULL || iscontp(next)) |
| 249 | return luaL_error(L, MSGInvalid); | 241 | return luaL_error(L, MSGInvalid); |
