aboutsummaryrefslogtreecommitdiff
path: root/lutf8lib.c
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2024-06-20 14:46:06 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2024-06-20 14:46:06 -0300
commit a08d82eb132bfd9db5b91e0d5ebcb81d7b26dcd0 (patch)
tree 45618815246686a535a28fb4e6f9736a60be00d4 /lutf8lib.c
parent 55ac40f859ad8e28fe71a8801d49f4a4140e8aa3 (diff)
download lua-a08d82eb132bfd9db5b91e0d5ebcb81d7b26dcd0.tar.gz
lua-a08d82eb132bfd9db5b91e0d5ebcb81d7b26dcd0.tar.bz2
lua-a08d82eb132bfd9db5b91e0d5ebcb81d7b26dcd0.zip
llimits.h being used by all Lua code
The definitions in llimits.h are useful not only for the core. That header only defines types and '#define's, so libs and core still do not share any real code/data.
Diffstat (limited to 'lutf8lib.c')
-rw-r--r-- lutf8lib.c 24
1 file changed, 8 insertions, 16 deletions
diff --git a/lutf8lib.c b/lutf8lib.c
index 7b747937..243196c8 100644
--- a/lutf8lib.c
+++ b/lutf8lib.c
@@ -19,6 +19,7 @@
19 19
20#include "lauxlib.h" 20#include "lauxlib.h"
21#include "lualib.h" 21#include "lualib.h"
22#include "llimits.h"
22 23
23 24
24#define MAXUNICODE 0x10FFFFu 25#define MAXUNICODE 0x10FFFFu
@@ -28,15 +29,6 @@
28 29
29#define MSGInvalid "invalid UTF-8 code" 30#define MSGInvalid "invalid UTF-8 code"
30 31
31/*
32** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
33*/
34#if (UINT_MAX >> 30) >= 1
35typedef unsigned int utfint;
36#else
37typedef unsigned long utfint;
38#endif
39
40 32
41#define iscont(c) (((c) & 0xC0) == 0x80) 33#define iscont(c) (((c) & 0xC0) == 0x80)
42#define iscontp(p) iscont(*(p)) 34#define iscontp(p) iscont(*(p))
@@ -58,11 +50,11 @@ static lua_Integer u_posrelat (lua_Integer pos, size_t len) {
58** entry forces an error for non-ascii bytes with no continuation 50** entry forces an error for non-ascii bytes with no continuation
59** bytes (count == 0). 51** bytes (count == 0).
60*/ 52*/
61static const char *utf8_decode (const char *s, utfint *val, int strict) { 53static const char *utf8_decode (const char *s, l_uint32 *val, int strict) {
62 static const utfint limits[] = 54 static const l_uint32 limits[] =
63 {~(utfint)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u}; 55 {~(l_uint32)0, 0x80, 0x800, 0x10000u, 0x200000u, 0x4000000u};
64 unsigned int c = (unsigned char)s[0]; 56 unsigned int c = (unsigned char)s[0];
65 utfint res = 0; /* final result */ 57 l_uint32 res = 0; /* final result */
66 if (c < 0x80) /* ascii? */ 58 if (c < 0x80) /* ascii? */
67 res = c; 59 res = c;
68 else { 60 else {
@@ -73,7 +65,7 @@ static const char *utf8_decode (const char *s, utfint *val, int strict) {
73 return NULL; /* invalid byte sequence */ 65 return NULL; /* invalid byte sequence */
74 res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */ 66 res = (res << 6) | (cc & 0x3F); /* add lower 6 bits from cont. byte */
75 } 67 }
76 res |= ((utfint)(c & 0x7F) << (count * 5)); /* add first byte */ 68 res |= ((l_uint32)(c & 0x7F) << (count * 5)); /* add first byte */
77 if (count > 5 || res > MAXUTF || res < limits[count]) 69 if (count > 5 || res > MAXUTF || res < limits[count])
78 return NULL; /* invalid byte sequence */ 70 return NULL; /* invalid byte sequence */
79 s += count; /* skip continuation bytes read */ 71 s += count; /* skip continuation bytes read */
@@ -141,7 +133,7 @@ static int codepoint (lua_State *L) {
141 n = 0; /* count the number of returns */ 133 n = 0; /* count the number of returns */
142 se = s + pose; /* string end */ 134 se = s + pose; /* string end */
143 for (s += posi - 1; s < se;) { 135 for (s += posi - 1; s < se;) {
144 utfint code; 136 l_uint32 code;
145 s = utf8_decode(s, &code, !lax); 137 s = utf8_decode(s, &code, !lax);
146 if (s == NULL) 138 if (s == NULL)
147 return luaL_error(L, MSGInvalid); 139 return luaL_error(L, MSGInvalid);
@@ -243,7 +235,7 @@ static int iter_aux (lua_State *L, int strict) {
243 if (n >= len) /* (also handles original 'n' being negative) */ 235 if (n >= len) /* (also handles original 'n' being negative) */
244 return 0; /* no more codepoints */ 236 return 0; /* no more codepoints */
245 else { 237 else {
246 utfint code; 238 l_uint32 code;
247 const char *next = utf8_decode(s + n, &code, strict); 239 const char *next = utf8_decode(s + n, &code, strict);
248 if (next == NULL || iscontp(next)) 240 if (next == NULL || iscontp(next))
249 return luaL_error(L, MSGInvalid); 241 return luaL_error(L, MSGInvalid);