diff options
Diffstat (limited to 'src/lj_str.c')
-rw-r--r-- | src/lj_str.c | 519 |
1 files changed, 275 insertions, 244 deletions
diff --git a/src/lj_str.c b/src/lj_str.c index 60912aed..a5282da6 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
@@ -1,13 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | ** String handling. | 2 | ** String handling. |
3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | 3 | ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h |
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | 4 | */ |
8 | 5 | ||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lj_str_c | 6 | #define lj_str_c |
12 | #define LUA_CORE | 7 | #define LUA_CORE |
13 | 8 | ||
@@ -15,10 +10,10 @@ | |||
15 | #include "lj_gc.h" | 10 | #include "lj_gc.h" |
16 | #include "lj_err.h" | 11 | #include "lj_err.h" |
17 | #include "lj_str.h" | 12 | #include "lj_str.h" |
18 | #include "lj_state.h" | ||
19 | #include "lj_char.h" | 13 | #include "lj_char.h" |
14 | #include "lj_prng.h" | ||
20 | 15 | ||
21 | /* -- String interning ---------------------------------------------------- */ | 16 | /* -- String helpers ------------------------------------------------------ */ |
22 | 17 | ||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 18 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | 19 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
@@ -43,297 +38,333 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | |||
43 | return (int32_t)(a->len - b->len); | 38 | return (int32_t)(a->len - b->len); |
44 | } | 39 | } |
45 | 40 | ||
46 | /* Fast string data comparison. Caveat: unaligned access to 1st string! */ | 41 | /* Find fixed string p inside string s. */ |
47 | static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) | 42 | const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) |
48 | { | 43 | { |
49 | MSize i = 0; | 44 | if (plen <= slen) { |
50 | lua_assert(len > 0); | 45 | if (plen == 0) { |
51 | lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); | 46 | return s; |
52 | do { /* Note: innocuous access up to end of string + 3. */ | 47 | } else { |
53 | uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); | 48 | int c = *(const uint8_t *)p++; |
54 | if (v) { | 49 | plen--; slen -= plen; |
55 | i -= len; | 50 | while (slen) { |
56 | #if LJ_LE | 51 | const char *q = (const char *)memchr(s, c, slen); |
57 | return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; | 52 | if (!q) break; |
58 | #else | 53 | if (memcmp(q+1, p, plen) == 0) return q; |
59 | return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; | 54 | q++; slen -= (MSize)(q-s); s = q; |
60 | #endif | 55 | } |
61 | } | 56 | } |
62 | i += 4; | 57 | } |
63 | } while (i < len); | 58 | return NULL; |
64 | return 0; | ||
65 | } | 59 | } |
66 | 60 | ||
67 | /* Resize the string hash table (grow and shrink). */ | 61 | /* Check whether a string has a pattern matching character. */ |
68 | void lj_str_resize(lua_State *L, MSize newmask) | 62 | int lj_str_haspattern(GCstr *s) |
69 | { | 63 | { |
70 | global_State *g = G(L); | 64 | const char *p = strdata(s), *q = p + s->len; |
71 | GCRef *newhash; | 65 | while (p < q) { |
72 | MSize i; | 66 | int c = *(const uint8_t *)p++; |
73 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) | 67 | if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) |
74 | return; /* No resizing during GC traversal or if already too big. */ | 68 | return 1; /* Found a pattern matching char. */ |
75 | newhash = lj_mem_newvec(L, newmask+1, GCRef); | ||
76 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); | ||
77 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ | ||
78 | GCobj *p = gcref(g->strhash[i]); | ||
79 | while (p) { /* Follow each hash chain and reinsert all strings. */ | ||
80 | MSize h = gco2str(p)->hash & newmask; | ||
81 | GCobj *next = gcnext(p); | ||
82 | /* NOBARRIER: The string table is a GC root. */ | ||
83 | setgcrefr(p->gch.nextgc, newhash[h]); | ||
84 | setgcref(newhash[h], p); | ||
85 | p = next; | ||
86 | } | ||
87 | } | 69 | } |
88 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); | 70 | return 0; /* No pattern matching chars found. */ |
89 | g->strmask = newmask; | ||
90 | g->strhash = newhash; | ||
91 | } | 71 | } |
92 | 72 | ||
93 | /* Intern a string and return string object. */ | 73 | /* -- String hashing ------------------------------------------------------ */ |
94 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) | 74 | |
75 | /* Keyed sparse ARX string hash. Constant time. */ | ||
76 | static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) | ||
95 | { | 77 | { |
96 | global_State *g; | 78 | /* Constants taken from lookup3 hash by Bob Jenkins. */ |
97 | GCstr *s; | 79 | StrHash a, b, h = len ^ (StrHash)seed; |
98 | GCobj *o; | ||
99 | MSize len = (MSize)lenx; | ||
100 | MSize a, b, h = len; | ||
101 | if (lenx >= LJ_MAX_STR) | ||
102 | lj_err_msg(L, LJ_ERR_STROV); | ||
103 | g = G(L); | ||
104 | /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ | ||
105 | if (len >= 4) { /* Caveat: unaligned access! */ | 80 | if (len >= 4) { /* Caveat: unaligned access! */ |
106 | a = lj_getu32(str); | 81 | a = lj_getu32(str); |
107 | h ^= lj_getu32(str+len-4); | 82 | h ^= lj_getu32(str+len-4); |
108 | b = lj_getu32(str+(len>>1)-2); | 83 | b = lj_getu32(str+(len>>1)-2); |
109 | h ^= b; h -= lj_rol(b, 14); | 84 | h ^= b; h -= lj_rol(b, 14); |
110 | b += lj_getu32(str+(len>>2)-1); | 85 | b += lj_getu32(str+(len>>2)-1); |
111 | } else if (len > 0) { | 86 | } else { |
112 | a = *(const uint8_t *)str; | 87 | a = *(const uint8_t *)str; |
113 | h ^= *(const uint8_t *)(str+len-1); | 88 | h ^= *(const uint8_t *)(str+len-1); |
114 | b = *(const uint8_t *)(str+(len>>1)); | 89 | b = *(const uint8_t *)(str+(len>>1)); |
115 | h ^= b; h -= lj_rol(b, 14); | 90 | h ^= b; h -= lj_rol(b, 14); |
116 | } else { | ||
117 | return &g->strempty; | ||
118 | } | 91 | } |
119 | a ^= h; a -= lj_rol(h, 11); | 92 | a ^= h; a -= lj_rol(h, 11); |
120 | b ^= a; b -= lj_rol(a, 25); | 93 | b ^= a; b -= lj_rol(a, 25); |
121 | h ^= b; h -= lj_rol(b, 16); | 94 | h ^= b; h -= lj_rol(b, 16); |
122 | /* Check if the string has already been interned. */ | 95 | return h; |
123 | o = gcref(g->strhash[h & g->strmask]); | ||
124 | if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { | ||
125 | while (o != NULL) { | ||
126 | GCstr *sx = gco2str(o); | ||
127 | if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { | ||
128 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ | ||
129 | if (isdead(g, o)) flipwhite(o); | ||
130 | return sx; /* Return existing string. */ | ||
131 | } | ||
132 | o = gcnext(o); | ||
133 | } | ||
134 | } else { /* Slow path: end of string is too close to a page boundary. */ | ||
135 | while (o != NULL) { | ||
136 | GCstr *sx = gco2str(o); | ||
137 | if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { | ||
138 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ | ||
139 | if (isdead(g, o)) flipwhite(o); | ||
140 | return sx; /* Return existing string. */ | ||
141 | } | ||
142 | o = gcnext(o); | ||
143 | } | ||
144 | } | ||
145 | /* Nope, create a new string. */ | ||
146 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); | ||
147 | newwhite(g, s); | ||
148 | s->gct = ~LJ_TSTR; | ||
149 | s->len = len; | ||
150 | s->hash = h; | ||
151 | s->reserved = 0; | ||
152 | memcpy(strdatawr(s), str, len); | ||
153 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ | ||
154 | /* Add it to string hash table. */ | ||
155 | h &= g->strmask; | ||
156 | s->nextgc = g->strhash[h]; | ||
157 | /* NOBARRIER: The string table is a GC root. */ | ||
158 | setgcref(g->strhash[h], obj2gco(s)); | ||
159 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ | ||
160 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ | ||
161 | return s; /* Return newly interned string. */ | ||
162 | } | 96 | } |
163 | 97 | ||
164 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | 98 | #if LUAJIT_SECURITY_STRHASH |
99 | /* Keyed dense ARX string hash. Linear time. */ | ||
100 | static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, | ||
101 | const char *str, MSize len) | ||
165 | { | 102 | { |
166 | g->strnum--; | 103 | StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); |
167 | lj_mem_free(g, s, sizestring(s)); | 104 | if (len > 12) { |
105 | StrHash a = (StrHash)seed; | ||
106 | const char *pe = str+len-12, *p = pe, *q = str; | ||
107 | do { | ||
108 | a += lj_getu32(p); | ||
109 | b += lj_getu32(p+4); | ||
110 | h += lj_getu32(p+8); | ||
111 | p = q; q += 12; | ||
112 | h ^= b; h -= lj_rol(b, 14); | ||
113 | a ^= h; a -= lj_rol(h, 11); | ||
114 | b ^= a; b -= lj_rol(a, 25); | ||
115 | } while (p < pe); | ||
116 | h ^= b; h -= lj_rol(b, 16); | ||
117 | a ^= h; a -= lj_rol(h, 4); | ||
118 | b ^= a; b -= lj_rol(a, 14); | ||
119 | } | ||
120 | return b; | ||
168 | } | 121 | } |
122 | #endif | ||
169 | 123 | ||
170 | /* -- Type conversions ---------------------------------------------------- */ | 124 | /* -- String interning ---------------------------------------------------- */ |
171 | 125 | ||
172 | /* Print number to buffer. Canonicalizes non-finite values. */ | 126 | #define LJ_STR_MAXCOLL 32 |
173 | size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) | ||
174 | { | ||
175 | if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ | ||
176 | lua_Number n = o->n; | ||
177 | #if __BIONIC__ | ||
178 | if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } | ||
179 | #endif | ||
180 | return (size_t)lua_number2str(s, n); | ||
181 | } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { | ||
182 | s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; | ||
183 | } else if ((o->u32.hi & 0x80000000) == 0) { | ||
184 | s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; | ||
185 | } else { | ||
186 | s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; | ||
187 | } | ||
188 | } | ||
189 | 127 | ||
190 | /* Print integer to buffer. Returns pointer to start. */ | 128 | /* Resize the string interning hash table (grow and shrink). */ |
191 | char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) | 129 | void lj_str_resize(lua_State *L, MSize newmask) |
192 | { | 130 | { |
193 | uint32_t u = (uint32_t)(k < 0 ? -k : k); | 131 | global_State *g = G(L); |
194 | p += 1+10; | 132 | GCRef *newtab, *oldtab = g->str.tab; |
195 | do { *--p = (char)('0' + u % 10); } while (u /= 10); | 133 | MSize i; |
196 | if (k < 0) *--p = '-'; | ||
197 | return p; | ||
198 | } | ||
199 | 134 | ||
200 | /* Convert number to string. */ | 135 | /* No resizing during GC traversal or if already too big. */ |
201 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) | 136 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) |
202 | { | 137 | return; |
203 | char buf[LJ_STR_NUMBUF]; | ||
204 | size_t len = lj_str_bufnum(buf, (TValue *)np); | ||
205 | return lj_str_new(L, buf, len); | ||
206 | } | ||
207 | 138 | ||
208 | /* Convert integer to string. */ | 139 | newtab = lj_mem_newvec(L, newmask+1, GCRef); |
209 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) | 140 | memset(newtab, 0, (newmask+1)*sizeof(GCRef)); |
210 | { | ||
211 | char s[1+10]; | ||
212 | char *p = lj_str_bufint(s, k); | ||
213 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); | ||
214 | } | ||
215 | 141 | ||
216 | GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) | 142 | #if LUAJIT_SECURITY_STRHASH |
217 | { | 143 | /* Check which chains need secondary hashes. */ |
218 | return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); | 144 | if (g->str.second) { |
219 | } | 145 | int newsecond = 0; |
146 | /* Compute primary chain lengths. */ | ||
147 | for (i = g->str.mask; i != ~(MSize)0; i--) { | ||
148 | GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); | ||
149 | while (o) { | ||
150 | GCstr *s = gco2str(o); | ||
151 | MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : | ||
152 | s->hash; | ||
153 | hash &= newmask; | ||
154 | setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); | ||
155 | o = gcnext(o); | ||
156 | } | ||
157 | } | ||
158 | /* Mark secondary chains. */ | ||
159 | for (i = newmask; i != ~(MSize)0; i--) { | ||
160 | int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; | ||
161 | newsecond |= secondary; | ||
162 | setgcrefp(newtab[i], secondary); | ||
163 | } | ||
164 | g->str.second = newsecond; | ||
165 | } | ||
166 | #endif | ||
220 | 167 | ||
221 | /* -- String formatting --------------------------------------------------- */ | 168 | /* Reinsert all strings from the old table into the new table. */ |
169 | for (i = g->str.mask; i != ~(MSize)0; i--) { | ||
170 | GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); | ||
171 | while (o) { | ||
172 | GCobj *next = gcnext(o); | ||
173 | GCstr *s = gco2str(o); | ||
174 | MSize hash = s->hash; | ||
175 | #if LUAJIT_SECURITY_STRHASH | ||
176 | uintptr_t u; | ||
177 | if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ | ||
178 | hash &= newmask; | ||
179 | u = gcrefu(newtab[hash]); | ||
180 | if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */ | ||
181 | s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); | ||
182 | s->hashalg = 1; | ||
183 | hash &= newmask; | ||
184 | u = gcrefu(newtab[hash]); | ||
185 | } | ||
186 | } else { /* String hashed with secondary hash. */ | ||
187 | MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); | ||
188 | u = gcrefu(newtab[shash & newmask]); | ||
189 | if (u & 1) { | ||
190 | hash &= newmask; | ||
191 | u = gcrefu(newtab[hash]); | ||
192 | } else { /* Revert string back to primary hash. */ | ||
193 | s->hash = shash; | ||
194 | s->hashalg = 0; | ||
195 | hash = (shash & newmask); | ||
196 | } | ||
197 | } | ||
198 | /* NOBARRIER: The string table is a GC root. */ | ||
199 | setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); | ||
200 | setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); | ||
201 | #else | ||
202 | hash &= newmask; | ||
203 | /* NOBARRIER: The string table is a GC root. */ | ||
204 | setgcrefr(o->gch.nextgc, newtab[hash]); | ||
205 | setgcref(newtab[hash], o); | ||
206 | #endif | ||
207 | o = next; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* Free old table and replace with new table. */ | ||
212 | lj_str_freetab(g); | ||
213 | g->str.tab = newtab; | ||
214 | g->str.mask = newmask; | ||
215 | } | ||
222 | 216 | ||
223 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) | 217 | #if LUAJIT_SECURITY_STRHASH |
218 | /* Rehash and rechain all strings in a chain. */ | ||
219 | static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, | ||
220 | const char *str, MSize len) | ||
224 | { | 221 | { |
225 | char *p; | 222 | global_State *g = G(L); |
226 | MSize i; | 223 | int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */ |
227 | if (sb->n + len > sb->sz) { | 224 | GCRef *strtab = g->str.tab; |
228 | MSize sz = sb->sz * 2; | 225 | MSize strmask = g->str.mask; |
229 | while (sb->n + len > sz) sz = sz * 2; | 226 | GCobj *o = gcref(strtab[hashc & strmask]); |
230 | lj_str_resizebuf(L, sb, sz); | 227 | setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); |
228 | g->str.second = 1; | ||
229 | while (o) { | ||
230 | uintptr_t u; | ||
231 | GCobj *next = gcnext(o); | ||
232 | GCstr *s = gco2str(o); | ||
233 | StrHash hash; | ||
234 | if (ow) { /* Must sweep while rechaining. */ | ||
235 | if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ | ||
236 | lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), | ||
237 | "sweep of undead string"); | ||
238 | makewhite(g, o); | ||
239 | } else { /* Free dead string. */ | ||
240 | lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, | ||
241 | "sweep of unlive string"); | ||
242 | lj_str_free(g, s); | ||
243 | o = next; | ||
244 | continue; | ||
245 | } | ||
246 | } | ||
247 | hash = s->hash; | ||
248 | if (!s->hashalg) { /* Rehash with secondary hash. */ | ||
249 | hash = hash_dense(g->str.seed, hash, strdata(s), s->len); | ||
250 | s->hash = hash; | ||
251 | s->hashalg = 1; | ||
252 | } | ||
253 | /* Rechain. */ | ||
254 | hash &= strmask; | ||
255 | u = gcrefu(strtab[hash]); | ||
256 | setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); | ||
257 | setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); | ||
258 | o = next; | ||
231 | } | 259 | } |
232 | p = sb->buf + sb->n; | 260 | /* Try to insert the pending string again. */ |
233 | sb->n += len; | 261 | return lj_str_new(L, str, len); |
234 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
235 | } | 262 | } |
263 | #endif | ||
264 | |||
265 | /* Reseed String ID from PRNG after random interval < 2^bits. */ | ||
266 | #if LUAJIT_SECURITY_STRID == 1 | ||
267 | #define STRID_RESEED_INTERVAL 8 | ||
268 | #elif LUAJIT_SECURITY_STRID == 2 | ||
269 | #define STRID_RESEED_INTERVAL 4 | ||
270 | #elif LUAJIT_SECURITY_STRID >= 3 | ||
271 | #define STRID_RESEED_INTERVAL 0 | ||
272 | #endif | ||
236 | 273 | ||
237 | static void addchar(lua_State *L, SBuf *sb, int c) | 274 | /* Allocate a new string and add to string interning table. */ |
275 | static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, | ||
276 | StrHash hash, int hashalg) | ||
238 | { | 277 | { |
239 | if (sb->n + 1 > sb->sz) { | 278 | GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); |
240 | MSize sz = sb->sz * 2; | 279 | global_State *g = G(L); |
241 | lj_str_resizebuf(L, sb, sz); | 280 | uintptr_t u; |
281 | newwhite(g, s); | ||
282 | s->gct = ~LJ_TSTR; | ||
283 | s->len = len; | ||
284 | s->hash = hash; | ||
285 | #ifndef STRID_RESEED_INTERVAL | ||
286 | s->sid = g->str.id++; | ||
287 | #elif STRID_RESEED_INTERVAL | ||
288 | if (!g->str.idreseed--) { | ||
289 | uint64_t r = lj_prng_u64(&g->prng); | ||
290 | g->str.id = (StrID)r; | ||
291 | g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); | ||
242 | } | 292 | } |
243 | sb->buf[sb->n++] = (char)c; | 293 | s->sid = g->str.id++; |
294 | #else | ||
295 | s->sid = (StrID)lj_prng_u64(&g->prng); | ||
296 | #endif | ||
297 | s->reserved = 0; | ||
298 | s->hashalg = (uint8_t)hashalg; | ||
299 | /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ | ||
300 | *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; | ||
301 | memcpy(strdatawr(s), str, len); | ||
302 | /* Add to string hash table. */ | ||
303 | hash &= g->str.mask; | ||
304 | u = gcrefu(g->str.tab[hash]); | ||
305 | setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); | ||
306 | /* NOBARRIER: The string table is a GC root. */ | ||
307 | setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); | ||
308 | if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ | ||
309 | lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ | ||
310 | return s; /* Return newly interned string. */ | ||
244 | } | 311 | } |
245 | 312 | ||
246 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | 313 | /* Intern a string and return string object. */ |
247 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) | 314 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) |
248 | { | 315 | { |
249 | SBuf *sb = &G(L)->tmpbuf; | 316 | global_State *g = G(L); |
250 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); | 317 | if (lenx-1 < LJ_MAX_STR-1) { |
251 | lj_str_resetbuf(sb); | 318 | MSize len = (MSize)lenx; |
252 | for (;;) { | 319 | StrHash hash = hash_sparse(g->str.seed, str, len); |
253 | const char *e = strchr(fmt, '%'); | 320 | MSize coll = 0; |
254 | if (e == NULL) break; | 321 | int hashalg = 0; |
255 | addstr(L, sb, fmt, (MSize)(e-fmt)); | 322 | /* Check if the string has already been interned. */ |
256 | /* This function only handles %s, %c, %d, %f and %p formats. */ | 323 | GCobj *o = gcref(g->str.tab[hash & g->str.mask]); |
257 | switch (e[1]) { | 324 | #if LUAJIT_SECURITY_STRHASH |
258 | case 's': { | 325 | if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ |
259 | const char *s = va_arg(argp, char *); | 326 | hashalg = 1; |
260 | if (s == NULL) s = "(null)"; | 327 | hash = hash_dense(g->str.seed, hash, str, len); |
261 | addstr(L, sb, s, (MSize)strlen(s)); | 328 | o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); |
262 | break; | 329 | } |
263 | } | ||
264 | case 'c': | ||
265 | addchar(L, sb, va_arg(argp, int)); | ||
266 | break; | ||
267 | case 'd': { | ||
268 | char buf[LJ_STR_INTBUF]; | ||
269 | char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); | ||
270 | addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); | ||
271 | break; | ||
272 | } | ||
273 | case 'f': { | ||
274 | char buf[LJ_STR_NUMBUF]; | ||
275 | TValue tv; | ||
276 | MSize len; | ||
277 | tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); | ||
278 | len = (MSize)lj_str_bufnum(buf, &tv); | ||
279 | addstr(L, sb, buf, len); | ||
280 | break; | ||
281 | } | ||
282 | case 'p': { | ||
283 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) | ||
284 | char buf[2+FMTP_CHARS]; | ||
285 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); | ||
286 | ptrdiff_t i, lasti = 2+FMTP_CHARS; | ||
287 | if (p == 0) { | ||
288 | addstr(L, sb, "NULL", 4); | ||
289 | break; | ||
290 | } | ||
291 | #if LJ_64 | ||
292 | /* Shorten output for 64 bit pointers. */ | ||
293 | lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); | ||
294 | #endif | 330 | #endif |
295 | buf[0] = '0'; | 331 | while (o != NULL) { |
296 | buf[1] = 'x'; | 332 | GCstr *sx = gco2str(o); |
297 | for (i = lasti-1; i >= 2; i--, p >>= 4) | 333 | if (sx->hash == hash && sx->len == len) { |
298 | buf[i] = "0123456789abcdef"[(p & 15)]; | 334 | if (memcmp(str, strdata(sx), len) == 0) { |
299 | addstr(L, sb, buf, (MSize)lasti); | 335 | if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ |
300 | break; | 336 | return sx; /* Return existing string. */ |
337 | } | ||
338 | coll++; | ||
301 | } | 339 | } |
302 | case '%': | 340 | coll++; |
303 | addchar(L, sb, '%'); | 341 | o = gcnext(o); |
304 | break; | 342 | } |
305 | default: | 343 | #if LUAJIT_SECURITY_STRHASH |
306 | addchar(L, sb, '%'); | 344 | /* Rehash chain if there are too many collisions. */ |
307 | addchar(L, sb, e[1]); | 345 | if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { |
308 | break; | 346 | return lj_str_rehash_chain(L, hash, str, len); |
309 | } | 347 | } |
310 | fmt = e+2; | 348 | #endif |
349 | /* Otherwise allocate a new string. */ | ||
350 | return lj_str_alloc(L, str, len, hash, hashalg); | ||
351 | } else { | ||
352 | if (lenx) | ||
353 | lj_err_msg(L, LJ_ERR_STROV); | ||
354 | return &g->strempty; | ||
311 | } | 355 | } |
312 | addstr(L, sb, fmt, (MSize)strlen(fmt)); | ||
313 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); | ||
314 | incr_top(L); | ||
315 | return strVdata(L->top - 1); | ||
316 | } | 356 | } |
317 | 357 | ||
318 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | 358 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) |
319 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
320 | { | 359 | { |
321 | const char *msg; | 360 | g->str.num--; |
322 | va_list argp; | 361 | lj_mem_free(g, s, lj_str_size(s->len)); |
323 | va_start(argp, fmt); | ||
324 | msg = lj_str_pushvf(L, fmt, argp); | ||
325 | va_end(argp); | ||
326 | return msg; | ||
327 | } | 362 | } |
328 | 363 | ||
329 | /* -- Buffer handling ----------------------------------------------------- */ | 364 | void LJ_FASTCALL lj_str_init(lua_State *L) |
330 | |||
331 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) | ||
332 | { | 365 | { |
333 | if (sz > sb->sz) { | 366 | global_State *g = G(L); |
334 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; | 367 | g->str.seed = lj_prng_u64(&g->prng); |
335 | lj_str_resizebuf(L, sb, sz); | 368 | lj_str_resize(L, LJ_MIN_STRTAB-1); |
336 | } | ||
337 | return sb->buf; | ||
338 | } | 369 | } |
339 | 370 | ||