summaryrefslogtreecommitdiff
path: root/src/lj_str.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lj_str.c')
-rw-r--r--src/lj_str.c301
1 files changed, 301 insertions, 0 deletions
diff --git a/src/lj_str.c b/src/lj_str.c
new file mode 100644
index 00000000..26f91cba
--- /dev/null
+++ b/src/lj_str.c
@@ -0,0 +1,301 @@
1/*
2** String handling.
3** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/
8
9#include <stdio.h>
10
11#define lj_str_c
12#define LUA_CORE
13
14#include "lj_obj.h"
15#include "lj_gc.h"
16#include "lj_err.h"
17#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_ctype.h"
20
21/* -- String interning ---------------------------------------------------- */
22
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t lj_str_cmp(GCstr *a, GCstr *b)
25{
26 MSize i, n = a->len > b->len ? b->len : a->len;
27 for (i = 0; i < n; i += 4) {
28 /* Note: innocuous access up to end of string + 3. */
29 uint32_t va = *(const uint32_t *)(strdata(a)+i);
30 uint32_t vb = *(const uint32_t *)(strdata(b)+i);
31 if (va != vb) {
32#if LJ_ARCH_ENDIAN == LUAJIT_LE
33 va = lj_bswap(va); vb = lj_bswap(vb);
34#endif
35 i -= n;
36 if ((int32_t)i >= -3) {
37 va >>= 32+(i<<3); vb >>= 32+(i<<3);
38 if (va == vb) break;
39 }
40 return (int32_t)(va - vb);
41 }
42 }
43 return (int32_t)(a->len - b->len);
44}
45
46/* Resize the string hash table (grow and shrink). */
47void lj_str_resize(lua_State *L, MSize newmask)
48{
49 global_State *g = G(L);
50 GCRef *newhash;
51 MSize i;
52 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
53 return; /* No resizing during GC traversal or if already too big. */
54 newhash = lj_mem_newvec(L, newmask+1, GCRef);
55 memset(newhash, 0, (newmask+1)*sizeof(GCRef));
56 for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
57 GCobj *p = gcref(g->strhash[i]);
58 while (p) { /* Follow each hash chain and reinsert all strings. */
59 MSize h = gco2str(p)->hash & newmask;
60 GCobj *next = gcnext(p);
61 /* NOBARRIER: The string table is a GC root. */
62 setgcrefr(p->gch.nextgc, newhash[h]);
63 setgcref(newhash[h], p);
64 p = next;
65 }
66 }
67 lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *);
68 g->strmask = newmask;
69 g->strhash = newhash;
70}
71
72/* Intern a string and return string object. */
73GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
74{
75 global_State *g;
76 GCstr *s;
77 GCobj *o;
78 MSize len = (MSize)lenx;
79 MSize h = len;
80 MSize step = (len>>5)+1; /* Partial hash. */
81 MSize l1;
82 if (lenx >= LJ_MAX_STR)
83 lj_err_msg(L, LJ_ERR_STROV);
84 for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */
85 h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
86 /* Check if the string has already been interned. */
87 g = G(L);
88 for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) {
89 GCstr *tso = gco2str(o);
90 if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) {
91 if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
92 return tso; /* Return existing string. */
93 }
94 }
95 /* Nope, create a new string. */
96 s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
97 newwhite(g, s);
98 s->gct = ~LJ_TSTR;
99 s->len = len;
100 s->hash = h;
101 s->reserved = 0;
102 memcpy(strdatawr(s), str, len);
103 strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
104 /* Add it to string hash table. */
105 h &= g->strmask;
106 s->nextgc = g->strhash[h];
107 /* NOBARRIER: The string table is a GC root. */
108 setgcref(g->strhash[h], obj2gco(s));
109 if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
110 lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
111 return s; /* Return newly interned string. */
112}
113
114void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
115{
116 g->strnum--;
117 lj_mem_free(g, s, sizestring(s));
118}
119
120/* -- Type conversions ---------------------------------------------------- */
121
122/* Convert string to number. */
123int lj_str_numconv(const char *s, TValue *n)
124{
125 lua_Number sign = 1;
126 const uint8_t *p = (const uint8_t *)s;
127 while (lj_ctype_isspace(*p)) p++;
128 if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; }
129 if ((uint32_t)(*p - '0') < 10) {
130 uint32_t k = (uint32_t)(*p++ - '0');
131 if (k == 0 && ((*p & ~0x20) == 'X')) {
132 p++;
133 while (lj_ctype_isxdigit(*p)) {
134 if (k >= 0x10000000) goto parsedbl;
135 k = (k << 4) + (*p & 15u);
136 if (!lj_ctype_isdigit(*p)) k += 9;
137 p++;
138 }
139 } else {
140 while ((uint32_t)(*p - '0') < 10) {
141 if (k >= 0x19999999) goto parsedbl;
142 k = k * 10u + (uint32_t)(*p++ - '0');
143 }
144 }
145 while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++;
146 if (LJ_LIKELY(*p == '\0')) {
147 setnumV(n, sign * cast_num(k));
148 return 1;
149 }
150 }
151parsedbl:
152 {
153 TValue tv;
154 char *endptr;
155 setnumV(&tv, lua_str2number(s, &endptr));
156 if (endptr == s) return 0; /* conversion failed */
157 if (LJ_UNLIKELY(*endptr != '\0')) {
158 while (lj_ctype_isspace((uint8_t)*endptr)) endptr++;
159 if (*endptr != '\0') return 0; /* invalid trailing characters? */
160 }
161 if (LJ_LIKELY(!tvisnan(&tv)))
162 setnumV(n, numV(&tv));
163 else
164 setnanV(n); /* Canonicalize injected NaNs. */
165 return 1;
166 }
167}
168
169/* Convert number to string. */
170GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
171{
172 char s[LUAI_MAXNUMBER2STR];
173 lua_Number n = *np;
174 size_t len = (size_t)lua_number2str(s, n);
175 return lj_str_new(L, s, len);
176}
177
178/* Convert integer to string. */
179GCstr *lj_str_fromint(lua_State *L, int32_t k)
180{
181 char s[1+10];
182 char *p = s+sizeof(s);
183 uint32_t i = (uint32_t)(k < 0 ? -k : k);
184 do { *--p = (char)('0' + i % 10); } while (i /= 10);
185 if (k < 0) *--p = '-';
186 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
187}
188
189/* -- String formatting --------------------------------------------------- */
190
191static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
192{
193 char *p;
194 MSize i;
195 if (sb->n + len > sb->sz) {
196 MSize sz = sb->sz * 2;
197 while (sb->n + len > sz) sz = sz * 2;
198 lj_str_resizebuf(L, sb, sz);
199 }
200 p = sb->buf + sb->n;
201 sb->n += len;
202 for (i = 0; i < len; i++) p[i] = str[i];
203}
204
205static void addchar(lua_State *L, SBuf *sb, int c)
206{
207 if (sb->n + 1 > sb->sz) {
208 MSize sz = sb->sz * 2;
209 lj_str_resizebuf(L, sb, sz);
210 }
211 sb->buf[sb->n++] = cast(char, c);
212}
213
214/* Push formatted message as a string object to Lua stack. va_list variant. */
215const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
216{
217 SBuf *sb = &G(L)->tmpbuf;
218 lj_str_needbuf(L, sb, (MSize)strlen(fmt));
219 lj_str_resetbuf(sb);
220 for (;;) {
221 const char *e = strchr(fmt, '%');
222 if (e == NULL) break;
223 addstr(L, sb, fmt, (MSize)(e-fmt));
224 /* This function only handles %s, %c, %d, %f and %p formats. */
225 switch (e[1]) {
226 case 's': {
227 const char *s = va_arg(argp, char *);
228 if (s == NULL) s = "(null)";
229 addstr(L, sb, s, (MSize)strlen(s));
230 break;
231 }
232 case 'c':
233 addchar(L, sb, va_arg(argp, int));
234 break;
235 case 'd': {
236 char buff[1+10];
237 char *p = buff+sizeof(buff);
238 int32_t k = va_arg(argp, int32_t);
239 uint32_t i = (uint32_t)(k < 0 ? -k : k);
240 do { *--p = (char)('0' + i % 10); } while (i /= 10);
241 if (k < 0) *--p = '-';
242 addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p));
243 break;
244 }
245 case 'f': {
246 char buff[LUAI_MAXNUMBER2STR];
247 lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER));
248 MSize len = (MSize)lua_number2str(buff, n);
249 addstr(L, sb, buff, len);
250 break;
251 }
252 case 'p': {
253#define FMTP_CHARS (2*sizeof(ptrdiff_t))
254 char buff[2+FMTP_CHARS];
255 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
256 int i;
257 buff[0] = '0';
258 buff[1] = 'x';
259 for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4)
260 buff[i] = "0123456789abcdef"[(p & 15)];
261 addstr(L, sb, buff, 2+FMTP_CHARS);
262 break;
263 }
264 case '%':
265 addchar(L, sb, '%');
266 break;
267 default:
268 addchar(L, sb, '%');
269 addchar(L, sb, e[1]);
270 break;
271 }
272 fmt = e+2;
273 }
274 addstr(L, sb, fmt, (MSize)strlen(fmt));
275 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
276 incr_top(L);
277 return strVdata(L->top - 1);
278}
279
280/* Push formatted message as a string object to Lua stack. Vararg variant. */
281const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
282{
283 const char *msg;
284 va_list argp;
285 va_start(argp, fmt);
286 msg = lj_str_pushvf(L, fmt, argp);
287 va_end(argp);
288 return msg;
289}
290
291/* -- Buffer handling ----------------------------------------------------- */
292
293char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
294{
295 if (sz > sb->sz) {
296 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
297 lj_str_resizebuf(L, sb, sz);
298 }
299 return sb->buf;
300}
301