diff options
Diffstat (limited to 'src/lj_str.c')
-rw-r--r-- | src/lj_str.c | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/src/lj_str.c b/src/lj_str.c new file mode 100644 index 00000000..26f91cba --- /dev/null +++ b/src/lj_str.c | |||
@@ -0,0 +1,301 @@ | |||
1 | /* | ||
2 | ** String handling. | ||
3 | ** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lj_str_c | ||
12 | #define LUA_CORE | ||
13 | |||
14 | #include "lj_obj.h" | ||
15 | #include "lj_gc.h" | ||
16 | #include "lj_err.h" | ||
17 | #include "lj_str.h" | ||
18 | #include "lj_state.h" | ||
19 | #include "lj_ctype.h" | ||
20 | |||
21 | /* -- String interning ---------------------------------------------------- */ | ||
22 | |||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | ||
24 | int32_t lj_str_cmp(GCstr *a, GCstr *b) | ||
25 | { | ||
26 | MSize i, n = a->len > b->len ? b->len : a->len; | ||
27 | for (i = 0; i < n; i += 4) { | ||
28 | /* Note: innocuous access up to end of string + 3. */ | ||
29 | uint32_t va = *(const uint32_t *)(strdata(a)+i); | ||
30 | uint32_t vb = *(const uint32_t *)(strdata(b)+i); | ||
31 | if (va != vb) { | ||
32 | #if LJ_ARCH_ENDIAN == LUAJIT_LE | ||
33 | va = lj_bswap(va); vb = lj_bswap(vb); | ||
34 | #endif | ||
35 | i -= n; | ||
36 | if ((int32_t)i >= -3) { | ||
37 | va >>= 32+(i<<3); vb >>= 32+(i<<3); | ||
38 | if (va == vb) break; | ||
39 | } | ||
40 | return (int32_t)(va - vb); | ||
41 | } | ||
42 | } | ||
43 | return (int32_t)(a->len - b->len); | ||
44 | } | ||
45 | |||
46 | /* Resize the string hash table (grow and shrink). */ | ||
47 | void lj_str_resize(lua_State *L, MSize newmask) | ||
48 | { | ||
49 | global_State *g = G(L); | ||
50 | GCRef *newhash; | ||
51 | MSize i; | ||
52 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) | ||
53 | return; /* No resizing during GC traversal or if already too big. */ | ||
54 | newhash = lj_mem_newvec(L, newmask+1, GCRef); | ||
55 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); | ||
56 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ | ||
57 | GCobj *p = gcref(g->strhash[i]); | ||
58 | while (p) { /* Follow each hash chain and reinsert all strings. */ | ||
59 | MSize h = gco2str(p)->hash & newmask; | ||
60 | GCobj *next = gcnext(p); | ||
61 | /* NOBARRIER: The string table is a GC root. */ | ||
62 | setgcrefr(p->gch.nextgc, newhash[h]); | ||
63 | setgcref(newhash[h], p); | ||
64 | p = next; | ||
65 | } | ||
66 | } | ||
67 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *); | ||
68 | g->strmask = newmask; | ||
69 | g->strhash = newhash; | ||
70 | } | ||
71 | |||
72 | /* Intern a string and return string object. */ | ||
73 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) | ||
74 | { | ||
75 | global_State *g; | ||
76 | GCstr *s; | ||
77 | GCobj *o; | ||
78 | MSize len = (MSize)lenx; | ||
79 | MSize h = len; | ||
80 | MSize step = (len>>5)+1; /* Partial hash. */ | ||
81 | MSize l1; | ||
82 | if (lenx >= LJ_MAX_STR) | ||
83 | lj_err_msg(L, LJ_ERR_STROV); | ||
84 | for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */ | ||
85 | h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1])); | ||
86 | /* Check if the string has already been interned. */ | ||
87 | g = G(L); | ||
88 | for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) { | ||
89 | GCstr *tso = gco2str(o); | ||
90 | if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) { | ||
91 | if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ | ||
92 | return tso; /* Return existing string. */ | ||
93 | } | ||
94 | } | ||
95 | /* Nope, create a new string. */ | ||
96 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); | ||
97 | newwhite(g, s); | ||
98 | s->gct = ~LJ_TSTR; | ||
99 | s->len = len; | ||
100 | s->hash = h; | ||
101 | s->reserved = 0; | ||
102 | memcpy(strdatawr(s), str, len); | ||
103 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ | ||
104 | /* Add it to string hash table. */ | ||
105 | h &= g->strmask; | ||
106 | s->nextgc = g->strhash[h]; | ||
107 | /* NOBARRIER: The string table is a GC root. */ | ||
108 | setgcref(g->strhash[h], obj2gco(s)); | ||
109 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ | ||
110 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ | ||
111 | return s; /* Return newly interned string. */ | ||
112 | } | ||
113 | |||
114 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | ||
115 | { | ||
116 | g->strnum--; | ||
117 | lj_mem_free(g, s, sizestring(s)); | ||
118 | } | ||
119 | |||
120 | /* -- Type conversions ---------------------------------------------------- */ | ||
121 | |||
122 | /* Convert string to number. */ | ||
123 | int lj_str_numconv(const char *s, TValue *n) | ||
124 | { | ||
125 | lua_Number sign = 1; | ||
126 | const uint8_t *p = (const uint8_t *)s; | ||
127 | while (lj_ctype_isspace(*p)) p++; | ||
128 | if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; } | ||
129 | if ((uint32_t)(*p - '0') < 10) { | ||
130 | uint32_t k = (uint32_t)(*p++ - '0'); | ||
131 | if (k == 0 && ((*p & ~0x20) == 'X')) { | ||
132 | p++; | ||
133 | while (lj_ctype_isxdigit(*p)) { | ||
134 | if (k >= 0x10000000) goto parsedbl; | ||
135 | k = (k << 4) + (*p & 15u); | ||
136 | if (!lj_ctype_isdigit(*p)) k += 9; | ||
137 | p++; | ||
138 | } | ||
139 | } else { | ||
140 | while ((uint32_t)(*p - '0') < 10) { | ||
141 | if (k >= 0x19999999) goto parsedbl; | ||
142 | k = k * 10u + (uint32_t)(*p++ - '0'); | ||
143 | } | ||
144 | } | ||
145 | while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++; | ||
146 | if (LJ_LIKELY(*p == '\0')) { | ||
147 | setnumV(n, sign * cast_num(k)); | ||
148 | return 1; | ||
149 | } | ||
150 | } | ||
151 | parsedbl: | ||
152 | { | ||
153 | TValue tv; | ||
154 | char *endptr; | ||
155 | setnumV(&tv, lua_str2number(s, &endptr)); | ||
156 | if (endptr == s) return 0; /* conversion failed */ | ||
157 | if (LJ_UNLIKELY(*endptr != '\0')) { | ||
158 | while (lj_ctype_isspace((uint8_t)*endptr)) endptr++; | ||
159 | if (*endptr != '\0') return 0; /* invalid trailing characters? */ | ||
160 | } | ||
161 | if (LJ_LIKELY(!tvisnan(&tv))) | ||
162 | setnumV(n, numV(&tv)); | ||
163 | else | ||
164 | setnanV(n); /* Canonicalize injected NaNs. */ | ||
165 | return 1; | ||
166 | } | ||
167 | } | ||
168 | |||
169 | /* Convert number to string. */ | ||
170 | GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) | ||
171 | { | ||
172 | char s[LUAI_MAXNUMBER2STR]; | ||
173 | lua_Number n = *np; | ||
174 | size_t len = (size_t)lua_number2str(s, n); | ||
175 | return lj_str_new(L, s, len); | ||
176 | } | ||
177 | |||
178 | /* Convert integer to string. */ | ||
179 | GCstr *lj_str_fromint(lua_State *L, int32_t k) | ||
180 | { | ||
181 | char s[1+10]; | ||
182 | char *p = s+sizeof(s); | ||
183 | uint32_t i = (uint32_t)(k < 0 ? -k : k); | ||
184 | do { *--p = (char)('0' + i % 10); } while (i /= 10); | ||
185 | if (k < 0) *--p = '-'; | ||
186 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); | ||
187 | } | ||
188 | |||
189 | /* -- String formatting --------------------------------------------------- */ | ||
190 | |||
191 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) | ||
192 | { | ||
193 | char *p; | ||
194 | MSize i; | ||
195 | if (sb->n + len > sb->sz) { | ||
196 | MSize sz = sb->sz * 2; | ||
197 | while (sb->n + len > sz) sz = sz * 2; | ||
198 | lj_str_resizebuf(L, sb, sz); | ||
199 | } | ||
200 | p = sb->buf + sb->n; | ||
201 | sb->n += len; | ||
202 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
203 | } | ||
204 | |||
205 | static void addchar(lua_State *L, SBuf *sb, int c) | ||
206 | { | ||
207 | if (sb->n + 1 > sb->sz) { | ||
208 | MSize sz = sb->sz * 2; | ||
209 | lj_str_resizebuf(L, sb, sz); | ||
210 | } | ||
211 | sb->buf[sb->n++] = cast(char, c); | ||
212 | } | ||
213 | |||
214 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | ||
215 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) | ||
216 | { | ||
217 | SBuf *sb = &G(L)->tmpbuf; | ||
218 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); | ||
219 | lj_str_resetbuf(sb); | ||
220 | for (;;) { | ||
221 | const char *e = strchr(fmt, '%'); | ||
222 | if (e == NULL) break; | ||
223 | addstr(L, sb, fmt, (MSize)(e-fmt)); | ||
224 | /* This function only handles %s, %c, %d, %f and %p formats. */ | ||
225 | switch (e[1]) { | ||
226 | case 's': { | ||
227 | const char *s = va_arg(argp, char *); | ||
228 | if (s == NULL) s = "(null)"; | ||
229 | addstr(L, sb, s, (MSize)strlen(s)); | ||
230 | break; | ||
231 | } | ||
232 | case 'c': | ||
233 | addchar(L, sb, va_arg(argp, int)); | ||
234 | break; | ||
235 | case 'd': { | ||
236 | char buff[1+10]; | ||
237 | char *p = buff+sizeof(buff); | ||
238 | int32_t k = va_arg(argp, int32_t); | ||
239 | uint32_t i = (uint32_t)(k < 0 ? -k : k); | ||
240 | do { *--p = (char)('0' + i % 10); } while (i /= 10); | ||
241 | if (k < 0) *--p = '-'; | ||
242 | addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p)); | ||
243 | break; | ||
244 | } | ||
245 | case 'f': { | ||
246 | char buff[LUAI_MAXNUMBER2STR]; | ||
247 | lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER)); | ||
248 | MSize len = (MSize)lua_number2str(buff, n); | ||
249 | addstr(L, sb, buff, len); | ||
250 | break; | ||
251 | } | ||
252 | case 'p': { | ||
253 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) | ||
254 | char buff[2+FMTP_CHARS]; | ||
255 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); | ||
256 | int i; | ||
257 | buff[0] = '0'; | ||
258 | buff[1] = 'x'; | ||
259 | for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4) | ||
260 | buff[i] = "0123456789abcdef"[(p & 15)]; | ||
261 | addstr(L, sb, buff, 2+FMTP_CHARS); | ||
262 | break; | ||
263 | } | ||
264 | case '%': | ||
265 | addchar(L, sb, '%'); | ||
266 | break; | ||
267 | default: | ||
268 | addchar(L, sb, '%'); | ||
269 | addchar(L, sb, e[1]); | ||
270 | break; | ||
271 | } | ||
272 | fmt = e+2; | ||
273 | } | ||
274 | addstr(L, sb, fmt, (MSize)strlen(fmt)); | ||
275 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); | ||
276 | incr_top(L); | ||
277 | return strVdata(L->top - 1); | ||
278 | } | ||
279 | |||
280 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | ||
281 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
282 | { | ||
283 | const char *msg; | ||
284 | va_list argp; | ||
285 | va_start(argp, fmt); | ||
286 | msg = lj_str_pushvf(L, fmt, argp); | ||
287 | va_end(argp); | ||
288 | return msg; | ||
289 | } | ||
290 | |||
291 | /* -- Buffer handling ----------------------------------------------------- */ | ||
292 | |||
293 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) | ||
294 | { | ||
295 | if (sz > sb->sz) { | ||
296 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; | ||
297 | lj_str_resizebuf(L, sb, sz); | ||
298 | } | ||
299 | return sb->buf; | ||
300 | } | ||
301 | |||