diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1997-09-16 16:25:59 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1997-09-16 16:25:59 -0300 |
commit | 75ac0d217266dba48a887df96d37398140e22b9e (patch) | |
tree | 876365c903fcc411922c4e9ecd1a8cf41e5253d1 /lstrlib.c | |
parent | 9f3785a2f375989fbac59f82efe38768919cc334 (diff) | |
download | lua-75ac0d217266dba48a887df96d37398140e22b9e.tar.gz lua-75ac0d217266dba48a887df96d37398140e22b9e.tar.bz2 lua-75ac0d217266dba48a887df96d37398140e22b9e.zip |
Standard library for strings and pattern-matching
Diffstat (limited to 'lstrlib.c')
-rw-r--r-- | lstrlib.c | 534 |
1 files changed, 534 insertions, 0 deletions
diff --git a/lstrlib.c b/lstrlib.c new file mode 100644 index 00000000..eba282de --- /dev/null +++ b/lstrlib.c | |||
@@ -0,0 +1,534 @@ | |||
1 | /* | ||
2 | ** $Id: lstrlib.c,v 1.1 1997/08/14 19:47:57 roberto Exp roberto $ | ||
3 | ** Standard library for strings and pattern-matching | ||
4 | ** See Copyright Notice in lua.h | ||
5 | */ | ||
6 | |||
7 | |||
8 | #include <ctype.h> | ||
9 | #include <stdio.h> | ||
10 | #include <stdlib.h> | ||
11 | #include <string.h> | ||
12 | |||
13 | #include "lauxlib.h" | ||
14 | #include "lua.h" | ||
15 | #include "lualib.h" | ||
16 | |||
17 | |||
/*
** Growable character buffer shared by the functions of this library.
** A single static instance exists; it starts out empty and unallocated.
*/
static struct lbuff {
  char *b;      /* start of the allocated storage (NULL before first use) */
  size_t max;   /* number of bytes currently allocated at `b' */
  size_t size;  /* number of bytes of `b' already filled */
} lbuffer = {NULL, 0, 0};
25 | |||
26 | |||
27 | static char *strbuffer (unsigned long size) | ||
28 | { | ||
29 | if (size > lbuffer.max) { | ||
30 | /* ANSI "realloc" doesn't need this test, but some machines (Sun!) | ||
31 | don't follow ANSI */ | ||
32 | lbuffer.b = (lbuffer.b) ? realloc(lbuffer.b, lbuffer.max=size) : | ||
33 | malloc(lbuffer.max=size); | ||
34 | if (lbuffer.b == NULL) | ||
35 | lua_error("memory overflow"); | ||
36 | } | ||
37 | return lbuffer.b; | ||
38 | } | ||
39 | |||
40 | |||
41 | static char *openspace (unsigned long size) | ||
42 | { | ||
43 | char *buff = strbuffer(lbuffer.size+size); | ||
44 | return buff+lbuffer.size; | ||
45 | } | ||
46 | |||
47 | |||
48 | char *luaI_addchar (int c) | ||
49 | { | ||
50 | if (lbuffer.size >= lbuffer.max) | ||
51 | strbuffer(lbuffer.max == 0 ? 100 : lbuffer.max*2); | ||
52 | lbuffer.b[lbuffer.size++] = c; | ||
53 | return lbuffer.b; | ||
54 | } | ||
55 | |||
56 | |||
57 | void luaI_emptybuff (void) | ||
58 | { | ||
59 | lbuffer.size = 0; /* prepare for next string */ | ||
60 | } | ||
61 | |||
62 | |||
63 | static void addnchar (char *s, int n) | ||
64 | { | ||
65 | char *b = openspace(n); | ||
66 | strncpy(b, s, n); | ||
67 | lbuffer.size += n; | ||
68 | } | ||
69 | |||
70 | |||
/*
** Appends the whole zero-terminated string `s' (without its terminator)
** to the shared buffer.
*/
static void addstr (char *s)
{
  int len = strlen(s);
  addnchar(s, len);
}
75 | |||
76 | |||
/*
** strlen(s): pushes the length of the string given as argument 1.
*/
static void str_len (void)
{
  char *s = luaL_check_string(1);
  lua_pushnumber(strlen(s));
}
81 | |||
82 | |||
/*
** strsub(s, start [, end]): pushes the substring of argument 1 running
** from position `start' to position `end' (1-based, inclusive; `end'
** defaults to -1). Negative positions count from the end of the string.
** When the resulting range is not inside the string, or is empty, the
** empty string is pushed.
*/
static void str_sub (void)
{
  char *str = luaL_check_string(1);
  long len = strlen(str);
  long first = (long)luaL_check_number(2);
  long last = (long)luaL_opt_number(3, -1);
  if (first < 0) first += len+1;  /* translate positions relative to end */
  if (last < 0) last += len+1;
  if (first < 1 || first > last || last > len) {
    lua_pushstring("");
    return;
  }
  luaI_emptybuff();
  addnchar(str+first-1, last-first+1);
  lua_pushstring(luaI_addchar(0));
}
98 | |||
99 | |||
/*
** strlower(s): pushes a copy of argument 1 with every character passed
** through "tolower".
*/
static void str_lower (void)
{
  char *s;
  luaI_emptybuff();
  s = luaL_check_string(1);
  while (*s) {
    luaI_addchar(tolower((unsigned char)*s));
    s++;
  }
  lua_pushstring(luaI_addchar(0));
}
108 | |||
109 | |||
/*
** strupper(s): pushes a copy of argument 1 with every character passed
** through "toupper".
*/
static void str_upper (void)
{
  char *s;
  luaI_emptybuff();
  s = luaL_check_string(1);
  while (*s) {
    luaI_addchar(toupper((unsigned char)*s));
    s++;
  }
  lua_pushstring(luaI_addchar(0));
}
118 | |||
/*
** strrep(s, n): pushes the concatenation of `n' copies of argument 1
** (the empty string when `n' is not positive).
*/
static void str_rep (void)
{
  char *s = luaL_check_string(1);
  int n = (int)luaL_check_number(2);
  luaI_emptybuff();
  for (; n > 0; n--)
    addstr(s);
  lua_pushstring(luaI_addchar(0));
}
128 | |||
129 | |||
/*
** ascii(s [, pos]): pushes the code (as an unsigned char value) of the
** character at 1-based position `pos' of argument 1; `pos' defaults to 1.
** A position outside the string is reported as a bad argument 2.
*/
static void str_ascii (void)
{
  char *s = luaL_check_string(1);
  long pos = (long)luaL_opt_number(2, 1);
  pos--;  /* switch to a 0-based index */
  luaL_arg_check(0<=pos && pos<strlen(s), 2, "out of range");
  lua_pushnumber((unsigned char)s[pos]);
}
137 | |||
138 | |||
139 | |||
140 | /* | ||
141 | ** ======================================================= | ||
142 | ** PATTERN MATCHING | ||
143 | ** ======================================================= | ||
144 | */ | ||
145 | |||
/* maximum number of captures a single pattern may contain */
#define MAX_CAPT 9

/*
** State of the captures of the match in progress. Entry `i' describes
** the i-th capture opened by the pattern.
*/
static struct {
  char *init;  /* where the captured text starts in the subject */
  int len;     /* captured length; -1 signals unfinished capture */
} capture[MAX_CAPT];

/* number of captures; only valid after a successful call to match */
static int num_captures;


/* character that introduces classes, back-references and escapes */
#define ESC '%'
/* a pattern containing none of these can be searched as plain text */
#define SPECIALS "^$*?.([%-"
158 | |||
159 | |||
160 | static void push_captures (void) | ||
161 | { | ||
162 | int i; | ||
163 | for (i=0; i<num_captures; i++) { | ||
164 | int l = capture[i].len; | ||
165 | char *buff = openspace(l+1); | ||
166 | if (l == -1) lua_error("unfinished capture"); | ||
167 | strncpy(buff, capture[i].init, l); | ||
168 | buff[l] = 0; | ||
169 | lua_pushstring(buff); | ||
170 | } | ||
171 | } | ||
172 | |||
173 | |||
174 | static int check_cap (int l, int level) | ||
175 | { | ||
176 | l -= '1'; | ||
177 | if (!(0 <= l && l < level && capture[l].len != -1)) | ||
178 | lua_error("invalid capture index"); | ||
179 | return l; | ||
180 | } | ||
181 | |||
182 | |||
183 | static int capture_to_close (int level) | ||
184 | { | ||
185 | for (level--; level>=0; level--) | ||
186 | if (capture[level].len == -1) return level; | ||
187 | lua_error("invalid pattern capture"); | ||
188 | return 0; /* to avoid warnings */ | ||
189 | } | ||
190 | |||
191 | |||
/*
** Given `p' pointing just after the '[' of a character set, returns the
** address of the ']' that closes the set, or NULL when there is none.
** The first character of the set (after an optional leading '^') is
** always skipped, so a ']' in that position is an ordinary member.
** A pattern that ends right after "[" or "[^" yields NULL; the search
** never starts beyond the string terminator.
*/
static char *bracket_end (char *p)
{
  if (*p == 0) return NULL;
  if (*p == '^') {
    p++;  /* skip the complement mark */
    if (*p == 0) return NULL;  /* pattern ends with "[^" */
  }
  return strchr(p+1, ']');
}
196 | |||
197 | |||
/*
** Tests character `c' against the class letter `cl'. Lower-case letters
** a, c, d, l, p, s, u, w select the corresponding <ctype.h> predicate;
** the upper-case form of a letter selects its complement. Any other `cl'
** matches only itself. The zero character never matches anything.
** Returns non-zero on a match and 0 otherwise.
*/
static int matchclass (int c, int cl)
{
  int uc = (unsigned char)c;
  int kind = tolower((unsigned char)cl);
  int res;
  if (c == 0) return 0;
  if (kind == 'a') res = isalpha(uc);
  else if (kind == 'c') res = iscntrl(uc);
  else if (kind == 'd') res = isdigit(uc);
  else if (kind == 'l') res = islower(uc);
  else if (kind == 'p') res = ispunct(uc);
  else if (kind == 's') res = isspace(uc);
  else if (kind == 'u') res = isupper(uc);
  else if (kind == 'w') res = isalnum(uc);
  else return (cl == c);  /* not a class letter: literal comparison */
  if (islower((unsigned char)cl))
    return res;
  return !res;  /* upper-case class letter: complement */
}
215 | |||
216 | |||
217 | int luaI_singlematch (int c, char *p, char **ep) | ||
218 | { | ||
219 | switch (*p) { | ||
220 | case '\0': | ||
221 | *ep = p; | ||
222 | return 0; | ||
223 | case '.': | ||
224 | *ep = p+1; | ||
225 | return (c != 0); | ||
226 | case ESC: | ||
227 | if (*(++p) == '\0') | ||
228 | luaL_verror("incorrect pattern (ends with `%c')", ESC); | ||
229 | *ep = p+1; | ||
230 | return matchclass(c, *p); | ||
231 | case '[': { | ||
232 | char *end = bracket_end(p+1); | ||
233 | int sig = *(p+1) == '^' ? (p++, 0) : 1; | ||
234 | if (end == NULL) lua_error("incorrect pattern (missing `]')"); | ||
235 | *ep = end+1; | ||
236 | if (c == 0) return 0; | ||
237 | while (++p < end) { | ||
238 | if (*p == ESC) { | ||
239 | if (((p+1) < end) && matchclass(c, *++p)) return sig; | ||
240 | } | ||
241 | else if ((*(p+1) == '-') && (p+2 < end)) { | ||
242 | p+=2; | ||
243 | if (*(p-2) <= c && c <= *p) return sig; | ||
244 | } | ||
245 | else if (*p == c) return sig; | ||
246 | } | ||
247 | return !sig; | ||
248 | } | ||
249 | default: | ||
250 | *ep = p+1; | ||
251 | return (*p == c); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | |||
/*
** Matches a balanced run that starts at `s' with the opening character
** `b' and ends with the matching closing character `e'. Returns the
** address just after the closing character, or NULL when `s' does not
** start with `b' or the string ends before the run is balanced.
** The closing test comes first, so when `b' and `e' are the same
** character its second occurrence closes the run.
*/
static char *matchbalance (char *s, int b, int e)
{
  int depth = 1;  /* number of openings still waiting to be closed */
  if (*s != b) return NULL;
  for (s++; *s; s++) {
    if (*s == e) {
      depth--;
      if (depth == 0) return s+1;
    }
    else if (*s == b)
      depth++;
  }
  return NULL;  /* string ends out of balance */
}
270 | |||
271 | |||
272 | static char *matchitem (char *s, char *p, int level, char **ep) | ||
273 | { | ||
274 | if (*p == ESC) { | ||
275 | p++; | ||
276 | if (isdigit((unsigned char)*p)) { /* capture */ | ||
277 | int l = check_cap(*p, level); | ||
278 | *ep = p+1; | ||
279 | if (strncmp(capture[l].init, s, capture[l].len) == 0) | ||
280 | return s+capture[l].len; | ||
281 | else return NULL; | ||
282 | } | ||
283 | else if (*p == 'b') { /* balanced string */ | ||
284 | p++; | ||
285 | if (*p == 0 || *(p+1) == 0) | ||
286 | lua_error("bad balanced pattern specification"); | ||
287 | *ep = p+2; | ||
288 | return matchbalance(s, *p, *(p+1)); | ||
289 | } | ||
290 | else p--; /* and go through */ | ||
291 | } | ||
292 | return (luaI_singlematch(*s, p, ep) ? s+1 : NULL); | ||
293 | } | ||
294 | |||
295 | |||
296 | static char *match (char *s, char *p, int level) | ||
297 | { | ||
298 | init: /* using goto's to optimize tail recursion */ | ||
299 | switch (*p) { | ||
300 | case '(': /* start capture */ | ||
301 | if (level >= MAX_CAPT) lua_error("too many captures"); | ||
302 | capture[level].init = s; | ||
303 | capture[level].len = -1; | ||
304 | level++; p++; goto init; /* return match(s, p+1, level); */ | ||
305 | case ')': { /* end capture */ | ||
306 | int l = capture_to_close(level); | ||
307 | char *res; | ||
308 | capture[l].len = s - capture[l].init; /* close capture */ | ||
309 | if ((res = match(s, p+1, level)) == NULL) /* match failed? */ | ||
310 | capture[l].len = -1; /* undo capture */ | ||
311 | return res; | ||
312 | } | ||
313 | case '\0': case '$': /* (possibly) end of pattern */ | ||
314 | if (*p == 0 || (*(p+1) == 0 && *s == 0)) { | ||
315 | num_captures = level; | ||
316 | return s; | ||
317 | } | ||
318 | /* else go through */ | ||
319 | default: { /* it is a pattern item */ | ||
320 | char *ep; /* get what is next */ | ||
321 | char *s1 = matchitem(s, p, level, &ep); | ||
322 | switch (*ep) { | ||
323 | case '*': { /* repetition */ | ||
324 | char *res; | ||
325 | if (s1 && (res = match(s1, p, level))) | ||
326 | return res; | ||
327 | p=ep+1; goto init; /* else return match(s, ep+1, level); */ | ||
328 | } | ||
329 | case '-': { /* repetition */ | ||
330 | char *res; | ||
331 | if ((res = match(s, ep+1, level)) != 0) | ||
332 | return res; | ||
333 | else if (s1) { | ||
334 | s = s1; | ||
335 | goto init; /* return match(s1, p, level); */ | ||
336 | } | ||
337 | else | ||
338 | return NULL; | ||
339 | } | ||
340 | case '?': { /* optional */ | ||
341 | char *res; | ||
342 | if (s1 && (res = match(s1, ep+1, level))) | ||
343 | return res; | ||
344 | p=ep+1; goto init; /* else return match(s, ep+1, level); */ | ||
345 | } | ||
346 | default: | ||
347 | if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, level); */ | ||
348 | else return NULL; | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | } | ||
353 | |||
354 | |||
355 | static void str_find (void) | ||
356 | { | ||
357 | char *s = luaL_check_string(1); | ||
358 | char *p = luaL_check_string(2); | ||
359 | long init = (long)luaL_opt_number(3, 1) - 1; | ||
360 | luaL_arg_check(0 <= init && init <= strlen(s), 3, "out of range"); | ||
361 | if (lua_getparam(4) != LUA_NOOBJECT || | ||
362 | strpbrk(p, SPECIALS) == NULL) { /* no special caracters? */ | ||
363 | char *s2 = strstr(s+init, p); | ||
364 | if (s2) { | ||
365 | lua_pushnumber(s2-s+1); | ||
366 | lua_pushnumber(s2-s+strlen(p)); | ||
367 | } | ||
368 | } | ||
369 | else { | ||
370 | int anchor = (*p == '^') ? (p++, 1) : 0; | ||
371 | char *s1=s+init; | ||
372 | do { | ||
373 | char *res; | ||
374 | if ((res=match(s1, p, 0)) != NULL) { | ||
375 | lua_pushnumber(s1-s+1); /* start */ | ||
376 | lua_pushnumber(res-s); /* end */ | ||
377 | push_captures(); | ||
378 | return; | ||
379 | } | ||
380 | } while (*s1++ && !anchor); | ||
381 | } | ||
382 | } | ||
383 | |||
384 | |||
385 | static void add_s (lua_Object newp) | ||
386 | { | ||
387 | if (lua_isstring(newp)) { | ||
388 | char *news = lua_getstring(newp); | ||
389 | while (*news) { | ||
390 | if (*news != ESC || !isdigit((unsigned char)*++news)) | ||
391 | luaI_addchar(*news++); | ||
392 | else { | ||
393 | int l = check_cap(*news++, num_captures); | ||
394 | addnchar(capture[l].init, capture[l].len); | ||
395 | } | ||
396 | } | ||
397 | } | ||
398 | else if (lua_isfunction(newp)) { | ||
399 | lua_Object res; | ||
400 | struct lbuff oldbuff; | ||
401 | int status; | ||
402 | lua_beginblock(); | ||
403 | push_captures(); | ||
404 | /* function may use lbuffer, so save it and create a luaM_new one */ | ||
405 | oldbuff = lbuffer; | ||
406 | lbuffer.b = NULL; lbuffer.max = lbuffer.size = 0; | ||
407 | status = lua_callfunction(newp); | ||
408 | /* restore old buffer */ | ||
409 | free(lbuffer.b); | ||
410 | lbuffer = oldbuff; | ||
411 | if (status != 0) | ||
412 | lua_error(NULL); | ||
413 | res = lua_getresult(1); | ||
414 | addstr(lua_isstring(res) ? lua_getstring(res) : ""); | ||
415 | lua_endblock(); | ||
416 | } | ||
417 | else luaL_arg_check(0, 3, NULL); | ||
418 | } | ||
419 | |||
420 | |||
421 | static void str_gsub (void) | ||
422 | { | ||
423 | char *src = luaL_check_string(1); | ||
424 | char *p = luaL_check_string(2); | ||
425 | lua_Object newp = lua_getparam(3); | ||
426 | int max_s = (int)luaL_opt_number(4, strlen(src)+1); | ||
427 | int anchor = (*p == '^') ? (p++, 1) : 0; | ||
428 | int n = 0; | ||
429 | luaI_emptybuff(); | ||
430 | while (n < max_s) { | ||
431 | char *e = match(src, p, 0); | ||
432 | if (e) { | ||
433 | n++; | ||
434 | add_s(newp); | ||
435 | } | ||
436 | if (e && e>src) /* non empty match? */ | ||
437 | src = e; /* skip it */ | ||
438 | else if (*src) | ||
439 | luaI_addchar(*src++); | ||
440 | else break; | ||
441 | if (anchor) break; | ||
442 | } | ||
443 | addstr(src); | ||
444 | lua_pushstring(luaI_addchar(0)); | ||
445 | lua_pushnumber(n); /* number of substitutions */ | ||
446 | } | ||
447 | |||
448 | |||
/*
** Appends string `s' to the shared buffer enclosed in double quotes,
** putting a backslash before every double quote, backslash and newline
** found inside it.
*/
void luaI_addquoted (char *s)
{
  luaI_addchar('"');
  while (*s) {
    switch (*s) {
      case '"': case '\\': case '\n':
        luaI_addchar('\\');  /* character needs an escape */
        break;
      default:
        break;
    }
    luaI_addchar(*s);
    s++;
  }
  luaI_addchar('"');
}
459 | |||
460 | #define MAX_FORMAT 200 | ||
461 | |||
462 | static void str_format (void) | ||
463 | { | ||
464 | int arg = 1; | ||
465 | char *strfrmt = luaL_check_string(arg); | ||
466 | luaI_emptybuff(); /* initialize */ | ||
467 | while (*strfrmt) { | ||
468 | if (*strfrmt != '%') | ||
469 | luaI_addchar(*strfrmt++); | ||
470 | else if (*++strfrmt == '%') | ||
471 | luaI_addchar(*strfrmt++); /* %% */ | ||
472 | else { /* format item */ | ||
473 | char form[MAX_FORMAT]; /* store the format ('%...') */ | ||
474 | char *buff; | ||
475 | char *initf = strfrmt; | ||
476 | form[0] = '%'; | ||
477 | strfrmt = match(strfrmt, "%d?%$?[-+ #]*(%d*)%.?(%d*)", 0); | ||
478 | if (capture[0].len > 3 || capture[1].len > 3) /* < 1000? */ | ||
479 | lua_error("invalid format (width or precision too long)"); | ||
480 | if (isdigit((unsigned char)initf[0]) && initf[1] == '$') { | ||
481 | arg = initf[0] - '0'; | ||
482 | initf += 2; /* skip the 'n$' */ | ||
483 | } | ||
484 | arg++; | ||
485 | strncpy(form+1, initf, strfrmt-initf+1); /* +1 to include convertion */ | ||
486 | form[strfrmt-initf+2] = 0; | ||
487 | buff = openspace(1000); /* to store the formated value */ | ||
488 | switch (*strfrmt++) { | ||
489 | case 'q': | ||
490 | luaI_addquoted(luaL_check_string(arg)); | ||
491 | continue; | ||
492 | case 's': { | ||
493 | char *s = luaL_check_string(arg); | ||
494 | buff = openspace(strlen(s)); | ||
495 | sprintf(buff, form, s); | ||
496 | break; | ||
497 | } | ||
498 | case 'c': case 'd': case 'i': case 'o': | ||
499 | case 'u': case 'x': case 'X': | ||
500 | sprintf(buff, form, (int)luaL_check_number(arg)); | ||
501 | break; | ||
502 | case 'e': case 'E': case 'f': case 'g': case 'G': | ||
503 | sprintf(buff, form, luaL_check_number(arg)); | ||
504 | break; | ||
505 | default: /* also treat cases 'pnLlh' */ | ||
506 | lua_error("invalid format option in function `format'"); | ||
507 | } | ||
508 | lbuffer.size += strlen(buff); | ||
509 | } | ||
510 | } | ||
511 | lua_pushstring(luaI_addchar(0)); /* push the result */ | ||
512 | } | ||
513 | |||
514 | |||
515 | static struct luaL_reg strlib[] = { | ||
516 | {"strlen", str_len}, | ||
517 | {"strsub", str_sub}, | ||
518 | {"strlower", str_lower}, | ||
519 | {"strupper", str_upper}, | ||
520 | {"strrep", str_rep}, | ||
521 | {"ascii", str_ascii}, | ||
522 | {"format", str_format}, | ||
523 | {"strfind", str_find}, | ||
524 | {"gsub", str_gsub} | ||
525 | }; | ||
526 | |||
527 | |||
528 | /* | ||
529 | ** Open string library | ||
530 | */ | ||
531 | void strlib_open (void) | ||
532 | { | ||
533 | luaL_openlib(strlib, (sizeof(strlib)/sizeof(strlib[0]))); | ||
534 | } | ||