aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoberto Ierusalimschy <roberto@inf.puc-rio.br>1996-08-01 11:55:33 -0300
committerRoberto Ierusalimschy <roberto@inf.puc-rio.br>1996-08-01 11:55:33 -0300
commit1630c2533a99e808ecf4649b637f45c964b1c477 (patch)
tree302dbc8cff93763066fa754a2464496ac51898da
parent1d373d77de72ee8baacf69ba53e9e61af6b9caa3 (diff)
downloadlua-1630c2533a99e808ecf4649b637f45c964b1c477.tar.gz
lua-1630c2533a99e808ecf4649b637f45c964b1c477.tar.bz2
lua-1630c2533a99e808ecf4649b637f45c964b1c477.zip
Pattern-Matching!
plus several other changes...
-rw-r--r--strlib.c494
1 files changed, 359 insertions, 135 deletions
diff --git a/strlib.c b/strlib.c
index 061581e3..1fab7a20 100644
--- a/strlib.c
+++ b/strlib.c
@@ -3,42 +3,68 @@
3** String library to LUA 3** String library to LUA
4*/ 4*/
5 5
6char *rcs_strlib="$Id: strlib.c,v 1.23 1996/04/30 21:13:55 roberto Exp roberto $"; 6char *rcs_strlib="$Id: strlib.c,v 1.24 1996/05/22 21:59:07 roberto Exp roberto $";
7 7
8#include <string.h> 8#include <string.h>
9#include <stdio.h> 9#include <stdio.h>
10#include <stdlib.h> 10#include <stdlib.h>
11#include <ctype.h> 11#include <ctype.h>
12#include <limits.h>
13 12
14#include "lua.h" 13#include "lua.h"
15#include "lualib.h" 14#include "lualib.h"
16 15
17 16
18void lua_arg_error(char *funcname) 17static char *buffer = NULL;
18static size_t maxbuff = 0;
19static size_t buff_size = 0;
20
21
22static char *lua_strbuffer (unsigned long size)
23{
24 if (size > maxbuff) {
25 buffer = (buffer) ? realloc(buffer, maxbuff=size) : malloc(maxbuff=size);
26 if (buffer == NULL)
27 lua_error("memory overflow");
28 }
29 return buffer;
30}
31
32static char *openspace (unsigned long size)
19{ 33{
20 char buff[100]; 34 char *buff = lua_strbuffer(buff_size+size);
21 sprintf(buff, "incorrect arguments to function `%s'", funcname); 35 return buff+buff_size;
22 lua_error(buff); 36}
37
38void lua_arg_check(int cond, char *funcname)
39{
40 if (!cond) {
41 char buff[100];
42 sprintf(buff, "incorrect argument to function `%s'", funcname);
43 lua_error(buff);
44 }
23} 45}
24 46
25char *lua_check_string (int numArg, char *funcname) 47char *lua_check_string (int numArg, char *funcname)
26{ 48{
27 lua_Object o = lua_getparam(numArg); 49 lua_Object o = lua_getparam(numArg);
28 if (!lua_isstring(o)) 50 lua_arg_check(lua_isstring(o), funcname);
29 lua_arg_error(funcname);
30 return lua_getstring(o); 51 return lua_getstring(o);
31} 52}
32 53
54char *lua_opt_string (int numArg, char *def, char *funcname)
55{
56 return (lua_getparam(numArg) == LUA_NOOBJECT) ? def :
57 lua_check_string(numArg, funcname);
58}
59
33double lua_check_number (int numArg, char *funcname) 60double lua_check_number (int numArg, char *funcname)
34{ 61{
35 lua_Object o = lua_getparam(numArg); 62 lua_Object o = lua_getparam(numArg);
36 if (!lua_isnumber(o)) 63 lua_arg_check(lua_isnumber(o), funcname);
37 lua_arg_error(funcname);
38 return lua_getnumber(o); 64 return lua_getnumber(o);
39} 65}
40 66
41static long lua_opt_number (int numArg, long def, char *funcname) 67long lua_opt_number (int numArg, long def, char *funcname)
42{ 68{
43 return (lua_getparam(numArg) == LUA_NOOBJECT) ? def : 69 return (lua_getparam(numArg) == LUA_NOOBJECT) ? def :
44 (long)lua_check_number(numArg, funcname); 70 (long)lua_check_number(numArg, funcname);
@@ -46,54 +72,45 @@ static long lua_opt_number (int numArg, long def, char *funcname)
46 72
47char *luaI_addchar (int c) 73char *luaI_addchar (int c)
48{ 74{
49 static char *buff = NULL; 75 if (buff_size >= maxbuff)
50 static size_t max = 0; 76 lua_strbuffer(maxbuff == 0 ? 100 : maxbuff*2);
51 static size_t n = 0; 77 buffer[buff_size++] = c;
52 if (n >= max)
53 {
54 if (max == 0)
55 {
56 max = 100;
57 buff = (char *)malloc(max);
58 }
59 else
60 {
61 max *= 2;
62 buff = (char *)realloc(buff, max);
63 }
64 if (buff == NULL)
65 lua_error("memory overflow");
66 }
67 buff[n++] = c;
68 if (c == 0) 78 if (c == 0)
69 n = 0; /* prepare for next string */ 79 buff_size = 0; /* prepare for next string */
70 return buff; 80 return buffer;
81}
82
83static void addnchar (char *s, int n)
84{
85 char *b = openspace(n);
86 strncpy(b, s, n);
87 buff_size += n;
71} 88}
72 89
73 90
74/* 91/*
75** Return the position of the first caracter of a substring into a string 92** Interface to strtok
76** LUA interface:
77** n = strfind (string, substring, init, end)
78*/ 93*/
79static void str_find (void) 94static void str_tok (void)
80{ 95{
81 char *s1 = lua_check_string(1, "strfind"); 96 char *s1 = lua_check_string(1, "strtok");
82 char *s2 = lua_check_string(2, "strfind"); 97 char *del = lua_check_string(2, "strtok");
83 long init = lua_opt_number(3, 1, "strfind") - 1; 98 lua_Object t = lua_createtable();
84 char *f = (init>=0 && init<=strlen(s1)) ? strstr(s1+init,s2) : NULL; 99 int i = 1;
85 if (f != NULL) 100 /* As strtok changes s1, and s1 is "constant", make a copy of it */
86 { 101 s1 = strcpy(lua_strbuffer(strlen(s1+1)), s1);
87 size_t pos = f-s1+1; 102 while ((s1 = strtok(s1, del)) != NULL) {
88 if (lua_opt_number(4, LONG_MAX, "strfind") >= pos+strlen(s2)-1) 103 lua_pushobject(t);
89 lua_pushnumber (pos); 104 lua_pushnumber(i++);
90 else 105 lua_pushstring(s1);
91 lua_pushnil(); 106 lua_storesubscript();
92 } 107 s1 = NULL; /* prepare for next strtok */
93 else 108 }
94 lua_pushnil(); 109 lua_pushobject(t);
110 lua_pushnumber(i-1); /* total number of tokens */
95} 111}
96 112
113
97/* 114/*
98** Return the string length 115** Return the string length
99** LUA interface: 116** LUA interface:
@@ -101,11 +118,9 @@ static void str_find (void)
101*/ 118*/
102static void str_len (void) 119static void str_len (void)
103{ 120{
104 char *s = lua_check_string(1, "strlen"); 121 lua_pushnumber(strlen(lua_check_string(1, "strlen")));
105 lua_pushnumber(strlen(s));
106} 122}
107 123
108
109/* 124/*
110** Return the substring of a string, from start to end 125** Return the substring of a string, from start to end
111** LUA interface: 126** LUA interface:
@@ -113,136 +128,342 @@ static void str_len (void)
113*/ 128*/
114static void str_sub (void) 129static void str_sub (void)
115{ 130{
116 char *s = lua_check_string(1, "strsub"); 131 char *s = lua_check_string(1, "strsub");
117 long start = (long)lua_check_number(2, "strsub"); 132 long start = (long)lua_check_number(2, "strsub");
118 long end = lua_opt_number(3, strlen(s), "strsub"); 133 long end = lua_opt_number(3, strlen(s), "strsub");
119 if (end < start || start < 1 || end > strlen(s)) 134 if (1 <= start && start <= end && end <= strlen(s)) {
120 lua_pushliteral(""); 135 luaI_addchar(0);
121 else 136 addnchar(s+start-1, end-start+1);
122 { 137 lua_pushstring(luaI_addchar(0));
123 luaI_addchar(0); 138 }
124 while (start <= end) 139 else lua_pushliteral("");
125 luaI_addchar(s[start++ - 1]);
126 lua_pushstring (luaI_addchar(0));
127 }
128} 140}
129 141
130/* 142/*
131** Convert a string to lower case. 143** Transliterate a string
132** LUA interface:
133** lowercase = strlower (string)
134*/ 144*/
135static void str_lower (void) 145static void str_map (void)
136{ 146{
137 char *s = lua_check_string(1, "strlower"); 147 char *s = lua_check_string(1, "strmap");
138 luaI_addchar(0); 148 char *from = lua_check_string(2, "strmap");
139 while (*s) 149 char *to = lua_opt_string(3, "", "strmap");
140 luaI_addchar(tolower(*s++)); 150 long len = strlen(to);
151 for (luaI_addchar(0); *s; s++) {
152 char *f = strrchr(from, *s);
153 if (f == NULL)
154 luaI_addchar(*s);
155 else {
156 long pos = f-from;
157 if (pos < len)
158 luaI_addchar(to[pos]);
159 }
160 }
141 lua_pushstring(luaI_addchar(0)); 161 lua_pushstring(luaI_addchar(0));
142} 162}
143 163
144
145/* 164/*
146** Convert a string to upper case. 165** get ascii value of a character in a string
147** LUA interface:
148** uppercase = strupper (string)
149*/ 166*/
150static void str_upper (void) 167static void str_ascii (void)
168{
169 char *s = lua_check_string(1, "ascii");
170 long pos = lua_opt_number(2, 1, "ascii") - 1;
171 lua_arg_check(0<=pos && pos<strlen(s), "ascii");
172 lua_pushnumber((unsigned char)s[pos]);
173}
174
175
176/* pattern matching */
177
178#define ESC '%'
179#define SPECIALS "^$*?.([%"
180
181static char *item_end (char *p)
182{
183 switch (*p) {
184 case '\0': return p;
185 case ESC:
186 if (*(p+1) == 0) lua_error("incorrect pattern");
187 return p+2;
188 case '[': {
189 char *end = (*(p+1) == 0) ? NULL : strchr(p+2, ']');
190 if (end == NULL) lua_error("incorrect pattern");
191 return end+1;
192 }
193 default:
194 return p+1;
195 }
196}
197
/*
** Test character `c' against the class letter `cl' (the character that
** follows ESC in a pattern):
**   a letters        c control chars   d digits        l lower case
**   p punctuation    s white space     u upper case    w alphanumerics
** A lower-case letter selects the class itself, the corresponding upper-case
** letter selects its complement. Any other `cl' stands for itself: the
** result is then whether `c' is that very character.
** Both arguments are first reduced to the unsigned char range. Callers hand
** in plain `char' values, which may be negative, while the <ctype.h>
** functions are only defined for unsigned char values (and EOF); it also
** makes a byte >= 128 compare equal whether it arrives as a negative char
** or as an int in 128..255.
** Returns non-zero on a match and 0 otherwise.
*/
static int matchclass (int c, int cl)
{
  int res;
  c = (unsigned char)c;
  cl = (unsigned char)cl;
  switch (tolower(cl)) {
    case 'a' : res = isalpha(c); break;
    case 'c' : res = iscntrl(c); break;
    case 'd' : res = isdigit(c); break;
    case 'l' : res = islower(c); break;
    case 'p' : res = ispunct(c); break;
    case 's' : res = isspace(c); break;
    case 'u' : res = isupper(c); break;
    case 'w' : res = isalnum(c); break;
    default: return (cl == c);  /* not a class: literal comparison */
  }
  return (islower(cl) ? res : !res);  /* upper-case letter = complement */
}
214
215static int singlematch (int c, char *p)
216{
217 if (c == 0) return 0;
218 switch (*p) {
219 case '.': return 1;
220 case ESC: return matchclass(c, *(p+1));
221 case '[': {
222 char *end = strchr(p+2, ']');
223 int sig = *(p+1) == '^' ? (p++, 0) : 1;
224 while (++p < end) {
225 if (*p == ESC) {
226 if (((p+1) < end) && matchclass(c, *++p)) return sig;
227 }
228 else if ((*(p+1) == '-') && (p+2 < end)) {
229 p+=2;
230 if (*(p-2) <= c && c <= *p) return sig;
231 }
232 else if (*p == c) return sig;
233 }
234 return !sig;
235 }
236 default: return (*p == c);
237 }
238}
239
240#define MAX_CAPT 9
241
242static struct {
243 char *init;
244 int len; /* -1 signals unfinished capture */
245} capture[MAX_CAPT];
246
247static int num_captures; /* only valid after a sucessful call to match */
248
249
250static void push_captures (void)
151{ 251{
152 char *s = lua_check_string(1, "strupper"); 252 int i;
153 luaI_addchar(0); 253 luaI_addchar(0);
154 while (*s) 254 for (i=0; i<num_captures; i++) {
155 luaI_addchar(toupper(*s++)); 255 if (capture[i].len == -1) lua_error("unfinished capture");
256 addnchar(capture[i].init, capture[i].len);
257 lua_pushstring(luaI_addchar(0));
258 }
259}
260
261static int check_cap (int l, int level)
262{
263 l -= '1';
264 if (!(0 <= l && l < level && capture[l].len != -1))
265 lua_error("invalid capture index");
266 return l;
267}
268
269static void add_s (char *newp)
270{
271 while (*newp) {
272 if (*newp != ESC || !isdigit(*++newp))
273 luaI_addchar(*newp++);
274 else {
275 int l = check_cap(*newp++, num_captures);
276 addnchar(capture[l].init, capture[l].len);
277 }
278 }
279}
280
281static int capture_to_close (int level)
282{
283 for (level--; level>=0; level--)
284 if (capture[level].len == -1) return level;
285 lua_error("invalid pattern capture");
286 return 0; /* to avoid warnings */
287}
288
289static char *match (char *s, char *p, int level)
290{
291 init: /* using goto's to optimize tail recursion */
292 switch (*p) {
293 case '(': /* start capture */
294 if (level >= MAX_CAPT) lua_error("too many captures");
295 capture[level].init = s;
296 capture[level].len = -1;
297 level++; p++; goto init; /* return match(s, p+1, level); */
298 case ')': { /* end capture */
299 int l = capture_to_close(level);
300 char *res;
301 capture[l].len = s - capture[l].init; /* close capture */
302 if ((res = match(s, p+1, level)) == NULL) /* match failed? */
303 capture[l].len = -1; /* undo capture */
304 return res;
305 }
306 case ESC: /* possibly a capture (if followed by a digit) */
307 if (!isdigit(*(p+1))) goto dflt;
308 else {
309 int l = check_cap(*(p+1), level);
310 if (strncmp(capture[l].init, s, capture[l].len) == 0) {
311 /* return match(p+2, s+capture[l].len, level); */
312 p+=2; s+=capture[l].len; goto init;
313 }
314 else return NULL;
315 }
316 case '\0': case '$': /* (possibly) end of pattern */
317 if (*p == 0 || (*(p+1) == 0 && *s == 0)) {
318 num_captures = level;
319 return s;
320 }
321 else goto dflt;
322 default: dflt: { /* it is a pattern item */
323 int m = singlematch(*s, p);
324 char *ep = item_end(p); /* get what is next */
325 switch (*ep) {
326 case '*': { /* repetition? */
327 char *res;
328 if (m && (res = match(s+1, p, level)))
329 return res;
330 p=ep+1; goto init; /* else return match(s, ep+1, level); */
331 }
332 case '?': { /* optional? */
333 char *res;
334 if (m && (res = match(s+1, ep+1, level)))
335 return res;
336 p=ep+1; goto init; /* else return match(s, ep+1, level); */
337 }
338 default:
339 if (m) { s++; p=ep; goto init; } /* return match(s+1, ep, level); */
340 else return NULL;
341 }
342 }
343 }
344}
345
346static void str_find (void)
347{
348 char *s = lua_check_string(1, "find");
349 char *p = lua_check_string(2, "find");
350 long init = lua_opt_number(3, 1, "strfind") - 1;
351 lua_arg_check(0 <= init && init <= strlen(s), "find");
352 if (strpbrk(p, SPECIALS) == NULL) { /* no special caracters? */
353 char *s2 = strstr(s+init, p);
354 if (s2) {
355 lua_pushnumber(s2-s+1);
356 lua_pushnumber(s2-s+strlen(p));
357 }
358 }
359 else {
360 int anchor = (*p == '^') ? (p++, 1) : 0;
361 char *s1=s+init;
362 do {
363 char *res;
364 if ((res=match(s1, p, 0)) != NULL) {
365 lua_pushnumber(s1-s+1); /* start */
366 lua_pushnumber(res-s); /* end */
367 push_captures();
368 return;
369 }
370 } while (*s1++ && !anchor);
371 }
372}
373
374static void str_s (void)
375{
376 char *src = lua_check_string(1, "s");
377 char *p = lua_check_string(2, "s");
378 char *newp = lua_check_string(3, "s");
379 int max_s = lua_opt_number(4, strlen(src), "s");
380 int anchor = (*p == '^') ? (p++, 1) : 0;
381 int n = 0;
382 luaI_addchar(0);
383 while (*src && n < max_s) {
384 char *e;
385 if ((e=match(src, p, 0)) == NULL)
386 luaI_addchar(*src++);
387 else {
388 if (e == src) lua_error("empty pattern in substitution"); /* ??? */
389 add_s(newp);
390 src = e;
391 n++;
392 }
393 if (anchor) break;
394 }
395 addnchar(src, strlen(src));
156 lua_pushstring(luaI_addchar(0)); 396 lua_pushstring(luaI_addchar(0));
397 lua_pushnumber(n); /* number of substitutions */
157} 398}
158 399
159/* 400static void str_set (void)
160** get ascii value of a character in a string
161*/
162static void str_ascii (void)
163{ 401{
164 char *s = lua_check_string(1, "ascii"); 402 char *item = lua_check_string(1, "strset");
165 long pos = lua_opt_number(2, 1, "ascii") - 1; 403 int i;
166 if (pos<0 || pos>=strlen(s)) 404 lua_arg_check(*item_end(item) == 0, "strset");
167 lua_arg_error("ascii"); 405 luaI_addchar(0);
168 lua_pushnumber(s[pos]); 406 for (i=1; i<256; i++) /* 0 cannot be part of a set */
407 if (singlematch(i, item))
408 luaI_addchar(i);
409 lua_pushstring(luaI_addchar(0));
169} 410}
170 411
412
171void luaI_addquoted (char *s) 413void luaI_addquoted (char *s)
172{ 414{
173 luaI_addchar('"'); 415 luaI_addchar('"');
174 for (; *s; s++) 416 for (; *s; s++) {
175 { 417 if (strchr("\"\\\n", *s))
176 if (*s == '"' || *s == '\\' || *s == '\n')
177 luaI_addchar('\\'); 418 luaI_addchar('\\');
178 luaI_addchar(*s); 419 luaI_addchar(*s);
179 } 420 }
180 luaI_addchar('"'); 421 luaI_addchar('"');
181} 422}
182 423
183#define MAX_CONVERTION 2000 424#define MAX_FORMAT 200
184#define MAX_FORMAT 50
185 425
186static void str_format (void) 426static void str_format (void)
187{ 427{
188 int arg = 1; 428 int arg = 1;
189 char *strfrmt = lua_check_string(arg++, "format"); 429 char *strfrmt = lua_check_string(arg++, "format");
190 luaI_addchar(0); /* initialize */ 430 luaI_addchar(0); /* initialize */
191 while (*strfrmt) 431 while (*strfrmt) {
192 {
193 if (*strfrmt != '%') 432 if (*strfrmt != '%')
194 luaI_addchar(*strfrmt++); 433 luaI_addchar(*strfrmt++);
195 else if (*++strfrmt == '%') 434 else if (*++strfrmt == '%')
196 luaI_addchar(*strfrmt++); /* %% */ 435 luaI_addchar(*strfrmt++); /* %% */
197 else 436 else { /* format item */
198 { /* format item */
199 char form[MAX_FORMAT]; /* store the format ('%...') */ 437 char form[MAX_FORMAT]; /* store the format ('%...') */
200 char buff[MAX_CONVERTION]; /* store the formated value */ 438 char *buff;
201 int size = 0; 439 char *initf = strfrmt-1; /* -1 to include % */
202 int i = 0; 440 strfrmt = match(strfrmt, "[-+ #]*(%d*)%.?(%d*)", 0);
203 form[i++] = '%'; 441 if (capture[0].len > 3 || capture[1].len > 3) /* < 1000? */
204 form[i] = *strfrmt++; 442 lua_error("invalid format (width/precision too long)");
205 while (!isalpha(form[i])) 443 strncpy(form, initf, strfrmt-initf+1); /* +1 to include convertion */
206 { 444 form[strfrmt-initf+1] = 0;
207 if (isdigit(form[i])) 445 buff = openspace(1000); /* to store the formated value */
208 { 446 switch (*strfrmt++) {
209 size = size*10 + form[i]-'0';
210 if (size >= MAX_CONVERTION)
211 lua_error("format size/precision too long in function `format'");
212 }
213 else if (form[i] == '.')
214 size = 0; /* re-start */
215 if (++i >= MAX_FORMAT)
216 lua_error("bad format in function `format'");
217 form[i] = *strfrmt++;
218 }
219 form[i+1] = 0; /* ends string */
220 switch (form[i])
221 {
222 case 'q': 447 case 'q':
223 luaI_addquoted(lua_check_string(arg++, "format")); 448 luaI_addquoted(lua_check_string(arg++, "format"));
224 buff[0] = '\0'; /* addchar already done */
225 break; 449 break;
226 case 's': 450 case 's': {
227 {
228 char *s = lua_check_string(arg++, "format"); 451 char *s = lua_check_string(arg++, "format");
229 if (strlen(s) >= MAX_CONVERTION) 452 buff = openspace(strlen(s));
230 lua_error("string argument too long in function `format'"); 453 buff_size += sprintf(buff, form, s);
231 sprintf(buff, form, s);
232 break; 454 break;
233 } 455 }
234 case 'c': case 'd': case 'i': case 'o': 456 case 'c': case 'd': case 'i': case 'o':
235 case 'u': case 'x': case 'X': 457 case 'u': case 'x': case 'X':
236 sprintf(buff, form, (int)lua_check_number(arg++, "format")); 458 buff_size += sprintf(buff, form,
459 (int)lua_check_number(arg++, "format"));
237 break; 460 break;
238 case 'e': case 'E': case 'f': case 'g': 461 case 'e': case 'E': case 'f': case 'g':
239 sprintf(buff, form, lua_check_number(arg++, "format")); 462 buff_size += sprintf(buff, form, lua_check_number(arg++, "format"));
240 break; 463 break;
241 default: /* also treat cases 'pnLlh' */ 464 default: /* also treat cases 'pnLlh' */
242 lua_error("invalid format option in function `format'"); 465 lua_error("invalid format option in function `format'");
243 } 466 }
244 for (i=0; buff[i]; i++) /* move formated value to result */
245 luaI_addchar(buff[i]);
246 } 467 }
247 } 468 }
248 lua_pushstring(luaI_addchar(0)); /* push the result */ 469 lua_pushstring(luaI_addchar(0)); /* push the result */
@@ -256,14 +477,17 @@ void luaI_openlib (struct lua_reg *l, int n)
256 lua_register(l[i].name, l[i].func); 477 lua_register(l[i].name, l[i].func);
257} 478}
258 479
480
259static struct lua_reg strlib[] = { 481static struct lua_reg strlib[] = {
260{"strfind", str_find}, 482{"strtok", str_tok},
261{"strlen", str_len}, 483{"strlen", str_len},
262{"strsub", str_sub}, 484{"strsub", str_sub},
263{"strlower", str_lower}, 485{"strset", str_set},
264{"strupper", str_upper}, 486{"strmap", str_map},
265{"ascii", str_ascii}, 487{"ascii", str_ascii},
266{"format", str_format} 488{"format", str_format},
489{"strfind", str_find},
490{"s", str_s}
267}; 491};
268 492
269 493