diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1998-03-24 17:14:25 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 1998-03-24 17:14:25 -0300 |
commit | daa937c043bb0ddb5f4bfe676f8ff13825a99651 (patch) | |
tree | 4f65f0a9c7f050710d462cf68648e7207dfaf2da | |
parent | 21455162b5da29e5850bee1e16b85e226ae391b2 (diff) | |
download | lua-daa937c043bb0ddb5f4bfe676f8ff13825a99651.tar.gz lua-daa937c043bb0ddb5f4bfe676f8ff13825a99651.tar.bz2 lua-daa937c043bb0ddb5f4bfe676f8ff13825a99651.zip |
pattern-matching support for '\0'.
-rw-r--r-- | liolib.c | 4 | ||||
-rw-r--r-- | lstrlib.c | 107 |
2 files changed, 68 insertions, 43 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: liolib.c,v 1.15 1998/03/06 16:54:42 roberto Exp roberto $ | 2 | ** $Id: liolib.c,v 1.16 1998/03/06 18:47:42 roberto Exp roberto $ |
3 | ** Standard I/O (and system) library | 3 | ** Standard I/O (and system) library |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -209,7 +209,7 @@ static void io_read (void) | |||
209 | m = 0; | 209 | m = 0; |
210 | } | 210 | } |
211 | else { | 211 | else { |
212 | m = luaI_singlematch((char)c, p, &ep); | 212 | m = luaI_singlematch(c, p, &ep); |
213 | if (m) { | 213 | if (m) { |
214 | if (inskip == 0) luaL_addchar(c); | 214 | if (inskip == 0) luaL_addchar(c); |
215 | c = NEED_OTHER; | 215 | c = NEED_OTHER; |
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lstrlib.c,v 1.10 1998/03/06 18:47:42 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.11 1998/03/09 18:28:08 roberto Exp roberto $ |
3 | ** Standard library for strings and pattern-matching | 3 | ** Standard library for strings and pattern-matching |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -101,6 +101,14 @@ static void str_ascii (void) | |||
101 | lua_pushnumber((unsigned char)s[pos-1]); | 101 | lua_pushnumber((unsigned char)s[pos-1]); |
102 | } | 102 | } |
103 | 103 | ||
104 | static void str_int2str (void) | ||
105 | { | ||
106 | int i = 0; | ||
107 | luaL_resetbuffer(); | ||
108 | while (lua_getparam(++i) != LUA_NOOBJECT) | ||
109 | luaL_addchar((int)luaL_check_number(i)); | ||
110 | closeandpush(); | ||
111 | } | ||
104 | 112 | ||
105 | 113 | ||
106 | /* | 114 | /* |
@@ -113,6 +121,7 @@ static void str_ascii (void) | |||
113 | 121 | ||
114 | struct Capture { | 122 | struct Capture { |
115 | int level; /* total number of captures (finished or unfinished) */ | 123 | int level; /* total number of captures (finished or unfinished) */ |
124 | char *src_end; /* end ('\0') of source string */ | ||
116 | struct { | 125 | struct { |
117 | char *init; | 126 | char *init; |
118 | int len; /* -1 signals unfinished capture */ | 127 | int len; /* -1 signals unfinished capture */ |
@@ -160,15 +169,16 @@ static char *bracket_end (char *p) | |||
160 | static int matchclass (int c, int cl) | 169 | static int matchclass (int c, int cl) |
161 | { | 170 | { |
162 | int res; | 171 | int res; |
163 | switch (tolower((unsigned char)cl)) { | 172 | switch (tolower(cl)) { |
164 | case 'w' : res = isalnum((unsigned char)c); break; | 173 | case 'a' : res = isalpha(c); break; |
165 | case 'd' : res = isdigit((unsigned char)c); break; | 174 | case 'c' : res = iscntrl(c); break; |
166 | case 's' : res = isspace((unsigned char)c); break; | 175 | case 'd' : res = isdigit(c); break; |
167 | case 'a' : res = isalpha((unsigned char)c); break; | 176 | case 'l' : res = islower(c); break; |
168 | case 'p' : res = ispunct((unsigned char)c); break; | 177 | case 'p' : res = ispunct(c); break; |
169 | case 'l' : res = islower((unsigned char)c); break; | 178 | case 's' : res = isspace(c); break; |
170 | case 'u' : res = isupper((unsigned char)c); break; | 179 | case 'u' : res = isupper(c); break; |
171 | case 'c' : res = iscntrl((unsigned char)c); break; | 180 | case 'w' : res = isalnum(c); break; |
181 | case 'z' : res = (c == '\0'); break; | ||
172 | default: return (cl == c); | 182 | default: return (cl == c); |
173 | } | 183 | } |
174 | return (islower((unsigned char)cl) ? res : !res); | 184 | return (islower((unsigned char)cl) ? res : !res); |
@@ -178,17 +188,17 @@ static int matchclass (int c, int cl) | |||
178 | int luaI_singlematch (int c, char *p, char **ep) | 188 | int luaI_singlematch (int c, char *p, char **ep) |
179 | { | 189 | { |
180 | switch (*p) { | 190 | switch (*p) { |
181 | case '.': | 191 | case '.': /* matches any char */ |
182 | *ep = p+1; | 192 | *ep = p+1; |
183 | return 1; | 193 | return 1; |
184 | case '\0': | 194 | case '\0': /* end of pattern; matches nothing */ |
185 | *ep = p; | 195 | *ep = p; |
186 | return 0; | 196 | return 0; |
187 | case ESC: | 197 | case ESC: |
188 | if (*(++p) == '\0') | 198 | if (*(++p) == '\0') |
189 | luaL_verror("incorrect pattern (ends with `%c')", ESC); | 199 | luaL_verror("incorrect pattern (ends with `%c')", ESC); |
190 | *ep = p+1; | 200 | *ep = p+1; |
191 | return matchclass(c, *p); | 201 | return matchclass(c, (unsigned char)*p); |
192 | case '[': { | 202 | case '[': { |
193 | char *end = bracket_end(p+1); | 203 | char *end = bracket_end(p+1); |
194 | int sig = *(p+1) == '^' ? (p++, 0) : 1; | 204 | int sig = *(p+1) == '^' ? (p++, 0) : 1; |
@@ -196,29 +206,31 @@ int luaI_singlematch (int c, char *p, char **ep) | |||
196 | *ep = end+1; | 206 | *ep = end+1; |
197 | while (++p < end) { | 207 | while (++p < end) { |
198 | if (*p == ESC) { | 208 | if (*p == ESC) { |
199 | if (((p+1) < end) && matchclass(c, *++p)) return sig; | 209 | if (((p+1) < end) && matchclass(c, (unsigned char)*++p)) |
210 | return sig; | ||
200 | } | 211 | } |
201 | else if ((*(p+1) == '-') && (p+2 < end)) { | 212 | else if ((*(p+1) == '-') && (p+2 < end)) { |
202 | p+=2; | 213 | p+=2; |
203 | if (*(p-2) <= c && c <= *p) return sig; | 214 | if ((unsigned char)*(p-2) <= c && c <= (unsigned char)*p) |
215 | return sig; | ||
204 | } | 216 | } |
205 | else if (*p == c) return sig; | 217 | else if ((unsigned char)*p == c) return sig; |
206 | } | 218 | } |
207 | return !sig; | 219 | return !sig; |
208 | } | 220 | } |
209 | default: | 221 | default: |
210 | *ep = p+1; | 222 | *ep = p+1; |
211 | return (*p == c); | 223 | return ((unsigned char)*p == c); |
212 | } | 224 | } |
213 | } | 225 | } |
214 | 226 | ||
215 | 227 | ||
216 | static char *matchbalance (char *s, int b, int e) | 228 | static char *matchbalance (char *s, int b, int e, struct Capture *cap) |
217 | { | 229 | { |
218 | if (*s != b) return NULL; | 230 | if (*s != b) return NULL; |
219 | else { | 231 | else { |
220 | int cont = 1; | 232 | int cont = 1; |
221 | while (*(++s)) { | 233 | while (++s < cap->src_end) { |
222 | if (*s == e) { | 234 | if (*s == e) { |
223 | if (--cont == 0) return s+1; | 235 | if (--cont == 0) return s+1; |
224 | } | 236 | } |
@@ -235,9 +247,10 @@ static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) | |||
235 | p++; | 247 | p++; |
236 | if (isdigit((unsigned char)*p)) { /* capture */ | 248 | if (isdigit((unsigned char)*p)) { /* capture */ |
237 | int l = check_cap(*p, cap); | 249 | int l = check_cap(*p, cap); |
250 | int len = cap->capture[l].len; | ||
238 | *ep = p+1; | 251 | *ep = p+1; |
239 | if (strncmp(cap->capture[l].init, s, cap->capture[l].len) == 0) | 252 | if (cap->src_end-s >= len && memcmp(cap->capture[l].init, s, len) == 0) |
240 | return s+cap->capture[l].len; | 253 | return s+len; |
241 | else return NULL; | 254 | else return NULL; |
242 | } | 255 | } |
243 | else if (*p == 'b') { /* balanced string */ | 256 | else if (*p == 'b') { /* balanced string */ |
@@ -245,12 +258,13 @@ static char *matchitem (char *s, char *p, struct Capture *cap, char **ep) | |||
245 | if (*p == 0 || *(p+1) == 0) | 258 | if (*p == 0 || *(p+1) == 0) |
246 | lua_error("unbalanced pattern"); | 259 | lua_error("unbalanced pattern"); |
247 | *ep = p+2; | 260 | *ep = p+2; |
248 | return matchbalance(s, *p, *(p+1)); | 261 | return matchbalance(s, *p, *(p+1), cap); |
249 | } | 262 | } |
250 | else p--; /* and go through */ | 263 | else p--; /* and go through */ |
251 | } | 264 | } |
252 | /* "luaI_singlematch" sets "ep" (so must be called even when *s == 0) */ | 265 | /* "luaI_singlematch" sets "ep" (so must be called even when *s == 0) */ |
253 | return (luaI_singlematch(*s, p, ep) && *s) ? s+1 : NULL; | 266 | return (luaI_singlematch((unsigned char)*s, p, ep) && s<cap->src_end) ? |
267 | s+1 : NULL; | ||
254 | } | 268 | } |
255 | 269 | ||
256 | 270 | ||
@@ -277,7 +291,7 @@ static char *match (char *s, char *p, struct Capture *cap) | |||
277 | return res; | 291 | return res; |
278 | } | 292 | } |
279 | case '\0': case '$': /* (possibly) end of pattern */ | 293 | case '\0': case '$': /* (possibly) end of pattern */ |
280 | if (*p == 0 || (*(p+1) == 0 && *s == 0)) | 294 | if (*p == 0 || (*(p+1) == 0 && s == cap->src_end)) |
281 | return s; | 295 | return s; |
282 | /* else go through */ | 296 | /* else go through */ |
283 | default: { /* it is a pattern item */ | 297 | default: { /* it is a pattern item */ |
@@ -322,6 +336,7 @@ static void str_find (void) | |||
322 | char *s = luaL_check_lstr(1, &l); | 336 | char *s = luaL_check_lstr(1, &l); |
323 | char *p = luaL_check_string(2); | 337 | char *p = luaL_check_string(2); |
324 | long init = posrelat(luaL_opt_number(3, 1), l) - 1; | 338 | long init = posrelat(luaL_opt_number(3, 1), l) - 1; |
339 | struct Capture cap; | ||
325 | luaL_arg_check(0 <= init && init <= l, 3, "out of range"); | 340 | luaL_arg_check(0 <= init && init <= l, 3, "out of range"); |
326 | if (lua_getparam(4) != LUA_NOOBJECT || | 341 | if (lua_getparam(4) != LUA_NOOBJECT || |
327 | strpbrk(p, SPECIALS) == NULL) { /* no special caracters? */ | 342 | strpbrk(p, SPECIALS) == NULL) { /* no special caracters? */ |
@@ -334,8 +349,8 @@ static void str_find (void) | |||
334 | else { | 349 | else { |
335 | int anchor = (*p == '^') ? (p++, 1) : 0; | 350 | int anchor = (*p == '^') ? (p++, 1) : 0; |
336 | char *s1=s+init; | 351 | char *s1=s+init; |
352 | cap.src_end = s+l; | ||
337 | do { | 353 | do { |
338 | struct Capture cap; | ||
339 | char *res; | 354 | char *res; |
340 | cap.level = 0; | 355 | cap.level = 0; |
341 | if ((res=match(s1, p, &cap)) != NULL) { | 356 | if ((res=match(s1, p, &cap)) != NULL) { |
@@ -344,7 +359,7 @@ static void str_find (void) | |||
344 | push_captures(&cap); | 359 | push_captures(&cap); |
345 | return; | 360 | return; |
346 | } | 361 | } |
347 | } while (*s1++ && !anchor); | 362 | } while (s1++<cap.src_end && !anchor); |
348 | } | 363 | } |
349 | } | 364 | } |
350 | 365 | ||
@@ -353,16 +368,23 @@ static void add_s (lua_Object newp, struct Capture *cap) | |||
353 | { | 368 | { |
354 | if (lua_isstring(newp)) { | 369 | if (lua_isstring(newp)) { |
355 | char *news = lua_getstring(newp); | 370 | char *news = lua_getstring(newp); |
356 | while (*news) { | 371 | int l = lua_strlen(newp); |
357 | if (*news != ESC || !isdigit((unsigned char)*++news)) | 372 | int i; |
358 | luaL_addchar(*news++); | 373 | for (i=0; i<l; i++) { |
374 | if (news[i] != ESC) | ||
375 | luaL_addchar(news[i]); | ||
359 | else { | 376 | else { |
360 | int l = check_cap(*news++, cap); | 377 | i++; /* skip ESC */ |
361 | addnchar(cap->capture[l].init, cap->capture[l].len); | 378 | if (!isdigit((unsigned char)news[i])) |
379 | luaL_addchar(news[i]); | ||
380 | else { | ||
381 | int level = check_cap(news[i], cap); | ||
382 | addnchar(cap->capture[level].init, cap->capture[level].len); | ||
383 | } | ||
362 | } | 384 | } |
363 | } | 385 | } |
364 | } | 386 | } |
365 | else if (lua_isfunction(newp)) { | 387 | else { /* is a function */ |
366 | lua_Object res; | 388 | lua_Object res; |
367 | int status; | 389 | int status; |
368 | int oldbuff; | 390 | int oldbuff; |
@@ -380,25 +402,26 @@ static void add_s (lua_Object newp, struct Capture *cap) | |||
380 | res = lua_getresult(1); | 402 | res = lua_getresult(1); |
381 | if (lua_isstring(res)) | 403 | if (lua_isstring(res)) |
382 | addnchar(lua_getstring(res), lua_strlen(res)); | 404 | addnchar(lua_getstring(res), lua_strlen(res)); |
383 | else | ||
384 | addnchar(NULL, 0); | ||
385 | lua_endblock(); | 405 | lua_endblock(); |
386 | } | 406 | } |
387 | else luaL_arg_check(0, 3, "string or function expected"); | ||
388 | } | 407 | } |
389 | 408 | ||
390 | 409 | ||
391 | static void str_gsub (void) | 410 | static void str_gsub (void) |
392 | { | 411 | { |
393 | char *src = luaL_check_string(1); | 412 | long srcl; |
413 | char *src = luaL_check_lstr(1, &srcl); | ||
394 | char *p = luaL_check_string(2); | 414 | char *p = luaL_check_string(2); |
395 | lua_Object newp = lua_getparam(3); | 415 | lua_Object newp = lua_getparam(3); |
396 | int max_s = (int)luaL_opt_number(4, strlen(src)+1); | 416 | int max_s = (int)luaL_opt_number(4, srcl+1); |
397 | int anchor = (*p == '^') ? (p++, 1) : 0; | 417 | int anchor = (*p == '^') ? (p++, 1) : 0; |
398 | int n = 0; | 418 | int n = 0; |
419 | struct Capture cap; | ||
420 | luaL_arg_check(lua_isstring(newp) || lua_isfunction(newp), 3, | ||
421 | "string or function expected"); | ||
399 | luaL_resetbuffer(); | 422 | luaL_resetbuffer(); |
423 | cap.src_end = src+srcl; | ||
400 | while (n < max_s) { | 424 | while (n < max_s) { |
401 | struct Capture cap; | ||
402 | char *e; | 425 | char *e; |
403 | cap.level = 0; | 426 | cap.level = 0; |
404 | e = match(src, p, &cap); | 427 | e = match(src, p, &cap); |
@@ -408,12 +431,12 @@ static void str_gsub (void) | |||
408 | } | 431 | } |
409 | if (e && e>src) /* non empty match? */ | 432 | if (e && e>src) /* non empty match? */ |
410 | src = e; /* skip it */ | 433 | src = e; /* skip it */ |
411 | else if (*src) | 434 | else if (src < cap.src_end) |
412 | luaL_addchar(*src++); | 435 | luaL_addchar(*src++); |
413 | else break; | 436 | else break; |
414 | if (anchor) break; | 437 | if (anchor) break; |
415 | } | 438 | } |
416 | addnchar(src, strlen(src)); | 439 | addnchar(src, cap.src_end-src); |
417 | closeandpush(); | 440 | closeandpush(); |
418 | lua_pushnumber(n); /* number of substitutions */ | 441 | lua_pushnumber(n); /* number of substitutions */ |
419 | } | 442 | } |
@@ -436,6 +459,8 @@ static void str_format (void) | |||
436 | { | 459 | { |
437 | int arg = 1; | 460 | int arg = 1; |
438 | char *strfrmt = luaL_check_string(arg); | 461 | char *strfrmt = luaL_check_string(arg); |
462 | struct Capture cap; | ||
463 | cap.src_end = strfrmt+strlen(strfrmt)+1; | ||
439 | luaL_resetbuffer(); | 464 | luaL_resetbuffer(); |
440 | while (*strfrmt) { | 465 | while (*strfrmt) { |
441 | if (*strfrmt != '%') | 466 | if (*strfrmt != '%') |
@@ -444,7 +469,6 @@ static void str_format (void) | |||
444 | luaL_addchar(*strfrmt++); /* %% */ | 469 | luaL_addchar(*strfrmt++); /* %% */ |
445 | else { /* format item */ | 470 | else { /* format item */ |
446 | char form[MAX_FORMAT]; /* store the format ('%...') */ | 471 | char form[MAX_FORMAT]; /* store the format ('%...') */ |
447 | struct Capture cap; | ||
448 | char *buff; | 472 | char *buff; |
449 | char *initf = strfrmt; | 473 | char *initf = strfrmt; |
450 | form[0] = '%'; | 474 | form[0] = '%'; |
@@ -492,6 +516,7 @@ static struct luaL_reg strlib[] = { | |||
492 | {"strsub", str_sub}, | 516 | {"strsub", str_sub}, |
493 | {"strlower", str_lower}, | 517 | {"strlower", str_lower}, |
494 | {"strupper", str_upper}, | 518 | {"strupper", str_upper}, |
519 | {"int2str", str_int2str}, | ||
495 | {"strrep", str_rep}, | 520 | {"strrep", str_rep}, |
496 | {"ascii", str_ascii}, | 521 | {"ascii", str_ascii}, |
497 | {"format", str_format}, | 522 | {"format", str_format}, |