about summary refs log tree commit diff
diff options
context:
space:
mode:
author Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2010-04-12 09:00:50 -0300
committer Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2010-04-12 09:00:50 -0300
commit 4541243355a299a9b75042d207feb87295872c3a (patch)
tree abdffc43c56dac321baa862783b8ce79019e30f4
parent 9100f7479afabc4bb2926c619b5ef09693cf9a94 (diff)
download lua-4541243355a299a9b75042d207feb87295872c3a.tar.gz
lua-4541243355a299a9b75042d207feb87295872c3a.tar.bz2
lua-4541243355a299a9b75042d207feb87295872c3a.zip
patterns now accept '\0' as a regular character
-rw-r--r-- lstrlib.c 66
1 files changed, 38 insertions, 28 deletions
diff --git a/lstrlib.c b/lstrlib.c
index d3709a83..4a5ba255 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -1,5 +1,5 @@
1/* 1/*
2** $Id: lstrlib.c,v 1.148 2010/01/04 16:37:19 roberto Exp roberto $ 2** $Id: lstrlib.c,v 1.149 2010/04/09 16:14:46 roberto Exp roberto $
3** Standard library for string operations and pattern-matching 3** Standard library for string operations and pattern-matching
4** See Copyright Notice in lua.h 4** See Copyright Notice in lua.h
5*/ 5*/
@@ -180,7 +180,8 @@ static int str_dump (lua_State *L) {
180 180
181typedef struct MatchState { 181typedef struct MatchState {
182 const char *src_init; /* init of source string */ 182 const char *src_init; /* init of source string */
183 const char *src_end; /* end (`\0') of source string */ 183 const char *src_end; /* end ('\0') of source string */
184 const char *p_end; /* end ('\0') of pattern */
184 lua_State *L; 185 lua_State *L;
185 int level; /* total number of captures (finished or unfinished) */ 186 int level; /* total number of captures (finished or unfinished) */
186 struct { 187 struct {
@@ -213,16 +214,16 @@ static int capture_to_close (MatchState *ms) {
213static const char *classend (MatchState *ms, const char *p) { 214static const char *classend (MatchState *ms, const char *p) {
214 switch (*p++) { 215 switch (*p++) {
215 case L_ESC: { 216 case L_ESC: {
216 if (*p == '\0') 217 if (p == ms->p_end)
217 luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")"); 218 luaL_error(ms->L, "malformed pattern (ends with " LUA_QL("%%") ")");
218 return p+1; 219 return p+1;
219 } 220 }
220 case '[': { 221 case '[': {
221 if (*p == '^') p++; 222 if (*p == '^') p++;
222 do { /* look for a `]' */ 223 do { /* look for a `]' */
223 if (*p == '\0') 224 if (p == ms->p_end)
224 luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")"); 225 luaL_error(ms->L, "malformed pattern (missing " LUA_QL("]") ")");
225 if (*(p++) == L_ESC && *p != '\0') 226 if (*(p++) == L_ESC && p < ms->p_end)
226 p++; /* skip escapes (e.g. `%]') */ 227 p++; /* skip escapes (e.g. `%]') */
227 } while (*p != ']'); 228 } while (*p != ']');
228 return p+1; 229 return p+1;
@@ -246,7 +247,7 @@ static int match_class (int c, int cl) {
246 case 'u' : res = isupper(c); break; 247 case 'u' : res = isupper(c); break;
247 case 'w' : res = isalnum(c); break; 248 case 'w' : res = isalnum(c); break;
248 case 'x' : res = isxdigit(c); break; 249 case 'x' : res = isxdigit(c); break;
249 case 'z' : res = (c == 0); break; 250 case 'z' : res = (c == 0); break; /* deprecated option */
250 default: return (cl == c); 251 default: return (cl == c);
251 } 252 }
252 return (islower(cl) ? res : !res); 253 return (islower(cl) ? res : !res);
@@ -291,8 +292,9 @@ static const char *match (MatchState *ms, const char *s, const char *p);
291 292
292static const char *matchbalance (MatchState *ms, const char *s, 293static const char *matchbalance (MatchState *ms, const char *s,
293 const char *p) { 294 const char *p) {
294 if (*p == 0 || *(p+1) == 0) 295 if (p >= ms->p_end - 1)
295 luaL_error(ms->L, "unbalanced pattern"); 296 luaL_error(ms->L, "malformed pattern "
297 "(missing arguments to " LUA_QL("%%b") ")");
296 if (*s != *p) return NULL; 298 if (*s != *p) return NULL;
297 else { 299 else {
298 int b = *p; 300 int b = *p;
@@ -375,6 +377,8 @@ static const char *match_capture (MatchState *ms, const char *s, int l) {
375 377
376static const char *match (MatchState *ms, const char *s, const char *p) { 378static const char *match (MatchState *ms, const char *s, const char *p) {
377 init: /* using goto's to optimize tail recursion */ 379 init: /* using goto's to optimize tail recursion */
380 if (p == ms->p_end) /* end of pattern? */
381 return s; /* match succeeded */
378 switch (*p) { 382 switch (*p) {
379 case '(': { /* start capture */ 383 case '(': { /* start capture */
380 if (*(p+1) == ')') /* position capture? */ 384 if (*(p+1) == ')') /* position capture? */
@@ -385,11 +389,8 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
385 case ')': { /* end capture */ 389 case ')': { /* end capture */
386 return end_capture(ms, s, p+1); 390 return end_capture(ms, s, p+1);
387 } 391 }
388 case '\0': { /* end of pattern */
389 return s; /* match succeeded */
390 }
391 case '$': { 392 case '$': {
392 if (*(p+1) == '\0') /* is the `$' the last char in pattern? */ 393 if ((p+1) == ms->p_end) /* is the `$' the last char in pattern? */
393 return (s == ms->src_end) ? s : NULL; /* check end of string */ 394 return (s == ms->src_end) ? s : NULL; /* check end of string */
394 else goto dflt; 395 else goto dflt;
395 } 396 }
@@ -419,12 +420,12 @@ static const char *match (MatchState *ms, const char *s, const char *p) {
419 if (s == NULL) return NULL; 420 if (s == NULL) return NULL;
420 p+=2; goto init; /* else return match(ms, s, p+2) */ 421 p+=2; goto init; /* else return match(ms, s, p+2) */
421 } 422 }
422 default: break; /* go through to 'dflt' */ 423 default: goto dflt;
423 } 424 }
424 } 425 }
425 default: dflt: { /* pattern class plus optional sufix */ 426 default: dflt: { /* pattern class plus optional sufix */
426 const char *ep = classend(ms, p); /* points to what is next */ 427 const char *ep = classend(ms, p); /* points to what is next */
427 int m = s<ms->src_end && singlematch(uchar(*s), p, ep); 428 int m = s < ms->src_end && singlematch(uchar(*s), p, ep);
428 switch (*ep) { 429 switch (*ep) {
429 case '?': { /* optional */ 430 case '?': { /* optional */
430 const char *res; 431 const char *res;
@@ -504,32 +505,36 @@ static int push_captures (MatchState *ms, const char *s, const char *e) {
504 505
505 506
506static int str_find_aux (lua_State *L, int find) { 507static int str_find_aux (lua_State *L, int find) {
507 size_t l1, l2; 508 size_t ls, lp;
508 const char *s = luaL_checklstring(L, 1, &l1); 509 const char *s = luaL_checklstring(L, 1, &ls);
509 const char *p = luaL_checklstring(L, 2, &l2); 510 const char *p = luaL_checklstring(L, 2, &lp);
510 size_t init = posrelat(luaL_optinteger(L, 3, 1), l1); 511 size_t init = posrelat(luaL_optinteger(L, 3, 1), ls);
511 if (init < 1) init = 1; 512 if (init < 1) init = 1;
512 else if (init > l1 + 1) { /* start after string's end? */ 513 else if (init > ls + 1) { /* start after string's end? */
513 lua_pushnil(L); /* cannot find anything */ 514 lua_pushnil(L); /* cannot find anything */
514 return 1; 515 return 1;
515 } 516 }
516 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 517 if (find && (lua_toboolean(L, 4) || /* explicit request? */
517 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 518 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
518 /* do a plain search */ 519 /* do a plain search */
519 const char *s2 = lmemfind(s + init - 1, l1 - init + 1, p, l2); 520 const char *s2 = lmemfind(s + init - 1, ls - init + 1, p, lp);
520 if (s2) { 521 if (s2) {
521 lua_pushinteger(L, s2 - s + 1); 522 lua_pushinteger(L, s2 - s + 1);
522 lua_pushinteger(L, s2 - s + l2); 523 lua_pushinteger(L, s2 - s + lp);
523 return 2; 524 return 2;
524 } 525 }
525 } 526 }
526 else { 527 else {
527 MatchState ms; 528 MatchState ms;
528 int anchor = (*p == '^') ? (p++, 1) : 0;
529 const char *s1 = s + init - 1; 529 const char *s1 = s + init - 1;
530 int anchor = (*p == '^');
531 if (anchor) {
532 p++; lp--; /* skip anchor character */
533 }
530 ms.L = L; 534 ms.L = L;
531 ms.src_init = s; 535 ms.src_init = s;
532 ms.src_end = s + l1; 536 ms.src_end = s + ls;
537 ms.p_end = p + lp;
533 do { 538 do {
534 const char *res; 539 const char *res;
535 ms.level = 0; 540 ms.level = 0;
@@ -561,13 +566,14 @@ static int str_match (lua_State *L) {
561 566
562static int gmatch_aux (lua_State *L) { 567static int gmatch_aux (lua_State *L) {
563 MatchState ms; 568 MatchState ms;
564 size_t ls; 569 size_t ls, lp;
565 const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls); 570 const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
566 const char *p = lua_tostring(L, lua_upvalueindex(2)); 571 const char *p = lua_tolstring(L, lua_upvalueindex(2), &lp);
567 const char *src; 572 const char *src;
568 ms.L = L; 573 ms.L = L;
569 ms.src_init = s; 574 ms.src_init = s;
570 ms.src_end = s+ls; 575 ms.src_end = s+ls;
576 ms.p_end = p + lp;
571 for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3)); 577 for (src = s + (size_t)lua_tointeger(L, lua_upvalueindex(3));
572 src <= ms.src_end; 578 src <= ms.src_end;
573 src++) { 579 src++) {
@@ -659,12 +665,12 @@ static void add_value (MatchState *ms, luaL_Buffer *b, const char *s,
659 665
660 666
661static int str_gsub (lua_State *L) { 667static int str_gsub (lua_State *L) {
662 size_t srcl; 668 size_t srcl, lp;
663 const char *src = luaL_checklstring(L, 1, &srcl); 669 const char *src = luaL_checklstring(L, 1, &srcl);
664 const char *p = luaL_checkstring(L, 2); 670 const char *p = luaL_checklstring(L, 2, &lp);
665 int tr = lua_type(L, 3); 671 int tr = lua_type(L, 3);
666 size_t max_s = luaL_optinteger(L, 4, srcl+1); 672 size_t max_s = luaL_optinteger(L, 4, srcl+1);
667 int anchor = (*p == '^') ? (p++, 1) : 0; 673 int anchor = (*p == '^');
668 size_t n = 0; 674 size_t n = 0;
669 MatchState ms; 675 MatchState ms;
670 luaL_Buffer b; 676 luaL_Buffer b;
@@ -672,9 +678,13 @@ static int str_gsub (lua_State *L) {
672 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3, 678 tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
673 "string/function/table expected"); 679 "string/function/table expected");
674 luaL_buffinit(L, &b); 680 luaL_buffinit(L, &b);
681 if (anchor) {
682 p++; lp--; /* skip anchor character */
683 }
675 ms.L = L; 684 ms.L = L;
676 ms.src_init = src; 685 ms.src_init = src;
677 ms.src_end = src+srcl; 686 ms.src_end = src+srcl;
687 ms.p_end = p + lp;
678 while (n < max_s) { 688 while (n < max_s) {
679 const char *e; 689 const char *e;
680 ms.level = 0; 690 ms.level = 0;