diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2000-12-04 12:43:06 -0200 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2000-12-04 12:43:06 -0200 |
commit | 10ac68c648e0e1d23fe5485bc711df8fc71b6ae3 (patch) | |
tree | da07f262771a48f039049604a2ec3eb0728e4af5 /lstrlib.c | |
parent | 01b00cc29261579600ce414b470c339510ac49d5 (diff) | |
download | lua-10ac68c648e0e1d23fe5485bc711df8fc71b6ae3.tar.gz lua-10ac68c648e0e1d23fe5485bc711df8fc71b6ae3.tar.bz2 lua-10ac68c648e0e1d23fe5485bc711df8fc71b6ae3.zip |
first implementation for position captures
Diffstat (limited to 'lstrlib.c')
-rw-r--r-- | lstrlib.c | 52 |
1 file changed, 35 insertions, 17 deletions
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | ** $Id: lstrlib.c,v 1.57 2000/11/23 13:49:35 roberto Exp roberto $ | 2 | ** $Id: lstrlib.c,v 1.58 2000/11/24 17:39:56 roberto Exp roberto $ |
3 | ** Standard library for string operations and pattern-matching | 3 | ** Standard library for string operations and pattern-matching |
4 | ** See Copyright Notice in lua.h | 4 | ** See Copyright Notice in lua.h |
5 | */ | 5 | */ |
@@ -123,12 +123,16 @@ static int str_char (lua_State *L) { | |||
123 | #endif | 123 | #endif |
124 | 124 | ||
125 | 125 | ||
126 | #define CAP_UNFINISHED (-1) | ||
127 | #define CAP_POSITION (-2) | ||
128 | |||
126 | typedef struct MatchState { | 129 | typedef struct MatchState { |
130 | const char *src_init; /* init of source string */ | ||
127 | const char *src_end; /* end ('\0') of source string */ | 131 | const char *src_end; /* end ('\0') of source string */ |
128 | int level; /* total number of captures (finished or unfinished) */ | 132 | int level; /* total number of captures (finished or unfinished) */ |
129 | struct { | 133 | struct { |
130 | const char *init; | 134 | const char *init; |
131 | sint32 len; /* -1 signals unfinished capture */ | 135 | sint32 len; |
132 | } capture[MAX_CAPTURES]; | 136 | } capture[MAX_CAPTURES]; |
133 | lua_State *L; | 137 | lua_State *L; |
134 | } MatchState; | 138 | } MatchState; |
@@ -140,7 +144,7 @@ typedef struct MatchState { | |||
140 | 144 | ||
141 | static int check_capture (MatchState *ms, int l) { | 145 | static int check_capture (MatchState *ms, int l) { |
142 | l -= '1'; | 146 | l -= '1'; |
143 | if (!(0 <= l && l < ms->level && ms->capture[l].len != -1)) | 147 | if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) |
144 | lua_error(ms->L, "invalid capture index"); | 148 | lua_error(ms->L, "invalid capture index"); |
145 | return l; | 149 | return l; |
146 | } | 150 | } |
@@ -149,7 +153,7 @@ static int check_capture (MatchState *ms, int l) { | |||
149 | static int capture_to_close (MatchState *ms) { | 153 | static int capture_to_close (MatchState *ms) { |
150 | int level = ms->level; | 154 | int level = ms->level; |
151 | for (level--; level>=0; level--) | 155 | for (level--; level>=0; level--) |
152 | if (ms->capture[level].len == -1) return level; | 156 | if (ms->capture[level].len == CAP_UNFINISHED) return level; |
153 | lua_error(ms->L, "invalid pattern capture"); | 157 | lua_error(ms->L, "invalid pattern capture"); |
154 | return 0; /* to avoid warnings */ | 158 | return 0; /* to avoid warnings */ |
155 | } | 159 | } |
@@ -279,14 +283,15 @@ static const char *min_expand (MatchState *ms, const char *s, const char *p, | |||
279 | } | 283 | } |
280 | 284 | ||
281 | 285 | ||
282 | static const char *start_capture (MatchState *ms, const char *s, const char *p){ | 286 | static const char *start_capture (MatchState *ms, const char *s, |
287 | const char *p, int what) { | ||
283 | const char *res; | 288 | const char *res; |
284 | int level = ms->level; | 289 | int level = ms->level; |
285 | if (level >= MAX_CAPTURES) lua_error(ms->L, "too many captures"); | 290 | if (level >= MAX_CAPTURES) lua_error(ms->L, "too many captures"); |
286 | ms->capture[level].init = s; | 291 | ms->capture[level].init = s; |
287 | ms->capture[level].len = -1; | 292 | ms->capture[level].len = what; |
288 | ms->level = level+1; | 293 | ms->level = level+1; |
289 | if ((res=match(ms, s, p+1)) == NULL) /* match failed? */ | 294 | if ((res=match(ms, s, p)) == NULL) /* match failed? */ |
290 | ms->level--; /* undo capture */ | 295 | ms->level--; /* undo capture */ |
291 | return res; | 296 | return res; |
292 | } | 297 | } |
@@ -296,8 +301,8 @@ static const char *end_capture (MatchState *ms, const char *s, const char *p) { | |||
296 | int l = capture_to_close(ms); | 301 | int l = capture_to_close(ms); |
297 | const char *res; | 302 | const char *res; |
298 | ms->capture[l].len = s - ms->capture[l].init; /* close capture */ | 303 | ms->capture[l].len = s - ms->capture[l].init; /* close capture */ |
299 | if ((res = match(ms, s, p+1)) == NULL) /* match failed? */ | 304 | if ((res = match(ms, s, p)) == NULL) /* match failed? */ |
300 | ms->capture[l].len = -1; /* undo capture */ | 305 | ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ |
301 | return res; | 306 | return res; |
302 | } | 307 | } |
303 | 308 | ||
@@ -316,9 +321,12 @@ static const char *match (MatchState *ms, const char *s, const char *p) { | |||
316 | init: /* using goto's to optimize tail recursion */ | 321 | init: /* using goto's to optimize tail recursion */ |
317 | switch (*p) { | 322 | switch (*p) { |
318 | case '(': /* start capture */ | 323 | case '(': /* start capture */ |
319 | return start_capture(ms, s, p); | 324 | if (*(p+1) == ')') /* position capture? */ |
325 | return start_capture(ms, s, p+2, CAP_POSITION); | ||
326 | else | ||
327 | return start_capture(ms, s, p+1, CAP_UNFINISHED); | ||
320 | case ')': /* end capture */ | 328 | case ')': /* end capture */ |
321 | return end_capture(ms, s, p); | 329 | return end_capture(ms, s, p+1); |
322 | case ESC: /* may be %[0-9] or %b */ | 330 | case ESC: /* may be %[0-9] or %b */ |
323 | if (isdigit((unsigned char)(*(p+1)))) { /* capture? */ | 331 | if (isdigit((unsigned char)(*(p+1)))) { /* capture? */ |
324 | s = match_capture(ms, s, *(p+1)); | 332 | s = match_capture(ms, s, *(p+1)); |
@@ -385,14 +393,21 @@ static const char *lmemfind (const char *s1, size_t l1, | |||
385 | } | 393 | } |
386 | 394 | ||
387 | 395 | ||
396 | static void push_onecapture (MatchState *ms, int i) { | ||
397 | int l = ms->capture[i].len; | ||
398 | if (l == CAP_UNFINISHED) lua_error(ms->L, "unfinished capture"); | ||
399 | if (l == CAP_POSITION) | ||
400 | lua_pushnumber(ms->L, ms->capture[i].init - ms->src_init + 1); | ||
401 | else | ||
402 | lua_pushlstring(ms->L, ms->capture[i].init, l); | ||
403 | } | ||
404 | |||
405 | |||
388 | static int push_captures (MatchState *ms) { | 406 | static int push_captures (MatchState *ms) { |
389 | int i; | 407 | int i; |
390 | luaL_checkstack(ms->L, ms->level, "too many captures"); | 408 | luaL_checkstack(ms->L, ms->level, "too many captures"); |
391 | for (i=0; i<ms->level; i++) { | 409 | for (i=0; i<ms->level; i++) |
392 | int l = ms->capture[i].len; | 410 | push_onecapture(ms, i); |
393 | if (l == -1) lua_error(ms->L, "unfinished capture"); | ||
394 | lua_pushlstring(ms->L, ms->capture[i].init, l); | ||
395 | } | ||
396 | return ms->level; /* number of strings pushed */ | 411 | return ms->level; /* number of strings pushed */ |
397 | } | 412 | } |
398 | 413 | ||
@@ -417,6 +432,7 @@ static int str_find (lua_State *L) { | |||
417 | int anchor = (*p == '^') ? (p++, 1) : 0; | 432 | int anchor = (*p == '^') ? (p++, 1) : 0; |
418 | const char *s1=s+init; | 433 | const char *s1=s+init; |
419 | ms.L = L; | 434 | ms.L = L; |
435 | ms.src_init = s; | ||
420 | ms.src_end = s+l1; | 436 | ms.src_end = s+l1; |
421 | do { | 437 | do { |
422 | const char *res; | 438 | const char *res; |
@@ -448,7 +464,8 @@ static void add_s (MatchState *ms, luaL_Buffer *b) { | |||
448 | luaL_putchar(b, news[i]); | 464 | luaL_putchar(b, news[i]); |
449 | else { | 465 | else { |
450 | int level = check_capture(ms, news[i]); | 466 | int level = check_capture(ms, news[i]); |
451 | luaL_addlstring(b, ms->capture[level].init, ms->capture[level].len); | 467 | push_onecapture(ms, level); |
468 | luaL_addvalue(b); /* add capture to accumulated result */ | ||
452 | } | 469 | } |
453 | } | 470 | } |
454 | } | 471 | } |
@@ -480,6 +497,7 @@ static int str_gsub (lua_State *L) { | |||
480 | 3, "string or function expected"); | 497 | 3, "string or function expected"); |
481 | luaL_buffinit(L, &b); | 498 | luaL_buffinit(L, &b); |
482 | ms.L = L; | 499 | ms.L = L; |
500 | ms.src_init = src; | ||
483 | ms.src_end = src+srcl; | 501 | ms.src_end = src+srcl; |
484 | while (n < max_s) { | 502 | while (n < max_s) { |
485 | const char *e; | 503 | const char *e; |