about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-01-08 14:22:32 -0200
committer: Roberto Ierusalimschy <roberto@inf.puc-rio.br> 2019-01-08 14:22:32 -0200
commit: 264659bd53e92969a1e17d65c0266597cde24b5d (patch)
tree: 6aba62d2b6ac2a46dc064ea7193c8134200a7d57
parent: 4ace93ca6502dd1da38d5c06fa099d229e791ba8 (diff)
download: lua-264659bd53e92969a1e17d65c0266597cde24b5d.tar.gz
lua-264659bd53e92969a1e17d65c0266597cde24b5d.tar.bz2
lua-264659bd53e92969a1e17d65c0266597cde24b5d.zip
Optional 'init' argument to 'string.gmatch'
The function 'string.gmatch' now has an optional 'init' argument, similar to 'string.find' and 'string.match'. Moreover, there was some reorganization in the manipulation of indices in the string library. This commit also includes small janitorial work in the manual and in comments in the interpreter loop.
-rw-r--r-- lstrlib.c 76
-rw-r--r-- lvm.c 14
-rw-r--r-- manual/manual.of 42
-rw-r--r-- testes/pm.lua 29
-rw-r--r-- testes/strings.lua 5
-rw-r--r-- testes/tpack.lua 2
6 files changed, 116 insertions, 52 deletions
diff --git a/lstrlib.c b/lstrlib.c
index dde868c0..41ebc523 100644
--- a/lstrlib.c
+++ b/lstrlib.c
@@ -60,23 +60,50 @@ static int str_len (lua_State *L) {
60} 60}
61 61
62 62
63/* translate a relative string position: negative means back from end */ 63/*
64static lua_Integer posrelat (lua_Integer pos, size_t len) { 64** translate a relative initial string position
65 if (pos >= 0) return pos; 65** (negative means back from end): clip result to [1, inf).
66 else if (0u - (size_t)pos > len) return 0; 66** The length of any string in Lua must fit in a lua_Integer,
67 else return (lua_Integer)len + pos + 1; 67** so there are no overflows in the casts.
68** The inverted comparison avoids a possible overflow
69** computing '-pos'.
70*/
71static size_t posrelatI (lua_Integer pos, size_t len) {
72 if (pos > 0)
73 return (size_t)pos;
74 else if (pos == 0)
75 return 1;
76 else if (pos < -(lua_Integer)len) /* inverted comparison */
77 return 1; /* clip to 1 */
78 else return len + (size_t)pos + 1;
79}
80
81
82/*
83** Gets an optional ending string position from argument 'arg',
84** with default value 'def'.
85** Negative means back from end: clip result to [0, len]
86*/
87static size_t getendpos (lua_State *L, int arg, lua_Integer def,
88 size_t len) {
89 lua_Integer pos = luaL_optinteger(L, arg, def);
90 if (pos > (lua_Integer)len)
91 return len;
92 else if (pos >= 0)
93 return (size_t)pos;
94 else if (pos < -(lua_Integer)len)
95 return 0;
96 else return len + (size_t)pos + 1;
68} 97}
69 98
70 99
71static int str_sub (lua_State *L) { 100static int str_sub (lua_State *L) {
72 size_t l; 101 size_t l;
73 const char *s = luaL_checklstring(L, 1, &l); 102 const char *s = luaL_checklstring(L, 1, &l);
74 lua_Integer start = posrelat(luaL_checkinteger(L, 2), l); 103 size_t start = posrelatI(luaL_checkinteger(L, 2), l);
75 lua_Integer end = posrelat(luaL_optinteger(L, 3, -1), l); 104 size_t end = getendpos(L, 3, -1, l);
76 if (start < 1) start = 1;
77 if (end > (lua_Integer)l) end = l;
78 if (start <= end) 105 if (start <= end)
79 lua_pushlstring(L, s + start - 1, (size_t)(end - start) + 1); 106 lua_pushlstring(L, s + start - 1, (end - start) + 1);
80 else lua_pushliteral(L, ""); 107 else lua_pushliteral(L, "");
81 return 1; 108 return 1;
82} 109}
@@ -149,11 +176,10 @@ static int str_rep (lua_State *L) {
149static int str_byte (lua_State *L) { 176static int str_byte (lua_State *L) {
150 size_t l; 177 size_t l;
151 const char *s = luaL_checklstring(L, 1, &l); 178 const char *s = luaL_checklstring(L, 1, &l);
152 lua_Integer posi = posrelat(luaL_optinteger(L, 2, 1), l); 179 lua_Integer pi = luaL_optinteger(L, 2, 1);
153 lua_Integer pose = posrelat(luaL_optinteger(L, 3, posi), l); 180 size_t posi = posrelatI(pi, l);
181 size_t pose = getendpos(L, 3, pi, l);
154 int n, i; 182 int n, i;
155 if (posi < 1) posi = 1;
156 if (pose > (lua_Integer)l) pose = l;
157 if (posi > pose) return 0; /* empty interval; return no values */ 183 if (posi > pose) return 0; /* empty interval; return no values */
158 if (pose - posi >= INT_MAX) /* arithmetic overflow? */ 184 if (pose - posi >= INT_MAX) /* arithmetic overflow? */
159 return luaL_error(L, "string slice too long"); 185 return luaL_error(L, "string slice too long");
@@ -171,8 +197,8 @@ static int str_char (lua_State *L) {
171 luaL_Buffer b; 197 luaL_Buffer b;
172 char *p = luaL_buffinitsize(L, &b, n); 198 char *p = luaL_buffinitsize(L, &b, n);
173 for (i=1; i<=n; i++) { 199 for (i=1; i<=n; i++) {
174 lua_Integer c = luaL_checkinteger(L, i); 200 lua_Unsigned c = (lua_Unsigned)luaL_checkinteger(L, i);
175 luaL_argcheck(L, uchar(c) == c, i, "value out of range"); 201 luaL_argcheck(L, c <= (lua_Unsigned)UCHAR_MAX, i, "value out of range");
176 p[i - 1] = uchar(c); 202 p[i - 1] = uchar(c);
177 } 203 }
178 luaL_pushresultsize(&b, n); 204 luaL_pushresultsize(&b, n);
@@ -695,16 +721,15 @@ static int str_find_aux (lua_State *L, int find) {
695 size_t ls, lp; 721 size_t ls, lp;
696 const char *s = luaL_checklstring(L, 1, &ls); 722 const char *s = luaL_checklstring(L, 1, &ls);
697 const char *p = luaL_checklstring(L, 2, &lp); 723 const char *p = luaL_checklstring(L, 2, &lp);
698 lua_Integer init = posrelat(luaL_optinteger(L, 3, 1), ls); 724 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1;
699 if (init < 1) init = 1; 725 if (init > ls) { /* start after string's end? */
700 else if (init > (lua_Integer)ls + 1) { /* start after string's end? */
701 lua_pushnil(L); /* cannot find anything */ 726 lua_pushnil(L); /* cannot find anything */
702 return 1; 727 return 1;
703 } 728 }
704 /* explicit request or no special characters? */ 729 /* explicit request or no special characters? */
705 if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) { 730 if (find && (lua_toboolean(L, 4) || nospecials(p, lp))) {
706 /* do a plain search */ 731 /* do a plain search */
707 const char *s2 = lmemfind(s + init - 1, ls - (size_t)init + 1, p, lp); 732 const char *s2 = lmemfind(s + init, ls - init, p, lp);
708 if (s2) { 733 if (s2) {
709 lua_pushinteger(L, (s2 - s) + 1); 734 lua_pushinteger(L, (s2 - s) + 1);
710 lua_pushinteger(L, (s2 - s) + lp); 735 lua_pushinteger(L, (s2 - s) + lp);
@@ -713,7 +738,7 @@ static int str_find_aux (lua_State *L, int find) {
713 } 738 }
714 else { 739 else {
715 MatchState ms; 740 MatchState ms;
716 const char *s1 = s + init - 1; 741 const char *s1 = s + init;
717 int anchor = (*p == '^'); 742 int anchor = (*p == '^');
718 if (anchor) { 743 if (anchor) {
719 p++; lp--; /* skip anchor character */ 744 p++; lp--; /* skip anchor character */
@@ -777,11 +802,14 @@ static int gmatch (lua_State *L) {
777 size_t ls, lp; 802 size_t ls, lp;
778 const char *s = luaL_checklstring(L, 1, &ls); 803 const char *s = luaL_checklstring(L, 1, &ls);
779 const char *p = luaL_checklstring(L, 2, &lp); 804 const char *p = luaL_checklstring(L, 2, &lp);
805 size_t init = posrelatI(luaL_optinteger(L, 3, 1), ls) - 1;
780 GMatchState *gm; 806 GMatchState *gm;
781 lua_settop(L, 2); /* keep them on closure to avoid being collected */ 807 lua_settop(L, 2); /* keep strings on closure to avoid being collected */
782 gm = (GMatchState *)lua_newuserdatauv(L, sizeof(GMatchState), 0); 808 gm = (GMatchState *)lua_newuserdatauv(L, sizeof(GMatchState), 0);
809 if (init > ls) /* start after string's end? */
810 init = ls + 1; /* avoid overflows in 's + init' */
783 prepstate(&gm->ms, L, s, ls, p, lp); 811 prepstate(&gm->ms, L, s, ls, p, lp);
784 gm->src = s; gm->p = p; gm->lastmatch = NULL; 812 gm->src = s + init; gm->p = p; gm->lastmatch = NULL;
785 lua_pushcclosure(L, gmatch_aux, 3); 813 lua_pushcclosure(L, gmatch_aux, 3);
786 return 1; 814 return 1;
787} 815}
@@ -1572,7 +1600,7 @@ static int str_unpack (lua_State *L) {
1572 const char *fmt = luaL_checkstring(L, 1); 1600 const char *fmt = luaL_checkstring(L, 1);
1573 size_t ld; 1601 size_t ld;
1574 const char *data = luaL_checklstring(L, 2, &ld); 1602 const char *data = luaL_checklstring(L, 2, &ld);
1575 size_t pos = (size_t)posrelat(luaL_optinteger(L, 3, 1), ld) - 1; 1603 size_t pos = posrelatI(luaL_optinteger(L, 3, 1), ld) - 1;
1576 int n = 0; /* number of results */ 1604 int n = 0; /* number of results */
1577 luaL_argcheck(L, pos <= ld, 3, "initial position out of string"); 1605 luaL_argcheck(L, pos <= ld, 3, "initial position out of string");
1578 initheader(L, &h); 1606 initheader(L, &h);
diff --git a/lvm.c b/lvm.c
index 652095dc..23e7ff70 100644
--- a/lvm.c
+++ b/lvm.c
@@ -991,7 +991,8 @@ void luaV_finishOp (lua_State *L) {
991 991
992/* 992/*
993** Protect code that will finish the loop (returns) or can only raise 993** Protect code that will finish the loop (returns) or can only raise
994** errors. 994** errors. (That is, it will not return to the interpreter main loop
995** after changing the stack or hooks.)
995*/ 996*/
996#define halfProtect(exp) (savepc(L), (exp)) 997#define halfProtect(exp) (savepc(L), (exp))
997 998
@@ -1607,7 +1608,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) {
1607 L->top = ra; 1608 L->top = ra;
1608 halfProtect(luaD_poscall(L, ci, 0)); /* no hurry... */ 1609 halfProtect(luaD_poscall(L, ci, 0)); /* no hurry... */
1609 } 1610 }
1610 else { 1611 else { /* do the 'poscall' here */
1611 int nres = ci->nresults; 1612 int nres = ci->nresults;
1612 L->ci = ci->previous; /* back to caller */ 1613 L->ci = ci->previous; /* back to caller */
1613 L->top = base - 1; 1614 L->top = base - 1;
@@ -1621,7 +1622,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) {
1621 L->top = ra + 1; 1622 L->top = ra + 1;
1622 halfProtect(luaD_poscall(L, ci, 1)); /* no hurry... */ 1623 halfProtect(luaD_poscall(L, ci, 1)); /* no hurry... */
1623 } 1624 }
1624 else { 1625 else { /* do the 'poscall' here */
1625 int nres = ci->nresults; 1626 int nres = ci->nresults;
1626 L->ci = ci->previous; /* back to caller */ 1627 L->ci = ci->previous; /* back to caller */
1627 if (nres == 0) 1628 if (nres == 0)
@@ -1652,8 +1653,8 @@ void luaV_execute (lua_State *L, CallInfo *ci) {
1652 lua_Integer ilimit, initv; 1653 lua_Integer ilimit, initv;
1653 int stopnow; 1654 int stopnow;
1654 if (unlikely(!forlimit(plimit, &ilimit, 1, &stopnow))) { 1655 if (unlikely(!forlimit(plimit, &ilimit, 1, &stopnow))) {
1655 savestate(L, ci); /* for the error message */ 1656 savestate(L, ci); /* for the error message */
1656 luaG_forerror(L, plimit, "limit"); 1657 luaG_forerror(L, plimit, "limit");
1657 } 1658 }
1658 initv = (stopnow ? 0 : ivalue(init)); 1659 initv = (stopnow ? 0 : ivalue(init));
1659 setivalue(plimit, ilimit); 1660 setivalue(plimit, ilimit);
@@ -1717,8 +1718,7 @@ void luaV_execute (lua_State *L, CallInfo *ci) {
1717 vmbreak; 1718 vmbreak;
1718 } 1719 }
1719 vmcase(OP_TFORPREP) { 1720 vmcase(OP_TFORPREP) {
1720 /* is 'toclose' not nil? */ 1721 if (!ttisnil(s2v(ra + 3))) { /* is 'toclose' not nil? */
1721 if (!ttisnil(s2v(ra + 3))) {
1722 /* create to-be-closed upvalue for it */ 1722 /* create to-be-closed upvalue for it */
1723 halfProtect(luaF_newtbcupval(L, ra + 3)); 1723 halfProtect(luaF_newtbcupval(L, ra + 3));
1724 } 1724 }
diff --git a/manual/manual.of b/manual/manual.of
index b9ab1ebe..421d04de 100644
--- a/manual/manual.of
+++ b/manual/manual.of
@@ -83,25 +83,10 @@ it usually represents the absence of a useful value.
83The type @emph{boolean} has two values, @false and @true. 83The type @emph{boolean} has two values, @false and @true.
84Both @nil and @false make a condition false; 84Both @nil and @false make a condition false;
85any other value makes it true. 85any other value makes it true.
86The type @emph{number} represents both
87integer numbers and real (floating-point) numbers.
88The type @emph{string} represents immutable sequences of bytes.
89@index{eight-bit clean}
90Lua is 8-bit clean:
91strings can contain any 8-bit value,
92including @x{embedded zeros} (@Char{\0}).
93Lua is also encoding-agnostic;
94it makes no assumptions about the contents of a string.
95 86
96The type @emph{number} uses two internal representations, 87The type @emph{number} represents both
97or two @x{subtypes}, 88integer numbers and real (floating-point) numbers,
98one called @def{integer} and the other called @def{float}. 89using two @x{subtypes}: @def{integer} and @def{float}.
99Lua has explicit rules about when each representation is used,
100but it also converts between them automatically as needed @see{coercion}.
101Therefore,
102the programmer may choose to mostly ignore the difference
103between integers and floats
104or to assume complete control over the representation of each number.
105Standard Lua uses 64-bit integers and double-precision (64-bit) floats, 90Standard Lua uses 64-bit integers and double-precision (64-bit) floats,
106but you can also compile Lua so that it 91but you can also compile Lua so that it
107uses 32-bit integers and/or single-precision (32-bit) floats. 92uses 32-bit integers and/or single-precision (32-bit) floats.
@@ -110,6 +95,22 @@ is particularly attractive
110for small machines and embedded systems. 95for small machines and embedded systems.
111(See macro @id{LUA_32BITS} in file @id{luaconf.h}.) 96(See macro @id{LUA_32BITS} in file @id{luaconf.h}.)
112 97
98Lua has explicit rules about when each subtype is used,
99but it also converts between them automatically as needed @see{coercion}.
100Therefore,
101the programmer may choose to mostly ignore the difference
102between integers and floats
103or to assume complete control over the representation of each number.
104
105The type @emph{string} represents immutable sequences of bytes.
106@index{eight-bit clean}
107Lua is 8-bit clean:
108strings can contain any 8-bit value,
109including @x{embedded zeros} (@Char{\0}).
110Lua is also encoding-agnostic;
111it makes no assumptions about the contents of a string.
112The length of any string in Lua must fit in a Lua integer.
113
113Lua can call (and manipulate) functions written in Lua and 114Lua can call (and manipulate) functions written in Lua and
114functions written in C @see{functioncall}. 115functions written in C @see{functioncall}.
115Both are represented by the type @emph{function}. 116Both are represented by the type @emph{function}.
@@ -6788,13 +6789,16 @@ the string argument should not contain @x{embedded zeros}.
6788 6789
6789} 6790}
6790 6791
6791@LibEntry{string.gmatch (s, pattern)| 6792@LibEntry{string.gmatch (s, pattern [, init])|
6792Returns an iterator function that, 6793Returns an iterator function that,
6793each time it is called, 6794each time it is called,
6794returns the next captures from @id{pattern} @see{pm} 6795returns the next captures from @id{pattern} @see{pm}
6795over the string @id{s}. 6796over the string @id{s}.
6796If @id{pattern} specifies no captures, 6797If @id{pattern} specifies no captures,
6797then the whole match is produced in each call. 6798then the whole match is produced in each call.
6799A third, optional numeric argument @id{init} specifies
6800where to start the search;
6801its default value @N{is 1} and can be negative.
6798 6802
6799As an example, the following loop 6803As an example, the following loop
6800will iterate over all the words from string @id{s}, 6804will iterate over all the words from string @id{s},
diff --git a/testes/pm.lua b/testes/pm.lua
index 1afaccf6..8cc8772e 100644
--- a/testes/pm.lua
+++ b/testes/pm.lua
@@ -297,6 +297,35 @@ for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end
297assert(a == 3) 297assert(a == 3)
298 298
299 299
300do -- init parameter in gmatch
301 local s = 0
302 for k in string.gmatch("10 20 30", "%d+", 3) do
303 s = s + tonumber(k)
304 end
305 assert(s == 50)
306
307 s = 0
308 for k in string.gmatch("11 21 31", "%d+", -4) do
309 s = s + tonumber(k)
310 end
311 assert(s == 32)
312
313 -- there is an empty string at the end of the subject
314 s = 0
315 for k in string.gmatch("11 21 31", "%w*", 9) do
316 s = s + 1
317 end
318 assert(s == 1)
319
320 -- there are no empty strings after the end of the subject
321 s = 0
322 for k in string.gmatch("11 21 31", "%w*", 10) do
323 s = s + 1
324 end
325 assert(s == 0)
326end
327
328
300-- tests for `%f' (`frontiers') 329-- tests for `%f' (`frontiers')
301 330
302assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x") 331assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x")
diff --git a/testes/strings.lua b/testes/strings.lua
index 587a0e06..88480924 100644
--- a/testes/strings.lua
+++ b/testes/strings.lua
@@ -94,6 +94,11 @@ assert(string.char(string.byte("\xe4l\0ķu", 1, -1)) == "\xe4l\0ķu")
94assert(string.char(string.byte("\xe4l\0ķu", 1, 0)) == "") 94assert(string.char(string.byte("\xe4l\0ķu", 1, 0)) == "")
95assert(string.char(string.byte("\xe4l\0ķu", -10, 100)) == "\xe4l\0ķu") 95assert(string.char(string.byte("\xe4l\0ķu", -10, 100)) == "\xe4l\0ķu")
96 96
97checkerror("out of range", string.char, 256)
98checkerror("out of range", string.char, -1)
99checkerror("out of range", string.char, math.maxinteger)
100checkerror("out of range", string.char, math.mininteger)
101
97assert(string.upper("ab\0c") == "AB\0C") 102assert(string.upper("ab\0c") == "AB\0C")
98assert(string.lower("\0ABCc%$") == "\0abcc%$") 103assert(string.lower("\0ABCc%$") == "\0abcc%$")
99assert(string.rep('teste', 0) == '') 104assert(string.rep('teste', 0) == '')
diff --git a/testes/tpack.lua b/testes/tpack.lua
index 4c5fc7f7..2b9953f8 100644
--- a/testes/tpack.lua
+++ b/testes/tpack.lua
@@ -314,9 +314,7 @@ do -- testing initial position
314 for i = 1, #x + 1 do 314 for i = 1, #x + 1 do
315 assert(unpack("c0", x, i) == "") 315 assert(unpack("c0", x, i) == "")
316 end 316 end
317 checkerror("out of string", unpack, "c0", x, 0)
318 checkerror("out of string", unpack, "c0", x, #x + 2) 317 checkerror("out of string", unpack, "c0", x, #x + 2)
319 checkerror("out of string", unpack, "c0", x, -(#x + 1))
320 318
321end 319end
322 320