diff options
author | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-11-09 17:05:42 -0300 |
---|---|---|
committer | Roberto Ierusalimschy <roberto@inf.puc-rio.br> | 2023-11-09 17:05:42 -0300 |
commit | 024f9064f1b43758eb36aba52547edc0312bf4ba (patch) | |
tree | 9d8609112058e885196a581f0736fbdd94f7f94d | |
parent | 7f4906f565ab9f8b1125107a3abae3d759f3ecf2 (diff) | |
download | lua-024f9064f1b43758eb36aba52547edc0312bf4ba.tar.gz lua-024f9064f1b43758eb36aba52547edc0312bf4ba.tar.bz2 lua-024f9064f1b43758eb36aba52547edc0312bf4ba.zip |
External strings
Strings can use external buffers to store their contents.
Diffstat (limited to '')
-rw-r--r-- | lapi.c | 14 | ||||
-rw-r--r-- | lgc.c | 4 | ||||
-rw-r--r-- | lobject.h | 8 | ||||
-rw-r--r-- | lstring.c | 78 | ||||
-rw-r--r-- | lstring.h | 10 | ||||
-rw-r--r-- | ltests.c | 33 | ||||
-rw-r--r-- | lua.h | 2 | ||||
-rw-r--r-- | manual/manual.of | 34 | ||||
-rw-r--r-- | testes/strings.lua | 26 |
9 files changed, 195 insertions, 14 deletions
@@ -535,6 +535,20 @@ LUA_API const char *lua_pushlstring (lua_State *L, const char *s, size_t len) { | |||
535 | } | 535 | } |
536 | 536 | ||
537 | 537 | ||
538 | LUA_API const char *lua_pushextlstring (lua_State *L, | ||
539 | const char *s, size_t len, lua_Alloc falloc, void *ud) { | ||
540 | TString *ts; | ||
541 | lua_lock(L); | ||
542 | api_check(L, s[len] == '\0', "string not ending with zero"); | ||
543 | ts = luaS_newextlstr (L, s, len, falloc, ud); | ||
544 | setsvalue2s(L, L->top.p, ts); | ||
545 | api_incr_top(L); | ||
546 | luaC_checkGC(L); | ||
547 | lua_unlock(L); | ||
548 | return getstr(ts); | ||
549 | } | ||
550 | |||
551 | |||
538 | LUA_API const char *lua_pushstring (lua_State *L, const char *s) { | 552 | LUA_API const char *lua_pushstring (lua_State *L, const char *s) { |
539 | lua_lock(L); | 553 | lua_lock(L); |
540 | if (s == NULL) | 554 | if (s == NULL) |
@@ -813,7 +813,9 @@ static void freeobj (lua_State *L, GCObject *o) { | |||
813 | } | 813 | } |
814 | case LUA_VLNGSTR: { | 814 | case LUA_VLNGSTR: { |
815 | TString *ts = gco2ts(o); | 815 | TString *ts = gco2ts(o); |
816 | luaM_freemem(L, ts, sizestrlng(ts->u.lnglen)); | 816 | if (ts->shrlen == LSTRMEM) /* must free external string? */ |
817 | (*ts->falloc)(ts->ud, ts->contents, ts->u.lnglen + 1, 0); | ||
818 | luaM_freemem(L, ts, luaS_sizelngstr(ts->u.lnglen, ts->shrlen)); | ||
817 | break; | 819 | break; |
818 | } | 820 | } |
819 | default: lua_assert(0); | 821 | default: lua_assert(0); |
@@ -382,6 +382,12 @@ typedef struct GCObject { | |||
382 | #define setsvalue2n setsvalue | 382 | #define setsvalue2n setsvalue |
383 | 383 | ||
384 | 384 | ||
385 | /* Kinds of long strings (stored in 'shrlen') */ | ||
386 | #define LSTRREG -1 /* regular long string */ | ||
387 | #define LSTRFIX -2 /* fixed external long string */ | ||
388 | #define LSTRMEM -3 /* external long string with deallocation */ | ||
389 | |||
390 | |||
385 | /* | 391 | /* |
386 | ** Header for a string value. | 392 | ** Header for a string value. |
387 | */ | 393 | */ |
@@ -395,6 +401,8 @@ typedef struct TString { | |||
395 | struct TString *hnext; /* linked list for hash table */ | 401 | struct TString *hnext; /* linked list for hash table */ |
396 | } u; | 402 | } u; |
397 | char *contents; /* pointer to content in long strings */ | 403 | char *contents; /* pointer to content in long strings */ |
404 | lua_Alloc falloc; /* deallocation function for external strings */ | ||
405 | void *ud; /* user data for external strings */ | ||
398 | } TString; | 406 | } TString; |
399 | 407 | ||
400 | 408 | ||
@@ -136,6 +136,20 @@ void luaS_init (lua_State *L) { | |||
136 | } | 136 | } |
137 | 137 | ||
138 | 138 | ||
139 | size_t luaS_sizelngstr (size_t len, int kind) { | ||
140 | switch (kind) { | ||
141 | case LSTRREG: /* regular long string */ | ||
142 | /* don't need 'falloc'/'ud', but need space for content */ | ||
143 | return offsetof(TString, falloc) + (len + 1) * sizeof(char); | ||
144 | case LSTRFIX: /* fixed external long string */ | ||
145 | /* don't need 'falloc'/'ud' */ | ||
146 | return offsetof(TString, falloc); | ||
147 | default: /* external long string with deallocation */ | ||
148 | lua_assert(kind == LSTRMEM); | ||
149 | return sizeof(TString); | ||
150 | } | ||
151 | } | ||
152 | |||
139 | 153 | ||
140 | /* | 154 | /* |
141 | ** creates a new string object | 155 | ** creates a new string object |
@@ -153,11 +167,11 @@ static TString *createstrobj (lua_State *L, size_t totalsize, int tag, | |||
153 | 167 | ||
154 | 168 | ||
155 | TString *luaS_createlngstrobj (lua_State *L, size_t l) { | 169 | TString *luaS_createlngstrobj (lua_State *L, size_t l) { |
156 | size_t totalsize = sizestrlng(l); | 170 | size_t totalsize = luaS_sizelngstr(l, LSTRREG); |
157 | TString *ts = createstrobj(L, totalsize, LUA_VLNGSTR, G(L)->seed); | 171 | TString *ts = createstrobj(L, totalsize, LUA_VLNGSTR, G(L)->seed); |
158 | ts->u.lnglen = l; | 172 | ts->u.lnglen = l; |
159 | ts->shrlen = -1; /* signals that it is a long string */ | 173 | ts->shrlen = LSTRREG; /* signals that it is a regular long string */ |
160 | ts->contents = cast_charp(ts) + sizeof(TString); | 174 | ts->contents = cast_charp(ts) + offsetof(TString, falloc); |
161 | ts->contents[l] = '\0'; /* ending 0 */ | 175 | ts->contents[l] = '\0'; /* ending 0 */ |
162 | return ts; | 176 | return ts; |
163 | } | 177 | } |
@@ -275,3 +289,61 @@ Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue) { | |||
275 | return u; | 289 | return u; |
276 | } | 290 | } |
277 | 291 | ||
292 | |||
293 | struct NewExt { | ||
294 | int kind; | ||
295 | const char *s; | ||
296 | size_t len; | ||
297 | TString *ts; /* output */ | ||
298 | }; | ||
299 | |||
300 | |||
301 | static void f_newext (lua_State *L, void *ud) { | ||
302 | struct NewExt *ne = cast(struct NewExt *, ud); | ||
303 | size_t size = luaS_sizelngstr(0, ne->kind); | ||
304 | ne->ts = createstrobj(L, size, LUA_VLNGSTR, G(L)->seed); | ||
305 | } | ||
306 | |||
307 | |||
308 | static void f_pintern (lua_State *L, void *ud) { | ||
309 | struct NewExt *ne = cast(struct NewExt *, ud); | ||
310 | ne->ts = internshrstr(L, ne->s, ne->len); | ||
311 | } | ||
312 | |||
313 | |||
314 | TString *luaS_newextlstr (lua_State *L, | ||
315 | const char *s, size_t len, lua_Alloc falloc, void *ud) { | ||
316 | struct NewExt ne; | ||
317 | if (len <= LUAI_MAXSHORTLEN) { /* short string? */ | ||
318 | ne.s = s; ne.len = len; | ||
319 | if (!falloc) | ||
320 | f_pintern(L, &ne); /* just internalize string */ | ||
321 | else { | ||
322 | int status = luaD_rawrunprotected(L, f_pintern, &ne); | ||
323 | (*falloc)(ud, cast_voidp(s), len + 1, 0); /* free external string */ | ||
324 | if (status != LUA_OK) /* memory error? */ | ||
325 | luaM_error(L); /* re-raise memory error */ | ||
326 | } | ||
327 | return ne.ts; | ||
328 | } | ||
329 | /* "normal" case: long strings */ | ||
330 | if (!falloc) { | ||
331 | ne.kind = LSTRFIX; | ||
332 | f_newext(L, &ne); /* just create header */ | ||
333 | } | ||
334 | else { | ||
335 | ne.kind = LSTRMEM; | ||
336 | if (luaD_rawrunprotected(L, f_newext, &ne) != LUA_OK) { /* mem. error? */ | ||
337 | (*falloc)(ud, cast_voidp(s), len + 1, 0); /* free external string */ | ||
338 | luaM_error(L); /* re-raise memory error */ | ||
339 | } | ||
340 | ne.ts->falloc = falloc; | ||
341 | ne.ts->ud = ud; | ||
342 | } | ||
343 | ne.ts->shrlen = ne.kind; | ||
344 | ne.ts->u.lnglen = len; | ||
345 | ne.ts->contents = cast_charp(s); | ||
346 | return ne.ts; | ||
347 | } | ||
348 | |||
349 | |||
@@ -26,12 +26,6 @@ | |||
26 | #define sizestrshr(l) \ | 26 | #define sizestrshr(l) \ |
27 | (offsetof(TString, contents) + ((l) + 1) * sizeof(char)) | 27 | (offsetof(TString, contents) + ((l) + 1) * sizeof(char)) |
28 | 28 | ||
29 | /* | ||
30 | ** Size of a long TString: Size of the header plus space for the string | ||
31 | ** itself (including final '\0'). | ||
32 | */ | ||
33 | #define sizestrlng(l) (sizeof(TString) + ((l) + 1) * sizeof(char)) | ||
34 | |||
35 | 29 | ||
36 | #define luaS_newliteral(L, s) (luaS_newlstr(L, "" s, \ | 30 | #define luaS_newliteral(L, s) (luaS_newlstr(L, "" s, \ |
37 | (sizeof(s)/sizeof(char))-1)) | 31 | (sizeof(s)/sizeof(char))-1)) |
@@ -60,6 +54,8 @@ LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue); | |||
60 | LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l); | 54 | LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l); |
61 | LUAI_FUNC TString *luaS_new (lua_State *L, const char *str); | 55 | LUAI_FUNC TString *luaS_new (lua_State *L, const char *str); |
62 | LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l); | 56 | LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l); |
63 | 57 | LUAI_FUNC TString *luaS_newextlstr (lua_State *L, | |
58 | const char *s, size_t len, lua_Alloc falloc, void *ud); | ||
59 | LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind); | ||
64 | 60 | ||
65 | #endif | 61 | #endif |
@@ -1277,6 +1277,37 @@ static int checkpanic (lua_State *L) { | |||
1277 | } | 1277 | } |
1278 | 1278 | ||
1279 | 1279 | ||
1280 | static int externKstr (lua_State *L) { | ||
1281 | size_t len; | ||
1282 | const char *s = luaL_checklstring(L, 1, &len); | ||
1283 | lua_pushextlstring(L, s, len, NULL, NULL); | ||
1284 | return 1; | ||
1285 | } | ||
1286 | |||
1287 | |||
1288 | /* | ||
1289 | ** Create a buffer with the content of a given string and then | ||
1290 | ** create an external string using that buffer. Use the allocation | ||
1291 | ** function from Lua to create and free the buffer. | ||
1292 | */ | ||
1293 | static int externstr (lua_State *L) { | ||
1294 | size_t len; | ||
1295 | const char *s = luaL_checklstring(L, 1, &len); | ||
1296 | void *ud; | ||
1297 | lua_Alloc allocf = lua_getallocf(L, &ud); /* get allocation function */ | ||
1298 | /* create the buffer */ | ||
1299 | char *buff = cast_charp((*allocf)(ud, NULL, 0, len + 1)); | ||
1300 | if (buff == NULL) { /* memory error? */ | ||
1301 | lua_pushliteral(L, "not enough memory"); | ||
1302 | lua_error(L); /* raise a memory error */ | ||
1303 | } | ||
1304 | /* copy string content to buffer, including ending 0 */ | ||
1305 | memcpy(buff, s, (len + 1) * sizeof(char)); | ||
1306 | /* create external string */ | ||
1307 | lua_pushextlstring(L, buff, len, allocf, ud); | ||
1308 | return 1; | ||
1309 | } | ||
1310 | |||
1280 | 1311 | ||
1281 | /* | 1312 | /* |
1282 | ** {==================================================================== | 1313 | ** {==================================================================== |
@@ -1949,6 +1980,8 @@ static const struct luaL_Reg tests_funcs[] = { | |||
1949 | {"udataval", udataval}, | 1980 | {"udataval", udataval}, |
1950 | {"unref", unref}, | 1981 | {"unref", unref}, |
1951 | {"upvalue", upvalue}, | 1982 | {"upvalue", upvalue}, |
1983 | {"externKstr", externKstr}, | ||
1984 | {"externstr", externstr}, | ||
1952 | {NULL, NULL} | 1985 | {NULL, NULL} |
1953 | }; | 1986 | }; |
1954 | 1987 | ||
@@ -244,6 +244,8 @@ LUA_API void (lua_pushnil) (lua_State *L); | |||
244 | LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n); | 244 | LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n); |
245 | LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n); | 245 | LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n); |
246 | LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len); | 246 | LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len); |
247 | LUA_API const char *(lua_pushextlstring) (lua_State *L, | ||
248 | const char *s, size_t len, lua_Alloc falloc, void *ud); | ||
247 | LUA_API const char *(lua_pushstring) (lua_State *L, const char *s); | 249 | LUA_API const char *(lua_pushstring) (lua_State *L, const char *s); |
248 | LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt, | 250 | LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt, |
249 | va_list argp); | 251 | va_list argp); |
diff --git a/manual/manual.of b/manual/manual.of index 3eab69fa..9d6a7fd9 100644 --- a/manual/manual.of +++ b/manual/manual.of | |||
@@ -3908,6 +3908,40 @@ This function is equivalent to @Lid{lua_pushcclosure} with no upvalues. | |||
3908 | 3908 | ||
3909 | } | 3909 | } |
3910 | 3910 | ||
3911 | @APIEntry{const char *(lua_pushextlstring) (lua_State *L, | ||
3912 | const char *s, size_t len, lua_Alloc falloc, void *ud);| | ||
3913 | @apii{0,1,m} | ||
3914 | |||
3915 | Creates an @emphx{external string}, | ||
3916 | that is, a string that uses memory not managed by Lua. | ||
3917 | The pointer @id{s} points to the external buffer | ||
3918 | holding the string content, | ||
3919 | and @id{len} is the length of the string. | ||
3920 | The string should have a zero at its end, | ||
3921 | that is, the condition @T{s[len] == '\0'} should hold. | ||
3922 | |||
3923 | If @id{falloc} is different from @id{NULL}, | ||
3924 | that function will be called by Lua | ||
3925 | when the external buffer is no longer needed. | ||
3926 | The contents of the buffer should not change before this call. | ||
3927 | The function will be called with the given @id{ud}, | ||
3928 | the string @id{s} as the block, | ||
3929 | the length plus one (to account for the ending zero) as the old size, | ||
3930 | and 0 as the new size. | ||
3931 | |||
3932 | Lua always @x{internalizes} strings with lengths up to 40 characters. | ||
3933 | So, for strings in that range, | ||
3934 | this function will immediately internalize the string | ||
3935 | and call @id{falloc} to free the buffer. | ||
3936 | |||
3937 | Even when using an external buffer, | ||
3938 | Lua still has to allocate a header for the string. | ||
3939 | In case of a memory-allocation error, | ||
3940 | Lua will call @id{falloc} before raising the error. | ||
3941 | |||
3942 | } | ||
3943 | |||
3944 | |||
3911 | @APIEntry{const char *lua_pushfstring (lua_State *L, const char *fmt, ...);| | 3945 | @APIEntry{const char *lua_pushfstring (lua_State *L, const char *fmt, ...);| |
3912 | @apii{0,1,v} | 3946 | @apii{0,1,v} |
3913 | 3947 | ||
diff --git a/testes/strings.lua b/testes/strings.lua index 90983edd..c124b369 100644 --- a/testes/strings.lua +++ b/testes/strings.lua | |||
@@ -157,6 +157,12 @@ else -- compatible coercion | |||
157 | assert(tostring(-1203 + 0.0) == "-1203") | 157 | assert(tostring(-1203 + 0.0) == "-1203") |
158 | end | 158 | end |
159 | 159 | ||
160 | |||
161 | local function topointer (s) | ||
162 | return string.format("%p", s) | ||
163 | end | ||
164 | |||
165 | |||
160 | do -- tests for '%p' format | 166 | do -- tests for '%p' format |
161 | -- not much to test, as C does not specify what '%p' does. | 167 | -- not much to test, as C does not specify what '%p' does. |
162 | -- ("The value of the pointer is converted to a sequence of printing | 168 | -- ("The value of the pointer is converted to a sequence of printing |
@@ -180,18 +186,18 @@ do -- tests for '%p' format | |||
180 | 186 | ||
181 | do | 187 | do |
182 | local t1 = {}; local t2 = {} | 188 | local t1 = {}; local t2 = {} |
183 | assert(string.format("%p", t1) ~= string.format("%p", t2)) | 189 | assert(topointer(t1) ~= topointer(t2)) |
184 | end | 190 | end |
185 | 191 | ||
186 | do -- short strings are internalized | 192 | do -- short strings are internalized |
187 | local s1 = string.rep("a", 10) | 193 | local s1 = string.rep("a", 10) |
188 | local s2 = string.rep("aa", 5) | 194 | local s2 = string.rep("aa", 5) |
189 | assert(string.format("%p", s1) == string.format("%p", s2)) | 195 | assert(topointer(s1) == topointer(s2)) |
190 | end | 196 | end |
191 | 197 | ||
192 | do -- long strings aren't internalized | 198 | do -- long strings aren't internalized |
193 | local s1 = string.rep("a", 300); local s2 = string.rep("a", 300) | 199 | local s1 = string.rep("a", 300); local s2 = string.rep("a", 300) |
194 | assert(string.format("%p", s1) ~= string.format("%p", s2)) | 200 | assert(topointer(s1) ~= topointer(s2)) |
195 | end | 201 | end |
196 | end | 202 | end |
197 | 203 | ||
@@ -521,6 +527,20 @@ else | |||
521 | testpfs("P", str, {}) | 527 | testpfs("P", str, {}) |
522 | end | 528 | end |
523 | 529 | ||
530 | if T == nil then | ||
531 | (Message or print)('\n >>> testC not active: skipping external strings tests <<<\n') | ||
532 | else | ||
533 | print("testing external strings") | ||
534 | local x = T.externKstr("hello") -- external fixed short string | ||
535 | assert(x == "hello") | ||
536 | local x = T.externstr("hello") -- external allocated short string | ||
537 | assert(x == "hello") | ||
538 | x = string.rep("a", 100) -- long string | ||
539 | local y = T.externKstr(x) -- external fixed long string | ||
540 | assert(y == x) | ||
541 | local z = T.externstr(x) -- external allocated long string | ||
542 | assert(z == y) | ||
543 | end | ||
524 | 544 | ||
525 | print('OK') | 545 | print('OK') |
526 | 546 | ||