From 024f9064f1b43758eb36aba52547edc0312bf4ba Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 9 Nov 2023 17:05:42 -0300 Subject: External strings Strings can use external buffers to store their contents. --- lapi.c | 14 ++++++++++ lgc.c | 4 ++- lobject.h | 8 ++++++ lstring.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++--- lstring.h | 10 +++---- ltests.c | 33 +++++++++++++++++++++++ lua.h | 2 ++ manual/manual.of | 34 ++++++++++++++++++++++++ testes/strings.lua | 26 +++++++++++++++--- 9 files changed, 195 insertions(+), 14 deletions(-) diff --git a/lapi.c b/lapi.c index 34b335fd..2aaa6505 100644 --- a/lapi.c +++ b/lapi.c @@ -535,6 +535,20 @@ LUA_API const char *lua_pushlstring (lua_State *L, const char *s, size_t len) { } +LUA_API const char *lua_pushextlstring (lua_State *L, + const char *s, size_t len, lua_Alloc falloc, void *ud) { + TString *ts; + lua_lock(L); + api_check(L, s[len] == '\0', "string not ending with zero"); + ts = luaS_newextlstr (L, s, len, falloc, ud); + setsvalue2s(L, L->top.p, ts); + api_incr_top(L); + luaC_checkGC(L); + lua_unlock(L); + return getstr(ts); +} + + LUA_API const char *lua_pushstring (lua_State *L, const char *s) { lua_lock(L); if (s == NULL) diff --git a/lgc.c b/lgc.c index e3fcaa3e..3884aad0 100644 --- a/lgc.c +++ b/lgc.c @@ -813,7 +813,9 @@ static void freeobj (lua_State *L, GCObject *o) { } case LUA_VLNGSTR: { TString *ts = gco2ts(o); - luaM_freemem(L, ts, sizestrlng(ts->u.lnglen)); + if (ts->shrlen == LSTRMEM) /* must free external string? 
*/ + (*ts->falloc)(ts->ud, ts->contents, ts->u.lnglen + 1, 0); + luaM_freemem(L, ts, luaS_sizelngstr(ts->u.lnglen, ts->shrlen)); break; } default: lua_assert(0); diff --git a/lobject.h b/lobject.h index f76d26a6..8688a842 100644 --- a/lobject.h +++ b/lobject.h @@ -382,6 +382,12 @@ typedef struct GCObject { #define setsvalue2n setsvalue +/* Kinds of long strings (stored in 'shrlen') */ +#define LSTRREG -1 /* regular long string */ +#define LSTRFIX -2 /* fixed external long string */ +#define LSTRMEM -3 /* external long string with deallocation */ + + /* ** Header for a string value. */ @@ -395,6 +401,8 @@ typedef struct TString { struct TString *hnext; /* linked list for hash table */ } u; char *contents; /* pointer to content in long strings */ + lua_Alloc falloc; /* deallocation function for external strings */ + void *ud; /* user data for external strings */ } TString; diff --git a/lstring.c b/lstring.c index c4b3c7ba..8701b705 100644 --- a/lstring.c +++ b/lstring.c @@ -136,6 +136,20 @@ void luaS_init (lua_State *L) { } +size_t luaS_sizelngstr (size_t len, int kind) { + switch (kind) { + case LSTRREG: /* regular long string */ + /* don't need 'falloc'/'ud', but need space for content */ + return offsetof(TString, falloc) + (len + 1) * sizeof(char); + case LSTRFIX: /* fixed external long string */ + /* don't need 'falloc'/'ud' */ + return offsetof(TString, falloc); + default: /* external long string with deallocation */ + lua_assert(kind == LSTRMEM); + return sizeof(TString); + } +} + /* ** creates a new string object @@ -153,11 +167,11 @@ static TString *createstrobj (lua_State *L, size_t totalsize, int tag, TString *luaS_createlngstrobj (lua_State *L, size_t l) { - size_t totalsize = sizestrlng(l); + size_t totalsize = luaS_sizelngstr(l, LSTRREG); TString *ts = createstrobj(L, totalsize, LUA_VLNGSTR, G(L)->seed); ts->u.lnglen = l; - ts->shrlen = -1; /* signals that it is a long string */ - ts->contents = cast_charp(ts) + sizeof(TString); + ts->shrlen = LSTRREG; 
/* signals that it is a regular long string */ + ts->contents = cast_charp(ts) + offsetof(TString, falloc); ts->contents[l] = '\0'; /* ending 0 */ return ts; } @@ -275,3 +289,61 @@ Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue) { return u; } + +struct NewExt { + int kind; + const char *s; + size_t len; + TString *ts; /* output */ +}; + + +static void f_newext (lua_State *L, void *ud) { + struct NewExt *ne = cast(struct NewExt *, ud); + size_t size = luaS_sizelngstr(0, ne->kind); + ne->ts = createstrobj(L, size, LUA_VLNGSTR, G(L)->seed); +} + + +static void f_pintern (lua_State *L, void *ud) { + struct NewExt *ne = cast(struct NewExt *, ud); + ne->ts = internshrstr(L, ne->s, ne->len); +} + + +TString *luaS_newextlstr (lua_State *L, + const char *s, size_t len, lua_Alloc falloc, void *ud) { + struct NewExt ne; + if (len <= LUAI_MAXSHORTLEN) { /* short string? */ + ne.s = s; ne.len = len; + if (!falloc) + f_pintern(L, &ne); /* just internalize string */ + else { + int status = luaD_rawrunprotected(L, f_pintern, &ne); + (*falloc)(ud, cast_voidp(s), len + 1, 0); /* free external string */ + if (status != LUA_OK) /* memory error? */ + luaM_error(L); /* re-raise memory error */ + } + return ne.ts; + } + /* "normal" case: long strings */ + if (!falloc) { + ne.kind = LSTRFIX; + f_newext(L, &ne); /* just create header */ + } + else { + ne.kind = LSTRMEM; + if (luaD_rawrunprotected(L, f_newext, &ne) != LUA_OK) { /* mem. error? 
*/ + (*falloc)(ud, cast_voidp(s), len + 1, 0); /* free external string */ + luaM_error(L); /* re-raise memory error */ + } + ne.ts->falloc = falloc; + ne.ts->ud = ud; + } + ne.ts->shrlen = ne.kind; + ne.ts->u.lnglen = len; + ne.ts->contents = cast_charp(s); + return ne.ts; +} + + diff --git a/lstring.h b/lstring.h index 069e64b7..e321bd43 100644 --- a/lstring.h +++ b/lstring.h @@ -26,12 +26,6 @@ #define sizestrshr(l) \ (offsetof(TString, contents) + ((l) + 1) * sizeof(char)) -/* -** Size of a long TString: Size of the header plus space for the string -** itself (including final '\0'). -*/ -#define sizestrlng(l) (sizeof(TString) + ((l) + 1) * sizeof(char)) - #define luaS_newliteral(L, s) (luaS_newlstr(L, "" s, \ (sizeof(s)/sizeof(char))-1)) @@ -60,6 +54,8 @@ LUAI_FUNC Udata *luaS_newudata (lua_State *L, size_t s, int nuvalue); LUAI_FUNC TString *luaS_newlstr (lua_State *L, const char *str, size_t l); LUAI_FUNC TString *luaS_new (lua_State *L, const char *str); LUAI_FUNC TString *luaS_createlngstrobj (lua_State *L, size_t l); - +LUAI_FUNC TString *luaS_newextlstr (lua_State *L, + const char *s, size_t len, lua_Alloc falloc, void *ud); +LUAI_FUNC size_t luaS_sizelngstr (size_t len, int kind); #endif diff --git a/ltests.c b/ltests.c index 6f556dc9..94bd4e33 100644 --- a/ltests.c +++ b/ltests.c @@ -1277,6 +1277,37 @@ static int checkpanic (lua_State *L) { } +static int externKstr (lua_State *L) { + size_t len; + const char *s = luaL_checklstring(L, 1, &len); + lua_pushextlstring(L, s, len, NULL, NULL); + return 1; +} + + +/* +** Create a buffer with the content of a given string and then +** create an external string using that buffer. Use the allocation +** function from Lua to create and free the buffer. 
+*/ +static int externstr (lua_State *L) { + size_t len; + const char *s = luaL_checklstring(L, 1, &len); + void *ud; + lua_Alloc allocf = lua_getallocf(L, &ud); /* get allocation function */ + /* create the buffer */ + char *buff = cast_charp((*allocf)(ud, NULL, 0, len + 1)); + if (buff == NULL) { /* memory error? */ + lua_pushliteral(L, "not enough memory"); + lua_error(L); /* raise a memory error */ + } + /* copy string content to buffer, including ending 0 */ + memcpy(buff, s, (len + 1) * sizeof(char)); + /* create external string */ + lua_pushextlstring(L, buff, len, allocf, ud); + return 1; +} + /* ** {==================================================================== @@ -1949,6 +1980,8 @@ static const struct luaL_Reg tests_funcs[] = { {"udataval", udataval}, {"unref", unref}, {"upvalue", upvalue}, + {"externKstr", externKstr}, + {"externstr", externstr}, {NULL, NULL} }; diff --git a/lua.h b/lua.h index 699b7ca7..ca8d06fe 100644 --- a/lua.h +++ b/lua.h @@ -244,6 +244,8 @@ LUA_API void (lua_pushnil) (lua_State *L); LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n); LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n); LUA_API const char *(lua_pushlstring) (lua_State *L, const char *s, size_t len); +LUA_API const char *(lua_pushextlstring) (lua_State *L, + const char *s, size_t len, lua_Alloc falloc, void *ud); LUA_API const char *(lua_pushstring) (lua_State *L, const char *s); LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt, va_list argp); diff --git a/manual/manual.of b/manual/manual.of index 3eab69fa..9d6a7fd9 100644 --- a/manual/manual.of +++ b/manual/manual.of @@ -3908,6 +3908,40 @@ This function is equivalent to @Lid{lua_pushcclosure} with no upvalues. } +@APIEntry{const char *(lua_pushextlstring) (lua_State *L, + const char *s, size_t len, lua_Alloc falloc, void *ud);| +@apii{0,1,m} + +Creates an @emphx{external string}, +that is, a string that uses memory not managed by Lua. 
+The pointer @id{s} points to the external buffer +holding the string content, +and @id{len} is the length of the string. +The string should have a zero at its end, +that is, the condition @T{s[len] == '\0'} should hold. + +If @id{falloc} is different from @id{NULL}, +that function will be called by Lua +when the external buffer is no longer needed. +The contents of the buffer should not change before this call. +The function will be called with the given @id{ud}, +the string @id{s} as the block, +the length plus one (to account for the ending zero) as the old size, +and 0 as the new size. + +Lua always @x{internalizes} strings with lengths up to 40 characters. +So, for strings in that range, +this function will immediately internalize the string +and call @id{falloc} to free the buffer. + +Even when using an external buffer, +Lua still has to allocate a header for the string. +In case of a memory-allocation error, +Lua will call @id{falloc} before raising the error. + +} + + @APIEntry{const char *lua_pushfstring (lua_State *L, const char *fmt, ...);| @apii{0,1,v} diff --git a/testes/strings.lua b/testes/strings.lua index 90983edd..c124b369 100644 --- a/testes/strings.lua +++ b/testes/strings.lua @@ -157,6 +157,12 @@ else -- compatible coercion assert(tostring(-1203 + 0.0) == "-1203") end + +local function topointer (s) + return string.format("%p", s) +end + + do -- tests for '%p' format -- not much to test, as C does not specify what '%p' does. 
-- ("The value of the pointer is converted to a sequence of printing @@ -180,18 +186,18 @@ do -- tests for '%p' format do local t1 = {}; local t2 = {} - assert(string.format("%p", t1) ~= string.format("%p", t2)) + assert(topointer(t1) ~= topointer(t2)) end do -- short strings are internalized local s1 = string.rep("a", 10) local s2 = string.rep("aa", 5) - assert(string.format("%p", s1) == string.format("%p", s2)) + assert(topointer(s1) == topointer(s2)) end do -- long strings aren't internalized local s1 = string.rep("a", 300); local s2 = string.rep("a", 300) - assert(string.format("%p", s1) ~= string.format("%p", s2)) + assert(topointer(s1) ~= topointer(s2)) end end @@ -521,6 +527,20 @@ else testpfs("P", str, {}) end +if T == nil then + (Message or print)('\n >>> testC not active: skipping external strings tests <<<\n') +else + print("testing external strings") + local x = T.externKstr("hello") -- external fixed short string + assert(x == "hello") + local x = T.externstr("hello") -- external allocated short string + assert(x == "hello") + x = string.rep("a", 100) -- long string + local y = T.externKstr(x) -- external fixed long string + assert(y == x) + local z = T.externstr(x) -- external allocated long string + assert(z == y) +end print('OK') -- cgit v1.2.3-55-g6feb