diff options
-rw-r--r-- | doc/ext_buffer.html | 70 | ||||
-rw-r--r-- | src/lib_buffer.c | 60 | ||||
-rw-r--r-- | src/lj_buf.h | 16 | ||||
-rw-r--r-- | src/lj_errmsg.h | 2 | ||||
-rw-r--r-- | src/lj_gc.c | 5 | ||||
-rw-r--r-- | src/lj_obj.h | 2 | ||||
-rw-r--r-- | src/lj_serialize.c | 77 | ||||
-rw-r--r-- | src/lj_serialize.h | 1 | ||||
-rw-r--r-- | src/lj_tab.c | 23 | ||||
-rw-r--r-- | src/lj_tab.h | 23 |
10 files changed, 214 insertions, 65 deletions
diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html index 94af757d..2443fc90 100644 --- a/doc/ext_buffer.html +++ b/doc/ext_buffer.html | |||
@@ -175,14 +175,19 @@ object itself as a convenience. This allows method chaining, e.g.: | |||
175 | 175 | ||
176 | <h2 id="create">Buffer Creation and Management</h2> | 176 | <h2 id="create">Buffer Creation and Management</h2> |
177 | 177 | ||
178 | <h3 id="buffer_new"><tt>local buf = buffer.new([size])</tt></h3> | 178 | <h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br> |
179 | local buf = buffer.new([options])</tt></h3> | ||
179 | <p> | 180 | <p> |
180 | Creates a new buffer object. | 181 | Creates a new buffer object. |
181 | </p> | 182 | </p> |
182 | <p> | 183 | <p> |
183 | The optional <tt>size</tt> argument ensures a minimum initial buffer | 184 | The optional <tt>size</tt> argument ensures a minimum initial buffer |
184 | size. This is strictly an optimization for cases where the required | 185 | size. This is strictly an optimization when the required buffer size is |
185 | buffer size is known beforehand. | 186 | known beforehand. The buffer space will grow as needed, in any case. |
187 | </p> | ||
188 | <p> | ||
189 | The optional table <tt>options</tt> sets various | ||
190 | <a href="#serialize_options">serialization options</a>. | ||
186 | </p> | 191 | </p> |
187 | 192 | ||
188 | <h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3> | 193 | <h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3> |
@@ -205,7 +210,7 @@ immediately. | |||
205 | 210 | ||
206 | <h2 id="write">Buffer Writers</h2> | 211 | <h2 id="write">Buffer Writers</h2> |
207 | 212 | ||
208 | <h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [, ...])</tt></h3> | 213 | <h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3> |
209 | <p> | 214 | <p> |
210 | Appends a string <tt>str</tt>, a number <tt>num</tt> or any object | 215 | Appends a string <tt>str</tt>, a number <tt>num</tt> or any object |
211 | <tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer. | 216 | <tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer. |
@@ -217,7 +222,7 @@ internally. But it still involves a copy. Better combine the buffer | |||
217 | writes to use a single buffer. | 222 | writes to use a single buffer. |
218 | </p> | 223 | </p> |
219 | 224 | ||
220 | <h3 id="buffer_putf"><tt>buf = buf:putf(format, ...)</tt></h3> | 225 | <h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3> |
221 | <p> | 226 | <p> |
222 | Appends the formatted arguments to the buffer. The <tt>format</tt> | 227 | Appends the formatted arguments to the buffer. The <tt>format</tt> |
223 | string supports the same options as <tt>string.format()</tt>. | 228 | string supports the same options as <tt>string.format()</tt>. |
@@ -298,7 +303,7 @@ method, if nothing is added to the buffer (e.g. on error). | |||
298 | Returns the current length of the buffer data in bytes. | 303 | Returns the current length of the buffer data in bytes. |
299 | </p> | 304 | </p> |
300 | 305 | ||
301 | <h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf [...]</tt></h3> | 306 | <h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3> |
302 | <p> | 307 | <p> |
303 | The Lua concatenation operator <tt>..</tt> also accepts buffers, just | 308 | The Lua concatenation operator <tt>..</tt> also accepts buffers, just |
304 | like strings or numbers. It always returns a string and not a buffer. | 309 | like strings or numbers. It always returns a string and not a buffer. |
@@ -319,7 +324,7 @@ Skips (consumes) <tt>len</tt> bytes from the buffer up to the current | |||
319 | length of the buffer data. | 324 | length of the buffer data. |
320 | </p> | 325 | </p> |
321 | 326 | ||
322 | <h3 id="buffer_get"><tt>str, ... = buf:get([len|nil] [,...])</tt></h3> | 327 | <h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3> |
323 | <p> | 328 | <p> |
324 | Consumes the buffer data and returns one or more strings. If called | 329 | Consumes the buffer data and returns one or more strings. If called |
325 | without arguments, the whole buffer data is consumed. If called with a | 330 | without arguments, the whole buffer data is consumed. If called with a |
@@ -444,6 +449,56 @@ data after decoding a single top-level object. The buffer method leaves | |||
444 | any left-over data in the buffer. | 449 | any left-over data in the buffer. |
445 | </p> | 450 | </p> |
446 | 451 | ||
452 | <h3 id="serialize_options">Serialization Options</h3> | ||
453 | <p> | ||
454 | The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain | ||
455 | the following members (all optional): | ||
456 | </p> | ||
457 | <ul> | ||
458 | <li> | ||
459 | <tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that | ||
460 | commonly occur as table keys of objects you are serializing. These keys | ||
461 | are compactly encoded as indexes during serialization. A well chosen | ||
462 | dictionary saves space and improves serialization performance. | ||
463 | </li> | ||
464 | </ul> | ||
465 | <p> | ||
466 | <tt>dict</tt> needs to be an array of strings, starting at index 1 and | ||
467 | without holes (no <tt>nil</tt> in between). The table is anchored in the | ||
468 | buffer object and internally modified into a two-way index (don't do | ||
469 | this yourself, just pass a plain array). The table must not be modified | ||
470 | after it has been passed to <tt>buffer.new()</tt>. | ||
471 | </p> | ||
472 | <p> | ||
473 | The <tt>dict</tt> tables used by the encoder and decoder must be the | ||
474 | same. Put the most common entries at the front. Extend at the end to | ||
475 | ensure backwards-compatibility — older encodings can then still be | ||
476 | read. You may also set some indexes to <tt>false</tt> to explicitly drop | ||
477 | backwards-compatibility. Old encodings that use these indexes will throw | ||
478 | an error when decoded. | ||
479 | </p> | ||
480 | <p> | ||
481 | Note: parsing and preparation of the options table is somewhat | ||
482 | expensive. Create a buffer object only once and recycle it for multiple | ||
483 | uses. Avoid mixing encoder and decoder buffers, since the | ||
484 | <tt>buf:set()</tt> method frees the already allocated buffer space: | ||
485 | </p> | ||
486 | <pre class="code"> | ||
487 | local options = { | ||
488 | dict = { "commonly", "used", "string", "keys" }, | ||
489 | } | ||
490 | local buf_enc = buffer.new(options) | ||
491 | local buf_dec = buffer.new(options) | ||
492 | |||
493 | local function encode(obj) | ||
494 | return buf_enc:reset():encode(obj):get() | ||
495 | end | ||
496 | |||
497 | local function decode(str) | ||
498 | return buf_dec:set(str):decode() | ||
499 | end | ||
500 | </pre> | ||
501 | |||
447 | <h3 id="serialize_stream">Streaming Serialization</h3> | 502 | <h3 id="serialize_stream">Streaming Serialization</h3> |
448 | <p> | 503 | <p> |
449 | In some contexts, it's desirable to do piecewise serialization of large | 504 | In some contexts, it's desirable to do piecewise serialization of large |
@@ -536,6 +591,7 @@ uint64 → 0x11 uint.L // FFI uint64_t | |||
536 | complex → 0x12 re.L im.L // FFI complex | 591 | complex → 0x12 re.L im.L // FFI complex |
537 | 592 | ||
538 | string → (0x20+len).U len*char.B | 593 | string → (0x20+len).U len*char.B |
594 | | 0x0f (index-1).U // Dict entry | ||
539 | 595 | ||
540 | .B = 8 bit | 596 | .B = 8 bit |
541 | .I = 32 bit little-endian | 597 | .I = 32 bit little-endian |
diff --git a/src/lib_buffer.c b/src/lib_buffer.c index 78c4eeb9..f13320c4 100644 --- a/src/lib_buffer.c +++ b/src/lib_buffer.c | |||
@@ -29,9 +29,7 @@ | |||
29 | #include "lj_serialize.h" | 29 | #include "lj_serialize.h" |
30 | #include "lj_lib.h" | 30 | #include "lj_lib.h" |
31 | 31 | ||
32 | /* ------------------------------------------------------------------------ */ | 32 | /* -- Helper functions ---------------------------------------------------- */ |
33 | |||
34 | #define LJLIB_MODULE_buffer_method | ||
35 | 33 | ||
36 | /* Check that the first argument is a string buffer. */ | 34 | /* Check that the first argument is a string buffer. */ |
37 | static SBufExt *buffer_tobuf(lua_State *L) | 35 | static SBufExt *buffer_tobuf(lua_State *L) |
@@ -49,11 +47,16 @@ static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L) | |||
49 | return sbx; | 47 | return sbx; |
50 | } | 48 | } |
51 | 49 | ||
50 | #define buffer_toudata(sbx) ((GCudata *)(sbx)-1) | ||
51 | |||
52 | /* -- Buffer methods ------------------------------------------------------ */ | ||
53 | |||
54 | #define LJLIB_MODULE_buffer_method | ||
55 | |||
52 | LJLIB_CF(buffer_method_free) | 56 | LJLIB_CF(buffer_method_free) |
53 | { | 57 | { |
54 | SBufExt *sbx = buffer_tobuf(L); | 58 | SBufExt *sbx = buffer_tobuf(L); |
55 | lj_bufx_free(G(L), sbx); | 59 | lj_bufx_free(L, sbx); |
56 | lj_bufx_init(L, sbx); | ||
57 | L->top = L->base+1; /* Chain buffer object. */ | 60 | L->top = L->base+1; /* Chain buffer object. */ |
58 | return 1; | 61 | return 1; |
59 | } | 62 | } |
@@ -83,6 +86,7 @@ LJLIB_CF(buffer_method_skip) | |||
83 | LJLIB_CF(buffer_method_set) | 86 | LJLIB_CF(buffer_method_set) |
84 | { | 87 | { |
85 | SBufExt *sbx = buffer_tobuf(L); | 88 | SBufExt *sbx = buffer_tobuf(L); |
89 | GCobj *ref; | ||
86 | const char *p; | 90 | const char *p; |
87 | MSize len; | 91 | MSize len; |
88 | #if LJ_HASFFI | 92 | #if LJ_HASFFI |
@@ -98,9 +102,11 @@ LJLIB_CF(buffer_method_set) | |||
98 | p = strdata(str); | 102 | p = strdata(str); |
99 | len = str->len; | 103 | len = str->len; |
100 | } | 104 | } |
101 | lj_bufx_free(G(L), sbx); | 105 | lj_bufx_free(L, sbx); |
102 | lj_bufx_init_cow(L, sbx, p, len); | 106 | lj_bufx_set_cow(L, sbx, p, len); |
103 | setgcref(sbx->cowref, gcV(L->base+1)); | 107 | ref = gcV(L->base+1); |
108 | setgcref(sbx->cowref, ref); | ||
109 | lj_gc_objbarrier(L, buffer_toudata(sbx), ref); | ||
104 | L->top = L->base+1; /* Chain buffer object. */ | 110 | L->top = L->base+1; /* Chain buffer object. */ |
105 | return 1; | 111 | return 1; |
106 | } | 112 | } |
@@ -249,8 +255,7 @@ LJLIB_CF(buffer_method_decode) | |||
249 | LJLIB_CF(buffer_method___gc) | 255 | LJLIB_CF(buffer_method___gc) |
250 | { | 256 | { |
251 | SBufExt *sbx = buffer_tobuf(L); | 257 | SBufExt *sbx = buffer_tobuf(L); |
252 | lj_bufx_free(G(L), sbx); | 258 | lj_bufx_free(L, sbx); |
253 | lj_bufx_init(L, sbx); | ||
254 | return 0; | 259 | return 0; |
255 | } | 260 | } |
256 | 261 | ||
@@ -272,7 +277,7 @@ LJLIB_CF(buffer_method___len) | |||
272 | LJLIB_PUSH("buffer") LJLIB_SET(__metatable) | 277 | LJLIB_PUSH("buffer") LJLIB_SET(__metatable) |
273 | LJLIB_PUSH(top-1) LJLIB_SET(__index) | 278 | LJLIB_PUSH(top-1) LJLIB_SET(__index) |
274 | 279 | ||
275 | /* ------------------------------------------------------------------------ */ | 280 | /* -- Buffer library functions -------------------------------------------- */ |
276 | 281 | ||
277 | #define LJLIB_MODULE_buffer | 282 | #define LJLIB_MODULE_buffer |
278 | 283 | ||
@@ -280,16 +285,33 @@ LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ | |||
280 | 285 | ||
281 | LJLIB_CF(buffer_new) | 286 | LJLIB_CF(buffer_new) |
282 | { | 287 | { |
283 | MSize sz = L->base == L->top ? 0u : | 288 | MSize sz = 0; |
284 | (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); | 289 | int targ = 1; |
285 | GCtab *env = tabref(curr_func(L)->c.env); | 290 | GCtab *env, *dict = NULL; |
286 | GCudata *ud = lj_udata_new(L, sizeof(SBufExt), env); | 291 | GCudata *ud; |
287 | SBufExt *sbx = (SBufExt *)uddata(ud); | 292 | SBufExt *sbx; |
293 | if (L->base < L->top && !tvistab(L->base)) { | ||
294 | targ = 2; | ||
295 | if (!tvisnil(L->base)) | ||
296 | sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); | ||
297 | } | ||
298 | if (L->base+targ-1 < L->top) { | ||
299 | GCtab *options = lj_lib_checktab(L, targ); | ||
300 | cTValue *opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict")); | ||
301 | if (opt_dict && tvistab(opt_dict)) { | ||
302 | dict = tabV(opt_dict); | ||
303 | lj_serialize_dict_prep(L, dict); | ||
304 | } | ||
305 | } | ||
306 | env = tabref(curr_func(L)->c.env); | ||
307 | ud = lj_udata_new(L, sizeof(SBufExt), env); | ||
288 | ud->udtype = UDTYPE_BUFFER; | 308 | ud->udtype = UDTYPE_BUFFER; |
289 | /* NOBARRIER: The GCudata is new (marked white). */ | 309 | /* NOBARRIER: The GCudata is new (marked white). */ |
290 | setgcref(ud->metatable, obj2gco(env)); | 310 | setgcref(ud->metatable, obj2gco(env)); |
291 | setudataV(L, L->top++, ud); | 311 | setudataV(L, L->top++, ud); |
312 | sbx = (SBufExt *)uddata(ud); | ||
292 | lj_bufx_init(L, sbx); | 313 | lj_bufx_init(L, sbx); |
314 | setgcref(sbx->dict, obj2gco(dict)); | ||
293 | if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); | 315 | if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); |
294 | return 1; | 316 | return 1; |
295 | } | 317 | } |
@@ -298,7 +320,8 @@ LJLIB_CF(buffer_encode) | |||
298 | { | 320 | { |
299 | cTValue *o = lj_lib_checkany(L, 1); | 321 | cTValue *o = lj_lib_checkany(L, 1); |
300 | SBufExt sbx; | 322 | SBufExt sbx; |
301 | lj_bufx_init_borrow(L, &sbx, &G(L)->tmpbuf); | 323 | memset(&sbx, 0, sizeof(SBufExt)); |
324 | lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf); | ||
302 | lj_serialize_put(&sbx, o); | 325 | lj_serialize_put(&sbx, o); |
303 | setstrV(L, L->top++, lj_buf_str(L, (SBuf *)&sbx)); | 326 | setstrV(L, L->top++, lj_buf_str(L, (SBuf *)&sbx)); |
304 | lj_gc_check(L); | 327 | lj_gc_check(L); |
@@ -309,7 +332,8 @@ LJLIB_CF(buffer_decode) | |||
309 | { | 332 | { |
310 | GCstr *str = lj_lib_checkstrx(L, 1); | 333 | GCstr *str = lj_lib_checkstrx(L, 1); |
311 | SBufExt sbx; | 334 | SBufExt sbx; |
312 | lj_bufx_init_cow(L, &sbx, strdata(str), str->len); | 335 | memset(&sbx, 0, sizeof(SBufExt)); |
336 | lj_bufx_set_cow(L, &sbx, strdata(str), str->len); | ||
313 | /* No need to set sbx.cowref here. */ | 337 | /* No need to set sbx.cowref here. */ |
314 | setnilV(L->top++); | 338 | setnilV(L->top++); |
315 | lj_serialize_get(&sbx, L->top-1); | 339 | lj_serialize_get(&sbx, L->top-1); |
diff --git a/src/lj_buf.h b/src/lj_buf.h index 02f0ac61..b97d55ef 100644 --- a/src/lj_buf.h +++ b/src/lj_buf.h | |||
@@ -27,6 +27,7 @@ typedef struct SBufExt { | |||
27 | MRef bsb; /* Borrowed string buffer. */ | 27 | MRef bsb; /* Borrowed string buffer. */ |
28 | }; | 28 | }; |
29 | char *r; /* Read pointer. */ | 29 | char *r; /* Read pointer. */ |
30 | GCRef dict; /* Serialization string dictionary table. */ | ||
30 | int depth; /* Remaining recursion depth. */ | 31 | int depth; /* Remaining recursion depth. */ |
31 | } SBufExt; | 32 | } SBufExt; |
32 | 33 | ||
@@ -114,19 +115,17 @@ static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx) | |||
114 | setsbufXL(sbx, L, SBUF_FLAG_EXT); | 115 | setsbufXL(sbx, L, SBUF_FLAG_EXT); |
115 | } | 116 | } |
116 | 117 | ||
117 | static LJ_AINLINE void lj_bufx_init_borrow(lua_State *L, SBufExt *sbx, SBuf *sb) | 118 | static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb) |
118 | { | 119 | { |
119 | memset(sbx, 0, sizeof(SBufExt)); | ||
120 | setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW); | 120 | setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW); |
121 | setmref(sbx->bsb, sb); | 121 | setmref(sbx->bsb, sb); |
122 | sbx->r = sbx->w = sbx->b = sb->b; | 122 | sbx->r = sbx->w = sbx->b = sb->b; |
123 | sbx->e = sb->e; | 123 | sbx->e = sb->e; |
124 | } | 124 | } |
125 | 125 | ||
126 | static LJ_AINLINE void lj_bufx_init_cow(lua_State *L, SBufExt *sbx, | 126 | static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx, |
127 | const char *p, MSize len) | 127 | const char *p, MSize len) |
128 | { | 128 | { |
129 | memset(sbx, 0, sizeof(SBufExt)); | ||
130 | setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW); | 129 | setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW); |
131 | sbx->r = sbx->b = (char *)p; | 130 | sbx->r = sbx->b = (char *)p; |
132 | sbx->w = sbx->e = (char *)p + len; | 131 | sbx->w = sbx->e = (char *)p + len; |
@@ -142,9 +141,12 @@ static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx) | |||
142 | sbx->r = sbx->w = sbx->b; | 141 | sbx->r = sbx->w = sbx->b; |
143 | } | 142 | } |
144 | 143 | ||
145 | static LJ_AINLINE void lj_bufx_free(global_State *g, SBufExt *sbx) | 144 | static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx) |
146 | { | 145 | { |
147 | if (!sbufiscow(sbx)) lj_mem_free(g, sbx->b, sbufsz(sbx)); | 146 | if (!sbufiscow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx)); |
147 | setsbufXL(sbx, L, SBUF_FLAG_EXT); | ||
148 | setgcrefnull(sbx->cowref); | ||
149 | sbx->r = sbx->w = sbx->b = sbx->e = NULL; | ||
148 | } | 150 | } |
149 | 151 | ||
150 | /* Low-level buffer put operations */ | 152 | /* Low-level buffer put operations */ |
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index af4a03dd..56be4bb9 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h | |||
@@ -182,8 +182,10 @@ ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") | |||
182 | 182 | ||
183 | #if LJ_HASBUFFER | 183 | #if LJ_HASBUFFER |
184 | /* String buffer errors. */ | 184 | /* String buffer errors. */ |
185 | ERRDEF(BUFFER_BADOPT, "bad options table") | ||
185 | ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) | 186 | ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) |
186 | ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") | 187 | ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") |
188 | ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d") | ||
187 | ERRDEF(BUFFER_DEPTH, "too deep to serialize") | 189 | ERRDEF(BUFFER_DEPTH, "too deep to serialize") |
188 | ERRDEF(BUFFER_DUPKEY, "duplicate table key") | 190 | ERRDEF(BUFFER_DUPKEY, "duplicate table key") |
189 | ERRDEF(BUFFER_EOB, "unexpected end of buffer") | 191 | ERRDEF(BUFFER_EOB, "unexpected end of buffer") |
diff --git a/src/lj_gc.c b/src/lj_gc.c index 1f382ea0..646a27b2 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c | |||
@@ -67,9 +67,10 @@ static void gc_mark(global_State *g, GCobj *o) | |||
67 | gc_markobj(g, tabref(gco2ud(o)->env)); | 67 | gc_markobj(g, tabref(gco2ud(o)->env)); |
68 | if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { | 68 | if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { |
69 | SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); | 69 | SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); |
70 | if (sbufiscow(sbx) && gcref(sbx->cowref) != NULL) { | 70 | if (sbufiscow(sbx) && gcref(sbx->cowref)) |
71 | gc_markobj(g, gcref(sbx->cowref)); | 71 | gc_markobj(g, gcref(sbx->cowref)); |
72 | } | 72 | if (gcref(sbx->dict)) |
73 | gc_markobj(g, gcref(sbx->dict)); | ||
73 | } | 74 | } |
74 | } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { | 75 | } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { |
75 | GCupval *uv = gco2uv(o); | 76 | GCupval *uv = gco2uv(o); |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 0dae5fec..5547a79b 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
@@ -923,7 +923,7 @@ static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) | |||
923 | } | 923 | } |
924 | 924 | ||
925 | #define define_setV(name, type, tag) \ | 925 | #define define_setV(name, type, tag) \ |
926 | static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ | 926 | static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \ |
927 | { \ | 927 | { \ |
928 | setgcV(L, o, obj2gco(v), tag); \ | 928 | setgcV(L, o, obj2gco(v), tag); \ |
929 | } | 929 | } |
diff --git a/src/lj_serialize.c b/src/lj_serialize.c index 49a25a7c..d84ebcb8 100644 --- a/src/lj_serialize.c +++ b/src/lj_serialize.c | |||
@@ -32,7 +32,7 @@ enum { | |||
32 | SER_TAG_NUM, | 32 | SER_TAG_NUM, |
33 | SER_TAG_TAB, /* 0x08 */ | 33 | SER_TAG_TAB, /* 0x08 */ |
34 | SER_TAG_0x0e = SER_TAG_TAB+6, | 34 | SER_TAG_0x0e = SER_TAG_TAB+6, |
35 | SER_TAG_0x0f, | 35 | SER_TAG_DICT, |
36 | SER_TAG_INT64, /* 0x10 */ | 36 | SER_TAG_INT64, /* 0x10 */ |
37 | SER_TAG_UINT64, | 37 | SER_TAG_UINT64, |
38 | SER_TAG_COMPLEX, | 38 | SER_TAG_COMPLEX, |
@@ -120,6 +120,26 @@ static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv) | |||
120 | return NULL; | 120 | return NULL; |
121 | } | 121 | } |
122 | 122 | ||
123 | /* Prepare string dictionary for use (once). */ | ||
124 | void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict) | ||
125 | { | ||
126 | if (!dict->hmask) { /* No hash part means not prepared, yet. */ | ||
127 | MSize i, len = lj_tab_len(dict); | ||
128 | if (!len) return; | ||
129 | lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); | ||
130 | for (i = 1; i <= len && i < dict->asize; i++) { | ||
131 | cTValue *o = arrayslot(dict, i); | ||
132 | if (tvisstr(o)) { | ||
133 | if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */ | ||
134 | lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); | ||
135 | } | ||
136 | } else if (!tvisfalse(o)) { | ||
137 | lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); | ||
138 | } | ||
139 | } | ||
140 | } | ||
141 | } | ||
142 | |||
123 | /* -- Internal serializer ------------------------------------------------- */ | 143 | /* -- Internal serializer ------------------------------------------------- */ |
124 | 144 | ||
125 | /* Put serialized object into buffer. */ | 145 | /* Put serialized object into buffer. */ |
@@ -174,12 +194,45 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o) | |||
174 | } | 194 | } |
175 | if (nhash) { /* Write hash entries. */ | 195 | if (nhash) { /* Write hash entries. */ |
176 | const Node *node = noderef(t->node) + t->hmask; | 196 | const Node *node = noderef(t->node) + t->hmask; |
177 | for (;; node--) | 197 | GCtab *dict = tabref(sbx->dict); |
178 | if (!tvisnil(&node->val)) { | 198 | if (LJ_UNLIKELY(dict)) { |
179 | w = serialize_put(w, sbx, &node->key); | 199 | for (;; node--) |
180 | w = serialize_put(w, sbx, &node->val); | 200 | if (!tvisnil(&node->val)) { |
181 | if (--nhash == 0) break; | 201 | if (LJ_LIKELY(tvisstr(&node->key))) { |
182 | } | 202 | /* Inlined lj_tab_getstr is 30% faster. */ |
203 | const GCstr *str = strV(&node->key); | ||
204 | Node *n = hashstr(dict, str); | ||
205 | do { | ||
206 | if (tvisstr(&n->key) && strV(&n->key) == str) { | ||
207 | uint32_t idx = n->val.u32.lo; | ||
208 | w = serialize_more(w, sbx, 1+5); | ||
209 | *w++ = SER_TAG_DICT; | ||
210 | w = serialize_wu124(w, idx); | ||
211 | break; | ||
212 | } | ||
213 | n = nextnode(n); | ||
214 | if (!n) { | ||
215 | MSize len = str->len; | ||
216 | w = serialize_more(w, sbx, 5+len); | ||
217 | w = serialize_wu124(w, SER_TAG_STR + len); | ||
218 | w = lj_buf_wmem(w, strdata(str), len); | ||
219 | break; | ||
220 | } | ||
221 | } while (1); | ||
222 | } else { | ||
223 | w = serialize_put(w, sbx, &node->key); | ||
224 | } | ||
225 | w = serialize_put(w, sbx, &node->val); | ||
226 | if (--nhash == 0) break; | ||
227 | } | ||
228 | } else { | ||
229 | for (;; node--) | ||
230 | if (!tvisnil(&node->val)) { | ||
231 | w = serialize_put(w, sbx, &node->key); | ||
232 | w = serialize_put(w, sbx, &node->val); | ||
233 | if (--nhash == 0) break; | ||
234 | } | ||
235 | } | ||
183 | } | 236 | } |
184 | sbx->depth++; | 237 | sbx->depth++; |
185 | #if LJ_HASFFI | 238 | #if LJ_HASFFI |
@@ -266,6 +319,16 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o) | |||
266 | if (!tvisnum(o)) setnanV(o); | 319 | if (!tvisnum(o)) setnanV(o); |
267 | } else if (tp <= SER_TAG_TRUE) { | 320 | } else if (tp <= SER_TAG_TRUE) { |
268 | setpriV(o, ~tp); | 321 | setpriV(o, ~tp); |
322 | } else if (tp == SER_TAG_DICT) { | ||
323 | GCtab *dict; | ||
324 | uint32_t idx; | ||
325 | r = serialize_ru124(r, w, &idx); | ||
326 | idx++; | ||
327 | dict = tabref(sbx->dict); | ||
328 | if (dict && idx < dict->asize && tvisstr(arrayslot(dict, idx))) | ||
329 | copyTV(sbufL(sbx), o, arrayslot(dict, idx)); | ||
330 | else | ||
331 | lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); | ||
269 | } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) { | 332 | } else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) { |
270 | uint32_t narray = 0, nhash = 0; | 333 | uint32_t narray = 0, nhash = 0; |
271 | GCtab *t; | 334 | GCtab *t; |
diff --git a/src/lj_serialize.h b/src/lj_serialize.h index f5617790..ccf1d63d 100644 --- a/src/lj_serialize.h +++ b/src/lj_serialize.h | |||
@@ -13,6 +13,7 @@ | |||
13 | 13 | ||
14 | #define LJ_SERIALIZE_DEPTH 100 /* Default depth. */ | 14 | #define LJ_SERIALIZE_DEPTH 100 /* Default depth. */ |
15 | 15 | ||
16 | LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict); | ||
16 | LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); | 17 | LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); |
17 | LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); | 18 | LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); |
18 | 19 | ||
diff --git a/src/lj_tab.c b/src/lj_tab.c index 27e58f0a..ed5fd2dd 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
@@ -16,25 +16,6 @@ | |||
16 | 16 | ||
17 | /* -- Object hashing ------------------------------------------------------ */ | 17 | /* -- Object hashing ------------------------------------------------------ */ |
18 | 18 | ||
19 | /* Hash values are masked with the table hash mask and used as an index. */ | ||
20 | static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) | ||
21 | { | ||
22 | Node *n = noderef(t->node); | ||
23 | return &n[hash & t->hmask]; | ||
24 | } | ||
25 | |||
26 | /* String IDs are generated when a string is interned. */ | ||
27 | #define hashstr(t, s) hashmask(t, (s)->sid) | ||
28 | |||
29 | #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) | ||
30 | #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) | ||
31 | #if LJ_GC64 | ||
32 | #define hashgcref(t, r) \ | ||
33 | hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) | ||
34 | #else | ||
35 | #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) | ||
36 | #endif | ||
37 | |||
38 | /* Hash an arbitrary key and return its anchor position in the hash table. */ | 19 | /* Hash an arbitrary key and return its anchor position in the hash table. */ |
39 | static Node *hashkey(const GCtab *t, cTValue *key) | 20 | static Node *hashkey(const GCtab *t, cTValue *key) |
40 | { | 21 | { |
@@ -413,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key) | |||
413 | return NULL; | 394 | return NULL; |
414 | } | 395 | } |
415 | 396 | ||
416 | cTValue *lj_tab_getstr(GCtab *t, GCstr *key) | 397 | cTValue *lj_tab_getstr(GCtab *t, const GCstr *key) |
417 | { | 398 | { |
418 | Node *n = hashstr(t, key); | 399 | Node *n = hashstr(t, key); |
419 | do { | 400 | do { |
@@ -546,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) | |||
546 | return lj_tab_newkey(L, t, &k); | 527 | return lj_tab_newkey(L, t, &k); |
547 | } | 528 | } |
548 | 529 | ||
549 | TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) | 530 | TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key) |
550 | { | 531 | { |
551 | TValue k; | 532 | TValue k; |
552 | Node *n = hashstr(t, key); | 533 | Node *n = hashstr(t, key); |
diff --git a/src/lj_tab.h b/src/lj_tab.h index 97436cc0..1efa9506 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h | |||
@@ -31,6 +31,25 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) | |||
31 | return hi; | 31 | return hi; |
32 | } | 32 | } |
33 | 33 | ||
34 | /* Hash values are masked with the table hash mask and used as an index. */ | ||
35 | static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) | ||
36 | { | ||
37 | Node *n = noderef(t->node); | ||
38 | return &n[hash & t->hmask]; | ||
39 | } | ||
40 | |||
41 | /* String IDs are generated when a string is interned. */ | ||
42 | #define hashstr(t, s) hashmask(t, (s)->sid) | ||
43 | |||
44 | #define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) | ||
45 | #define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) | ||
46 | #if LJ_GC64 | ||
47 | #define hashgcref(t, r) \ | ||
48 | hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) | ||
49 | #else | ||
50 | #define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) | ||
51 | #endif | ||
52 | |||
34 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) | 53 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) |
35 | 54 | ||
36 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); | 55 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); |
@@ -50,14 +69,14 @@ LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); | |||
50 | /* Caveat: all getters except lj_tab_get() can return NULL! */ | 69 | /* Caveat: all getters except lj_tab_get() can return NULL! */ |
51 | 70 | ||
52 | LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); | 71 | LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); |
53 | LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); | 72 | LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key); |
54 | LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); | 73 | LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); |
55 | 74 | ||
56 | /* Caveat: all setters require a write barrier for the stored value. */ | 75 | /* Caveat: all setters require a write barrier for the stored value. */ |
57 | 76 | ||
58 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); | 77 | LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); |
59 | LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); | 78 | LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); |
60 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); | 79 | LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key); |
61 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | 80 | LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); |
62 | 81 | ||
63 | #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) | 82 | #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) |