diff options
| author | Mike Pall <mike> | 2021-06-01 05:16:32 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2021-06-01 05:16:32 +0200 |
| commit | a119497becdf2894fb7fa737f106464309dd7947 (patch) | |
| tree | 95c7f88198ba3f12eff52fd5646207ffb31da2d5 | |
| parent | edd5cbadc5cdc7b5b66d5340ee97c5abe5a3892a (diff) | |
| download | luajit-a119497becdf2894fb7fa737f106464309dd7947.tar.gz luajit-a119497becdf2894fb7fa737f106464309dd7947.tar.bz2 luajit-a119497becdf2894fb7fa737f106464309dd7947.zip | |
String buffers, part 2d: basic string buffer methods.
Sponsored by fmad.io.
| -rw-r--r-- | doc/ext_buffer.html | 457 | ||||
| -rw-r--r-- | src/Makefile.dep | 29 | ||||
| -rw-r--r-- | src/lib_base.c | 19 | ||||
| -rw-r--r-- | src/lib_buffer.c | 278 | ||||
| -rw-r--r-- | src/lj_asm.c | 1 | ||||
| -rw-r--r-- | src/lj_buf.h | 4 | ||||
| -rw-r--r-- | src/lj_cconv.c | 3 | ||||
| -rw-r--r-- | src/lj_crecord.c | 8 | ||||
| -rw-r--r-- | src/lj_ctype.h | 1 | ||||
| -rw-r--r-- | src/lj_errmsg.h | 1 | ||||
| -rw-r--r-- | src/lj_gc.c | 6 | ||||
| -rw-r--r-- | src/lj_ir.h | 1 | ||||
| -rw-r--r-- | src/lj_lib.c | 54 | ||||
| -rw-r--r-- | src/lj_lib.h | 6 | ||||
| -rw-r--r-- | src/lj_meta.c | 13 | ||||
| -rw-r--r-- | src/lj_obj.h | 1 | ||||
| -rw-r--r-- | src/lj_serialize.c | 5 | ||||
| -rw-r--r-- | src/lj_strfmt.c | 8 |
18 files changed, 813 insertions, 82 deletions
diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html index 455c298d..94af757d 100644 --- a/doc/ext_buffer.html +++ b/doc/ext_buffer.html | |||
| @@ -1,19 +1,30 @@ | |||
| 1 | <!DOCTYPE html> | 1 | <!DOCTYPE html> |
| 2 | <html> | 2 | <html> |
| 3 | <head> | 3 | <head> |
| 4 | <title>String Buffers</title> | 4 | <title>String Buffer Library</title> |
| 5 | <meta charset="utf-8"> | 5 | <meta charset="utf-8"> |
| 6 | <meta name="Copyright" content="Copyright (C) 2005-2021"> | 6 | <meta name="Copyright" content="Copyright (C) 2005-2021"> |
| 7 | <meta name="Language" content="en"> | 7 | <meta name="Language" content="en"> |
| 8 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | 8 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> |
| 9 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | 9 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> |
| 10 | <style type="text/css"> | ||
| 11 | .lib { | ||
| 12 | vertical-align: middle; | ||
| 13 | margin-left: 5px; | ||
| 14 | padding: 0 5px; | ||
| 15 | font-size: 60%; | ||
| 16 | border-radius: 5px; | ||
| 17 | background: #c5d5ff; | ||
| 18 | color: #000; | ||
| 19 | } | ||
| 20 | </style> | ||
| 10 | </head> | 21 | </head> |
| 11 | <body> | 22 | <body> |
| 12 | <div id="site"> | 23 | <div id="site"> |
| 13 | <a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> | 24 | <a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> |
| 14 | </div> | 25 | </div> |
| 15 | <div id="head"> | 26 | <div id="head"> |
| 16 | <h1>String Buffers</h1> | 27 | <h1>String Buffer Library</h1> |
| 17 | </div> | 28 | </div> |
| 18 | <div id="nav"> | 29 | <div id="nav"> |
| 19 | <ul><li> | 30 | <ul><li> |
| @@ -57,31 +68,35 @@ | |||
| 57 | </div> | 68 | </div> |
| 58 | <div id="main"> | 69 | <div id="main"> |
| 59 | <p> | 70 | <p> |
| 60 | |||
| 61 | The string buffer library allows <b>high-performance manipulation of | 71 | The string buffer library allows <b>high-performance manipulation of |
| 62 | string-like data</b>. | 72 | string-like data</b>. |
| 63 | |||
| 64 | </p> | 73 | </p> |
| 65 | <p> | 74 | <p> |
| 66 | |||
| 67 | Unlike Lua strings, which are constants, string buffers are | 75 | Unlike Lua strings, which are constants, string buffers are |
| 68 | <b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data | 76 | <b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data |
| 69 | can be stored, formatted and encoded into a string buffer and later | 77 | can be stored, formatted and encoded into a string buffer and later |
| 70 | converted, decoded or extracted. | 78 | converted, extracted or decoded. |
| 71 | |||
| 72 | </p> | 79 | </p> |
| 73 | <p> | 80 | <p> |
| 74 | |||
| 75 | The convenient string buffer API simplifies common string manipulation | 81 | The convenient string buffer API simplifies common string manipulation |
| 76 | tasks, that would otherwise require creating many intermediate strings. | 82 | tasks, that would otherwise require creating many intermediate strings. |
| 77 | String buffers improve performance by eliminating redundant memory | 83 | String buffers improve performance by eliminating redundant memory |
| 78 | copies, object creation, string interning and garbage collection | 84 | copies, object creation, string interning and garbage collection |
| 79 | overhead. In conjunction with the FFI library, they allow zero-copy | 85 | overhead. In conjunction with the FFI library, they allow zero-copy |
| 80 | operations. | 86 | operations. |
| 87 | </p> | ||
| 88 | <p> | ||
| 89 | The string buffer library also includes a high-performance | ||
| 90 | <a href="#serialize">serializer</a> for Lua objects. | ||
| 91 | </p> | ||
| 81 | 92 | ||
| 93 | <h2 id="wip" style="color:#ff0000">Work in Progress</h2> | ||
| 94 | <p> | ||
| 95 | <b style="color:#ff0000">This library is a work in progress. More | ||
| 96 | functionality will be added soon.</b> | ||
| 82 | </p> | 97 | </p> |
| 83 | 98 | ||
| 84 | <h2 id="load">Using the String Buffer Library</h2> | 99 | <h2 id="use">Using the String Buffer Library</h2> |
| 85 | <p> | 100 | <p> |
| 86 | The string buffer library is built into LuaJIT by default, but it's not | 101 | The string buffer library is built into LuaJIT by default, but it's not |
| 87 | loaded by default. Add this to the start of every Lua file that needs | 102 | loaded by default. Add this to the start of every Lua file that needs |
| @@ -90,137 +105,406 @@ one of its functions: | |||
| 90 | <pre class="code"> | 105 | <pre class="code"> |
| 91 | local buffer = require("string.buffer") | 106 | local buffer = require("string.buffer") |
| 92 | </pre> | 107 | </pre> |
| 108 | <p> | ||
| 109 | The convention for the syntax shown on this page is that <tt>buffer</tt> | ||
| 110 | refers to the buffer library and <tt>buf</tt> refers to an individual | ||
| 111 | buffer object. | ||
| 112 | </p> | ||
| 113 | <p> | ||
| 114 | Please note the difference between a Lua function call, e.g. | ||
| 115 | <tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g. | ||
| 116 | <tt>buf:reset()</tt> (with a colon). | ||
| 117 | </p> | ||
| 93 | 118 | ||
| 94 | <h2 id="wip" style="color:#ff0000">Work in Progress</h2> | 119 | <h3 id="buffer_object">Buffer Objects</h3> |
| 120 | <p> | ||
| 121 | A buffer object is a garbage-collected Lua object. After creation with | ||
| 122 | <tt>buffer.new()</tt>, it can (and should) be reused for many operations. | ||
| 123 | When the last reference to a buffer object is gone, it will eventually | ||
| 124 | be freed by the garbage collector, along with the allocated buffer | ||
| 125 | space. | ||
| 126 | </p> | ||
| 127 | <p> | ||
| 128 | Buffers operate like a FIFO (first-in first-out) data structure. Data | ||
| 129 | can be appended (written) to the end of the buffer and consumed (read) | ||
| 130 | from the front of the buffer. These operations can be freely mixed. | ||
| 131 | </p> | ||
| 132 | <p> | ||
| 133 | The buffer space that holds the characters is managed automatically | ||
| 134 | — it grows as needed and already consumed space is recycled. Use | ||
| 135 | <tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more | ||
| 136 | control. | ||
| 137 | </p> | ||
| 138 | <p> | ||
| 139 | The maximum size of a single buffer is the same as the maximum size of a | ||
| 140 | Lua string, which is slightly below two gigabytes. For huge data sizes, | ||
| 141 | neither strings nor buffers are the right data structure — use the | ||
| 142 | FFI library to directly map memory or files up to the virtual memory | ||
| 143 | limit of your OS. | ||
| 144 | </p> | ||
| 95 | 145 | ||
| 146 | <h3 id="buffer_overview">Buffer Method Overview</h3> | ||
| 147 | <ul> | ||
| 148 | <li> | ||
| 149 | The <tt>buf:put*()</tt>-like methods append (write) characters to the | ||
| 150 | end of the buffer. | ||
| 151 | </li> | ||
| 152 | <li> | ||
| 153 | The <tt>buf:get*()</tt>-like methods consume (read) characters from the | ||
| 154 | front of the buffer. | ||
| 155 | </li> | ||
| 156 | <li> | ||
| 157 | Other methods, like <tt>buf:tostring()</tt> only read the buffer | ||
| 158 | contents, but don't change the buffer. | ||
| 159 | </li> | ||
| 160 | <li> | ||
| 161 | The <tt>buf:set()</tt> method allows zero-copy consumption of a string | ||
| 162 | or an FFI cdata object as a buffer. | ||
| 163 | </li> | ||
| 164 | <li> | ||
| 165 | The FFI-specific methods allow zero-copy read/write-style operations or | ||
| 166 | modifying the buffer contents in-place. Please check the | ||
| 167 | <a href="#ffi_caveats">FFI caveats</a> below, too. | ||
| 168 | </li> | ||
| 169 | <li> | ||
| 170 | Methods that don't need to return anything specific, return the buffer | ||
| 171 | object itself as a convenience. This allows method chaining, e.g.: | ||
| 172 | <tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt> | ||
| 173 | </li> | ||
| 174 | </ul> | ||
| 175 | |||
| 176 | <h2 id="create">Buffer Creation and Management</h2> | ||
| 177 | |||
| 178 | <h3 id="buffer_new"><tt>local buf = buffer.new([size])</tt></h3> | ||
| 179 | <p> | ||
| 180 | Creates a new buffer object. | ||
| 181 | </p> | ||
| 96 | <p> | 182 | <p> |
| 183 | The optional <tt>size</tt> argument ensures a minimum initial buffer | ||
| 184 | size. This is strictly an optimization for cases where the required | ||
| 185 | buffer size is known beforehand. | ||
| 186 | </p> | ||
| 97 | 187 | ||
| 98 | <b style="color:#ff0000">This library is a work in progress. More | 188 | <h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3> |
| 99 | functions will be added soon.</b> | 189 | <p> |
| 190 | Reset (empty) the buffer. The allocated buffer space is not freed and | ||
| 191 | may be reused. | ||
| 192 | </p> | ||
| 100 | 193 | ||
| 194 | <h3 id="buffer_free"><tt>buf = buf:free()</tt></h3> | ||
| 195 | <p> | ||
| 196 | The buffer space of the buffer object is freed. The object itself | ||
| 197 | remains intact, empty and it may be reused. | ||
| 198 | </p> | ||
| 199 | <p> | ||
| 200 | Note: you normally don't need to use this method. The garbage collector | ||
| 201 | automatically frees the buffer space, when the buffer object is | ||
| 202 | collected. Use this method, if you need to free the associated memory | ||
| 203 | immediately. | ||
| 101 | </p> | 204 | </p> |
| 102 | 205 | ||
| 103 | <h2 id="serialize">Serialization of Lua Objects</h2> | 206 | <h2 id="write">Buffer Writers</h2> |
| 207 | |||
| 208 | <h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [, ...])</tt></h3> | ||
| 209 | <p> | ||
| 210 | Appends a string <tt>str</tt>, a number <tt>num</tt> or any object | ||
| 211 | <tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer. | ||
| 212 | Multiple arguments are appended in the given order. | ||
| 213 | </p> | ||
| 214 | <p> | ||
| 215 | Appending a buffer to a buffer is possible and short-circuited | ||
| 216 | internally. But it still involves a copy. Better combine the buffer | ||
| 217 | writes to use a single buffer. | ||
| 218 | </p> | ||
| 219 | |||
| 220 | <h3 id="buffer_putf"><tt>buf = buf:putf(format, ...)</tt></h3> | ||
| 221 | <p> | ||
| 222 | Appends the formatted arguments to the buffer. The <tt>format</tt> | ||
| 223 | string supports the same options as <tt>string.format()</tt>. | ||
| 224 | </p> | ||
| 225 | |||
| 226 | <h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3> | ||
| 104 | <p> | 227 | <p> |
| 228 | Appends the given <tt>len</tt> number of bytes from the memory pointed | ||
| 229 | to by the FFI <tt>cdata</tt> object to the buffer. The object needs to | ||
| 230 | be convertible to a (constant) pointer. | ||
| 231 | </p> | ||
| 232 | |||
| 233 | <h3 id="buffer_set"><tt>buf = buf:set(str)<br> | ||
| 234 | buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3> | ||
| 235 | <p> | ||
| 236 | This method allows zero-copy consumption of a string or an FFI cdata | ||
| 237 | object as a buffer. It stores a reference to the passed string | ||
| 238 | <tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer | ||
| 239 | space originally allocated is freed. This is <i>not</i> an append | ||
| 240 | operation, unlike the <tt>buf:put*()</tt> methods. | ||
| 241 | </p> | ||
| 242 | <p> | ||
| 243 | After calling this method, the buffer behaves as if | ||
| 244 | <tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata, len)</tt> | ||
| 245 | had been called. However, the data is only referenced and not copied, as | ||
| 246 | long as the buffer is only consumed. | ||
| 247 | </p> | ||
| 248 | <p> | ||
| 249 | In case the buffer is written to later on, the referenced data is copied | ||
| 250 | and the object reference is removed (copy-on-write semantics). | ||
| 251 | </p> | ||
| 252 | <p> | ||
| 253 | The stored reference is an anchor for the garbage collector and keeps the | ||
| 254 | originally passed string or FFI cdata object alive. | ||
| 255 | </p> | ||
| 256 | |||
| 257 | <h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br> | ||
| 258 | <tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3> | ||
| 259 | <p> | ||
| 260 | The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of | ||
| 261 | write space in the buffer. It returns an <tt>uint8_t *</tt> FFI | ||
| 262 | cdata pointer <tt>ptr</tt> that points to this space. | ||
| 263 | </p> | ||
| 264 | <p> | ||
| 265 | The available length in bytes is returned in <tt>len</tt>. This is at | ||
| 266 | least <tt>size</tt> bytes, but may be more to facilitate efficient | ||
| 267 | buffer growth. You can either make use of the additional space or ignore | ||
| 268 | <tt>len</tt> and only use <tt>size</tt> bytes. | ||
| 269 | </p> | ||
| 270 | <p> | ||
| 271 | The <tt>commit</tt> method appends the <tt>used</tt> bytes of the | ||
| 272 | previously returned write space to the buffer data. | ||
| 273 | </p> | ||
| 274 | <p> | ||
| 275 | This pair of methods allows zero-copy use of C read-style APIs: | ||
| 276 | </p> | ||
| 277 | <pre class="code"> | ||
| 278 | local MIN_SIZE = 65536 | ||
| 279 | repeat | ||
| 280 | local ptr, len = buf:reserve(MIN_SIZE) | ||
| 281 | local n = C.read(fd, ptr, len) | ||
| 282 | if n == 0 then break end -- EOF. | ||
| 283 | if n < 0 then error("read error") end | ||
| 284 | buf:commit(n) | ||
| 285 | until false | ||
| 286 | </pre> | ||
| 287 | <p> | ||
| 288 | The reserved write space is <i>not</i> initialized. At least the | ||
| 289 | <tt>used</tt> bytes <b>must</b> be written to before calling the | ||
| 290 | <tt>commit</tt> method. There's no need to call the <tt>commit</tt> | ||
| 291 | method, if nothing is added to the buffer (e.g. on error). | ||
| 292 | </p> | ||
| 293 | |||
| 294 | <h2 id="read">Buffer Readers</h2> | ||
| 295 | |||
| 296 | <h3 id="buffer_length"><tt>len = #buf</tt></h3> | ||
| 297 | <p> | ||
| 298 | Returns the current length of the buffer data in bytes. | ||
| 299 | </p> | ||
| 300 | |||
| 301 | <h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf [...]</tt></h3> | ||
| 302 | <p> | ||
| 303 | The Lua concatenation operator <tt>..</tt> also accepts buffers, just | ||
| 304 | like strings or numbers. It always returns a string and not a buffer. | ||
| 305 | </p> | ||
| 306 | <p> | ||
| 307 | Note that although this is supported for convenience, this thwarts one | ||
| 308 | of the main reasons to use buffers, which is to avoid string | ||
| 309 | allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>. | ||
| 310 | </p> | ||
| 311 | <p> | ||
| 312 | Mixing this with unrelated objects that have a <tt>__concat</tt> | ||
| 313 | metamethod may not work, since these probably only expect strings. | ||
| 314 | </p> | ||
| 315 | |||
| 316 | <h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3> | ||
| 317 | <p> | ||
| 318 | Skips (consumes) <tt>len</tt> bytes from the buffer up to the current | ||
| 319 | length of the buffer data. | ||
| 320 | </p> | ||
| 321 | |||
| 322 | <h3 id="buffer_get"><tt>str, ... = buf:get([len|nil] [,...])</tt></h3> | ||
| 323 | <p> | ||
| 324 | Consumes the buffer data and returns one or more strings. If called | ||
| 325 | without arguments, the whole buffer data is consumed. If called with a | ||
| 326 | number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument | ||
| 327 | consumes the remaining buffer space (this only makes sense as the last | ||
| 328 | argument). Multiple arguments consume the buffer data in the given | ||
| 329 | order. | ||
| 330 | </p> | ||
| 331 | <p> | ||
| 332 | Note: a zero length or no remaining buffer data returns an empty string | ||
| 333 | and not <tt>nil</tt>. | ||
| 334 | </p> | ||
| 105 | 335 | ||
| 336 | <h3 id="buffer_tostring"><tt>str = buf:tostring()<br> | ||
| 337 | str = tostring(buf)</tt></h3> | ||
| 338 | <p> | ||
| 339 | Creates a string from the buffer data, but doesn't consume it. The | ||
| 340 | buffer remains unchanged. | ||
| 341 | </p> | ||
| 342 | <p> | ||
| 343 | Buffer objects also define a <tt>__tostring</tt> metamethod. This means | ||
| 344 | buffers can be passed to the global <tt>tostring()</tt> function and | ||
| 345 | many other functions that accept this in place of strings. The important | ||
| 346 | internal uses in functions like <tt>io.write()</tt> are short-circuited | ||
| 347 | to avoid the creation of an intermediate string object. | ||
| 348 | </p> | ||
| 349 | |||
| 350 | <h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3> | ||
| 351 | <p> | ||
| 352 | Returns an <tt>uint8_t *</tt> FFI cdata pointer <tt>ptr</tt> that | ||
| 353 | points to the buffer data. The length of the buffer data in bytes is | ||
| 354 | returned in <tt>len</tt>. | ||
| 355 | </p> | ||
| 356 | <p> | ||
| 357 | The returned pointer can be directly passed to C functions that expect a | ||
| 358 | buffer and a length. You can also do bytewise reads | ||
| 359 | (<tt>local x = ptr[i]</tt>) or writes | ||
| 360 | (<tt>ptr[i] = 0x40</tt>) of the buffer data. | ||
| 361 | </p> | ||
| 362 | <p> | ||
| 363 | In conjunction with the <tt>skip</tt> method, this allows zero-copy use | ||
| 364 | of C write-style APIs: | ||
| 365 | </p> | ||
| 366 | <pre class="code"> | ||
| 367 | repeat | ||
| 368 | local ptr, len = buf:ref() | ||
| 369 | if len == 0 then break end | ||
| 370 | local n = C.write(fd, ptr, len) | ||
| 371 | if n < 0 then error("write error") end | ||
| 372 | buf:skip(n) | ||
| 373 | until n >= len | ||
| 374 | </pre> | ||
| 375 | <p> | ||
| 376 | Unlike Lua strings, buffer data is <i>not</i> implicitly | ||
| 377 | zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that | ||
| 378 | expect zero-terminated strings. If you're not using <tt>len</tt>, then | ||
| 379 | you're doing something wrong. | ||
| 380 | </p> | ||
| 381 | |||
| 382 | <h2 id="serialize">Serialization of Lua Objects</h2> | ||
| 383 | <p> | ||
| 106 | The following functions and methods allow <b>high-speed serialization</b> | 384 | The following functions and methods allow <b>high-speed serialization</b> |
| 107 | (encoding) of a Lua object into a string and decoding it back to a Lua | 385 | (encoding) of a Lua object into a string and decoding it back to a Lua |
| 108 | object. This allows convenient storage and transport of <b>structured | 386 | object. This allows convenient storage and transport of <b>structured |
| 109 | data</b>. | 387 | data</b>. |
| 110 | |||
| 111 | </p> | 388 | </p> |
| 112 | <p> | 389 | <p> |
| 113 | |||
| 114 | The encoded data is in an <a href="#serialize_format">internal binary | 390 | The encoded data is in an <a href="#serialize_format">internal binary |
| 115 | format</a>. The data can be stored in files, binary-transparent | 391 | format</a>. The data can be stored in files, binary-transparent |
| 116 | databases or transmitted to other LuaJIT instances across threads, | 392 | databases or transmitted to other LuaJIT instances across threads, |
| 117 | processes or networks. | 393 | processes or networks. |
| 118 | |||
| 119 | </p> | 394 | </p> |
| 120 | <p> | 395 | <p> |
| 121 | |||
| 122 | Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or | 396 | Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or |
| 123 | server-class system, even when serializing many small objects. Decoding | 397 | server-class system, even when serializing many small objects. Decoding |
| 124 | speed is mostly constrained by object creation cost. | 398 | speed is mostly constrained by object creation cost. |
| 125 | |||
| 126 | </p> | 399 | </p> |
| 127 | <p> | 400 | <p> |
| 128 | |||
| 129 | The serializer handles most Lua types, common FFI number types and | 401 | The serializer handles most Lua types, common FFI number types and |
| 130 | nested structures. Functions, thread objects, other FFI cdata, full | 402 | nested structures. Functions, thread objects, other FFI cdata, full |
| 131 | userdata and associated metatables cannot be serialized (yet). | 403 | userdata and associated metatables cannot be serialized (yet). |
| 132 | |||
| 133 | </p> | 404 | </p> |
| 134 | <p> | 405 | <p> |
| 135 | |||
| 136 | The encoder serializes nested structures as trees. Multiple references | 406 | The encoder serializes nested structures as trees. Multiple references |
| 137 | to a single object will be stored separately and create distinct objects | 407 | to a single object will be stored separately and create distinct objects |
| 138 | after decoding. Circular references cause an error. | 408 | after decoding. Circular references cause an error. |
| 139 | |||
| 140 | |||
| 141 | </p> | 409 | </p> |
| 142 | 410 | ||
| 143 | <h3 id="buffer_encode"><tt>str = buffer.encode(obj)</tt></h3> | 411 | <h3 id="serialize_methods">Serialization Functions and Methods</h3> |
| 144 | <p> | ||
| 145 | |||
| 146 | Serializes (encodes) the Lua object <tt>obj</tt> into the string | ||
| 147 | <tt>str</tt>. | ||
| 148 | 412 | ||
| 413 | <h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br> | ||
| 414 | buf = buf:encode(obj)</tt></h3> | ||
| 415 | <p> | ||
| 416 | Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone | ||
| 417 | function returns a string <tt>str</tt>. The buffer method appends the | ||
| 418 | encoding to the buffer. | ||
| 149 | </p> | 419 | </p> |
| 150 | <p> | 420 | <p> |
| 151 | |||
| 152 | <tt>obj</tt> can be any of the supported Lua types — it doesn't | 421 | <tt>obj</tt> can be any of the supported Lua types — it doesn't |
| 153 | need to be a Lua table. | 422 | need to be a Lua table. |
| 154 | |||
| 155 | </p> | 423 | </p> |
| 156 | <p> | 424 | <p> |
| 157 | |||
| 158 | This function may throw an error when attempting to serialize | 425 | This function may throw an error when attempting to serialize |
| 159 | unsupported object types, circular references or deeply nested tables. | 426 | unsupported object types, circular references or deeply nested tables. |
| 160 | |||
| 161 | </p> | 427 | </p> |
| 162 | 428 | ||
| 163 | <h3 id="buffer_decode"><tt>obj = buffer.decode(str)</tt></h3> | 429 | <h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br> |
| 430 | obj = buf:decode()</tt></h3> | ||
| 164 | <p> | 431 | <p> |
| 165 | 432 | The stand-alone function de-serializes (decodes) the string | |
| 166 | De-serializes (decodes) the string <tt>str</tt> into the Lua object | 433 | <tt>str</tt>, the buffer method de-serializes one object from the |
| 167 | <tt>obj</tt>. | 434 | buffer. Both return a Lua object <tt>obj</tt>. |
| 168 | |||
| 169 | </p> | 435 | </p> |
| 170 | <p> | 436 | <p> |
| 171 | |||
| 172 | The returned object may be any of the supported Lua types — | 437 | The returned object may be any of the supported Lua types — |
| 173 | even <tt>nil</tt>. | 438 | even <tt>nil</tt>. |
| 174 | |||
| 175 | </p> | 439 | </p> |
| 176 | <p> | 440 | <p> |
| 177 | |||
| 178 | This function may throw an error when fed with malformed or incomplete | 441 | This function may throw an error when fed with malformed or incomplete |
| 179 | encoded data. The standalone function throws when there's left-over data | 442 | encoded data. The stand-alone function throws when there's left-over |
| 180 | after decoding a single top-level object. | 443 | data after decoding a single top-level object. The buffer method leaves |
| 181 | 444 | any left-over data in the buffer. | |
| 182 | </p> | 445 | </p> |
| 183 | 446 | ||
| 184 | <h2 id="serialize_format">Serialization Format Specification</h2> | 447 | <h3 id="serialize_stream">Streaming Serialization</h3> |
| 448 | <p> | ||
| 449 | In some contexts, it's desirable to do piecewise serialization of large | ||
| 450 | datasets, also known as <i>streaming</i>. | ||
| 451 | </p> | ||
| 452 | <p> | ||
| 453 | This serialization format can be safely concatenated and supports streaming. | ||
| 454 | Multiple encodings can simply be appended to a buffer and later decoded | ||
| 455 | individually: | ||
| 456 | </p> | ||
| 457 | <pre class="code"> | ||
| 458 | local buf = buffer.new() | ||
| 459 | buf:encode(obj1) | ||
| 460 | buf:encode(obj2) | ||
| 461 | local copy1 = buf:decode() | ||
| 462 | local copy2 = buf:decode() | ||
| 463 | </pre> | ||
| 185 | <p> | 464 | <p> |
| 465 | Here's how to iterate over a stream: | ||
| 466 | </p> | ||
| 467 | <pre class="code"> | ||
| 468 | while #buf ~= 0 do | ||
| 469 | local obj = buf:decode() | ||
| 470 | -- Do something with obj. | ||
| 471 | end | ||
| 472 | </pre> | ||
| 473 | <p> | ||
| 474 | Since the serialization format doesn't prepend a length to its encoding, | ||
| 475 | network applications may need to transmit the length, too. | ||
| 476 | </p> | ||
| 186 | 477 | ||
| 478 | <h3 id="serialize_format">Serialization Format Specification</h3> | ||
| 479 | <p> | ||
| 187 | This serialization format is designed for <b>internal use</b> by LuaJIT | 480 | This serialization format is designed for <b>internal use</b> by LuaJIT |
| 188 | applications. Serialized data is upwards-compatible and portable across | 481 | applications. Serialized data is upwards-compatible and portable across |
| 189 | all supported LuaJIT platforms. | 482 | all supported LuaJIT platforms. |
| 190 | |||
| 191 | </p> | 483 | </p> |
| 192 | <p> | 484 | <p> |
| 193 | |||
| 194 | It's an <b>8-bit binary format</b> and not human-readable. It uses e.g. | 485 | It's an <b>8-bit binary format</b> and not human-readable. It uses e.g. |
| 195 | embedded zeroes and stores embedded Lua string objects unmodified, which | 486 | embedded zeroes and stores embedded Lua string objects unmodified, which |
| 196 | are 8-bit-clean, too. Encoded data can be safely concatenated for | 487 | are 8-bit-clean, too. Encoded data can be safely concatenated for |
| 197 | streaming and later decoded one top-level object at a time. | 488 | streaming and later decoded one top-level object at a time. |
| 198 | |||
| 199 | </p> | 489 | </p> |
| 200 | <p> | 490 | <p> |
| 201 | |||
| 202 | The encoding is reasonably compact, but tuned for maximum performance, | 491 | The encoding is reasonably compact, but tuned for maximum performance, |
| 203 | not for minimum space usage. It compresses well with any of the common | 492 | not for minimum space usage. It compresses well with any of the common |
| 204 | byte-oriented data compression algorithms. | 493 | byte-oriented data compression algorithms. |
| 205 | |||
| 206 | </p> | 494 | </p> |
| 207 | <p> | 495 | <p> |
| 208 | |||
| 209 | Although documented here for reference, this format is explicitly | 496 | Although documented here for reference, this format is explicitly |
| 210 | <b>not</b> intended to be a 'public standard' for structured data | 497 | <b>not</b> intended to be a 'public standard' for structured data |
| 211 | interchange across computer languages (like JSON or MessagePack). Please | 498 | interchange across computer languages (like JSON or MessagePack). Please |
| 212 | do not use it as such. | 499 | do not use it as such. |
| 213 | |||
| 214 | </p> | 500 | </p> |
| 215 | <p> | 501 | <p> |
| 216 | |||
| 217 | The specification is given below as a context-free grammar with a | 502 | The specification is given below as a context-free grammar with a |
| 218 | top-level <tt>object</tt> as the starting point. Alternatives are | 503 | top-level <tt>object</tt> as the starting point. Alternatives are |
| 219 | separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats. | 504 | separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats. |
| 220 | Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are | 505 | Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are |
| 221 | either plain hex numbers, encoded as bytes, or have a <tt>.format</tt> | 506 | either plain hex numbers, encoded as bytes, or have a <tt>.format</tt> |
| 222 | suffix. | 507 | suffix. |
| 223 | |||
| 224 | </p> | 508 | </p> |
| 225 | <pre> | 509 | <pre> |
| 226 | object → nil | false | true | 510 | object → nil | false | true |
| @@ -261,6 +545,73 @@ string → (0x20+len).U len*char.B | |||
| 261 | 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B | 545 | 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B |
| 262 | 0x1fe0.. → 0xff n.I | 546 | 0x1fe0.. → 0xff n.I |
| 263 | </pre> | 547 | </pre> |
| 548 | |||
| 549 | <h2 id="error">Error handling</h2> | ||
| 550 | <p> | ||
| 551 | Many of the buffer methods can throw an error. Out-of-memory or usage | ||
| 552 | errors are best caught with an outer wrapper for larger parts of code. | ||
| 553 | There's not much one can do after that, anyway. | ||
| 554 | </p> | ||
| 555 | <p> | ||
| 556 | OTOH you may want to catch some errors individually. Buffer methods need | ||
| 557 | to receive the buffer object as the first argument. The Lua colon-syntax | ||
| 558 | <tt>obj:method()</tt> does that implicitly. But to wrap a method with | ||
| 559 | <tt>pcall()</tt>, the arguments need to be passed like this: | ||
| 560 | </p> | ||
| 561 | <pre class="code"> | ||
| 562 | local ok, err = pcall(buf.encode, buf, obj) | ||
| 563 | if not ok then | ||
| 564 | -- Handle error in err. | ||
| 565 | end | ||
| 566 | </pre> | ||
| 567 | |||
| 568 | <h2 id="ffi_caveats">FFI caveats</h2> | ||
| 569 | <p> | ||
| 570 | The string buffer library has been designed to work well together with | ||
| 571 | the FFI library. But due to the low-level nature of the FFI library, | ||
| 572 | some care needs to be taken: | ||
| 573 | </p> | ||
| 574 | <p> | ||
| 575 | First, please remember that FFI pointers are zero-indexed. The space | ||
| 576 | returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the | ||
| 577 | returned pointer and ends before <tt>len</tt> bytes after that. | ||
| 578 | </p> | ||
| 579 | <p> | ||
| 580 | I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index | ||
| 581 | is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid | ||
| 582 | index at all. The returned pointer may even be <tt>NULL</tt>. | ||
| 583 | </p> | ||
| 584 | <p> | ||
| 585 | The space pointed to by the returned pointer is only valid as long as | ||
| 586 | the buffer is not modified in any way (neither append, nor consume, nor | ||
| 587 | reset, etc.). The pointer is also not a GC anchor for the buffer object | ||
| 588 | itself. | ||
| 589 | </p> | ||
| 590 | <p> | ||
| 591 | Buffer data is only guaranteed to be byte-aligned. Casting the returned | ||
| 592 | pointer to a data type with higher alignment may cause unaligned | ||
| 593 | accesses. It depends on the CPU architecture whether this is allowed or | ||
| 594 | not (it's always OK on x86/x64 and mostly OK on other modern | ||
| 595 | architectures). | ||
| 596 | </p> | ||
| 597 | <p> | ||
| 598 | FFI pointers or references do not count as GC anchors for an underlying | ||
| 599 | object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is | ||
| 600 | anchored by <tt>buf:set(array, len)</tt>, but not by | ||
| 601 | <tt>buf:set(array+offset, len)</tt>. The addition of the offset | ||
| 602 | creates a new pointer, even when the offset is zero. In this case, you | ||
| 603 | need to make sure there's still a reference to the original array as | ||
| 604 | long as its contents are in use by the buffer. | ||
| 605 | </p> | ||
| 606 | <p> | ||
| 607 | Even though each LuaJIT VM instance is single-threaded (but you can | ||
| 608 | create multiple VMs), FFI data structures can be accessed concurrently. | ||
| 609 | Be careful when reading/writing FFI cdata from/to buffers to avoid | ||
| 610 | concurrent accesses or modifications. In particular, the memory | ||
| 611 | referenced by <tt>buf:set(cdata, len)</tt> must not be modified | ||
| 612 | while buffer readers are working on it. Shared, but read-only memory | ||
| 613 | mappings of files are OK, but only if the file does not change. | ||
| 614 | </p> | ||
| 264 | <br class="flush"> | 615 | <br class="flush"> |
| 265 | </div> | 616 | </div> |
| 266 | <div id="foot"> | 617 | <div id="foot"> |
diff --git a/src/Makefile.dep b/src/Makefile.dep index 0bf63391..a557d44f 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
| @@ -2,17 +2,18 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | |||
| 2 | lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ | 2 | lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ |
| 3 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h | 3 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h |
| 4 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 4 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
| 5 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ | 5 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \ |
| 6 | lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \ | 6 | lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ |
| 7 | lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ | 7 | lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \ |
| 8 | lj_strfmt.h lj_lib.h lj_libdef.h | 8 | lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h |
| 9 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 9 | lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
| 10 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ | 10 | lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ |
| 11 | lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ | 11 | lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ |
| 12 | lj_ffdef.h lj_lib.h lj_libdef.h | 12 | lj_ffdef.h lj_lib.h lj_libdef.h |
| 13 | lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 13 | lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
| 14 | lj_def.h lj_arch.h lj_gc.h lj_buf.h lj_str.h lj_serialize.h lj_lib.h \ | 14 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ |
| 15 | lj_libdef.h | 15 | lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \ |
| 16 | lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h | ||
| 16 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 17 | lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
| 17 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ | 18 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ |
| 18 | lj_libdef.h | 19 | lj_libdef.h |
| @@ -51,10 +52,10 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | |||
| 51 | lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ | 52 | lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ |
| 52 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h | 53 | lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h |
| 53 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 54 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
| 54 | lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ | 55 | lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ |
| 55 | lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ | 56 | lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ |
| 56 | lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ | 57 | lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ |
| 57 | lj_asm_*.h | 58 | lj_emit_*.h lj_asm_*.h |
| 58 | lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h | 59 | lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h |
| 59 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ | 60 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ |
| 60 | lj_bcdef.h | 61 | lj_bcdef.h |
| @@ -80,8 +81,8 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ | |||
| 80 | lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ | 81 | lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ |
| 81 | lj_traceerr.h lj_vm.h | 82 | lj_traceerr.h lj_vm.h |
| 82 | lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 83 | lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 83 | lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ | 84 | lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \ |
| 84 | lj_ccallback.h | 85 | lj_cdata.h lj_cconv.h lj_ccallback.h |
| 85 | lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 86 | lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 86 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h | 87 | lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h |
| 87 | lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h | 88 | lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h |
| @@ -137,8 +138,8 @@ lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | |||
| 137 | lj_strfmt.h | 138 | lj_strfmt.h |
| 138 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ | 139 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ |
| 139 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ | 140 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ |
| 140 | lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \ | 141 | lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \ |
| 141 | lj_bcdump.h lj_lib.h | 142 | lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h |
| 142 | lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | 143 | lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ |
| 143 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ | 144 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ |
| 144 | lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h | 145 | lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h |
diff --git a/src/lib_base.c b/src/lib_base.c index cb2e244e..1c8816f0 100644 --- a/src/lib_base.c +++ b/src/lib_base.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "lj_gc.h" | 19 | #include "lj_gc.h" |
| 20 | #include "lj_err.h" | 20 | #include "lj_err.h" |
| 21 | #include "lj_debug.h" | 21 | #include "lj_debug.h" |
| 22 | #include "lj_buf.h" | ||
| 22 | #include "lj_str.h" | 23 | #include "lj_str.h" |
| 23 | #include "lj_tab.h" | 24 | #include "lj_tab.h" |
| 24 | #include "lj_meta.h" | 25 | #include "lj_meta.h" |
| @@ -406,10 +407,22 @@ LJLIB_CF(load) | |||
| 406 | GCstr *name = lj_lib_optstr(L, 2); | 407 | GCstr *name = lj_lib_optstr(L, 2); |
| 407 | GCstr *mode = lj_lib_optstr(L, 3); | 408 | GCstr *mode = lj_lib_optstr(L, 3); |
| 408 | int status; | 409 | int status; |
| 409 | if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { | 410 | if (L->base < L->top && |
| 410 | GCstr *s = lj_lib_checkstr(L, 1); | 411 | (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) { |
| 412 | const char *s; | ||
| 413 | MSize len; | ||
| 414 | if (tvisbuf(L->base)) { | ||
| 415 | SBufExt *sbx = bufV(L->base); | ||
| 416 | s = sbx->r; | ||
| 417 | len = sbufxlen(sbx); | ||
| 418 | if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */ | ||
| 419 | } else { | ||
| 420 | GCstr *str = lj_lib_checkstr(L, 1); | ||
| 421 | s = strdata(str); | ||
| 422 | len = str->len; | ||
| 423 | } | ||
| 411 | lua_settop(L, 4); /* Ensure env arg exists. */ | 424 | lua_settop(L, 4); /* Ensure env arg exists. */ |
| 412 | status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), | 425 | status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s, |
| 413 | mode ? strdata(mode) : NULL); | 426 | mode ? strdata(mode) : NULL); |
| 414 | } else { | 427 | } else { |
| 415 | lj_lib_checkfunc(L, 1); | 428 | lj_lib_checkfunc(L, 1); |
diff --git a/src/lib_buffer.c b/src/lib_buffer.c index c9ef9510..78c4eeb9 100644 --- a/src/lib_buffer.c +++ b/src/lib_buffer.c | |||
| @@ -14,14 +14,286 @@ | |||
| 14 | 14 | ||
| 15 | #if LJ_HASBUFFER | 15 | #if LJ_HASBUFFER |
| 16 | #include "lj_gc.h" | 16 | #include "lj_gc.h" |
| 17 | #include "lj_err.h" | ||
| 17 | #include "lj_buf.h" | 18 | #include "lj_buf.h" |
| 19 | #include "lj_str.h" | ||
| 20 | #include "lj_tab.h" | ||
| 21 | #include "lj_udata.h" | ||
| 22 | #include "lj_meta.h" | ||
| 23 | #if LJ_HASFFI | ||
| 24 | #include "lj_ctype.h" | ||
| 25 | #include "lj_cdata.h" | ||
| 26 | #include "lj_cconv.h" | ||
| 27 | #endif | ||
| 28 | #include "lj_strfmt.h" | ||
| 18 | #include "lj_serialize.h" | 29 | #include "lj_serialize.h" |
| 19 | #include "lj_lib.h" | 30 | #include "lj_lib.h" |
| 20 | 31 | ||
| 21 | /* ------------------------------------------------------------------------ */ | 32 | /* ------------------------------------------------------------------------ */ |
| 22 | 33 | ||
| 34 | #define LJLIB_MODULE_buffer_method | ||
| 35 | |||
| 36 | /* Check that the first argument is a string buffer and return its SBufExt. */ | ||
| 37 | static SBufExt *buffer_tobuf(lua_State *L) | ||
| 38 | { | ||
| 39 | if (!(L->base < L->top && tvisbuf(L->base))) /* Argument 1 is missing or is not a buffer. */ | ||
| 40 | lj_err_argtype(L, 1, "buffer"); | ||
| 41 | return bufV(L->base); | ||
| 42 | } | ||
| 43 | |||
| 44 | /* Check that the first argument is a string buffer, for methods that write to it. */ | ||
| 45 | static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L) | ||
| 46 | { | ||
| 47 | SBufExt *sbx = buffer_tobuf(L); | ||
| 48 | setsbufXL_(sbx, L); /* Presumably binds the buffer to the calling lua_State -- confirm in lj_buf.h. */ | ||
| 49 | return sbx; | ||
| 50 | } | ||
| 51 | |||
| 52 | LJLIB_CF(buffer_method_free) /* Frees the buffer via lj_bufx_free() and re-initializes it; returns the buffer. */ | ||
| 53 | { | ||
| 54 | SBufExt *sbx = buffer_tobuf(L); | ||
| 55 | lj_bufx_free(G(L), sbx); | ||
| 56 | lj_bufx_init(L, sbx); /* Leave the object in a freshly initialized state. */ | ||
| 57 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 58 | return 1; | ||
| 59 | } | ||
| 60 | |||
| 61 | LJLIB_CF(buffer_method_reset) /* Applies lj_bufx_reset() to the buffer; returns the buffer. */ | ||
| 62 | { | ||
| 63 | SBufExt *sbx = buffer_tobuf(L); | ||
| 64 | lj_bufx_reset(sbx); | ||
| 65 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 66 | return 1; | ||
| 67 | } | ||
| 68 | |||
| 69 | LJLIB_CF(buffer_method_skip) /* Advances the read pointer by argument 2 (an integer in 0..LJ_MAX_BUF); returns the buffer. */ | ||
| 70 | { | ||
| 71 | SBufExt *sbx = buffer_tobuf(L); | ||
| 72 | MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); | ||
| 73 | MSize len = sbufxlen(sbx); /* Presumably the number of unread bytes -- confirm in lj_buf.h. */ | ||
| 74 | if (n < len) { | ||
| 75 | sbx->r += n; | ||
| 76 | } else { | ||
| 77 | sbx->r = sbx->w = sbx->b; /* Skipping len or more: move read and write pointers back to b. NOTE(review): confirm this is also right for buffers set up by lj_bufx_init_cow(). */ | ||
| 78 | } | ||
| 79 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 80 | return 1; | ||
| 81 | } | ||
| 82 | |||
| 83 | LJLIB_CF(buffer_method_set) /* Re-initializes the buffer over the memory of a string, or of cdata plus a length; returns the buffer. */ | ||
| 84 | { | ||
| 85 | SBufExt *sbx = buffer_tobuf(L); | ||
| 86 | const char *p; | ||
| 87 | MSize len; | ||
| 88 | #if LJ_HASFFI | ||
| 89 | if (tviscdata(L->base+1)) { /* cdata form: argument 2 is converted to a const void *, argument 3 is the length. */ | ||
| 90 | CTState *cts = ctype_cts(L); | ||
| 91 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, | ||
| 92 | L->base+1, CCF_ARG(2)); | ||
| 93 | len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); | ||
| 94 | } else | ||
| 95 | #endif | ||
| 96 | { /* String form: argument 2 must be a string; its data and length are used directly. */ | ||
| 97 | GCstr *str = lj_lib_checkstrx(L, 2); | ||
| 98 | p = strdata(str); | ||
| 99 | len = str->len; | ||
| 100 | } | ||
| 101 | lj_bufx_free(G(L), sbx); /* Drop the previous contents before pointing at the new memory. */ | ||
| 102 | lj_bufx_init_cow(L, sbx, p, len); | ||
| 103 | setgcref(sbx->cowref, gcV(L->base+1)); /* Remember the object passed as argument 2 in cowref. */ | ||
| 104 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 105 | return 1; | ||
| 106 | } | ||
| 107 | |||
| 108 | LJLIB_CF(buffer_method_put) /* Appends every argument after the buffer (strings, numbers, buffers, or objects with __tostring); returns the buffer. */ | ||
| 109 | { | ||
| 110 | SBufExt *sbx = buffer_tobufw(L); | ||
| 111 | ptrdiff_t arg, narg = L->top - L->base; | ||
| 112 | for (arg = 1; arg < narg; arg++) { | ||
| 113 | cTValue *o = &L->base[arg], *mo = NULL; /* mo stays NULL until a __tostring lookup was made for this argument. */ | ||
| 114 | retry: | ||
| 115 | if (tvisstr(o)) { | ||
| 116 | lj_buf_putstr((SBuf *)sbx, strV(o)); | ||
| 117 | } else if (tvisint(o)) { | ||
| 118 | lj_strfmt_putint((SBuf *)sbx, intV(o)); | ||
| 119 | } else if (tvisnum(o)) { | ||
| 120 | lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o)); | ||
| 121 | } else if (tvisbuf(o)) { | ||
| 122 | SBufExt *sbx2 = bufV(o); | ||
| 123 | lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2)); /* NOTE(review): sbx2 may be sbx itself (buf:put(buf)); confirm the source pointer stays valid if the destination grows. */ | ||
| 124 | } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { | ||
| 125 | /* Call __tostring metamethod inline. */ | ||
| 126 | copyTV(L, L->top++, mo); | ||
| 127 | copyTV(L, L->top++, o); | ||
| 128 | lua_call(L, 1, 1); /* One argument (the object), one result. */ | ||
| 129 | o = &L->base[arg]; /* The stack may have been reallocated. */ | ||
| 130 | copyTV(L, &L->base[arg], L->top-1); /* Replace the argument with the metamethod result. */ | ||
| 131 | L->top = L->base + narg; /* Drop the temporary call slots. */ | ||
| 132 | goto retry; /* Retry with the result. */ | ||
| 133 | } else { /* Also reached when the __tostring result is itself unsupported, since mo is then non-NULL. */ | ||
| 134 | lj_err_argtype(L, arg+1, "string/number/__tostring"); | ||
| 135 | } | ||
| 136 | /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */ | ||
| 137 | } | ||
| 138 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 139 | lj_gc_check(L); | ||
| 140 | return 1; | ||
| 141 | } | ||
| 142 | |||
| 143 | LJLIB_CF(buffer_method_putf) /* Appends formatted output produced by lj_strfmt_putarg(); returns the buffer. */ | ||
| 144 | { | ||
| 145 | SBufExt *sbx = buffer_tobufw(L); | ||
| 146 | lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2); /* Presumably the format string is argument 2, followed by its values -- confirm in lj_strfmt.c. */ | ||
| 147 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 148 | lj_gc_check(L); | ||
| 149 | return 1; | ||
| 150 | } | ||
| 151 | |||
| 152 | LJLIB_CF(buffer_method_get) /* Consumes data from the read pointer; returns one string per length argument. */ | ||
| 153 | { | ||
| 154 | SBufExt *sbx = buffer_tobuf(L); | ||
| 155 | ptrdiff_t arg, narg = L->top - L->base; | ||
| 156 | if (narg == 1) { | ||
| 157 | narg++; | ||
| 158 | setnilV(L->top++); /* get() is the same as get(nil). */ | ||
| 159 | } | ||
| 160 | for (arg = 1; arg < narg; arg++) { | ||
| 161 | TValue *o = &L->base[arg]; | ||
| 162 | MSize n = tvisnil(o) ? LJ_MAX_BUF : | ||
| 163 | (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF); | ||
| 164 | MSize len = sbufxlen(sbx); | ||
| 165 | if (n > len) n = len; /* Never return more than what is left; nil therefore means all of it. */ | ||
| 166 | setstrV(L, o, lj_str_new(L, sbx->r, n)); /* The result string overwrites the argument slot. */ | ||
| 167 | sbx->r += n; | ||
| 168 | } | ||
| 169 | if (sbx->r == sbx->w) sbx->r = sbx->w = sbx->b; /* Everything consumed: move both pointers back to b. NOTE(review): confirm this is also right for buffers set up by lj_bufx_init_cow(). */ | ||
| 170 | lj_gc_check(L); | ||
| 171 | return narg-1; /* The rewritten argument slots are the results. */ | ||
| 172 | } | ||
| 173 | |||
| 174 | #if LJ_HASFFI | ||
| 175 | LJLIB_CF(buffer_method_putcdata) /* Appends len bytes (argument 3) from the memory addressed by the cdata in argument 2; returns the buffer. */ | ||
| 176 | { | ||
| 177 | SBufExt *sbx = buffer_tobufw(L); | ||
| 178 | const char *p; | ||
| 179 | MSize len; | ||
| 180 | if (tviscdata(L->base+1)) { | ||
| 181 | CTState *cts = ctype_cts(L); | ||
| 182 | lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, | ||
| 183 | L->base+1, CCF_ARG(2)); | ||
| 184 | } else { /* Unlike the set method, there is no string form here. */ | ||
| 185 | lj_err_argtype(L, 2, "cdata"); | ||
| 186 | } | ||
| 187 | len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); | ||
| 188 | lj_buf_putmem((SBuf *)sbx, p, len); | ||
| 189 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 190 | return 1; | ||
| 191 | } | ||
| 192 | |||
| 193 | LJLIB_CF(buffer_method_reserve) /* Requests len bytes of write space; returns a uint8_t pointer cdata and the space left. */ | ||
| 194 | { | ||
| 195 | SBufExt *sbx = buffer_tobufw(L); | ||
| 196 | MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); | ||
| 197 | GCcdata *cd; | ||
| 198 | lj_buf_more((SBuf *)sbx, len); /* Presumably grows the buffer if fewer than len bytes are free -- confirm in lj_buf.h. */ | ||
| 199 | ctype_loadffi(L); | ||
| 200 | cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); | ||
| 201 | *(void **)cdataptr(cd) = sbx->w; /* The pointer addresses the current write position. */ | ||
| 202 | setcdataV(L, L->top++, cd); | ||
| 203 | setintV(L->top++, sbufleft(sbx)); /* Second result is sbufleft(), not the requested len. */ | ||
| 204 | return 2; | ||
| 205 | } | ||
| 206 | |||
| 207 | LJLIB_CF(buffer_method_commit) /* Advances the write pointer by len bytes; returns the buffer. */ | ||
| 208 | { | ||
| 209 | SBufExt *sbx = buffer_tobuf(L); | ||
| 210 | MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); | ||
| 211 | if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG); /* Cannot commit more than the space left in the buffer. */ | ||
| 212 | sbx->w += len; | ||
| 213 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 214 | return 1; | ||
| 215 | } | ||
| 216 | |||
| 217 | LJLIB_CF(buffer_method_ref) /* Returns a uint8_t pointer cdata to the read position and the buffer length. */ | ||
| 218 | { | ||
| 219 | SBufExt *sbx = buffer_tobuf(L); | ||
| 220 | GCcdata *cd; | ||
| 221 | ctype_loadffi(L); | ||
| 222 | cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); | ||
| 223 | *(void **)cdataptr(cd) = sbx->r; /* The pointer addresses the current read position; nothing is consumed. */ | ||
| 224 | setcdataV(L, L->top++, cd); | ||
| 225 | setintV(L->top++, sbufxlen(sbx)); | ||
| 226 | return 2; | ||
| 227 | } | ||
| 228 | #endif | ||
| 229 | |||
| 230 | LJLIB_CF(buffer_method_encode) /* Serializes argument 2 into the buffer via lj_serialize_put(); returns the buffer. */ | ||
| 231 | { | ||
| 232 | SBufExt *sbx = buffer_tobufw(L); | ||
| 233 | cTValue *o = lj_lib_checkany(L, 2); /* Any value is accepted, but the argument must be present. */ | ||
| 234 | lj_serialize_put(sbx, o); | ||
| 235 | lj_gc_check(L); | ||
| 236 | L->top = L->base+1; /* Chain buffer object. */ | ||
| 237 | return 1; | ||
| 238 | } | ||
| 239 | |||
| 240 | LJLIB_CF(buffer_method_decode) /* Deserializes one object from the buffer via lj_serialize_get() and returns it. */ | ||
| 241 | { | ||
| 242 | SBufExt *sbx = buffer_tobufw(L); | ||
| 243 | setnilV(L->top++); /* Reserve the result slot. */ | ||
| 244 | lj_serialize_get(sbx, L->top-1); /* The decoded value is stored into the reserved slot. */ | ||
| 245 | lj_gc_check(L); | ||
| 246 | return 1; | ||
| 247 | } | ||
| 248 | |||
| 249 | LJLIB_CF(buffer_method___gc) /* Finalizer: frees the buffer via lj_bufx_free(); returns nothing. */ | ||
| 250 | { | ||
| 251 | SBufExt *sbx = buffer_tobuf(L); | ||
| 252 | lj_bufx_free(G(L), sbx); | ||
| 253 | lj_bufx_init(L, sbx); /* Re-initialize, same as the free method does. */ | ||
| 254 | return 0; | ||
| 255 | } | ||
| 256 | |||
| 257 | LJLIB_CF(buffer_method___tostring) /* Returns the buffer data as a new string without consuming it. */ | ||
| 258 | { | ||
| 259 | SBufExt *sbx = buffer_tobuf(L); | ||
| 260 | setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx))); /* The result overwrites the topmost stack slot. */ | ||
| 261 | lj_gc_check(L); | ||
| 262 | return 1; | ||
| 263 | } | ||
| 264 | |||
| 265 | LJLIB_CF(buffer_method___len) /* Returns the buffer length (sbufxlen) as an integer. */ | ||
| 266 | { | ||
| 267 | SBufExt *sbx = buffer_tobuf(L); | ||
| 268 | setintV(L->top-1, (int32_t)sbufxlen(sbx)); /* The result overwrites the topmost stack slot. */ | ||
| 269 | return 1; | ||
| 270 | } | ||
| 271 | |||
| 272 | LJLIB_PUSH("buffer") LJLIB_SET(__metatable) | ||
| 273 | LJLIB_PUSH(top-1) LJLIB_SET(__index) | ||
| 274 | |||
| 275 | /* ------------------------------------------------------------------------ */ | ||
| 276 | |||
| 23 | #define LJLIB_MODULE_buffer | 277 | #define LJLIB_MODULE_buffer |
| 24 | 278 | ||
| 279 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ | ||
| 280 | |||
| 281 | LJLIB_CF(buffer_new) /* Creates a new buffer userdata, with an optional size as argument 1. */ | ||
| 282 | { | ||
| 283 | MSize sz = L->base == L->top ? 0u : | ||
| 284 | (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); | ||
| 285 | GCtab *env = tabref(curr_func(L)->c.env); /* The environment table of this C function; also used as the metatable below. */ | ||
| 286 | GCudata *ud = lj_udata_new(L, sizeof(SBufExt), env); /* The SBufExt lives in the userdata payload. */ | ||
| 287 | SBufExt *sbx = (SBufExt *)uddata(ud); | ||
| 288 | ud->udtype = UDTYPE_BUFFER; /* This tag is what tvisbuf() tests for. */ | ||
| 289 | /* NOBARRIER: The GCudata is new (marked white). */ | ||
| 290 | setgcref(ud->metatable, obj2gco(env)); | ||
| 291 | setudataV(L, L->top++, ud); | ||
| 292 | lj_bufx_init(L, sbx); | ||
| 293 | if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); /* Presumably pre-sizes the buffer to sz bytes -- confirm in lj_buf.c. */ | ||
| 294 | return 1; | ||
| 295 | } | ||
| 296 | |||
| 25 | LJLIB_CF(buffer_encode) | 297 | LJLIB_CF(buffer_encode) |
| 26 | { | 298 | { |
| 27 | cTValue *o = lj_lib_checkany(L, 1); | 299 | cTValue *o = lj_lib_checkany(L, 1); |
| @@ -35,13 +307,14 @@ LJLIB_CF(buffer_encode) | |||
| 35 | 307 | ||
| 36 | LJLIB_CF(buffer_decode) | 308 | LJLIB_CF(buffer_decode) |
| 37 | { | 309 | { |
| 38 | GCstr *str = lj_lib_checkstr(L, 1); | 310 | GCstr *str = lj_lib_checkstrx(L, 1); |
| 39 | SBufExt sbx; | 311 | SBufExt sbx; |
| 40 | lj_bufx_init_cow(L, &sbx, strdata(str), str->len); | 312 | lj_bufx_init_cow(L, &sbx, strdata(str), str->len); |
| 41 | /* No need to set sbx.cowref here. */ | 313 | /* No need to set sbx.cowref here. */ |
| 42 | setnilV(L->top++); | 314 | setnilV(L->top++); |
| 43 | lj_serialize_get(&sbx, L->top-1); | 315 | lj_serialize_get(&sbx, L->top-1); |
| 44 | lj_gc_check(L); | 316 | lj_gc_check(L); |
| 317 | if (sbx.r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV); | ||
| 45 | return 1; | 318 | return 1; |
| 46 | } | 319 | } |
| 47 | 320 | ||
| @@ -51,6 +324,9 @@ LJLIB_CF(buffer_decode) | |||
| 51 | 324 | ||
| 52 | int luaopen_string_buffer(lua_State *L) | 325 | int luaopen_string_buffer(lua_State *L) |
| 53 | { | 326 | { |
| 327 | LJ_LIB_REG(L, NULL, buffer_method); | ||
| 328 | lua_getfield(L, -1, "__tostring"); | ||
| 329 | lua_setfield(L, -2, "tostring"); | ||
| 54 | LJ_LIB_REG(L, NULL, buffer); | 330 | LJ_LIB_REG(L, NULL, buffer); |
| 55 | return 1; | 331 | return 1; |
| 56 | } | 332 | } |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 286756c6..0e159e52 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
| 12 | 12 | ||
| 13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
| 14 | #include "lj_buf.h" | ||
| 14 | #include "lj_str.h" | 15 | #include "lj_str.h" |
| 15 | #include "lj_tab.h" | 16 | #include "lj_tab.h" |
| 16 | #include "lj_frame.h" | 17 | #include "lj_frame.h" |
diff --git a/src/lj_buf.h b/src/lj_buf.h index 1fb70146..02f0ac61 100644 --- a/src/lj_buf.h +++ b/src/lj_buf.h | |||
| @@ -58,6 +58,10 @@ typedef struct SBufExt { | |||
| 58 | (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb)) | 58 | (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb)) |
| 59 | #define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag))) | 59 | #define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag))) |
| 60 | 60 | ||
| 61 | #define tvisbuf(o) \ | ||
| 62 | (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER) | ||
| 63 | #define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o)))) | ||
| 64 | |||
| 61 | /* Buffer management */ | 65 | /* Buffer management */ |
| 62 | LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); | 66 | LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); |
| 63 | LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz); | 67 | LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz); |
diff --git a/src/lj_cconv.c b/src/lj_cconv.c index f948002c..613f66e2 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #if LJ_HASFFI | 8 | #if LJ_HASFFI |
| 9 | 9 | ||
| 10 | #include "lj_err.h" | 10 | #include "lj_err.h" |
| 11 | #include "lj_buf.h" | ||
| 11 | #include "lj_tab.h" | 12 | #include "lj_tab.h" |
| 12 | #include "lj_ctype.h" | 13 | #include "lj_ctype.h" |
| 13 | #include "lj_cdata.h" | 14 | #include "lj_cdata.h" |
| @@ -621,6 +622,8 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, | |||
| 621 | tmpptr = uddata(ud); | 622 | tmpptr = uddata(ud); |
| 622 | if (ud->udtype == UDTYPE_IO_FILE) | 623 | if (ud->udtype == UDTYPE_IO_FILE) |
| 623 | tmpptr = *(void **)tmpptr; | 624 | tmpptr = *(void **)tmpptr; |
| 625 | else if (ud->udtype == UDTYPE_BUFFER) | ||
| 626 | tmpptr = ((SBufExt *)tmpptr)->r; | ||
| 624 | } else if (tvislightud(o)) { | 627 | } else if (tvislightud(o)) { |
| 625 | tmpptr = lightudV(cts->g, o); | 628 | tmpptr = lightudV(cts->g, o); |
| 626 | } else if (tvisfunc(o)) { | 629 | } else if (tvisfunc(o)) { |
diff --git a/src/lj_crecord.c b/src/lj_crecord.c index be23cd62..b0de5423 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c | |||
| @@ -616,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) | |||
| 616 | sp = lj_ir_kptr(J, NULL); | 616 | sp = lj_ir_kptr(J, NULL); |
| 617 | } else if (tref_isudata(sp)) { | 617 | } else if (tref_isudata(sp)) { |
| 618 | GCudata *ud = udataV(sval); | 618 | GCudata *ud = udataV(sval); |
| 619 | if (ud->udtype == UDTYPE_IO_FILE) { | 619 | if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) { |
| 620 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); | 620 | TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); |
| 621 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); | 621 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype)); |
| 622 | sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); | 622 | sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, |
| 623 | ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE : | ||
| 624 | IRFL_UDATA_BUF_R); | ||
| 623 | } else { | 625 | } else { |
| 624 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); | 626 | sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); |
| 625 | } | 627 | } |
diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 9589ef2a..700250df 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h | |||
| @@ -298,6 +298,7 @@ typedef struct CTState { | |||
| 298 | _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ | 298 | _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ |
| 299 | _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ | 299 | _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ |
| 300 | _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ | 300 | _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ |
| 301 | _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \ | ||
| 301 | _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ | 302 | _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ |
| 302 | _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ | 303 | _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ |
| 303 | CTTYDEFP(_) \ | 304 | CTTYDEFP(_) \ |
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index a6f638ce..af4a03dd 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h | |||
| @@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable") | |||
| 67 | ERRDEF(UNPACK, "too many results to unpack") | 67 | ERRDEF(UNPACK, "too many results to unpack") |
| 68 | ERRDEF(RDRSTR, "reader function must return a string") | 68 | ERRDEF(RDRSTR, "reader function must return a string") |
| 69 | ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) | 69 | ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) |
| 70 | ERRDEF(NUMRNG, "number out of range") | ||
| 70 | ERRDEF(IDXRNG, "index out of range") | 71 | ERRDEF(IDXRNG, "index out of range") |
| 71 | ERRDEF(BASERNG, "base out of range") | 72 | ERRDEF(BASERNG, "base out of range") |
| 72 | ERRDEF(LVLRNG, "level out of range") | 73 | ERRDEF(LVLRNG, "level out of range") |
diff --git a/src/lj_gc.c b/src/lj_gc.c index cfbce037..1f382ea0 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c | |||
| @@ -65,6 +65,12 @@ static void gc_mark(global_State *g, GCobj *o) | |||
| 65 | gray2black(o); /* Userdata are never gray. */ | 65 | gray2black(o); /* Userdata are never gray. */ |
| 66 | if (mt) gc_markobj(g, mt); | 66 | if (mt) gc_markobj(g, mt); |
| 67 | gc_markobj(g, tabref(gco2ud(o)->env)); | 67 | gc_markobj(g, tabref(gco2ud(o)->env)); |
| 68 | if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { | ||
| 69 | SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); | ||
| 70 | if (sbufiscow(sbx) && gcref(sbx->cowref) != NULL) { | ||
| 71 | gc_markobj(g, gcref(sbx->cowref)); | ||
| 72 | } | ||
| 73 | } | ||
| 68 | } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { | 74 | } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { |
| 69 | GCupval *uv = gco2uv(o); | 75 | GCupval *uv = gco2uv(o); |
| 70 | gc_marktv(g, uvval(uv)); | 76 | gc_marktv(g, uvval(uv)); |
diff --git a/src/lj_ir.h b/src/lj_ir.h index aacef2b4..f953ff0e 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
| @@ -204,6 +204,7 @@ IRFPMDEF(FPMENUM) | |||
| 204 | _(UDATA_META, offsetof(GCudata, metatable)) \ | 204 | _(UDATA_META, offsetof(GCudata, metatable)) \ |
| 205 | _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ | 205 | _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ |
| 206 | _(UDATA_FILE, sizeof(GCudata)) \ | 206 | _(UDATA_FILE, sizeof(GCudata)) \ |
| 207 | _(UDATA_BUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \ | ||
| 207 | _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ | 208 | _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ |
| 208 | _(CDATA_PTR, sizeof(GCcdata)) \ | 209 | _(CDATA_PTR, sizeof(GCcdata)) \ |
| 209 | _(CDATA_INT, sizeof(GCcdata)) \ | 210 | _(CDATA_INT, sizeof(GCcdata)) \ |
diff --git a/src/lj_lib.c b/src/lj_lib.c index a962ddc1..21e6a61d 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c | |||
| @@ -16,6 +16,9 @@ | |||
| 16 | #include "lj_func.h" | 16 | #include "lj_func.h" |
| 17 | #include "lj_bc.h" | 17 | #include "lj_bc.h" |
| 18 | #include "lj_dispatch.h" | 18 | #include "lj_dispatch.h" |
| 19 | #if LJ_HASFFI | ||
| 20 | #include "lj_ctype.h" | ||
| 21 | #endif | ||
| 19 | #include "lj_vm.h" | 22 | #include "lj_vm.h" |
| 20 | #include "lj_strscan.h" | 23 | #include "lj_strscan.h" |
| 21 | #include "lj_strfmt.h" | 24 | #include "lj_strfmt.h" |
| @@ -301,3 +304,54 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) | |||
| 301 | return def; | 304 | return def; |
| 302 | } | 305 | } |
| 303 | 306 | ||
| 307 | /* -- Strict type checks -------------------------------------------------- */ | ||
| 308 | |||
| 309 | /* The following type checks do not coerce between strings and numbers. | ||
| 310 | ** And they handle plain int64_t/uint64_t FFI numbers, too. | ||
| 311 | */ | ||
| 312 | |||
| 313 | #if LJ_HASBUFFER | ||
| 314 | GCstr *lj_lib_checkstrx(lua_State *L, int narg) /* Strict string check for argument narg (1-based): numbers are not coerced. */ | ||
| 315 | { | ||
| 316 | TValue *o = L->base + narg-1; | ||
| 317 | if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING); /* Argument missing or not a string. */ | ||
| 318 | return strV(o); | ||
| 319 | } | ||
| 320 | |||
| 321 | int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) /* Returns argument narg (1-based) as an integer in a..b; accepts numbers and, with the FFI, int64_t/uint64_t cdata. */ | ||
| 322 | { | ||
| 323 | TValue *o = L->base + narg-1; | ||
| 324 | lj_assertL(b >= 0, "expected range must be non-negative"); | ||
| 325 | if (o < L->top) { | ||
| 326 | if (LJ_LIKELY(tvisint(o))) { | ||
| 327 | int32_t i = intV(o); | ||
| 328 | if (i >= a && i <= b) return i; | ||
| 329 | } else if (LJ_LIKELY(tvisnum(o))) { | ||
| 330 | /* For performance reasons, this doesn't check for integerness or | ||
| 331 | ** integer overflow. Overflow detection still works, since all FPUs | ||
| 332 | ** return either MININT or MAXINT, which is then out of range. | ||
| 333 | */ | ||
| 334 | int32_t i = (int32_t)numV(o); | ||
| 335 | if (i >= a && i <= b) return i; | ||
| 336 | #if LJ_HASFFI | ||
| 337 | } else if (tviscdata(o)) { | ||
| 338 | GCcdata *cd = cdataV(o); | ||
| 339 | if (cd->ctypeid == CTID_INT64) { | ||
| 340 | int64_t i = *(int64_t *)cdataptr(cd); | ||
| 341 | if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i; | ||
| 342 | } else if (cd->ctypeid == CTID_UINT64) { | ||
| 343 | uint64_t i = *(uint64_t *)cdataptr(cd); | ||
| 344 | if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i; /* A negative lower bound is always satisfied by an unsigned value. */ | ||
| 345 | } else { goto badtype; } /* Any other cdata is a type error, not a range error. */ | ||
| 346 | #endif | ||
| 347 | } else { | ||
| 348 | goto badtype; | ||
| 349 | } | ||
| 350 | lj_err_arg(L, narg, LJ_ERR_NUMRNG); /* Right type, but the value is outside a..b. */ | ||
| 351 | } | ||
| 352 | badtype: /* Also reached when the argument is missing. */ | ||
| 353 | lj_err_argt(L, narg, LUA_TNUMBER); | ||
| 354 | return 0; /* unreachable */ | ||
| 355 | } | ||
| 356 | #endif | ||
| 357 | |||
diff --git a/src/lj_lib.h b/src/lj_lib.h index 718d8eb4..f59e9ea2 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h | |||
| @@ -46,6 +46,12 @@ LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); | |||
| 46 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); | 46 | LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); |
| 47 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); | 47 | LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); |
| 48 | 48 | ||
| 49 | #if LJ_HASBUFFER | ||
| 50 | LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg); | ||
| 51 | LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg, | ||
| 52 | int32_t a, int32_t b); | ||
| 53 | #endif | ||
| 54 | |||
| 49 | /* Avoid including lj_frame.h. */ | 55 | /* Avoid including lj_frame.h. */ |
| 50 | #if LJ_GC64 | 56 | #if LJ_GC64 |
| 51 | #define lj_lib_upvalue(L, n) \ | 57 | #define lj_lib_upvalue(L, n) \ |
diff --git a/src/lj_meta.c b/src/lj_meta.c index 07defa55..660dfec0 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
| @@ -240,8 +240,8 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
| 240 | int fromc = 0; | 240 | int fromc = 0; |
| 241 | if (left < 0) { left = -left; fromc = 1; } | 241 | if (left < 0) { left = -left; fromc = 1; } |
| 242 | do { | 242 | do { |
| 243 | if (!(tvisstr(top) || tvisnumber(top)) || | 243 | if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) || |
| 244 | !(tvisstr(top-1) || tvisnumber(top-1))) { | 244 | !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) { |
| 245 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); | 245 | cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); |
| 246 | if (tvisnil(mo)) { | 246 | if (tvisnil(mo)) { |
| 247 | mo = lj_meta_lookup(L, top, MM_concat); | 247 | mo = lj_meta_lookup(L, top, MM_concat); |
| @@ -277,10 +277,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
| 277 | ** next step: [...][CAT stack ............] | 277 | ** next step: [...][CAT stack ............] |
| 278 | */ | 278 | */ |
| 279 | TValue *e, *o = top; | 279 | TValue *e, *o = top; |
| 280 | uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; | 280 | uint64_t tlen = tvisstr(o) ? strV(o)->len : |
| 281 | tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; | ||
| 281 | SBuf *sb; | 282 | SBuf *sb; |
| 282 | do { | 283 | do { |
| 283 | o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; | 284 | o--; tlen += tvisstr(o) ? strV(o)->len : |
| 285 | tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; | ||
| 284 | } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); | 286 | } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); |
| 285 | if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); | 287 | if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); |
| 286 | sb = lj_buf_tmp_(L); | 288 | sb = lj_buf_tmp_(L); |
| @@ -290,6 +292,9 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) | |||
| 290 | GCstr *s = strV(o); | 292 | GCstr *s = strV(o); |
| 291 | MSize len = s->len; | 293 | MSize len = s->len; |
| 292 | lj_buf_putmem(sb, strdata(s), len); | 294 | lj_buf_putmem(sb, strdata(s), len); |
| 295 | } else if (tvisbuf(o)) { | ||
| 296 | SBufExt *sbx = bufV(o); | ||
| 297 | lj_buf_putmem(sb, sbx->r, sbufxlen(sbx)); | ||
| 293 | } else if (tvisint(o)) { | 298 | } else if (tvisint(o)) { |
| 294 | lj_strfmt_putint(sb, intV(o)); | 299 | lj_strfmt_putint(sb, intV(o)); |
| 295 | } else { | 300 | } else { |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 9b691e49..0dae5fec 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
| @@ -332,6 +332,7 @@ enum { | |||
| 332 | UDTYPE_USERDATA, /* Regular userdata. */ | 332 | UDTYPE_USERDATA, /* Regular userdata. */ |
| 333 | UDTYPE_IO_FILE, /* I/O library FILE. */ | 333 | UDTYPE_IO_FILE, /* I/O library FILE. */ |
| 334 | UDTYPE_FFI_CLIB, /* FFI C library namespace. */ | 334 | UDTYPE_FFI_CLIB, /* FFI C library namespace. */ |
| 335 | UDTYPE_BUFFER, /* String buffer. */ | ||
| 335 | UDTYPE__MAX | 336 | UDTYPE__MAX |
| 336 | }; | 337 | }; |
| 337 | 338 | ||
diff --git a/src/lj_serialize.c b/src/lj_serialize.c index 4e76502a..49a25a7c 100644 --- a/src/lj_serialize.c +++ b/src/lj_serialize.c | |||
| @@ -346,10 +346,7 @@ SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o) | |||
| 346 | 346 | ||
| 347 | SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o) | 347 | SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o) |
| 348 | { | 348 | { |
| 349 | char *r = serialize_get(sbx->r, sbx, o); | 349 | sbx->r = serialize_get(sbx->r, sbx, o); |
| 350 | if (r != sbx->w) | ||
| 351 | lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_LEFTOV); | ||
| 352 | sbx->r = r; | ||
| 353 | return sbx; | 350 | return sbx; |
| 354 | } | 351 | } |
| 355 | 352 | ||
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index a9541d41..5826b539 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c | |||
| @@ -164,6 +164,10 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) | |||
| 164 | if (tvisstr(o)) { | 164 | if (tvisstr(o)) { |
| 165 | *lenp = strV(o)->len; | 165 | *lenp = strV(o)->len; |
| 166 | return strVdata(o); | 166 | return strVdata(o); |
| 167 | } else if (tvisbuf(o)) { | ||
| 168 | SBufExt *sbx = bufV(o); | ||
| 169 | *lenp = sbufxlen(sbx); | ||
| 170 | return sbx->r; | ||
| 167 | } else if (tvisint(o)) { | 171 | } else if (tvisint(o)) { |
| 168 | sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); | 172 | sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); |
| 169 | } else if (tvisnum(o)) { | 173 | } else if (tvisnum(o)) { |
| @@ -421,6 +425,10 @@ int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry) | |||
| 421 | if (LJ_LIKELY(tvisstr(o))) { | 425 | if (LJ_LIKELY(tvisstr(o))) { |
| 422 | len = strV(o)->len; | 426 | len = strV(o)->len; |
| 423 | s = strVdata(o); | 427 | s = strVdata(o); |
| 428 | } else if (tvisbuf(o)) { | ||
| 429 | SBufExt *sbx = bufV(o); | ||
| 430 | len = sbufxlen(sbx); | ||
| 431 | s = sbx->r; | ||
| 424 | } else { | 432 | } else { |
| 425 | GCstr *str = lj_strfmt_obj(L, o); | 433 | GCstr *str = lj_strfmt_obj(L, o); |
| 426 | len = str->len; | 434 | len = str->len; |
