diff options
44 files changed, 1213 insertions, 761 deletions
diff --git a/doc/api.html b/doc/api.html index 3bb10967..874f7ae9 100644 --- a/doc/api.html +++ b/doc/api.html | |||
@@ -319,7 +319,7 @@ enable it <b>after</b> running <tt>luaL_openlibs</tt>. | |||
319 | </p> | 319 | </p> |
320 | <p> | 320 | <p> |
321 | LuaJIT already intercepts exception handling for systems using | 321 | LuaJIT already intercepts exception handling for systems using |
322 | ELF/DWARF2 stack unwinding (e.g. Linux). This is a zero-cost mechanism | 322 | DWARF2 stack unwinding (e.g. Linux, OSX). This is a zero-cost mechanism |
323 | and always enabled. You don't need to use any wrapper functions, | 323 | and always enabled. You don't need to use any wrapper functions, |
324 | except when you want to get a more specific error message than | 324 | except when you want to get a more specific error message than |
325 | <tt>"C++ exception"</tt>. | 325 | <tt>"C++ exception"</tt>. |
diff --git a/doc/changes.html b/doc/changes.html index 641f1e28..d9a3aadd 100644 --- a/doc/changes.html +++ b/doc/changes.html | |||
@@ -48,10 +48,27 @@ The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJI | |||
48 | </p> | 48 | </p> |
49 | <p> | 49 | <p> |
50 | Please check the | 50 | Please check the |
51 | <a href="http://luajit.org/luajit_changes.html"><span class="ext">»</span> Online Change History</a> | 51 | <a href="http://luajit.org/changes.html"><span class="ext">»</span> Online Change History</a> |
52 | to see whether newer versions are available. | 52 | to see whether newer versions are available. |
53 | </p> | 53 | </p> |
54 | 54 | ||
55 | <div class="major" style="background: #d0d0d0;"> | ||
56 | <h2 id="snap">Development Snapshot</h2> | ||
57 | <ul> | ||
58 | <li>Add abstract C call handling to IR.</li> | ||
59 | <li>Improve KNUM fuse vs. load heuristics.</li> | ||
60 | <li>Drive the GC forward on string allocations in the parser.</li> | ||
61 | <li>Compile various <tt>io.*()</tt> functions.</li> | ||
62 | <li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt> | ||
63 | and <tt>math.random()</tt>.</li> | ||
64 | <li>Fix <tt>lua_tocfunction()</tt>.</li> | ||
65 | <li>Fix cutoff register in JMP bytecode for some conditional expressions.</li> | ||
66 | <li>Fix PHI marking algorithm for references from variant slots.</li> | ||
67 | <li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li> | ||
68 | <li>Fix DWARF2 frame unwind information for interpreter on OSX.</li> | ||
69 | </ul> | ||
70 | </div> | ||
71 | |||
55 | <div class="major" style="background: #ffd0d0;"> | 72 | <div class="major" style="background: #ffd0d0;"> |
56 | <h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 — 2009-11-09</h2> | 73 | <h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 — 2009-11-09</h2> |
57 | <ul> | 74 | <ul> |
@@ -59,14 +76,14 @@ to see whether newer versions are available. | |||
59 | <li>Allow C++ exception conversion on all platforms | 76 | <li>Allow C++ exception conversion on all platforms |
60 | using a wrapper function.</li> | 77 | using a wrapper function.</li> |
61 | <li>Automatically catch C++ exceptions and rethrow Lua error | 78 | <li>Automatically catch C++ exceptions and rethrow Lua error |
62 | (ELF/DWARF2 only).</li> | 79 | (DWARF2 only).</li> |
63 | <li>Check for the correct x87 FPU precision at strategic points.</li> | 80 | <li>Check for the correct x87 FPU precision at strategic points.</li> |
64 | <li>Always use wrappers for libm functions.</li> | 81 | <li>Always use wrappers for libm functions.</li> |
65 | <li>Resurrect metamethod name strings before copying them.</li> | 82 | <li>Resurrect metamethod name strings before copying them.</li> |
66 | <li>Mark current trace, even if compiler is idle.</li> | 83 | <li>Mark current trace, even if compiler is idle.</li> |
67 | <li>Ensure FILE metatable is created only once.</li> | 84 | <li>Ensure FILE metatable is created only once.</li> |
68 | <li>Fix type comparisons when different integer types are involved.</li> | 85 | <li>Fix type comparisons when different integer types are involved.</li> |
69 | <li>Fix getmetatable() recording.</li> | 86 | <li>Fix <tt>getmetatable()</tt> recording.</li> |
70 | <li>Fix TDUP with dead keys in template table.</li> | 87 | <li>Fix TDUP with dead keys in template table.</li> |
71 | <li><tt>jit.flush(tr)</tt> returns status. | 88 | <li><tt>jit.flush(tr)</tt> returns status. |
72 | Prevent manual flush of a trace that's still linked.</li> | 89 | Prevent manual flush of a trace that's still linked.</li> |
@@ -234,7 +251,7 @@ on a separate line.</li> | |||
234 | 251 | ||
235 | <li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li> | 252 | <li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li> |
236 | <li>Miscellaneous doc changes. Added a section about | 253 | <li>Miscellaneous doc changes. Added a section about |
237 | <a href="luajit_install.html#embedding">embedding LuaJIT</a>.</li> | 254 | <a href="install.html#embedding">embedding LuaJIT</a>.</li> |
238 | </ul> | 255 | </ul> |
239 | <p> | 256 | <p> |
240 | This release is in sync with Coco 1.1.0 (see the | 257 | This release is in sync with Coco 1.1.0 (see the |
diff --git a/doc/contact.html b/doc/contact.html index 36d5a825..66d52410 100644 --- a/doc/contact.html +++ b/doc/contact.html | |||
@@ -46,17 +46,15 @@ You can also send any questions you have directly to me: | |||
46 | 46 | ||
47 | <script type="text/javascript"> | 47 | <script type="text/javascript"> |
48 | <!-- | 48 | <!-- |
49 | var xS="@-: .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZa<b>cdefghijklmnopqrstuvwxyz" | 49 | var xS="@-:\" .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<abc>defghijklmnopqrstuvwxyz";function xD(s) |
50 | function xD(s) | ||
51 | {var len=s.length;var r="";for(var i=0;i<len;i++) | 50 | {var len=s.length;var r="";for(var i=0;i<len;i++) |
52 | {var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1) | 51 | {var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)c=xS.charAt(69-n);r+=c;} |
53 | c=xS.charAt(66-n);r+=c;} | ||
54 | document.write("<"+"p>"+r+"<"+"/p>\n");} | 52 | document.write("<"+"p>"+r+"<"+"/p>\n");} |
55 | //--> | 53 | //--> |
56 | </script> | 54 | </script> |
57 | <script type="text/javascript"> | 55 | <script type="text/javascript"> |
58 | <!-- | 56 | <!-- |
59 | xD("ewYKA7vu-EIwslx7 K9A.t41C") | 57 | xD("fyZKB8xv\"FJytmz8.KAB0u52D") |
60 | //--></script> | 58 | //--></script> |
61 | <noscript> | 59 | <noscript> |
62 | <p><img src="img/contact.png" alt="Contact info in image" width="170" height="13"> | 60 | <p><img src="img/contact.png" alt="Contact info in image" width="170" height="13"> |
diff --git a/doc/luajit.html b/doc/luajit.html index 9b16ea37..5a2b3689 100644 --- a/doc/luajit.html +++ b/doc/luajit.html | |||
@@ -8,6 +8,7 @@ | |||
8 | <meta name="Language" content="en"> | 8 | <meta name="Language" content="en"> |
9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> | 9 | <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> |
10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> | 10 | <link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> |
11 | <meta name="description" content="LuaJIT is a Just-In-Time (JIT) compiler for the Lua language."> | ||
11 | </head> | 12 | </head> |
12 | <body> | 13 | <body> |
13 | <div id="site"> | 14 | <div id="site"> |
@@ -30,7 +30,7 @@ | |||
30 | -- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello" | 30 | -- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello" |
31 | -- | 31 | -- |
32 | -- local out = { | 32 | -- local out = { |
33 | -- -- Do something wich each line: | 33 | -- -- Do something with each line: |
34 | -- write = function(t, ...) io.write(...) end, | 34 | -- write = function(t, ...) io.write(...) end, |
35 | -- close = function(t) end, | 35 | -- close = function(t) end, |
36 | -- flush = function(t) end, | 36 | -- flush = function(t) end, |
diff --git a/lib/dump.lua b/lib/dump.lua index 9fde87c1..021fc1c9 100644 --- a/lib/dump.lua +++ b/lib/dump.lua | |||
@@ -144,7 +144,7 @@ local colortype_ansi = { | |||
144 | [0] = "%s", | 144 | [0] = "%s", |
145 | "%s", | 145 | "%s", |
146 | "%s", | 146 | "%s", |
147 | "%s", | 147 | "\027[36m%s\027[m", |
148 | "\027[32m%s\027[m", | 148 | "\027[32m%s\027[m", |
149 | "%s", | 149 | "%s", |
150 | "\027[1m%s\027[m", | 150 | "\027[1m%s\027[m", |
@@ -199,9 +199,9 @@ margin-right: 2em; | |||
199 | span.irt_str { color: #00a000; } | 199 | span.irt_str { color: #00a000; } |
200 | span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; } | 200 | span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; } |
201 | span.irt_tab { color: #c00000; } | 201 | span.irt_tab { color: #c00000; } |
202 | span.irt_udt { color: #00c0c0; } | 202 | span.irt_udt, span.irt_lud { color: #00c0c0; } |
203 | span.irt_num { color: #0000c0; } | 203 | span.irt_num { color: #4040c0; } |
204 | span.irt_int { color: #c000c0; } | 204 | span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } |
205 | </style> | 205 | </style> |
206 | ]] | 206 | ]] |
207 | 207 | ||
@@ -210,7 +210,7 @@ local colorize, irtype | |||
210 | -- Lookup table to convert some literals into names. | 210 | -- Lookup table to convert some literals into names. |
211 | local litname = { | 211 | local litname = { |
212 | ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", }, | 212 | ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", }, |
213 | ["XLOAD "] = { [0] = "", "unaligned", }, | 213 | ["XLOAD "] = { [0] = "", "R", "U", "RU", }, |
214 | ["TOINT "] = { [0] = "check", "index", "", }, | 214 | ["TOINT "] = { [0] = "check", "index", "", }, |
215 | ["FLOAD "] = vmdef.irfield, | 215 | ["FLOAD "] = vmdef.irfield, |
216 | ["FREF "] = vmdef.irfield, | 216 | ["FREF "] = vmdef.irfield, |
@@ -313,6 +313,27 @@ local function ridsp_name(ridsp) | |||
313 | return "" | 313 | return "" |
314 | end | 314 | end |
315 | 315 | ||
316 | -- Recursively gather CALL* args and dump them. | ||
317 | local function dumpcallargs(tr, ins) | ||
318 | if ins < 0 then | ||
319 | out:write(formatk(tr, ins)) | ||
320 | else | ||
321 | local m, ot, op1, op2 = traceir(tr, ins) | ||
322 | local oidx = 6*shr(ot, 8) | ||
323 | local op = sub(vmdef.irnames, oidx+1, oidx+6) | ||
324 | if op == "CARG " then | ||
325 | dumpcallargs(tr, op1) | ||
326 | if op2 < 0 then | ||
327 | out:write(" ", formatk(tr, op2)) | ||
328 | else | ||
329 | out:write(" ", format("%04d", op2)) | ||
330 | end | ||
331 | else | ||
332 | out:write(format("%04d", ins)) | ||
333 | end | ||
334 | end | ||
335 | end | ||
336 | |||
316 | -- Dump IR and interleaved snapshots. | 337 | -- Dump IR and interleaved snapshots. |
317 | local function dump_ir(tr, dumpsnap, dumpreg) | 338 | local function dump_ir(tr, dumpsnap, dumpreg) |
318 | local info = traceinfo(tr) | 339 | local info = traceinfo(tr) |
@@ -348,7 +369,8 @@ local function dump_ir(tr, dumpsnap, dumpreg) | |||
348 | else | 369 | else |
349 | out:write(format("%04d ------ LOOP ------------\n", ins)) | 370 | out:write(format("%04d ------ LOOP ------------\n", ins)) |
350 | end | 371 | end |
351 | elseif op ~= "NOP " and (dumpreg or op ~= "RENAME") then | 372 | elseif op ~= "NOP " and op ~= "CARG " and |
373 | (dumpreg or op ~= "RENAME") then | ||
352 | if dumpreg then | 374 | if dumpreg then |
353 | out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) | 375 | out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) |
354 | else | 376 | else |
@@ -359,7 +381,11 @@ local function dump_ir(tr, dumpsnap, dumpreg) | |||
359 | band(ot, 128) == 0 and " " or "+", | 381 | band(ot, 128) == 0 and " " or "+", |
360 | irtype[t], op)) | 382 | irtype[t], op)) |
361 | local m1 = band(m, 3) | 383 | local m1 = band(m, 3) |
362 | if m1 ~= 3 then -- op1 != IRMnone | 384 | if sub(op, 1, 4) == "CALL" then |
385 | out:write(format("%-10s (", vmdef.ircall[op2])) | ||
386 | if op1 ~= -1 then dumpcallargs(tr, op1) end | ||
387 | out:write(")") | ||
388 | elseif m1 ~= 3 then -- op1 != IRMnone | ||
363 | if op1 < 0 then | 389 | if op1 < 0 then |
364 | out:write(formatk(tr, op1)) | 390 | out:write(formatk(tr, op1)) |
365 | else | 391 | else |
diff --git a/src/Makefile.dep b/src/Makefile.dep index 1fb81e27..779ee545 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
@@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | |||
21 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h | 21 | lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h |
22 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h | 22 | lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h |
23 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ | 23 | lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ |
24 | lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \ | 24 | lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \ |
25 | lj_libdef.h | 25 | lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \ |
26 | lj_lib.h lj_libdef.h | ||
26 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ | 27 | lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ |
27 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ | 28 | lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ |
28 | lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ | 29 | lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ |
@@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | |||
45 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ | 46 | lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ |
46 | lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h | 47 | lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h |
47 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 48 | lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
48 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ | 49 | lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ |
49 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \ | 50 | lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \ |
50 | lj_target.h lj_target_x86.h | 51 | lj_vm.h lj_target.h lj_target_x86.h |
51 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h | 52 | lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h |
52 | lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h | 53 | lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h |
53 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 54 | lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
@@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
67 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ | 68 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ |
68 | lj_ir.h lj_dispatch.h | 69 | lj_ir.h lj_dispatch.h |
69 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 70 | lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
70 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ | 71 | lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ |
71 | lj_traceerr.h | 72 | lj_bc.h lj_traceerr.h lj_lib.h |
72 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 73 | lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
73 | lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h | 74 | lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h |
74 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ | 75 | lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ |
diff --git a/src/buildvm.c b/src/buildvm.c index b3738db4..4aba39d4 100644 --- a/src/buildvm.c +++ b/src/buildvm.c | |||
@@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME) | |||
215 | }; | 215 | }; |
216 | 216 | ||
217 | const char *const irfield_names[] = { | 217 | const char *const irfield_names[] = { |
218 | #define FLNAME(name, type, field) #name, | 218 | #define FLNAME(name, ofs) #name, |
219 | IRFLDEF(FLNAME) | 219 | IRFLDEF(FLNAME) |
220 | #undef FLNAME | 220 | #undef FLNAME |
221 | NULL | 221 | NULL |
222 | }; | 222 | }; |
223 | 223 | ||
224 | const char *const ircall_names[] = { | ||
225 | #define IRCALLNAME(name, nargs, kind, type, flags) #name, | ||
226 | IRCALLDEF(IRCALLNAME) | ||
227 | #undef IRCALLNAME | ||
228 | NULL | ||
229 | }; | ||
230 | |||
224 | static const char *const trace_errors[] = { | 231 | static const char *const trace_errors[] = { |
225 | #define TREDEF(name, msg) msg, | 232 | #define TREDEF(name, msg) msg, |
226 | #include "lj_traceerr.h" | 233 | #include "lj_traceerr.h" |
@@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx) | |||
269 | } | 276 | } |
270 | fprintf(ctx->fp, "}\n\n"); | 277 | fprintf(ctx->fp, "}\n\n"); |
271 | 278 | ||
279 | fprintf(ctx->fp, "ircall = {\n[0]="); | ||
280 | for (i = 0; ircall_names[i]; i++) | ||
281 | fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); | ||
282 | fprintf(ctx->fp, "}\n\n"); | ||
283 | |||
272 | fprintf(ctx->fp, "traceerr = {\n[0]="); | 284 | fprintf(ctx->fp, "traceerr = {\n[0]="); |
273 | for (i = 0; trace_errors[i]; i++) | 285 | for (i = 0; trace_errors[i]; i++) |
274 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); | 286 | fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); |
diff --git a/src/buildvm.h b/src/buildvm.h index e55527fd..53c820ad 100644 --- a/src/buildvm.h +++ b/src/buildvm.h | |||
@@ -102,5 +102,6 @@ extern const char *const bc_names[]; | |||
102 | extern const char *const ir_names[]; | 102 | extern const char *const ir_names[]; |
103 | extern const char *const irfpm_names[]; | 103 | extern const char *const irfpm_names[]; |
104 | extern const char *const irfield_names[]; | 104 | extern const char *const irfield_names[]; |
105 | extern const char *const ircall_names[]; | ||
105 | 106 | ||
106 | #endif | 107 | #endif |
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c index 5daab13b..31b6f61e 100644 --- a/src/buildvm_asm.c +++ b/src/buildvm_asm.c | |||
@@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n) | |||
26 | static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) | 26 | static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) |
27 | { | 27 | { |
28 | const char *sym = ctx->extnames[r->sym]; | 28 | const char *sym = ctx->extnames[r->sym]; |
29 | const char *p = strchr(sym, '@'); | ||
30 | char buf[80]; | ||
31 | if (p) { | ||
32 | /* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */ | ||
33 | strncpy(buf, sym, p-sym); | ||
34 | buf[p-sym] = '\0'; | ||
35 | sym = buf; | ||
36 | } | ||
29 | switch (ctx->mode) { | 37 | switch (ctx->mode) { |
30 | case BUILD_elfasm: | 38 | case BUILD_elfasm: |
31 | if (r->type) | 39 | if (r->type) |
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c index 271118e0..77af3dc5 100644 --- a/src/buildvm_fold.c +++ b/src/buildvm_fold.c | |||
@@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany) | |||
107 | for (i = 0; irfield_names[i]; i++) | 107 | for (i = 0; irfield_names[i]; i++) |
108 | if (!strcmp(irfield_names[i], p+5)) | 108 | if (!strcmp(irfield_names[i], p+5)) |
109 | return i; | 109 | return i; |
110 | } else if (allowlit && !strncmp(p, "IRCALL_", 7)) { | ||
111 | for (i = 0; ircall_names[i]; i++) | ||
112 | if (!strcmp(ircall_names[i], p+7)) | ||
113 | return i; | ||
110 | } else if (allowany && !strcmp("any", p)) { | 114 | } else if (allowany && !strcmp("any", p)) { |
111 | return 0xff; | 115 | return 0xff; |
112 | } else { | 116 | } else { |
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c index 1a8661bf..a24ae727 100644 --- a/src/buildvm_peobj.c +++ b/src/buildvm_peobj.c | |||
@@ -85,6 +85,7 @@ typedef struct PEsymaux { | |||
85 | #define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ | 85 | #define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ |
86 | #define PEOBJ_RELOC_DIR32 0x06 | 86 | #define PEOBJ_RELOC_DIR32 0x06 |
87 | #define PEOBJ_SYM_PREFIX "_" | 87 | #define PEOBJ_SYM_PREFIX "_" |
88 | #define PEOBJ_SYMF_PREFIX "@" | ||
88 | #elif LJ_TARGET_X64 | 89 | #elif LJ_TARGET_X64 |
89 | #define PEOBJ_ARCH_TARGET 0x8664 | 90 | #define PEOBJ_ARCH_TARGET 0x8664 |
90 | #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ | 91 | #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ |
@@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx) | |||
260 | 261 | ||
261 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); | 262 | emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); |
262 | for (i = 0; ctx->extnames[i]; i++) { | 263 | for (i = 0; ctx->extnames[i]; i++) { |
263 | sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]); | 264 | const char *sym = ctx->extnames[i]; |
265 | const char *p = strchr(sym, '@'); | ||
266 | if (p) { | ||
267 | #ifdef PEOBJ_SYMF_PREFIX | ||
268 | sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym); | ||
269 | #else | ||
270 | strncpy(name, sym, p-sym); | ||
271 | name[p-sym] = '\0'; | ||
272 | #endif | ||
273 | } else { | ||
274 | sprintf(name, PEOBJ_SYM_PREFIX "%s", sym); | ||
275 | } | ||
264 | emit_peobj_sym(ctx, name, 0, | 276 | emit_peobj_sym(ctx, name, 0, |
265 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); | 277 | PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); |
266 | } | 278 | } |
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index 09cfa6dc..e857a6be 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc | |||
@@ -30,6 +30,9 @@ | |||
30 | |.define RD, RC | 30 | |.define RD, RC |
31 | |.define RDL, RCL | 31 | |.define RDL, RCL |
32 | | | 32 | | |
33 | |.define FCARG1, ecx // Fastcall arguments. | ||
34 | |.define FCARG2, edx | ||
35 | | | ||
33 | |// Type definitions. Some of these are only used for documentation. | 36 | |// Type definitions. Some of these are only used for documentation. |
34 | |.type L, lua_State | 37 | |.type L, lua_State |
35 | |.type GL, global_State | 38 | |.type GL, global_State |
@@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1066 | | mov RB, LJ_TNUMX | 1069 | | mov RB, LJ_TNUMX |
1067 | |7: | 1070 | |7: |
1068 | | not RB | 1071 | | not RB |
1069 | | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] | 1072 | | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] |
1070 | | jmp <2 | 1073 | | jmp <2 |
1071 | | | 1074 | | |
1072 | |.ffunc_2 setmetatable | 1075 | |.ffunc_2 setmetatable |
@@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1126 | | jmp ->fff_res1 | 1129 | | jmp ->fff_res1 |
1127 | |3: // Handle numbers inline, unless a number base metatable is present. | 1130 | |3: // Handle numbers inline, unless a number base metatable is present. |
1128 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback | 1131 | | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback |
1129 | | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0 | 1132 | | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 |
1130 | | jne ->fff_fallback | 1133 | | jne ->fff_fallback |
1131 | | ffgccheck // Caveat: uses label 1. | 1134 | | ffgccheck // Caveat: uses label 1. |
1132 | | mov L:RB, SAVE_L | 1135 | | mov L:RB, SAVE_L |
1133 | | mov ARG1, L:RB | ||
1134 | | mov ARG2, RA | ||
1135 | | mov L:RB->base, RA // Add frame since C call can throw. | 1136 | | mov L:RB->base, RA // Add frame since C call can throw. |
1136 | | mov [RA-4], PC | 1137 | | mov [RA-4], PC |
1137 | | mov SAVE_PC, PC // Redundant (but a defined value). | 1138 | | mov SAVE_PC, PC // Redundant (but a defined value). |
1138 | | mov ARG3, BASE // Save BASE. | 1139 | | mov ARG3, BASE // Save BASE. |
1139 | | call extern lj_str_fromnum // (lua_State *L, lua_Number *np) | 1140 | | mov FCARG2, RA // Caveat: FCARG2 == BASE |
1141 | | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | ||
1142 | | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) | ||
1140 | | // GCstr returned in eax (RC). | 1143 | | // GCstr returned in eax (RC). |
1141 | | mov RA, L:RB->base | 1144 | | mov RA, L:RB->base |
1142 | | mov BASE, ARG3 | 1145 | | mov BASE, ARG3 |
@@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov) | |||
1762 | | | 1765 | | |
1763 | |.ffunc_1 table_getn | 1766 | |.ffunc_1 table_getn |
1764 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback | 1767 | | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback |
1765 | | mov TAB:RB, [RA] | 1768 | | mov ARG2, BASE // Save RA and BASE. |
1766 | | mov ARG1, TAB:RB | 1769 | | mov RB, RA |
1767 | | mov RB, RA // Save RA and BASE. | 1770 | | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA |
1768 | | mov ARG2, BASE | 1771 | | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) |
1769 | | call extern lj_tab_len // (GCtab *t) | ||
1770 | | // Length of table returned in eax (RC). | 1772 | | // Length of table returned in eax (RC). |
1771 | | mov ARG1, RC | 1773 | | mov ARG1, RC |
1772 | | mov RA, RB // Restore RA and BASE. | 1774 | | mov RA, RB // Restore RA and BASE. |
@@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
2512 | | ins_next | 2514 | | ins_next |
2513 | |2: | 2515 | |2: |
2514 | | checktab RD, ->vmeta_len | 2516 | | checktab RD, ->vmeta_len |
2515 | | mov TAB:RD, [BASE+RD*8] | 2517 | | mov TAB:FCARG1, [BASE+RD*8] |
2516 | | mov ARG1, TAB:RD | ||
2517 | | mov RB, BASE // Save BASE. | 2518 | | mov RB, BASE // Save BASE. |
2518 | | call extern lj_tab_len // (GCtab *t) | 2519 | | call extern lj_tab_len@4 // (GCtab *t) |
2519 | | // Length of table returned in eax (RC). | 2520 | | // Length of table returned in eax (RC). |
2520 | | mov ARG1, RC | 2521 | | mov ARG1, RC |
2521 | | mov BASE, RB // Restore BASE. | 2522 | | mov BASE, RB // Restore BASE. |
@@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
2665 | | ins_next | 2666 | | ins_next |
2666 | break; | 2667 | break; |
2667 | case BC_USETV: | 2668 | case BC_USETV: |
2669 | #define TV2MARKOFS \ | ||
2670 | ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) | ||
2668 | | ins_AD // RA = upvalue #, RD = src | 2671 | | ins_AD // RA = upvalue #, RD = src |
2669 | | // Really ugly code due to the lack of a 4th free register. | ||
2670 | | mov LFUNC:RB, [BASE-8] | 2672 | | mov LFUNC:RB, [BASE-8] |
2671 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 2673 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
2672 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | 2674 | | cmp byte UPVAL:RB->closed, 0 |
2673 | | jnz >4 | ||
2674 | |1: | ||
2675 | | mov RA, [BASE+RD*8] | ||
2676 | |2: | ||
2677 | | mov RB, UPVAL:RB->v | 2675 | | mov RB, UPVAL:RB->v |
2676 | | mov RA, [BASE+RD*8] | ||
2678 | | mov RD, [BASE+RD*8+4] | 2677 | | mov RD, [BASE+RD*8+4] |
2679 | | mov [RB], RA | 2678 | | mov [RB], RA |
2680 | | mov [RB+4], RD | 2679 | | mov [RB+4], RD |
2681 | |3: | 2680 | | jz >1 |
2681 | | // Check barrier for closed upvalue. | ||
2682 | | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) | ||
2683 | | jnz >2 | ||
2684 | |1: | ||
2682 | | ins_next | 2685 | | ins_next |
2683 | | | 2686 | | |
2684 | |4: // Upvalue is black. Check if new value is collectable and white. | 2687 | |2: // Upvalue is black. Check if new value is collectable and white. |
2685 | | mov RA, [BASE+RD*8+4] | 2688 | | sub RD, LJ_TISGCV |
2686 | | sub RA, LJ_TISGCV | 2689 | | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) |
2687 | | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v) | ||
2688 | | jbe <1 | 2690 | | jbe <1 |
2689 | | mov GCOBJ:RA, [BASE+RD*8] | ||
2690 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | 2691 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) |
2691 | | jz <2 | 2692 | | jz <1 |
2692 | | // Crossed a write barrier. So move the barrier forward. | 2693 | | // Crossed a write barrier. Move the barrier forward. |
2693 | | mov ARG2, UPVAL:RB | 2694 | | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). |
2694 | | mov ARG3, GCOBJ:RA | 2695 | | lea GL:FCARG1, [DISPATCH+GG_DISP2G] |
2695 | | mov RB, UPVAL:RB->v | 2696 | | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) |
2696 | | mov RD, [BASE+RD*8+4] | 2697 | | mov BASE, RB // Restore BASE. |
2697 | | mov [RB], GCOBJ:RA | 2698 | | jmp <1 |
2698 | | mov [RB+4], RD | ||
2699 | |->BC_USETV_Z: | ||
2700 | | mov L:RB, SAVE_L | ||
2701 | | lea GL:RA, [DISPATCH+GG_DISP2G] | ||
2702 | | mov L:RB->base, BASE | ||
2703 | | mov ARG1, GL:RA | ||
2704 | | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v) | ||
2705 | | mov BASE, L:RB->base | ||
2706 | | jmp <3 | ||
2707 | break; | 2699 | break; |
2700 | #undef TV2MARKOFS | ||
2708 | case BC_USETS: | 2701 | case BC_USETS: |
2709 | | ins_AND // RA = upvalue #, RD = str const (~) | 2702 | | ins_AND // RA = upvalue #, RD = str const (~) |
2710 | | mov LFUNC:RB, [BASE-8] | 2703 | | mov LFUNC:RB, [BASE-8] |
2711 | | mov GCOBJ:RD, [KBASE+RD*4] | ||
2712 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | 2704 | | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] |
2713 | | mov RA, UPVAL:RB->v | 2705 | | mov GCOBJ:RA, [KBASE+RD*4] |
2714 | | mov dword [RA+4], LJ_TSTR | 2706 | | mov RD, UPVAL:RB->v |
2715 | | mov [RA], GCOBJ:RD | 2707 | | mov [RD], GCOBJ:RA |
2708 | | mov dword [RD+4], LJ_TSTR | ||
2716 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | 2709 | | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) |
2717 | | jnz >2 | 2710 | | jnz >2 |
2718 | |1: | 2711 | |1: |
2719 | | ins_next | 2712 | | ins_next |
2720 | | | 2713 | | |
2721 | |2: // Upvalue is black. Check if string is white. | 2714 | |2: // Check if string is white and ensure upvalue is closed. |
2722 | | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str) | 2715 | | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) |
2723 | | jz <1 | 2716 | | jz <1 |
2724 | | // Crossed a write barrier. So move the barrier forward. | 2717 | | cmp byte UPVAL:RB->closed, 0 |
2725 | | mov ARG3, GCOBJ:RD | 2718 | | jz <1 |
2726 | | mov ARG2, UPVAL:RB | 2719 | | // Crossed a write barrier. Move the barrier forward. |
2727 | | jmp ->BC_USETV_Z | 2720 | | mov RB, BASE // Save BASE (FCARG2 == BASE). |
2721 | | mov FCARG2, RD | ||
2722 | | lea GL:FCARG1, [DISPATCH+GG_DISP2G] | ||
2723 | | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) | ||
2724 | | mov BASE, RB // Restore BASE. | ||
2725 | | jmp <1 | ||
2728 | break; | 2726 | break; |
2729 | case BC_USETN: | 2727 | case BC_USETN: |
2730 | | ins_AD // RA = upvalue #, RD = num const | 2728 | | ins_AD // RA = upvalue #, RD = num const |
@@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
2808 | | mov dword [BASE+RA*8+4], LJ_TTAB | 2806 | | mov dword [BASE+RA*8+4], LJ_TTAB |
2809 | | ins_next | 2807 | | ins_next |
2810 | |2: | 2808 | |2: |
2811 | | call extern lj_gc_step_fixtop // (lua_State *L) | 2809 | | mov L:FCARG1, L:RB |
2812 | | mov ARG1, L:RB // Args owned by callee. Set it again. | 2810 | | call extern lj_gc_step_fixtop@4 // (lua_State *L) |
2813 | | jmp <1 | 2811 | | jmp <1 |
2814 | break; | 2812 | break; |
2815 | case BC_TDUP: | 2813 | case BC_TDUP: |
2816 | | ins_AND // RA = dst, RD = table const (~) (holding template table) | 2814 | | ins_AND // RA = dst, RD = table const (~) (holding template table) |
2817 | | mov TAB:RD, [KBASE+RD*4] | ||
2818 | | mov L:RB, SAVE_L | 2815 | | mov L:RB, SAVE_L |
2819 | | mov ARG2, TAB:RD | ||
2820 | | mov ARG1, L:RB | ||
2821 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | 2816 | | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] |
2822 | | mov SAVE_PC, PC | 2817 | | mov SAVE_PC, PC |
2823 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | 2818 | | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] |
2824 | | mov L:RB->base, BASE | 2819 | | mov L:RB->base, BASE |
2825 | | jae >3 | 2820 | | jae >3 |
2826 | |2: | 2821 | |2: |
2827 | | call extern lj_tab_dup // (lua_State *L, Table *kt) | 2822 | | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE |
2823 | | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | ||
2824 | | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) | ||
2828 | | // Table * returned in eax (RC). | 2825 | | // Table * returned in eax (RC). |
2829 | | mov BASE, L:RB->base | 2826 | | mov BASE, L:RB->base |
2830 | | movzx RA, PC_RA | 2827 | | movzx RA, PC_RA |
@@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov) | |||
2832 | | mov dword [BASE+RA*8+4], LJ_TTAB | 2829 | | mov dword [BASE+RA*8+4], LJ_TTAB |
2833 | | ins_next | 2830 | | ins_next |
2834 | |3: | 2831 | |3: |
2835 | | call extern lj_gc_step_fixtop // (lua_State *L) | 2832 | | mov L:FCARG1, L:RB |
2836 | | mov ARG1, L:RB // Args owned by callee. Set it again. | 2833 | | call extern lj_gc_step_fixtop@4 // (lua_State *L) |
2834 | | movzx RD, PC_RD // Need to reload RD. | ||
2835 | | not RD | ||
2837 | | jmp <2 | 2836 | | jmp <2 |
2838 | break; | 2837 | break; |
2839 | 2838 | ||
diff --git a/src/lib_base.c b/src/lib_base.c index 6b9e8eef..821c81b4 100644 --- a/src/lib_base.c +++ b/src/lib_base.c | |||
@@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
183 | int32_t base = lj_lib_optint(L, 2, 10); | 183 | int32_t base = lj_lib_optint(L, 2, 10); |
184 | if (base == 10) { | 184 | if (base == 10) { |
185 | TValue *o = lj_lib_checkany(L, 1); | 185 | TValue *o = lj_lib_checkany(L, 1); |
186 | if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) { | 186 | if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) { |
187 | setnumV(L->base-1, numV(o)); | 187 | setnumV(L->base-1, numV(o)); |
188 | return FFH_RES(1); | 188 | return FFH_RES(1); |
189 | } | 189 | } |
@@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) | |||
206 | return FFH_RES(1); | 206 | return FFH_RES(1); |
207 | } | 207 | } |
208 | 208 | ||
209 | LJLIB_PUSH("nil") | ||
210 | LJLIB_PUSH("false") | ||
211 | LJLIB_PUSH("true") | ||
209 | LJLIB_ASM(tostring) LJLIB_REC(.) | 212 | LJLIB_ASM(tostring) LJLIB_REC(.) |
210 | { | 213 | { |
211 | TValue *o = lj_lib_checkany(L, 1); | 214 | TValue *o = lj_lib_checkany(L, 1); |
@@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.) | |||
218 | GCstr *s; | 221 | GCstr *s; |
219 | if (tvisnum(o)) { | 222 | if (tvisnum(o)) { |
220 | s = lj_str_fromnum(L, &o->n); | 223 | s = lj_str_fromnum(L, &o->n); |
221 | } else if (tvisnil(o)) { | 224 | } else if (tvispri(o)) { |
222 | s = lj_str_newlit(L, "nil"); | 225 | s = strV(lj_lib_upvalue(L, -itype(o))); |
223 | } else if (tvisfalse(o)) { | ||
224 | s = lj_str_newlit(L, "false"); | ||
225 | } else if (tvistrue(o)) { | ||
226 | s = lj_str_newlit(L, "true"); | ||
227 | } else { | 226 | } else { |
228 | if (tvisfunc(o) && isffunc(funcV(o))) | 227 | if (tvisfunc(o) && isffunc(funcV(o))) |
229 | lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); | 228 | lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); |
diff --git a/src/lib_io.c b/src/lib_io.c index aefe4213..d69b99a4 100644 --- a/src/lib_io.c +++ b/src/lib_io.c | |||
@@ -17,14 +17,28 @@ | |||
17 | #include "lualib.h" | 17 | #include "lualib.h" |
18 | 18 | ||
19 | #include "lj_obj.h" | 19 | #include "lj_obj.h" |
20 | #include "lj_err.h" | ||
21 | #include "lj_gc.h" | 20 | #include "lj_gc.h" |
21 | #include "lj_err.h" | ||
22 | #include "lj_str.h" | ||
22 | #include "lj_ff.h" | 23 | #include "lj_ff.h" |
24 | #include "lj_trace.h" | ||
23 | #include "lj_lib.h" | 25 | #include "lj_lib.h" |
24 | 26 | ||
25 | /* Index of standard handles in function environment. */ | 27 | /* Userdata payload for I/O file. */ |
26 | #define IO_INPUT 1 | 28 | typedef struct IOFileUD { |
27 | #define IO_OUTPUT 2 | 29 | FILE *fp; /* File handle. */ |
30 | uint32_t type; /* File type. */ | ||
31 | } IOFileUD; | ||
32 | |||
33 | #define IOFILE_TYPE_FILE 0 /* Regular file. */ | ||
34 | #define IOFILE_TYPE_PIPE 1 /* Pipe. */ | ||
35 | #define IOFILE_TYPE_STDF 2 /* Standard file handle. */ | ||
36 | #define IOFILE_TYPE_MASK 3 | ||
37 | |||
38 | #define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */ | ||
39 | |||
40 | #define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud) | ||
41 | #define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id)))) | ||
28 | 42 | ||
29 | /* -- Error handling ------------------------------------------------------ */ | 43 | /* -- Error handling ------------------------------------------------------ */ |
30 | 44 | ||
@@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname) | |||
35 | return 1; | 49 | return 1; |
36 | } else { | 50 | } else { |
37 | int en = errno; /* Lua API calls may change this value. */ | 51 | int en = errno; /* Lua API calls may change this value. */ |
38 | lua_pushnil(L); | 52 | setnilV(L->top++); |
39 | if (fname) | 53 | if (fname) |
40 | lua_pushfstring(L, "%s: %s", fname, strerror(en)); | 54 | lua_pushfstring(L, "%s: %s", fname, strerror(en)); |
41 | else | 55 | else |
42 | lua_pushfstring(L, "%s", strerror(en)); | 56 | lua_pushfstring(L, "%s", strerror(en)); |
43 | lua_pushinteger(L, en); | 57 | setintV(L->top++, en); |
58 | lj_trace_abort(G(L)); | ||
44 | return 3; | 59 | return 3; |
45 | } | 60 | } |
46 | } | 61 | } |
47 | 62 | ||
48 | static void io_file_error(lua_State *L, int arg, const char *fname) | 63 | /* -- Open/close helpers -------------------------------------------------- */ |
64 | |||
65 | static IOFileUD *io_tofilep(lua_State *L) | ||
49 | { | 66 | { |
50 | lua_pushfstring(L, "%s: %s", fname, strerror(errno)); | 67 | if (!(L->base < L->top && tvisudata(L->base) && |
51 | luaL_argerror(L, arg, lua_tostring(L, -1)); | 68 | udataV(L->base)->udtype == UDTYPE_IO_FILE)) |
69 | lj_err_argtype(L, 1, "FILE*"); | ||
70 | return (IOFileUD *)uddata(udataV(L->base)); | ||
52 | } | 71 | } |
53 | 72 | ||
54 | /* -- Open helpers -------------------------------------------------------- */ | 73 | static IOFileUD *io_tofile(lua_State *L) |
55 | |||
56 | #define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE)) | ||
57 | |||
58 | static FILE *io_tofile(lua_State *L) | ||
59 | { | 74 | { |
60 | FILE **f = io_tofilep(L); | 75 | IOFileUD *iof = io_tofilep(L); |
61 | if (*f == NULL) | 76 | if (iof->fp == NULL) |
62 | lj_err_caller(L, LJ_ERR_IOCLFL); | 77 | lj_err_caller(L, LJ_ERR_IOCLFL); |
63 | return *f; | 78 | return iof; |
64 | } | 79 | } |
65 | 80 | ||
66 | static FILE **io_file_new(lua_State *L) | 81 | static FILE *io_stdfile(lua_State *L, ptrdiff_t id) |
67 | { | 82 | { |
68 | FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *)); | 83 | IOFileUD *iof = IOSTDF_IOF(L, id); |
69 | *pf = NULL; | 84 | if (iof->fp == NULL) |
70 | luaL_getmetatable(L, LUA_FILEHANDLE); | 85 | lj_err_caller(L, LJ_ERR_IOSTDCL); |
71 | lua_setmetatable(L, -2); | 86 | return iof->fp; |
72 | return pf; | ||
73 | } | 87 | } |
74 | 88 | ||
75 | /* -- Close helpers ------------------------------------------------------- */ | 89 | static IOFileUD *io_file_new(lua_State *L) |
90 | { | ||
91 | IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD)); | ||
92 | GCudata *ud = udataV(L->top-1); | ||
93 | ud->udtype = UDTYPE_IO_FILE; | ||
94 | /* NOBARRIER: The GCudata is new (marked white). */ | ||
95 | setgcrefr(ud->metatable, curr_func(L)->c.env); | ||
96 | iof->fp = NULL; | ||
97 | iof->type = IOFILE_TYPE_FILE; | ||
98 | return iof; | ||
99 | } | ||
76 | 100 | ||
77 | static int lj_cf_io_std_close(lua_State *L) | 101 | static IOFileUD *io_file_open(lua_State *L, const char *mode) |
78 | { | 102 | { |
79 | lua_pushnil(L); | 103 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
80 | lua_pushliteral(L, "cannot close standard file"); | 104 | IOFileUD *iof = io_file_new(L); |
81 | return 2; | 105 | iof->fp = fopen(fname, mode); |
106 | if (iof->fp == NULL) | ||
107 | luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); | ||
108 | return iof; | ||
82 | } | 109 | } |
83 | 110 | ||
84 | static int lj_cf_io_pipe_close(lua_State *L) | 111 | static int io_file_close(lua_State *L, IOFileUD *iof) |
85 | { | 112 | { |
86 | FILE **p = io_tofilep(L); | 113 | int ok; |
114 | if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) { | ||
115 | ok = (fclose(iof->fp) == 0); | ||
116 | } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) { | ||
87 | #if defined(LUA_USE_POSIX) | 117 | #if defined(LUA_USE_POSIX) |
88 | int ok = (pclose(*p) != -1); | 118 | ok = (pclose(iof->fp) != -1); |
89 | #elif defined(LUA_USE_WIN) | 119 | #elif defined(LUA_USE_WIN) |
90 | int ok = (_pclose(*p) != -1); | 120 | ok = (_pclose(iof->fp) != -1); |
91 | #else | 121 | #else |
92 | int ok = 0; | 122 | ok = 0; |
93 | #endif | 123 | #endif |
94 | *p = NULL; | 124 | } else { |
95 | return io_pushresult(L, ok, NULL); | 125 | lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); |
96 | } | 126 | setnilV(L->top++); |
97 | 127 | lua_pushliteral(L, "cannot close standard file"); | |
98 | static int lj_cf_io_file_close(lua_State *L) | 128 | return 2; |
99 | { | 129 | } |
100 | FILE **p = io_tofilep(L); | 130 | iof->fp = NULL; |
101 | int ok = (fclose(*p) == 0); | ||
102 | *p = NULL; | ||
103 | return io_pushresult(L, ok, NULL); | 131 | return io_pushresult(L, ok, NULL); |
104 | } | 132 | } |
105 | 133 | ||
106 | static int io_file_close(lua_State *L) | ||
107 | { | ||
108 | lua_getfenv(L, 1); | ||
109 | lua_getfield(L, -1, "__close"); | ||
110 | return (lua_tocfunction(L, -1))(L); | ||
111 | } | ||
112 | |||
113 | /* -- Read/write helpers -------------------------------------------------- */ | 134 | /* -- Read/write helpers -------------------------------------------------- */ |
114 | 135 | ||
115 | static int io_file_readnum(lua_State *L, FILE *fp) | 136 | static int io_file_readnum(lua_State *L, FILE *fp) |
116 | { | 137 | { |
117 | lua_Number d; | 138 | lua_Number d; |
118 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { | 139 | if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { |
119 | lua_pushnumber(L, d); | 140 | setnumV(L->top++, d); |
120 | return 1; | 141 | return 1; |
121 | } else { | 142 | } else { |
122 | return 0; /* read fails */ | 143 | return 0; |
123 | } | 144 | } |
124 | } | 145 | } |
125 | 146 | ||
126 | static int test_eof(lua_State *L, FILE *fp) | 147 | static int io_file_testeof(lua_State *L, FILE *fp) |
127 | { | 148 | { |
128 | int c = getc(fp); | 149 | int c = getc(fp); |
129 | ungetc(c, fp); | 150 | ungetc(c, fp); |
@@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n) | |||
168 | n -= nr; /* still have to read `n' chars */ | 189 | n -= nr; /* still have to read `n' chars */ |
169 | } while (n > 0 && nr == rlen); /* until end of count or eof */ | 190 | } while (n > 0 && nr == rlen); /* until end of count or eof */ |
170 | luaL_pushresult(&b); /* close buffer */ | 191 | luaL_pushresult(&b); /* close buffer */ |
171 | return (n == 0 || lua_objlen(L, -1) > 0); | 192 | return (n == 0 || strV(L->top-1)->len > 0); |
172 | } | 193 | } |
173 | 194 | ||
174 | static int io_file_read(lua_State *L, FILE *fp, int start) | 195 | static int io_file_read(lua_State *L, FILE *fp, int start) |
@@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start) | |||
197 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); | 218 | lj_err_arg(L, n+1, LJ_ERR_INVFMT); |
198 | } else if (tvisnum(L->base+n)) { | 219 | } else if (tvisnum(L->base+n)) { |
199 | size_t len = (size_t)lj_lib_checkint(L, n+1); | 220 | size_t len = (size_t)lj_lib_checkint(L, n+1); |
200 | ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp); | 221 | ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp); |
201 | } else { | 222 | } else { |
202 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); | 223 | lj_err_arg(L, n+1, LJ_ERR_INVOPT); |
203 | } | 224 | } |
@@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start) | |||
233 | 254 | ||
234 | LJLIB_CF(io_method_close) | 255 | LJLIB_CF(io_method_close) |
235 | { | 256 | { |
236 | if (lua_isnone(L, 1)) | 257 | IOFileUD *iof = L->base < L->top ? io_tofile(L) : |
237 | lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT); | 258 | IOSTDF_IOF(L, GCROOT_IO_OUTPUT); |
238 | io_tofile(L); | 259 | return io_file_close(L, iof); |
239 | return io_file_close(L); | ||
240 | } | 260 | } |
241 | 261 | ||
242 | LJLIB_CF(io_method_read) | 262 | LJLIB_CF(io_method_read) |
243 | { | 263 | { |
244 | return io_file_read(L, io_tofile(L), 1); | 264 | return io_file_read(L, io_tofile(L)->fp, 1); |
245 | } | 265 | } |
246 | 266 | ||
247 | LJLIB_CF(io_method_write) | 267 | LJLIB_CF(io_method_write) LJLIB_REC(io_write 0) |
248 | { | 268 | { |
249 | return io_file_write(L, io_tofile(L), 1); | 269 | return io_file_write(L, io_tofile(L)->fp, 1); |
250 | } | 270 | } |
251 | 271 | ||
252 | LJLIB_CF(io_method_flush) | 272 | LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) |
253 | { | 273 | { |
254 | return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL); | 274 | return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL); |
255 | } | 275 | } |
256 | 276 | ||
257 | LJLIB_CF(io_method_seek) | 277 | LJLIB_CF(io_method_seek) |
258 | { | 278 | { |
259 | FILE *fp = io_tofile(L); | 279 | FILE *fp = io_tofile(L)->fp; |
260 | int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); | 280 | int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); |
261 | lua_Number ofs; | 281 | lua_Number ofs; |
262 | int res; | 282 | int res; |
@@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek) | |||
294 | 314 | ||
295 | LJLIB_CF(io_method_setvbuf) | 315 | LJLIB_CF(io_method_setvbuf) |
296 | { | 316 | { |
297 | FILE *fp = io_tofile(L); | 317 | FILE *fp = io_tofile(L)->fp; |
298 | int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); | 318 | int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); |
299 | size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); | 319 | size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); |
300 | if (opt == 0) opt = _IOFBF; | 320 | if (opt == 0) opt = _IOFBF; |
301 | else if (opt == 1) opt = _IOLBF; | 321 | else if (opt == 1) opt = _IOLBF; |
302 | else if (opt == 2) opt = _IONBF; | 322 | else if (opt == 2) opt = _IONBF; |
303 | return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL); | 323 | return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL); |
304 | } | 324 | } |
305 | 325 | ||
306 | /* Forward declaration. */ | 326 | LJLIB_PUSH(top-2) /* io_lines_iter */ |
307 | static void io_file_lines(lua_State *L, int idx, int toclose); | ||
308 | |||
309 | LJLIB_CF(io_method_lines) | 327 | LJLIB_CF(io_method_lines) |
310 | { | 328 | { |
311 | io_tofile(L); | 329 | io_tofile(L); |
312 | io_file_lines(L, 1, 0); | 330 | setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1))); |
313 | return 1; | 331 | setudataV(L, L->top+1, udataV(L->base)); |
332 | L->top += 2; | ||
333 | return 2; | ||
314 | } | 334 | } |
315 | 335 | ||
316 | LJLIB_CF(io_method___gc) | 336 | LJLIB_CF(io_method___gc) |
317 | { | 337 | { |
318 | FILE *fp = *io_tofilep(L); | 338 | IOFileUD *iof = io_tofilep(L); |
319 | if (fp != NULL) io_file_close(L); | 339 | if (iof->fp != NULL) |
340 | io_file_close(L, iof); | ||
320 | return 0; | 341 | return 0; |
321 | } | 342 | } |
322 | 343 | ||
323 | LJLIB_CF(io_method___tostring) | 344 | LJLIB_CF(io_method___tostring) |
324 | { | 345 | { |
325 | FILE *fp = *io_tofilep(L); | 346 | IOFileUD *iof = io_tofilep(L); |
326 | if (fp == NULL) | 347 | if (iof->fp != NULL) |
327 | lua_pushliteral(L, "file (closed)"); | 348 | lua_pushfstring(L, "file (%p)", iof->fp); |
328 | else | 349 | else |
329 | lua_pushfstring(L, "file (%p)", fp); | 350 | lua_pushliteral(L, "file (closed)"); |
330 | return 1; | 351 | return 1; |
331 | } | 352 | } |
332 | 353 | ||
@@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index) | |||
340 | 361 | ||
341 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ | 362 | LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ |
342 | 363 | ||
343 | static FILE *io_file_get(lua_State *L, int findex) | 364 | LJLIB_CF(io_open) |
344 | { | 365 | { |
345 | GCtab *fenv = tabref(curr_func(L)->c.env); | 366 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
346 | GCudata *ud = udataV(&tvref(fenv->array)[findex]); | 367 | GCstr *s = lj_lib_optstr(L, 2); |
347 | FILE *fp = *(FILE **)uddata(ud); | 368 | const char *mode = s ? strdata(s) : "r"; |
348 | if (fp == NULL) | 369 | IOFileUD *iof = io_file_new(L); |
349 | lj_err_caller(L, LJ_ERR_IOSTDCL); | 370 | iof->fp = fopen(fname, mode); |
350 | return fp; | 371 | return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname); |
351 | } | 372 | } |
352 | 373 | ||
353 | LJLIB_CF(io_open) | 374 | LJLIB_CF(io_popen) |
354 | { | 375 | { |
355 | const char *fname = luaL_checkstring(L, 1); | 376 | #if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) |
356 | const char *mode = luaL_optstring(L, 2, "r"); | 377 | const char *fname = strdata(lj_lib_checkstr(L, 1)); |
357 | FILE **pf = io_file_new(L); | 378 | GCstr *s = lj_lib_optstr(L, 2); |
358 | *pf = fopen(fname, mode); | 379 | const char *mode = s ? strdata(s) : "r"; |
359 | return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; | 380 | IOFileUD *iof = io_file_new(L); |
381 | iof->type = IOFILE_TYPE_PIPE; | ||
382 | #ifdef LUA_USE_POSIX | ||
383 | fflush(NULL); | ||
384 | iof->fp = popen(fname, mode); | ||
385 | #else | ||
386 | iof->fp = _popen(fname, mode); | ||
387 | #endif | ||
388 | return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname); | ||
389 | #else | ||
390 | luaL_error(L, LUA_QL("popen") " not supported"); | ||
391 | #endif | ||
360 | } | 392 | } |
361 | 393 | ||
362 | LJLIB_CF(io_tmpfile) | 394 | LJLIB_CF(io_tmpfile) |
363 | { | 395 | { |
364 | FILE **pf = io_file_new(L); | 396 | IOFileUD *iof = io_file_new(L); |
365 | *pf = tmpfile(); | 397 | iof->fp = tmpfile(); |
366 | return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1; | 398 | return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL); |
367 | } | 399 | } |
368 | 400 | ||
369 | LJLIB_CF(io_close) | 401 | LJLIB_CF(io_close) |
@@ -373,169 +405,112 @@ LJLIB_CF(io_close) | |||
373 | 405 | ||
374 | LJLIB_CF(io_read) | 406 | LJLIB_CF(io_read) |
375 | { | 407 | { |
376 | return io_file_read(L, io_file_get(L, IO_INPUT), 0); | 408 | return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0); |
377 | } | ||
378 | |||
379 | LJLIB_CF(io_write) | ||
380 | { | ||
381 | return io_file_write(L, io_file_get(L, IO_OUTPUT), 0); | ||
382 | } | ||
383 | |||
384 | LJLIB_CF(io_flush) | ||
385 | { | ||
386 | return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL); | ||
387 | } | 409 | } |
388 | 410 | ||
389 | LJLIB_NOREG LJLIB_CF(io_lines_iter) | 411 | LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT) |
390 | { | ||
391 | FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1))); | ||
392 | int ok; | ||
393 | if (fp == NULL) | ||
394 | lj_err_caller(L, LJ_ERR_IOCLFL); | ||
395 | ok = io_file_readline(L, fp); | ||
396 | if (ferror(fp)) | ||
397 | return luaL_error(L, "%s", strerror(errno)); | ||
398 | if (ok) | ||
399 | return 1; | ||
400 | if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */ | ||
401 | L->top = L->base+1; | ||
402 | setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1))); | ||
403 | io_file_close(L); | ||
404 | } | ||
405 | return 0; | ||
406 | } | ||
407 | |||
408 | static void io_file_lines(lua_State *L, int idx, int toclose) | ||
409 | { | 412 | { |
410 | lua_pushvalue(L, idx); | 413 | return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0); |
411 | lua_pushboolean(L, toclose); | ||
412 | lua_pushcclosure(L, lj_cf_io_lines_iter, 2); | ||
413 | funcV(L->top-1)->c.ffid = FF_io_lines_iter; | ||
414 | } | 414 | } |
415 | 415 | ||
416 | LJLIB_CF(io_lines) | 416 | LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT) |
417 | { | 417 | { |
418 | if (lua_isnoneornil(L, 1)) { /* no arguments? */ | 418 | return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL); |
419 | /* will iterate over default input */ | ||
420 | lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT); | ||
421 | return lj_cf_io_method_lines(L); | ||
422 | } else { | ||
423 | const char *fname = luaL_checkstring(L, 1); | ||
424 | FILE **pf = io_file_new(L); | ||
425 | *pf = fopen(fname, "r"); | ||
426 | if (*pf == NULL) | ||
427 | io_file_error(L, 1, fname); | ||
428 | io_file_lines(L, lua_gettop(L), 1); | ||
429 | return 1; | ||
430 | } | ||
431 | } | 419 | } |
432 | 420 | ||
433 | static int io_std_get(lua_State *L, int fp, const char *mode) | 421 | static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode) |
434 | { | 422 | { |
435 | if (!lua_isnoneornil(L, 1)) { | 423 | if (L->base < L->top && !tvisnil(L->base)) { |
436 | const char *fname = lua_tostring(L, 1); | 424 | if (tvisudata(L->base)) { |
437 | if (fname) { | 425 | io_tofile(L); |
438 | FILE **pf = io_file_new(L); | 426 | L->top = L->base+1; |
439 | *pf = fopen(fname, mode); | ||
440 | if (*pf == NULL) | ||
441 | io_file_error(L, 1, fname); | ||
442 | } else { | 427 | } else { |
443 | io_tofile(L); /* check that it's a valid file handle */ | 428 | io_file_open(L, mode); |
444 | lua_pushvalue(L, 1); | ||
445 | } | 429 | } |
446 | lua_rawseti(L, LUA_ENVIRONINDEX, fp); | 430 | /* NOBARRIER: The standard I/O handles are GC roots. */ |
431 | setgcref(G(L)->gcroot[id], gcV(L->top-1)); | ||
432 | } else { | ||
433 | setudataV(L, L->top++, IOSTDF_UD(L, id)); | ||
447 | } | 434 | } |
448 | /* return current value */ | ||
449 | lua_rawgeti(L, LUA_ENVIRONINDEX, fp); | ||
450 | return 1; | 435 | return 1; |
451 | } | 436 | } |
452 | 437 | ||
453 | LJLIB_CF(io_input) | 438 | LJLIB_CF(io_input) |
454 | { | 439 | { |
455 | return io_std_get(L, IO_INPUT, "r"); | 440 | return io_std_getset(L, GCROOT_IO_INPUT, "r"); |
456 | } | 441 | } |
457 | 442 | ||
458 | LJLIB_CF(io_output) | 443 | LJLIB_CF(io_output) |
459 | { | 444 | { |
460 | return io_std_get(L, IO_OUTPUT, "w"); | 445 | return io_std_getset(L, GCROOT_IO_OUTPUT, "w"); |
461 | } | 446 | } |
462 | 447 | ||
463 | LJLIB_CF(io_type) | 448 | LJLIB_NOREG LJLIB_CF(io_lines_iter) |
464 | { | 449 | { |
465 | void *ud; | 450 | IOFileUD *iof = io_tofile(L); |
466 | luaL_checkany(L, 1); | 451 | int ok = io_file_readline(L, iof->fp); |
467 | ud = lua_touserdata(L, 1); | 452 | if (ferror(iof->fp)) |
468 | lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | 453 | lj_err_callermsg(L, strerror(errno)); |
469 | if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1)) | 454 | if (!ok && (iof->type & IOFILE_FLAG_CLOSE)) |
470 | lua_pushnil(L); /* not a file */ | 455 | io_file_close(L, iof); /* Return values are ignored (ok is 0). */ |
471 | else if (*((FILE **)ud) == NULL) | 456 | return ok; |
472 | lua_pushliteral(L, "closed file"); | ||
473 | else | ||
474 | lua_pushliteral(L, "file"); | ||
475 | return 1; | ||
476 | } | 457 | } |
477 | 458 | ||
478 | LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */ | 459 | LJLIB_PUSH(top-3) /* io_lines_iter */ |
460 | LJLIB_CF(io_lines) | ||
461 | { | ||
462 | if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */ | ||
463 | IOFileUD *iof = io_file_open(L, "r"); | ||
464 | iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE; | ||
465 | setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1))); | ||
466 | } else { /* io.lines() iterates over stdin. */ | ||
467 | setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1))); | ||
468 | setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT)); | ||
469 | L->top += 2; | ||
470 | } | ||
471 | return 2; | ||
472 | } | ||
479 | 473 | ||
480 | LJLIB_CF(io_popen) | 474 | LJLIB_CF(io_type) |
481 | { | 475 | { |
482 | #if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) | 476 | cTValue *o = lj_lib_checkany(L, 1); |
483 | const char *fname = luaL_checkstring(L, 1); | 477 | if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE)) |
484 | const char *mode = luaL_optstring(L, 2, "r"); | 478 | setnilV(L->top++); |
485 | FILE **pf = io_file_new(L); | 479 | else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL) |
486 | #ifdef LUA_USE_POSIX | 480 | lua_pushliteral(L, "file"); |
487 | fflush(NULL); | 481 | else |
488 | *pf = popen(fname, mode); | 482 | lua_pushliteral(L, "closed file"); |
489 | #else | 483 | return 1; |
490 | *pf = _popen(fname, mode); | ||
491 | #endif | ||
492 | return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; | ||
493 | #else | ||
494 | luaL_error(L, LUA_QL("popen") " not supported"); | ||
495 | #endif | ||
496 | } | 484 | } |
497 | 485 | ||
498 | #include "lj_libdef.h" | 486 | #include "lj_libdef.h" |
499 | 487 | ||
500 | /* ------------------------------------------------------------------------ */ | 488 | /* ------------------------------------------------------------------------ */ |
501 | 489 | ||
502 | static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname) | 490 | static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name) |
503 | { | 491 | { |
504 | FILE **pf = io_file_new(L); | 492 | IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD)); |
505 | GCudata *ud = udataV(L->top-1); | 493 | GCudata *ud = udataV(L->top-1); |
506 | GCtab *envt = tabV(L->top-2); | 494 | ud->udtype = UDTYPE_IO_FILE; |
507 | *pf = fp; | 495 | /* NOBARRIER: The GCudata is new (marked white). */ |
508 | setgcref(ud->env, obj2gco(envt)); | 496 | setgcref(ud->metatable, gcV(L->top-3)); |
509 | lj_gc_objbarrier(L, obj2gco(ud), envt); | 497 | iof->fp = fp; |
510 | if (k > 0) { | 498 | iof->type = IOFILE_TYPE_STDF; |
511 | lua_pushvalue(L, -1); | 499 | lua_setfield(L, -2, name); |
512 | lua_rawseti(L, -5, k); | 500 | return obj2gco(ud); |
513 | } | ||
514 | lua_setfield(L, -3, fname); | ||
515 | } | ||
516 | |||
517 | static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls) | ||
518 | { | ||
519 | lua_createtable(L, narr, 1); | ||
520 | lua_pushcfunction(L, cls); | ||
521 | lua_setfield(L, -2, "__close"); | ||
522 | } | 501 | } |
523 | 502 | ||
524 | LUALIB_API int luaopen_io(lua_State *L) | 503 | LUALIB_API int luaopen_io(lua_State *L) |
525 | { | 504 | { |
526 | lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | 505 | lua_pushcfunction(L, lj_cf_io_lines_iter); |
527 | if (tvisnil(L->top-1)) { | 506 | funcV(L->top-1)->c.ffid = FF_io_lines_iter; |
528 | LJ_LIB_REG_(L, NULL, io_method); | 507 | LJ_LIB_REG_(L, NULL, io_method); |
529 | lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); | 508 | copyTV(L, L->top, L->top-1); L->top++; |
530 | } | 509 | lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); |
531 | io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */ | ||
532 | io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */ | ||
533 | LJ_LIB_REG(L, io); | 510 | LJ_LIB_REG(L, io); |
534 | io_fenv_new(L, 0, lj_cf_io_std_close); | 511 | setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin")); |
535 | io_std_new(L, stdin, IO_INPUT, "stdin"); | 512 | setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout")); |
536 | io_std_new(L, stdout, IO_OUTPUT, "stdout"); | 513 | io_std_new(L, stderr, "stderr"); |
537 | io_std_new(L, stderr, 0, "stderr"); | ||
538 | L->top--; | ||
539 | return 1; | 514 | return 1; |
540 | } | 515 | } |
541 | 516 | ||
diff --git a/src/lib_math.c b/src/lib_math.c index adc77c9d..f3803e8f 100644 --- a/src/lib_math.c +++ b/src/lib_math.c | |||
@@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) | |||
36 | LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) | 36 | LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) |
37 | LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) | 37 | LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) |
38 | LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) | 38 | LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) |
39 | LJLIB_ASM_(math_sinh) | 39 | LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) |
40 | LJLIB_ASM_(math_cosh) | 40 | LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) |
41 | LJLIB_ASM_(math_tanh) | 41 | LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) |
42 | LJLIB_ASM_(math_frexp) | 42 | LJLIB_ASM_(math_frexp) |
43 | LJLIB_ASM_(math_modf) LJLIB_REC(.) | 43 | LJLIB_ASM_(math_modf) LJLIB_REC(.) |
44 | 44 | ||
@@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); } | |||
82 | */ | 82 | */ |
83 | 83 | ||
84 | /* PRNG state. */ | 84 | /* PRNG state. */ |
85 | typedef struct TW223State { | 85 | struct RandomState { |
86 | uint64_t gen[4]; /* State of the 4 LFSR generators. */ | 86 | uint64_t gen[4]; /* State of the 4 LFSR generators. */ |
87 | int valid; /* State is valid. */ | 87 | int valid; /* State is valid. */ |
88 | } TW223State; | 88 | }; |
89 | 89 | ||
90 | /* Union needed for bit-pattern conversion between uint64_t and double. */ | 90 | /* Union needed for bit-pattern conversion between uint64_t and double. */ |
91 | typedef union { uint64_t u64; double d; } U64double; | 91 | typedef union { uint64_t u64; double d; } U64double; |
92 | 92 | ||
93 | /* Update generator i and compute a running xor of all states. */ | 93 | /* Update generator i and compute a running xor of all states. */ |
94 | #define TW223_GEN(i, k, q, s) \ | 94 | #define TW223_GEN(i, k, q, s) \ |
95 | z = tw->gen[i]; \ | 95 | z = rs->gen[i]; \ |
96 | z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ | 96 | z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ |
97 | r ^= z; tw->gen[i] = z; | 97 | r ^= z; rs->gen[i] = z; |
98 | 98 | ||
99 | /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ | 99 | /* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ |
100 | static LJ_NOINLINE double tw223_step(TW223State *tw) | 100 | LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) |
101 | { | 101 | { |
102 | uint64_t z, r = 0; | 102 | uint64_t z, r = 0; |
103 | U64double u; | ||
104 | TW223_GEN(0, 63, 31, 18) | 103 | TW223_GEN(0, 63, 31, 18) |
105 | TW223_GEN(1, 58, 19, 28) | 104 | TW223_GEN(1, 58, 19, 28) |
106 | TW223_GEN(2, 55, 24, 7) | 105 | TW223_GEN(2, 55, 24, 7) |
107 | TW223_GEN(3, 47, 21, 8) | 106 | TW223_GEN(3, 47, 21, 8) |
108 | u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52); | 107 | return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); |
109 | return u.d; | ||
110 | } | 108 | } |
111 | 109 | ||
112 | /* PRNG initialization function. */ | 110 | /* PRNG initialization function. */ |
113 | static void tw223_init(TW223State *tw, double d) | 111 | static void random_init(RandomState *rs, double d) |
114 | { | 112 | { |
115 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ | 113 | uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ |
116 | int i; | 114 | int i; |
@@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d) | |||
120 | r >>= 8; | 118 | r >>= 8; |
121 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; | 119 | u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; |
122 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ | 120 | if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ |
123 | tw->gen[i] = u.u64; | 121 | rs->gen[i] = u.u64; |
124 | } | 122 | } |
125 | tw->valid = 1; | 123 | rs->valid = 1; |
126 | for (i = 0; i < 10; i++) | 124 | for (i = 0; i < 10; i++) |
127 | tw223_step(tw); | 125 | lj_math_random_step(rs); |
128 | } | 126 | } |
129 | 127 | ||
130 | /* PRNG extract function. */ | 128 | /* PRNG extract function. */ |
131 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ | 129 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ |
132 | LJLIB_CF(math_random) | 130 | LJLIB_CF(math_random) LJLIB_REC(.) |
133 | { | 131 | { |
134 | int n = cast_int(L->top - L->base); | 132 | int n = cast_int(L->top - L->base); |
135 | TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | 133 | RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); |
134 | U64double u; | ||
136 | double d; | 135 | double d; |
137 | if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0); | 136 | if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); |
138 | d = tw223_step(tw) - 1.0; | 137 | u.u64 = lj_math_random_step(rs); |
138 | d = u.d - 1.0; | ||
139 | if (n > 0) { | 139 | if (n > 0) { |
140 | double r1 = lj_lib_checknum(L, 1); | 140 | double r1 = lj_lib_checknum(L, 1); |
141 | if (n == 1) { | 141 | if (n == 1) { |
@@ -150,11 +150,11 @@ LJLIB_CF(math_random) | |||
150 | } | 150 | } |
151 | 151 | ||
152 | /* PRNG seed function. */ | 152 | /* PRNG seed function. */ |
153 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ | 153 | LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ |
154 | LJLIB_CF(math_randomseed) | 154 | LJLIB_CF(math_randomseed) |
155 | { | 155 | { |
156 | TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); | 156 | RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); |
157 | tw223_init(tw, lj_lib_checknum(L, 1)); | 157 | random_init(rs, lj_lib_checknum(L, 1)); |
158 | return 0; | 158 | return 0; |
159 | } | 159 | } |
160 | 160 | ||
@@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed) | |||
164 | 164 | ||
165 | LUALIB_API int luaopen_math(lua_State *L) | 165 | LUALIB_API int luaopen_math(lua_State *L) |
166 | { | 166 | { |
167 | TW223State *tw; | 167 | RandomState *rs; |
168 | tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State)); | 168 | rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); |
169 | tw->valid = 0; /* Use lazy initialization to save some time on startup. */ | 169 | rs->valid = 0; /* Use lazy initialization to save some time on startup. */ |
170 | LJ_LIB_REG(L, math); | 170 | LJ_LIB_REG(L, math); |
171 | #if defined(LUA_COMPAT_MOD) | 171 | #if defined(LUA_COMPAT_MOD) |
172 | lua_getfield(L, -1, "fmod"); | 172 | lua_getfield(L, -1, "fmod"); |
diff --git a/src/lib_string.c b/src/lib_string.c index 6c857328..e7ad12df 100644 --- a/src/lib_string.c +++ b/src/lib_string.c | |||
@@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L) | |||
776 | { | 776 | { |
777 | GCtab *mt; | 777 | GCtab *mt; |
778 | GCstr *mmstr; | 778 | GCstr *mmstr; |
779 | global_State *g; | ||
779 | LJ_LIB_REG(L, string); | 780 | LJ_LIB_REG(L, string); |
780 | #if defined(LUA_COMPAT_GFIND) | 781 | #if defined(LUA_COMPAT_GFIND) |
781 | lua_getfield(L, -1, "gmatch"); | 782 | lua_getfield(L, -1, "gmatch"); |
782 | lua_setfield(L, -2, "gfind"); | 783 | lua_setfield(L, -2, "gfind"); |
783 | #endif | 784 | #endif |
784 | mt = lj_tab_new(L, 0, 1); | 785 | mt = lj_tab_new(L, 0, 1); |
785 | /* NOBARRIER: G(L)->mmname[] is a GC root. */ | 786 | /* NOBARRIER: basemt is a GC root. */ |
786 | setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt)); | 787 | g = G(L); |
787 | mmstr = strref(G(L)->mmname[MM_index]); | 788 | setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); |
788 | if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); | 789 | mmstr = strref(g->mmname[MM_index]); |
790 | if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); | ||
789 | settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1)); | 791 | settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1)); |
790 | mt->nomm = cast_byte(~(1u<<MM_index)); | 792 | mt->nomm = cast_byte(~(1u<<MM_index)); |
791 | return 1; | 793 | return 1; |
diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 8ad4f8fb..6d8b4ccb 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c | |||
@@ -1186,10 +1186,10 @@ static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize) | |||
1186 | size_t rsize = oldsize - nb; | 1186 | size_t rsize = oldsize - nb; |
1187 | newp = oldp; | 1187 | newp = oldp; |
1188 | if (rsize >= MIN_CHUNK_SIZE) { | 1188 | if (rsize >= MIN_CHUNK_SIZE) { |
1189 | mchunkptr remainder = chunk_plus_offset(newp, nb); | 1189 | mchunkptr rem = chunk_plus_offset(newp, nb); |
1190 | set_inuse(m, newp, nb); | 1190 | set_inuse(m, newp, nb); |
1191 | set_inuse(m, remainder, rsize); | 1191 | set_inuse(m, rem, rsize); |
1192 | lj_alloc_free(m, chunk2mem(remainder)); | 1192 | lj_alloc_free(m, chunk2mem(rem)); |
1193 | } | 1193 | } |
1194 | } else if (next == m->top && oldsize + m->topsize > nb) { | 1194 | } else if (next == m->top && oldsize + m->topsize > nb) { |
1195 | /* Expand into top */ | 1195 | /* Expand into top */ |
diff --git a/src/lj_api.c b/src/lj_api.c index 7a759e5f..4bac5024 100644 --- a/src/lj_api.c +++ b/src/lj_api.c | |||
@@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx) | |||
227 | { | 227 | { |
228 | cTValue *o = index2adr(L, idx); | 228 | cTValue *o = index2adr(L, idx); |
229 | TValue tmp; | 229 | TValue tmp; |
230 | return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))); | 230 | return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp))); |
231 | } | 231 | } |
232 | 232 | ||
233 | LUA_API int lua_isstring(lua_State *L, int idx) | 233 | LUA_API int lua_isstring(lua_State *L, int idx) |
@@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) | |||
307 | TValue tmp; | 307 | TValue tmp; |
308 | if (LJ_LIKELY(tvisnum(o))) | 308 | if (LJ_LIKELY(tvisnum(o))) |
309 | return numV(o); | 309 | return numV(o); |
310 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 310 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
311 | return numV(&tmp); | 311 | return numV(&tmp); |
312 | else | 312 | else |
313 | return 0; | 313 | return 0; |
@@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) | |||
319 | TValue tmp; | 319 | TValue tmp; |
320 | if (tvisnum(o)) | 320 | if (tvisnum(o)) |
321 | return numV(o); | 321 | return numV(o); |
322 | else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) | 322 | else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) |
323 | lj_err_argt(L, idx, LUA_TNUMBER); | 323 | lj_err_argt(L, idx, LUA_TNUMBER); |
324 | return numV(&tmp); | 324 | return numV(&tmp); |
325 | } | 325 | } |
@@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def) | |||
332 | return numV(o); | 332 | return numV(o); |
333 | else if (tvisnil(o)) | 333 | else if (tvisnil(o)) |
334 | return def; | 334 | return def; |
335 | else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) | 335 | else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) |
336 | lj_err_argt(L, idx, LUA_TNUMBER); | 336 | lj_err_argt(L, idx, LUA_TNUMBER); |
337 | return numV(&tmp); | 337 | return numV(&tmp); |
338 | } | 338 | } |
@@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) | |||
344 | lua_Number n; | 344 | lua_Number n; |
345 | if (LJ_LIKELY(tvisnum(o))) | 345 | if (LJ_LIKELY(tvisnum(o))) |
346 | n = numV(o); | 346 | n = numV(o); |
347 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 347 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
348 | n = numV(&tmp); | 348 | n = numV(&tmp); |
349 | else | 349 | else |
350 | return 0; | 350 | return 0; |
@@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) | |||
362 | lua_Number n; | 362 | lua_Number n; |
363 | if (LJ_LIKELY(tvisnum(o))) | 363 | if (LJ_LIKELY(tvisnum(o))) |
364 | n = numV(o); | 364 | n = numV(o); |
365 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 365 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
366 | n = numV(&tmp); | 366 | n = numV(&tmp); |
367 | else | 367 | else |
368 | lj_err_argt(L, idx, LUA_TNUMBER); | 368 | lj_err_argt(L, idx, LUA_TNUMBER); |
@@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) | |||
382 | n = numV(o); | 382 | n = numV(o); |
383 | else if (tvisnil(o)) | 383 | else if (tvisnil(o)) |
384 | return def; | 384 | return def; |
385 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) | 385 | else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) |
386 | n = numV(&tmp); | 386 | n = numV(&tmp); |
387 | else | 387 | else |
388 | lj_err_argt(L, idx, LUA_TNUMBER); | 388 | lj_err_argt(L, idx, LUA_TNUMBER); |
@@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx) | |||
753 | else if (tvisudata(o)) | 753 | else if (tvisudata(o)) |
754 | mt = tabref(udataV(o)->metatable); | 754 | mt = tabref(udataV(o)->metatable); |
755 | else | 755 | else |
756 | mt = tabref(G(L)->basemt[itypemap(o)]); | 756 | mt = tabref(basemt_obj(G(L), o)); |
757 | if (mt == NULL) | 757 | if (mt == NULL) |
758 | return 0; | 758 | return 0; |
759 | settabV(L, L->top, mt); | 759 | settabV(L, L->top, mt); |
@@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) | |||
941 | if (lj_trace_flushall(L)) | 941 | if (lj_trace_flushall(L)) |
942 | lj_err_caller(L, LJ_ERR_NOGCMM); | 942 | lj_err_caller(L, LJ_ERR_NOGCMM); |
943 | if (tvisbool(o)) { | 943 | if (tvisbool(o)) { |
944 | /* NOBARRIER: g->basemt[] is a GC root. */ | 944 | /* NOBARRIER: basemt is a GC root. */ |
945 | setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt)); | 945 | setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); |
946 | setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt)); | 946 | setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt)); |
947 | } else { | 947 | } else { |
948 | /* NOBARRIER: g->basemt[] is a GC root. */ | 948 | /* NOBARRIER: basemt is a GC root. */ |
949 | setgcref(g->basemt[itypemap(o)], obj2gco(mt)); | 949 | setgcref(basemt_obj(g, o), obj2gco(mt)); |
950 | } | 950 | } |
951 | } | 951 | } |
952 | L->top--; | 952 | L->top--; |
diff --git a/src/lj_asm.c b/src/lj_asm.c index a4d0c606..f26a40a5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_str.h" | 14 | #include "lj_str.h" |
15 | #include "lj_tab.h" | 15 | #include "lj_tab.h" |
16 | #include "lj_frame.h" | ||
16 | #include "lj_ir.h" | 17 | #include "lj_ir.h" |
17 | #include "lj_jit.h" | 18 | #include "lj_jit.h" |
18 | #include "lj_iropt.h" | 19 | #include "lj_iropt.h" |
@@ -81,6 +82,10 @@ typedef struct ASMState { | |||
81 | 82 | ||
82 | #define IR(ref) (&as->ir[(ref)]) | 83 | #define IR(ref) (&as->ir[(ref)]) |
83 | 84 | ||
85 | #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ | ||
86 | #define ASMREF_TMP2 REF_FALSE /* Temp. register. */ | ||
87 | #define ASMREF_L REF_NIL /* Stores register for L. */ | ||
88 | |||
84 | /* Check for variant to invariant references. */ | 89 | /* Check for variant to invariant references. */ |
85 | #define iscrossref(as, ref) ((ref) < as->sectref) | 90 | #define iscrossref(as, ref) ((ref) < as->sectref) |
86 | 91 | ||
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as) | |||
115 | { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ | 120 | { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ |
116 | if (rex != 0x40) *--(p) = rex; } | 121 | if (rex != 0x40) *--(p) = rex; } |
117 | #define FORCE_REX 0x200 | 122 | #define FORCE_REX 0x200 |
123 | #define REX_64 (FORCE_REX|0x080000) | ||
118 | #else | 124 | #else |
119 | #define REXRB(p, rr, rb) ((void)0) | 125 | #define REXRB(p, rr, rb) ((void)0) |
120 | #define FORCE_REX 0 | 126 | #define FORCE_REX 0 |
127 | #define REX_64 0 | ||
121 | #endif | 128 | #endif |
122 | 129 | ||
123 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) | 130 | #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) |
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, | |||
144 | { | 151 | { |
145 | uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); | 152 | uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); |
146 | if (rex != 0x40) { | 153 | if (rex != 0x40) { |
154 | rex |= (rr >> 16); | ||
147 | if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } | 155 | if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } |
148 | *--p = (MCode)rex; | 156 | *--p = (MCode)rex; |
149 | } | 157 | } |
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target) | |||
451 | 459 | ||
452 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) | 460 | #define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) |
453 | 461 | ||
454 | /* Argument setup for C calls. Up to 3 args need no stack adjustment. */ | ||
455 | #define emit_setargr(as, narg, r) \ | ||
456 | emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4); | ||
457 | #define emit_setargi(as, narg, imm) \ | ||
458 | emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm)) | ||
459 | #define emit_setargp(as, narg, ptr) \ | ||
460 | emit_setargi(as, (narg), ptr2addr((ptr))) | ||
461 | |||
462 | /* -- Register allocator debugging ---------------------------------------- */ | 462 | /* -- Register allocator debugging ---------------------------------------- */ |
463 | 463 | ||
464 | /* #define LUAJIT_DEBUG_RA */ | 464 | /* #define LUAJIT_DEBUG_RA */ |
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as) | |||
578 | memset(as->phireg, 0, sizeof(as->phireg)); | 578 | memset(as->phireg, 0, sizeof(as->phireg)); |
579 | memset(as->cost, 0, sizeof(as->cost)); | 579 | memset(as->cost, 0, sizeof(as->cost)); |
580 | as->cost[RID_ESP] = REGCOST(~0u, 0u); | 580 | as->cost[RID_ESP] = REGCOST(~0u, 0u); |
581 | |||
582 | /* Start slots for spill slot allocation. */ | ||
583 | as->evenspill = (SPS_FIRST+1)&~1; | ||
584 | as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0; | ||
585 | } | 581 | } |
586 | 582 | ||
587 | /* Rematerialize constants. */ | 583 | /* Rematerialize constants. */ |
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir) | |||
598 | } else if (ir->o == IR_BASE) { | 594 | } else if (ir->o == IR_BASE) { |
599 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ | 595 | ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ |
600 | emit_getgl(as, r, jit_base); | 596 | emit_getgl(as, r, jit_base); |
597 | } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */ | ||
598 | lua_assert(irt_isnil(ir->t)); | ||
599 | emit_getgl(as, r, jit_L); | ||
601 | } else { | 600 | } else { |
602 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || | 601 | lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || |
603 | ir->o == IR_KPTR || ir->o == IR_KNULL); | 602 | ir->o == IR_KPTR || ir->o == IR_KNULL); |
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir) | |||
629 | return sps_scale(slot); | 628 | return sps_scale(slot); |
630 | } | 629 | } |
631 | 630 | ||
631 | /* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */ | ||
632 | static Reg ra_releasetmp(ASMState *as, IRRef ref) | ||
633 | { | ||
634 | IRIns *ir = IR(ref); | ||
635 | Reg r = ir->r; | ||
636 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | ||
637 | ra_free(as, r); | ||
638 | ra_modified(as, r); | ||
639 | ir->r = RID_INIT; | ||
640 | return r; | ||
641 | } | ||
642 | |||
632 | /* Restore a register (marked as free). Rematerialize or force a spill. */ | 643 | /* Restore a register (marked as free). Rematerialize or force a spill. */ |
633 | static Reg ra_restore(ASMState *as, IRRef ref) | 644 | static Reg ra_restore(ASMState *as, IRRef ref) |
634 | { | 645 | { |
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc) | |||
1008 | 1019 | ||
1009 | /* Arch-specific field offsets. */ | 1020 | /* Arch-specific field offsets. */ |
1010 | static const uint8_t field_ofs[IRFL__MAX+1] = { | 1021 | static const uint8_t field_ofs[IRFL__MAX+1] = { |
1011 | #define FLOFS(name, type, field) (uint8_t)offsetof(type, field), | 1022 | #define FLOFS(name, ofs) (uint8_t)(ofs), |
1012 | IRFLDEF(FLOFS) | 1023 | IRFLDEF(FLOFS) |
1013 | #undef FLOFS | 1024 | #undef FLOFS |
1014 | 0 | 1025 | 0 |
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | |||
1129 | { | 1140 | { |
1130 | IRIns *irr; | 1141 | IRIns *irr; |
1131 | lua_assert(ir->o == IR_STRREF); | 1142 | lua_assert(ir->o == IR_STRREF); |
1132 | as->mrm.idx = as->mrm.base = RID_NONE; | 1143 | as->mrm.base = as->mrm.idx = RID_NONE; |
1133 | as->mrm.scale = XM_SCALE1; | 1144 | as->mrm.scale = XM_SCALE1; |
1134 | as->mrm.ofs = sizeof(GCstr); | 1145 | as->mrm.ofs = sizeof(GCstr); |
1135 | if (irref_isk(ir->op1)) { | 1146 | if (irref_isk(ir->op1)) { |
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) | |||
1158 | } | 1169 | } |
1159 | } | 1170 | } |
1160 | 1171 | ||
1172 | static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow) | ||
1173 | { | ||
1174 | if (ir->o == IR_KPTR) { | ||
1175 | as->mrm.ofs = ir->i; | ||
1176 | as->mrm.base = as->mrm.idx = RID_NONE; | ||
1177 | } else { | ||
1178 | lua_assert(ir->o == IR_STRREF); | ||
1179 | asm_fusestrref(as, ir, allow); | ||
1180 | } | ||
1181 | } | ||
1182 | |||
1161 | /* Fuse load into memory operand. */ | 1183 | /* Fuse load into memory operand. */ |
1162 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | 1184 | static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) |
1163 | { | 1185 | { |
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1172 | return RID_MRM; | 1194 | return RID_MRM; |
1173 | } | 1195 | } |
1174 | if (ir->o == IR_KNUM) { | 1196 | if (ir->o == IR_KNUM) { |
1197 | RegSet avail = as->freeset & ~as->modset & RSET_FPR; | ||
1175 | lua_assert(allow != RSET_EMPTY); | 1198 | lua_assert(allow != RSET_EMPTY); |
1176 | if (!(as->freeset & ~as->modset & RSET_FPR)) { | 1199 | if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ |
1177 | as->mrm.ofs = ptr2addr(ir_knum(ir)); | 1200 | as->mrm.ofs = ptr2addr(ir_knum(ir)); |
1178 | as->mrm.base = as->mrm.idx = RID_NONE; | 1201 | as->mrm.base = as->mrm.idx = RID_NONE; |
1179 | return RID_MRM; | 1202 | return RID_MRM; |
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1188 | return RID_MRM; | 1211 | return RID_MRM; |
1189 | } | 1212 | } |
1190 | } else if (ir->o == IR_FLOAD) { | 1213 | } else if (ir->o == IR_FLOAD) { |
1191 | /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */ | 1214 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */ |
1192 | if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) { | 1215 | if ((irt_isint(ir->t) || irt_isaddr(ir->t)) && |
1216 | noconflict(as, ref, IR_FSTORE)) { | ||
1193 | asm_fusefref(as, ir, xallow); | 1217 | asm_fusefref(as, ir, xallow); |
1194 | return RID_MRM; | 1218 | return RID_MRM; |
1195 | } | 1219 | } |
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1199 | return RID_MRM; | 1223 | return RID_MRM; |
1200 | } | 1224 | } |
1201 | } else if (ir->o == IR_XLOAD) { | 1225 | } else if (ir->o == IR_XLOAD) { |
1202 | /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). | 1226 | /* Generic fusion is only ok for 32 bit operand (but see asm_comp). |
1203 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). | 1227 | ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). |
1204 | */ | 1228 | */ |
1205 | if (irt_isint(ir->t)) { | 1229 | if (irt_isint(ir->t) || irt_isaddr(ir->t)) { |
1206 | asm_fusestrref(as, IR(ir->op1), xallow); | 1230 | asm_fusexref(as, IR(ir->op1), xallow); |
1207 | return RID_MRM; | 1231 | return RID_MRM; |
1208 | } | 1232 | } |
1209 | } | 1233 | } |
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) | |||
1214 | return ra_allocref(as, ref, allow); | 1238 | return ra_allocref(as, ref, allow); |
1215 | } | 1239 | } |
1216 | 1240 | ||
1241 | /* -- Calls --------------------------------------------------------------- */ | ||
1242 | |||
1243 | /* Generate a call to a C function. */ | ||
1244 | static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | ||
1245 | { | ||
1246 | RegSet allow = RSET_ALL; | ||
1247 | uint32_t n, nargs = CCI_NARGS(ci); | ||
1248 | int32_t ofs = 0; | ||
1249 | lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */ | ||
1250 | emit_call(as, ci->func); | ||
1251 | for (n = 0; n < nargs; n++) { /* Setup args. */ | ||
1252 | #if LJ_64 | ||
1253 | #error "NYI: 64 bit mode call argument setup" | ||
1254 | #endif | ||
1255 | IRIns *ir = IR(args[n]); | ||
1256 | if (irt_isnum(ir->t)) { | ||
1257 | if ((ofs & 4) && irref_isk(args[n])) { | ||
1258 | /* Split stores for unaligned FP consts. */ | ||
1259 | emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); | ||
1260 | emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi); | ||
1261 | } else { | ||
1262 | Reg r; | ||
1263 | if ((allow & RSET_FPR) == RSET_EMPTY) | ||
1264 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
1265 | r = ra_alloc1(as, args[n], allow & RSET_FPR); | ||
1266 | allow &= ~RID2RSET(r); | ||
1267 | emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs); | ||
1268 | } | ||
1269 | ofs += 8; | ||
1270 | } else { | ||
1271 | if ((ci->flags & CCI_FASTCALL) && n < 2) { | ||
1272 | Reg r = n == 0 ? RID_ECX : RID_EDX; | ||
1273 | if (args[n] < ASMREF_TMP1) { | ||
1274 | emit_loadi(as, r, ir->i); | ||
1275 | } else { | ||
1276 | lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ | ||
1277 | allow &= ~RID2RSET(r); | ||
1278 | if (ra_hasreg(ir->r)) | ||
1279 | emit_movrr(as, r, ir->r); | ||
1280 | else | ||
1281 | ra_allocref(as, args[n], RID2RSET(r)); | ||
1282 | } | ||
1283 | } else { | ||
1284 | if (args[n] < ASMREF_TMP1) { | ||
1285 | emit_movmroi(as, RID_ESP, ofs, ir->i); | ||
1286 | } else { | ||
1287 | Reg r; | ||
1288 | if ((allow & RSET_GPR) == RSET_EMPTY) | ||
1289 | lj_trace_err(as->J, LJ_TRERR_NYICOAL); | ||
1290 | r = ra_alloc1(as, args[n], allow & RSET_GPR); | ||
1291 | allow &= ~RID2RSET(r); | ||
1292 | emit_movtomro(as, r, RID_ESP, ofs); | ||
1293 | } | ||
1294 | ofs += 4; | ||
1295 | } | ||
1296 | } | ||
1297 | } | ||
1298 | } | ||
1299 | |||
1300 | /* Setup result reg/sp for call. Evict scratch regs. */ | ||
1301 | static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | ||
1302 | { | ||
1303 | RegSet drop = RSET_SCRATCH; | ||
1304 | if ((ci->flags & CCI_NOFPRCLOBBER)) | ||
1305 | drop &= ~RSET_FPR; | ||
1306 | if (ra_hasreg(ir->r)) | ||
1307 | rset_clear(drop, ir->r); /* Dest reg handled below. */ | ||
1308 | ra_evictset(as, drop); /* Evictions must be performed first. */ | ||
1309 | if (ra_used(ir)) { | ||
1310 | if (irt_isnum(ir->t)) { | ||
1311 | int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | ||
1312 | #if LJ_64 | ||
1313 | if ((ci->flags & CCI_CASTU64)) { | ||
1314 | Reg dest = ir->r; | ||
1315 | if (ra_hasreg(dest)) { | ||
1316 | ra_free(as, dest); | ||
1317 | ra_modified(as, dest); | ||
1318 | emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */ | ||
1319 | } else { | ||
1320 | emit_movrmro(as, RID_RET, RID_ESP, ofs); | ||
1321 | } | ||
1322 | } else { | ||
1323 | ra_destreg(as, ir, RID_FPRET); | ||
1324 | } | ||
1325 | #else | ||
1326 | /* Number result is in x87 st0 for x86 calling convention. */ | ||
1327 | Reg dest = ir->r; | ||
1328 | if (ra_hasreg(dest)) { | ||
1329 | ra_free(as, dest); | ||
1330 | ra_modified(as, dest); | ||
1331 | emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); | ||
1332 | } | ||
1333 | if ((ci->flags & CCI_CASTU64)) { | ||
1334 | emit_movtomro(as, RID_RET, RID_ESP, ofs); | ||
1335 | emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4); | ||
1336 | } else { | ||
1337 | emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); | ||
1338 | } | ||
1339 | #endif | ||
1340 | } else { | ||
1341 | lua_assert(!irt_ispri(ir->t)); | ||
1342 | ra_destreg(as, ir, RID_RET); | ||
1343 | } | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | /* Collect arguments from CALL* and ARG instructions. */ | ||
1348 | static void asm_collectargs(ASMState *as, IRIns *ir, | ||
1349 | const CCallInfo *ci, IRRef *args) | ||
1350 | { | ||
1351 | uint32_t n = CCI_NARGS(ci); | ||
1352 | lua_assert(n <= CCI_NARGS_MAX); | ||
1353 | if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } | ||
1354 | while (n-- > 1) { | ||
1355 | ir = IR(ir->op1); | ||
1356 | lua_assert(ir->o == IR_CARG); | ||
1357 | args[n] = ir->op2; | ||
1358 | } | ||
1359 | args[0] = ir->op1; | ||
1360 | lua_assert(IR(ir->op1)->o != IR_CARG); | ||
1361 | } | ||
1362 | |||
1363 | static void asm_call(ASMState *as, IRIns *ir) | ||
1364 | { | ||
1365 | IRRef args[CCI_NARGS_MAX]; | ||
1366 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
1367 | asm_collectargs(as, ir, ci, args); | ||
1368 | asm_setupresult(as, ir, ci); | ||
1369 | asm_gencall(as, ci, args); | ||
1370 | } | ||
1371 | |||
1217 | /* -- Type conversions ---------------------------------------------------- */ | 1372 | /* -- Type conversions ---------------------------------------------------- */ |
1218 | 1373 | ||
1219 | static void asm_tonum(ASMState *as, IRIns *ir) | 1374 | static void asm_tonum(ASMState *as, IRIns *ir) |
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir) | |||
1260 | 1415 | ||
1261 | static void asm_strto(ASMState *as, IRIns *ir) | 1416 | static void asm_strto(ASMState *as, IRIns *ir) |
1262 | { | 1417 | { |
1263 | Reg str; | ||
1264 | int32_t ofs; | ||
1265 | RegSet drop = RSET_SCRATCH; | ||
1266 | /* Force a spill slot for the destination register (if any). */ | 1418 | /* Force a spill slot for the destination register (if any). */ |
1419 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; | ||
1420 | IRRef args[2]; | ||
1421 | RegSet drop = RSET_SCRATCH; | ||
1267 | if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) | 1422 | if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) |
1268 | rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ | 1423 | rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ |
1269 | ra_evictset(as, drop); | 1424 | ra_evictset(as, drop); |
1270 | asm_guardcc(as, CC_E); | 1425 | asm_guardcc(as, CC_E); |
1271 | emit_rr(as, XO_TEST, RID_RET, RID_RET); | 1426 | emit_rr(as, XO_TEST, RID_RET, RID_RET); |
1272 | /* int lj_str_numconv(const char *s, TValue *n) */ | 1427 | args[0] = ir->op1; |
1273 | emit_call(as, lj_str_numconv); | 1428 | args[1] = ASMREF_TMP1; |
1274 | ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ | 1429 | asm_gencall(as, ci, args); |
1275 | if (ofs == 0) { | 1430 | /* Store the result to the spill slot or slots SPS_TEMP1/2. */ |
1276 | emit_setargr(as, 2, RID_ESP); | 1431 | emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), |
1277 | } else { | 1432 | RID_ESP, sps_scale(ir->s)); |
1278 | emit_setargr(as, 2, RID_RET); | ||
1279 | emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs); | ||
1280 | } | ||
1281 | emit_setargr(as, 1, RID_RET); | ||
1282 | str = ra_alloc1(as, ir->op1, RSET_GPR); | ||
1283 | emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr)); | ||
1284 | } | 1433 | } |
1285 | 1434 | ||
1286 | static void asm_tostr(ASMState *as, IRIns *ir) | 1435 | static void asm_tostr(ASMState *as, IRIns *ir) |
1287 | { | 1436 | { |
1288 | IRIns *irl = IR(ir->op1); | 1437 | IRIns *irl = IR(ir->op1); |
1289 | ra_destreg(as, ir, RID_RET); | 1438 | IRRef args[2]; |
1290 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1439 | args[0] = ASMREF_L; |
1291 | as->gcsteps++; | 1440 | as->gcsteps++; |
1292 | if (irt_isnum(irl->t)) { | 1441 | if (irt_isnum(irl->t)) { |
1293 | /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */ | 1442 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; |
1294 | emit_call(as, lj_str_fromnum); | 1443 | args[1] = ASMREF_TMP1; |
1295 | emit_setargr(as, 1, RID_RET); | 1444 | asm_setupresult(as, ir, ci); |
1296 | emit_getgl(as, RID_RET, jit_L); | 1445 | asm_gencall(as, ci, args); |
1297 | emit_setargr(as, 2, RID_RET); | 1446 | emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1), |
1298 | emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl)); | 1447 | RID_ESP, ra_spill(as, irl)); |
1299 | } else { | 1448 | } else { |
1300 | /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */ | 1449 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; |
1301 | emit_call(as, lj_str_fromint); | 1450 | args[1] = ir->op1; |
1302 | emit_setargr(as, 1, RID_RET); | 1451 | asm_setupresult(as, ir, ci); |
1303 | emit_getgl(as, RID_RET, jit_L); | 1452 | asm_gencall(as, ci, args); |
1304 | emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR)); | ||
1305 | } | 1453 | } |
1306 | } | 1454 | } |
1307 | 1455 | ||
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir) | |||
1330 | lua_assert(!irt_isnil(ir->t)); | 1478 | lua_assert(!irt_isnil(ir->t)); |
1331 | return irt_type(ir->t)-IRT_FALSE; | 1479 | return irt_type(ir->t)-IRT_FALSE; |
1332 | } else { | 1480 | } else { |
1333 | lua_assert(irt_isaddr(ir->t)); | 1481 | lua_assert(irt_isgcv(ir->t)); |
1334 | lo = u32ptr(ir_kgc(ir)); | 1482 | lo = u32ptr(ir_kgc(ir)); |
1335 | hi = lo - 0x04c11db7; | 1483 | hi = lo - 0x04c11db7; |
1336 | } | 1484 | } |
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir) | |||
1517 | 1665 | ||
1518 | static void asm_newref(ASMState *as, IRIns *ir) | 1666 | static void asm_newref(ASMState *as, IRIns *ir) |
1519 | { | 1667 | { |
1520 | IRRef keyref = ir->op2; | 1668 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
1521 | IRIns *irkey = IR(keyref); | 1669 | IRRef args[3]; |
1522 | RegSet allow = RSET_GPR; | 1670 | IRIns *irkey; |
1523 | Reg tab, tmp; | 1671 | Reg tmp; |
1524 | ra_destreg(as, ir, RID_RET); | 1672 | args[0] = ASMREF_L; |
1525 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1673 | args[1] = ir->op1; |
1526 | tab = ra_alloc1(as, ir->op1, allow); | 1674 | args[2] = ASMREF_TMP1; |
1527 | tmp = ra_scratch(as, rset_clear(allow, tab)); | 1675 | asm_setupresult(as, ir, ci); |
1528 | /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */ | 1676 | asm_gencall(as, ci, args); |
1529 | emit_call(as, lj_tab_newkey); | 1677 | tmp = ra_releasetmp(as, ASMREF_TMP1); |
1530 | emit_setargr(as, 1, tmp); | 1678 | irkey = IR(ir->op2); |
1531 | emit_setargr(as, 2, tab); | ||
1532 | emit_getgl(as, tmp, jit_L); | ||
1533 | if (irt_isnum(irkey->t)) { | 1679 | if (irt_isnum(irkey->t)) { |
1534 | /* For numbers use the constant itself or a spill slot as a TValue. */ | 1680 | /* For numbers use the constant itself or a spill slot as a TValue. */ |
1535 | if (irref_isk(keyref)) { | 1681 | if (irref_isk(ir->op2)) |
1536 | emit_setargp(as, 3, ir_knum(irkey)); | 1682 | emit_loada(as, tmp, ir_knum(irkey)); |
1537 | } else { | 1683 | else |
1538 | emit_setargr(as, 3, tmp); | ||
1539 | emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); | 1684 | emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); |
1540 | } | ||
1541 | } else { | 1685 | } else { |
1542 | /* Otherwise use g->tmptv to hold the TValue. */ | 1686 | /* Otherwise use g->tmptv to hold the TValue. */ |
1543 | lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t)); | 1687 | if (!irref_isk(ir->op2)) { |
1544 | emit_setargr(as, 3, tmp); | 1688 | Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); |
1545 | if (!irref_isk(keyref)) { | ||
1546 | Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp)); | ||
1547 | emit_movtomro(as, src, tmp, 0); | 1689 | emit_movtomro(as, src, tmp, 0); |
1548 | } else if (!irt_ispri(irkey->t)) { | 1690 | } else if (!irt_ispri(irkey->t)) { |
1549 | emit_movmroi(as, tmp, 0, irkey->i); | 1691 | emit_movmroi(as, tmp, 0, irkey->i); |
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir) | |||
1600 | 1742 | ||
1601 | /* -- Loads and stores ---------------------------------------------------- */ | 1743 | /* -- Loads and stores ---------------------------------------------------- */ |
1602 | 1744 | ||
1603 | static void asm_fload(ASMState *as, IRIns *ir) | 1745 | static void asm_fxload(ASMState *as, IRIns *ir) |
1604 | { | 1746 | { |
1605 | Reg dest = ra_dest(as, ir, RSET_GPR); | 1747 | Reg dest = ra_dest(as, ir, RSET_GPR); |
1606 | x86Op xo; | 1748 | x86Op xo; |
1607 | asm_fusefref(as, ir, RSET_GPR); | 1749 | if (ir->o == IR_FLOAD) |
1750 | asm_fusefref(as, ir, RSET_GPR); | ||
1751 | else | ||
1752 | asm_fusexref(as, IR(ir->op1), RSET_GPR); | ||
1753 | /* ir->op2 is ignored -- unaligned loads are ok on x86. */ | ||
1608 | switch (irt_type(ir->t)) { | 1754 | switch (irt_type(ir->t)) { |
1609 | case IRT_I8: xo = XO_MOVSXb; break; | 1755 | case IRT_I8: xo = XO_MOVSXb; break; |
1610 | case IRT_U8: xo = XO_MOVZXb; break; | 1756 | case IRT_U8: xo = XO_MOVZXb; break; |
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir) | |||
1731 | } | 1877 | } |
1732 | } | 1878 | } |
1733 | 1879 | ||
1734 | static void asm_xload(ASMState *as, IRIns *ir) | 1880 | /* -- Allocations --------------------------------------------------------- */ |
1735 | { | ||
1736 | Reg dest = ra_dest(as, ir, RSET_GPR); | ||
1737 | x86Op xo; | ||
1738 | asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */ | ||
1739 | /* ir->op2 is ignored -- unaligned loads are ok on x86. */ | ||
1740 | switch (irt_type(ir->t)) { | ||
1741 | case IRT_I8: xo = XO_MOVSXb; break; | ||
1742 | case IRT_U8: xo = XO_MOVZXb; break; | ||
1743 | case IRT_I16: xo = XO_MOVSXw; break; | ||
1744 | case IRT_U16: xo = XO_MOVZXw; break; | ||
1745 | default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break; | ||
1746 | } | ||
1747 | emit_mrm(as, xo, dest, RID_MRM); | ||
1748 | } | ||
1749 | |||
1750 | /* -- String ops ---------------------------------------------------------- */ | ||
1751 | 1881 | ||
1752 | static void asm_snew(ASMState *as, IRIns *ir) | 1882 | static void asm_snew(ASMState *as, IRIns *ir) |
1753 | { | 1883 | { |
1754 | RegSet allow = RSET_GPR; | 1884 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; |
1755 | Reg left, right; | 1885 | IRRef args[3]; |
1756 | IRIns *irl; | 1886 | args[0] = ASMREF_L; |
1757 | ra_destreg(as, ir, RID_RET); | 1887 | args[1] = ir->op1; |
1758 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1888 | args[2] = ir->op2; |
1759 | irl = IR(ir->op1); | ||
1760 | left = irl->r; | ||
1761 | right = IR(ir->op2)->r; | ||
1762 | if (ra_noreg(left)) { | ||
1763 | lua_assert(irl->o == IR_STRREF); | ||
1764 | /* Get register only for non-const STRREF. */ | ||
1765 | if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) { | ||
1766 | if (ra_hasreg(right)) rset_clear(allow, right); | ||
1767 | left = ra_allocref(as, ir->op1, allow); | ||
1768 | } | ||
1769 | } | ||
1770 | if (ra_noreg(right) && !irref_isk(ir->op2)) { | ||
1771 | if (ra_hasreg(left)) rset_clear(allow, left); | ||
1772 | right = ra_allocref(as, ir->op2, allow); | ||
1773 | } | ||
1774 | /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */ | ||
1775 | emit_call(as, lj_str_new); | ||
1776 | emit_setargr(as, 1, RID_RET); | ||
1777 | emit_getgl(as, RID_RET, jit_L); | ||
1778 | if (ra_noreg(left)) /* Use immediate for const STRREF. */ | ||
1779 | emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i + | ||
1780 | (int32_t)sizeof(GCstr)); | ||
1781 | else | ||
1782 | emit_setargr(as, 2, left); | ||
1783 | if (ra_noreg(right)) | ||
1784 | emit_setargi(as, 3, IR(ir->op2)->i); | ||
1785 | else | ||
1786 | emit_setargr(as, 3, right); | ||
1787 | as->gcsteps++; | 1889 | as->gcsteps++; |
1890 | asm_setupresult(as, ir, ci); | ||
1891 | asm_gencall(as, ci, args); | ||
1788 | } | 1892 | } |
1789 | 1893 | ||
1790 | /* -- Table ops ----------------------------------------------------------- */ | ||
1791 | |||
1792 | static void asm_tnew(ASMState *as, IRIns *ir) | 1894 | static void asm_tnew(ASMState *as, IRIns *ir) |
1793 | { | 1895 | { |
1794 | ra_destreg(as, ir, RID_RET); | 1896 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; |
1795 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1897 | IRRef args[2]; |
1796 | /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */ | 1898 | args[0] = ASMREF_L; |
1797 | emit_call(as, lj_tab_new); | 1899 | args[1] = ASMREF_TMP1; |
1798 | emit_setargr(as, 1, RID_RET); | ||
1799 | emit_setargi(as, 2, ir->op1); | ||
1800 | emit_setargi(as, 3, ir->op2); | ||
1801 | emit_getgl(as, RID_RET, jit_L); | ||
1802 | as->gcsteps++; | 1900 | as->gcsteps++; |
1901 | asm_setupresult(as, ir, ci); | ||
1902 | asm_gencall(as, ci, args); | ||
1903 | emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24)); | ||
1803 | } | 1904 | } |
1804 | 1905 | ||
1805 | static void asm_tdup(ASMState *as, IRIns *ir) | 1906 | static void asm_tdup(ASMState *as, IRIns *ir) |
1806 | { | 1907 | { |
1807 | ra_destreg(as, ir, RID_RET); | 1908 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; |
1808 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | 1909 | IRRef args[2]; |
1809 | /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */ | 1910 | args[0] = ASMREF_L; |
1810 | emit_call(as, lj_tab_dup); | 1911 | args[1] = ir->op1; |
1811 | emit_setargr(as, 1, RID_RET); | ||
1812 | emit_setargp(as, 2, ir_kgc(IR(ir->op1))); | ||
1813 | emit_getgl(as, RID_RET, jit_L); | ||
1814 | as->gcsteps++; | 1912 | as->gcsteps++; |
1913 | asm_setupresult(as, ir, ci); | ||
1914 | asm_gencall(as, ci, args); | ||
1815 | } | 1915 | } |
1816 | 1916 | ||
1817 | static void asm_tlen(ASMState *as, IRIns *ir) | 1917 | /* -- Write barriers ------------------------------------------------------ */ |
1818 | { | ||
1819 | ra_destreg(as, ir, RID_RET); | ||
1820 | ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); | ||
1821 | emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */ | ||
1822 | emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR)); | ||
1823 | } | ||
1824 | 1918 | ||
1825 | static void asm_tbar(ASMState *as, IRIns *ir) | 1919 | static void asm_tbar(ASMState *as, IRIns *ir) |
1826 | { | 1920 | { |
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir) | |||
1839 | 1933 | ||
1840 | static void asm_obar(ASMState *as, IRIns *ir) | 1934 | static void asm_obar(ASMState *as, IRIns *ir) |
1841 | { | 1935 | { |
1842 | RegSet allow = RSET_GPR; | 1936 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; |
1843 | Reg obj, val; | 1937 | IRRef args[2]; |
1844 | GCobj *valp; | ||
1845 | MCLabel l_end; | 1938 | MCLabel l_end; |
1846 | int32_t ofs; | 1939 | Reg obj; |
1847 | ra_evictset(as, RSET_SCRATCH); | ||
1848 | if (irref_isk(ir->op2)) { | ||
1849 | valp = ir_kgc(IR(ir->op2)); | ||
1850 | val = RID_NONE; | ||
1851 | } else { | ||
1852 | valp = NULL; | ||
1853 | val = ra_alloc1(as, ir->op2, allow); | ||
1854 | rset_clear(allow, val); | ||
1855 | } | ||
1856 | obj = ra_alloc1(as, ir->op1, allow); | ||
1857 | l_end = emit_label(as); | ||
1858 | /* No need for other object barriers (yet). */ | 1940 | /* No need for other object barriers (yet). */ |
1859 | lua_assert(IR(ir->op1)->o == IR_UREFC); | 1941 | lua_assert(IR(ir->op1)->o == IR_UREFC); |
1860 | ofs = -(int32_t)offsetof(GCupval, tv); | 1942 | l_end = emit_label(as); |
1861 | /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */ | 1943 | args[0] = ASMREF_TMP1; |
1862 | emit_call(as, lj_gc_barrieruv); | 1944 | args[1] = ir->op1; |
1863 | if (ofs == 0) { | 1945 | asm_gencall(as, ci, args); |
1864 | emit_setargr(as, 2, obj); | 1946 | emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J)); |
1865 | } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) { | 1947 | obj = IR(ir->op1)->r; |
1866 | emit_setargr(as, 2, obj); | ||
1867 | emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs); | ||
1868 | } else { | ||
1869 | emit_setargr(as, 2, RID_RET); | ||
1870 | emit_rmro(as, XO_LEA, RID_RET, obj, ofs); | ||
1871 | } | ||
1872 | emit_setargp(as, 1, J2G(as->J)); | ||
1873 | if (valp) | ||
1874 | emit_setargp(as, 3, valp); | ||
1875 | else | ||
1876 | emit_setargr(as, 3, val); | ||
1877 | emit_sjcc(as, CC_Z, l_end); | 1948 | emit_sjcc(as, CC_Z, l_end); |
1878 | emit_i8(as, LJ_GC_WHITES); | 1949 | emit_i8(as, LJ_GC_WHITES); |
1879 | if (valp) | 1950 | if (irref_isk(ir->op2)) { |
1880 | emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked); | 1951 | GCobj *vp = ir_kgc(IR(ir->op2)); |
1881 | else | 1952 | emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked); |
1953 | } else { | ||
1954 | Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj)); | ||
1882 | emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); | 1955 | emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); |
1956 | } | ||
1883 | emit_sjcc(as, CC_Z, l_end); | 1957 | emit_sjcc(as, CC_Z, l_end); |
1884 | emit_i8(as, LJ_GC_BLACK); | 1958 | emit_i8(as, LJ_GC_BLACK); |
1885 | emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, | 1959 | emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, |
1886 | ofs + (int32_t)offsetof(GChead, marked)); | 1960 | (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); |
1887 | } | 1961 | } |
1888 | 1962 | ||
1889 | /* -- FP/int arithmetic and logic operations ------------------------------ */ | 1963 | /* -- FP/int arithmetic and logic operations ------------------------------ */ |
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2260 | } | 2334 | } |
2261 | } | 2335 | } |
2262 | emit_mrm(as, XO_UCOMISD, left, right); | 2336 | emit_mrm(as, XO_UCOMISD, left, right); |
2263 | } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) { | 2337 | } else { |
2264 | IRRef lref = ir->op1, rref = ir->op2; | 2338 | IRRef lref = ir->op1, rref = ir->op2; |
2265 | IROp leftop = (IROp)(IR(lref)->o); | 2339 | IROp leftop = (IROp)(IR(lref)->o); |
2266 | lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); | 2340 | lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E)); |
2267 | /* Swap constants (only for ABC) and fusable loads to the right. */ | 2341 | /* Swap constants (only for ABC) and fusable loads to the right. */ |
2268 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { | 2342 | if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { |
2269 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ | 2343 | if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ |
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2294 | } else { | 2368 | } else { |
2295 | Reg left; | 2369 | Reg left; |
2296 | if (opisfusableload((IROp)irl->o) && | 2370 | if (opisfusableload((IROp)irl->o) && |
2297 | ((irt_isi8(irl->t) && checki8(imm)) || | 2371 | ((irt_isu8(irl->t) && checku8(imm)) || |
2298 | (irt_isu8(irl->t) && checku8(imm)))) { | 2372 | ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) || |
2299 | /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8 | 2373 | (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) { |
2300 | ** loads are handled here. The IRT_I16/IRT_U16 loads should never be | 2374 | /* Only the IRT_INT case is fused by asm_fuseload. |
2301 | ** fused, since cmp word [mem], imm16 has a length-changing prefix. | 2375 | ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads |
2376 | ** are handled here. | ||
2377 | ** Note that cmp word [mem], imm16 should not be generated, | ||
2378 | ** since it has a length-changing prefix. Compares of a word | ||
2379 | ** against a sign-extended imm8 are ok, however. | ||
2302 | */ | 2380 | */ |
2303 | IRType1 origt = irl->t; /* Temporarily flip types. */ | 2381 | IRType1 origt = irl->t; /* Temporarily flip types. */ |
2304 | irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; | 2382 | irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; |
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2307 | if (left == RID_MRM) { /* Fusion succeeded? */ | 2385 | if (left == RID_MRM) { /* Fusion succeeded? */ |
2308 | asm_guardcc(as, cc); | 2386 | asm_guardcc(as, cc); |
2309 | emit_i8(as, imm); | 2387 | emit_i8(as, imm); |
2310 | emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM); | 2388 | emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ? |
2389 | XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM); | ||
2311 | return; | 2390 | return; |
2312 | } /* Otherwise handle register case as usual. */ | 2391 | } /* Otherwise handle register case as usual. */ |
2313 | } else { | 2392 | } else { |
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2337 | asm_guardcc(as, cc); | 2416 | asm_guardcc(as, cc); |
2338 | emit_mrm(as, XO_CMP, left, right); | 2417 | emit_mrm(as, XO_CMP, left, right); |
2339 | } | 2418 | } |
2340 | } else { /* Handle ordered string compares. */ | ||
2341 | RegSet allow = RSET_GPR; | ||
2342 | /* This assumes lj_str_cmp never uses any SSE registers. */ | ||
2343 | ra_evictset(as, (RSET_SCRATCH & RSET_GPR)); | ||
2344 | asm_guardcc(as, cc); | ||
2345 | emit_rr(as, XO_TEST, RID_RET, RID_RET); | ||
2346 | emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */ | ||
2347 | if (irref_isk(ir->op1)) { | ||
2348 | emit_setargi(as, 1, IR(ir->op1)->i); | ||
2349 | } else { | ||
2350 | Reg left = ra_alloc1(as, ir->op1, allow); | ||
2351 | rset_clear(allow, left); | ||
2352 | emit_setargr(as, 1, left); | ||
2353 | } | ||
2354 | if (irref_isk(ir->op2)) { | ||
2355 | emit_setargi(as, 2, IR(ir->op2)->i); | ||
2356 | } else { | ||
2357 | Reg right = ra_alloc1(as, ir->op2, allow); | ||
2358 | emit_setargr(as, 2, right); | ||
2359 | } | ||
2360 | } | 2419 | } |
2361 | } | 2420 | } |
2362 | 2421 | ||
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc) | |||
2366 | /* -- GC handling --------------------------------------------------------- */ | 2425 | /* -- GC handling --------------------------------------------------------- */ |
2367 | 2426 | ||
2368 | /* Sync all live GC values to Lua stack slots. */ | 2427 | /* Sync all live GC values to Lua stack slots. */ |
2369 | static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) | 2428 | static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) |
2370 | { | 2429 | { |
2430 | /* Some care must be taken when allocating registers here, since this is | ||
2431 | ** not part of the fast path. All scratch registers are evicted in the | ||
2432 | ** fast path, so it's easiest to force allocation from scratch registers | ||
2433 | ** only. This avoids register allocation state unification. | ||
2434 | */ | ||
2435 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); | ||
2371 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; | 2436 | IRRef2 *map = &as->T->snapmap[snap->mapofs]; |
2372 | BCReg s, nslots = snap->nslots; | 2437 | BCReg s, nslots = snap->nslots; |
2373 | for (s = 0; s < nslots; s++) { | 2438 | for (s = 0; s < nslots; s++) { |
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) | |||
2392 | /* Check GC threshold and do one or more GC steps. */ | 2457 | /* Check GC threshold and do one or more GC steps. */ |
2393 | static void asm_gc_check(ASMState *as, SnapShot *snap) | 2458 | static void asm_gc_check(ASMState *as, SnapShot *snap) |
2394 | { | 2459 | { |
2460 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; | ||
2461 | IRRef args[2]; | ||
2395 | MCLabel l_end; | 2462 | MCLabel l_end; |
2396 | const BCIns *pc; | 2463 | Reg base, lstate, tmp; |
2397 | Reg tmp, base; | ||
2398 | RegSet drop = RSET_SCRATCH; | 2464 | RegSet drop = RSET_SCRATCH; |
2399 | /* Must evict BASE because the stack may be reallocated by the GC. */ | 2465 | if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */ |
2400 | if (ra_hasreg(IR(REF_BASE)->r)) | 2466 | drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */ |
2401 | drop |= RID2RSET(IR(REF_BASE)->r); | ||
2402 | ra_evictset(as, drop); | 2467 | ra_evictset(as, drop); |
2403 | base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET)); | ||
2404 | l_end = emit_label(as); | 2468 | l_end = emit_label(as); |
2405 | /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */ | 2469 | args[0] = ASMREF_L; |
2406 | emit_call(as, lj_gc_step_jit); | 2470 | args[1] = ASMREF_TMP1; |
2407 | emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); | 2471 | asm_gencall(as, ci, args); |
2408 | emit_setargr(as, 1, RID_RET); | 2472 | tmp = ra_releasetmp(as, ASMREF_TMP1); |
2409 | emit_setargi(as, 3, (int32_t)as->gcsteps); | 2473 | emit_loadi(as, tmp, (int32_t)as->gcsteps); |
2410 | emit_getgl(as, RID_RET, jit_L); | 2474 | /* We don't know spadj yet, so get the C frame from L->cframe. */ |
2411 | pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots]; | 2475 | emit_movmroi(as, tmp, CFRAME_OFS_PC, |
2412 | emit_setargp(as, 2, pc); | 2476 | (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); |
2413 | asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base)); | 2477 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); |
2414 | if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */ | 2478 | lstate = IR(ASMREF_L)->r; |
2415 | ra_restore(as, REF_BASE); /* Better do it inside the slow path. */ | 2479 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); |
2480 | /* It's ok if lstate is already in a non-scratch reg. But all allocations | ||
2481 | ** in the non-fast path must use a scratch reg. See comment above. | ||
2482 | */ | ||
2483 | base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate)); | ||
2484 | emit_movtomro(as, base, lstate, offsetof(lua_State, base)); | ||
2485 | asm_gc_sync(as, snap, base); | ||
2486 | /* BASE/L get restored anyway, better do it inside the slow path. */ | ||
2487 | if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE); | ||
2488 | if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r)) | ||
2489 | ra_restore(as, ASMREF_L); | ||
2416 | /* Jump around GC step if GC total < GC threshold. */ | 2490 | /* Jump around GC step if GC total < GC threshold. */ |
2417 | tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); | 2491 | tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); |
2418 | emit_sjcc(as, CC_B, l_end); | 2492 | emit_sjcc(as, CC_B, l_end); |
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as) | |||
2666 | { | 2740 | { |
2667 | int32_t spadj; | 2741 | int32_t spadj; |
2668 | emit_setgli(as, vmstate, (int32_t)as->J->curtrace); | 2742 | emit_setgli(as, vmstate, (int32_t)as->J->curtrace); |
2669 | spadj = sps_adjust(as); | 2743 | spadj = sps_adjust(as->evenspill); |
2670 | as->T->spadjust = (uint16_t)spadj; | 2744 | as->T->spadjust = (uint16_t)spadj; |
2671 | emit_addptr(as, RID_ESP, -spadj); | 2745 | emit_addptr(as, RID_ESP, -spadj); |
2672 | } | 2746 | } |
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as) | |||
2676 | { | 2750 | { |
2677 | IRIns *ir = IR(REF_BASE); | 2751 | IRIns *ir = IR(REF_BASE); |
2678 | Reg r = ir->r; | 2752 | Reg r = ir->r; |
2679 | lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); | 2753 | lua_assert(!ra_hasspill(ir->s)); |
2680 | ra_free(as, r); | 2754 | if (ra_hasreg(r)) { |
2681 | if (r != RID_BASE) { | 2755 | ra_free(as, r); |
2682 | ra_scratch(as, RID2RSET(RID_BASE)); | 2756 | if (r != RID_BASE) { |
2683 | emit_rr(as, XO_MOV, r, RID_BASE); | 2757 | ra_scratch(as, RID2RSET(RID_BASE)); |
2758 | emit_rr(as, XO_MOV, r, RID_BASE); | ||
2759 | } | ||
2684 | } | 2760 | } |
2685 | } | 2761 | } |
2686 | 2762 | ||
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as) | |||
2749 | } | 2825 | } |
2750 | 2826 | ||
2751 | /* Calculate stack frame adjustment. */ | 2827 | /* Calculate stack frame adjustment. */ |
2752 | spadj = sps_adjust(as); | 2828 | spadj = sps_adjust(as->evenspill); |
2753 | spdelta = spadj - (int32_t)as->parent->spadjust; | 2829 | spdelta = spadj - (int32_t)as->parent->spadjust; |
2754 | if (spdelta < 0) { /* Don't shrink the stack frame. */ | 2830 | if (spdelta < 0) { /* Don't shrink the stack frame. */ |
2755 | spadj = (int32_t)as->parent->spadjust; | 2831 | spadj = (int32_t)as->parent->spadjust; |
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as) | |||
2877 | GCfunc *fn = ir_kfunc(IR(ir->op2)); | 2953 | GCfunc *fn = ir_kfunc(IR(ir->op2)); |
2878 | if (isluafunc(fn)) { | 2954 | if (isluafunc(fn)) { |
2879 | BCReg fs = s + funcproto(fn)->framesize; | 2955 | BCReg fs = s + funcproto(fn)->framesize; |
2880 | newbase = s; | ||
2881 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
2882 | if (fs > topslot) topslot = fs; | 2956 | if (fs > topslot) topslot = fs; |
2957 | if (s != 0) { | ||
2958 | newbase = s; | ||
2959 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
2960 | } | ||
2883 | } | 2961 | } |
2884 | } | 2962 | } |
2885 | } | 2963 | } |
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
3063 | 3141 | ||
3064 | /* Loads and stores. */ | 3142 | /* Loads and stores. */ |
3065 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; | 3143 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; |
3066 | case IR_FLOAD: asm_fload(as, ir); break; | 3144 | case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; |
3067 | case IR_SLOAD: asm_sload(as, ir); break; | 3145 | case IR_SLOAD: asm_sload(as, ir); break; |
3068 | case IR_XLOAD: asm_xload(as, ir); break; | ||
3069 | 3146 | ||
3070 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; | 3147 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; |
3071 | case IR_FSTORE: asm_fstore(as, ir); break; | 3148 | case IR_FSTORE: asm_fstore(as, ir); break; |
3072 | 3149 | ||
3073 | /* String ops. */ | 3150 | /* Allocations. */ |
3074 | case IR_SNEW: asm_snew(as, ir); break; | 3151 | case IR_SNEW: asm_snew(as, ir); break; |
3075 | |||
3076 | /* Table ops. */ | ||
3077 | case IR_TNEW: asm_tnew(as, ir); break; | 3152 | case IR_TNEW: asm_tnew(as, ir); break; |
3078 | case IR_TDUP: asm_tdup(as, ir); break; | 3153 | case IR_TDUP: asm_tdup(as, ir); break; |
3079 | case IR_TLEN: asm_tlen(as, ir); break; | 3154 | |
3155 | /* Write barriers. */ | ||
3080 | case IR_TBAR: asm_tbar(as, ir); break; | 3156 | case IR_TBAR: asm_tbar(as, ir); break; |
3081 | case IR_OBAR: asm_obar(as, ir); break; | 3157 | case IR_OBAR: asm_obar(as, ir); break; |
3082 | 3158 | ||
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir) | |||
3092 | case IR_TOSTR: asm_tostr(as, ir); break; | 3168 | case IR_TOSTR: asm_tostr(as, ir); break; |
3093 | case IR_STRTO: asm_strto(as, ir); break; | 3169 | case IR_STRTO: asm_strto(as, ir); break; |
3094 | 3170 | ||
3171 | /* Calls. */ | ||
3172 | case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; | ||
3173 | case IR_CARG: break; | ||
3174 | |||
3095 | default: | 3175 | default: |
3096 | setintV(&as->J->errinfo, ir->o); | 3176 | setintV(&as->J->errinfo, ir->o); |
3097 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); | 3177 | lj_trace_err_info(as->J, LJ_TRERR_NYIIR); |
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
3123 | IRRef i, nins; | 3203 | IRRef i, nins; |
3124 | int inloop; | 3204 | int inloop; |
3125 | 3205 | ||
3206 | ra_setup(as); | ||
3207 | |||
3126 | /* Clear reg/sp for constants. */ | 3208 | /* Clear reg/sp for constants. */ |
3127 | for (i = T->nk; i < REF_BIAS; i++) | 3209 | for (i = T->nk; i < REF_BIAS; i++) |
3128 | IR(i)->prev = REGSP_INIT; | 3210 | IR(i)->prev = REGSP_INIT; |
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
3144 | as->curins = nins; | 3226 | as->curins = nins; |
3145 | 3227 | ||
3146 | inloop = 0; | 3228 | inloop = 0; |
3229 | as->evenspill = SPS_FIRST; | ||
3147 | for (i = REF_FIRST; i < nins; i++) { | 3230 | for (i = REF_FIRST; i < nins; i++) { |
3148 | IRIns *ir = IR(i); | 3231 | IRIns *ir = IR(i); |
3149 | switch (ir->o) { | 3232 | switch (ir->o) { |
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
3166 | if (i == as->stopins+1 && ir->op1 == ir->op2) | 3249 | if (i == as->stopins+1 && ir->op1 == ir->op2) |
3167 | as->stopins++; | 3250 | as->stopins++; |
3168 | break; | 3251 | break; |
3252 | case IR_CALLN: case IR_CALLL: case IR_CALLS: { | ||
3253 | const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; | ||
3254 | /* NYI: not fastcall-aware, but doesn't matter (yet). */ | ||
3255 | if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */ | ||
3256 | as->evenspill = (int32_t)CCI_NARGS(ci); | ||
3257 | #if LJ_64 | ||
3258 | ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET); | ||
3259 | #else | ||
3260 | ir->prev = REGSP_HINT(RID_RET); | ||
3261 | #endif | ||
3262 | if (inloop) | ||
3263 | as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ? | ||
3264 | (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; | ||
3265 | continue; | ||
3266 | } | ||
3169 | /* C calls evict all scratch regs and return results in RID_RET. */ | 3267 | /* C calls evict all scratch regs and return results in RID_RET. */ |
3170 | case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR: | 3268 | case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR: |
3171 | case IR_NEWREF: | 3269 | case IR_NEWREF: |
3172 | ir->prev = REGSP_HINT(RID_RET); | 3270 | ir->prev = REGSP_HINT(RID_RET); |
3173 | if (inloop) | 3271 | if (inloop) |
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
3177 | if (inloop) | 3275 | if (inloop) |
3178 | as->modset = RSET_SCRATCH; | 3276 | as->modset = RSET_SCRATCH; |
3179 | break; | 3277 | break; |
3180 | /* Ordered string compares evict all integer scratch registers. */ | ||
3181 | case IR_LT: case IR_GE: case IR_LE: case IR_GT: | ||
3182 | if (irt_isstr(ir->t) && inloop) | ||
3183 | as->modset |= (RSET_SCRATCH & RSET_GPR); | ||
3184 | break; | ||
3185 | /* Non-constant shift counts need to be in RID_ECX. */ | 3278 | /* Non-constant shift counts need to be in RID_ECX. */ |
3186 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: | 3279 | case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: |
3187 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) | 3280 | if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) |
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T) | |||
3200 | } | 3293 | } |
3201 | ir->prev = REGSP_INIT; | 3294 | ir->prev = REGSP_INIT; |
3202 | } | 3295 | } |
3296 | if ((as->evenspill & 1)) | ||
3297 | as->oddspill = as->evenspill++; | ||
3298 | else | ||
3299 | as->oddspill = 0; | ||
3203 | } | 3300 | } |
3204 | 3301 | ||
3205 | /* -- Assembler core ------------------------------------------------------ */ | 3302 | /* -- Assembler core ------------------------------------------------------ */ |
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T) | |||
3263 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; | 3360 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; |
3264 | 3361 | ||
3265 | /* Setup register allocation. */ | 3362 | /* Setup register allocation. */ |
3266 | ra_setup(as); | ||
3267 | asm_setup_regsp(as, T); | 3363 | asm_setup_regsp(as, T); |
3268 | 3364 | ||
3269 | if (!as->loopref) { | 3365 | if (!as->loopref) { |
diff --git a/src/lj_def.h b/src/lj_def.h index dbfd5bf5..3d6ba417 100644 --- a/src/lj_def.h +++ b/src/lj_def.h | |||
@@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t; | |||
88 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) | 88 | #define checki8(x) ((x) == (int32_t)(int8_t)(x)) |
89 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) | 89 | #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) |
90 | #define checki16(x) ((x) == (int32_t)(int16_t)(x)) | 90 | #define checki16(x) ((x) == (int32_t)(int16_t)(x)) |
91 | #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) | ||
91 | 92 | ||
92 | /* Every half-decent C compiler transforms this into a rotate instruction. */ | 93 | /* Every half-decent C compiler transforms this into a rotate instruction. */ |
93 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) | 94 | #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) |
diff --git a/src/lj_gc.c b/src/lj_gc.c index 0d8a03ec..5c9d2bcb 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c | |||
@@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o) | |||
73 | } | 73 | } |
74 | } | 74 | } |
75 | 75 | ||
76 | /* Mark the base metatables. */ | 76 | /* Mark GC roots. */ |
77 | static void gc_mark_basemt(global_State *g) | 77 | static void gc_mark_gcroot(global_State *g) |
78 | { | 78 | { |
79 | int i; | 79 | ptrdiff_t i; |
80 | for (i = 0; i < BASEMT_MAX; i++) | 80 | for (i = 0; i < GCROOT__MAX; i++) |
81 | if (tabref(g->basemt[i]) != NULL) | 81 | if (gcref(g->gcroot[i]) != NULL) |
82 | gc_markobj(g, tabref(g->basemt[i])); | 82 | gc_markobj(g, gcref(g->gcroot[i])); |
83 | } | 83 | } |
84 | 84 | ||
85 | /* Start a GC cycle and mark the root set. */ | 85 | /* Start a GC cycle and mark the root set. */ |
@@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g) | |||
91 | gc_markobj(g, mainthread(g)); | 91 | gc_markobj(g, mainthread(g)); |
92 | gc_markobj(g, tabref(mainthread(g)->env)); | 92 | gc_markobj(g, tabref(mainthread(g)->env)); |
93 | gc_marktv(g, &g->registrytv); | 93 | gc_marktv(g, &g->registrytv); |
94 | gc_mark_basemt(g); | 94 | gc_mark_gcroot(g); |
95 | g->gc.state = GCSpropagate; | 95 | g->gc.state = GCSpropagate; |
96 | } | 96 | } |
97 | 97 | ||
@@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L) | |||
541 | lua_assert(!iswhite(obj2gco(mainthread(g)))); | 541 | lua_assert(!iswhite(obj2gco(mainthread(g)))); |
542 | gc_markobj(g, L); /* Mark running thread. */ | 542 | gc_markobj(g, L); /* Mark running thread. */ |
543 | gc_mark_curtrace(g); /* Mark current trace. */ | 543 | gc_mark_curtrace(g); /* Mark current trace. */ |
544 | gc_mark_basemt(g); /* Mark base metatables (again). */ | 544 | gc_mark_gcroot(g); /* Mark GC roots (again). */ |
545 | gc_propagate_gray(g); /* Propagate all of the above. */ | 545 | gc_propagate_gray(g); /* Propagate all of the above. */ |
546 | 546 | ||
547 | setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ | 547 | setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ |
@@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L) | |||
643 | } | 643 | } |
644 | 644 | ||
645 | /* Ditto, but fix the stack top first. */ | 645 | /* Ditto, but fix the stack top first. */ |
646 | void lj_gc_step_fixtop(lua_State *L) | 646 | void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) |
647 | { | 647 | { |
648 | if (curr_funcisL(L)) L->top = curr_topL(L); | 648 | if (curr_funcisL(L)) L->top = curr_topL(L); |
649 | lj_gc_step(L); | 649 | lj_gc_step(L); |
650 | } | 650 | } |
651 | 651 | ||
652 | /* Perform multiple GC steps. Called from JIT-compiled code. */ | 652 | /* Perform multiple GC steps. Called from JIT-compiled code. */ |
653 | void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) | 653 | void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps) |
654 | { | 654 | { |
655 | cframe_pc(cframe_raw(L->cframe)) = pc; | ||
656 | L->top = curr_topL(L); | 655 | L->top = curr_topL(L); |
657 | while (steps-- > 0 && lj_gc_step(L) == 0) | 656 | while (steps-- > 0 && lj_gc_step(L) == 0) |
658 | ; | 657 | ; |
@@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) | |||
711 | makewhite(g, o); /* Make it white to avoid the following barrier. */ | 710 | makewhite(g, o); /* Make it white to avoid the following barrier. */ |
712 | } | 711 | } |
713 | 712 | ||
714 | /* The reason for duplicating this is that it needs to be visible from ASM. */ | 713 | /* Specialized barrier for closed upvalue. Pass &uv->tv. */ |
715 | void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) | 714 | void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv) |
716 | { | 715 | { |
717 | lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); | 716 | #define TV2MARKED(x) \ |
718 | lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); | 717 | (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked))) |
719 | lua_assert(o->gch.gct == ~LJ_TUPVAL); | ||
720 | /* Preserve invariant during propagation. Otherwise it doesn't matter. */ | ||
721 | if (g->gc.state == GCSpropagate) | 718 | if (g->gc.state == GCSpropagate) |
722 | gc_mark(g, v); /* Move frontier forward. */ | 719 | gc_mark(g, gcV(tv)); |
723 | else | 720 | else |
724 | makewhite(g, o); /* Make it white to avoid the following barrier. */ | 721 | TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g); |
722 | #undef TV2MARKED | ||
725 | } | 723 | } |
726 | 724 | ||
727 | /* Close upvalue. Also needs a write barrier. */ | 725 | /* Close upvalue. Also needs a write barrier. */ |
diff --git a/src/lj_gc.h b/src/lj_gc.h index 192066d3..0dbb9b82 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h | |||
@@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all); | |||
43 | LJ_FUNC void lj_gc_finalizeudata(lua_State *L); | 43 | LJ_FUNC void lj_gc_finalizeudata(lua_State *L); |
44 | LJ_FUNC void lj_gc_freeall(global_State *g); | 44 | LJ_FUNC void lj_gc_freeall(global_State *g); |
45 | LJ_FUNCA int lj_gc_step(lua_State *L); | 45 | LJ_FUNCA int lj_gc_step(lua_State *L); |
46 | LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); | 46 | LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L); |
47 | LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps); | 47 | LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps); |
48 | LJ_FUNC void lj_gc_fullgc(lua_State *L); | 48 | LJ_FUNC void lj_gc_fullgc(lua_State *L); |
49 | 49 | ||
50 | /* GC check: drive collector forward if the GC threshold has been reached. */ | 50 | /* GC check: drive collector forward if the GC threshold has been reached. */ |
@@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L); | |||
58 | /* Write barriers. */ | 58 | /* Write barriers. */ |
59 | LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); | 59 | LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); |
60 | LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); | 60 | LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); |
61 | LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v); | 61 | LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv); |
62 | LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); | 62 | LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); |
63 | LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); | 63 | LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); |
64 | 64 | ||
diff --git a/src/lj_ir.c b/src/lj_ir.c index 1efb12f0..cf0b6b55 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c | |||
@@ -6,16 +6,22 @@ | |||
6 | #define lj_ir_c | 6 | #define lj_ir_c |
7 | #define LUA_CORE | 7 | #define LUA_CORE |
8 | 8 | ||
9 | /* For pointers to libc/libm functions. */ | ||
10 | #include <stdio.h> | ||
11 | #include <math.h> | ||
12 | |||
9 | #include "lj_obj.h" | 13 | #include "lj_obj.h" |
10 | 14 | ||
11 | #if LJ_HASJIT | 15 | #if LJ_HASJIT |
12 | 16 | ||
13 | #include "lj_gc.h" | 17 | #include "lj_gc.h" |
14 | #include "lj_str.h" | 18 | #include "lj_str.h" |
19 | #include "lj_tab.h" | ||
15 | #include "lj_ir.h" | 20 | #include "lj_ir.h" |
16 | #include "lj_jit.h" | 21 | #include "lj_jit.h" |
17 | #include "lj_iropt.h" | 22 | #include "lj_iropt.h" |
18 | #include "lj_trace.h" | 23 | #include "lj_trace.h" |
24 | #include "lj_lib.h" | ||
19 | 25 | ||
20 | /* Some local macros to save typing. Undef'd at the end. */ | 26 | /* Some local macros to save typing. Undef'd at the end. */ |
21 | #define IR(ref) (&J->cur.ir[(ref)]) | 27 | #define IR(ref) (&J->cur.ir[(ref)]) |
@@ -32,6 +38,17 @@ IRDEF(IRMODE) | |||
32 | 0 | 38 | 0 |
33 | }; | 39 | }; |
34 | 40 | ||
41 | /* C call info for CALL* instructions. */ | ||
42 | LJ_DATADEF const CCallInfo lj_ir_callinfo[] = { | ||
43 | #define IRCALLCI(name, nargs, kind, type, flags) \ | ||
44 | { (ASMFunction)name, \ | ||
45 | (nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) }, | ||
46 | IRCALLDEF(IRCALLCI) | ||
47 | #undef IRCALLCI | ||
48 | { NULL, 0 } | ||
49 | }; | ||
50 | |||
51 | |||
35 | /* -- IR emitter ---------------------------------------------------------- */ | 52 | /* -- IR emitter ---------------------------------------------------------- */ |
36 | 53 | ||
37 | /* Grow IR buffer at the top. */ | 54 | /* Grow IR buffer at the top. */ |
@@ -92,6 +109,25 @@ TRef LJ_FASTCALL lj_ir_emit(jit_State *J) | |||
92 | return TREF(ref, irt_t((ir->t = fins->t))); | 109 | return TREF(ref, irt_t((ir->t = fins->t))); |
93 | } | 110 | } |
94 | 111 | ||
112 | /* Emit call to a C function. */ | ||
113 | TRef lj_ir_call(jit_State *J, IRCallID id, ...) | ||
114 | { | ||
115 | const CCallInfo *ci = &lj_ir_callinfo[id]; | ||
116 | uint32_t n = CCI_NARGS(ci); | ||
117 | TRef tr = TREF_NIL; | ||
118 | va_list argp; | ||
119 | va_start(argp, id); | ||
120 | if ((ci->flags & CCI_L)) n--; | ||
121 | if (n > 0) | ||
122 | tr = va_arg(argp, IRRef); | ||
123 | while (n-- > 1) | ||
124 | tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef)); | ||
125 | va_end(argp); | ||
126 | if (CCI_OP(ci) == IR_CALLS) | ||
127 | J->needsnap = 1; /* Need snapshot after call with side effect. */ | ||
128 | return emitir(CCI_OPTYPE(ci), tr, id); | ||
129 | } | ||
130 | |||
95 | /* -- Interning of constants ---------------------------------------------- */ | 131 | /* -- Interning of constants ---------------------------------------------- */ |
96 | 132 | ||
97 | /* | 133 | /* |
diff --git a/src/lj_ir.h b/src/lj_ir.h index a6973a81..9a7e711d 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h | |||
@@ -8,6 +8,8 @@ | |||
8 | 8 | ||
9 | #include "lj_obj.h" | 9 | #include "lj_obj.h" |
10 | 10 | ||
11 | /* -- IR instructions ----------------------------------------------------- */ | ||
12 | |||
11 | /* IR instruction definition. Order matters, see below. */ | 13 | /* IR instruction definition. Order matters, see below. */ |
12 | #define IRDEF(_) \ | 14 | #define IRDEF(_) \ |
13 | /* Miscellaneous ops. */ \ | 15 | /* Miscellaneous ops. */ \ |
@@ -101,13 +103,12 @@ | |||
101 | _(USTORE, S , ref, ref) \ | 103 | _(USTORE, S , ref, ref) \ |
102 | _(FSTORE, S , ref, ref) \ | 104 | _(FSTORE, S , ref, ref) \ |
103 | \ | 105 | \ |
104 | /* String ops. */ \ | 106 | /* Allocations. */ \ |
105 | _(SNEW, N , ref, ref) \ | 107 | _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \ |
106 | \ | ||
107 | /* Table ops. */ \ | ||
108 | _(TNEW, A , lit, lit) \ | 108 | _(TNEW, A , lit, lit) \ |
109 | _(TDUP, A , ref, ___) \ | 109 | _(TDUP, A , ref, ___) \ |
110 | _(TLEN, L , ref, ___) \ | 110 | \ |
111 | /* Write barriers. */ \ | ||
111 | _(TBAR, S , ref, ___) \ | 112 | _(TBAR, S , ref, ___) \ |
112 | _(OBAR, S , ref, ref) \ | 113 | _(OBAR, S , ref, ref) \ |
113 | \ | 114 | \ |
@@ -118,6 +119,12 @@ | |||
118 | _(TOSTR, N , ref, ___) \ | 119 | _(TOSTR, N , ref, ___) \ |
119 | _(STRTO, G , ref, ___) \ | 120 | _(STRTO, G , ref, ___) \ |
120 | \ | 121 | \ |
122 | /* Calls. */ \ | ||
123 | _(CALLN, N , ref, lit) \ | ||
124 | _(CALLL, L , ref, lit) \ | ||
125 | _(CALLS, S , ref, lit) \ | ||
126 | _(CARG, N , ref, ref) \ | ||
127 | \ | ||
121 | /* End of list. */ | 128 | /* End of list. */ |
122 | 129 | ||
123 | /* IR opcodes (max. 256). */ | 130 | /* IR opcodes (max. 256). */ |
@@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE); | |||
144 | LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); | 151 | LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); |
145 | LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); | 152 | LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); |
146 | 153 | ||
154 | /* -- Named IR literals --------------------------------------------------- */ | ||
155 | |||
147 | /* FPMATH sub-functions. ORDER FPM. */ | 156 | /* FPMATH sub-functions. ORDER FPM. */ |
148 | #define IRFPMDEF(_) \ | 157 | #define IRFPMDEF(_) \ |
149 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ | 158 | _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ |
@@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM) | |||
158 | IRFPM__MAX | 167 | IRFPM__MAX |
159 | } IRFPMathOp; | 168 | } IRFPMathOp; |
160 | 169 | ||
161 | /* FLOAD field IDs. */ | 170 | /* FLOAD fields. */ |
162 | #define IRFLDEF(_) \ | 171 | #define IRFLDEF(_) \ |
163 | _(STR_LEN, GCstr, len) \ | 172 | _(STR_LEN, offsetof(GCstr, len)) \ |
164 | _(FUNC_ENV, GCfunc, l.env) \ | 173 | _(FUNC_ENV, offsetof(GCfunc, l.env)) \ |
165 | _(TAB_META, GCtab, metatable) \ | 174 | _(TAB_META, offsetof(GCtab, metatable)) \ |
166 | _(TAB_ARRAY, GCtab, array) \ | 175 | _(TAB_ARRAY, offsetof(GCtab, array)) \ |
167 | _(TAB_NODE, GCtab, node) \ | 176 | _(TAB_NODE, offsetof(GCtab, node)) \ |
168 | _(TAB_ASIZE, GCtab, asize) \ | 177 | _(TAB_ASIZE, offsetof(GCtab, asize)) \ |
169 | _(TAB_HMASK, GCtab, hmask) \ | 178 | _(TAB_HMASK, offsetof(GCtab, hmask)) \ |
170 | _(TAB_NOMM, GCtab, nomm) \ | 179 | _(TAB_NOMM, offsetof(GCtab, nomm)) \ |
171 | _(UDATA_META, GCudata, metatable) | 180 | _(UDATA_META, offsetof(GCudata, metatable)) \ |
181 | _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ | ||
182 | _(UDATA_FILE, sizeof(GCudata)) | ||
172 | 183 | ||
173 | typedef enum { | 184 | typedef enum { |
174 | #define FLENUM(name, type, field) IRFL_##name, | 185 | #define FLENUM(name, ofs) IRFL_##name, |
175 | IRFLDEF(FLENUM) | 186 | IRFLDEF(FLENUM) |
176 | #undef FLENUM | 187 | #undef FLENUM |
177 | IRFL__MAX | 188 | IRFL__MAX |
@@ -183,7 +194,8 @@ IRFLDEF(FLENUM) | |||
183 | #define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ | 194 | #define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ |
184 | 195 | ||
185 | /* XLOAD mode, stored in op2. */ | 196 | /* XLOAD mode, stored in op2. */ |
186 | #define IRXLOAD_UNALIGNED 1 | 197 | #define IRXLOAD_READONLY 1 /* Load from read-only data. */ |
198 | #define IRXLOAD_UNALIGNED 2 /* Unaligned load. */ | ||
187 | 199 | ||
188 | /* TOINT mode, stored in op2. Ordered by strength of the checks. */ | 200 | /* TOINT mode, stored in op2. Ordered by strength of the checks. */ |
189 | #define IRTOINT_CHECK 0 /* Number checked for integerness. */ | 201 | #define IRTOINT_CHECK 0 /* Number checked for integerness. */ |
@@ -191,6 +203,67 @@ IRFLDEF(FLENUM) | |||
191 | #define IRTOINT_ANY 2 /* Any FP number is ok. */ | 203 | #define IRTOINT_ANY 2 /* Any FP number is ok. */ |
192 | #define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ | 204 | #define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ |
193 | 205 | ||
206 | /* C call info for CALL* instructions. */ | ||
207 | typedef struct CCallInfo { | ||
208 | ASMFunction func; /* Function pointer. */ | ||
209 | uint32_t flags; /* Number of arguments and flags. */ | ||
210 | } CCallInfo; | ||
211 | |||
212 | #define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ | ||
213 | #define CCI_NARGS_MAX 16 /* Max. # of args. */ | ||
214 | |||
215 | #define CCI_OTSHIFT 16 | ||
216 | #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ | ||
217 | #define CCI_OPSHIFT 24 | ||
218 | #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ | ||
219 | |||
220 | #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) | ||
221 | #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) | ||
222 | #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) | ||
223 | #define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL) | ||
224 | #define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL) | ||
225 | #define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL) | ||
226 | |||
227 | /* C call info flags. */ | ||
228 | #define CCI_L 0x0100 /* Implicit L arg. */ | ||
229 | #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ | ||
230 | #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ | ||
231 | #define CCI_FASTCALL 0x0800 /* Fastcall convention. */ | ||
232 | |||
233 | /* Function definitions for CALL* instructions. */ | ||
234 | #define IRCALLDEF(_) \ | ||
235 | _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ | ||
236 | _(lj_str_new, 3, S, STR, CCI_L) \ | ||
237 | _(lj_str_tonum, 2, FN, INT, 0) \ | ||
238 | _(lj_str_fromint, 2, FN, STR, CCI_L) \ | ||
239 | _(lj_str_fromnum, 2, FN, STR, CCI_L) \ | ||
240 | _(lj_tab_new1, 2, FS, TAB, CCI_L) \ | ||
241 | _(lj_tab_dup, 2, FS, TAB, CCI_L) \ | ||
242 | _(lj_tab_newkey, 3, S, PTR, CCI_L) \ | ||
243 | _(lj_tab_len, 1, FL, INT, 0) \ | ||
244 | _(lj_gc_step_jit, 2, FS, NIL, CCI_L) \ | ||
245 | _(lj_gc_barrieruv, 2, FS, NIL, 0) \ | ||
246 | _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \ | ||
247 | _(sinh, 1, N, NUM, 0) \ | ||
248 | _(cosh, 1, N, NUM, 0) \ | ||
249 | _(tanh, 1, N, NUM, 0) \ | ||
250 | _(fputc, 2, S, INT, 0) \ | ||
251 | _(fwrite, 4, S, INT, 0) \ | ||
252 | _(fflush, 1, S, INT, 0) \ | ||
253 | \ | ||
254 | /* End of list. */ | ||
255 | |||
256 | typedef enum { | ||
257 | #define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name, | ||
258 | IRCALLDEF(IRCALLENUM) | ||
259 | #undef IRCALLENUM | ||
260 | IRCALL__MAX | ||
261 | } IRCallID; | ||
262 | |||
263 | LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; | ||
264 | |||
265 | /* -- IR operands --------------------------------------------------------- */ | ||
266 | |||
194 | /* IR operand mode (2 bit). */ | 267 | /* IR operand mode (2 bit). */ |
195 | typedef enum { | 268 | typedef enum { |
196 | IRMref, /* IR reference. */ | 269 | IRMref, /* IR reference. */ |
@@ -227,6 +300,8 @@ typedef enum { | |||
227 | 300 | ||
228 | LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; | 301 | LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; |
229 | 302 | ||
303 | /* -- IR instruction types ------------------------------------------------ */ | ||
304 | |||
230 | /* IR result type and flags (8 bit). */ | 305 | /* IR result type and flags (8 bit). */ |
231 | typedef enum { | 306 | typedef enum { |
232 | /* Map of itypes to non-negative numbers. ORDER LJ_T */ | 307 | /* Map of itypes to non-negative numbers. ORDER LJ_T */ |
@@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1; | |||
314 | /* Stored combined IR opcode and type. */ | 389 | /* Stored combined IR opcode and type. */ |
315 | typedef uint16_t IROpT; | 390 | typedef uint16_t IROpT; |
316 | 391 | ||
392 | /* -- IR references ------------------------------------------------------- */ | ||
393 | |||
317 | /* IR references. */ | 394 | /* IR references. */ |
318 | typedef uint16_t IRRef1; /* One stored reference. */ | 395 | typedef uint16_t IRRef1; /* One stored reference. */ |
319 | typedef uint32_t IRRef2; /* Two stored references. */ | 396 | typedef uint32_t IRRef2; /* Two stored references. */ |
@@ -382,6 +459,8 @@ typedef uint32_t TRef; | |||
382 | #define TREF_FALSE (TREF_PRI(IRT_FALSE)) | 459 | #define TREF_FALSE (TREF_PRI(IRT_FALSE)) |
383 | #define TREF_TRUE (TREF_PRI(IRT_TRUE)) | 460 | #define TREF_TRUE (TREF_PRI(IRT_TRUE)) |
384 | 461 | ||
462 | /* -- IR format ----------------------------------------------------------- */ | ||
463 | |||
385 | /* IR instruction format (64 bit). | 464 | /* IR instruction format (64 bit). |
386 | ** | 465 | ** |
387 | ** 16 16 8 8 8 8 | 466 | ** 16 16 8 8 8 8 |
@@ -425,5 +504,6 @@ typedef union IRIns { | |||
425 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) | 504 | #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) |
426 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) | 505 | #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) |
427 | #define ir_knum(ir) (mref((ir)->ptr, cTValue)) | 506 | #define ir_knum(ir) (mref((ir)->ptr, cTValue)) |
507 | #define ir_kptr(ir) (mref((ir)->ptr, void)) | ||
428 | 508 | ||
429 | #endif | 509 | #endif |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 69b0a955..52077ad5 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
@@ -6,6 +6,8 @@ | |||
6 | #ifndef _LJ_IROPT_H | 6 | #ifndef _LJ_IROPT_H |
7 | #define _LJ_IROPT_H | 7 | #define _LJ_IROPT_H |
8 | 8 | ||
9 | #include <stdarg.h> | ||
10 | |||
9 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
10 | #include "lj_jit.h" | 12 | #include "lj_jit.h" |
11 | 13 | ||
@@ -13,6 +15,7 @@ | |||
13 | /* IR emitter. */ | 15 | /* IR emitter. */ |
14 | LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); | 16 | LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); |
15 | LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); | 17 | LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); |
18 | LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...); | ||
16 | 19 | ||
17 | /* Save current IR in J->fold.ins, but do not emit it (yet). */ | 20 | /* Save current IR in J->fold.ins, but do not emit it (yet). */ |
18 | static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) | 21 | static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) |
@@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref); | |||
83 | /* Emit IR instructions with on-the-fly optimizations. */ | 86 | /* Emit IR instructions with on-the-fly optimizations. */ |
84 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); | 87 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); |
85 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); | 88 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); |
89 | LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim); | ||
86 | 90 | ||
87 | /* Special return values for the fold functions. */ | 91 | /* Special return values for the fold functions. */ |
88 | enum { | 92 | enum { |
@@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J); | |||
106 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); | 110 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); |
107 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); | 111 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); |
108 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); | 112 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); |
109 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J); | 113 | LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); |
110 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); | 114 | LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); |
111 | 115 | ||
112 | /* Dead-store elimination. */ | 116 | /* Dead-store elimination. */ |
diff --git a/src/lj_lib.c b/src/lj_lib.c index 683c66d6..d8254093 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c | |||
@@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg) | |||
152 | { | 152 | { |
153 | TValue *o = L->base + narg-1; | 153 | TValue *o = L->base + narg-1; |
154 | if (!(o < L->top && | 154 | if (!(o < L->top && |
155 | (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))))) | 155 | (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))))) |
156 | lj_err_argt(L, narg, LUA_TNUMBER); | 156 | lj_err_argt(L, narg, LUA_TNUMBER); |
157 | return numV(o); | 157 | return numV(o); |
158 | } | 158 | } |
diff --git a/src/lj_lib.h b/src/lj_lib.h index 59a0f2be..a7a6317e 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h | |||
@@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, | |||
90 | #define LIBINIT_FFID 0xfe | 90 | #define LIBINIT_FFID 0xfe |
91 | #define LIBINIT_END 0xff | 91 | #define LIBINIT_END 0xff |
92 | 92 | ||
93 | /* Exported library functions. */ | ||
94 | |||
95 | typedef struct RandomState RandomState; | ||
96 | LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); | ||
97 | |||
93 | #endif | 98 | #endif |
diff --git a/src/lj_meta.c b/src/lj_meta.c index dff01f85..1182d908 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c | |||
@@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm) | |||
60 | else if (tvisudata(o)) | 60 | else if (tvisudata(o)) |
61 | mt = tabref(udataV(o)->metatable); | 61 | mt = tabref(udataV(o)->metatable); |
62 | else | 62 | else |
63 | mt = tabref(G(L)->basemt[itypemap(o)]); | 63 | mt = tabref(basemt_obj(G(L), o)); |
64 | if (mt) { | 64 | if (mt) { |
65 | cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); | 65 | cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); |
66 | if (mo) | 66 | if (mo) |
@@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n) | |||
157 | { | 157 | { |
158 | if (tvisnum(o)) | 158 | if (tvisnum(o)) |
159 | return o; | 159 | return o; |
160 | else if (tvisstr(o) && lj_str_numconv(strVdata(o), n)) | 160 | else if (tvisstr(o) && lj_str_tonum(strV(o), n)) |
161 | return n; | 161 | return n; |
162 | else | 162 | else |
163 | return NULL; | 163 | return NULL; |
@@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) | |||
295 | top = curr_top(L); | 295 | top = curr_top(L); |
296 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); | 296 | setcont(top, ne ? lj_cont_condf : lj_cont_condt); |
297 | copyTV(L, top+1, mo); | 297 | copyTV(L, top+1, mo); |
298 | it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA; | 298 | it = ~o1->gch.gct; |
299 | setgcV(L, top+2, &o1->gch, it); | 299 | setgcV(L, top+2, &o1->gch, it); |
300 | setgcV(L, top+3, &o2->gch, it); | 300 | setgcV(L, top+3, &o2->gch, it); |
301 | return top+2; /* Trigger metamethod call. */ | 301 | return top+2; /* Trigger metamethod call. */ |
diff --git a/src/lj_obj.h b/src/lj_obj.h index 9101f053..cebeda9b 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h | |||
@@ -315,7 +315,7 @@ typedef struct GCstr { | |||
315 | /* Userdata object. Payload follows. */ | 315 | /* Userdata object. Payload follows. */ |
316 | typedef struct GCudata { | 316 | typedef struct GCudata { |
317 | GCHeader; | 317 | GCHeader; |
318 | uint8_t unused1; | 318 | uint8_t udtype; /* Userdata type. */ |
319 | uint8_t unused2; | 319 | uint8_t unused2; |
320 | GCRef env; /* Should be at same offset in GCfunc. */ | 320 | GCRef env; /* Should be at same offset in GCfunc. */ |
321 | MSize len; /* Size of payload. */ | 321 | MSize len; /* Size of payload. */ |
@@ -323,6 +323,13 @@ typedef struct GCudata { | |||
323 | uint32_t align1; /* To force 8 byte alignment of the payload. */ | 323 | uint32_t align1; /* To force 8 byte alignment of the payload. */ |
324 | } GCudata; | 324 | } GCudata; |
325 | 325 | ||
326 | /* Userdata types. */ | ||
327 | enum { | ||
328 | UDTYPE_USERDATA, /* Regular userdata. */ | ||
329 | UDTYPE_IO_FILE, /* I/O library FILE. */ | ||
330 | UDTYPE__MAX | ||
331 | }; | ||
332 | |||
326 | #define uddata(u) ((void *)((u)+1)) | 333 | #define uddata(u) ((void *)((u)+1)) |
327 | #define sizeudata(u) (sizeof(struct GCudata)+(u)->len) | 334 | #define sizeudata(u) (sizeof(struct GCudata)+(u)->len) |
328 | 335 | ||
@@ -496,7 +503,17 @@ MMDEF(MMENUM) | |||
496 | MM_FAST = MM_eq | 503 | MM_FAST = MM_eq |
497 | } MMS; | 504 | } MMS; |
498 | 505 | ||
499 | #define BASEMT_MAX ((~LJ_TNUMX)+1) | 506 | /* GC root IDs. */ |
507 | typedef enum { | ||
508 | GCROOT_BASEMT, /* Metatables for base types. */ | ||
509 | GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */ | ||
510 | GCROOT_IO_INPUT, /* Userdata for default I/O input file. */ | ||
511 | GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */ | ||
512 | GCROOT__MAX | ||
513 | } GCRootID; | ||
514 | |||
515 | #define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)]) | ||
516 | #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) | ||
500 | 517 | ||
501 | typedef struct GCState { | 518 | typedef struct GCState { |
502 | MSize total; /* Memory currently allocated. */ | 519 | MSize total; /* Memory currently allocated. */ |
@@ -544,7 +561,7 @@ typedef struct global_State { | |||
544 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ | 561 | volatile int32_t vmstate; /* VM state or current JIT code trace number. */ |
545 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ | 562 | GCRef jit_L; /* Current JIT code lua_State or NULL. */ |
546 | MRef jit_base; /* Current JIT code L->base. */ | 563 | MRef jit_base; /* Current JIT code L->base. */ |
547 | GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */ | 564 | GCRef gcroot[GCROOT__MAX]; /* GC roots. */ |
548 | GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ | 565 | GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ |
549 | } global_State; | 566 | } global_State; |
550 | 567 | ||
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 2102561d..98266d21 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c | |||
@@ -282,21 +282,50 @@ LJFOLD(STRTO KGC) | |||
282 | LJFOLDF(kfold_strto) | 282 | LJFOLDF(kfold_strto) |
283 | { | 283 | { |
284 | TValue n; | 284 | TValue n; |
285 | if (lj_str_numconv(strdata(ir_kstr(fleft)), &n)) | 285 | if (lj_str_tonum(ir_kstr(fleft), &n)) |
286 | return lj_ir_knum(J, numV(&n)); | 286 | return lj_ir_knum(J, numV(&n)); |
287 | return FAILFOLD; | 287 | return FAILFOLD; |
288 | } | 288 | } |
289 | 289 | ||
290 | LJFOLD(SNEW STRREF KINT) | 290 | LJFOLD(SNEW KPTR KINT) |
291 | LJFOLDF(kfold_snew) | 291 | LJFOLDF(kfold_snew_kptr) |
292 | { | ||
293 | GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i); | ||
294 | return lj_ir_kstr(J, s); | ||
295 | } | ||
296 | |||
297 | LJFOLD(SNEW any KINT) | ||
298 | LJFOLDF(kfold_snew_empty) | ||
292 | { | 299 | { |
293 | if (fright->i == 0) | 300 | if (fright->i == 0) |
294 | return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); | 301 | return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); |
302 | return NEXTFOLD; | ||
303 | } | ||
304 | |||
305 | LJFOLD(STRREF KGC KINT) | ||
306 | LJFOLDF(kfold_strref) | ||
307 | { | ||
308 | GCstr *str = ir_kstr(fleft); | ||
309 | lua_assert((MSize)fright->i < str->len); | ||
310 | return lj_ir_kptr(J, (char *)strdata(str) + fright->i); | ||
311 | } | ||
312 | |||
313 | LJFOLD(STRREF SNEW any) | ||
314 | LJFOLDF(kfold_strref_snew) | ||
315 | { | ||
295 | PHIBARRIER(fleft); | 316 | PHIBARRIER(fleft); |
296 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | 317 | if (irref_isk(fins->op2) && fright->i == 0) { |
297 | const char *s = strdata(ir_kstr(IR(fleft->op1))); | 318 | return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */ |
298 | int32_t ofs = IR(fleft->op2)->i; | 319 | } else { |
299 | return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i)); | 320 | /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */ |
321 | IRIns *ir = IR(fleft->op1); | ||
322 | IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */ | ||
323 | lua_assert(ir->o == IR_STRREF); | ||
324 | PHIBARRIER(ir); | ||
325 | fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ | ||
326 | fins->op1 = str; | ||
327 | fins->ot = IRT(IR_STRREF, IRT_PTR); | ||
328 | return RETRYFOLD; | ||
300 | } | 329 | } |
301 | return NEXTFOLD; | 330 | return NEXTFOLD; |
302 | } | 331 | } |
@@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp) | |||
343 | } | 372 | } |
344 | } | 373 | } |
345 | 374 | ||
346 | LJFOLD(LT KGC KGC) | 375 | LJFOLD(CALLN CARG IRCALL_lj_str_cmp) |
347 | LJFOLD(GE KGC KGC) | 376 | LJFOLDF(kfold_strcmp) |
348 | LJFOLD(LE KGC KGC) | ||
349 | LJFOLD(GT KGC KGC) | ||
350 | LJFOLDF(kfold_strcomp) | ||
351 | { | 377 | { |
352 | if (irt_isstr(fins->t)) { | 378 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { |
353 | GCstr *a = ir_kstr(fleft); | 379 | GCstr *a = ir_kstr(IR(fleft->op1)); |
354 | GCstr *b = ir_kstr(fright); | 380 | GCstr *b = ir_kstr(IR(fleft->op2)); |
355 | return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o)); | 381 | return INTFOLD(lj_str_cmp(a, b)); |
356 | } | 382 | } |
357 | return NEXTFOLD; | 383 | return NEXTFOLD; |
358 | } | 384 | } |
@@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc) | |||
1070 | uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : | 1096 | uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : |
1071 | len == 2 ? IRT(IR_XLOAD, IRT_U16) : | 1097 | len == 2 ? IRT(IR_XLOAD, IRT_U16) : |
1072 | IRTI(IR_XLOAD)); | 1098 | IRTI(IR_XLOAD)); |
1073 | TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0); | 1099 | TRef tmp = emitir(ot, strref, |
1100 | IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0)); | ||
1074 | TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); | 1101 | TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); |
1075 | if (len == 3) | 1102 | if (len == 3) |
1076 | tmp = emitir(IRTI(IR_BAND), tmp, | 1103 | tmp = emitir(IRTI(IR_BAND), tmp, |
@@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload) | |||
1103 | LJFOLD(ULOAD any) | 1130 | LJFOLD(ULOAD any) |
1104 | LJFOLDX(lj_opt_fwd_uload) | 1131 | LJFOLDX(lj_opt_fwd_uload) |
1105 | 1132 | ||
1106 | LJFOLD(TLEN any) | 1133 | LJFOLD(CALLL any IRCALL_lj_tab_len) |
1107 | LJFOLDX(lj_opt_fwd_tlen) | 1134 | LJFOLDX(lj_opt_fwd_tab_len) |
1108 | 1135 | ||
1109 | /* Upvalue refs are really loads, but there are no corresponding stores. | 1136 | /* Upvalue refs are really loads, but there are no corresponding stores. |
1110 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | 1137 | ** So CSE is ok for them, except for UREFO across a GC step (see below). |
@@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah) | |||
1194 | 1221 | ||
1195 | /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ | 1222 | /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ |
1196 | LJFOLD(FLOAD KGC IRFL_STR_LEN) | 1223 | LJFOLD(FLOAD KGC IRFL_STR_LEN) |
1197 | LJFOLDF(fload_str_len) | 1224 | LJFOLDF(fload_str_len_kgc) |
1198 | { | 1225 | { |
1199 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | 1226 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) |
1200 | return INTFOLD((int32_t)ir_kstr(fleft)->len); | 1227 | return INTFOLD((int32_t)ir_kstr(fleft)->len); |
1201 | return NEXTFOLD; | 1228 | return NEXTFOLD; |
1202 | } | 1229 | } |
1203 | 1230 | ||
1231 | LJFOLD(FLOAD SNEW IRFL_STR_LEN) | ||
1232 | LJFOLDF(fload_str_len_snew) | ||
1233 | { | ||
1234 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
1235 | PHIBARRIER(fleft); | ||
1236 | return fleft->op2; | ||
1237 | } | ||
1238 | return NEXTFOLD; | ||
1239 | } | ||
1240 | |||
1204 | LJFOLD(FLOAD any IRFL_STR_LEN) | 1241 | LJFOLD(FLOAD any IRFL_STR_LEN) |
1205 | LJFOLDX(lj_opt_cse) | 1242 | LJFOLDX(lj_opt_cse) |
1206 | 1243 | ||
@@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload) | |||
1216 | return J->slot[fins->op1]; | 1253 | return J->slot[fins->op1]; |
1217 | } | 1254 | } |
1218 | 1255 | ||
1219 | /* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */ | 1256 | LJFOLD(XLOAD KPTR any) |
1220 | LJFOLD(XLOAD STRREF any) | 1257 | LJFOLDF(xload_kptr) |
1221 | LJFOLDF(xload_str) | ||
1222 | { | 1258 | { |
1223 | if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { | 1259 | /* Only fold read-only integer loads for now. */ |
1224 | GCstr *str = ir_kstr(IR(fleft->op1)); | 1260 | if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t)) |
1225 | int32_t ofs = IR(fleft->op2)->i; | 1261 | return INTFOLD(kfold_xload(fins, ir_kptr(fleft))); |
1226 | lua_assert((MSize)ofs < str->len); | 1262 | return NEXTFOLD; |
1227 | lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len); | 1263 | } |
1228 | return INTFOLD(kfold_xload(fins, strdata(str)+ofs)); | 1264 | |
1265 | /* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */ | ||
1266 | LJFOLD(XLOAD any any) | ||
1267 | LJFOLDF(fwd_xload) | ||
1268 | { | ||
1269 | IRRef ref = J->chain[IR_XLOAD]; | ||
1270 | IRRef op1 = fins->op1; | ||
1271 | while (ref > op1) { | ||
1272 | if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t)) | ||
1273 | return ref; | ||
1274 | ref = IR(ref)->prev; | ||
1229 | } | 1275 | } |
1230 | return CSEFOLD; | 1276 | return EMITFOLD; |
1231 | } | 1277 | } |
1232 | /* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */ | ||
1233 | 1278 | ||
1234 | /* -- Write barriers ------------------------------------------------------ */ | 1279 | /* -- Write barriers ------------------------------------------------------ */ |
1235 | 1280 | ||
@@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any) | |||
1279 | LJFOLDX(lj_opt_dse_fstore) | 1324 | LJFOLDX(lj_opt_dse_fstore) |
1280 | 1325 | ||
1281 | LJFOLD(NEWREF any any) /* Treated like a store. */ | 1326 | LJFOLD(NEWREF any any) /* Treated like a store. */ |
1327 | LJFOLD(CALLS any any) | ||
1328 | LJFOLD(CALLL any any) /* Safeguard fallback. */ | ||
1282 | LJFOLD(TNEW any any) | 1329 | LJFOLD(TNEW any any) |
1283 | LJFOLD(TDUP any) | 1330 | LJFOLD(TDUP any) |
1284 | LJFOLDF(store_raw) | 1331 | LJFOLDX(lj_ir_emit) |
1285 | { | ||
1286 | return EMITFOLD; | ||
1287 | } | ||
1288 | 1332 | ||
1289 | /* ------------------------------------------------------------------------ */ | 1333 | /* ------------------------------------------------------------------------ */ |
1290 | 1334 | ||
@@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J) | |||
1402 | } | 1446 | } |
1403 | } | 1447 | } |
1404 | 1448 | ||
1449 | /* CSE with explicit search limit. */ | ||
1450 | TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) | ||
1451 | { | ||
1452 | IRRef ref = J->chain[fins->o]; | ||
1453 | IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); | ||
1454 | while (ref > lim) { | ||
1455 | if (IR(ref)->op12 == op12) | ||
1456 | return ref; | ||
1457 | ref = IR(ref)->prev; | ||
1458 | } | ||
1459 | return lj_ir_emit(J); | ||
1460 | } | ||
1461 | |||
1405 | /* ------------------------------------------------------------------------ */ | 1462 | /* ------------------------------------------------------------------------ */ |
1406 | 1463 | ||
1407 | #undef IR | 1464 | #undef IR |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f9a2a808..90ab1b6f 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J) | |||
310 | /* Undo any partial changes made by the loop optimization. */ | 310 | /* Undo any partial changes made by the loop optimization. */ |
311 | static void loop_undo(jit_State *J, IRRef ins) | 311 | static void loop_undo(jit_State *J, IRRef ins) |
312 | { | 312 | { |
313 | ptrdiff_t i; | ||
313 | lj_ir_rollback(J, ins); | 314 | lj_ir_rollback(J, ins); |
315 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ | ||
316 | BPropEntry *bp = &J->bpropcache[i]; | ||
317 | if (bp->val >= ins) | ||
318 | bp->key = 0; | ||
319 | } | ||
314 | for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ | 320 | for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ |
315 | IRIns *ir = IR(ins); | 321 | IRIns *ir = IR(ins); |
316 | irt_clearphi(ir->t); | 322 | irt_clearphi(ir->t); |
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 94fc4ad8..882ba6c5 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c | |||
@@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) | |||
307 | 307 | ||
308 | conflict: | 308 | conflict: |
309 | /* Try to find a matching load. Below the conflicting store, if any. */ | 309 | /* Try to find a matching load. Below the conflicting store, if any. */ |
310 | ref = J->chain[IR_ULOAD]; | 310 | return lj_opt_cselim(J, lim); |
311 | while (ref > lim) { | ||
312 | IRIns *load = IR(ref); | ||
313 | if (load->op1 == uref) | ||
314 | return ref; /* Load forwarding. */ | ||
315 | ref = load->prev; | ||
316 | } | ||
317 | return EMITFOLD; /* Conflict or no match. */ | ||
318 | } | 311 | } |
319 | 312 | ||
320 | /* USTORE elimination. */ | 313 | /* USTORE elimination. */ |
@@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J) | |||
405 | 398 | ||
406 | conflict: | 399 | conflict: |
407 | /* Try to find a matching load. Below the conflicting store, if any. */ | 400 | /* Try to find a matching load. Below the conflicting store, if any. */ |
408 | ref = J->chain[IR_FLOAD]; | 401 | return lj_opt_cselim(J, lim); |
409 | while (ref > lim) { | ||
410 | IRIns *load = IR(ref); | ||
411 | if (load->op1 == oref && load->op2 == fid) | ||
412 | return ref; /* Load forwarding. */ | ||
413 | ref = load->prev; | ||
414 | } | ||
415 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
416 | } | 402 | } |
417 | 403 | ||
418 | /* FSTORE elimination. */ | 404 | /* FSTORE elimination. */ |
@@ -458,10 +444,10 @@ doemit: | |||
458 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | 444 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ |
459 | } | 445 | } |
460 | 446 | ||
461 | /* -- TLEN forwarding ----------------------------------------------------- */ | 447 | /* -- Forwarding of lj_tab_len -------------------------------------------- */ |
462 | 448 | ||
463 | /* This is rather simplistic right now, but better than nothing. */ | 449 | /* This is rather simplistic right now, but better than nothing. */ |
464 | TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) | 450 | TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) |
465 | { | 451 | { |
466 | IRRef tab = fins->op1; /* Table reference. */ | 452 | IRRef tab = fins->op1; /* Table reference. */ |
467 | IRRef lim = tab; /* Search limit. */ | 453 | IRRef lim = tab; /* Search limit. */ |
@@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) | |||
484 | } | 470 | } |
485 | 471 | ||
486 | /* Try to find a matching load. Below the conflicting store, if any. */ | 472 | /* Try to find a matching load. Below the conflicting store, if any. */ |
487 | ref = J->chain[IR_TLEN]; | 473 | return lj_opt_cselim(J, lim); |
488 | while (ref > lim) { | ||
489 | IRIns *tlen = IR(ref); | ||
490 | if (tlen->op1 == tab) | ||
491 | return ref; /* Load forwarding. */ | ||
492 | ref = tlen->prev; | ||
493 | } | ||
494 | return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ | ||
495 | } | 474 | } |
496 | 475 | ||
497 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ | 476 | /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ |
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 60a6afb8..b9107c5e 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c | |||
@@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc) | |||
370 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) | 370 | TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) |
371 | { | 371 | { |
372 | lua_Number n; | 372 | lua_Number n; |
373 | if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc)) | 373 | if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) |
374 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 374 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
375 | n = numV(vc); | 375 | n = numV(vc); |
376 | /* Limit narrowing for pow to small exponents (or for two constants). */ | 376 | /* Limit narrowing for pow to small exponents (or for two constants). */ |
diff --git a/src/lj_parse.c b/src/lj_parse.c index 000772fe..1de07e92 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c | |||
@@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len) | |||
317 | GCstr *s = lj_str_new(L, str, len); | 317 | GCstr *s = lj_str_new(L, str, len); |
318 | TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); | 318 | TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); |
319 | if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ | 319 | if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ |
320 | lj_gc_check(L); | ||
320 | return s; | 321 | return s; |
321 | } | 322 | } |
322 | 323 | ||
diff --git a/src/lj_record.c b/src/lj_record.c index 68a233b9..9b223ff6 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) | |||
441 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); | 441 | mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); |
442 | } else { | 442 | } else { |
443 | /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ | 443 | /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ |
444 | mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]); | 444 | mt = tabref(basemt_obj(J2G(J), &ix->tabv)); |
445 | if (mt == NULL) | 445 | if (mt == NULL) |
446 | return 0; /* No metamethod. */ | 446 | return 0; /* No metamethod. */ |
447 | mix.tab = lj_ir_ktab(J, mt); | 447 | mix.tab = lj_ir_ktab(J, mt); |
@@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd); | |||
855 | /* Get runtime value of int argument. */ | 855 | /* Get runtime value of int argument. */ |
856 | static int32_t argv2int(jit_State *J, TValue *o) | 856 | static int32_t argv2int(jit_State *J, TValue *o) |
857 | { | 857 | { |
858 | if (tvisstr(o) && !lj_str_numconv(strVdata(o), o)) | 858 | if (tvisstr(o) && !lj_str_tonum(strV(o), o)) |
859 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 859 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
860 | return lj_num2bit(numV(o)); | 860 | return lj_num2bit(numV(o)); |
861 | } | 861 | } |
@@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd) | |||
1017 | /* Otherwise res[0] already contains the result. */ | 1017 | /* Otherwise res[0] already contains the result. */ |
1018 | } else if (tref_isnumber(tr)) { | 1018 | } else if (tref_isnumber(tr)) { |
1019 | res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); | 1019 | res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); |
1020 | } else if (tref_ispri(tr)) { | ||
1021 | res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)])); | ||
1020 | } else { | 1022 | } else { |
1021 | recff_err_nyi(J, rd); | 1023 | recff_err_nyi(J, rd); |
1022 | } | 1024 | } |
@@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd) | |||
1165 | res[0] = emitir(IRTN(IR_ATAN2), y, x); | 1167 | res[0] = emitir(IRTN(IR_ATAN2), y, x); |
1166 | } | 1168 | } |
1167 | 1169 | ||
1170 | static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd) | ||
1171 | { | ||
1172 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
1173 | res[0] = lj_ir_call(J, rd->data, tr); | ||
1174 | } | ||
1175 | |||
1168 | static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) | 1176 | static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) |
1169 | { | 1177 | { |
1170 | TRef tr = arg[0]; | 1178 | TRef tr = arg[0]; |
1171 | if (tref_isinteger(arg[0])) { | 1179 | if (tref_isinteger(tr)) { |
1172 | res[0] = tr; | 1180 | res[0] = tr; |
1173 | res[1] = lj_ir_kint(J, 0); | 1181 | res[1] = lj_ir_kint(J, 0); |
1174 | } else { | 1182 | } else { |
@@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd) | |||
1187 | 1195 | ||
1188 | static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) | 1196 | static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) |
1189 | { | 1197 | { |
1198 | TRef tr = lj_ir_tonum(J, arg[0]); | ||
1190 | if (!tref_isnumber_str(arg[1])) | 1199 | if (!tref_isnumber_str(arg[1])) |
1191 | lj_trace_err(J, LJ_TRERR_BADTYPE); | 1200 | lj_trace_err(J, LJ_TRERR_BADTYPE); |
1192 | res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]); | 1201 | res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]); |
1193 | UNUSED(rd); | 1202 | UNUSED(rd); |
1194 | } | 1203 | } |
1195 | 1204 | ||
@@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd) | |||
1203 | res[0] = tr; | 1212 | res[0] = tr; |
1204 | } | 1213 | } |
1205 | 1214 | ||
1215 | static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd) | ||
1216 | { | ||
1217 | GCudata *ud = udataV(&rd->fn->c.upvalue[0]); | ||
1218 | TRef tr, one; | ||
1219 | lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ | ||
1220 | tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); | ||
1221 | one = lj_ir_knum_one(J); | ||
1222 | tr = emitir(IRTN(IR_SUB), tr, one); | ||
1223 | if (arg[0]) { | ||
1224 | TRef tr1 = lj_ir_tonum(J, arg[0]); | ||
1225 | if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */ | ||
1226 | TRef tr2 = lj_ir_tonum(J, arg[1]); | ||
1227 | tr2 = emitir(IRTN(IR_SUB), tr2, tr1); | ||
1228 | tr2 = emitir(IRTN(IR_ADD), tr2, one); | ||
1229 | tr = emitir(IRTN(IR_MUL), tr, tr2); | ||
1230 | tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR); | ||
1231 | tr = emitir(IRTN(IR_ADD), tr, tr1); | ||
1232 | } else { /* d = floor(d*r1) + 1.0 */ | ||
1233 | tr = emitir(IRTN(IR_MUL), tr, tr1); | ||
1234 | tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR); | ||
1235 | tr = emitir(IRTN(IR_ADD), tr, one); | ||
1236 | } | ||
1237 | } | ||
1238 | res[0] = tr; | ||
1239 | } | ||
1240 | |||
1206 | /* -- Bit library fast functions ------------------------------------------ */ | 1241 | /* -- Bit library fast functions ------------------------------------------ */ |
1207 | 1242 | ||
1208 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ | 1243 | /* Record unary bit.tobit, bit.bnot, bit.bswap. */ |
@@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) | |||
1321 | for (i = 0; i < len; i++) { | 1356 | for (i = 0; i < len; i++) { |
1322 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); | 1357 | TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); |
1323 | tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); | 1358 | tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); |
1324 | res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0); | 1359 | res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); |
1325 | } | 1360 | } |
1326 | } else { /* Empty range or range underflow: return no results. */ | 1361 | } else { /* Empty range or range underflow: return no results. */ |
1327 | emitir(IRTGI(IR_LE), trend, trstart); | 1362 | emitir(IRTGI(IR_LE), trend, trstart); |
@@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd) | |||
1335 | static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) | 1370 | static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) |
1336 | { | 1371 | { |
1337 | if (tref_istab(arg[0])) { | 1372 | if (tref_istab(arg[0])) { |
1338 | res[0] = emitir(IRTI(IR_TLEN), arg[0], 0); | 1373 | res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); |
1339 | } /* else: Interpreter will throw. */ | 1374 | } /* else: Interpreter will throw. */ |
1340 | UNUSED(rd); | 1375 | UNUSED(rd); |
1341 | } | 1376 | } |
@@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd) | |||
1344 | { | 1379 | { |
1345 | if (tref_istab(arg[0])) { | 1380 | if (tref_istab(arg[0])) { |
1346 | if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */ | 1381 | if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */ |
1347 | TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); | 1382 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); |
1348 | GCtab *t = tabV(&rd->argv[0]); | 1383 | GCtab *t = tabV(&rd->argv[0]); |
1349 | MSize len = lj_tab_len(t); | 1384 | MSize len = lj_tab_len(t); |
1350 | emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); | 1385 | emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); |
@@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd) | |||
1376 | rd->nres = 0; | 1411 | rd->nres = 0; |
1377 | if (tref_istab(arg[0]) && arg[1]) { | 1412 | if (tref_istab(arg[0]) && arg[1]) { |
1378 | if (!arg[2]) { /* Simple push: t[#t+1] = v */ | 1413 | if (!arg[2]) { /* Simple push: t[#t+1] = v */ |
1379 | TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); | 1414 | TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]); |
1380 | GCtab *t = tabV(&rd->argv[0]); | 1415 | GCtab *t = tabV(&rd->argv[0]); |
1381 | RecordIndex ix; | 1416 | RecordIndex ix; |
1382 | ix.tab = arg[0]; | 1417 | ix.tab = arg[0]; |
@@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd) | |||
1392 | } /* else: Interpreter will throw. */ | 1427 | } /* else: Interpreter will throw. */ |
1393 | } | 1428 | } |
1394 | 1429 | ||
1430 | /* -- I/O library fast functions ------------------------------------------ */ | ||
1431 | |||
1432 | /* Get FILE* for I/O function. Any I/O error aborts recording, so there's | ||
1433 | ** no need to encode the alternate cases for any of the guards. | ||
1434 | */ | ||
1435 | static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id) | ||
1436 | { | ||
1437 | TRef tr, ud, fp; | ||
1438 | if (id) { /* io.func() */ | ||
1439 | tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); | ||
1440 | ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); | ||
1441 | } else { /* fp:method() */ | ||
1442 | ud = arg[0]; | ||
1443 | if (!tref_isudata(ud)) | ||
1444 | lj_trace_err(J, LJ_TRERR_BADTYPE); | ||
1445 | tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); | ||
1446 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); | ||
1447 | } | ||
1448 | fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE); | ||
1449 | emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD)); | ||
1450 | return fp; | ||
1451 | } | ||
1452 | |||
1453 | static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd) | ||
1454 | { | ||
1455 | TRef fp = recff_io_fp(J, res, rd->data); | ||
1456 | TRef zero = lj_ir_kint(J, 0); | ||
1457 | TRef one = lj_ir_kint(J, 1); | ||
1458 | ptrdiff_t i = rd->data == 0 ? 1 : 0; | ||
1459 | for (; arg[i]; i++) { | ||
1460 | TRef str = lj_ir_tostr(J, arg[i]); | ||
1461 | TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero); | ||
1462 | TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); | ||
1463 | if (tref_isk(len) && IR(tref_ref(len))->i == 1) { | ||
1464 | TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); | ||
1465 | tr = lj_ir_call(J, IRCALL_fputc, tr, fp); | ||
1466 | if (rd->cres != 0) /* Check result only if requested. */ | ||
1467 | emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); | ||
1468 | } else { | ||
1469 | TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp); | ||
1470 | if (rd->cres != 0) /* Check result only if requested. */ | ||
1471 | emitir(IRTGI(IR_EQ), tr, len); | ||
1472 | } | ||
1473 | } | ||
1474 | res[0] = TREF_TRUE; | ||
1475 | } | ||
1476 | |||
1477 | static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd) | ||
1478 | { | ||
1479 | TRef fp = recff_io_fp(J, res, rd->data); | ||
1480 | TRef tr = lj_ir_call(J, IRCALL_fflush, fp); | ||
1481 | if (rd->cres != 0) /* Check result only if requested. */ | ||
1482 | emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0)); | ||
1483 | res[0] = TREF_TRUE; | ||
1484 | } | ||
1485 | |||
1395 | /* -- Record calls and returns -------------------------------------------- */ | 1486 | /* -- Record calls and returns -------------------------------------------- */ |
1396 | 1487 | ||
1397 | #undef arg | 1488 | #undef arg |
@@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J) | |||
1696 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; | 1787 | if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; |
1697 | } else if (ta == IRT_STR) { | 1788 | } else if (ta == IRT_STR) { |
1698 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; | 1789 | if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; |
1790 | ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc); | ||
1791 | rc = lj_ir_kint(J, 0); | ||
1792 | ta = IRT_INT; | ||
1699 | } else { | 1793 | } else { |
1700 | rec_mm_comp(J, &ix, (int)op); | 1794 | rec_mm_comp(J, &ix, (int)op); |
1701 | break; | 1795 | break; |
@@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J) | |||
1745 | if (tref_isstr(rc)) { | 1839 | if (tref_isstr(rc)) { |
1746 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); | 1840 | rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); |
1747 | } else if (tref_istab(rc)) { | 1841 | } else if (tref_istab(rc)) { |
1748 | rc = emitir(IRTI(IR_TLEN), rc, 0); | 1842 | rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); |
1749 | } else { | 1843 | } else { |
1750 | ix.tab = rc; | 1844 | ix.tab = rc; |
1751 | copyTV(J->L, &ix.tabv, &ix.keyv); | 1845 | copyTV(J->L, &ix.tabv, &ix.keyv); |
@@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J) | |||
1879 | /* fallthrough */ | 1973 | /* fallthrough */ |
1880 | case BC_CALL: | 1974 | case BC_CALL: |
1881 | callop: | 1975 | callop: |
1882 | if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */ | ||
1883 | } | ||
1884 | rec_call(J, ra, (int)(rb-1), (int)(rc-1)); | 1976 | rec_call(J, ra, (int)(rb-1), (int)(rc-1)); |
1885 | break; | 1977 | break; |
1886 | 1978 | ||
@@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
2064 | BCReg j; | 2156 | BCReg j; |
2065 | for (j = 0; j < s; j++) | 2157 | for (j = 0; j < s; j++) |
2066 | if (snap_ref(map[j]) == ref) { | 2158 | if (snap_ref(map[j]) == ref) { |
2067 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) | 2159 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
2160 | lua_assert(s != 0); | ||
2068 | J->baseslot = s+1; | 2161 | J->baseslot = s+1; |
2162 | J->framedepth++; | ||
2163 | } | ||
2069 | tr = J->slot[j]; | 2164 | tr = J->slot[j]; |
2070 | goto dupslot; | 2165 | goto dupslot; |
2071 | } | 2166 | } |
@@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
2078 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; | 2173 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; |
2079 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ | 2174 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ |
2080 | if (irt_isfunc(ir->t)) { | 2175 | if (irt_isfunc(ir->t)) { |
2081 | J->baseslot = s+1; | 2176 | if (s != 0) { |
2082 | J->framedepth++; | 2177 | J->baseslot = s+1; |
2178 | J->framedepth++; | ||
2179 | } | ||
2083 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | 2180 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); |
2084 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | 2181 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); |
2085 | } else { | 2182 | } else { |
diff --git a/src/lj_snap.c b/src/lj_snap.c index 09cd095c..d27404f2 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
251 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); | 251 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); |
252 | if (isluafunc(fn)) { | 252 | if (isluafunc(fn)) { |
253 | TValue *fs; | 253 | TValue *fs; |
254 | newbase = o+1; | 254 | fs = o+1 + funcproto(fn)->framesize; |
255 | fs = newbase + funcproto(fn)->framesize; | ||
256 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ | 255 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ |
256 | if (s != 0) newbase = o+1; | ||
257 | } | 257 | } |
258 | } | 258 | } |
259 | } | 259 | } |
@@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
262 | setnilV(o); /* Clear unreferenced slots of newly added frames. */ | 262 | setnilV(o); /* Clear unreferenced slots of newly added frames. */ |
263 | } | 263 | } |
264 | } | 264 | } |
265 | if (newbase) { /* Clear remainder of newly added frames. */ | 265 | if (newbase) L->base = newbase; |
266 | L->base = newbase; | 266 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ |
267 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ | 267 | MSize need = (MSize)(ntop - o); |
268 | MSize need = (MSize)(ntop - o); | 268 | L->top = o; |
269 | L->top = o; | 269 | lj_state_growstack(L, need); |
270 | lj_state_growstack(L, need); | 270 | o = L->top; |
271 | o = L->top; | 271 | ntop = o + need; |
272 | ntop = o + need; | ||
273 | } | ||
274 | L->top = curr_topL(L); | ||
275 | for (; o < ntop; o++) | ||
276 | setnilV(o); | ||
277 | } else { /* Must not clear slots of existing frame. */ | ||
278 | L->top = curr_topL(L); | ||
279 | } | 272 | } |
273 | L->top = curr_topL(L); | ||
274 | for (; o < ntop; o++) /* Clear remainder of newly added frames. */ | ||
275 | setnilV(o); | ||
280 | lua_assert(map + nslots == flinks-1); | 276 | lua_assert(map + nslots == flinks-1); |
281 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); | 277 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); |
282 | } | 278 | } |
diff --git a/src/lj_state.h b/src/lj_state.h index 54e85405..4e4185c0 100644 --- a/src/lj_state.h +++ b/src/lj_state.h | |||
@@ -17,7 +17,7 @@ | |||
17 | LJ_FUNC void lj_state_relimitstack(lua_State *L); | 17 | LJ_FUNC void lj_state_relimitstack(lua_State *L); |
18 | LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); | 18 | LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); |
19 | LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); | 19 | LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); |
20 | LJ_FUNCA void lj_state_growstack1(lua_State *L); | 20 | LJ_FUNC void lj_state_growstack1(lua_State *L); |
21 | 21 | ||
22 | static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) | 22 | static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) |
23 | { | 23 | { |
diff --git a/src/lj_str.c b/src/lj_str.c index 26f91cba..62322b59 100644 --- a/src/lj_str.c +++ b/src/lj_str.c | |||
@@ -21,7 +21,7 @@ | |||
21 | /* -- String interning ---------------------------------------------------- */ | 21 | /* -- String interning ---------------------------------------------------- */ |
22 | 22 | ||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | 23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ |
24 | int32_t lj_str_cmp(GCstr *a, GCstr *b) | 24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) |
25 | { | 25 | { |
26 | MSize i, n = a->len > b->len ? b->len : a->len; | 26 | MSize i, n = a->len > b->len ? b->len : a->len; |
27 | for (i = 0; i < n; i += 4) { | 27 | for (i = 0; i < n; i += 4) { |
@@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | |||
119 | 119 | ||
120 | /* -- Type conversions ---------------------------------------------------- */ | 120 | /* -- Type conversions ---------------------------------------------------- */ |
121 | 121 | ||
122 | /* Convert string object to number. */ | ||
123 | int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n) | ||
124 | { | ||
125 | return lj_str_numconv(strdata(str), n); | ||
126 | } | ||
127 | |||
122 | /* Convert string to number. */ | 128 | /* Convert string to number. */ |
123 | int lj_str_numconv(const char *s, TValue *n) | 129 | int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n) |
124 | { | 130 | { |
125 | lua_Number sign = 1; | 131 | lua_Number sign = 1; |
126 | const uint8_t *p = (const uint8_t *)s; | 132 | const uint8_t *p = (const uint8_t *)s; |
@@ -167,7 +173,7 @@ parsedbl: | |||
167 | } | 173 | } |
168 | 174 | ||
169 | /* Convert number to string. */ | 175 | /* Convert number to string. */ |
170 | GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) | 176 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) |
171 | { | 177 | { |
172 | char s[LUAI_MAXNUMBER2STR]; | 178 | char s[LUAI_MAXNUMBER2STR]; |
173 | lua_Number n = *np; | 179 | lua_Number n = *np; |
@@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) | |||
176 | } | 182 | } |
177 | 183 | ||
178 | /* Convert integer to string. */ | 184 | /* Convert integer to string. */ |
179 | GCstr *lj_str_fromint(lua_State *L, int32_t k) | 185 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) |
180 | { | 186 | { |
181 | char s[1+10]; | 187 | char s[1+10]; |
182 | char *p = s+sizeof(s); | 188 | char *p = s+sizeof(s); |
diff --git a/src/lj_str.h b/src/lj_str.h index f7e56d16..e8b242c0 100644 --- a/src/lj_str.h +++ b/src/lj_str.h | |||
@@ -11,7 +11,7 @@ | |||
11 | #include "lj_obj.h" | 11 | #include "lj_obj.h" |
12 | 12 | ||
13 | /* String interning. */ | 13 | /* String interning. */ |
14 | LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b); | 14 | LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); |
15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); | 15 | LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); |
16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); | 16 | LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); |
17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | 17 | LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); |
@@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); | |||
20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) | 20 | #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) |
21 | 21 | ||
22 | /* Type conversions. */ | 22 | /* Type conversions. */ |
23 | LJ_FUNCA int lj_str_numconv(const char *s, TValue *n); | 23 | LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n); |
24 | LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np); | 24 | LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n); |
25 | LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k); | 25 | LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); |
26 | LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); | ||
26 | 27 | ||
27 | /* String formatting. */ | 28 | /* String formatting. */ |
28 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); | 29 | LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); |
diff --git a/src/lj_tab.c b/src/lj_tab.c index 9af51027..ceafb770 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c | |||
@@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) | |||
160 | return t; | 160 | return t; |
161 | } | 161 | } |
162 | 162 | ||
163 | GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) | ||
164 | { | ||
165 | GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24); | ||
166 | clearapart(t); | ||
167 | if (t->hmask > 0) clearhpart(t); | ||
168 | return t; | ||
169 | } | ||
170 | |||
163 | /* Duplicate a table. */ | 171 | /* Duplicate a table. */ |
164 | GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) | 172 | GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) |
165 | { | 173 | { |
166 | GCtab *t; | 174 | GCtab *t; |
167 | uint32_t asize, hmask; | 175 | uint32_t asize, hmask; |
@@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray) | |||
334 | static uint32_t bestasize(uint32_t bins[], uint32_t *narray) | 342 | static uint32_t bestasize(uint32_t bins[], uint32_t *narray) |
335 | { | 343 | { |
336 | uint32_t b, sum, na = 0, sz = 0, nn = *narray; | 344 | uint32_t b, sum, na = 0, sz = 0, nn = *narray; |
337 | for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++) | 345 | for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++) |
338 | if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) { | 346 | if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) { |
339 | sz = (2u<<b)+1; | 347 | sz = (2u<<b)+1; |
340 | na = sum; | 348 | na = sum; |
341 | } | 349 | } |
@@ -599,7 +607,7 @@ static MSize unbound_search(GCtab *t, MSize j) | |||
599 | ** Try to find a boundary in table `t'. A `boundary' is an integer index | 607 | ** Try to find a boundary in table `t'. A `boundary' is an integer index |
600 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). | 608 | ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). |
601 | */ | 609 | */ |
602 | MSize lj_tab_len(GCtab *t) | 610 | MSize LJ_FASTCALL lj_tab_len(GCtab *t) |
603 | { | 611 | { |
604 | MSize j = (MSize)t->asize; | 612 | MSize j = (MSize)t->asize; |
605 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { | 613 | if (j > 1 && tvisnil(arrayslot(t, j-1))) { |
diff --git a/src/lj_tab.h b/src/lj_tab.h index e9e8bcd1..b2a8c3aa 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h | |||
@@ -11,7 +11,8 @@ | |||
11 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) | 11 | #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) |
12 | 12 | ||
13 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); | 13 | LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); |
14 | LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt); | 14 | LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); |
15 | LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); | ||
15 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); | 16 | LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); |
16 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); | 17 | LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); |
17 | 18 | ||
@@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); | |||
36 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) | 37 | (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) |
37 | 38 | ||
38 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); | 39 | LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); |
39 | LJ_FUNCA MSize lj_tab_len(GCtab *t); | 40 | LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); |
40 | 41 | ||
41 | #endif | 42 | #endif |
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 3ee4fa00..2fb3c4b8 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h | |||
@@ -32,6 +32,11 @@ enum { | |||
32 | 32 | ||
33 | /* Calling conventions. */ | 33 | /* Calling conventions. */ |
34 | RID_RET = RID_EAX, | 34 | RID_RET = RID_EAX, |
35 | #if LJ_64 | ||
36 | RID_FPRET = RID_XMM0, | ||
37 | #else | ||
38 | RID_RETHI = RID_EDX, | ||
39 | #endif | ||
35 | 40 | ||
36 | /* These definitions must match with the *.dasc file(s): */ | 41 | /* These definitions must match with the *.dasc file(s): */ |
37 | RID_BASE = RID_EDX, /* Interpreter BASE. */ | 42 | RID_BASE = RID_EDX, /* Interpreter BASE. */ |
@@ -98,8 +103,8 @@ enum { | |||
98 | }; | 103 | }; |
99 | 104 | ||
100 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ | 105 | /* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ |
101 | #define sps_scale(slot) (4 * (int32_t)(slot)) | 106 | #define sps_scale(slot) (4 * (int32_t)(slot)) |
102 | #define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3)) | 107 | #define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3)) |
103 | 108 | ||
104 | /* -- Exit state ---------------------------------------------------------- */ | 109 | /* -- Exit state ---------------------------------------------------------- */ |
105 | 110 | ||
@@ -185,6 +190,7 @@ typedef enum { | |||
185 | XO_ARITHib = XO_(80), | 190 | XO_ARITHib = XO_(80), |
186 | XO_ARITHi = XO_(81), | 191 | XO_ARITHi = XO_(81), |
187 | XO_ARITHi8 = XO_(83), | 192 | XO_ARITHi8 = XO_(83), |
193 | XO_ARITHiw8 = XO_66(83), | ||
188 | XO_SHIFTi = XO_(c1), | 194 | XO_SHIFTi = XO_(c1), |
189 | XO_SHIFT1 = XO_(d1), | 195 | XO_SHIFT1 = XO_(d1), |
190 | XO_SHIFTcl = XO_(d3), | 196 | XO_SHIFTcl = XO_(d3), |
@@ -216,6 +222,7 @@ typedef enum { | |||
216 | XO_CVTSI2SD = XO_f20f(2a), | 222 | XO_CVTSI2SD = XO_f20f(2a), |
217 | XO_CVTSD2SI = XO_f20f(2d), | 223 | XO_CVTSD2SI = XO_f20f(2d), |
218 | XO_CVTTSD2SI= XO_f20f(2c), | 224 | XO_CVTTSD2SI= XO_f20f(2c), |
225 | XO_MOVD = XO_660f(6e), | ||
219 | XO_MOVDto = XO_660f(7e), | 226 | XO_MOVDto = XO_660f(7e), |
220 | 227 | ||
221 | XO_FLDq = XO_(dd), XOg_FLDq = 0, | 228 | XO_FLDq = XO_(dd), XOg_FLDq = 0, |
diff --git a/src/lj_udata.c b/src/lj_udata.c index 863889c9..717d483b 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c | |||
@@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) | |||
16 | global_State *g = G(L); | 16 | global_State *g = G(L); |
17 | newwhite(g, ud); /* Not finalized. */ | 17 | newwhite(g, ud); /* Not finalized. */ |
18 | ud->gct = ~LJ_TUDATA; | 18 | ud->gct = ~LJ_TUDATA; |
19 | ud->udtype = UDTYPE_USERDATA; | ||
19 | ud->len = sz; | 20 | ud->len = sz; |
20 | /* NOBARRIER: The GCudata is new (marked white). */ | 21 | /* NOBARRIER: The GCudata is new (marked white). */ |
21 | setgcrefnull(ud->metatable); | 22 | setgcrefnull(ud->metatable); |