summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/api.html2
-rw-r--r--doc/changes.html25
-rw-r--r--doc/contact.html8
-rw-r--r--doc/luajit.html1
-rw-r--r--lib/bc.lua2
-rw-r--r--lib/dump.lua40
-rw-r--r--src/Makefile.dep15
-rw-r--r--src/buildvm.c14
-rw-r--r--src/buildvm.h1
-rw-r--r--src/buildvm_asm.c8
-rw-r--r--src/buildvm_fold.c4
-rw-r--r--src/buildvm_peobj.c14
-rw-r--r--src/buildvm_x86.dasc117
-rw-r--r--src/lib_base.c13
-rw-r--r--src/lib_io.c405
-rw-r--r--src/lib_math.c52
-rw-r--r--src/lib_string.c10
-rw-r--r--src/lj_alloc.c6
-rw-r--r--src/lj_api.c26
-rw-r--r--src/lj_asm.c598
-rw-r--r--src/lj_def.h1
-rw-r--r--src/lj_gc.c36
-rw-r--r--src/lj_gc.h6
-rw-r--r--src/lj_ir.c36
-rw-r--r--src/lj_ir.h114
-rw-r--r--src/lj_iropt.h6
-rw-r--r--src/lj_lib.c2
-rw-r--r--src/lj_lib.h5
-rw-r--r--src/lj_meta.c6
-rw-r--r--src/lj_obj.h23
-rw-r--r--src/lj_opt_fold.c127
-rw-r--r--src/lj_opt_loop.c6
-rw-r--r--src/lj_opt_mem.c31
-rw-r--r--src/lj_opt_narrow.c2
-rw-r--r--src/lj_parse.c1
-rw-r--r--src/lj_record.c125
-rw-r--r--src/lj_snap.c28
-rw-r--r--src/lj_state.h2
-rw-r--r--src/lj_str.c14
-rw-r--r--src/lj_str.h9
-rw-r--r--src/lj_tab.c16
-rw-r--r--src/lj_tab.h5
-rw-r--r--src/lj_target_x86.h11
-rw-r--r--src/lj_udata.c1
44 files changed, 1213 insertions, 761 deletions
diff --git a/doc/api.html b/doc/api.html
index 3bb10967..874f7ae9 100644
--- a/doc/api.html
+++ b/doc/api.html
@@ -319,7 +319,7 @@ enable it <b>after</b> running <tt>luaL_openlibs</tt>.
319</p> 319</p>
320<p> 320<p>
321LuaJIT already intercepts exception handling for systems using 321LuaJIT already intercepts exception handling for systems using
322ELF/DWARF2 stack unwinding (e.g. Linux). This is a zero-cost mechanism 322DWARF2 stack unwinding (e.g. Linux, OSX). This is a zero-cost mechanism
323and always enabled. You don't need to use any wrapper functions, 323and always enabled. You don't need to use any wrapper functions,
324except when you want to get a more specific error message than 324except when you want to get a more specific error message than
325<tt>"C++&nbsp;exception"</tt>. 325<tt>"C++&nbsp;exception"</tt>.
diff --git a/doc/changes.html b/doc/changes.html
index 641f1e28..d9a3aadd 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -48,10 +48,27 @@ The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJI
48</p> 48</p>
49<p> 49<p>
50Please check the 50Please check the
51<a href="http://luajit.org/luajit_changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a> 51<a href="http://luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a>
52to see whether newer versions are available. 52to see whether newer versions are available.
53</p> 53</p>
54 54
55<div class="major" style="background: #d0d0d0;">
56<h2 id="snap">Development Snapshot</h2>
57<ul>
58<li>Add abstract C call handling to IR.</li>
59<li>Improve KNUM fuse vs. load heuristics.</li>
60<li>Drive the GC forward on string allocations in the parser.</li>
61<li>Compile various <tt>io.*()</tt> functions.</li>
62<li>Compile <tt>math.sinh()</tt>, <tt>math.cosh()</tt>, <tt>math.tanh()</tt>
63and <tt>math.random()</tt>.</li>
64<li>Fix <tt>lua_tocfunction()</tt>.</li>
65<li>Fix cutoff register in JMP bytecode for some conditional expressions.</li>
66<li>Fix PHI marking algorithm for references from variant slots.</li>
67<li>Fix <tt>package.cpath</tt> for non-default PREFIX.</li>
68<li>Fix DWARF2 frame unwind information for interpreter on OSX.</li>
69</ul>
70</div>
71
55<div class="major" style="background: #ffd0d0;"> 72<div class="major" style="background: #ffd0d0;">
56<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 &mdash; 2009-11-09</h2> 73<h2 id="LuaJIT-2.0.0-beta2">LuaJIT 2.0.0-beta2 &mdash; 2009-11-09</h2>
57<ul> 74<ul>
@@ -59,14 +76,14 @@ to see whether newer versions are available.
59<li>Allow C++ exception conversion on all platforms 76<li>Allow C++ exception conversion on all platforms
60using a wrapper function.</li> 77using a wrapper function.</li>
61<li>Automatically catch C++ exceptions and rethrow Lua error 78<li>Automatically catch C++ exceptions and rethrow Lua error
62(ELF/DWARF2 only).</li> 79(DWARF2 only).</li>
63<li>Check for the correct x87 FPU precision at strategic points.</li> 80<li>Check for the correct x87 FPU precision at strategic points.</li>
64<li>Always use wrappers for libm functions.</li> 81<li>Always use wrappers for libm functions.</li>
65<li>Resurrect metamethod name strings before copying them.</li> 82<li>Resurrect metamethod name strings before copying them.</li>
66<li>Mark current trace, even if compiler is idle.</li> 83<li>Mark current trace, even if compiler is idle.</li>
67<li>Ensure FILE metatable is created only once.</li> 84<li>Ensure FILE metatable is created only once.</li>
68<li>Fix type comparisons when different integer types are involved.</li> 85<li>Fix type comparisons when different integer types are involved.</li>
69<li>Fix getmetatable() recording.</li> 86<li>Fix <tt>getmetatable()</tt> recording.</li>
70<li>Fix TDUP with dead keys in template table.</li> 87<li>Fix TDUP with dead keys in template table.</li>
71<li><tt>jit.flush(tr)</tt> returns status. 88<li><tt>jit.flush(tr)</tt> returns status.
72Prevent manual flush of a trace that's still linked.</li> 89Prevent manual flush of a trace that's still linked.</li>
@@ -234,7 +251,7 @@ on a separate line.</li>
234 251
235<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li> 252<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
236<li>Miscellaneous doc changes. Added a section about 253<li>Miscellaneous doc changes. Added a section about
237<a href="luajit_install.html#embedding">embedding LuaJIT</a>.</li> 254<a href="install.html#embedding">embedding LuaJIT</a>.</li>
238</ul> 255</ul>
239<p> 256<p>
240This release is in sync with Coco 1.1.0 (see the 257This release is in sync with Coco 1.1.0 (see the
diff --git a/doc/contact.html b/doc/contact.html
index 36d5a825..66d52410 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -46,17 +46,15 @@ You can also send any questions you have directly to me:
46 46
47<script type="text/javascript"> 47<script type="text/javascript">
48<!-- 48<!--
49var xS="@-: .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZa<b>cdefghijklmnopqrstuvwxyz" 49var xS="@-:\" .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ<abc>defghijklmnopqrstuvwxyz";function xD(s)
50function xD(s)
51{var len=s.length;var r="";for(var i=0;i<len;i++) 50{var len=s.length;var r="";for(var i=0;i<len;i++)
52{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1) 51{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)c=xS.charAt(69-n);r+=c;}
53c=xS.charAt(66-n);r+=c;}
54document.write("<"+"p>"+r+"<"+"/p>\n");} 52document.write("<"+"p>"+r+"<"+"/p>\n");}
55//--> 53//-->
56</script> 54</script>
57<script type="text/javascript"> 55<script type="text/javascript">
58<!-- 56<!--
59xD("ewYKA7vu-EIwslx7 K9A.t41C") 57xD("fyZKB8xv\"FJytmz8.KAB0u52D")
60//--></script> 58//--></script>
61<noscript> 59<noscript>
62<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13"> 60<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13">
diff --git a/doc/luajit.html b/doc/luajit.html
index 9b16ea37..5a2b3689 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -8,6 +8,7 @@
8<meta name="Language" content="en"> 8<meta name="Language" content="en">
9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> 9<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> 10<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
11<meta name="description" content="LuaJIT is a Just-In-Time (JIT) compiler for the Lua language.">
11</head> 12</head>
12<body> 13<body>
13<div id="site"> 14<div id="site">
diff --git a/lib/bc.lua b/lib/bc.lua
index 532f2493..6296174e 100644
--- a/lib/bc.lua
+++ b/lib/bc.lua
@@ -30,7 +30,7 @@
30-- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello" 30-- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello"
31-- 31--
32-- local out = { 32-- local out = {
33-- -- Do something wich each line: 33-- -- Do something with each line:
34-- write = function(t, ...) io.write(...) end, 34-- write = function(t, ...) io.write(...) end,
35-- close = function(t) end, 35-- close = function(t) end,
36-- flush = function(t) end, 36-- flush = function(t) end,
diff --git a/lib/dump.lua b/lib/dump.lua
index 9fde87c1..021fc1c9 100644
--- a/lib/dump.lua
+++ b/lib/dump.lua
@@ -144,7 +144,7 @@ local colortype_ansi = {
144 [0] = "%s", 144 [0] = "%s",
145 "%s", 145 "%s",
146 "%s", 146 "%s",
147 "%s", 147 "\027[36m%s\027[m",
148 "\027[32m%s\027[m", 148 "\027[32m%s\027[m",
149 "%s", 149 "%s",
150 "\027[1m%s\027[m", 150 "\027[1m%s\027[m",
@@ -199,9 +199,9 @@ margin-right: 2em;
199span.irt_str { color: #00a000; } 199span.irt_str { color: #00a000; }
200span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; } 200span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
201span.irt_tab { color: #c00000; } 201span.irt_tab { color: #c00000; }
202span.irt_udt { color: #00c0c0; } 202span.irt_udt, span.irt_lud { color: #00c0c0; }
203span.irt_num { color: #0000c0; } 203span.irt_num { color: #4040c0; }
204span.irt_int { color: #c000c0; } 204span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
205</style> 205</style>
206]] 206]]
207 207
@@ -210,7 +210,7 @@ local colorize, irtype
210-- Lookup table to convert some literals into names. 210-- Lookup table to convert some literals into names.
211local litname = { 211local litname = {
212 ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", }, 212 ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
213 ["XLOAD "] = { [0] = "", "unaligned", }, 213 ["XLOAD "] = { [0] = "", "R", "U", "RU", },
214 ["TOINT "] = { [0] = "check", "index", "", }, 214 ["TOINT "] = { [0] = "check", "index", "", },
215 ["FLOAD "] = vmdef.irfield, 215 ["FLOAD "] = vmdef.irfield,
216 ["FREF "] = vmdef.irfield, 216 ["FREF "] = vmdef.irfield,
@@ -313,6 +313,27 @@ local function ridsp_name(ridsp)
313 return "" 313 return ""
314end 314end
315 315
316-- Recursively gather CALL* args and dump them.
317local function dumpcallargs(tr, ins)
318 if ins < 0 then
319 out:write(formatk(tr, ins))
320 else
321 local m, ot, op1, op2 = traceir(tr, ins)
322 local oidx = 6*shr(ot, 8)
323 local op = sub(vmdef.irnames, oidx+1, oidx+6)
324 if op == "CARG " then
325 dumpcallargs(tr, op1)
326 if op2 < 0 then
327 out:write(" ", formatk(tr, op2))
328 else
329 out:write(" ", format("%04d", op2))
330 end
331 else
332 out:write(format("%04d", ins))
333 end
334 end
335end
336
316-- Dump IR and interleaved snapshots. 337-- Dump IR and interleaved snapshots.
317local function dump_ir(tr, dumpsnap, dumpreg) 338local function dump_ir(tr, dumpsnap, dumpreg)
318 local info = traceinfo(tr) 339 local info = traceinfo(tr)
@@ -348,7 +369,8 @@ local function dump_ir(tr, dumpsnap, dumpreg)
348 else 369 else
349 out:write(format("%04d ------ LOOP ------------\n", ins)) 370 out:write(format("%04d ------ LOOP ------------\n", ins))
350 end 371 end
351 elseif op ~= "NOP " and (dumpreg or op ~= "RENAME") then 372 elseif op ~= "NOP " and op ~= "CARG " and
373 (dumpreg or op ~= "RENAME") then
352 if dumpreg then 374 if dumpreg then
353 out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) 375 out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
354 else 376 else
@@ -359,7 +381,11 @@ local function dump_ir(tr, dumpsnap, dumpreg)
359 band(ot, 128) == 0 and " " or "+", 381 band(ot, 128) == 0 and " " or "+",
360 irtype[t], op)) 382 irtype[t], op))
361 local m1 = band(m, 3) 383 local m1 = band(m, 3)
362 if m1 ~= 3 then -- op1 != IRMnone 384 if sub(op, 1, 4) == "CALL" then
385 out:write(format("%-10s (", vmdef.ircall[op2]))
386 if op1 ~= -1 then dumpcallargs(tr, op1) end
387 out:write(")")
388 elseif m1 ~= 3 then -- op1 != IRMnone
363 if op1 < 0 then 389 if op1 < 0 then
364 out:write(formatk(tr, op1)) 390 out:write(formatk(tr, op1))
365 else 391 else
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 1fb81e27..779ee545 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -21,8 +21,9 @@ lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
21 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 21 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
22lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h 22lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
23lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 23lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
24 lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \ 24 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ff.h lj_ffdef.h \
25 lj_libdef.h 25 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h lj_traceerr.h \
26 lj_lib.h lj_libdef.h
26lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 27lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
27 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \ 28 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \
28 lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \ 29 lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \
@@ -45,9 +46,9 @@ lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
45 lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 46 lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
46 lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h 47 lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
47lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 48lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
48 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ 49 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
49 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \ 50 lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_asm.h \
50 lj_target.h lj_target_x86.h 51 lj_vm.h lj_target.h lj_target_x86.h
51lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h 52lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h
52lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h 53lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
53lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 54lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@@ -67,8 +68,8 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
67 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \ 68 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \
68 lj_ir.h lj_dispatch.h 69 lj_ir.h lj_dispatch.h
69lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 70lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
70 lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ 71 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
71 lj_traceerr.h 72 lj_bc.h lj_traceerr.h lj_lib.h
72lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 73lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
73 lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h 74 lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
74lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 75lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
diff --git a/src/buildvm.c b/src/buildvm.c
index b3738db4..4aba39d4 100644
--- a/src/buildvm.c
+++ b/src/buildvm.c
@@ -215,12 +215,19 @@ IRFPMDEF(FPMNAME)
215}; 215};
216 216
217const char *const irfield_names[] = { 217const char *const irfield_names[] = {
218#define FLNAME(name, type, field) #name, 218#define FLNAME(name, ofs) #name,
219IRFLDEF(FLNAME) 219IRFLDEF(FLNAME)
220#undef FLNAME 220#undef FLNAME
221 NULL 221 NULL
222}; 222};
223 223
224const char *const ircall_names[] = {
225#define IRCALLNAME(name, nargs, kind, type, flags) #name,
226IRCALLDEF(IRCALLNAME)
227#undef IRCALLNAME
228 NULL
229};
230
224static const char *const trace_errors[] = { 231static const char *const trace_errors[] = {
225#define TREDEF(name, msg) msg, 232#define TREDEF(name, msg) msg,
226#include "lj_traceerr.h" 233#include "lj_traceerr.h"
@@ -269,6 +276,11 @@ static void emit_vmdef(BuildCtx *ctx)
269 } 276 }
270 fprintf(ctx->fp, "}\n\n"); 277 fprintf(ctx->fp, "}\n\n");
271 278
279 fprintf(ctx->fp, "ircall = {\n[0]=");
280 for (i = 0; ircall_names[i]; i++)
281 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
282 fprintf(ctx->fp, "}\n\n");
283
272 fprintf(ctx->fp, "traceerr = {\n[0]="); 284 fprintf(ctx->fp, "traceerr = {\n[0]=");
273 for (i = 0; trace_errors[i]; i++) 285 for (i = 0; trace_errors[i]; i++)
274 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 286 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
diff --git a/src/buildvm.h b/src/buildvm.h
index e55527fd..53c820ad 100644
--- a/src/buildvm.h
+++ b/src/buildvm.h
@@ -102,5 +102,6 @@ extern const char *const bc_names[];
102extern const char *const ir_names[]; 102extern const char *const ir_names[];
103extern const char *const irfpm_names[]; 103extern const char *const irfpm_names[];
104extern const char *const irfield_names[]; 104extern const char *const irfield_names[];
105extern const char *const ircall_names[];
105 106
106#endif 107#endif
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c
index 5daab13b..31b6f61e 100644
--- a/src/buildvm_asm.c
+++ b/src/buildvm_asm.c
@@ -26,6 +26,14 @@ static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
26static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r) 26static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r)
27{ 27{
28 const char *sym = ctx->extnames[r->sym]; 28 const char *sym = ctx->extnames[r->sym];
29 const char *p = strchr(sym, '@');
30 char buf[80];
31 if (p) {
32 /* Always strip fastcall suffix. Wrong for (unused) COFF on Win32. */
33 strncpy(buf, sym, p-sym);
34 buf[p-sym] = '\0';
35 sym = buf;
36 }
29 switch (ctx->mode) { 37 switch (ctx->mode) {
30 case BUILD_elfasm: 38 case BUILD_elfasm:
31 if (r->type) 39 if (r->type)
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
index 271118e0..77af3dc5 100644
--- a/src/buildvm_fold.c
+++ b/src/buildvm_fold.c
@@ -107,6 +107,10 @@ static uint32_t nexttoken(char **pp, int allowlit, int allowany)
107 for (i = 0; irfield_names[i]; i++) 107 for (i = 0; irfield_names[i]; i++)
108 if (!strcmp(irfield_names[i], p+5)) 108 if (!strcmp(irfield_names[i], p+5))
109 return i; 109 return i;
110 } else if (allowlit && !strncmp(p, "IRCALL_", 7)) {
111 for (i = 0; ircall_names[i]; i++)
112 if (!strcmp(ircall_names[i], p+7))
113 return i;
110 } else if (allowany && !strcmp("any", p)) { 114 } else if (allowany && !strcmp("any", p)) {
111 return 0xff; 115 return 0xff;
112 } else { 116 } else {
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c
index 1a8661bf..a24ae727 100644
--- a/src/buildvm_peobj.c
+++ b/src/buildvm_peobj.c
@@ -85,6 +85,7 @@ typedef struct PEsymaux {
85#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */ 85#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */
86#define PEOBJ_RELOC_DIR32 0x06 86#define PEOBJ_RELOC_DIR32 0x06
87#define PEOBJ_SYM_PREFIX "_" 87#define PEOBJ_SYM_PREFIX "_"
88#define PEOBJ_SYMF_PREFIX "@"
88#elif LJ_TARGET_X64 89#elif LJ_TARGET_X64
89#define PEOBJ_ARCH_TARGET 0x8664 90#define PEOBJ_ARCH_TARGET 0x8664
90#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ 91#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */
@@ -260,7 +261,18 @@ void emit_peobj(BuildCtx *ctx)
260 261
261 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT); 262 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
262 for (i = 0; ctx->extnames[i]; i++) { 263 for (i = 0; ctx->extnames[i]; i++) {
263 sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]); 264 const char *sym = ctx->extnames[i];
265 const char *p = strchr(sym, '@');
266 if (p) {
267#ifdef PEOBJ_SYMF_PREFIX
268 sprintf(name, PEOBJ_SYMF_PREFIX "%s", sym);
269#else
270 strncpy(name, sym, p-sym);
271 name[p-sym] = '\0';
272#endif
273 } else {
274 sprintf(name, PEOBJ_SYM_PREFIX "%s", sym);
275 }
264 emit_peobj_sym(ctx, name, 0, 276 emit_peobj_sym(ctx, name, 0,
265 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); 277 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
266 } 278 }
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index 09cfa6dc..e857a6be 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -30,6 +30,9 @@
30|.define RD, RC 30|.define RD, RC
31|.define RDL, RCL 31|.define RDL, RCL
32| 32|
33|.define FCARG1, ecx // Fastcall arguments.
34|.define FCARG2, edx
35|
33|// Type definitions. Some of these are only used for documentation. 36|// Type definitions. Some of these are only used for documentation.
34|.type L, lua_State 37|.type L, lua_State
35|.type GL, global_State 38|.type GL, global_State
@@ -1066,7 +1069,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1066 | mov RB, LJ_TNUMX 1069 | mov RB, LJ_TNUMX
1067 |7: 1070 |7:
1068 | not RB 1071 | not RB
1069 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] 1072 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1070 | jmp <2 1073 | jmp <2
1071 | 1074 |
1072 |.ffunc_2 setmetatable 1075 |.ffunc_2 setmetatable
@@ -1126,17 +1129,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1126 | jmp ->fff_res1 1129 | jmp ->fff_res1
1127 |3: // Handle numbers inline, unless a number base metatable is present. 1130 |3: // Handle numbers inline, unless a number base metatable is present.
1128 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1131 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1129 | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0 1132 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1130 | jne ->fff_fallback 1133 | jne ->fff_fallback
1131 | ffgccheck // Caveat: uses label 1. 1134 | ffgccheck // Caveat: uses label 1.
1132 | mov L:RB, SAVE_L 1135 | mov L:RB, SAVE_L
1133 | mov ARG1, L:RB
1134 | mov ARG2, RA
1135 | mov L:RB->base, RA // Add frame since C call can throw. 1136 | mov L:RB->base, RA // Add frame since C call can throw.
1136 | mov [RA-4], PC 1137 | mov [RA-4], PC
1137 | mov SAVE_PC, PC // Redundant (but a defined value). 1138 | mov SAVE_PC, PC // Redundant (but a defined value).
1138 | mov ARG3, BASE // Save BASE. 1139 | mov ARG3, BASE // Save BASE.
1139 | call extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1140 | mov FCARG2, RA // Caveat: FCARG2 == BASE
1141 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
1142 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
1140 | // GCstr returned in eax (RC). 1143 | // GCstr returned in eax (RC).
1141 | mov RA, L:RB->base 1144 | mov RA, L:RB->base
1142 | mov BASE, ARG3 1145 | mov BASE, ARG3
@@ -1762,11 +1765,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov)
1762 | 1765 |
1763 |.ffunc_1 table_getn 1766 |.ffunc_1 table_getn
1764 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1767 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1765 | mov TAB:RB, [RA] 1768 | mov ARG2, BASE // Save RA and BASE.
1766 | mov ARG1, TAB:RB 1769 | mov RB, RA
1767 | mov RB, RA // Save RA and BASE. 1770 | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA
1768 | mov ARG2, BASE 1771 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
1769 | call extern lj_tab_len // (GCtab *t)
1770 | // Length of table returned in eax (RC). 1772 | // Length of table returned in eax (RC).
1771 | mov ARG1, RC 1773 | mov ARG1, RC
1772 | mov RA, RB // Restore RA and BASE. 1774 | mov RA, RB // Restore RA and BASE.
@@ -2512,10 +2514,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
2512 | ins_next 2514 | ins_next
2513 |2: 2515 |2:
2514 | checktab RD, ->vmeta_len 2516 | checktab RD, ->vmeta_len
2515 | mov TAB:RD, [BASE+RD*8] 2517 | mov TAB:FCARG1, [BASE+RD*8]
2516 | mov ARG1, TAB:RD
2517 | mov RB, BASE // Save BASE. 2518 | mov RB, BASE // Save BASE.
2518 | call extern lj_tab_len // (GCtab *t) 2519 | call extern lj_tab_len@4 // (GCtab *t)
2519 | // Length of table returned in eax (RC). 2520 | // Length of table returned in eax (RC).
2520 | mov ARG1, RC 2521 | mov ARG1, RC
2521 | mov BASE, RB // Restore BASE. 2522 | mov BASE, RB // Restore BASE.
@@ -2665,66 +2666,63 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
2665 | ins_next 2666 | ins_next
2666 break; 2667 break;
2667 case BC_USETV: 2668 case BC_USETV:
2669#define TV2MARKOFS \
2670 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
2668 | ins_AD // RA = upvalue #, RD = src 2671 | ins_AD // RA = upvalue #, RD = src
2669 | // Really ugly code due to the lack of a 4th free register.
2670 | mov LFUNC:RB, [BASE-8] 2672 | mov LFUNC:RB, [BASE-8]
2671 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 2673 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2672 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) 2674 | cmp byte UPVAL:RB->closed, 0
2673 | jnz >4
2674 |1:
2675 | mov RA, [BASE+RD*8]
2676 |2:
2677 | mov RB, UPVAL:RB->v 2675 | mov RB, UPVAL:RB->v
2676 | mov RA, [BASE+RD*8]
2678 | mov RD, [BASE+RD*8+4] 2677 | mov RD, [BASE+RD*8+4]
2679 | mov [RB], RA 2678 | mov [RB], RA
2680 | mov [RB+4], RD 2679 | mov [RB+4], RD
2681 |3: 2680 | jz >1
2681 | // Check barrier for closed upvalue.
2682 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
2683 | jnz >2
2684 |1:
2682 | ins_next 2685 | ins_next
2683 | 2686 |
2684 |4: // Upvalue is black. Check if new value is collectable and white. 2687 |2: // Upvalue is black. Check if new value is collectable and white.
2685 | mov RA, [BASE+RD*8+4] 2688 | sub RD, LJ_TISGCV
2686 | sub RA, LJ_TISGCV 2689 | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
2687 | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
2688 | jbe <1 2690 | jbe <1
2689 | mov GCOBJ:RA, [BASE+RD*8]
2690 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) 2691 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
2691 | jz <2 2692 | jz <1
2692 | // Crossed a write barrier. So move the barrier forward. 2693 | // Crossed a write barrier. Move the barrier forward.
2693 | mov ARG2, UPVAL:RB 2694 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
2694 | mov ARG3, GCOBJ:RA 2695 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
2695 | mov RB, UPVAL:RB->v 2696 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
2696 | mov RD, [BASE+RD*8+4] 2697 | mov BASE, RB // Restore BASE.
2697 | mov [RB], GCOBJ:RA 2698 | jmp <1
2698 | mov [RB+4], RD
2699 |->BC_USETV_Z:
2700 | mov L:RB, SAVE_L
2701 | lea GL:RA, [DISPATCH+GG_DISP2G]
2702 | mov L:RB->base, BASE
2703 | mov ARG1, GL:RA
2704 | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
2705 | mov BASE, L:RB->base
2706 | jmp <3
2707 break; 2699 break;
2700#undef TV2MARKOFS
2708 case BC_USETS: 2701 case BC_USETS:
2709 | ins_AND // RA = upvalue #, RD = str const (~) 2702 | ins_AND // RA = upvalue #, RD = str const (~)
2710 | mov LFUNC:RB, [BASE-8] 2703 | mov LFUNC:RB, [BASE-8]
2711 | mov GCOBJ:RD, [KBASE+RD*4]
2712 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 2704 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2713 | mov RA, UPVAL:RB->v 2705 | mov GCOBJ:RA, [KBASE+RD*4]
2714 | mov dword [RA+4], LJ_TSTR 2706 | mov RD, UPVAL:RB->v
2715 | mov [RA], GCOBJ:RD 2707 | mov [RD], GCOBJ:RA
2708 | mov dword [RD+4], LJ_TSTR
2716 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) 2709 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
2717 | jnz >2 2710 | jnz >2
2718 |1: 2711 |1:
2719 | ins_next 2712 | ins_next
2720 | 2713 |
2721 |2: // Upvalue is black. Check if string is white. 2714 |2: // Check if string is white and ensure upvalue is closed.
2722 | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str) 2715 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
2723 | jz <1 2716 | jz <1
2724 | // Crossed a write barrier. So move the barrier forward. 2717 | cmp byte UPVAL:RB->closed, 0
2725 | mov ARG3, GCOBJ:RD 2718 | jz <1
2726 | mov ARG2, UPVAL:RB 2719 | // Crossed a write barrier. Move the barrier forward.
2727 | jmp ->BC_USETV_Z 2720 | mov RB, BASE // Save BASE (FCARG2 == BASE).
2721 | mov FCARG2, RD
2722 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
2723 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
2724 | mov BASE, RB // Restore BASE.
2725 | jmp <1
2728 break; 2726 break;
2729 case BC_USETN: 2727 case BC_USETN:
2730 | ins_AD // RA = upvalue #, RD = num const 2728 | ins_AD // RA = upvalue #, RD = num const
@@ -2808,23 +2806,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
2808 | mov dword [BASE+RA*8+4], LJ_TTAB 2806 | mov dword [BASE+RA*8+4], LJ_TTAB
2809 | ins_next 2807 | ins_next
2810 |2: 2808 |2:
2811 | call extern lj_gc_step_fixtop // (lua_State *L) 2809 | mov L:FCARG1, L:RB
2812 | mov ARG1, L:RB // Args owned by callee. Set it again. 2810 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
2813 | jmp <1 2811 | jmp <1
2814 break; 2812 break;
2815 case BC_TDUP: 2813 case BC_TDUP:
2816 | ins_AND // RA = dst, RD = table const (~) (holding template table) 2814 | ins_AND // RA = dst, RD = table const (~) (holding template table)
2817 | mov TAB:RD, [KBASE+RD*4]
2818 | mov L:RB, SAVE_L 2815 | mov L:RB, SAVE_L
2819 | mov ARG2, TAB:RD
2820 | mov ARG1, L:RB
2821 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] 2816 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
2822 | mov SAVE_PC, PC 2817 | mov SAVE_PC, PC
2823 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] 2818 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
2824 | mov L:RB->base, BASE 2819 | mov L:RB->base, BASE
2825 | jae >3 2820 | jae >3
2826 |2: 2821 |2:
2827 | call extern lj_tab_dup // (lua_State *L, Table *kt) 2822 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
2823 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
2824 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
2828 | // Table * returned in eax (RC). 2825 | // Table * returned in eax (RC).
2829 | mov BASE, L:RB->base 2826 | mov BASE, L:RB->base
2830 | movzx RA, PC_RA 2827 | movzx RA, PC_RA
@@ -2832,8 +2829,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
2832 | mov dword [BASE+RA*8+4], LJ_TTAB 2829 | mov dword [BASE+RA*8+4], LJ_TTAB
2833 | ins_next 2830 | ins_next
2834 |3: 2831 |3:
2835 | call extern lj_gc_step_fixtop // (lua_State *L) 2832 | mov L:FCARG1, L:RB
2836 | mov ARG1, L:RB // Args owned by callee. Set it again. 2833 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
2834 | movzx RD, PC_RD // Need to reload RD.
2835 | not RD
2837 | jmp <2 2836 | jmp <2
2838 break; 2837 break;
2839 2838
diff --git a/src/lib_base.c b/src/lib_base.c
index 6b9e8eef..821c81b4 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -183,7 +183,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
183 int32_t base = lj_lib_optint(L, 2, 10); 183 int32_t base = lj_lib_optint(L, 2, 10);
184 if (base == 10) { 184 if (base == 10) {
185 TValue *o = lj_lib_checkany(L, 1); 185 TValue *o = lj_lib_checkany(L, 1);
186 if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) { 186 if (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o))) {
187 setnumV(L->base-1, numV(o)); 187 setnumV(L->base-1, numV(o));
188 return FFH_RES(1); 188 return FFH_RES(1);
189 } 189 }
@@ -206,6 +206,9 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
206 return FFH_RES(1); 206 return FFH_RES(1);
207} 207}
208 208
209LJLIB_PUSH("nil")
210LJLIB_PUSH("false")
211LJLIB_PUSH("true")
209LJLIB_ASM(tostring) LJLIB_REC(.) 212LJLIB_ASM(tostring) LJLIB_REC(.)
210{ 213{
211 TValue *o = lj_lib_checkany(L, 1); 214 TValue *o = lj_lib_checkany(L, 1);
@@ -218,12 +221,8 @@ LJLIB_ASM(tostring) LJLIB_REC(.)
218 GCstr *s; 221 GCstr *s;
219 if (tvisnum(o)) { 222 if (tvisnum(o)) {
220 s = lj_str_fromnum(L, &o->n); 223 s = lj_str_fromnum(L, &o->n);
221 } else if (tvisnil(o)) { 224 } else if (tvispri(o)) {
222 s = lj_str_newlit(L, "nil"); 225 s = strV(lj_lib_upvalue(L, -itype(o)));
223 } else if (tvisfalse(o)) {
224 s = lj_str_newlit(L, "false");
225 } else if (tvistrue(o)) {
226 s = lj_str_newlit(L, "true");
227 } else { 226 } else {
228 if (tvisfunc(o) && isffunc(funcV(o))) 227 if (tvisfunc(o) && isffunc(funcV(o)))
229 lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid); 228 lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid);
diff --git a/src/lib_io.c b/src/lib_io.c
index aefe4213..d69b99a4 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -17,14 +17,28 @@
17#include "lualib.h" 17#include "lualib.h"
18 18
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_err.h"
21#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h"
22#include "lj_str.h"
22#include "lj_ff.h" 23#include "lj_ff.h"
24#include "lj_trace.h"
23#include "lj_lib.h" 25#include "lj_lib.h"
24 26
25/* Index of standard handles in function environment. */ 27/* Userdata payload for I/O file. */
26#define IO_INPUT 1 28typedef struct IOFileUD {
27#define IO_OUTPUT 2 29 FILE *fp; /* File handle. */
30 uint32_t type; /* File type. */
31} IOFileUD;
32
33#define IOFILE_TYPE_FILE 0 /* Regular file. */
34#define IOFILE_TYPE_PIPE 1 /* Pipe. */
35#define IOFILE_TYPE_STDF 2 /* Standard file handle. */
36#define IOFILE_TYPE_MASK 3
37
38#define IOFILE_FLAG_CLOSE 4 /* Close after io.lines() iterator. */
39
40#define IOSTDF_UD(L, id) (&gcref(G(L)->gcroot[(id)])->ud)
41#define IOSTDF_IOF(L, id) ((IOFileUD *)uddata(IOSTDF_UD(L, (id))))
28 42
29/* -- Error handling ------------------------------------------------------ */ 43/* -- Error handling ------------------------------------------------------ */
30 44
@@ -35,95 +49,102 @@ static int io_pushresult(lua_State *L, int ok, const char *fname)
35 return 1; 49 return 1;
36 } else { 50 } else {
37 int en = errno; /* Lua API calls may change this value. */ 51 int en = errno; /* Lua API calls may change this value. */
38 lua_pushnil(L); 52 setnilV(L->top++);
39 if (fname) 53 if (fname)
40 lua_pushfstring(L, "%s: %s", fname, strerror(en)); 54 lua_pushfstring(L, "%s: %s", fname, strerror(en));
41 else 55 else
42 lua_pushfstring(L, "%s", strerror(en)); 56 lua_pushfstring(L, "%s", strerror(en));
43 lua_pushinteger(L, en); 57 setintV(L->top++, en);
58 lj_trace_abort(G(L));
44 return 3; 59 return 3;
45 } 60 }
46} 61}
47 62
48static void io_file_error(lua_State *L, int arg, const char *fname) 63/* -- Open/close helpers -------------------------------------------------- */
64
65static IOFileUD *io_tofilep(lua_State *L)
49{ 66{
50 lua_pushfstring(L, "%s: %s", fname, strerror(errno)); 67 if (!(L->base < L->top && tvisudata(L->base) &&
51 luaL_argerror(L, arg, lua_tostring(L, -1)); 68 udataV(L->base)->udtype == UDTYPE_IO_FILE))
69 lj_err_argtype(L, 1, "FILE*");
70 return (IOFileUD *)uddata(udataV(L->base));
52} 71}
53 72
54/* -- Open helpers -------------------------------------------------------- */ 73static IOFileUD *io_tofile(lua_State *L)
55
56#define io_tofilep(L) ((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))
57
58static FILE *io_tofile(lua_State *L)
59{ 74{
60 FILE **f = io_tofilep(L); 75 IOFileUD *iof = io_tofilep(L);
61 if (*f == NULL) 76 if (iof->fp == NULL)
62 lj_err_caller(L, LJ_ERR_IOCLFL); 77 lj_err_caller(L, LJ_ERR_IOCLFL);
63 return *f; 78 return iof;
64} 79}
65 80
66static FILE **io_file_new(lua_State *L) 81static FILE *io_stdfile(lua_State *L, ptrdiff_t id)
67{ 82{
68 FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *)); 83 IOFileUD *iof = IOSTDF_IOF(L, id);
69 *pf = NULL; 84 if (iof->fp == NULL)
70 luaL_getmetatable(L, LUA_FILEHANDLE); 85 lj_err_caller(L, LJ_ERR_IOSTDCL);
71 lua_setmetatable(L, -2); 86 return iof->fp;
72 return pf;
73} 87}
74 88
75/* -- Close helpers ------------------------------------------------------- */ 89static IOFileUD *io_file_new(lua_State *L)
90{
91 IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
92 GCudata *ud = udataV(L->top-1);
93 ud->udtype = UDTYPE_IO_FILE;
94 /* NOBARRIER: The GCudata is new (marked white). */
95 setgcrefr(ud->metatable, curr_func(L)->c.env);
96 iof->fp = NULL;
97 iof->type = IOFILE_TYPE_FILE;
98 return iof;
99}
76 100
77static int lj_cf_io_std_close(lua_State *L) 101static IOFileUD *io_file_open(lua_State *L, const char *mode)
78{ 102{
79 lua_pushnil(L); 103 const char *fname = strdata(lj_lib_checkstr(L, 1));
80 lua_pushliteral(L, "cannot close standard file"); 104 IOFileUD *iof = io_file_new(L);
81 return 2; 105 iof->fp = fopen(fname, mode);
106 if (iof->fp == NULL)
107 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
108 return iof;
82} 109}
83 110
84static int lj_cf_io_pipe_close(lua_State *L) 111static int io_file_close(lua_State *L, IOFileUD *iof)
85{ 112{
86 FILE **p = io_tofilep(L); 113 int ok;
114 if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_FILE) {
115 ok = (fclose(iof->fp) == 0);
116 } else if ((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_PIPE) {
87#if defined(LUA_USE_POSIX) 117#if defined(LUA_USE_POSIX)
88 int ok = (pclose(*p) != -1); 118 ok = (pclose(iof->fp) != -1);
89#elif defined(LUA_USE_WIN) 119#elif defined(LUA_USE_WIN)
90 int ok = (_pclose(*p) != -1); 120 ok = (_pclose(iof->fp) != -1);
91#else 121#else
92 int ok = 0; 122 ok = 0;
93#endif 123#endif
94 *p = NULL; 124 } else {
95 return io_pushresult(L, ok, NULL); 125 lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
96} 126 setnilV(L->top++);
97 127 lua_pushliteral(L, "cannot close standard file");
98static int lj_cf_io_file_close(lua_State *L) 128 return 2;
99{ 129 }
100 FILE **p = io_tofilep(L); 130 iof->fp = NULL;
101 int ok = (fclose(*p) == 0);
102 *p = NULL;
103 return io_pushresult(L, ok, NULL); 131 return io_pushresult(L, ok, NULL);
104} 132}
105 133
106static int io_file_close(lua_State *L)
107{
108 lua_getfenv(L, 1);
109 lua_getfield(L, -1, "__close");
110 return (lua_tocfunction(L, -1))(L);
111}
112
113/* -- Read/write helpers -------------------------------------------------- */ 134/* -- Read/write helpers -------------------------------------------------- */
114 135
115static int io_file_readnum(lua_State *L, FILE *fp) 136static int io_file_readnum(lua_State *L, FILE *fp)
116{ 137{
117 lua_Number d; 138 lua_Number d;
118 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { 139 if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
119 lua_pushnumber(L, d); 140 setnumV(L->top++, d);
120 return 1; 141 return 1;
121 } else { 142 } else {
122 return 0; /* read fails */ 143 return 0;
123 } 144 }
124} 145}
125 146
126static int test_eof(lua_State *L, FILE *fp) 147static int io_file_testeof(lua_State *L, FILE *fp)
127{ 148{
128 int c = getc(fp); 149 int c = getc(fp);
129 ungetc(c, fp); 150 ungetc(c, fp);
@@ -168,7 +189,7 @@ static int io_file_readchars(lua_State *L, FILE *fp, size_t n)
168 n -= nr; /* still have to read `n' chars */ 189 n -= nr; /* still have to read `n' chars */
169 } while (n > 0 && nr == rlen); /* until end of count or eof */ 190 } while (n > 0 && nr == rlen); /* until end of count or eof */
170 luaL_pushresult(&b); /* close buffer */ 191 luaL_pushresult(&b); /* close buffer */
171 return (n == 0 || lua_objlen(L, -1) > 0); 192 return (n == 0 || strV(L->top-1)->len > 0);
172} 193}
173 194
174static int io_file_read(lua_State *L, FILE *fp, int start) 195static int io_file_read(lua_State *L, FILE *fp, int start)
@@ -197,7 +218,7 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
197 lj_err_arg(L, n+1, LJ_ERR_INVFMT); 218 lj_err_arg(L, n+1, LJ_ERR_INVFMT);
198 } else if (tvisnum(L->base+n)) { 219 } else if (tvisnum(L->base+n)) {
199 size_t len = (size_t)lj_lib_checkint(L, n+1); 220 size_t len = (size_t)lj_lib_checkint(L, n+1);
200 ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp); 221 ok = len ? io_file_readchars(L, fp, len) : io_file_testeof(L, fp);
201 } else { 222 } else {
202 lj_err_arg(L, n+1, LJ_ERR_INVOPT); 223 lj_err_arg(L, n+1, LJ_ERR_INVOPT);
203 } 224 }
@@ -233,30 +254,29 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
233 254
234LJLIB_CF(io_method_close) 255LJLIB_CF(io_method_close)
235{ 256{
236 if (lua_isnone(L, 1)) 257 IOFileUD *iof = L->base < L->top ? io_tofile(L) :
237 lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT); 258 IOSTDF_IOF(L, GCROOT_IO_OUTPUT);
238 io_tofile(L); 259 return io_file_close(L, iof);
239 return io_file_close(L);
240} 260}
241 261
242LJLIB_CF(io_method_read) 262LJLIB_CF(io_method_read)
243{ 263{
244 return io_file_read(L, io_tofile(L), 1); 264 return io_file_read(L, io_tofile(L)->fp, 1);
245} 265}
246 266
247LJLIB_CF(io_method_write) 267LJLIB_CF(io_method_write) LJLIB_REC(io_write 0)
248{ 268{
249 return io_file_write(L, io_tofile(L), 1); 269 return io_file_write(L, io_tofile(L)->fp, 1);
250} 270}
251 271
252LJLIB_CF(io_method_flush) 272LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
253{ 273{
254 return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL); 274 return io_pushresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
255} 275}
256 276
257LJLIB_CF(io_method_seek) 277LJLIB_CF(io_method_seek)
258{ 278{
259 FILE *fp = io_tofile(L); 279 FILE *fp = io_tofile(L)->fp;
260 int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end"); 280 int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
261 lua_Number ofs; 281 lua_Number ofs;
262 int res; 282 int res;
@@ -294,39 +314,40 @@ LJLIB_CF(io_method_seek)
294 314
295LJLIB_CF(io_method_setvbuf) 315LJLIB_CF(io_method_setvbuf)
296{ 316{
297 FILE *fp = io_tofile(L); 317 FILE *fp = io_tofile(L)->fp;
298 int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no"); 318 int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
299 size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE); 319 size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
300 if (opt == 0) opt = _IOFBF; 320 if (opt == 0) opt = _IOFBF;
301 else if (opt == 1) opt = _IOLBF; 321 else if (opt == 1) opt = _IOLBF;
302 else if (opt == 2) opt = _IONBF; 322 else if (opt == 2) opt = _IONBF;
303 return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL); 323 return io_pushresult(L, setvbuf(fp, NULL, opt, sz) == 0, NULL);
304} 324}
305 325
306/* Forward declaration. */ 326LJLIB_PUSH(top-2) /* io_lines_iter */
307static void io_file_lines(lua_State *L, int idx, int toclose);
308
309LJLIB_CF(io_method_lines) 327LJLIB_CF(io_method_lines)
310{ 328{
311 io_tofile(L); 329 io_tofile(L);
312 io_file_lines(L, 1, 0); 330 setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
313 return 1; 331 setudataV(L, L->top+1, udataV(L->base));
332 L->top += 2;
333 return 2;
314} 334}
315 335
316LJLIB_CF(io_method___gc) 336LJLIB_CF(io_method___gc)
317{ 337{
318 FILE *fp = *io_tofilep(L); 338 IOFileUD *iof = io_tofilep(L);
319 if (fp != NULL) io_file_close(L); 339 if (iof->fp != NULL)
340 io_file_close(L, iof);
320 return 0; 341 return 0;
321} 342}
322 343
323LJLIB_CF(io_method___tostring) 344LJLIB_CF(io_method___tostring)
324{ 345{
325 FILE *fp = *io_tofilep(L); 346 IOFileUD *iof = io_tofilep(L);
326 if (fp == NULL) 347 if (iof->fp != NULL)
327 lua_pushliteral(L, "file (closed)"); 348 lua_pushfstring(L, "file (%p)", iof->fp);
328 else 349 else
329 lua_pushfstring(L, "file (%p)", fp); 350 lua_pushliteral(L, "file (closed)");
330 return 1; 351 return 1;
331} 352}
332 353
@@ -340,30 +361,41 @@ LJLIB_PUSH(top-1) LJLIB_SET(__index)
340 361
341LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ 362LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
342 363
343static FILE *io_file_get(lua_State *L, int findex) 364LJLIB_CF(io_open)
344{ 365{
345 GCtab *fenv = tabref(curr_func(L)->c.env); 366 const char *fname = strdata(lj_lib_checkstr(L, 1));
346 GCudata *ud = udataV(&tvref(fenv->array)[findex]); 367 GCstr *s = lj_lib_optstr(L, 2);
347 FILE *fp = *(FILE **)uddata(ud); 368 const char *mode = s ? strdata(s) : "r";
348 if (fp == NULL) 369 IOFileUD *iof = io_file_new(L);
349 lj_err_caller(L, LJ_ERR_IOSTDCL); 370 iof->fp = fopen(fname, mode);
350 return fp; 371 return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
351} 372}
352 373
353LJLIB_CF(io_open) 374LJLIB_CF(io_popen)
354{ 375{
355 const char *fname = luaL_checkstring(L, 1); 376#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
356 const char *mode = luaL_optstring(L, 2, "r"); 377 const char *fname = strdata(lj_lib_checkstr(L, 1));
357 FILE **pf = io_file_new(L); 378 GCstr *s = lj_lib_optstr(L, 2);
358 *pf = fopen(fname, mode); 379 const char *mode = s ? strdata(s) : "r";
359 return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1; 380 IOFileUD *iof = io_file_new(L);
381 iof->type = IOFILE_TYPE_PIPE;
382#ifdef LUA_USE_POSIX
383 fflush(NULL);
384 iof->fp = popen(fname, mode);
385#else
386 iof->fp = _popen(fname, mode);
387#endif
388 return iof->fp != NULL ? 1 : io_pushresult(L, 0, fname);
389#else
390 luaL_error(L, LUA_QL("popen") " not supported");
391#endif
360} 392}
361 393
362LJLIB_CF(io_tmpfile) 394LJLIB_CF(io_tmpfile)
363{ 395{
364 FILE **pf = io_file_new(L); 396 IOFileUD *iof = io_file_new(L);
365 *pf = tmpfile(); 397 iof->fp = tmpfile();
366 return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1; 398 return iof->fp != NULL ? 1 : io_pushresult(L, 0, NULL);
367} 399}
368 400
369LJLIB_CF(io_close) 401LJLIB_CF(io_close)
@@ -373,169 +405,112 @@ LJLIB_CF(io_close)
373 405
374LJLIB_CF(io_read) 406LJLIB_CF(io_read)
375{ 407{
376 return io_file_read(L, io_file_get(L, IO_INPUT), 0); 408 return io_file_read(L, io_stdfile(L, GCROOT_IO_INPUT), 0);
377}
378
379LJLIB_CF(io_write)
380{
381 return io_file_write(L, io_file_get(L, IO_OUTPUT), 0);
382}
383
384LJLIB_CF(io_flush)
385{
386 return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL);
387} 409}
388 410
389LJLIB_NOREG LJLIB_CF(io_lines_iter) 411LJLIB_CF(io_write) LJLIB_REC(io_write GCROOT_IO_OUTPUT)
390{
391 FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1)));
392 int ok;
393 if (fp == NULL)
394 lj_err_caller(L, LJ_ERR_IOCLFL);
395 ok = io_file_readline(L, fp);
396 if (ferror(fp))
397 return luaL_error(L, "%s", strerror(errno));
398 if (ok)
399 return 1;
400 if (tvistrue(lj_lib_upvalue(L, 2))) { /* Need to close file? */
401 L->top = L->base+1;
402 setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1)));
403 io_file_close(L);
404 }
405 return 0;
406}
407
408static void io_file_lines(lua_State *L, int idx, int toclose)
409{ 412{
410 lua_pushvalue(L, idx); 413 return io_file_write(L, io_stdfile(L, GCROOT_IO_OUTPUT), 0);
411 lua_pushboolean(L, toclose);
412 lua_pushcclosure(L, lj_cf_io_lines_iter, 2);
413 funcV(L->top-1)->c.ffid = FF_io_lines_iter;
414} 414}
415 415
416LJLIB_CF(io_lines) 416LJLIB_CF(io_flush) LJLIB_REC(io_flush GCROOT_IO_OUTPUT)
417{ 417{
418 if (lua_isnoneornil(L, 1)) { /* no arguments? */ 418 return io_pushresult(L, fflush(io_stdfile(L, GCROOT_IO_OUTPUT)) == 0, NULL);
419 /* will iterate over default input */
420 lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT);
421 return lj_cf_io_method_lines(L);
422 } else {
423 const char *fname = luaL_checkstring(L, 1);
424 FILE **pf = io_file_new(L);
425 *pf = fopen(fname, "r");
426 if (*pf == NULL)
427 io_file_error(L, 1, fname);
428 io_file_lines(L, lua_gettop(L), 1);
429 return 1;
430 }
431} 419}
432 420
433static int io_std_get(lua_State *L, int fp, const char *mode) 421static int io_std_getset(lua_State *L, ptrdiff_t id, const char *mode)
434{ 422{
435 if (!lua_isnoneornil(L, 1)) { 423 if (L->base < L->top && !tvisnil(L->base)) {
436 const char *fname = lua_tostring(L, 1); 424 if (tvisudata(L->base)) {
437 if (fname) { 425 io_tofile(L);
438 FILE **pf = io_file_new(L); 426 L->top = L->base+1;
439 *pf = fopen(fname, mode);
440 if (*pf == NULL)
441 io_file_error(L, 1, fname);
442 } else { 427 } else {
443 io_tofile(L); /* check that it's a valid file handle */ 428 io_file_open(L, mode);
444 lua_pushvalue(L, 1);
445 } 429 }
446 lua_rawseti(L, LUA_ENVIRONINDEX, fp); 430 /* NOBARRIER: The standard I/O handles are GC roots. */
431 setgcref(G(L)->gcroot[id], gcV(L->top-1));
432 } else {
433 setudataV(L, L->top++, IOSTDF_UD(L, id));
447 } 434 }
448 /* return current value */
449 lua_rawgeti(L, LUA_ENVIRONINDEX, fp);
450 return 1; 435 return 1;
451} 436}
452 437
453LJLIB_CF(io_input) 438LJLIB_CF(io_input)
454{ 439{
455 return io_std_get(L, IO_INPUT, "r"); 440 return io_std_getset(L, GCROOT_IO_INPUT, "r");
456} 441}
457 442
458LJLIB_CF(io_output) 443LJLIB_CF(io_output)
459{ 444{
460 return io_std_get(L, IO_OUTPUT, "w"); 445 return io_std_getset(L, GCROOT_IO_OUTPUT, "w");
461} 446}
462 447
463LJLIB_CF(io_type) 448LJLIB_NOREG LJLIB_CF(io_lines_iter)
464{ 449{
465 void *ud; 450 IOFileUD *iof = io_tofile(L);
466 luaL_checkany(L, 1); 451 int ok = io_file_readline(L, iof->fp);
467 ud = lua_touserdata(L, 1); 452 if (ferror(iof->fp))
468 lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); 453 lj_err_callermsg(L, strerror(errno));
469 if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1)) 454 if (!ok && (iof->type & IOFILE_FLAG_CLOSE))
470 lua_pushnil(L); /* not a file */ 455 io_file_close(L, iof); /* Return values are ignored (ok is 0). */
471 else if (*((FILE **)ud) == NULL) 456 return ok;
472 lua_pushliteral(L, "closed file");
473 else
474 lua_pushliteral(L, "file");
475 return 1;
476} 457}
477 458
478LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */ 459LJLIB_PUSH(top-3) /* io_lines_iter */
460LJLIB_CF(io_lines)
461{
462 if (L->base < L->top && !tvisnil(L->base)) { /* io.lines(fname) */
463 IOFileUD *iof = io_file_open(L, "r");
464 iof->type = IOFILE_TYPE_FILE|IOFILE_FLAG_CLOSE;
465 setfuncV(L, L->top-2, funcV(lj_lib_upvalue(L, 1)));
466 } else { /* io.lines() iterates over stdin. */
467 setfuncV(L, L->top, funcV(lj_lib_upvalue(L, 1)));
468 setudataV(L, L->top+1, IOSTDF_UD(L, GCROOT_IO_INPUT));
469 L->top += 2;
470 }
471 return 2;
472}
479 473
480LJLIB_CF(io_popen) 474LJLIB_CF(io_type)
481{ 475{
482#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN) 476 cTValue *o = lj_lib_checkany(L, 1);
483 const char *fname = luaL_checkstring(L, 1); 477 if (!(tvisudata(o) && udataV(o)->udtype == UDTYPE_IO_FILE))
484 const char *mode = luaL_optstring(L, 2, "r"); 478 setnilV(L->top++);
485 FILE **pf = io_file_new(L); 479 else if (((IOFileUD *)uddata(udataV(o)))->fp != NULL)
486#ifdef LUA_USE_POSIX 480 lua_pushliteral(L, "file");
487 fflush(NULL); 481 else
488 *pf = popen(fname, mode); 482 lua_pushliteral(L, "closed file");
489#else 483 return 1;
490 *pf = _popen(fname, mode);
491#endif
492 return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
493#else
494 luaL_error(L, LUA_QL("popen") " not supported");
495#endif
496} 484}
497 485
498#include "lj_libdef.h" 486#include "lj_libdef.h"
499 487
500/* ------------------------------------------------------------------------ */ 488/* ------------------------------------------------------------------------ */
501 489
502static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname) 490static GCobj *io_std_new(lua_State *L, FILE *fp, const char *name)
503{ 491{
504 FILE **pf = io_file_new(L); 492 IOFileUD *iof = (IOFileUD *)lua_newuserdata(L, sizeof(IOFileUD));
505 GCudata *ud = udataV(L->top-1); 493 GCudata *ud = udataV(L->top-1);
506 GCtab *envt = tabV(L->top-2); 494 ud->udtype = UDTYPE_IO_FILE;
507 *pf = fp; 495 /* NOBARRIER: The GCudata is new (marked white). */
508 setgcref(ud->env, obj2gco(envt)); 496 setgcref(ud->metatable, gcV(L->top-3));
509 lj_gc_objbarrier(L, obj2gco(ud), envt); 497 iof->fp = fp;
510 if (k > 0) { 498 iof->type = IOFILE_TYPE_STDF;
511 lua_pushvalue(L, -1); 499 lua_setfield(L, -2, name);
512 lua_rawseti(L, -5, k); 500 return obj2gco(ud);
513 }
514 lua_setfield(L, -3, fname);
515}
516
517static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls)
518{
519 lua_createtable(L, narr, 1);
520 lua_pushcfunction(L, cls);
521 lua_setfield(L, -2, "__close");
522} 501}
523 502
524LUALIB_API int luaopen_io(lua_State *L) 503LUALIB_API int luaopen_io(lua_State *L)
525{ 504{
526 lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); 505 lua_pushcfunction(L, lj_cf_io_lines_iter);
527 if (tvisnil(L->top-1)) { 506 funcV(L->top-1)->c.ffid = FF_io_lines_iter;
528 LJ_LIB_REG_(L, NULL, io_method); 507 LJ_LIB_REG_(L, NULL, io_method);
529 lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE); 508 copyTV(L, L->top, L->top-1); L->top++;
530 } 509 lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
531 io_fenv_new(L, 0, lj_cf_io_pipe_close); /* top-3 */
532 io_fenv_new(L, 2, lj_cf_io_file_close); /* top-2 */
533 LJ_LIB_REG(L, io); 510 LJ_LIB_REG(L, io);
534 io_fenv_new(L, 0, lj_cf_io_std_close); 511 setgcref(G(L)->gcroot[GCROOT_IO_INPUT], io_std_new(L, stdin, "stdin"));
535 io_std_new(L, stdin, IO_INPUT, "stdin"); 512 setgcref(G(L)->gcroot[GCROOT_IO_OUTPUT], io_std_new(L, stdout, "stdout"));
536 io_std_new(L, stdout, IO_OUTPUT, "stdout"); 513 io_std_new(L, stderr, "stderr");
537 io_std_new(L, stderr, 0, "stderr");
538 L->top--;
539 return 1; 514 return 1;
540} 515}
541 516
diff --git a/src/lib_math.c b/src/lib_math.c
index adc77c9d..f3803e8f 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -36,9 +36,9 @@ LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
36LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) 36LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
37LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) 37LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
38LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) 38LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
39LJLIB_ASM_(math_sinh) 39LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
40LJLIB_ASM_(math_cosh) 40LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
41LJLIB_ASM_(math_tanh) 41LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
42LJLIB_ASM_(math_frexp) 42LJLIB_ASM_(math_frexp)
43LJLIB_ASM_(math_modf) LJLIB_REC(.) 43LJLIB_ASM_(math_modf) LJLIB_REC(.)
44 44
@@ -82,35 +82,33 @@ LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); }
82*/ 82*/
83 83
84/* PRNG state. */ 84/* PRNG state. */
85typedef struct TW223State { 85struct RandomState {
86 uint64_t gen[4]; /* State of the 4 LFSR generators. */ 86 uint64_t gen[4]; /* State of the 4 LFSR generators. */
87 int valid; /* State is valid. */ 87 int valid; /* State is valid. */
88} TW223State; 88};
89 89
90/* Union needed for bit-pattern conversion between uint64_t and double. */ 90/* Union needed for bit-pattern conversion between uint64_t and double. */
91typedef union { uint64_t u64; double d; } U64double; 91typedef union { uint64_t u64; double d; } U64double;
92 92
93/* Update generator i and compute a running xor of all states. */ 93/* Update generator i and compute a running xor of all states. */
94#define TW223_GEN(i, k, q, s) \ 94#define TW223_GEN(i, k, q, s) \
95 z = tw->gen[i]; \ 95 z = rs->gen[i]; \
96 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ 96 z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
97 r ^= z; tw->gen[i] = z; 97 r ^= z; rs->gen[i] = z;
98 98
99/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ 99/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
100static LJ_NOINLINE double tw223_step(TW223State *tw) 100LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
101{ 101{
102 uint64_t z, r = 0; 102 uint64_t z, r = 0;
103 U64double u;
104 TW223_GEN(0, 63, 31, 18) 103 TW223_GEN(0, 63, 31, 18)
105 TW223_GEN(1, 58, 19, 28) 104 TW223_GEN(1, 58, 19, 28)
106 TW223_GEN(2, 55, 24, 7) 105 TW223_GEN(2, 55, 24, 7)
107 TW223_GEN(3, 47, 21, 8) 106 TW223_GEN(3, 47, 21, 8)
108 u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52); 107 return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
109 return u.d;
110} 108}
111 109
112/* PRNG initialization function. */ 110/* PRNG initialization function. */
113static void tw223_init(TW223State *tw, double d) 111static void random_init(RandomState *rs, double d)
114{ 112{
115 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ 113 uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
116 int i; 114 int i;
@@ -120,22 +118,24 @@ static void tw223_init(TW223State *tw, double d)
120 r >>= 8; 118 r >>= 8;
121 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; 119 u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
122 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ 120 if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
123 tw->gen[i] = u.u64; 121 rs->gen[i] = u.u64;
124 } 122 }
125 tw->valid = 1; 123 rs->valid = 1;
126 for (i = 0; i < 10; i++) 124 for (i = 0; i < 10; i++)
127 tw223_step(tw); 125 lj_math_random_step(rs);
128} 126}
129 127
130/* PRNG extract function. */ 128/* PRNG extract function. */
131LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ 129LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
132LJLIB_CF(math_random) 130LJLIB_CF(math_random) LJLIB_REC(.)
133{ 131{
134 int n = cast_int(L->top - L->base); 132 int n = cast_int(L->top - L->base);
135 TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); 133 RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
134 U64double u;
136 double d; 135 double d;
137 if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0); 136 if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
138 d = tw223_step(tw) - 1.0; 137 u.u64 = lj_math_random_step(rs);
138 d = u.d - 1.0;
139 if (n > 0) { 139 if (n > 0) {
140 double r1 = lj_lib_checknum(L, 1); 140 double r1 = lj_lib_checknum(L, 1);
141 if (n == 1) { 141 if (n == 1) {
@@ -150,11 +150,11 @@ LJLIB_CF(math_random)
150} 150}
151 151
152/* PRNG seed function. */ 152/* PRNG seed function. */
153LJLIB_PUSH(top-2) /* Upvalue holds userdata with TW223State. */ 153LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
154LJLIB_CF(math_randomseed) 154LJLIB_CF(math_randomseed)
155{ 155{
156 TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1)))); 156 RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
157 tw223_init(tw, lj_lib_checknum(L, 1)); 157 random_init(rs, lj_lib_checknum(L, 1));
158 return 0; 158 return 0;
159} 159}
160 160
@@ -164,9 +164,9 @@ LJLIB_CF(math_randomseed)
164 164
165LUALIB_API int luaopen_math(lua_State *L) 165LUALIB_API int luaopen_math(lua_State *L)
166{ 166{
167 TW223State *tw; 167 RandomState *rs;
168 tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State)); 168 rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
169 tw->valid = 0; /* Use lazy initialization to save some time on startup. */ 169 rs->valid = 0; /* Use lazy initialization to save some time on startup. */
170 LJ_LIB_REG(L, math); 170 LJ_LIB_REG(L, math);
171#if defined(LUA_COMPAT_MOD) 171#if defined(LUA_COMPAT_MOD)
172 lua_getfield(L, -1, "fmod"); 172 lua_getfield(L, -1, "fmod");
diff --git a/src/lib_string.c b/src/lib_string.c
index 6c857328..e7ad12df 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -776,16 +776,18 @@ LUALIB_API int luaopen_string(lua_State *L)
776{ 776{
777 GCtab *mt; 777 GCtab *mt;
778 GCstr *mmstr; 778 GCstr *mmstr;
779 global_State *g;
779 LJ_LIB_REG(L, string); 780 LJ_LIB_REG(L, string);
780#if defined(LUA_COMPAT_GFIND) 781#if defined(LUA_COMPAT_GFIND)
781 lua_getfield(L, -1, "gmatch"); 782 lua_getfield(L, -1, "gmatch");
782 lua_setfield(L, -2, "gfind"); 783 lua_setfield(L, -2, "gfind");
783#endif 784#endif
784 mt = lj_tab_new(L, 0, 1); 785 mt = lj_tab_new(L, 0, 1);
785 /* NOBARRIER: G(L)->mmname[] is a GC root. */ 786 /* NOBARRIER: basemt is a GC root. */
786 setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt)); 787 g = G(L);
787 mmstr = strref(G(L)->mmname[MM_index]); 788 setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
788 if (isdead(G(L), obj2gco(mmstr))) flipwhite(obj2gco(mmstr)); 789 mmstr = strref(g->mmname[MM_index]);
790 if (isdead(g, obj2gco(mmstr))) flipwhite(obj2gco(mmstr));
789 settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1)); 791 settabV(L, lj_tab_setstr(L, mt, mmstr), tabV(L->top-1));
790 mt->nomm = cast_byte(~(1u<<MM_index)); 792 mt->nomm = cast_byte(~(1u<<MM_index));
791 return 1; 793 return 1;
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 8ad4f8fb..6d8b4ccb 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -1186,10 +1186,10 @@ static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
1186 size_t rsize = oldsize - nb; 1186 size_t rsize = oldsize - nb;
1187 newp = oldp; 1187 newp = oldp;
1188 if (rsize >= MIN_CHUNK_SIZE) { 1188 if (rsize >= MIN_CHUNK_SIZE) {
1189 mchunkptr remainder = chunk_plus_offset(newp, nb); 1189 mchunkptr rem = chunk_plus_offset(newp, nb);
1190 set_inuse(m, newp, nb); 1190 set_inuse(m, newp, nb);
1191 set_inuse(m, remainder, rsize); 1191 set_inuse(m, rem, rsize);
1192 lj_alloc_free(m, chunk2mem(remainder)); 1192 lj_alloc_free(m, chunk2mem(rem));
1193 } 1193 }
1194 } else if (next == m->top && oldsize + m->topsize > nb) { 1194 } else if (next == m->top && oldsize + m->topsize > nb) {
1195 /* Expand into top */ 1195 /* Expand into top */
diff --git a/src/lj_api.c b/src/lj_api.c
index 7a759e5f..4bac5024 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -227,7 +227,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx)
227{ 227{
228 cTValue *o = index2adr(L, idx); 228 cTValue *o = index2adr(L, idx);
229 TValue tmp; 229 TValue tmp;
230 return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))); 230 return (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), &tmp)));
231} 231}
232 232
233LUA_API int lua_isstring(lua_State *L, int idx) 233LUA_API int lua_isstring(lua_State *L, int idx)
@@ -307,7 +307,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
307 TValue tmp; 307 TValue tmp;
308 if (LJ_LIKELY(tvisnum(o))) 308 if (LJ_LIKELY(tvisnum(o)))
309 return numV(o); 309 return numV(o);
310 else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) 310 else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
311 return numV(&tmp); 311 return numV(&tmp);
312 else 312 else
313 return 0; 313 return 0;
@@ -319,7 +319,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
319 TValue tmp; 319 TValue tmp;
320 if (tvisnum(o)) 320 if (tvisnum(o))
321 return numV(o); 321 return numV(o);
322 else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) 322 else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
323 lj_err_argt(L, idx, LUA_TNUMBER); 323 lj_err_argt(L, idx, LUA_TNUMBER);
324 return numV(&tmp); 324 return numV(&tmp);
325} 325}
@@ -332,7 +332,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def)
332 return numV(o); 332 return numV(o);
333 else if (tvisnil(o)) 333 else if (tvisnil(o))
334 return def; 334 return def;
335 else if (!(tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))) 335 else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
336 lj_err_argt(L, idx, LUA_TNUMBER); 336 lj_err_argt(L, idx, LUA_TNUMBER);
337 return numV(&tmp); 337 return numV(&tmp);
338} 338}
@@ -344,7 +344,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
344 lua_Number n; 344 lua_Number n;
345 if (LJ_LIKELY(tvisnum(o))) 345 if (LJ_LIKELY(tvisnum(o)))
346 n = numV(o); 346 n = numV(o);
347 else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) 347 else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
348 n = numV(&tmp); 348 n = numV(&tmp);
349 else 349 else
350 return 0; 350 return 0;
@@ -362,7 +362,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
362 lua_Number n; 362 lua_Number n;
363 if (LJ_LIKELY(tvisnum(o))) 363 if (LJ_LIKELY(tvisnum(o)))
364 n = numV(o); 364 n = numV(o);
365 else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) 365 else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
366 n = numV(&tmp); 366 n = numV(&tmp);
367 else 367 else
368 lj_err_argt(L, idx, LUA_TNUMBER); 368 lj_err_argt(L, idx, LUA_TNUMBER);
@@ -382,7 +382,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
382 n = numV(o); 382 n = numV(o);
383 else if (tvisnil(o)) 383 else if (tvisnil(o))
384 return def; 384 return def;
385 else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)) 385 else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
386 n = numV(&tmp); 386 n = numV(&tmp);
387 else 387 else
388 lj_err_argt(L, idx, LUA_TNUMBER); 388 lj_err_argt(L, idx, LUA_TNUMBER);
@@ -753,7 +753,7 @@ LUA_API int lua_getmetatable(lua_State *L, int idx)
753 else if (tvisudata(o)) 753 else if (tvisudata(o))
754 mt = tabref(udataV(o)->metatable); 754 mt = tabref(udataV(o)->metatable);
755 else 755 else
756 mt = tabref(G(L)->basemt[itypemap(o)]); 756 mt = tabref(basemt_obj(G(L), o));
757 if (mt == NULL) 757 if (mt == NULL)
758 return 0; 758 return 0;
759 settabV(L, L->top, mt); 759 settabV(L, L->top, mt);
@@ -941,12 +941,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
941 if (lj_trace_flushall(L)) 941 if (lj_trace_flushall(L))
942 lj_err_caller(L, LJ_ERR_NOGCMM); 942 lj_err_caller(L, LJ_ERR_NOGCMM);
943 if (tvisbool(o)) { 943 if (tvisbool(o)) {
944 /* NOBARRIER: g->basemt[] is a GC root. */ 944 /* NOBARRIER: basemt is a GC root. */
945 setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt)); 945 setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt));
946 setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt)); 946 setgcref(basemt_it(g, LJ_TFALSE), obj2gco(mt));
947 } else { 947 } else {
948 /* NOBARRIER: g->basemt[] is a GC root. */ 948 /* NOBARRIER: basemt is a GC root. */
949 setgcref(g->basemt[itypemap(o)], obj2gco(mt)); 949 setgcref(basemt_obj(g, o), obj2gco(mt));
950 } 950 }
951 } 951 }
952 L->top--; 952 L->top--;
diff --git a/src/lj_asm.c b/src/lj_asm.c
index a4d0c606..f26a40a5 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_str.h" 14#include "lj_str.h"
15#include "lj_tab.h" 15#include "lj_tab.h"
16#include "lj_frame.h"
16#include "lj_ir.h" 17#include "lj_ir.h"
17#include "lj_jit.h" 18#include "lj_jit.h"
18#include "lj_iropt.h" 19#include "lj_iropt.h"
@@ -81,6 +82,10 @@ typedef struct ASMState {
81 82
82#define IR(ref) (&as->ir[(ref)]) 83#define IR(ref) (&as->ir[(ref)])
83 84
85#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
86#define ASMREF_TMP2 REF_FALSE /* Temp. register. */
87#define ASMREF_L REF_NIL /* Stores register for L. */
88
84/* Check for variant to invariant references. */ 89/* Check for variant to invariant references. */
85#define iscrossref(as, ref) ((ref) < as->sectref) 90#define iscrossref(as, ref) ((ref) < as->sectref)
86 91
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
115 { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \ 120 { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
116 if (rex != 0x40) *--(p) = rex; } 121 if (rex != 0x40) *--(p) = rex; }
117#define FORCE_REX 0x200 122#define FORCE_REX 0x200
123#define REX_64 (FORCE_REX|0x080000)
118#else 124#else
119#define REXRB(p, rr, rb) ((void)0) 125#define REXRB(p, rr, rb) ((void)0)
120#define FORCE_REX 0 126#define FORCE_REX 0
127#define REX_64 0
121#endif 128#endif
122 129
123#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) 130#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
144 { 151 {
145 uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1); 152 uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
146 if (rex != 0x40) { 153 if (rex != 0x40) {
154 rex |= (rr >> 16);
147 if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); } 155 if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
148 *--p = (MCode)rex; 156 *--p = (MCode)rex;
149 } 157 }
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
451 459
452#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f)) 460#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
453 461
454/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
455#define emit_setargr(as, narg, r) \
456 emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
457#define emit_setargi(as, narg, imm) \
458 emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
459#define emit_setargp(as, narg, ptr) \
460 emit_setargi(as, (narg), ptr2addr((ptr)))
461
462/* -- Register allocator debugging ---------------------------------------- */ 462/* -- Register allocator debugging ---------------------------------------- */
463 463
464/* #define LUAJIT_DEBUG_RA */ 464/* #define LUAJIT_DEBUG_RA */
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
578 memset(as->phireg, 0, sizeof(as->phireg)); 578 memset(as->phireg, 0, sizeof(as->phireg));
579 memset(as->cost, 0, sizeof(as->cost)); 579 memset(as->cost, 0, sizeof(as->cost));
580 as->cost[RID_ESP] = REGCOST(~0u, 0u); 580 as->cost[RID_ESP] = REGCOST(~0u, 0u);
581
582 /* Start slots for spill slot allocation. */
583 as->evenspill = (SPS_FIRST+1)&~1;
584 as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
585} 581}
586 582
587/* Rematerialize constants. */ 583/* Rematerialize constants. */
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
598 } else if (ir->o == IR_BASE) { 594 } else if (ir->o == IR_BASE) {
599 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ 595 ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
600 emit_getgl(as, r, jit_base); 596 emit_getgl(as, r, jit_base);
597 } else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
598 lua_assert(irt_isnil(ir->t));
599 emit_getgl(as, r, jit_L);
601 } else { 600 } else {
602 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 601 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
603 ir->o == IR_KPTR || ir->o == IR_KNULL); 602 ir->o == IR_KPTR || ir->o == IR_KNULL);
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
629 return sps_scale(slot); 628 return sps_scale(slot);
630} 629}
631 630
631/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2.
**
** ref is the IR reference whose instruction currently owns the register.
** The instruction must hold a register and must not have a spill slot
** (asserted). The register is handed to ra_free() and ra_modified(), the
** instruction's register field is reset to RID_INIT, and the register
** that was held is returned to the caller.
*/
632static Reg ra_releasetmp(ASMState *as, IRRef ref)
633{
634 IRIns *ir = IR(ref);
635 Reg r = ir->r;
636 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
637 ra_free(as, r);
638 ra_modified(as, r);
    /* Reset the owner so the temporary reference can be allocated again. */
639 ir->r = RID_INIT;
640 return r;
641}
642
632/* Restore a register (marked as free). Rematerialize or force a spill. */ 643/* Restore a register (marked as free). Rematerialize or force a spill. */
633static Reg ra_restore(ASMState *as, IRRef ref) 644static Reg ra_restore(ASMState *as, IRRef ref)
634{ 645{
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
1008 1019
1009/* Arch-specific field offsets. */ 1020/* Arch-specific field offsets. */
1010static const uint8_t field_ofs[IRFL__MAX+1] = { 1021static const uint8_t field_ofs[IRFL__MAX+1] = {
1011#define FLOFS(name, type, field) (uint8_t)offsetof(type, field), 1022#define FLOFS(name, ofs) (uint8_t)(ofs),
1012IRFLDEF(FLOFS) 1023IRFLDEF(FLOFS)
1013#undef FLOFS 1024#undef FLOFS
1014 0 1025 0
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
1129{ 1140{
1130 IRIns *irr; 1141 IRIns *irr;
1131 lua_assert(ir->o == IR_STRREF); 1142 lua_assert(ir->o == IR_STRREF);
1132 as->mrm.idx = as->mrm.base = RID_NONE; 1143 as->mrm.base = as->mrm.idx = RID_NONE;
1133 as->mrm.scale = XM_SCALE1; 1144 as->mrm.scale = XM_SCALE1;
1134 as->mrm.ofs = sizeof(GCstr); 1145 as->mrm.ofs = sizeof(GCstr);
1135 if (irref_isk(ir->op1)) { 1146 if (irref_isk(ir->op1)) {
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
1158 } 1169 }
1159} 1170}
1160 1171
/* Fill in as->mrm for the reference operand of an extended load.
**
** An IR_KPTR operand becomes an absolute address: its ir->i value is used
** as the offset and neither a base nor an index register is set.
** Any other operand is asserted to be IR_STRREF and is handed on to
** asm_fusestrref() together with the allowed register set.
*/
1172static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
1173{
1174 if (ir->o == IR_KPTR) {
1175 as->mrm.ofs = ir->i;
1176 as->mrm.base = as->mrm.idx = RID_NONE;
1177 } else {
1178 lua_assert(ir->o == IR_STRREF);
1179 asm_fusestrref(as, ir, allow);
1180 }
1181}
1182
1161/* Fuse load into memory operand. */ 1183/* Fuse load into memory operand. */
1162static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) 1184static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
1163{ 1185{
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
1172 return RID_MRM; 1194 return RID_MRM;
1173 } 1195 }
1174 if (ir->o == IR_KNUM) { 1196 if (ir->o == IR_KNUM) {
1197 RegSet avail = as->freeset & ~as->modset & RSET_FPR;
1175 lua_assert(allow != RSET_EMPTY); 1198 lua_assert(allow != RSET_EMPTY);
1176 if (!(as->freeset & ~as->modset & RSET_FPR)) { 1199 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
1177 as->mrm.ofs = ptr2addr(ir_knum(ir)); 1200 as->mrm.ofs = ptr2addr(ir_knum(ir));
1178 as->mrm.base = as->mrm.idx = RID_NONE; 1201 as->mrm.base = as->mrm.idx = RID_NONE;
1179 return RID_MRM; 1202 return RID_MRM;
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
1188 return RID_MRM; 1211 return RID_MRM;
1189 } 1212 }
1190 } else if (ir->o == IR_FLOAD) { 1213 } else if (ir->o == IR_FLOAD) {
1191 /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */ 1214 /* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
1192 if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) { 1215 if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
1216 noconflict(as, ref, IR_FSTORE)) {
1193 asm_fusefref(as, ir, xallow); 1217 asm_fusefref(as, ir, xallow);
1194 return RID_MRM; 1218 return RID_MRM;
1195 } 1219 }
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
1199 return RID_MRM; 1223 return RID_MRM;
1200 } 1224 }
1201 } else if (ir->o == IR_XLOAD) { 1225 } else if (ir->o == IR_XLOAD) {
1202 /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). 1226 /* Generic fusion is only ok for 32 bit operand (but see asm_comp).
1203 ** Fusing unaligned memory operands is ok on x86 (except for SIMD types). 1227 ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
1204 */ 1228 */
1205 if (irt_isint(ir->t)) { 1229 if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
1206 asm_fusestrref(as, IR(ir->op1), xallow); 1230 asm_fusexref(as, IR(ir->op1), xallow);
1207 return RID_MRM; 1231 return RID_MRM;
1208 } 1232 }
1209 } 1233 }
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
1214 return ra_allocref(as, ref, allow); 1238 return ra_allocref(as, ref, allow);
1215} 1239}
1216 1240
1241/* -- Calls --------------------------------------------------------------- */
1242
1243/* Generate a call to a C function.
**
** ci describes the callee (function pointer, flags, argument count via
** CCI_NARGS) and args[] holds one IR reference per argument.
**
** The call itself is emitted first in source order, followed by the
** argument setup. NOTE(review): the emitters appear to write machine code
** backwards (emit_i8 pre-decrements as->mcp), so the argument stores would
** execute before the call -- confirm.
**
** Argument placement as implemented here:
** - Number arguments take 8 bytes at [ESP+ofs].
** - With CCI_FASTCALL, the first two non-number arguments go to
**   RID_ECX/RID_EDX and take no stack space.
** - All other arguments take 4 bytes at [ESP+ofs].
**
** References below ASMREF_TMP1 are handled as constants: their ir->i value
** is emitted as an immediate instead of allocating a register.
**
** Each register handed out is removed from 'allow' so no two arguments
** share one. If no register of the needed class remains,
** lj_trace_err(LJ_TRERR_NYICOAL) is raised.
*/
1244static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
1245{
1246 RegSet allow = RSET_ALL;
1247 uint32_t n, nargs = CCI_NARGS(ci);
1248 int32_t ofs = 0;
1249 lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
1250 emit_call(as, ci->func);
1251 for (n = 0; n < nargs; n++) { /* Setup args. */
     /* Building with LJ_64 set stops here: only the 32 bit setup exists. */
1252#if LJ_64
1253#error "NYI: 64 bit mode call argument setup"
1254#endif
1255 IRIns *ir = IR(args[n]);
1256 if (irt_isnum(ir->t)) {
     /* A constant number at an offset with bit 2 set is written as two
     ** 32 bit immediate stores (low word, then high word at ofs+4).
     */
1257 if ((ofs & 4) && irref_isk(args[n])) {
1258 /* Split stores for unaligned FP consts. */
1259 emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
1260 emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
1261 } else {
     /* Otherwise get the value into an FPR and store it with MOVSD. */
1262 Reg r;
1263 if ((allow & RSET_FPR) == RSET_EMPTY)
1264 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1265 r = ra_alloc1(as, args[n], allow & RSET_FPR);
1266 allow &= ~RID2RSET(r);
1267 emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
1268 }
1269 ofs += 8;
1270 } else {
1271 if ((ci->flags & CCI_FASTCALL) && n < 2) {
     /* Fastcall register argument: index 0 -> ECX, index 1 -> EDX. */
1272 Reg r = n == 0 ? RID_ECX : RID_EDX;
1273 if (args[n] < ASMREF_TMP1) {
1274 emit_loadi(as, r, ir->i);
1275 } else {
1276 lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
1277 allow &= ~RID2RSET(r);
     /* Copy from the register the value already lives in, or else
     ** allocate the value directly into the argument register.
     */
1278 if (ra_hasreg(ir->r))
1279 emit_movrr(as, r, ir->r);
1280 else
1281 ra_allocref(as, args[n], RID2RSET(r));
1282 }
1283 } else {
     /* Stack argument: immediate store for constants, otherwise
     ** allocate a GPR and store it to the argument slot.
     */
1284 if (args[n] < ASMREF_TMP1) {
1285 emit_movmroi(as, RID_ESP, ofs, ir->i);
1286 } else {
1287 Reg r;
1288 if ((allow & RSET_GPR) == RSET_EMPTY)
1289 lj_trace_err(as->J, LJ_TRERR_NYICOAL);
1290 r = ra_alloc1(as, args[n], allow & RSET_GPR);
1291 allow &= ~RID2RSET(r);
1292 emit_movtomro(as, r, RID_ESP, ofs);
1293 }
1294 ofs += 4;
1295 }
1296 }
1297 }
1298}
1299
1300/* Setup result reg/sp for call. Evict scratch regs.
**
** ir is the instruction whose result the call produces, ci describes the
** callee. The set of registers to evict starts as RSET_SCRATCH, with the
** FPRs removed when the callee is flagged CCI_NOFPRCLOBBER and the
** destination register of ir removed when it has one.
**
** If the result is used:
** - Number results are routed through the stack slot given by
**   sps_scale(ir->s) (32 bit build), or taken from RID_FPRET / RID_RET
**   (LJ_64 build).
** - All other results are bound to RID_RET via ra_destreg().
*/
1301static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
1302{
1303 RegSet drop = RSET_SCRATCH;
1304 if ((ci->flags & CCI_NOFPRCLOBBER))
1305 drop &= ~RSET_FPR;
1306 if (ra_hasreg(ir->r))
1307 rset_clear(drop, ir->r); /* Dest reg handled below. */
1308 ra_evictset(as, drop); /* Evictions must be performed first. */
1309 if (ra_used(ir)) {
1310 if (irt_isnum(ir->t)) {
1311 int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
1312#if LJ_64
     /* CCI_CASTU64: the value arrives in RID_RET and is moved into the
     ** destination FPR when there is one.
     */
1313 if ((ci->flags & CCI_CASTU64)) {
1314 Reg dest = ir->r;
1315 if (ra_hasreg(dest)) {
1316 ra_free(as, dest);
1317 ra_modified(as, dest);
1318 emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
1319 } else {
     /* NOTE(review): the 32 bit path below writes the result to the
     ** slot with emit_movtomro(); this uses emit_movrmro(), which by
     ** its name looks like a load from the slot -- confirm direction.
     */
1320 emit_movrmro(as, RID_RET, RID_ESP, ofs);
1321 }
1322 } else {
1323 ra_destreg(as, ir, RID_FPRET);
1324 }
1325#else
1326 /* Number result is in x87 st0 for x86 calling convention. */
1327 Reg dest = ir->r;
     /* A destination register is released and filled from the stack slot. */
1328 if (ra_hasreg(dest)) {
1329 ra_free(as, dest);
1330 ra_modified(as, dest);
1331 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
1332 }
     /* The slot is written from RID_RET (at ofs) and RID_RETHI (at ofs+4)
     ** for CCI_CASTU64 callees, or with an x87 FSTP otherwise.
     */
1333 if ((ci->flags & CCI_CASTU64)) {
1334 emit_movtomro(as, RID_RET, RID_ESP, ofs);
1335 emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
1336 } else {
1337 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1338 }
1339#endif
1340 } else {
     /* Non-number result: must not be a primitive type (asserted). */
1341 lua_assert(!irt_ispri(ir->t));
1342 ra_destreg(as, ir, RID_RET);
1343 }
1344 }
1345}
1346
1347/* Collect arguments from CALL* and ARG instructions.
**
** ir is the call instruction, ci its call info and args[] the output array
** of IR references (the count is asserted to be <= CCI_NARGS_MAX).
**
** When the callee is flagged CCI_L, the first slot is set to ASMREF_L and
** the remaining arguments are written after it.
**
** The remaining arguments are read by following the op1 links through
** IR_CARG instructions. Each IR_CARG supplies one argument in its op2,
** filled in from the last slot downwards. The op1 of the last instruction
** visited becomes the first of the remaining arguments and is asserted
** not to be an IR_CARG itself.
*/
1348static void asm_collectargs(ASMState *as, IRIns *ir,
1349 const CCallInfo *ci, IRRef *args)
1350{
1351 uint32_t n = CCI_NARGS(ci);
1352 lua_assert(n <= CCI_NARGS_MAX);
1353 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
     /* NOTE(review): n is unsigned and args[0] is written unconditionally
     ** below, even if n is already 0 at this point -- confirm that no call
     ** info reaches this with zero remaining arguments.
     */
1354 while (n-- > 1) {
1355 ir = IR(ir->op1);
1356 lua_assert(ir->o == IR_CARG);
1357 args[n] = ir->op2;
1358 }
1359 args[0] = ir->op1;
1360 lua_assert(IR(ir->op1)->o != IR_CARG);
1361}
1362
/* Assemble a call instruction.
**
** The call info is looked up in lj_ir_callinfo[] using ir->op2 as index.
** The arguments are gathered into a local array, the result location is
** set up (which also evicts registers), and then the call and its
** argument setup are generated.
*/
1363static void asm_call(ASMState *as, IRIns *ir)
1364{
1365 IRRef args[CCI_NARGS_MAX];
1366 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1367 asm_collectargs(as, ir, ci, args);
1368 asm_setupresult(as, ir, ci);
1369 asm_gencall(as, ci, args);
1370}
1371
1217/* -- Type conversions ---------------------------------------------------- */ 1372/* -- Type conversions ---------------------------------------------------- */
1218 1373
1219static void asm_tonum(ASMState *as, IRIns *ir) 1374static void asm_tonum(ASMState *as, IRIns *ir)
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
1260 1415
1261static void asm_strto(ASMState *as, IRIns *ir) 1416static void asm_strto(ASMState *as, IRIns *ir)
1262{ 1417{
1263 Reg str;
1264 int32_t ofs;
1265 RegSet drop = RSET_SCRATCH;
1266 /* Force a spill slot for the destination register (if any). */ 1418 /* Force a spill slot for the destination register (if any). */
1419 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
1420 IRRef args[2];
1421 RegSet drop = RSET_SCRATCH;
1267 if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) 1422 if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
1268 rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */ 1423 rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
1269 ra_evictset(as, drop); 1424 ra_evictset(as, drop);
1270 asm_guardcc(as, CC_E); 1425 asm_guardcc(as, CC_E);
1271 emit_rr(as, XO_TEST, RID_RET, RID_RET); 1426 emit_rr(as, XO_TEST, RID_RET, RID_RET);
1272 /* int lj_str_numconv(const char *s, TValue *n) */ 1427 args[0] = ir->op1;
1273 emit_call(as, lj_str_numconv); 1428 args[1] = ASMREF_TMP1;
1274 ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */ 1429 asm_gencall(as, ci, args);
1275 if (ofs == 0) { 1430 /* Store the result to the spill slot or slots SPS_TEMP1/2. */
1276 emit_setargr(as, 2, RID_ESP); 1431 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
1277 } else { 1432 RID_ESP, sps_scale(ir->s));
1278 emit_setargr(as, 2, RID_RET);
1279 emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
1280 }
1281 emit_setargr(as, 1, RID_RET);
1282 str = ra_alloc1(as, ir->op1, RSET_GPR);
1283 emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
1284} 1433}
1285 1434
1286static void asm_tostr(ASMState *as, IRIns *ir) 1435static void asm_tostr(ASMState *as, IRIns *ir)
1287{ 1436{
1288 IRIns *irl = IR(ir->op1); 1437 IRIns *irl = IR(ir->op1);
1289 ra_destreg(as, ir, RID_RET); 1438 IRRef args[2];
1290 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); 1439 args[0] = ASMREF_L;
1291 as->gcsteps++; 1440 as->gcsteps++;
1292 if (irt_isnum(irl->t)) { 1441 if (irt_isnum(irl->t)) {
1293 /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */ 1442 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
1294 emit_call(as, lj_str_fromnum); 1443 args[1] = ASMREF_TMP1;
1295 emit_setargr(as, 1, RID_RET); 1444 asm_setupresult(as, ir, ci);
1296 emit_getgl(as, RID_RET, jit_L); 1445 asm_gencall(as, ci, args);
1297 emit_setargr(as, 2, RID_RET); 1446 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
1298 emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl)); 1447 RID_ESP, ra_spill(as, irl));
1299 } else { 1448 } else {
1300 /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */ 1449 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
1301 emit_call(as, lj_str_fromint); 1450 args[1] = ir->op1;
1302 emit_setargr(as, 1, RID_RET); 1451 asm_setupresult(as, ir, ci);
1303 emit_getgl(as, RID_RET, jit_L); 1452 asm_gencall(as, ci, args);
1304 emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
1305 } 1453 }
1306} 1454}
1307 1455
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
1330 lua_assert(!irt_isnil(ir->t)); 1478 lua_assert(!irt_isnil(ir->t));
1331 return irt_type(ir->t)-IRT_FALSE; 1479 return irt_type(ir->t)-IRT_FALSE;
1332 } else { 1480 } else {
1333 lua_assert(irt_isaddr(ir->t)); 1481 lua_assert(irt_isgcv(ir->t));
1334 lo = u32ptr(ir_kgc(ir)); 1482 lo = u32ptr(ir_kgc(ir));
1335 hi = lo - 0x04c11db7; 1483 hi = lo - 0x04c11db7;
1336 } 1484 }
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1517 1665
1518static void asm_newref(ASMState *as, IRIns *ir) 1666static void asm_newref(ASMState *as, IRIns *ir)
1519{ 1667{
1520 IRRef keyref = ir->op2; 1668 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1521 IRIns *irkey = IR(keyref); 1669 IRRef args[3];
1522 RegSet allow = RSET_GPR; 1670 IRIns *irkey;
1523 Reg tab, tmp; 1671 Reg tmp;
1524 ra_destreg(as, ir, RID_RET); 1672 args[0] = ASMREF_L;
1525 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); 1673 args[1] = ir->op1;
1526 tab = ra_alloc1(as, ir->op1, allow); 1674 args[2] = ASMREF_TMP1;
1527 tmp = ra_scratch(as, rset_clear(allow, tab)); 1675 asm_setupresult(as, ir, ci);
1528 /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */ 1676 asm_gencall(as, ci, args);
1529 emit_call(as, lj_tab_newkey); 1677 tmp = ra_releasetmp(as, ASMREF_TMP1);
1530 emit_setargr(as, 1, tmp); 1678 irkey = IR(ir->op2);
1531 emit_setargr(as, 2, tab);
1532 emit_getgl(as, tmp, jit_L);
1533 if (irt_isnum(irkey->t)) { 1679 if (irt_isnum(irkey->t)) {
1534 /* For numbers use the constant itself or a spill slot as a TValue. */ 1680 /* For numbers use the constant itself or a spill slot as a TValue. */
1535 if (irref_isk(keyref)) { 1681 if (irref_isk(ir->op2))
1536 emit_setargp(as, 3, ir_knum(irkey)); 1682 emit_loada(as, tmp, ir_knum(irkey));
1537 } else { 1683 else
1538 emit_setargr(as, 3, tmp);
1539 emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey)); 1684 emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
1540 }
1541 } else { 1685 } else {
1542 /* Otherwise use g->tmptv to hold the TValue. */ 1686 /* Otherwise use g->tmptv to hold the TValue. */
1543 lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t)); 1687 if (!irref_isk(ir->op2)) {
1544 emit_setargr(as, 3, tmp); 1688 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1545 if (!irref_isk(keyref)) {
1546 Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
1547 emit_movtomro(as, src, tmp, 0); 1689 emit_movtomro(as, src, tmp, 0);
1548 } else if (!irt_ispri(irkey->t)) { 1690 } else if (!irt_ispri(irkey->t)) {
1549 emit_movmroi(as, tmp, 0, irkey->i); 1691 emit_movmroi(as, tmp, 0, irkey->i);
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
1600 1742
1601/* -- Loads and stores ---------------------------------------------------- */ 1743/* -- Loads and stores ---------------------------------------------------- */
1602 1744
1603static void asm_fload(ASMState *as, IRIns *ir) 1745static void asm_fxload(ASMState *as, IRIns *ir)
1604{ 1746{
1605 Reg dest = ra_dest(as, ir, RSET_GPR); 1747 Reg dest = ra_dest(as, ir, RSET_GPR);
1606 x86Op xo; 1748 x86Op xo;
1607 asm_fusefref(as, ir, RSET_GPR); 1749 if (ir->o == IR_FLOAD)
1750 asm_fusefref(as, ir, RSET_GPR);
1751 else
1752 asm_fusexref(as, IR(ir->op1), RSET_GPR);
1753 /* ir->op2 is ignored -- unaligned loads are ok on x86. */
1608 switch (irt_type(ir->t)) { 1754 switch (irt_type(ir->t)) {
1609 case IRT_I8: xo = XO_MOVSXb; break; 1755 case IRT_I8: xo = XO_MOVSXb; break;
1610 case IRT_U8: xo = XO_MOVZXb; break; 1756 case IRT_U8: xo = XO_MOVZXb; break;
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
1731 } 1877 }
1732} 1878}
1733 1879
1734static void asm_xload(ASMState *as, IRIns *ir) 1880/* -- Allocations --------------------------------------------------------- */
1735{
1736 Reg dest = ra_dest(as, ir, RSET_GPR);
1737 x86Op xo;
1738 asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
1739 /* ir->op2 is ignored -- unaligned loads are ok on x86. */
1740 switch (irt_type(ir->t)) {
1741 case IRT_I8: xo = XO_MOVSXb; break;
1742 case IRT_U8: xo = XO_MOVZXb; break;
1743 case IRT_I16: xo = XO_MOVSXw; break;
1744 case IRT_U16: xo = XO_MOVZXw; break;
1745 default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
1746 }
1747 emit_mrm(as, xo, dest, RID_MRM);
1748}
1749
1750/* -- String ops ---------------------------------------------------------- */
1751 1881
1752static void asm_snew(ASMState *as, IRIns *ir) 1882static void asm_snew(ASMState *as, IRIns *ir)
1753{ 1883{
1754 RegSet allow = RSET_GPR; 1884 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
1755 Reg left, right; 1885 IRRef args[3];
1756 IRIns *irl; 1886 args[0] = ASMREF_L;
1757 ra_destreg(as, ir, RID_RET); 1887 args[1] = ir->op1;
1758 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); 1888 args[2] = ir->op2;
1759 irl = IR(ir->op1);
1760 left = irl->r;
1761 right = IR(ir->op2)->r;
1762 if (ra_noreg(left)) {
1763 lua_assert(irl->o == IR_STRREF);
1764 /* Get register only for non-const STRREF. */
1765 if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
1766 if (ra_hasreg(right)) rset_clear(allow, right);
1767 left = ra_allocref(as, ir->op1, allow);
1768 }
1769 }
1770 if (ra_noreg(right) && !irref_isk(ir->op2)) {
1771 if (ra_hasreg(left)) rset_clear(allow, left);
1772 right = ra_allocref(as, ir->op2, allow);
1773 }
1774 /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
1775 emit_call(as, lj_str_new);
1776 emit_setargr(as, 1, RID_RET);
1777 emit_getgl(as, RID_RET, jit_L);
1778 if (ra_noreg(left)) /* Use immediate for const STRREF. */
1779 emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
1780 (int32_t)sizeof(GCstr));
1781 else
1782 emit_setargr(as, 2, left);
1783 if (ra_noreg(right))
1784 emit_setargi(as, 3, IR(ir->op2)->i);
1785 else
1786 emit_setargr(as, 3, right);
1787 as->gcsteps++; 1889 as->gcsteps++;
1890 asm_setupresult(as, ir, ci);
1891 asm_gencall(as, ci, args);
1788} 1892}
1789 1893
1790/* -- Table ops ----------------------------------------------------------- */
1791
1792static void asm_tnew(ASMState *as, IRIns *ir) 1894static void asm_tnew(ASMState *as, IRIns *ir)
1793{ 1895{
1794 ra_destreg(as, ir, RID_RET); 1896 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
1795 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); 1897 IRRef args[2];
1796 /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */ 1898 args[0] = ASMREF_L;
1797 emit_call(as, lj_tab_new); 1899 args[1] = ASMREF_TMP1;
1798 emit_setargr(as, 1, RID_RET);
1799 emit_setargi(as, 2, ir->op1);
1800 emit_setargi(as, 3, ir->op2);
1801 emit_getgl(as, RID_RET, jit_L);
1802 as->gcsteps++; 1900 as->gcsteps++;
1901 asm_setupresult(as, ir, ci);
1902 asm_gencall(as, ci, args);
1903 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
1803} 1904}
1804 1905
1805static void asm_tdup(ASMState *as, IRIns *ir) 1906static void asm_tdup(ASMState *as, IRIns *ir)
1806{ 1907{
1807 ra_destreg(as, ir, RID_RET); 1908 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
1808 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET)); 1909 IRRef args[2];
1809 /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */ 1910 args[0] = ASMREF_L;
1810 emit_call(as, lj_tab_dup); 1911 args[1] = ir->op1;
1811 emit_setargr(as, 1, RID_RET);
1812 emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
1813 emit_getgl(as, RID_RET, jit_L);
1814 as->gcsteps++; 1912 as->gcsteps++;
1913 asm_setupresult(as, ir, ci);
1914 asm_gencall(as, ci, args);
1815} 1915}
1816 1916
1817static void asm_tlen(ASMState *as, IRIns *ir) 1917/* -- Write barriers ------------------------------------------------------ */
1818{
1819 ra_destreg(as, ir, RID_RET);
1820 ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
1821 emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
1822 emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
1823}
1824 1918
1825static void asm_tbar(ASMState *as, IRIns *ir) 1919static void asm_tbar(ASMState *as, IRIns *ir)
1826{ 1920{
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1839 1933
1840static void asm_obar(ASMState *as, IRIns *ir) 1934static void asm_obar(ASMState *as, IRIns *ir)
1841{ 1935{
1842 RegSet allow = RSET_GPR; 1936 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
1843 Reg obj, val; 1937 IRRef args[2];
1844 GCobj *valp;
1845 MCLabel l_end; 1938 MCLabel l_end;
1846 int32_t ofs; 1939 Reg obj;
1847 ra_evictset(as, RSET_SCRATCH);
1848 if (irref_isk(ir->op2)) {
1849 valp = ir_kgc(IR(ir->op2));
1850 val = RID_NONE;
1851 } else {
1852 valp = NULL;
1853 val = ra_alloc1(as, ir->op2, allow);
1854 rset_clear(allow, val);
1855 }
1856 obj = ra_alloc1(as, ir->op1, allow);
1857 l_end = emit_label(as);
1858 /* No need for other object barriers (yet). */ 1940 /* No need for other object barriers (yet). */
1859 lua_assert(IR(ir->op1)->o == IR_UREFC); 1941 lua_assert(IR(ir->op1)->o == IR_UREFC);
1860 ofs = -(int32_t)offsetof(GCupval, tv); 1942 l_end = emit_label(as);
1861 /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */ 1943 args[0] = ASMREF_TMP1;
1862 emit_call(as, lj_gc_barrieruv); 1944 args[1] = ir->op1;
1863 if (ofs == 0) { 1945 asm_gencall(as, ci, args);
1864 emit_setargr(as, 2, obj); 1946 emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
1865 } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) { 1947 obj = IR(ir->op1)->r;
1866 emit_setargr(as, 2, obj);
1867 emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
1868 } else {
1869 emit_setargr(as, 2, RID_RET);
1870 emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
1871 }
1872 emit_setargp(as, 1, J2G(as->J));
1873 if (valp)
1874 emit_setargp(as, 3, valp);
1875 else
1876 emit_setargr(as, 3, val);
1877 emit_sjcc(as, CC_Z, l_end); 1948 emit_sjcc(as, CC_Z, l_end);
1878 emit_i8(as, LJ_GC_WHITES); 1949 emit_i8(as, LJ_GC_WHITES);
1879 if (valp) 1950 if (irref_isk(ir->op2)) {
1880 emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked); 1951 GCobj *vp = ir_kgc(IR(ir->op2));
1881 else 1952 emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
1953 } else {
1954 Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
1882 emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked)); 1955 emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
1956 }
1883 emit_sjcc(as, CC_Z, l_end); 1957 emit_sjcc(as, CC_Z, l_end);
1884 emit_i8(as, LJ_GC_BLACK); 1958 emit_i8(as, LJ_GC_BLACK);
1885 emit_rmro(as, XO_GROUP3b, XOg_TEST, obj, 1959 emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
1886 ofs + (int32_t)offsetof(GChead, marked)); 1960 (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
1887} 1961}
1888 1962
1889/* -- FP/int arithmetic and logic operations ------------------------------ */ 1963/* -- FP/int arithmetic and logic operations ------------------------------ */
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2260 } 2334 }
2261 } 2335 }
2262 emit_mrm(as, XO_UCOMISD, left, right); 2336 emit_mrm(as, XO_UCOMISD, left, right);
2263 } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) { 2337 } else {
2264 IRRef lref = ir->op1, rref = ir->op2; 2338 IRRef lref = ir->op1, rref = ir->op2;
2265 IROp leftop = (IROp)(IR(lref)->o); 2339 IROp leftop = (IROp)(IR(lref)->o);
2266 lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t)); 2340 lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
2267 /* Swap constants (only for ABC) and fusable loads to the right. */ 2341 /* Swap constants (only for ABC) and fusable loads to the right. */
2268 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { 2342 if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
2269 if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */ 2343 if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2294 } else { 2368 } else {
2295 Reg left; 2369 Reg left;
2296 if (opisfusableload((IROp)irl->o) && 2370 if (opisfusableload((IROp)irl->o) &&
2297 ((irt_isi8(irl->t) && checki8(imm)) || 2371 ((irt_isu8(irl->t) && checku8(imm)) ||
2298 (irt_isu8(irl->t) && checku8(imm)))) { 2372 ((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
2299 /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8 2373 (irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
2300 ** loads are handled here. The IRT_I16/IRT_U16 loads should never be 2374 /* Only the IRT_INT case is fused by asm_fuseload.
2301 ** fused, since cmp word [mem], imm16 has a length-changing prefix. 2375 ** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
2376 ** are handled here.
2377 ** Note that cmp word [mem], imm16 should not be generated,
2378 ** since it has a length-changing prefix. Compares of a word
2379 ** against a sign-extended imm8 are ok, however.
2302 */ 2380 */
2303 IRType1 origt = irl->t; /* Temporarily flip types. */ 2381 IRType1 origt = irl->t; /* Temporarily flip types. */
2304 irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT; 2382 irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2307 if (left == RID_MRM) { /* Fusion succeeded? */ 2385 if (left == RID_MRM) { /* Fusion succeeded? */
2308 asm_guardcc(as, cc); 2386 asm_guardcc(as, cc);
2309 emit_i8(as, imm); 2387 emit_i8(as, imm);
2310 emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM); 2388 emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
2389 XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
2311 return; 2390 return;
2312 } /* Otherwise handle register case as usual. */ 2391 } /* Otherwise handle register case as usual. */
2313 } else { 2392 } else {
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2337 asm_guardcc(as, cc); 2416 asm_guardcc(as, cc);
2338 emit_mrm(as, XO_CMP, left, right); 2417 emit_mrm(as, XO_CMP, left, right);
2339 } 2418 }
2340 } else { /* Handle ordered string compares. */
2341 RegSet allow = RSET_GPR;
2342 /* This assumes lj_str_cmp never uses any SSE registers. */
2343 ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
2344 asm_guardcc(as, cc);
2345 emit_rr(as, XO_TEST, RID_RET, RID_RET);
2346 emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
2347 if (irref_isk(ir->op1)) {
2348 emit_setargi(as, 1, IR(ir->op1)->i);
2349 } else {
2350 Reg left = ra_alloc1(as, ir->op1, allow);
2351 rset_clear(allow, left);
2352 emit_setargr(as, 1, left);
2353 }
2354 if (irref_isk(ir->op2)) {
2355 emit_setargi(as, 2, IR(ir->op2)->i);
2356 } else {
2357 Reg right = ra_alloc1(as, ir->op2, allow);
2358 emit_setargr(as, 2, right);
2359 }
2360 } 2419 }
2361} 2420}
2362 2421
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
2366/* -- GC handling --------------------------------------------------------- */ 2425/* -- GC handling --------------------------------------------------------- */
2367 2426
2368/* Sync all live GC values to Lua stack slots. */ 2427/* Sync all live GC values to Lua stack slots. */
2369static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow) 2428static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
2370{ 2429{
2430 /* Some care must be taken when allocating registers here, since this is
2431 ** not part of the fast path. All scratch registers are evicted in the
2432 ** fast path, so it's easiest to force allocation from scratch registers
2433 ** only. This avoids register allocation state unification.
2434 */
2435 RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
2371 IRRef2 *map = &as->T->snapmap[snap->mapofs]; 2436 IRRef2 *map = &as->T->snapmap[snap->mapofs];
2372 BCReg s, nslots = snap->nslots; 2437 BCReg s, nslots = snap->nslots;
2373 for (s = 0; s < nslots; s++) { 2438 for (s = 0; s < nslots; s++) {
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
2392/* Check GC threshold and do one or more GC steps. */ 2457/* Check GC threshold and do one or more GC steps. */
2393static void asm_gc_check(ASMState *as, SnapShot *snap) 2458static void asm_gc_check(ASMState *as, SnapShot *snap)
2394{ 2459{
2460 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
2461 IRRef args[2];
2395 MCLabel l_end; 2462 MCLabel l_end;
2396 const BCIns *pc; 2463 Reg base, lstate, tmp;
2397 Reg tmp, base;
2398 RegSet drop = RSET_SCRATCH; 2464 RegSet drop = RSET_SCRATCH;
2399 /* Must evict BASE because the stack may be reallocated by the GC. */ 2465 if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */
2400 if (ra_hasreg(IR(REF_BASE)->r)) 2466 drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */
2401 drop |= RID2RSET(IR(REF_BASE)->r);
2402 ra_evictset(as, drop); 2467 ra_evictset(as, drop);
2403 base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
2404 l_end = emit_label(as); 2468 l_end = emit_label(as);
2405 /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */ 2469 args[0] = ASMREF_L;
2406 emit_call(as, lj_gc_step_jit); 2470 args[1] = ASMREF_TMP1;
2407 emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); 2471 asm_gencall(as, ci, args);
2408 emit_setargr(as, 1, RID_RET); 2472 tmp = ra_releasetmp(as, ASMREF_TMP1);
2409 emit_setargi(as, 3, (int32_t)as->gcsteps); 2473 emit_loadi(as, tmp, (int32_t)as->gcsteps);
2410 emit_getgl(as, RID_RET, jit_L); 2474 /* We don't know spadj yet, so get the C frame from L->cframe. */
2411 pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots]; 2475 emit_movmroi(as, tmp, CFRAME_OFS_PC,
2412 emit_setargp(as, 2, pc); 2476 (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
2413 asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base)); 2477 emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
2414 if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */ 2478 lstate = IR(ASMREF_L)->r;
2415 ra_restore(as, REF_BASE); /* Better do it inside the slow path. */ 2479 emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
2480 /* It's ok if lstate is already in a non-scratch reg. But all allocations
2481 ** in the non-fast path must use a scratch reg. See comment above.
2482 */
2483 base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
2484 emit_movtomro(as, base, lstate, offsetof(lua_State, base));
2485 asm_gc_sync(as, snap, base);
2486 /* BASE/L get restored anyway, better do it inside the slow path. */
2487 if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
2488 if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
2489 ra_restore(as, ASMREF_L);
2416 /* Jump around GC step if GC total < GC threshold. */ 2490 /* Jump around GC step if GC total < GC threshold. */
2417 tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR); 2491 tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
2418 emit_sjcc(as, CC_B, l_end); 2492 emit_sjcc(as, CC_B, l_end);
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
2666{ 2740{
2667 int32_t spadj; 2741 int32_t spadj;
2668 emit_setgli(as, vmstate, (int32_t)as->J->curtrace); 2742 emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
2669 spadj = sps_adjust(as); 2743 spadj = sps_adjust(as->evenspill);
2670 as->T->spadjust = (uint16_t)spadj; 2744 as->T->spadjust = (uint16_t)spadj;
2671 emit_addptr(as, RID_ESP, -spadj); 2745 emit_addptr(as, RID_ESP, -spadj);
2672} 2746}
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
2676{ 2750{
2677 IRIns *ir = IR(REF_BASE); 2751 IRIns *ir = IR(REF_BASE);
2678 Reg r = ir->r; 2752 Reg r = ir->r;
2679 lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); 2753 lua_assert(!ra_hasspill(ir->s));
2680 ra_free(as, r); 2754 if (ra_hasreg(r)) {
2681 if (r != RID_BASE) { 2755 ra_free(as, r);
2682 ra_scratch(as, RID2RSET(RID_BASE)); 2756 if (r != RID_BASE) {
2683 emit_rr(as, XO_MOV, r, RID_BASE); 2757 ra_scratch(as, RID2RSET(RID_BASE));
2758 emit_rr(as, XO_MOV, r, RID_BASE);
2759 }
2684 } 2760 }
2685} 2761}
2686 2762
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
2749 } 2825 }
2750 2826
2751 /* Calculate stack frame adjustment. */ 2827 /* Calculate stack frame adjustment. */
2752 spadj = sps_adjust(as); 2828 spadj = sps_adjust(as->evenspill);
2753 spdelta = spadj - (int32_t)as->parent->spadjust; 2829 spdelta = spadj - (int32_t)as->parent->spadjust;
2754 if (spdelta < 0) { /* Don't shrink the stack frame. */ 2830 if (spdelta < 0) { /* Don't shrink the stack frame. */
2755 spadj = (int32_t)as->parent->spadjust; 2831 spadj = (int32_t)as->parent->spadjust;
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
2877 GCfunc *fn = ir_kfunc(IR(ir->op2)); 2953 GCfunc *fn = ir_kfunc(IR(ir->op2));
2878 if (isluafunc(fn)) { 2954 if (isluafunc(fn)) {
2879 BCReg fs = s + funcproto(fn)->framesize; 2955 BCReg fs = s + funcproto(fn)->framesize;
2880 newbase = s;
2881 if (secondbase == ~(BCReg)0) secondbase = s;
2882 if (fs > topslot) topslot = fs; 2956 if (fs > topslot) topslot = fs;
2957 if (s != 0) {
2958 newbase = s;
2959 if (secondbase == ~(BCReg)0) secondbase = s;
2960 }
2883 } 2961 }
2884 } 2962 }
2885 } 2963 }
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
3063 3141
3064 /* Loads and stores. */ 3142 /* Loads and stores. */
3065 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break; 3143 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
3066 case IR_FLOAD: asm_fload(as, ir); break; 3144 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
3067 case IR_SLOAD: asm_sload(as, ir); break; 3145 case IR_SLOAD: asm_sload(as, ir); break;
3068 case IR_XLOAD: asm_xload(as, ir); break;
3069 3146
3070 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; 3147 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
3071 case IR_FSTORE: asm_fstore(as, ir); break; 3148 case IR_FSTORE: asm_fstore(as, ir); break;
3072 3149
3073 /* String ops. */ 3150 /* Allocations. */
3074 case IR_SNEW: asm_snew(as, ir); break; 3151 case IR_SNEW: asm_snew(as, ir); break;
3075
3076 /* Table ops. */
3077 case IR_TNEW: asm_tnew(as, ir); break; 3152 case IR_TNEW: asm_tnew(as, ir); break;
3078 case IR_TDUP: asm_tdup(as, ir); break; 3153 case IR_TDUP: asm_tdup(as, ir); break;
3079 case IR_TLEN: asm_tlen(as, ir); break; 3154
3155 /* Write barriers. */
3080 case IR_TBAR: asm_tbar(as, ir); break; 3156 case IR_TBAR: asm_tbar(as, ir); break;
3081 case IR_OBAR: asm_obar(as, ir); break; 3157 case IR_OBAR: asm_obar(as, ir); break;
3082 3158
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
3092 case IR_TOSTR: asm_tostr(as, ir); break; 3168 case IR_TOSTR: asm_tostr(as, ir); break;
3093 case IR_STRTO: asm_strto(as, ir); break; 3169 case IR_STRTO: asm_strto(as, ir); break;
3094 3170
3171 /* Calls. */
3172 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
3173 case IR_CARG: break;
3174
3095 default: 3175 default:
3096 setintV(&as->J->errinfo, ir->o); 3176 setintV(&as->J->errinfo, ir->o);
3097 lj_trace_err_info(as->J, LJ_TRERR_NYIIR); 3177 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3123 IRRef i, nins; 3203 IRRef i, nins;
3124 int inloop; 3204 int inloop;
3125 3205
3206 ra_setup(as);
3207
3126 /* Clear reg/sp for constants. */ 3208 /* Clear reg/sp for constants. */
3127 for (i = T->nk; i < REF_BIAS; i++) 3209 for (i = T->nk; i < REF_BIAS; i++)
3128 IR(i)->prev = REGSP_INIT; 3210 IR(i)->prev = REGSP_INIT;
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3144 as->curins = nins; 3226 as->curins = nins;
3145 3227
3146 inloop = 0; 3228 inloop = 0;
3229 as->evenspill = SPS_FIRST;
3147 for (i = REF_FIRST; i < nins; i++) { 3230 for (i = REF_FIRST; i < nins; i++) {
3148 IRIns *ir = IR(i); 3231 IRIns *ir = IR(i);
3149 switch (ir->o) { 3232 switch (ir->o) {
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3166 if (i == as->stopins+1 && ir->op1 == ir->op2) 3249 if (i == as->stopins+1 && ir->op1 == ir->op2)
3167 as->stopins++; 3250 as->stopins++;
3168 break; 3251 break;
3252 case IR_CALLN: case IR_CALLL: case IR_CALLS: {
3253 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
3254 /* NYI: not fastcall-aware, but doesn't matter (yet). */
3255 if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
3256 as->evenspill = (int32_t)CCI_NARGS(ci);
3257#if LJ_64
3258 ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
3259#else
3260 ir->prev = REGSP_HINT(RID_RET);
3261#endif
3262 if (inloop)
3263 as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
3264 (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
3265 continue;
3266 }
3169 /* C calls evict all scratch regs and return results in RID_RET. */ 3267 /* C calls evict all scratch regs and return results in RID_RET. */
3170 case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR: 3268 case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
3171 case IR_NEWREF: 3269 case IR_NEWREF:
3172 ir->prev = REGSP_HINT(RID_RET); 3270 ir->prev = REGSP_HINT(RID_RET);
3173 if (inloop) 3271 if (inloop)
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3177 if (inloop) 3275 if (inloop)
3178 as->modset = RSET_SCRATCH; 3276 as->modset = RSET_SCRATCH;
3179 break; 3277 break;
3180 /* Ordered string compares evict all integer scratch registers. */
3181 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
3182 if (irt_isstr(ir->t) && inloop)
3183 as->modset |= (RSET_SCRATCH & RSET_GPR);
3184 break;
3185 /* Non-constant shift counts need to be in RID_ECX. */ 3278 /* Non-constant shift counts need to be in RID_ECX. */
3186 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 3279 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
3187 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) 3280 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
3200 } 3293 }
3201 ir->prev = REGSP_INIT; 3294 ir->prev = REGSP_INIT;
3202 } 3295 }
3296 if ((as->evenspill & 1))
3297 as->oddspill = as->evenspill++;
3298 else
3299 as->oddspill = 0;
3203} 3300}
3204 3301
3205/* -- Assembler core ------------------------------------------------------ */ 3302/* -- Assembler core ------------------------------------------------------ */
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
3263 as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; 3360 as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
3264 3361
3265 /* Setup register allocation. */ 3362 /* Setup register allocation. */
3266 ra_setup(as);
3267 asm_setup_regsp(as, T); 3363 asm_setup_regsp(as, T);
3268 3364
3269 if (!as->loopref) { 3365 if (!as->loopref) {
diff --git a/src/lj_def.h b/src/lj_def.h
index dbfd5bf5..3d6ba417 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -88,6 +88,7 @@ typedef unsigned __int32 uintptr_t;
88#define checki8(x) ((x) == (int32_t)(int8_t)(x)) 88#define checki8(x) ((x) == (int32_t)(int8_t)(x))
89#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) 89#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
90#define checki16(x) ((x) == (int32_t)(int16_t)(x)) 90#define checki16(x) ((x) == (int32_t)(int16_t)(x))
91#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
91 92
92/* Every half-decent C compiler transforms this into a rotate instruction. */ 93/* Every half-decent C compiler transforms this into a rotate instruction. */
93#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n)))) 94#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n))))
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 0d8a03ec..5c9d2bcb 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -73,13 +73,13 @@ static void gc_mark(global_State *g, GCobj *o)
73 } 73 }
74} 74}
75 75
76/* Mark the base metatables. */ 76/* Mark GC roots. */
77static void gc_mark_basemt(global_State *g) 77static void gc_mark_gcroot(global_State *g)
78{ 78{
79 int i; 79 ptrdiff_t i;
80 for (i = 0; i < BASEMT_MAX; i++) 80 for (i = 0; i < GCROOT__MAX; i++)
81 if (tabref(g->basemt[i]) != NULL) 81 if (gcref(g->gcroot[i]) != NULL)
82 gc_markobj(g, tabref(g->basemt[i])); 82 gc_markobj(g, gcref(g->gcroot[i]));
83} 83}
84 84
85/* Start a GC cycle and mark the root set. */ 85/* Start a GC cycle and mark the root set. */
@@ -91,7 +91,7 @@ static void gc_mark_start(global_State *g)
91 gc_markobj(g, mainthread(g)); 91 gc_markobj(g, mainthread(g));
92 gc_markobj(g, tabref(mainthread(g)->env)); 92 gc_markobj(g, tabref(mainthread(g)->env));
93 gc_marktv(g, &g->registrytv); 93 gc_marktv(g, &g->registrytv);
94 gc_mark_basemt(g); 94 gc_mark_gcroot(g);
95 g->gc.state = GCSpropagate; 95 g->gc.state = GCSpropagate;
96} 96}
97 97
@@ -541,7 +541,7 @@ static void atomic(global_State *g, lua_State *L)
541 lua_assert(!iswhite(obj2gco(mainthread(g)))); 541 lua_assert(!iswhite(obj2gco(mainthread(g))));
542 gc_markobj(g, L); /* Mark running thread. */ 542 gc_markobj(g, L); /* Mark running thread. */
543 gc_mark_curtrace(g); /* Mark current trace. */ 543 gc_mark_curtrace(g); /* Mark current trace. */
544 gc_mark_basemt(g); /* Mark base metatables (again). */ 544 gc_mark_gcroot(g); /* Mark GC roots (again). */
545 gc_propagate_gray(g); /* Propagate all of the above. */ 545 gc_propagate_gray(g); /* Propagate all of the above. */
546 546
547 setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */ 547 setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
@@ -643,16 +643,15 @@ int lj_gc_step(lua_State *L)
643} 643}
644 644
645/* Ditto, but fix the stack top first. */ 645/* Ditto, but fix the stack top first. */
646void lj_gc_step_fixtop(lua_State *L) 646void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
647{ 647{
648 if (curr_funcisL(L)) L->top = curr_topL(L); 648 if (curr_funcisL(L)) L->top = curr_topL(L);
649 lj_gc_step(L); 649 lj_gc_step(L);
650} 650}
651 651
652/* Perform multiple GC steps. Called from JIT-compiled code. */ 652/* Perform multiple GC steps. Called from JIT-compiled code. */
653void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) 653void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps)
654{ 654{
655 cframe_pc(cframe_raw(L->cframe)) = pc;
656 L->top = curr_topL(L); 655 L->top = curr_topL(L);
657 while (steps-- > 0 && lj_gc_step(L) == 0) 656 while (steps-- > 0 && lj_gc_step(L) == 0)
658 ; 657 ;
@@ -711,17 +710,16 @@ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
711 makewhite(g, o); /* Make it white to avoid the following barrier. */ 710 makewhite(g, o); /* Make it white to avoid the following barrier. */
712} 711}
713 712
714/* The reason for duplicating this is that it needs to be visible from ASM. */ 713/* Specialized barrier for closed upvalue. Pass &uv->tv. */
715void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) 714void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv)
716{ 715{
717 lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); 716#define TV2MARKED(x) \
718 lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); 717 (*((uint8_t *)(x) - offsetof(GCupval, tv) + offsetof(GCupval, marked)))
719 lua_assert(o->gch.gct == ~LJ_TUPVAL);
720 /* Preserve invariant during propagation. Otherwise it doesn't matter. */
721 if (g->gc.state == GCSpropagate) 718 if (g->gc.state == GCSpropagate)
722 gc_mark(g, v); /* Move frontier forward. */ 719 gc_mark(g, gcV(tv));
723 else 720 else
724 makewhite(g, o); /* Make it white to avoid the following barrier. */ 721 TV2MARKED(tv) = (TV2MARKED(tv) & cast_byte(~LJ_GC_COLORS)) | curwhite(g);
722#undef TV2MARKED
725} 723}
726 724
727/* Close upvalue. Also needs a write barrier. */ 725/* Close upvalue. Also needs a write barrier. */
diff --git a/src/lj_gc.h b/src/lj_gc.h
index 192066d3..0dbb9b82 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -43,8 +43,8 @@ LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
43LJ_FUNC void lj_gc_finalizeudata(lua_State *L); 43LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
44LJ_FUNC void lj_gc_freeall(global_State *g); 44LJ_FUNC void lj_gc_freeall(global_State *g);
45LJ_FUNCA int lj_gc_step(lua_State *L); 45LJ_FUNCA int lj_gc_step(lua_State *L);
46LJ_FUNCA void lj_gc_step_fixtop(lua_State *L); 46LJ_FUNCA void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L);
47LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps); 47LJ_FUNC void LJ_FASTCALL lj_gc_step_jit(lua_State *L, MSize steps);
48LJ_FUNC void lj_gc_fullgc(lua_State *L); 48LJ_FUNC void lj_gc_fullgc(lua_State *L);
49 49
50/* GC check: drive collector forward if the GC threshold has been reached. */ 50/* GC check: drive collector forward if the GC threshold has been reached. */
@@ -58,7 +58,7 @@ LJ_FUNC void lj_gc_fullgc(lua_State *L);
58/* Write barriers. */ 58/* Write barriers. */
59LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t); 59LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t);
60LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v); 60LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
61LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v); 61LJ_FUNCA void LJ_FASTCALL lj_gc_barrieruv(global_State *g, TValue *tv);
62LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv); 62LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
63LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T); 63LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);
64 64
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 1efb12f0..cf0b6b55 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -6,16 +6,22 @@
6#define lj_ir_c 6#define lj_ir_c
7#define LUA_CORE 7#define LUA_CORE
8 8
9/* For pointers to libc/libm functions. */
10#include <stdio.h>
11#include <math.h>
12
9#include "lj_obj.h" 13#include "lj_obj.h"
10 14
11#if LJ_HASJIT 15#if LJ_HASJIT
12 16
13#include "lj_gc.h" 17#include "lj_gc.h"
14#include "lj_str.h" 18#include "lj_str.h"
19#include "lj_tab.h"
15#include "lj_ir.h" 20#include "lj_ir.h"
16#include "lj_jit.h" 21#include "lj_jit.h"
17#include "lj_iropt.h" 22#include "lj_iropt.h"
18#include "lj_trace.h" 23#include "lj_trace.h"
24#include "lj_lib.h"
19 25
20/* Some local macros to save typing. Undef'd at the end. */ 26/* Some local macros to save typing. Undef'd at the end. */
21#define IR(ref) (&J->cur.ir[(ref)]) 27#define IR(ref) (&J->cur.ir[(ref)])
@@ -32,6 +38,17 @@ IRDEF(IRMODE)
32 0 38 0
33}; 39};
34 40
41/* C call info for CALL* instructions. */
42LJ_DATADEF const CCallInfo lj_ir_callinfo[] = {
43#define IRCALLCI(name, nargs, kind, type, flags) \
44 { (ASMFunction)name, \
45 (nargs)|(CCI_CALL_##kind)|(IRT_##type<<CCI_OTSHIFT)|(flags) },
46IRCALLDEF(IRCALLCI)
47#undef IRCALLCI
48 { NULL, 0 }
49};
50
51
35/* -- IR emitter ---------------------------------------------------------- */ 52/* -- IR emitter ---------------------------------------------------------- */
36 53
37/* Grow IR buffer at the top. */ 54/* Grow IR buffer at the top. */
@@ -92,6 +109,25 @@ TRef LJ_FASTCALL lj_ir_emit(jit_State *J)
92 return TREF(ref, irt_t((ir->t = fins->t))); 109 return TREF(ref, irt_t((ir->t = fins->t)));
93} 110}
94 111
112/* Emit call to a C function. */
113TRef lj_ir_call(jit_State *J, IRCallID id, ...)
114{
115 const CCallInfo *ci = &lj_ir_callinfo[id];
116 uint32_t n = CCI_NARGS(ci);
117 TRef tr = TREF_NIL;
118 va_list argp;
119 va_start(argp, id);
120 if ((ci->flags & CCI_L)) n--;
121 if (n > 0)
122 tr = va_arg(argp, IRRef);
123 while (n-- > 1)
124 tr = emitir(IRT(IR_CARG, IRT_NIL), tr, va_arg(argp, IRRef));
125 va_end(argp);
126 if (CCI_OP(ci) == IR_CALLS)
127 J->needsnap = 1; /* Need snapshot after call with side effect. */
128 return emitir(CCI_OPTYPE(ci), tr, id);
129}
130
95/* -- Interning of constants ---------------------------------------------- */ 131/* -- Interning of constants ---------------------------------------------- */
96 132
97/* 133/*
diff --git a/src/lj_ir.h b/src/lj_ir.h
index a6973a81..9a7e711d 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -8,6 +8,8 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10 10
11/* -- IR instructions ----------------------------------------------------- */
12
11/* IR instruction definition. Order matters, see below. */ 13/* IR instruction definition. Order matters, see below. */
12#define IRDEF(_) \ 14#define IRDEF(_) \
13 /* Miscellaneous ops. */ \ 15 /* Miscellaneous ops. */ \
@@ -101,13 +103,12 @@
101 _(USTORE, S , ref, ref) \ 103 _(USTORE, S , ref, ref) \
102 _(FSTORE, S , ref, ref) \ 104 _(FSTORE, S , ref, ref) \
103 \ 105 \
104 /* String ops. */ \ 106 /* Allocations. */ \
105 _(SNEW, N , ref, ref) \ 107 _(SNEW, N , ref, ref) /* CSE is ok, so not marked as A. */ \
106 \
107 /* Table ops. */ \
108 _(TNEW, A , lit, lit) \ 108 _(TNEW, A , lit, lit) \
109 _(TDUP, A , ref, ___) \ 109 _(TDUP, A , ref, ___) \
110 _(TLEN, L , ref, ___) \ 110 \
111 /* Write barriers. */ \
111 _(TBAR, S , ref, ___) \ 112 _(TBAR, S , ref, ___) \
112 _(OBAR, S , ref, ref) \ 113 _(OBAR, S , ref, ref) \
113 \ 114 \
@@ -118,6 +119,12 @@
118 _(TOSTR, N , ref, ___) \ 119 _(TOSTR, N , ref, ___) \
119 _(STRTO, G , ref, ___) \ 120 _(STRTO, G , ref, ___) \
120 \ 121 \
122 /* Calls. */ \
123 _(CALLN, N , ref, lit) \
124 _(CALLL, L , ref, lit) \
125 _(CALLS, S , ref, lit) \
126 _(CARG, N , ref, ref) \
127 \
121 /* End of list. */ 128 /* End of list. */
122 129
123/* IR opcodes (max. 256). */ 130/* IR opcodes (max. 256). */
@@ -144,6 +151,8 @@ LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
144LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE); 151LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
145LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE); 152LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
146 153
154/* -- Named IR literals --------------------------------------------------- */
155
147/* FPMATH sub-functions. ORDER FPM. */ 156/* FPMATH sub-functions. ORDER FPM. */
148#define IRFPMDEF(_) \ 157#define IRFPMDEF(_) \
149 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ 158 _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
@@ -158,20 +167,22 @@ IRFPMDEF(FPMENUM)
158 IRFPM__MAX 167 IRFPM__MAX
159} IRFPMathOp; 168} IRFPMathOp;
160 169
161/* FLOAD field IDs. */ 170/* FLOAD fields. */
162#define IRFLDEF(_) \ 171#define IRFLDEF(_) \
163 _(STR_LEN, GCstr, len) \ 172 _(STR_LEN, offsetof(GCstr, len)) \
164 _(FUNC_ENV, GCfunc, l.env) \ 173 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
165 _(TAB_META, GCtab, metatable) \ 174 _(TAB_META, offsetof(GCtab, metatable)) \
166 _(TAB_ARRAY, GCtab, array) \ 175 _(TAB_ARRAY, offsetof(GCtab, array)) \
167 _(TAB_NODE, GCtab, node) \ 176 _(TAB_NODE, offsetof(GCtab, node)) \
168 _(TAB_ASIZE, GCtab, asize) \ 177 _(TAB_ASIZE, offsetof(GCtab, asize)) \
169 _(TAB_HMASK, GCtab, hmask) \ 178 _(TAB_HMASK, offsetof(GCtab, hmask)) \
170 _(TAB_NOMM, GCtab, nomm) \ 179 _(TAB_NOMM, offsetof(GCtab, nomm)) \
171 _(UDATA_META, GCudata, metatable) 180 _(UDATA_META, offsetof(GCudata, metatable)) \
181 _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
182 _(UDATA_FILE, sizeof(GCudata))
172 183
173typedef enum { 184typedef enum {
174#define FLENUM(name, type, field) IRFL_##name, 185#define FLENUM(name, ofs) IRFL_##name,
175IRFLDEF(FLENUM) 186IRFLDEF(FLENUM)
176#undef FLENUM 187#undef FLENUM
177 IRFL__MAX 188 IRFL__MAX
@@ -183,7 +194,8 @@ IRFLDEF(FLENUM)
183#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */ 194#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */
184 195
185/* XLOAD mode, stored in op2. */ 196/* XLOAD mode, stored in op2. */
186#define IRXLOAD_UNALIGNED 1 197#define IRXLOAD_READONLY 1 /* Load from read-only data. */
198#define IRXLOAD_UNALIGNED 2 /* Unaligned load. */
187 199
188/* TOINT mode, stored in op2. Ordered by strength of the checks. */ 200/* TOINT mode, stored in op2. Ordered by strength of the checks. */
189#define IRTOINT_CHECK 0 /* Number checked for integerness. */ 201#define IRTOINT_CHECK 0 /* Number checked for integerness. */
@@ -191,6 +203,67 @@ IRFLDEF(FLENUM)
191#define IRTOINT_ANY 2 /* Any FP number is ok. */ 203#define IRTOINT_ANY 2 /* Any FP number is ok. */
192#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */ 204#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */
193 205
206/* C call info for CALL* instructions. */
207typedef struct CCallInfo {
208 ASMFunction func; /* Function pointer. */
209 uint32_t flags; /* Number of arguments and flags. */
210} CCallInfo;
211
212#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */
213#define CCI_NARGS_MAX 16 /* Max. # of args. */
214
215#define CCI_OTSHIFT 16
216#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
217#define CCI_OPSHIFT 24
218#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
219
220#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
221#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
222#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
223#define CCI_CALL_FN (CCI_CALL_N|CCI_FASTCALL)
224#define CCI_CALL_FL (CCI_CALL_L|CCI_FASTCALL)
225#define CCI_CALL_FS (CCI_CALL_S|CCI_FASTCALL)
226
227/* C call info flags. */
228#define CCI_L 0x0100 /* Implicit L arg. */
229#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
230#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
231#define CCI_FASTCALL 0x0800 /* Fastcall convention. */
232
233/* Function definitions for CALL* instructions. */
234#define IRCALLDEF(_) \
235 _(lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
236 _(lj_str_new, 3, S, STR, CCI_L) \
237 _(lj_str_tonum, 2, FN, INT, 0) \
238 _(lj_str_fromint, 2, FN, STR, CCI_L) \
239 _(lj_str_fromnum, 2, FN, STR, CCI_L) \
240 _(lj_tab_new1, 2, FS, TAB, CCI_L) \
241 _(lj_tab_dup, 2, FS, TAB, CCI_L) \
242 _(lj_tab_newkey, 3, S, PTR, CCI_L) \
243 _(lj_tab_len, 1, FL, INT, 0) \
244 _(lj_gc_step_jit, 2, FS, NIL, CCI_L) \
245 _(lj_gc_barrieruv, 2, FS, NIL, 0) \
246 _(lj_math_random_step, 1, FS, NUM, CCI_CASTU64|CCI_NOFPRCLOBBER) \
247 _(sinh, 1, N, NUM, 0) \
248 _(cosh, 1, N, NUM, 0) \
249 _(tanh, 1, N, NUM, 0) \
250 _(fputc, 2, S, INT, 0) \
251 _(fwrite, 4, S, INT, 0) \
252 _(fflush, 1, S, INT, 0) \
253 \
254 /* End of list. */
255
256typedef enum {
257#define IRCALLENUM(name, nargs, kind, type, flags) IRCALL_##name,
258IRCALLDEF(IRCALLENUM)
259#undef IRCALLENUM
260 IRCALL__MAX
261} IRCallID;
262
263LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
264
265/* -- IR operands --------------------------------------------------------- */
266
194/* IR operand mode (2 bit). */ 267/* IR operand mode (2 bit). */
195typedef enum { 268typedef enum {
196 IRMref, /* IR reference. */ 269 IRMref, /* IR reference. */
@@ -227,6 +300,8 @@ typedef enum {
227 300
228LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; 301LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
229 302
303/* -- IR instruction types ------------------------------------------------ */
304
230/* IR result type and flags (8 bit). */ 305/* IR result type and flags (8 bit). */
231typedef enum { 306typedef enum {
232 /* Map of itypes to non-negative numbers. ORDER LJ_T */ 307 /* Map of itypes to non-negative numbers. ORDER LJ_T */
@@ -314,6 +389,8 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
314/* Stored combined IR opcode and type. */ 389/* Stored combined IR opcode and type. */
315typedef uint16_t IROpT; 390typedef uint16_t IROpT;
316 391
392/* -- IR references ------------------------------------------------------- */
393
317/* IR references. */ 394/* IR references. */
318typedef uint16_t IRRef1; /* One stored reference. */ 395typedef uint16_t IRRef1; /* One stored reference. */
319typedef uint32_t IRRef2; /* Two stored references. */ 396typedef uint32_t IRRef2; /* Two stored references. */
@@ -382,6 +459,8 @@ typedef uint32_t TRef;
382#define TREF_FALSE (TREF_PRI(IRT_FALSE)) 459#define TREF_FALSE (TREF_PRI(IRT_FALSE))
383#define TREF_TRUE (TREF_PRI(IRT_TRUE)) 460#define TREF_TRUE (TREF_PRI(IRT_TRUE))
384 461
462/* -- IR format ----------------------------------------------------------- */
463
385/* IR instruction format (64 bit). 464/* IR instruction format (64 bit).
386** 465**
387** 16 16 8 8 8 8 466** 16 16 8 8 8 8
@@ -425,5 +504,6 @@ typedef union IRIns {
425#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) 504#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
426#define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) 505#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
427#define ir_knum(ir) (mref((ir)->ptr, cTValue)) 506#define ir_knum(ir) (mref((ir)->ptr, cTValue))
507#define ir_kptr(ir) (mref((ir)->ptr, void))
428 508
429#endif 509#endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 69b0a955..52077ad5 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -6,6 +6,8 @@
6#ifndef _LJ_IROPT_H 6#ifndef _LJ_IROPT_H
7#define _LJ_IROPT_H 7#define _LJ_IROPT_H
8 8
9#include <stdarg.h>
10
9#include "lj_obj.h" 11#include "lj_obj.h"
10#include "lj_jit.h" 12#include "lj_jit.h"
11 13
@@ -13,6 +15,7 @@
13/* IR emitter. */ 15/* IR emitter. */
14LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J); 16LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J);
15LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J); 17LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J);
18LJ_FUNC TRef lj_ir_call(jit_State *J, IRCallID id, ...);
16 19
17/* Save current IR in J->fold.ins, but do not emit it (yet). */ 20/* Save current IR in J->fold.ins, but do not emit it (yet). */
18static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b) 21static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
@@ -83,6 +86,7 @@ LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref);
83/* Emit IR instructions with on-the-fly optimizations. */ 86/* Emit IR instructions with on-the-fly optimizations. */
84LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J); 87LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J);
85LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J); 88LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J);
89LJ_FUNC TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim);
86 90
87/* Special return values for the fold functions. */ 91/* Special return values for the fold functions. */
88enum { 92enum {
@@ -106,7 +110,7 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
106LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); 110LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
107LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); 111LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
108LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); 112LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
109LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J); 113LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
110LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); 114LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
111 115
112/* Dead-store elimination. */ 116/* Dead-store elimination. */
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 683c66d6..d8254093 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -152,7 +152,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg)
152{ 152{
153 TValue *o = L->base + narg-1; 153 TValue *o = L->base + narg-1;
154 if (!(o < L->top && 154 if (!(o < L->top &&
155 (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))))) 155 (tvisnum(o) || (tvisstr(o) && lj_str_tonum(strV(o), o)))))
156 lj_err_argt(L, narg, LUA_TNUMBER); 156 lj_err_argt(L, narg, LUA_TNUMBER);
157 return numV(o); 157 return numV(o);
158} 158}
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 59a0f2be..a7a6317e 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -90,4 +90,9 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
90#define LIBINIT_FFID 0xfe 90#define LIBINIT_FFID 0xfe
91#define LIBINIT_END 0xff 91#define LIBINIT_END 0xff
92 92
93/* Exported library functions. */
94
95typedef struct RandomState RandomState;
96LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
97
93#endif 98#endif
diff --git a/src/lj_meta.c b/src/lj_meta.c
index dff01f85..1182d908 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -60,7 +60,7 @@ cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm)
60 else if (tvisudata(o)) 60 else if (tvisudata(o))
61 mt = tabref(udataV(o)->metatable); 61 mt = tabref(udataV(o)->metatable);
62 else 62 else
63 mt = tabref(G(L)->basemt[itypemap(o)]); 63 mt = tabref(basemt_obj(G(L), o));
64 if (mt) { 64 if (mt) {
65 cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm])); 65 cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm]));
66 if (mo) 66 if (mo)
@@ -157,7 +157,7 @@ static cTValue *str2num(cTValue *o, TValue *n)
157{ 157{
158 if (tvisnum(o)) 158 if (tvisnum(o))
159 return o; 159 return o;
160 else if (tvisstr(o) && lj_str_numconv(strVdata(o), n)) 160 else if (tvisstr(o) && lj_str_tonum(strV(o), n))
161 return n; 161 return n;
162 else 162 else
163 return NULL; 163 return NULL;
@@ -295,7 +295,7 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
295 top = curr_top(L); 295 top = curr_top(L);
296 setcont(top, ne ? lj_cont_condf : lj_cont_condt); 296 setcont(top, ne ? lj_cont_condf : lj_cont_condt);
297 copyTV(L, top+1, mo); 297 copyTV(L, top+1, mo);
298 it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA; 298 it = ~o1->gch.gct;
299 setgcV(L, top+2, &o1->gch, it); 299 setgcV(L, top+2, &o1->gch, it);
300 setgcV(L, top+3, &o2->gch, it); 300 setgcV(L, top+3, &o2->gch, it);
301 return top+2; /* Trigger metamethod call. */ 301 return top+2; /* Trigger metamethod call. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 9101f053..cebeda9b 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -315,7 +315,7 @@ typedef struct GCstr {
315/* Userdata object. Payload follows. */ 315/* Userdata object. Payload follows. */
316typedef struct GCudata { 316typedef struct GCudata {
317 GCHeader; 317 GCHeader;
318 uint8_t unused1; 318 uint8_t udtype; /* Userdata type. */
319 uint8_t unused2; 319 uint8_t unused2;
320 GCRef env; /* Should be at same offset in GCfunc. */ 320 GCRef env; /* Should be at same offset in GCfunc. */
321 MSize len; /* Size of payload. */ 321 MSize len; /* Size of payload. */
@@ -323,6 +323,13 @@ typedef struct GCudata {
323 uint32_t align1; /* To force 8 byte alignment of the payload. */ 323 uint32_t align1; /* To force 8 byte alignment of the payload. */
324} GCudata; 324} GCudata;
325 325
326/* Userdata types. */
327enum {
328 UDTYPE_USERDATA, /* Regular userdata. */
329 UDTYPE_IO_FILE, /* I/O library FILE. */
330 UDTYPE__MAX
331};
332
326#define uddata(u) ((void *)((u)+1)) 333#define uddata(u) ((void *)((u)+1))
327#define sizeudata(u) (sizeof(struct GCudata)+(u)->len) 334#define sizeudata(u) (sizeof(struct GCudata)+(u)->len)
328 335
@@ -496,7 +503,17 @@ MMDEF(MMENUM)
496 MM_FAST = MM_eq 503 MM_FAST = MM_eq
497} MMS; 504} MMS;
498 505
499#define BASEMT_MAX ((~LJ_TNUMX)+1) 506/* GC root IDs. */
507typedef enum {
508 GCROOT_BASEMT, /* Metatables for base types. */
509 GCROOT_BASEMT_NUM = ~LJ_TNUMX, /* Last base metatable. */
510 GCROOT_IO_INPUT, /* Userdata for default I/O input file. */
511 GCROOT_IO_OUTPUT, /* Userdata for default I/O output file. */
512 GCROOT__MAX
513} GCRootID;
514
515#define basemt_it(g, it) ((g)->gcroot[GCROOT_BASEMT+~(it)])
516#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
500 517
501typedef struct GCState { 518typedef struct GCState {
502 MSize total; /* Memory currently allocated. */ 519 MSize total; /* Memory currently allocated. */
@@ -544,7 +561,7 @@ typedef struct global_State {
544 volatile int32_t vmstate; /* VM state or current JIT code trace number. */ 561 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
545 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 562 GCRef jit_L; /* Current JIT code lua_State or NULL. */
546 MRef jit_base; /* Current JIT code L->base. */ 563 MRef jit_base; /* Current JIT code L->base. */
547 GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */ 564 GCRef gcroot[GCROOT__MAX]; /* GC roots. */
548 GCRef mmname[MM_MAX]; /* Array holding metamethod names. */ 565 GCRef mmname[MM_MAX]; /* Array holding metamethod names. */
549} global_State; 566} global_State;
550 567
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 2102561d..98266d21 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -282,21 +282,50 @@ LJFOLD(STRTO KGC)
282LJFOLDF(kfold_strto) 282LJFOLDF(kfold_strto)
283{ 283{
284 TValue n; 284 TValue n;
285 if (lj_str_numconv(strdata(ir_kstr(fleft)), &n)) 285 if (lj_str_tonum(ir_kstr(fleft), &n))
286 return lj_ir_knum(J, numV(&n)); 286 return lj_ir_knum(J, numV(&n));
287 return FAILFOLD; 287 return FAILFOLD;
288} 288}
289 289
290LJFOLD(SNEW STRREF KINT) 290LJFOLD(SNEW KPTR KINT)
291LJFOLDF(kfold_snew) 291LJFOLDF(kfold_snew_kptr)
292{
293 GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
294 return lj_ir_kstr(J, s);
295}
296
297LJFOLD(SNEW any KINT)
298LJFOLDF(kfold_snew_empty)
292{ 299{
293 if (fright->i == 0) 300 if (fright->i == 0)
294 return lj_ir_kstr(J, lj_str_new(J->L, "", 0)); 301 return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
302 return NEXTFOLD;
303}
304
305LJFOLD(STRREF KGC KINT)
306LJFOLDF(kfold_strref)
307{
308 GCstr *str = ir_kstr(fleft);
309 lua_assert((MSize)fright->i < str->len);
310 return lj_ir_kptr(J, (char *)strdata(str) + fright->i);
311}
312
313LJFOLD(STRREF SNEW any)
314LJFOLDF(kfold_strref_snew)
315{
295 PHIBARRIER(fleft); 316 PHIBARRIER(fleft);
296 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { 317 if (irref_isk(fins->op2) && fright->i == 0) {
297 const char *s = strdata(ir_kstr(IR(fleft->op1))); 318 return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
298 int32_t ofs = IR(fleft->op2)->i; 319 } else {
299 return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i)); 320 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
321 IRIns *ir = IR(fleft->op1);
322 IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
323 lua_assert(ir->o == IR_STRREF);
324 PHIBARRIER(ir);
325 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
326 fins->op1 = str;
327 fins->ot = IRT(IR_STRREF, IRT_PTR);
328 return RETRYFOLD;
300 } 329 }
301 return NEXTFOLD; 330 return NEXTFOLD;
302} 331}
@@ -343,16 +372,13 @@ LJFOLDF(kfold_intcomp)
343 } 372 }
344} 373}
345 374
346LJFOLD(LT KGC KGC) 375LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
347LJFOLD(GE KGC KGC) 376LJFOLDF(kfold_strcmp)
348LJFOLD(LE KGC KGC)
349LJFOLD(GT KGC KGC)
350LJFOLDF(kfold_strcomp)
351{ 377{
352 if (irt_isstr(fins->t)) { 378 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
353 GCstr *a = ir_kstr(fleft); 379 GCstr *a = ir_kstr(IR(fleft->op1));
354 GCstr *b = ir_kstr(fright); 380 GCstr *b = ir_kstr(IR(fleft->op2));
355 return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o)); 381 return INTFOLD(lj_str_cmp(a, b));
356 } 382 }
357 return NEXTFOLD; 383 return NEXTFOLD;
358} 384}
@@ -1070,7 +1096,8 @@ LJFOLDF(merge_eqne_snew_kgc)
1070 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) : 1096 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
1071 len == 2 ? IRT(IR_XLOAD, IRT_U16) : 1097 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
1072 IRTI(IR_XLOAD)); 1098 IRTI(IR_XLOAD));
1073 TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0); 1099 TRef tmp = emitir(ot, strref,
1100 IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
1074 TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr))); 1101 TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
1075 if (len == 3) 1102 if (len == 3)
1076 tmp = emitir(IRTI(IR_BAND), tmp, 1103 tmp = emitir(IRTI(IR_BAND), tmp,
@@ -1103,8 +1130,8 @@ LJFOLDX(lj_opt_fwd_hload)
1103LJFOLD(ULOAD any) 1130LJFOLD(ULOAD any)
1104LJFOLDX(lj_opt_fwd_uload) 1131LJFOLDX(lj_opt_fwd_uload)
1105 1132
1106LJFOLD(TLEN any) 1133LJFOLD(CALLL any IRCALL_lj_tab_len)
1107LJFOLDX(lj_opt_fwd_tlen) 1134LJFOLDX(lj_opt_fwd_tab_len)
1108 1135
1109/* Upvalue refs are really loads, but there are no corresponding stores. 1136/* Upvalue refs are really loads, but there are no corresponding stores.
1110** So CSE is ok for them, except for UREFO across a GC step (see below). 1137** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1194,13 +1221,23 @@ LJFOLDF(fload_tab_ah)
1194 1221
1195/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ 1222/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1196LJFOLD(FLOAD KGC IRFL_STR_LEN) 1223LJFOLD(FLOAD KGC IRFL_STR_LEN)
1197LJFOLDF(fload_str_len) 1224LJFOLDF(fload_str_len_kgc)
1198{ 1225{
1199 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) 1226 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1200 return INTFOLD((int32_t)ir_kstr(fleft)->len); 1227 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1201 return NEXTFOLD; 1228 return NEXTFOLD;
1202} 1229}
1203 1230
1231LJFOLD(FLOAD SNEW IRFL_STR_LEN)
1232LJFOLDF(fload_str_len_snew)
1233{
1234 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1235 PHIBARRIER(fleft);
1236 return fleft->op2;
1237 }
1238 return NEXTFOLD;
1239}
1240
1204LJFOLD(FLOAD any IRFL_STR_LEN) 1241LJFOLD(FLOAD any IRFL_STR_LEN)
1205LJFOLDX(lj_opt_cse) 1242LJFOLDX(lj_opt_cse)
1206 1243
@@ -1216,20 +1253,28 @@ LJFOLDF(fwd_sload)
1216 return J->slot[fins->op1]; 1253 return J->slot[fins->op1];
1217} 1254}
1218 1255
1219/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */ 1256LJFOLD(XLOAD KPTR any)
1220LJFOLD(XLOAD STRREF any) 1257LJFOLDF(xload_kptr)
1221LJFOLDF(xload_str)
1222{ 1258{
1223 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { 1259 /* Only fold read-only integer loads for now. */
1224 GCstr *str = ir_kstr(IR(fleft->op1)); 1260 if ((fins->op2 & IRXLOAD_READONLY) && irt_isinteger(fins->t))
1225 int32_t ofs = IR(fleft->op2)->i; 1261 return INTFOLD(kfold_xload(fins, ir_kptr(fleft)));
1226 lua_assert((MSize)ofs < str->len); 1262 return NEXTFOLD;
1227 lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len); 1263}
1228 return INTFOLD(kfold_xload(fins, strdata(str)+ofs)); 1264
1265/* CSE for XLOAD depends on the type, but not on the IRXLOAD_* flags. */
1266LJFOLD(XLOAD any any)
1267LJFOLDF(fwd_xload)
1268{
1269 IRRef ref = J->chain[IR_XLOAD];
1270 IRRef op1 = fins->op1;
1271 while (ref > op1) {
1272 if (IR(ref)->op1 == op1 && irt_sametype(IR(ref)->t, fins->t))
1273 return ref;
1274 ref = IR(ref)->prev;
1229 } 1275 }
1230 return CSEFOLD; 1276 return EMITFOLD;
1231} 1277}
1232/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */
1233 1278
1234/* -- Write barriers ------------------------------------------------------ */ 1279/* -- Write barriers ------------------------------------------------------ */
1235 1280
@@ -1279,12 +1324,11 @@ LJFOLD(FSTORE any any)
1279LJFOLDX(lj_opt_dse_fstore) 1324LJFOLDX(lj_opt_dse_fstore)
1280 1325
1281LJFOLD(NEWREF any any) /* Treated like a store. */ 1326LJFOLD(NEWREF any any) /* Treated like a store. */
1327LJFOLD(CALLS any any)
1328LJFOLD(CALLL any any) /* Safeguard fallback. */
1282LJFOLD(TNEW any any) 1329LJFOLD(TNEW any any)
1283LJFOLD(TDUP any) 1330LJFOLD(TDUP any)
1284LJFOLDF(store_raw) 1331LJFOLDX(lj_ir_emit)
1285{
1286 return EMITFOLD;
1287}
1288 1332
1289/* ------------------------------------------------------------------------ */ 1333/* ------------------------------------------------------------------------ */
1290 1334
@@ -1402,6 +1446,19 @@ TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
1402 } 1446 }
1403} 1447}
1404 1448
1449/* CSE with explicit search limit. */
1450TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
1451{
1452 IRRef ref = J->chain[fins->o];
1453 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
1454 while (ref > lim) {
1455 if (IR(ref)->op12 == op12)
1456 return ref;
1457 ref = IR(ref)->prev;
1458 }
1459 return lj_ir_emit(J);
1460}
1461
1405/* ------------------------------------------------------------------------ */ 1462/* ------------------------------------------------------------------------ */
1406 1463
1407#undef IR 1464#undef IR
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index f9a2a808..90ab1b6f 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -310,7 +310,13 @@ static void loop_unroll(jit_State *J)
310/* Undo any partial changes made by the loop optimization. */ 310/* Undo any partial changes made by the loop optimization. */
311static void loop_undo(jit_State *J, IRRef ins) 311static void loop_undo(jit_State *J, IRRef ins)
312{ 312{
313 ptrdiff_t i;
313 lj_ir_rollback(J, ins); 314 lj_ir_rollback(J, ins);
315 for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
316 BPropEntry *bp = &J->bpropcache[i];
317 if (bp->val >= ins)
318 bp->key = 0;
319 }
314 for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */ 320 for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */
315 IRIns *ir = IR(ins); 321 IRIns *ir = IR(ins);
316 irt_clearphi(ir->t); 322 irt_clearphi(ir->t);
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 94fc4ad8..882ba6c5 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -307,14 +307,7 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
307 307
308conflict: 308conflict:
309 /* Try to find a matching load. Below the conflicting store, if any. */ 309 /* Try to find a matching load. Below the conflicting store, if any. */
310 ref = J->chain[IR_ULOAD]; 310 return lj_opt_cselim(J, lim);
311 while (ref > lim) {
312 IRIns *load = IR(ref);
313 if (load->op1 == uref)
314 return ref; /* Load forwarding. */
315 ref = load->prev;
316 }
317 return EMITFOLD; /* Conflict or no match. */
318} 311}
319 312
320/* USTORE elimination. */ 313/* USTORE elimination. */
@@ -405,14 +398,7 @@ TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
405 398
406conflict: 399conflict:
407 /* Try to find a matching load. Below the conflicting store, if any. */ 400 /* Try to find a matching load. Below the conflicting store, if any. */
408 ref = J->chain[IR_FLOAD]; 401 return lj_opt_cselim(J, lim);
409 while (ref > lim) {
410 IRIns *load = IR(ref);
411 if (load->op1 == oref && load->op2 == fid)
412 return ref; /* Load forwarding. */
413 ref = load->prev;
414 }
415 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
416} 402}
417 403
418/* FSTORE elimination. */ 404/* FSTORE elimination. */
@@ -458,10 +444,10 @@ doemit:
458 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ 444 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
459} 445}
460 446
461/* -- TLEN forwarding ----------------------------------------------------- */ 447/* -- Forwarding of lj_tab_len -------------------------------------------- */
462 448
463/* This is rather simplistic right now, but better than nothing. */ 449/* This is rather simplistic right now, but better than nothing. */
464TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J) 450TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
465{ 451{
466 IRRef tab = fins->op1; /* Table reference. */ 452 IRRef tab = fins->op1; /* Table reference. */
467 IRRef lim = tab; /* Search limit. */ 453 IRRef lim = tab; /* Search limit. */
@@ -484,14 +470,7 @@ TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
484 } 470 }
485 471
486 /* Try to find a matching load. Below the conflicting store, if any. */ 472 /* Try to find a matching load. Below the conflicting store, if any. */
487 ref = J->chain[IR_TLEN]; 473 return lj_opt_cselim(J, lim);
488 while (ref > lim) {
489 IRIns *tlen = IR(ref);
490 if (tlen->op1 == tab)
491 return ref; /* Load forwarding. */
492 ref = tlen->prev;
493 }
494 return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
495} 474}
496 475
497/* -- ASTORE/HSTORE previous type analysis -------------------------------- */ 476/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 60a6afb8..b9107c5e 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -370,7 +370,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
370TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) 370TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
371{ 371{
372 lua_Number n; 372 lua_Number n;
373 if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc)) 373 if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
374 lj_trace_err(J, LJ_TRERR_BADTYPE); 374 lj_trace_err(J, LJ_TRERR_BADTYPE);
375 n = numV(vc); 375 n = numV(vc);
376 /* Limit narrowing for pow to small exponents (or for two constants). */ 376 /* Limit narrowing for pow to small exponents (or for two constants). */
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 000772fe..1de07e92 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -317,6 +317,7 @@ GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len)
317 GCstr *s = lj_str_new(L, str, len); 317 GCstr *s = lj_str_new(L, str, len);
318 TValue *tv = lj_tab_setstr(L, ls->fs->kt, s); 318 TValue *tv = lj_tab_setstr(L, ls->fs->kt, s);
319 if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */ 319 if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
320 lj_gc_check(L);
320 return s; 321 return s;
321} 322}
322 323
diff --git a/src/lj_record.c b/src/lj_record.c
index 68a233b9..9b223ff6 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -441,7 +441,7 @@ static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
441 mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META); 441 mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
442 } else { 442 } else {
443 /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */ 443 /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
444 mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]); 444 mt = tabref(basemt_obj(J2G(J), &ix->tabv));
445 if (mt == NULL) 445 if (mt == NULL)
446 return 0; /* No metamethod. */ 446 return 0; /* No metamethod. */
447 mix.tab = lj_ir_ktab(J, mt); 447 mix.tab = lj_ir_ktab(J, mt);
@@ -855,7 +855,7 @@ typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd);
855/* Get runtime value of int argument. */ 855/* Get runtime value of int argument. */
856static int32_t argv2int(jit_State *J, TValue *o) 856static int32_t argv2int(jit_State *J, TValue *o)
857{ 857{
858 if (tvisstr(o) && !lj_str_numconv(strVdata(o), o)) 858 if (tvisstr(o) && !lj_str_tonum(strV(o), o))
859 lj_trace_err(J, LJ_TRERR_BADTYPE); 859 lj_trace_err(J, LJ_TRERR_BADTYPE);
860 return lj_num2bit(numV(o)); 860 return lj_num2bit(numV(o));
861} 861}
@@ -1017,6 +1017,8 @@ static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
1017 /* Otherwise res[0] already contains the result. */ 1017 /* Otherwise res[0] already contains the result. */
1018 } else if (tref_isnumber(tr)) { 1018 } else if (tref_isnumber(tr)) {
1019 res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 1019 res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
1020 } else if (tref_ispri(tr)) {
1021 res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[tref_type(tr)]));
1020 } else { 1022 } else {
1021 recff_err_nyi(J, rd); 1023 recff_err_nyi(J, rd);
1022 } 1024 }
@@ -1165,10 +1167,16 @@ static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
1165 res[0] = emitir(IRTN(IR_ATAN2), y, x); 1167 res[0] = emitir(IRTN(IR_ATAN2), y, x);
1166} 1168}
1167 1169
1170static void recff_math_htrig(jit_State *J, TRef *res, RecordFFData *rd)
1171{
1172 TRef tr = lj_ir_tonum(J, arg[0]);
1173 res[0] = lj_ir_call(J, rd->data, tr);
1174}
1175
1168static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd) 1176static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd)
1169{ 1177{
1170 TRef tr = arg[0]; 1178 TRef tr = arg[0];
1171 if (tref_isinteger(arg[0])) { 1179 if (tref_isinteger(tr)) {
1172 res[0] = tr; 1180 res[0] = tr;
1173 res[1] = lj_ir_kint(J, 0); 1181 res[1] = lj_ir_kint(J, 0);
1174 } else { 1182 } else {
@@ -1187,9 +1195,10 @@ static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
1187 1195
1188static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd) 1196static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
1189{ 1197{
1198 TRef tr = lj_ir_tonum(J, arg[0]);
1190 if (!tref_isnumber_str(arg[1])) 1199 if (!tref_isnumber_str(arg[1]))
1191 lj_trace_err(J, LJ_TRERR_BADTYPE); 1200 lj_trace_err(J, LJ_TRERR_BADTYPE);
1192 res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]); 1201 res[0] = lj_opt_narrow_pow(J, tr, arg[1], &rd->argv[1]);
1193 UNUSED(rd); 1202 UNUSED(rd);
1194} 1203}
1195 1204
@@ -1203,6 +1212,32 @@ static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd)
1203 res[0] = tr; 1212 res[0] = tr;
1204} 1213}
1205 1214
1215static void recff_math_random(jit_State *J, TRef *res, RecordFFData *rd)
1216{
1217 GCudata *ud = udataV(&rd->fn->c.upvalue[0]);
1218 TRef tr, one;
1219 lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
1220 tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud)));
1221 one = lj_ir_knum_one(J);
1222 tr = emitir(IRTN(IR_SUB), tr, one);
1223 if (arg[0]) {
1224 TRef tr1 = lj_ir_tonum(J, arg[0]);
1225 if (arg[1]) { /* d = floor(d*(r2-r1+1.0)) + r1 */
1226 TRef tr2 = lj_ir_tonum(J, arg[1]);
1227 tr2 = emitir(IRTN(IR_SUB), tr2, tr1);
1228 tr2 = emitir(IRTN(IR_ADD), tr2, one);
1229 tr = emitir(IRTN(IR_MUL), tr, tr2);
1230 tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
1231 tr = emitir(IRTN(IR_ADD), tr, tr1);
1232 } else { /* d = floor(d*r1) + 1.0 */
1233 tr = emitir(IRTN(IR_MUL), tr, tr1);
1234 tr = emitir(IRTN(IR_FPMATH), tr, IRFPM_FLOOR);
1235 tr = emitir(IRTN(IR_ADD), tr, one);
1236 }
1237 }
1238 res[0] = tr;
1239}
1240
1206/* -- Bit library fast functions ------------------------------------------ */ 1241/* -- Bit library fast functions ------------------------------------------ */
1207 1242
1208/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 1243/* Record unary bit.tobit, bit.bnot, bit.bswap. */
@@ -1321,7 +1356,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
1321 for (i = 0; i < len; i++) { 1356 for (i = 0; i < len; i++) {
1322 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i)); 1357 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i));
1323 tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp); 1358 tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp);
1324 res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0); 1359 res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
1325 } 1360 }
1326 } else { /* Empty range or range underflow: return no results. */ 1361 } else { /* Empty range or range underflow: return no results. */
1327 emitir(IRTGI(IR_LE), trend, trstart); 1362 emitir(IRTGI(IR_LE), trend, trstart);
@@ -1335,7 +1370,7 @@ static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
1335static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd) 1370static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
1336{ 1371{
1337 if (tref_istab(arg[0])) { 1372 if (tref_istab(arg[0])) {
1338 res[0] = emitir(IRTI(IR_TLEN), arg[0], 0); 1373 res[0] = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
1339 } /* else: Interpreter will throw. */ 1374 } /* else: Interpreter will throw. */
1340 UNUSED(rd); 1375 UNUSED(rd);
1341} 1376}
@@ -1344,7 +1379,7 @@ static void recff_table_remove(jit_State *J, TRef *res, RecordFFData *rd)
1344{ 1379{
1345 if (tref_istab(arg[0])) { 1380 if (tref_istab(arg[0])) {
1346 if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */ 1381 if (!arg[1] || tref_isnil(arg[1])) { /* Simple pop: t[#t] = nil */
1347 TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); 1382 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
1348 GCtab *t = tabV(&rd->argv[0]); 1383 GCtab *t = tabV(&rd->argv[0]);
1349 MSize len = lj_tab_len(t); 1384 MSize len = lj_tab_len(t);
1350 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); 1385 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
@@ -1376,7 +1411,7 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
1376 rd->nres = 0; 1411 rd->nres = 0;
1377 if (tref_istab(arg[0]) && arg[1]) { 1412 if (tref_istab(arg[0]) && arg[1]) {
1378 if (!arg[2]) { /* Simple push: t[#t+1] = v */ 1413 if (!arg[2]) { /* Simple push: t[#t+1] = v */
1379 TRef trlen = emitir(IRTI(IR_TLEN), arg[0], 0); 1414 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, arg[0]);
1380 GCtab *t = tabV(&rd->argv[0]); 1415 GCtab *t = tabV(&rd->argv[0]);
1381 RecordIndex ix; 1416 RecordIndex ix;
1382 ix.tab = arg[0]; 1417 ix.tab = arg[0];
@@ -1392,6 +1427,62 @@ static void recff_table_insert(jit_State *J, TRef *res, RecordFFData *rd)
1392 } /* else: Interpreter will throw. */ 1427 } /* else: Interpreter will throw. */
1393} 1428}
1394 1429
1430/* -- I/O library fast functions ------------------------------------------ */
1431
1432/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
1433** no need to encode the alternate cases for any of the guards.
1434*/
1435static TRef recff_io_fp(jit_State *J, TRef *res, uint32_t id)
1436{
1437 TRef tr, ud, fp;
1438 if (id) { /* io.func() */
1439 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
1440 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
1441 } else { /* fp:method() */
1442 ud = arg[0];
1443 if (!tref_isudata(ud))
1444 lj_trace_err(J, LJ_TRERR_BADTYPE);
1445 tr = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
1446 emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));
1447 }
1448 fp = emitir(IRT(IR_FLOAD, IRT_LIGHTUD), ud, IRFL_UDATA_FILE);
1449 emitir(IRTG(IR_NE, IRT_LIGHTUD), fp, lj_ir_knull(J, IRT_LIGHTUD));
1450 return fp;
1451}
1452
1453static void recff_io_write(jit_State *J, TRef *res, RecordFFData *rd)
1454{
1455 TRef fp = recff_io_fp(J, res, rd->data);
1456 TRef zero = lj_ir_kint(J, 0);
1457 TRef one = lj_ir_kint(J, 1);
1458 ptrdiff_t i = rd->data == 0 ? 1 : 0;
1459 for (; arg[i]; i++) {
1460 TRef str = lj_ir_tostr(J, arg[i]);
1461 TRef buf = emitir(IRT(IR_STRREF, IRT_PTR), str, zero);
1462 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
1463 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
1464 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
1465 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
1466 if (rd->cres != 0) /* Check result only if requested. */
1467 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
1468 } else {
1469 TRef tr = lj_ir_call(J, IRCALL_fwrite, buf, one, len, fp);
1470 if (rd->cres != 0) /* Check result only if requested. */
1471 emitir(IRTGI(IR_EQ), tr, len);
1472 }
1473 }
1474 res[0] = TREF_TRUE;
1475}
1476
1477static void recff_io_flush(jit_State *J, TRef *res, RecordFFData *rd)
1478{
1479 TRef fp = recff_io_fp(J, res, rd->data);
1480 TRef tr = lj_ir_call(J, IRCALL_fflush, fp);
1481 if (rd->cres != 0) /* Check result only if requested. */
1482 emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0));
1483 res[0] = TREF_TRUE;
1484}
1485
1395/* -- Record calls and returns -------------------------------------------- */ 1486/* -- Record calls and returns -------------------------------------------- */
1396 1487
1397#undef arg 1488#undef arg
@@ -1696,6 +1787,9 @@ void lj_record_ins(jit_State *J)
1696 if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1; 1787 if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1;
1697 } else if (ta == IRT_STR) { 1788 } else if (ta == IRT_STR) {
1698 if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1; 1789 if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
1790 ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
1791 rc = lj_ir_kint(J, 0);
1792 ta = IRT_INT;
1699 } else { 1793 } else {
1700 rec_mm_comp(J, &ix, (int)op); 1794 rec_mm_comp(J, &ix, (int)op);
1701 break; 1795 break;
@@ -1745,7 +1839,7 @@ void lj_record_ins(jit_State *J)
1745 if (tref_isstr(rc)) { 1839 if (tref_isstr(rc)) {
1746 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); 1840 rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
1747 } else if (tref_istab(rc)) { 1841 } else if (tref_istab(rc)) {
1748 rc = emitir(IRTI(IR_TLEN), rc, 0); 1842 rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
1749 } else { 1843 } else {
1750 ix.tab = rc; 1844 ix.tab = rc;
1751 copyTV(J->L, &ix.tabv, &ix.keyv); 1845 copyTV(J->L, &ix.tabv, &ix.keyv);
@@ -1879,8 +1973,6 @@ void lj_record_ins(jit_State *J)
1879 /* fallthrough */ 1973 /* fallthrough */
1880 case BC_CALL: 1974 case BC_CALL:
1881 callop: 1975 callop:
1882 if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */
1883 }
1884 rec_call(J, ra, (int)(rb-1), (int)(rc-1)); 1976 rec_call(J, ra, (int)(rb-1), (int)(rc-1));
1885 break; 1977 break;
1886 1978
@@ -2064,8 +2156,11 @@ static void rec_setup_side(jit_State *J, Trace *T)
2064 BCReg j; 2156 BCReg j;
2065 for (j = 0; j < s; j++) 2157 for (j = 0; j < s; j++)
2066 if (snap_ref(map[j]) == ref) { 2158 if (snap_ref(map[j]) == ref) {
2067 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) 2159 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
2160 lua_assert(s != 0);
2068 J->baseslot = s+1; 2161 J->baseslot = s+1;
2162 J->framedepth++;
2163 }
2069 tr = J->slot[j]; 2164 tr = J->slot[j];
2070 goto dupslot; 2165 goto dupslot;
2071 } 2166 }
@@ -2078,8 +2173,10 @@ static void rec_setup_side(jit_State *J, Trace *T)
2078 case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; 2173 case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
2079 case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ 2174 case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
2080 if (irt_isfunc(ir->t)) { 2175 if (irt_isfunc(ir->t)) {
2081 J->baseslot = s+1; 2176 if (s != 0) {
2082 J->framedepth++; 2177 J->baseslot = s+1;
2178 J->framedepth++;
2179 }
2083 tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); 2180 tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
2084 tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); 2181 tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
2085 } else { 2182 } else {
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 09cd095c..d27404f2 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -251,9 +251,9 @@ void lj_snap_restore(jit_State *J, void *exptr)
251 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); 251 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
252 if (isluafunc(fn)) { 252 if (isluafunc(fn)) {
253 TValue *fs; 253 TValue *fs;
254 newbase = o+1; 254 fs = o+1 + funcproto(fn)->framesize;
255 fs = newbase + funcproto(fn)->framesize;
256 if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ 255 if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
256 if (s != 0) newbase = o+1;
257 } 257 }
258 } 258 }
259 } 259 }
@@ -262,21 +262,17 @@ void lj_snap_restore(jit_State *J, void *exptr)
262 setnilV(o); /* Clear unreferenced slots of newly added frames. */ 262 setnilV(o); /* Clear unreferenced slots of newly added frames. */
263 } 263 }
264 } 264 }
265 if (newbase) { /* Clear remainder of newly added frames. */ 265 if (newbase) L->base = newbase;
266 L->base = newbase; 266 if (ntop >= L->maxstack) { /* Need to grow the stack again. */
267 if (ntop >= L->maxstack) { /* Need to grow the stack again. */ 267 MSize need = (MSize)(ntop - o);
268 MSize need = (MSize)(ntop - o); 268 L->top = o;
269 L->top = o; 269 lj_state_growstack(L, need);
270 lj_state_growstack(L, need); 270 o = L->top;
271 o = L->top; 271 ntop = o + need;
272 ntop = o + need;
273 }
274 L->top = curr_topL(L);
275 for (; o < ntop; o++)
276 setnilV(o);
277 } else { /* Must not clear slots of existing frame. */
278 L->top = curr_topL(L);
279 } 272 }
273 L->top = curr_topL(L);
274 for (; o < ntop; o++) /* Clear remainder of newly added frames. */
275 setnilV(o);
280 lua_assert(map + nslots == flinks-1); 276 lua_assert(map + nslots == flinks-1);
281 J->pc = (const BCIns *)(uintptr_t)(*--flinks); 277 J->pc = (const BCIns *)(uintptr_t)(*--flinks);
282} 278}
diff --git a/src/lj_state.h b/src/lj_state.h
index 54e85405..4e4185c0 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -17,7 +17,7 @@
17LJ_FUNC void lj_state_relimitstack(lua_State *L); 17LJ_FUNC void lj_state_relimitstack(lua_State *L);
18LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); 18LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
19LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need); 19LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
20LJ_FUNCA void lj_state_growstack1(lua_State *L); 20LJ_FUNC void lj_state_growstack1(lua_State *L);
21 21
22static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) 22static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
23{ 23{
diff --git a/src/lj_str.c b/src/lj_str.c
index 26f91cba..62322b59 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -21,7 +21,7 @@
21/* -- String interning ---------------------------------------------------- */ 21/* -- String interning ---------------------------------------------------- */
22 22
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t lj_str_cmp(GCstr *a, GCstr *b) 24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
25{ 25{
26 MSize i, n = a->len > b->len ? b->len : a->len; 26 MSize i, n = a->len > b->len ? b->len : a->len;
27 for (i = 0; i < n; i += 4) { 27 for (i = 0; i < n; i += 4) {
@@ -119,8 +119,14 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
119 119
120/* -- Type conversions ---------------------------------------------------- */ 120/* -- Type conversions ---------------------------------------------------- */
121 121
122/* Convert string object to number. */
123int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n)
124{
125 return lj_str_numconv(strdata(str), n);
126}
127
122/* Convert string to number. */ 128/* Convert string to number. */
123int lj_str_numconv(const char *s, TValue *n) 129int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n)
124{ 130{
125 lua_Number sign = 1; 131 lua_Number sign = 1;
126 const uint8_t *p = (const uint8_t *)s; 132 const uint8_t *p = (const uint8_t *)s;
@@ -167,7 +173,7 @@ parsedbl:
167} 173}
168 174
169/* Convert number to string. */ 175/* Convert number to string. */
170GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) 176GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
171{ 177{
172 char s[LUAI_MAXNUMBER2STR]; 178 char s[LUAI_MAXNUMBER2STR];
173 lua_Number n = *np; 179 lua_Number n = *np;
@@ -176,7 +182,7 @@ GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
176} 182}
177 183
178/* Convert integer to string. */ 184/* Convert integer to string. */
179GCstr *lj_str_fromint(lua_State *L, int32_t k) 185GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
180{ 186{
181 char s[1+10]; 187 char s[1+10];
182 char *p = s+sizeof(s); 188 char *p = s+sizeof(s);
diff --git a/src/lj_str.h b/src/lj_str.h
index f7e56d16..e8b242c0 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -11,7 +11,7 @@
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String interning. */
14LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -20,9 +20,10 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 21
22/* Type conversions. */ 22/* Type conversions. */
23LJ_FUNCA int lj_str_numconv(const char *s, TValue *n); 23LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n);
24LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np); 24LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n);
25LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k); 25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
26 27
27/* String formatting. */ 28/* String formatting. */
28LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); 29LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index 9af51027..ceafb770 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -160,8 +160,16 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
160 return t; 160 return t;
161} 161}
162 162
163GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
164{
165 GCtab *t = newtab(L, ahsize & 0xffffff, ahsize >> 24);
166 clearapart(t);
167 if (t->hmask > 0) clearhpart(t);
168 return t;
169}
170
163/* Duplicate a table. */ 171/* Duplicate a table. */
164GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) 172GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
165{ 173{
166 GCtab *t; 174 GCtab *t;
167 uint32_t asize, hmask; 175 uint32_t asize, hmask;
@@ -334,8 +342,8 @@ static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
334static uint32_t bestasize(uint32_t bins[], uint32_t *narray) 342static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
335{ 343{
336 uint32_t b, sum, na = 0, sz = 0, nn = *narray; 344 uint32_t b, sum, na = 0, sz = 0, nn = *narray;
337 for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++) 345 for (b = 0, sum = 0; 2*nn > (1u<<b) && sum != nn; b++)
338 if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) { 346 if (bins[b] > 0 && 2*(sum += bins[b]) > (1u<<b)) {
339 sz = (2u<<b)+1; 347 sz = (2u<<b)+1;
340 na = sum; 348 na = sum;
341 } 349 }
@@ -599,7 +607,7 @@ static MSize unbound_search(GCtab *t, MSize j)
599** Try to find a boundary in table `t'. A `boundary' is an integer index 607** Try to find a boundary in table `t'. A `boundary' is an integer index
600** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). 608** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
601*/ 609*/
602MSize lj_tab_len(GCtab *t) 610MSize LJ_FASTCALL lj_tab_len(GCtab *t)
603{ 611{
604 MSize j = (MSize)t->asize; 612 MSize j = (MSize)t->asize;
605 if (j > 1 && tvisnil(arrayslot(t, j-1))) { 613 if (j > 1 && tvisnil(arrayslot(t, j-1))) {
diff --git a/src/lj_tab.h b/src/lj_tab.h
index e9e8bcd1..b2a8c3aa 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -11,7 +11,8 @@
11#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) 11#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
12 12
13LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); 13LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
14LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt); 14LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
15LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
15LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); 16LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
16LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); 17LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
17 18
@@ -36,6 +37,6 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
36 (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) 37 (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
37 38
38LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); 39LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
39LJ_FUNCA MSize lj_tab_len(GCtab *t); 40LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
40 41
41#endif 42#endif
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 3ee4fa00..2fb3c4b8 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -32,6 +32,11 @@ enum {
32 32
33 /* Calling conventions. */ 33 /* Calling conventions. */
34 RID_RET = RID_EAX, 34 RID_RET = RID_EAX,
35#if LJ_64
36 RID_FPRET = RID_XMM0,
37#else
38 RID_RETHI = RID_EDX,
39#endif
35 40
36 /* These definitions must match with the *.dasc file(s): */ 41 /* These definitions must match with the *.dasc file(s): */
37 RID_BASE = RID_EDX, /* Interpreter BASE. */ 42 RID_BASE = RID_EDX, /* Interpreter BASE. */
@@ -98,8 +103,8 @@ enum {
98}; 103};
99 104
100/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */ 105/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
101#define sps_scale(slot) (4 * (int32_t)(slot)) 106#define sps_scale(slot) (4 * (int32_t)(slot))
102#define sps_adjust(as) (sps_scale((as->evenspill-SPS_FIXED+3)&~3)) 107#define sps_adjust(slot) (sps_scale(((slot)-SPS_FIXED+3)&~3))
103 108
104/* -- Exit state ---------------------------------------------------------- */ 109/* -- Exit state ---------------------------------------------------------- */
105 110
@@ -185,6 +190,7 @@ typedef enum {
185 XO_ARITHib = XO_(80), 190 XO_ARITHib = XO_(80),
186 XO_ARITHi = XO_(81), 191 XO_ARITHi = XO_(81),
187 XO_ARITHi8 = XO_(83), 192 XO_ARITHi8 = XO_(83),
193 XO_ARITHiw8 = XO_66(83),
188 XO_SHIFTi = XO_(c1), 194 XO_SHIFTi = XO_(c1),
189 XO_SHIFT1 = XO_(d1), 195 XO_SHIFT1 = XO_(d1),
190 XO_SHIFTcl = XO_(d3), 196 XO_SHIFTcl = XO_(d3),
@@ -216,6 +222,7 @@ typedef enum {
216 XO_CVTSI2SD = XO_f20f(2a), 222 XO_CVTSI2SD = XO_f20f(2a),
217 XO_CVTSD2SI = XO_f20f(2d), 223 XO_CVTSD2SI = XO_f20f(2d),
218 XO_CVTTSD2SI= XO_f20f(2c), 224 XO_CVTTSD2SI= XO_f20f(2c),
225 XO_MOVD = XO_660f(6e),
219 XO_MOVDto = XO_660f(7e), 226 XO_MOVDto = XO_660f(7e),
220 227
221 XO_FLDq = XO_(dd), XOg_FLDq = 0, 228 XO_FLDq = XO_(dd), XOg_FLDq = 0,
diff --git a/src/lj_udata.c b/src/lj_udata.c
index 863889c9..717d483b 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -16,6 +16,7 @@ GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
16 global_State *g = G(L); 16 global_State *g = G(L);
17 newwhite(g, ud); /* Not finalized. */ 17 newwhite(g, ud); /* Not finalized. */
18 ud->gct = ~LJ_TUDATA; 18 ud->gct = ~LJ_TUDATA;
19 ud->udtype = UDTYPE_USERDATA;
19 ud->len = sz; 20 ud->len = sz;
20 /* NOBARRIER: The GCudata is new (marked white). */ 21 /* NOBARRIER: The GCudata is new (marked white). */
21 setgcrefnull(ud->metatable); 22 setgcrefnull(ud->metatable);