aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile25
-rw-r--r--README4
-rw-r--r--doc/changes.html323
-rw-r--r--doc/contact.html2
-rw-r--r--doc/ext_c_api.html2
-rw-r--r--doc/ext_ffi.html2
-rw-r--r--doc/ext_ffi_api.html10
-rw-r--r--doc/ext_ffi_semantics.html29
-rw-r--r--doc/ext_ffi_tutorial.html2
-rw-r--r--doc/ext_jit.html4
-rw-r--r--doc/ext_profiler.html364
-rw-r--r--doc/extensions.html104
-rw-r--r--doc/faq.html2
-rw-r--r--doc/install.html155
-rw-r--r--doc/luajit.html8
-rw-r--r--doc/running.html3
-rw-r--r--doc/status.html13
-rw-r--r--dynasm/dasm_arm.lua6
-rw-r--r--dynasm/dasm_arm64.h519
-rw-r--r--dynasm/dasm_arm64.lua1166
-rw-r--r--dynasm/dasm_mips.h22
-rw-r--r--dynasm/dasm_mips.lua684
-rw-r--r--dynasm/dasm_mips64.lua12
-rw-r--r--dynasm/dasm_ppc.h13
-rw-r--r--dynasm/dasm_ppc.lua702
-rw-r--r--dynasm/dasm_proto.h4
-rw-r--r--dynasm/dasm_x86.h38
-rw-r--r--dynasm/dasm_x86.lua606
-rw-r--r--dynasm/dynasm.lua7
-rw-r--r--etc/luajit.pc6
-rw-r--r--src/.gitignore2
-rw-r--r--src/Makefile87
-rw-r--r--src/Makefile.dep216
-rw-r--r--src/host/buildvm.c26
-rw-r--r--src/host/buildvm.h1
-rw-r--r--src/host/buildvm_asm.c56
-rw-r--r--src/host/buildvm_lib.c61
-rw-r--r--src/host/buildvm_libbc.h56
-rw-r--r--src/host/buildvm_peobj.c43
-rw-r--r--src/host/genlibbc.lua197
-rw-r--r--src/jit/bc.lua19
-rw-r--r--src/jit/bcsave.lua106
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_arm64.lua1216
-rw-r--r--src/jit/dis_arm64be.lua12
-rw-r--r--src/jit/dis_mips.lua372
-rw-r--r--src/jit/dis_mips64.lua17
-rw-r--r--src/jit/dis_mips64el.lua17
-rw-r--r--src/jit/dis_mips64r6.lua17
-rw-r--r--src/jit/dis_mips64r6el.lua17
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua297
-rw-r--r--src/jit/dump.lua45
-rw-r--r--src/jit/p.lua311
-rw-r--r--src/jit/v.lua17
-rw-r--r--src/jit/zone.lua45
-rw-r--r--src/lauxlib.h34
-rw-r--r--src/lib_aux.c74
-rw-r--r--src/lib_base.c134
-rw-r--r--src/lib_bit.c134
-rw-r--r--src/lib_debug.c10
-rw-r--r--src/lib_ffi.c82
-rw-r--r--src/lib_io.c41
-rw-r--r--src/lib_jit.c169
-rw-r--r--src/lib_math.c15
-rw-r--r--src/lib_os.c37
-rw-r--r--src/lib_package.c67
-rw-r--r--src/lib_string.c453
-rw-r--r--src/lib_table.c187
-rw-r--r--src/lj_alloc.c264
-rw-r--r--src/lj_api.c246
-rw-r--r--src/lj_arch.h282
-rw-r--r--src/lj_asm.c733
-rw-r--r--src/lj_asm_arm.h438
-rw-r--r--src/lj_asm_arm64.h2043
-rw-r--r--src/lj_asm_mips.h1590
-rw-r--r--src/lj_asm_ppc.h729
-rw-r--r--src/lj_asm_x86.h1121
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h6
-rw-r--r--src/lj_bcread.c144
-rw-r--r--src/lj_bcwrite.c225
-rw-r--r--src/lj_buf.c232
-rw-r--r--src/lj_buf.h103
-rw-r--r--src/lj_carith.c75
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_ccall.c377
-rw-r--r--src/lj_ccall.h49
-rw-r--r--src/lj_ccallback.c239
-rw-r--r--src/lj_cconv.c4
-rw-r--r--src/lj_cdata.c40
-rw-r--r--src/lj_cdata.h7
-rw-r--r--src/lj_clib.c41
-rw-r--r--src/lj_cparse.c152
-rw-r--r--src/lj_cparse.h2
-rw-r--r--src/lj_crecord.c322
-rw-r--r--src/lj_crecord.h7
-rw-r--r--src/lj_ctype.c17
-rw-r--r--src/lj_ctype.h2
-rw-r--r--src/lj_debug.c186
-rw-r--r--src/lj_debug.h8
-rw-r--r--src/lj_def.h20
-rw-r--r--src/lj_dispatch.c85
-rw-r--r--src/lj_dispatch.h43
-rw-r--r--src/lj_emit_arm.h21
-rw-r--r--src/lj_emit_arm64.h419
-rw-r--r--src/lj_emit_mips.h153
-rw-r--r--src/lj_emit_ppc.h22
-rw-r--r--src/lj_emit_x86.h179
-rw-r--r--src/lj_err.c205
-rw-r--r--src/lj_errmsg.h5
-rw-r--r--src/lj_ffrecord.c590
-rw-r--r--src/lj_frame.h160
-rw-r--r--src/lj_gc.c73
-rw-r--r--src/lj_gc.h10
-rw-r--r--src/lj_gdbjit.c48
-rw-r--r--src/lj_ir.c153
-rw-r--r--src/lj_ir.h79
-rw-r--r--src/lj_ircall.h203
-rw-r--r--src/lj_iropt.h13
-rw-r--r--src/lj_jit.h130
-rw-r--r--src/lj_lex.c375
-rw-r--r--src/lj_lex.h17
-rw-r--r--src/lj_lib.c75
-rw-r--r--src/lj_lib.h29
-rw-r--r--src/lj_load.c4
-rw-r--r--src/lj_mcode.c12
-rw-r--r--src/lj_meta.c125
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.c17
-rw-r--r--src/lj_obj.h194
-rw-r--r--src/lj_opt_fold.c369
-rw-r--r--src/lj_opt_loop.c29
-rw-r--r--src/lj_opt_mem.c27
-rw-r--r--src/lj_opt_narrow.c2
-rw-r--r--src/lj_opt_sink.c11
-rw-r--r--src/lj_opt_split.c154
-rw-r--r--src/lj_parse.c189
-rw-r--r--src/lj_profile.c368
-rw-r--r--src/lj_profile.h21
-rw-r--r--src/lj_record.c656
-rw-r--r--src/lj_record.h1
-rw-r--r--src/lj_snap.c146
-rw-r--r--src/lj_state.c43
-rw-r--r--src/lj_state.h2
-rw-r--r--src/lj_str.c212
-rw-r--r--src/lj_str.h35
-rw-r--r--src/lj_strfmt.c472
-rw-r--r--src/lj_strfmt.h125
-rw-r--r--src/lj_strfmt_num.c592
-rw-r--r--src/lj_strscan.c57
-rw-r--r--src/lj_tab.c54
-rw-r--r--src/lj_tab.h5
-rw-r--r--src/lj_target.h6
-rw-r--r--src/lj_target_arm.h4
-rw-r--r--src/lj_target_arm64.h332
-rw-r--r--src/lj_target_mips.h193
-rw-r--r--src/lj_target_ppc.h2
-rw-r--r--src/lj_target_x86.h30
-rw-r--r--src/lj_trace.c131
-rw-r--r--src/lj_trace.h2
-rw-r--r--src/lj_traceerr.h4
-rw-r--r--src/lj_vm.h26
-rw-r--r--src/lj_vmevent.c1
-rw-r--r--src/lj_vmmath.c56
-rw-r--r--src/ljamalg.c4
-rw-r--r--src/lua.h11
-rw-r--r--src/luaconf.h8
-rw-r--r--src/luajit.c134
-rw-r--r--src/luajit.h15
-rw-r--r--src/msvcbuild.bat12
-rw-r--r--src/ps4build.bat32
-rw-r--r--src/vm_arm.dasc339
-rw-r--r--src/vm_arm64.dasc3988
-rw-r--r--src/vm_mips.dasc2492
-rw-r--r--src/vm_mips64.dasc5424
-rw-r--r--src/vm_ppc.dasc1620
-rw-r--r--src/vm_x64.dasc4909
-rw-r--r--src/vm_x86.dasc1563
-rw-r--r--src/xb1build.bat101
182 files changed, 39665 insertions, 8095 deletions
diff --git a/Makefile b/Makefile
index 07bc70fa..08778136 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,10 @@
14############################################################################## 14##############################################################################
15 15
16MAJVER= 2 16MAJVER= 2
17MINVER= 0 17MINVER= 1
18RELVER= 5 18RELVER= 0
19VERSION= $(MAJVER).$(MINVER).$(RELVER) 19PREREL= -beta3
20VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
20ABIVER= 5.1 21ABIVER= 5.1
21 22
22############################################################################## 23##############################################################################
@@ -84,8 +85,10 @@ FILE_SO= libluajit.so
84FILE_MAN= luajit.1 85FILE_MAN= luajit.1
85FILE_PC= luajit.pc 86FILE_PC= luajit.pc
86FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h 87FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
87FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \ 88FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
88 dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua 89 dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
90 dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
91 dis_mips64.lua dis_mips64el.lua vmdef.lua
89 92
90ifeq (,$(findstring Windows,$(OS))) 93ifeq (,$(findstring Windows,$(OS)))
91 HOST_SYS:= $(shell uname -s) 94 HOST_SYS:= $(shell uname -s)
@@ -115,7 +118,7 @@ install: $(INSTALL_DEP)
115 $(MKDIR) $(INSTALL_DIRS) 118 $(MKDIR) $(INSTALL_DIRS)
116 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) 119 cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
117 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : 120 cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
118 $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) 121 $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
119 cd src && test -f $(FILE_SO) && \ 122 cd src && test -f $(FILE_SO) && \
120 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ 123 $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
121 $(LDCONFIG) $(INSTALL_LIB) && \ 124 $(LDCONFIG) $(INSTALL_LIB) && \
@@ -127,12 +130,18 @@ install: $(INSTALL_DEP)
127 $(RM) $(FILE_PC).tmp 130 $(RM) $(FILE_PC).tmp
128 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) 131 cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
129 cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) 132 cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
130 $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
131 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" 133 @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
134 @echo ""
135 @echo "Note: the development releases deliberately do NOT install a symlink for luajit"
136 @echo "You can do this now by running this command (with sudo):"
137 @echo ""
138 @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
139 @echo ""
140
132 141
133uninstall: 142uninstall:
134 @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" 143 @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
135 $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) 144 $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
136 for file in $(FILES_JITLIB); do \ 145 for file in $(FILES_JITLIB); do \
137 $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ 146 $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
138 done 147 done
diff --git a/README b/README
index aa177ae2..5c74038f 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
1README for LuaJIT 2.0.5 1README for LuaJIT 2.1.0-beta3
2----------------------- 2-----------------------------
3 3
4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. 4LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
5 5
diff --git a/doc/changes.html b/doc/changes.html
index a20295f2..5fc74f10 100644
--- a/doc/changes.html
+++ b/doc/changes.html
@@ -43,6 +43,8 @@ div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
43<a href="ext_jit.html">jit.* Library</a> 43<a href="ext_jit.html">jit.* Library</a>
44</li><li> 44</li><li>
45<a href="ext_c_api.html">Lua/C API</a> 45<a href="ext_c_api.html">Lua/C API</a>
46</li><li>
47<a href="ext_profiler.html">Profiler</a>
46</li></ul> 48</li></ul>
47</li><li> 49</li><li>
48<a href="status.html">Status</a> 50<a href="status.html">Status</a>
@@ -71,6 +73,96 @@ to see whether newer versions are available.
71</p> 73</p>
72 74
73<div class="major" style="background: #d0d0ff;"> 75<div class="major" style="background: #d0d0ff;">
76<h2 id="LuaJIT-2.1.0-beta3">LuaJIT 2.1.0-beta3 &mdash; 2017-05-01</h2>
77<ul>
78<li>Rewrite memory block allocator.</li>
79<li>Add various extensions from Lua 5.2/5.3.</li>
80<li>Remove old Lua 5.0 compatibility defines.</li>
81<li>Set arg table before evaluating <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
82<li>Fix FOLD rules for <tt>math.abs()</tt> and FP negation.</li>
83<li>Fix soft-float <tt>math.abs()</tt> and negation.</li>
84<li>Fix formatting of some small denormals at low precision.</li>
85<li>LJ_GC64: Add JIT compiler support.</li>
86<li>x64/LJ_GC64: Add JIT compiler backend.</li>
87<li>x86/x64: Generate BMI2 shifts and rotates, if available.</li>
88<li>Windows/x86: Add full exception interoperability.</li>
89<li>ARM64: Add big-endian support.</li>
90<li>ARM64: Add JIT compiler backend.</li>
91<li>MIPS: Fix <tt>TSETR</tt> barrier.</li>
92<li>MIPS: Support MIPS16 interlinking.</li>
93<li>MIPS soft-float: Fix code generation for <tt>HREF</tt>.</li>
94<li>MIPS64: Add MIPS64 hard-float JIT compiler backend.</li>
95<li>MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.</li>
96<li>FFI: Compile bitfield loads/stores.</li>
97<li>Various fixes common with the 2.0 branch.</li>
98</ul>
99
100<h2 id="LuaJIT-2.1.0-beta2">LuaJIT 2.1.0-beta2 &mdash; 2016-03-03</h2>
101<ul>
102<li>Enable trace stitching.</li>
103<li>Use internal implementation for converting FP numbers to strings.</li>
104<li>Parse Unicode escape <tt>'\u{XX...}'</tt> in string literals.</li>
105<li>Add MIPS soft-float support.</li>
106<li>Switch MIPS port to dual-number mode.</li>
107<li>x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.</li>
108<li>FFI: Add <tt>ssize_t</tt> declaration.</li>
109<li>FFI: Parse <tt>#line NN</tt> and <tt>#NN</tt>.</li>
110<li>Various minor fixes.</li>
111</ul>
112
113<h2 id="LuaJIT-2.1.0-beta1">LuaJIT 2.1.0-beta1 &mdash; 2015-08-25</h2>
114<p>
115This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0.
116Please take a look at the commit history for more details.
117</p>
118<ul>
119<li>Changes to the VM core:
120<ul>
121<li>Add low-overhead profiler (<tt>-jp</tt>).</li>
122<li>Add <tt>LJ_GC64</tt> mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.</li>
123<li>Add <tt>LJ_FR2</tt> mode: Two-slot frame info. Required by <tt>LJ_GC64</tt> mode.</li>
124<li>Add <tt>table.new()</tt> and <tt>table.clear()</tt>.</li>
125<li>Parse binary number literals (<tt>0bxxx</tt>).</li>
126</ul></li>
127<li>Improvements to the JIT compiler:
128<ul>
129<li>Add trace stitching (disabled for now).</li>
130<li>Compile various builtins: <tt>string.char()</tt>, <tt>string.reverse()</tt>, <tt>string.lower()</tt>, <tt>string.upper()</tt>, <tt>string.rep()</tt>, <tt>string.format()</tt>, <tt>table.concat()</tt>, <tt>bit.tohex()</tt>, <tt>getfenv(0)</tt>, <tt>debug.getmetatable()</tt>.</li>
131<li>Compile <tt>string.find()</tt> for fixed string searches (no patterns).</li>
132<li>Compile <tt>BC_TSETM</tt>, e.g. <tt>{1,2,3,f()}</tt>.</li>
133<li>Compile string concatenations (<tt>BC_CAT</tt>).</li>
134<li>Compile <tt>__concat</tt> metamethod.</li>
135<li>Various minor optimizations.</li>
136</ul></li>
137<li>Internal Changes:
138<ul>
139<li>Add support for embedding LuaJIT bytecode for builtins.</li>
140<li>Replace various builtins with embedded bytecode.</li>
141<li>Refactor string buffers and string formatting.</li>
142<li>Remove obsolete non-truncating number to integer conversions.</li>
143</ul></li>
144<li>Ports:
145<ul>
146<li>Add Xbox One port (<tt>LJ_GC64</tt> mode).</li>
147<li>ARM64: Add port of the interpreter (<tt>LJ_GC64</tt> mode).</li>
148<li>x64: Add separate port of the interpreter to <tt>LJ_GC64</tt> mode.</li>
149<li>x86/x64: Drop internal x87 math functions. Use libm functions.</li>
150<li>x86: Remove x87 support from interpreter. SSE2 is mandatory now.</li>
151<li>PPC/e500: Drop support for this architecture.</li>
152</ul></li>
153<li>FFI library:
154<ul>
155<li>FFI: Add 64 bit bitwise operations.</li>
156<li>FFI: Compile VLA/VLS and large cdata allocations with default initialization.</li>
157<li>FFI: Compile conversions from functions to function pointers.</li>
158<li>FFI: Compile lightuserdata to <tt>void *</tt> conversion.</li>
159<li>FFI: Compile <tt>ffi.gc(cdata, nil)</tt>, too.</li>
160<li>FFI: Add <tt>ffi.typeinfo()</tt>.</li>
161</ul></li>
162</ul>
163</div>
164
165<div class="major" style="background: #ffffd0;">
74<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 &mdash; 2017-05-01</h2> 166<h2 id="LuaJIT-2.0.5">LuaJIT 2.0.5 &mdash; 2017-05-01</h2>
75<ul> 167<ul>
76<li>Add workaround for MSVC 2015 stdio changes.</li> 168<li>Add workaround for MSVC 2015 stdio changes.</li>
@@ -80,7 +172,7 @@ to see whether newer versions are available.
80<li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li> 172<li>Remove internal <tt>__mode = "K"</tt> and replace with safe check.</li>
81<li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li> 173<li>Add "proto" field to <tt>jit.util.funcinfo()</tt>.</li>
82<li>Fix GC step size calculation.</li> 174<li>Fix GC step size calculation.</li>
83<li>Initialize <tt>uv->immutable</tt> for upvalues of loaded chunks.</li> 175<li>Initialize <tt>uv-&gt;immutable</tt> for upvalues of loaded chunks.</li>
84<li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li> 176<li>Fix for cdata vs. non-cdata arithmetics/comparisons.</li>
85<li>Drop leftover regs in 'for' iterator assignment, too.</li> 177<li>Drop leftover regs in 'for' iterator assignment, too.</li>
86<li>Fix PHI remarking in SINK pass.</li> 178<li>Fix PHI remarking in SINK pass.</li>
@@ -776,235 +868,6 @@ This matches the behavior of Lua 5.1, but not the specification.</li>
776no point in listing differences over earlier versions.</li> 868no point in listing differences over earlier versions.</li>
777</ul> 869</ul>
778</div> 870</div>
779
780<div class="major" style="background: #ffff80;">
781<h2 id="LuaJIT-1.1.8">LuaJIT 1.1.8 &mdash; 2012-04-16</h2>
782<ul>
783<li>Merged with Lua 5.1.5. Also integrated fixes for all
784<a href="http://www.lua.org/bugs.html#5.1.5"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.5</a>.</li>
785</ul>
786
787<h2 id="LuaJIT-1.1.7">LuaJIT 1.1.7 &mdash; 2011-05-05</h2>
788<ul>
789<li>Added fixes for the
790<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
791</ul>
792
793<h2 id="LuaJIT-1.1.6">LuaJIT 1.1.6 &mdash; 2010-03-28</h2>
794<ul>
795<li>Added fixes for the
796<a href="http://www.lua.org/bugs.html#5.1.4"><span class="ext">&raquo;</span>&nbsp;currently known bugs in Lua 5.1.4</a>.</li>
797<li>Removed wrong GC check in <tt>jit_createstate()</tt>.
798Thanks to Tim Mensch.</li>
799<li>Fixed bad assertions while compiling <tt>table.insert()</tt> and
800<tt>table.remove()</tt>.</li>
801</ul>
802
803<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
804<ul>
805<li>Merged with Lua 5.1.4. Fixes all
806<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
807</ul>
808
809<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
810<ul>
811<li>Merged with Lua 5.1.3. Fixes all
812<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
813<li>Fixed possible (but unlikely) stack corruption while compiling
814<tt>k^x</tt> expressions.</li>
815<li>Fixed DynASM template for cmpss instruction.</li>
816</ul>
817
818<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
819<ul>
820<li>Merged with Lua 5.1.2. Fixes all
821<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
822<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
823<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
824<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
825<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
826<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
827<li>The loadable debug modules now handle redirection to stdout
828(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
829</ul>
830
831<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
832<ul>
833<li>Fix MSVC inline assembly: use only local variables with
834<tt>lua_number2int()</tt>.</li>
835<li>Fix "attempt to call a thread value" bug on Mac OS X:
836make values of consts used as lightuserdata keys unique
837to avoid joining by the compiler/linker.</li>
838</ul>
839
840<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
841<ul>
842<li>Merged with Lua 5.1.1. Fixes all
843<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
844<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
845<li>Minor changes to DynASM: faster pre-processing, smaller encoding
846for some immediates.</li>
847</ul>
848<p>
849This release is in sync with Coco 1.1.1 (see the
850<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
851</p>
852
853<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
854<ul>
855<li>Merged with Lua 5.1 (final).</li>
856
857<li>New JIT call frame setup:
858<ul>
859<li>The C stack is kept 16 byte aligned (faster).
860Mandatory for Mac OS X on Intel, too.</li>
861<li>Faster calling conventions for internal C helper functions.</li>
862<li>Better instruction scheduling for function prologue, OP_CALL and
863OP_RETURN.</li>
864</ul></li>
865
866<li>Miscellaneous optimizations:
867<ul>
868<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
869forwarding stalls.</li>
870<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
871and FP to integer conversions.</li>
872<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
873<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
874With better accuracy than the C variant, too.</li>
875<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
876use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
877</ul></li>
878
879<li>Changes in the optimizer:
880<ul>
881<li>Improved hinting for table keys derived from table values
882(<tt>t1[t2[x]]</tt>).</li>
883<li>Lookup hinting now works with arbitrary object types and
884supports index chains, too.</li>
885<li>Generate type hints for arithmetic and comparison operators,
886OP_LEN, OP_CONCAT and OP_FORPREP.</li>
887<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
888<li>Complete rewrite of <tt>jit.opt_inline</tt> module
889(ex <tt>jit.opt_lib</tt>).</li>
890</ul></li>
891
892<li>Use adaptive deoptimization:
893<ul>
894<li>If runtime verification of a contract fails, the affected
895instruction is recompiled and patched on-the-fly.
896Regular programs will trigger deoptimization only occasionally.</li>
897<li>This avoids generating code for uncommon fallback cases
898most of the time. Generated code is up to 30% smaller compared to
899LuaJIT&nbsp;1.0.3.</li>
900<li>Deoptimization is used for many opcodes and contracts:
901<ul>
902<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
903<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
904<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
905<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
906OP_FORPREP: operand type and range mismatches.</li>
907</ul></li>
908<li>Complete redesign of the debug and traceback info
909(bytecode &harr; mcode) to support deoptimization.
910Much more flexible and needs only 50% of the space.</li>
911<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
912<tt>jit.dump</tt> handle deoptimization.</li>
913</ul></li>
914
915<li>Inlined many popular library functions
916(for commonly used arguments only):
917<ul>
918<li>Most <tt>math.*</tt> functions (the 18 most used ones)
919[2x-10x faster].</li>
920<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
921[2x-10x faster].</li>
922<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
923[3x-5x faster].</li>
924<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
925[3x-5x faster].</li>
926<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
927[8x-15x faster].</li>
928</ul></li>
929
930<li>Changes in the core and loadable modules and the stand-alone executable:
931<ul>
932<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
933and <tt>jit.arch</tt>.</li>
934<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
935<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
936<li>New x86 disassembler module written in pure Lua. No dependency
937on ndisasm anymore. Flexible API, very compact (500 lines)
938and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
939<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
940on a separate line.</li>
941</ul></li>
942
943<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
944<li>Miscellaneous doc changes. Added a section about
945<a href="install.html#embedding">embedding LuaJIT</a>.</li>
946</ul>
947<p>
948This release is in sync with Coco 1.1.0 (see the
949<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
950</p>
951</div>
952
953<div class="major" style="background: #ffffd0;">
954<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
955<ul>
956<li>Even more docs.</li>
957<li>Unified closure checks in <tt>jit.*</tt>.</li>
958<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
959<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
960<li>Merged with Lua 5.1 alpha (including early bug fixes).</li>
961</ul>
962<p>
963This is the first public release of LuaJIT.
964</p>
965
966<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
967<ul>
968<li>Add support for flushing the Valgrind translation cache <br>
969(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
970<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
971variant for POSIX systems.</li>
972<li>Reorganized the C&nbsp;function signature handling in
973<tt>jit.opt_lib</tt>.</li>
974<li>Changed to index-based hints for inlining C&nbsp;functions.
975Still no support in the backend for inlining.</li>
976<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
977<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
978<li>Misc. changes to the Makefiles.</li>
979<li>Lots of new docs.</li>
980<li>Complete doc reorg.</li>
981</ul>
982<p>
983Not released because Lua 5.1 alpha came out today.
984</p>
985
986<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
987<ul>
988<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
989<li>Fix result handling for C &ndash;&gt; JIT calls.</li>
990<li>Detect CPU feature bits.</li>
991<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
992<li>Add fallback instructions for FP compares.</li>
993<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
994<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
995(David Burgess).</li>
996<li>Misc. doc updates.</li>
997</ul>
998<p>
999Interim non-public release.
1000Special thanks to Adam D. Moss for reporting most of the bugs.
1001</p>
1002
1003<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
1004<p>
1005This is the initial non-public release of LuaJIT.
1006</p>
1007</div>
1008<br class="flush"> 871<br class="flush">
1009</div> 872</div>
1010<div id="foot"> 873<div id="foot">
diff --git a/doc/contact.html b/doc/contact.html
index fbab370c..c014dc9a 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -40,6 +40,8 @@
40<a href="ext_jit.html">jit.* Library</a> 40<a href="ext_jit.html">jit.* Library</a>
41</li><li> 41</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 42<a href="ext_c_api.html">Lua/C API</a>
43</li><li>
44<a href="ext_profiler.html">Profiler</a>
43</li></ul> 45</li></ul>
44</li><li> 46</li><li>
45<a href="status.html">Status</a> 47<a href="status.html">Status</a>
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
index 88017ace..4f471526 100644
--- a/doc/ext_c_api.html
+++ b/doc/ext_c_api.html
@@ -40,6 +40,8 @@
40<a href="ext_jit.html">jit.* Library</a> 40<a href="ext_jit.html">jit.* Library</a>
41</li><li> 41</li><li>
42<a class="current" href="ext_c_api.html">Lua/C API</a> 42<a class="current" href="ext_c_api.html">Lua/C API</a>
43</li><li>
44<a href="ext_profiler.html">Profiler</a>
43</li></ul> 45</li></ul>
44</li><li> 46</li><li>
45<a href="status.html">Status</a> 47<a href="status.html">Status</a>
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
index fe74a1d8..6a2a4852 100644
--- a/doc/ext_ffi.html
+++ b/doc/ext_ffi.html
@@ -40,6 +40,8 @@
40<a href="ext_jit.html">jit.* Library</a> 40<a href="ext_jit.html">jit.* Library</a>
41</li><li> 41</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 42<a href="ext_c_api.html">Lua/C API</a>
43</li><li>
44<a href="ext_profiler.html">Profiler</a>
43</li></ul> 45</li></ul>
44</li><li> 46</li><li>
45<a href="status.html">Status</a> 47<a href="status.html">Status</a>
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
index 40fc694b..ad19b76b 100644
--- a/doc/ext_ffi_api.html
+++ b/doc/ext_ffi_api.html
@@ -45,6 +45,8 @@ td.abiparam { font-weight: bold; width: 6em; }
45<a href="ext_jit.html">jit.* Library</a> 45<a href="ext_jit.html">jit.* Library</a>
46</li><li> 46</li><li>
47<a href="ext_c_api.html">Lua/C API</a> 47<a href="ext_c_api.html">Lua/C API</a>
48</li><li>
49<a href="ext_profiler.html">Profiler</a>
48</li></ul> 50</li></ul>
49</li><li> 51</li><li>
50<a href="status.html">Status</a> 52<a href="status.html">Status</a>
@@ -465,6 +467,10 @@ otherwise. The following parameters are currently defined:
465<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> 467<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
466<tr class="odd"> 468<tr class="odd">
467<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> 469<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
470<tr class="even">
471<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
472<tr class="odd">
473<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
468</table> 474</table>
469 475
470<h3 id="ffi_os"><tt>ffi.os</tt></h3> 476<h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -541,8 +547,8 @@ corresponding ctype.
541The parser for Lua source code treats numeric literals with the 547The parser for Lua source code treats numeric literals with the
542suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit 548suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
543integers. Case doesn't matter, but uppercase is recommended for 549integers. Case doesn't matter, but uppercase is recommended for
544readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal 550readability. It handles decimal (<tt>42LL</tt>), hexadecimal
545(<tt>0x2aLL</tt>) literals. 551(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
546</p> 552</p>
547<p> 553<p>
548The imaginary part of complex numbers can be specified by suffixing 554The imaginary part of complex numbers can be specified by suffixing
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index a21e5bd5..40575af8 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -45,6 +45,8 @@ td.convop { font-style: italic; width: 40%; }
45<a href="ext_jit.html">jit.* Library</a> 45<a href="ext_jit.html">jit.* Library</a>
46</li><li> 46</li><li>
47<a href="ext_c_api.html">Lua/C API</a> 47<a href="ext_c_api.html">Lua/C API</a>
48</li><li>
49<a href="ext_profiler.html">Profiler</a>
48</li></ul> 50</li></ul>
49</li><li> 51</li><li>
50<a href="status.html">Status</a> 52<a href="status.html">Status</a>
@@ -182,6 +184,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
182<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, 184<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
183<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> 185<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
184 186
187<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
188
185</ul> 189</ul>
186<p> 190<p>
187You're encouraged to use these types in preference to 191You're encouraged to use these types in preference to
@@ -729,6 +733,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
729number (e.g. for regular floating-point calculations) with 733number (e.g. for regular floating-point calculations) with
730<tt>tonumber()</tt>. But note this may incur a precision loss.</li> 734<tt>tonumber()</tt>. But note this may incur a precision loss.</li>
731 735
736<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
737arithmetic operators apply analogously.<br>
738
739Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
740converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
741returns a Lua number.<br>
742
743For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
744conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
745<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
746
747For all other operations, only the first argument is used to determine
748the output type. This implies that a cdata number as a shift count for
749shifts and rotates is accepted, but that alone does <em>not</em> cause
750a cdata number output.
751
732</ul> 752</ul>
733 753
734<h3 id="cdata_comp">Comparisons of cdata objects</h3> 754<h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1200,14 +1220,12 @@ The following operations are currently not compiled and may exhibit
1200suboptimal performance, especially when used in inner loops: 1220suboptimal performance, especially when used in inner loops:
1201</p> 1221</p>
1202<ul> 1222<ul>
1203<li>Bitfield accesses and initializations.</li>
1204<li>Vector operations.</li> 1223<li>Vector operations.</li>
1205<li>Table initializers.</li> 1224<li>Table initializers.</li>
1206<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> 1225<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
1207<li>Allocations of variable-length arrays or structs.</li> 1226<li>Non-default initialization of VLA/VLS or large C&nbsp;types
1208<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an 1227(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
1209alignment &gt; 8&nbsp;bytes.</li> 1228<li>Bitfield initializations.</li>
1210<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
1211<li>Pointer differences for element sizes that are not a power of 1229<li>Pointer differences for element sizes that are not a power of
1212two.</li> 1230two.</li>
1213<li>Calls to C&nbsp;functions with aggregates passed or returned by 1231<li>Calls to C&nbsp;functions with aggregates passed or returned by
@@ -1223,7 +1241,6 @@ value.</li>
1223Other missing features: 1241Other missing features:
1224</p> 1242</p>
1225<ul> 1243<ul>
1226<li>Bit operations for 64&nbsp;bit types.</li>
1227<li>Arithmetic for <tt>complex</tt> numbers.</li> 1244<li>Arithmetic for <tt>complex</tt> numbers.</li>
1228<li>Passing structs by value to vararg C&nbsp;functions.</li> 1245<li>Passing structs by value to vararg C&nbsp;functions.</li>
1229<li><a href="extensions.html#exceptions">C++ exception interoperability</a> 1246<li><a href="extensions.html#exceptions">C++ exception interoperability</a>
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
index 7ca14018..e979ffea 100644
--- a/doc/ext_ffi_tutorial.html
+++ b/doc/ext_ffi_tutorial.html
@@ -47,6 +47,8 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
47<a href="ext_jit.html">jit.* Library</a> 47<a href="ext_jit.html">jit.* Library</a>
48</li><li> 48</li><li>
49<a href="ext_c_api.html">Lua/C API</a> 49<a href="ext_c_api.html">Lua/C API</a>
50</li><li>
51<a href="ext_profiler.html">Profiler</a>
50</li></ul> 52</li></ul>
51</li><li> 53</li><li>
52<a href="status.html">Status</a> 54<a href="status.html">Status</a>
diff --git a/doc/ext_jit.html b/doc/ext_jit.html
index 018b4396..3720d308 100644
--- a/doc/ext_jit.html
+++ b/doc/ext_jit.html
@@ -40,6 +40,8 @@
40<a class="current" href="ext_jit.html">jit.* Library</a> 40<a class="current" href="ext_jit.html">jit.* Library</a>
41</li><li> 41</li><li>
42<a href="ext_c_api.html">Lua/C API</a> 42<a href="ext_c_api.html">Lua/C API</a>
43</li><li>
44<a href="ext_profiler.html">Profiler</a>
43</li></ul> 45</li></ul>
44</li><li> 46</li><li>
45<a href="status.html">Status</a> 47<a href="status.html">Status</a>
@@ -150,7 +152,7 @@ Contains the target OS name:
150<h3 id="jit_arch"><tt>jit.arch</tt></h3> 152<h3 id="jit_arch"><tt>jit.arch</tt></h3>
151<p> 153<p>
152Contains the target architecture name: 154Contains the target architecture name:
153"x86", "x64", "arm", "ppc", "ppcspe", or "mips". 155"x86", "x64", "arm", "arm64", "ppc", "mips" or "mips64".
154</p> 156</p>
155 157
156<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2> 158<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
new file mode 100644
index 00000000..b778cda4
--- /dev/null
+++ b/doc/ext_profiler.html
@@ -0,0 +1,364 @@
1<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2<html>
3<head>
4<title>Profiler</title>
5<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
6<meta name="Copyright" content="Copyright (C) 2005-2020">
7<meta name="Language" content="en">
8<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
9<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
10</head>
11<body>
12<div id="site">
13<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
14</div>
15<div id="head">
16<h1>Profiler</h1>
17</div>
18<div id="nav">
19<ul><li>
20<a href="luajit.html">LuaJIT</a>
21<ul><li>
22<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
23</li><li>
24<a href="install.html">Installation</a>
25</li><li>
26<a href="running.html">Running</a>
27</li></ul>
28</li><li>
29<a href="extensions.html">Extensions</a>
30<ul><li>
31<a href="ext_ffi.html">FFI Library</a>
32<ul><li>
33<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
34</li><li>
35<a href="ext_ffi_api.html">ffi.* API</a>
36</li><li>
37<a href="ext_ffi_semantics.html">FFI Semantics</a>
38</li></ul>
39</li><li>
40<a href="ext_jit.html">jit.* Library</a>
41</li><li>
42<a href="ext_c_api.html">Lua/C API</a>
43</li><li>
44<a class="current" href="ext_profiler.html">Profiler</a>
45</li></ul>
46</li><li>
47<a href="status.html">Status</a>
48<ul><li>
49<a href="changes.html">Changes</a>
50</li></ul>
51</li><li>
52<a href="faq.html">FAQ</a>
53</li><li>
54<a href="http://luajit.org/performance.html">Performance <span class="ext">&raquo;</span></a>
55</li><li>
56<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
57</li><li>
58<a href="http://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
59</li></ul>
60</div>
61<div id="main">
62<p>
63LuaJIT has an integrated statistical profiler with very low overhead. It
64allows sampling the currently executing stack and other parameters in
65regular intervals.
66</p>
67<p>
68The integrated profiler can be accessed from three levels:
69</p>
70<ul>
71<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
72<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
73<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
74<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
75</ul>
76
77<h2 id="hl_profiler">High-Level Profiler</h2>
78<p>
79The bundled high-level profiler offers basic profiling functionality. It
80generates simple textual summaries or source code annotations. It can be
81accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
82or from Lua code by loading the underlying <tt>jit.p</tt> module.
83</p>
84<p>
85To cut to the chase &mdash; run this to get a CPU usage profile by
86function name:
87</p>
88<pre class="code">
89luajit -jp myapp.lua
90</pre>
91<p>
92It's <em>not</em> a stated goal of the bundled profiler to add every
93possible option or to cater for special profiling needs. The low-level
94profiler APIs are documented below. They may be used by third-party
95authors to implement advanced functionality, e.g. IDE integration or
96graphical profilers.
97</p>
98<p>
99Note: Sampling works for both interpreted and JIT-compiled code. The
100results for JIT-compiled code may sometimes be surprising. LuaJIT
101heavily optimizes and inlines Lua code &mdash; there's no simple
102one-to-one correspondence between source code lines and the sampled
103machine code.
104</p>
105
106<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
107<p>
108The <tt>-jp</tt> command line option starts the high-level profiler.
109When the application run by the command line terminates, the profiler
110stops and writes the results to <tt>stdout</tt> or to the specified
111<tt>output</tt> file.
112</p>
113<p>
114The <tt>options</tt> argument specifies how the profiling is to be
115performed:
116</p>
117<ul>
118<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
119This is the default mode.</li>
120<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
121<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
122<li><tt>&lt;number&gt;</tt> &mdash; stack dump depth (callee &larr;
123caller). Default: 1.</li>
124<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
125&rarr; callee).</li>
126<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
127depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
128<li><tt>p</tt> &mdash; Show full path for module names.</li>
129<li><tt>v</tt> &mdash; Show VM states.</li>
130<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
131<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
132<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
133<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
134<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
135<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
136Default: 3%.</li>
137<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
138Default: 10ms.<br>
139Note: The actual sampling precision is OS-dependent.</li>
140</ul>
141<p>
142The default output for <tt>-jp</tt> is a list of the most CPU consuming
143spots in the application. Increasing the stack dump depth with (say)
144<tt>-jp=2</tt> may help to point out the main callers or callees of
145hotspots. But sample aggregation is still flat per unique stack dump.
146</p>
147<p>
148To get a two-level view (split view) of callers/callees, use
149<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
150level are relative to the first level.
151</p>
152<p>
153To see how much time is spent in each line relative to a function, use
154<tt>-jp=fl</tt>.
155</p>
156<p>
157To see how much time is spent in different VM states or
158<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
159</p>
160<p>
161Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
162views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
163spent in a VM state or zone vs. hotspots. This can be used to answer
164questions like "Which time consuming functions are only interpreted?" or
165"What's the garbage collector overhead for a specific function?".
166</p>
167<p>
168Multiple options can be combined &mdash; but not all combinations make
169sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
170deep in 4ms intervals and shows a split view of the CPU consuming
171functions and their callers with a 1% threshold.
172</p>
173<p>
174Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
175always flat and at the line level. Obviously, the source code files need
176to be readable by the profiler script.
177</p>
178<p>
179The high-level profiler can also be started and stopped from Lua code with:
180</p>
181<pre class="code">
182require("jit.p").start(options, output)
183...
184require("jit.p").stop()
185</pre>
186
187<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
188<p>
189Zones can be used to provide information about different parts of an
190application to the high-level profiler. E.g. a game could make use of an
191<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
192organized as a stack.
193</p>
194<p>
195The <tt>jit.zone</tt> module needs to be loaded explicitly:
196</p>
197<pre class="code">
198local zone = require("jit.zone")
199</pre>
200<ul>
201<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
202<li><tt>zone()</tt> pops the current zone from the zone stack and
203returns its name.</li>
204<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
205<li><tt>zone:flush()</tt> flushes the zone stack.</li>
206</ul>
207<p>
208To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
209spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
210</p>
211
212<h2 id="ll_lua_api">Low-level Lua API</h2>
213<p>
214The <tt>jit.profile</tt> module gives access to the low-level API of the
215profiler from Lua code. This module needs to be loaded explicitly:</p>
216<pre class="code">
217local profile = require("jit.profile")
218</pre>
219<p>
220This module can be used to implement your own higher-level profiler.
221A typical profiling run starts the profiler, captures stack dumps in
222the profiler callback, adds them to a hash table to aggregate the number
223of samples, stops the profiler and then analyzes all of the captured
224stack dumps. Other parameters can be sampled in the profiler callback,
225too. But it's important not to spend too much time in the callback,
226since this may skew the statistics.
227</p>
228
229<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
230&mdash; Start profiler</h3>
231<p>
232This function starts the profiler. The <tt>mode</tt> argument is a
233string holding options:
234</p>
235<ul>
236<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
237<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
238<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
23910ms).<br>
240Note: The actual sampling precision is OS-dependent.
241</li>
242</ul>
243<p>
244The <tt>cb</tt> argument is a callback function which is called with
245three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
246called on a separate coroutine; the <tt>thread</tt> argument is the
247state that holds the stack to sample for profiling. Note: do
248<em>not</em> modify the stack of that state or call functions on it.
249</p>
250<p>
251<tt>samples</tt> gives the number of accumulated samples since the last
252callback (usually 1).
253</p>
254<p>
255<tt>vmstate</tt> holds the VM state at the time the profiling timer
256triggered. This may or may not correspond to the state of the VM when
257the profiling callback is called. The state is either <tt>'N'</tt>
258native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
259C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
260compiler.
261</p>
262
263<h3 id="profile_stop"><tt>profile.stop()</tt>
264&mdash; Stop profiler</h3>
265<p>
266This function stops the profiler.
267</p>
268
269<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
270&mdash; Dump stack </h3>
271<p>
272This function allows taking stack dumps in an efficient manner. It
273returns a string with a stack dump for the <tt>thread</tt> (coroutine),
274formatted according to the <tt>fmt</tt> argument:
275</p>
276<ul>
277<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise
278only the file name is used.</li>
279<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise
280use module:line.</li>
281<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
282<li><tt>l</tt> &mdash; Dump module:line.</li>
283<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
284frame.</li>
285<li>All other characters are added verbatim to the output string.</li>
286</ul>
287<p>
288The <tt>depth</tt> argument gives the number of frames to dump, starting
289at the topmost frame of the thread. A negative number dumps the frames in
290inverse order.
291</p>
292<p>
293The first example prints a list of the current module names and line
294numbers of up to 10 frames in separate lines. The second example prints
295semicolon-separated function names for all frames (up to 100) in inverse
296order:
297</p>
298<pre class="code">
299print(profile.dumpstack(thread, "l\n", 10))
300print(profile.dumpstack(thread, "fZ;", -100))
301</pre>
302
303<h2 id="ll_c_api">Low-level C API</h2>
304<p>
305The profiler can be controlled directly from C&nbsp;code, e.g. for
306use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
307<a href="ext_c_api.html">Lua/C API</a> extensions).
308</p>
309
310<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
311&mdash; Start profiler</h3>
312<p>
313This function starts the profiler. <a href="#profile_start">See
314above</a> for a description of the <tt>mode</tt> argument.
315</p>
316<p>
317The <tt>cb</tt> argument is a callback function with the following
318declaration:
319</p>
320<pre class="code">
321typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
322 int samples, int vmstate);
323</pre>
324<p>
325<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
326state that holds the stack to sample for profiling. Note: do
327<em>not</em> modify this stack or call functions on this stack &mdash;
328use a separate coroutine for this purpose. <a href="#profile_start">See
329above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
330</p>
331
332<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
333&mdash; Stop profiler</h3>
334<p>
335This function stops the profiler.
336</p>
337
338<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
339&mdash; Dump stack </h3>
340<p>
341This function allows taking stack dumps in an efficient manner.
342<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
343and <tt>depth</tt>.
344</p>
345<p>
346This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
347private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
348argument returns the length of the output string. The buffer is
349overwritten on the next call and deallocated when the profiler stops.
350You either need to consume the content immediately or copy it for later
351use.
352</p>
353<br class="flush">
354</div>
355<div id="foot">
356<hr class="hide">
357Copyright &copy; 2005-2020
358<span class="noprint">
359&middot;
360<a href="contact.html">Contact</a>
361</span>
362</div>
363</body>
364</html>
diff --git a/doc/extensions.html b/doc/extensions.html
index 25d2f7fd..25764198 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -57,6 +57,8 @@ td.excinterop {
57<a href="ext_jit.html">jit.* Library</a> 57<a href="ext_jit.html">jit.* Library</a>
58</li><li> 58</li><li>
59<a href="ext_c_api.html">Lua/C API</a> 59<a href="ext_c_api.html">Lua/C API</a>
60</li><li>
61<a href="ext_profiler.html">Profiler</a>
60</li></ul> 62</li></ul>
61</li><li> 63</li><li>
62<a href="status.html">Status</a> 64<a href="status.html">Status</a>
@@ -112,6 +114,9 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
112This module is a LuaJIT built-in &mdash; you don't need to download or 114This module is a LuaJIT built-in &mdash; you don't need to download or
113install Lua BitOp. The Lua BitOp site has full documentation for all 115install Lua BitOp. The Lua BitOp site has full documentation for all
114<a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>. 116<a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
117The FFI adds support for
118<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
119using the same API functions.
115</p> 120</p>
116<p> 121<p>
117Please make sure to <tt>require</tt> the module before using any of 122Please make sure to <tt>require</tt> the module before using any of
@@ -145,6 +150,11 @@ LuaJIT adds some
145<a href="ext_c_api.html">extra functions to the Lua/C API</a>. 150<a href="ext_c_api.html">extra functions to the Lua/C API</a>.
146</p> 151</p>
147 152
153<h3 id="profiler">Profiler</h3>
154<p>
155LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
156</p>
157
148<h2 id="library">Enhanced Standard Library Functions</h2> 158<h2 id="library">Enhanced Standard Library Functions</h2>
149 159
150<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> 160<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -172,7 +182,7 @@ in <tt>"-inf"</tt>.
172<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> 182<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
173<p> 183<p>
174All string-to-number conversions consistently convert integer and 184All string-to-number conversions consistently convert integer and
175floating-point inputs in decimal and hexadecimal on all platforms. 185floating-point inputs in decimal, hexadecimal and binary on all platforms.
176<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous 186<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
177problems with poor C library implementations. The builtin conversion 187problems with poor C library implementations. The builtin conversion
178function provides full precision according to the IEEE-754 standard, it 188function provides full precision according to the IEEE-754 standard, it
@@ -196,6 +206,36 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
196minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign 206minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
197bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. 207bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
198</p> 208</p>
209<p>
210Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
211a different, incompatible bytecode format for all 64 bit ports. This may be
212rectified in the future.
213</p>
214
215<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
216<p>
217An extra library function <tt>table.new()</tt> can be made available via
218<tt>require("table.new")</tt>. This creates a pre-sized table, just like
219the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
220tables if the final table size is known and automatic table resizing is
221too expensive.
222</p>
223
224<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
225<p>
226An extra library function <tt>table.clear()</tt> can be made available
227via <tt>require("table.clear")</tt>. This clears all keys and values
228from a table, but preserves the allocated array/hash sizes. This is
229useful when a table, which is linked from multiple places, needs to be
230cleared and/or when recycling a table for use by the same context. This
231avoids managing backlinks, saves an allocation and the overhead of
232incremental array/hash part growth.
233</p>
234<p>
235Please note this function is meant for very specific situations. In most
236cases it's better to replace the (usually single) link with a new table
237and let the GC do its work.
238</p>
199 239
200<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> 240<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
201<p> 241<p>
@@ -274,6 +314,26 @@ indexes for varargs.</li>
274<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle 314<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
275C&nbsp;functions.</li> 315C&nbsp;functions.</li>
276<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> 316<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
317<li>Lua/C API extensions:
318<tt>lua_version()</tt>
319<tt>lua_upvalueid()</tt>
320<tt>lua_upvaluejoin()</tt>
321<tt>lua_loadx()</tt>
322<tt>lua_copy()</tt>
323<tt>lua_tonumberx()</tt>
324<tt>lua_tointegerx()</tt>
325<tt>luaL_fileresult()</tt>
326<tt>luaL_execresult()</tt>
327<tt>luaL_loadfilex()</tt>
328<tt>luaL_loadbufferx()</tt>
329<tt>luaL_traceback()</tt>
330<tt>luaL_setfuncs()</tt>
331<tt>luaL_pushmodule()</tt>
332<tt>luaL_newlibtable()</tt>
333<tt>luaL_newlib()</tt>
334<tt>luaL_testudata()</tt>
335<tt>luaL_setmetatable()</tt>
336</li>
277<li>Command line option <tt>-E</tt>.</li> 337<li>Command line option <tt>-E</tt>.</li>
278<li>Command line checks <tt>__tostring</tt> for errors.</li> 338<li>Command line checks <tt>__tostring</tt> for errors.</li>
279</ul> 339</ul>
@@ -299,6 +359,8 @@ exit status.</li>
299<li><tt>debug.setmetatable()</tt> returns object.</li> 359<li><tt>debug.setmetatable()</tt> returns object.</li>
300<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> 360<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
301<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> 361<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
362<li><tt>package.searchers</tt>.</li>
363<li><tt>module()</tt> returns the module table.</li>
302</ul> 364</ul>
303<p> 365<p>
304Note: this provides only partial compatibility with Lua 5.2 at the 366Note: this provides only partial compatibility with Lua 5.2 at the
@@ -307,6 +369,21 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
307break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). 369break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
308</p> 370</p>
309 371
372<h2 id="lua53">Extensions from Lua 5.3</h2>
373<p>
374LuaJIT supports some extensions from Lua&nbsp;5.3:</p>
375<ul>
376<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
377<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
378<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li>
379<li><tt>assert()</tt> accepts any type of error object.</li>
380<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
381<li><tt>coroutine.isyieldable()</tt>.</li>
382<li>Lua/C API extensions:
383<tt>lua_isyieldable()</tt>
384</li>
385</ul>
386
310<h2 id="exceptions">C++ Exception Interoperability</h2> 387<h2 id="exceptions">C++ Exception Interoperability</h2>
311<p> 388<p>
312LuaJIT has built-in support for interoperating with C++&nbsp;exceptions. 389LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -321,25 +398,30 @@ the toolchain used to compile LuaJIT:
321</tr> 398</tr>
322<tr class="odd separate"> 399<tr class="odd separate">
323<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> 400<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
324<td class="exccompiler">GCC 4.3+</td> 401<td class="exccompiler">GCC 4.3+, Clang</td>
325<td class="excinterop"><b style="color: #00a000;">Full</b></td> 402<td class="excinterop"><b style="color: #00a000;">Full</b></td>
326</tr> 403</tr>
327<tr class="even"> 404<tr class="even">
405<td class="excplatform">ARM <tt>-DLUAJIT_UNWIND_EXTERNAL</tt></td>
406<td class="exccompiler">GCC, Clang</td>
407<td class="excinterop"><b style="color: #00a000;">Full</b></td>
408</tr>
409<tr class="odd">
328<td class="excplatform">Other platforms, DWARF2 unwinding</td> 410<td class="excplatform">Other platforms, DWARF2 unwinding</td>
329<td class="exccompiler">GCC</td> 411<td class="exccompiler">GCC, Clang</td>
330<td class="excinterop"><b style="color: #c06000;">Limited</b></td> 412<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
331</tr> 413</tr>
332<tr class="odd"> 414<tr class="even">
333<td class="excplatform">Windows/x64</td> 415<td class="excplatform">Windows/x64</td>
334<td class="exccompiler">MSVC or WinSDK</td> 416<td class="exccompiler">MSVC or WinSDK</td>
335<td class="excinterop"><b style="color: #00a000;">Full</b></td> 417<td class="excinterop"><b style="color: #00a000;">Full</b></td>
336</tr> 418</tr>
337<tr class="even"> 419<tr class="odd">
338<td class="excplatform">Windows/x86</td> 420<td class="excplatform">Windows/x86</td>
339<td class="exccompiler">Any</td> 421<td class="exccompiler">Any</td>
340<td class="excinterop"><b style="color: #a00000;">No</b></td> 422<td class="excinterop"><b style="color: #00a000;">Full</b></td>
341</tr> 423</tr>
342<tr class="odd"> 424<tr class="even">
343<td class="excplatform">Other platforms</td> 425<td class="excplatform">Other platforms</td>
344<td class="exccompiler">Other compilers</td> 426<td class="exccompiler">Other compilers</td>
345<td class="excinterop"><b style="color: #a00000;">No</b></td> 427<td class="excinterop"><b style="color: #a00000;">No</b></td>
@@ -388,14 +470,6 @@ C++ destructors.</li>
388<li>Lua errors <b>cannot</b> be caught on the C++ side.</li> 470<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
389<li>Throwing Lua errors across C++ frames will <b>not</b> call 471<li>Throwing Lua errors across C++ frames will <b>not</b> call
390C++ destructors.</li> 472C++ destructors.</li>
391<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
392it's <b>not</b> safe to throw a Lua error across any frames containing
393a C++ function with any try/catch construct or using variables with
394(implicit) destructors. This also applies to any functions which may be
395inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
396is called inside or outside of a try/catch or whether any object actually
397needs to be destroyed: the SEH chain is corrupted and this will eventually
398lead to the termination of the process.</li>
399</ul> 473</ul>
400<br class="flush"> 474<br class="flush">
401</div> 475</div>
diff --git a/doc/faq.html b/doc/faq.html
index be4d9f61..2031aa8a 100644
--- a/doc/faq.html
+++ b/doc/faq.html
@@ -43,6 +43,8 @@ dd { margin-left: 1.5em; }
43<a href="ext_jit.html">jit.* Library</a> 43<a href="ext_jit.html">jit.* Library</a>
44</li><li> 44</li><li>
45<a href="ext_c_api.html">Lua/C API</a> 45<a href="ext_c_api.html">Lua/C API</a>
46</li><li>
47<a href="ext_profiler.html">Profiler</a>
46</li></ul> 48</li></ul>
47</li><li> 49</li><li>
48<a href="status.html">Status</a> 50<a href="status.html">Status</a>
diff --git a/doc/install.html b/doc/install.html
index 68de0c10..9602831e 100644
--- a/doc/install.html
+++ b/doc/install.html
@@ -68,6 +68,8 @@ td.compatno {
68<a href="ext_jit.html">jit.* Library</a> 68<a href="ext_jit.html">jit.* Library</a>
69</li><li> 69</li><li>
70<a href="ext_c_api.html">Lua/C API</a> 70<a href="ext_c_api.html">Lua/C API</a>
71</li><li>
72<a href="ext_profiler.html">Profiler</a>
71</li></ul> 73</li></ul>
72</li><li> 74</li><li>
73<a href="status.html">Status</a> 75<a href="status.html">Status</a>
@@ -111,17 +113,17 @@ operating systems, CPUs and compilers:
111</tr> 113</tr>
112<tr class="odd separate"> 114<tr class="odd separate">
113<td class="compatcpu">x86 (32 bit)</td> 115<td class="compatcpu">x86 (32 bit)</td>
114<td class="compatos">GCC 4.x+<br>GCC 3.4</td> 116<td class="compatos">GCC 4.2+</td>
115<td class="compatos">GCC 4.x+<br>GCC 3.4</td> 117<td class="compatos">GCC 4.2+</td>
116<td class="compatos">XCode 5.0+<br>Clang</td> 118<td class="compatos">XCode 5.0+<br>Clang</td>
117<td class="compatos">MSVC<br>MinGW, Cygwin</td> 119<td class="compatos">MSVC<br>MinGW, Cygwin</td>
118</tr> 120</tr>
119<tr class="even"> 121<tr class="even">
120<td class="compatcpu">x64 (64 bit)</td> 122<td class="compatcpu">x64 (64 bit)</td>
121<td class="compatos">GCC 4.x+</td> 123<td class="compatos">GCC 4.2+</td>
122<td class="compatos">ORBIS (<a href="#ps4">PS4</a>)</td> 124<td class="compatos">GCC 4.2+<br>ORBIS (<a href="#ps4">PS4</a>)</td>
123<td class="compatos">XCode 5.0+<br>Clang</td> 125<td class="compatos">XCode 5.0+<br>Clang</td>
124<td class="compatos">MSVC</td> 126<td class="compatos">MSVC<br>Durango (<a href="#xboxone">Xbox One</a>)</td>
125</tr> 127</tr>
126<tr class="odd"> 128<tr class="odd">
127<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td> 129<td class="compatcpu"><a href="#cross2">ARMv5+<br>ARM9E+</a></td>
@@ -131,21 +133,21 @@ operating systems, CPUs and compilers:
131<td class="compatos compatno">&nbsp;</td> 133<td class="compatos compatno">&nbsp;</td>
132</tr> 134</tr>
133<tr class="even"> 135<tr class="even">
134<td class="compatcpu"><a href="#cross2">PPC</a></td> 136<td class="compatcpu"><a href="#cross2">ARM64</a></td>
135<td class="compatos">GCC 4.3+</td> 137<td class="compatos">GCC 4.8+</td>
136<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td> 138<td class="compatos compatno">&nbsp;</td>
139<td class="compatos">XCode 6.0+<br>Clang 3.5+</td>
137<td class="compatos compatno">&nbsp;</td> 140<td class="compatos compatno">&nbsp;</td>
138<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
139</tr> 141</tr>
140<tr class="odd"> 142<tr class="odd">
141<td class="compatcpu"><a href="#cross2">PPC/e500v2</a></td> 143<td class="compatcpu"><a href="#cross2">PPC</a></td>
142<td class="compatos">GCC 4.3+</td>
143<td class="compatos">GCC 4.3+</td> 144<td class="compatos">GCC 4.3+</td>
145<td class="compatos">GCC 4.3+<br>GCC 4.1 (<a href="#ps3">PS3</a>)</td>
144<td class="compatos compatno">&nbsp;</td> 146<td class="compatos compatno">&nbsp;</td>
145<td class="compatos compatno">&nbsp;</td> 147<td class="compatos">XEDK (<a href="#xbox360">Xbox 360</a>)</td>
146</tr> 148</tr>
147<tr class="even"> 149<tr class="even">
148<td class="compatcpu"><a href="#cross2">MIPS</a></td> 150<td class="compatcpu"><a href="#cross2">MIPS32<br>MIPS64</a></td>
149<td class="compatos">GCC 4.3+</td> 151<td class="compatos">GCC 4.3+</td>
150<td class="compatos">GCC 4.3+</td> 152<td class="compatos">GCC 4.3+</td>
151<td class="compatos compatno">&nbsp;</td> 153<td class="compatos compatno">&nbsp;</td>
@@ -172,6 +174,13 @@ MSVC (Visual Studio).</li>
172Please read the instructions given in these files, before changing 174Please read the instructions given in these files, before changing
173any settings. 175any settings.
174</p> 176</p>
177<p>
178All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>).
179For x64, you can select the old 32-on-64 bit mode by adding
180<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
181Please check the note about the
182<a href="extensions.html#string_dump">bytecode format</a> differences, too.
183</p>
175 184
176<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2> 185<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
177<h3>Prerequisites</h3> 186<h3>Prerequisites</h3>
@@ -199,7 +208,7 @@ which is probably the default on your system, anyway. Simply run:
199make 208make
200</pre> 209</pre>
201<p> 210<p>
202This always builds a native x86, x64 or PPC binary, depending on the host OS 211This always builds a native binary, depending on the host OS
203you're running this command on. Check the section on 212you're running this command on. Check the section on
204<a href="#cross">cross-compilation</a> for more options. 213<a href="#cross">cross-compilation</a> for more options.
205</p> 214</p>
@@ -297,25 +306,36 @@ directory where <tt>luajit.exe</tt> is installed
297 306
298<h2 id="cross">Cross-compiling LuaJIT</h2> 307<h2 id="cross">Cross-compiling LuaJIT</h2>
299<p> 308<p>
309First, let's clear up some terminology:
310</p>
311<ul>
312<li>Host: This is your development system, usually based on an x64 or x86 CPU.</li>
313<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
314<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
315<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
316<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
317</ul>
318<p>
300The GNU Makefile-based build system allows cross-compiling on any host 319The GNU Makefile-based build system allows cross-compiling on any host
301for any supported target, as long as both architectures have the same 320for any supported target:
302pointer size. If you want to cross-compile to any 32 bit target on an
303x64 OS, you need to install the multilib development package (e.g.
304<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
305(<tt>HOST_CC="gcc -m32"</tt>).
306</p> 321</p>
322<ul>
323<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
324<li>Both host and target architectures must have the same pointer size.</li>
325<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
326<li>64 bit targets always require compilation on a 64 bit host.</li>
327</ul>
307<p> 328<p>
308You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the 329You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
309target OS differ, or you'll get assembler or linker errors. E.g. if 330target OS differ, or you'll get assembler or linker errors:
310you're compiling on a Windows or OSX host for embedded Linux or Android,
311you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
312minimal target OS, you may need to disable the built-in allocator in
313<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
314specify the same <tt>TARGET_SYS</tt> for the install step, too.
315</p> 331</p>
332<ul>
333<li>E.g. if you're compiling on a Windows or OSX host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
334<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
335<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
336</ul>
316<p> 337<p>
317The examples below only show some popular targets &mdash; please check 338Here are some examples where host and target have the same CPU:
318the comments in <tt>src/Makefile</tt> for more details.
319</p> 339</p>
320<pre class="code"> 340<pre class="code">
321# Cross-compile to a 32 bit binary on a multilib x64 OS 341# Cross-compile to a 32 bit binary on a multilib x64 OS
@@ -333,34 +353,44 @@ use the canonical toolchain triplets for Linux.
333</p> 353</p>
334<p> 354<p>
335Since there's often no easy way to detect CPU features at runtime, it's 355Since there's often no easy way to detect CPU features at runtime, it's
336important to compile with the proper CPU or architecture settings. You 356important to compile with the proper CPU or architecture settings:
337can specify these when building the toolchain yourself. Or add 357</p>
338<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For 358<ul>
339ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, 359<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
340too. Otherwise LuaJIT may not run at the full performance of your target 360<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
341CPU. 361<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
362<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
363</ul>
364<p>
365Here are some examples for targets with a different CPU than the host:
342</p> 366</p>
343<pre class="code"> 367<pre class="code">
344# ARM soft-float 368# ARM soft-float
345make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ 369make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
346 TARGET_CFLAGS="-mfloat-abi=soft" 370 TARGET_CFLAGS="-mfloat-abi=soft"
347 371
348# ARM soft-float ABI with VFP (example for Cortex-A8) 372# ARM soft-float ABI with VFP (example for Cortex-A9)
349make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ 373make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
350 TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" 374 TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
351 375
352# ARM hard-float ABI with VFP (armhf, requires recent toolchain) 376# ARM hard-float ABI with VFP (armhf, most modern toolchains)
353make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- 377make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
354 378
379# ARM64
380make CROSS=aarch64-linux-
381
355# PPC 382# PPC
356make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 383make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
357# PPC/e500v2 (fast interpreter only)
358make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
359 384
360# MIPS big-endian 385# MIPS32 big-endian
361make HOST_CC="gcc -m32" CROSS=mips-linux- 386make HOST_CC="gcc -m32" CROSS=mips-linux-
362# MIPS little-endian 387# MIPS32 little-endian
363make HOST_CC="gcc -m32" CROSS=mipsel-linux- 388make HOST_CC="gcc -m32" CROSS=mipsel-linux-
389
390# MIPS64 big-endian
391make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
392# MIPS64 little-endian
393make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
364</pre> 394</pre>
365<p> 395<p>
366You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>. 396You can cross-compile for <b id="android">Android</b> using the <a href="http://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
@@ -368,8 +398,16 @@ Please adapt the environment variables to match the install locations and the
368desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16. 398desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16.
369</p> 399</p>
370<pre class="code"> 400<pre class="code">
371# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) 401# Android/ARM64, aarch64, Android 5.0+ (L)
402NDKDIR=/opt/android/ndk
403NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
404NDKCROSS=$NDKBIN/aarch64-linux-android-
405NDKCC=$NDKBIN/aarch64-linux-android21-clang
406make CROSS=$NDKCROSS \
407 STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
408 TARGET_LD=$NDKCC
372 409
410# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
373NDKDIR=/opt/android/ndk 411NDKDIR=/opt/android/ndk
374NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin 412NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
375NDKCROSS=$NDKBIN/arm-linux-androideabi- 413NDKCROSS=$NDKBIN/arm-linux-androideabi-
@@ -379,9 +417,23 @@ make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
379 TARGET_LD=$NDKCC 417 TARGET_LD=$NDKCC
380</pre> 418</pre>
381<p> 419<p>
382Please use the LuaJIT 2.1 branch to compile for 420You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="http://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
383<b id="ios">iOS</b> (iPhone/iPad).
384</p> 421</p>
422<p style="font-size: 8pt;">
423Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
424are not allowed to generate code at runtime. You'll only get the performance
425of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but
426much slower than the JIT compiler. Please complain to Apple, not me.
427Or use Android. :-p
428</p>
429<pre class="code">
430# iOS/ARM64
431ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
432ICC=$(xcrun --sdk iphoneos --find clang)
433ISDKF="-arch arm64 -isysroot $ISDKP"
434make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
435 TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
436</pre>
385 437
386<h3 id="consoles">Cross-compiling for consoles</h3> 438<h3 id="consoles">Cross-compiling for consoles</h3>
387<p> 439<p>
@@ -437,6 +489,16 @@ the following commands:
437cd src 489cd src
438xedkbuild 490xedkbuild
439</pre> 491</pre>
492<p>
493To cross-compile for <b id="xboxone">Xbox One</b> from a Windows host,
494open a "Visual Studio .NET Command Prompt" (64&nbsp;bit host compiler),
495<tt>cd</tt> to the directory where you've unpacked the sources and run
496the following commands:
497</p>
498<pre class="code">
499cd src
500xb1build
501</pre>
440 502
441<h2 id="embed">Embedding LuaJIT</h2> 503<h2 id="embed">Embedding LuaJIT</h2>
442<p> 504<p>
@@ -467,14 +529,11 @@ intend to load Lua/C modules at runtime.
467</li> 529</li>
468<li> 530<li>
469If you're building a 64 bit application on OSX which links directly or 531If you're building a 64 bit application on OSX which links directly or
470indirectly against LuaJIT, you need to link your main executable 532indirectly against LuaJIT which is not built for <tt>LJ_GC64</tt> mode,
471with these flags: 533you need to link your main executable with these flags:
472<pre class="code"> 534<pre class="code">
473-pagezero_size 10000 -image_base 100000000 535-pagezero_size 10000 -image_base 100000000
474</pre> 536</pre>
475Also, it's recommended to <tt>rebase</tt> all (self-compiled) shared libraries
476which are loaded at runtime on OSX/x64 (e.g. C extension modules for Lua).
477See: <tt>man rebase</tt>
478</li> 537</li>
479</ul> 538</ul>
480<p>Additional hints for initializing LuaJIT using the C API functions:</p> 539<p>Additional hints for initializing LuaJIT using the C API functions:</p>
diff --git a/doc/luajit.html b/doc/luajit.html
index 3f360a93..a3ffa476 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -125,6 +125,8 @@ table.feature small {
125<a href="ext_jit.html">jit.* Library</a> 125<a href="ext_jit.html">jit.* Library</a>
126</li><li> 126</li><li>
127<a href="ext_c_api.html">Lua/C API</a> 127<a href="ext_c_api.html">Lua/C API</a>
128</li><li>
129<a href="ext_profiler.html">Profiler</a>
128</li></ul> 130</li></ul>
129</li><li> 131</li><li>
130<a href="status.html">Status</a> 132<a href="status.html">Status</a>
@@ -163,13 +165,13 @@ LuaJIT is Copyright &copy; 2005-2020 Mike Pall, released under the
163<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> 165<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
164</table> 166</table>
165<table class="feature os os3"> 167<table class="feature os os3">
166<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr> 168<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td></tr>
167</table> 169</table>
168<table class="feature compiler"> 170<table class="feature compiler">
169<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> 171<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
170</table> 172</table>
171<table class="feature cpu"> 173<table class="feature cpu">
172<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr> 174<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
173</table> 175</table>
174<table class="feature fcompat"> 176<table class="feature fcompat">
175<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr> 177<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
diff --git a/doc/running.html b/doc/running.html
index 5cfdcc5e..6f96e9d8 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -62,6 +62,8 @@ td.param_default {
62<a href="ext_jit.html">jit.* Library</a> 62<a href="ext_jit.html">jit.* Library</a>
63</li><li> 63</li><li>
64<a href="ext_c_api.html">Lua/C API</a> 64<a href="ext_c_api.html">Lua/C API</a>
65</li><li>
66<a href="ext_profiler.html">Profiler</a>
65</li></ul> 67</li></ul>
66</li><li> 68</li><li>
67<a href="status.html">Status</a> 69<a href="status.html">Status</a>
@@ -177,6 +179,7 @@ Here are the available LuaJIT control commands:
177<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li> 179<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
178<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li> 180<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
179<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li> 181<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
182<li id="j_p"><tt>-jp</tt> &mdash; Start the <a href="ext_profiler.html">integrated profiler</a>.</li>
180</ul> 183</ul>
181<p> 184<p>
182The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules 185The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
diff --git a/doc/status.html b/doc/status.html
index 175f6a29..cb454db8 100644
--- a/doc/status.html
+++ b/doc/status.html
@@ -43,6 +43,8 @@ ul li { padding-bottom: 0.3em; }
43<a href="ext_jit.html">jit.* Library</a> 43<a href="ext_jit.html">jit.* Library</a>
44</li><li> 44</li><li>
45<a href="ext_c_api.html">Lua/C API</a> 45<a href="ext_c_api.html">Lua/C API</a>
46</li><li>
47<a href="ext_profiler.html">Profiler</a>
46</li></ul> 48</li></ul>
47</li><li> 49</li><li>
48<a class="current" href="status.html">Status</a> 50<a class="current" href="status.html">Status</a>
@@ -94,6 +96,17 @@ handled correctly. The error may fall through an on-trace
94<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new 96<tt>lua_atpanic</tt> on x64. This issue will be fixed with the new
95garbage collector. 97garbage collector.
96</li> 98</li>
99<li>
100LuaJIT on 64 bit systems provides a <b>limited range</b> of 47 bits for the
101<b>legacy <tt>lightuserdata</tt></b> data type.
102This is only relevant on x64 systems which use the negative part of the
103virtual address space in user mode, e.g. Solaris/x64, and on ARM64 systems
104configured with a 48 bit or 52 bit VA.
105Avoid using <tt>lightuserdata</tt> to hold pointers that may point outside
106of that range, e.g. variables on the stack. In general, avoid this data
107type for new code and replace it with (much more performant) FFI bindings.
108FFI cdata pointers can address the full 64 bit range.
109</li>
97</ul> 110</ul>
98<br class="flush"> 111<br class="flush">
99</div> 112</div>
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
index 21fb5022..164980a1 100644
--- a/dynasm/dasm_arm.lua
+++ b/dynasm/dasm_arm.lua
@@ -9,9 +9,9 @@
9local _info = { 9local _info = {
10 arch = "arm", 10 arch = "arm",
11 description = "DynASM ARM module", 11 description = "DynASM ARM module",
12 version = "1.3.0", 12 version = "1.4.0",
13 vernum = 10300, 13 vernum = 10400,
14 release = "2011-05-05", 14 release = "2015-10-18",
15 author = "Mike Pall", 15 author = "Mike Pall",
16 license = "MIT", 16 license = "MIT",
17} 17}
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
new file mode 100644
index 00000000..3455981f
--- /dev/null
+++ b/dynasm/dasm_arm64.h
@@ -0,0 +1,519 @@
1/*
2** DynASM ARM64 encoding engine.
3** Copyright (C) 2005-2020 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/
6
7#include <stddef.h>
8#include <stdarg.h>
9#include <string.h>
10#include <stdlib.h>
11
12#define DASM_ARCH "arm64"
13
14#ifndef DASM_EXTERN
15#define DASM_EXTERN(a,b,c,d) 0
16#endif
17
18/* Action definitions. */
19enum {
20 DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC,
25 DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
26 DASM__MAX
27};
28
29/* Maximum number of section buffer positions for a single dasm_put() call. */
30#define DASM_MAXSECPOS 25
31
32/* DynASM encoder status codes. Action list offset or number are or'ed in. */
33#define DASM_S_OK 0x00000000
34#define DASM_S_NOMEM 0x01000000
35#define DASM_S_PHASE 0x02000000
36#define DASM_S_MATCH_SEC 0x03000000
37#define DASM_S_RANGE_I 0x11000000
38#define DASM_S_RANGE_SEC 0x12000000
39#define DASM_S_RANGE_LG 0x13000000
40#define DASM_S_RANGE_PC 0x14000000
41#define DASM_S_RANGE_REL 0x15000000
42#define DASM_S_UNDEF_LG 0x21000000
43#define DASM_S_UNDEF_PC 0x22000000
44
45/* Macros to convert positions (8 bit section + 24 bit index). */
46#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
47#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
48#define DASM_SEC2POS(sec) ((sec)<<24)
49#define DASM_POS2SEC(pos) ((pos)>>24)
50#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
51
52/* Action list type. */
53typedef const unsigned int *dasm_ActList;
54
55/* Per-section structure. */
56typedef struct dasm_Section {
57 int *rbuf; /* Biased buffer pointer (negative section bias). */
58 int *buf; /* True buffer pointer. */
59 size_t bsize; /* Buffer size in bytes. */
60 int pos; /* Biased buffer position. */
61 int epos; /* End of biased buffer position - max single put. */
62 int ofs; /* Byte offset into section. */
63} dasm_Section;
64
65/* Core structure holding the DynASM encoding state. */
66struct dasm_State {
67 size_t psize; /* Allocated size of this structure. */
68 dasm_ActList actionlist; /* Current actionlist pointer. */
69 int *lglabels; /* Local/global chain/pos ptrs. */
70 size_t lgsize;
71 int *pclabels; /* PC label chains/pos ptrs. */
72 size_t pcsize;
73 void **globals; /* Array of globals (bias -10). */
74 dasm_Section *section; /* Pointer to active section. */
75 size_t codesize; /* Total size of all code sections. */
76 int maxsection; /* 0 <= sectionidx < maxsection. */
77 int status; /* Status code. */
78 dasm_Section sections[1]; /* All sections. Alloc-extended. */
79};
80
81/* The size of the core structure depends on the max. number of sections. */
82#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
83
84
85/* Initialize DynASM state. */
86void dasm_init(Dst_DECL, int maxsection)
87{
88 dasm_State *D;
89 size_t psz = 0;
90 int i;
91 Dst_REF = NULL;
92 DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
93 D = Dst_REF;
94 D->psize = psz;
95 D->lglabels = NULL;
96 D->lgsize = 0;
97 D->pclabels = NULL;
98 D->pcsize = 0;
99 D->globals = NULL;
100 D->maxsection = maxsection;
101 for (i = 0; i < maxsection; i++) {
102 D->sections[i].buf = NULL; /* Need this for pass3. */
103 D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
104 D->sections[i].bsize = 0;
105 D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
106 }
107}
108
109/* Free DynASM state. */
110void dasm_free(Dst_DECL)
111{
112 dasm_State *D = Dst_REF;
113 int i;
114 for (i = 0; i < D->maxsection; i++)
115 if (D->sections[i].buf)
116 DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
117 if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
118 if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
119 DASM_M_FREE(Dst, D, D->psize);
120}
121
122/* Setup global label array. Must be called before dasm_setup(). */
123void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
124{
125 dasm_State *D = Dst_REF;
126 D->globals = gl - 10; /* Negative bias to compensate for locals. */
127 DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
128}
129
130/* Grow PC label array. Can be called after dasm_setup(), too. */
131void dasm_growpc(Dst_DECL, unsigned int maxpc)
132{
133 dasm_State *D = Dst_REF;
134 size_t osz = D->pcsize;
135 DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
136 memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
137}
138
139/* Setup encoder. */
140void dasm_setup(Dst_DECL, const void *actionlist)
141{
142 dasm_State *D = Dst_REF;
143 int i;
144 D->actionlist = (dasm_ActList)actionlist;
145 D->status = DASM_S_OK;
146 D->section = &D->sections[0];
147 memset((void *)D->lglabels, 0, D->lgsize);
148 if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
149 for (i = 0; i < D->maxsection; i++) {
150 D->sections[i].pos = DASM_SEC2POS(i);
151 D->sections[i].ofs = 0;
152 }
153}
154
155
156#ifdef DASM_CHECKS
157#define CK(x, st) \
158 do { if (!(x)) { \
159 D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
160#define CKPL(kind, st) \
161 do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
162 D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
163#else
164#define CK(x, st) ((void)0)
165#define CKPL(kind, st) ((void)0)
166#endif
167
168static int dasm_imm12(unsigned int n)
169{
170 if ((n >> 12) == 0)
171 return n;
172 else if ((n & 0xff000fff) == 0)
173 return (n >> 12) | 0x1000;
174 else
175 return -1;
176}
177
178static int dasm_ffs(unsigned long long x)
179{
180 int n = -1;
181 while (x) { x >>= 1; n++; }
182 return n;
183}
184
185static int dasm_imm13(int lo, int hi)
186{
187 int inv = 0, w = 64, s = 0xfff, xa, xb;
188 unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
189 unsigned long long m = 1ULL, a, b, c;
190 if (n & 1) { n = ~n; inv = 1; }
191 a = n & -n; b = (n+a)&-(n+a); c = (n+a-b)&-(n+a-b);
192 xa = dasm_ffs(a); xb = dasm_ffs(b);
193 if (c) {
194 w = dasm_ffs(c) - xa;
195 if (w == 32) m = 0x0000000100000001UL;
196 else if (w == 16) m = 0x0001000100010001UL;
197 else if (w == 8) m = 0x0101010101010101UL;
198 else if (w == 4) m = 0x1111111111111111UL;
199 else if (w == 2) m = 0x5555555555555555UL;
200 else return -1;
201 s = (-2*w & 0x3f) - 1;
202 } else if (!a) {
203 return -1;
204 } else if (xb == -1) {
205 xb = 64;
206 }
207 if ((b-a) * m != n) return -1;
208 if (inv) {
209 return ((w - xb) << 6) | (s+w+xa-xb);
210 } else {
211 return ((w - xa) << 6) | (s+xb-xa);
212 }
213 return -1;
214}
215
216/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
217void dasm_put(Dst_DECL, int start, ...)
218{
219 va_list ap;
220 dasm_State *D = Dst_REF;
221 dasm_ActList p = D->actionlist + start;
222 dasm_Section *sec = D->section;
223 int pos = sec->pos, ofs = sec->ofs;
224 int *b;
225
226 if (pos >= sec->epos) {
227 DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
228 sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
229 sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
230 sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
231 }
232
233 b = sec->rbuf;
234 b[pos++] = start;
235
236 va_start(ap, start);
237 while (1) {
238 unsigned int ins = *p++;
239 unsigned int action = (ins >> 16);
240 if (action >= DASM__MAX) {
241 ofs += 4;
242 } else {
243 int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
244 switch (action) {
245 case DASM_STOP: goto stop;
246 case DASM_SECTION:
247 n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
248 D->section = &D->sections[n]; goto stop;
249 case DASM_ESC: p++; ofs += 4; break;
250 case DASM_REL_EXT: break;
251 case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
252 case DASM_REL_LG:
253 n = (ins & 2047) - 10; pl = D->lglabels + n;
254 /* Bkwd rel or global. */
255 if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
256 pl += 10; n = *pl;
257 if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
258 goto linkrel;
259 case DASM_REL_PC:
260 pl = D->pclabels + n; CKPL(pc, PC);
261 putrel:
262 n = *pl;
263 if (n < 0) { /* Label exists. Get label pos and store it. */
264 b[pos] = -n;
265 } else {
266 linkrel:
267 b[pos] = n; /* Else link to rel chain, anchored at label. */
268 *pl = pos;
269 }
270 pos++;
271 break;
272 case DASM_LABEL_LG:
273 pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
274 case DASM_LABEL_PC:
275 pl = D->pclabels + n; CKPL(pc, PC);
276 putlabel:
277 n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
278 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
279 }
280 *pl = -pos; /* Label exists now. */
281 b[pos++] = ofs; /* Store pass1 offset estimate. */
282 break;
283 case DASM_IMM:
284 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
285 n >>= ((ins>>10)&31);
286#ifdef DASM_CHECKS
287 if ((ins & 0x8000))
288 CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
289 else
290 CK((n>>((ins>>5)&31)) == 0, RANGE_I);
291#endif
292 b[pos++] = n;
293 break;
294 case DASM_IMM6:
295 CK((n >> 6) == 0, RANGE_I);
296 b[pos++] = n;
297 break;
298 case DASM_IMM12:
299 CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
300 b[pos++] = n;
301 break;
302 case DASM_IMM13W:
303 CK(dasm_imm13(n, n) != -1, RANGE_I);
304 b[pos++] = n;
305 break;
306 case DASM_IMM13X: {
307 int m = va_arg(ap, int);
308 CK(dasm_imm13(n, m) != -1, RANGE_I);
309 b[pos++] = n;
310 b[pos++] = m;
311 break;
312 }
313 case DASM_IMML: {
314#ifdef DASM_CHECKS
315 int scale = (p[-2] >> 30);
316 CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
317 (unsigned int)(n+256) < 512, RANGE_I);
318#endif
319 b[pos++] = n;
320 break;
321 }
322 }
323 }
324 }
325stop:
326 va_end(ap);
327 sec->pos = pos;
328 sec->ofs = ofs;
329}
330#undef CK
331
332/* Pass 2: Link sections, shrink aligns, fix label offsets. */
333int dasm_link(Dst_DECL, size_t *szp)
334{
335 dasm_State *D = Dst_REF;
336 int secnum;
337 int ofs = 0;
338
339#ifdef DASM_CHECKS
340 *szp = 0;
341 if (D->status != DASM_S_OK) return D->status;
342 {
343 int pc;
344 for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
345 if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
346 }
347#endif
348
349 { /* Handle globals not defined in this translation unit. */
350 int idx;
351 for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
352 int n = D->lglabels[idx];
353 /* Undefined label: Collapse rel chain and replace with marker (< 0). */
354 while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
355 }
356 }
357
358 /* Combine all code sections. No support for data sections (yet). */
359 for (secnum = 0; secnum < D->maxsection; secnum++) {
360 dasm_Section *sec = D->sections + secnum;
361 int *b = sec->rbuf;
362 int pos = DASM_SEC2POS(secnum);
363 int lastpos = sec->pos;
364
365 while (pos != lastpos) {
366 dasm_ActList p = D->actionlist + b[pos++];
367 while (1) {
368 unsigned int ins = *p++;
369 unsigned int action = (ins >> 16);
370 switch (action) {
371 case DASM_STOP: case DASM_SECTION: goto stop;
372 case DASM_ESC: p++; break;
373 case DASM_REL_EXT: break;
374 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
375 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
376 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
377 case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
378 case DASM_IMML: pos++; break;
379 case DASM_IMM13X: pos += 2; break;
380 }
381 }
382 stop: (void)0;
383 }
384 ofs += sec->ofs; /* Next section starts right after current section. */
385 }
386
387 D->codesize = ofs; /* Total size of all code sections */
388 *szp = ofs;
389 return DASM_S_OK;
390}
391
392#ifdef DASM_CHECKS
393#define CK(x, st) \
394 do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
395#else
396#define CK(x, st) ((void)0)
397#endif
398
399/* Pass 3: Encode sections. */
400int dasm_encode(Dst_DECL, void *buffer)
401{
402 dasm_State *D = Dst_REF;
403 char *base = (char *)buffer;
404 unsigned int *cp = (unsigned int *)buffer;
405 int secnum;
406
407 /* Encode all code sections. No support for data sections (yet). */
408 for (secnum = 0; secnum < D->maxsection; secnum++) {
409 dasm_Section *sec = D->sections + secnum;
410 int *b = sec->buf;
411 int *endb = sec->rbuf + sec->pos;
412
413 while (b != endb) {
414 dasm_ActList p = D->actionlist + *b++;
415 while (1) {
416 unsigned int ins = *p++;
417 unsigned int action = (ins >> 16);
418 int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
419 switch (action) {
420 case DASM_STOP: case DASM_SECTION: goto stop;
421 case DASM_ESC: *cp++ = *p++; break;
422 case DASM_REL_EXT:
423 n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
424 goto patchrel;
425 case DASM_ALIGN:
426 ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
427 break;
428 case DASM_REL_LG:
429 CK(n >= 0, UNDEF_LG);
430 /* fallthrough */
431 case DASM_REL_PC:
432 CK(n >= 0, UNDEF_PC);
433 n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
434 patchrel:
435 if (!(ins & 0xf800)) { /* B, BL */
436 CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
437 cp[-1] |= ((n >> 2) & 0x03ffffff);
438 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
439 CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
440 cp[-1] |= ((n << 3) & 0x00ffffe0);
441 } else if ((ins & 0x3000) == 0x2000) { /* ADR */
442 CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
443 cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
444 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
445 cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
446 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
447 CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
448 cp[-1] |= ((n << 3) & 0x0007ffe0);
449 }
450 break;
451 case DASM_LABEL_LG:
452 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
453 break;
454 case DASM_LABEL_PC: break;
455 case DASM_IMM:
456 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
457 break;
458 case DASM_IMM6:
459 cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
460 break;
461 case DASM_IMM12:
462 cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
463 break;
464 case DASM_IMM13W:
465 cp[-1] |= (dasm_imm13(n, n) << 10);
466 break;
467 case DASM_IMM13X:
468 cp[-1] |= (dasm_imm13(n, *b++) << 10);
469 break;
470 case DASM_IMML: {
471 int scale = (p[-2] >> 30);
472 cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
473 ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
474 break;
475 }
476 default: *cp++ = ins; break;
477 }
478 }
479 stop: (void)0;
480 }
481 }
482
483 if (base + D->codesize != (char *)cp) /* Check for phase errors. */
484 return DASM_S_PHASE;
485 return DASM_S_OK;
486}
487#undef CK
488
489/* Get PC label offset. */
490int dasm_getpclabel(Dst_DECL, unsigned int pc)
491{
492 dasm_State *D = Dst_REF;
493 if (pc*sizeof(int) < D->pcsize) {
494 int pos = D->pclabels[pc];
495 if (pos < 0) return *DASM_POS2PTR(D, -pos);
496 if (pos > 0) return -1; /* Undefined. */
497 }
498 return -2; /* Unused or out of range. */
499}
500
501#ifdef DASM_CHECKS
502/* Optional sanity checker to call between isolated encoding steps. */
503int dasm_checkstep(Dst_DECL, int secmatch)
504{
505 dasm_State *D = Dst_REF;
506 if (D->status == DASM_S_OK) {
507 int i;
508 for (i = 1; i <= 9; i++) {
509 if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
510 D->lglabels[i] = 0;
511 }
512 }
513 if (D->status == DASM_S_OK && secmatch >= 0 &&
514 D->section != &D->sections[secmatch])
515 D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
516 return D->status;
517}
518#endif
519
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
new file mode 100644
index 00000000..d5640842
--- /dev/null
+++ b/dynasm/dasm_arm64.lua
@@ -0,0 +1,1166 @@
1------------------------------------------------------------------------------
2-- DynASM ARM64 module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
8-- Module information:
9local _info = {
10 arch = "arm",
11 description = "DynASM ARM64 module",
12 version = "1.4.0",
13 vernum = 10400,
14 release = "2015-10-18",
15 author = "Mike Pall",
16 license = "MIT",
17}
18
19-- Exported glue functions for the arch-specific module.
20local _M = { _info = _info }
21
22-- Cache library functions.
23local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
24local assert, setmetatable, rawget = assert, setmetatable, rawget
25local _s = string
26local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
27local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
28local concat, sort, insert = table.concat, table.sort, table.insert
29local bit = bit or require("bit")
30local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
31local ror, tohex = bit.ror, bit.tohex
32
33-- Inherited tables and callbacks.
34local g_opt, g_arch
35local wline, werror, wfatal, wwarn
36
37-- Action name list.
38-- CHECK: Keep this in sync with the C code!
39local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML",
43}
44
45-- Maximum number of section buffer positions for dasm_put().
46-- CHECK: Keep this in sync with the C code!
47local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
48
49-- Action name -> action number.
50local map_action = {}
51for n,name in ipairs(action_names) do
52 map_action[name] = n-1
53end
54
55-- Action list buffer.
56local actlist = {}
57
58-- Argument list for next dasm_put(). Start with offset 0 into action list.
59local actargs = { 0 }
60
61-- Current number of section buffer positions for dasm_put().
62local secpos = 1
63
64------------------------------------------------------------------------------
65
-- Dump the action table: one line per action name with its number
-- printed in hex and in decimal, followed by an empty line.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for _, name in ipairs(action_names) do
    local code = map_action[name]
    local line = format(" %-10s %02X %d\n", name, code, code)
    out:write(line)
  end
  out:write("\n")
end
75
-- Write action list buffer as a huge static C array.
--   out:  output stream (anything with a :write() method).
--   name: C identifier for the generated array.
-- An empty action list is emitted as a single STOP word. The fallback
-- word is kept in a local instead of being stored at actlist[0], because
-- the final element is read from index nn (== 1), which would otherwise
-- be nil and make tohex() raise.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  if nn == 0 then
    nn = 1
    -- Same encoding waction() uses for an action word without argument.
    last = map_action.STOP * 0x10000
  end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end
86
87------------------------------------------------------------------------------
88
-- Append a raw word to the action list.
-- The word must be an integer in the range 0 .. 0xffffffff.
local function wputxw(n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[#actlist+1] = n
end
94
-- Append an action word (action number * 0x10000 + val) to the list.
--   a:   optional C expression appended to the dasm_put() argument list.
--   num: optional number of section buffer positions to consume.
-- The buffer position advances by num (default 1) whenever a or num is given.
local function waction(action, val, a, num)
  local code = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(code * 0x10000 + (val or 0))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
102
-- Flush pending actions into a dasm_put() call (needed before intervening
-- C code or when the buffer positions run out). Unless term is set, a STOP
-- action is appended first. Does nothing if no action was added since the
-- previous flush.
local function wflush(term)
  if #actlist ~= actargs[1] then
    if not term then waction("STOP") end
    local arglist = concat(actargs, ", ")
    wline(format("dasm_put(Dst, %s);", arglist), true)
    -- The next dasm_put() starts at the current end of the action list;
    -- that offset argument occupies a buffer position as well.
    actargs = { #actlist }
    secpos = 1
  end
end
111
-- Append a data word, emitting an ESC action in front of it when the
-- value is <= 0x000fffff.
local function wputw(n)
  local needs_escape = n <= 0x000fffff
  if needs_escape then
    waction("ESC")
  end
  wputxw(n)
end
117
-- Reserve the next action list slot with an empty-string placeholder
-- and return its index; wputpos() fills it in later.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end
124
-- Store a word into a slot reserved by wpos().
-- Words <= 0x000fffff are inserted right after the slot and the slot
-- itself receives an ESC action word instead.
local function wputpos(pos, n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  local word = n
  if word <= 0x000fffff then
    insert(actlist, pos+1, word)
    word = map_action.ESC * 0x10000
  end
  actlist[pos] = word
end
134
135------------------------------------------------------------------------------
136
-- Global label name -> global label number.
-- Numbers are handed out on first lookup, starting at 20 and limited to 2047.
local next_global = 20
local map_global = setmetatable({}, {
  __index = function(t, name)
    if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
    local num = next_global
    if num > 2047 then werror("too many global labels") end
    next_global = num + 1
    t[name] = num
    return num
  end,
})
147
-- Dump the names of all global labels in order of their numbers.
local function dumpglobals(out, lvl)
  -- Invert the name -> number map so it can be walked by number.
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("Global labels:\n")
  for num = 20, next_global-1 do
    out:write(format(" %s\n", names[num]))
  end
  out:write("\n")
end
158
-- Write a C enum with one member (prefix..name) per global label, in
-- label number order, terminated by prefix.."_MAX".
local function writeglobals(out, prefix)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("enum {\n")
  for num = 20, next_global-1 do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
169
-- Write a C array of the global label names (as string literals, in
-- label number order), terminated by a null pointer.
local function writeglobalnames(out, name)
  local names = {}
  for label, num in pairs(map_global) do
    names[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 20, next_global-1 do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
180
181------------------------------------------------------------------------------
182
-- Extern label name -> extern label number.
-- Numbers are handed out on first lookup, starting at 0 and limited to 2047.
-- map_extern_ holds the reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local map_extern = setmetatable({}, {
  __index = function(t, name)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then werror("too many extern labels") end
    next_extern = num + 1
    t[name] = num
    map_extern_[num] = name
    return num
  end,
})
195
-- Dump the names of all extern labels in order of their numbers.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  for num = 0, next_extern-1 do
    local line = format(" %s\n", map_extern_[num])
    out:write(line)
  end
  out:write("\n")
end
204
-- Write a C array of the extern label names (as string literals, in
-- label number order), terminated by a null pointer.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  for num = 0, next_extern-1 do
    out:write(" \"", map_extern_[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
213
214------------------------------------------------------------------------------
215
216-- Arch-specific maps.
217
218-- Ext. register name -> int. name.
219local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }
220
221-- Int. register name -> ext. name.
222local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }
223
224local map_type = {} -- Type name -> { ctype, reg }
225local ctypenum = 0 -- Type number (for Dt... macros).
226
-- Reverse defines for registers: map an internal register name back to
-- its external name, or return the argument unchanged if there is none.
function _M.revdef(s)
  local ext = map_reg_rev[s]
  if ext then return ext end
  return s
end
231
232local map_shift = { lsl = 0, lsr = 1, asr = 2, }
233
234local map_extend = {
235 uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
236 sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
237}
238
239local map_cond = {
240 eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
241 hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
242 hs = 2, lo = 3,
243}
244
245------------------------------------------------------------------------------
246
247local parse_reg_type
248
-- Parse a register operand and return its number plus the type entry
-- (from map_type) it was derived from, if any.
--   expr: a register name such as "x3", "w31"-style internal names with
--         an "@" marker, a type name with a default register, or
--         "type:reg" to override the register of a type.
-- The register class letter is recorded in parse_reg_type on first use;
-- a different class on a later call within the same context raises
-- "register size mismatch". Any other problem raises via werror().
local function parse_reg(expr)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    -- Registers 0..30 are always accepted. Register 31 is accepted for
    -- x/w only with the "@" marker, and for the other classes as is.
    -- The extra parentheses matter: without them `and` binds tighter than
    -- `or`, so any non-x/w register numbered 32..39 (which the pattern
    -- above lets through) was accepted as well.
    if r <= 30 or (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return r, tp
    end
  end
  werror("bad register name `"..expr.."'")
end
274
-- Parse a base register operand and return it shifted into bit position 5,
-- plus the type entry returned by parse_reg(). "sp" maps to 0x3e0 directly.
-- Only "x" class registers are accepted; parse_reg_type is reset afterwards.
local function parse_reg_base(expr)
  if expr == "sp" then
    return 0x3e0
  end
  local regnum, tp = parse_reg(expr)
  if parse_reg_type ~= "x" then
    werror("bad register type")
  end
  parse_reg_type = false
  return shl(regnum, 5), tp
end
282
283local parse_ctx = {}
284
-- Compile a chunk of Lua source with parse_ctx as its environment.
-- Uses loadstring()+setfenv() when setfenv exists, else the four-argument
-- form of load(). Returns the compiled function (nil/false on failure).
local loadenv
if setfenv then
  loadenv = function(s)
    local code = loadstring(s, "")
    if code then setfenv(code, parse_ctx) end
    return code
  end
else
  loadenv = function(s)
    return load(s, "", nil, parse_ctx)
  end
end
292
-- Convert an operand string to a number.
-- Plain numerals go through tonumber(); anything else is evaluated as a
-- Lua expression (simple arithmetic is needed because some basic ops are
-- aliases that build such expressions). Returns nil if the expression
-- does not compile or raises an error.
local function parse_number(n)
  local direct = tonumber(n)
  if direct then return direct end
  local chunk = loadenv("return "..n)
  if not chunk then return nil end
  local ok, value = pcall(chunk)
  if ok then return value end
  return nil
end
304
-- Parse a "#imm" operand into an instruction field.
--   bits:   width of the field.
--   shift:  bit position of the field in the instruction word.
--   scale:  number of low bits that must be zero and are dropped.
--   signed: accept negative values (encoded modulo 2^bits).
-- Constant operands are range-checked and returned already shifted.
-- Anything else is deferred to an IMM action and 0 is returned.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
  local m = sar(n, scale)
  if shl(m, scale) == n then  -- No bits lost by scaling.
    if signed then
      local s = sar(m, bits-1)
      if s == 0 then return shl(m, shift) end
      if s == -1 then return shl(m + shl(1, bits), shift) end
    elseif sar(m, bits) == 0 then
      return shl(m, shift)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
326
-- Parse a "#imm" operand for the 12 bit immediate field at bit 10.
-- A constant that fits in 12 bits is encoded directly; a constant with
-- only bits 12..23 set is encoded shifted, with flag 0x00400000 added.
-- Non-constant operands are deferred to an IMM12 action (returns 0).
local function parse_imm12(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    waction("IMM12", 0, imm)
    return 0
  end
  if shr(n, 12) == 0 then
    return shl(n, 10)
  end
  if band(n, 0xff000fff) == 0 then
    return shr(n, 2) + 0x00400000
  end
  werror("out of range immediate `"..imm.."'")
end
343
-- Parse a "#imm" operand for the 13 bit immediate field.
-- Integer constants in 0 .. 0xffffffff are encoded here or rejected as
-- out of range. Everything else is deferred to a runtime action:
-- IMM13X (two arguments: low and high 32 bits) when the current register
-- type is "x", otherwise IMM13W. Deferred operands return 0.
local function parse_imm13(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  local r64 = parse_reg_type == "x"
  local is_const = n and n % 1 == 0 and n >= 0 and n <= 0xffffffff
  if not is_const then
    if r64 then
      waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
      actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
    else
      waction("IMM13W", 0, imm)
    end
    return 0
  end

  -- Work on a pattern whose lowest bit is clear; remember if it was inverted.
  local inv = false
  if band(n, 1) == 1 then
    n = bit.bnot(n)
    inv = true
  end
  -- Spell out the 32 bits as a string, least significant bit first.
  local digits = {}
  for i = 1, 32 do
    digits[i] = band(n, 1)
    n = shr(n, 1)
  end
  local b = table.concat(digits)
  -- Extend to 64 characters: for "x" the upper half is all ones if the
  -- value was inverted and all zeros otherwise; for other types the
  -- 32 bit pattern is simply repeated.
  b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
  local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
  if p0 then
    -- Candidate element width: the full register if the ones run is not
    -- followed by another ones run, else the distance between the runs.
    local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
    -- The width must be a power of two and the pattern must repeat exactly.
    if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
      local s = band(-2*w, 0x3f) - 1
      if w == 64 then s = s + 0x1000 end
      if inv then
        return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
      end
      return shl(w-#p0, 16) + shl(s+#p1, 10)
    end
  end
  werror("out of range immediate `"..imm.."'")
end
379
-- Parse a "#imm" operand in the range 0..63 into a split field:
-- the low 5 bits go to bit position 19, bit 5 goes to bit 31.
-- Non-constant operands are deferred to an IMM6 action (returns 0).
local function parse_imm6(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    waction("IMM6", 0, imm)
    return 0
  end
  if n >= 0 and n <= 63 then
    local high = n >= 32 and 0x80000000 or 0
    return shl(band(n, 0x1f), 19) + high
  end
  werror("out of range immediate `"..imm.."'")
end
394
-- Encode the immediate offset of a load/store (imm comes without "#").
-- Prefers the scaled, unsigned 12 bit form (flag 0x01000000) and falls
-- back to the unscaled, signed 9 bit form. Non-constant offsets are
-- deferred to an IMML action (returns 0).
local function parse_imm_load(imm, scale)
  local n = parse_number(imm)
  if not n then
    waction("IMML", 0, imm)
    return 0
  end
  local m = sar(n, scale)
  if shl(m, scale) == n and m >= 0 and m < 0x1000 then
    return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
  end
  if n >= -256 and n < 256 then
    return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
  end
  werror("out of range immediate `"..imm.."'")
end
410
-- Parse a "#imm" floating point constant into its packed encoding:
-- sign flag 0x00100000, a 3 bit exponent field at bit 17 and a 4 bit
-- mantissa field at bit 13. Values that do not fit exactly are rejected.
-- Non-constant operands are not supported (NYI).
local function parse_fpimm(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if not n then
    werror("NYI fpimm action")
  else
    local m, e = math.frexp(n)
    local sign, e2 = 0, band(e-2, 7)
    if m < 0 then
      m = -m
      sign = 0x00100000
    end
    m = m*32-16
    -- The mantissa must be a whole number in 0..15, and the exponent must
    -- survive the round trip through the 3 bit field.
    local fits = m % 1 == 0 and m >= 0 and m <= 15 and
                 sar(shl(e2, 29), 29)+2 == e
    if fits then
      return sign + shl(e2, 17) + shl(m, 13)
    end
    werror("out of range immediate `"..imm.."'")
  end
end
428
-- Parse a "<shift> #amount" operand (shift is one of the map_shift names).
-- The amount is a 6 bit field at bit 10, the shift type goes to bit 22.
local function parse_shift(expr)
  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
  local code = map_shift[kind]
  if not code then werror("expected shift operand") end
  local immbits = parse_imm(amount, 6, 10, 0, false)
  return immbits + shl(code, 22)
end
435
-- Parse an "lsl #n" operand where n is a multiple of 16:
-- 0/16/32/48 when the current register type is "x", else 0/16.
-- Returns n shifted left by 17.
local function parse_lslx16(expr)
  local amount = tonumber(match(expr, "^lsl%s*#(%d+)$"))
  if not amount then werror("expected shift operand") end
  -- Mask of all bits that must be clear in a valid shift amount.
  local forbidden = parse_reg_type == "x" and 0xffffffcf or 0xffffffef
  if band(amount, forbidden) ~= 0 then
    werror("bad shift amount")
  end
  return shl(amount, 17)
end
445
-- Parse an "<extend> [#amount]" operand. "lsl" is accepted as an alias
-- for extend code 3 when the current register type is "x", else code 2.
-- The optional amount is a 3 bit field at bit 10; the extend code goes
-- to bit 13.
local function parse_extend(expr)
  local kind, amount = match(expr, "^(%S+)%s*(.*)$")
  local code
  if kind == "lsl" then
    code = parse_reg_type == "x" and 3 or 2
  else
    code = map_extend[kind]
  end
  if not code then werror("expected extend operand") end
  local immbits = 0
  if amount ~= "" then
    immbits = parse_imm(amount, 3, 10, 0, false)
  end
  return immbits + shl(code, 13)
end
456
-- Parse a condition name into its code at bit 12.
-- inv is XORed into the code first (1 flips the lowest bit, 0 leaves it).
local function parse_cond(expr, inv)
  local code = map_cond[expr]
  if not code then werror("expected condition operand") end
  local effective = bit.bxor(code, inv)
  return shl(effective, 12)
end
462
-- Parse the address operand(s) of a single-register load/store, starting
-- at params[n], and return op with the addressing bits added.
-- Forms handled, in the order they are tested:
--   type-based:    "reg_or_type rest" (no brackets, type-bound register only)
--   post-indexed:  "[base]", "#imm"
--   pre-indexed:   "[base, #imm]!"
--   immediate:     "[base]" or "[base, #imm]"
--   register:      "[base, index]" or "[base, index, extend/shift [#amount]]"
local function parse_load(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local addr, post = params[n], params[n+1]
  local inner, wb = match(addr, "^%[%s*(.-)%s*%](!?)$")
  if not inner then
    -- No brackets: only valid as a lone operand naming a typed register.
    if not post then
      local reg, tailr = match(addr, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMML", 0, format(tp.ctypefmt, tailr))
          return op + base
        end
      end
    end
    werror("expected address operand")
  end
  -- The top two opcode bits give the scale for offsets and index registers.
  local scale = shr(op, 30)

  -- Post-indexed: a second operand follows the bracket.
  if post then
    if wb == "!" then werror("bad use of '!'") end
    return op + parse_reg_base(inner) + parse_imm(post, 9, 12, 0, true) + 0x400
  end

  -- Pre-indexed: "[base, #imm]!".
  if wb == "!" then
    local basereg, offs = match(inner, "^([^,%s]*)%s*,%s*(.*)$")
    if not basereg then werror("bad use of '!'") end
    return op + parse_reg_base(basereg) + parse_imm(offs, 9, 12, 0, true) + 0xc00
  end

  local basereg, rest = match(inner, "^([^,%s]*)%s*(.*)$")
  op = op + parse_reg_base(basereg)

  -- Plain "[base]".
  if rest == "" then
    return op + 0x01000000
  end

  -- "[base, #imm]".
  local imm = match(rest, "^,%s*#(.*)$")
  if imm then
    return op + parse_imm_load(imm, scale)
  end

  -- "[base, index ...]".
  local idxreg, ext, amount = match(rest, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
  op = op + shl(parse_reg(idxreg), 16) + 0x00200800
  if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
    werror("bad index register type")
  end

  -- No extend/shift specifier: only allowed with an "x" index.
  if ext == "" then
    if parse_reg_type ~= "x" then werror("bad index register type") end
    return op + 0x6000
  end

  -- Optional amount: nothing or "#0" means unscaled. The "#0" test must
  -- come before the "#scale" test since scale itself may be 0.
  if amount == "" or amount == "#0" then
    -- Unscaled index, nothing to add.
  elseif amount == "#"..scale then
    op = op + 0x1000
  else
    werror("bad scale")
  end

  if parse_reg_type == "x" then
    if ext == "lsl" and amount ~= "" then
      op = op + 0x6000
    elseif ext == "sxtx" then
      op = op + 0xe000
    else
      werror("bad extend/shift specifier")
    end
  else
    if ext == "uxtw" then
      op = op + 0x4000
    elseif ext == "sxtw" then
      op = op + 0xc000
    else
      werror("bad extend/shift specifier")
    end
  end
  return op
end
533
-- Parse the address operand(s) of a register-pair load/store, starting
-- at params[n], and return op with the addressing bits added.
-- The offset is a signed 7 bit field at bit 15, scaled by 2 when the top
-- two opcode bits are zero and by 3 otherwise.
local function parse_load_pair(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local addr, offs = params[n], params[n+1]
  local scale = shr(op, 30) == 0 and 2 or 3
  local base, wb = match(addr, "^%[%s*(.-)%s*%](!?)$")
  if not base then
    -- No brackets: only valid as a lone operand naming a typed register.
    if not offs then
      local reg, tailr = match(addr, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local basebits, tp = parse_reg_base(reg)
        if tp then
          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
          return op + basebits + 0x01000000
        end
      end
    end
    werror("expected address operand")
  end
  if offs then
    -- Post-indexed: "[base]", "#imm".
    if wb == "!" then werror("bad use of '!'") end
    op = op + 0x00800000
  else
    -- "[base]" or "[base, #imm]", optionally followed by "!".
    local basereg, inneroffs = match(base, "^([^,%s]*)%s*,%s*(.*)$")
    if basereg then
      base, offs = basereg, inneroffs
    else
      offs = "#0"
    end
    op = op + (wb == "!" and 0x01800000 or 0x01000000)
  end
  return op + parse_reg_base(base) + parse_imm(offs, 7, 15, scale, true)
end
562
-- Parse a label and return (mode, number[, expression]).
--   "=>expr"      -> "PC", 0, expr        (pc label)
--   "->name"      -> "LG", global number  (global label)
--   "1".."9"      -> "LG", 11..19         (local label definition, def only)
--   "<n" / ">n"   -> "LG", n+10 / n       (local label reference)
--   "extern name" -> "EXT", extern number (reference only)
-- Anything else raises "bad label".
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  if prefix == "=>" then
    return "PC", 0, sub(label, 3)
  elseif prefix == "->" then
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference). Fwd: 1-9, Bkwd: 11-19.
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then
      local bias = dir == ">" and 0 or 10
      return "LG", lnum + bias
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end
592
-- Classify a label-relative opcode and return the type bits that are
-- added to the label number of the REL_* action.
local function branch_type(op)
  -- B, BL
  if band(op, 0x7c000000) == 0x14000000 then return 0 end
  -- B.cond, CBZ, CBNZ, LDR* literal
  if shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
     band(op, 0x3b000000) == 0x18000000 then
    return 0x800
  end
  -- TBZ, TBNZ
  if band(op, 0x7e000000) == 0x36000000 then return 0x1000 end
  -- ADR
  if band(op, 0x9f000000) == 0x10000000 then return 0x2000 end
  -- ADRP (the constant is passed through band() before comparing)
  if band(op, 0x9f000000) == band(0x90000000) then return 0x3000 end
  assert(false, "unknown branch type")
end
605
606------------------------------------------------------------------------------
607
608local map_op, op_template
609
-- Build an opcode handler that aliases another opcode.
-- The handler first lets f rewrite the parameters in place, then
-- assembles them with the template of map_op[opname]. Called without
-- params it returns "-> " plus opname minus its last two characters.
local function op_alias(opname, f)
  local function handler(params, nparams)
    if not params then
      return "-> "..opname:sub(1, -3)
    end
    f(params, nparams)
    op_template(params, map_op[opname], nparams)
  end
  return handler
end
617
-- Parameter rewrite for the sbfx/bfxil/ubfx aliases:
-- replaces operand 4 with "#(op3)+(op4)-1" (both taken without their "#").
local function alias_bfx(p)
  local lsb, width = p[3]:sub(2), p[4]:sub(2)
  p[4] = "#("..lsb..")+("..width..")-1"
end
621
-- Parameter rewrite for the sbfiz/bfi/ubfiz aliases:
-- operand 3 becomes "#-(op3)%32" for "w" registers and "#-(op3)%64"
-- otherwise; operand 4 becomes "#(op4)-1".
local function alias_bfiz(p)
  parse_reg(p[1])  -- Sets parse_reg_type from the first operand.
  local modulus = parse_reg_type == "w" and "32" or "64"
  p[3] = "#-("..p[3]:sub(2)..")%"..modulus
  p[4] = "#("..p[4]:sub(2)..")-1"
end
632
-- Handler for "lsl reg, reg, #imm", expressed through ubfm:
-- operand 3 becomes "#-(sh)%32" / "#-(sh)%64" and a fourth operand
-- "#31-(sh)" / "#63-(sh)" is added, depending on the register type of
-- the first operand ("w" selects the 32 bit variants).
local alias_lslimm = op_alias("ubfm_4", function(p)
  parse_reg(p[1])  -- Sets parse_reg_type from the first operand.
  local sh = p[3]:sub(2)
  local is32 = parse_reg_type == "w"
  p[3] = "#-("..sh..")%"..(is32 and "32" or "64")
  p[4] = "#"..(is32 and "31" or "63").."-("..sh..")"
end)
644
645-- Template strings for ARM instructions.
646map_op = {
647 -- Basic data processing instructions.
648 add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
649 add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
650 adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
651 adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
652 cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
653 cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",
654
655 sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
656 sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
657 subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
658 subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
659 cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
660 cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",
661
662 neg_2 = "4b0003e0DMg",
663 neg_3 = "4b0003e0DMSg",
664 negs_2 = "6b0003e0DMg",
665 negs_3 = "6b0003e0DMSg",
666
667 adc_3 = "1a000000DNMg",
668 adcs_3 = "3a000000DNMg",
669 sbc_3 = "5a000000DNMg",
670 sbcs_3 = "7a000000DNMg",
671 ngc_2 = "5a0003e0DMg",
672 ngcs_2 = "7a0003e0DMg",
673
674 and_3 = "0a000000DNMg|12000000pDNig",
675 and_4 = "0a000000DNMSg",
676 orr_3 = "2a000000DNMg|32000000pDNig",
677 orr_4 = "2a000000DNMSg",
678 eor_3 = "4a000000DNMg|52000000pDNig",
679 eor_4 = "4a000000DNMSg",
680 ands_3 = "6a000000DNMg|72000000DNig",
681 ands_4 = "6a000000DNMSg",
682 tst_2 = "6a00001fNMg|7200001fNig",
683 tst_3 = "6a00001fNMSg",
684
685 bic_3 = "0a200000DNMg",
686 bic_4 = "0a200000DNMSg",
687 orn_3 = "2a200000DNMg",
688 orn_4 = "2a200000DNMSg",
689 eon_3 = "4a200000DNMg",
690 eon_4 = "4a200000DNMSg",
691 bics_3 = "6a200000DNMg",
692 bics_4 = "6a200000DNMSg",
693
694 movn_2 = "12800000DWg",
695 movn_3 = "12800000DWRg",
696 movz_2 = "52800000DWg",
697 movz_3 = "52800000DWRg",
698 movk_2 = "72800000DWg",
699 movk_3 = "72800000DWRg",
700
701 -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
702 mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
703 mov_3 = "2a0003e0DMSg",
704 mvn_2 = "2a2003e0DMg",
705 mvn_3 = "2a2003e0DMSg",
706
707 adr_2 = "10000000DBx",
708 adrp_2 = "90000000DBx",
709
710 csel_4 = "1a800000DNMCg",
711 csinc_4 = "1a800400DNMCg",
712 csinv_4 = "5a800000DNMCg",
713 csneg_4 = "5a800400DNMCg",
714 cset_2 = "1a9f07e0Dcg",
715 csetm_2 = "5a9f03e0Dcg",
716 cinc_3 = "1a800400DNmcg",
717 cinv_3 = "5a800000DNmcg",
718 cneg_3 = "5a800400DNmcg",
719
720 ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
721 ccmp_4 = "7a400000NMVCg|7a400800N5VCg",
722
723 madd_4 = "1b000000DNMAg",
724 msub_4 = "1b008000DNMAg",
725 mul_3 = "1b007c00DNMg",
726 mneg_3 = "1b00fc00DNMg",
727
728 smaddl_4 = "9b200000DxNMwAx",
729 smsubl_4 = "9b208000DxNMwAx",
730 smull_3 = "9b207c00DxNMw",
731 smnegl_3 = "9b20fc00DxNMw",
732 smulh_3 = "9b407c00DNMx",
733 umaddl_4 = "9ba00000DxNMwAx",
734 umsubl_4 = "9ba08000DxNMwAx",
735 umull_3 = "9ba07c00DxNMw",
736 umnegl_3 = "9ba0fc00DxNMw",
737 umulh_3 = "9bc07c00DNMx",
738
739 udiv_3 = "1ac00800DNMg",
740 sdiv_3 = "1ac00c00DNMg",
741
742 -- Bit operations.
743 sbfm_4 = "13000000DN12w|93400000DN12x",
744 bfm_4 = "33000000DN12w|b3400000DN12x",
745 ubfm_4 = "53000000DN12w|d3400000DN12x",
746 extr_4 = "13800000DNM2w|93c00000DNM2x",
747
748 sxtb_2 = "13001c00DNw|93401c00DNx",
749 sxth_2 = "13003c00DNw|93403c00DNx",
750 sxtw_2 = "93407c00DxNw",
751 uxtb_2 = "53001c00DNw",
752 uxth_2 = "53003c00DNw",
753
754 sbfx_4 = op_alias("sbfm_4", alias_bfx),
755 bfxil_4 = op_alias("bfm_4", alias_bfx),
756 ubfx_4 = op_alias("ubfm_4", alias_bfx),
757 sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
758 bfi_4 = op_alias("bfm_4", alias_bfiz),
759 ubfiz_4 = op_alias("ubfm_4", alias_bfiz),
760
761 lsl_3 = function(params, nparams)
762 if params and params[3]:byte() == 35 then
763 return alias_lslimm(params, nparams)
764 else
765 return op_template(params, "1ac02000DNMg", nparams)
766 end
767 end,
768 lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
769 asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
770 ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",
771
772 clz_2 = "5ac01000DNg",
773 cls_2 = "5ac01400DNg",
774 rbit_2 = "5ac00000DNg",
775 rev_2 = "5ac00800DNw|dac00c00DNx",
776 rev16_2 = "5ac00400DNg",
777 rev32_2 = "dac00800DNx",
778
779 -- Loads and stores.
780 ["strb_*"] = "38000000DwL",
781 ["ldrb_*"] = "38400000DwL",
782 ["ldrsb_*"] = "38c00000DwL|38800000DxL",
783 ["strh_*"] = "78000000DwL",
784 ["ldrh_*"] = "78400000DwL",
785 ["ldrsh_*"] = "78c00000DwL|78800000DxL",
786 ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
787 ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
788 ["ldrsw_*"] = "98000000DxB|b8800000DxL",
789 -- NOTE: ldur etc. are handled by ldr et al.
790
791 ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
792 ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
793 ["ldpsw_*"] = "68400000DAxP",
794
795 -- Branches.
796 b_1 = "14000000B",
797 bl_1 = "94000000B",
798 blr_1 = "d63f0000Nx",
799 br_1 = "d61f0000Nx",
800 ret_0 = "d65f03c0",
801 ret_1 = "d65f0000Nx",
802 -- b.cond is added below.
803 cbz_2 = "34000000DBg",
804 cbnz_2 = "35000000DBg",
805 tbz_3 = "36000000DTBw|36000000DTBx",
806 tbnz_3 = "37000000DTBw|37000000DTBx",
807
808 -- Miscellaneous instructions.
809 -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
810 -- TODO: sys, sysl, ic, dc, at, tlbi
811 -- TODO: hint, yield, wfe, wfi, sev, sevl
812 -- TODO: clrex, dsb, dmb, isb
813 nop_0 = "d503201f",
814 brk_0 = "d4200000",
815 brk_1 = "d4200000W",
816
817 -- Floating point instructions.
818 fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
819 fabs_2 = "1e20c000DNf",
820 fneg_2 = "1e214000DNf",
821 fsqrt_2 = "1e21c000DNf",
822
823 fcvt_2 = "1e22c000DdNs|1e624000DsNd",
824
825 -- TODO: half-precision and fixed-point conversions.
826 fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
827 fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
828 fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
829 fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
830 fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
831 fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
832 fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
833 fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
834 fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
835 fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",
836
837 scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
838 ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",
839
840 frintn_2 = "1e244000DNf",
841 frintp_2 = "1e24c000DNf",
842 frintm_2 = "1e254000DNf",
843 frintz_2 = "1e25c000DNf",
844 frinta_2 = "1e264000DNf",
845 frintx_2 = "1e274000DNf",
846 frinti_2 = "1e27c000DNf",
847
848 fadd_3 = "1e202800DNMf",
849 fsub_3 = "1e203800DNMf",
850 fmul_3 = "1e200800DNMf",
851 fnmul_3 = "1e208800DNMf",
852 fdiv_3 = "1e201800DNMf",
853
854 fmadd_4 = "1f000000DNMAf",
855 fmsub_4 = "1f008000DNMAf",
856 fnmadd_4 = "1f200000DNMAf",
857 fnmsub_4 = "1f208000DNMAf",
858
859 fmax_3 = "1e204800DNMf",
860 fmaxnm_3 = "1e206800DNMf",
861 fmin_3 = "1e205800DNMf",
862 fminnm_3 = "1e207800DNMf",
863
864 fcmp_2 = "1e202000NMf|1e202008NZf",
865 fcmpe_2 = "1e202010NMf|1e202018NZf",
866
867 fccmp_4 = "1e200400NMVCf",
868 fccmpe_4 = "1e200410NMVCf",
869
870 fcsel_4 = "1e200c00DNMCf",
871
872 -- TODO: crc32*, aes*, sha*, pmull
873 -- TODO: SIMD instructions.
874}
875
-- Register a one-operand "b<cond>" opcode for every condition name:
-- base opcode 0x54000000 plus the condition code, with a label operand.
for cond, code in pairs(map_cond) do
  local opname = "b"..cond.."_1"
  map_op[opname] = tohex(0x54000000 + code).."B"
end
879
880------------------------------------------------------------------------------
881
-- Handle opcodes defined with template strings.
-- A template is 8 hex digits (the base opcode) followed by one character
-- per operand or check. Operands are consumed from params in order and
-- their encodings are added to the opcode; the finished word is stored
-- at the reserved action list position pos. Any mismatch raises an error
-- (the caller tries the next template alternative).
local function parse_template(params, template, nparams, pos)
  local op = tonumber(sub(template, 1, 8), 16)
  local n = 1  -- Index of the next operand to consume.

  parse_reg_type = false

  -- Process each character.
  for ch in gmatch(sub(template, 9), ".") do
    local arg = params[n]

    -- Register operands at the various field positions.
    if ch == "D" then
      op = op + parse_reg(arg); n = n + 1
    elseif ch == "N" then
      op = op + shl(parse_reg(arg), 5); n = n + 1
    elseif ch == "M" then
      op = op + shl(parse_reg(arg), 16); n = n + 1
    elseif ch == "A" then
      op = op + shl(parse_reg(arg), 10); n = n + 1
    elseif ch == "m" then
      -- Reuses the previous operand for the field at bit 16.
      op = op + shl(parse_reg(params[n-1]), 16)

    -- Modifiers and register type checks.
    elseif ch == "p" then
      -- Allow "sp" for the next register operand.
      if arg == "sp" then params[n] = "@x31" end
    elseif ch == "g" then
      -- General register: "x" sets the top opcode bit, "w" leaves it clear.
      if parse_reg_type == "x" then
        op = op + 0x80000000
      elseif parse_reg_type ~= "w" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif ch == "f" then
      -- Floating point register: "d" sets bit 22, "s" leaves it clear.
      if parse_reg_type == "d" then
        op = op + 0x00400000
      elseif parse_reg_type ~= "s" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif ch == "x" or ch == "w" or ch == "d" or ch == "s" then
      -- The registers parsed so far must be of exactly this type.
      if parse_reg_type ~= ch then
        werror("register size mismatch")
      end
      parse_reg_type = false

    -- Address operands.
    elseif ch == "L" then
      op = parse_load(params, nparams, n, op)
    elseif ch == "P" then
      op = parse_load_pair(params, nparams, n, op)

    -- Label operand.
    elseif ch == "B" then
      local mode, v, s = parse_label(arg, false); n = n + 1
      local m = branch_type(op)
      waction("REL_"..mode, v+m, s, 1)

    -- Immediate operands.
    elseif ch == "I" then
      op = op + parse_imm12(arg); n = n + 1
    elseif ch == "i" then
      op = op + parse_imm13(arg); n = n + 1
    elseif ch == "W" then
      op = op + parse_imm(arg, 16, 5, 0, false); n = n + 1
    elseif ch == "T" then
      op = op + parse_imm6(arg); n = n + 1
    elseif ch == "1" then
      op = op + parse_imm(arg, 6, 16, 0, false); n = n + 1
    elseif ch == "2" then
      op = op + parse_imm(arg, 6, 10, 0, false); n = n + 1
    elseif ch == "5" then
      op = op + parse_imm(arg, 5, 16, 0, false); n = n + 1
    elseif ch == "V" then
      op = op + parse_imm(arg, 4, 0, 0, false); n = n + 1
    elseif ch == "F" then
      op = op + parse_fpimm(arg); n = n + 1
    elseif ch == "Z" then
      if arg ~= "#0" and arg ~= "#0.0" then werror("expected zero immediate") end
      n = n + 1

    -- Shift, extend and condition operands.
    elseif ch == "S" then
      op = op + parse_shift(arg); n = n + 1
    elseif ch == "X" then
      op = op + parse_extend(arg); n = n + 1
    elseif ch == "R" then
      op = op + parse_lslx16(arg); n = n + 1
    elseif ch == "C" then
      op = op + parse_cond(arg, 0); n = n + 1
    elseif ch == "c" then
      op = op + parse_cond(arg, 1); n = n + 1

    else
      assert(false)
    end
  end
  wputpos(pos, op)
end
975
-- Assemble one instruction from a template string with "|"-separated
-- alternatives. Each alternative is tried in turn; the first one that
-- parses without error wins. After a failed attempt the action list,
-- the argument list and the buffer position are rolled back. If every
-- alternative fails, the last error is re-raised.
-- Called without params, returns the template with all runs of 8 hex
-- digits removed.
function op_template(params, template, nparams)
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 3 positions.
  if secpos+3 > maxsecpos then wflush() end
  local pos = wpos()
  -- Snapshot of the state to restore after a failed alternative.
  local lpos, apos, spos = #actlist, #actargs, secpos

  local ok, err
  for alt in gmatch(template, "[^|]+") do
    ok, err = pcall(parse_template, params, alt, nparams, pos)
    if ok then return end
    secpos = spos
    -- Drop up to three action words and arguments added by the attempt.
    for k = 1, 3 do
      actlist[lpos+k] = nil
    end
    for k = 1, 3 do
      actargs[apos+k] = nil
    end
  end
  error(err, 0)
end
999
1000map_op[".template__"] = op_template
1001
1002------------------------------------------------------------------------------
1003
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- The operand is the name of the C array; it is not checked.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  local function emit(out)
    writeactions(out, cvar)
  end
  wline(emit)
end
1010
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- The operand is the prefix for the enum members; it is not checked.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
  local function emit(out)
    writeglobals(out, enumprefix)
  end
  wline(emit)
end
1017
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- The operand is the name of the C array; it is not checked.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  local function emit(out)
    writeglobalnames(out, cvar)
  end
  wline(emit)
end
1024
-- Pseudo-opcode marking the output position of the extern names.
-- Without params it returns the operand description "cvar".
map_op[".externnames_1"] = function(params)
  if params then
    -- The name is passed through unchecked.
    local cvar = params[1]
    wline(function(out) writeexternnames(out, cvar) end)
  else
    return "cvar"
  end
end
1031
1032------------------------------------------------------------------------------
1033
-- Pseudo-opcode for a label definition (converted from the trailing colon
-- form). Without params it returns the operand description.
map_op[".label_1"] = function(params)
  if params then
    -- Flush first if one more section buffer position would not fit.
    if secpos+1 > maxsecpos then wflush() end
    local kind, num, expr = parse_label(params[1], true)
    -- "EXT" labels are rejected as definitions.
    if kind == "EXT" then werror("bad label definition") end
    waction("LABEL_"..kind, num, expr, 1)
  else
    return "[1-9] | ->global | =>pcexpr"
  end
end
1042
1043------------------------------------------------------------------------------
1044
-- Data storage pseudo-opcode: emits every operand as one word.
-- Without params it returns the operand description "imm...".
map_op[".long_*"] = function(params)
  if not params then return "imm..." end
  for _, arg in ipairs(params) do
    local word = tonumber(arg)
    if word == nil then werror("bad immediate `"..arg.."'") end
    -- Negative values are shifted up by 2^32 before being emitted.
    wputw(word < 0 and word + 2^32 or word)
    if secpos+2 > maxsecpos then wflush() end
  end
end
1056
-- Alignment pseudo-opcode. The operand has to be one of the powers of two
-- 2, 4, ... 256; anything else is reported as "bad alignment".
-- Without params it returns the operand description "numpow2".
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Compare against each accepted power of two in turn.
    local pow2 = 1
    for _ = 1, 8 do
      pow2 = pow2 * 2
      if align == pow2 then
        -- The action argument is 2**n-1.
        waction("ALIGN", align-1, nil, 1)
        return
      end
    end
  end
  werror("bad alignment")
end
1075
1076------------------------------------------------------------------------------
1077
-- Pseudo-opcode for (primitive) type definitions, which map to C types.
-- Operands: name, ctype and an optional reg. Without params it returns the
-- operand description matching the requested operand count.
local function op_type(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  -- The name must look like an identifier and must not be taken already.
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Publish #name as a define (it lives in map_archdef, which is a bit unclean).
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Register the type and emit its shortcut define.
  local num = ctypenum + 1
  local entry = {}
  entry.ctype = ctype
  entry.ctypefmt = format("Dt%X(%%s)", num)
  entry.reg = reg
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_3"] = op_type
map_op[".type_2"] = op_type
1104
-- Write all defined types to `out`, sorted by name, one line per type with
-- its C type and (if set) its register. `lvl` is accepted but not used.
local function dumptypes(out, lvl)
  local names, count = {}, 0
  for name in pairs(map_type) do
    count = count + 1
    names[count] = name
  end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, count do
    local name = names[i]
    local tp = map_type[name]
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, tp.reg or ""))
  end
  out:write("\n")
end
1118
1119------------------------------------------------------------------------------
1120
-- Switch the current section to `num`.
function _M.section(num)
  waction("SECTION", num)
  -- SECTION is a terminal action, hence the flush with a true argument.
  wflush(true)
end
1126
1127------------------------------------------------------------------------------
1128
-- Write the architecture description to `out`: a banner built from _info,
-- followed by the output of dumpactions().
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpactions(out)
end
1135
-- Write all user defined elements to `out`, in the order:
-- types, globals, externs.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
1142
1143------------------------------------------------------------------------------
1144
-- Exchange callbacks with the DynASM core: store the four writer/reporting
-- callbacks handed in and give back this module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
1150
-- Set up the arch-specific module by remembering the architecture name and
-- the option table.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
1155
-- Merge the core maps with the arch-specific maps: lookups that miss in
-- map_op fall back to map_coreop, and lookups that miss in map_def fall back
-- to map_archdef. Returns map_op and map_def.
function _M.mergemaps(map_coreop, map_def)
  -- setmetatable() returns its first argument.
  local ops = setmetatable(map_op, { __index = map_coreop })
  local defs = setmetatable(map_def, { __index = map_archdef })
  return ops, defs
end
1162
1163return _M
1164
1165------------------------------------------------------------------------------
1166
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index 1b309edd..143c3cbe 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
231 *pl = -pos; /* Label exists now. */ 231 *pl = -pos; /* Label exists now. */
232 b[pos++] = ofs; /* Store pass1 offset estimate. */ 232 b[pos++] = ofs; /* Store pass1 offset estimate. */
233 break; 233 break;
234 case DASM_IMM: 234 case DASM_IMM: case DASM_IMMS:
235#ifdef DASM_CHECKS 235#ifdef DASM_CHECKS
236 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); 236 CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
237#endif 237#endif
@@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 300 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 302 case DASM_IMM: case DASM_IMMS: pos++; break;
303 } 303 }
304 } 304 }
305 stop: (void)0; 305 stop: (void)0;
@@ -355,19 +355,23 @@ int dasm_encode(Dst_DECL, void *buffer)
355 CK(n >= 0, UNDEF_PC); 355 CK(n >= 0, UNDEF_PC);
356 n = *DASM_POS2PTR(D, n); 356 n = *DASM_POS2PTR(D, n);
357 if (ins & 2048) 357 if (ins & 2048)
358 n = n - (int)((char *)cp - base); 358 n = (n + (int)(size_t)base) & 0x0fffffff;
359 else 359 else
360 n = (n + (int)base) & 0x0fffffff; 360 n = n - (int)((char *)cp - base);
361 patchrel: 361 patchrel: {
362 unsigned int e = 16 + ((ins >> 12) & 15);
362 CK((n & 3) == 0 && 363 CK((n & 3) == 0 &&
363 ((n + ((ins & 2048) ? 0x00020000 : 0)) >> 364 ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
364 ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); 365 cp[-1] |= ((n>>2) & ((1<<e)-1));
365 cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); 366 }
366 break; 367 break;
367 case DASM_LABEL_LG: 368 case DASM_LABEL_LG:
368 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); 369 ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
369 break; 370 break;
370 case DASM_LABEL_PC: break; 371 case DASM_LABEL_PC: break;
372 case DASM_IMMS:
373 cp[-1] |= ((n>>3) & 4); n &= 0x1f;
374 /* fallthrough */
371 case DASM_IMM: 375 case DASM_IMM:
372 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 376 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
373 break; 377 break;
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index 9ef280e3..3e41df52 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -1,17 +1,20 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM MIPS module. 2-- DynASM MIPS32/MIPS64 module.
3-- 3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------ 6------------------------------------------------------------------------------
7 7
8local mips64 = mips64
9local mipsr6 = _map_def.MIPSR6
10
8-- Module information: 11-- Module information:
9local _info = { 12local _info = {
10 arch = "mips", 13 arch = mips64 and "mips64" or "mips",
11 description = "DynASM MIPS module", 14 description = "DynASM MIPS32/MIPS64 module",
12 version = "1.3.0", 15 version = "1.4.0",
13 vernum = 10300, 16 vernum = 10400,
14 release = "2012-01-23", 17 release = "2020-01-20",
15 author = "Mike Pall", 18 author = "Mike Pall",
16 license = "MIT", 19 license = "MIT",
17} 20}
@@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
27local match, gmatch = _s.match, _s.gmatch 30local match, gmatch = _s.match, _s.gmatch
28local concat, sort = table.concat, table.sort 31local concat, sort = table.concat, table.sort
29local bit = bit or require("bit") 32local bit = bit or require("bit")
30local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex 33local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
34local tohex = bit.tohex
31 35
32-- Inherited tables and callbacks. 36-- Inherited tables and callbacks.
33local g_opt, g_arch 37local g_opt, g_arch
@@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn
38local action_names = { 42local action_names = {
39 "STOP", "SECTION", "ESC", "REL_EXT", 43 "STOP", "SECTION", "ESC", "REL_EXT",
40 "ALIGN", "REL_LG", "LABEL_LG", 44 "ALIGN", "REL_LG", "LABEL_LG",
41 "REL_PC", "LABEL_PC", "IMM", 45 "REL_PC", "LABEL_PC", "IMM", "IMMS",
42} 46}
43 47
44-- Maximum number of section buffer positions for dasm_put(). 48-- Maximum number of section buffer positions for dasm_put().
@@ -235,7 +239,6 @@ local map_op = {
235 bne_3 = "14000000STB", 239 bne_3 = "14000000STB",
236 blez_2 = "18000000SB", 240 blez_2 = "18000000SB",
237 bgtz_2 = "1c000000SB", 241 bgtz_2 = "1c000000SB",
238 addi_3 = "20000000TSI",
239 li_2 = "24000000TI", 242 li_2 = "24000000TI",
240 addiu_3 = "24000000TSI", 243 addiu_3 = "24000000TSI",
241 slti_3 = "28000000TSI", 244 slti_3 = "28000000TSI",
@@ -245,70 +248,52 @@ local map_op = {
245 ori_3 = "34000000TSU", 248 ori_3 = "34000000TSU",
246 xori_3 = "38000000TSU", 249 xori_3 = "38000000TSU",
247 lui_2 = "3c000000TU", 250 lui_2 = "3c000000TU",
248 beqzl_2 = "50000000SB", 251 daddiu_3 = mips64 and "64000000TSI",
249 beql_3 = "50000000STB", 252 ldl_2 = mips64 and "68000000TO",
250 bnezl_2 = "54000000SB", 253 ldr_2 = mips64 and "6c000000TO",
251 bnel_3 = "54000000STB",
252 blezl_2 = "58000000SB",
253 bgtzl_2 = "5c000000SB",
254 lb_2 = "80000000TO", 254 lb_2 = "80000000TO",
255 lh_2 = "84000000TO", 255 lh_2 = "84000000TO",
256 lwl_2 = "88000000TO",
257 lw_2 = "8c000000TO", 256 lw_2 = "8c000000TO",
258 lbu_2 = "90000000TO", 257 lbu_2 = "90000000TO",
259 lhu_2 = "94000000TO", 258 lhu_2 = "94000000TO",
260 lwr_2 = "98000000TO", 259 lwu_2 = mips64 and "9c000000TO",
261 sb_2 = "a0000000TO", 260 sb_2 = "a0000000TO",
262 sh_2 = "a4000000TO", 261 sh_2 = "a4000000TO",
263 swl_2 = "a8000000TO",
264 sw_2 = "ac000000TO", 262 sw_2 = "ac000000TO",
265 swr_2 = "b8000000TO",
266 cache_2 = "bc000000NO",
267 ll_2 = "c0000000TO",
268 lwc1_2 = "c4000000HO", 263 lwc1_2 = "c4000000HO",
269 pref_2 = "cc000000NO",
270 ldc1_2 = "d4000000HO", 264 ldc1_2 = "d4000000HO",
271 sc_2 = "e0000000TO", 265 ld_2 = mips64 and "dc000000TO",
272 swc1_2 = "e4000000HO", 266 swc1_2 = "e4000000HO",
273 sdc1_2 = "f4000000HO", 267 sdc1_2 = "f4000000HO",
268 sd_2 = mips64 and "fc000000TO",
274 269
275 -- Opcode SPECIAL. 270 -- Opcode SPECIAL.
276 nop_0 = "00000000", 271 nop_0 = "00000000",
277 sll_3 = "00000000DTA", 272 sll_3 = "00000000DTA",
278 movf_2 = "00000001DS", 273 sextw_2 = "00000000DT",
279 movf_3 = "00000001DSC",
280 movt_2 = "00010001DS",
281 movt_3 = "00010001DSC",
282 srl_3 = "00000002DTA", 274 srl_3 = "00000002DTA",
283 rotr_3 = "00200002DTA", 275 rotr_3 = "00200002DTA",
284 sra_3 = "00000003DTA", 276 sra_3 = "00000003DTA",
285 sllv_3 = "00000004DTS", 277 sllv_3 = "00000004DTS",
286 srlv_3 = "00000006DTS", 278 srlv_3 = "00000006DTS",
287 rotrv_3 = "00000046DTS", 279 rotrv_3 = "00000046DTS",
280 drotrv_3 = mips64 and "00000056DTS",
288 srav_3 = "00000007DTS", 281 srav_3 = "00000007DTS",
289 jr_1 = "00000008S",
290 jalr_1 = "0000f809S", 282 jalr_1 = "0000f809S",
291 jalr_2 = "00000009DS", 283 jalr_2 = "00000009DS",
292 movz_3 = "0000000aDST",
293 movn_3 = "0000000bDST",
294 syscall_0 = "0000000c", 284 syscall_0 = "0000000c",
295 syscall_1 = "0000000cY", 285 syscall_1 = "0000000cY",
296 break_0 = "0000000d", 286 break_0 = "0000000d",
297 break_1 = "0000000dY", 287 break_1 = "0000000dY",
298 sync_0 = "0000000f", 288 sync_0 = "0000000f",
299 mfhi_1 = "00000010D", 289 dsllv_3 = mips64 and "00000014DTS",
300 mthi_1 = "00000011S", 290 dsrlv_3 = mips64 and "00000016DTS",
301 mflo_1 = "00000012D", 291 dsrav_3 = mips64 and "00000017DTS",
302 mtlo_1 = "00000013S",
303 mult_2 = "00000018ST",
304 multu_2 = "00000019ST",
305 div_2 = "0000001aST",
306 divu_2 = "0000001bST",
307 add_3 = "00000020DST", 292 add_3 = "00000020DST",
308 move_2 = "00000021DS", 293 move_2 = mips64 and "00000025DS" or "00000021DS",
309 addu_3 = "00000021DST", 294 addu_3 = "00000021DST",
310 sub_3 = "00000022DST", 295 sub_3 = "00000022DST",
311 negu_2 = "00000023DT", 296 negu_2 = mips64 and "0000002fDT" or "00000023DT",
312 subu_3 = "00000023DST", 297 subu_3 = "00000023DST",
313 and_3 = "00000024DST", 298 and_3 = "00000024DST",
314 or_3 = "00000025DST", 299 or_3 = "00000025DST",
@@ -317,6 +302,10 @@ local map_op = {
317 nor_3 = "00000027DST", 302 nor_3 = "00000027DST",
318 slt_3 = "0000002aDST", 303 slt_3 = "0000002aDST",
319 sltu_3 = "0000002bDST", 304 sltu_3 = "0000002bDST",
305 dadd_3 = mips64 and "0000002cDST",
306 daddu_3 = mips64 and "0000002dDST",
307 dsub_3 = mips64 and "0000002eDST",
308 dsubu_3 = mips64 and "0000002fDST",
320 tge_2 = "00000030ST", 309 tge_2 = "00000030ST",
321 tge_3 = "00000030STZ", 310 tge_3 = "00000030STZ",
322 tgeu_2 = "00000031ST", 311 tgeu_2 = "00000031ST",
@@ -329,40 +318,36 @@ local map_op = {
329 teq_3 = "00000034STZ", 318 teq_3 = "00000034STZ",
330 tne_2 = "00000036ST", 319 tne_2 = "00000036ST",
331 tne_3 = "00000036STZ", 320 tne_3 = "00000036STZ",
321 dsll_3 = mips64 and "00000038DTa",
322 dsrl_3 = mips64 and "0000003aDTa",
323 drotr_3 = mips64 and "0020003aDTa",
324 dsra_3 = mips64 and "0000003bDTa",
325 dsll32_3 = mips64 and "0000003cDTA",
326 dsrl32_3 = mips64 and "0000003eDTA",
327 drotr32_3 = mips64 and "0020003eDTA",
328 dsra32_3 = mips64 and "0000003fDTA",
332 329
333 -- Opcode REGIMM. 330 -- Opcode REGIMM.
334 bltz_2 = "04000000SB", 331 bltz_2 = "04000000SB",
335 bgez_2 = "04010000SB", 332 bgez_2 = "04010000SB",
336 bltzl_2 = "04020000SB", 333 bltzl_2 = "04020000SB",
337 bgezl_2 = "04030000SB", 334 bgezl_2 = "04030000SB",
338 tgei_2 = "04080000SI",
339 tgeiu_2 = "04090000SI",
340 tlti_2 = "040a0000SI",
341 tltiu_2 = "040b0000SI",
342 teqi_2 = "040c0000SI",
343 tnei_2 = "040e0000SI",
344 bltzal_2 = "04100000SB",
345 bal_1 = "04110000B", 335 bal_1 = "04110000B",
346 bgezal_2 = "04110000SB",
347 bltzall_2 = "04120000SB",
348 bgezall_2 = "04130000SB",
349 synci_1 = "041f0000O", 336 synci_1 = "041f0000O",
350 337
351 -- Opcode SPECIAL2.
352 madd_2 = "70000000ST",
353 maddu_2 = "70000001ST",
354 mul_3 = "70000002DST",
355 msub_2 = "70000004ST",
356 msubu_2 = "70000005ST",
357 clz_2 = "70000020DS=",
358 clo_2 = "70000021DS=",
359 sdbbp_0 = "7000003f",
360 sdbbp_1 = "7000003fY",
361
362 -- Opcode SPECIAL3. 338 -- Opcode SPECIAL3.
363 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 339 ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
340 dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
341 dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
342 dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
343 zextw_2 = mips64 and "7c00f803TS",
364 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 344 ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
345 dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
346 dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
347 dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
365 wsbh_2 = "7c0000a0DT", 348 wsbh_2 = "7c0000a0DT",
349 dsbh_2 = mips64 and "7c0000a4DT",
350 dshd_2 = mips64 and "7c000164DT",
366 seb_2 = "7c000420DT", 351 seb_2 = "7c000420DT",
367 seh_2 = "7c000620DT", 352 seh_2 = "7c000620DT",
368 rdhwr_2 = "7c00003bTD", 353 rdhwr_2 = "7c00003bTD",
@@ -370,8 +355,12 @@ local map_op = {
370 -- Opcode COP0. 355 -- Opcode COP0.
371 mfc0_2 = "40000000TD", 356 mfc0_2 = "40000000TD",
372 mfc0_3 = "40000000TDW", 357 mfc0_3 = "40000000TDW",
358 dmfc0_2 = mips64 and "40200000TD",
359 dmfc0_3 = mips64 and "40200000TDW",
373 mtc0_2 = "40800000TD", 360 mtc0_2 = "40800000TD",
374 mtc0_3 = "40800000TDW", 361 mtc0_3 = "40800000TDW",
362 dmtc0_2 = mips64 and "40a00000TD",
363 dmtc0_3 = mips64 and "40a00000TDW",
375 rdpgpr_2 = "41400000DT", 364 rdpgpr_2 = "41400000DT",
376 di_0 = "41606000", 365 di_0 = "41606000",
377 di_1 = "41606000T", 366 di_1 = "41606000T",
@@ -388,21 +377,14 @@ local map_op = {
388 377
389 -- Opcode COP1. 378 -- Opcode COP1.
390 mfc1_2 = "44000000TG", 379 mfc1_2 = "44000000TG",
380 dmfc1_2 = mips64 and "44200000TG",
391 cfc1_2 = "44400000TG", 381 cfc1_2 = "44400000TG",
392 mfhc1_2 = "44600000TG", 382 mfhc1_2 = "44600000TG",
393 mtc1_2 = "44800000TG", 383 mtc1_2 = "44800000TG",
384 dmtc1_2 = mips64 and "44a00000TG",
394 ctc1_2 = "44c00000TG", 385 ctc1_2 = "44c00000TG",
395 mthc1_2 = "44e00000TG", 386 mthc1_2 = "44e00000TG",
396 387
397 bc1f_1 = "45000000B",
398 bc1f_2 = "45000000CB",
399 bc1t_1 = "45010000B",
400 bc1t_2 = "45010000CB",
401 bc1fl_1 = "45020000B",
402 bc1fl_2 = "45020000CB",
403 bc1tl_1 = "45030000B",
404 bc1tl_2 = "45030000CB",
405
406 ["add.s_3"] = "46000000FGH", 388 ["add.s_3"] = "46000000FGH",
407 ["sub.s_3"] = "46000001FGH", 389 ["sub.s_3"] = "46000001FGH",
408 ["mul.s_3"] = "46000002FGH", 390 ["mul.s_3"] = "46000002FGH",
@@ -419,51 +401,11 @@ local map_op = {
419 ["trunc.w.s_2"] = "4600000dFG", 401 ["trunc.w.s_2"] = "4600000dFG",
420 ["ceil.w.s_2"] = "4600000eFG", 402 ["ceil.w.s_2"] = "4600000eFG",
421 ["floor.w.s_2"] = "4600000fFG", 403 ["floor.w.s_2"] = "4600000fFG",
422 ["movf.s_2"] = "46000011FG",
423 ["movf.s_3"] = "46000011FGC",
424 ["movt.s_2"] = "46010011FG",
425 ["movt.s_3"] = "46010011FGC",
426 ["movz.s_3"] = "46000012FGT",
427 ["movn.s_3"] = "46000013FGT",
428 ["recip.s_2"] = "46000015FG", 404 ["recip.s_2"] = "46000015FG",
429 ["rsqrt.s_2"] = "46000016FG", 405 ["rsqrt.s_2"] = "46000016FG",
430 ["cvt.d.s_2"] = "46000021FG", 406 ["cvt.d.s_2"] = "46000021FG",
431 ["cvt.w.s_2"] = "46000024FG", 407 ["cvt.w.s_2"] = "46000024FG",
432 ["cvt.l.s_2"] = "46000025FG", 408 ["cvt.l.s_2"] = "46000025FG",
433 ["cvt.ps.s_3"] = "46000026FGH",
434 ["c.f.s_2"] = "46000030GH",
435 ["c.f.s_3"] = "46000030VGH",
436 ["c.un.s_2"] = "46000031GH",
437 ["c.un.s_3"] = "46000031VGH",
438 ["c.eq.s_2"] = "46000032GH",
439 ["c.eq.s_3"] = "46000032VGH",
440 ["c.ueq.s_2"] = "46000033GH",
441 ["c.ueq.s_3"] = "46000033VGH",
442 ["c.olt.s_2"] = "46000034GH",
443 ["c.olt.s_3"] = "46000034VGH",
444 ["c.ult.s_2"] = "46000035GH",
445 ["c.ult.s_3"] = "46000035VGH",
446 ["c.ole.s_2"] = "46000036GH",
447 ["c.ole.s_3"] = "46000036VGH",
448 ["c.ule.s_2"] = "46000037GH",
449 ["c.ule.s_3"] = "46000037VGH",
450 ["c.sf.s_2"] = "46000038GH",
451 ["c.sf.s_3"] = "46000038VGH",
452 ["c.ngle.s_2"] = "46000039GH",
453 ["c.ngle.s_3"] = "46000039VGH",
454 ["c.seq.s_2"] = "4600003aGH",
455 ["c.seq.s_3"] = "4600003aVGH",
456 ["c.ngl.s_2"] = "4600003bGH",
457 ["c.ngl.s_3"] = "4600003bVGH",
458 ["c.lt.s_2"] = "4600003cGH",
459 ["c.lt.s_3"] = "4600003cVGH",
460 ["c.nge.s_2"] = "4600003dGH",
461 ["c.nge.s_3"] = "4600003dVGH",
462 ["c.le.s_2"] = "4600003eGH",
463 ["c.le.s_3"] = "4600003eVGH",
464 ["c.ngt.s_2"] = "4600003fGH",
465 ["c.ngt.s_3"] = "4600003fVGH",
466
467 ["add.d_3"] = "46200000FGH", 409 ["add.d_3"] = "46200000FGH",
468 ["sub.d_3"] = "46200001FGH", 410 ["sub.d_3"] = "46200001FGH",
469 ["mul.d_3"] = "46200002FGH", 411 ["mul.d_3"] = "46200002FGH",
@@ -480,130 +422,410 @@ local map_op = {
480 ["trunc.w.d_2"] = "4620000dFG", 422 ["trunc.w.d_2"] = "4620000dFG",
481 ["ceil.w.d_2"] = "4620000eFG", 423 ["ceil.w.d_2"] = "4620000eFG",
482 ["floor.w.d_2"] = "4620000fFG", 424 ["floor.w.d_2"] = "4620000fFG",
483 ["movf.d_2"] = "46200011FG",
484 ["movf.d_3"] = "46200011FGC",
485 ["movt.d_2"] = "46210011FG",
486 ["movt.d_3"] = "46210011FGC",
487 ["movz.d_3"] = "46200012FGT",
488 ["movn.d_3"] = "46200013FGT",
489 ["recip.d_2"] = "46200015FG", 425 ["recip.d_2"] = "46200015FG",
490 ["rsqrt.d_2"] = "46200016FG", 426 ["rsqrt.d_2"] = "46200016FG",
491 ["cvt.s.d_2"] = "46200020FG", 427 ["cvt.s.d_2"] = "46200020FG",
492 ["cvt.w.d_2"] = "46200024FG", 428 ["cvt.w.d_2"] = "46200024FG",
493 ["cvt.l.d_2"] = "46200025FG", 429 ["cvt.l.d_2"] = "46200025FG",
494 ["c.f.d_2"] = "46200030GH",
495 ["c.f.d_3"] = "46200030VGH",
496 ["c.un.d_2"] = "46200031GH",
497 ["c.un.d_3"] = "46200031VGH",
498 ["c.eq.d_2"] = "46200032GH",
499 ["c.eq.d_3"] = "46200032VGH",
500 ["c.ueq.d_2"] = "46200033GH",
501 ["c.ueq.d_3"] = "46200033VGH",
502 ["c.olt.d_2"] = "46200034GH",
503 ["c.olt.d_3"] = "46200034VGH",
504 ["c.ult.d_2"] = "46200035GH",
505 ["c.ult.d_3"] = "46200035VGH",
506 ["c.ole.d_2"] = "46200036GH",
507 ["c.ole.d_3"] = "46200036VGH",
508 ["c.ule.d_2"] = "46200037GH",
509 ["c.ule.d_3"] = "46200037VGH",
510 ["c.sf.d_2"] = "46200038GH",
511 ["c.sf.d_3"] = "46200038VGH",
512 ["c.ngle.d_2"] = "46200039GH",
513 ["c.ngle.d_3"] = "46200039VGH",
514 ["c.seq.d_2"] = "4620003aGH",
515 ["c.seq.d_3"] = "4620003aVGH",
516 ["c.ngl.d_2"] = "4620003bGH",
517 ["c.ngl.d_3"] = "4620003bVGH",
518 ["c.lt.d_2"] = "4620003cGH",
519 ["c.lt.d_3"] = "4620003cVGH",
520 ["c.nge.d_2"] = "4620003dGH",
521 ["c.nge.d_3"] = "4620003dVGH",
522 ["c.le.d_2"] = "4620003eGH",
523 ["c.le.d_3"] = "4620003eVGH",
524 ["c.ngt.d_2"] = "4620003fGH",
525 ["c.ngt.d_3"] = "4620003fVGH",
526
527 ["add.ps_3"] = "46c00000FGH",
528 ["sub.ps_3"] = "46c00001FGH",
529 ["mul.ps_3"] = "46c00002FGH",
530 ["abs.ps_2"] = "46c00005FG",
531 ["mov.ps_2"] = "46c00006FG",
532 ["neg.ps_2"] = "46c00007FG",
533 ["movf.ps_2"] = "46c00011FG",
534 ["movf.ps_3"] = "46c00011FGC",
535 ["movt.ps_2"] = "46c10011FG",
536 ["movt.ps_3"] = "46c10011FGC",
537 ["movz.ps_3"] = "46c00012FGT",
538 ["movn.ps_3"] = "46c00013FGT",
539 ["cvt.s.pu_2"] = "46c00020FG",
540 ["cvt.s.pl_2"] = "46c00028FG",
541 ["pll.ps_3"] = "46c0002cFGH",
542 ["plu.ps_3"] = "46c0002dFGH",
543 ["pul.ps_3"] = "46c0002eFGH",
544 ["puu.ps_3"] = "46c0002fFGH",
545 ["c.f.ps_2"] = "46c00030GH",
546 ["c.f.ps_3"] = "46c00030VGH",
547 ["c.un.ps_2"] = "46c00031GH",
548 ["c.un.ps_3"] = "46c00031VGH",
549 ["c.eq.ps_2"] = "46c00032GH",
550 ["c.eq.ps_3"] = "46c00032VGH",
551 ["c.ueq.ps_2"] = "46c00033GH",
552 ["c.ueq.ps_3"] = "46c00033VGH",
553 ["c.olt.ps_2"] = "46c00034GH",
554 ["c.olt.ps_3"] = "46c00034VGH",
555 ["c.ult.ps_2"] = "46c00035GH",
556 ["c.ult.ps_3"] = "46c00035VGH",
557 ["c.ole.ps_2"] = "46c00036GH",
558 ["c.ole.ps_3"] = "46c00036VGH",
559 ["c.ule.ps_2"] = "46c00037GH",
560 ["c.ule.ps_3"] = "46c00037VGH",
561 ["c.sf.ps_2"] = "46c00038GH",
562 ["c.sf.ps_3"] = "46c00038VGH",
563 ["c.ngle.ps_2"] = "46c00039GH",
564 ["c.ngle.ps_3"] = "46c00039VGH",
565 ["c.seq.ps_2"] = "46c0003aGH",
566 ["c.seq.ps_3"] = "46c0003aVGH",
567 ["c.ngl.ps_2"] = "46c0003bGH",
568 ["c.ngl.ps_3"] = "46c0003bVGH",
569 ["c.lt.ps_2"] = "46c0003cGH",
570 ["c.lt.ps_3"] = "46c0003cVGH",
571 ["c.nge.ps_2"] = "46c0003dGH",
572 ["c.nge.ps_3"] = "46c0003dVGH",
573 ["c.le.ps_2"] = "46c0003eGH",
574 ["c.le.ps_3"] = "46c0003eVGH",
575 ["c.ngt.ps_2"] = "46c0003fGH",
576 ["c.ngt.ps_3"] = "46c0003fVGH",
577
578 ["cvt.s.w_2"] = "46800020FG", 430 ["cvt.s.w_2"] = "46800020FG",
579 ["cvt.d.w_2"] = "46800021FG", 431 ["cvt.d.w_2"] = "46800021FG",
580
581 ["cvt.s.l_2"] = "46a00020FG", 432 ["cvt.s.l_2"] = "46a00020FG",
582 ["cvt.d.l_2"] = "46a00021FG", 433 ["cvt.d.l_2"] = "46a00021FG",
583
584 -- Opcode COP1X.
585 lwxc1_2 = "4c000000FX",
586 ldxc1_2 = "4c000001FX",
587 luxc1_2 = "4c000005FX",
588 swxc1_2 = "4c000008FX",
589 sdxc1_2 = "4c000009FX",
590 suxc1_2 = "4c00000dFX",
591 prefx_2 = "4c00000fMX",
592 ["alnv.ps_4"] = "4c00001eFGHS",
593 ["madd.s_4"] = "4c000020FRGH",
594 ["madd.d_4"] = "4c000021FRGH",
595 ["madd.ps_4"] = "4c000026FRGH",
596 ["msub.s_4"] = "4c000028FRGH",
597 ["msub.d_4"] = "4c000029FRGH",
598 ["msub.ps_4"] = "4c00002eFRGH",
599 ["nmadd.s_4"] = "4c000030FRGH",
600 ["nmadd.d_4"] = "4c000031FRGH",
601 ["nmadd.ps_4"] = "4c000036FRGH",
602 ["nmsub.s_4"] = "4c000038FRGH",
603 ["nmsub.d_4"] = "4c000039FRGH",
604 ["nmsub.ps_4"] = "4c00003eFRGH",
605} 434}
606 435
436if mipsr6 then -- Instructions added with MIPSR6.
437
438 for k,v in pairs({
439
440 -- Add immediate to upper bits.
441 aui_3 = "3c000000TSI",
442 daui_3 = mips64 and "74000000TSI",
443 dahi_2 = mips64 and "04060000SI",
444 dati_2 = mips64 and "041e0000SI",
445
446 -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
447
448 -- Compact branches.
449 blezalc_2 = "18000000TB", -- rt != 0.
450 bgezalc_2 = "18000000T=SB", -- rt != 0.
451 bgtzalc_2 = "1c000000TB", -- rt != 0.
452 bltzalc_2 = "1c000000T=SB", -- rt != 0.
453
454 blezc_2 = "58000000TB", -- rt != 0.
455 bgezc_2 = "58000000T=SB", -- rt != 0.
456 bgec_3 = "58000000STB", -- rs != rt.
457 blec_3 = "58000000TSB", -- rt != rs.
458
459 bgtzc_2 = "5c000000TB", -- rt != 0.
460 bltzc_2 = "5c000000T=SB", -- rt != 0.
461 bltc_3 = "5c000000STB", -- rs != rt.
462 bgtc_3 = "5c000000TSB", -- rt != rs.
463
464 bgeuc_3 = "18000000STB", -- rs != rt.
465 bleuc_3 = "18000000TSB", -- rt != rs.
466 bltuc_3 = "1c000000STB", -- rs != rt.
467 bgtuc_3 = "1c000000TSB", -- rt != rs.
468
469 beqzalc_2 = "20000000TB", -- rt != 0.
470 bnezalc_2 = "60000000TB", -- rt != 0.
471 beqc_3 = "20000000STB", -- rs < rt.
472 bnec_3 = "60000000STB", -- rs < rt.
473 bovc_3 = "20000000STB", -- rs >= rt.
474 bnvc_3 = "60000000STB", -- rs >= rt.
475
476 beqzc_2 = "d8000000SK", -- rs != 0.
477 bnezc_2 = "f8000000SK", -- rs != 0.
478 jic_2 = "d8000000TI",
479 jialc_2 = "f8000000TI",
480 bc_1 = "c8000000L",
481 balc_1 = "e8000000L",
482
483 -- Opcode SPECIAL.
484 jr_1 = "00000009S",
485 sdbbp_0 = "0000000e",
486 sdbbp_1 = "0000000eY",
487 lsa_4 = "00000005DSTA",
488 dlsa_4 = mips64 and "00000015DSTA",
489 seleqz_3 = "00000035DST",
490 selnez_3 = "00000037DST",
491 clz_2 = "00000050DS",
492 clo_2 = "00000051DS",
493 dclz_2 = mips64 and "00000052DS",
494 dclo_2 = mips64 and "00000053DS",
495 mul_3 = "00000098DST",
496 muh_3 = "000000d8DST",
497 mulu_3 = "00000099DST",
498 muhu_3 = "000000d9DST",
499 div_3 = "0000009aDST",
500 mod_3 = "000000daDST",
501 divu_3 = "0000009bDST",
502 modu_3 = "000000dbDST",
503 dmul_3 = mips64 and "0000009cDST",
504 dmuh_3 = mips64 and "000000dcDST",
505 dmulu_3 = mips64 and "0000009dDST",
506 dmuhu_3 = mips64 and "000000ddDST",
507 ddiv_3 = mips64 and "0000009eDST",
508 dmod_3 = mips64 and "000000deDST",
509 ddivu_3 = mips64 and "0000009fDST",
510 dmodu_3 = mips64 and "000000dfDST",
511
512 -- Opcode SPECIAL3.
513 align_4 = "7c000220DSTA",
514 dalign_4 = mips64 and "7c000224DSTA",
515 bitswap_2 = "7c000020DT",
516 dbitswap_2 = mips64 and "7c000024DT",
517
518 -- Opcode COP1.
519 bc1eqz_2 = "45200000HB",
520 bc1nez_2 = "45a00000HB",
521
522 ["sel.s_3"] = "46000010FGH",
523 ["seleqz.s_3"] = "46000014FGH",
524 ["selnez.s_3"] = "46000017FGH",
525 ["maddf.s_3"] = "46000018FGH",
526 ["msubf.s_3"] = "46000019FGH",
527 ["rint.s_2"] = "4600001aFG",
528 ["class.s_2"] = "4600001bFG",
529 ["min.s_3"] = "4600001cFGH",
530 ["mina.s_3"] = "4600001dFGH",
531 ["max.s_3"] = "4600001eFGH",
532 ["maxa.s_3"] = "4600001fFGH",
533 ["cmp.af.s_3"] = "46800000FGH",
534 ["cmp.un.s_3"] = "46800001FGH",
535 ["cmp.or.s_3"] = "46800011FGH",
536 ["cmp.eq.s_3"] = "46800002FGH",
537 ["cmp.une.s_3"] = "46800012FGH",
538 ["cmp.ueq.s_3"] = "46800003FGH",
539 ["cmp.ne.s_3"] = "46800013FGH",
540 ["cmp.lt.s_3"] = "46800004FGH",
541 ["cmp.ult.s_3"] = "46800005FGH",
542 ["cmp.le.s_3"] = "46800006FGH",
543 ["cmp.ule.s_3"] = "46800007FGH",
544 ["cmp.saf.s_3"] = "46800008FGH",
545 ["cmp.sun.s_3"] = "46800009FGH",
546 ["cmp.sor.s_3"] = "46800019FGH",
547 ["cmp.seq.s_3"] = "4680000aFGH",
548 ["cmp.sune.s_3"] = "4680001aFGH",
549 ["cmp.sueq.s_3"] = "4680000bFGH",
550 ["cmp.sne.s_3"] = "4680001bFGH",
551 ["cmp.slt.s_3"] = "4680000cFGH",
552 ["cmp.sult.s_3"] = "4680000dFGH",
553 ["cmp.sle.s_3"] = "4680000eFGH",
554 ["cmp.sule.s_3"] = "4680000fFGH",
555
556 ["sel.d_3"] = "46200010FGH",
557 ["seleqz.d_3"] = "46200014FGH",
558 ["selnez.d_3"] = "46200017FGH",
559 ["maddf.d_3"] = "46200018FGH",
560 ["msubf.d_3"] = "46200019FGH",
561 ["rint.d_2"] = "4620001aFG",
562 ["class.d_2"] = "4620001bFG",
563 ["min.d_3"] = "4620001cFGH",
564 ["mina.d_3"] = "4620001dFGH",
565 ["max.d_3"] = "4620001eFGH",
566 ["maxa.d_3"] = "4620001fFGH",
567 ["cmp.af.d_3"] = "46a00000FGH",
568 ["cmp.un.d_3"] = "46a00001FGH",
569 ["cmp.or.d_3"] = "46a00011FGH",
570 ["cmp.eq.d_3"] = "46a00002FGH",
571 ["cmp.une.d_3"] = "46a00012FGH",
572 ["cmp.ueq.d_3"] = "46a00003FGH",
573 ["cmp.ne.d_3"] = "46a00013FGH",
574 ["cmp.lt.d_3"] = "46a00004FGH",
575 ["cmp.ult.d_3"] = "46a00005FGH",
576 ["cmp.le.d_3"] = "46a00006FGH",
577 ["cmp.ule.d_3"] = "46a00007FGH",
578 ["cmp.saf.d_3"] = "46a00008FGH",
579 ["cmp.sun.d_3"] = "46a00009FGH",
580 ["cmp.sor.d_3"] = "46a00019FGH",
581 ["cmp.seq.d_3"] = "46a0000aFGH",
582 ["cmp.sune.d_3"] = "46a0001aFGH",
583 ["cmp.sueq.d_3"] = "46a0000bFGH",
584 ["cmp.sne.d_3"] = "46a0001bFGH",
585 ["cmp.slt.d_3"] = "46a0000cFGH",
586 ["cmp.sult.d_3"] = "46a0000dFGH",
587 ["cmp.sle.d_3"] = "46a0000eFGH",
588 ["cmp.sule.d_3"] = "46a0000fFGH",
589
590 }) do map_op[k] = v end
591
592else -- Instructions removed by MIPSR6.
593
594 for k,v in pairs({
595 -- Traps, don't use.
596 addi_3 = "20000000TSI",
597 daddi_3 = mips64 and "60000000TSI",
598
599 -- Branch on likely, don't use.
600 beqzl_2 = "50000000SB",
601 beql_3 = "50000000STB",
602 bnezl_2 = "54000000SB",
603 bnel_3 = "54000000STB",
604 blezl_2 = "58000000SB",
605 bgtzl_2 = "5c000000SB",
606
607 lwl_2 = "88000000TO",
608 lwr_2 = "98000000TO",
609 swl_2 = "a8000000TO",
610 sdl_2 = mips64 and "b0000000TO",
611 sdr_2 = mips64 and "b1000000TO",
612 swr_2 = "b8000000TO",
613 cache_2 = "bc000000NO",
614 ll_2 = "c0000000TO",
615 pref_2 = "cc000000NO",
616 sc_2 = "e0000000TO",
617 scd_2 = mips64 and "f0000000TO",
618
619 -- Opcode SPECIAL.
620 movf_2 = "00000001DS",
621 movf_3 = "00000001DSC",
622 movt_2 = "00010001DS",
623 movt_3 = "00010001DSC",
624 jr_1 = "00000008S",
625 movz_3 = "0000000aDST",
626 movn_3 = "0000000bDST",
627 mfhi_1 = "00000010D",
628 mthi_1 = "00000011S",
629 mflo_1 = "00000012D",
630 mtlo_1 = "00000013S",
631 mult_2 = "00000018ST",
632 multu_2 = "00000019ST",
633 div_3 = "0000001aST",
634 divu_3 = "0000001bST",
635 ddiv_3 = mips64 and "0000001eST",
636 ddivu_3 = mips64 and "0000001fST",
637 dmult_2 = mips64 and "0000001cST",
638 dmultu_2 = mips64 and "0000001dST",
639
640 -- Opcode REGIMM.
641 tgei_2 = "04080000SI",
642 tgeiu_2 = "04090000SI",
643 tlti_2 = "040a0000SI",
644 tltiu_2 = "040b0000SI",
645 teqi_2 = "040c0000SI",
646 tnei_2 = "040e0000SI",
647 bltzal_2 = "04100000SB",
648 bgezal_2 = "04110000SB",
649 bltzall_2 = "04120000SB",
650 bgezall_2 = "04130000SB",
651
652 -- Opcode SPECIAL2.
653 madd_2 = "70000000ST",
654 maddu_2 = "70000001ST",
655 mul_3 = "70000002DST",
656 msub_2 = "70000004ST",
657 msubu_2 = "70000005ST",
658 clz_2 = "70000020D=TS",
659 clo_2 = "70000021D=TS",
660 dclz_2 = mips64 and "70000024D=TS",
661 dclo_2 = mips64 and "70000025D=TS",
662 sdbbp_0 = "7000003f",
663 sdbbp_1 = "7000003fY",
664
665 -- Opcode COP1.
666 bc1f_1 = "45000000B",
667 bc1f_2 = "45000000CB",
668 bc1t_1 = "45010000B",
669 bc1t_2 = "45010000CB",
670 bc1fl_1 = "45020000B",
671 bc1fl_2 = "45020000CB",
672 bc1tl_1 = "45030000B",
673 bc1tl_2 = "45030000CB",
674
675 ["movf.s_2"] = "46000011FG",
676 ["movf.s_3"] = "46000011FGC",
677 ["movt.s_2"] = "46010011FG",
678 ["movt.s_3"] = "46010011FGC",
679 ["movz.s_3"] = "46000012FGT",
680 ["movn.s_3"] = "46000013FGT",
681 ["cvt.ps.s_3"] = "46000026FGH",
682 ["c.f.s_2"] = "46000030GH",
683 ["c.f.s_3"] = "46000030VGH",
684 ["c.un.s_2"] = "46000031GH",
685 ["c.un.s_3"] = "46000031VGH",
686 ["c.eq.s_2"] = "46000032GH",
687 ["c.eq.s_3"] = "46000032VGH",
688 ["c.ueq.s_2"] = "46000033GH",
689 ["c.ueq.s_3"] = "46000033VGH",
690 ["c.olt.s_2"] = "46000034GH",
691 ["c.olt.s_3"] = "46000034VGH",
692 ["c.ult.s_2"] = "46000035GH",
693 ["c.ult.s_3"] = "46000035VGH",
694 ["c.ole.s_2"] = "46000036GH",
695 ["c.ole.s_3"] = "46000036VGH",
696 ["c.ule.s_2"] = "46000037GH",
697 ["c.ule.s_3"] = "46000037VGH",
698 ["c.sf.s_2"] = "46000038GH",
699 ["c.sf.s_3"] = "46000038VGH",
700 ["c.ngle.s_2"] = "46000039GH",
701 ["c.ngle.s_3"] = "46000039VGH",
702 ["c.seq.s_2"] = "4600003aGH",
703 ["c.seq.s_3"] = "4600003aVGH",
704 ["c.ngl.s_2"] = "4600003bGH",
705 ["c.ngl.s_3"] = "4600003bVGH",
706 ["c.lt.s_2"] = "4600003cGH",
707 ["c.lt.s_3"] = "4600003cVGH",
708 ["c.nge.s_2"] = "4600003dGH",
709 ["c.nge.s_3"] = "4600003dVGH",
710 ["c.le.s_2"] = "4600003eGH",
711 ["c.le.s_3"] = "4600003eVGH",
712 ["c.ngt.s_2"] = "4600003fGH",
713 ["c.ngt.s_3"] = "4600003fVGH",
714 ["movf.d_2"] = "46200011FG",
715 ["movf.d_3"] = "46200011FGC",
716 ["movt.d_2"] = "46210011FG",
717 ["movt.d_3"] = "46210011FGC",
718 ["movz.d_3"] = "46200012FGT",
719 ["movn.d_3"] = "46200013FGT",
720 ["c.f.d_2"] = "46200030GH",
721 ["c.f.d_3"] = "46200030VGH",
722 ["c.un.d_2"] = "46200031GH",
723 ["c.un.d_3"] = "46200031VGH",
724 ["c.eq.d_2"] = "46200032GH",
725 ["c.eq.d_3"] = "46200032VGH",
726 ["c.ueq.d_2"] = "46200033GH",
727 ["c.ueq.d_3"] = "46200033VGH",
728 ["c.olt.d_2"] = "46200034GH",
729 ["c.olt.d_3"] = "46200034VGH",
730 ["c.ult.d_2"] = "46200035GH",
731 ["c.ult.d_3"] = "46200035VGH",
732 ["c.ole.d_2"] = "46200036GH",
733 ["c.ole.d_3"] = "46200036VGH",
734 ["c.ule.d_2"] = "46200037GH",
735 ["c.ule.d_3"] = "46200037VGH",
736 ["c.sf.d_2"] = "46200038GH",
737 ["c.sf.d_3"] = "46200038VGH",
738 ["c.ngle.d_2"] = "46200039GH",
739 ["c.ngle.d_3"] = "46200039VGH",
740 ["c.seq.d_2"] = "4620003aGH",
741 ["c.seq.d_3"] = "4620003aVGH",
742 ["c.ngl.d_2"] = "4620003bGH",
743 ["c.ngl.d_3"] = "4620003bVGH",
744 ["c.lt.d_2"] = "4620003cGH",
745 ["c.lt.d_3"] = "4620003cVGH",
746 ["c.nge.d_2"] = "4620003dGH",
747 ["c.nge.d_3"] = "4620003dVGH",
748 ["c.le.d_2"] = "4620003eGH",
749 ["c.le.d_3"] = "4620003eVGH",
750 ["c.ngt.d_2"] = "4620003fGH",
751 ["c.ngt.d_3"] = "4620003fVGH",
752 ["add.ps_3"] = "46c00000FGH",
753 ["sub.ps_3"] = "46c00001FGH",
754 ["mul.ps_3"] = "46c00002FGH",
755 ["abs.ps_2"] = "46c00005FG",
756 ["mov.ps_2"] = "46c00006FG",
757 ["neg.ps_2"] = "46c00007FG",
758 ["movf.ps_2"] = "46c00011FG",
759 ["movf.ps_3"] = "46c00011FGC",
760 ["movt.ps_2"] = "46c10011FG",
761 ["movt.ps_3"] = "46c10011FGC",
762 ["movz.ps_3"] = "46c00012FGT",
763 ["movn.ps_3"] = "46c00013FGT",
764 ["cvt.s.pu_2"] = "46c00020FG",
765 ["cvt.s.pl_2"] = "46c00028FG",
766 ["pll.ps_3"] = "46c0002cFGH",
767 ["plu.ps_3"] = "46c0002dFGH",
768 ["pul.ps_3"] = "46c0002eFGH",
769 ["puu.ps_3"] = "46c0002fFGH",
770 ["c.f.ps_2"] = "46c00030GH",
771 ["c.f.ps_3"] = "46c00030VGH",
772 ["c.un.ps_2"] = "46c00031GH",
773 ["c.un.ps_3"] = "46c00031VGH",
774 ["c.eq.ps_2"] = "46c00032GH",
775 ["c.eq.ps_3"] = "46c00032VGH",
776 ["c.ueq.ps_2"] = "46c00033GH",
777 ["c.ueq.ps_3"] = "46c00033VGH",
778 ["c.olt.ps_2"] = "46c00034GH",
779 ["c.olt.ps_3"] = "46c00034VGH",
780 ["c.ult.ps_2"] = "46c00035GH",
781 ["c.ult.ps_3"] = "46c00035VGH",
782 ["c.ole.ps_2"] = "46c00036GH",
783 ["c.ole.ps_3"] = "46c00036VGH",
784 ["c.ule.ps_2"] = "46c00037GH",
785 ["c.ule.ps_3"] = "46c00037VGH",
786 ["c.sf.ps_2"] = "46c00038GH",
787 ["c.sf.ps_3"] = "46c00038VGH",
788 ["c.ngle.ps_2"] = "46c00039GH",
789 ["c.ngle.ps_3"] = "46c00039VGH",
790 ["c.seq.ps_2"] = "46c0003aGH",
791 ["c.seq.ps_3"] = "46c0003aVGH",
792 ["c.ngl.ps_2"] = "46c0003bGH",
793 ["c.ngl.ps_3"] = "46c0003bVGH",
794 ["c.lt.ps_2"] = "46c0003cGH",
795 ["c.lt.ps_3"] = "46c0003cVGH",
796 ["c.nge.ps_2"] = "46c0003dGH",
797 ["c.nge.ps_3"] = "46c0003dVGH",
798 ["c.le.ps_2"] = "46c0003eGH",
799 ["c.le.ps_3"] = "46c0003eVGH",
800 ["c.ngt.ps_2"] = "46c0003fGH",
801 ["c.ngt.ps_3"] = "46c0003fVGH",
802
803 -- Opcode COP1X.
804 lwxc1_2 = "4c000000FX",
805 ldxc1_2 = "4c000001FX",
806 luxc1_2 = "4c000005FX",
807 swxc1_2 = "4c000008FX",
808 sdxc1_2 = "4c000009FX",
809 suxc1_2 = "4c00000dFX",
810 prefx_2 = "4c00000fMX",
811 ["alnv.ps_4"] = "4c00001eFGHS",
812 ["madd.s_4"] = "4c000020FRGH",
813 ["madd.d_4"] = "4c000021FRGH",
814 ["madd.ps_4"] = "4c000026FRGH",
815 ["msub.s_4"] = "4c000028FRGH",
816 ["msub.d_4"] = "4c000029FRGH",
817 ["msub.ps_4"] = "4c00002eFRGH",
818 ["nmadd.s_4"] = "4c000030FRGH",
819 ["nmadd.d_4"] = "4c000031FRGH",
820 ["nmadd.ps_4"] = "4c000036FRGH",
821 ["nmsub.s_4"] = "4c000038FRGH",
822 ["nmsub.d_4"] = "4c000039FRGH",
823 ["nmsub.ps_4"] = "4c00003eFRGH",
824
825 }) do map_op[k] = v end
826
827end
828
607------------------------------------------------------------------------------ 829------------------------------------------------------------------------------
608 830
609local function parse_gpr(expr) 831local function parse_gpr(expr)
@@ -633,7 +855,7 @@ local function parse_fpr(expr)
633 werror("bad register name `"..expr.."'") 855 werror("bad register name `"..expr.."'")
634end 856end
635 857
636local function parse_imm(imm, bits, shift, scale, signed) 858local function parse_imm(imm, bits, shift, scale, signed, action)
637 local n = tonumber(imm) 859 local n = tonumber(imm)
638 if n then 860 if n then
639 local m = sar(n, scale) 861 local m = sar(n, scale)
@@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
651 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then 873 match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
652 werror("expected immediate operand, got register") 874 werror("expected immediate operand, got register")
653 else 875 else
654 waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) 876 waction(action or "IMM",
877 (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
655 return 0 878 return 0
656 end 879 end
657end 880end
@@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams)
756 op = op + parse_disp(params[n]); n = n + 1 979 op = op + parse_disp(params[n]); n = n + 1
757 elseif p == "X" then 980 elseif p == "X" then
758 op = op + parse_index(params[n]); n = n + 1 981 op = op + parse_index(params[n]); n = n + 1
759 elseif p == "B" or p == "J" then 982 elseif p == "B" or p == "J" or p == "K" or p == "L" then
760 local mode, m, s = parse_label(params[n], false) 983 local mode, m, s = parse_label(params[n], false)
761 if p == "B" then m = m + 2048 end 984 if p == "J" then m = m + 0xa800
985 elseif p == "K" then m = m + 0x5000
986 elseif p == "L" then m = m + 0xa000 end
762 waction("REL_"..mode, m, s, 1) 987 waction("REL_"..mode, m, s, 1)
763 n = n + 1 988 n = n + 1
764 elseif p == "A" then 989 elseif p == "A" then
765 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 990 op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
991 elseif p == "a" then
992 local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
993 op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
766 elseif p == "M" then 994 elseif p == "M" then
767 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 995 op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
768 elseif p == "N" then 996 elseif p == "N" then
@@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
778 elseif p == "Z" then 1006 elseif p == "Z" then
779 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 1007 op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
780 elseif p == "=" then 1008 elseif p == "=" then
781 op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. 1009 n = n - 1 -- Re-use previous parameter for next template char.
782 else 1010 else
783 assert(false) 1011 assert(false)
784 end 1012 end
diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
new file mode 100644
index 00000000..0aae291b
--- /dev/null
+++ b/dynasm/dasm_mips64.lua
@@ -0,0 +1,12 @@
1------------------------------------------------------------------------------
2-- DynASM MIPS64 module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
8-- All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11mips64 = true -- Using a global is an ugly, but effective solution.
12return require("dasm_mips")
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
index 699d5c31..6e7cc7ab 100644
--- a/dynasm/dasm_ppc.h
+++ b/dynasm/dasm_ppc.h
@@ -1,5 +1,5 @@
1/* 1/*
2** DynASM PPC encoding engine. 2** DynASM PPC/PPC64 encoding engine.
3** Copyright (C) 2005-2020 Mike Pall. All rights reserved. 3** Copyright (C) 2005-2020 Mike Pall. All rights reserved.
4** Released under the MIT license. See dynasm.lua for full copyright notice. 4** Released under the MIT license. See dynasm.lua for full copyright notice.
5*/ 5*/
@@ -21,7 +21,7 @@ enum {
21 /* The following actions need a buffer position. */ 21 /* The following actions need a buffer position. */
22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, 22 DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
23 /* The following actions also have an argument. */ 23 /* The following actions also have an argument. */
24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, 24 DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
25 DASM__MAX 25 DASM__MAX
26}; 26};
27 27
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
244#endif 244#endif
245 b[pos++] = n; 245 b[pos++] = n;
246 break; 246 break;
247 case DASM_IMMSH:
248 CK((n >> 6) == 0, RANGE_I);
249 b[pos++] = n;
250 break;
247 } 251 }
248 } 252 }
249 } 253 }
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
299 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; 303 case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
300 case DASM_REL_LG: case DASM_REL_PC: pos++; break; 304 case DASM_REL_LG: case DASM_REL_PC: pos++; break;
301 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; 305 case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
302 case DASM_IMM: pos++; break; 306 case DASM_IMM: case DASM_IMMSH: pos++; break;
303 } 307 }
304 } 308 }
305 stop: (void)0; 309 stop: (void)0;
@@ -367,6 +371,9 @@ int dasm_encode(Dst_DECL, void *buffer)
367 case DASM_IMM: 371 case DASM_IMM:
368 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); 372 cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
369 break; 373 break;
374 case DASM_IMMSH:
375 cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
376 break;
370 default: *cp++ = ins; break; 377 default: *cp++ = ins; break;
371 } 378 }
372 } 379 }
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index 05981760..20634e13 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -1,17 +1,19 @@
1------------------------------------------------------------------------------ 1------------------------------------------------------------------------------
2-- DynASM PPC module. 2-- DynASM PPC/PPC64 module.
3-- 3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved. 4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice. 5-- See dynasm.lua for full copyright notice.
6--
7-- Support for various extensions contributed by Caio Souza Oliveira.
6------------------------------------------------------------------------------ 8------------------------------------------------------------------------------
7 9
8-- Module information: 10-- Module information:
9local _info = { 11local _info = {
10 arch = "ppc", 12 arch = "ppc",
11 description = "DynASM PPC module", 13 description = "DynASM PPC module",
12 version = "1.3.0", 14 version = "1.4.0",
13 vernum = 10300, 15 vernum = 10400,
14 release = "2011-05-05", 16 release = "2015-10-18",
15 author = "Mike Pall", 17 author = "Mike Pall",
16 license = "MIT", 18 license = "MIT",
17} 19}
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
39local action_names = { 41local action_names = {
40 "STOP", "SECTION", "ESC", "REL_EXT", 42 "STOP", "SECTION", "ESC", "REL_EXT",
41 "ALIGN", "REL_LG", "LABEL_LG", 43 "ALIGN", "REL_LG", "LABEL_LG",
42 "REL_PC", "LABEL_PC", "IMM", 44 "REL_PC", "LABEL_PC", "IMM", "IMMSH"
43} 45}
44 46
45-- Maximum number of section buffer positions for dasm_put(). 47-- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
228 230
229------------------------------------------------------------------------------ 231------------------------------------------------------------------------------
230 232
233local map_op, op_template
234
235local function op_alias(opname, f)
236 return function(params, nparams)
237 if not params then return "-> "..opname:sub(1, -3) end
238 f(params, nparams)
239 op_template(params, map_op[opname], nparams)
240 end
241end
242
231-- Template strings for PPC instructions. 243-- Template strings for PPC instructions.
232local map_op = { 244map_op = {
233 tdi_3 = "08000000ARI", 245 tdi_3 = "08000000ARI",
234 twi_3 = "0c000000ARI", 246 twi_3 = "0c000000ARI",
235 mulli_3 = "1c000000RRI", 247 mulli_3 = "1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
297 std_2 = "f8000000RD", 309 std_2 = "f8000000RD",
298 stdu_2 = "f8000001RD", 310 stdu_2 = "f8000001RD",
299 311
312 subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
313 subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
314 subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
315 ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
316
317 rotlwi_3 = op_alias("rlwinm_5", function(p)
318 p[4] = "0"; p[5] = "31"
319 end),
320 rotrwi_3 = op_alias("rlwinm_5", function(p)
321 p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
322 end),
323 rotlw_3 = op_alias("rlwnm_5", function(p)
324 p[4] = "0"; p[5] = "31"
325 end),
326 slwi_3 = op_alias("rlwinm_5", function(p)
327 p[5] = "31-("..p[3]..")"; p[4] = "0"
328 end),
329 srwi_3 = op_alias("rlwinm_5", function(p)
330 p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
331 end),
332 clrlwi_3 = op_alias("rlwinm_5", function(p)
333 p[4] = p[3]; p[3] = "0"; p[5] = "31"
334 end),
335 clrrwi_3 = op_alias("rlwinm_5", function(p)
336 p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
337 end),
338
339 -- Primary opcode 4:
340 mulhhwu_3 = "10000010RRR.",
341 machhwu_3 = "10000018RRR.",
342 mulhhw_3 = "10000050RRR.",
343 nmachhw_3 = "1000005cRRR.",
344 machhwsu_3 = "10000098RRR.",
345 machhws_3 = "100000d8RRR.",
346 nmachhws_3 = "100000dcRRR.",
347 mulchwu_3 = "10000110RRR.",
348 macchwu_3 = "10000118RRR.",
349 mulchw_3 = "10000150RRR.",
350 macchw_3 = "10000158RRR.",
351 nmacchw_3 = "1000015cRRR.",
352 macchwsu_3 = "10000198RRR.",
353 macchws_3 = "100001d8RRR.",
354 nmacchws_3 = "100001dcRRR.",
355 mullhw_3 = "10000350RRR.",
356 maclhw_3 = "10000358RRR.",
357 nmaclhw_3 = "1000035cRRR.",
358 maclhwsu_3 = "10000398RRR.",
359 maclhws_3 = "100003d8RRR.",
360 nmaclhws_3 = "100003dcRRR.",
361 machhwuo_3 = "10000418RRR.",
362 nmachhwo_3 = "1000045cRRR.",
363 machhwsuo_3 = "10000498RRR.",
364 machhwso_3 = "100004d8RRR.",
365 nmachhwso_3 = "100004dcRRR.",
366 macchwuo_3 = "10000518RRR.",
367 macchwo_3 = "10000558RRR.",
368 nmacchwo_3 = "1000055cRRR.",
369 macchwsuo_3 = "10000598RRR.",
370 macchwso_3 = "100005d8RRR.",
371 nmacchwso_3 = "100005dcRRR.",
372 maclhwo_3 = "10000758RRR.",
373 nmaclhwo_3 = "1000075cRRR.",
374 maclhwsuo_3 = "10000798RRR.",
375 maclhwso_3 = "100007d8RRR.",
376 nmaclhwso_3 = "100007dcRRR.",
377
378 vaddubm_3 = "10000000VVV",
379 vmaxub_3 = "10000002VVV",
380 vrlb_3 = "10000004VVV",
381 vcmpequb_3 = "10000006VVV",
382 vmuloub_3 = "10000008VVV",
383 vaddfp_3 = "1000000aVVV",
384 vmrghb_3 = "1000000cVVV",
385 vpkuhum_3 = "1000000eVVV",
386 vmhaddshs_4 = "10000020VVVV",
387 vmhraddshs_4 = "10000021VVVV",
388 vmladduhm_4 = "10000022VVVV",
389 vmsumubm_4 = "10000024VVVV",
390 vmsummbm_4 = "10000025VVVV",
391 vmsumuhm_4 = "10000026VVVV",
392 vmsumuhs_4 = "10000027VVVV",
393 vmsumshm_4 = "10000028VVVV",
394 vmsumshs_4 = "10000029VVVV",
395 vsel_4 = "1000002aVVVV",
396 vperm_4 = "1000002bVVVV",
397 vsldoi_4 = "1000002cVVVP",
398 vpermxor_4 = "1000002dVVVV",
399 vmaddfp_4 = "1000002eVVVV~",
400 vnmsubfp_4 = "1000002fVVVV~",
401 vaddeuqm_4 = "1000003cVVVV",
402 vaddecuq_4 = "1000003dVVVV",
403 vsubeuqm_4 = "1000003eVVVV",
404 vsubecuq_4 = "1000003fVVVV",
405 vadduhm_3 = "10000040VVV",
406 vmaxuh_3 = "10000042VVV",
407 vrlh_3 = "10000044VVV",
408 vcmpequh_3 = "10000046VVV",
409 vmulouh_3 = "10000048VVV",
410 vsubfp_3 = "1000004aVVV",
411 vmrghh_3 = "1000004cVVV",
412 vpkuwum_3 = "1000004eVVV",
413 vadduwm_3 = "10000080VVV",
414 vmaxuw_3 = "10000082VVV",
415 vrlw_3 = "10000084VVV",
416 vcmpequw_3 = "10000086VVV",
417 vmulouw_3 = "10000088VVV",
418 vmuluwm_3 = "10000089VVV",
419 vmrghw_3 = "1000008cVVV",
420 vpkuhus_3 = "1000008eVVV",
421 vaddudm_3 = "100000c0VVV",
422 vmaxud_3 = "100000c2VVV",
423 vrld_3 = "100000c4VVV",
424 vcmpeqfp_3 = "100000c6VVV",
425 vcmpequd_3 = "100000c7VVV",
426 vpkuwus_3 = "100000ceVVV",
427 vadduqm_3 = "10000100VVV",
428 vmaxsb_3 = "10000102VVV",
429 vslb_3 = "10000104VVV",
430 vmulosb_3 = "10000108VVV",
431 vrefp_2 = "1000010aV-V",
432 vmrglb_3 = "1000010cVVV",
433 vpkshus_3 = "1000010eVVV",
434 vaddcuq_3 = "10000140VVV",
435 vmaxsh_3 = "10000142VVV",
436 vslh_3 = "10000144VVV",
437 vmulosh_3 = "10000148VVV",
438 vrsqrtefp_2 = "1000014aV-V",
439 vmrglh_3 = "1000014cVVV",
440 vpkswus_3 = "1000014eVVV",
441 vaddcuw_3 = "10000180VVV",
442 vmaxsw_3 = "10000182VVV",
443 vslw_3 = "10000184VVV",
444 vmulosw_3 = "10000188VVV",
445 vexptefp_2 = "1000018aV-V",
446 vmrglw_3 = "1000018cVVV",
447 vpkshss_3 = "1000018eVVV",
448 vmaxsd_3 = "100001c2VVV",
449 vsl_3 = "100001c4VVV",
450 vcmpgefp_3 = "100001c6VVV",
451 vlogefp_2 = "100001caV-V",
452 vpkswss_3 = "100001ceVVV",
453 vadduhs_3 = "10000240VVV",
454 vminuh_3 = "10000242VVV",
455 vsrh_3 = "10000244VVV",
456 vcmpgtuh_3 = "10000246VVV",
457 vmuleuh_3 = "10000248VVV",
458 vrfiz_2 = "1000024aV-V",
459 vsplth_3 = "1000024cVV3",
460 vupkhsh_2 = "1000024eV-V",
461 vminuw_3 = "10000282VVV",
462 vminud_3 = "100002c2VVV",
463 vcmpgtud_3 = "100002c7VVV",
464 vrfim_2 = "100002caV-V",
465 vcmpgtsb_3 = "10000306VVV",
466 vcfux_3 = "1000030aVVA~",
467 vaddshs_3 = "10000340VVV",
468 vminsh_3 = "10000342VVV",
469 vsrah_3 = "10000344VVV",
470 vcmpgtsh_3 = "10000346VVV",
471 vmulesh_3 = "10000348VVV",
472 vcfsx_3 = "1000034aVVA~",
473 vspltish_2 = "1000034cVS",
474 vupkhpx_2 = "1000034eV-V",
475 vaddsws_3 = "10000380VVV",
476 vminsw_3 = "10000382VVV",
477 vsraw_3 = "10000384VVV",
478 vcmpgtsw_3 = "10000386VVV",
479 vmulesw_3 = "10000388VVV",
480 vctuxs_3 = "1000038aVVA~",
481 vspltisw_2 = "1000038cVS",
482 vminsd_3 = "100003c2VVV",
483 vsrad_3 = "100003c4VVV",
484 vcmpbfp_3 = "100003c6VVV",
485 vcmpgtsd_3 = "100003c7VVV",
486 vctsxs_3 = "100003caVVA~",
487 vupklpx_2 = "100003ceV-V",
488 vsububm_3 = "10000400VVV",
489 ["bcdadd._4"] = "10000401VVVy.",
490 vavgub_3 = "10000402VVV",
491 vand_3 = "10000404VVV",
492 ["vcmpequb._3"] = "10000406VVV",
493 vmaxfp_3 = "1000040aVVV",
494 vsubuhm_3 = "10000440VVV",
495 ["bcdsub._4"] = "10000441VVVy.",
496 vavguh_3 = "10000442VVV",
497 vandc_3 = "10000444VVV",
498 ["vcmpequh._3"] = "10000446VVV",
499 vminfp_3 = "1000044aVVV",
500 vpkudum_3 = "1000044eVVV",
501 vsubuwm_3 = "10000480VVV",
502 vavguw_3 = "10000482VVV",
503 vor_3 = "10000484VVV",
504 ["vcmpequw._3"] = "10000486VVV",
505 vpmsumw_3 = "10000488VVV",
506 ["vcmpeqfp._3"] = "100004c6VVV",
507 ["vcmpequd._3"] = "100004c7VVV",
508 vpkudus_3 = "100004ceVVV",
509 vavgsb_3 = "10000502VVV",
510 vavgsh_3 = "10000542VVV",
511 vorc_3 = "10000544VVV",
512 vbpermq_3 = "1000054cVVV",
513 vpksdus_3 = "1000054eVVV",
514 vavgsw_3 = "10000582VVV",
515 vsld_3 = "100005c4VVV",
516 ["vcmpgefp._3"] = "100005c6VVV",
517 vpksdss_3 = "100005ceVVV",
518 vsububs_3 = "10000600VVV",
519 mfvscr_1 = "10000604V--",
520 vsum4ubs_3 = "10000608VVV",
521 vsubuhs_3 = "10000640VVV",
522 mtvscr_1 = "10000644--V",
523 ["vcmpgtuh._3"] = "10000646VVV",
524 vsum4shs_3 = "10000648VVV",
525 vupkhsw_2 = "1000064eV-V",
526 vsubuws_3 = "10000680VVV",
527 vshasigmaw_4 = "10000682VVYp",
528 veqv_3 = "10000684VVV",
529 vsum2sws_3 = "10000688VVV",
530 vmrgow_3 = "1000068cVVV",
531 vshasigmad_4 = "100006c2VVYp",
532 vsrd_3 = "100006c4VVV",
533 ["vcmpgtud._3"] = "100006c7VVV",
534 vupklsw_2 = "100006ceV-V",
535 vupkslw_2 = "100006ceV-V",
536 vsubsbs_3 = "10000700VVV",
537 vclzb_2 = "10000702V-V",
538 vpopcntb_2 = "10000703V-V",
539 ["vcmpgtsb._3"] = "10000706VVV",
540 vsum4sbs_3 = "10000708VVV",
541 vsubshs_3 = "10000740VVV",
542 vclzh_2 = "10000742V-V",
543 vpopcnth_2 = "10000743V-V",
544 ["vcmpgtsh._3"] = "10000746VVV",
545 vsubsws_3 = "10000780VVV",
546 vclzw_2 = "10000782V-V",
547 vpopcntw_2 = "10000783V-V",
548 ["vcmpgtsw._3"] = "10000786VVV",
549 vsumsws_3 = "10000788VVV",
550 vmrgew_3 = "1000078cVVV",
551 vclzd_2 = "100007c2V-V",
552 vpopcntd_2 = "100007c3V-V",
553 ["vcmpbfp._3"] = "100007c6VVV",
554 ["vcmpgtsd._3"] = "100007c7VVV",
555
300 -- Primary opcode 19: 556 -- Primary opcode 19:
301 mcrf_2 = "4c000000XX", 557 mcrf_2 = "4c000000XX",
302 isync_0 = "4c00012c", 558 isync_0 = "4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
316 bclrl_2 = "4c000021AA", 572 bclrl_2 = "4c000021AA",
317 bcctr_2 = "4c000420AA", 573 bcctr_2 = "4c000420AA",
318 bcctrl_2 = "4c000421AA", 574 bcctrl_2 = "4c000421AA",
575 bctar_2 = "4c000460AA",
576 bctarl_2 = "4c000461AA",
319 blr_0 = "4e800020", 577 blr_0 = "4e800020",
320 blrl_0 = "4e800021", 578 blrl_0 = "4e800021",
321 bctr_0 = "4e800420", 579 bctr_0 = "4e800420",
@@ -327,6 +585,7 @@ local map_op = {
327 cmpd_3 = "7c200000XRR", 585 cmpd_3 = "7c200000XRR",
328 cmpd_2 = "7c200000-RR", 586 cmpd_2 = "7c200000-RR",
329 tw_3 = "7c000008ARR", 587 tw_3 = "7c000008ARR",
588 lvsl_3 = "7c00000cVRR",
330 subfc_3 = "7c000010RRR.", 589 subfc_3 = "7c000010RRR.",
331 subc_3 = "7c000010RRR~.", 590 subc_3 = "7c000010RRR~.",
332 mulhdu_3 = "7c000012RRR.", 591 mulhdu_3 = "7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
351 cmplw_2 = "7c000040-RR", 610 cmplw_2 = "7c000040-RR",
352 cmpld_3 = "7c200040XRR", 611 cmpld_3 = "7c200040XRR",
353 cmpld_2 = "7c200040-RR", 612 cmpld_2 = "7c200040-RR",
613 lvsr_3 = "7c00004cVRR",
354 subf_3 = "7c000050RRR.", 614 subf_3 = "7c000050RRR.",
355 sub_3 = "7c000050RRR~.", 615 sub_3 = "7c000050RRR~.",
616 lbarx_3 = "7c000068RR0R",
356 ldux_3 = "7c00006aRR0R", 617 ldux_3 = "7c00006aRR0R",
357 dcbst_2 = "7c00006c-RR", 618 dcbst_2 = "7c00006c-RR",
358 lwzux_3 = "7c00006eRR0R", 619 lwzux_3 = "7c00006eRR0R",
359 cntlzd_2 = "7c000074RR~", 620 cntlzd_2 = "7c000074RR~",
360 andc_3 = "7c000078RR~R.", 621 andc_3 = "7c000078RR~R.",
361 td_3 = "7c000088ARR", 622 td_3 = "7c000088ARR",
623 lvewx_3 = "7c00008eVRR",
362 mulhd_3 = "7c000092RRR.", 624 mulhd_3 = "7c000092RRR.",
625 addg6s_3 = "7c000094RRR",
363 mulhw_3 = "7c000096RRR.", 626 mulhw_3 = "7c000096RRR.",
627 dlmzb_3 = "7c00009cRR~R.",
364 ldarx_3 = "7c0000a8RR0R", 628 ldarx_3 = "7c0000a8RR0R",
365 dcbf_2 = "7c0000ac-RR", 629 dcbf_2 = "7c0000ac-RR",
366 lbzx_3 = "7c0000aeRR0R", 630 lbzx_3 = "7c0000aeRR0R",
631 lvx_3 = "7c0000ceVRR",
367 neg_2 = "7c0000d0RR.", 632 neg_2 = "7c0000d0RR.",
633 lharx_3 = "7c0000e8RR0R",
368 lbzux_3 = "7c0000eeRR0R", 634 lbzux_3 = "7c0000eeRR0R",
369 popcntb_2 = "7c0000f4RR~", 635 popcntb_2 = "7c0000f4RR~",
370 not_2 = "7c0000f8RR~%.", 636 not_2 = "7c0000f8RR~%.",
371 nor_3 = "7c0000f8RR~R.", 637 nor_3 = "7c0000f8RR~R.",
638 stvebx_3 = "7c00010eVRR",
372 subfe_3 = "7c000110RRR.", 639 subfe_3 = "7c000110RRR.",
373 sube_3 = "7c000110RRR~.", 640 sube_3 = "7c000110RRR~.",
374 adde_3 = "7c000114RRR.", 641 adde_3 = "7c000114RRR.",
375 stdx_3 = "7c00012aRR0R", 642 stdx_3 = "7c00012aRR0R",
376 stwcx_3 = "7c00012cRR0R.", 643 ["stwcx._3"] = "7c00012dRR0R.",
377 stwx_3 = "7c00012eRR0R", 644 stwx_3 = "7c00012eRR0R",
378 prtyw_2 = "7c000134RR~", 645 prtyw_2 = "7c000134RR~",
646 stvehx_3 = "7c00014eVRR",
379 stdux_3 = "7c00016aRR0R", 647 stdux_3 = "7c00016aRR0R",
648 ["stqcx._3"] = "7c00016dR:R0R.",
380 stwux_3 = "7c00016eRR0R", 649 stwux_3 = "7c00016eRR0R",
381 prtyd_2 = "7c000174RR~", 650 prtyd_2 = "7c000174RR~",
651 stvewx_3 = "7c00018eVRR",
382 subfze_2 = "7c000190RR.", 652 subfze_2 = "7c000190RR.",
383 addze_2 = "7c000194RR.", 653 addze_2 = "7c000194RR.",
384 stdcx_3 = "7c0001acRR0R.", 654 ["stdcx._3"] = "7c0001adRR0R.",
385 stbx_3 = "7c0001aeRR0R", 655 stbx_3 = "7c0001aeRR0R",
656 stvx_3 = "7c0001ceVRR",
386 subfme_2 = "7c0001d0RR.", 657 subfme_2 = "7c0001d0RR.",
387 mulld_3 = "7c0001d2RRR.", 658 mulld_3 = "7c0001d2RRR.",
388 addme_2 = "7c0001d4RR.", 659 addme_2 = "7c0001d4RR.",
389 mullw_3 = "7c0001d6RRR.", 660 mullw_3 = "7c0001d6RRR.",
390 dcbtst_2 = "7c0001ec-RR", 661 dcbtst_2 = "7c0001ec-RR",
391 stbux_3 = "7c0001eeRR0R", 662 stbux_3 = "7c0001eeRR0R",
663 bpermd_3 = "7c0001f8RR~R",
664 lvepxl_3 = "7c00020eVRR",
392 add_3 = "7c000214RRR.", 665 add_3 = "7c000214RRR.",
666 lqarx_3 = "7c000228R:R0R",
393 dcbt_2 = "7c00022c-RR", 667 dcbt_2 = "7c00022c-RR",
394 lhzx_3 = "7c00022eRR0R", 668 lhzx_3 = "7c00022eRR0R",
669 cdtbcd_2 = "7c000234RR~",
395 eqv_3 = "7c000238RR~R.", 670 eqv_3 = "7c000238RR~R.",
671 lvepx_3 = "7c00024eVRR",
396 eciwx_3 = "7c00026cRR0R", 672 eciwx_3 = "7c00026cRR0R",
397 lhzux_3 = "7c00026eRR0R", 673 lhzux_3 = "7c00026eRR0R",
674 cbcdtd_2 = "7c000274RR~",
398 xor_3 = "7c000278RR~R.", 675 xor_3 = "7c000278RR~R.",
399 mfspefscr_1 = "7c0082a6R", 676 mfspefscr_1 = "7c0082a6R",
400 mfxer_1 = "7c0102a6R", 677 mfxer_1 = "7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
404 lhax_3 = "7c0002aeRR0R", 681 lhax_3 = "7c0002aeRR0R",
405 mftb_1 = "7c0c42e6R", 682 mftb_1 = "7c0c42e6R",
406 mftbu_1 = "7c0d42e6R", 683 mftbu_1 = "7c0d42e6R",
684 lvxl_3 = "7c0002ceVRR",
407 lwaux_3 = "7c0002eaRR0R", 685 lwaux_3 = "7c0002eaRR0R",
408 lhaux_3 = "7c0002eeRR0R", 686 lhaux_3 = "7c0002eeRR0R",
687 popcntw_2 = "7c0002f4RR~",
688 divdeu_3 = "7c000312RRR.",
689 divweu_3 = "7c000316RRR.",
409 sthx_3 = "7c00032eRR0R", 690 sthx_3 = "7c00032eRR0R",
410 orc_3 = "7c000338RR~R.", 691 orc_3 = "7c000338RR~R.",
411 ecowx_3 = "7c00036cRR0R", 692 ecowx_3 = "7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
420 mtctr_1 = "7c0903a6R", 701 mtctr_1 = "7c0903a6R",
421 dcbi_2 = "7c0003ac-RR", 702 dcbi_2 = "7c0003ac-RR",
422 nand_3 = "7c0003b8RR~R.", 703 nand_3 = "7c0003b8RR~R.",
704 dsn_2 = "7c0003c6-RR",
705 stvxl_3 = "7c0003ceVRR",
423 divd_3 = "7c0003d2RRR.", 706 divd_3 = "7c0003d2RRR.",
424 divw_3 = "7c0003d6RRR.", 707 divw_3 = "7c0003d6RRR.",
708 popcntd_2 = "7c0003f4RR~",
425 cmpb_3 = "7c0003f8RR~R.", 709 cmpb_3 = "7c0003f8RR~R.",
426 mcrxr_1 = "7c000400X", 710 mcrxr_1 = "7c000400X",
711 lbdx_3 = "7c000406RRR",
427 subfco_3 = "7c000410RRR.", 712 subfco_3 = "7c000410RRR.",
428 subco_3 = "7c000410RRR~.", 713 subco_3 = "7c000410RRR~.",
429 addco_3 = "7c000414RRR.", 714 addco_3 = "7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
433 lfsx_3 = "7c00042eFR0R", 718 lfsx_3 = "7c00042eFR0R",
434 srw_3 = "7c000430RR~R.", 719 srw_3 = "7c000430RR~R.",
435 srd_3 = "7c000436RR~R.", 720 srd_3 = "7c000436RR~R.",
721 lhdx_3 = "7c000446RRR",
436 subfo_3 = "7c000450RRR.", 722 subfo_3 = "7c000450RRR.",
437 subo_3 = "7c000450RRR~.", 723 subo_3 = "7c000450RRR~.",
438 lfsux_3 = "7c00046eFR0R", 724 lfsux_3 = "7c00046eFR0R",
725 lwdx_3 = "7c000486RRR",
439 lswi_3 = "7c0004aaRR0A", 726 lswi_3 = "7c0004aaRR0A",
440 sync_0 = "7c0004ac", 727 sync_0 = "7c0004ac",
441 lwsync_0 = "7c2004ac", 728 lwsync_0 = "7c2004ac",
442 ptesync_0 = "7c4004ac", 729 ptesync_0 = "7c4004ac",
443 lfdx_3 = "7c0004aeFR0R", 730 lfdx_3 = "7c0004aeFR0R",
731 lddx_3 = "7c0004c6RRR",
444 nego_2 = "7c0004d0RR.", 732 nego_2 = "7c0004d0RR.",
445 lfdux_3 = "7c0004eeFR0R", 733 lfdux_3 = "7c0004eeFR0R",
734 stbdx_3 = "7c000506RRR",
446 subfeo_3 = "7c000510RRR.", 735 subfeo_3 = "7c000510RRR.",
447 subeo_3 = "7c000510RRR~.", 736 subeo_3 = "7c000510RRR~.",
448 addeo_3 = "7c000514RRR.", 737 addeo_3 = "7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
450 stswx_3 = "7c00052aRR0R", 739 stswx_3 = "7c00052aRR0R",
451 stwbrx_3 = "7c00052cRR0R", 740 stwbrx_3 = "7c00052cRR0R",
452 stfsx_3 = "7c00052eFR0R", 741 stfsx_3 = "7c00052eFR0R",
742 sthdx_3 = "7c000546RRR",
743 ["stbcx._3"] = "7c00056dRRR",
453 stfsux_3 = "7c00056eFR0R", 744 stfsux_3 = "7c00056eFR0R",
745 stwdx_3 = "7c000586RRR",
454 subfzeo_2 = "7c000590RR.", 746 subfzeo_2 = "7c000590RR.",
455 addzeo_2 = "7c000594RR.", 747 addzeo_2 = "7c000594RR.",
456 stswi_3 = "7c0005aaRR0A", 748 stswi_3 = "7c0005aaRR0A",
749 ["sthcx._3"] = "7c0005adRRR",
457 stfdx_3 = "7c0005aeFR0R", 750 stfdx_3 = "7c0005aeFR0R",
751 stddx_3 = "7c0005c6RRR",
458 subfmeo_2 = "7c0005d0RR.", 752 subfmeo_2 = "7c0005d0RR.",
459 mulldo_3 = "7c0005d2RRR.", 753 mulldo_3 = "7c0005d2RRR.",
460 addmeo_2 = "7c0005d4RR.", 754 addmeo_2 = "7c0005d4RR.",
461 mullwo_3 = "7c0005d6RRR.", 755 mullwo_3 = "7c0005d6RRR.",
462 dcba_2 = "7c0005ec-RR", 756 dcba_2 = "7c0005ec-RR",
463 stfdux_3 = "7c0005eeFR0R", 757 stfdux_3 = "7c0005eeFR0R",
758 stvepxl_3 = "7c00060eVRR",
464 addo_3 = "7c000614RRR.", 759 addo_3 = "7c000614RRR.",
465 lhbrx_3 = "7c00062cRR0R", 760 lhbrx_3 = "7c00062cRR0R",
761 lfdpx_3 = "7c00062eF:RR",
466 sraw_3 = "7c000630RR~R.", 762 sraw_3 = "7c000630RR~R.",
467 srad_3 = "7c000634RR~R.", 763 srad_3 = "7c000634RR~R.",
764 lfddx_3 = "7c000646FRR",
765 stvepx_3 = "7c00064eVRR",
468 srawi_3 = "7c000670RR~A.", 766 srawi_3 = "7c000670RR~A.",
469 sradi_3 = "7c000674RR~H.", 767 sradi_3 = "7c000674RR~H.",
470 eieio_0 = "7c0006ac", 768 eieio_0 = "7c0006ac",
471 lfiwax_3 = "7c0006aeFR0R", 769 lfiwax_3 = "7c0006aeFR0R",
770 divdeuo_3 = "7c000712RRR.",
771 divweuo_3 = "7c000716RRR.",
472 sthbrx_3 = "7c00072cRR0R", 772 sthbrx_3 = "7c00072cRR0R",
773 stfdpx_3 = "7c00072eF:RR",
473 extsh_2 = "7c000734RR~.", 774 extsh_2 = "7c000734RR~.",
775 stfddx_3 = "7c000746FRR",
776 divdeo_3 = "7c000752RRR.",
777 divweo_3 = "7c000756RRR.",
474 extsb_2 = "7c000774RR~.", 778 extsb_2 = "7c000774RR~.",
475 divduo_3 = "7c000792RRR.", 779 divduo_3 = "7c000792RRR.",
476 divwou_3 = "7c000796RRR.", 780 divwou_3 = "7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
481 divwo_3 = "7c0007d6RRR.", 785 divwo_3 = "7c0007d6RRR.",
482 dcbz_2 = "7c0007ec-RR", 786 dcbz_2 = "7c0007ec-RR",
483 787
788 ["tbegin._1"] = "7c00051d1",
789 ["tbegin._0"] = "7c00051d",
790 ["tend._1"] = "7c00055dY",
791 ["tend._0"] = "7c00055d",
792 ["tendall._0"] = "7e00055d",
793 tcheck_1 = "7c00059cX",
794 ["tsr._1"] = "7c0005dd1",
795 ["tsuspend._0"] = "7c0005dd",
796 ["tresume._0"] = "7c2005dd",
797 ["tabortwc._3"] = "7c00061dARR",
798 ["tabortdc._3"] = "7c00065dARR",
799 ["tabortwci._3"] = "7c00069dARS",
800 ["tabortdci._3"] = "7c0006ddARS",
801 ["tabort._1"] = "7c00071d-R-",
802 ["treclaim._1"] = "7c00075d-R",
803 ["trechkpt._0"] = "7c0007dd",
804
805 lxsiwzx_3 = "7c000018QRR",
806 lxsiwax_3 = "7c000098QRR",
807 mfvsrd_2 = "7c000066-Rq",
808 mfvsrwz_2 = "7c0000e6-Rq",
809 stxsiwx_3 = "7c000118QRR",
810 mtvsrd_2 = "7c000166QR",
811 mtvsrwa_2 = "7c0001a6QR",
812 lxvdsx_3 = "7c000298QRR",
813 lxsspx_3 = "7c000418QRR",
814 lxsdx_3 = "7c000498QRR",
815 stxsspx_3 = "7c000518QRR",
816 stxsdx_3 = "7c000598QRR",
817 lxvw4x_3 = "7c000618QRR",
818 lxvd2x_3 = "7c000698QRR",
819 stxvw4x_3 = "7c000718QRR",
820 stxvd2x_3 = "7c000798QRR",
821
484 -- Primary opcode 30: 822 -- Primary opcode 30:
485 rldicl_4 = "78000000RR~HM.", 823 rldicl_4 = "78000000RR~HM.",
486 rldicr_4 = "78000004RR~HM.", 824 rldicr_4 = "78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
489 rldcl_4 = "78000010RR~RM.", 827 rldcl_4 = "78000010RR~RM.",
490 rldcr_4 = "78000012RR~RM.", 828 rldcr_4 = "78000012RR~RM.",
491 829
830 rotldi_3 = op_alias("rldicl_4", function(p)
831 p[4] = "0"
832 end),
833 rotrdi_3 = op_alias("rldicl_4", function(p)
834 p[3] = "64-("..p[3]..")"; p[4] = "0"
835 end),
836 rotld_3 = op_alias("rldcl_4", function(p)
837 p[4] = "0"
838 end),
839 sldi_3 = op_alias("rldicr_4", function(p)
840 p[4] = "63-("..p[3]..")"
841 end),
842 srdi_3 = op_alias("rldicl_4", function(p)
843 p[4] = p[3]; p[3] = "64-("..p[3]..")"
844 end),
845 clrldi_3 = op_alias("rldicl_4", function(p)
846 p[4] = p[3]; p[3] = "0"
847 end),
848 clrrdi_3 = op_alias("rldicr_4", function(p)
849 p[4] = "63-("..p[3]..")"; p[3] = "0"
850 end),
851
852 -- Primary opcode 56:
853 lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8.
854
855 -- Primary opcode 57:
856 lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4.
857
492 -- Primary opcode 59: 858 -- Primary opcode 59:
493 fdivs_3 = "ec000024FFF.", 859 fdivs_3 = "ec000024FFF.",
494 fsubs_3 = "ec000028FFF.", 860 fsubs_3 = "ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
501 fmadds_4 = "ec00003aFFFF~.", 867 fmadds_4 = "ec00003aFFFF~.",
502 fnmsubs_4 = "ec00003cFFFF~.", 868 fnmsubs_4 = "ec00003cFFFF~.",
503 fnmadds_4 = "ec00003eFFFF~.", 869 fnmadds_4 = "ec00003eFFFF~.",
870 fcfids_2 = "ec00069cF-F.",
871 fcfidus_2 = "ec00079cF-F.",
872
873 dadd_3 = "ec000004FFF.",
874 dqua_4 = "ec000006FFFZ.",
875 dmul_3 = "ec000044FFF.",
876 drrnd_4 = "ec000046FFFZ.",
877 dscli_3 = "ec000084FF6.",
878 dquai_4 = "ec000086SF~FZ.",
879 dscri_3 = "ec0000c4FF6.",
880 drintx_4 = "ec0000c61F~FZ.",
881 dcmpo_3 = "ec000104XFF",
882 dtstex_3 = "ec000144XFF",
883 dtstdc_3 = "ec000184XF6",
884 dtstdg_3 = "ec0001c4XF6",
885 drintn_4 = "ec0001c61F~FZ.",
886 dctdp_2 = "ec000204F-F.",
887 dctfix_2 = "ec000244F-F.",
888 ddedpd_3 = "ec000284ZF~F.",
889 dxex_2 = "ec0002c4F-F.",
890 dsub_3 = "ec000404FFF.",
891 ddiv_3 = "ec000444FFF.",
892 dcmpu_3 = "ec000504XFF",
893 dtstsf_3 = "ec000544XFF",
894 drsp_2 = "ec000604F-F.",
895 dcffix_2 = "ec000644F-F.",
896 denbcd_3 = "ec000684YF~F.",
897 diex_3 = "ec0006c4FFF.",
898
899 -- Primary opcode 60:
900 xsaddsp_3 = "f0000000QQQ",
901 xsmaddasp_3 = "f0000008QQQ",
902 xxsldwi_4 = "f0000010QQQz",
903 xsrsqrtesp_2 = "f0000028Q-Q",
904 xssqrtsp_2 = "f000002cQ-Q",
905 xxsel_4 = "f0000030QQQQ",
906 xssubsp_3 = "f0000040QQQ",
907 xsmaddmsp_3 = "f0000048QQQ",
908 xxpermdi_4 = "f0000050QQQz",
909 xsresp_2 = "f0000068Q-Q",
910 xsmulsp_3 = "f0000080QQQ",
911 xsmsubasp_3 = "f0000088QQQ",
912 xxmrghw_3 = "f0000090QQQ",
913 xsdivsp_3 = "f00000c0QQQ",
914 xsmsubmsp_3 = "f00000c8QQQ",
915 xsadddp_3 = "f0000100QQQ",
916 xsmaddadp_3 = "f0000108QQQ",
917 xscmpudp_3 = "f0000118XQQ",
918 xscvdpuxws_2 = "f0000120Q-Q",
919 xsrdpi_2 = "f0000124Q-Q",
920 xsrsqrtedp_2 = "f0000128Q-Q",
921 xssqrtdp_2 = "f000012cQ-Q",
922 xssubdp_3 = "f0000140QQQ",
923 xsmaddmdp_3 = "f0000148QQQ",
924 xscmpodp_3 = "f0000158XQQ",
925 xscvdpsxws_2 = "f0000160Q-Q",
926 xsrdpiz_2 = "f0000164Q-Q",
927 xsredp_2 = "f0000168Q-Q",
928 xsmuldp_3 = "f0000180QQQ",
929 xsmsubadp_3 = "f0000188QQQ",
930 xxmrglw_3 = "f0000190QQQ",
931 xsrdpip_2 = "f00001a4Q-Q",
932 xstsqrtdp_2 = "f00001a8X-Q",
933 xsrdpic_2 = "f00001acQ-Q",
934 xsdivdp_3 = "f00001c0QQQ",
935 xsmsubmdp_3 = "f00001c8QQQ",
936 xsrdpim_2 = "f00001e4Q-Q",
937 xstdivdp_3 = "f00001e8XQQ",
938 xvaddsp_3 = "f0000200QQQ",
939 xvmaddasp_3 = "f0000208QQQ",
940 xvcmpeqsp_3 = "f0000218QQQ",
941 xvcvspuxws_2 = "f0000220Q-Q",
942 xvrspi_2 = "f0000224Q-Q",
943 xvrsqrtesp_2 = "f0000228Q-Q",
944 xvsqrtsp_2 = "f000022cQ-Q",
945 xvsubsp_3 = "f0000240QQQ",
946 xvmaddmsp_3 = "f0000248QQQ",
947 xvcmpgtsp_3 = "f0000258QQQ",
948 xvcvspsxws_2 = "f0000260Q-Q",
949 xvrspiz_2 = "f0000264Q-Q",
950 xvresp_2 = "f0000268Q-Q",
951 xvmulsp_3 = "f0000280QQQ",
952 xvmsubasp_3 = "f0000288QQQ",
953 xxspltw_3 = "f0000290QQg~",
954 xvcmpgesp_3 = "f0000298QQQ",
955 xvcvuxwsp_2 = "f00002a0Q-Q",
956 xvrspip_2 = "f00002a4Q-Q",
957 xvtsqrtsp_2 = "f00002a8X-Q",
958 xvrspic_2 = "f00002acQ-Q",
959 xvdivsp_3 = "f00002c0QQQ",
960 xvmsubmsp_3 = "f00002c8QQQ",
961 xvcvsxwsp_2 = "f00002e0Q-Q",
962 xvrspim_2 = "f00002e4Q-Q",
963 xvtdivsp_3 = "f00002e8XQQ",
964 xvadddp_3 = "f0000300QQQ",
965 xvmaddadp_3 = "f0000308QQQ",
966 xvcmpeqdp_3 = "f0000318QQQ",
967 xvcvdpuxws_2 = "f0000320Q-Q",
968 xvrdpi_2 = "f0000324Q-Q",
969 xvrsqrtedp_2 = "f0000328Q-Q",
970 xvsqrtdp_2 = "f000032cQ-Q",
971 xvsubdp_3 = "f0000340QQQ",
972 xvmaddmdp_3 = "f0000348QQQ",
973 xvcmpgtdp_3 = "f0000358QQQ",
974 xvcvdpsxws_2 = "f0000360Q-Q",
975 xvrdpiz_2 = "f0000364Q-Q",
976 xvredp_2 = "f0000368Q-Q",
977 xvmuldp_3 = "f0000380QQQ",
978 xvmsubadp_3 = "f0000388QQQ",
979 xvcmpgedp_3 = "f0000398QQQ",
980 xvcvuxwdp_2 = "f00003a0Q-Q",
981 xvrdpip_2 = "f00003a4Q-Q",
982 xvtsqrtdp_2 = "f00003a8X-Q",
983 xvrdpic_2 = "f00003acQ-Q",
984 xvdivdp_3 = "f00003c0QQQ",
985 xvmsubmdp_3 = "f00003c8QQQ",
986 xvcvsxwdp_2 = "f00003e0Q-Q",
987 xvrdpim_2 = "f00003e4Q-Q",
988 xvtdivdp_3 = "f00003e8XQQ",
989 xsnmaddasp_3 = "f0000408QQQ",
990 xxland_3 = "f0000410QQQ",
991 xscvdpsp_2 = "f0000424Q-Q",
992 xscvdpspn_2 = "f000042cQ-Q",
993 xsnmaddmsp_3 = "f0000448QQQ",
994 xxlandc_3 = "f0000450QQQ",
995 xsrsp_2 = "f0000464Q-Q",
996 xsnmsubasp_3 = "f0000488QQQ",
997 xxlor_3 = "f0000490QQQ",
998 xscvuxdsp_2 = "f00004a0Q-Q",
999 xsnmsubmsp_3 = "f00004c8QQQ",
1000 xxlxor_3 = "f00004d0QQQ",
1001 xscvsxdsp_2 = "f00004e0Q-Q",
1002 xsmaxdp_3 = "f0000500QQQ",
1003 xsnmaddadp_3 = "f0000508QQQ",
1004 xxlnor_3 = "f0000510QQQ",
1005 xscvdpuxds_2 = "f0000520Q-Q",
1006 xscvspdp_2 = "f0000524Q-Q",
1007 xscvspdpn_2 = "f000052cQ-Q",
1008 xsmindp_3 = "f0000540QQQ",
1009 xsnmaddmdp_3 = "f0000548QQQ",
1010 xxlorc_3 = "f0000550QQQ",
1011 xscvdpsxds_2 = "f0000560Q-Q",
1012 xsabsdp_2 = "f0000564Q-Q",
1013 xscpsgndp_3 = "f0000580QQQ",
1014 xsnmsubadp_3 = "f0000588QQQ",
1015 xxlnand_3 = "f0000590QQQ",
1016 xscvuxddp_2 = "f00005a0Q-Q",
1017 xsnabsdp_2 = "f00005a4Q-Q",
1018 xsnmsubmdp_3 = "f00005c8QQQ",
1019 xxleqv_3 = "f00005d0QQQ",
1020 xscvsxddp_2 = "f00005e0Q-Q",
1021 xsnegdp_2 = "f00005e4Q-Q",
1022 xvmaxsp_3 = "f0000600QQQ",
1023 xvnmaddasp_3 = "f0000608QQQ",
1024 ["xvcmpeqsp._3"] = "f0000618QQQ",
1025 xvcvspuxds_2 = "f0000620Q-Q",
1026 xvcvdpsp_2 = "f0000624Q-Q",
1027 xvminsp_3 = "f0000640QQQ",
1028 xvnmaddmsp_3 = "f0000648QQQ",
1029 ["xvcmpgtsp._3"] = "f0000658QQQ",
1030 xvcvspsxds_2 = "f0000660Q-Q",
1031 xvabssp_2 = "f0000664Q-Q",
1032 xvcpsgnsp_3 = "f0000680QQQ",
1033 xvnmsubasp_3 = "f0000688QQQ",
1034 ["xvcmpgesp._3"] = "f0000698QQQ",
1035 xvcvuxdsp_2 = "f00006a0Q-Q",
1036 xvnabssp_2 = "f00006a4Q-Q",
1037 xvnmsubmsp_3 = "f00006c8QQQ",
1038 xvcvsxdsp_2 = "f00006e0Q-Q",
1039 xvnegsp_2 = "f00006e4Q-Q",
1040 xvmaxdp_3 = "f0000700QQQ",
1041 xvnmaddadp_3 = "f0000708QQQ",
1042 ["xvcmpeqdp._3"] = "f0000718QQQ",
1043 xvcvdpuxds_2 = "f0000720Q-Q",
1044 xvcvspdp_2 = "f0000724Q-Q",
1045 xvmindp_3 = "f0000740QQQ",
1046 xvnmaddmdp_3 = "f0000748QQQ",
1047 ["xvcmpgtdp._3"] = "f0000758QQQ",
1048 xvcvdpsxds_2 = "f0000760Q-Q",
1049 xvabsdp_2 = "f0000764Q-Q",
1050 xvcpsgndp_3 = "f0000780QQQ",
1051 xvnmsubadp_3 = "f0000788QQQ",
1052 ["xvcmpgedp._3"] = "f0000798QQQ",
1053 xvcvuxddp_2 = "f00007a0Q-Q",
1054 xvnabsdp_2 = "f00007a4Q-Q",
1055 xvnmsubmdp_3 = "f00007c8QQQ",
1056 xvcvsxddp_2 = "f00007e0Q-Q",
1057 xvnegdp_2 = "f00007e4Q-Q",
1058
1059 -- Primary opcode 61:
1060 stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4.
1061
1062 -- Primary opcode 62:
1063 stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8.
504 1064
505 -- Primary opcode 63: 1065 -- Primary opcode 63:
506 fdiv_3 = "fc000024FFF.", 1066 fdiv_3 = "fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
526 frsp_2 = "fc000018F-F.", 1086 frsp_2 = "fc000018F-F.",
527 fctiw_2 = "fc00001cF-F.", 1087 fctiw_2 = "fc00001cF-F.",
528 fctiwz_2 = "fc00001eF-F.", 1088 fctiwz_2 = "fc00001eF-F.",
1089 ftdiv_2 = "fc000100X-F.",
1090 fctiwu_2 = "fc00011cF-F.",
1091 fctiwuz_2 = "fc00011eF-F.",
529 mtfsfi_2 = "fc00010cAA", -- NYI: upshift. 1092 mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
530 fnabs_2 = "fc000110F-F.", 1093 fnabs_2 = "fc000110F-F.",
1094 ftsqrt_2 = "fc000140X-F.",
531 fabs_2 = "fc000210F-F.", 1095 fabs_2 = "fc000210F-F.",
532 frin_2 = "fc000310F-F.", 1096 frin_2 = "fc000310F-F.",
533 friz_2 = "fc000350F-F.", 1097 friz_2 = "fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
537 -- NYI: mtfsf, mtfsb0, mtfsb1. 1101 -- NYI: mtfsf, mtfsb0, mtfsb1.
538 fctid_2 = "fc00065cF-F.", 1102 fctid_2 = "fc00065cF-F.",
539 fctidz_2 = "fc00065eF-F.", 1103 fctidz_2 = "fc00065eF-F.",
1104 fmrgow_3 = "fc00068cFFF",
540 fcfid_2 = "fc00069cF-F.", 1105 fcfid_2 = "fc00069cF-F.",
1106 fctidu_2 = "fc00075cF-F.",
1107 fctiduz_2 = "fc00075eF-F.",
1108 fmrgew_3 = "fc00078cFFF",
1109 fcfidu_2 = "fc00079cF-F.",
1110
1111 daddq_3 = "fc000004F:F:F:.",
1112 dquaq_4 = "fc000006F:F:F:Z.",
1113 dmulq_3 = "fc000044F:F:F:.",
1114 drrndq_4 = "fc000046F:F:F:Z.",
1115 dscliq_3 = "fc000084F:F:6.",
1116 dquaiq_4 = "fc000086SF:~F:Z.",
1117 dscriq_3 = "fc0000c4F:F:6.",
1118 drintxq_4 = "fc0000c61F:~F:Z.",
1119 dcmpoq_3 = "fc000104XF:F:",
1120 dtstexq_3 = "fc000144XF:F:",
1121 dtstdcq_3 = "fc000184XF:6",
1122 dtstdgq_3 = "fc0001c4XF:6",
1123 drintnq_4 = "fc0001c61F:~F:Z.",
1124 dctqpq_2 = "fc000204F:-F:.",
1125 dctfixq_2 = "fc000244F:-F:.",
1126 ddedpdq_3 = "fc000284ZF:~F:.",
1127 dxexq_2 = "fc0002c4F:-F:.",
1128 dsubq_3 = "fc000404F:F:F:.",
1129 ddivq_3 = "fc000444F:F:F:.",
1130 dcmpuq_3 = "fc000504XF:F:",
1131 dtstsfq_3 = "fc000544XF:F:",
1132 drdpq_2 = "fc000604F:-F:.",
1133 dcffixq_2 = "fc000644F:-F:.",
1134 denbcdq_3 = "fc000684YF:~F:.",
1135 diexq_3 = "fc0006c4F:FF:.",
541 1136
542 -- Primary opcode 4, SPE APU extension: 1137 -- Primary opcode 4, SPE APU extension:
543 evaddw_3 = "10000200RRR", 1138 evaddw_3 = "10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
822do 1417do
823 local t = {} 1418 local t = {}
824 for k,v in pairs(map_op) do 1419 for k,v in pairs(map_op) do
825 if sub(v, -1) == "." then 1420 if type(v) == "string" and sub(v, -1) == "." then
826 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) 1421 local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
827 t[sub(k, 1, -3).."."..sub(k, -2)] = v2 1422 t[sub(k, 1, -3).."."..sub(k, -2)] = v2
828 end 1423 end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
884 werror("bad register name `"..expr.."'") 1479 werror("bad register name `"..expr.."'")
885end 1480end
886 1481
1482local function parse_vr(expr)
1483 local r = match(expr, "^v([1-3]?[0-9])$")
1484 if r then
1485 r = tonumber(r)
1486 if r <= 31 then return r end
1487 end
1488 werror("bad register name `"..expr.."'")
1489end
1490
1491local function parse_vs(expr)
1492 local r = match(expr, "^vs([1-6]?[0-9])$")
1493 if r then
1494 r = tonumber(r)
1495 if r <= 63 then return r end
1496 end
1497 werror("bad register name `"..expr.."'")
1498end
1499
887local function parse_cr(expr) 1500local function parse_cr(expr)
888 local r = match(expr, "^cr([0-7])$") 1501 local r = match(expr, "^cr([0-7])$")
889 if r then return tonumber(r) end 1502 if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
900 werror("bad condition bit name `"..expr.."'") 1513 werror("bad condition bit name `"..expr.."'")
901end 1514end
902 1515
1516local parse_ctx = {}
1517
1518local loadenv = setfenv and function(s)
1519 local code = loadstring(s, "")
1520 if code then setfenv(code, parse_ctx) end
1521 return code
1522end or function(s)
1523 return load(s, "", nil, parse_ctx)
1524end
1525
1526-- Try to parse simple arithmetic, too, since some basic ops are aliases.
1527local function parse_number(n)
1528 local x = tonumber(n)
1529 if x then return x end
1530 local code = loadenv("return "..n)
1531 if code then
1532 local ok, y = pcall(code)
1533 if ok then return y end
1534 end
1535 return nil
1536end
1537
903local function parse_imm(imm, bits, shift, scale, signed) 1538local function parse_imm(imm, bits, shift, scale, signed)
904 local n = tonumber(imm) 1539 local n = parse_number(imm)
905 if n then 1540 if n then
906 local m = sar(n, scale) 1541 local m = sar(n, scale)
907 if shl(m, scale) == n then 1542 if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
914 end 1549 end
915 end 1550 end
916 werror("out of range immediate `"..imm.."'") 1551 werror("out of range immediate `"..imm.."'")
917 elseif match(imm, "^r([1-3]?[0-9])$") or 1552 elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
1553 match(imm, "^vs([1-6]?[0-9])$") or
918 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1554 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
919 werror("expected immediate operand, got register") 1555 werror("expected immediate operand, got register")
920 else 1556 else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
924end 1560end
925 1561
926local function parse_shiftmask(imm, isshift) 1562local function parse_shiftmask(imm, isshift)
927 local n = tonumber(imm) 1563 local n = parse_number(imm)
928 if n then 1564 if n then
929 if shr(n, 6) == 0 then 1565 if shr(n, 6) == 0 then
930 local lsb = band(imm, 31) 1566 local lsb = band(n, 31)
931 local msb = imm - lsb 1567 local msb = n - lsb
932 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) 1568 return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
933 end 1569 end
934 werror("out of range immediate `"..imm.."'") 1570 werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
936 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then 1572 match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
937 werror("expected immediate operand, got register") 1573 werror("expected immediate operand, got register")
938 else 1574 else
939 werror("NYI: parameterized 64 bit shift/mask") 1575 waction("IMMSH", isshift and 1 or 0, imm)
1576 return 0;
940 end 1577 end
941end 1578end
942 1579
@@ -1011,7 +1648,7 @@ end
1011------------------------------------------------------------------------------ 1648------------------------------------------------------------------------------
1012 1649
1013-- Handle opcodes defined with template strings. 1650-- Handle opcodes defined with template strings.
1014map_op[".template__"] = function(params, template, nparams) 1651op_template = function(params, template, nparams)
1015 if not params then return sub(template, 9) end 1652 if not params then return sub(template, 9) end
1016 local op = tonumber(sub(template, 1, 8), 16) 1653 local op = tonumber(sub(template, 1, 8), 16)
1017 local n, rs = 1, 26 1654 local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
1027 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 1664 rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
1028 elseif p == "F" then 1665 elseif p == "F" then
1029 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 1666 rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
1667 elseif p == "V" then
1668 rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
1669 elseif p == "Q" then
1670 local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
1671 local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
1672 op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
1673 elseif p == "q" then
1674 local vs = parse_vs(params[n]); n = n + 1
1675 op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
1030 elseif p == "A" then 1676 elseif p == "A" then
1031 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 1677 rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
1032 elseif p == "S" then 1678 elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
1047 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 1693 rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
1048 elseif p == "X" then 1694 elseif p == "X" then
1049 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 1695 rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
1696 elseif p == "1" then
1697 rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
1698 elseif p == "g" then
1699 rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
1700 elseif p == "3" then
1701 rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
1702 elseif p == "P" then
1703 rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1704 elseif p == "p" then
1705 op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
1706 elseif p == "6" then
1707 rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
1708 elseif p == "Y" then
1709 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
1710 elseif p == "y" then
1711 rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
1712 elseif p == "Z" then
1713 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
1714 elseif p == "z" then
1715 rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
1050 elseif p == "W" then 1716 elseif p == "W" then
1051 op = op + parse_cr(params[n]); n = n + 1 1717 op = op + parse_cr(params[n]); n = n + 1
1052 elseif p == "G" then 1718 elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
1071 local lo = band(op, mm) 1737 local lo = band(op, mm)
1072 local hi = band(op, shl(mm, 5)) 1738 local hi = band(op, shl(mm, 5))
1073 op = op - lo - hi + shl(lo, 5) + shr(hi, 5) 1739 op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
1740 elseif p == ":" then
1741 if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
1074 elseif p == "-" then 1742 elseif p == "-" then
1075 rs = rs - 5 1743 rs = rs - 5
1076 elseif p == "." then 1744 elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
1082 wputpos(pos, op) 1750 wputpos(pos, op)
1083end 1751end
1084 1752
1753map_op[".template__"] = op_template
1754
1085------------------------------------------------------------------------------ 1755------------------------------------------------------------------------------
1086 1756
1087-- Pseudo-opcode to mark the position where the action list is to be emitted. 1757-- Pseudo-opcode to mark the position where the action list is to be emitted.
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
index a7278e85..ba038e87 100644
--- a/dynasm/dasm_proto.h
+++ b/dynasm/dasm_proto.h
@@ -10,8 +10,8 @@
10#include <stddef.h> 10#include <stddef.h>
11#include <stdarg.h> 11#include <stdarg.h>
12 12
13#define DASM_IDENT "DynASM 1.3.0" 13#define DASM_IDENT "DynASM 1.4.0"
14#define DASM_VERSION 10300 /* 1.3.0 */ 14#define DASM_VERSION 10400 /* 1.4.0 */
15 15
16#ifndef Dst_DECL 16#ifndef Dst_DECL
17#define Dst_DECL dasm_State **Dst 17#define Dst_DECL dasm_State **Dst
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index 84b9d17f..edaddf54 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
170 dasm_State *D = Dst_REF; 170 dasm_State *D = Dst_REF;
171 dasm_ActList p = D->actionlist + start; 171 dasm_ActList p = D->actionlist + start;
172 dasm_Section *sec = D->section; 172 dasm_Section *sec = D->section;
173 int pos = sec->pos, ofs = sec->ofs, mrm = 4; 173 int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174 int *b; 174 int *b;
175 175
176 if (pos >= sec->epos) { 176 if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
193 b[pos++] = n; 193 b[pos++] = n;
194 switch (action) { 194 switch (action) {
195 case DASM_DISP: 195 case DASM_DISP:
196 if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } 196 if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197 /* fallthrough */ 197 /* fallthrough */
198 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ 198 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
199 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ 199 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
@@ -204,11 +204,17 @@ void dasm_put(Dst_DECL, int start, ...)
204 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; 204 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
205 case DASM_SPACE: p++; ofs += n; break; 205 case DASM_SPACE: p++; ofs += n; break;
206 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ 206 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
207 case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); 207 case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
208 if (*p++ == 1 && *p == DASM_DISP) mrm = n; 208 if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
209 if (*p < 0x20 && (n&7) == 4) ofs++;
210 switch ((*p++ >> 3) & 3) {
211 case 3: n |= b[pos-3]; /* fallthrough */
212 case 2: n |= b[pos-2]; /* fallthrough */
213 case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
214 }
209 continue; 215 continue;
210 } 216 }
211 mrm = 4; 217 mrm = -1;
212 } else { 218 } else {
213 int *pl, n; 219 int *pl, n;
214 switch (action) { 220 switch (action) {
@@ -399,7 +405,27 @@ int dasm_encode(Dst_DECL, void *buffer)
399 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; 405 case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
400 /* fallthrough */ 406 /* fallthrough */
401 case DASM_IMM_W: dasmw(n); break; 407 case DASM_IMM_W: dasmw(n); break;
402 case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } 408 case DASM_VREG: {
409 int t = *p++;
410 unsigned char *ex = cp - (t&7);
411 if ((n & 8) && t < 0xa0) {
412 if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
413 n &= 7;
414 } else if (n & 0x10) {
415 if (*ex & 0x80) {
416 *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
417 }
418 while (++ex < cp) ex[-1] = *ex;
419 if (mark) mark--;
420 cp--;
421 n &= 7;
422 }
423 if (t >= 0xc0) n <<= 4;
424 else if (t >= 0x40) n <<= 3;
425 else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
426 cp[-1] ^= n;
427 break;
428 }
403 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; 429 case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
404 b++; n = (int)(ptrdiff_t)D->globals[-n]; 430 b++; n = (int)(ptrdiff_t)D->globals[-n];
405 /* fallthrough */ 431 /* fallthrough */
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index 13aa68ff..c5c8c17b 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -11,9 +11,9 @@ local x64 = x64
11local _info = { 11local _info = {
12 arch = x64 and "x64" or "x86", 12 arch = x64 and "x64" or "x86",
13 description = "DynASM x86/x64 module", 13 description = "DynASM x86/x64 module",
14 version = "1.3.0", 14 version = "1.4.0",
15 vernum = 10300, 15 vernum = 10400,
16 release = "2011-05-05", 16 release = "2015-10-18",
17 author = "Mike Pall", 17 author = "Mike Pall",
18 license = "MIT", 18 license = "MIT",
19} 19}
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
27local _s = string 27local _s = string
28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char 28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub 29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30local concat, sort = table.concat, table.sort 30local concat, sort, remove = table.concat, table.sort, table.remove
31local bit = bit or require("bit") 31local bit = bit or require("bit")
32local band, shl, shr = bit.band, bit.lshift, bit.rshift 32local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33 33
34-- Inherited tables and callbacks. 34-- Inherited tables and callbacks.
35local g_opt, g_arch 35local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
41 -- int arg, 1 buffer pos: 41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", 42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num): 43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44 "VREG", "SPACE", -- !x64: VREG support NYI. 44 "VREG", "SPACE",
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64 45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
46 "SETLABEL", "REL_A", 46 "SETLABEL", "REL_A",
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): 47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83-- Current number of section buffer positions for dasm_put(). 83-- Current number of section buffer positions for dasm_put().
84local secpos = 1 84local secpos = 1
85 85
86-- VREG kind encodings, pre-shifted by 5 bits.
87local map_vreg = {
88 ["modrm.rm.m"] = 0x00,
89 ["modrm.rm.r"] = 0x20,
90 ["opcode"] = 0x20,
91 ["sib.base"] = 0x20,
92 ["sib.index"] = 0x40,
93 ["modrm.reg"] = 0x80,
94 ["vex.v"] = 0xa0,
95 ["imm.hi"] = 0xc0,
96}
97
98-- Current number of VREG actions contributing to REX/VEX shrinkage.
99local vreg_shrink_count = 0
100
86------------------------------------------------------------------------------ 101------------------------------------------------------------------------------
87 102
88-- Compute action numbers for action names. 103-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134 if a or num then secpos = secpos + (num or 1) end 149 if a or num then secpos = secpos + (num or 1) end
135end 150end
136 151
152-- Optionally add a VREG action.
153local function wvreg(kind, vreg, psz, sk, defer)
154 if not vreg then return end
155 waction("VREG", vreg)
156 local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
157 if b < (sk or 0) then
158 vreg_shrink_count = vreg_shrink_count + 1
159 end
160 if not defer then
161 b = b + vreg_shrink_count * 8
162 vreg_shrink_count = 0
163 end
164 wputxb(b + (psz or 0))
165end
166
137-- Add call to embedded DynASM C code. 167-- Add call to embedded DynASM C code.
138local function wcall(func, args) 168local function wcall(func, args)
139 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) 169 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
299 local iname = format("@%s%x%s", sz, i, needrex and "R" or "") 329 local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
300 if needrex then map_reg_needrex[iname] = true end 330 if needrex then map_reg_needrex[iname] = true end
301 local name 331 local name
302 if sz == "o" then name = format("xmm%d", i) 332 if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
303 elseif sz == "f" then name = format("st%d", i) 333 elseif sz == "f" then name = format("st%d", i)
304 else name = format("r%d%s", i, sz == addrsize and "" or sz) end 334 else name = format("r%d%s", i, sz == addrsize and "" or sz) end
305 map_archdef[name] = iname 335 map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) 356mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327map_reg_valid_index[map_archdef.esp] = false 357map_reg_valid_index[map_archdef.esp] = false
328if x64 then map_reg_valid_index[map_archdef.rsp] = false end 358if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359if x64 then map_reg_needrex[map_archdef.Rb] = true end
329map_archdef["Ra"] = "@"..addrsize 360map_archdef["Ra"] = "@"..addrsize
330 361
331-- FP registers (internally tword sized, but use "f" as operand size). 362-- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
334-- SSE registers (oword sized, but qword and dword accessible). 365-- SSE registers (oword sized, but qword and dword accessible).
335mkrmap("o", "xmm") 366mkrmap("o", "xmm")
336 367
368-- AVX registers (yword sized, but oword, qword and dword accessible).
369mkrmap("y", "ymm")
370
337-- Operand size prefixes to codes. 371-- Operand size prefixes to codes.
338local map_opsize = { 372local map_opsize = {
339 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", 373 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
340 aword = addrsize, 374 tword = "t", aword = addrsize,
341} 375}
342 376
343-- Operand size code to number. 377-- Operand size code to number.
344local map_opsizenum = { 378local map_opsizenum = {
345 b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, 379 b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
346} 380}
347 381
348-- Operand size code to name. 382-- Operand size code to name.
349local map_opsizename = { 383local map_opsizename = {
350 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", 384 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
351 f = "fpword", 385 t = "tword", f = "fpword",
352} 386}
353 387
354-- Valid index register scale factors. 388-- Valid index register scale factors.
@@ -460,9 +494,45 @@ local function wputszarg(sz, n)
460end 494end
461 495
462-- Put multi-byte opcode with operand-size dependent modifications. 496-- Put multi-byte opcode with operand-size dependent modifications.
463local function wputop(sz, op, rex) 497local function wputop(sz, op, rex, vex, vregr, vregxb)
498 local psz, sk = 0, nil
499 if vex then
500 local tail
501 if vex.m == 1 and band(rex, 11) == 0 then
502 if x64 and vregxb then
503 sk = map_vreg["modrm.reg"]
504 else
505 wputb(0xc5)
506 tail = shl(bxor(band(rex, 4), 4), 5)
507 psz = 3
508 end
509 end
510 if not tail then
511 wputb(0xc4)
512 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
513 tail = shl(band(rex, 8), 4)
514 psz = 4
515 end
516 local reg, vreg = 0, nil
517 if vex.v then
518 reg = vex.v.reg
519 if not reg then werror("bad vex operand") end
520 if reg < 0 then reg = 0; vreg = vex.v.vreg end
521 end
522 if sz == "y" or vex.l then tail = tail + 4 end
523 wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
524 wvreg("vex.v", vreg)
525 rex = 0
526 if op >= 256 then werror("bad vex opcode") end
527 else
528 if rex ~= 0 then
529 if not x64 then werror("bad operand size") end
530 elseif (vregr or vregxb) and x64 then
531 rex = 0x10
532 sk = map_vreg["vex.v"]
533 end
534 end
464 local r 535 local r
465 if rex ~= 0 and not x64 then werror("bad operand size") end
466 if sz == "w" then wputb(102) end 536 if sz == "w" then wputb(102) end
467 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] 537 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
468 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end 538 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +541,20 @@ local function wputop(sz, op, rex)
471 if rex ~= 0 then 541 if rex ~= 0 then
472 local opc3 = band(op, 0xffff00) 542 local opc3 = band(op, 0xffff00)
473 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then 543 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
474 wputb(64 + band(rex, 15)); rex = 0 544 wputb(64 + band(rex, 15)); rex = 0; psz = 2
475 end 545 end
476 end 546 end
477 wputb(shr(op, 16)); op = band(op, 0xffff) 547 wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
478 end 548 end
479 if op >= 256 then 549 if op >= 256 then
480 local b = shr(op, 8) 550 local b = shr(op, 8)
481 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end 551 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
482 wputb(b) 552 wputb(b); op = band(op, 255); psz = psz + 1
483 op = band(op, 255)
484 end 553 end
485 if rex ~= 0 then wputb(64 + band(rex, 15)) end 554 if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
486 if sz == "b" then op = op - 1 end 555 if sz == "b" then op = op - 1 end
487 wputb(op) 556 wputb(op)
557 return psz, sk
488end 558end
489 559
490-- Put ModRM or SIB formatted byte. 560-- Put ModRM or SIB formatted byte.
@@ -494,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
494end 564end
495 565
496-- Put ModRM/SIB plus optional displacement. 566-- Put ModRM/SIB plus optional displacement.
497local function wputmrmsib(t, imark, s, vsreg) 567local function wputmrmsib(t, imark, s, vsreg, psz, sk)
498 local vreg, vxreg 568 local vreg, vxreg
499 local reg, xreg = t.reg, t.xreg 569 local reg, xreg = t.reg, t.xreg
500 if reg and reg < 0 then reg = 0; vreg = t.vreg end 570 if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
504 -- Register mode. 574 -- Register mode.
505 if sub(t.mode, 1, 1) == "r" then 575 if sub(t.mode, 1, 1) == "r" then
506 wputmodrm(3, s, reg) 576 wputmodrm(3, s, reg)
507 if vsreg then waction("VREG", vsreg); wputxb(2) end 577 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
508 if vreg then waction("VREG", vreg); wputxb(0) end 578 wvreg("modrm.rm.r", vreg, psz+1, sk)
509 return 579 return
510 end 580 end
511 581
@@ -519,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
519 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) 589 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
520 wputmodrm(0, s, 4) 590 wputmodrm(0, s, 4)
521 if imark == "I" then waction("MARK") end 591 if imark == "I" then waction("MARK") end
522 if vsreg then waction("VREG", vsreg); wputxb(2) end 592 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
523 wputmodrm(t.xsc, xreg, 5) 593 wputmodrm(t.xsc, xreg, 5)
524 if vxreg then waction("VREG", vxreg); wputxb(3) end 594 wvreg("sib.index", vxreg, psz+2, sk)
525 else 595 else
526 -- Pure 32 bit displacement. 596 -- Pure 32 bit displacement.
527 if x64 and tdisp ~= "table" then 597 if x64 and tdisp ~= "table" then
528 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) 598 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
599 wvreg("modrm.reg", vsreg, psz+1, sk)
529 if imark == "I" then waction("MARK") end 600 if imark == "I" then waction("MARK") end
530 wputmodrm(0, 4, 5) 601 wputmodrm(0, 4, 5)
531 else 602 else
532 riprel = x64 603 riprel = x64
533 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) 604 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
605 wvreg("modrm.reg", vsreg, psz+1, sk)
534 if imark == "I" then waction("MARK") end 606 if imark == "I" then waction("MARK") end
535 end 607 end
536 if vsreg then waction("VREG", vsreg); wputxb(2) end
537 end 608 end
538 if riprel then -- Emit rip-relative displacement. 609 if riprel then -- Emit rip-relative displacement.
539 if match("UWSiI", imark) then 610 if match("UWSiI", imark) then
@@ -561,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
561 if xreg or band(reg, 7) == 4 then 632 if xreg or band(reg, 7) == 4 then
562 wputmodrm(m or 2, s, 4) -- ModRM. 633 wputmodrm(m or 2, s, 4) -- ModRM.
563 if m == nil or imark == "I" then waction("MARK") end 634 if m == nil or imark == "I" then waction("MARK") end
564 if vsreg then waction("VREG", vsreg); wputxb(2) end 635 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
565 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. 636 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
566 if vxreg then waction("VREG", vxreg); wputxb(3) end 637 wvreg("sib.index", vxreg, psz+2, sk, vreg)
567 if vreg then waction("VREG", vreg); wputxb(1) end 638 wvreg("sib.base", vreg, psz+2, sk)
568 else 639 else
569 wputmodrm(m or 2, s, reg) -- ModRM. 640 wputmodrm(m or 2, s, reg) -- ModRM.
570 if (imark == "I" and (m == 1 or m == 2)) or 641 if (imark == "I" and (m == 1 or m == 2)) or
571 (m == nil and (vsreg or vreg)) then waction("MARK") end 642 (m == nil and (vsreg or vreg)) then waction("MARK") end
572 if vsreg then waction("VREG", vsreg); wputxb(2) end 643 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
573 if vreg then waction("VREG", vreg); wputxb(1) end 644 wvreg("modrm.rm.m", vreg, psz+1, sk)
574 end 645 end
575 646
576 -- Put displacement. 647 -- Put displacement.
@@ -881,9 +952,16 @@ end
881-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 952-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
882-- The spare 3 bits are either filled with the last hex digit or 953-- The spare 3 bits are either filled with the last hex digit or
883-- the result from a previous "r"/"R". The opcode is restored. 954-- the result from a previous "r"/"R". The opcode is restored.
955-- "u" Use VEX encoding, vvvv unused.
956-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
957-- removed from the list used by future characters).
958-- "w" Use VEX encoding, vvvv from 3rd operand.
959-- "L" Force VEX.L
884-- 960--
885-- All of the following characters force a flush of the opcode: 961-- All of the following characters force a flush of the opcode:
886-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. 962-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
963-- "s" stores a 4 bit immediate from the last register operand,
964-- followed by 4 zero bits.
887-- "S" stores a signed 8 bit immediate from the last operand. 965-- "S" stores a signed 8 bit immediate from the last operand.
888-- "U" stores an unsigned 8 bit immediate from the last operand. 966-- "U" stores an unsigned 8 bit immediate from the last operand.
889-- "W" stores an unsigned 16 bit immediate from the last operand. 967-- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1226,46 +1304,14 @@ local map_op = {
1226 movups_2 = "rmo:0F10rM|mro:0F11Rm", 1304 movups_2 = "rmo:0F10rM|mro:0F11Rm",
1227 orpd_2 = "rmo:660F56rM", 1305 orpd_2 = "rmo:660F56rM",
1228 orps_2 = "rmo:0F56rM", 1306 orps_2 = "rmo:0F56rM",
1229 packssdw_2 = "rmo:660F6BrM",
1230 packsswb_2 = "rmo:660F63rM",
1231 packuswb_2 = "rmo:660F67rM",
1232 paddb_2 = "rmo:660FFCrM",
1233 paddd_2 = "rmo:660FFErM",
1234 paddq_2 = "rmo:660FD4rM",
1235 paddsb_2 = "rmo:660FECrM",
1236 paddsw_2 = "rmo:660FEDrM",
1237 paddusb_2 = "rmo:660FDCrM",
1238 paddusw_2 = "rmo:660FDDrM",
1239 paddw_2 = "rmo:660FFDrM",
1240 pand_2 = "rmo:660FDBrM",
1241 pandn_2 = "rmo:660FDFrM",
1242 pause_0 = "F390", 1307 pause_0 = "F390",
1243 pavgb_2 = "rmo:660FE0rM",
1244 pavgw_2 = "rmo:660FE3rM",
1245 pcmpeqb_2 = "rmo:660F74rM",
1246 pcmpeqd_2 = "rmo:660F76rM",
1247 pcmpeqw_2 = "rmo:660F75rM",
1248 pcmpgtb_2 = "rmo:660F64rM",
1249 pcmpgtd_2 = "rmo:660F66rM",
1250 pcmpgtw_2 = "rmo:660F65rM",
1251 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. 1308 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1252 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", 1309 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1253 pmaddwd_2 = "rmo:660FF5rM",
1254 pmaxsw_2 = "rmo:660FEErM",
1255 pmaxub_2 = "rmo:660FDErM",
1256 pminsw_2 = "rmo:660FEArM",
1257 pminub_2 = "rmo:660FDArM",
1258 pmovmskb_2 = "rr/do:660FD7rM", 1310 pmovmskb_2 = "rr/do:660FD7rM",
1259 pmulhuw_2 = "rmo:660FE4rM",
1260 pmulhw_2 = "rmo:660FE5rM",
1261 pmullw_2 = "rmo:660FD5rM",
1262 pmuludq_2 = "rmo:660FF4rM",
1263 por_2 = "rmo:660FEBrM",
1264 prefetchnta_1 = "xb:n0F180m", 1311 prefetchnta_1 = "xb:n0F180m",
1265 prefetcht0_1 = "xb:n0F181m", 1312 prefetcht0_1 = "xb:n0F181m",
1266 prefetcht1_1 = "xb:n0F182m", 1313 prefetcht1_1 = "xb:n0F182m",
1267 prefetcht2_1 = "xb:n0F183m", 1314 prefetcht2_1 = "xb:n0F183m",
1268 psadbw_2 = "rmo:660FF6rM",
1269 pshufd_3 = "rmio:660F70rMU", 1315 pshufd_3 = "rmio:660F70rMU",
1270 pshufhw_3 = "rmio:F30F70rMU", 1316 pshufhw_3 = "rmio:F30F70rMU",
1271 pshuflw_3 = "rmio:F20F70rMU", 1317 pshuflw_3 = "rmio:F20F70rMU",
@@ -1279,23 +1325,6 @@ local map_op = {
1279 psrldq_2 = "rio:660F733mU", 1325 psrldq_2 = "rio:660F733mU",
1280 psrlq_2 = "rmo:660FD3rM|rio:660F732mU", 1326 psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1281 psrlw_2 = "rmo:660FD1rM|rio:660F712mU", 1327 psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1282 psubb_2 = "rmo:660FF8rM",
1283 psubd_2 = "rmo:660FFArM",
1284 psubq_2 = "rmo:660FFBrM",
1285 psubsb_2 = "rmo:660FE8rM",
1286 psubsw_2 = "rmo:660FE9rM",
1287 psubusb_2 = "rmo:660FD8rM",
1288 psubusw_2 = "rmo:660FD9rM",
1289 psubw_2 = "rmo:660FF9rM",
1290 punpckhbw_2 = "rmo:660F68rM",
1291 punpckhdq_2 = "rmo:660F6ArM",
1292 punpckhqdq_2 = "rmo:660F6DrM",
1293 punpckhwd_2 = "rmo:660F69rM",
1294 punpcklbw_2 = "rmo:660F60rM",
1295 punpckldq_2 = "rmo:660F62rM",
1296 punpcklqdq_2 = "rmo:660F6CrM",
1297 punpcklwd_2 = "rmo:660F61rM",
1298 pxor_2 = "rmo:660FEFrM",
1299 rcpps_2 = "rmo:0F53rM", 1328 rcpps_2 = "rmo:0F53rM",
1300 rcpss_2 = "rro:F30F53rM|rx/od:", 1329 rcpss_2 = "rro:F30F53rM|rx/od:",
1301 rsqrtps_2 = "rmo:0F52rM", 1330 rsqrtps_2 = "rmo:0F52rM",
@@ -1413,6 +1442,327 @@ local map_op = {
1413 movntsd_2 = "xr/qo:nF20F2BRm", 1442 movntsd_2 = "xr/qo:nF20F2BRm",
1414 movntss_2 = "xr/do:F30F2BRm", 1443 movntss_2 = "xr/do:F30F2BRm",
1415 -- popcnt is also in SSE4.2 1444 -- popcnt is also in SSE4.2
1445
1446 -- AES-NI
1447 aesdec_2 = "rmo:660F38DErM",
1448 aesdeclast_2 = "rmo:660F38DFrM",
1449 aesenc_2 = "rmo:660F38DCrM",
1450 aesenclast_2 = "rmo:660F38DDrM",
1451 aesimc_2 = "rmo:660F38DBrM",
1452 aeskeygenassist_3 = "rmio:660F3ADFrMU",
1453 pclmulqdq_3 = "rmio:660F3A44rMU",
1454
1455 -- AVX FP ops
1456 vaddsubpd_3 = "rrmoy:660FVD0rM",
1457 vaddsubps_3 = "rrmoy:F20FVD0rM",
1458 vandpd_3 = "rrmoy:660FV54rM",
1459 vandps_3 = "rrmoy:0FV54rM",
1460 vandnpd_3 = "rrmoy:660FV55rM",
1461 vandnps_3 = "rrmoy:0FV55rM",
1462 vblendpd_4 = "rrmioy:660F3AV0DrMU",
1463 vblendps_4 = "rrmioy:660F3AV0CrMU",
1464 vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1465 vblendvps_4 = "rrmroy:660F3AV4ArMs",
1466 vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1467 vcmppd_4 = "rrmioy:660FVC2rMU",
1468 vcmpps_4 = "rrmioy:0FVC2rMU",
1469 vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1470 vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1471 vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1472 vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1473 vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1474 vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1475 vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1476 vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1477 vcvtps2dq_2 = "rmoy:660Fu5BrM",
1478 vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1479 vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1480 vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1481 vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1482 vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1483 vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1484 vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1485 vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1486 vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1487 vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1488 vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1489 vdppd_4 = "rrmio:660F3AV41rMU",
1490 vdpps_4 = "rrmioy:660F3AV40rMU",
1491 vextractf128_3 = "mri/oy:660F3AuL19RmU",
1492 vextractps_3 = "mri/do:660F3Au17RmU",
1493 vhaddpd_3 = "rrmoy:660FV7CrM",
1494 vhaddps_3 = "rrmoy:F20FV7CrM",
1495 vhsubpd_3 = "rrmoy:660FV7DrM",
1496 vhsubps_3 = "rrmoy:F20FV7DrM",
1497 vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1498 vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1499 vldmxcsr_1 = "xd:0FuAE2m",
1500 vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1501 vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1502 vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1503 vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1504 vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1505 vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1506 vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1507 vmovhlps_3 = "rrro:0FV12rM",
1508 vmovhpd_2 = "xr/qo:660Fu17Rm",
1509 vmovhpd_3 = "rrx/ooq:660FV16rM",
1510 vmovhps_2 = "xr/qo:0Fu17Rm",
1511 vmovhps_3 = "rrx/ooq:0FV16rM",
1512 vmovlhps_3 = "rrro:0FV16rM",
1513 vmovlpd_2 = "xr/qo:660Fu13Rm",
1514 vmovlpd_3 = "rrx/ooq:660FV12rM",
1515 vmovlps_2 = "xr/qo:0Fu13Rm",
1516 vmovlps_3 = "rrx/ooq:0FV12rM",
1517 vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1518 vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1519 vmovntpd_2 = "xroy:660Fu2BRm",
1520 vmovntps_2 = "xroy:0Fu2BRm",
1521 vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1522 vmovsd_3 = "rrro:F20FV10rM",
1523 vmovshdup_2 = "rmoy:F30Fu16rM",
1524 vmovsldup_2 = "rmoy:F30Fu12rM",
1525 vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1526 vmovss_3 = "rrro:F30FV10rM",
1527 vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1528 vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1529 vorpd_3 = "rrmoy:660FV56rM",
1530 vorps_3 = "rrmoy:0FV56rM",
1531 vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1532 vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1533 vperm2f128_4 = "rrmiy:660F3AV06rMU",
1534 vptestpd_2 = "rmoy:660F38u0FrM",
1535 vptestps_2 = "rmoy:660F38u0ErM",
1536 vrcpps_2 = "rmoy:0Fu53rM",
1537 vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1538 vrsqrtps_2 = "rmoy:0Fu52rM",
1539 vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1540 vroundpd_3 = "rmioy:660F3Au09rMU",
1541 vroundps_3 = "rmioy:660F3Au08rMU",
1542 vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1543 vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1544 vshufpd_4 = "rrmioy:660FVC6rMU",
1545 vshufps_4 = "rrmioy:0FVC6rMU",
1546 vsqrtps_2 = "rmoy:0Fu51rM",
1547 vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1548 vsqrtpd_2 = "rmoy:660Fu51rM",
1549 vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1550 vstmxcsr_1 = "xd:0FuAE3m",
1551 vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1552 vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1553 vunpckhpd_3 = "rrmoy:660FV15rM",
1554 vunpckhps_3 = "rrmoy:0FV15rM",
1555 vunpcklpd_3 = "rrmoy:660FV14rM",
1556 vunpcklps_3 = "rrmoy:0FV14rM",
1557 vxorpd_3 = "rrmoy:660FV57rM",
1558 vxorps_3 = "rrmoy:0FV57rM",
1559 vzeroall_0 = "0FuL77",
1560 vzeroupper_0 = "0Fu77",
1561
1562 -- AVX2 FP ops
1563 vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1564 vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1565 -- *vgather* (!vsib)
1566 vpermpd_3 = "rmiy:660F3AuX01rMU",
1567 vpermps_3 = "rrmy:660F38V16rM",
1568
1569 -- AVX, AVX2 integer ops
1570 -- In general, xmm requires AVX, ymm requires AVX2.
1571 vaesdec_3 = "rrmo:660F38VDErM",
1572 vaesdeclast_3 = "rrmo:660F38VDFrM",
1573 vaesenc_3 = "rrmo:660F38VDCrM",
1574 vaesenclast_3 = "rrmo:660F38VDDrM",
1575 vaesimc_2 = "rmo:660F38uDBrM",
1576 vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1577 vlddqu_2 = "rxoy:F20FuF0rM",
1578 vmaskmovdqu_2 = "rro:660FuF7rM",
1579 vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1580 vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1581 vmovntdq_2 = "xroy:660FuE7Rm",
1582 vmovntdqa_2 = "rxoy:660F38u2ArM",
1583 vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1584 vpabsb_2 = "rmoy:660F38u1CrM",
1585 vpabsd_2 = "rmoy:660F38u1ErM",
1586 vpabsw_2 = "rmoy:660F38u1DrM",
1587 vpackusdw_3 = "rrmoy:660F38V2BrM",
1588 vpalignr_4 = "rrmioy:660F3AV0FrMU",
1589 vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1590 vpblendw_4 = "rrmioy:660F3AV0ErMU",
1591 vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1592 vpcmpeqq_3 = "rrmoy:660F38V29rM",
1593 vpcmpestri_3 = "rmio:660F3Au61rMU",
1594 vpcmpestrm_3 = "rmio:660F3Au60rMU",
1595 vpcmpgtq_3 = "rrmoy:660F38V37rM",
1596 vpcmpistri_3 = "rmio:660F3Au63rMU",
1597 vpcmpistrm_3 = "rmio:660F3Au62rMU",
1598 vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1599 vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1600 vpextrd_3 = "mri/do:660F3Au16RmU",
1601 vpextrq_3 = "mri/qo:660F3Au16RmU",
1602 vphaddw_3 = "rrmoy:660F38V01rM",
1603 vphaddd_3 = "rrmoy:660F38V02rM",
1604 vphaddsw_3 = "rrmoy:660F38V03rM",
1605 vphminposuw_2 = "rmo:660F38u41rM",
1606 vphsubw_3 = "rrmoy:660F38V05rM",
1607 vphsubd_3 = "rrmoy:660F38V06rM",
1608 vphsubsw_3 = "rrmoy:660F38V07rM",
1609 vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1610 vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1611 vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1612 vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1613 vpmaddubsw_3 = "rrmoy:660F38V04rM",
1614 vpmaxsb_3 = "rrmoy:660F38V3CrM",
1615 vpmaxsd_3 = "rrmoy:660F38V3DrM",
1616 vpmaxuw_3 = "rrmoy:660F38V3ErM",
1617 vpmaxud_3 = "rrmoy:660F38V3FrM",
1618 vpminsb_3 = "rrmoy:660F38V38rM",
1619 vpminsd_3 = "rrmoy:660F38V39rM",
1620 vpminuw_3 = "rrmoy:660F38V3ArM",
1621 vpminud_3 = "rrmoy:660F38V3BrM",
1622 vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1623 vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1624 vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1625 vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1626 vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1627 vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1628 vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1629 vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1630 vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1631 vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1632 vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1633 vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1634 vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1635 vpmuldq_3 = "rrmoy:660F38V28rM",
1636 vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1637 vpmulld_3 = "rrmoy:660F38V40rM",
1638 vpshufb_3 = "rrmoy:660F38V00rM",
1639 vpshufd_3 = "rmioy:660Fu70rMU",
1640 vpshufhw_3 = "rmioy:F30Fu70rMU",
1641 vpshuflw_3 = "rmioy:F20Fu70rMU",
1642 vpsignb_3 = "rrmoy:660F38V08rM",
1643 vpsignw_3 = "rrmoy:660F38V09rM",
1644 vpsignd_3 = "rrmoy:660F38V0ArM",
1645 vpslldq_3 = "rrioy:660Fv737mU",
1646 vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1647 vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1648 vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1649 vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1650 vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1651 vpsrldq_3 = "rrioy:660Fv733mU",
1652 vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1653 vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1654 vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1655 vptest_2 = "rmoy:660F38u17rM",
1656
1657 -- AVX2 integer ops
1658 vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1659 vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1660 vextracti128_3 = "mri/oy:660F3AuL39RmU",
1661 vpblendd_4 = "rrmioy:660F3AV02rMU",
1662 vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1663 vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1664 vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1665 vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1666 vpermd_3 = "rrmy:660F38V36rM",
1667 vpermq_3 = "rmiy:660F3AuX00rMU",
1668 -- *vpgather* (!vsib)
1669 vperm2i128_4 = "rrmiy:660F3AV46rMU",
1670 vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1671 vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1672 vpsllvd_3 = "rrmoy:660F38V47rM",
1673 vpsllvq_3 = "rrmoy:660F38VX47rM",
1674 vpsravd_3 = "rrmoy:660F38V46rM",
1675 vpsrlvd_3 = "rrmoy:660F38V45rM",
1676 vpsrlvq_3 = "rrmoy:660F38VX45rM",
1677
1678 -- Intel ADX
1679 adcx_2 = "rmqd:660F38F6rM",
1680 adox_2 = "rmqd:F30F38F6rM",
1681
1682 -- BMI1
1683 andn_3 = "rrmqd:0F38VF2rM",
1684 bextr_3 = "rmrqd:0F38wF7rM",
1685 blsi_2 = "rmqd:0F38vF33m",
1686 blsmsk_2 = "rmqd:0F38vF32m",
1687 blsr_2 = "rmqd:0F38vF31m",
1688 tzcnt_2 = "rmqdw:F30FBCrM",
1689
1690 -- BMI2
1691 bzhi_3 = "rmrqd:0F38wF5rM",
1692 mulx_3 = "rrmqd:F20F38VF6rM",
1693 pdep_3 = "rrmqd:F20F38VF5rM",
1694 pext_3 = "rrmqd:F30F38VF5rM",
1695 rorx_3 = "rmSqd:F20F3AuF0rMS",
1696 sarx_3 = "rmrqd:F30F38wF7rM",
1697 shrx_3 = "rmrqd:F20F38wF7rM",
1698 shlx_3 = "rmrqd:660F38wF7rM",
1699
1700 -- FMA3
1701 vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
1702 vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
1703 vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
1704 vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
1705 vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
1706 vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
1707
1708 vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
1709 vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
1710 vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
1711 vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
1712 vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
1713 vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
1714
1715 vfmadd132pd_3 = "rrmoy:660F38VX98rM",
1716 vfmadd132ps_3 = "rrmoy:660F38V98rM",
1717 vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
1718 vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
1719 vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
1720 vfmadd213ps_3 = "rrmoy:660F38VA8rM",
1721 vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
1722 vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
1723 vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
1724 vfmadd231ps_3 = "rrmoy:660F38VB8rM",
1725 vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
1726 vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
1727
1728 vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
1729 vfmsub132ps_3 = "rrmoy:660F38V9ArM",
1730 vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
1731 vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
1732 vfmsub213pd_3 = "rrmoy:660F38VXAArM",
1733 vfmsub213ps_3 = "rrmoy:660F38VAArM",
1734 vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
1735 vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
1736 vfmsub231pd_3 = "rrmoy:660F38VXBArM",
1737 vfmsub231ps_3 = "rrmoy:660F38VBArM",
1738 vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
1739 vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
1740
1741 vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
1742 vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
1743 vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
1744 vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
1745 vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
1746 vfnmadd213ps_3 = "rrmoy:660F38VACrM",
1747 vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
1748 vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
1749 vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
1750 vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
1751 vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
1752 vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
1753
1754 vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
1755 vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
1756 vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
1757 vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
1758 vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
1759 vfnmsub213ps_3 = "rrmoy:660F38VAErM",
1760 vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
1761 vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
1762 vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
1763 vfnmsub231ps_3 = "rrmoy:660F38VBErM",
1764 vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
1765 vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
1416} 1766}
1417 1767
1418------------------------------------------------------------------------------ 1768------------------------------------------------------------------------------
@@ -1463,28 +1813,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
1463 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ 1813 map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
1464end 1814end
1465 1815
1466-- SSE FP arithmetic ops. 1816-- SSE / AVX FP arithmetic ops.
1467for name,n in pairs{ sqrt = 1, add = 8, mul = 9, 1817for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
1468 sub = 12, min = 13, div = 14, max = 15 } do 1818 sub = 12, min = 13, div = 14, max = 15 } do
1469 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) 1819 map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
1470 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) 1820 map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
1471 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) 1821 map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
1472 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) 1822 map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
1823 if n ~= 1 then
1824 map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
1825 map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
1826 map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
1827 map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
1828 end
1829end
1830
1831-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
1832for name,n in pairs{
1833 paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
1834 paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
1835 packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
1836 paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
1837 pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
1838 pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
1839 pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
1840 pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
1841 pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
1842 pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
1843 psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
1844 psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
1845 punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
1846 punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
1847 punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
1848} do
1849 map_op[name.."_2"] = format("rmo:660F%02XrM", n)
1850 map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
1473end 1851end
1474 1852
1475------------------------------------------------------------------------------ 1853------------------------------------------------------------------------------
1476 1854
1855local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
1856
1477-- Process pattern string. 1857-- Process pattern string.
1478local function dopattern(pat, args, sz, op, needrex) 1858local function dopattern(pat, args, sz, op, needrex)
1479 local digit, addin 1859 local digit, addin, vex
1480 local opcode = 0 1860 local opcode = 0
1481 local szov = sz 1861 local szov = sz
1482 local narg = 1 1862 local narg = 1
1483 local rex = 0 1863 local rex = 0
1484 1864
1485 -- Limit number of section buffer positions used by a single dasm_put(). 1865 -- Limit number of section buffer positions used by a single dasm_put().
1486 -- A single opcode needs a maximum of 5 positions. 1866 -- A single opcode needs a maximum of 6 positions.
1487 if secpos+5 > maxsecpos then wflush() end 1867 if secpos+6 > maxsecpos then wflush() end
1488 1868
1489 -- Process each character. 1869 -- Process each character.
1490 for c in gmatch(pat.."|", ".") do 1870 for c in gmatch(pat.."|", ".") do
@@ -1498,6 +1878,8 @@ local function dopattern(pat, args, sz, op, needrex)
1498 szov = nil 1878 szov = nil
1499 elseif c == "X" then -- Force REX.W. 1879 elseif c == "X" then -- Force REX.W.
1500 rex = 8 1880 rex = 8
1881 elseif c == "L" then -- Force VEX.L.
1882 vex.l = true
1501 elseif c == "r" then -- Merge 1st operand regno. into opcode. 1883 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1502 addin = args[1]; opcode = opcode + (addin.reg % 8) 1884 addin = args[1]; opcode = opcode + (addin.reg % 8)
1503 if narg < 2 then narg = 2 end 1885 if narg < 2 then narg = 2 end
@@ -1521,21 +1903,42 @@ local function dopattern(pat, args, sz, op, needrex)
1521 if t.xreg and t.xreg > 7 then rex = rex + 2 end 1903 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1522 if s > 7 then rex = rex + 4 end 1904 if s > 7 then rex = rex + 4 end
1523 if needrex then rex = rex + 16 end 1905 if needrex then rex = rex + 16 end
1524 wputop(szov, opcode, rex); opcode = nil 1906 local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1907 opcode = nil
1525 local imark = sub(pat, -1) -- Force a mark (ugly). 1908 local imark = sub(pat, -1) -- Force a mark (ugly).
1526 -- Put ModRM/SIB with regno/last digit as spare. 1909 -- Put ModRM/SIB with regno/last digit as spare.
1527 wputmrmsib(t, imark, s, addin and addin.vreg) 1910 wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1528 addin = nil 1911 addin = nil
1912 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1913 local b = band(opcode, 255); opcode = shr(opcode, 8)
1914 local m = 1
1915 if b == 0x38 then m = 2
1916 elseif b == 0x3a then m = 3 end
1917 if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1918 if b ~= 0x0f then
1919 werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1920 "' in pattern `"..pat.."' for `"..op.."'")
1921 end
1922 local v = map_vexarg[c]
1923 if v then v = remove(args, v) end
1924 b = band(opcode, 255)
1925 local p = 0
1926 if b == 0x66 then p = 1
1927 elseif b == 0xf3 then p = 2
1928 elseif b == 0xf2 then p = 3 end
1929 if p ~= 0 then opcode = shr(opcode, 8) end
1930 if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1931 vex = { m = m, p = p, v = v }
1529 else 1932 else
1530 if opcode then -- Flush opcode. 1933 if opcode then -- Flush opcode.
1531 if szov == "q" and rex == 0 then rex = rex + 8 end 1934 if szov == "q" and rex == 0 then rex = rex + 8 end
1532 if needrex then rex = rex + 16 end 1935 if needrex then rex = rex + 16 end
1533 if addin and addin.reg == -1 then 1936 if addin and addin.reg == -1 then
1534 wputop(szov, opcode - 7, rex) 1937 local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1535 waction("VREG", addin.vreg); wputxb(0) 1938 wvreg("opcode", addin.vreg, psz, sk)
1536 else 1939 else
1537 if addin and addin.reg > 7 then rex = rex + 1 end 1940 if addin and addin.reg > 7 then rex = rex + 1 end
1538 wputop(szov, opcode, rex) 1941 wputop(szov, opcode, rex, vex)
1539 end 1942 end
1540 opcode = nil 1943 opcode = nil
1541 end 1944 end
@@ -1572,6 +1975,14 @@ local function dopattern(pat, args, sz, op, needrex)
1572 else 1975 else
1573 wputlabel("REL_", imm, 2) 1976 wputlabel("REL_", imm, 2)
1574 end 1977 end
1978 elseif c == "s" then
1979 local reg = a.reg
1980 if reg < 0 then
1981 wputb(0)
1982 wvreg("imm.hi", a.vreg)
1983 else
1984 wputb(shl(reg, 4))
1985 end
1575 else 1986 else
1576 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") 1987 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1577 end 1988 end
@@ -1648,11 +2059,14 @@ map_op[".template__"] = function(params, template, nparams)
1648 if pat == "" then pat = lastpat else lastpat = pat end 2059 if pat == "" then pat = lastpat else lastpat = pat end
1649 if matchtm(tm, args) then 2060 if matchtm(tm, args) then
1650 local prefix = sub(szm, 1, 1) 2061 local prefix = sub(szm, 1, 1)
1651 if prefix == "/" then -- Match both operand sizes. 2062 if prefix == "/" then -- Exactly match leading operand sizes.
1652 if args[1].opsize == sub(szm, 2, 2) and 2063 for i = #szm,1,-1 do
1653 args[2].opsize == sub(szm, 3, 3) then 2064 if i == 1 then
1654 dopattern(pat, args, sz, params.op, needrex) -- Process pattern. 2065 dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
1655 return 2066 return
2067 elseif args[i-1].opsize ~= sub(szm, i, i) then
2068 break
2069 end
1656 end 2070 end
1657 else -- Match common operand size. 2071 else -- Match common operand size.
1658 local szp = sz 2072 local szp = sz
@@ -1717,8 +2131,8 @@ if x64 then
1717 rex = a.reg > 7 and 9 or 8 2131 rex = a.reg > 7 and 9 or 8
1718 end 2132 end
1719 end 2133 end
1720 wputop(sz, opcode, rex) 2134 local psz, sk = wputop(sz, opcode, rex, nil, vreg)
1721 if vreg then waction("VREG", vreg); wputxb(0) end 2135 wvreg("opcode", vreg, psz, sk)
1722 waction("IMM_D", format("(unsigned int)(%s)", op64)) 2136 waction("IMM_D", format("(unsigned int)(%s)", op64))
1723 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) 2137 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1724 end 2138 end
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 5fda425b..8e85af24 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -10,9 +10,9 @@
10local _info = { 10local _info = {
11 name = "DynASM", 11 name = "DynASM",
12 description = "A dynamic assembler for code generation engines", 12 description = "A dynamic assembler for code generation engines",
13 version = "1.3.0", 13 version = "1.4.0",
14 vernum = 10300, 14 vernum = 10400,
15 release = "2011-05-05", 15 release = "2015-10-18",
16 author = "Mike Pall", 16 author = "Mike Pall",
17 url = "http://luajit.org/dynasm.html", 17 url = "http://luajit.org/dynasm.html",
18 license = "MIT", 18 license = "MIT",
@@ -630,6 +630,7 @@ end
630-- Load architecture-specific module. 630-- Load architecture-specific module.
631local function loadarch(arch) 631local function loadarch(arch)
632 if not match(arch, "^[%w_]+$") then return "bad arch name" end 632 if not match(arch, "^[%w_]+$") then return "bad arch name" end
633 _G._map_def = map_def
633 local ok, m_arch = pcall(require, "dasm_"..arch) 634 local ok, m_arch = pcall(require, "dasm_"..arch)
634 if not ok then return "cannot load module: "..m_arch end 635 if not ok then return "cannot load module: "..m_arch end
635 g_arch = m_arch 636 g_arch = m_arch
diff --git a/etc/luajit.pc b/etc/luajit.pc
index 36840ab8..a78f1746 100644
--- a/etc/luajit.pc
+++ b/etc/luajit.pc
@@ -1,8 +1,8 @@
1# Package information for LuaJIT to be used by pkg-config. 1# Package information for LuaJIT to be used by pkg-config.
2majver=2 2majver=2
3minver=0 3minver=1
4relver=5 4relver=0
5version=${majver}.${minver}.${relver} 5version=${majver}.${minver}.${relver}-beta3
6abiver=5.1 6abiver=5.1
7 7
8prefix=/usr/local 8prefix=/usr/local
diff --git a/src/.gitignore b/src/.gitignore
index fc94e82c..1a30573c 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -4,4 +4,4 @@ lj_ffdef.h
4lj_libdef.h 4lj_libdef.h
5lj_recdef.h 5lj_recdef.h
6lj_folddef.h 6lj_folddef.h
7lj_vm.s 7lj_vm.[sS]
diff --git a/src/Makefile b/src/Makefile
index a588dc3d..77e0d537 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,8 +11,8 @@
11############################################################################## 11##############################################################################
12 12
13MAJVER= 2 13MAJVER= 2
14MINVER= 0 14MINVER= 1
15RELVER= 5 15RELVER= 0
16ABIVER= 5.1 16ABIVER= 5.1
17NODOTABIVER= 51 17NODOTABIVER= 51
18 18
@@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer
44# 44#
45# Target-specific compiler options: 45# Target-specific compiler options:
46# 46#
47# x86 only: it's recommended to compile at least for i686. Better yet,
48# compile for an architecture that has SSE2, too (-msse -msse2).
49#
50# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute 47# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
51# the binaries to a different machine you could also use: -march=native 48# the binaries to a different machine you could also use: -march=native
52# 49#
53CCOPT_x86= -march=i686 50CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
54CCOPT_x64= 51CCOPT_x64=
55CCOPT_arm= 52CCOPT_arm=
53CCOPT_arm64=
56CCOPT_ppc= 54CCOPT_ppc=
57CCOPT_ppcspe=
58CCOPT_mips= 55CCOPT_mips=
59# 56#
60CCDEBUG= 57CCDEBUG=
@@ -113,6 +110,9 @@ XCFLAGS=
113#XCFLAGS+= -DLUAJIT_NUMMODE=1 110#XCFLAGS+= -DLUAJIT_NUMMODE=1
114#XCFLAGS+= -DLUAJIT_NUMMODE=2 111#XCFLAGS+= -DLUAJIT_NUMMODE=2
115# 112#
113# Disable LJ_GC64 mode for x64.
114#XCFLAGS+= -DLUAJIT_DISABLE_GC64
115#
116############################################################################## 116##############################################################################
117 117
118############################################################################## 118##############################################################################
@@ -124,8 +124,8 @@ XCFLAGS=
124# 124#
125# Use the system provided memory allocator (realloc) instead of the 125# Use the system provided memory allocator (realloc) instead of the
126# bundled memory allocator. This is slower, but sometimes helpful for 126# bundled memory allocator. This is slower, but sometimes helpful for
127# debugging. This option cannot be enabled on x64, since realloc usually 127# debugging. This option cannot be enabled on x64 without GC64, since
128# doesn't return addresses in the right address range. 128# realloc usually doesn't return addresses in the right address range.
129# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and 129# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
130# the only way to get useful results from it for all other architectures. 130# the only way to get useful results from it for all other architectures.
131#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 131#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
@@ -189,7 +189,8 @@ endif
189# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows 189# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
190# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- 190# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
191 191
192CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) 192ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
193CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
193LDOPTIONS= $(CCDEBUG) $(LDFLAGS) 194LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
194 195
195HOST_CC= $(CC) 196HOST_CC= $(CC)
@@ -229,6 +230,7 @@ TARGET_XLDFLAGS=
229TARGET_XLIBS= -lm 230TARGET_XLIBS= -lm
230TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 231TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
231TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) 232TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
233TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
232TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) 234TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
233TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) 235TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
234TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) 236TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -243,17 +245,29 @@ else
243ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) 245ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
244 TARGET_LJARCH= arm 246 TARGET_LJARCH= arm
245else 247else
248ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
249 ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
250 TARGET_ARCH= -D__AARCH64EB__=1
251 endif
252 TARGET_LJARCH= arm64
253else
246ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) 254ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
255 ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
256 TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
257 else
258 TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
259 endif
247 TARGET_LJARCH= ppc 260 TARGET_LJARCH= ppc
248else 261else
249ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
250 TARGET_LJARCH= ppcspe
251else
252ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) 262ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
253 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) 263 ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
254 TARGET_ARCH= -D__MIPSEL__=1 264 TARGET_ARCH= -D__MIPSEL__=1
255 endif 265 endif
256 TARGET_LJARCH= mips 266 ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
267 TARGET_LJARCH= mips64
268 else
269 TARGET_LJARCH= mips
270 endif
257else 271else
258 $(error Unsupported target architecture) 272 $(error Unsupported target architecture)
259endif 273endif
@@ -267,6 +281,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
267 TARGET_SYS= PS3 281 TARGET_SYS= PS3
268 TARGET_ARCH+= -D__CELLOS_LV2__ 282 TARGET_ARCH+= -D__CELLOS_LV2__
269 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC 283 TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
284 TARGET_XLIBS+= -lpthread
270endif 285endif
271 286
272TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) 287TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -320,6 +335,9 @@ ifeq (iOS,$(TARGET_SYS))
320 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC 335 TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
321 TARGET_DYNXLDOPTS= 336 TARGET_DYNXLDOPTS=
322 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) 337 TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
338 ifeq (arm64,$(TARGET_LJARCH))
339 TARGET_XCFLAGS+= -fno-omit-frame-pointer
340 endif
323else 341else
324 ifneq (SunOS,$(TARGET_SYS)) 342 ifneq (SunOS,$(TARGET_SYS))
325 ifneq (PS3,$(TARGET_SYS)) 343 ifneq (PS3,$(TARGET_SYS))
@@ -380,6 +398,11 @@ DASM_XFLAGS=
380DASM_AFLAGS= 398DASM_AFLAGS=
381DASM_ARCH= $(TARGET_LJARCH) 399DASM_ARCH= $(TARGET_LJARCH)
382 400
401ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
402 DASM_AFLAGS+= -D ENDIAN_LE
403else
404 DASM_AFLAGS+= -D ENDIAN_BE
405endif
383ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) 406ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
384 DASM_AFLAGS+= -D P64 407 DASM_AFLAGS+= -D P64
385endif 408endif
@@ -412,19 +435,19 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
412ifeq (Windows,$(TARGET_SYS)) 435ifeq (Windows,$(TARGET_SYS))
413 DASM_AFLAGS+= -D WIN 436 DASM_AFLAGS+= -D WIN
414endif 437endif
415ifeq (x86,$(TARGET_LJARCH))
416 ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
417 DASM_AFLAGS+= -D SSE
418 endif
419else
420ifeq (x64,$(TARGET_LJARCH)) 438ifeq (x64,$(TARGET_LJARCH))
421 DASM_ARCH= x86 439 ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
440 DASM_ARCH= x86
441 endif
422else 442else
423ifeq (arm,$(TARGET_LJARCH)) 443ifeq (arm,$(TARGET_LJARCH))
424 ifeq (iOS,$(TARGET_SYS)) 444 ifeq (iOS,$(TARGET_SYS))
425 DASM_AFLAGS+= -D IOS 445 DASM_AFLAGS+= -D IOS
426 endif 446 endif
427else 447else
448ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
449 DASM_AFLAGS+= -D MIPSR6
450endif
428ifeq (ppc,$(TARGET_LJARCH)) 451ifeq (ppc,$(TARGET_LJARCH))
429 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) 452 ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
430 DASM_AFLAGS+= -D SQRT 453 DASM_AFLAGS+= -D SQRT
@@ -432,7 +455,7 @@ ifeq (ppc,$(TARGET_LJARCH))
432 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) 455 ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
433 DASM_AFLAGS+= -D ROUND 456 DASM_AFLAGS+= -D ROUND
434 endif 457 endif
435 ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) 458 ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
436 DASM_AFLAGS+= -D GPR64 459 DASM_AFLAGS+= -D GPR64
437 endif 460 endif
438 ifeq (PS3,$(TARGET_SYS)) 461 ifeq (PS3,$(TARGET_SYS))
@@ -441,7 +464,6 @@ ifeq (ppc,$(TARGET_LJARCH))
441endif 464endif
442endif 465endif
443endif 466endif
444endif
445 467
446DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) 468DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
447DASM_DASC= vm_$(DASM_ARCH).dasc 469DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -454,7 +476,7 @@ BUILDVM_X= $(BUILDVM_T)
454HOST_O= $(MINILUA_O) $(BUILDVM_O) 476HOST_O= $(MINILUA_O) $(BUILDVM_O)
455HOST_T= $(MINILUA_T) $(BUILDVM_T) 477HOST_T= $(MINILUA_T) $(BUILDVM_T)
456 478
457LJVM_S= lj_vm.s 479LJVM_S= lj_vm.S
458LJVM_O= lj_vm.o 480LJVM_O= lj_vm.o
459LJVM_BOUT= $(LJVM_S) 481LJVM_BOUT= $(LJVM_S)
460LJVM_MODE= elfasm 482LJVM_MODE= elfasm
@@ -463,10 +485,11 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
463 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o 485 lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
464LJLIB_C= $(LJLIB_O:.o=.c) 486LJLIB_C= $(LJLIB_O:.o=.c)
465 487
466LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ 488LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
467 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ 489 lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
468 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ 490 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
469 lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ 491 lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \
492 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
470 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 493 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
471 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ 494 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
472 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 495 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -586,6 +609,10 @@ amalg:
586clean: 609clean:
587 $(HOST_RM) $(ALL_RM) 610 $(HOST_RM) $(ALL_RM)
588 611
612libbc:
613 ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
614 $(MAKE) all
615
589depend: 616depend:
590 @for file in $(ALL_HDRGEN); do \ 617 @for file in $(ALL_HDRGEN); do \
591 test -f $$file || touch $$file; \ 618 test -f $$file || touch $$file; \
@@ -600,7 +627,7 @@ depend:
600 test -s $$file || $(HOST_RM) $$file; \ 627 test -s $$file || $(HOST_RM) $$file; \
601 done 628 done
602 629
603.PHONY: default all amalg clean depend 630.PHONY: default all amalg clean libbc depend
604 631
605############################################################################## 632##############################################################################
606# Rules for generated files. 633# Rules for generated files.
@@ -610,7 +637,7 @@ $(MINILUA_T): $(MINILUA_O)
610 $(E) "HOSTLINK $@" 637 $(E) "HOSTLINK $@"
611 $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) 638 $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
612 639
613host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) 640host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua
614 $(E) "DYNASM $@" 641 $(E) "DYNASM $@"
615 $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) 642 $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
616 643
@@ -657,10 +684,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
657 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 684 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
658 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 685 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
659 686
660%.o: %.s 687%.o: %.S
661 $(E) "ASM $@" 688 $(E) "ASM $@"
662 $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< 689 $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
663 $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< 690 $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
664 691
665$(LUAJIT_O): 692$(LUAJIT_O):
666 $(E) "CC $@" 693 $(E) "CC $@"
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 9e14d617..2b1cb5ef 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -3,45 +3,49 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
3 lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h 3 lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
4lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 4lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ 5 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
6 lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ 6 lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cconv.h \
7 lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ 7 lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
8 lj_lib.h lj_libdef.h 8 lj_strfmt.h lj_lib.h lj_libdef.h
9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 9lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
10 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h 10 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
11 lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
12 lj_ffdef.h lj_lib.h lj_libdef.h
11lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 13lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
12 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ 14 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
13 lj_libdef.h 15 lj_libdef.h
14lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 16lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
15 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ 17 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
16 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ 18 lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
17 lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h 19 lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
20 lj_libdef.h
18lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h 21lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
19lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 22lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
20 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ 23 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
21 lj_ffdef.h lj_lib.h lj_libdef.h 24 lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
22lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ 25lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
23 lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ 26 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
24 lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ 27 lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
25 lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ 28 lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
26 lj_libdef.h 29 lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
27lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 30lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
28 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h 31 lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
29lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ 32lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
30 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h 33 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
34 lj_libdef.h
31lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 35lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
32 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h 36 lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
33lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 37lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
34 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
35 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ 39 lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
36 lj_lib.h lj_libdef.h 40 lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
37lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 41lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
38 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ 42 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
39 lj_libdef.h 43 lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
40lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h 44lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
41lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 45lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
42 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ 46 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
43 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ 47 lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
44 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h 48 lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
45lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 49lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
46 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ 50 lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
47 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 51 lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@@ -50,17 +54,20 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
50lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ 54lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
51 lj_bcdef.h 55 lj_bcdef.h
52lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 56lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
53 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ 57 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
54 lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h 58 lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
59 lj_strfmt.h
55lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 60lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
56 lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ 61 lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
57 lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h 62 lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
63lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
64 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
58lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 65lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
59 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ 66 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
60 lj_cdata.h lj_carith.h 67 lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
61lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 68lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
62 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 69 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
63 lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 70 lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
64 lj_traceerr.h 71 lj_traceerr.h
65lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ 72lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
66 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ 73 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -71,107 +78,118 @@ lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
71 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ 78 lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
72 lj_ccallback.h 79 lj_ccallback.h
73lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 80lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
74 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ 81 lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
75 lj_cdata.h
76lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h 82lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
77lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 83lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
78 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ 84 lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
79 lj_cdata.h lj_clib.h 85 lj_cdata.h lj_clib.h lj_strfmt.h
80lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 86lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
81 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ 87 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
82 lj_bc.h lj_vm.h lj_char.h lj_strscan.h 88 lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
83lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 89lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
84 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ 90 lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
85 lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ 91 lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
86 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 92 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
87 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ 93 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
88 lj_crecord.h 94 lj_crecord.h lj_strfmt.h
89lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 95lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h 96 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
97 lj_ccallback.h lj_buf.h
91lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 98lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
92 lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ 99 lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
93 lj_bc.h lj_vm.h lj_jit.h lj_ir.h 100 lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
94lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 101lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
95 lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ 102 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
96 lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ 103 lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
97 lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ 104 lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
98 lj_vm.h luajit.h 105 lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
99lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ 106lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
100 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ 107 lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
101 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ 108 lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
102 lj_traceerr.h lj_vm.h 109 lj_traceerr.h lj_vm.h lj_strfmt.h
103lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 110lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
104 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ 111 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
105 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 112 lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
106 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ 113 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
107 lj_vm.h lj_strscan.h lj_recdef.h 114 lj_vm.h lj_strscan.h lj_strfmt.h lj_recdef.h
108lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 115lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
109 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ 116 lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
110 lj_traceerr.h lj_vm.h 117 lj_traceerr.h lj_vm.h
111lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 118lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
112 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ 119 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
113 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ 120 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
114 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h 121 lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
115lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 122lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
116 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ 123 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
117 lj_ir.h lj_dispatch.h 124 lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
118lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 125lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
119 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 126 lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
120 lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ 127 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
121 lj_vm.h lj_strscan.h lj_lib.h 128 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
122lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 129lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
123 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ 130 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
124 lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h 131 lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
132 lj_strfmt.h
125lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ 133lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
126 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ 134 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
127 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h 135 lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lex.h \
136 lj_bcdump.h lj_lib.h
128lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 137lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
129 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ 138 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
130 lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h 139 lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
131lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 140lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
132 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ 141 lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
133 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h 142 lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
134lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 143lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
135 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 144 lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
136 lj_vm.h lj_strscan.h 145 lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
137lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h 146lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
138lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 147lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
139 lj_ir.h lj_jit.h lj_iropt.h 148 lj_ir.h lj_jit.h lj_iropt.h
140lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 149lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
141 lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 150 lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
142 lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ 151 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
143 lj_strscan.h lj_folddef.h 152 lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
144lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 153lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
145 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 154 lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
146 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 155 lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
156 lj_vm.h
147lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 157lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
148 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 158 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h
149lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 159lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
150 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ 160 lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
151 lj_traceerr.h lj_vm.h lj_strscan.h 161 lj_traceerr.h lj_vm.h lj_strscan.h
152lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 162lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h 163 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
154lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 164lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
155 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 165 lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
156 lj_iropt.h lj_vm.h 166 lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
157lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 167lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
158 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ 168 lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
159 lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h 169 lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
170 lj_vm.h lj_vmevent.h
171lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
172 lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
173 lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
160lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 174lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
161 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ 175 lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
162 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ 176 lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
163 lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ 177 lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
164 lj_ffrecord.h lj_snap.h lj_vm.h 178 lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
165lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 179lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
166 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ 180 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
167 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ 181 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
168 lj_target_*.h lj_ctype.h lj_cdata.h 182 lj_target_*.h lj_ctype.h lj_cdata.h
169lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 183lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
170 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 184 lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
171 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 185 lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
172 lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h 186 lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h
173lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 187lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
174 lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h 188 lj_err.h lj_errmsg.h lj_str.h lj_char.h
189lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h
191lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
192 lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
175lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 193lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
176 lj_char.h lj_strscan.h 194 lj_char.h lj_strscan.h
177lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 195lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -189,26 +207,27 @@ lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
189lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 207lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
190 lj_ir.h lj_vm.h 208 lj_ir.h lj_vm.h
191ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ 209ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
192 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ 210 lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h \
193 lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ 211 lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
194 lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ 212 lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h \
195 lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ 213 lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \
196 lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ 214 lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \
197 lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ 215 lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \
198 luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ 216 lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \
199 lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ 217 lj_profile.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c \
200 lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ 218 lj_strfmt.c lj_strfmt_num.c lj_api.c lj_profile.c lj_lex.c lualib.h \
201 lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ 219 lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \
202 lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ 220 lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \
203 lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ 221 lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \
204 lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ 222 lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \
205 lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ 223 lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \
206 lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ 224 lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \
207 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ 225 lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \
208 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ 226 lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \
209 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ 227 lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \
210 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ 228 lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \
211 lib_init.c 229 lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \
230 lib_ffi.c lib_init.c
212luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h 231luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
213host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ 232host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
214 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ 233 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +239,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
220host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ 239host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
221 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h 240 luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
222host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ 241host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
223 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h 242 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
243 host/buildvm_libbc.h
224host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ 244host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
225 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h 245 luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
226host/minilua.o: host/minilua.c 246host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 05e0dbdb..27e14d57 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -59,10 +59,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
59#include "../dynasm/dasm_x86.h" 59#include "../dynasm/dasm_x86.h"
60#elif LJ_TARGET_ARM 60#elif LJ_TARGET_ARM
61#include "../dynasm/dasm_arm.h" 61#include "../dynasm/dasm_arm.h"
62#elif LJ_TARGET_ARM64
63#include "../dynasm/dasm_arm64.h"
62#elif LJ_TARGET_PPC 64#elif LJ_TARGET_PPC
63#include "../dynasm/dasm_ppc.h" 65#include "../dynasm/dasm_ppc.h"
64#elif LJ_TARGET_PPCSPE
65#include "../dynasm/dasm_ppc.h"
66#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS
67#include "../dynasm/dasm_mips.h" 67#include "../dynasm/dasm_mips.h"
68#else 68#else
@@ -110,11 +110,11 @@ static const char *sym_decorate(BuildCtx *ctx,
110 if (p) { 110 if (p) {
111#if LJ_TARGET_X86ORX64 111#if LJ_TARGET_X86ORX64
112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) 112 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
113 name[0] = '@'; 113 name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
114 else 114 else
115 *p = '\0'; 115 *p = '\0';
116#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE 116#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
117 /* Keep @plt. */ 117 /* Keep @plt etc. */
118#else 118#else
119 *p = '\0'; 119 *p = '\0';
120#endif 120#endif
@@ -179,6 +179,7 @@ static int build_code(BuildCtx *ctx)
179 ctx->nreloc = 0; 179 ctx->nreloc = 0;
180 180
181 ctx->globnames = globnames; 181 ctx->globnames = globnames;
182 ctx->extnames = extnames;
182 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); 183 ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
183 ctx->nrelocsym = 0; 184 ctx->nrelocsym = 0;
184 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; 185 for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -320,20 +321,20 @@ static void emit_vmdef(BuildCtx *ctx)
320 char buf[80]; 321 char buf[80];
321 int i; 322 int i;
322 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); 323 fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
323 fprintf(ctx->fp, "module(...)\n\n"); 324 fprintf(ctx->fp, "return {\n\n");
324 325
325 fprintf(ctx->fp, "bcnames = \""); 326 fprintf(ctx->fp, "bcnames = \"");
326 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); 327 for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
327 fprintf(ctx->fp, "\"\n\n"); 328 fprintf(ctx->fp, "\",\n\n");
328 329
329 fprintf(ctx->fp, "irnames = \""); 330 fprintf(ctx->fp, "irnames = \"");
330 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); 331 for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
331 fprintf(ctx->fp, "\"\n\n"); 332 fprintf(ctx->fp, "\",\n\n");
332 333
333 fprintf(ctx->fp, "irfpm = { [0]="); 334 fprintf(ctx->fp, "irfpm = { [0]=");
334 for (i = 0; irfpm_names[i]; i++) 335 for (i = 0; irfpm_names[i]; i++)
335 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); 336 fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
336 fprintf(ctx->fp, "}\n\n"); 337 fprintf(ctx->fp, "},\n\n");
337 338
338 fprintf(ctx->fp, "irfield = { [0]="); 339 fprintf(ctx->fp, "irfield = { [0]=");
339 for (i = 0; irfield_names[i]; i++) { 340 for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +344,17 @@ static void emit_vmdef(BuildCtx *ctx)
343 if (p) *p = '.'; 344 if (p) *p = '.';
344 fprintf(ctx->fp, "\"%s\", ", buf); 345 fprintf(ctx->fp, "\"%s\", ", buf);
345 } 346 }
346 fprintf(ctx->fp, "}\n\n"); 347 fprintf(ctx->fp, "},\n\n");
347 348
348 fprintf(ctx->fp, "ircall = {\n[0]="); 349 fprintf(ctx->fp, "ircall = {\n[0]=");
349 for (i = 0; ircall_names[i]; i++) 350 for (i = 0; ircall_names[i]; i++)
350 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); 351 fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
351 fprintf(ctx->fp, "}\n\n"); 352 fprintf(ctx->fp, "},\n\n");
352 353
353 fprintf(ctx->fp, "traceerr = {\n[0]="); 354 fprintf(ctx->fp, "traceerr = {\n[0]=");
354 for (i = 0; trace_errors[i]; i++) 355 for (i = 0; trace_errors[i]; i++)
355 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); 356 fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
356 fprintf(ctx->fp, "}\n\n"); 357 fprintf(ctx->fp, "},\n\n");
357} 358}
358 359
359/* -- Argument parsing ---------------------------------------------------- */ 360/* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +491,7 @@ int main(int argc, char **argv)
490 case BUILD_vmdef: 491 case BUILD_vmdef:
491 emit_vmdef(ctx); 492 emit_vmdef(ctx);
492 emit_lib(ctx); 493 emit_lib(ctx);
494 fprintf(ctx->fp, "}\n\n");
493 break; 495 break;
494 case BUILD_ffdef: 496 case BUILD_ffdef:
495 case BUILD_libdef: 497 case BUILD_libdef:
diff --git a/src/host/buildvm.h b/src/host/buildvm.h
index a440cfc3..3fdff65b 100644
--- a/src/host/buildvm.h
+++ b/src/host/buildvm.h
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
82 const char *beginsym; 82 const char *beginsym;
83 /* Strings generated by DynASM. */ 83 /* Strings generated by DynASM. */
84 const char *const *globnames; 84 const char *const *globnames;
85 const char *const *extnames;
85 const char *dasm_ident; 86 const char *dasm_ident;
86 const char *dasm_arch; 87 const char *dasm_arch;
87 /* Relocations. */ 88 /* Relocations. */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 2cb7d451..b9cfa049 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" 51 "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
52}; 52};
53 53
54/* Emit relocation for the incredibly stupid OSX assembler. */ 54/* Emit x86/x64 text relocations. */
55static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, 55static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
56 const char *sym) 56 const char *sym)
57{ 57{
58 const char *opname = NULL; 58 const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
71 exit(1); 71 exit(1);
72 } 72 }
73 emit_asm_bytes(ctx, cp, n); 73 emit_asm_bytes(ctx, cp, n);
74 if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
75 /* Various fixups for external symbols outside of our binary. */
76 if (ctx->mode == BUILD_elfasm) {
77 if (LJ_32)
78 fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
79 fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
80 if (LJ_32)
81 fprintf(ctx->fp, "#endif\n");
82 return;
83 } else if (LJ_32 && ctx->mode == BUILD_machasm) {
84 fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
85 return;
86 }
87 }
74 fprintf(ctx->fp, "\t%s %s\n", opname, sym); 88 fprintf(ctx->fp, "\t%s %s\n", opname, sym);
75} 89}
76#else 90#else
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
79{ 93{
80 int i; 94 int i;
81 for (i = 0; i < n; i += 4) { 95 for (i = 0; i < n; i += 4) {
96 uint32_t ins = *(uint32_t *)(p+i);
97#if LJ_TARGET_ARM64 && LJ_BE
98 ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
99#endif
82 if ((i & 15) == 0) 100 if ((i & 15) == 0)
83 fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); 101 fprintf(ctx->fp, "\t.long 0x%08x", ins);
84 else 102 else
85 fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); 103 fprintf(ctx->fp, ",0x%08x", ins);
86 if ((i & 15) == 12) putc('\n', ctx->fp); 104 if ((i & 15) == 12) putc('\n', ctx->fp);
87 } 105 }
88 if ((n & 15) != 0) putc('\n', ctx->fp); 106 if ((n & 15) != 0) putc('\n', ctx->fp);
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
107 ins, sym); 125 ins, sym);
108 exit(1); 126 exit(1);
109 } 127 }
110#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 128#elif LJ_TARGET_ARM64
129 if ((ins >> 26) == 0x25u) {
130 fprintf(ctx->fp, "\tbl %s\n", sym);
131 } else {
132 fprintf(stderr,
133 "Error: unsupported opcode %08x for %s symbol relocation.\n",
134 ins, sym);
135 exit(1);
136 }
137#elif LJ_TARGET_PPC
111#if LJ_TARGET_PS3 138#if LJ_TARGET_PS3
112#define TOCPREFIX "." 139#define TOCPREFIX "."
113#else 140#else
@@ -228,11 +255,20 @@ void emit_asm(BuildCtx *ctx)
228 255
229#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND 256#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
230 /* This should really be moved into buildvm_arm.dasc. */ 257 /* This should really be moved into buildvm_arm.dasc. */
258#if LJ_ARCH_HASFPU
259 fprintf(ctx->fp,
260 ".fnstart\n"
261 ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
262 ".vsave {d8-d15}\n"
263 ".save {r4}\n"
264 ".pad #28\n");
265#else
231 fprintf(ctx->fp, 266 fprintf(ctx->fp,
232 ".fnstart\n" 267 ".fnstart\n"
233 ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" 268 ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
234 ".pad #28\n"); 269 ".pad #28\n");
235#endif 270#endif
271#endif
236#if LJ_TARGET_MIPS 272#if LJ_TARGET_MIPS
237 fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); 273 fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
238#endif 274#endif
@@ -255,8 +291,9 @@ void emit_asm(BuildCtx *ctx)
255 BuildReloc *r = &ctx->reloc[rel]; 291 BuildReloc *r = &ctx->reloc[rel];
256 int n = r->ofs - ofs; 292 int n = r->ofs - ofs;
257#if LJ_TARGET_X86ORX64 293#if LJ_TARGET_X86ORX64
258 if (ctx->mode == BUILD_machasm && r->type != 0) { 294 if (r->type != 0 &&
259 emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); 295 (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
296 emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
260 } else { 297 } else {
261 emit_asm_bytes(ctx, ctx->code+ofs, n); 298 emit_asm_bytes(ctx, ctx->code+ofs, n);
262 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); 299 emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -290,10 +327,7 @@ void emit_asm(BuildCtx *ctx)
290#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) 327#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
291 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); 328 fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
292#endif 329#endif
293#if LJ_TARGET_PPCSPE 330#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
294 /* Soft-float ABI + SPE. */
295 fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
296#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
297 /* Hard-float ABI. */ 331 /* Hard-float ABI. */
298 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); 332 fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
299#endif 333#endif
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index 3c64626c..88014b23 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
5 5
6#include "buildvm.h" 6#include "buildvm.h"
7#include "lj_obj.h" 7#include "lj_obj.h"
8#include "lj_bc.h"
8#include "lj_lib.h" 9#include "lj_lib.h"
10#include "buildvm_libbc.h"
9 11
10/* Context for library definitions. */ 12/* Context for library definitions. */
11static uint8_t obuf[8192]; 13static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
151 regfunc = REGFUNC_OK; 153 regfunc = REGFUNC_OK;
152} 154}
153 155
156static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
157{
158 uint32_t v = *p++;
159 if (v >= 0x80) {
160 int sh = 0; v &= 0x7f;
161 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
162 }
163 *vv = v;
164 return p;
165}
166
167static void libdef_fixupbc(uint8_t *p)
168{
169 uint32_t i, sizebc;
170 p += 4;
171 p = libdef_uleb128(p, &sizebc);
172 p = libdef_uleb128(p, &sizebc);
173 p = libdef_uleb128(p, &sizebc);
174 for (i = 0; i < sizebc; i++, p += 4) {
175 uint8_t op = p[libbc_endian ? 3 : 0];
176 uint8_t ra = p[libbc_endian ? 2 : 1];
177 uint8_t rc = p[libbc_endian ? 1 : 2];
178 uint8_t rb = p[libbc_endian ? 0 : 3];
179 if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
180 op = BC_ISNUM; rc++;
181 }
182 p[LJ_ENDIAN_SELECT(0, 3)] = op;
183 p[LJ_ENDIAN_SELECT(1, 2)] = ra;
184 p[LJ_ENDIAN_SELECT(2, 1)] = rc;
185 p[LJ_ENDIAN_SELECT(3, 0)] = rb;
186 }
187}
188
189static void libdef_lua(BuildCtx *ctx, char *p, int arg)
190{
191 UNUSED(arg);
192 if (ctx->mode == BUILD_libdef) {
193 int i;
194 for (i = 0; libbc_map[i].name != NULL; i++) {
195 if (!strcmp(libbc_map[i].name, p)) {
196 int ofs = libbc_map[i].ofs;
197 int len = libbc_map[i+1].ofs - ofs;
198 obuf[2]++; /* Bump hash table size. */
199 *optr++ = LIBINIT_LUA;
200 libdef_name(p, 0);
201 memcpy(optr, libbc_code + ofs, len);
202 libdef_fixupbc(optr);
203 optr += len;
204 return;
205 }
206 }
207 fprintf(stderr, "Error: missing libbc definition for %s\n", p);
208 exit(1);
209 }
210}
211
154static uint32_t find_rec(char *name) 212static uint32_t find_rec(char *name)
155{ 213{
156 char *p = (char *)obuf; 214 char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
277 { "CF(", ")", libdef_func, LIBINIT_CF }, 335 { "CF(", ")", libdef_func, LIBINIT_CF },
278 { "ASM(", ")", libdef_func, LIBINIT_ASM }, 336 { "ASM(", ")", libdef_func, LIBINIT_ASM },
279 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, 337 { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
338 { "LUA(", ")", libdef_lua, 0 },
280 { "REC(", ")", libdef_rec, 0 }, 339 { "REC(", ")", libdef_rec, 0 },
281 { "PUSH(", ")", libdef_push, 0 }, 340 { "PUSH(", ")", libdef_push, 0 },
282 { "SET(", ")", libdef_set, 0 }, 341 { "SET(", ")", libdef_set, 0 },
@@ -373,7 +432,7 @@ void emit_lib(BuildCtx *ctx)
373 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", 432 "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
374 ffasmfunc); 433 ffasmfunc);
375 } else if (ctx->mode == BUILD_vmdef) { 434 } else if (ctx->mode == BUILD_vmdef) {
376 fprintf(ctx->fp, "}\n\n"); 435 fprintf(ctx->fp, "},\n\n");
377 } else if (ctx->mode == BUILD_bcdef) { 436 } else if (ctx->mode == BUILD_bcdef) {
378 int i; 437 int i;
379 fprintf(ctx->fp, "\n};\n\n"); 438 fprintf(ctx->fp, "\n};\n\n");
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..b2600bd5
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,56 @@
1/* This is a generated file. DO NOT EDIT! */
2
3static const int libbc_endian = 0;
4
5static const uint8_t libbc_code[] = {
6#if LJ_FR2
70,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
80,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
916,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
100,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1,
11128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
120,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7,
130,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
140,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
158,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
160,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
170,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
182,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
193,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
200,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
2141,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
2218,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
236,252,127,76,4,2,0,0
24#else
250,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0,
260,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3,
2716,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3,
280,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1,
29128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2,
300,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0,
310,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12,
320,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128,
338,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14,
340,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2,
350,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4,
362,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16,
373,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3,
380,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0,
3941,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128,
4018,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,
416,252,127,76,4,2,0,0
42#endif
43};
44
45static const struct { const char *name; int ofs; } libbc_map[] = {
46{"math_deg",0},
47{"math_rad",25},
48{"string_len",50},
49{"table_foreachi",69},
50{"table_foreach",136},
51{"table_getn",207},
52{"table_remove",226},
53{"table_move",355},
54{NULL,502}
55};
56
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
index 876b0add..01f9dac4 100644
--- a/src/host/buildvm_peobj.c
+++ b/src/host/buildvm_peobj.c
@@ -9,7 +9,7 @@
9#include "buildvm.h" 9#include "buildvm.h"
10#include "lj_bc.h" 10#include "lj_bc.h"
11 11
12#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 12#if LJ_TARGET_X86ORX64
13 13
14/* Context for PE object emitter. */ 14/* Context for PE object emitter. */
15static char *strtab; 15static char *strtab;
@@ -93,12 +93,6 @@ typedef struct PEsymaux {
93#define PEOBJ_RELOC_ADDR32NB 0x03 93#define PEOBJ_RELOC_ADDR32NB 0x03
94#define PEOBJ_RELOC_OFS 0 94#define PEOBJ_RELOC_OFS 0
95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ 95#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
96#elif LJ_TARGET_PPC
97#define PEOBJ_ARCH_TARGET 0x01f2
98#define PEOBJ_RELOC_REL32 0x06
99#define PEOBJ_RELOC_DIR32 0x02
100#define PEOBJ_RELOC_OFS (-4)
101#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */
102#endif 96#endif
103 97
104/* Section numbers (0-based). */ 98/* Section numbers (0-based). */
@@ -109,6 +103,8 @@ enum {
109#if LJ_TARGET_X64 103#if LJ_TARGET_X64
110 PEOBJ_SECT_PDATA, 104 PEOBJ_SECT_PDATA,
111 PEOBJ_SECT_XDATA, 105 PEOBJ_SECT_XDATA,
106#elif LJ_TARGET_X86
107 PEOBJ_SECT_SXDATA,
112#endif 108#endif
113 PEOBJ_SECT_RDATA_Z, 109 PEOBJ_SECT_RDATA_Z,
114 PEOBJ_NSECTIONS 110 PEOBJ_NSECTIONS
@@ -208,6 +204,13 @@ void emit_peobj(BuildCtx *ctx)
208 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; 204 sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
209 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ 205 /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
210 pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; 206 pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
207#elif LJ_TARGET_X86
208 memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
209 pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
210 sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
211 pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
212 /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
213 pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
211#endif 214#endif
212 215
213 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); 216 memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@@ -232,7 +235,7 @@ void emit_peobj(BuildCtx *ctx)
232 nrsym = ctx->nrelocsym; 235 nrsym = ctx->nrelocsym;
233 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; 236 pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
234#if LJ_TARGET_X64 237#if LJ_TARGET_X64
235 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ 238 pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
236#endif 239#endif
237 240
238 /* Write PE object header and all sections. */ 241 /* Write PE object header and all sections. */
@@ -242,15 +245,8 @@ void emit_peobj(BuildCtx *ctx)
242 /* Write .text section. */ 245 /* Write .text section. */
243 host_endian.u = 1; 246 host_endian.u = 1;
244 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { 247 if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
245#if LJ_TARGET_PPC
246 uint32_t *p = (uint32_t *)ctx->code;
247 int n = (int)(ctx->codesz >> 2);
248 for (i = 0; i < n; i++, p++)
249 *p = lj_bswap(*p); /* Byteswap .text section. */
250#else
251 fprintf(stderr, "Error: different byte order for host and target\n"); 248 fprintf(stderr, "Error: different byte order for host and target\n");
252 exit(1); 249 exit(1);
253#endif
254 } 250 }
255 owrite(ctx, ctx->code, ctx->codesz); 251 owrite(ctx, ctx->code, ctx->codesz);
256 for (i = 0; i < ctx->nreloc; i++) { 252 for (i = 0; i < ctx->nreloc; i++) {
@@ -312,6 +308,19 @@ void emit_peobj(BuildCtx *ctx)
312 reloc.type = PEOBJ_RELOC_ADDR32NB; 308 reloc.type = PEOBJ_RELOC_ADDR32NB;
313 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); 309 owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
314 } 310 }
311#elif LJ_TARGET_X86
312 /* Write .sxdata section. */
313 for (i = 0; i < nrsym; i++) {
314 if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
315 uint32_t symidx = 1+2+i;
316 owrite(ctx, &symidx, 4);
317 break;
318 }
319 }
320 if (i == nrsym) {
321 fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
322 exit(1);
323 }
315#endif 324#endif
316 325
317 /* Write .rdata$Z section. */ 326 /* Write .rdata$Z section. */
@@ -333,8 +342,10 @@ void emit_peobj(BuildCtx *ctx)
333#if LJ_TARGET_X64 342#if LJ_TARGET_X64
334 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); 343 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
335 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); 344 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
336 emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, 345 emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
337 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); 346 PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
347#elif LJ_TARGET_X86
348 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
338#endif 349#endif
339 350
340 emit_peobj_sym(ctx, ctx->beginsym, 0, 351 emit_peobj_sym(ctx, ctx->beginsym, 0,
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..56899546
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,197 @@
1----------------------------------------------------------------------------
2-- Lua script to dump the bytecode of the library functions written in Lua.
3-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
4----------------------------------------------------------------------------
5-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
6-- Released under the MIT license. See Copyright Notice in luajit.h
7----------------------------------------------------------------------------
8
9local ffi = require("ffi")
10local bit = require("bit")
11local vmdef = require("jit.vmdef")
12local bcnames = vmdef.bcnames
13
14local format = string.format
15
-- True when the running VM emits big-endian bytecode dumps.
-- NOTE(review): presumably byte 5 of a dump is the header flags byte whose
-- low bit marks big-endian -- confirm against the bytecode dump format.
local isbe
do
  local dump = string.dump(function() end)
  isbe = string.byte(dump, 5) % 2 == 1
end
17
-- Print the command line synopsis to stderr and exit with status 1.
-- Falls back to the name "genlibbc" when arg or arg[0] is missing.
local function usage(arg)
  local prog = arg and arg[0] or "genlibbc"
  io.stderr:write("Usage: ", prog, " [-o buildvm_libbc.h] lib_*.c\n")
  os.exit(1)
end
23
-- Parse the command line in 'arg'.
-- Accepts an optional leading "-o outfile" pair, which is removed from 'arg'
-- so that only the input file names remain. Returns the output file name,
-- or "-" (stdout) when no -o option was given.
-- Shows the usage message (which exits) when there are no arguments, when
-- -o lacks a file name, or when no input files are left after the option.
local function parse_arg(arg)
  if not (arg and arg[1]) then
    usage(arg)
  end
  local outfile = "-"
  if arg[1] == "-o" then
    outfile = arg[2]
    if not outfile then usage(arg) end
    table.remove(arg, 1)
    table.remove(arg, 1)
    -- "-o file" alone would otherwise silently generate an empty header.
    if not arg[1] then usage(arg) end
  end
  return outfile
end
37
-- Read every file listed in 'names' and return their contents joined in
-- list order. Raises an error if a file cannot be opened.
local function read_files(names)
  local parts = {}
  for i, name in ipairs(names) do
    local fp = assert(io.open(name))
    parts[i] = fp:read("*a")
    fp:close()
  end
  return table.concat(parts)
end
47
-- Rewrite the pseudo-macros in a library function's Lua source.
--   CHECK_<type>(var)  becomes  var=<n>   with a unique marker n > -30000
--   PAIRS(var)         becomes  nil, var, 0
-- Returns the source prefixed with "return " plus a fixup table that maps
-- each marker n to { "CHECK", <type> } and has PAIRS = true if PAIRS() was used.
local function transform_lua(code)
  local fixup = {}
  local marker = -30000

  local function on_check(tp, var)
    marker = marker + 1
    fixup[marker] = { "CHECK", tp }
    return format("%s=%d", var, marker)
  end

  local function on_pairs(var)
    fixup.PAIRS = true
    return format("nil, %s, 0", var)
  end

  -- Assign to a single variable to drop gsub's match count.
  code = string.gsub(code, "CHECK_(%w*)%((.-)%)", on_check)
  code = string.gsub(code, "PAIRS%((.-)%)", on_pairs)
  return "return "..code, fixup
end
62
-- Decode one ULEB128 value starting at byte pointer p.
-- Returns the pointer just past the encoded value, followed by the value.
local function read_uleb128(p)
  local value = p[0]
  p = p + 1
  if value < 128 then
    return p, value
  end
  -- Continuation bit set: accumulate 7 payload bits per following byte.
  value = value - 128
  local shift = 7
  while true do
    local b = p[0]
    p = p + 1
    value = value + bit.lshift(bit.band(b, 127), shift)
    shift = shift + 7
    if b < 128 then break end
  end
  return p, value
end
76
-- ORDER LJ_T
local name2itype = { str = 5, func = 9, tab = 12, int = 14, num = 15 }

-- Map bytecode names to opcode numbers. 'bcnames' holds one name per
-- 6-character slot, padded with spaces.
local BC = {}
for i = 0, #bcnames/6 - 1 do
  local slot = string.sub(bcnames, i*6 + 1, i*6 + 6)
  local opname = string.gsub(slot, " ", "")
  BC[opname] = i
end

-- Byte offsets of the instruction fields inside a 4-byte instruction,
-- mirrored for big-endian dumps.
local xop, xra, xrc, xrb
if isbe then
  xop, xra, xrc, xrb = 3, 2, 1, 0
else
  xop, xra, xrc, xrb = 0, 1, 2, 3
end
88
-- Patch the bytecode of a dumped function according to 'fixup' (as built by
-- transform_lua) and return the bytes of its prototype.
--   KSHORT with a CHECK marker  becomes  ISNUM / ISTYPE on the same slot
--   TGETV / TSETV on a slot checked as "tab"  becomes  TGETR / TSETR
--   ITERC  becomes  ITERN when PAIRS() was used
-- NOTE(review): the fixed skips (5 header bytes, 4 bytes after the length)
-- presumably match the stripped dump layout -- confirm against the dump format.
local function fixup_dump(dump, fixup)
  local buf = ffi.new("uint8_t[?]", #dump+1, dump)
  local p, len, ninsn
  p, len = read_uleb128(buf + 5)
  local proto = p
  -- Step over 4 bytes and two ULEB128 fields to reach the instruction count.
  p = read_uleb128(p + 4)
  p = read_uleb128(p)
  p, ninsn = read_uleb128(p)
  local rawtab = {}  -- Slots known to hold a checked table.
  for _ = 1, ninsn do
    local op = p[xop]
    if op == BC.KSHORT then
      -- Sign-extend the 16 bit operand to recover the negative marker.
      local rd = bit.arshift(bit.lshift(p[xrc] + 256*p[xrb], 16), 16)
      local f = fixup[rd]
      if f then
        if f[1] ~= "CHECK" then
          error("unhandled fixup type: "..f[1])
        end
        local tp = f[2]
        if tp == "tab" then rawtab[p[xra]] = true end
        p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
        p[xrb] = 0
        p[xrc] = name2itype[tp]
      end
    elseif op == BC.TGETV and rawtab[p[xrb]] then
      p[xop] = BC.TGETR
    elseif op == BC.TSETV and rawtab[p[xrb]] then
      p[xop] = BC.TSETR
    elseif op == BC.ITERC and fixup.PAIRS then
      p[xop] = BC.ITERN
    end
    p = p + 4
  end
  return ffi.string(proto, len)
end
134
-- Collect all LJLIB_LUA(name) /* ... */ definitions found in 'src'.
-- The returned table maps each name to its patched bytecode and also lists
-- the names at indices 1..n in order of appearance.
local function find_defs(src)
  local defs = {}
  local pattern = "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/"
  for name, body in string.gmatch(src, pattern) do
    local chunk, fixup = transform_lua(body)
    -- Compile with an empty environment, then call the chunk to obtain
    -- the function value it returns.
    local env = {}
    local func = assert(load(chunk, "", nil, env))()
    defs[#defs+1] = name
    defs[name] = fixup_dump(string.dump(func, true), fixup)
  end
  return defs
end
146
-- Build the text of the generated C header from 'defs' (see find_defs):
-- an endianness flag, one byte array holding all bytecode back to back and
-- a name -> offset map terminated by a NULL entry carrying the total size.
local function gen_header(defs)
  local out = {}
  local function emit(...)
    for i = 1, select("#", ...) do
      out[#out+1] = (select(i, ...))
    end
  end

  emit("/* This is a generated file. DO NOT EDIT! */\n\n")
  emit("static const int libbc_endian = ", isbe and 1 or 0, ";\n\n")

  -- Join the bytecode of all functions in definition order.
  local code = ""
  for _, name in ipairs(defs) do
    code = code .. defs[name]
  end

  emit("static const uint8_t libbc_code[] = {\n")
  local col = 0
  for i = 1, #code do
    local b = string.byte(code, i)
    emit(b, ",")
    -- Digits plus the comma; break the line once it reaches 75 columns.
    col = col + #tostring(b) + 1
    if col >= 75 then
      col = 0
      emit("\n")
    end
  end
  emit("0\n};\n\n")

  emit("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
  local ofs = 0
  for _, name in ipairs(defs) do
    emit('{"', name, '",', ofs, '},\n')
    ofs = ofs + #defs[name]
  end
  emit("{NULL,", ofs, "}\n};\n\n")
  return table.concat(out)
end
174
-- Write 'data' to the file 'name', or to stdout when name is "-".
-- An existing file whose contents already equal 'data' is left untouched.
local function write_file(name, data)
  if name == "-" then
    assert(io.write(data))
    assert(io.flush())
    return
  end
  local rd = io.open(name)
  if rd then
    local same = rd:read("*a") == data
    rd:close()
    if same then return end
  end
  local wr = assert(io.open(name, "w"))
  assert(wr:write(data))
  assert(wr:close())
end
191
-- Main: parse options, read the inputs, extract the definitions and write
-- the header. parse_arg must run first because it strips "-o file" from arg.
local outfile = parse_arg(arg)
write_file(outfile, gen_header(find_defs(read_files(arg))))
197
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index 7ca4c61c..45ba40e2 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
41 41
42-- Cache some library functions and objects. 42-- Cache some library functions and objects.
43local jit = require("jit") 43local jit = require("jit")
44assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local jutil = require("jit.util") 45local jutil = require("jit.util")
46local vmdef = require("jit.vmdef") 46local vmdef = require("jit.vmdef")
47local bit = require("bit") 47local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
179end 179end
180 180
181-- Public module functions. 181-- Public module functions.
182module(...) 182return {
183 183 line = bcline,
184line = bcline 184 dump = bcdump,
185dump = bcdump 185 targets = bctargets,
186targets = bctargets 186 on = bcliston,
187 187 off = bclistoff,
188on = bcliston 188 start = bcliston -- For -j command line option.
189off = bclistoff 189}
190start = bcliston -- For -j command line option.
191 190
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 58351c16..42d7240b 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,12 +11,16 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
18local LJBC_PREFIX = "luaJIT_BC_" 18local LJBC_PREFIX = "luaJIT_BC_"
19 19
20local type, assert = type, assert
21local format = string.format
22local tremove, tconcat = table.remove, table.concat
23
20------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
21 25
22local function usage() 26local function usage()
@@ -63,8 +67,18 @@ local map_type = {
63} 67}
64 68
65local map_arch = { 69local map_arch = {
66 x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, 70 x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
67 mips = true, mipsel = true, 71 x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
72 arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
73 arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
74 arm64be = { e = "be", b = 64, m = 183, },
75 ppc = { e = "be", b = 32, m = 20, },
76 mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
77 mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
78 mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
79 mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
80 mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
81 mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
68} 82}
69 83
70local map_os = { 84local map_os = {
@@ -73,33 +87,33 @@ local map_os = {
73} 87}
74 88
75local function checkarg(str, map, err) 89local function checkarg(str, map, err)
76 str = string.lower(str) 90 str = str:lower()
77 local s = check(map[str], "unknown ", err) 91 local s = check(map[str], "unknown ", err)
78 return s == true and str or s 92 return type(s) == "string" and s or str
79end 93end
80 94
81local function detecttype(str) 95local function detecttype(str)
82 local ext = string.match(string.lower(str), "%.(%a+)$") 96 local ext = str:lower():match("%.(%a+)$")
83 return map_type[ext] or "raw" 97 return map_type[ext] or "raw"
84end 98end
85 99
86local function checkmodname(str) 100local function checkmodname(str)
87 check(string.match(str, "^[%w_.%-]+$"), "bad module name") 101 check(str:match("^[%w_.%-]+$"), "bad module name")
88 return string.gsub(str, "[%.%-]", "_") 102 return str:gsub("[%.%-]", "_")
89end 103end
90 104
91local function detectmodname(str) 105local function detectmodname(str)
92 if type(str) == "string" then 106 if type(str) == "string" then
93 local tail = string.match(str, "[^/\\]+$") 107 local tail = str:match("[^/\\]+$")
94 if tail then str = tail end 108 if tail then str = tail end
95 local head = string.match(str, "^(.*)%.[^.]*$") 109 local head = str:match("^(.*)%.[^.]*$")
96 if head then str = head end 110 if head then str = head end
97 str = string.match(str, "^[%w_.%-]+") 111 str = str:match("^[%w_.%-]+")
98 else 112 else
99 str = nil 113 str = nil
100 end 114 end
101 check(str, "cannot derive module name, use -n name") 115 check(str, "cannot derive module name, use -n name")
102 return string.gsub(str, "[%.%-]", "_") 116 return str:gsub("[%.%-]", "_")
103end 117end
104 118
105------------------------------------------------------------------------------ 119------------------------------------------------------------------------------
@@ -118,19 +132,19 @@ end
118local function bcsave_c(ctx, output, s) 132local function bcsave_c(ctx, output, s)
119 local fp = savefile(output, "w") 133 local fp = savefile(output, "w")
120 if ctx.type == "c" then 134 if ctx.type == "c" then
121 fp:write(string.format([[ 135 fp:write(format([[
122#ifdef __cplusplus 136#ifdef __cplusplus
123extern "C" 137extern "C"
124#endif 138#endif
125#ifdef _WIN32 139#ifdef _WIN32
126__declspec(dllexport) 140__declspec(dllexport)
127#endif 141#endif
128const char %s%s[] = { 142const unsigned char %s%s[] = {
129]], LJBC_PREFIX, ctx.modname)) 143]], LJBC_PREFIX, ctx.modname))
130 else 144 else
131 fp:write(string.format([[ 145 fp:write(format([[
132#define %s%s_SIZE %d 146#define %s%s_SIZE %d
133static const char %s%s[] = { 147static const unsigned char %s%s[] = {
134]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) 148]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
135 end 149 end
136 local t, n, m = {}, 0, 0 150 local t, n, m = {}, 0, 0
@@ -138,13 +152,13 @@ static const char %s%s[] = {
138 local b = tostring(string.byte(s, i)) 152 local b = tostring(string.byte(s, i))
139 m = m + #b + 1 153 m = m + #b + 1
140 if m > 78 then 154 if m > 78 then
141 fp:write(table.concat(t, ",", 1, n), ",\n") 155 fp:write(tconcat(t, ",", 1, n), ",\n")
142 n, m = 0, #b + 1 156 n, m = 0, #b + 1
143 end 157 end
144 n = n + 1 158 n = n + 1
145 t[n] = b 159 t[n] = b
146 end 160 end
147 bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") 161 bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
148end 162end
149 163
150local function bcsave_elfobj(ctx, output, s, ffi) 164local function bcsave_elfobj(ctx, output, s, ffi)
@@ -199,12 +213,8 @@ typedef struct {
199} ELF64obj; 213} ELF64obj;
200]] 214]]
201 local symname = LJBC_PREFIX..ctx.modname 215 local symname = LJBC_PREFIX..ctx.modname
202 local is64, isbe = false, false 216 local ai = assert(map_arch[ctx.arch])
203 if ctx.arch == "x64" then 217 local is64, isbe = ai.b == 64, ai.e == "be"
204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
206 isbe = true
207 end
208 218
209 -- Handle different host/target endianess. 219 -- Handle different host/target endianess.
210 local function f32(x) return x end 220 local function f32(x) return x end
@@ -237,10 +247,8 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 247 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 248 hdr.eversion = 1
239 hdr.type = f16(1) 249 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) 250 hdr.machine = f16(ai.m)
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 251 hdr.flags = f32(ai.f or 0)
242 hdr.flags = f32(0x50001006)
243 end
244 hdr.version = f32(1) 252 hdr.version = f32(1)
245 hdr.shofs = fofs(ffi.offsetof(o, "sect")) 253 hdr.shofs = fofs(ffi.offsetof(o, "sect"))
246 hdr.ehsize = f16(ffi.sizeof(hdr)) 254 hdr.ehsize = f16(ffi.sizeof(hdr))
@@ -336,12 +344,8 @@ typedef struct {
336} PEobj; 344} PEobj;
337]] 345]]
338 local symname = LJBC_PREFIX..ctx.modname 346 local symname = LJBC_PREFIX..ctx.modname
339 local is64 = false 347 local ai = assert(map_arch[ctx.arch])
340 if ctx.arch == "x86" then 348 local is64 = ai.b == 64
341 symname = "_"..symname
342 elseif ctx.arch == "x64" then
343 is64 = true
344 end
345 local symexport = " /EXPORT:"..symname..",DATA " 349 local symexport = " /EXPORT:"..symname..",DATA "
346 350
347 -- The file format is always little-endian. Swap if the host is big-endian. 351 -- The file format is always little-endian. Swap if the host is big-endian.
@@ -355,7 +359,7 @@ typedef struct {
355 -- Create PE object and fill in header. 359 -- Create PE object and fill in header.
356 local o = ffi.new("PEobj") 360 local o = ffi.new("PEobj")
357 local hdr = o.hdr 361 local hdr = o.hdr
358 hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) 362 hdr.arch = f16(assert(ai.p))
359 hdr.nsects = f16(2) 363 hdr.nsects = f16(2)
360 hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) 364 hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
361 hdr.nsyms = f32(6) 365 hdr.nsyms = f32(6)
@@ -477,13 +481,13 @@ typedef struct {
477} mach_obj_64; 481} mach_obj_64;
478typedef struct { 482typedef struct {
479 mach_fat_header fat; 483 mach_fat_header fat;
480 mach_fat_arch fat_arch[4]; 484 mach_fat_arch fat_arch[2];
481 struct { 485 struct {
482 mach_header hdr; 486 mach_header hdr;
483 mach_segment_command seg; 487 mach_segment_command seg;
484 mach_section sec; 488 mach_section sec;
485 mach_symtab_command sym; 489 mach_symtab_command sym;
486 } arch[4]; 490 } arch[2];
487 mach_nlist sym_entry; 491 mach_nlist sym_entry;
488 uint8_t space[4096]; 492 uint8_t space[4096];
489} mach_fat_obj; 493} mach_fat_obj;
@@ -494,6 +498,8 @@ typedef struct {
494 is64, align, mobj = true, 8, "mach_obj_64" 498 is64, align, mobj = true, 8, "mach_obj_64"
495 elseif ctx.arch == "arm" then 499 elseif ctx.arch == "arm" then
496 isfat, mobj = true, "mach_fat_obj" 500 isfat, mobj = true, "mach_fat_obj"
501 elseif ctx.arch == "arm64" then
502 is64, align, isfat, mobj = true, 8, true, "mach_fat_obj"
497 else 503 else
498 check(ctx.arch == "x86", "unsupported architecture for OSX") 504 check(ctx.arch == "x86", "unsupported architecture for OSX")
499 end 505 end
@@ -503,8 +509,8 @@ typedef struct {
503 -- Create Mach-O object and fill in header. 509 -- Create Mach-O object and fill in header.
504 local o = ffi.new(mobj) 510 local o = ffi.new(mobj)
505 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) 511 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
506 local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] 512 local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
507 local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] 513 local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
508 if isfat then 514 if isfat then
509 o.fat.magic = be32(0xcafebabe) 515 o.fat.magic = be32(0xcafebabe)
510 o.fat.nfat_arch = be32(#cpusubtype) 516 o.fat.nfat_arch = be32(#cpusubtype)
@@ -603,16 +609,16 @@ local function docmd(...)
603 local n = 1 609 local n = 1
604 local list = false 610 local list = false
605 local ctx = { 611 local ctx = {
606 strip = true, arch = jit.arch, os = string.lower(jit.os), 612 strip = true, arch = jit.arch, os = jit.os:lower(),
607 type = false, modname = false, 613 type = false, modname = false,
608 } 614 }
609 while n <= #arg do 615 while n <= #arg do
610 local a = arg[n] 616 local a = arg[n]
611 if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then 617 if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
612 table.remove(arg, n) 618 tremove(arg, n)
613 if a == "--" then break end 619 if a == "--" then break end
614 for m=2,#a do 620 for m=2,#a do
615 local opt = string.sub(a, m, m) 621 local opt = a:sub(m, m)
616 if opt == "l" then 622 if opt == "l" then
617 list = true 623 list = true
618 elseif opt == "s" then 624 elseif opt == "s" then
@@ -625,13 +631,13 @@ local function docmd(...)
625 if n ~= 1 then usage() end 631 if n ~= 1 then usage() end
626 arg[1] = check(loadstring(arg[1])) 632 arg[1] = check(loadstring(arg[1]))
627 elseif opt == "n" then 633 elseif opt == "n" then
628 ctx.modname = checkmodname(table.remove(arg, n)) 634 ctx.modname = checkmodname(tremove(arg, n))
629 elseif opt == "t" then 635 elseif opt == "t" then
630 ctx.type = checkarg(table.remove(arg, n), map_type, "file type") 636 ctx.type = checkarg(tremove(arg, n), map_type, "file type")
631 elseif opt == "a" then 637 elseif opt == "a" then
632 ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") 638 ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
633 elseif opt == "o" then 639 elseif opt == "o" then
634 ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") 640 ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
635 else 641 else
636 usage() 642 usage()
637 end 643 end
@@ -653,7 +659,7 @@ end
653------------------------------------------------------------------------------ 659------------------------------------------------------------------------------
654 660
655-- Public module functions. 661-- Public module functions.
656module(...) 662return {
657 663 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 664}
659 665
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index 152d91bb..cafd2f74 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..d1596ebc
--- /dev/null
+++ b/src/jit/dis_arm64.lua
@@ -0,0 +1,1216 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64 disassembler module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6--
7-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
8-- Sponsored by Cisco Systems, Inc.
9----------------------------------------------------------------------------
10-- This is a helper module used by the LuaJIT machine code dumper module.
11--
12-- It disassembles most user-mode AArch64 instructions.
13-- NYI: Advanced SIMD and VFP instructions.
14------------------------------------------------------------------------------
15
16local type = type
17local sub, byte, format = string.sub, string.byte, string.format
18local match, gmatch, gsub = string.match, string.gmatch, string.gsub
19local concat = table.concat
20local bit = require("bit")
21local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
22local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
23local ror = bit.ror
24
25------------------------------------------------------------------------------
26-- Opcode maps
27------------------------------------------------------------------------------
28
-- PC-relative addressing.
local map_adr = {
  shift = 31,
  mask = 1,
  [0] = "adrDBx",
  [1] = "adrpDBx",
}
33
-- Add/subtract immediate.
local map_addsubi = {
  shift = 29,
  mask = 3,
  [0] = "add|movDNIg",
  [1] = "adds|cmnD0NIg",
  [2] = "subDNIg",
  [3] = "subs|cmpD0NIg",
}
38
-- Logical immediate.
local map_logi
do
  -- The same opcode table is reachable from both halves of the bit 31 split.
  -- NOTE(review): sharing assumes the decoder only reads these tables.
  local ops = {
    shift = 29, mask = 3,
    [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
  }
  map_logi = {
    shift = 31, mask = 1,
    [0] = { shift = 22, mask = 1, [0] = ops, [1] = false },  -- false: unallocated
    [1] = ops,
  }
end
54
-- Move wide immediate.
local map_movwi
do
  -- The same opcode table is reachable from both halves of the bit 31 split.
  -- NOTE(review): sharing assumes the decoder only reads these tables.
  local ops = {
    shift = 29, mask = 3,
    [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
  }
  map_movwi = {
    shift = 31, mask = 1,
    [0] = { shift = 22, mask = 1, [0] = ops, [1] = false },  -- false: unallocated
    [1] = ops,
  }
end
69
-- Bitfield.
local map_bitf
do
  -- Opcode table for one register width; 'sz' is the trailing pattern
  -- letter ("w" or "x").
  local function bitf_ops(sz)
    return {
      shift = 29, mask = 3,
      [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12"..sz,
      [1] = "bfm|bfi|bfxilDN13"..sz,
      [2] = "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12"..sz,
    }
  end
  map_bitf = {
    shift = 31, mask = 1,
    [0] = { shift = 22, mask = 1, [0] = bitf_ops("w") },
    [1] = { shift = 22, mask = 1, [1] = bitf_ops("x") },
  }
end
91
-- Data processing - immediate.
local map_datai = {
  shift = 23,
  mask = 7,
  [0] = map_adr,
  [1] = map_adr,
  [2] = map_addsubi,
  [3] = false,
  [4] = map_logi,
  [5] = map_movwi,
  [6] = map_bitf,
  [7] = {
    shift = 15,
    mask = 0x1c0c1,
    [0x00000] = "extr|rorDNM4w",
    [0x10080] = "extr|rorDNM4x",
    [0x10081] = "extr|rorDNM4x",
  },
}
102
-- Logical, shifted register.
local map_logsr
do
  -- Eight-entry table selected by bits 21..23: entries 0..5 alternate the
  -- two patterns that carry the "S" operand, entries 6 and 7 use the
  -- patterns without it.
  local function row(a, b, a_last, b_last)
    return {
      shift = 21, mask = 7,
      [0] = a, b, a, b, a, b, a_last, b_last
    }
  end
  -- The same opcode table is reachable from both halves of the bit 31 split.
  -- NOTE(review): sharing assumes the decoder only reads these tables.
  local ops = {
    shift = 29, mask = 3,
    [0] = row("andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"),
    [1] = row("orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"),
    [2] = row("eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"),
    [3] = row("ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"),
  }
  map_logsr = {
    shift = 31, mask = 1,
    [0] = { shift = 15, mask = 1, [0] = ops, [1] = false },  -- false: unallocated
    [1] = ops,
  }
end
156
-- Add/subtract, shifted register: opcode tables, referenced by map_addsubsh.
local map_assh
do
  -- Four-entry table selected by bits 22..23: entries 0..2 use the pattern
  -- carrying the "S" operand, entry 3 the pattern without it.
  local function row(with_s, last)
    return { shift = 22, mask = 3, [0] = with_s, with_s, with_s, last }
  end
  -- The same opcode table is reachable from both halves of the bit 31 split.
  -- NOTE(review): sharing assumes the decoder only reads these tables.
  local ops = {
    shift = 29, mask = 3,
    [0] = row("addDNMSg", "addDNMg"),
    [1] = row("adds|cmnD0NMSg", "adds|cmnD0NMg"),
    [2] = row("sub|negDN0MSg", "sub|negDN0Mg"),
    [3] = row("subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"),
  }
  map_assh = {
    shift = 31, mask = 1,
    [0] = { shift = 15, mask = 1, [0] = ops, [1] = false },  -- false: unallocated
    [1] = ops,
  }
end
206
-- Add/subtract, shifted register.
local map_addsubsh = {
  shift = 22,
  mask = 3,
  [0] = map_assh,
  [1] = map_assh,
  [2] = map_assh,
}
211
-- Add/subtract, extended register.
local map_addsubex = {
  shift = 22,
  mask = 3,
  [0] = {
    shift = 29,
    mask = 3,
    [0] = "addDNMXg",
    [1] = "adds|cmnD0NMXg",
    [2] = "subDNMXg",
    [3] = "subs|cmpD0NMzXg",
  },
}
219
-- Add/subtract, with carry.
local map_addsubc = {
  shift = 10,
  mask = 63,
  [0] = {
    shift = 29,
    mask = 3,
    [0] = "adcDNMg",
    [1] = "adcsDNMg",
    [2] = "sbc|ngcDN0Mg",
    [3] = "sbcs|ngcsDN0Mg",
  },
}
227
-- Conditional compare (register and immediate forms).
local map_ccomp
do
  -- 'src' is the pattern letter for the second operand:
  -- "M" in the register form, "5" in the immediate form.
  local function cmp_ops(src)
    return {
      shift = 29, mask = 3,
      [1] = "ccmnN"..src.."VCg",
      [2] = false,
      [3] = "ccmpN"..src.."VCg",
    }
  end
  map_ccomp = {
    shift = 4, mask = 1,
    [0] = {
      shift = 10, mask = 3,
      [0] = cmp_ops("M"),  -- Conditional compare register.
      [2] = cmp_ops("5"),  -- Conditional compare immediate.
    },
  }
end
242
-- Conditional select.
local map_csel = {
  shift = 11,
  mask = 1,
  [0] = {
    shift = 10,
    mask = 1,
    [0] = {
      shift = 29,
      mask = 3,
      [0] = "cselDNMzCg",
      [1] = false,
      [2] = "csinv|cinv|csetmDNMcg",
      [3] = false,
    },
    [1] = {
      shift = 29,
      mask = 3,
      [0] = "csinc|cinc|csetDNMcg",
      [1] = false,
      [2] = "csneg|cnegDNMcg",
      [3] = false,
    },
  },
}
257
-- Data processing, 1 source.
local map_data1s = {
  shift = 29,
  mask = 1,
  [0] = {
    shift = 31,
    mask = 1,
    [0] = {
      shift = 10,
      mask = 0x7ff,
      [0] = "rbitDNg",
      [1] = "rev16DNg",
      [2] = "revDNw",
      [3] = false,
      [4] = "clzDNg",
      [5] = "clsDNg",
    },
    [1] = {
      shift = 10,
      mask = 0x7ff,
      [0] = "rbitDNg",
      [1] = "rev16DNg",
      [2] = "rev32DNx",
      [3] = "revDNx",
      [4] = "clzDNg",
      [5] = "clsDNg",
    },
  },
}
272
-- Data processing, 2 sources.
local map_data2s = {
  shift = 29,
  mask = 1,
  [0] = {
    shift = 10,
    mask = 63,
    -- Index 0 is intentionally absent; entries start at 1.
    [1] = false,
    [2] = "udivDNMg",
    [3] = "sdivDNMg",
    [4] = false,
    [5] = false,
    [6] = false,
    [7] = false,
    [8] = "lslDNMg",
    [9] = "lsrDNMg",
    [10] = "asrDNMg",
    [11] = "rorDNMg",
  },
}
281
-- Data processing, 3 sources.
local map_data3s = {
  shift = 29,
  mask = 7,
  [0] = {
    shift = 21,
    mask = 7,
    [0] = {
      shift = 15,
      mask = 1,
      [0] = "madd|mulDNMA0g",
      [1] = "msub|mnegDNMA0g",
    },
  },
  [1] = false,
  [2] = false,
  [3] = false,
  [4] = {
    shift = 15,
    mask = 1,
    [0] = {
      shift = 21,
      mask = 7,
      [0] = "madd|mulDNMA0g",
      [1] = "smaddl|smullDxNMwA0x",
      [2] = "smulhDNMx",
      [3] = false,
      [4] = false,
      [5] = "umaddl|umullDxNMwA0x",
      [6] = "umulhDNMx",
    },
    [1] = {
      shift = 21,
      mask = 7,
      [0] = "msub|mnegDNMA0g",
      [1] = "smsubl|smneglDxNMwA0x",
      [2] = false,
      [3] = false,
      [4] = false,
      [5] = "umsubl|umneglDxNMwA0x",
    },
  },
}
305
-- Data processing, register.
local map_datar
do
  local other = {
    shift = 21, mask = 15,
    [0] = map_addsubc,
    [1] = false,
    [2] = map_ccomp,
    [3] = false,
    [4] = map_csel,
    [5] = false,
    [6] = { shift = 30, mask = 1, [0] = map_data2s, [1] = map_data1s },
    [7] = false,
  }
  -- Entries 8..15 all dispatch to the 3-source table.
  for i = 8, 15 do
    other[i] = map_data3s
  end
  map_datar = {
    shift = 28, mask = 1,
    [0] = {
      shift = 24, mask = 1,
      [0] = map_logsr,
      [1] = { shift = 21, mask = 1, [0] = map_addsubsh, [1] = map_addsubex },
    },
    [1] = other,
  }
end
327
-- Load register, literal.
local map_lrl = {
  shift = 26,
  mask = 1,
  [0] = {
    shift = 30,
    mask = 3,
    [0] = "ldrDwB",
    [1] = "ldrDxB",
    [2] = "ldrswDxB",
  },
  [1] = {
    shift = 30,
    mask = 3,
    [0] = "ldrDsB",
    [1] = "ldrDdB",
  },
}
339
-- Load/store register, immediate pre/post-indexed.
local map_lsriind
do
  -- Leaf table selected by bits 22..23; unused trailing entries stay nil.
  local function opc(a, b, c, d)
    return { shift = 22, mask = 3, [0] = a, [1] = b, [2] = c, [3] = d }
  end
  -- Table selected by bit 26 with entries at index 0 and (optionally) 1.
  local function sel26(at0, at1)
    return { shift = 26, mask = 1, [0] = at0, [1] = at1 }
  end
  map_lsriind = {
    shift = 30, mask = 3,
    [0] = sel26(opc("strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL")),
    [1] = sel26(opc("strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL")),
    [2] = sel26(opc("strDwzL", "ldrDwzL", "ldrswDxzL"),
                opc("strDszL", "ldrDszL")),
    [3] = sel26(opc("strDxzL", "ldrDxzL"),
                opc("strDdzL", "ldrDdzL")),
  }
end
379
-- Load/store register: immediate forms and register offset forms.
local map_lsriro
do
  -- Leaf table selected by bits 22..23; unused trailing entries stay nil.
  local function opc(a, b, c, d)
    return { shift = 22, mask = 3, [0] = a, [1] = b, [2] = c, [3] = d }
  end
  map_lsriro = {
    shift = 21, mask = 1,
    [0] = { -- Load/store register immediate.
      shift = 10, mask = 3,
      [0] = { -- Unscaled immediate.
        shift = 26, mask = 1,
        [0] = {
          shift = 30, mask = 3,
          [0] = opc("sturbDwK", "ldurbDwK"),
          [1] = opc("sturhDwK", "ldurhDwK"),
          [2] = opc("sturDwK", "ldurDwK"),
          [3] = opc("sturDxK", "ldurDxK"),
        },
      },
      [1] = map_lsriind,
      [2] = false,
      [3] = map_lsriind,
    },
    [1] = { -- Load/store register, register offset.
      shift = 10, mask = 3,
      [2] = {
        shift = 26, mask = 1,
        [0] = {
          shift = 30, mask = 3,
          [0] = opc("strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"),
          [1] = opc("strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"),
          [2] = opc("strDwO", "ldrDwO", "ldrswDxO"),
          [3] = opc("strDxO", "ldrDxO"),
        },
        [1] = {
          shift = 30, mask = 3,
          [2] = opc("strDsO", "ldrDsO"),
          [3] = opc("strDdO", "ldrDdO"),
        },
      },
    },
  }
end
444
-- Load/store register pair, offset.
local map_lsp
do
  -- Table selected by bit 26 with optional entries at index 0 and 1.
  local function sel26(at0, at1)
    return { shift = 26, mask = 1, [0] = at0, [1] = at1 }
  end
  map_lsp = {
    shift = 22, mask = 1,
    [0] = {
      shift = 30, mask = 3,
      [0] = sel26("stpDzAzwP", "stpDzAzsP"),
      [1] = sel26(nil, "stpDzAzdP"),
      [2] = sel26("stpDzAzxP"),
    },
    [1] = {
      shift = 30, mask = 3,
      [0] = sel26("ldpDzAzwP", "ldpDzAzsP"),
      [1] = sel26("ldpswDAxP", "ldpDzAzdP"),
      [2] = sel26("ldpDzAzxP"),
    },
  }
end
478
-- Loads and stores.
local map_ls
do
  -- Store/load leaf table selected by bits 22..23.
  local function opc(st, ld)
    return { shift = 22, mask = 3, [0] = st, [1] = ld }
  end
  -- Register pair dispatch: entries 1..3 go to map_lsp, entry 0 is absent.
  local function pair()
    return { shift = 23, mask = 3, [1] = map_lsp, [2] = map_lsp, [3] = map_lsp }
  end
  map_ls = {
    shift = 24, mask = 0x31,
    [0x10] = map_lrl,
    [0x20] = pair(),
    [0x21] = pair(),
    [0x30] = map_lsriro,
    [0x31] = {
      shift = 26, mask = 1,
      [0] = {
        shift = 30, mask = 3,
        [0] = opc("strbDwzU", "ldrbDwzU"),
        [1] = opc("strhDwzU", "ldrhDwzU"),
        [2] = opc("strDwzU", "ldrDwzU"),
        [3] = opc("strDxzU", "ldrDxzU"),
      },
      [1] = {
        shift = 30, mask = 3,
        [2] = opc("strDszU", "ldrDszU"),
        [3] = opc("strDdzU", "ldrDdzU"),
      },
    },
  }
end
524
-- Decode tree for floating-point data processing. The root selects on
-- bits 28..30 and only value 1 has an entry. Below it, bit 24 separates the
-- 1/2-source, compare, immediate, select and conversion forms ([0]) from the
-- 3-source forms ([1]). Inside the [0] branch only bit 21 == 1 is decoded,
-- and the chain of single-bit tests on bits 12..15 narrows the class.
-- A trailing "f" in a pattern makes disass_ins pick d or s register names
-- from bit 22 of the word.
525local map_datafp = { -- Data processing, SIMD and FP.
526 shift = 28, mask = 7,
527 { -- 001
528 shift = 24, mask = 1,
529 [0] = {
530 shift = 21, mask = 1,
531 {
532 shift = 10, mask = 3,
533 [0] = {
534 shift = 12, mask = 1,
535 [0] = {
536 shift = 13, mask = 1,
537 [0] = {
538 shift = 14, mask = 1,
539 [0] = {
540 shift = 15, mask = 1,
541 [0] = { -- FP/int conversion.
-- Bit 31 picks the w-register ([0]) or x-register ([1]) table;
-- the leaf index is the 8-bit field at bits 16..23.
542 shift = 31, mask = 1,
543 [0] = {
544 shift = 16, mask = 0xff,
545 [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
546 [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
547 [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
548 [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
549 [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
550 [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
551 [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
552 [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
553 [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
554 [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
555 [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
556 [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
557 [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
558 },
559 {
560 shift = 16, mask = 0xff,
561 [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
562 [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
563 [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
564 [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
565 [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
566 [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
567 [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
568 [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
569 [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
570 [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
571 [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
572 [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
573 [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
574 }
575 }
576 },
577 { -- FP data-processing, 1 source.
-- Only bit 31 == 0 is decoded. Bits 22..23 pick the table
-- (values 0 and 1 only); the leaf index is bits 15..20.
578 shift = 31, mask = 1,
579 [0] = {
580 shift = 22, mask = 3,
581 [0] = {
582 shift = 15, mask = 63,
583 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
584 "fsqrtDNf", false, "fcvtDdNs", false, false,
585 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
586 "frintaDNf", false, "frintxDNf", "frintiDNf",
587 },
588 {
589 shift = 15, mask = 63,
590 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
591 "fsqrtDNf", "fcvtDsNd", false, false, false,
592 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
593 "frintaDNf", false, "frintxDNf", "frintiDNf",
594 }
595 }
596 }
597 },
598 { -- FP compare.
599 shift = 31, mask = 1,
600 [0] = {
601 shift = 14, mask = 3,
602 [0] = {
603 shift = 23, mask = 1,
604 [0] = {
-- The low five bits of the word select the compare variant; the
-- "Z" operand code renders as the literal #0 in disass_ins.
605 shift = 0, mask = 31,
606 [0] = "fcmpNMf", [8] = "fcmpNZf",
607 [16] = "fcmpeNMf", [24] = "fcmpeNZf",
608 }
609 }
610 }
611 }
612 },
613 { -- FP immediate.
614 shift = 31, mask = 1,
615 [0] = {
616 shift = 5, mask = 31,
617 [0] = {
618 shift = 23, mask = 1,
619 [0] = "fmovDFf"
620 }
621 }
622 }
623 },
624 { -- FP conditional compare.
625 shift = 31, mask = 1,
626 [0] = {
627 shift = 23, mask = 1,
628 [0] = {
629 shift = 4, mask = 1,
630 [0] = "fccmpNMVCf", "fccmpeNMVCf"
631 }
632 }
633 },
634 { -- FP data-processing, 2 sources.
635 shift = 31, mask = 1,
636 [0] = {
637 shift = 23, mask = 1,
638 [0] = {
639 shift = 12, mask = 15,
640 [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
641 "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
642 "fnmulDNMf"
643 }
644 }
645 },
646 { -- FP conditional select.
647 shift = 31, mask = 1,
648 [0] = {
649 shift = 23, mask = 1,
650 [0] = "fcselDNMCf"
651 }
652 }
653 }
654 },
655 { -- FP data-processing, 3 sources.
656 shift = 31, mask = 1,
657 [0] = {
658 shift = 15, mask = 1,
659 [0] = {
-- mask = 5 tests bits 21 and 23 together; only results 0 and 1
-- (bit 23 clear) have entries.
660 shift = 21, mask = 5,
661 [0] = "fmaddDNMAf", "fnmaddDNMAf"
662 },
663 {
664 shift = 21, mask = 5,
665 [0] = "fmsubDNMAf", "fnmsubDNMAf"
666 }
667 }
668 }
669 }
670}
671
-- Decode tree for branches, exception generation and system instructions.
-- The root selects on bits 29..31. Index 3 is explicitly undecoded (false)
-- and index 7 has no entry. The "B" operand code makes disass_ins compute a
-- PC-relative target via parse_immpc().
672local map_br = { -- Branches, exception generating and system instructions.
673 shift = 29, mask = 7,
674 [0] = "bB",
675 { -- Compare & branch, immediate.
676 shift = 24, mask = 3,
677 [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
678 },
679 { -- Conditional branch, immediate.
680 shift = 24, mask = 3,
681 [0] = {
682 shift = 4, mask = 1,
683 [0] = {
-- Indexed by the low four bits of the word; value 15 has no entry.
684 shift = 0, mask = 15,
685 [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
686 "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
687 }
688 }
689 }, false, "blB",
690 { -- Compare & branch, immediate.
691 shift = 24, mask = 3,
692 [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
693 },
694 {
695 shift = 24, mask = 3,
-- The three sub-tables below use shift = 0 with a wide mask, so each key
-- is the masked instruction word itself rather than a small field.
696 [0] = { -- Exception generation.
697 shift = 0, mask = 0xe0001f,
698 [0x200000] = "brkW"
699 },
700 { -- System instructions.
701 shift = 0, mask = 0x3fffff,
702 [0x03201f] = "nop"
703 },
704 { -- Unconditional branch, register.
705 shift = 0, mask = 0xfffc1f,
706 [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
707 [0x5f0000] = "retNx"
708 },
709 }
710}
711
-- Root of the decode tree. disass_ins indexes it with bits 25..28 of the
-- instruction word; values 0..3 are explicitly undecoded (false).
-- map_datar and map_datai are defined earlier in the file.
712local map_init = {
713 shift = 25, mask = 15,
714 [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
715 map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
716}
717
718------------------------------------------------------------------------------
719
-- Register name tables, one per register class letter used in the operand
-- patterns (x, w, d, s), each indexed by register number 0..31.
local map_regs = { x = {}, w = {}, d = {}, s = {} }

for _, class in ipairs({ "x", "w", "d", "s" }) do
  local names = map_regs[class]
  for regno = 0, 31 do
    names[regno] = class..regno
  end
end
-- Register 31 of the integer classes is named as the stack pointer here;
-- the "z" pattern code in disass_ins rewrites it to xzr/wzr where needed.
map_regs.x[31] = "sp"
map_regs.w[31] = "wsp"
732
-- Condition names indexed by the 4-bit condition field (15 has no entry).
local map_cond = {
  [0] = "eq", [1] = "ne", [2] = "cs", [3] = "cc",
  [4] = "mi", [5] = "pl", [6] = "vs", [7] = "vc",
  [8] = "hi", [9] = "ls", [10] = "ge", [11] = "lt",
  [12] = "gt", [13] = "le", [14] = "al",
}

-- Shift type names indexed by the 2-bit shift field (3 has no entry).
local map_shift = { [0] = "lsl", [1] = "lsr", [2] = "asr" }

-- Extend operator names indexed by the 3-bit option field.
local map_extend = {
  [0] = "uxtb", [1] = "uxth", [2] = "uxtw", [3] = "uxtx",
  [4] = "sxtb", [5] = "sxth", [6] = "sxtw", [7] = "sxtx",
}
743
744------------------------------------------------------------------------------
745
-- Emit one formatted disassembly line and step to the next instruction.
-- ctx.out receives the address, optionally the raw word in hex (when
-- ctx.hexdump > 0), the mnemonic and the comma-separated operands. If
-- ctx.rel names an address present in ctx.symtab, the symbol is appended.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local target = ctx.rel
  local sym = target and ctx.symtab[target]
  local extra = sym and ("\t->"..sym) or ""
  local args = concat(operands, ", ")
  local line
  if ctx.hexdump > 0 then
    line = format("%08x %s %-5s %s%s\n",
      ctx.addr+pos, tohex(ctx.op), text, args, extra)
  else
    line = format("%08x %-5s %s%s\n", ctx.addr+pos, text, args, extra)
  end
  ctx.out(line)
  ctx.pos = pos + 4
end
765
-- Fallback for words that match no table entry: print them as raw data.
local function unknown(ctx)
  local word = "0x"..tohex(ctx.op)
  return putop(ctx, ".long", { word })
end
770
-- Resolve a register name for operand code `p`: the register class is the
-- first x/w/d/s letter that follows `p` in the pattern string.
local function match_reg(p, pat, regnum)
  local class = match(pat, p.."%w-([xwds])")
  return map_regs[class][regnum]
end
774
-- Hex-format a number: negative values go through tohex(), everything
-- else through "%x" (so no leading zeros).
local function fmt_hex32(x)
  return x < 0 and tohex(x) or format("%x", x)
end
782
-- Multipliers indexed by `len` (1..5) as computed in decode_imm13; the
-- decoded element pattern is multiplied by imm13_rep[len].
-- NOTE(review): presumably this replicates the element across 32 bits.
783local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }
784
-- Decode the immediate described by bits 10..15 (imms), bits 16..21 (immr)
-- and bit 22 of `op` into a hexadecimal string without a "0x" prefix.
-- Returns the string only; the caller adds the "#0x" prefix.
785local function decode_imm13(op)
786 local imms = band(rshift(op, 10), 63)
787 local immr = band(rshift(op, 16), 63)
-- Bit 22 clear: pattern built in 32 bits, with the element size class
-- `len` derived from the leading bits of imms.
788 if band(op, 0x00400000) == 0 then
789 local len = 5
790 if imms >= 56 then
791 if imms >= 60 then len = 1 else len = 2 end
792 elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
793 local l = lshift(1, len)-1
794 local s = band(imms, l)
795 local r = band(immr, l)
-- s+1 low bits set, rotated right by r within 32 bits.
796 local imm = ror(rshift(-1, 31-s), r)
-- For len < 5 the bits rotated out at the top are added back to the low part.
797 if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
798 imm = imm * imm13_rep[len]
799 local ix = fmt_hex32(imm)
-- Bit 31 of op set: the same 32-bit value is printed twice (upper and
-- lower half of the result).
800 if rshift(op, 31) ~= 0 then
801 return ix..tohex(imm)
802 else
803 return ix
804 end
805 else
-- Bit 22 set: the value is kept as a lo/hi pair of 32-bit halves.
806 local lo, hi = -1, 0
807 if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
808 if immr ~= 0 then
-- Rotate each half, exchange the bits that cross the half boundary via
-- the xor mask x, and swap the halves for rotations of 32 or more.
809 lo, hi = ror(lo, immr), ror(hi, immr)
810 local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
811 lo, hi = bxor(lo, x), bxor(hi, x)
812 if immr >= 32 then lo, hi = hi, lo end
813 end
814 if hi ~= 0 then
815 return fmt_hex32(hi)..tohex(lo)
816 else
817 return fmt_hex32(lo)
818 end
819 end
820end
821
-- Extract the sign-extended PC-relative displacement encoded in `op`.
-- The field layout depends on the mnemonic `name`:
--   tbz/tbnz : 14-bit field at bits 5..18, scaled by 4
--   b/bl     : 26-bit field at bits 0..25, scaled by 4
--   others   : 19-bit field at bits 5..23, scaled by 4; adr/adrp also OR in
--              the two bits at 29..30 as the low bits of the result
local function parse_immpc(op, name)
  if name == "tbz" or name == "tbnz" then
    return lshift(arshift(lshift(op, 13), 18), 2)
  end
  if name == "b" or name == "bl" then
    return arshift(lshift(op, 6), 4)
  end
  local disp = lshift(arshift(lshift(op, 8), 13), 2)
  if name == "adr" or name == "adrp" then
    return bor(disp, band(rshift(op, 29), 3))
  end
  return disp
end
835
-- Expand the 8-bit floating-point immediate held in bits 13..20 of `op`
-- into a Lua number: bit 20 is the sign, bits 17..19 give the exponent
-- and bits 13..16 the fraction (with an implicit leading one).
local function parse_fpimm8(op)
  local negative = band(op, 0x100000) ~= 0
  -- Sign-replicate bit 19 so the top byte holds the widened exponent bits.
  local widened = rshift(arshift(lshift(op, 12), 5), 24)
  local exp = bxor(widened, 0x80) - 131
  local mant = band(rshift(op, 13), 15) + 16
  local value = mant * 2^exp
  if negative then
    return -value
  end
  return value
end
842
-- Decide whether a bitfield-move should be shown with the bitfield-extract
-- alias. Returns false when imms < immr, when imms is 31 or 63, or when
-- immr == 0 and imms matches one of the widths excluded below.
local function prefer_bfx(sf, uns, imms, immr)
  local usable = not (imms < immr or imms == 31 or imms == 63)
  if usable and immr == 0 then
    local extwidth = imms == 7 or imms == 15
    if sf == 0 then
      usable = not extwidth
    elseif uns == 0 then
      usable = not (extwidth or imms == 31)
    end
  end
  return usable
end
857
-- Disassemble a single instruction.
--
-- Reads the 32-bit little-endian word at ctx.pos from ctx.code, walks the
-- decode tree starting at map_init until a pattern string is found, then
-- renders one operand per pattern character. The line is emitted through
-- putop(), which also advances ctx.pos by 4. Words that reach a false or
-- missing table entry are emitted through unknown().
--
-- Pattern strings have the form "name[|altname...][.suffix]codes", where
-- each code character selects one of the operand decoders in the loop below.
local function disass_ins(ctx)
  local pos = ctx.pos
  local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
  local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
  local operands = {}
  local suffix = ""
  local last, name, pat
  local map_reg
  ctx.op = op
  ctx.rel = nil

  -- Walk the decode tree; each node selects a child by a masked bitfield.
  local opat = map_init[band(rshift(op, 25), 15)]
  while type(opat) ~= "string" do
    if not opat then return unknown(ctx) end
    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
  end
  name, pat = match(opat, "^([a-z0-9]*)(.*)")
  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
  if altname then pat = pat2 end
  if sub(pat, 1, 1) == "." then
    local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
    suffix = suffix..s2
    pat = p2
  end

  -- A "g" or "f" code fixes the register class for all D/N/M/A operands:
  -- "g" picks x or w from bit 31, "f" picks d or s from bit 22.
  local rt = match(pat, "[gf]")
  if rt then
    if rt == "g" then
      map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
    else
      map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
    end
  end

  local second0, immr

  for p in gmatch(pat, ".") do
    local x = nil
    if p == "D" then
      local regnum = band(op, 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "N" then
      local regnum = band(rshift(op, 5), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "M" then
      local regnum = band(rshift(op, 16), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "A" then
      local regnum = band(rshift(op, 10), 31)
      x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
    elseif p == "B" then
      -- PC-relative target; remembered in ctx.rel for the symbol lookup.
      local addr = ctx.addr + pos + parse_immpc(op, name)
      ctx.rel = addr
      x = "0x"..tohex(addr)
    elseif p == "T" then
      -- Bit number for tbz/tbnz: bit 31 supplies the top bit, bits 19..23
      -- the low five.
      x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
    elseif p == "V" then
      x = band(op, 15)
    elseif p == "C" then
      x = map_cond[band(rshift(op, 12), 15)]
    elseif p == "c" then
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      local cond = band(rshift(op, 12), 15)
      local invc = bxor(cond, 1)
      x = map_cond[cond]
      if altname and cond ~= 14 and cond ~= 15 then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if rn == rm then
          -- Same source register twice: use the alias with the inverted
          -- condition and drop the duplicated operand(s).
          local n = #operands
          operands[n] = nil
          x = map_cond[invc]
          if rn ~= 31 then
            if a1 then name = a1 else name = altname end
          else
            operands[n-1] = nil
            name = a2
          end
        end
      end
    elseif p == "W" then
      x = band(rshift(op, 5), 0xffff)
    elseif p == "Y" then
      x = band(rshift(op, 5), 0xffff)
      local hw = band(rshift(op, 21), 3)
      if altname and (hw == 0 or x ~= 0) then
        name = altname
      end
    elseif p == "L" then
      -- Signed imm9 with pre-index (bit 11 set) or post-index writeback.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if band(op, 0x800) ~= 0 then
        x = "["..rn..", #"..imm9.."]!"
      else
        x = "["..rn.."], #"..imm9
      end
    elseif p == "U" then
      -- Unsigned, size-scaled imm12 offset. The field is unsigned, so it
      -- must be extracted with a logical shift: a sign-extending shift
      -- would print offsets with imm12 >= 0x800 as negative numbers.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local sz = band(rshift(op, 30), 3)
      local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
      if imm12 ~= 0 then
        x = "["..rn..", #"..imm12.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "K" then
      -- Signed, unscaled imm9 offset.
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local imm9 = arshift(lshift(op, 11), 23)
      if imm9 ~= 0 then
        x = "["..rn..", #"..imm9.."]"
      else
        x = "["..rn.."]"
      end
    elseif p == "O" then
      -- Register offset; bit 13 selects an x or w index register.
      local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
      local m = band(rshift(op, 13), 1)
      if m == 0 then
        rm = map_regs.w[band(rshift(op, 16), 31)]
      else
        rm = map_regs.x[band(rshift(op, 16), 31)]
      end
      x = "["..rn..", "..rm
      local opt = band(rshift(op, 13), 7)
      local s = band(rshift(op, 12), 1)
      local sz = band(rshift(op, 30), 3)
      -- extension to be applied
      if opt == 3 then
        if s == 0 then x = x.."]"
        else x = x..", lsl #"..sz.."]" end
      elseif opt == 2 or opt == 6 or opt == 7 then
        if s == 0 then x = x..", "..map_extend[opt].."]"
        else x = x..", "..map_extend[opt].." #"..sz.."]" end
      else
        x = x.."]"
      end
    elseif p == "P" then
      -- Register pair: signed imm7 scaled by the access size. opcv is the
      -- top six bits of the word. Only 0x2b and above scale by 16; 0x1b
      -- (d pair) and 0x2a (x pair) scale by 8, everything below by 4.
      local opcv, sh = rshift(op, 26), 2
      if opcv >= 0x2b then sh = 4 elseif opcv >= 0x1b then sh = 3 end
      local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
      local rn = map_regs.x[band(rshift(op, 5), 31)]
      local ind = band(rshift(op, 23), 3)
      if ind == 1 then
        x = "["..rn.."], #"..imm7
      elseif ind == 2 then
        if imm7 == 0 then
          x = "["..rn.."]"
        else
          x = "["..rn..", #"..imm7.."]"
        end
      elseif ind == 3 then
        x = "["..rn..", #"..imm7.."]!"
      end
    elseif p == "I" then
      local shf = band(rshift(op, 22), 3)
      local imm12 = band(rshift(op, 10), 0x0fff)
      local rn, rd = band(rshift(op, 5), 31), band(op, 31)
      if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
        name = altname
        x = nil
      elseif shf == 0 then
        x = imm12
      elseif shf == 1 then
        x = imm12..", lsl #12"
      end
    elseif p == "i" then
      x = "#0x"..decode_imm13(op)
    elseif p == "1" then
      immr = band(rshift(op, 16), 63)
      x = immr
    elseif p == "2" then
      -- imms of a bitfield move; chooses among the aliases in altname.
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2, a3, a4, a5, a6 =
          match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
        local sf = band(rshift(op, 26), 32)
        local uns = band(rshift(op, 30), 1)
        if prefer_bfx(sf, uns, x, immr) then
          name = a2
          x = x - immr + 1
        elseif immr == 0 and x == 7 then
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a6
          x = nil
        elseif immr == 0 and x == 15 then
          local n = #operands
          operands[n] = nil
          if sf ~= 0 then
            operands[n-1] = gsub(operands[n-1], "x", "w")
          end
          last = operands[n-1]
          name = a5
          x = nil
        elseif x == 31 or x == 63 then
          if x == 31 and immr == 0 and name == "sbfm" then
            name = a4
            local n = #operands
            operands[n] = nil
            if sf ~= 0 then
              operands[n-1] = gsub(operands[n-1], "x", "w")
            end
            last = operands[n-1]
          else
            name = a3
          end
          x = nil
        elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
          name = a4
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = nil
        elseif x < immr then
          name = a1
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        end
      end
    elseif p == "3" then
      x = band(rshift(op, 10), 63)
      if altname then
        local a1, a2 = match(altname, "([^|]*)|(.*)")
        if x < immr then
          name = a1
          local sf = band(rshift(op, 26), 32)
          last = "#"..(sf+32 - immr)
          operands[#operands] = last
          x = x + 1
        elseif x >= immr then
          name = a2
          x = x - immr + 1
        end
      end
    elseif p == "4" then
      x = band(rshift(op, 10), 63)
      local rn = band(rshift(op, 5), 31)
      local rm = band(rshift(op, 16), 31)
      if altname and rn == rm then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        name = altname
      end
    elseif p == "5" then
      x = band(rshift(op, 16), 31)
    elseif p == "S" then
      x = band(rshift(op, 10), 63)
      if x == 0 then x = nil
      else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
    elseif p == "X" then
      local opt = band(rshift(op, 13), 7)
      -- Width specifier <R>.
      if opt ~= 3 and opt ~= 7 then
        last = map_regs.w[band(rshift(op, 16), 31)]
        operands[#operands] = last
      end
      x = band(rshift(op, 10), 7)
      -- Extension.
      if opt == 2 + band(rshift(op, 31), 1) and
         band(rshift(op, second0 and 5 or 0), 31) == 31 then
        if x == 0 then x = nil
        else x = "lsl #"..x end
      else
        if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
        else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
      end
    elseif p == "R" then
      x = band(rshift(op,21), 3)
      if x == 0 then x = nil
      else x = "lsl #"..x*16 end
    elseif p == "z" then
      -- Register 31 means the zero register for the preceding operand.
      local n = #operands
      if operands[n] == "sp" then operands[n] = "xzr"
      elseif operands[n] == "wsp" then operands[n] = "wzr"
      end
    elseif p == "Z" then
      x = 0
    elseif p == "F" then
      x = parse_fpimm8(op)
    elseif p == "g" or p == "f" or p == "x" or p == "w" or
           p == "d" or p == "s" then
      -- These are handled in D/N/M/A.
    elseif p == "0" then
      -- Drop a preceding register-31 operand and switch to the alias name.
      if last == "sp" or last == "wsp" then
        local n = #operands
        operands[n] = nil
        last = operands[n-1]
        if altname then
          local a1, a2 = match(altname, "([^|]*)|(.*)")
          if not a1 then
            name = altname
          elseif second0 then
            name, altname = a2, a1
          else
            name, altname = a1, a2
          end
        end
      end
      second0 = true
    else
      assert(false)
    end
    if x then
      last = x
      if type(x) == "number" then x = "#"..x end
      operands[#operands+1] = x
    end
  end

  return putop(ctx, name..suffix, operands)
end
1175
1176------------------------------------------------------------------------------
1177
-- Disassemble a block of code: every instruction from offset `ofs`
-- (default 0) up to `ofs+len`, or to the end of ctx.code when `len` is
-- not given.
local function disass_block(ctx, ofs, len)
  ofs = ofs or 0
  local stop
  if len then
    stop = ofs + len
  else
    stop = #ctx.code
  end
  ctx.pos = ofs
  ctx.rel = nil
  -- disass_ins advances ctx.pos by one instruction per call.
  while ctx.pos < stop do
    disass_ins(ctx)
  end
end
1186
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-- `addr` defaults to 0 and `out` to io.write; the symbol table starts empty
-- and the hexdump setting defaults to 8.
local function create(code, addr, out)
  return {
    code = code,
    addr = addr or 0,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 8,
  }
end
1198
-- Simple API: disassemble code (a string) at address and output via out.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
1203
-- Return register name for RID: values below 32 index the x register
-- names, anything else indexes the d register names at r-32.
local function regname(r)
  local gpr = r < 32
  local bank = gpr and map_regs.x or map_regs.d
  return bank[gpr and r or r - 32]
end
1209
-- Public module functions.
local exports = {}
exports.create = create
exports.disass = disass
exports.regname = regname
return exports
1216
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
new file mode 100644
index 00000000..9f4077af
--- /dev/null
+++ b/src/jit/dis_arm64be.lua
@@ -0,0 +1,12 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64BE disassembler wrapper module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- ARM64 instructions are always little-endian. So just forward to the
8-- common ARM64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
-- Load dis_arm64 from the same package prefix this module was required with.
local prefix = string.match(..., ".*%.") or ""
return require(prefix.."dis_arm64")
12
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index c720b537..791ac91d 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift 19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
20 20
21------------------------------------------------------------------------------ 21------------------------------------------------------------------------------
22-- Primary and extended opcode maps 22-- Extended opcode maps common to all MIPS releases
23------------------------------------------------------------------------------ 23------------------------------------------------------------------------------
24 24
25local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
26local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } 25local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
27local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } 26local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
28 27
28local map_cop0 = {
29 shift = 25, mask = 1,
30 [0] = {
31 shift = 21, mask = 15,
32 [0] = "mfc0TDW", [4] = "mtc0TDW",
33 [10] = "rdpgprDT",
34 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
35 [14] = "wrpgprDT",
36 }, {
37 shift = 0, mask = 63,
38 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
39 [24] = "eret", [31] = "deret",
40 [32] = "wait",
41 },
42}
43
44------------------------------------------------------------------------------
45-- Primary and extended opcode maps for MIPS R1-R5
46------------------------------------------------------------------------------
47
48local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
49
29local map_special = { 50local map_special = {
30 shift = 0, mask = 63, 51 shift = 0, mask = 63,
31 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, 52 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
@@ -34,15 +55,17 @@ local map_special = {
34 "jrS", "jalrD1S", "movzDST", "movnDST", 55 "jrS", "jalrD1S", "movzDST", "movnDST",
35 "syscallY", "breakY", false, "sync", 56 "syscallY", "breakY", false, "sync",
36 "mfhiD", "mthiS", "mfloD", "mtloS", 57 "mfhiD", "mthiS", "mfloD", "mtloS",
37 false, false, false, false, 58 "dsllvDST", false, "dsrlvDST", "dsravDST",
38 "multST", "multuST", "divST", "divuST", 59 "multST", "multuST", "divST", "divuST",
39 false, false, false, false, 60 "dmultST", "dmultuST", "ddivST", "ddivuST",
40 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", 61 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
41 "andDST", "orDST", "xorDST", "nor|notDST0", 62 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
42 false, false, "sltDST", "sltuDST", 63 false, false, "sltDST", "sltuDST",
43 false, false, false, false, 64 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
44 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", 65 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
45 "teqSTZ", false, "tneSTZ", 66 "teqSTZ", false, "tneSTZ", false,
67 "dsllDTA", false, "dsrlDTA", "dsraDTA",
68 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
46} 69}
47 70
48local map_special2 = { 71local map_special2 = {
@@ -60,11 +83,17 @@ local map_bshfl = {
60 [24] = "sehDT", 83 [24] = "sehDT",
61} 84}
62 85
86local map_dbshfl = {
87 shift = 6, mask = 31,
88 [2] = "dsbhDT",
89 [5] = "dshdDT",
90}
91
63local map_special3 = { 92local map_special3 = {
64 shift = 0, mask = 63, 93 shift = 0, mask = 63,
65 [0] = "extTSAK", [4] = "insTSAL", 94 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
66 [32] = map_bshfl, 95 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
67 [59] = "rdhwrTD", 96 [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD",
68} 97}
69 98
70local map_regimm = { 99local map_regimm = {
@@ -79,22 +108,6 @@ local map_regimm = {
79 false, false, false, "synciSO", 108 false, false, false, "synciSO",
80} 109}
81 110
82local map_cop0 = {
83 shift = 25, mask = 1,
84 [0] = {
85 shift = 21, mask = 15,
86 [0] = "mfc0TDW", [4] = "mtc0TDW",
87 [10] = "rdpgprDT",
88 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
89 [14] = "wrpgprDT",
90 }, {
91 shift = 0, mask = 63,
92 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
93 [24] = "eret", [31] = "deret",
94 [32] = "wait",
95 },
96}
97
98local map_cop1s = { 111local map_cop1s = {
99 shift = 0, mask = 63, 112 shift = 0, mask = 63,
100 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", 113 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
@@ -178,8 +191,8 @@ local map_cop1bc = {
178 191
179local map_cop1 = { 192local map_cop1 = {
180 shift = 21, mask = 31, 193 shift = 21, mask = 31,
181 [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", 194 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
182 "mtc1TG", false, "ctc1TG", "mthc1TG", 195 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
183 map_cop1bc, false, false, false, 196 map_cop1bc, false, false, false,
184 false, false, false, false, 197 false, false, false, false,
185 map_cop1s, map_cop1d, false, false, 198 map_cop1s, map_cop1d, false, false,
@@ -213,16 +226,218 @@ local map_pri = {
213 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", 226 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU",
214 map_cop0, map_cop1, false, map_cop1x, 227 map_cop0, map_cop1, false, map_cop1x,
215 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", 228 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
216 false, false, false, false, 229 "daddiTSI", "daddiuTSI", false, false,
217 map_special2, false, false, map_special3, 230 map_special2, "jalxJ", false, map_special3,
218 "lbTSO", "lhTSO", "lwlTSO", "lwTSO", 231 "lbTSO", "lhTSO", "lwlTSO", "lwTSO",
219 "lbuTSO", "lhuTSO", "lwrTSO", false, 232 "lbuTSO", "lhuTSO", "lwrTSO", false,
220 "sbTSO", "shTSO", "swlTSO", "swTSO", 233 "sbTSO", "shTSO", "swlTSO", "swTSO",
221 false, false, "swrTSO", "cacheNSO", 234 false, false, "swrTSO", "cacheNSO",
222 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", 235 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO",
223 false, "ldc1HSO", "ldc2TSO", false, 236 false, "ldc1HSO", "ldc2TSO", "ldTSO",
224 "scTSO", "swc1HSO", "swc2TSO", false, 237 "scTSO", "swc1HSO", "swc2TSO", false,
225 false, "sdc1HSO", "sdc2TSO", false, 238 false, "sdc1HSO", "sdc2TSO", "sdTSO",
239}
240
241------------------------------------------------------------------------------
242-- Primary and extended opcode maps for MIPS R6
243------------------------------------------------------------------------------
244
245local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
246local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
247local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
248local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
249local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
250local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
251local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
252local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
253
254local map_special_r6 = {
255 shift = 0, mask = 63,
256 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
257 false, map_srl, "sraDTA",
258 "sllvDTS", false, map_srlv, "sravDTS",
259 "jrS", "jalrD1S", false, false,
260 "syscallY", "breakY", false, "sync",
261 "clzDS", "cloDS", "dclzDS", "dcloDS",
262 "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
263 map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
264 map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
265 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
266 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
267 false, false, "sltDST", "sltuDST",
268 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
269 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
270 "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
271 "dsllDTA", false, "dsrlDTA", "dsraDTA",
272 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
273}
274
275local map_bshfl_r6 = {
276 shift = 9, mask = 3,
277 [1] = "alignDSTa",
278 _ = {
279 shift = 6, mask = 31,
280 [0] = "bitswapDT",
281 [2] = "wsbhDT",
282 [16] = "sebDT",
283 [24] = "sehDT",
284 }
285}
286
287local map_dbshfl_r6 = {
288 shift = 9, mask = 3,
289 [1] = "dalignDSTa",
290 _ = {
291 shift = 6, mask = 31,
292 [0] = "dbitswapDT",
293 [2] = "dsbhDT",
294 [5] = "dshdDT",
295 }
296}
297
298local map_special3_r6 = {
299 shift = 0, mask = 63,
300 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
301 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
302 [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
303}
304
305local map_regimm_r6 = {
306 shift = 16, mask = 31,
307 [0] = "bltzSB", [1] = "bgezSB",
308 [6] = "dahiSI", [30] = "datiSI",
309 [23] = "sigrieI", [31] = "synciSO",
310}
311
312local map_pcrel_r6 = {
313 shift = 19, mask = 3,
314 [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
315 shift = 18, mask = 1,
316 [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
317 }
318}
319
320local map_cop1s_r6 = {
321 shift = 0, mask = 63,
322 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
323 "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
324 "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
325 "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
326 "sel.sFGH", false, false, false,
327 "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
328 "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
329 "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
330 false, "cvt.d.sFG", false, false,
331 "cvt.w.sFG", "cvt.l.sFG",
332}
333
334local map_cop1d_r6 = {
335 shift = 0, mask = 63,
336 [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
337 "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
338 "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
339 "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
340 "sel.dFGH", false, false, false,
341 "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
342 "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
343 "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
344 "cvt.s.dFG", false, false, false,
345 "cvt.w.dFG", "cvt.l.dFG",
346}
347
348local map_cop1w_r6 = {
349 shift = 0, mask = 63,
350 [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
351 "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
352 "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
353 "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
354 false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
355 false, false, false, false,
356 false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
357 false, false, false, false,
358 "cvt.s.wFG", "cvt.d.wFG",
359}
360
361local map_cop1l_r6 = {
362 shift = 0, mask = 63,
363 [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
364 "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
365 "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
366 "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
367 false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
368 false, false, false, false,
369 false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
370 false, false, false, false,
371 "cvt.s.lFG", "cvt.d.lFG",
372}
373
374local map_cop1_r6 = {
375 shift = 21, mask = 31,
376 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
377 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
378 false, "bc1eqzHB", false, false,
379 false, "bc1nezHB", false, false,
380 map_cop1s_r6, map_cop1d_r6, false, false,
381 map_cop1w_r6, map_cop1l_r6,
382}
383
-- Sub-opcode selector from the rs/rt register fields:
-- 0 when rt == 0, else 1 when rs == 0, else 2 when rs == rt, else 3.
local function maprs_popTS(rs, rt)
  if rt == 0 then return 0 end
  if rs == 0 then return 1 end
  return rs == rt and 2 or 3
end
388
389local map_pop06_r6 = {
390 maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
391}
392local map_pop07_r6 = {
393 maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
394}
395local map_pop26_r6 = {
396 maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
397}
398local map_pop27_r6 = {
399 maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
400}
401
-- Sub-opcode selector from the rs field only: 0 when rs == 0, else 1.
local function maprs_popS(rs, rt)
  return rs == 0 and 0 or 1
end
405
406local map_pop66_r6 = {
407 maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
408}
409local map_pop76_r6 = {
410 maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
411}
412
-- Sub-opcode selector from the rs/rt register fields:
-- 0 when rs >= rt, else 1 when rs == 0, else 2.
local function maprs_popST(rs, rt)
  if rs >= rt then return 0 end
  return rs == 0 and 1 or 2
end
416
417local map_pop10_r6 = {
418 maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
419}
420local map_pop30_r6 = {
421 maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
422}
423
424local map_pri_r6 = {
425 [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
426 "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
427 map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
428 "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
429 map_cop0, map_cop1_r6, false, false,
430 false, false, map_pop26_r6, map_pop27_r6,
431 map_pop30_r6, "daddiuTSI", false, false,
432 false, "dauiTSI", false, map_special3_r6,
433 "lbTSO", "lhTSO", false, "lwTSO",
434 "lbuTSO", "lhuTSO", false, false,
435 "sbTSO", "shTSO", false, "swTSO",
436 false, false, false, false,
437 false, "lwc1HSO", "bc#", false,
438 false, "ldc1HSO", map_pop66_r6, "ldTSO",
439 false, "swc1HSO", "balc#", map_pcrel_r6,
440 false, "sdc1HSO", map_pop76_r6, "sdTSO",
226} 441}
227 442
228------------------------------------------------------------------------------ 443------------------------------------------------------------------------------
@@ -279,10 +494,14 @@ local function disass_ins(ctx)
279 ctx.op = op 494 ctx.op = op
280 ctx.rel = nil 495 ctx.rel = nil
281 496
282 local opat = map_pri[rshift(op, 26)] 497 local opat = ctx.map_pri[rshift(op, 26)]
283 while type(opat) ~= "string" do 498 while type(opat) ~= "string" do
284 if not opat then return unknown(ctx) end 499 if not opat then return unknown(ctx) end
285 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ 500 if opat.maprs then
501 opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
502 else
503 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
504 end
286 end 505 end
287 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") 506 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
288 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") 507 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
@@ -306,6 +525,10 @@ local function disass_ins(ctx)
306 x = "f"..band(rshift(op, 21), 31) 525 x = "f"..band(rshift(op, 21), 31)
307 elseif p == "A" then 526 elseif p == "A" then
308 x = band(rshift(op, 6), 31) 527 x = band(rshift(op, 6), 31)
528 elseif p == "a" then
529 x = band(rshift(op, 6), 7)
530 elseif p == "E" then
531 x = band(rshift(op, 6), 31) + 32
309 elseif p == "M" then 532 elseif p == "M" then
310 x = band(rshift(op, 11), 31) 533 x = band(rshift(op, 11), 31)
311 elseif p == "N" then 534 elseif p == "N" then
@@ -315,10 +538,18 @@ local function disass_ins(ctx)
315 if x == 0 then x = nil end 538 if x == 0 then x = nil end
316 elseif p == "K" then 539 elseif p == "K" then
317 x = band(rshift(op, 11), 31) + 1 540 x = band(rshift(op, 11), 31) + 1
541 elseif p == "P" then
542 x = band(rshift(op, 11), 31) + 33
318 elseif p == "L" then 543 elseif p == "L" then
319 x = band(rshift(op, 11), 31) - last + 1 544 x = band(rshift(op, 11), 31) - last + 1
545 elseif p == "Q" then
546 x = band(rshift(op, 11), 31) - last + 33
320 elseif p == "I" then 547 elseif p == "I" then
321 x = arshift(lshift(op, 16), 16) 548 x = arshift(lshift(op, 16), 16)
549 elseif p == "2" then
550 x = arshift(lshift(op, 13), 11)
551 elseif p == "3" then
552 x = arshift(lshift(op, 14), 11)
322 elseif p == "U" then 553 elseif p == "U" then
323 x = band(op, 0xffff) 554 x = band(op, 0xffff)
324 elseif p == "O" then 555 elseif p == "O" then
@@ -328,13 +559,22 @@ local function disass_ins(ctx)
328 local index = map_gpr[band(rshift(op, 16), 31)] 559 local index = map_gpr[band(rshift(op, 16), 31)]
329 operands[#operands] = format("%s(%s)", index, last) 560 operands[#operands] = format("%s(%s)", index, last)
330 elseif p == "B" then 561 elseif p == "B" then
331 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 562 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
563 ctx.rel = x
564 x = format("0x%08x", x)
565 elseif p == "b" then
566 x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
332 ctx.rel = x 567 ctx.rel = x
333 x = "0x"..tohex(x) 568 x = format("0x%08x", x)
569 elseif p == "#" then
570 x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
571 ctx.rel = x
572 x = format("0x%08x", x)
334 elseif p == "J" then 573 elseif p == "J" then
335 x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 574 local a = ctx.addr + ctx.pos
575 x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
336 ctx.rel = x 576 ctx.rel = x
337 x = "0x"..tohex(x) 577 x = format("0x%08x", x)
338 elseif p == "V" then 578 elseif p == "V" then
339 x = band(rshift(op, 8), 7) 579 x = band(rshift(op, 8), 7)
340 if x == 0 then x = nil end 580 if x == 0 then x = nil end
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len)
384end 624end
385 625
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 626-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 627local function create(code, addr, out)
388 local ctx = {} 628 local ctx = {}
389 ctx.code = code 629 ctx.code = code
390 ctx.addr = addr or 0 630 ctx.addr = addr or 0
@@ -393,36 +633,62 @@ local function create_(code, addr, out)
393 ctx.disass = disass_block 633 ctx.disass = disass_block
394 ctx.hexdump = 8 634 ctx.hexdump = 8
395 ctx.get = get_be 635 ctx.get = get_be
636 ctx.map_pri = map_pri
637 return ctx
638end
639
640local function create_el(code, addr, out)
641 local ctx = create(code, addr, out)
642 ctx.get = get_le
643 return ctx
644end
645
646local function create_r6(code, addr, out)
647 local ctx = create(code, addr, out)
648 ctx.map_pri = map_pri_r6
396 return ctx 649 return ctx
397end 650end
398 651
399local function create_el_(code, addr, out) 652local function create_r6_el(code, addr, out)
400 local ctx = create_(code, addr, out) 653 local ctx = create(code, addr, out)
401 ctx.get = get_le 654 ctx.get = get_le
655 ctx.map_pri = map_pri_r6
402 return ctx 656 return ctx
403end 657end
404 658
405-- Simple API: disassemble code (a string) at address and output via out. 659-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 660local function disass(code, addr, out)
407 create_(code, addr, out):disass() 661 create(code, addr, out):disass()
662end
663
664local function disass_el(code, addr, out)
665 create_el(code, addr, out):disass()
408end 666end
409 667
410local function disass_el_(code, addr, out) 668local function disass_r6(code, addr, out)
411 create_el_(code, addr, out):disass() 669 create_r6(code, addr, out):disass()
670end
671
672local function disass_r6_el(code, addr, out)
673 create_r6_el(code, addr, out):disass()
412end 674end
413 675
414-- Return register name for RID. 676-- Return register name for RID.
415local function regname_(r) 677local function regname(r)
416 if r < 32 then return map_gpr[r] end 678 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 679 return "f"..(r-32)
418end 680end
419 681
420-- Public module functions. 682-- Public module functions.
421module(...) 683return {
422 684 create = create,
423create = create_ 685 create_el = create_el,
424create_el = create_el_ 686 create_r6 = create_r6,
425disass = disass_ 687 create_r6_el = create_r6_el,
426disass_el = disass_el_ 688 disass = disass,
427regname = regname_ 689 disass_el = disass_el,
690 disass_r6 = disass_r6,
691 disass_r6_el = disass_r6_el,
692 regname = regname
693}
428 694
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
new file mode 100644
index 00000000..018e6058
--- /dev/null
+++ b/src/jit/dis_mips64.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
-- Load the sibling "dis_mips" module. The chunk argument (the module name
-- when loaded via require) is matched up to and including its last dot to
-- obtain the package prefix; without a dot the prefix is the empty string.
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
-- Re-export the default (big-endian) entry points under the generic names.
12return {
13 create = dis_mips.create,
14 disass = dis_mips.disass,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
new file mode 100644
index 00000000..ef3af475
--- /dev/null
+++ b/src/jit/dis_mips64el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
-- Load the sibling "dis_mips" module. The chunk argument (the module name
-- when loaded via require) is matched up to and including its last dot to
-- obtain the package prefix; without a dot the prefix is the empty string.
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
-- Re-export the little-endian (_el) entry points under the generic names.
12return {
13 create = dis_mips.create_el,
14 disass = dis_mips.disass_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua
new file mode 100644
index 00000000..2bfc2429
--- /dev/null
+++ b/src/jit/dis_mips64r6.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
-- Load the sibling "dis_mips" module. The chunk argument (the module name
-- when loaded via require) is matched up to and including its last dot to
-- obtain the package prefix; without a dot the prefix is the empty string.
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
-- Re-export the R6 big-endian (_r6) entry points under the generic names.
12return {
13 create = dis_mips.create_r6,
14 disass = dis_mips.disass_r6,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua
new file mode 100644
index 00000000..30597552
--- /dev/null
+++ b/src/jit/dis_mips64r6el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
-- Load the sibling "dis_mips" module. The chunk argument (the module name
-- when loaded via require) is matched up to and including its last dot to
-- obtain the package prefix; without a dot the prefix is the empty string.
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
-- Re-export the R6 little-endian (_r6_el) entry points under the generic names.
12return {
13 create = dis_mips.create_r6_el,
14 disass = dis_mips.disass_r6_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index a2d05690..a6bb9565 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index dfc6cbce..31d7a4d5 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 1027b5a1..88032f1e 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index 9246820d..364a3184 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -15,19 +15,20 @@
15-- Intel and AMD manuals. The supported instruction set is quite extensive 15-- Intel and AMD manuals. The supported instruction set is quite extensive
16-- and reflects what a current generation Intel or AMD CPU implements in 16-- and reflects what a current generation Intel or AMD CPU implements in
17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, 17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
18-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) 18-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
19-- instructions. 19-- (VMX/SVM) instructions.
20-- 20--
21-- Notes: 21-- Notes:
22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. 22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23-- * No attempt at optimization has been made -- it's fast enough for my needs. 23-- * No attempt at optimization has been made -- it's fast enough for my needs.
24-- * The public API may change when more architectures are added.
25------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
26 25
27local type = type 26local type = type
28local sub, byte, format = string.sub, string.byte, string.format 27local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 28local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 29local lower, rep = string.lower, string.rep
30local bit = require("bit")
31local tohex = bit.tohex
31 32
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 33-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 34local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
76"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", 77"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
77"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", 78"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
78--Cx 79--Cx
79"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", 80"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
80"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", 81"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
81--Dx 82--Dx
82"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", 83"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
101 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", 102 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
102 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", 103 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
103 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", 104 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
104 [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, 105 [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
105 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, 106 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
106}, { __index = map_opc1_32 }) 107}, { __index = map_opc1_32 })
107 108
@@ -112,12 +113,12 @@ local map_opc2 = {
112[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", 113[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
113"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", 114"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
114--1x 115--1x
115"movupsXrm|movssXrm|movupdXrm|movsdXrm", 116"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
116"movupsXmr|movssXmr|movupdXmr|movsdXmr", 117"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
117"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", 118"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
118"movlpsXmr||movlpdXmr", 119"movlpsXmr||movlpdXmr",
119"unpcklpsXrm||unpcklpdXrm", 120"unpcklpsXrvm||unpcklpdXrvm",
120"unpckhpsXrm||unpckhpdXrm", 121"unpckhpsXrvm||unpckhpdXrvm",
121"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", 122"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
122"movhpsXmr||movhpdXmr", 123"movhpsXmr||movhpdXmr",
123"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", 124"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
126"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, 127"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
127"movapsXrm||movapdXrm", 128"movapsXrm||movapdXrm",
128"movapsXmr||movapdXmr", 129"movapsXmr||movapdXmr",
129"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", 130"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
130"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", 131"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
131"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", 132"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
132"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", 133"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
142"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", 143"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
143--5x 144--5x
144"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", 145"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
145"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", 146"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
146"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", 147"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
147"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", 148"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
148"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", 149"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
149"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", 150"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
150"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", 151"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
151"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", 152"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
152"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", 153"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
153--6x 154--6x
154"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", 155"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
155"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", 156"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
156"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", 157"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
157"||punpcklqdqXrm","||punpckhqdqXrm", 158"||punpcklqdqXrvm","||punpckhqdqXrvm",
158"movPrVSm","movqMrm|movdquXrm|movdqaXrm", 159"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
159--7x 160--7x
160"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", 161"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
161"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", 162"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
162"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", 163"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
163"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", 164"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
164nil,nil, 165nil,nil,
165"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", 166"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
166"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", 167"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
167--8x 168--8x
168"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", 169"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
180"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", 181"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
181--Cx 182--Cx
182"xaddBmr","xaddVmr", 183"xaddBmr","xaddVmr",
183"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", 184"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
184"pinsrwPrWmu","pextrwDrPmu", 185"pinsrwPrvWmu","pextrwDrPmu",
185"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", 186"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
186"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", 187"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
187--Dx 188--Dx
188"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", 189"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
189"paddqPrm","pmullwPrm", 190"paddqPrvm","pmullwPrvm",
190"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", 191"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
191"psubusbPrm","psubuswPrm","pminubPrm","pandPrm", 192"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
192"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", 193"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
193--Ex 194--Ex
194"pavgbPrm","psrawPrm","psradPrm","pavgwPrm", 195"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
195"pmulhuwPrm","pmulhwPrm", 196"pmulhuwPrvm","pmulhwPrvm",
196"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", 197"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
197"psubsbPrm","psubswPrm","pminswPrm","porPrm", 198"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
198"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", 199"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
199--Fx 200--Fx
200"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", 201"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
201"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", 202"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
202"psubbPrm","psubwPrm","psubdPrm","psubqPrm", 203"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
203"paddbPrm","paddwPrm","padddPrm","ud", 204"paddbPrvm","paddwPrvm","padddPrvm","ud",
204} 205}
205assert(map_opc2[255] == "ud") 206assert(map_opc2[255] == "ud")
206 207
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud")
208local map_opc3 = { 209local map_opc3 = {
209["38"] = { -- [66] 0f 38 xx 210["38"] = { -- [66] 0f 38 xx
210--0x 211--0x
211[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", 212[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
212"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", 213"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
213"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", 214"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
214nil,nil,nil,nil, 215"||permilpsXrvm","||permilpdXrvm",nil,nil,
215--1x 216--1x
216"||pblendvbXrma",nil,nil,nil, 217"||pblendvbXrma",nil,nil,nil,
217"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", 218"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
218nil,nil,nil,nil, 219"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
219"pabsbPrm","pabswPrm","pabsdPrm",nil, 220"pabsbPrm","pabswPrm","pabsdPrm",nil,
220--2x 221--2x
221"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", 222"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
222"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, 223"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
223"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", 224"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
224nil,nil,nil,nil, 225"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
225--3x 226--3x
226"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", 227"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
227"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", 228"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
228"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", 229"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
229"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", 230"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
230--4x 231--4x
231"||pmulddXrm","||phminposuwXrm", 232"||pmulddXrvm","||phminposuwXrm",nil,nil,
233nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
234--5x
235[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
236[0x5a] = "||broadcasti128XrlXm",
237--7x
238[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
239--8x
240[0x8c] = "||pmaskmovXrvVSm",
241[0x8e] = "||pmaskmovVSmXvr",
242--9x
243[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
244[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
245[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
246[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
247[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
248--Ax
249[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
250[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
251[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
252[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
253[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
254--Bx
255[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
256[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
257[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
258[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
259[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
260--Dx
261[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
262[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
232--Fx 263--Fx
233[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", 264[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
265[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
234}, 266},
235 267
236["3a"] = { -- [66] 0f 3a xx 268["3a"] = { -- [66] 0f 3a xx
237--0x 269--0x
238[0x00]=nil,nil,nil,nil,nil,nil,nil,nil, 270[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
239"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", 271"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
240"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", 272"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
273"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
241--1x 274--1x
242nil,nil,nil,nil, 275nil,nil,nil,nil,
243"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", 276"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
244nil,nil,nil,nil,nil,nil,nil,nil, 277"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
278nil,nil,nil,nil,
245--2x 279--2x
246"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, 280"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
281--3x
282[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
247--4x 283--4x
248[0x40] = "||dppsXrmu", 284[0x40] = "||dppsXrvmu",
249[0x41] = "||dppdXrmu", 285[0x41] = "||dppdXrvmu",
250[0x42] = "||mpsadbwXrmu", 286[0x42] = "||mpsadbwXrvmu",
287[0x44] = "||pclmulqdqXrvmu",
288[0x46] = "||perm2i128Xrvmu",
289[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
290[0x4c] = "||pblendvbXrvmb",
251--6x 291--6x
252[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", 292[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
253[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", 293[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
294[0xdf] = "||aeskeygenassistXrmu",
295--Fx
296[0xf0] = "||| rorxVrmu",
254}, 297},
255} 298}
256 299
@@ -354,17 +397,19 @@ local map_regs = {
354 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! 397 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
355 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 398 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
356 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, 399 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
400 Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
401 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
357} 402}
358local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } 403local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
359 404
360-- Maps for size names. 405-- Maps for size names.
361local map_sz2n = { 406local map_sz2n = {
362 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, 407 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
363} 408}
364local map_sz2prefix = { 409local map_sz2prefix = {
365 B = "byte", W = "word", D = "dword", 410 B = "byte", W = "word", D = "dword",
366 Q = "qword", 411 Q = "qword",
367 M = "qword", X = "xword", 412 M = "qword", X = "xword", Y = "yword",
368 F = "dword", G = "qword", -- No need for sizes/register names for these two. 413 F = "dword", G = "qword", -- No need for sizes/register names for these two.
369} 414}
370 415
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands)
387 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end 432 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
388 if ctx.rex then 433 if ctx.rex then
389 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. 434 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
390 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") 435 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
391 if t ~= "" then text = "rex."..t.." "..text end 436 (ctx.vexl and "l" or "")
437 if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
438 if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
439 elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
392 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 440 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
393 ctx.rex = false 441 ctx.rex = false; ctx.vexl = false; ctx.vexv = false
394 end 442 end
395 if ctx.seg then 443 if ctx.seg then
396 local text2, n = gsub(text, "%[", "["..ctx.seg..":") 444 local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands)
405 end 453 end
406 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) 454 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
407 ctx.mrm = false 455 ctx.mrm = false
456 ctx.vexv = false
408 ctx.start = pos 457 ctx.start = pos
409 ctx.imm = nil 458 ctx.imm = nil
410end 459end
@@ -413,7 +462,7 @@ end
413local function clearprefixes(ctx) 462local function clearprefixes(ctx)
414 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false 463 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
415 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 464 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
416 ctx.rex = false; ctx.a32 = false 465 ctx.rex = false; ctx.a32 = false; ctx.vexl = false
417end 466end
418 467
419-- Fallback for incomplete opcodes at the end. 468-- Fallback for incomplete opcodes at the end.
@@ -450,9 +499,9 @@ end
450-- Process pattern string and generate the operands. 499-- Process pattern string and generate the operands.
451local function putpat(ctx, name, pat) 500local function putpat(ctx, name, pat)
452 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp 501 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
453 local code, pos, stop = ctx.code, ctx.pos, ctx.stop 502 local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
454 503
455 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz 504 -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
456 for p in gmatch(pat, ".") do 505 for p in gmatch(pat, ".") do
457 local x = nil 506 local x = nil
458 if p == "V" or p == "U" then 507 if p == "V" or p == "U" then
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat)
467 elseif p == "B" then 516 elseif p == "B" then
468 sz = "B" 517 sz = "B"
469 regs = ctx.rex and map_regs.B64 or map_regs.B 518 regs = ctx.rex and map_regs.B64 or map_regs.B
470 elseif match(p, "[WDQMXFG]") then 519 elseif match(p, "[WDQMXYFG]") then
471 sz = p 520 sz = p
521 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
472 regs = map_regs[sz] 522 regs = map_regs[sz]
473 elseif p == "P" then 523 elseif p == "P" then
474 sz = ctx.o16 and "X" or "M"; ctx.o16 = false 524 sz = ctx.o16 and "X" or "M"; ctx.o16 = false
525 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
475 regs = map_regs[sz] 526 regs = map_regs[sz]
527 elseif p == "H" then
528 name = name..(ctx.rexw and "d" or "s")
529 ctx.rexw = false
476 elseif p == "S" then 530 elseif p == "S" then
477 name = name..lower(sz) 531 name = name..lower(sz)
478 elseif p == "s" then 532 elseif p == "s" then
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat)
484 local imm = getimm(ctx, pos, 1); if not imm then return end 538 local imm = getimm(ctx, pos, 1); if not imm then return end
485 x = format("0x%02x", imm) 539 x = format("0x%02x", imm)
486 pos = pos+1 540 pos = pos+1
541 elseif p == "b" then
542 local imm = getimm(ctx, pos, 1); if not imm then return end
543 x = regs[imm/16+1]
544 pos = pos+1
487 elseif p == "w" then 545 elseif p == "w" then
488 local imm = getimm(ctx, pos, 2); if not imm then return end 546 local imm = getimm(ctx, pos, 2); if not imm then return end
489 x = format("0x%x", imm) 547 x = format("0x%x", imm)
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 590 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 591 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 592 else
535 x = format("0x%08x", imm) 593 x = "0x"..tohex(imm)
536 end 594 end
537 elseif p == "R" then 595 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 596 local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat)
616 else 674 else
617 x = "CR"..sp 675 x = "CR"..sp
618 end 676 end
677 elseif p == "v" then
678 if ctx.vexv then
679 x = regs[ctx.vexv+1]; ctx.vexv = false
680 end
619 elseif p == "y" then x = "DR"..sp 681 elseif p == "y" then x = "DR"..sp
620 elseif p == "z" then x = "TR"..sp 682 elseif p == "z" then x = "TR"..sp
683 elseif p == "l" then vexl = false
621 elseif p == "t" then 684 elseif p == "t" then
622 else 685 else
623 error("bad pattern `"..pat.."'") 686 error("bad pattern `"..pat.."'")
@@ -692,7 +755,8 @@ map_act = {
692 B = putpat, W = putpat, D = putpat, Q = putpat, 755 B = putpat, W = putpat, D = putpat, Q = putpat,
693 V = putpat, U = putpat, T = putpat, 756 V = putpat, U = putpat, T = putpat,
694 M = putpat, X = putpat, P = putpat, 757 M = putpat, X = putpat, P = putpat,
695 F = putpat, G = putpat, 758 F = putpat, G = putpat, Y = putpat,
759 H = putpat,
696 760
697 -- Collect prefixes. 761 -- Collect prefixes.
698 [":"] = function(ctx, name, pat) 762 [":"] = function(ctx, name, pat)
@@ -753,15 +817,68 @@ map_act = {
753 817
754 -- REX prefix. 818 -- REX prefix.
755 rex = function(ctx, name, pat) 819 rex = function(ctx, name, pat)
756 if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. 820 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
757 for p in gmatch(pat, ".") do ctx["rex"..p] = true end 821 for p in gmatch(pat, ".") do ctx["rex"..p] = true end
758 ctx.rex = true 822 ctx.rex = "rex"
823 end,
824
825 -- VEX prefix.
826 vex = function(ctx, name, pat)
827 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
828 ctx.rex = "vex"
829 local pos = ctx.pos
830 if ctx.mrm then
831 ctx.mrm = nil
832 pos = pos-1
833 end
834 local b = byte(ctx.code, pos, pos)
835 if not b then return incomplete(ctx) end
836 pos = pos+1
837 if b < 128 then ctx.rexr = true end
838 local m = 1
839 if pat == "3" then
840 m = b%32; b = (b-m)/32
841 local nb = b%2; b = (b-nb)/2
842 if nb == 0 then ctx.rexb = true end
843 local nx = b%2
844 if nx == 0 then ctx.rexx = true end
845 b = byte(ctx.code, pos, pos)
846 if not b then return incomplete(ctx) end
847 pos = pos+1
848 if b >= 128 then ctx.rexw = true end
849 end
850 ctx.pos = pos
851 local map
852 if m == 1 then map = map_opc2
853 elseif m == 2 then map = map_opc3["38"]
854 elseif m == 3 then map = map_opc3["3a"]
855 else return unknown(ctx) end
856 local p = b%4; b = (b-p)/4
857 if p == 1 then ctx.o16 = "o16"
858 elseif p == 2 then ctx.rep = "rep"
859 elseif p == 3 then ctx.rep = "repne" end
860 local l = b%2; b = (b-l)/2
861 if l ~= 0 then ctx.vexl = true end
862 ctx.vexv = (-1-b)%16
863 return dispatchmap(ctx, map)
759 end, 864 end,
760 865
761 -- Special case for nop with REX prefix. 866 -- Special case for nop with REX prefix.
762 nop = function(ctx, name, pat) 867 nop = function(ctx, name, pat)
763 return dispatch(ctx, ctx.rex and pat or "nop") 868 return dispatch(ctx, ctx.rex and pat or "nop")
764 end, 869 end,
870
871 -- Special case for 0F 77.
872 emms = function(ctx, name, pat)
873 if ctx.rex ~= "vex" then
874 return putop(ctx, "emms")
875 elseif ctx.vexl then
876 ctx.vexl = false
877 return putop(ctx, "zeroall")
878 else
879 return putop(ctx, "zeroupper")
880 end
881 end,
765} 882}
766 883
767------------------------------------------------------------------------------ 884------------------------------------------------------------------------------
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len)
782end 899end
783 900
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 901-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 902local function create(code, addr, out)
786 local ctx = {} 903 local ctx = {}
787 ctx.code = code 904 ctx.code = code
788 ctx.addr = (addr or 0) - 1 905 ctx.addr = (addr or 0) - 1
@@ -796,8 +913,8 @@ local function create_(code, addr, out)
796 return ctx 913 return ctx
797end 914end
798 915
799local function create64_(code, addr, out) 916local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 917 local ctx = create(code, addr, out)
801 ctx.x64 = true 918 ctx.x64 = true
802 ctx.map1 = map_opc1_64 919 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 920 ctx.aregs = map_regs.Q
@@ -805,32 +922,32 @@ local function create64_(code, addr, out)
805end 922end
806 923
807-- Simple API: disassemble code (a string) at address and output via out. 924-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 925local function disass(code, addr, out)
809 create_(code, addr, out):disass() 926 create(code, addr, out):disass()
810end 927end
811 928
812local function disass64_(code, addr, out) 929local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 930 create64(code, addr, out):disass()
814end 931end
815 932
816-- Return register name for RID. 933-- Return register name for RID.
817local function regname_(r) 934local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 935 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 936 return map_regs.X[r-7]
820end 937end
821 938
822local function regname64_(r) 939local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 940 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 941 return map_regs.X[r-15]
825end 942end
826 943
827-- Public module functions. 944-- Public module functions.
828module(...) 945return {
829 946 create = create,
830create = create_ 947 create64 = create64,
831create64 = create64_ 948 disass = disass,
832disass = disass_ 949 disass64 = disass64,
833disass64 = disass64_ 950 regname = regname,
834regname = regname_ 951 regname64 = regname64
835regname64 = regname64_ 952}
836 953
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 6a2632c3..0cb38b58 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -55,7 +55,7 @@
55 55
56-- Cache some library functions and objects. 56-- Cache some library functions and objects.
57local jit = require("jit") 57local jit = require("jit")
58assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 58assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
59local jutil = require("jit.util") 59local jutil = require("jit.util")
60local vmdef = require("jit.vmdef") 60local vmdef = require("jit.vmdef")
61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc 61local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 63local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 64local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
65local bit = require("bit") 65local bit = require("bit")
66local band, shr = bit.band, bit.rshift 66local band, shr, tohex = bit.band, bit.rshift, bit.tohex
67local sub, gsub, format = string.sub, string.gsub, string.format 67local sub, gsub, format = string.sub, string.gsub, string.format
68local byte, rep = string.byte, string.rep 68local byte, rep = string.byte, string.rep
69local type, tostring = type, tostring 69local type, tostring = type, tostring
@@ -85,12 +85,13 @@ local nexitsym = 0
85local function fillsymtab_tr(tr, nexit) 85local function fillsymtab_tr(tr, nexit)
86 local t = {} 86 local t = {}
87 symtabmt.__index = t 87 symtabmt.__index = t
88 if jit.arch == "mips" or jit.arch == "mipsel" then 88 if jit.arch:sub(1, 4) == "mips" then
89 t[traceexitstub(tr, 0)] = "exit" 89 t[traceexitstub(tr, 0)] = "exit"
90 return 90 return
91 end 91 end
92 for i=0,nexit-1 do 92 for i=0,nexit-1 do
93 local addr = traceexitstub(tr, i) 93 local addr = traceexitstub(tr, i)
94 if addr < 0 then addr = addr + 2^32 end
94 t[addr] = tostring(i) 95 t[addr] = tostring(i)
95 end 96 end
96 local addr = traceexitstub(tr, nexit) 97 local addr = traceexitstub(tr, nexit)
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit)
104 local ircall = vmdef.ircall 105 local ircall = vmdef.ircall
105 for i=0,#ircall do 106 for i=0,#ircall do
106 local addr = ircalladdr(i) 107 local addr = ircalladdr(i)
107 if addr ~= 0 then t[addr] = ircall[i] end 108 if addr ~= 0 then
109 if addr < 0 then addr = addr + 2^32 end
110 t[addr] = ircall[i]
111 end
108 end 112 end
109 end 113 end
110 if nexitsym == 1000000 then -- Per-trace exit stubs. 114 if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit)
118 nexit = 1000000 122 nexit = 1000000
119 break 123 break
120 end 124 end
125 if addr < 0 then addr = addr + 2^32 end
121 t[addr] = tostring(i) 126 t[addr] = tostring(i)
122 end 127 end
123 nexitsym = nexit 128 nexitsym = nexit
@@ -136,6 +141,7 @@ local function dump_mcode(tr)
136 local mcode, addr, loop = tracemc(tr) 141 local mcode, addr, loop = tracemc(tr)
137 if not mcode then return end 142 if not mcode then return end
138 if not disass then disass = require("jit.dis_"..jit.arch) end 143 if not disass then disass = require("jit.dis_"..jit.arch) end
144 if addr < 0 then addr = addr + 2^32 end
139 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 145 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
140 local ctx = disass.create(mcode, addr, dumpwrite) 146 local ctx = disass.create(mcode, addr, dumpwrite)
141 ctx.hexdump = 0 147 ctx.hexdump = 0
@@ -270,8 +276,7 @@ local litname = {
270 ["CONV "] = setmetatable({}, { __index = function(t, mode) 276 ["CONV "] = setmetatable({}, { __index = function(t, mode)
271 local s = irtype[band(mode, 31)] 277 local s = irtype[band(mode, 31)]
272 s = irtype[band(shr(mode, 5), 31)].."."..s 278 s = irtype[band(shr(mode, 5), 31)].."."..s
273 if band(mode, 0x400) ~= 0 then s = s.." trunc" 279 if band(mode, 0x800) ~= 0 then s = s.." sext" end
274 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
275 local c = shr(mode, 14) 280 local c = shr(mode, 14)
276 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 281 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
277 t[mode] = s 282 t[mode] = s
@@ -280,6 +285,8 @@ local litname = {
280 ["FLOAD "] = vmdef.irfield, 285 ["FLOAD "] = vmdef.irfield,
281 ["FREF "] = vmdef.irfield, 286 ["FREF "] = vmdef.irfield,
282 ["FPMATH"] = vmdef.irfpm, 287 ["FPMATH"] = vmdef.irfpm,
288 ["BUFHDR"] = { [0] = "RESET", "APPEND" },
289 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
283} 290}
284 291
285local function ctlsub(c) 292local function ctlsub(c)
@@ -303,15 +310,17 @@ local function fmtfunc(func, pc)
303 end 310 end
304end 311end
305 312
306local function formatk(tr, idx) 313local function formatk(tr, idx, sn)
307 local k, t, slot = tracek(tr, idx) 314 local k, t, slot = tracek(tr, idx)
308 local tn = type(k) 315 local tn = type(k)
309 local s 316 local s
310 if tn == "number" then 317 if tn == "number" then
311 if k == 2^52+2^51 then 318 if band(sn or 0, 0x30000) ~= 0 then
319 s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
320 elseif k == 2^52+2^51 then
312 s = "bias" 321 s = "bias"
313 else 322 else
314 s = format("%+.14g", k) 323 s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
315 end 324 end
316 elseif tn == "string" then 325 elseif tn == "string" then
317 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) 326 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -329,6 +338,8 @@ local function formatk(tr, idx)
329 elseif t == 21 then -- int64_t 338 elseif t == 21 then -- int64_t
330 s = sub(tostring(k), 1, -3) 339 s = sub(tostring(k), 1, -3)
331 if sub(s, 1, 1) ~= "-" then s = "+"..s end 340 if sub(s, 1, 1) ~= "-" then s = "+"..s end
341 elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
342 return "----" -- Special case for LJ_FR2 slot 1.
332 else 343 else
333 s = tostring(k) -- For primitives. 344 s = tostring(k) -- For primitives.
334 end 345 end
@@ -347,7 +358,7 @@ local function printsnap(tr, snap)
347 n = n + 1 358 n = n + 1
348 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS 359 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
349 if ref < 0 then 360 if ref < 0 then
350 out:write(formatk(tr, ref)) 361 out:write(formatk(tr, ref, sn))
351 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM 362 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
352 out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) 363 out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
353 else 364 else
@@ -545,7 +556,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
545 if what == "start" then 556 if what == "start" then
546 if dumpmode.H then out:write('<pre class="ljdump">\n') end 557 if dumpmode.H then out:write('<pre class="ljdump">\n') end
547 out:write("---- TRACE ", tr, " ", what) 558 out:write("---- TRACE ", tr, " ", what)
548 if otr then out:write(" ", otr, "/", oex) end 559 if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
549 out:write(" ", fmtfunc(func, pc), "\n") 560 out:write(" ", fmtfunc(func, pc), "\n")
550 elseif what == "stop" or what == "abort" then 561 elseif what == "stop" or what == "abort" then
551 out:write("---- TRACE ", tr, " ", what) 562 out:write("---- TRACE ", tr, " ", what)
@@ -608,7 +619,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...)
608 end 619 end
609 else 620 else
610 for i=1,ngpr do 621 for i=1,ngpr do
611 out:write(format(" %08x", regs[i])) 622 out:write(" ", tohex(regs[i]))
612 if i % 8 == 0 then out:write("\n") end 623 if i % 8 == 0 then out:write("\n") end
613 end 624 end
614 end 625 end
@@ -693,9 +704,9 @@ local function dumpon(opt, outfile)
693end 704end
694 705
695-- Public module functions. 706-- Public module functions.
696module(...) 707return {
697 708 on = dumpon,
698on = dumpon 709 off = dumpoff,
699off = dumpoff 710 start = dumpon -- For -j command line option.
700start = dumpon -- For -j command line option. 711}
701 712
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..ac3ec40a
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,311 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module is a simple command line interface to the built-in
9-- low-overhead profiler of LuaJIT.
10--
11-- The lower-level API of the profiler is accessible via the "jit.profile"
12-- module or the luaJIT_profile_* C API.
13--
14-- Example usage:
15--
16-- luajit -jp myapp.lua
17-- luajit -jp=s myapp.lua
18-- luajit -jp=-s myapp.lua
19-- luajit -jp=vl myapp.lua
20-- luajit -jp=G,profile.txt myapp.lua
21--
22-- The following dump features are available:
23--
24-- f Stack dump: function name, otherwise module:line. Default mode.
25-- F Stack dump: ditto, but always prepend module.
26-- l Stack dump: module:line.
27-- <number> stack dump depth (callee < caller). Default: 1.
28-- -<number> Inverse stack dump depth (caller > callee).
29-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
30-- p Show full path for module names.
31-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
32-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
33-- r Show raw sample counts. Default: show percentages.
34-- a Annotate excerpts from source code files.
35-- A Annotate complete source code files.
36-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
37-- m<number> Minimum sample percentage to be shown. Default: 3.
38-- i<number> Sampling interval in milliseconds. Default: 10.
39--
40----------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
45local profile = require("jit.profile")
46local vmdef = require("jit.vmdef")
47local math = math
48local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
49local sort, format = table.sort, string.format
50local stdout = io.stdout
51local zone -- Load jit.zone module on demand.
52
53-- Output file handle.
54local out
55
56------------------------------------------------------------------------------
57
-- Profiler state. All of it is (re)initialized by prof_start().
local prof_ud                   -- Proxy userdata; its __gc hook is prof_finish.
local prof_states, prof_split   -- State key mode ("v"/"z") and key ordering mode.
local prof_min, prof_raw        -- Minimum percentage shown; raw output mode.
local prof_fmt, prof_depth      -- Format and depth passed to profile.dumpstack().
local prof_ann                  -- Annotation context lines (0 = whole file) or falsy.
local prof_count1, prof_count2  -- First- and second-level sample counters.
local prof_samples              -- Total number of samples seen so far.

-- Display names for the VM mode letters passed to the profiler callback.
local map_vmmode = {
  N = "Compiled",
  I = "Interpreted",
  C = "C code",
  G = "Garbage Collector",
  J = "JIT Compiler",
}
69
-- Profiler callback: account `samples` under the key(s) derived from the
-- current VM state/zone and/or the stack dump of thread `th`.
local function prof_cb(th, samples, vmmode)
  prof_samples = prof_samples + samples

  -- State key: VM mode name or current zone (only when requested).
  local state
  if prof_states == "v" then
    state = map_vmmode[vmmode] or vmmode
  elseif prof_states then
    state = zone:get() or "(none)"
  end

  -- Stack key(s): the dump itself, optionally split into two levels.
  local stack, stack2
  if prof_fmt then
    stack = profile.dumpstack(th, prof_fmt, prof_depth)
    -- Replace "[builtin#N]" with the name of fast function N.
    stack = stack:gsub("%[builtin#(%d+)%]", function(id)
      return vmdef.ffnames[tonumber(id)]
    end)
    if prof_split == 2 then
      local head, tail = stack:match("(.-) [<>] (.*)")
      if tail then stack, stack2 = head, tail end
    elseif prof_split == 3 then
      stack2 = profile.dumpstack(th, "l", 1)
    end
  end

  -- Decide which key is the first level and which is the second.
  local primary, secondary
  if prof_split == 1 then
    if state then primary, secondary = state, stack end
  elseif stack then
    primary, secondary = stack, stack2 or state
  end
  if not primary then return end

  -- Coalesce samples in one or two levels.
  prof_count1[primary] = (prof_count1[primary] or 0) + samples
  if secondary then
    local sub = prof_count2[primary]
    if not sub then sub = {}; prof_count2[primary] = sub end
    sub[secondary] = (sub[secondary] or 0) + samples
  end
end
117
118------------------------------------------------------------------------------
119
-- Show top N list: print the entries of count1 by descending sample count,
-- stopping at the first one below prof_min percent. If count2 has a nested
-- table for an entry, it is printed right after it with an arrow prefix.
local function prof_top(count1, count2, samples, indent)
  -- Collect the keys and order them by sample count.
  local keys = {}
  for key in pairs(count1) do keys[#keys+1] = key end
  sort(keys, function(a, b) return count1[a] > count1[b] end)

  for _, key in ipairs(keys) do
    local hits = count1[key]
    local pct = floor(hits*100/samples + 0.5)
    if pct < prof_min then break end

    local text
    if prof_raw == "r" then       -- Raw sample counts.
      text = format("%s%5d %s\n", indent, hits, key)
    elseif prof_raw then          -- Output for graphical tools.
      text = format("%s %d\n", key, hits)
    else                          -- Percentages (default).
      text = format("%s%2d%% %s\n", indent, pct, key)
    end
    out:write(text)

    local nested = count2 and count2[key]
    if nested then
      local arrow
      if prof_split == 3 or prof_split == 1 then
        arrow = " -- "
      elseif prof_depth < 0 then
        arrow = " -> "
      else
        arrow = " <- "
      end
      prof_top(nested, nil, hits, arrow)
    end
  end
end
149
-- Annotate source code with per-line sample counts or percentages.
--   count1:  table mapping "file:line" keys to sample counts. Keys that do
--            not end in ":<digits>" are treated as a file name with line 0.
--   samples: total number of samples, used to compute percentages.
-- Reads the module-level settings prof_min, prof_raw and prof_ann and writes
-- the listing to `out`.
local function prof_annotate(count1, samples)
  -- Array part: file names. Hash part: file name -> { line -> value }.
  local files = {}
  local ms = 0  -- Largest single count; sizes the column in raw mode.
  for k, v in pairs(count1) do
    local pct = floor(v*100/samples + 0.5)
    ms = math.max(ms, v)
    if pct >= prof_min then
      local file, line = k:match("^(.*):(%d+)$")
      if not file then file = k; line = 0 end
      local fl = files[file]
      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
      line = tonumber(line)
      fl[line] = prof_raw and v or pct
    end
  end
  sort(files)  -- Sorts the array part only, i.e. the file names.

  -- Gutter formats for lines with and without a value. Both must render to
  -- the same width, otherwise the source text is not aligned.
  local fmtv, fmtn = " %3d%% | %s\n", (" "):rep(5).." | %s\n"
  if prof_raw then
    -- Column width = decimal digits of the largest count, at least 5.
    -- (ceil(log10(ms)) would be one short for exact powers of ten.)
    local n = math.max(5, #format("%d", ms))
    fmtv = "%"..n.."d | %s\n"
    fmtn = (" "):rep(n).." | %s\n"
  end

  local ann = prof_ann  -- Context lines around values; 0 shows whole files.
  for _, file in ipairs(files) do
    local f0 = file:byte()
    if f0 == 40 or f0 == 91 then  -- Name starts with "(" or "[".
      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
      -- NOTE(review): this also skips all remaining files; confirm intended.
      break
    end
    local fp, err = io.open(file)
    if not fp then
      out:write(format("====== ERROR: %s: %s\n", file, err))
      break
    end
    out:write(format("\n====== %s ======\n", file))
    local fl = files[file]
    -- n: current line number. show: false, or the line number that keeps
    -- the excerpt open (lines up to show+ann are printed).
    local n, show = 1, false
    if ann ~= 0 then
      for i=1,ann do
        -- Must be a number, not true: it is used in `show+ann` below.
        if fl[i] then show = i; out:write("@@ 1 @@\n"); break end
      end
    end
    for line in fp:lines() do
      if line:byte() == 27 then  -- ESC: first byte of a bytecode file.
        out:write("[Cannot annotate bytecode file]\n")
        break
      end
      local v = fl[n]
      if ann ~= 0 then
        local v2 = fl[n+ann]  -- Value `ann` lines ahead opens/extends an excerpt.
        if show then
          if v2 then show = n+ann elseif v then show = n
          elseif show+ann < n then show = false end
        elseif v2 then
          show = n+ann
          out:write(format("@@ %d @@\n", n))
        end
        if not show then goto next end
      end
      if v then
        out:write(format(fmtv, v, line))
      else
        out:write(format(fmtn, line))
      end
      ::next::
      n = n + 1
    end
    fp:close()
  end
end
221
222------------------------------------------------------------------------------
223
-- Finish profiling and dump result.
-- Does nothing unless a profiling run is active (prof_ud is set). Stops the
-- profiler, writes either the annotated listing or the top list to `out`,
-- and then always resets the run state, so a later explicit stop or the
-- __gc hook of the proxy cannot report the same run twice.
local function prof_finish()
  if not prof_ud then return end
  profile.stop()
  local samples = prof_samples
  if samples == 0 then
    -- prof_raw == true is the output mode for graphical tools: stay silent.
    if prof_raw ~= true then out:write("[No samples collected]\n") end
  elseif prof_ann then
    prof_annotate(prof_count1, samples)
  else
    prof_top(prof_count1, prof_count2, samples, "")
  end
  prof_count1 = nil
  prof_count2 = nil
  prof_ud = nil
  -- Close a file opened by start() so the report is flushed to disk.
  if out ~= stdout then out:close() end
end
243
-- Start profiling. `mode` is the option string described in the header of
-- this file; it is parsed into the module-level prof_* settings before the
-- low-level profiler is started.
local function prof_start(mode)
  -- Strip the numeric options first. The order matters: i<number> and
  -- m<number> have to be consumed before the bare depth number.
  local interval = ""
  prof_min = 3
  prof_depth = 1
  mode = mode:gsub("i%d*", function(opt) interval = opt; return "" end)
  mode = mode:gsub("m(%d+)", function(num) prof_min = tonumber(num); return "" end)
  mode = mode:gsub("%-?%d+", function(num) prof_depth = tonumber(num); return "" end)

  -- The remaining characters are single-letter feature flags.
  local has = {}
  for ch in mode:gmatch(".") do has[ch] = ch end

  prof_states = has.z or has.v
  if prof_states == "z" then zone = require("jit.zone") end
  local scope = has.l or has.f or has.F or (prof_states and "" or "f")
  local flags = has.p or ""
  prof_raw = has.r

  -- Select how the sample keys are split into two levels.
  if has.s then
    prof_split = 2
    if prof_depth == -1 or has["-"] then
      prof_depth = -2
    elseif prof_depth == 1 then
      prof_depth = 2
    end
  elseif mode:find("[fF].*l") then
    scope = "l"
    prof_split = 3
  elseif scope == "" or mode:find("[zv].*[lfF]") then
    prof_split = 1
  else
    prof_split = 0
  end

  -- Annotation: A = complete files (0), a = excerpts with 3 context lines.
  if has.A then
    prof_ann = 0
  elseif has.a then
    prof_ann = 3
  else
    prof_ann = nil
  end

  -- Derive the stack dump format.
  if prof_ann then
    scope, prof_fmt, prof_split, prof_depth = "l", "pl", 0, 1
  elseif has.G and scope ~= "" then
    prof_fmt = flags..scope.."Z;"
    prof_depth, prof_raw, prof_min = -100, true, 0
  elseif scope == "" then
    prof_fmt = false
  else
    local sc = prof_split == 3 and has.f or has.F or scope
    local sep = prof_depth >= 0 and "Z < " or "Z > "
    prof_fmt = flags..sc..sep
  end

  prof_count1, prof_count2, prof_samples = {}, {}, 0
  profile.start(scope:lower()..interval, prof_cb)
  -- The proxy's __gc hook dumps the result if nobody calls stop explicitly.
  prof_ud = newproxy(true)
  getmetatable(prof_ud).__gc = prof_finish
end
293
294------------------------------------------------------------------------------
295
-- Select the output (explicit file name, else $LUAJIT_PROFILEFILE, else
-- stdout; "-" also means stdout) and start profiling with `mode`.
local function start(mode, outfile)
  outfile = outfile or os.getenv("LUAJIT_PROFILEFILE")
  if not outfile or outfile == "-" then
    out = stdout
  else
    out = assert(io.open(outfile, "w"))
  end
  prof_start(mode or "f")
end
305
-- Public module functions.
return {
  stop = prof_finish,
  start = start  -- For -j command line option.
}
311
diff --git a/src/jit/v.lua b/src/jit/v.lua
index 9696f67f..e37466c6 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
59 59
60-- Cache some library functions and objects. 60-- Cache some library functions and objects.
61local jit = require("jit") 61local jit = require("jit")
62assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") 62assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
63local jutil = require("jit.util") 63local jutil = require("jit.util")
64local vmdef = require("jit.vmdef") 64local vmdef = require("jit.vmdef")
65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 65local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -99,7 +99,7 @@ end
99local function dump_trace(what, tr, func, pc, otr, oex) 99local function dump_trace(what, tr, func, pc, otr, oex)
100 if what == "start" then 100 if what == "start" then
101 startloc = fmtfunc(func, pc) 101 startloc = fmtfunc(func, pc)
102 startex = otr and "("..otr.."/"..oex..") " or "" 102 startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
103 else 103 else
104 if what == "abort" then 104 if what == "abort" then
105 local loc = fmtfunc(func, pc) 105 local loc = fmtfunc(func, pc)
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
116 if ltype == "interpreter" then 116 if ltype == "interpreter" then
117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", 117 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
118 tr, startex, startloc)) 118 tr, startex, startloc))
119 elseif ltype == "stitch" then
120 out:write(format("[TRACE %3s %s%s %s %s]\n",
121 tr, startex, startloc, ltype, fmtfunc(func, pc)))
119 elseif link == tr or link == 0 then 122 elseif link == tr or link == 0 then
120 out:write(format("[TRACE %3s %s%s %s]\n", 123 out:write(format("[TRACE %3s %s%s %s]\n",
121 tr, startex, startloc, ltype)) 124 tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
159end 162end
160 163
161-- Public module functions. 164-- Public module functions.
162module(...) 165return {
163 166 on = dumpon,
164on = dumpon 167 off = dumpoff,
165off = dumpoff 168 start = dumpon -- For -j command line option.
166start = dumpon -- For -j command line option. 169}
167 170
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..a8b4f0ae
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler zones.
3--
4-- Copyright (C) 2005-2020 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module implements a simple hierarchical zone model.
9--
10-- Example usage:
11--
12-- local zone = require("jit.zone")
13-- zone("AI")
14-- ...
15-- zone("A*")
16-- ...
17-- print(zone:get()) --> "A*"
18-- ...
19-- zone()
20-- ...
21-- print(zone:get()) --> "AI"
22-- ...
23-- zone()
24--
25----------------------------------------------------------------------------
26
local remove = table.remove

-- The module table doubles as the zone stack: zones live in its array part,
-- the methods in its hash part.
local zones = {}

-- Remove all zones from the stack.
function zones.flush(t)
  for i=#t,1,-1 do t[i] = nil end
end

-- Return the innermost (most recently pushed) zone, or nil if there is none.
function zones.get(t)
  return t[#t]
end

-- Calling the module with a zone pushes it; calling it without one pops and
-- returns the innermost zone and raises an error on an empty stack.
local function pushpop(t, zone)
  if not zone then
    return (assert(remove(t), "empty zone stack"))
  end
  t[#t+1] = zone
end

return setmetatable(zones, { __call = pushpop })
45
diff --git a/src/lauxlib.h b/src/lauxlib.h
index fed1491b..a44f0272 100644
--- a/src/lauxlib.h
+++ b/src/lauxlib.h
@@ -15,9 +15,6 @@
15#include "lua.h" 15#include "lua.h"
16 16
17 17
18#define luaL_getn(L,i) ((int)lua_objlen(L, i))
19#define luaL_setn(L,i,j) ((void)0) /* no op! */
20
21/* extra error code for `luaL_load' */ 18/* extra error code for `luaL_load' */
22#define LUA_ERRFILE (LUA_ERRERR+1) 19#define LUA_ERRFILE (LUA_ERRERR+1)
23 20
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
58LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, 55LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
59 const char *const lst[]); 56 const char *const lst[]);
60 57
58/* pre-defined references */
59#define LUA_NOREF (-2)
60#define LUA_REFNIL (-1)
61
61LUALIB_API int (luaL_ref) (lua_State *L, int t); 62LUALIB_API int (luaL_ref) (lua_State *L, int t);
62LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); 63LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
63 64
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
84 const char *name, const char *mode); 85 const char *name, const char *mode);
85LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, 86LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
86 int level); 87 int level);
88LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup);
89LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname,
90 int sizehint);
91LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname);
92LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname);
87 93
88 94
89/* 95/*
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
113 119
114#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) 120#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
115 121
122/* From Lua 5.2. */
123#define luaL_newlibtable(L, l) \
124 lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1)
125#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0))
126
116/* 127/*
117** {====================================================== 128** {======================================================
118** Generic Buffer manipulation 129** Generic Buffer manipulation
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
147 158
148/* }====================================================== */ 159/* }====================================================== */
149 160
150
151/* compatibility with ref system */
152
153/* pre-defined references */
154#define LUA_NOREF (-2)
155#define LUA_REFNIL (-1)
156
157#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
158 (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
159
160#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref))
161
162#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
163
164
165#define luaL_reg luaL_Reg
166
167#endif 161#endif
diff --git a/src/lib_aux.c b/src/lib_aux.c
index f29ca848..8f10e23c 100644
--- a/src/lib_aux.c
+++ b/src/lib_aux.c
@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
107static int libsize(const luaL_Reg *l) 107static int libsize(const luaL_Reg *l)
108{ 108{
109 int size = 0; 109 int size = 0;
110 for (; l->name; l++) size++; 110 for (; l && l->name; l++) size++;
111 return size; 111 return size;
112} 112}
113 113
114LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint)
115{
116 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
117 lua_getfield(L, -1, modname);
118 if (!lua_istable(L, -1)) {
119 lua_pop(L, 1);
120 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL)
121 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
122 lua_pushvalue(L, -1);
123 lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */
124 }
125 lua_remove(L, -2); /* Remove _LOADED table. */
126}
127
114LUALIB_API void luaL_openlib(lua_State *L, const char *libname, 128LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
115 const luaL_Reg *l, int nup) 129 const luaL_Reg *l, int nup)
116{ 130{
117 lj_lib_checkfpu(L); 131 lj_lib_checkfpu(L);
118 if (libname) { 132 if (libname) {
119 int size = libsize(l); 133 luaL_pushmodule(L, libname, libsize(l));
120 /* check whether lib already exists */ 134 lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */
121 luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
122 lua_getfield(L, -1, libname); /* get _LOADED[libname] */
123 if (!lua_istable(L, -1)) { /* not found? */
124 lua_pop(L, 1); /* remove previous result */
125 /* try global variable (and create one if it does not exist) */
126 if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
127 lj_err_callerv(L, LJ_ERR_BADMODN, libname);
128 lua_pushvalue(L, -1);
129 lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
130 }
131 lua_remove(L, -2); /* remove _LOADED table */
132 lua_insert(L, -(nup+1)); /* move library table to below upvalues */
133 } 135 }
134 for (; l->name; l++) { 136 if (l)
135 int i; 137 luaL_setfuncs(L, l, nup);
136 for (i = 0; i < nup; i++) /* copy upvalues to the top */ 138 else
137 lua_pushvalue(L, -nup); 139 lua_pop(L, nup); /* Remove upvalues. */
138 lua_pushcclosure(L, l->func, nup);
139 lua_setfield(L, -(nup+2), l->name);
140 }
141 lua_pop(L, nup); /* remove upvalues */
142} 140}
143 141
144LUALIB_API void luaL_register(lua_State *L, const char *libname, 142LUALIB_API void luaL_register(lua_State *L, const char *libname,
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname,
147 luaL_openlib(L, libname, l, 0); 145 luaL_openlib(L, libname, l, 0);
148} 146}
149 147
148LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
149{
150 luaL_checkstack(L, nup, "too many upvalues");
151 for (; l->name; l++) {
152 int i;
153 for (i = 0; i < nup; i++) /* Copy upvalues to the top. */
154 lua_pushvalue(L, -nup);
155 lua_pushcclosure(L, l->func, nup);
156 lua_setfield(L, -(nup + 2), l->name);
157 }
158 lua_pop(L, nup); /* Remove upvalues. */
159}
160
150LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, 161LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
151 const char *p, const char *r) 162 const char *p, const char *r)
152{ 163{
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
207 218
208LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) 219LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
209{ 220{
210 while (l--) 221 if (l <= bufffree(B)) {
211 luaL_addchar(B, *s++); 222 memcpy(B->p, s, l);
223 B->p += l;
224 } else {
225 emptybuffer(B);
226 lua_pushlstring(B->L, s, l);
227 B->lvl++;
228 adjuststack(B);
229 }
212} 230}
213 231
214LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) 232LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
@@ -302,7 +320,7 @@ static int panic(lua_State *L)
302 320
303#ifdef LUAJIT_USE_SYSMALLOC 321#ifdef LUAJIT_USE_SYSMALLOC
304 322
305#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) 323#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
306#error "Must use builtin allocator for 64 bit target" 324#error "Must use builtin allocator for 64 bit target"
307#endif 325#endif
308 326
@@ -334,7 +352,7 @@ LUALIB_API lua_State *luaL_newstate(void)
334 lua_State *L; 352 lua_State *L;
335 void *ud = lj_alloc_create(); 353 void *ud = lj_alloc_create();
336 if (ud == NULL) return NULL; 354 if (ud == NULL) return NULL;
337#if LJ_64 355#if LJ_64 && !LJ_GC64
338 L = lj_state_newstate(lj_alloc_f, ud); 356 L = lj_state_newstate(lj_alloc_f, ud);
339#else 357#else
340 L = lua_newstate(lj_alloc_f, ud); 358 L = lua_newstate(lj_alloc_f, ud);
@@ -343,7 +361,7 @@ LUALIB_API lua_State *luaL_newstate(void)
343 return L; 361 return L;
344} 362}
345 363
346#if LJ_64 364#if LJ_64 && !LJ_GC64
347LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) 365LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
348{ 366{
349 UNUSED(f); UNUSED(ud); 367 UNUSED(f); UNUSED(ud);
diff --git a/src/lib_base.c b/src/lib_base.c
index dae61fe1..54e9e2b0 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -23,6 +23,7 @@
23#include "lj_tab.h" 23#include "lj_tab.h"
24#include "lj_meta.h" 24#include "lj_meta.h"
25#include "lj_state.h" 25#include "lj_state.h"
26#include "lj_frame.h"
26#if LJ_HASFFI 27#if LJ_HASFFI
27#include "lj_ctype.h" 28#include "lj_ctype.h"
28#include "lj_cconv.h" 29#include "lj_cconv.h"
@@ -32,6 +33,7 @@
32#include "lj_dispatch.h" 33#include "lj_dispatch.h"
33#include "lj_char.h" 34#include "lj_char.h"
34#include "lj_strscan.h" 35#include "lj_strscan.h"
36#include "lj_strfmt.h"
35#include "lj_lib.h" 37#include "lj_lib.h"
36 38
37/* -- Base library: checks ------------------------------------------------ */ 39/* -- Base library: checks ------------------------------------------------ */
@@ -40,13 +42,13 @@
40 42
41LJLIB_ASM(assert) LJLIB_REC(.) 43LJLIB_ASM(assert) LJLIB_REC(.)
42{ 44{
43 GCstr *s;
44 lj_lib_checkany(L, 1); 45 lj_lib_checkany(L, 1);
45 s = lj_lib_optstr(L, 2); 46 if (L->top == L->base+1)
46 if (s)
47 lj_err_callermsg(L, strdata(s));
48 else
49 lj_err_caller(L, LJ_ERR_ASSERT); 47 lj_err_caller(L, LJ_ERR_ASSERT);
48 else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
49 lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
50 else
51 lj_err_run(L);
50 return FFH_UNREACHABLE; 52 return FFH_UNREACHABLE;
51} 53}
52 54
@@ -86,10 +88,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
86 cTValue *mo = lj_meta_lookup(L, o, mm); 88 cTValue *mo = lj_meta_lookup(L, o, mm);
87 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { 89 if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
88 L->top = o+1; /* Only keep one argument. */ 90 L->top = o+1; /* Only keep one argument. */
89 copyTV(L, L->base-1, mo); /* Replace callable. */ 91 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
90 return FFH_TAILCALL; 92 return FFH_TAILCALL;
91 } else { 93 } else {
92 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); 94 if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
95 if (LJ_FR2) { copyTV(L, o-1, o); o--; }
93 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); 96 setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
94 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); 97 if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
95 return FFH_RES(3); 98 return FFH_RES(3);
@@ -100,7 +103,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
100#endif 103#endif
101 104
102LJLIB_PUSH(lastcl) 105LJLIB_PUSH(lastcl)
103LJLIB_ASM(pairs) 106LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
104{ 107{
105 return ffh_pairs(L, MM_pairs); 108 return ffh_pairs(L, MM_pairs);
106} 109}
@@ -113,7 +116,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
113} 116}
114 117
115LJLIB_PUSH(lastcl) 118LJLIB_PUSH(lastcl)
116LJLIB_ASM(ipairs) LJLIB_REC(.) 119LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
117{ 120{
118 return ffh_pairs(L, MM_ipairs); 121 return ffh_pairs(L, MM_ipairs);
119} 122}
@@ -131,11 +134,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
131 lj_err_caller(L, LJ_ERR_PROTMT); 134 lj_err_caller(L, LJ_ERR_PROTMT);
132 setgcref(t->metatable, obj2gco(mt)); 135 setgcref(t->metatable, obj2gco(mt));
133 if (mt) { lj_gc_objbarriert(L, t, mt); } 136 if (mt) { lj_gc_objbarriert(L, t, mt); }
134 settabV(L, L->base-1, t); 137 settabV(L, L->base-1-LJ_FR2, t);
135 return FFH_RES(1); 138 return FFH_RES(1);
136} 139}
137 140
138LJLIB_CF(getfenv) 141LJLIB_CF(getfenv) LJLIB_REC(.)
139{ 142{
140 GCfunc *fn; 143 GCfunc *fn;
141 cTValue *o = L->base; 144 cTValue *o = L->base;
@@ -144,6 +147,7 @@ LJLIB_CF(getfenv)
144 o = lj_debug_frame(L, level, &level); 147 o = lj_debug_frame(L, level, &level);
145 if (o == NULL) 148 if (o == NULL)
146 lj_err_arg(L, 1, LJ_ERR_INVLVL); 149 lj_err_arg(L, 1, LJ_ERR_INVLVL);
150 if (LJ_FR2) o--;
147 } 151 }
148 fn = &gcval(o)->fn; 152 fn = &gcval(o)->fn;
149 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); 153 settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +169,7 @@ LJLIB_CF(setfenv)
165 o = lj_debug_frame(L, level, &level); 169 o = lj_debug_frame(L, level, &level);
166 if (o == NULL) 170 if (o == NULL)
167 lj_err_arg(L, 1, LJ_ERR_INVLVL); 171 lj_err_arg(L, 1, LJ_ERR_INVLVL);
172 if (LJ_FR2) o--;
168 } 173 }
169 fn = &gcval(o)->fn; 174 fn = &gcval(o)->fn;
170 if (!isluafunc(fn)) 175 if (!isluafunc(fn))
@@ -257,7 +262,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
257 if (base == 10) { 262 if (base == 10) {
258 TValue *o = lj_lib_checkany(L, 1); 263 TValue *o = lj_lib_checkany(L, 1);
259 if (lj_strscan_numberobj(o)) { 264 if (lj_strscan_numberobj(o)) {
260 copyTV(L, L->base-1, o); 265 copyTV(L, L->base-1-LJ_FR2, o);
261 return FFH_RES(1); 266 return FFH_RES(1);
262 } 267 }
263#if LJ_HASFFI 268#if LJ_HASFFI
@@ -270,11 +275,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
270 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { 275 ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
271 int32_t i; 276 int32_t i;
272 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); 277 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
273 setintV(L->base-1, i); 278 setintV(L->base-1-LJ_FR2, i);
274 return FFH_RES(1); 279 return FFH_RES(1);
275 } 280 }
276 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), 281 lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
277 (uint8_t *)&(L->base-1)->n, o, 0); 282 (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
278 return FFH_RES(1); 283 return FFH_RES(1);
279 } 284 }
280 } 285 }
@@ -282,53 +287,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
282 } else { 287 } else {
283 const char *p = strdata(lj_lib_checkstr(L, 1)); 288 const char *p = strdata(lj_lib_checkstr(L, 1));
284 char *ep; 289 char *ep;
290 unsigned int neg = 0;
285 unsigned long ul; 291 unsigned long ul;
286 if (base < 2 || base > 36) 292 if (base < 2 || base > 36)
287 lj_err_arg(L, 2, LJ_ERR_BASERNG); 293 lj_err_arg(L, 2, LJ_ERR_BASERNG);
288 ul = strtoul(p, &ep, base); 294 while (lj_char_isspace((unsigned char)(*p))) p++;
289 if (p != ep) { 295 if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
290 while (lj_char_isspace((unsigned char)(*ep))) ep++; 296 if (lj_char_isalnum((unsigned char)(*p))) {
291 if (*ep == '\0') { 297 ul = strtoul(p, &ep, base);
292 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) 298 if (p != ep) {
293 setintV(L->base-1, (int32_t)ul); 299 while (lj_char_isspace((unsigned char)(*ep))) ep++;
294 else 300 if (*ep == '\0') {
295 setnumV(L->base-1, (lua_Number)ul); 301 if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
296 return FFH_RES(1); 302 if (neg) ul = -ul;
303 setintV(L->base-1-LJ_FR2, (int32_t)ul);
304 } else {
305 lua_Number n = (lua_Number)ul;
306 if (neg) n = -n;
307 setnumV(L->base-1-LJ_FR2, n);
308 }
309 return FFH_RES(1);
310 }
297 } 311 }
298 } 312 }
299 } 313 }
300 setnilV(L->base-1); 314 setnilV(L->base-1-LJ_FR2);
301 return FFH_RES(1); 315 return FFH_RES(1);
302} 316}
303 317
304LJLIB_PUSH("nil")
305LJLIB_PUSH("false")
306LJLIB_PUSH("true")
307LJLIB_ASM(tostring) LJLIB_REC(.) 318LJLIB_ASM(tostring) LJLIB_REC(.)
308{ 319{
309 TValue *o = lj_lib_checkany(L, 1); 320 TValue *o = lj_lib_checkany(L, 1);
310 cTValue *mo; 321 cTValue *mo;
311 L->top = o+1; /* Only keep one argument. */ 322 L->top = o+1; /* Only keep one argument. */
312 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 323 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
313 copyTV(L, L->base-1, mo); /* Replace callable. */ 324 copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
314 return FFH_TAILCALL; 325 return FFH_TAILCALL;
315 } else {
316 GCstr *s;
317 if (tvisnumber(o)) {
318 s = lj_str_fromnumber(L, o);
319 } else if (tvispri(o)) {
320 s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
321 } else {
322 if (tvisfunc(o) && isffunc(funcV(o)))
323 lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
324 else
325 lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
326 /* Note: lua_pushfstring calls the GC which may invalidate o. */
327 s = strV(L->top-1);
328 }
329 setstrV(L, L->base-1, s);
330 return FFH_RES(1);
331 } 326 }
327 lj_gc_check(L);
328 setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
329 return FFH_RES(1);
332} 330}
333 331
334/* -- Base library: throw and catch errors -------------------------------- */ 332/* -- Base library: throw and catch errors -------------------------------- */
@@ -357,7 +355,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
357 355
358static int load_aux(lua_State *L, int status, int envarg) 356static int load_aux(lua_State *L, int status, int envarg)
359{ 357{
360 if (status == 0) { 358 if (status == LUA_OK) {
361 if (tvistab(L->base+envarg-1)) { 359 if (tvistab(L->base+envarg-1)) {
362 GCfunc *fn = funcV(L->top-1); 360 GCfunc *fn = funcV(L->top-1);
363 GCtab *t = tabV(L->base+envarg-1); 361 GCtab *t = tabV(L->base+envarg-1);
@@ -430,7 +428,7 @@ LJLIB_CF(dofile)
430 GCstr *fname = lj_lib_optstr(L, 1); 428 GCstr *fname = lj_lib_optstr(L, 1);
431 setnilV(L->top); 429 setnilV(L->top);
432 L->top = L->base+1; 430 L->top = L->base+1;
433 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) 431 if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK)
434 lua_error(L); 432 lua_error(L);
435 lua_call(L, 0, LUA_MULTRET); 433 lua_call(L, 0, LUA_MULTRET);
436 return (int)(L->top - L->base) - 1; 434 return (int)(L->top - L->base) - 1;
@@ -440,20 +438,20 @@ LJLIB_CF(dofile)
440 438
441LJLIB_CF(gcinfo) 439LJLIB_CF(gcinfo)
442{ 440{
443 setintV(L->top++, (G(L)->gc.total >> 10)); 441 setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
444 return 1; 442 return 1;
445} 443}
446 444
447LJLIB_CF(collectgarbage) 445LJLIB_CF(collectgarbage)
448{ 446{
449 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ 447 int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */
450 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); 448 "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
451 int32_t data = lj_lib_optint(L, 2, 0); 449 int32_t data = lj_lib_optint(L, 2, 0);
452 if (opt == LUA_GCCOUNT) { 450 if (opt == LUA_GCCOUNT) {
453 setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); 451 setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
454 } else { 452 } else {
455 int res = lua_gc(L, opt, data); 453 int res = lua_gc(L, opt, data);
456 if (opt == LUA_GCSTEP) 454 if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
457 setboolV(L->top, res); 455 setboolV(L->top, res);
458 else 456 else
459 setintV(L->top, res); 457 setintV(L->top, res);
@@ -505,23 +503,14 @@ LJLIB_CF(print)
505 tv = L->top-1; 503 tv = L->top-1;
506 } 504 }
507 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) 505 shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring)
508 && !gcrefu(basemt_it(G(L), LJ_TNUMX)); 506 && !gcrefu(basemt_it(G(L), LJ_TNUMX));
509 for (i = 0; i < nargs; i++) { 507 for (i = 0; i < nargs; i++) {
508 cTValue *o = &L->base[i];
510 const char *str; 509 const char *str;
511 size_t size; 510 size_t size;
512 cTValue *o = &L->base[i]; 511 MSize len;
513 if (shortcut && tvisstr(o)) { 512 if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) {
514 str = strVdata(o); 513 size = len;
515 size = strV(o)->len;
516 } else if (shortcut && tvisint(o)) {
517 char buf[LJ_STR_INTBUF];
518 char *p = lj_str_bufint(buf, intV(o));
519 size = (size_t)(buf+LJ_STR_INTBUF-p);
520 str = p;
521 } else if (shortcut && tvisnum(o)) {
522 char buf[LJ_STR_NUMBUF];
523 size = lj_str_bufnum(buf, o);
524 str = buf;
525 } else { 514 } else {
526 copyTV(L, L->top+1, o); 515 copyTV(L, L->top+1, o);
527 copyTV(L, L->top, L->top-1); 516 copyTV(L, L->top, L->top-1);
@@ -558,8 +547,8 @@ LJLIB_CF(coroutine_status)
558 co = threadV(L->base); 547 co = threadV(L->base);
559 if (co == L) s = "running"; 548 if (co == L) s = "running";
560 else if (co->status == LUA_YIELD) s = "suspended"; 549 else if (co->status == LUA_YIELD) s = "suspended";
561 else if (co->status != 0) s = "dead"; 550 else if (co->status != LUA_OK) s = "dead";
562 else if (co->base > tvref(co->stack)+1) s = "normal"; 551 else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
563 else if (co->top == co->base) s = "dead"; 552 else if (co->top == co->base) s = "dead";
564 else s = "suspended"; 553 else s = "suspended";
565 lua_pushstring(L, s); 554 lua_pushstring(L, s);
@@ -579,6 +568,12 @@ LJLIB_CF(coroutine_running)
579#endif 568#endif
580} 569}
581 570
571LJLIB_CF(coroutine_isyieldable)
572{
573 setboolV(L->top++, cframe_canyield(L->cframe));
574 return 1;
575}
576
582LJLIB_CF(coroutine_create) 577LJLIB_CF(coroutine_create)
583{ 578{
584 lua_State *L1; 579 lua_State *L1;
@@ -598,11 +593,11 @@ LJLIB_ASM(coroutine_yield)
598static int ffh_resume(lua_State *L, lua_State *co, int wrap) 593static int ffh_resume(lua_State *L, lua_State *co, int wrap)
599{ 594{
600 if (co->cframe != NULL || co->status > LUA_YIELD || 595 if (co->cframe != NULL || co->status > LUA_YIELD ||
601 (co->status == 0 && co->top == co->base)) { 596 (co->status == LUA_OK && co->top == co->base)) {
602 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; 597 ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
603 if (wrap) lj_err_caller(L, em); 598 if (wrap) lj_err_caller(L, em);
604 setboolV(L->base-1, 0); 599 setboolV(L->base-1-LJ_FR2, 0);
605 setstrV(L, L->base, lj_err_str(L, em)); 600 setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
606 return FFH_RES(2); 601 return FFH_RES(2);
607 } 602 }
608 lj_state_growstack(co, (MSize)(L->top - L->base)); 603 lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -643,9 +638,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
643 638
644LJLIB_CF(coroutine_wrap) 639LJLIB_CF(coroutine_wrap)
645{ 640{
641 GCfunc *fn;
646 lj_cf_coroutine_create(L); 642 lj_cf_coroutine_create(L);
647 lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); 643 fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
648 setpc_wrap_aux(L, funcV(L->top-1)); 644 setpc_wrap_aux(L, fn);
649 return 1; 645 return 1;
650} 646}
651 647
diff --git a/src/lib_bit.c b/src/lib_bit.c
index c374d7a0..c4911450 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
12 12
13#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_str.h" 15#include "lj_buf.h"
16#include "lj_strscan.h"
17#include "lj_strfmt.h"
18#if LJ_HASFFI
19#include "lj_ctype.h"
20#include "lj_cdata.h"
21#include "lj_cconv.h"
22#include "lj_carith.h"
23#endif
24#include "lj_ff.h"
16#include "lj_lib.h" 25#include "lj_lib.h"
17 26
18/* ------------------------------------------------------------------------ */ 27/* ------------------------------------------------------------------------ */
19 28
20#define LJLIB_MODULE_bit 29#define LJLIB_MODULE_bit
21 30
22LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) 31#if LJ_HASFFI
32static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
23{ 33{
34 GCcdata *cd = lj_cdata_new_(L, id, 8);
35 *(uint64_t *)cdataptr(cd) = x;
36 setcdataV(L, L->base-1-LJ_FR2, cd);
37 return FFH_RES(1);
38}
39#else
40static int32_t bit_checkbit(lua_State *L, int narg)
41{
42 TValue *o = L->base + narg-1;
43 if (!(o < L->top && lj_strscan_numberobj(o)))
44 lj_err_argt(L, narg, LUA_TNUMBER);
45 if (LJ_LIKELY(tvisint(o))) {
46 return intV(o);
47 } else {
48 int32_t i = lj_num2bit(numV(o));
49 if (LJ_DUALNUM) setintV(o, i);
50 return i;
51 }
52}
53#endif
54
55LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
56{
57#if LJ_HASFFI
58 CTypeID id = 0;
59 setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
60 return FFH_RES(1);
61#else
62 lj_lib_checknumber(L, 1);
63 return FFH_RETRY;
64#endif
65}
66
67LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
68{
69#if LJ_HASFFI
70 CTypeID id = 0;
71 uint64_t x = lj_carith_check64(L, 1, &id);
72 return id ? bit_result64(L, id, ~x) : FFH_RETRY;
73#else
24 lj_lib_checknumber(L, 1); 74 lj_lib_checknumber(L, 1);
25 return FFH_RETRY; 75 return FFH_RETRY;
76#endif
77}
78
79LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
80{
81#if LJ_HASFFI
82 CTypeID id = 0;
83 uint64_t x = lj_carith_check64(L, 1, &id);
84 return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
85#else
86 lj_lib_checknumber(L, 1);
87 return FFH_RETRY;
88#endif
26} 89}
27LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
28LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
29 90
30LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) 91LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
31{ 92{
93#if LJ_HASFFI
94 CTypeID id = 0, id2 = 0;
95 uint64_t x = lj_carith_check64(L, 1, &id);
96 int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
97 if (id) {
98 x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
99 return bit_result64(L, id, x);
100 }
101 if (id2) setintV(L->base+1, sh);
102 return FFH_RETRY;
103#else
32 lj_lib_checknumber(L, 1); 104 lj_lib_checknumber(L, 1);
33 lj_lib_checkbit(L, 2); 105 bit_checkbit(L, 2);
34 return FFH_RETRY; 106 return FFH_RETRY;
107#endif
35} 108}
36LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) 109LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
37LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) 110LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
40 113
41LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) 114LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
42{ 115{
116#if LJ_HASFFI
117 CTypeID id = 0;
118 TValue *o = L->base, *top = L->top;
119 int i = 0;
120 do { lj_carith_check64(L, ++i, &id); } while (++o < top);
121 if (id) {
122 CTState *cts = ctype_cts(L);
123 CType *ct = ctype_get(cts, id);
124 int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
125 uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
126 o = L->base;
127 do {
128 lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
129 if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
130 } while (++o < top);
131 return bit_result64(L, id, y);
132 }
133 return FFH_RETRY;
134#else
43 int i = 0; 135 int i = 0;
44 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); 136 do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
45 return FFH_RETRY; 137 return FFH_RETRY;
138#endif
46} 139}
47LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) 140LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
48LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) 141LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
49 142
50/* ------------------------------------------------------------------------ */ 143/* ------------------------------------------------------------------------ */
51 144
52LJLIB_CF(bit_tohex) 145LJLIB_CF(bit_tohex) LJLIB_REC(.)
53{ 146{
54 uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); 147#if LJ_HASFFI
55 int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); 148 CTypeID id = 0, id2 = 0;
56 const char *hexdigits = "0123456789abcdef"; 149 uint64_t b = lj_carith_check64(L, 1, &id);
57 char buf[8]; 150 int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
58 if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } 151 (int32_t)lj_carith_check64(L, 2, &id2);
59 if (n > 8) n = 8; 152#else
60 for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } 153 uint32_t b = (uint32_t)bit_checkbit(L, 1);
61 lua_pushlstring(L, buf, (size_t)n); 154 int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
155#endif
156 SBuf *sb = lj_buf_tmp_(L);
157 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
158 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
159 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
160#if LJ_HASFFI
161 if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
162#else
163 if (n < 8) b &= (1u << 4*n)-1;
164#endif
165 sb = lj_strfmt_putfxint(sb, sf, b);
166 setstrV(L, L->top-1, lj_buf_str(L, sb));
167 lj_gc_check(L);
62 return 1; 168 return 1;
63} 169}
64 170
diff --git a/src/lib_debug.c b/src/lib_debug.c
index a485ff8e..5bcabe7d 100644
--- a/src/lib_debug.c
+++ b/src/lib_debug.c
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
29 return 1; 29 return 1;
30} 30}
31 31
32LJLIB_CF(debug_getmetatable) 32LJLIB_CF(debug_getmetatable) LJLIB_REC(.)
33{ 33{
34 lj_lib_checkany(L, 1); 34 lj_lib_checkany(L, 1);
35 if (!lua_getmetatable(L, 1)) { 35 if (!lua_getmetatable(L, 1)) {
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
283 283
284/* ------------------------------------------------------------------------ */ 284/* ------------------------------------------------------------------------ */
285 285
286static const char KEY_HOOK = 'h'; 286#define KEY_HOOK ((void *)0x3004)
287 287
288static void hookf(lua_State *L, lua_Debug *ar) 288static void hookf(lua_State *L, lua_Debug *ar)
289{ 289{
290 static const char *const hooknames[] = 290 static const char *const hooknames[] =
291 {"call", "return", "line", "count", "tail return"}; 291 {"call", "return", "line", "count", "tail return"};
292 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 292 lua_pushlightuserdata(L, KEY_HOOK);
293 lua_rawget(L, LUA_REGISTRYINDEX); 293 lua_rawget(L, LUA_REGISTRYINDEX);
294 if (lua_isfunction(L, -1)) { 294 if (lua_isfunction(L, -1)) {
295 lua_pushstring(L, hooknames[(int)ar->event]); 295 lua_pushstring(L, hooknames[(int)ar->event]);
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
334 count = luaL_optint(L, arg+3, 0); 334 count = luaL_optint(L, arg+3, 0);
335 func = hookf; mask = makemask(smask, count); 335 func = hookf; mask = makemask(smask, count);
336 } 336 }
337 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 337 lua_pushlightuserdata(L, KEY_HOOK);
338 lua_pushvalue(L, arg+1); 338 lua_pushvalue(L, arg+1);
339 lua_rawset(L, LUA_REGISTRYINDEX); 339 lua_rawset(L, LUA_REGISTRYINDEX);
340 lua_sethook(L, func, mask, count); 340 lua_sethook(L, func, mask, count);
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
349 if (hook != NULL && hook != hookf) { /* external hook? */ 349 if (hook != NULL && hook != hookf) { /* external hook? */
350 lua_pushliteral(L, "external hook"); 350 lua_pushliteral(L, "external hook");
351 } else { 351 } else {
352 lua_pushlightuserdata(L, (void *)&KEY_HOOK); 352 lua_pushlightuserdata(L, KEY_HOOK);
353 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ 353 lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
354 } 354 }
355 lua_pushstring(L, unmakemask(mask, buff)); 355 lua_pushstring(L, unmakemask(mask, buff));
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index 5851eea5..16fecacb 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
29#include "lj_ccall.h" 29#include "lj_ccall.h"
30#include "lj_ccallback.h" 30#include "lj_ccallback.h"
31#include "lj_clib.h" 31#include "lj_clib.h"
32#include "lj_strfmt.h"
32#include "lj_ff.h" 33#include "lj_ff.h"
33#include "lj_lib.h" 34#include "lj_lib.h"
34 35
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
137 } 138 }
138 } 139 }
139 copyTV(L, base, L->top); 140 copyTV(L, base, L->top);
140 tv = L->top-1; 141 tv = L->top-1-LJ_FR2;
141 } 142 }
142 return lj_meta_tailcall(L, tv); 143 return lj_meta_tailcall(L, tv);
143} 144}
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
318 } 319 }
319 } 320 }
320 } 321 }
321 lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); 322 lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
322checkgc: 323checkgc:
323 lj_gc_check(L); 324 lj_gc_check(L);
324 return 1; 325 return 1;
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
504 } 505 }
505 if (sz == CTSIZE_INVALID) 506 if (sz == CTSIZE_INVALID)
506 lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); 507 lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
507 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) 508 cd = lj_cdata_newx(cts, id, sz, info);
508 cd = lj_cdata_new(cts, id, sz);
509 else
510 cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
511 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ 509 setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
512 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), 510 lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
513 o, (MSize)(L->top - o)); /* Initialize cdata. */ 511 o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -558,6 +556,31 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.)
558 return 1; 556 return 1;
559} 557}
560 558
559/* Internal and unsupported API. */
560LJLIB_CF(ffi_typeinfo)
561{
562 CTState *cts = ctype_cts(L);
563 CTypeID id = (CTypeID)ffi_checkint(L, 1);
564 if (id > 0 && id < cts->top) {
565 CType *ct = ctype_get(cts, id);
566 GCtab *t;
567 lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */
568 t = tabV(L->top-1);
569 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
570 if (ct->size != CTSIZE_INVALID)
571 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
572 if (ct->sib)
573 setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
574 if (gcref(ct->name)) {
575 GCstr *s = gco2str(gcref(ct->name));
576 setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
577 }
578 lj_gc_check(L);
579 return 1;
580 }
581 return 0;
582}
583
561LJLIB_CF(ffi_istype) LJLIB_REC(.) 584LJLIB_CF(ffi_istype) LJLIB_REC(.)
562{ 585{
563 CTState *cts = ctype_cts(L); 586 CTState *cts = ctype_cts(L);
@@ -697,44 +720,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.)
697 return 0; 720 return 0;
698} 721}
699 722
700#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
701
702/* Test ABI string. */ 723/* Test ABI string. */
703LJLIB_CF(ffi_abi) LJLIB_REC(.) 724LJLIB_CF(ffi_abi) LJLIB_REC(.)
704{ 725{
705 GCstr *s = lj_lib_checkstr(L, 1); 726 GCstr *s = lj_lib_checkstr(L, 1);
706 int b = 0; 727 int b = lj_cparse_case(s,
707 switch (s->hash) {
708#if LJ_64 728#if LJ_64
709 case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ 729 "\00564bit"
710#else 730#else
711 case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ 731 "\00532bit"
712#endif 732#endif
713#if LJ_ARCH_HASFPU 733#if LJ_ARCH_HASFPU
714 case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ 734 "\003fpu"
715#endif 735#endif
716#if LJ_ABI_SOFTFP 736#if LJ_ABI_SOFTFP
717 case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ 737 "\006softfp"
718#else 738#else
719 case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ 739 "\006hardfp"
720#endif 740#endif
721#if LJ_ABI_EABI 741#if LJ_ABI_EABI
722 case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ 742 "\004eabi"
723#endif 743#endif
724#if LJ_ABI_WIN 744#if LJ_ABI_WIN
725 case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ 745 "\003win"
726#endif 746#endif
727 case H_(3af93066,1f001464): b = 1; break; /* le/be */ 747#if LJ_TARGET_UWP
728 default: 748 "\003uwp"
729 break; 749#endif
730 } 750#if LJ_LE
751 "\002le"
752#else
753 "\002be"
754#endif
755#if LJ_GC64
756 "\004gc64"
757#endif
758 ) >= 0;
731 setboolV(L->top-1, b); 759 setboolV(L->top-1, b);
732 setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ 760 setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */
733 return 1; 761 return 1;
734} 762}
735 763
736#undef H_
737
738LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ 764LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
739 765
740LJLIB_CF(ffi_metatype) 766LJLIB_CF(ffi_metatype)
@@ -768,19 +794,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.)
768 GCcdata *cd = ffi_checkcdata(L, 1); 794 GCcdata *cd = ffi_checkcdata(L, 1);
769 TValue *fin = lj_lib_checkany(L, 2); 795 TValue *fin = lj_lib_checkany(L, 2);
770 CTState *cts = ctype_cts(L); 796 CTState *cts = ctype_cts(L);
771 GCtab *t = cts->finalizer;
772 CType *ct = ctype_raw(cts, cd->ctypeid); 797 CType *ct = ctype_raw(cts, cd->ctypeid);
773 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || 798 if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
774 ctype_isrefarray(ct->info))) 799 ctype_isrefarray(ct->info)))
775 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); 800 lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
776 if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ 801 lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
777 copyTV(L, lj_tab_set(L, t, L->base), fin);
778 lj_gc_anybarriert(L, t);
779 if (!tvisnil(fin))
780 cd->marked |= LJ_GC_CDATA_FIN;
781 else
782 cd->marked &= ~LJ_GC_CDATA_FIN;
783 }
784 L->top = L->base+1; /* Pass through the cdata object. */ 802 L->top = L->base+1; /* Pass through the cdata object. */
785 return 1; 803 return 1;
786} 804}
diff --git a/src/lib_io.c b/src/lib_io.c
index f13cf048..5e9d0d66 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h" 20#include "lj_gc.h"
21#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
22#include "lj_str.h" 23#include "lj_str.h"
23#include "lj_state.h" 24#include "lj_state.h"
25#include "lj_strfmt.h"
24#include "lj_ff.h" 26#include "lj_ff.h"
25#include "lj_lib.h" 27#include "lj_lib.h"
26 28
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
84 IOFileUD *iof = io_file_new(L); 86 IOFileUD *iof = io_file_new(L);
85 iof->fp = fopen(fname, mode); 87 iof->fp = fopen(fname, mode);
86 if (iof->fp == NULL) 88 if (iof->fp == NULL)
87 luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); 89 luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
88 return iof; 90 return iof;
89} 91}
90 92
@@ -97,7 +99,7 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
97 int stat = -1; 99 int stat = -1;
98#if LJ_TARGET_POSIX 100#if LJ_TARGET_POSIX
99 stat = pclose(iof->fp); 101 stat = pclose(iof->fp);
100#elif LJ_TARGET_WINDOWS 102#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
101 stat = _pclose(iof->fp); 103 stat = _pclose(iof->fp);
102#else 104#else
103 lua_assert(0); 105 lua_assert(0);
@@ -145,7 +147,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
145 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; 147 MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
146 char *buf; 148 char *buf;
147 for (;;) { 149 for (;;) {
148 buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 150 buf = lj_buf_tmp(L, m);
149 if (fgets(buf+n, m-n, fp) == NULL) break; 151 if (fgets(buf+n, m-n, fp) == NULL) break;
150 n += (MSize)strlen(buf+n); 152 n += (MSize)strlen(buf+n);
151 ok |= n; 153 ok |= n;
@@ -161,7 +163,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
161{ 163{
162 MSize m, n; 164 MSize m, n;
163 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { 165 for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
164 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 166 char *buf = lj_buf_tmp(L, m);
165 n += (MSize)fread(buf+n, 1, m-n, fp); 167 n += (MSize)fread(buf+n, 1, m-n, fp);
166 if (n != m) { 168 if (n != m) {
167 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 169 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +176,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
174static int io_file_readlen(lua_State *L, FILE *fp, MSize m) 176static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
175{ 177{
176 if (m) { 178 if (m) {
177 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); 179 char *buf = lj_buf_tmp(L, m);
178 MSize n = (MSize)fread(buf, 1, m, fp); 180 MSize n = (MSize)fread(buf, 1, m, fp);
179 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); 181 setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
180 lj_gc_check(L); 182 lj_gc_check(L);
@@ -201,13 +203,12 @@ static int io_file_read(lua_State *L, FILE *fp, int start)
201 for (n = start; nargs-- && ok; n++) { 203 for (n = start; nargs-- && ok; n++) {
202 if (tvisstr(L->base+n)) { 204 if (tvisstr(L->base+n)) {
203 const char *p = strVdata(L->base+n); 205 const char *p = strVdata(L->base+n);
204 if (p[0] != '*') 206 if (p[0] == '*') p++;
205 lj_err_arg(L, n+1, LJ_ERR_INVOPT); 207 if (p[0] == 'n')
206 if (p[1] == 'n')
207 ok = io_file_readnum(L, fp); 208 ok = io_file_readnum(L, fp);
208 else if ((p[1] & ~0x20) == 'L') 209 else if ((p[0] & ~0x20) == 'L')
209 ok = io_file_readline(L, fp, (p[1] == 'l')); 210 ok = io_file_readline(L, fp, (p[0] == 'l'));
210 else if (p[1] == 'a') 211 else if (p[0] == 'a')
211 io_file_readall(L, fp); 212 io_file_readall(L, fp);
212 else 213 else
213 lj_err_arg(L, n+1, LJ_ERR_INVFMT); 214 lj_err_arg(L, n+1, LJ_ERR_INVFMT);
@@ -230,19 +231,11 @@ static int io_file_write(lua_State *L, FILE *fp, int start)
230 cTValue *tv; 231 cTValue *tv;
231 int status = 1; 232 int status = 1;
232 for (tv = L->base+start; tv < L->top; tv++) { 233 for (tv = L->base+start; tv < L->top; tv++) {
233 if (tvisstr(tv)) { 234 MSize len;
234 MSize len = strV(tv)->len; 235 const char *p = lj_strfmt_wstrnum(L, tv, &len);
235 status = status && (fwrite(strVdata(tv), 1, len, fp) == len); 236 if (!p)
236 } else if (tvisint(tv)) {
237 char buf[LJ_STR_INTBUF];
238 char *p = lj_str_bufint(buf, intV(tv));
239 size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
240 status = status && (fwrite(p, 1, len, fp) == len);
241 } else if (tvisnum(tv)) {
242 status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
243 } else {
244 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); 237 lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
245 } 238 status = status && (fwrite(p, 1, len, fp) == len);
246 } 239 }
247 if (LJ_52 && status) { 240 if (LJ_52 && status) {
248 L->top = L->base+1; 241 L->top = L->base+1;
@@ -413,7 +406,7 @@ LJLIB_CF(io_open)
413 406
414LJLIB_CF(io_popen) 407LJLIB_CF(io_popen)
415{ 408{
416#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS 409#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
417 const char *fname = strdata(lj_lib_checkstr(L, 1)); 410 const char *fname = strdata(lj_lib_checkstr(L, 1));
418 GCstr *s = lj_lib_optstr(L, 2); 411 GCstr *s = lj_lib_optstr(L, 2);
419 const char *mode = s ? strdata(s) : "r"; 412 const char *mode = s ? strdata(s) : "r";
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 6e98229e..c97b0d53 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -10,13 +10,17 @@
10#include "lauxlib.h" 10#include "lauxlib.h"
11#include "lualib.h" 11#include "lualib.h"
12 12
13#include "lj_arch.h"
14#include "lj_obj.h" 13#include "lj_obj.h"
14#include "lj_gc.h"
15#include "lj_err.h" 15#include "lj_err.h"
16#include "lj_debug.h" 16#include "lj_debug.h"
17#include "lj_str.h" 17#include "lj_str.h"
18#include "lj_tab.h" 18#include "lj_tab.h"
19#include "lj_state.h"
19#include "lj_bc.h" 20#include "lj_bc.h"
21#if LJ_HASFFI
22#include "lj_ctype.h"
23#endif
20#if LJ_HASJIT 24#if LJ_HASJIT
21#include "lj_ir.h" 25#include "lj_ir.h"
22#include "lj_jit.h" 26#include "lj_jit.h"
@@ -24,6 +28,7 @@
24#include "lj_iropt.h" 28#include "lj_iropt.h"
25#include "lj_target.h" 29#include "lj_target.h"
26#endif 30#endif
31#include "lj_trace.h"
27#include "lj_dispatch.h" 32#include "lj_dispatch.h"
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "lj_vmevent.h" 34#include "lj_vmevent.h"
@@ -280,7 +285,7 @@ static GCtrace *jit_checktrace(lua_State *L)
280/* Names of link types. ORDER LJ_TRLINK */ 285/* Names of link types. ORDER LJ_TRLINK */
281static const char *const jit_trlinkname[] = { 286static const char *const jit_trlinkname[] = {
282 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", 287 "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
283 "interpreter", "return" 288 "interpreter", "return", "stitch"
284}; 289};
285 290
286/* local info = jit.util.traceinfo(tr) */ 291/* local info = jit.util.traceinfo(tr) */
@@ -333,6 +338,13 @@ LJLIB_CF(jit_util_tracek)
333 slot = ir->op2; 338 slot = ir->op2;
334 ir = &T->ir[ir->op1]; 339 ir = &T->ir[ir->op1];
335 } 340 }
341#if LJ_HASFFI
342 if (ir->o == IR_KINT64 && !ctype_ctsG(G(L))) {
343 ptrdiff_t oldtop = savestack(L, L->top);
344 luaopen_ffi(L); /* Load FFI library on-demand. */
345 L->top = restorestack(L, oldtop);
346 }
347#endif
336 lj_ir_kvalue(L, L->top-2, ir); 348 lj_ir_kvalue(L, L->top-2, ir);
337 setintV(L->top-1, (int32_t)irt_type(ir->t)); 349 setintV(L->top-1, (int32_t)irt_type(ir->t));
338 if (slot == -1) 350 if (slot == -1)
@@ -417,6 +429,12 @@ LJLIB_CF(jit_util_ircalladdr)
417 429
418#include "lj_libdef.h" 430#include "lj_libdef.h"
419 431
432static int luaopen_jit_util(lua_State *L)
433{
434 LJ_LIB_REG(L, NULL, jit_util);
435 return 1;
436}
437
420/* -- jit.opt module ------------------------------------------------------ */ 438/* -- jit.opt module ------------------------------------------------------ */
421 439
422#if LJ_HASJIT 440#if LJ_HASJIT
@@ -514,6 +532,104 @@ LJLIB_CF(jit_opt_start)
514 532
515#endif 533#endif
516 534
535/* -- jit.profile module -------------------------------------------------- */
536
537#if LJ_HASPROFILE
538
539#define LJLIB_MODULE_jit_profile
540
541/* Not loaded by default, use: local profile = require("jit.profile") */
542
543static const char KEY_PROFILE_THREAD = 't';
544static const char KEY_PROFILE_FUNC = 'f';
545
546static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
547 int vmstate)
548{
549 TValue key;
550 cTValue *tv;
551 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
552 tv = lj_tab_get(L, tabV(registry(L)), &key);
553 if (tvisfunc(tv)) {
554 char vmst = (char)vmstate;
555 int status;
556 setfuncV(L2, L2->top++, funcV(tv));
557 setthreadV(L2, L2->top++, L);
558 setintV(L2->top++, samples);
559 setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
560 status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
561 if (status) {
562 if (G(L2)->panic) G(L2)->panic(L2);
563 exit(EXIT_FAILURE);
564 }
565 lj_trace_abort(G(L2));
566 }
567}
568
569/* profile.start(mode, cb) */
570LJLIB_CF(jit_profile_start)
571{
572 GCtab *registry = tabV(registry(L));
573 GCstr *mode = lj_lib_optstr(L, 1);
574 GCfunc *func = lj_lib_checkfunc(L, 2);
575 lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
576 TValue key;
577 /* Anchor thread and function in registry. */
578 setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
579 setthreadV(L, lj_tab_set(L, registry, &key), L2);
580 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
581 setfuncV(L, lj_tab_set(L, registry, &key), func);
582 lj_gc_anybarriert(L, registry);
583 luaJIT_profile_start(L, mode ? strdata(mode) : "",
584 (luaJIT_profile_callback)jit_profile_callback, L2);
585 return 0;
586}
587
588/* profile.stop() */
589LJLIB_CF(jit_profile_stop)
590{
591 GCtab *registry;
592 TValue key;
593 luaJIT_profile_stop(L);
594 registry = tabV(registry(L));
595 setlightudV(&key, (void *)&KEY_PROFILE_THREAD);
596 setnilV(lj_tab_set(L, registry, &key));
597 setlightudV(&key, (void *)&KEY_PROFILE_FUNC);
598 setnilV(lj_tab_set(L, registry, &key));
599 lj_gc_anybarriert(L, registry);
600 return 0;
601}
602
603/* dump = profile.dumpstack([thread,] fmt, depth) */
604LJLIB_CF(jit_profile_dumpstack)
605{
606 lua_State *L2 = L;
607 int arg = 0;
608 size_t len;
609 int depth;
610 GCstr *fmt;
611 const char *p;
612 if (L->top > L->base && tvisthread(L->base)) {
613 L2 = threadV(L->base);
614 arg = 1;
615 }
616 fmt = lj_lib_checkstr(L, arg+1);
617 depth = lj_lib_checkint(L, arg+2);
618 p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
619 lua_pushlstring(L, p, len);
620 return 1;
621}
622
623#include "lj_libdef.h"
624
625static int luaopen_jit_profile(lua_State *L)
626{
627 LJ_LIB_REG(L, NULL, jit_profile);
628 return 1;
629}
630
631#endif
632
517/* -- JIT compiler initialization ----------------------------------------- */ 633/* -- JIT compiler initialization ----------------------------------------- */
518 634
519#if LJ_HASJIT 635#if LJ_HASJIT
@@ -539,38 +655,31 @@ static uint32_t jit_cpudetect(lua_State *L)
539 uint32_t features[4]; 655 uint32_t features[4];
540 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { 656 if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
541#if !LJ_HASJIT 657#if !LJ_HASJIT
542#define JIT_F_CMOV 1
543#define JIT_F_SSE2 2 658#define JIT_F_SSE2 2
544#endif 659#endif
545 flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
546 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; 660 flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
547#if LJ_HASJIT 661#if LJ_HASJIT
548 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; 662 flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
549 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; 663 flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
550 if (vendor[2] == 0x6c65746e) { /* Intel. */ 664 if (vendor[2] == 0x6c65746e) { /* Intel. */
551 if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ 665 if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
552 flags |= JIT_F_P4; /* Currently unused. */
553 else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
554 flags |= JIT_F_LEA_AGU; 666 flags |= JIT_F_LEA_AGU;
555 } else if (vendor[2] == 0x444d4163) { /* AMD. */ 667 } else if (vendor[2] == 0x444d4163) { /* AMD. */
556 uint32_t fam = (features[0] & 0x0ff00f00); 668 uint32_t fam = (features[0] & 0x0ff00f00);
557 if (fam == 0x00000f00) /* K8. */
558 flags |= JIT_F_SPLIT_XMM;
559 if (fam >= 0x00000f00) /* K8, K10. */ 669 if (fam >= 0x00000f00) /* K8, K10. */
560 flags |= JIT_F_PREFER_IMUL; 670 flags |= JIT_F_PREFER_IMUL;
561 } 671 }
672 if (vendor[0] >= 7) {
673 uint32_t xfeatures[4];
674 lj_vm_cpuid(7, xfeatures);
675 flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
676 }
562#endif 677#endif
563 } 678 }
564 /* Check for required instruction set support on x86 (unnecessary on x64). */ 679 /* Check for required instruction set support on x86 (unnecessary on x64). */
565#if LJ_TARGET_X86 680#if LJ_TARGET_X86
566#if !defined(LUAJIT_CPU_NOCMOV)
567 if (!(flags & JIT_F_CMOV))
568 luaL_error(L, "CPU not supported");
569#endif
570#if defined(LUAJIT_CPU_SSE2)
571 if (!(flags & JIT_F_SSE2)) 681 if (!(flags & JIT_F_SSE2))
572 luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); 682 luaL_error(L, "CPU with SSE2 required");
573#endif
574#endif 683#endif
575#elif LJ_TARGET_ARM 684#elif LJ_TARGET_ARM
576#if LJ_HASJIT 685#if LJ_HASJIT
@@ -592,6 +701,8 @@ static uint32_t jit_cpudetect(lua_State *L)
592 ver >= 60 ? JIT_F_ARMV6_ : 0; 701 ver >= 60 ? JIT_F_ARMV6_ : 0;
593 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; 702 flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
594#endif 703#endif
704#elif LJ_TARGET_ARM64
705 /* No optional CPU features to detect (for now). */
595#elif LJ_TARGET_PPC 706#elif LJ_TARGET_PPC
596#if LJ_HASJIT 707#if LJ_HASJIT
597#if LJ_ARCH_SQRT 708#if LJ_ARCH_SQRT
@@ -601,21 +712,23 @@ static uint32_t jit_cpudetect(lua_State *L)
601 flags |= JIT_F_ROUND; 712 flags |= JIT_F_ROUND;
602#endif 713#endif
603#endif 714#endif
604#elif LJ_TARGET_PPCSPE
605 /* Nothing to do. */
606#elif LJ_TARGET_MIPS 715#elif LJ_TARGET_MIPS
607#if LJ_HASJIT 716#if LJ_HASJIT
608 /* Compile-time MIPS CPU detection. */ 717 /* Compile-time MIPS CPU detection. */
609#if LJ_ARCH_VERSION >= 20 718#if LJ_ARCH_VERSION >= 20
610 flags |= JIT_F_MIPS32R2; 719 flags |= JIT_F_MIPSXXR2;
611#endif 720#endif
612 /* Runtime MIPS CPU detection. */ 721 /* Runtime MIPS CPU detection. */
613#if defined(__GNUC__) 722#if defined(__GNUC__)
614 if (!(flags & JIT_F_MIPS32R2)) { 723 if (!(flags & JIT_F_MIPSXXR2)) {
615 int x; 724 int x;
725#ifdef __mips16
726 x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
727#else
616 /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ 728 /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
617 __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); 729 __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
618 if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ 730#endif
731 if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
619 } 732 }
620#endif 733#endif
621#endif 734#endif
@@ -632,11 +745,7 @@ static void jit_init(lua_State *L)
632 uint32_t flags = jit_cpudetect(L); 745 uint32_t flags = jit_cpudetect(L);
633#if LJ_HASJIT 746#if LJ_HASJIT
634 jit_State *J = L2J(L); 747 jit_State *J = L2J(L);
635#if LJ_TARGET_X86 748 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
636 /* Silently turn off the JIT compiler on CPUs without SSE2. */
637 if ((flags & JIT_F_SSE2))
638#endif
639 J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
640 memcpy(J->param, jit_param_default, sizeof(J->param)); 749 memcpy(J->param, jit_param_default, sizeof(J->param));
641 lj_dispatch_update(G(L)); 750 lj_dispatch_update(G(L));
642#else 751#else
@@ -646,19 +755,23 @@ static void jit_init(lua_State *L)
646 755
647LUALIB_API int luaopen_jit(lua_State *L) 756LUALIB_API int luaopen_jit(lua_State *L)
648{ 757{
758 jit_init(L);
649 lua_pushliteral(L, LJ_OS_NAME); 759 lua_pushliteral(L, LJ_OS_NAME);
650 lua_pushliteral(L, LJ_ARCH_NAME); 760 lua_pushliteral(L, LJ_ARCH_NAME);
651 lua_pushinteger(L, LUAJIT_VERSION_NUM); 761 lua_pushinteger(L, LUAJIT_VERSION_NUM);
652 lua_pushliteral(L, LUAJIT_VERSION); 762 lua_pushliteral(L, LUAJIT_VERSION);
653 LJ_LIB_REG(L, LUA_JITLIBNAME, jit); 763 LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
764#if LJ_HASPROFILE
765 lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
766 tabref(L->env));
767#endif
654#ifndef LUAJIT_DISABLE_JITUTIL 768#ifndef LUAJIT_DISABLE_JITUTIL
655 LJ_LIB_REG(L, "jit.util", jit_util); 769 lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
656#endif 770#endif
657#if LJ_HASJIT 771#if LJ_HASJIT
658 LJ_LIB_REG(L, "jit.opt", jit_opt); 772 LJ_LIB_REG(L, "jit.opt", jit_opt);
659#endif 773#endif
660 L->top -= 2; 774 L->top -= 2;
661 jit_init(L);
662 return 1; 775 return 1;
663} 776}
664 777
diff --git a/src/lib_math.c b/src/lib_math.c
index 9d324d7e..3fd466ca 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -47,12 +47,6 @@ LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
47LJLIB_ASM_(math_frexp) 47LJLIB_ASM_(math_frexp)
48LJLIB_ASM_(math_modf) LJLIB_REC(.) 48LJLIB_ASM_(math_modf) LJLIB_REC(.)
49 49
50LJLIB_PUSH(57.29577951308232)
51LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
52
53LJLIB_PUSH(0.017453292519943295)
54LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
55
56LJLIB_ASM(math_log) LJLIB_REC(math_log) 50LJLIB_ASM(math_log) LJLIB_REC(math_log)
57{ 51{
58 double x = lj_lib_checknum(L, 1); 52 double x = lj_lib_checknum(L, 1);
@@ -63,12 +57,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
63#else 57#else
64 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); 58 x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
65#endif 59#endif
66 setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ 60 setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */
67 return FFH_RES(1); 61 return FFH_RES(1);
68 } 62 }
69 return FFH_RETRY; 63 return FFH_RETRY;
70} 64}
71 65
66LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
67LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
68
72LJLIB_ASM(math_atan2) LJLIB_REC(.) 69LJLIB_ASM(math_atan2) LJLIB_REC(.)
73{ 70{
74 lj_lib_checknum(L, 1); 71 lj_lib_checknum(L, 1);
@@ -224,10 +221,6 @@ LUALIB_API int luaopen_math(lua_State *L)
224 rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); 221 rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
225 rs->valid = 0; /* Use lazy initialization to save some time on startup. */ 222 rs->valid = 0; /* Use lazy initialization to save some time on startup. */
226 LJ_LIB_REG(L, LUA_MATHLIBNAME, math); 223 LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
227#if defined(LUA_COMPAT_MOD) && !LJ_52
228 lua_getfield(L, -1, "fmod");
229 lua_setfield(L, -2, "mod");
230#endif
231 return 1; 224 return 1;
232} 225}
233 226
diff --git a/src/lib_os.c b/src/lib_os.c
index a8e1708f..47893766 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -17,7 +17,10 @@
17#include "lualib.h" 17#include "lualib.h"
18 18
19#include "lj_obj.h" 19#include "lj_obj.h"
20#include "lj_gc.h"
20#include "lj_err.h" 21#include "lj_err.h"
22#include "lj_buf.h"
23#include "lj_str.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23#if LJ_TARGET_POSIX 26#if LJ_TARGET_POSIX
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
188#endif 191#endif
189 } 192 }
190 if (stm == NULL) { /* Invalid date? */ 193 if (stm == NULL) { /* Invalid date? */
191 setnilV(L->top-1); 194 setnilV(L->top++);
192 } else if (strcmp(s, "*t") == 0) { 195 } else if (strcmp(s, "*t") == 0) {
193 lua_createtable(L, 0, 9); /* 9 = number of fields */ 196 lua_createtable(L, 0, 9); /* 9 = number of fields */
194 setfield(L, "sec", stm->tm_sec); 197 setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
200 setfield(L, "wday", stm->tm_wday+1); 203 setfield(L, "wday", stm->tm_wday+1);
201 setfield(L, "yday", stm->tm_yday+1); 204 setfield(L, "yday", stm->tm_yday+1);
202 setboolfield(L, "isdst", stm->tm_isdst); 205 setboolfield(L, "isdst", stm->tm_isdst);
203 } else { 206 } else if (*s) {
204 char cc[3]; 207 SBuf *sb = &G(L)->tmpbuf;
205 luaL_Buffer b; 208 MSize sz = 0, retry = 4;
206 cc[0] = '%'; cc[2] = '\0'; 209 const char *q;
207 luaL_buffinit(L, &b); 210 for (q = s; *q; q++)
208 for (; *s; s++) { 211 sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
209 if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ 212 setsbufL(sb, L);
210 luaL_addchar(&b, *s); 213 while (retry--) { /* Limit growth for invalid format or empty result. */
211 } else { 214 char *buf = lj_buf_need(sb, sz);
212 size_t reslen; 215 size_t len = strftime(buf, sbufsz(sb), s, stm);
213 char buff[200]; /* Should be big enough for any conversion result. */ 216 if (len) {
214 cc[1] = *(++s); 217 setstrV(L, L->top++, lj_str_new(L, buf, len));
215 reslen = strftime(buff, sizeof(buff), cc, stm); 218 lj_gc_check(L);
216 luaL_addlstring(&b, buff, reslen); 219 break;
217 } 220 }
221 sz += (sz|1);
218 } 222 }
219 luaL_pushresult(&b); 223 } else {
224 setstrV(L, L->top++, &G(L)->strempty);
220 } 225 }
221 return 1; 226 return 1;
222} 227}
diff --git a/src/lib_package.c b/src/lib_package.c
index a8bdcf17..5d8eb25d 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
76BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); 76BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
77#endif 77#endif
78 78
79#if LJ_TARGET_UWP
80void *LJ_WIN_LOADLIBA(const char *path)
81{
82 DWORD err = GetLastError();
83 wchar_t wpath[256];
84 HANDLE lib = NULL;
85 if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
86 lib = LoadPackagedLibrary(wpath, 0);
87 }
88 SetLastError(err);
89 return lib;
90}
91#endif
92
79#undef setprogdir 93#undef setprogdir
80 94
81static void setprogdir(lua_State *L) 95static void setprogdir(lua_State *L)
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L)
96static void pusherror(lua_State *L) 110static void pusherror(lua_State *L)
97{ 111{
98 DWORD error = GetLastError(); 112 DWORD error = GetLastError();
113#if LJ_TARGET_XBOXONE
114 wchar_t wbuffer[128];
115 char buffer[128*2];
116 if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
117 NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
118 WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
119#else
99 char buffer[128]; 120 char buffer[128];
100 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, 121 if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
101 NULL, error, 0, buffer, sizeof(buffer), NULL)) 122 NULL, error, 0, buffer, sizeof(buffer), NULL))
123#endif
102 lua_pushstring(L, buffer); 124 lua_pushstring(L, buffer);
103 else 125 else
104 lua_pushfstring(L, "system error %d\n", error); 126 lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib)
111 133
112static void *ll_load(lua_State *L, const char *path, int gl) 134static void *ll_load(lua_State *L, const char *path, int gl)
113{ 135{
114 HINSTANCE lib = LoadLibraryA(path); 136 HINSTANCE lib = LJ_WIN_LOADLIBA(path);
115 if (lib == NULL) pusherror(L); 137 if (lib == NULL) pusherror(L);
116 UNUSED(gl); 138 UNUSED(gl);
117 return lib; 139 return lib;
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
124 return f; 146 return f;
125} 147}
126 148
149#if LJ_TARGET_UWP
150EXTERN_C IMAGE_DOS_HEADER __ImageBase;
151#endif
152
127static const char *ll_bcsym(void *lib, const char *sym) 153static const char *ll_bcsym(void *lib, const char *sym)
128{ 154{
129 if (lib) { 155 if (lib) {
130 return (const char *)GetProcAddress((HINSTANCE)lib, sym); 156 return (const char *)GetProcAddress((HINSTANCE)lib, sym);
131 } else { 157 } else {
158#if LJ_TARGET_UWP
159 return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
160#else
132 HINSTANCE h = GetModuleHandleA(NULL); 161 HINSTANCE h = GetModuleHandleA(NULL);
133 const char *p = (const char *)GetProcAddress(h, sym); 162 const char *p = (const char *)GetProcAddress(h, sym);
134 if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 163 if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
135 (const char *)ll_bcsym, &h)) 164 (const char *)ll_bcsym, &h))
136 p = (const char *)GetProcAddress(h, sym); 165 p = (const char *)GetProcAddress(h, sym);
137 return p; 166 return p;
167#endif
138 } 168 }
139} 169}
140 170
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path)
185 lua_pop(L, 1); 215 lua_pop(L, 1);
186 plib = (void **)lua_newuserdata(L, sizeof(void *)); 216 plib = (void **)lua_newuserdata(L, sizeof(void *));
187 *plib = NULL; 217 *plib = NULL;
188 luaL_getmetatable(L, "_LOADLIB"); 218 luaL_setmetatable(L, "_LOADLIB");
189 lua_setmetatable(L, -2);
190 lua_pushfstring(L, "LOADLIB: %s", path); 219 lua_pushfstring(L, "LOADLIB: %s", path);
191 lua_pushvalue(L, -2); 220 lua_pushvalue(L, -2);
192 lua_settable(L, LUA_REGISTRYINDEX); 221 lua_settable(L, LUA_REGISTRYINDEX);
@@ -391,8 +420,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
391 420
392/* ------------------------------------------------------------------------ */ 421/* ------------------------------------------------------------------------ */
393 422
394static const int sentinel_ = 0; 423#define sentinel ((void *)0x4004)
395#define sentinel ((void *)&sentinel_)
396 424
397static int lj_cf_package_require(lua_State *L) 425static int lj_cf_package_require(lua_State *L)
398{ 426{
@@ -482,29 +510,19 @@ static void modinit(lua_State *L, const char *modname)
482static int lj_cf_package_module(lua_State *L) 510static int lj_cf_package_module(lua_State *L)
483{ 511{
484 const char *modname = luaL_checkstring(L, 1); 512 const char *modname = luaL_checkstring(L, 1);
485 int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ 513 int lastarg = (int)(L->top - L->base);
486 lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); 514 luaL_pushmodule(L, modname, 1);
487 lua_getfield(L, loaded, modname); /* get _LOADED[modname] */
488 if (!lua_istable(L, -1)) { /* not found? */
489 lua_pop(L, 1); /* remove previous result */
490 /* try global variable (and create one if it does not exist) */
491 if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
492 lj_err_callerv(L, LJ_ERR_BADMODN, modname);
493 lua_pushvalue(L, -1);
494 lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */
495 }
496 /* check whether table already has a _NAME field */
497 lua_getfield(L, -1, "_NAME"); 515 lua_getfield(L, -1, "_NAME");
498 if (!lua_isnil(L, -1)) { /* is table an initialized module? */ 516 if (!lua_isnil(L, -1)) { /* Module already initialized? */
499 lua_pop(L, 1); 517 lua_pop(L, 1);
500 } else { /* no; initialize it */ 518 } else {
501 lua_pop(L, 1); 519 lua_pop(L, 1);
502 modinit(L, modname); 520 modinit(L, modname);
503 } 521 }
504 lua_pushvalue(L, -1); 522 lua_pushvalue(L, -1);
505 setfenv(L); 523 setfenv(L);
506 dooptions(L, loaded - 1); 524 dooptions(L, lastarg);
507 return 0; 525 return LJ_52;
508} 526}
509 527
510static int lj_cf_package_seeall(lua_State *L) 528static int lj_cf_package_seeall(lua_State *L)
@@ -575,13 +593,16 @@ LUALIB_API int luaopen_package(lua_State *L)
575 lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); 593 lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
576 lua_setfield(L, -2, "__gc"); 594 lua_setfield(L, -2, "__gc");
577 luaL_register(L, LUA_LOADLIBNAME, package_lib); 595 luaL_register(L, LUA_LOADLIBNAME, package_lib);
578 lua_pushvalue(L, -1); 596 lua_copy(L, -1, LUA_ENVIRONINDEX);
579 lua_replace(L, LUA_ENVIRONINDEX);
580 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); 597 lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
581 for (i = 0; package_loaders[i] != NULL; i++) { 598 for (i = 0; package_loaders[i] != NULL; i++) {
582 lj_lib_pushcf(L, package_loaders[i], 1); 599 lj_lib_pushcf(L, package_loaders[i], 1);
583 lua_rawseti(L, -2, i+1); 600 lua_rawseti(L, -2, i+1);
584 } 601 }
602#if LJ_52
603 lua_pushvalue(L, -1);
604 lua_setfield(L, -3, "searchers");
605#endif
585 lua_setfield(L, -2, "loaders"); 606 lua_setfield(L, -2, "loaders");
586 lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); 607 lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
587 noenv = lua_toboolean(L, -1); 608 noenv = lua_toboolean(L, -1);
diff --git a/src/lib_string.c b/src/lib_string.c
index d0b79160..6b88ee9b 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h 6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 7*/
8 8
9#include <stdio.h>
10
11#define lib_string_c 9#define lib_string_c
12#define LUA_LIB 10#define LUA_LIB
13 11
@@ -18,6 +16,7 @@
18#include "lj_obj.h" 16#include "lj_obj.h"
19#include "lj_gc.h" 17#include "lj_gc.h"
20#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
21#include "lj_str.h" 20#include "lj_str.h"
22#include "lj_tab.h" 21#include "lj_tab.h"
23#include "lj_meta.h" 22#include "lj_meta.h"
@@ -25,17 +24,19 @@
25#include "lj_ff.h" 24#include "lj_ff.h"
26#include "lj_bcdump.h" 25#include "lj_bcdump.h"
27#include "lj_char.h" 26#include "lj_char.h"
27#include "lj_strfmt.h"
28#include "lj_lib.h" 28#include "lj_lib.h"
29 29
30/* ------------------------------------------------------------------------ */ 30/* ------------------------------------------------------------------------ */
31 31
32#define LJLIB_MODULE_string 32#define LJLIB_MODULE_string
33 33
34LJLIB_ASM(string_len) LJLIB_REC(.) 34LJLIB_LUA(string_len) /*
35{ 35 function(s)
36 lj_lib_checkstr(L, 1); 36 CHECK_str(s)
37 return FFH_RETRY; 37 return #s
38} 38 end
39*/
39 40
40LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) 41LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
41{ 42{
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
57 lj_state_checkstack(L, (MSize)n); 58 lj_state_checkstack(L, (MSize)n);
58 p = (const unsigned char *)strdata(s) + start; 59 p = (const unsigned char *)strdata(s) + start;
59 for (i = 0; i < n; i++) 60 for (i = 0; i < n; i++)
60 setintV(L->base + i-1, p[i]); 61 setintV(L->base + i-1-LJ_FR2, p[i]);
61 return FFH_RES(n); 62 return FFH_RES(n);
62} 63}
63 64
64LJLIB_ASM(string_char) 65LJLIB_ASM(string_char) LJLIB_REC(.)
65{ 66{
66 int i, nargs = (int)(L->top - L->base); 67 int i, nargs = (int)(L->top - L->base);
67 char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); 68 char *buf = lj_buf_tmp(L, (MSize)nargs);
68 for (i = 1; i <= nargs; i++) { 69 for (i = 1; i <= nargs; i++) {
69 int32_t k = lj_lib_checkint(L, i); 70 int32_t k = lj_lib_checkint(L, i);
70 if (!checku8(k)) 71 if (!checku8(k))
71 lj_err_arg(L, i, LJ_ERR_BADVAL); 72 lj_err_arg(L, i, LJ_ERR_BADVAL);
72 buf[i-1] = (char)k; 73 buf[i-1] = (char)k;
73 } 74 }
74 setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); 75 setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
75 return FFH_RES(1); 76 return FFH_RES(1);
76} 77}
77 78
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
83 return FFH_RETRY; 84 return FFH_RETRY;
84} 85}
85 86
86LJLIB_ASM(string_rep) 87LJLIB_CF(string_rep) LJLIB_REC(.)
87{ 88{
88 GCstr *s = lj_lib_checkstr(L, 1); 89 GCstr *s = lj_lib_checkstr(L, 1);
89 int32_t k = lj_lib_checkint(L, 2); 90 int32_t rep = lj_lib_checkint(L, 2);
90 GCstr *sep = lj_lib_optstr(L, 3); 91 GCstr *sep = lj_lib_optstr(L, 3);
91 int32_t len = (int32_t)s->len; 92 SBuf *sb = lj_buf_tmp_(L);
92 global_State *g = G(L); 93 if (sep && rep > 1) {
93 int64_t tlen; 94 GCstr *s2 = lj_buf_cat2str(L, sep, s);
94 const char *src; 95 lj_buf_reset(sb);
95 char *buf; 96 lj_buf_putstr(sb, s);
96 if (k <= 0) { 97 s = s2;
97 empty: 98 rep--;
98 setstrV(L, L->base-1, &g->strempty);
99 return FFH_RES(1);
100 }
101 if (sep) {
102 tlen = (int64_t)len + sep->len;
103 if (tlen > LJ_MAX_STR)
104 lj_err_caller(L, LJ_ERR_STROV);
105 tlen *= k;
106 if (tlen > LJ_MAX_STR)
107 lj_err_caller(L, LJ_ERR_STROV);
108 } else {
109 tlen = (int64_t)k * len;
110 if (tlen > LJ_MAX_STR)
111 lj_err_caller(L, LJ_ERR_STROV);
112 }
113 if (tlen == 0) goto empty;
114 buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
115 src = strdata(s);
116 if (sep) {
117 tlen -= sep->len; /* Ignore trailing separator. */
118 if (k > 1) { /* Paste one string and one separator. */
119 int32_t i;
120 i = 0; while (i < len) *buf++ = src[i++];
121 src = strdata(sep); len = sep->len;
122 i = 0; while (i < len) *buf++ = src[i++];
123 src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
124 }
125 } 99 }
126 do { 100 sb = lj_buf_putstr_rep(sb, s, rep);
127 int32_t i = 0; 101 setstrV(L, L->top-1, lj_buf_str(L, sb));
128 do { *buf++ = src[i++]; } while (i < len); 102 lj_gc_check(L);
129 } while (--k > 0); 103 return 1;
130 setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
131 return FFH_RES(1);
132} 104}
133 105
134LJLIB_ASM(string_reverse) 106LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
135{ 107{
136 GCstr *s = lj_lib_checkstr(L, 1); 108 lj_lib_checkstr(L, 1);
137 lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
138 return FFH_RETRY; 109 return FFH_RETRY;
139} 110}
140LJLIB_ASM_(string_lower) 111LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
141LJLIB_ASM_(string_upper) 112LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
142 113
143/* ------------------------------------------------------------------------ */ 114/* ------------------------------------------------------------------------ */
144 115
145static int writer_buf(lua_State *L, const void *p, size_t size, void *b) 116static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
146{ 117{
147 luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); 118 lj_buf_putmem((SBuf *)sb, p, (MSize)size);
148 UNUSED(L); 119 UNUSED(L);
149 return 0; 120 return 0;
150} 121}
@@ -153,12 +124,12 @@ LJLIB_CF(string_dump)
153{ 124{
154 GCfunc *fn = lj_lib_checkfunc(L, 1); 125 GCfunc *fn = lj_lib_checkfunc(L, 1);
155 int strip = L->base+1 < L->top && tvistruecond(L->base+1); 126 int strip = L->base+1 < L->top && tvistruecond(L->base+1);
156 luaL_Buffer b; 127 SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
157 L->top = L->base+1; 128 L->top = L->base+1;
158 luaL_buffinit(L, &b); 129 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
159 if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
160 lj_err_caller(L, LJ_ERR_STRDUMP); 130 lj_err_caller(L, LJ_ERR_STRDUMP);
161 luaL_pushresult(&b); 131 setstrV(L, L->top-1, lj_buf_str(L, sb));
132 lj_gc_check(L);
162 return 1; 133 return 1;
163} 134}
164 135
@@ -183,7 +154,6 @@ typedef struct MatchState {
183} MatchState; 154} MatchState;
184 155
185#define L_ESC '%' 156#define L_ESC '%'
186#define SPECIALS "^$*+?.([%-"
187 157
188static int check_capture(MatchState *ms, int l) 158static int check_capture(MatchState *ms, int l)
189{ 159{
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
450 return s; 420 return s;
451} 421}
452 422
453static const char *lmemfind(const char *s1, size_t l1,
454 const char *s2, size_t l2)
455{
456 if (l2 == 0) {
457 return s1; /* empty strings are everywhere */
458 } else if (l2 > l1) {
459 return NULL; /* avoids a negative `l1' */
460 } else {
461 const char *init; /* to search for a `*s2' inside `s1' */
462 l2--; /* 1st char will be checked by `memchr' */
463 l1 = l1-l2; /* `s2' cannot be found after that */
464 while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
465 init++; /* 1st char is already checked */
466 if (memcmp(init, s2+1, l2) == 0) {
467 return init-1;
468 } else { /* correct `l1' and `s1' to try again */
469 l1 -= (size_t)(init-s1);
470 s1 = init;
471 }
472 }
473 return NULL; /* not found */
474 }
475}
476
477static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) 423static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
478{ 424{
479 if (i >= ms->level) { 425 if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
501 return nlevels; /* number of strings pushed */ 447 return nlevels; /* number of strings pushed */
502} 448}
503 449
504static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
505{
506 /* relative string position: negative means back from end */
507 if (pos < 0) pos += (ptrdiff_t)len + 1;
508 return (pos >= 0) ? pos : 0;
509}
510
511static int str_find_aux(lua_State *L, int find) 450static int str_find_aux(lua_State *L, int find)
512{ 451{
513 size_t l1, l2; 452 GCstr *s = lj_lib_checkstr(L, 1);
514 const char *s = luaL_checklstring(L, 1, &l1); 453 GCstr *p = lj_lib_checkstr(L, 2);
515 const char *p = luaL_checklstring(L, 2, &l2); 454 int32_t start = lj_lib_optint(L, 3, 1);
516 ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; 455 MSize st;
517 if (init < 0) { 456 if (start < 0) start += (int32_t)s->len; else start--;
518 init = 0; 457 if (start < 0) start = 0;
519 } else if ((size_t)(init) > l1) { 458 st = (MSize)start;
459 if (st > s->len) {
520#if LJ_52 460#if LJ_52
521 setnilV(L->top-1); 461 setnilV(L->top-1);
522 return 1; 462 return 1;
523#else 463#else
524 init = (ptrdiff_t)l1; 464 st = s->len;
525#endif 465#endif
526 } 466 }
527 if (find && (lua_toboolean(L, 4) || /* explicit request? */ 467 if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
528 strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ 468 !lj_str_haspattern(p))) { /* Search for fixed string. */
529 /* do a plain search */ 469 const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
530 const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); 470 if (q) {
531 if (s2) { 471 setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
532 lua_pushinteger(L, s2-s+1); 472 setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
533 lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
534 return 2; 473 return 2;
535 } 474 }
536 } else { 475 } else { /* Search for pattern. */
537 MatchState ms; 476 MatchState ms;
538 int anchor = (*p == '^') ? (p++, 1) : 0; 477 const char *pstr = strdata(p);
539 const char *s1=s+init; 478 const char *sstr = strdata(s) + st;
479 int anchor = 0;
480 if (*pstr == '^') { pstr++; anchor = 1; }
540 ms.L = L; 481 ms.L = L;
541 ms.src_init = s; 482 ms.src_init = strdata(s);
542 ms.src_end = s+l1; 483 ms.src_end = strdata(s) + s->len;
543 do { 484 do { /* Loop through string and try to match the pattern. */
544 const char *res; 485 const char *q;
545 ms.level = ms.depth = 0; 486 ms.level = ms.depth = 0;
546 if ((res=match(&ms, s1, p)) != NULL) { 487 q = match(&ms, sstr, pstr);
488 if (q) {
547 if (find) { 489 if (find) {
548 lua_pushinteger(L, s1-s+1); /* start */ 490 setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
549 lua_pushinteger(L, res-s); /* end */ 491 setintV(L->top++, (int32_t)(q-strdata(s)));
550 return push_captures(&ms, NULL, 0) + 2; 492 return push_captures(&ms, NULL, NULL) + 2;
551 } else { 493 } else {
552 return push_captures(&ms, s1, res); 494 return push_captures(&ms, sstr, q);
553 } 495 }
554 } 496 }
555 } while (s1++ < ms.src_end && !anchor); 497 } while (sstr++ < ms.src_end && !anchor);
556 } 498 }
557 lua_pushnil(L); /* not found */ 499 setnilV(L->top-1); /* Not found. */
558 return 1; 500 return 1;
559} 501}
560 502
561LJLIB_CF(string_find) 503LJLIB_CF(string_find) LJLIB_REC(.)
562{ 504{
563 return str_find_aux(L, 1); 505 return str_find_aux(L, 1);
564} 506}
@@ -698,222 +640,91 @@ LJLIB_CF(string_gsub)
698 640
699/* ------------------------------------------------------------------------ */ 641/* ------------------------------------------------------------------------ */
700 642
701/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ 643/* Emulate tostring() inline. */
702#define MAX_FMTITEM 512 644static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry)
703/* valid flags in a format specification */
704#define FMT_FLAGS "-+ #0"
705/*
706** maximum size of each format specification (such as '%-099.99d')
707** (+10 accounts for %99.99x plus margin of error)
708*/
709#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
710
711static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
712{
713 GCstr *str = lj_lib_checkstr(L, arg);
714 int32_t len = (int32_t)str->len;
715 const char *s = strdata(str);
716 luaL_addchar(b, '"');
717 while (len--) {
718 uint32_t c = uchar(*s);
719 if (c == '"' || c == '\\' || c == '\n') {
720 luaL_addchar(b, '\\');
721 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
722 uint32_t d;
723 luaL_addchar(b, '\\');
724 if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
725 luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
726 goto tens;
727 } else if (c >= 10) {
728 tens:
729 d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
730 }
731 c += '0';
732 }
733 luaL_addchar(b, c);
734 s++;
735 }
736 luaL_addchar(b, '"');
737}
738
739static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
740{
741 const char *p = strfrmt;
742 while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
743 if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
744 lj_err_caller(L, LJ_ERR_STRFMTR);
745 if (lj_char_isdigit(uchar(*p))) p++; /* skip width */
746 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
747 if (*p == '.') {
748 p++;
749 if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
750 if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
751 }
752 if (lj_char_isdigit(uchar(*p)))
753 lj_err_caller(L, LJ_ERR_STRFMTW);
754 *(form++) = '%';
755 strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
756 form += p - strfrmt + 1;
757 *form = '\0';
758 return p;
759}
760
761static void addintlen(char *form)
762{
763 size_t l = strlen(form);
764 char spec = form[l - 1];
765 strcpy(form + l - 1, LUA_INTFRMLEN);
766 form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
767 form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
768}
769
770static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
771{
772 if (sizeof(LUA_INTFRM_T) == 4) {
773 return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
774 } else {
775 cTValue *o;
776 lj_lib_checknumber(L, arg);
777 o = L->base+arg-1;
778 if (tvisint(o))
779 return (LUA_INTFRM_T)intV(o);
780 else
781 return (LUA_INTFRM_T)numV(o);
782 }
783}
784
785static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
786{
787 if (sizeof(LUA_INTFRM_T) == 4) {
788 return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
789 } else {
790 cTValue *o;
791 lj_lib_checknumber(L, arg);
792 o = L->base+arg-1;
793 if (tvisint(o))
794 return (unsigned LUA_INTFRM_T)intV(o);
795 else if ((int32_t)o->u32.hi < 0)
796 return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
797 else
798 return (unsigned LUA_INTFRM_T)numV(o);
799 }
800}
801
802static GCstr *meta_tostring(lua_State *L, int arg)
803{ 645{
804 TValue *o = L->base+arg-1; 646 TValue *o = L->base+arg-1;
805 cTValue *mo; 647 cTValue *mo;
806 lua_assert(o < L->top); /* Caller already checks for existence. */ 648 lua_assert(o < L->top); /* Caller already checks for existence. */
807 if (LJ_LIKELY(tvisstr(o))) 649 if (LJ_LIKELY(tvisstr(o)))
808 return strV(o); 650 return strV(o);
809 if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { 651 if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
810 copyTV(L, L->top++, mo); 652 copyTV(L, L->top++, mo);
811 copyTV(L, L->top++, o); 653 copyTV(L, L->top++, o);
812 lua_call(L, 1, 1); 654 lua_call(L, 1, 1);
813 L->top--; 655 copyTV(L, L->base+arg-1, --L->top);
814 if (tvisstr(L->top)) 656 return NULL; /* Buffer may be overwritten, retry. */
815 return strV(L->top);
816 o = L->base+arg-1;
817 copyTV(L, o, L->top);
818 }
819 if (tvisnumber(o)) {
820 return lj_str_fromnumber(L, o);
821 } else if (tvisnil(o)) {
822 return lj_str_newlit(L, "nil");
823 } else if (tvisfalse(o)) {
824 return lj_str_newlit(L, "false");
825 } else if (tvistrue(o)) {
826 return lj_str_newlit(L, "true");
827 } else {
828 if (tvisfunc(o) && isffunc(funcV(o)))
829 lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
830 else
831 lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
832 L->top--;
833 return strV(L->top);
834 } 657 }
835} 658 return lj_strfmt_obj(L, o);
836 659}
837LJLIB_CF(string_format) 660
838{ 661LJLIB_CF(string_format) LJLIB_REC(.)
839 int arg = 1, top = (int)(L->top - L->base); 662{
840 GCstr *fmt = lj_lib_checkstr(L, arg); 663 int arg, top = (int)(L->top - L->base);
841 const char *strfrmt = strdata(fmt); 664 GCstr *fmt;
842 const char *strfrmt_end = strfrmt + fmt->len; 665 SBuf *sb;
843 luaL_Buffer b; 666 FormatState fs;
844 luaL_buffinit(L, &b); 667 SFormat sf;
845 while (strfrmt < strfrmt_end) { 668 int retry = 0;
846 if (*strfrmt != L_ESC) { 669again:
847 luaL_addchar(&b, *strfrmt++); 670 arg = 1;
848 } else if (*++strfrmt == L_ESC) { 671 sb = lj_buf_tmp_(L);
849 luaL_addchar(&b, *strfrmt++); /* %% */ 672 fmt = lj_lib_checkstr(L, arg);
850 } else { /* format item */ 673 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
851 char form[MAX_FMTSPEC]; /* to store the format (`%...') */ 674 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
852 char buff[MAX_FMTITEM]; /* to store the formatted item */ 675 if (sf == STRFMT_LIT) {
853 int n = 0; 676 lj_buf_putmem(sb, fs.str, fs.len);
677 } else if (sf == STRFMT_ERR) {
678 lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len)));
679 } else {
854 if (++arg > top) 680 if (++arg > top)
855 luaL_argerror(L, arg, lj_obj_typename[0]); 681 luaL_argerror(L, arg, lj_obj_typename[0]);
856 strfrmt = scanformat(L, strfrmt, form); 682 switch (STRFMT_TYPE(sf)) {
857 switch (*strfrmt++) { 683 case STRFMT_INT:
858 case 'c': 684 if (tvisint(L->base+arg-1)) {
859 n = sprintf(buff, form, lj_lib_checkint(L, arg)); 685 int32_t k = intV(L->base+arg-1);
686 if (sf == STRFMT_INT)
687 lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
688 else
689 lj_strfmt_putfxint(sb, sf, k);
690 } else {
691 lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
692 }
860 break; 693 break;
861 case 'd': case 'i': 694 case STRFMT_UINT:
862 addintlen(form); 695 if (tvisint(L->base+arg-1))
863 n = sprintf(buff, form, num2intfrm(L, arg)); 696 lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1));
697 else
698 lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
864 break; 699 break;
865 case 'o': case 'u': case 'x': case 'X': 700 case STRFMT_NUM:
866 addintlen(form); 701 lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
867 n = sprintf(buff, form, num2uintfrm(L, arg));
868 break; 702 break;
869 case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { 703 case STRFMT_STR: {
870 TValue tv; 704 GCstr *str = string_fmt_tostring(L, arg, retry);
871 tv.n = lj_lib_checknum(L, arg); 705 if (str == NULL)
872 if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { 706 retry = 1;
873 /* Canonicalize output of non-finite values. */ 707 else if ((sf & STRFMT_T_QUOTED))
874 char *p, nbuf[LJ_STR_NUMBUF]; 708 lj_strfmt_putquoted(sb, str); /* No formatting. */
875 size_t len = lj_str_bufnum(nbuf, &tv); 709 else
876 if (strfrmt[-1] < 'a') { 710 lj_strfmt_putfstr(sb, sf, str);
877 nbuf[len-3] = nbuf[len-3] - 0x20;
878 nbuf[len-2] = nbuf[len-2] - 0x20;
879 nbuf[len-1] = nbuf[len-1] - 0x20;
880 }
881 nbuf[len] = '\0';
882 for (p = form; *p < 'A' && *p != '.'; p++) ;
883 *p++ = 's'; *p = '\0';
884 n = sprintf(buff, form, nbuf);
885 break;
886 }
887 n = sprintf(buff, form, (double)tv.n);
888 break; 711 break;
889 } 712 }
890 case 'q': 713 case STRFMT_CHAR:
891 addquoted(L, &b, arg); 714 lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
892 continue; 715 break;
893 case 'p': 716 case STRFMT_PTR: /* No formatting. */
894 lj_str_pushf(L, "%p", lua_topointer(L, arg)); 717 lj_strfmt_putptr(sb, lj_obj_ptr(L->base+arg-1));
895 luaL_addvalue(&b);
896 continue;
897 case 's': {
898 GCstr *str = meta_tostring(L, arg);
899 if (!strchr(form, '.') && str->len >= 100) {
900 /* no precision and string is too long to be formatted;
901 keep original string */
902 setstrV(L, L->top++, str);
903 luaL_addvalue(&b);
904 continue;
905 }
906 n = sprintf(buff, form, strdata(str));
907 break; 718 break;
908 }
909 default: 719 default:
910 lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); 720 lua_assert(0);
911 break; 721 break;
912 } 722 }
913 luaL_addlstring(&b, buff, n);
914 } 723 }
915 } 724 }
916 luaL_pushresult(&b); 725 if (retry++ == 1) goto again;
726 setstrV(L, L->top-1, lj_buf_str(L, sb));
727 lj_gc_check(L);
917 return 1; 728 return 1;
918} 729}
919 730
@@ -926,10 +737,6 @@ LUALIB_API int luaopen_string(lua_State *L)
926 GCtab *mt; 737 GCtab *mt;
927 global_State *g; 738 global_State *g;
928 LJ_LIB_REG(L, LUA_STRLIBNAME, string); 739 LJ_LIB_REG(L, LUA_STRLIBNAME, string);
929#if defined(LUA_COMPAT_GFIND) && !LJ_52
930 lua_getfield(L, -1, "gmatch");
931 lua_setfield(L, -2, "gfind");
932#endif
933 mt = lj_tab_new(L, 0, 1); 740 mt = lj_tab_new(L, 0, 1);
934 /* NOBARRIER: basemt is a GC root. */ 741 /* NOBARRIER: basemt is a GC root. */
935 g = G(L); 742 g = G(L);
diff --git a/src/lib_table.c b/src/lib_table.c
index 9842513b..4e612146 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,57 +16,43 @@
16#include "lj_obj.h" 16#include "lj_obj.h"
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_err.h" 18#include "lj_err.h"
19#include "lj_buf.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
21#include "lj_ff.h"
20#include "lj_lib.h" 22#include "lj_lib.h"
21 23
22/* ------------------------------------------------------------------------ */ 24/* ------------------------------------------------------------------------ */
23 25
24#define LJLIB_MODULE_table 26#define LJLIB_MODULE_table
25 27
26LJLIB_CF(table_foreachi) 28LJLIB_LUA(table_foreachi) /*
27{ 29 function(t, f)
28 GCtab *t = lj_lib_checktab(L, 1); 30 CHECK_tab(t)
29 GCfunc *func = lj_lib_checkfunc(L, 2); 31 CHECK_func(f)
30 MSize i, n = lj_tab_len(t); 32 for i=1,#t do
31 for (i = 1; i <= n; i++) { 33 local r = f(i, t[i])
32 cTValue *val; 34 if r ~= nil then return r end
33 setfuncV(L, L->top, func); 35 end
34 setintV(L->top+1, i); 36 end
35 val = lj_tab_getint(t, (int32_t)i); 37*/
36 if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
37 L->top += 3;
38 lua_call(L, 2, 1);
39 if (!tvisnil(L->top-1))
40 return 1;
41 L->top--;
42 }
43 return 0;
44}
45 38
46LJLIB_CF(table_foreach) 39LJLIB_LUA(table_foreach) /*
47{ 40 function(t, f)
48 GCtab *t = lj_lib_checktab(L, 1); 41 CHECK_tab(t)
49 GCfunc *func = lj_lib_checkfunc(L, 2); 42 CHECK_func(f)
50 L->top = L->base+3; 43 for k, v in PAIRS(t) do
51 setnilV(L->top-1); 44 local r = f(k, v)
52 while (lj_tab_next(L, t, L->top-1)) { 45 if r ~= nil then return r end
53 copyTV(L, L->top+2, L->top); 46 end
54 copyTV(L, L->top+1, L->top-1); 47 end
55 setfuncV(L, L->top, func); 48*/
56 L->top += 3;
57 lua_call(L, 2, 1);
58 if (!tvisnil(L->top-1))
59 return 1;
60 L->top--;
61 }
62 return 0;
63}
64 49
65LJLIB_ASM(table_getn) LJLIB_REC(.) 50LJLIB_LUA(table_getn) /*
66{ 51 function(t)
67 lj_lib_checktab(L, 1); 52 CHECK_tab(t)
68 return FFH_UNREACHABLE; 53 return #t
69} 54 end
55*/
70 56
71LJLIB_CF(table_maxn) 57LJLIB_CF(table_maxn)
72{ 58{
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
119 return 0; 105 return 0;
120} 106}
121 107
122LJLIB_CF(table_remove) LJLIB_REC(.) 108LJLIB_LUA(table_remove) /*
123{ 109 function(t, pos)
124 GCtab *t = lj_lib_checktab(L, 1); 110 CHECK_tab(t)
125 int32_t e = (int32_t)lj_tab_len(t); 111 local len = #t
126 int32_t pos = lj_lib_optint(L, 2, e); 112 if pos == nil then
127 if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ 113 if len ~= 0 then
128 return 0; 114 local old = t[len]
129 lua_rawgeti(L, 1, pos); /* Get previous value. */ 115 t[len] = nil
130 /* NOBARRIER: This just moves existing elements around. */ 116 return old
131 for (; pos < e; pos++) { 117 end
132 cTValue *src = lj_tab_getint(t, pos+1); 118 else
133 TValue *dst = lj_tab_setint(L, t, pos); 119 CHECK_int(pos)
134 if (src) { 120 if pos >= 1 and pos <= len then
135 copyTV(L, dst, src); 121 local old = t[pos]
136 } else { 122 for i=pos+1,len do
137 setnilV(dst); 123 t[i-1] = t[i]
138 } 124 end
139 } 125 t[len] = nil
140 setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ 126 return old
141 return 1; /* Return previous value. */ 127 end
142} 128 end
129 end
130*/
131
132LJLIB_LUA(table_move) /*
133 function(a1, f, e, t, a2)
134 CHECK_tab(a1)
135 CHECK_int(f)
136 CHECK_int(e)
137 CHECK_int(t)
138 if a2 == nil then a2 = a1 end
139 CHECK_tab(a2)
140 if e >= f then
141 local d = t - f
142 if t > e or t <= f or a2 ~= a1 then
143 for i=f,e do a2[i+d] = a1[i] end
144 else
145 for i=e,f,-1 do a2[i+d] = a1[i] end
146 end
147 end
148 return a2
149 end
150*/
143 151
144LJLIB_CF(table_concat) 152LJLIB_CF(table_concat) LJLIB_REC(.)
145{ 153{
146 luaL_Buffer b;
147 GCtab *t = lj_lib_checktab(L, 1); 154 GCtab *t = lj_lib_checktab(L, 1);
148 GCstr *sep = lj_lib_optstr(L, 2); 155 GCstr *sep = lj_lib_optstr(L, 2);
149 MSize seplen = sep ? sep->len : 0;
150 int32_t i = lj_lib_optint(L, 3, 1); 156 int32_t i = lj_lib_optint(L, 3, 1);
151 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? 157 int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
152 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); 158 lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
153 luaL_buffinit(L, &b); 159 SBuf *sb = lj_buf_tmp_(L);
154 if (i <= e) { 160 SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
155 for (;;) { 161 if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
156 cTValue *o; 162 int32_t idx = (int32_t)(intptr_t)sbufP(sb);
157 lua_rawgeti(L, 1, i); 163 cTValue *o = lj_tab_getint(t, idx);
158 o = L->top-1; 164 lj_err_callerv(L, LJ_ERR_TABCAT,
159 if (!(tvisstr(o) || tvisnumber(o))) 165 lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
160 lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
161 luaL_addvalue(&b);
162 if (i++ == e) break;
163 if (seplen)
164 luaL_addlstring(&b, strdata(sep), seplen);
165 }
166 } 166 }
167 luaL_pushresult(&b); 167 setstrV(L, L->top-1, lj_buf_str(L, sbx));
168 lj_gc_check(L);
168 return 1; 169 return 1;
169} 170}
170 171
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack)
284} 285}
285#endif 286#endif
286 287
288LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.)
289{
290 int32_t a = lj_lib_checkint(L, 1);
291 int32_t h = lj_lib_checkint(L, 2);
292 lua_createtable(L, a, h);
293 return 1;
294}
295
296LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.)
297{
298 lj_tab_clear(lj_lib_checktab(L, 1));
299 return 0;
300}
301
302static int luaopen_table_new(lua_State *L)
303{
304 return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
305}
306
307static int luaopen_table_clear(lua_State *L)
308{
309 return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
310}
311
287/* ------------------------------------------------------------------------ */ 312/* ------------------------------------------------------------------------ */
288 313
289#include "lj_libdef.h" 314#include "lj_libdef.h"
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L)
295 lua_getglobal(L, "unpack"); 320 lua_getglobal(L, "unpack");
296 lua_setfield(L, -2, "unpack"); 321 lua_setfield(L, -2, "unpack");
297#endif 322#endif
323 lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
324 lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
298 return 1; 325 return 1;
299} 326}
300 327
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index dc64dca9..33a2eb8f 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -72,13 +72,56 @@
72 72
73#define IS_DIRECT_BIT (SIZE_T_ONE) 73#define IS_DIRECT_BIT (SIZE_T_ONE)
74 74
75
76/* Determine system-specific block allocation method. */
75#if LJ_TARGET_WINDOWS 77#if LJ_TARGET_WINDOWS
76 78
77#define WIN32_LEAN_AND_MEAN 79#define WIN32_LEAN_AND_MEAN
78#include <windows.h> 80#include <windows.h>
79 81
82#define LJ_ALLOC_VIRTUALALLOC 1
83
84#if LJ_64 && !LJ_GC64
85#define LJ_ALLOC_NTAVM 1
86#endif
87
88#else
89
90#include <errno.h>
91/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
92#include <sys/mman.h>
93
94#define LJ_ALLOC_MMAP 1
95
80#if LJ_64 96#if LJ_64
81 97
98#define LJ_ALLOC_MMAP_PROBE 1
99
100#if LJ_GC64
101#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
102#elif LJ_TARGET_X64 && LJ_HASJIT
103/* Due to limitations in the x64 compiler backend. */
104#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
105#else
106#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
107#endif
108
109#endif
110
111#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
112#define LJ_ALLOC_MMAP32 1
113#endif
114
115#if LJ_TARGET_LINUX
116#define LJ_ALLOC_MREMAP 1
117#endif
118
119#endif
120
121
122#if LJ_ALLOC_VIRTUALALLOC
123
124#if LJ_ALLOC_NTAVM
82/* Undocumented, but hey, that's what we all love so much about Windows. */ 125/* Undocumented, but hey, that's what we all love so much about Windows. */
83typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, 126typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
84 size_t *size, ULONG alloctype, ULONG prot); 127 size_t *size, ULONG alloctype, ULONG prot);
@@ -89,14 +132,15 @@ static PNTAVM ntavm;
89*/ 132*/
90#define NTAVM_ZEROBITS 1 133#define NTAVM_ZEROBITS 1
91 134
92static void INIT_MMAP(void) 135static void init_mmap(void)
93{ 136{
94 ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), 137 ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
95 "NtAllocateVirtualMemory"); 138 "NtAllocateVirtualMemory");
96} 139}
140#define INIT_MMAP() init_mmap()
97 141
98/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ 142/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
99static LJ_AINLINE void *CALL_MMAP(size_t size) 143static void *CALL_MMAP(size_t size)
100{ 144{
101 DWORD olderr = GetLastError(); 145 DWORD olderr = GetLastError();
102 void *ptr = NULL; 146 void *ptr = NULL;
@@ -107,7 +151,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
107} 151}
108 152
109/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ 153/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
110static LJ_AINLINE void *DIRECT_MMAP(size_t size) 154static void *DIRECT_MMAP(size_t size)
111{ 155{
112 DWORD olderr = GetLastError(); 156 DWORD olderr = GetLastError();
113 void *ptr = NULL; 157 void *ptr = NULL;
@@ -119,23 +163,21 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
119 163
120#else 164#else
121 165
122#define INIT_MMAP() ((void)0)
123
124/* Win32 MMAP via VirtualAlloc */ 166/* Win32 MMAP via VirtualAlloc */
125static LJ_AINLINE void *CALL_MMAP(size_t size) 167static void *CALL_MMAP(size_t size)
126{ 168{
127 DWORD olderr = GetLastError(); 169 DWORD olderr = GetLastError();
128 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); 170 void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
129 SetLastError(olderr); 171 SetLastError(olderr);
130 return ptr ? ptr : MFAIL; 172 return ptr ? ptr : MFAIL;
131} 173}
132 174
133/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ 175/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
134static LJ_AINLINE void *DIRECT_MMAP(size_t size) 176static void *DIRECT_MMAP(size_t size)
135{ 177{
136 DWORD olderr = GetLastError(); 178 DWORD olderr = GetLastError();
137 void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, 179 void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
138 PAGE_READWRITE); 180 PAGE_READWRITE);
139 SetLastError(olderr); 181 SetLastError(olderr);
140 return ptr ? ptr : MFAIL; 182 return ptr ? ptr : MFAIL;
141} 183}
@@ -143,7 +185,7 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
143#endif 185#endif
144 186
145/* This function supports releasing coalesed segments */ 187/* This function supports releasing coalesed segments */
146static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) 188static int CALL_MUNMAP(void *ptr, size_t size)
147{ 189{
148 DWORD olderr = GetLastError(); 190 DWORD olderr = GetLastError();
149 MEMORY_BASIC_INFORMATION minfo; 191 MEMORY_BASIC_INFORMATION minfo;
@@ -163,10 +205,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
163 return 0; 205 return 0;
164} 206}
165 207
166#else 208#elif LJ_ALLOC_MMAP
167
168#include <errno.h>
169#include <sys/mman.h>
170 209
171#define MMAP_PROT (PROT_READ|PROT_WRITE) 210#define MMAP_PROT (PROT_READ|PROT_WRITE)
172#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) 211#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -174,105 +213,152 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
174#endif 213#endif
175#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) 214#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
176 215
177#if LJ_64 216#if LJ_ALLOC_MMAP_PROBE
178/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
179
180#if defined(MAP_32BIT)
181 217
182#if defined(__sun__) 218#ifdef MAP_TRYFIXED
183#define MMAP_REGION_START ((uintptr_t)0x1000) 219#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
184#else 220#else
185/* Actually this only gives us max. 1GB in current Linux kernels. */ 221#define MMAP_FLAGS_PROBE MMAP_FLAGS
186#define MMAP_REGION_START ((uintptr_t)0)
187#endif 222#endif
188 223
189static LJ_AINLINE void *CALL_MMAP(size_t size) 224#define LJ_ALLOC_MMAP_PROBE_MAX 30
190{ 225#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
191 int olderr = errno;
192 void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
193 errno = olderr;
194 return ptr;
195}
196 226
197#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN 227#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
198 228
199/* OSX and FreeBSD mmap() use a naive first-fit linear search. 229/* No point in a giant ifdef mess. Just try to open /dev/urandom.
200** That's perfect for us. Except that -pagezero_size must be set for OSX, 230** It doesn't really matter if this fails, since we get some ASLR bits from
201** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs 231** every unsuitable allocation, too. And we prefer linear allocation, anyway.
202** to be reduced to 250MB on FreeBSD.
203*/ 232*/
204#if LJ_TARGET_OSX || defined(__DragonFly__) 233#include <fcntl.h>
205#define MMAP_REGION_START ((uintptr_t)0x10000) 234#include <unistd.h>
206#elif LJ_TARGET_PS4
207#define MMAP_REGION_START ((uintptr_t)0x4000)
208#else
209#define MMAP_REGION_START ((uintptr_t)0x10000000)
210#endif
211#define MMAP_REGION_END ((uintptr_t)0x80000000)
212 235
213#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 236static uintptr_t mmap_probe_seed(void)
214#include <sys/resource.h> 237{
215#endif 238 uintptr_t val;
239 int fd = open("/dev/urandom", O_RDONLY);
240 if (fd != -1) {
241 int ok = ((size_t)read(fd, &val, sizeof(val)) == sizeof(val));
242 (void)close(fd);
243 if (ok) return val;
244 }
245 return 1; /* Punt. */
246}
216 247
217static LJ_AINLINE void *CALL_MMAP(size_t size) 248static void *mmap_probe(size_t size)
218{ 249{
219 int olderr = errno;
220 /* Hint for next allocation. Doesn't need to be thread-safe. */ 250 /* Hint for next allocation. Doesn't need to be thread-safe. */
221 static uintptr_t alloc_hint = MMAP_REGION_START; 251 static uintptr_t hint_addr = 0;
222 int retry = 0; 252 static uintptr_t hint_prng = 0;
223#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 253 int olderr = errno;
224 static int rlimit_modified = 0; 254 int retry;
225 if (LJ_UNLIKELY(rlimit_modified == 0)) { 255 for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
226 struct rlimit rlim; 256 void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
227 rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; 257 uintptr_t addr = (uintptr_t)p;
228 setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ 258 if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
229 rlimit_modified = 1; 259 ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
230 } 260 /* We got a suitable address. Bump the hint address. */
231#endif 261 hint_addr = addr + size;
232 for (;;) {
233 void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
234 if ((uintptr_t)p >= MMAP_REGION_START &&
235 (uintptr_t)p + size < MMAP_REGION_END) {
236 alloc_hint = (uintptr_t)p + size;
237 errno = olderr; 262 errno = olderr;
238 return p; 263 return p;
239 } 264 }
240 if (p != CMFAIL) munmap(p, size); 265 if (p != MFAIL) {
241#if defined(__sun__) || defined(__DragonFly__) 266 munmap(p, size);
242 alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ 267 } else if (errno == ENOMEM) {
243 if (alloc_hint + size < MMAP_REGION_END) continue; 268 return MFAIL;
244#endif 269 }
245 if (retry) break; 270 if (hint_addr) {
246 retry = 1; 271 /* First, try linear probing. */
247 alloc_hint = MMAP_REGION_START; 272 if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
273 hint_addr += 0x1000000;
274 if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
275 hint_addr = 0;
276 continue;
277 } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
278 /* Next, try a no-hint probe to get back an ASLR address. */
279 hint_addr = 0;
280 continue;
281 }
282 }
283 /* Finally, try pseudo-random probing. */
284 if (LJ_UNLIKELY(hint_prng == 0)) {
285 hint_prng = mmap_probe_seed();
286 }
287 /* The unsuitable address we got has some ASLR PRNG bits. */
288 hint_addr ^= addr & ~((uintptr_t)(LJ_PAGESIZE-1));
289 do { /* The PRNG itself is very weak, but see above. */
290 hint_prng = hint_prng * 1103515245 + 12345;
291 hint_addr ^= hint_prng * (uintptr_t)LJ_PAGESIZE;
292 hint_addr &= (((uintptr_t)1 << LJ_ALLOC_MBITS)-1);
293 } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
248 } 294 }
249 errno = olderr; 295 errno = olderr;
250 return CMFAIL; 296 return MFAIL;
251} 297}
252 298
299#endif
300
301#if LJ_ALLOC_MMAP32
302
303#if defined(__sun__)
304#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
253#else 305#else
306#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
307#endif
254 308
255#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" 309static void *mmap_map32(size_t size)
310{
311#if LJ_ALLOC_MMAP_PROBE
312 static int fallback = 0;
313 if (fallback)
314 return mmap_probe(size);
315#endif
316 {
317 int olderr = errno;
318 void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
319 errno = olderr;
320 /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */
321#if LJ_ALLOC_MMAP_PROBE
322 if (ptr == MFAIL) {
323 fallback = 1;
324 return mmap_probe(size);
325 }
326#endif
327 return ptr;
328 }
329}
256 330
257#endif 331#endif
258 332
333#if LJ_ALLOC_MMAP32
334#define CALL_MMAP(size) mmap_map32(size)
335#elif LJ_ALLOC_MMAP_PROBE
336#define CALL_MMAP(size) mmap_probe(size)
259#else 337#else
260 338static void *CALL_MMAP(size_t size)
261/* 32 bit mode is easy. */
262static LJ_AINLINE void *CALL_MMAP(size_t size)
263{ 339{
264 int olderr = errno; 340 int olderr = errno;
265 void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); 341 void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
266 errno = olderr; 342 errno = olderr;
267 return ptr; 343 return ptr;
268} 344}
269
270#endif 345#endif
271 346
272#define INIT_MMAP() ((void)0) 347#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
273#define DIRECT_MMAP(s) CALL_MMAP(s) 348
349#include <sys/resource.h>
350
351static void init_mmap(void)
352{
353 struct rlimit rlim;
354 rlim.rlim_cur = rlim.rlim_max = 0x10000;
355 setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
356}
357#define INIT_MMAP() init_mmap()
274 358
275static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) 359#endif
360
361static int CALL_MUNMAP(void *ptr, size_t size)
276{ 362{
277 int olderr = errno; 363 int olderr = errno;
278 int ret = munmap(ptr, size); 364 int ret = munmap(ptr, size);
@@ -280,10 +366,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
280 return ret; 366 return ret;
281} 367}
282 368
283#if LJ_TARGET_LINUX 369#if LJ_ALLOC_MREMAP
284/* Need to define _GNU_SOURCE to get the mremap prototype. */ 370/* Need to define _GNU_SOURCE to get the mremap prototype. */
285static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, 371static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
286 int flags)
287{ 372{
288 int olderr = errno; 373 int olderr = errno;
289 ptr = mremap(ptr, osz, nsz, flags); 374 ptr = mremap(ptr, osz, nsz, flags);
@@ -294,7 +379,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
294#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) 379#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
295#define CALL_MREMAP_NOMOVE 0 380#define CALL_MREMAP_NOMOVE 0
296#define CALL_MREMAP_MAYMOVE 1 381#define CALL_MREMAP_MAYMOVE 1
297#if LJ_64 382#if LJ_64 && !LJ_GC64
298#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE 383#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
299#else 384#else
300#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE 385#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
@@ -303,6 +388,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
303 388
304#endif 389#endif
305 390
391
392#ifndef INIT_MMAP
393#define INIT_MMAP() ((void)0)
394#endif
395
396#ifndef DIRECT_MMAP
397#define DIRECT_MMAP(s) CALL_MMAP(s)
398#endif
399
306#ifndef CALL_MREMAP 400#ifndef CALL_MREMAP
307#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) 401#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
308#endif 402#endif
diff --git a/src/lj_api.c b/src/lj_api.c
index e2d7e533..974b5643 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,6 +24,7 @@
24#include "lj_trace.h" 24#include "lj_trace.h"
25#include "lj_vm.h" 25#include "lj_vm.h"
26#include "lj_strscan.h" 26#include "lj_strscan.h"
27#include "lj_strfmt.h"
27 28
28/* -- Common helper functions --------------------------------------------- */ 29/* -- Common helper functions --------------------------------------------- */
29 30
@@ -111,6 +112,13 @@ LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
111 from->top = f; 112 from->top = f;
112} 113}
113 114
115LUA_API const lua_Number *lua_version(lua_State *L)
116{
117 static const lua_Number version = LUA_VERSION_NUM;
118 UNUSED(L);
119 return &version;
120}
121
114/* -- Stack manipulation -------------------------------------------------- */ 122/* -- Stack manipulation -------------------------------------------------- */
115 123
116LUA_API int lua_gettop(lua_State *L) 124LUA_API int lua_gettop(lua_State *L)
@@ -151,30 +159,40 @@ LUA_API void lua_insert(lua_State *L, int idx)
151 copyTV(L, p, L->top); 159 copyTV(L, p, L->top);
152} 160}
153 161
154LUA_API void lua_replace(lua_State *L, int idx) 162static void copy_slot(lua_State *L, TValue *f, int idx)
155{ 163{
156 api_checknelems(L, 1);
157 if (idx == LUA_GLOBALSINDEX) { 164 if (idx == LUA_GLOBALSINDEX) {
158 api_check(L, tvistab(L->top-1)); 165 api_check(L, tvistab(f));
159 /* NOBARRIER: A thread (i.e. L) is never black. */ 166 /* NOBARRIER: A thread (i.e. L) is never black. */
160 setgcref(L->env, obj2gco(tabV(L->top-1))); 167 setgcref(L->env, obj2gco(tabV(f)));
161 } else if (idx == LUA_ENVIRONINDEX) { 168 } else if (idx == LUA_ENVIRONINDEX) {
162 GCfunc *fn = curr_func(L); 169 GCfunc *fn = curr_func(L);
163 if (fn->c.gct != ~LJ_TFUNC) 170 if (fn->c.gct != ~LJ_TFUNC)
164 lj_err_msg(L, LJ_ERR_NOENV); 171 lj_err_msg(L, LJ_ERR_NOENV);
165 api_check(L, tvistab(L->top-1)); 172 api_check(L, tvistab(f));
166 setgcref(fn->c.env, obj2gco(tabV(L->top-1))); 173 setgcref(fn->c.env, obj2gco(tabV(f)));
167 lj_gc_barrier(L, fn, L->top-1); 174 lj_gc_barrier(L, fn, f);
168 } else { 175 } else {
169 TValue *o = index2adr(L, idx); 176 TValue *o = index2adr(L, idx);
170 api_checkvalidindex(L, o); 177 api_checkvalidindex(L, o);
171 copyTV(L, o, L->top-1); 178 copyTV(L, o, f);
172 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ 179 if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
173 lj_gc_barrier(L, curr_func(L), L->top-1); 180 lj_gc_barrier(L, curr_func(L), f);
174 } 181 }
182}
183
184LUA_API void lua_replace(lua_State *L, int idx)
185{
186 api_checknelems(L, 1);
187 copy_slot(L, L->top - 1, idx);
175 L->top--; 188 L->top--;
176} 189}
177 190
191LUA_API void lua_copy(lua_State *L, int fromidx, int toidx)
192{
193 copy_slot(L, index2adr(L, fromidx), toidx);
194}
195
178LUA_API void lua_pushvalue(lua_State *L, int idx) 196LUA_API void lua_pushvalue(lua_State *L, int idx)
179{ 197{
180 copyTV(L, L->top, index2adr(L, idx)); 198 copyTV(L, L->top, index2adr(L, idx));
@@ -188,7 +206,7 @@ LUA_API int lua_type(lua_State *L, int idx)
188 cTValue *o = index2adr(L, idx); 206 cTValue *o = index2adr(L, idx);
189 if (tvisnumber(o)) { 207 if (tvisnumber(o)) {
190 return LUA_TNUMBER; 208 return LUA_TNUMBER;
191#if LJ_64 209#if LJ_64 && !LJ_GC64
192 } else if (tvislightud(o)) { 210 } else if (tvislightud(o)) {
193 return LUA_TLIGHTUSERDATA; 211 return LUA_TLIGHTUSERDATA;
194#endif 212#endif
@@ -268,7 +286,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
268 return 0; 286 return 0;
269 } else if (tvispri(o1)) { 287 } else if (tvispri(o1)) {
270 return o1 != niltv(L) && o2 != niltv(L); 288 return o1 != niltv(L) && o2 != niltv(L);
271#if LJ_64 289#if LJ_64 && !LJ_GC64
272 } else if (tvislightud(o1)) { 290 } else if (tvislightud(o1)) {
273 return o1->u64 == o2->u64; 291 return o1->u64 == o2->u64;
274#endif 292#endif
@@ -283,8 +301,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
283 } else { 301 } else {
284 L->top = base+2; 302 L->top = base+2;
285 lj_vm_call(L, base, 1+1); 303 lj_vm_call(L, base, 1+1);
286 L->top -= 2; 304 L->top -= 2+LJ_FR2;
287 return tvistruecond(L->top+1); 305 return tvistruecond(L->top+1+LJ_FR2);
288 } 306 }
289 } 307 }
290} 308}
@@ -306,8 +324,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
306 } else { 324 } else {
307 L->top = base+2; 325 L->top = base+2;
308 lj_vm_call(L, base, 1+1); 326 lj_vm_call(L, base, 1+1);
309 L->top -= 2; 327 L->top -= 2+LJ_FR2;
310 return tvistruecond(L->top+1); 328 return tvistruecond(L->top+1+LJ_FR2);
311 } 329 }
312 } 330 }
313} 331}
@@ -324,6 +342,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
324 return 0; 342 return 0;
325} 343}
326 344
345LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok)
346{
347 cTValue *o = index2adr(L, idx);
348 TValue tmp;
349 if (LJ_LIKELY(tvisnumber(o))) {
350 if (ok) *ok = 1;
351 return numberVnum(o);
352 } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) {
353 if (ok) *ok = 1;
354 return numV(&tmp);
355 } else {
356 if (ok) *ok = 0;
357 return 0;
358 }
359}
360
327LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) 361LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
328{ 362{
329 cTValue *o = index2adr(L, idx); 363 cTValue *o = index2adr(L, idx);
@@ -361,9 +395,38 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
361 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) 395 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
362 return 0; 396 return 0;
363 if (tvisint(&tmp)) 397 if (tvisint(&tmp))
364 return (lua_Integer)intV(&tmp); 398 return intV(&tmp);
399 n = numV(&tmp);
400 }
401#if LJ_64
402 return (lua_Integer)n;
403#else
404 return lj_num2int(n);
405#endif
406}
407
408LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
409{
410 cTValue *o = index2adr(L, idx);
411 TValue tmp;
412 lua_Number n;
413 if (LJ_LIKELY(tvisint(o))) {
414 if (ok) *ok = 1;
415 return intV(o);
416 } else if (LJ_LIKELY(tvisnum(o))) {
417 n = numV(o);
418 } else {
419 if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) {
420 if (ok) *ok = 0;
421 return 0;
422 }
423 if (tvisint(&tmp)) {
424 if (ok) *ok = 1;
425 return intV(&tmp);
426 }
365 n = numV(&tmp); 427 n = numV(&tmp);
366 } 428 }
429 if (ok) *ok = 1;
367#if LJ_64 430#if LJ_64
368 return (lua_Integer)n; 431 return (lua_Integer)n;
369#else 432#else
@@ -434,7 +497,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
434 } else if (tvisnumber(o)) { 497 } else if (tvisnumber(o)) {
435 lj_gc_check(L); 498 lj_gc_check(L);
436 o = index2adr(L, idx); /* GC may move the stack. */ 499 o = index2adr(L, idx); /* GC may move the stack. */
437 s = lj_str_fromnumber(L, o); 500 s = lj_strfmt_number(L, o);
438 setstrV(L, o, s); 501 setstrV(L, o, s);
439 } else { 502 } else {
440 if (len != NULL) *len = 0; 503 if (len != NULL) *len = 0;
@@ -453,7 +516,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
453 } else if (tvisnumber(o)) { 516 } else if (tvisnumber(o)) {
454 lj_gc_check(L); 517 lj_gc_check(L);
455 o = index2adr(L, idx); /* GC may move the stack. */ 518 o = index2adr(L, idx); /* GC may move the stack. */
456 s = lj_str_fromnumber(L, o); 519 s = lj_strfmt_number(L, o);
457 setstrV(L, o, s); 520 setstrV(L, o, s);
458 } else { 521 } else {
459 lj_err_argt(L, idx, LUA_TSTRING); 522 lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +538,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
475 } else if (tvisnumber(o)) { 538 } else if (tvisnumber(o)) {
476 lj_gc_check(L); 539 lj_gc_check(L);
477 o = index2adr(L, idx); /* GC may move the stack. */ 540 o = index2adr(L, idx); /* GC may move the stack. */
478 s = lj_str_fromnumber(L, o); 541 s = lj_strfmt_number(L, o);
479 setstrV(L, o, s); 542 setstrV(L, o, s);
480 } else { 543 } else {
481 lj_err_argt(L, idx, LUA_TSTRING); 544 lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +570,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
507 } else if (tvisudata(o)) { 570 } else if (tvisudata(o)) {
508 return udataV(o)->len; 571 return udataV(o)->len;
509 } else if (tvisnumber(o)) { 572 } else if (tvisnumber(o)) {
510 GCstr *s = lj_str_fromnumber(L, o); 573 GCstr *s = lj_strfmt_number(L, o);
511 setstrV(L, o, s); 574 setstrV(L, o, s);
512 return s->len; 575 return s->len;
513 } else { 576 } else {
@@ -545,17 +608,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
545 608
546LUA_API const void *lua_topointer(lua_State *L, int idx) 609LUA_API const void *lua_topointer(lua_State *L, int idx)
547{ 610{
548 cTValue *o = index2adr(L, idx); 611 return lj_obj_ptr(index2adr(L, idx));
549 if (tvisudata(o))
550 return uddata(udataV(o));
551 else if (tvislightud(o))
552 return lightudV(o);
553 else if (tviscdata(o))
554 return cdataptr(cdataV(o));
555 else if (tvisgcv(o))
556 return gcV(o);
557 else
558 return NULL;
559} 612}
560 613
561/* -- Stack setters (object creation) ------------------------------------- */ 614/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +659,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
606 va_list argp) 659 va_list argp)
607{ 660{
608 lj_gc_check(L); 661 lj_gc_check(L);
609 return lj_str_pushvf(L, fmt, argp); 662 return lj_strfmt_pushvf(L, fmt, argp);
610} 663}
611 664
612LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) 665LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +668,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
615 va_list argp; 668 va_list argp;
616 lj_gc_check(L); 669 lj_gc_check(L);
617 va_start(argp, fmt); 670 va_start(argp, fmt);
618 ret = lj_str_pushvf(L, fmt, argp); 671 ret = lj_strfmt_pushvf(L, fmt, argp);
619 va_end(argp); 672 va_end(argp);
620 return ret; 673 return ret;
621} 674}
@@ -649,10 +702,8 @@ LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
649 702
650LUA_API void lua_createtable(lua_State *L, int narray, int nrec) 703LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
651{ 704{
652 GCtab *t;
653 lj_gc_check(L); 705 lj_gc_check(L);
654 t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); 706 settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
655 settabV(L, L->top, t);
656 incr_top(L); 707 incr_top(L);
657} 708}
658 709
@@ -715,8 +766,8 @@ LUA_API void lua_concat(lua_State *L, int n)
715 n -= (int)(L->top - top); 766 n -= (int)(L->top - top);
716 L->top = top+2; 767 L->top = top+2;
717 lj_vm_call(L, top, 1+1); 768 lj_vm_call(L, top, 1+1);
718 L->top--; 769 L->top -= 1+LJ_FR2;
719 copyTV(L, L->top-1, L->top); 770 copyTV(L, L->top-1, L->top+LJ_FR2);
720 } while (--n > 0); 771 } while (--n > 0);
721 } else if (n == 0) { /* Push empty string. */ 772 } else if (n == 0) { /* Push empty string. */
722 setstrV(L, L->top, &G(L)->strempty); 773 setstrV(L, L->top, &G(L)->strempty);
@@ -735,8 +786,8 @@ LUA_API void lua_gettable(lua_State *L, int idx)
735 if (v == NULL) { 786 if (v == NULL) {
736 L->top += 2; 787 L->top += 2;
737 lj_vm_call(L, L->top-2, 1+1); 788 lj_vm_call(L, L->top-2, 1+1);
738 L->top -= 2; 789 L->top -= 2+LJ_FR2;
739 v = L->top+1; 790 v = L->top+1+LJ_FR2;
740 } 791 }
741 copyTV(L, L->top-1, v); 792 copyTV(L, L->top-1, v);
742} 793}
@@ -751,8 +802,8 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
751 if (v == NULL) { 802 if (v == NULL) {
752 L->top += 2; 803 L->top += 2;
753 lj_vm_call(L, L->top-2, 1+1); 804 lj_vm_call(L, L->top-2, 1+1);
754 L->top -= 2; 805 L->top -= 2+LJ_FR2;
755 v = L->top+1; 806 v = L->top+1+LJ_FR2;
756 } 807 }
757 copyTV(L, L->top, v); 808 copyTV(L, L->top, v);
758 incr_top(L); 809 incr_top(L);
@@ -869,7 +920,7 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
869 lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); 920 lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
870} 921}
871 922
872LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) 923LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname)
873{ 924{
874 cTValue *o = index2adr(L, idx); 925 cTValue *o = index2adr(L, idx);
875 if (tvisudata(o)) { 926 if (tvisudata(o)) {
@@ -878,8 +929,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
878 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) 929 if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
879 return uddata(ud); 930 return uddata(ud);
880 } 931 }
881 lj_err_argtype(L, idx, tname); 932 return NULL; /* value is not a userdata with a metatable */
882 return NULL; /* unreachable */ 933}
934
935LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
936{
937 void *p = luaL_testudata(L, idx, tname);
938 if (!p) lj_err_argtype(L, idx, tname);
939 return p;
883} 940}
884 941
885/* -- Object setters ------------------------------------------------------ */ 942/* -- Object setters ------------------------------------------------------ */
@@ -893,13 +950,14 @@ LUA_API void lua_settable(lua_State *L, int idx)
893 o = lj_meta_tset(L, t, L->top-2); 950 o = lj_meta_tset(L, t, L->top-2);
894 if (o) { 951 if (o) {
895 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 952 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
896 copyTV(L, o, L->top-1);
897 L->top -= 2; 953 L->top -= 2;
954 copyTV(L, o, L->top+1);
898 } else { 955 } else {
899 L->top += 3; 956 TValue *base = L->top;
900 copyTV(L, L->top-1, L->top-6); 957 copyTV(L, base+2, base-3-2*LJ_FR2);
901 lj_vm_call(L, L->top-3, 0+1); 958 L->top = base+3;
902 L->top -= 3; 959 lj_vm_call(L, base, 0+1);
960 L->top -= 3+LJ_FR2;
903 } 961 }
904} 962}
905 963
@@ -913,14 +971,14 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
913 setstrV(L, &key, lj_str_newz(L, k)); 971 setstrV(L, &key, lj_str_newz(L, k));
914 o = lj_meta_tset(L, t, &key); 972 o = lj_meta_tset(L, t, &key);
915 if (o) { 973 if (o) {
916 L->top--;
917 /* NOBARRIER: lj_meta_tset ensures the table is not black. */ 974 /* NOBARRIER: lj_meta_tset ensures the table is not black. */
918 copyTV(L, o, L->top); 975 copyTV(L, o, --L->top);
919 } else { 976 } else {
920 L->top += 3; 977 TValue *base = L->top;
921 copyTV(L, L->top-1, L->top-6); 978 copyTV(L, base+2, base-3-2*LJ_FR2);
922 lj_vm_call(L, L->top-3, 0+1); 979 L->top = base+3;
923 L->top -= 2; 980 lj_vm_call(L, base, 0+1);
981 L->top -= 2+LJ_FR2;
924 } 982 }
925} 983}
926 984
@@ -987,6 +1045,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
987 return 1; 1045 return 1;
988} 1046}
989 1047
1048LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
1049{
1050 lua_getfield(L, LUA_REGISTRYINDEX, tname);
1051 lua_setmetatable(L, -2);
1052}
1053
990LUA_API int lua_setfenv(lua_State *L, int idx) 1054LUA_API int lua_setfenv(lua_State *L, int idx)
991{ 1055{
992 cTValue *o = index2adr(L, idx); 1056 cTValue *o = index2adr(L, idx);
@@ -1027,11 +1091,24 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
1027 1091
1028/* -- Calls --------------------------------------------------------------- */ 1092/* -- Calls --------------------------------------------------------------- */
1029 1093
1094#if LJ_FR2
1095static TValue *api_call_base(lua_State *L, int nargs)
1096{
1097 TValue *o = L->top, *base = o - nargs;
1098 L->top = o+1;
1099 for (; o > base; o--) copyTV(L, o, o-1);
1100 setnilV(o);
1101 return o+1;
1102}
1103#else
1104#define api_call_base(L, nargs) (L->top - (nargs))
1105#endif
1106
1030LUA_API void lua_call(lua_State *L, int nargs, int nresults) 1107LUA_API void lua_call(lua_State *L, int nargs, int nresults)
1031{ 1108{
1032 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1109 api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
1033 api_checknelems(L, nargs+1); 1110 api_checknelems(L, nargs+1);
1034 lj_vm_call(L, L->top - nargs, nresults+1); 1111 lj_vm_call(L, api_call_base(L, nargs), nresults+1);
1035} 1112}
1036 1113
1037LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) 1114LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1040,7 +1117,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1040 uint8_t oldh = hook_save(g); 1117 uint8_t oldh = hook_save(g);
1041 ptrdiff_t ef; 1118 ptrdiff_t ef;
1042 int status; 1119 int status;
1043 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1120 api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
1044 api_checknelems(L, nargs+1); 1121 api_checknelems(L, nargs+1);
1045 if (errfunc == 0) { 1122 if (errfunc == 0) {
1046 ef = 0; 1123 ef = 0;
@@ -1049,7 +1126,7 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1049 api_checkvalidindex(L, o); 1126 api_checkvalidindex(L, o);
1050 ef = savestack(L, o); 1127 ef = savestack(L, o);
1051 } 1128 }
1052 status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); 1129 status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
1053 if (status) hook_restore(g, oldh); 1130 if (status) hook_restore(g, oldh);
1054 return status; 1131 return status;
1055} 1132}
@@ -1057,12 +1134,14 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
1057static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) 1134static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
1058{ 1135{
1059 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); 1136 GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
1137 TValue *top = L->top;
1060 fn->c.f = func; 1138 fn->c.f = func;
1061 setfuncV(L, L->top, fn); 1139 setfuncV(L, top++, fn);
1062 setlightudV(L->top+1, checklightudptr(L, ud)); 1140 if (LJ_FR2) setnilV(top++);
1141 setlightudV(top++, checklightudptr(L, ud));
1063 cframe_nres(L->cframe) = 1+0; /* Zero results. */ 1142 cframe_nres(L->cframe) = 1+0; /* Zero results. */
1064 L->top += 2; 1143 L->top = top;
1065 return L->top-1; /* Now call the newly allocated C function. */ 1144 return top-1; /* Now call the newly allocated C function. */
1066} 1145}
1067 1146
1068LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) 1147LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1070,7 +1149,7 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1070 global_State *g = G(L); 1149 global_State *g = G(L);
1071 uint8_t oldh = hook_save(g); 1150 uint8_t oldh = hook_save(g);
1072 int status; 1151 int status;
1073 api_check(L, L->status == 0 || L->status == LUA_ERRERR); 1152 api_check(L, L->status == LUA_OK || L->status == LUA_ERRERR);
1074 status = lj_vm_cpcall(L, func, ud, cpcall); 1153 status = lj_vm_cpcall(L, func, ud, cpcall);
1075 if (status) hook_restore(g, oldh); 1154 if (status) hook_restore(g, oldh);
1076 return status; 1155 return status;
@@ -1079,10 +1158,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
1079LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) 1158LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1080{ 1159{
1081 if (luaL_getmetafield(L, idx, field)) { 1160 if (luaL_getmetafield(L, idx, field)) {
1082 TValue *base = L->top--; 1161 TValue *top = L->top--;
1083 copyTV(L, base, index2adr(L, idx)); 1162 if (LJ_FR2) setnilV(top++);
1084 L->top = base+1; 1163 copyTV(L, top++, index2adr(L, idx));
1085 lj_vm_call(L, base, 1+1); 1164 L->top = top;
1165 lj_vm_call(L, top-1, 1+1);
1086 return 1; 1166 return 1;
1087 } 1167 }
1088 return 0; 1168 return 0;
@@ -1090,6 +1170,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
1090 1170
1091/* -- Coroutine yield and resume ------------------------------------------ */ 1171/* -- Coroutine yield and resume ------------------------------------------ */
1092 1172
1173LUA_API int lua_isyieldable(lua_State *L)
1174{
1175 return cframe_canyield(L->cframe);
1176}
1177
1093LUA_API int lua_yield(lua_State *L, int nresults) 1178LUA_API int lua_yield(lua_State *L, int nresults)
1094{ 1179{
1095 void *cf = L->cframe; 1180 void *cf = L->cframe;
@@ -1109,12 +1194,14 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1109 } else { /* Yield from hook: add a pseudo-frame. */ 1194 } else { /* Yield from hook: add a pseudo-frame. */
1110 TValue *top = L->top; 1195 TValue *top = L->top;
1111 hook_leave(g); 1196 hook_leave(g);
1112 top->u64 = cframe_multres(cf); 1197 (top++)->u64 = cframe_multres(cf);
1113 setcont(top+1, lj_cont_hook); 1198 setcont(top, lj_cont_hook);
1114 setframe_pc(top+1, cframe_pc(cf)-1); 1199 if (LJ_FR2) top++;
1115 setframe_gc(top+2, obj2gco(L)); 1200 setframe_pc(top, cframe_pc(cf)-1);
1116 setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); 1201 if (LJ_FR2) top++;
1117 L->top = L->base = top+3; 1202 setframe_gc(top, obj2gco(L), LJ_TTHREAD);
1203 setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
1204 L->top = L->base = top+1;
1118#if LJ_TARGET_X64 1205#if LJ_TARGET_X64
1119 lj_err_throw(L, LUA_YIELD); 1206 lj_err_throw(L, LUA_YIELD);
1120#else 1207#else
@@ -1131,7 +1218,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
1131LUA_API int lua_resume(lua_State *L, int nargs) 1218LUA_API int lua_resume(lua_State *L, int nargs)
1132{ 1219{
1133 if (L->cframe == NULL && L->status <= LUA_YIELD) 1220 if (L->cframe == NULL && L->status <= LUA_YIELD)
1134 return lj_vm_resume(L, L->top - nargs, 0, 0); 1221 return lj_vm_resume(L,
1222 L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs,
1223 0, 0);
1135 L->top = L->base; 1224 L->top = L->base;
1136 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); 1225 setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
1137 incr_top(L); 1226 incr_top(L);
@@ -1161,7 +1250,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1161 res = (int)(g->gc.total & 0x3ff); 1250 res = (int)(g->gc.total & 0x3ff);
1162 break; 1251 break;
1163 case LUA_GCSTEP: { 1252 case LUA_GCSTEP: {
1164 MSize a = (MSize)data << 10; 1253 GCSize a = (GCSize)data << 10;
1165 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; 1254 g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
1166 while (g->gc.total >= g->gc.threshold) 1255 while (g->gc.total >= g->gc.threshold)
1167 if (lj_gc_step(L) > 0) { 1256 if (lj_gc_step(L) > 0) {
@@ -1178,6 +1267,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
1178 res = (int)(g->gc.stepmul); 1267 res = (int)(g->gc.stepmul);
1179 g->gc.stepmul = (MSize)data; 1268 g->gc.stepmul = (MSize)data;
1180 break; 1269 break;
1270 case LUA_GCISRUNNING:
1271 res = (g->gc.threshold != LJ_MAX_MEM);
1272 break;
1181 default: 1273 default:
1182 res = -1; /* Invalid option. */ 1274 res = -1; /* Invalid option. */
1183 } 1275 }
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 320ccf97..027b39ce 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -19,12 +19,16 @@
19#define LUAJIT_ARCH_x64 2 19#define LUAJIT_ARCH_x64 2
20#define LUAJIT_ARCH_ARM 3 20#define LUAJIT_ARCH_ARM 3
21#define LUAJIT_ARCH_arm 3 21#define LUAJIT_ARCH_arm 3
22#define LUAJIT_ARCH_PPC 4 22#define LUAJIT_ARCH_ARM64 4
23#define LUAJIT_ARCH_ppc 4 23#define LUAJIT_ARCH_arm64 4
24#define LUAJIT_ARCH_PPCSPE 5 24#define LUAJIT_ARCH_PPC 5
25#define LUAJIT_ARCH_ppcspe 5 25#define LUAJIT_ARCH_ppc 5
26#define LUAJIT_ARCH_MIPS 6 26#define LUAJIT_ARCH_MIPS 6
27#define LUAJIT_ARCH_mips 6 27#define LUAJIT_ARCH_mips 6
28#define LUAJIT_ARCH_MIPS32 6
29#define LUAJIT_ARCH_mips32 6
30#define LUAJIT_ARCH_MIPS64 7
31#define LUAJIT_ARCH_mips64 7
28 32
29/* Target OS. */ 33/* Target OS. */
30#define LUAJIT_OS_OTHER 0 34#define LUAJIT_OS_OTHER 0
@@ -43,14 +47,14 @@
43#define LUAJIT_TARGET LUAJIT_ARCH_X64 47#define LUAJIT_TARGET LUAJIT_ARCH_X64
44#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) 48#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
45#define LUAJIT_TARGET LUAJIT_ARCH_ARM 49#define LUAJIT_TARGET LUAJIT_ARCH_ARM
50#elif defined(__aarch64__)
51#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
46#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) 52#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
47#ifdef __NO_FPRS__
48#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE
49#else
50#define LUAJIT_TARGET LUAJIT_ARCH_PPC 53#define LUAJIT_TARGET LUAJIT_ARCH_PPC
51#endif 54#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
55#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
52#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) 56#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
53#define LUAJIT_TARGET LUAJIT_ARCH_MIPS 57#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
54#else 58#else
55#error "No support for this architecture (yet)" 59#error "No support for this architecture (yet)"
56#endif 60#endif
@@ -70,7 +74,7 @@
70 defined(__NetBSD__) || defined(__OpenBSD__) || \ 74 defined(__NetBSD__) || defined(__OpenBSD__) || \
71 defined(__DragonFly__)) && !defined(__ORBIS__) 75 defined(__DragonFly__)) && !defined(__ORBIS__)
72#define LUAJIT_OS LUAJIT_OS_BSD 76#define LUAJIT_OS LUAJIT_OS_BSD
73#elif (defined(__sun__) && defined(__svr4__)) 77#elif (defined(__sun__) && defined(__svr4__)) || defined(__HAIKU__)
74#define LUAJIT_OS LUAJIT_OS_POSIX 78#define LUAJIT_OS LUAJIT_OS_POSIX
75#elif defined(__CYGWIN__) 79#elif defined(__CYGWIN__)
76#define LJ_TARGET_CYGWIN 1 80#define LJ_TARGET_CYGWIN 1
@@ -99,7 +103,7 @@
99#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) 103#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
100#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) 104#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
101#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) 105#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
102#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM) 106#define LJ_TARGET_IOS (LJ_TARGET_OSX && (LUAJIT_TARGET == LUAJIT_ARCH_ARM || LUAJIT_TARGET == LUAJIT_ARCH_ARM64))
103#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) 107#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
104#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX 108#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
105 109
@@ -125,6 +129,19 @@
125#define LJ_TARGET_CONSOLE 1 129#define LJ_TARGET_CONSOLE 1
126#endif 130#endif
127 131
132#ifdef _DURANGO
133#define LJ_TARGET_XBOXONE 1
134#define LJ_TARGET_CONSOLE 1
135#define LJ_TARGET_GC64 1
136#endif
137
138#ifdef _UWP
139#define LJ_TARGET_UWP 1
140#if LUAJIT_TARGET == LUAJIT_ARCH_X64
141#define LJ_TARGET_GC64 1
142#endif
143#endif
144
128#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ 145#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
129#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ 146#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
130#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ 147#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
@@ -167,6 +184,9 @@
167#define LJ_TARGET_MASKROT 1 184#define LJ_TARGET_MASKROT 1
168#define LJ_TARGET_UNALIGNED 1 185#define LJ_TARGET_UNALIGNED 1
169#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL 186#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
187#ifndef LUAJIT_DISABLE_GC64
188#define LJ_TARGET_GC64 1
189#endif
170 190
171#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM 191#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
172 192
@@ -188,7 +208,7 @@
188#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 208#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
189#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL 209#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
190 210
191#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ 211#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
192#define LJ_ARCH_VERSION 80 212#define LJ_ARCH_VERSION 80
193#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ 213#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
194#define LJ_ARCH_VERSION 70 214#define LJ_ARCH_VERSION 70
@@ -200,22 +220,84 @@
200#define LJ_ARCH_VERSION 50 220#define LJ_ARCH_VERSION 50
201#endif 221#endif
202 222
223#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
224
225#define LJ_ARCH_BITS 64
226#if defined(__AARCH64EB__)
227#define LJ_ARCH_NAME "arm64be"
228#define LJ_ARCH_ENDIAN LUAJIT_BE
229#else
230#define LJ_ARCH_NAME "arm64"
231#define LJ_ARCH_ENDIAN LUAJIT_LE
232#endif
233#define LJ_TARGET_ARM64 1
234#define LJ_TARGET_EHRETREG 0
235#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
236#define LJ_TARGET_MASKSHIFT 1
237#define LJ_TARGET_MASKROT 1
238#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
239#define LJ_TARGET_GC64 1
240#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
241
242#define LJ_ARCH_VERSION 80
243
203#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC 244#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
204 245
205#define LJ_ARCH_NAME "ppc" 246#ifndef LJ_ARCH_ENDIAN
247#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
248#define LJ_ARCH_ENDIAN LUAJIT_LE
249#else
250#define LJ_ARCH_ENDIAN LUAJIT_BE
251#endif
252#endif
253
206#if _LP64 254#if _LP64
207#define LJ_ARCH_BITS 64 255#define LJ_ARCH_BITS 64
256#if LJ_ARCH_ENDIAN == LUAJIT_LE
257#define LJ_ARCH_NAME "ppc64le"
258#else
259#define LJ_ARCH_NAME "ppc64"
260#endif
208#else 261#else
209#define LJ_ARCH_BITS 32 262#define LJ_ARCH_BITS 32
263#define LJ_ARCH_NAME "ppc"
264
265#if !defined(LJ_ARCH_HASFPU)
266#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
267#define LJ_ARCH_HASFPU 0
268#else
269#define LJ_ARCH_HASFPU 1
210#endif 270#endif
211#define LJ_ARCH_ENDIAN LUAJIT_BE 271#endif
272
273#if !defined(LJ_ABI_SOFTFP)
274#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
275#define LJ_ABI_SOFTFP 1
276#else
277#define LJ_ABI_SOFTFP 0
278#endif
279#endif
280#endif
281
282#if LJ_ABI_SOFTFP
283#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
284#else
285#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
286#endif
287
212#define LJ_TARGET_PPC 1 288#define LJ_TARGET_PPC 1
213#define LJ_TARGET_EHRETREG 3 289#define LJ_TARGET_EHRETREG 3
214#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ 290#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
215#define LJ_TARGET_MASKSHIFT 0 291#define LJ_TARGET_MASKSHIFT 0
216#define LJ_TARGET_MASKROT 1 292#define LJ_TARGET_MASKROT 1
217#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ 293#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
218#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE 294
295#if LJ_TARGET_CONSOLE
296#define LJ_ARCH_PPC32ON64 1
297#define LJ_ARCH_NOFFI 1
298#elif LJ_ARCH_BITS == 64
299#error "No support for PPC64"
300#endif
219 301
220#if _ARCH_PWR7 302#if _ARCH_PWR7
221#define LJ_ARCH_VERSION 70 303#define LJ_ARCH_VERSION 70
@@ -230,10 +312,6 @@
230#else 312#else
231#define LJ_ARCH_VERSION 0 313#define LJ_ARCH_VERSION 0
232#endif 314#endif
233#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
234#define LJ_ARCH_PPC64 1
235#define LJ_ARCH_NOFFI 1
236#endif
237#if _ARCH_PPCSQ 315#if _ARCH_PPCSQ
238#define LJ_ARCH_SQRT 1 316#define LJ_ARCH_SQRT 1
239#endif 317#endif
@@ -247,44 +325,79 @@
247#define LJ_ARCH_XENON 1 325#define LJ_ARCH_XENON 1
248#endif 326#endif
249 327
250#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE 328#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
251
252#define LJ_ARCH_NAME "ppcspe"
253#define LJ_ARCH_BITS 32
254#define LJ_ARCH_ENDIAN LUAJIT_BE
255#ifndef LJ_ABI_SOFTFP
256#define LJ_ABI_SOFTFP 1
257#endif
258#define LJ_ABI_EABI 1
259#define LJ_TARGET_PPCSPE 1
260#define LJ_TARGET_EHRETREG 3
261#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
262#define LJ_TARGET_MASKSHIFT 0
263#define LJ_TARGET_MASKROT 1
264#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
265#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
266#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */
267#define LJ_ARCH_NOJIT 1
268
269#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
270 329
271#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) 330#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
331#if __mips_isa_rev >= 6
332#define LJ_TARGET_MIPSR6 1
333#define LJ_TARGET_UNALIGNED 1
334#endif
335#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
336#if LJ_TARGET_MIPSR6
337#define LJ_ARCH_NAME "mips32r6el"
338#else
272#define LJ_ARCH_NAME "mipsel" 339#define LJ_ARCH_NAME "mipsel"
340#endif
341#else
342#if LJ_TARGET_MIPSR6
343#define LJ_ARCH_NAME "mips64r6el"
344#else
345#define LJ_ARCH_NAME "mips64el"
346#endif
347#endif
273#define LJ_ARCH_ENDIAN LUAJIT_LE 348#define LJ_ARCH_ENDIAN LUAJIT_LE
274#else 349#else
350#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
351#if LJ_TARGET_MIPSR6
352#define LJ_ARCH_NAME "mips32r6"
353#else
275#define LJ_ARCH_NAME "mips" 354#define LJ_ARCH_NAME "mips"
355#endif
356#else
357#if LJ_TARGET_MIPSR6
358#define LJ_ARCH_NAME "mips64r6"
359#else
360#define LJ_ARCH_NAME "mips64"
361#endif
362#endif
276#define LJ_ARCH_ENDIAN LUAJIT_BE 363#define LJ_ARCH_ENDIAN LUAJIT_BE
277#endif 364#endif
365
366#if !defined(LJ_ARCH_HASFPU)
367#ifdef __mips_soft_float
368#define LJ_ARCH_HASFPU 0
369#else
370#define LJ_ARCH_HASFPU 1
371#endif
372#endif
373
374#if !defined(LJ_ABI_SOFTFP)
375#ifdef __mips_soft_float
376#define LJ_ABI_SOFTFP 1
377#else
378#define LJ_ABI_SOFTFP 0
379#endif
380#endif
381
382#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
278#define LJ_ARCH_BITS 32 383#define LJ_ARCH_BITS 32
384#define LJ_TARGET_MIPS32 1
385#else
386#define LJ_ARCH_BITS 64
387#define LJ_TARGET_MIPS64 1
388#define LJ_TARGET_GC64 1
389#endif
279#define LJ_TARGET_MIPS 1 390#define LJ_TARGET_MIPS 1
280#define LJ_TARGET_EHRETREG 4 391#define LJ_TARGET_EHRETREG 4
281#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ 392#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
282#define LJ_TARGET_MASKSHIFT 1 393#define LJ_TARGET_MASKSHIFT 1
283#define LJ_TARGET_MASKROT 1 394#define LJ_TARGET_MASKROT 1
284#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ 395#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
285#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE 396#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
286 397
287#if _MIPS_ARCH_MIPS32R2 398#if LJ_TARGET_MIPSR6
399#define LJ_ARCH_VERSION 60
400#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
288#define LJ_ARCH_VERSION 20 401#define LJ_ARCH_VERSION 20
289#else 402#else
290#define LJ_ARCH_VERSION 10 403#define LJ_ARCH_VERSION 10
@@ -312,6 +425,16 @@
312#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) 425#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
313#error "Need at least GCC 4.2 or newer" 426#error "Need at least GCC 4.2 or newer"
314#endif 427#endif
428#elif LJ_TARGET_ARM64
429#if __clang__
430#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
431#error "Need at least Clang 3.5 or newer"
432#endif
433#else
434#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
435#error "Need at least GCC 4.8 or newer"
436#endif
437#endif
315#elif !LJ_TARGET_PS3 438#elif !LJ_TARGET_PS3
316#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) 439#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
317#error "Need at least GCC 4.3 or newer" 440#error "Need at least GCC 4.3 or newer"
@@ -335,22 +458,29 @@
335#if !(__ARM_EABI__ || LJ_TARGET_IOS) 458#if !(__ARM_EABI__ || LJ_TARGET_IOS)
336#error "Only ARM EABI or iOS 3.0+ ABI is supported" 459#error "Only ARM EABI or iOS 3.0+ ABI is supported"
337#endif 460#endif
338#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE 461#elif LJ_TARGET_ARM64
339#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) 462#if defined(_ILP32)
340#error "No support for PowerPC CPUs without double-precision FPU" 463#error "No support for ILP32 model on ARM64"
341#endif 464#endif
465#elif LJ_TARGET_PPC
342#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) 466#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
343#error "No support for little-endian PowerPC" 467#error "No support for little-endian PPC32"
468#endif
469#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
470#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
344#endif 471#endif
345#if defined(_LP64) 472#elif LJ_TARGET_MIPS32
346#error "No support for PowerPC 64 bit mode" 473#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
474#error "Only o32 ABI supported for MIPS32"
347#endif 475#endif
348#elif LJ_TARGET_MIPS 476#if LJ_TARGET_MIPSR6
349#if defined(__mips_soft_float) 477/* Not that useful, since most available r6 CPUs are 64 bit. */
350#error "No support for MIPS CPUs without FPU" 478#error "No support for MIPS32R6"
351#endif 479#endif
352#if defined(_LP64) 480#elif LJ_TARGET_MIPS64
353#error "No support for MIPS64" 481#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
482/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
483#error "Only n64 ABI supported for MIPS64"
354#endif 484#endif
355#endif 485#endif
356#endif 486#endif
@@ -376,6 +506,20 @@
376#endif 506#endif
377#endif 507#endif
378 508
509/* 64 bit GC references. */
510#if LJ_TARGET_GC64
511#define LJ_GC64 1
512#else
513#define LJ_GC64 0
514#endif
515
516/* 2-slot frame info. */
517#if LJ_GC64
518#define LJ_FR2 1
519#else
520#define LJ_FR2 0
521#endif
522
379/* Disable or enable the JIT compiler. */ 523/* Disable or enable the JIT compiler. */
380#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) 524#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
381#define LJ_HASJIT 0 525#define LJ_HASJIT 0
@@ -390,6 +534,21 @@
390#define LJ_HASFFI 1 534#define LJ_HASFFI 1
391#endif 535#endif
392 536
537#if defined(LUAJIT_DISABLE_PROFILE)
538#define LJ_HASPROFILE 0
539#elif LJ_TARGET_POSIX
540#define LJ_HASPROFILE 1
541#define LJ_PROFILE_SIGPROF 1
542#elif LJ_TARGET_PS3
543#define LJ_HASPROFILE 1
544#define LJ_PROFILE_PTHREAD 1
545#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
546#define LJ_HASPROFILE 1
547#define LJ_PROFILE_WTHREAD 1
548#else
549#define LJ_HASPROFILE 0
550#endif
551
393#ifndef LJ_ARCH_HASFPU 552#ifndef LJ_ARCH_HASFPU
394#define LJ_ARCH_HASFPU 1 553#define LJ_ARCH_HASFPU 1
395#endif 554#endif
@@ -397,6 +556,7 @@
397#define LJ_ABI_SOFTFP 0 556#define LJ_ABI_SOFTFP 0
398#endif 557#endif
399#define LJ_SOFTFP (!LJ_ARCH_HASFPU) 558#define LJ_SOFTFP (!LJ_ARCH_HASFPU)
559#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
400 560
401#if LJ_ARCH_ENDIAN == LUAJIT_BE 561#if LJ_ARCH_ENDIAN == LUAJIT_BE
402#define LJ_LE 0 562#define LJ_LE 0
@@ -422,11 +582,11 @@
422#define LJ_TARGET_UNALIGNED 0 582#define LJ_TARGET_UNALIGNED 0
423#endif 583#endif
424 584
425/* Various workarounds for embedded operating systems. */ 585/* Various workarounds for embedded operating systems or weak C runtimes. */
426#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 586#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
427#define LUAJIT_NO_LOG2 587#define LUAJIT_NO_LOG2
428#endif 588#endif
429#if defined(__symbian__) 589#if defined(__symbian__) || LJ_TARGET_WINDOWS
430#define LUAJIT_NO_EXP2 590#define LUAJIT_NO_EXP2
431#endif 591#endif
432#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) 592#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
@@ -442,6 +602,18 @@
442#define LJ_NO_UNWIND 1 602#define LJ_NO_UNWIND 1
443#endif 603#endif
444 604
605#if LJ_TARGET_WINDOWS
606#if LJ_TARGET_UWP
607#define LJ_WIN_VALLOC VirtualAllocFromApp
608#define LJ_WIN_VPROTECT VirtualProtectFromApp
609extern void *LJ_WIN_LOADLIBA(const char *path);
610#else
611#define LJ_WIN_VALLOC VirtualAlloc
612#define LJ_WIN_VPROTECT VirtualProtect
613#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
614#endif
615#endif
616
445/* Compatibility with Lua 5.1 vs. 5.2. */ 617/* Compatibility with Lua 5.1 vs. 5.2. */
446#ifdef LUAJIT_ENABLE_LUA52COMPAT 618#ifdef LUAJIT_ENABLE_LUA52COMPAT
447#define LJ_52 1 619#define LJ_52 1
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9b17421e..68d28fb0 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -90,7 +90,7 @@ typedef struct ASMState {
90 MCode *realign; /* Realign loop if not NULL. */ 90 MCode *realign; /* Realign loop if not NULL. */
91 91
92#ifdef RID_NUM_KREF 92#ifdef RID_NUM_KREF
93 int32_t krefk[RID_NUM_KREF]; 93 intptr_t krefk[RID_NUM_KREF];
94#endif 94#endif
95 IRRef1 phireg[RID_MAX]; /* PHI register references. */ 95 IRRef1 phireg[RID_MAX]; /* PHI register references. */
96 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ 96 uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
@@ -143,7 +143,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
143#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) 143#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref)))
144#define ra_krefk(as, ref) (as->krefk[(ref)]) 144#define ra_krefk(as, ref) (as->krefk[(ref)])
145 145
146static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) 146static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
147{ 147{
148 IRRef ref = (IRRef)(r - RID_MIN_KREF); 148 IRRef ref = (IRRef)(r - RID_MIN_KREF);
149 as->krefk[ref] = k; 149 as->krefk[ref] = k;
@@ -170,6 +170,8 @@ IRFLDEF(FLOFS)
170#include "lj_emit_x86.h" 170#include "lj_emit_x86.h"
171#elif LJ_TARGET_ARM 171#elif LJ_TARGET_ARM
172#include "lj_emit_arm.h" 172#include "lj_emit_arm.h"
173#elif LJ_TARGET_ARM64
174#include "lj_emit_arm64.h"
173#elif LJ_TARGET_PPC 175#elif LJ_TARGET_PPC
174#include "lj_emit_ppc.h" 176#include "lj_emit_ppc.h"
175#elif LJ_TARGET_MIPS 177#elif LJ_TARGET_MIPS
@@ -178,6 +180,12 @@ IRFLDEF(FLOFS)
178#error "Missing instruction emitter for target CPU" 180#error "Missing instruction emitter for target CPU"
179#endif 181#endif
180 182
183/* Generic load/store of register from/to stack slot. */
184#define emit_spload(as, ir, r, ofs) \
185 emit_loadofs(as, ir, (r), RID_SP, (ofs))
186#define emit_spstore(as, ir, r, ofs) \
187 emit_storeofs(as, ir, (r), RID_SP, (ofs))
188
181/* -- Register allocator debugging ---------------------------------------- */ 189/* -- Register allocator debugging ---------------------------------------- */
182 190
183/* #define LUAJIT_DEBUG_RA */ 191/* #define LUAJIT_DEBUG_RA */
@@ -315,7 +323,11 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
315 lua_assert(!rset_test(as->freeset, r)); 323 lua_assert(!rset_test(as->freeset, r));
316 ra_free(as, r); 324 ra_free(as, r);
317 ra_modified(as, r); 325 ra_modified(as, r);
326#if LJ_64
327 emit_loadu64(as, r, ra_krefk(as, ref));
328#else
318 emit_loadi(as, r, ra_krefk(as, ref)); 329 emit_loadi(as, r, ra_krefk(as, ref));
330#endif
319 return r; 331 return r;
320 } 332 }
321 ir = IR(ref); 333 ir = IR(ref);
@@ -325,9 +337,9 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
325 ra_modified(as, r); 337 ra_modified(as, r);
326 ir->r = RID_INIT; /* Do not keep any hint. */ 338 ir->r = RID_INIT; /* Do not keep any hint. */
327 RA_DBGX((as, "remat $i $r", ir, r)); 339 RA_DBGX((as, "remat $i $r", ir, r));
328#if !LJ_SOFTFP 340#if !LJ_SOFTFP32
329 if (ir->o == IR_KNUM) { 341 if (ir->o == IR_KNUM) {
330 emit_loadn(as, r, ir_knum(ir)); 342 emit_loadk64(as, r, ir);
331 } else 343 } else
332#endif 344#endif
333 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { 345 if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
@@ -335,10 +347,16 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
335 emit_getgl(as, r, jit_base); 347 emit_getgl(as, r, jit_base);
336 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { 348 } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
337 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ 349 lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
338 emit_getgl(as, r, jit_L); 350 emit_getgl(as, r, cur_L);
339#if LJ_64 351#if LJ_64
340 } else if (ir->o == IR_KINT64) { 352 } else if (ir->o == IR_KINT64) {
341 emit_loadu64(as, r, ir_kint64(ir)->u64); 353 emit_loadu64(as, r, ir_kint64(ir)->u64);
354#if LJ_GC64
355 } else if (ir->o == IR_KGC) {
356 emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
357 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
358 emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
359#endif
342#endif 360#endif
343 } else { 361 } else {
344 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 362 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
@@ -511,7 +529,7 @@ static void ra_evictk(ASMState *as)
511 529
512#ifdef RID_NUM_KREF 530#ifdef RID_NUM_KREF
513/* Allocate a register for a constant. */ 531/* Allocate a register for a constant. */
514static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) 532static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
515{ 533{
516 /* First try to find a register which already holds the same constant. */ 534 /* First try to find a register which already holds the same constant. */
517 RegSet pick, work = ~as->freeset & RSET_GPR; 535 RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -520,9 +538,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
520 IRRef ref; 538 IRRef ref;
521 r = rset_pickbot(work); 539 r = rset_pickbot(work);
522 ref = regcost_ref(as->cost[r]); 540 ref = regcost_ref(as->cost[r]);
541#if LJ_64
542 if (ref < ASMREF_L) {
543 if (ra_iskref(ref)) {
544 if (k == ra_krefk(as, ref))
545 return r;
546 } else {
547 IRIns *ir = IR(ref);
548 if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
549#if LJ_GC64
550 (ir->o == IR_KINT && k == ir->i) ||
551 (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
552 ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
553 k == (intptr_t)ir_kptr(ir))
554#else
555 (ir->o != IR_KINT64 && k == ir->i)
556#endif
557 )
558 return r;
559 }
560 }
561#else
523 if (ref < ASMREF_L && 562 if (ref < ASMREF_L &&
524 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) 563 k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
525 return r; 564 return r;
565#endif
526 rset_clear(work, r); 566 rset_clear(work, r);
527 } 567 }
528 pick = as->freeset & allow; 568 pick = as->freeset & allow;
@@ -542,7 +582,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
542} 582}
543 583
544/* Allocate a specific register for a constant. */ 584/* Allocate a specific register for a constant. */
545static void ra_allockreg(ASMState *as, int32_t k, Reg r) 585static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
546{ 586{
547 Reg kr = ra_allock(as, k, RID2RSET(r)); 587 Reg kr = ra_allock(as, k, RID2RSET(r));
548 if (kr != r) { 588 if (kr != r) {
@@ -612,10 +652,20 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
612 return r; 652 return r;
613} 653}
614 654
655/* Add a register rename to the IR. */
656static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
657{
658 IRRef ren;
659 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
660 ren = tref_ref(lj_ir_emit(as->J));
661 as->J->cur.ir[ren].r = (uint8_t)down;
662 as->J->cur.ir[ren].s = SPS_NONE;
663}
664
615/* Rename register allocation and emit move. */ 665/* Rename register allocation and emit move. */
616static void ra_rename(ASMState *as, Reg down, Reg up) 666static void ra_rename(ASMState *as, Reg down, Reg up)
617{ 667{
618 IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); 668 IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
619 IRIns *ir = IR(ref); 669 IRIns *ir = IR(ref);
620 ir->r = (uint8_t)up; 670 ir->r = (uint8_t)up;
621 as->cost[down] = 0; 671 as->cost[down] = 0;
@@ -628,11 +678,7 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
628 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); 678 RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
629 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ 679 emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
630 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ 680 if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
631 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); 681 ra_addrename(as, down, ref, as->snapno);
632 ren = tref_ref(lj_ir_emit(as->J));
633 as->ir = as->T->ir; /* The IR may have been reallocated. */
634 IR(ren)->r = (uint8_t)down;
635 IR(ren)->s = SPS_NONE;
636 } 682 }
637} 683}
638 684
@@ -682,18 +728,22 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
682 if (ra_noreg(left)) { 728 if (ra_noreg(left)) {
683 if (irref_isk(lref)) { 729 if (irref_isk(lref)) {
684 if (ir->o == IR_KNUM) { 730 if (ir->o == IR_KNUM) {
685 cTValue *tv = ir_knum(ir);
686 /* FP remat needs a load except for +0. Still better than eviction. */ 731 /* FP remat needs a load except for +0. Still better than eviction. */
687 if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { 732 if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
688 emit_loadn(as, dest, tv); 733 emit_loadk64(as, dest, ir);
689 return; 734 return;
690 } 735 }
691#if LJ_64 736#if LJ_64
692 } else if (ir->o == IR_KINT64) { 737 } else if (ir->o == IR_KINT64) {
693 emit_loadu64(as, dest, ir_kint64(ir)->u64); 738 emit_loadk64(as, dest, ir);
739 return;
740#if LJ_GC64
741 } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
742 emit_loadk64(as, dest, ir);
694 return; 743 return;
695#endif 744#endif
696 } else { 745#endif
746 } else if (ir->o != IR_KPRI) {
697 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || 747 lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
698 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); 748 ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
699 emit_loadi(as, dest, ir->i); 749 emit_loadi(as, dest, ir->i);
@@ -934,7 +984,7 @@ static void asm_snap_prep(ASMState *as)
934 } else { 984 } else {
935 /* Process any renames above the highwater mark. */ 985 /* Process any renames above the highwater mark. */
936 for (; as->snaprename < as->T->nins; as->snaprename++) { 986 for (; as->snaprename < as->T->nins; as->snaprename++) {
937 IRIns *ir = IR(as->snaprename); 987 IRIns *ir = &as->T->ir[as->snaprename];
938 if (asm_snap_checkrename(as, ir->op1)) 988 if (asm_snap_checkrename(as, ir->op1))
939 ir->op2 = REF_BIAS-1; /* Kill rename. */ 989 ir->op2 = REF_BIAS-1; /* Kill rename. */
940 } 990 }
@@ -943,44 +993,6 @@ static void asm_snap_prep(ASMState *as)
943 993
944/* -- Miscellaneous helpers ----------------------------------------------- */ 994/* -- Miscellaneous helpers ----------------------------------------------- */
945 995
946/* Collect arguments from CALL* and CARG instructions. */
947static void asm_collectargs(ASMState *as, IRIns *ir,
948 const CCallInfo *ci, IRRef *args)
949{
950 uint32_t n = CCI_NARGS(ci);
951 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
952 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
953 while (n-- > 1) {
954 ir = IR(ir->op1);
955 lua_assert(ir->o == IR_CARG);
956 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
957 }
958 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
959 lua_assert(IR(ir->op1)->o != IR_CARG);
960}
961
962/* Reconstruct CCallInfo flags for CALLX*. */
963static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
964{
965 uint32_t nargs = 0;
966 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
967 IRIns *ira = IR(ir->op1);
968 nargs++;
969 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
970 }
971#if LJ_HASFFI
972 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
973 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
974 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
975 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
976#if LJ_TARGET_X86
977 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
978#endif
979 }
980#endif
981 return (nargs | (ir->t.irt << CCI_OTSHIFT));
982}
983
984/* Calculate stack adjustment. */ 996/* Calculate stack adjustment. */
985static int32_t asm_stack_adjust(ASMState *as) 997static int32_t asm_stack_adjust(ASMState *as)
986{ 998{
@@ -1004,7 +1016,11 @@ static uint32_t ir_khash(IRIns *ir)
1004 } else { 1016 } else {
1005 lua_assert(irt_isgcv(ir->t)); 1017 lua_assert(irt_isgcv(ir->t));
1006 lo = u32ptr(ir_kgc(ir)); 1018 lo = u32ptr(ir_kgc(ir));
1019#if LJ_GC64
1020 hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
1021#else
1007 hi = lo + HASH_BIAS; 1022 hi = lo + HASH_BIAS;
1023#endif
1008 } 1024 }
1009 return hashrot(lo, hi); 1025 return hashrot(lo, hi);
1010} 1026}
@@ -1065,6 +1081,259 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
1065 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ 1081 as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
1066} 1082}
1067 1083
1084/* -- Buffer operations --------------------------------------------------- */
1085
1086static void asm_tvptr(ASMState *as, Reg dest, IRRef ref);
1087
1088static void asm_bufhdr(ASMState *as, IRIns *ir)
1089{
1090 Reg sb = ra_dest(as, ir, RSET_GPR);
1091 if ((ir->op2 & IRBUFHDR_APPEND)) {
1092 /* Rematerialize const buffer pointer instead of likely spill. */
1093 IRIns *irp = IR(ir->op1);
1094 if (!(ra_hasreg(irp->r) || irp == ir-1 ||
1095 (irp == ir-2 && !ra_used(ir-1)))) {
1096 while (!(irp->o == IR_BUFHDR && !(irp->op2 & IRBUFHDR_APPEND)))
1097 irp = IR(irp->op1);
1098 if (irref_isk(irp->op1)) {
1099 ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
1100 ir = irp;
1101 }
1102 }
1103 } else {
1104 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
1105 /* Passing ir isn't strictly correct, but it's an IRT_PGC, too. */
1106 emit_storeofs(as, ir, tmp, sb, offsetof(SBuf, p));
1107 emit_loadofs(as, ir, tmp, sb, offsetof(SBuf, b));
1108 }
1109#if LJ_TARGET_X86ORX64
1110 ra_left(as, sb, ir->op1);
1111#else
1112 ra_leftov(as, sb, ir->op1);
1113#endif
1114}
1115
1116static void asm_bufput(ASMState *as, IRIns *ir)
1117{
1118 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
1119 IRRef args[3];
1120 IRIns *irs;
1121 int kchar = -129;
1122 args[0] = ir->op1; /* SBuf * */
1123 args[1] = ir->op2; /* GCstr * */
1124 irs = IR(ir->op2);
1125 lua_assert(irt_isstr(irs->t));
1126 if (irs->o == IR_KGC) {
1127 GCstr *s = ir_kstr(irs);
1128 if (s->len == 1) { /* Optimize put of single-char string constant. */
1129 kchar = (int8_t)strdata(s)[0]; /* Signed! */
1130 args[1] = ASMREF_TMP1; /* int, truncated to char */
1131 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1132 }
1133 } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
1134 if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
1135 if (irs->op2 == IRTOSTR_NUM) {
1136 args[1] = ASMREF_TMP1; /* TValue * */
1137 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
1138 } else {
1139 lua_assert(irt_isinteger(IR(irs->op1)->t));
1140 args[1] = irs->op1; /* int */
1141 if (irs->op2 == IRTOSTR_INT)
1142 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
1143 else
1144 ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
1145 }
1146 } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
1147 args[1] = irs->op1; /* const void * */
1148 args[2] = irs->op2; /* MSize */
1149 ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
1150 }
1151 }
1152 asm_setupresult(as, ir, ci); /* SBuf * */
1153 asm_gencall(as, ci, args);
1154 if (args[1] == ASMREF_TMP1) {
1155 Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
1156 if (kchar == -129)
1157 asm_tvptr(as, tmp, irs->op1);
1158 else
1159 ra_allockreg(as, kchar, tmp);
1160 }
1161}
1162
1163static void asm_bufstr(ASMState *as, IRIns *ir)
1164{
1165 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
1166 IRRef args[1];
1167 args[0] = ir->op1; /* SBuf *sb */
1168 as->gcsteps++;
1169 asm_setupresult(as, ir, ci); /* GCstr * */
1170 asm_gencall(as, ci, args);
1171}
1172
1173/* -- Type conversions ---------------------------------------------------- */
1174
1175static void asm_tostr(ASMState *as, IRIns *ir)
1176{
1177 const CCallInfo *ci;
1178 IRRef args[2];
1179 args[0] = ASMREF_L;
1180 as->gcsteps++;
1181 if (ir->op2 == IRTOSTR_NUM) {
1182 args[1] = ASMREF_TMP1; /* cTValue * */
1183 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
1184 } else {
1185 args[1] = ir->op1; /* int32_t k */
1186 if (ir->op2 == IRTOSTR_INT)
1187 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
1188 else
1189 ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
1190 }
1191 asm_setupresult(as, ir, ci); /* GCstr * */
1192 asm_gencall(as, ci, args);
1193 if (ir->op2 == IRTOSTR_NUM)
1194 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
1195}
1196
1197#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
1198static void asm_conv64(ASMState *as, IRIns *ir)
1199{
1200 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
1201 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
1202 IRCallID id;
1203 IRRef args[2];
1204 lua_assert((ir-1)->o == IR_CONV && ir->o == IR_HIOP);
1205 args[LJ_BE] = (ir-1)->op1;
1206 args[LJ_LE] = ir->op1;
1207 if (st == IRT_NUM || st == IRT_FLOAT) {
1208 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
1209 ir--;
1210 } else {
1211 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
1212 }
1213 {
1214#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
1215 CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
1216 cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
1217#else
1218 const CCallInfo *ci = &lj_ir_callinfo[id];
1219#endif
1220 asm_setupresult(as, ir, ci);
1221 asm_gencall(as, ci, args);
1222 }
1223}
1224#endif
1225
1226/* -- Memory references --------------------------------------------------- */
1227
1228static void asm_newref(ASMState *as, IRIns *ir)
1229{
1230 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1231 IRRef args[3];
1232 if (ir->r == RID_SINK)
1233 return;
1234 args[0] = ASMREF_L; /* lua_State *L */
1235 args[1] = ir->op1; /* GCtab *t */
1236 args[2] = ASMREF_TMP1; /* cTValue *key */
1237 asm_setupresult(as, ir, ci); /* TValue * */
1238 asm_gencall(as, ci, args);
1239 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
1240}
1241
1242static void asm_lref(ASMState *as, IRIns *ir)
1243{
1244 Reg r = ra_dest(as, ir, RSET_GPR);
1245#if LJ_TARGET_X86ORX64
1246 ra_left(as, r, ASMREF_L);
1247#else
1248 ra_leftov(as, r, ASMREF_L);
1249#endif
1250}
1251
1252/* -- Calls --------------------------------------------------------------- */
1253
1254/* Collect arguments from CALL* and CARG instructions. */
1255static void asm_collectargs(ASMState *as, IRIns *ir,
1256 const CCallInfo *ci, IRRef *args)
1257{
1258 uint32_t n = CCI_XNARGS(ci);
1259 lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
1260 if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
1261 while (n-- > 1) {
1262 ir = IR(ir->op1);
1263 lua_assert(ir->o == IR_CARG);
1264 args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
1265 }
1266 args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
1267 lua_assert(IR(ir->op1)->o != IR_CARG);
1268}
1269
1270/* Reconstruct CCallInfo flags for CALLX*. */
1271static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
1272{
1273 uint32_t nargs = 0;
1274 if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
1275 IRIns *ira = IR(ir->op1);
1276 nargs++;
1277 while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
1278 }
1279#if LJ_HASFFI
1280 if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
1281 CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
1282 CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
1283 nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
1284#if LJ_TARGET_X86
1285 nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
1286#endif
1287 }
1288#endif
1289 return (nargs | (ir->t.irt << CCI_OTSHIFT));
1290}
1291
1292static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
1293{
1294 const CCallInfo *ci = &lj_ir_callinfo[id];
1295 IRRef args[2];
1296 args[0] = ir->op1;
1297 args[1] = ir->op2;
1298 asm_setupresult(as, ir, ci);
1299 asm_gencall(as, ci, args);
1300}
1301
1302static void asm_call(ASMState *as, IRIns *ir)
1303{
1304 IRRef args[CCI_NARGS_MAX];
1305 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1306 asm_collectargs(as, ir, ci, args);
1307 asm_setupresult(as, ir, ci);
1308 asm_gencall(as, ci, args);
1309}
1310
1311#if !LJ_SOFTFP32
1312static void asm_fppow(ASMState *as, IRIns *ir, IRRef lref, IRRef rref)
1313{
1314 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
1315 IRRef args[2];
1316 args[0] = lref;
1317 args[1] = rref;
1318 asm_setupresult(as, ir, ci);
1319 asm_gencall(as, ci, args);
1320}
1321
1322static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
1323{
1324 IRIns *irp = IR(ir->op1);
1325 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1326 IRIns *irpp = IR(irp->op1);
1327 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1328 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1329 asm_fppow(as, ir, irpp->op1, irp->op2);
1330 return 1;
1331 }
1332 }
1333 return 0;
1334}
1335#endif
1336
1068/* -- PHI and loop handling ----------------------------------------------- */ 1337/* -- PHI and loop handling ----------------------------------------------- */
1069 1338
1070/* Break a PHI cycle by renaming to a free register (evict if needed). */ 1339/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1250,12 +1519,7 @@ static void asm_phi_fixup(ASMState *as)
1250 irt_clearmark(ir->t); 1519 irt_clearmark(ir->t);
1251 /* Left PHI gained a spill slot before the loop? */ 1520 /* Left PHI gained a spill slot before the loop? */
1252 if (ra_hasspill(ir->s)) { 1521 if (ra_hasspill(ir->s)) {
1253 IRRef ren; 1522 ra_addrename(as, r, lref, as->loopsnapno);
1254 lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
1255 ren = tref_ref(lj_ir_emit(as->J));
1256 as->ir = as->T->ir; /* The IR may have been reallocated. */
1257 IR(ren)->r = (uint8_t)r;
1258 IR(ren)->s = SPS_NONE;
1259 } 1523 }
1260 } 1524 }
1261 rset_clear(work, r); 1525 rset_clear(work, r);
@@ -1330,6 +1594,8 @@ static void asm_loop(ASMState *as)
1330#include "lj_asm_x86.h" 1594#include "lj_asm_x86.h"
1331#elif LJ_TARGET_ARM 1595#elif LJ_TARGET_ARM
1332#include "lj_asm_arm.h" 1596#include "lj_asm_arm.h"
1597#elif LJ_TARGET_ARM64
1598#include "lj_asm_arm64.h"
1333#elif LJ_TARGET_PPC 1599#elif LJ_TARGET_PPC
1334#include "lj_asm_ppc.h" 1600#include "lj_asm_ppc.h"
1335#elif LJ_TARGET_MIPS 1601#elif LJ_TARGET_MIPS
@@ -1338,6 +1604,136 @@ static void asm_loop(ASMState *as)
1338#error "Missing assembler for target CPU" 1604#error "Missing assembler for target CPU"
1339#endif 1605#endif
1340 1606
1607/* -- Instruction dispatch ------------------------------------------------ */
1608
1609/* Assemble a single instruction. */
1610static void asm_ir(ASMState *as, IRIns *ir)
1611{
1612 switch ((IROp)ir->o) {
1613 /* Miscellaneous ops. */
1614 case IR_LOOP: asm_loop(as); break;
1615 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1616 case IR_USE:
1617 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1618 case IR_PHI: asm_phi(as, ir); break;
1619 case IR_HIOP: asm_hiop(as, ir); break;
1620 case IR_GCSTEP: asm_gcstep(as, ir); break;
1621 case IR_PROF: asm_prof(as, ir); break;
1622
1623 /* Guarded assertions. */
1624 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1625 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1626 case IR_ABC:
1627 asm_comp(as, ir);
1628 break;
1629 case IR_EQ: case IR_NE:
1630 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1631 as->curins--;
1632 asm_href(as, ir-1, (IROp)ir->o);
1633 } else {
1634 asm_equal(as, ir);
1635 }
1636 break;
1637
1638 case IR_RETF: asm_retf(as, ir); break;
1639
1640 /* Bit ops. */
1641 case IR_BNOT: asm_bnot(as, ir); break;
1642 case IR_BSWAP: asm_bswap(as, ir); break;
1643 case IR_BAND: asm_band(as, ir); break;
1644 case IR_BOR: asm_bor(as, ir); break;
1645 case IR_BXOR: asm_bxor(as, ir); break;
1646 case IR_BSHL: asm_bshl(as, ir); break;
1647 case IR_BSHR: asm_bshr(as, ir); break;
1648 case IR_BSAR: asm_bsar(as, ir); break;
1649 case IR_BROL: asm_brol(as, ir); break;
1650 case IR_BROR: asm_bror(as, ir); break;
1651
1652 /* Arithmetic ops. */
1653 case IR_ADD: asm_add(as, ir); break;
1654 case IR_SUB: asm_sub(as, ir); break;
1655 case IR_MUL: asm_mul(as, ir); break;
1656 case IR_MOD: asm_mod(as, ir); break;
1657 case IR_NEG: asm_neg(as, ir); break;
1658#if LJ_SOFTFP32
1659 case IR_DIV: case IR_POW: case IR_ABS:
1660 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
1661 lua_assert(0); /* Unused for LJ_SOFTFP32. */
1662 break;
1663#else
1664 case IR_DIV: asm_div(as, ir); break;
1665 case IR_POW: asm_pow(as, ir); break;
1666 case IR_ABS: asm_abs(as, ir); break;
1667 case IR_ATAN2: asm_atan2(as, ir); break;
1668 case IR_LDEXP: asm_ldexp(as, ir); break;
1669 case IR_FPMATH: asm_fpmath(as, ir); break;
1670 case IR_TOBIT: asm_tobit(as, ir); break;
1671#endif
1672 case IR_MIN: asm_min(as, ir); break;
1673 case IR_MAX: asm_max(as, ir); break;
1674
1675 /* Overflow-checking arithmetic ops. */
1676 case IR_ADDOV: asm_addov(as, ir); break;
1677 case IR_SUBOV: asm_subov(as, ir); break;
1678 case IR_MULOV: asm_mulov(as, ir); break;
1679
1680 /* Memory references. */
1681 case IR_AREF: asm_aref(as, ir); break;
1682 case IR_HREF: asm_href(as, ir, 0); break;
1683 case IR_HREFK: asm_hrefk(as, ir); break;
1684 case IR_NEWREF: asm_newref(as, ir); break;
1685 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1686 case IR_FREF: asm_fref(as, ir); break;
1687 case IR_STRREF: asm_strref(as, ir); break;
1688 case IR_LREF: asm_lref(as, ir); break;
1689
1690 /* Loads and stores. */
1691 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1692 asm_ahuvload(as, ir);
1693 break;
1694 case IR_FLOAD: asm_fload(as, ir); break;
1695 case IR_XLOAD: asm_xload(as, ir); break;
1696 case IR_SLOAD: asm_sload(as, ir); break;
1697
1698 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1699 case IR_FSTORE: asm_fstore(as, ir); break;
1700 case IR_XSTORE: asm_xstore(as, ir); break;
1701
1702 /* Allocations. */
1703 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1704 case IR_TNEW: asm_tnew(as, ir); break;
1705 case IR_TDUP: asm_tdup(as, ir); break;
1706 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1707
1708 /* Buffer operations. */
1709 case IR_BUFHDR: asm_bufhdr(as, ir); break;
1710 case IR_BUFPUT: asm_bufput(as, ir); break;
1711 case IR_BUFSTR: asm_bufstr(as, ir); break;
1712
1713 /* Write barriers. */
1714 case IR_TBAR: asm_tbar(as, ir); break;
1715 case IR_OBAR: asm_obar(as, ir); break;
1716
1717 /* Type conversions. */
1718 case IR_CONV: asm_conv(as, ir); break;
1719 case IR_TOSTR: asm_tostr(as, ir); break;
1720 case IR_STRTO: asm_strto(as, ir); break;
1721
1722 /* Calls. */
1723 case IR_CALLA:
1724 as->gcsteps++;
1725 /* fallthrough */
1726 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1727 case IR_CALLXS: asm_callx(as, ir); break;
1728 case IR_CARG: break;
1729
1730 default:
1731 setintV(&as->J->errinfo, ir->o);
1732 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1733 break;
1734 }
1735}
1736
1341/* -- Head of trace ------------------------------------------------------- */ 1737/* -- Head of trace ------------------------------------------------------- */
1342 1738
1343/* Head of a root trace. */ 1739/* Head of a root trace. */
@@ -1536,7 +1932,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
1536 SnapEntry sn = map[n-1]; 1932 SnapEntry sn = map[n-1];
1537 if ((sn & SNAP_FRAME)) { 1933 if ((sn & SNAP_FRAME)) {
1538 *gotframe = 1; 1934 *gotframe = 1;
1539 return snap_slot(sn); 1935 return snap_slot(sn) - LJ_FR2;
1540 } 1936 }
1541 } 1937 }
1542 return 0; 1938 return 0;
@@ -1556,19 +1952,23 @@ static void asm_tail_link(ASMState *as)
1556 1952
1557 if (as->T->link == 0) { 1953 if (as->T->link == 0) {
1558 /* Setup fixed registers for exit to interpreter. */ 1954 /* Setup fixed registers for exit to interpreter. */
1559 const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); 1955 const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
1560 int32_t mres; 1956 int32_t mres;
1561 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ 1957 if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
1562 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; 1958 BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
1563 if (bc_isret(bc_op(*retpc))) 1959 if (bc_isret(bc_op(*retpc)))
1564 pc = retpc; 1960 pc = retpc;
1565 } 1961 }
1962#if LJ_GC64
1963 emit_loadu64(as, RID_LPC, u64ptr(pc));
1964#else
1566 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); 1965 ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
1567 ra_allockreg(as, i32ptr(pc), RID_LPC); 1966 ra_allockreg(as, i32ptr(pc), RID_LPC);
1568 mres = (int32_t)(snap->nslots - baseslot); 1967#endif
1968 mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
1569 switch (bc_op(*pc)) { 1969 switch (bc_op(*pc)) {
1570 case BC_CALLM: case BC_CALLMT: 1970 case BC_CALLM: case BC_CALLMT:
1571 mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; 1971 mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
1572 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; 1972 case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
1573 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; 1973 case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
1574 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; 1974 default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1580,6 +1980,11 @@ static void asm_tail_link(ASMState *as)
1580 } 1980 }
1581 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); 1981 emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
1582 1982
1983 if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
1984 setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
1985 IR(as->J->ktrace)->o = IR_KGC;
1986 }
1987
1583 /* Sync the interpreter state with the on-trace state. */ 1988 /* Sync the interpreter state with the on-trace state. */
1584 asm_stack_restore(as, snap); 1989 asm_stack_restore(as, snap);
1585 1990
@@ -1605,17 +2010,23 @@ static void asm_setup_regsp(ASMState *as)
1605 ra_setup(as); 2010 ra_setup(as);
1606 2011
1607 /* Clear reg/sp for constants. */ 2012 /* Clear reg/sp for constants. */
1608 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) 2013 for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
1609 ir->prev = REGSP_INIT; 2014 ir->prev = REGSP_INIT;
2015 if (irt_is64(ir->t) && ir->o != IR_KNULL) {
2016#if LJ_GC64
2017 /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
2018 ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
2019#else
2020 /* Make life easier for backends by putting address of constant in i. */
2021 ir->i = (int32_t)(intptr_t)(ir+1);
2022#endif
2023 ir++;
2024 }
2025 }
1610 2026
1611 /* REF_BASE is used for implicit references to the BASE register. */ 2027 /* REF_BASE is used for implicit references to the BASE register. */
1612 lastir->prev = REGSP_HINT(RID_BASE); 2028 lastir->prev = REGSP_HINT(RID_BASE);
1613 2029
1614 ir = IR(nins-1);
1615 if (ir->o == IR_RENAME) {
1616 do { ir--; nins--; } while (ir->o == IR_RENAME);
1617 T->nins = nins; /* Remove any renames left over from ASM restart. */
1618 }
1619 as->snaprename = nins; 2030 as->snaprename = nins;
1620 as->snapref = nins; 2031 as->snapref = nins;
1621 as->snapno = T->nsnap; 2032 as->snapno = T->nsnap;
@@ -1676,7 +2087,7 @@ static void asm_setup_regsp(ASMState *as)
1676 as->modset |= RSET_SCRATCH; 2087 as->modset |= RSET_SCRATCH;
1677 continue; 2088 continue;
1678 } 2089 }
1679 case IR_CALLN: case IR_CALLL: case IR_CALLS: { 2090 case IR_CALLN: case IR_CALLA: case IR_CALLL: case IR_CALLS: {
1680 const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; 2091 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
1681 ir->prev = asm_setup_call_slots(as, ir, ci); 2092 ir->prev = asm_setup_call_slots(as, ir, ci);
1682 if (inloop) 2093 if (inloop)
@@ -1701,8 +2112,8 @@ static void asm_setup_regsp(ASMState *as)
1701 ir->prev = REGSP_HINT(RID_FPRET); 2112 ir->prev = REGSP_HINT(RID_FPRET);
1702 continue; 2113 continue;
1703 } 2114 }
1704 /* fallthrough */
1705#endif 2115#endif
2116 /* fallthrough */
1706 case IR_CALLN: case IR_CALLXS: 2117 case IR_CALLN: case IR_CALLXS:
1707#if LJ_SOFTFP 2118#if LJ_SOFTFP
1708 case IR_MIN: case IR_MAX: 2119 case IR_MIN: case IR_MAX:
@@ -1721,11 +2132,23 @@ static void asm_setup_regsp(ASMState *as)
1721#endif 2132#endif
1722 /* fallthrough */ 2133 /* fallthrough */
1723 /* C calls evict all scratch regs and return results in RID_RET. */ 2134 /* C calls evict all scratch regs and return results in RID_RET. */
1724 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: 2135 case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
1725 if (REGARG_NUMGPR < 3 && as->evenspill < 3) 2136 if (REGARG_NUMGPR < 3 && as->evenspill < 3)
1726 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ 2137 as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
2138#if LJ_TARGET_X86 && LJ_HASFFI
2139 if (0) {
2140 case IR_CNEW:
2141 if (ir->op2 != REF_NIL && as->evenspill < 4)
2142 as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
2143 }
2144 /* fallthrough */
2145#else
2146 /* fallthrough */
2147 case IR_CNEW:
2148#endif
1727 /* fallthrough */ 2149 /* fallthrough */
1728 case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: 2150 case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
2151 case IR_BUFSTR:
1729 ir->prev = REGSP_HINT(RID_RET); 2152 ir->prev = REGSP_HINT(RID_RET);
1730 if (inloop) 2153 if (inloop)
1731 as->modset = RSET_SCRATCH; 2154 as->modset = RSET_SCRATCH;
@@ -1734,21 +2157,27 @@ static void asm_setup_regsp(ASMState *as)
1734 if (inloop) 2157 if (inloop)
1735 as->modset = RSET_SCRATCH; 2158 as->modset = RSET_SCRATCH;
1736 break; 2159 break;
1737#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP 2160#if !LJ_SOFTFP
1738 case IR_ATAN2: case IR_LDEXP: 2161 case IR_ATAN2:
2162#if LJ_TARGET_X86
2163 if (as->evenspill < 4) /* Leave room to call atan2(). */
2164 as->evenspill = 4;
2165#endif
2166#if !LJ_TARGET_X86ORX64
2167 case IR_LDEXP:
2168#endif
1739#endif 2169#endif
2170 /* fallthrough */
1740 case IR_POW: 2171 case IR_POW:
1741 if (!LJ_SOFTFP && irt_isnum(ir->t)) { 2172 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1742#if LJ_TARGET_X86ORX64
1743 ir->prev = REGSP_HINT(RID_XMM0);
1744 if (inloop) 2173 if (inloop)
1745 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); 2174 as->modset |= RSET_SCRATCH;
2175#if LJ_TARGET_X86
2176 break;
1746#else 2177#else
1747 ir->prev = REGSP_HINT(RID_FPRET); 2178 ir->prev = REGSP_HINT(RID_FPRET);
1748 if (inloop)
1749 as->modset |= RSET_SCRATCH;
1750#endif
1751 continue; 2179 continue;
2180#endif
1752 } 2181 }
1753 /* fallthrough */ /* for integer POW */ 2182 /* fallthrough */ /* for integer POW */
1754 case IR_DIV: case IR_MOD: 2183 case IR_DIV: case IR_MOD:
@@ -1761,31 +2190,34 @@ static void asm_setup_regsp(ASMState *as)
1761 break; 2190 break;
1762 case IR_FPMATH: 2191 case IR_FPMATH:
1763#if LJ_TARGET_X86ORX64 2192#if LJ_TARGET_X86ORX64
1764 if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ 2193 if (ir->op2 <= IRFPM_TRUNC) {
1765 ir->prev = REGSP_HINT(RID_XMM0); 2194 if (!(as->flags & JIT_F_SSE4_1)) {
1766#if !LJ_64 2195 ir->prev = REGSP_HINT(RID_XMM0);
1767 if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ 2196 if (inloop)
2197 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
2198 continue;
2199 }
2200 break;
2201 } else if (ir->op2 == IRFPM_EXP2 && !LJ_64) {
2202 if (as->evenspill < 4) /* Leave room to call pow(). */
1768 as->evenspill = 4; 2203 as->evenspill = 4;
1769#endif
1770 if (inloop)
1771 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1772 continue;
1773 } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
1774 ir->prev = REGSP_HINT(RID_XMM0);
1775 if (inloop)
1776 as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
1777 continue;
1778 } 2204 }
2205#endif
2206 if (inloop)
2207 as->modset |= RSET_SCRATCH;
2208#if LJ_TARGET_X86
1779 break; 2209 break;
1780#else 2210#else
1781 ir->prev = REGSP_HINT(RID_FPRET); 2211 ir->prev = REGSP_HINT(RID_FPRET);
1782 if (inloop)
1783 as->modset |= RSET_SCRATCH;
1784 continue; 2212 continue;
1785#endif 2213#endif
1786#if LJ_TARGET_X86ORX64 2214#if LJ_TARGET_X86ORX64
1787 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ 2215 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */
1788 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: 2216 case IR_BSHL: case IR_BSHR: case IR_BSAR:
2217 if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
2218 break;
2219 /* fallthrough */
2220 case IR_BROL: case IR_BROR:
1789 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { 2221 if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
1790 IR(ir->op2)->r = REGSP_HINT(RID_ECX); 2222 IR(ir->op2)->r = REGSP_HINT(RID_ECX);
1791 if (inloop) 2223 if (inloop)
@@ -1831,14 +2263,25 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1831 ASMState *as = &as_; 2263 ASMState *as = &as_;
1832 MCode *origtop; 2264 MCode *origtop;
1833 2265
2266 /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
2267 {
2268 IRRef nins = T->nins;
2269 IRIns *ir = &T->ir[nins-1];
2270 if (ir->o == IR_NOP || ir->o == IR_RENAME) {
2271 do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
2272 T->nins = nins;
2273 }
2274 }
2275
1834 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 2276 /* Ensure an initialized instruction beyond the last one for HIOP checks. */
1835 J->cur.nins = lj_ir_nextins(J); 2277 /* This also allows one RENAME to be added without reallocating curfinal. */
1836 J->cur.ir[J->cur.nins].o = IR_NOP; 2278 as->orignins = lj_ir_nextins(J);
2279 J->cur.ir[as->orignins].o = IR_NOP;
1837 2280
1838 /* Setup initial state. Copy some fields to reduce indirections. */ 2281 /* Setup initial state. Copy some fields to reduce indirections. */
1839 as->J = J; 2282 as->J = J;
1840 as->T = T; 2283 as->T = T;
1841 as->ir = T->ir; 2284 J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
1842 as->flags = J->flags; 2285 as->flags = J->flags;
1843 as->loopref = J->loopref; 2286 as->loopref = J->loopref;
1844 as->realign = NULL; 2287 as->realign = NULL;
@@ -1851,12 +2294,41 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1851 as->mclim = as->mcbot + MCLIM_REDZONE; 2294 as->mclim = as->mcbot + MCLIM_REDZONE;
1852 asm_setup_target(as); 2295 asm_setup_target(as);
1853 2296
1854 do { 2297 /*
2298 ** This is a loop, because the MCode may have to be (re-)assembled
2299 ** multiple times:
2300 **
2301 ** 1. as->realign is set (and the assembly aborted), if the arch-specific
2302 ** backend wants the MCode to be aligned differently.
2303 **
2304 ** This is currently only the case on x86/x64, where small loops get
2305 ** an aligned loop body plus a short branch. Not much effort is wasted,
2306 ** because the abort happens very quickly and only once.
2307 **
2308 ** 2. The IR is immovable, since the MCode embeds pointers to various
2309 ** constants inside the IR. But RENAMEs may need to be added to the IR
2310 ** during assembly, which might grow and reallocate the IR. We check
2311 ** at the end if the IR (in J->cur.ir) has actually grown, resize the
2312 ** copy (in J->curfinal.ir) and try again.
2313 **
2314 ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
2315 ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
2316 ** always have one spare slot in the IR (see above), which means we
2317 ** have to redo the assembly for only ~2% of all traces.
2318 **
2319 ** Very, very rarely, this needs to be done repeatedly, since the
2320 ** location of constants inside the IR (actually, reachability from
2321 ** a global pointer) may affect register allocation and thus the
2322 ** number of RENAMEs.
2323 */
2324 for (;;) {
1855 as->mcp = as->mctop; 2325 as->mcp = as->mctop;
1856#ifdef LUA_USE_ASSERT 2326#ifdef LUA_USE_ASSERT
1857 as->mcp_prev = as->mcp; 2327 as->mcp_prev = as->mcp;
1858#endif 2328#endif
1859 as->curins = T->nins; 2329 as->ir = J->curfinal->ir; /* Use the copied IR. */
2330 as->curins = J->cur.nins = as->orignins;
2331
1860 RA_DBG_START(); 2332 RA_DBG_START();
1861 RA_DBGX((as, "===== STOP =====")); 2333 RA_DBGX((as, "===== STOP ====="));
1862 2334
@@ -1884,22 +2356,40 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1884 checkmclim(as); 2356 checkmclim(as);
1885 asm_ir(as, ir); 2357 asm_ir(as, ir);
1886 } 2358 }
1887 } while (as->realign); /* Retry in case the MCode needs to be realigned. */
1888 2359
1889 /* Emit head of trace. */ 2360 if (as->realign && J->curfinal->nins >= T->nins)
1890 RA_DBG_REF(); 2361 continue; /* Retry in case only the MCode needs to be realigned. */
1891 checkmclim(as); 2362
1892 if (as->gcsteps > 0) { 2363 /* Emit head of trace. */
1893 as->curins = as->T->snap[0].ref; 2364 RA_DBG_REF();
1894 asm_snap_prep(as); /* The GC check is a guard. */ 2365 checkmclim(as);
1895 asm_gc_check(as); 2366 if (as->gcsteps > 0) {
2367 as->curins = as->T->snap[0].ref;
2368 asm_snap_prep(as); /* The GC check is a guard. */
2369 asm_gc_check(as);
2370 as->curins = as->stopins;
2371 }
2372 ra_evictk(as);
2373 if (as->parent)
2374 asm_head_side(as);
2375 else
2376 asm_head_root(as);
2377 asm_phi_fixup(as);
2378
2379 if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
2380 lua_assert(J->curfinal->nk == T->nk);
2381 memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
2382 (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
2383 T->nins = J->curfinal->nins;
2384 break; /* Done. */
2385 }
2386
2387 /* Otherwise try again with a bigger IR. */
2388 lj_trace_free(J2G(J), J->curfinal);
2389 J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
2390 J->curfinal = lj_trace_alloc(J->L, T);
2391 as->realign = NULL;
1896 } 2392 }
1897 ra_evictk(as);
1898 if (as->parent)
1899 asm_head_side(as);
1900 else
1901 asm_head_root(as);
1902 asm_phi_fixup(as);
1903 2393
1904 RA_DBGX((as, "===== START ====")); 2394 RA_DBGX((as, "===== START ===="));
1905 RA_DBG_FLUSH(); 2395 RA_DBG_FLUSH();
@@ -1912,6 +2402,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1912 if (!as->loopref) 2402 if (!as->loopref)
1913 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ 2403 asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
1914 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); 2404 T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
2405#if LJ_TARGET_MCODE_FIXUP
2406 asm_mcode_fixup(T->mcode, T->szmcode);
2407#endif
1915 lj_mcode_sync(T->mcode, origtop); 2408 lj_mcode_sync(T->mcode, origtop);
1916} 2409}
1917 2410
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 087530b2..9d055c81 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -338,7 +338,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
338/* Generate a call to a C function. */ 338/* Generate a call to a C function. */
339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 339static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
340{ 340{
341 uint32_t n, nargs = CCI_NARGS(ci); 341 uint32_t n, nargs = CCI_XNARGS(ci);
342 int32_t ofs = 0; 342 int32_t ofs = 0;
343#if LJ_SOFTFP 343#if LJ_SOFTFP
344 Reg gpr = REGARG_FIRSTGPR; 344 Reg gpr = REGARG_FIRSTGPR;
@@ -453,15 +453,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
453 UNUSED(ci); 453 UNUSED(ci);
454} 454}
455 455
456static void asm_call(ASMState *as, IRIns *ir)
457{
458 IRRef args[CCI_NARGS_MAX];
459 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
460 asm_collectargs(as, ir, ci, args);
461 asm_setupresult(as, ir, ci);
462 asm_gencall(as, ci, args);
463}
464
465static void asm_callx(ASMState *as, IRIns *ir) 456static void asm_callx(ASMState *as, IRIns *ir)
466{ 457{
467 IRRef args[CCI_NARGS_MAX*2]; 458 IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +481,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
490{ 481{
491 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 482 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
492 void *pc = ir_kptr(IR(ir->op2)); 483 void *pc = ir_kptr(IR(ir->op2));
493 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 484 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
494 as->topslot -= (BCReg)delta; 485 as->topslot -= (BCReg)delta;
495 if ((int32_t)as->topslot < 0) as->topslot = 0; 486 if ((int32_t)as->topslot < 0) as->topslot = 0;
496 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 487 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -601,31 +592,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
601 } 592 }
602} 593}
603 594
604#if !LJ_SOFTFP && LJ_HASFFI
605static void asm_conv64(ASMState *as, IRIns *ir)
606{
607 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
608 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
609 IRCallID id;
610 CCallInfo ci;
611 IRRef args[2];
612 args[0] = (ir-1)->op1;
613 args[1] = ir->op1;
614 if (st == IRT_NUM || st == IRT_FLOAT) {
615 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
616 ir--;
617 } else {
618 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
619 }
620 ci = lj_ir_callinfo[id];
621#if !LJ_ABI_SOFTFP
622 ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
623#endif
624 asm_setupresult(as, ir, &ci);
625 asm_gencall(as, &ci, args);
626}
627#endif
628
629static void asm_strto(ASMState *as, IRIns *ir) 595static void asm_strto(ASMState *as, IRIns *ir)
630{ 596{
631 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 597 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,6 +655,8 @@ static void asm_strto(ASMState *as, IRIns *ir)
689 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); 655 emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
690} 656}
691 657
658/* -- Memory references --------------------------------------------------- */
659
692/* Get pointer to TValue. */ 660/* Get pointer to TValue. */
693static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 661static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
694{ 662{
@@ -714,7 +682,7 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
714 Reg src = ra_alloc1(as, ref, allow); 682 Reg src = ra_alloc1(as, ref, allow);
715 emit_lso(as, ARMI_STR, src, RID_SP, 0); 683 emit_lso(as, ARMI_STR, src, RID_SP, 0);
716 } 684 }
717 if ((ir+1)->o == IR_HIOP) 685 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
718 type = ra_alloc1(as, ref+1, allow); 686 type = ra_alloc1(as, ref+1, allow);
719 else 687 else
720 type = ra_allock(as, irt_toitype(ir->t), allow); 688 type = ra_allock(as, irt_toitype(ir->t), allow);
@@ -722,27 +690,6 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
722 } 690 }
723} 691}
724 692
725static void asm_tostr(ASMState *as, IRIns *ir)
726{
727 IRRef args[2];
728 args[0] = ASMREF_L;
729 as->gcsteps++;
730 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
731 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
732 args[1] = ASMREF_TMP1; /* const lua_Number * */
733 asm_setupresult(as, ir, ci); /* GCstr * */
734 asm_gencall(as, ci, args);
735 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
736 } else {
737 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
738 args[1] = ir->op1; /* int32_t k */
739 asm_setupresult(as, ir, ci); /* GCstr * */
740 asm_gencall(as, ci, args);
741 }
742}
743
744/* -- Memory references --------------------------------------------------- */
745
746static void asm_aref(ASMState *as, IRIns *ir) 693static void asm_aref(ASMState *as, IRIns *ir)
747{ 694{
748 Reg dest = ra_dest(as, ir, RSET_GPR); 695 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -960,20 +907,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
960 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); 907 emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
961} 908}
962 909
963static void asm_newref(ASMState *as, IRIns *ir)
964{
965 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
966 IRRef args[3];
967 if (ir->r == RID_SINK)
968 return;
969 args[0] = ASMREF_L; /* lua_State *L */
970 args[1] = ir->op1; /* GCtab *t */
971 args[2] = ASMREF_TMP1; /* cTValue *key */
972 asm_setupresult(as, ir, ci); /* TValue * */
973 asm_gencall(as, ci, args);
974 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
975}
976
977static void asm_uref(ASMState *as, IRIns *ir) 910static void asm_uref(ASMState *as, IRIns *ir)
978{ 911{
979 Reg dest = ra_dest(as, ir, RSET_GPR); 912 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1064,22 +997,26 @@ static ARMIns asm_fxstoreins(IRIns *ir)
1064 997
1065static void asm_fload(ASMState *as, IRIns *ir) 998static void asm_fload(ASMState *as, IRIns *ir)
1066{ 999{
1067 Reg dest = ra_dest(as, ir, RSET_GPR); 1000 if (ir->op1 == REF_NIL) {
1068 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); 1001 lua_assert(!ra_used(ir)); /* We can end up here if DCE is turned off. */
1069 ARMIns ai = asm_fxloadins(ir); 1002 } else {
1070 int32_t ofs; 1003 Reg dest = ra_dest(as, ir, RSET_GPR);
1071 if (ir->op2 == IRFL_TAB_ARRAY) { 1004 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
1072 ofs = asm_fuseabase(as, ir->op1); 1005 ARMIns ai = asm_fxloadins(ir);
1073 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1006 int32_t ofs;
1074 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); 1007 if (ir->op2 == IRFL_TAB_ARRAY) {
1075 return; 1008 ofs = asm_fuseabase(as, ir->op1);
1009 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1010 emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
1011 return;
1012 }
1076 } 1013 }
1014 ofs = field_ofs[ir->op2];
1015 if ((ai & 0x04000000))
1016 emit_lso(as, ai, dest, idx, ofs);
1017 else
1018 emit_lsox(as, ai, dest, idx, ofs);
1077 } 1019 }
1078 ofs = field_ofs[ir->op2];
1079 if ((ai & 0x04000000))
1080 emit_lso(as, ai, dest, idx, ofs);
1081 else
1082 emit_lsox(as, ai, dest, idx, ofs);
1083} 1020}
1084 1021
1085static void asm_fstore(ASMState *as, IRIns *ir) 1022static void asm_fstore(ASMState *as, IRIns *ir)
@@ -1105,7 +1042,7 @@ static void asm_xload(ASMState *as, IRIns *ir)
1105 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1042 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
1106} 1043}
1107 1044
1108static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1045static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
1109{ 1046{
1110 if (ir->r != RID_SINK) { 1047 if (ir->r != RID_SINK) {
1111 Reg src = ra_alloc1(as, ir->op2, 1048 Reg src = ra_alloc1(as, ir->op2,
@@ -1115,6 +1052,8 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
1115 } 1052 }
1116} 1053}
1117 1054
1055#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1056
1118static void asm_ahuvload(ASMState *as, IRIns *ir) 1057static void asm_ahuvload(ASMState *as, IRIns *ir)
1119{ 1058{
1120 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); 1059 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1272,19 +1211,16 @@ dotypecheck:
1272static void asm_cnew(ASMState *as, IRIns *ir) 1211static void asm_cnew(ASMState *as, IRIns *ir)
1273{ 1212{
1274 CTState *cts = ctype_ctsG(J2G(as->J)); 1213 CTState *cts = ctype_ctsG(J2G(as->J));
1275 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1214 CTypeID id = (CTypeID)IR(ir->op1)->i;
1276 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1215 CTSize sz;
1277 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1216 CTInfo info = lj_ctype_info(cts, id, &sz);
1278 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1217 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1279 IRRef args[2]; 1218 IRRef args[4];
1280 RegSet allow = (RSET_GPR & ~RSET_SCRATCH); 1219 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1281 RegSet drop = RSET_SCRATCH; 1220 RegSet drop = RSET_SCRATCH;
1282 lua_assert(sz != CTSIZE_INVALID); 1221 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1283 1222
1284 args[0] = ASMREF_L; /* lua_State *L */
1285 args[1] = ASMREF_TMP1; /* MSize size */
1286 as->gcsteps++; 1223 as->gcsteps++;
1287
1288 if (ra_hasreg(ir->r)) 1224 if (ra_hasreg(ir->r))
1289 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1225 rset_clear(drop, ir->r); /* Dest reg handled below. */
1290 ra_evictset(as, drop); 1226 ra_evictset(as, drop);
@@ -1306,16 +1242,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1306 if (ofs == sizeof(GCcdata)) break; 1242 if (ofs == sizeof(GCcdata)) break;
1307 ofs -= 4; ir--; 1243 ofs -= 4; ir--;
1308 } 1244 }
1245 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1246 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1247 args[0] = ASMREF_L; /* lua_State *L */
1248 args[1] = ir->op1; /* CTypeID id */
1249 args[2] = ir->op2; /* CTSize sz */
1250 args[3] = ASMREF_TMP1; /* CTSize align */
1251 asm_gencall(as, ci, args);
1252 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1253 return;
1309 } 1254 }
1255
1310 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1256 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1311 { 1257 {
1312 uint32_t k = emit_isk12(ARMI_MOV, ctypeid); 1258 uint32_t k = emit_isk12(ARMI_MOV, id);
1313 Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); 1259 Reg r = k ? RID_R1 : ra_allock(as, id, allow);
1314 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); 1260 emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1315 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); 1261 emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1316 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); 1262 emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
1317 if (k) emit_d(as, ARMI_MOV^k, RID_R1); 1263 if (k) emit_d(as, ARMI_MOV^k, RID_R1);
1318 } 1264 }
1265 args[0] = ASMREF_L; /* lua_State *L */
1266 args[1] = ASMREF_TMP1; /* MSize size */
1319 asm_gencall(as, ci, args); 1267 asm_gencall(as, ci, args);
1320 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1268 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1321 ra_releasetmp(as, ASMREF_TMP1)); 1269 ra_releasetmp(as, ASMREF_TMP1));
@@ -1392,23 +1340,38 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
1392 emit_dm(as, ai, (dest & 15), (left & 15)); 1340 emit_dm(as, ai, (dest & 15), (left & 15));
1393} 1341}
1394 1342
1395static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1343static void asm_callround(ASMState *as, IRIns *ir, int id)
1396{ 1344{
1397 IRIns *irp = IR(ir->op1); 1345 /* The modified regs must match with the *.dasc implementation. */
1398 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1346 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1399 IRIns *irpp = IR(irp->op1); 1347 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1400 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1348 RegSet of;
1401 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1349 Reg dest, src;
1402 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1350 ra_evictset(as, drop);
1403 IRRef args[2]; 1351 dest = ra_dest(as, ir, RSET_FPR);
1404 args[0] = irpp->op1; 1352 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1405 args[1] = irp->op2; 1353 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1406 asm_setupresult(as, ir, ci); 1354 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1407 asm_gencall(as, ci, args); 1355 (void *)lj_vm_trunc_sf);
1408 return 1; 1356 /* Workaround to protect argument GPRs from being used for remat. */
1409 } 1357 of = as->freeset;
1410 } 1358 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1411 return 0; 1359 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1360 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1361 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1362 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1363}
1364
1365static void asm_fpmath(ASMState *as, IRIns *ir)
1366{
1367 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1368 return;
1369 if (ir->op2 <= IRFPM_TRUNC)
1370 asm_callround(as, ir, ir->op2);
1371 else if (ir->op2 == IRFPM_SQRT)
1372 asm_fpunary(as, ir, ARMI_VSQRT_D);
1373 else
1374 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1412} 1375}
1413#endif 1376#endif
1414 1377
@@ -1474,19 +1437,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
1474 asm_intop(as, ir, asm_drop_cmp0(as, ai)); 1437 asm_intop(as, ir, asm_drop_cmp0(as, ai));
1475} 1438}
1476 1439
1477static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1478{
1479 ai = asm_drop_cmp0(as, ai);
1480 if (ir->op2 == 0) {
1481 Reg dest = ra_dest(as, ir, RSET_GPR);
1482 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1483 emit_d(as, ai^m, dest);
1484 } else {
1485 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1486 asm_intop(as, ir, ai);
1487 }
1488}
1489
1490static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) 1440static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
1491{ 1441{
1492 Reg dest = ra_dest(as, ir, RSET_GPR); 1442 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1502,20 @@ static void asm_mul(ASMState *as, IRIns *ir)
1552 asm_intmul(as, ir); 1502 asm_intmul(as, ir);
1553} 1503}
1554 1504
1505#define asm_addov(as, ir) asm_add(as, ir)
1506#define asm_subov(as, ir) asm_sub(as, ir)
1507#define asm_mulov(as, ir) asm_mul(as, ir)
1508
1509#if !LJ_SOFTFP
1510#define asm_div(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
1511#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1512#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
1513#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1514#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1515#endif
1516
1517#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1518
1555static void asm_neg(ASMState *as, IRIns *ir) 1519static void asm_neg(ASMState *as, IRIns *ir)
1556{ 1520{
1557#if !LJ_SOFTFP 1521#if !LJ_SOFTFP
@@ -1563,41 +1527,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
1563 asm_intneg(as, ir, ARMI_RSB); 1527 asm_intneg(as, ir, ARMI_RSB);
1564} 1528}
1565 1529
1566static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 1530static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
1567{ 1531{
1568 const CCallInfo *ci = &lj_ir_callinfo[id]; 1532 ai = asm_drop_cmp0(as, ai);
1569 IRRef args[2]; 1533 if (ir->op2 == 0) {
1570 args[0] = ir->op1; 1534 Reg dest = ra_dest(as, ir, RSET_GPR);
1571 args[1] = ir->op2; 1535 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1572 asm_setupresult(as, ir, ci); 1536 emit_d(as, ai^m, dest);
1573 asm_gencall(as, ci, args); 1537 } else {
1538 /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
1539 asm_intop(as, ir, ai);
1540 }
1574} 1541}
1575 1542
1576#if !LJ_SOFTFP 1543#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
1577static void asm_callround(ASMState *as, IRIns *ir, int id)
1578{
1579 /* The modified regs must match with the *.dasc implementation. */
1580 RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
1581 RID2RSET(RID_R3)|RID2RSET(RID_R12);
1582 RegSet of;
1583 Reg dest, src;
1584 ra_evictset(as, drop);
1585 dest = ra_dest(as, ir, RSET_FPR);
1586 emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
1587 emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
1588 id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
1589 (void *)lj_vm_trunc_sf);
1590 /* Workaround to protect argument GPRs from being used for remat. */
1591 of = as->freeset;
1592 as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
1593 as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
1594 src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
1595 as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
1596 emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
1597}
1598#endif
1599 1544
1600static void asm_bitswap(ASMState *as, IRIns *ir) 1545static void asm_bswap(ASMState *as, IRIns *ir)
1601{ 1546{
1602 Reg dest = ra_dest(as, ir, RSET_GPR); 1547 Reg dest = ra_dest(as, ir, RSET_GPR);
1603 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 1548 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1559,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1614 } 1559 }
1615} 1560}
1616 1561
1562#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
1563#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
1564#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
1565
1617static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) 1566static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1618{ 1567{
1619 if (irref_isk(ir->op2)) { /* Constant shifts. */ 1568 if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1631,6 +1580,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
1631 } 1580 }
1632} 1581}
1633 1582
1583#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
1584#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
1585#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
1586#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
1587#define asm_brol(as, ir) lua_assert(0)
1588
1634static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) 1589static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1635{ 1590{
1636 uint32_t kcmp = 0, kmov = 0; 1591 uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1659,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
1704 asm_intmin_max(as, ir, cc); 1659 asm_intmin_max(as, ir, cc);
1705} 1660}
1706 1661
1662#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1663#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1664
1707/* -- Comparisons --------------------------------------------------------- */ 1665/* -- Comparisons --------------------------------------------------------- */
1708 1666
1709/* Map of comparisons to flags. ORDER IR. */ 1667/* Map of comparisons to flags. ORDER IR. */
@@ -1819,6 +1777,18 @@ notst:
1819 as->flagmcp = as->mcp; /* Allow elimination of the compare. */ 1777 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1820} 1778}
1821 1779
1780static void asm_comp(ASMState *as, IRIns *ir)
1781{
1782#if !LJ_SOFTFP
1783 if (irt_isnum(ir->t))
1784 asm_fpcomp(as, ir);
1785 else
1786#endif
1787 asm_intcomp(as, ir);
1788}
1789
1790#define asm_equal(as, ir) asm_comp(as, ir)
1791
1822#if LJ_HASFFI 1792#if LJ_HASFFI
1823/* 64 bit integer comparisons. */ 1793/* 64 bit integer comparisons. */
1824static void asm_int64comp(ASMState *as, IRIns *ir) 1794static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1893,7 +1863,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1893#endif 1863#endif
1894 } else if ((ir-1)->o == IR_XSTORE) { 1864 } else if ((ir-1)->o == IR_XSTORE) {
1895 if ((ir-1)->r != RID_SINK) 1865 if ((ir-1)->r != RID_SINK)
1896 asm_xstore(as, ir, 4); 1866 asm_xstore_(as, ir, 4);
1897 return; 1867 return;
1898 } 1868 }
1899 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1869 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
@@ -1941,6 +1911,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1941#endif 1911#endif
1942} 1912}
1943 1913
1914/* -- Profiling ----------------------------------------------------------- */
1915
1916static void asm_prof(ASMState *as, IRIns *ir)
1917{
1918 UNUSED(ir);
1919 asm_guardcc(as, CC_NE);
1920 emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
1921 emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1922}
1923
1944/* -- Stack handling ------------------------------------------------------ */ 1924/* -- Stack handling ------------------------------------------------------ */
1945 1925
1946/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1926/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1970,7 +1950,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1970 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, 1950 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
1971 (int32_t)offsetof(lua_State, maxstack)); 1951 (int32_t)offsetof(lua_State, maxstack));
1972 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 1952 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1973 int32_t i = i32ptr(&J2G(as->J)->jit_L); 1953 int32_t i = i32ptr(&J2G(as->J)->cur_L);
1974 if (ra_hasspill(irp->s)) 1954 if (ra_hasspill(irp->s))
1975 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); 1955 emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
1976 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); 1956 emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1978,7 +1958,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1978 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ 1958 emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
1979 emit_loadi(as, RID_TMP, (i & ~4095)); 1959 emit_loadi(as, RID_TMP, (i & ~4095));
1980 } else { 1960 } else {
1981 emit_getgl(as, RID_TMP, jit_L); 1961 emit_getgl(as, RID_TMP, cur_L);
1982 } 1962 }
1983} 1963}
1984 1964
@@ -2087,13 +2067,13 @@ static void asm_loop_fixup(ASMState *as)
2087 2067
2088/* -- Head of trace ------------------------------------------------------- */ 2068/* -- Head of trace ------------------------------------------------------- */
2089 2069
2090/* Reload L register from g->jit_L. */ 2070/* Reload L register from g->cur_L. */
2091static void asm_head_lreg(ASMState *as) 2071static void asm_head_lreg(ASMState *as)
2092{ 2072{
2093 IRIns *ir = IR(ASMREF_L); 2073 IRIns *ir = IR(ASMREF_L);
2094 if (ra_used(ir)) { 2074 if (ra_used(ir)) {
2095 Reg r = ra_dest(as, ir, RSET_GPR); 2075 Reg r = ra_dest(as, ir, RSET_GPR);
2096 emit_getgl(as, r, jit_L); 2076 emit_getgl(as, r, cur_L);
2097 ra_evictk(as); 2077 ra_evictk(as);
2098 } 2078 }
2099} 2079}
@@ -2164,143 +2144,13 @@ static void asm_tail_prep(ASMState *as)
2164 *p = 0; /* Prevent load/store merging. */ 2144 *p = 0; /* Prevent load/store merging. */
2165} 2145}
2166 2146
2167/* -- Instruction dispatch ------------------------------------------------ */
2168
2169/* Assemble a single instruction. */
2170static void asm_ir(ASMState *as, IRIns *ir)
2171{
2172 switch ((IROp)ir->o) {
2173 /* Miscellaneous ops. */
2174 case IR_LOOP: asm_loop(as); break;
2175 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2176 case IR_USE:
2177 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2178 case IR_PHI: asm_phi(as, ir); break;
2179 case IR_HIOP: asm_hiop(as, ir); break;
2180 case IR_GCSTEP: asm_gcstep(as, ir); break;
2181
2182 /* Guarded assertions. */
2183 case IR_EQ: case IR_NE:
2184 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
2185 as->curins--;
2186 asm_href(as, ir-1, (IROp)ir->o);
2187 break;
2188 }
2189 /* fallthrough */
2190 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2191 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2192 case IR_ABC:
2193#if !LJ_SOFTFP
2194 if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
2195#endif
2196 asm_intcomp(as, ir);
2197 break;
2198
2199 case IR_RETF: asm_retf(as, ir); break;
2200
2201 /* Bit ops. */
2202 case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
2203 case IR_BSWAP: asm_bitswap(as, ir); break;
2204
2205 case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
2206 case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
2207 case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
2208
2209 case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
2210 case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
2211 case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
2212 case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
2213 case IR_BROL: lua_assert(0); break;
2214
2215 /* Arithmetic ops. */
2216 case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
2217 case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
2218 case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
2219 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2220 case IR_NEG: asm_neg(as, ir); break;
2221
2222#if LJ_SOFTFP
2223 case IR_DIV: case IR_POW: case IR_ABS:
2224 case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
2225 lua_assert(0); /* Unused for LJ_SOFTFP. */
2226 break;
2227#else
2228 case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
2229 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2230 case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
2231 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2232 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2233 case IR_FPMATH:
2234 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2235 break;
2236 if (ir->op2 <= IRFPM_TRUNC)
2237 asm_callround(as, ir, ir->op2);
2238 else if (ir->op2 == IRFPM_SQRT)
2239 asm_fpunary(as, ir, ARMI_VSQRT_D);
2240 else
2241 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2242 break;
2243 case IR_TOBIT: asm_tobit(as, ir); break;
2244#endif
2245
2246 case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
2247 case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
2248
2249 /* Memory references. */
2250 case IR_AREF: asm_aref(as, ir); break;
2251 case IR_HREF: asm_href(as, ir, 0); break;
2252 case IR_HREFK: asm_hrefk(as, ir); break;
2253 case IR_NEWREF: asm_newref(as, ir); break;
2254 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2255 case IR_FREF: asm_fref(as, ir); break;
2256 case IR_STRREF: asm_strref(as, ir); break;
2257
2258 /* Loads and stores. */
2259 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2260 asm_ahuvload(as, ir);
2261 break;
2262 case IR_FLOAD: asm_fload(as, ir); break;
2263 case IR_XLOAD: asm_xload(as, ir); break;
2264 case IR_SLOAD: asm_sload(as, ir); break;
2265
2266 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2267 case IR_FSTORE: asm_fstore(as, ir); break;
2268 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2269
2270 /* Allocations. */
2271 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2272 case IR_TNEW: asm_tnew(as, ir); break;
2273 case IR_TDUP: asm_tdup(as, ir); break;
2274 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2275
2276 /* Write barriers. */
2277 case IR_TBAR: asm_tbar(as, ir); break;
2278 case IR_OBAR: asm_obar(as, ir); break;
2279
2280 /* Type conversions. */
2281 case IR_CONV: asm_conv(as, ir); break;
2282 case IR_TOSTR: asm_tostr(as, ir); break;
2283 case IR_STRTO: asm_strto(as, ir); break;
2284
2285 /* Calls. */
2286 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2287 case IR_CALLXS: asm_callx(as, ir); break;
2288 case IR_CARG: break;
2289
2290 default:
2291 setintV(&as->J->errinfo, ir->o);
2292 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2293 break;
2294 }
2295}
2296
2297/* -- Trace setup --------------------------------------------------------- */ 2147/* -- Trace setup --------------------------------------------------------- */
2298 2148
2299/* Ensure there are enough stack slots for call arguments. */ 2149/* Ensure there are enough stack slots for call arguments. */
2300static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2150static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2301{ 2151{
2302 IRRef args[CCI_NARGS_MAX*2]; 2152 IRRef args[CCI_NARGS_MAX*2];
2303 uint32_t i, nargs = (int)CCI_NARGS(ci); 2153 uint32_t i, nargs = CCI_XNARGS(ci);
2304 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; 2154 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
2305 asm_collectargs(as, ir, ci, args); 2155 asm_collectargs(as, ir, ci, args);
2306 for (i = 0; i < nargs; i++) { 2156 for (i = 0; i < nargs; i++) {
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
new file mode 100644
index 00000000..ce2100c9
--- /dev/null
+++ b/src/lj_asm_arm64.h
@@ -0,0 +1,2043 @@
1/*
2** ARM64 IR assembler (SSA IR -> machine code).
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4**
5** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6** Sponsored by Cisco Systems, Inc.
7*/
8
9/* -- Register allocator extensions --------------------------------------- */
10
11/* Allocate a register with a hint. */
12static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
13{
14 Reg r = IR(ref)->r;
15 if (ra_noreg(r)) {
16 if (!ra_hashint(r) && !iscrossref(as, ref))
17 ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
18 r = ra_allocref(as, ref, allow);
19 }
20 ra_noweak(as, r);
21 return r;
22}
23
24/* Allocate two source registers for three-operand instructions. */
25static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
26{
27 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
28 Reg left = irl->r, right = irr->r;
29 if (ra_hasreg(left)) {
30 ra_noweak(as, left);
31 if (ra_noreg(right))
32 right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
33 else
34 ra_noweak(as, right);
35 } else if (ra_hasreg(right)) {
36 ra_noweak(as, right);
37 left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
38 } else if (ra_hashint(right)) {
39 right = ra_allocref(as, ir->op2, allow);
40 left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
41 } else {
42 left = ra_allocref(as, ir->op1, allow);
43 right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
44 }
45 return left | (right << 8);
46}
47
48/* -- Guard handling ------------------------------------------------------ */
49
50/* Setup all needed exit stubs. */
51static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
52{
53 ExitNo i;
54 MCode *mxp = as->mctop;
55 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
56 asm_mclimit(as);
57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
58 for (i = nexits-1; (int32_t)i >= 0; i--)
59 *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
60 *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
61 mxp--;
62 *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
63 *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
64 as->mctop = mxp;
65}
66
67static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
68{
69 /* Keep this in-sync with exitstub_trace_addr(). */
70 return as->mctop + exitno + 3;
71}
72
73/* Emit conditional branch to exit for guard. */
74static void asm_guardcc(ASMState *as, A64CC cc)
75{
76 MCode *target = asm_exitstub_addr(as, as->snapno);
77 MCode *p = as->mcp;
78 if (LJ_UNLIKELY(p == as->invmcp)) {
79 as->loopinv = 1;
80 *p = A64I_B | A64F_S26(target-p);
81 emit_cond_branch(as, cc^1, p-1);
82 return;
83 }
84 emit_cond_branch(as, cc, target);
85}
86
87/* Emit test and branch instruction to exit for guard. */
88static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
89{
90 MCode *target = asm_exitstub_addr(as, as->snapno);
91 MCode *p = as->mcp;
92 if (LJ_UNLIKELY(p == as->invmcp)) {
93 as->loopinv = 1;
94 *p = A64I_B | A64F_S26(target-p);
95 emit_tnb(as, ai^0x01000000u, r, bit, p-1);
96 return;
97 }
98 emit_tnb(as, ai, r, bit, target);
99}
100
101/* Emit compare and branch instruction to exit for guard. */
102static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
103{
104 MCode *target = asm_exitstub_addr(as, as->snapno);
105 MCode *p = as->mcp;
106 if (LJ_UNLIKELY(p == as->invmcp)) {
107 as->loopinv = 1;
108 *p = A64I_B | A64F_S26(target-p);
109 emit_cnb(as, ai^0x01000000u, r, p-1);
110 return;
111 }
112 emit_cnb(as, ai, r, target);
113}
114
115/* -- Operand fusion ------------------------------------------------------ */
116
117/* Limit linear search to this distance. Avoids O(n^2) behavior. */
118#define CONFLICT_SEARCH_LIM 31
119
120static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
121{
122 if (irref_isk(ref)) {
123 IRIns *ir = IR(ref);
124 if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
125 *k = ir->i;
126 return 1;
127 } else if (checki32((int64_t)ir_k64(ir)->u64)) {
128 *k = (int32_t)ir_k64(ir)->u64;
129 return 1;
130 }
131 }
132 return 0;
133}
134
135/* Check if there's no conflicting instruction between curins and ref. */
136static int noconflict(ASMState *as, IRRef ref, IROp conflict)
137{
138 IRIns *ir = as->ir;
139 IRRef i = as->curins;
140 if (i > ref + CONFLICT_SEARCH_LIM)
141 return 0; /* Give up, ref is too far away. */
142 while (--i > ref)
143 if (ir[i].o == conflict)
144 return 0; /* Conflict found. */
145 return 1; /* Ok, no conflict. */
146}
147
148/* Fuse the array base of colocated arrays. */
149static int32_t asm_fuseabase(ASMState *as, IRRef ref)
150{
151 IRIns *ir = IR(ref);
152 if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
153 !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
154 return (int32_t)sizeof(GCtab);
155 return 0;
156}
157
158#define FUSE_REG 0x40000000
159
160/* Fuse array/hash/upvalue reference into register+offset operand. */
161static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
162 A64Ins ins)
163{
164 IRIns *ir = IR(ref);
165 if (ra_noreg(ir->r)) {
166 if (ir->o == IR_AREF) {
167 if (mayfuse(as, ref)) {
168 if (irref_isk(ir->op2)) {
169 IRRef tab = IR(ir->op1)->op1;
170 int32_t ofs = asm_fuseabase(as, tab);
171 IRRef refa = ofs ? tab : ir->op1;
172 ofs += 8*IR(ir->op2)->i;
173 if (emit_checkofs(ins, ofs)) {
174 *ofsp = ofs;
175 return ra_alloc1(as, refa, allow);
176 }
177 } else {
178 Reg base = ra_alloc1(as, ir->op1, allow);
179 *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
180 return base;
181 }
182 }
183 } else if (ir->o == IR_HREFK) {
184 if (mayfuse(as, ref)) {
185 int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
186 if (emit_checkofs(ins, ofs)) {
187 *ofsp = ofs;
188 return ra_alloc1(as, ir->op1, allow);
189 }
190 }
191 } else if (ir->o == IR_UREFC) {
192 if (irref_isk(ir->op1)) {
193 GCfunc *fn = ir_kfunc(IR(ir->op1));
194 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
195 int64_t ofs = glofs(as, &uv->tv);
196 if (emit_checkofs(ins, ofs)) {
197 *ofsp = (int32_t)ofs;
198 return RID_GL;
199 }
200 }
201 }
202 }
203 *ofsp = 0;
204 return ra_alloc1(as, ref, allow);
205}
206
207/* Fuse m operand into arithmetic/logic instructions. */
208static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
209{
210 IRIns *ir = IR(ref);
211 if (ra_hasreg(ir->r)) {
212 ra_noweak(as, ir->r);
213 return A64F_M(ir->r);
214 } else if (irref_isk(ref)) {
215 uint32_t m;
216 int64_t k = get_k64val(ir);
217 if ((ai & 0x1f000000) == 0x0a000000)
218 m = emit_isk13(k, irt_is64(ir->t));
219 else
220 m = emit_isk12(k);
221 if (m)
222 return m;
223 } else if (mayfuse(as, ref)) {
224 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
225 (ir->o == IR_ADD && ir->op1 == ir->op2)) {
226 A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
227 ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
228 int shift = ir->o == IR_ADD ? 1 :
229 (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
230 IRIns *irl = IR(ir->op1);
231 if (sh == A64SH_LSL &&
232 irl->o == IR_CONV &&
233 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
234 shift <= 4 &&
235 canfuse(as, irl)) {
236 Reg m = ra_alloc1(as, irl->op1, allow);
237 return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
238 } else {
239 Reg m = ra_alloc1(as, ir->op1, allow);
240 return A64F_M(m) | A64F_SH(sh, shift);
241 }
242 } else if (ir->o == IR_CONV &&
243 ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
244 Reg m = ra_alloc1(as, ir->op1, allow);
245 return A64F_M(m) | A64F_EX(A64EX_SXTW);
246 }
247 }
248 return A64F_M(ra_allocref(as, ref, allow));
249}
250
251/* Fuse XLOAD/XSTORE reference into load/store operand. */
252static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
253 RegSet allow)
254{
255 IRIns *ir = IR(ref);
256 Reg base;
257 int32_t ofs = 0;
258 if (ra_noreg(ir->r) && canfuse(as, ir)) {
259 if (ir->o == IR_ADD) {
260 if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
261 ref = ir->op1;
262 } else {
263 Reg rn, rm;
264 IRRef lref = ir->op1, rref = ir->op2;
265 IRIns *irl = IR(lref);
266 if (mayfuse(as, irl->op1)) {
267 unsigned int shift = 4;
268 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
269 shift = (IR(irl->op2)->i & 63);
270 } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
271 shift = 1;
272 }
273 if ((ai >> 30) == shift) {
274 lref = irl->op1;
275 irl = IR(lref);
276 ai |= A64I_LS_SH;
277 }
278 }
279 if (irl->o == IR_CONV &&
280 irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
281 canfuse(as, irl)) {
282 lref = irl->op1;
283 ai |= A64I_LS_SXTWx;
284 } else {
285 ai |= A64I_LS_LSLx;
286 }
287 rm = ra_alloc1(as, lref, allow);
288 rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
289 emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
290 return;
291 }
292 } else if (ir->o == IR_STRREF) {
293 if (asm_isk32(as, ir->op2, &ofs)) {
294 ref = ir->op1;
295 } else if (asm_isk32(as, ir->op1, &ofs)) {
296 ref = ir->op2;
297 } else {
298 Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
299 Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
300 Reg rn = ra_alloc1(as, refv, allow);
301 IRIns *irr = IR(refk);
302 uint32_t m;
303 if (irr+1 == ir && !ra_used(irr) &&
304 irr->o == IR_ADD && irref_isk(irr->op2)) {
305 ofs = sizeof(GCstr) + IR(irr->op2)->i;
306 if (emit_checkofs(ai, ofs)) {
307 Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
308 m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
309 goto skipopm;
310 }
311 }
312 m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
313 ofs = sizeof(GCstr);
314 skipopm:
315 emit_lso(as, ai, rd, rd, ofs);
316 emit_dn(as, A64I_ADDx^m, rd, rn);
317 return;
318 }
319 ofs += sizeof(GCstr);
320 if (!emit_checkofs(ai, ofs)) {
321 Reg rn = ra_alloc1(as, ref, allow);
322 Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
323 emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
324 return;
325 }
326 }
327 }
328 base = ra_alloc1(as, ref, allow);
329 emit_lso(as, ai, (rd & 31), base, ofs);
330}
331
332/* Fuse FP multiply-add/sub. */
333static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
334{
335 IRRef lref = ir->op1, rref = ir->op2;
336 IRIns *irm;
337 if (lref != rref &&
338 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
339 ra_noreg(irm->r)) ||
340 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
341 (rref = lref, ai = air, ra_noreg(irm->r))))) {
342 Reg dest = ra_dest(as, ir, RSET_FPR);
343 Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
344 Reg left = ra_alloc2(as, irm,
345 rset_exclude(rset_exclude(RSET_FPR, dest), add));
346 Reg right = (left >> 8); left &= 255;
347 emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
348 return 1;
349 }
350 return 0;
351}
352
353/* Fuse BAND + BSHL/BSHR into UBFM. */
354static int asm_fuseandshift(ASMState *as, IRIns *ir)
355{
356 IRIns *irl = IR(ir->op1);
357 lua_assert(ir->o == IR_BAND);
358 if (canfuse(as, irl) && irref_isk(ir->op2)) {
359 uint64_t mask = get_k64val(IR(ir->op2));
360 if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
361 int32_t shmask = irt_is64(irl->t) ? 63 : 31;
362 int32_t shift = (IR(irl->op2)->i & shmask);
363 int32_t imms = shift;
364 if (irl->o == IR_BSHL) {
365 mask >>= shift;
366 shift = (shmask-shift+1) & shmask;
367 imms = 0;
368 }
369 if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */
370 Reg dest = ra_dest(as, ir, RSET_GPR);
371 Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
372 A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
373 imms += 63 - emit_clz64(mask);
374 if (imms > shmask) imms = shmask;
375 emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
376 return 1;
377 }
378 }
379 }
380 return 0;
381}
382
383/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */
384static int asm_fuseorshift(ASMState *as, IRIns *ir)
385{
386 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
387 lua_assert(ir->o == IR_BOR);
388 if (canfuse(as, irl) && canfuse(as, irr) &&
389 ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
390 (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
391 if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
392 IRRef lref = irl->op1, rref = irr->op1;
393 uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
394 if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */
395 uint32_t tmp2;
396 IRRef tmp1 = lref; lref = rref; rref = tmp1;
397 tmp2 = lshift; lshift = rshift; rshift = tmp2;
398 }
399 if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
400 A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
401 Reg dest = ra_dest(as, ir, RSET_GPR);
402 Reg left = ra_alloc1(as, lref, RSET_GPR);
403 Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
404 emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
405 return 1;
406 }
407 }
408 }
409 return 0;
410}
411
412/* -- Calls --------------------------------------------------------------- */
413
414/* Generate a call to a C function. */
415static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
416{
417 uint32_t n, nargs = CCI_XNARGS(ci);
418 int32_t ofs = 0;
419 Reg gpr, fpr = REGARG_FIRSTFPR;
420 if ((void *)ci->func)
421 emit_call(as, (void *)ci->func);
422 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
423 as->cost[gpr] = REGCOST(~0u, ASMREF_L);
424 gpr = REGARG_FIRSTGPR;
425 for (n = 0; n < nargs; n++) { /* Setup args. */
426 IRRef ref = args[n];
427 IRIns *ir = IR(ref);
428 if (ref) {
429 if (irt_isfp(ir->t)) {
430 if (fpr <= REGARG_LASTFPR) {
431 lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
432 ra_leftov(as, fpr, ref);
433 fpr++;
434 } else {
435 Reg r = ra_alloc1(as, ref, RSET_FPR);
436 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
437 ofs += 8;
438 }
439 } else {
440 if (gpr <= REGARG_LASTGPR) {
441 lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
442 ra_leftov(as, gpr, ref);
443 gpr++;
444 } else {
445 Reg r = ra_alloc1(as, ref, RSET_GPR);
446 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
447 ofs += 8;
448 }
449 }
450 }
451 }
452}
453
454/* Setup result reg/sp for call. Evict scratch regs. */
455static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
456{
457 RegSet drop = RSET_SCRATCH;
458 if (ra_hasreg(ir->r))
459 rset_clear(drop, ir->r); /* Dest reg handled below. */
460 ra_evictset(as, drop); /* Evictions must be performed first. */
461 if (ra_used(ir)) {
462 lua_assert(!irt_ispri(ir->t));
463 if (irt_isfp(ir->t)) {
464 if (ci->flags & CCI_CASTU64) {
465 Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
466 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
467 dest, RID_RET);
468 } else {
469 ra_destreg(as, ir, RID_FPRET);
470 }
471 } else {
472 ra_destreg(as, ir, RID_RET);
473 }
474 }
475 UNUSED(ci);
476}
477
478static void asm_callx(ASMState *as, IRIns *ir)
479{
480 IRRef args[CCI_NARGS_MAX*2];
481 CCallInfo ci;
482 IRRef func;
483 IRIns *irf;
484 ci.flags = asm_callx_flags(as, ir);
485 asm_collectargs(as, ir, &ci, args);
486 asm_setupresult(as, ir, &ci);
487 func = ir->op2; irf = IR(func);
488 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
489 if (irref_isk(func)) { /* Call to constant address. */
490 ci.func = (ASMFunction)(ir_k64(irf)->u64);
491 } else { /* Need a non-argument register for indirect calls. */
492 Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
493 emit_n(as, A64I_BLR, freg);
494 ci.func = (ASMFunction)(void *)0;
495 }
496 asm_gencall(as, &ci, args);
497}
498
499/* -- Returns ------------------------------------------------------------- */
500
501/* Return to lower frame. Guard that it goes to the right spot. */
502static void asm_retf(ASMState *as, IRIns *ir)
503{
504 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
505 void *pc = ir_kptr(IR(ir->op2));
506 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
507 as->topslot -= (BCReg)delta;
508 if ((int32_t)as->topslot < 0) as->topslot = 0;
509 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
510 /* Need to force a spill on REF_BASE now to update the stack slot. */
511 emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
512 emit_setgl(as, base, jit_base);
513 emit_addptr(as, base, -8*delta);
514 asm_guardcc(as, CC_NE);
515 emit_nm(as, A64I_CMPx, RID_TMP,
516 ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
517 emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
518}
519
520/* -- Type conversions ---------------------------------------------------- */
521
522static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
523{
524 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
525 Reg dest = ra_dest(as, ir, RSET_GPR);
526 asm_guardcc(as, CC_NE);
527 emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
528 emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
529 emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
530}
531
532static void asm_tobit(ASMState *as, IRIns *ir)
533{
534 RegSet allow = RSET_FPR;
535 Reg left = ra_alloc1(as, ir->op1, allow);
536 Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
537 Reg tmp = ra_scratch(as, rset_clear(allow, right));
538 Reg dest = ra_dest(as, ir, RSET_GPR);
539 emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
540 emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
541}
542
543static void asm_conv(ASMState *as, IRIns *ir)
544{
545 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
546 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
547 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
548 IRRef lref = ir->op1;
549 lua_assert(irt_type(ir->t) != st);
550 if (irt_isfp(ir->t)) {
551 Reg dest = ra_dest(as, ir, RSET_FPR);
552 if (stfp) { /* FP to FP conversion. */
553 emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
554 (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
555 } else { /* Integer to FP conversion. */
556 Reg left = ra_alloc1(as, lref, RSET_GPR);
557 A64Ins ai = irt_isfloat(ir->t) ?
558 (((IRT_IS64 >> st) & 1) ?
559 (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
560 (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
561 (((IRT_IS64 >> st) & 1) ?
562 (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
563 (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
564 emit_dn(as, ai, (dest & 31), left);
565 }
566 } else if (stfp) { /* FP to integer conversion. */
567 if (irt_isguard(ir->t)) {
568 /* Checked conversions are only supported from number to int. */
569 lua_assert(irt_isint(ir->t) && st == IRT_NUM);
570 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
571 } else {
572 Reg left = ra_alloc1(as, lref, RSET_FPR);
573 Reg dest = ra_dest(as, ir, RSET_GPR);
574 A64Ins ai = irt_is64(ir->t) ?
575 (st == IRT_NUM ?
576 (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
577 (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
578 (st == IRT_NUM ?
579 (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
580 (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
581 emit_dn(as, ai, dest, (left & 31));
582 }
583 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
584 Reg dest = ra_dest(as, ir, RSET_GPR);
585 Reg left = ra_alloc1(as, lref, RSET_GPR);
586 A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
587 st == IRT_U8 ? A64I_UXTBw :
588 st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
589 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
590 emit_dn(as, ai, dest, left);
591 } else {
592 Reg dest = ra_dest(as, ir, RSET_GPR);
593 if (irt_is64(ir->t)) {
594 if (st64 || !(ir->op2 & IRCONV_SEXT)) {
595 /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
596 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
597 } else { /* 32 to 64 bit sign extension. */
598 Reg left = ra_alloc1(as, lref, RSET_GPR);
599 emit_dn(as, A64I_SXTW, dest, left);
600 }
601 } else {
602 if (st64) {
603 /* This is either a 32 bit reg/reg mov which zeroes the hiword
604 ** or a load of the loword from a 64 bit address.
605 */
606 Reg left = ra_alloc1(as, lref, RSET_GPR);
607 emit_dm(as, A64I_MOVw, dest, left);
608 } else { /* 32/32 bit no-op (cast). */
609 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
610 }
611 }
612 }
613}
614
615static void asm_strto(ASMState *as, IRIns *ir)
616{
617 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
618 IRRef args[2];
619 Reg dest = 0, tmp;
620 int destused = ra_used(ir);
621 int32_t ofs = 0;
622 ra_evictset(as, RSET_SCRATCH);
623 if (destused) {
624 if (ra_hasspill(ir->s)) {
625 ofs = sps_scale(ir->s);
626 destused = 0;
627 if (ra_hasreg(ir->r)) {
628 ra_free(as, ir->r);
629 ra_modified(as, ir->r);
630 emit_spload(as, ir, ir->r, ofs);
631 }
632 } else {
633 dest = ra_dest(as, ir, RSET_FPR);
634 }
635 }
636 if (destused)
637 emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
638 asm_guardcnb(as, A64I_CBZ, RID_RET);
639 args[0] = ir->op1; /* GCstr *str */
640 args[1] = ASMREF_TMP1; /* TValue *n */
641 asm_gencall(as, ci, args);
642 tmp = ra_releasetmp(as, ASMREF_TMP1);
643 emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
644}
645
646/* -- Memory references --------------------------------------------------- */
647
648/* Store tagged value for ref at base+ofs. */
649static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
650{
651 RegSet allow = rset_exclude(RSET_GPR, base);
652 IRIns *ir = IR(ref);
653 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
654 if (irref_isk(ref)) {
655 TValue k;
656 lj_ir_kvalue(as->J->L, &k, ir);
657 emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
658 } else {
659 Reg src = ra_alloc1(as, ref, allow);
660 rset_clear(allow, src);
661 if (irt_isinteger(ir->t)) {
662 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
663 emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
664 emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
665 } else {
666 Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
667 emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
668 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
669 }
670 }
671}
672
673/* Get pointer to TValue. */
674static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
675{
676 IRIns *ir = IR(ref);
677 if (irt_isnum(ir->t)) {
678 if (irref_isk(ref)) {
679 /* Use the number constant itself as a TValue. */
680 ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
681 } else {
682 /* Otherwise force a spill and use the spill slot. */
683 emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
684 }
685 } else {
686 /* Otherwise use g->tmptv to hold the TValue. */
687 asm_tvstore64(as, dest, 0, ref);
688 ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
689 }
690}
691
692static void asm_aref(ASMState *as, IRIns *ir)
693{
694 Reg dest = ra_dest(as, ir, RSET_GPR);
695 Reg idx, base;
696 if (irref_isk(ir->op2)) {
697 IRRef tab = IR(ir->op1)->op1;
698 int32_t ofs = asm_fuseabase(as, tab);
699 IRRef refa = ofs ? tab : ir->op1;
700 uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
701 if (k) {
702 base = ra_alloc1(as, refa, RSET_GPR);
703 emit_dn(as, A64I_ADDx^k, dest, base);
704 return;
705 }
706 }
707 base = ra_alloc1(as, ir->op1, RSET_GPR);
708 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
709 emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
710}
711
712/* Inlined hash lookup. Specialized for key type and for const keys.
713** The equivalent C code is:
714** Node *n = hashkey(t, key);
715** do {
716** if (lj_obj_equal(&n->key, key)) return &n->val;
717** } while ((n = nextnode(n)));
718** return niltv(L);
719*/
720static void asm_href(ASMState *as, IRIns *ir, IROp merge)
721{
722 RegSet allow = RSET_GPR;
723 int destused = ra_used(ir);
724 Reg dest = ra_dest(as, ir, allow);
725 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
726 Reg key = 0, tmp = RID_TMP;
727 Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
728 IRRef refkey = ir->op2;
729 IRIns *irkey = IR(refkey);
730 int isk = irref_isk(ir->op2);
731 IRType1 kt = irkey->t;
732 uint32_t k = 0;
733 uint32_t khash;
734 MCLabel l_end, l_loop, l_next;
735 rset_clear(allow, tab);
736
737 if (!isk) {
738 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
739 rset_clear(allow, key);
740 if (!irt_isstr(kt)) {
741 tmp = ra_scratch(as, allow);
742 rset_clear(allow, tmp);
743 }
744 } else if (irt_isnum(kt)) {
745 int64_t val = (int64_t)ir_knum(irkey)->u64;
746 if (!(k = emit_isk12(val))) {
747 key = ra_allock(as, val, allow);
748 rset_clear(allow, key);
749 }
750 } else if (!irt_ispri(kt)) {
751 if (!(k = emit_isk12(irkey->i))) {
752 key = ra_alloc1(as, refkey, allow);
753 rset_clear(allow, key);
754 }
755 }
756
757 /* Allocate constants early. */
758 if (irt_isnum(kt)) {
759 if (!isk) {
760 tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
761 ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
762 rset_clear(allow, tisnum);
763 }
764 } else if (irt_isaddr(kt)) {
765 if (isk) {
766 int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
767 scr = ra_allock(as, kk, allow);
768 } else {
769 scr = ra_scratch(as, allow);
770 }
771 rset_clear(allow, scr);
772 } else {
773 lua_assert(irt_ispri(kt) && !irt_isnil(kt));
774 type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
775 scr = ra_scratch(as, rset_clear(allow, type));
776 rset_clear(allow, scr);
777 }
778
779 /* Key not found in chain: jump to exit (if merged) or load niltv. */
780 l_end = emit_label(as);
781 as->invmcp = NULL;
782 if (merge == IR_NE)
783 asm_guardcc(as, CC_AL);
784 else if (destused)
785 emit_loada(as, dest, niltvg(J2G(as->J)));
786
787 /* Follow hash chain until the end. */
788 l_loop = --as->mcp;
789 emit_n(as, A64I_CMPx^A64I_K12^0, dest);
790 emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
791 l_next = emit_label(as);
792
793 /* Type and value comparison. */
794 if (merge == IR_EQ)
795 asm_guardcc(as, CC_EQ);
796 else
797 emit_cond_branch(as, CC_EQ, l_end);
798
799 if (irt_isnum(kt)) {
800 if (isk) {
801 /* Assumes -0.0 is already canonicalized to +0.0. */
802 if (k)
803 emit_n(as, A64I_CMPx^k, tmp);
804 else
805 emit_nm(as, A64I_CMPx, key, tmp);
806 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
807 } else {
808 emit_nm(as, A64I_FCMPd, key, ftmp);
809 emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
810 emit_cond_branch(as, CC_LO, l_next);
811 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
812 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
813 }
814 } else if (irt_isaddr(kt)) {
815 if (isk) {
816 emit_nm(as, A64I_CMPx, scr, tmp);
817 emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
818 } else {
819 emit_nm(as, A64I_CMPx, tmp, scr);
820 emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
821 }
822 } else {
823 emit_nm(as, A64I_CMPw, scr, type);
824 emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
825 }
826
827 *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
828 if (!isk && irt_isaddr(kt)) {
829 type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
830 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
831 rset_clear(allow, type);
832 }
833 /* Load main position relative to tab->node into dest. */
834 khash = isk ? ir_khash(irkey) : 1;
835 if (khash == 0) {
836 emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
837 } else {
838 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
839 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
840 emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
841 if (isk) {
842 Reg tmphash = ra_allock(as, khash, allow);
843 emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
844 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
845 } else if (irt_isstr(kt)) {
846 /* Fetch of str->hash is cheaper than ra_allock. */
847 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
848 emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
849 emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
850 } else { /* Must match with hash*() in lj_tab.c. */
851 emit_dnm(as, A64I_ANDw, dest, dest, tmp);
852 emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
853 emit_dnm(as, A64I_SUBw, dest, dest, tmp);
854 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
855 emit_dnm(as, A64I_EORw, dest, dest, tmp);
856 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
857 emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
858 emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
859 emit_dnm(as, A64I_EORw, tmp, tmp, dest);
860 if (irt_isnum(kt)) {
861 emit_dnm(as, A64I_ADDw, dest, dest, dest);
862 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
863 emit_dm(as, A64I_MOVw, tmp, dest);
864 emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
865 } else {
866 checkmclim(as);
867 emit_dm(as, A64I_MOVw, tmp, key);
868 emit_dnm(as, A64I_EORw, dest, dest,
869 ra_allock(as, irt_toitype(kt) << 15, allow));
870 emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
871 emit_dm(as, A64I_MOVx, dest, key);
872 }
873 }
874 }
875}
876
877static void asm_hrefk(ASMState *as, IRIns *ir)
878{
879 IRIns *kslot = IR(ir->op2);
880 IRIns *irkey = IR(kslot->op1);
881 int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
882 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
883 int bigofs = !emit_checkofs(A64I_LDRx, ofs);
884 Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
885 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
886 Reg key, idx = node;
887 RegSet allow = rset_exclude(RSET_GPR, node);
888 uint64_t k;
889 lua_assert(ofs % sizeof(Node) == 0);
890 if (bigofs) {
891 idx = dest;
892 rset_clear(allow, dest);
893 kofs = (int32_t)offsetof(Node, key);
894 } else if (ra_hasreg(dest)) {
895 emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
896 }
897 asm_guardcc(as, CC_NE);
898 if (irt_ispri(irkey->t)) {
899 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
900 } else if (irt_isnum(irkey->t)) {
901 k = ir_knum(irkey)->u64;
902 } else {
903 k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
904 }
905 key = ra_scratch(as, allow);
906 emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
907 emit_lso(as, A64I_LDRx, key, idx, kofs);
908 if (bigofs)
909 emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
910}
911
912static void asm_uref(ASMState *as, IRIns *ir)
913{
914 Reg dest = ra_dest(as, ir, RSET_GPR);
915 if (irref_isk(ir->op1)) {
916 GCfunc *fn = ir_kfunc(IR(ir->op1));
917 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
918 emit_lsptr(as, A64I_LDRx, dest, v);
919 } else {
920 Reg uv = ra_scratch(as, RSET_GPR);
921 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
922 if (ir->o == IR_UREFC) {
923 asm_guardcc(as, CC_NE);
924 emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
925 emit_opk(as, A64I_ADDx, dest, uv,
926 (int32_t)offsetof(GCupval, tv), RSET_GPR);
927 emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
928 } else {
929 emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
930 }
931 emit_lso(as, A64I_LDRx, uv, func,
932 (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
933 }
934}
935
936static void asm_fref(ASMState *as, IRIns *ir)
937{
938 UNUSED(as); UNUSED(ir);
939 lua_assert(!ra_used(ir));
940}
941
942static void asm_strref(ASMState *as, IRIns *ir)
943{
944 RegSet allow = RSET_GPR;
945 Reg dest = ra_dest(as, ir, allow);
946 Reg base = ra_alloc1(as, ir->op1, allow);
947 IRIns *irr = IR(ir->op2);
948 int32_t ofs = sizeof(GCstr);
949 uint32_t m;
950 rset_clear(allow, base);
951 if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
952 emit_dn(as, A64I_ADDx^m, dest, base);
953 } else {
954 emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
955 emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
956 }
957}
958
959/* -- Loads and stores ---------------------------------------------------- */
960
961static A64Ins asm_fxloadins(IRIns *ir)
962{
963 switch (irt_type(ir->t)) {
964 case IRT_I8: return A64I_LDRB ^ A64I_LS_S;
965 case IRT_U8: return A64I_LDRB;
966 case IRT_I16: return A64I_LDRH ^ A64I_LS_S;
967 case IRT_U16: return A64I_LDRH;
968 case IRT_NUM: return A64I_LDRd;
969 case IRT_FLOAT: return A64I_LDRs;
970 default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
971 }
972}
973
974static A64Ins asm_fxstoreins(IRIns *ir)
975{
976 switch (irt_type(ir->t)) {
977 case IRT_I8: case IRT_U8: return A64I_STRB;
978 case IRT_I16: case IRT_U16: return A64I_STRH;
979 case IRT_NUM: return A64I_STRd;
980 case IRT_FLOAT: return A64I_STRs;
981 default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
982 }
983}
984
985static void asm_fload(ASMState *as, IRIns *ir)
986{
987 Reg dest = ra_dest(as, ir, RSET_GPR);
988 Reg idx;
989 A64Ins ai = asm_fxloadins(ir);
990 int32_t ofs;
991 if (ir->op1 == REF_NIL) {
992 idx = RID_GL;
993 ofs = (ir->op2 << 2) - GG_OFS(g);
994 } else {
995 idx = ra_alloc1(as, ir->op1, RSET_GPR);
996 if (ir->op2 == IRFL_TAB_ARRAY) {
997 ofs = asm_fuseabase(as, ir->op1);
998 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
999 emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
1000 return;
1001 }
1002 }
1003 ofs = field_ofs[ir->op2];
1004 }
1005 emit_lso(as, ai, (dest & 31), idx, ofs);
1006}
1007
1008static void asm_fstore(ASMState *as, IRIns *ir)
1009{
1010 if (ir->r != RID_SINK) {
1011 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1012 IRIns *irf = IR(ir->op1);
1013 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1014 int32_t ofs = field_ofs[irf->op2];
1015 emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
1016 }
1017}
1018
1019static void asm_xload(ASMState *as, IRIns *ir)
1020{
1021 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1022 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
1023 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
1024}
1025
1026static void asm_xstore(ASMState *as, IRIns *ir)
1027{
1028 if (ir->r != RID_SINK) {
1029 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1030 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
1031 rset_exclude(RSET_GPR, src));
1032 }
1033}
1034
1035static void asm_ahuvload(ASMState *as, IRIns *ir)
1036{
1037 Reg idx, tmp, type;
1038 int32_t ofs = 0;
1039 RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1040 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1041 irt_isint(ir->t));
1042 if (ra_used(ir)) {
1043 Reg dest = ra_dest(as, ir, allow);
1044 tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
1045 if (irt_isaddr(ir->t)) {
1046 emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1047 } else if (irt_isnum(ir->t)) {
1048 emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1049 } else if (irt_isint(ir->t)) {
1050 emit_dm(as, A64I_MOVw, dest, dest);
1051 }
1052 } else {
1053 tmp = ra_scratch(as, gpr);
1054 }
1055 type = ra_scratch(as, rset_clear(gpr, tmp));
1056 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
1057 /* Always do the type check, even if the load result is unused. */
1058 asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
1059 if (irt_type(ir->t) >= IRT_NUM) {
1060 lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
1061 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1062 ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
1063 } else if (irt_isaddr(ir->t)) {
1064 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
1065 emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
1066 } else if (irt_isnil(ir->t)) {
1067 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1068 } else {
1069 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1070 ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
1071 }
1072 if (ofs & FUSE_REG)
1073 emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1074 else
1075 emit_lso(as, A64I_LDRx, tmp, idx, ofs);
1076}
1077
/* Store the value ir->op2 to the slot addressed by ir->op1 (address fused
** via asm_fuseahuref). Nothing is emitted if the instruction is marked with
** RID_SINK. Numbers are stored directly from an FPR with STRd. All other
** types are stored as a single 64 bit word in tmp, which is built from a
** type constant and (for non-primitives) the value register.
** NOTE(review): emit_* calls seem to be listed in reverse execution order
** (the ADDx that fills tmp is emitted after the STRx that stores it) --
** confirm against the emit_* helpers.
*/
1078static void asm_ahustore(ASMState *as, IRIns *ir)
1079{
1080 if (ir->r != RID_SINK) {
1081 RegSet allow = RSET_GPR;
1082 Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
1083 int32_t ofs = 0;
1084 if (irt_isnum(ir->t)) {
/* FP number: store the FPR as-is, either register-indexed or base+offset. */
1085 src = ra_alloc1(as, ir->op2, RSET_FPR);
1086 idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
1087 if (ofs & FUSE_REG)
1088 emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31));
1089 else
1090 emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
1091 } else {
1092 if (!irt_ispri(ir->t)) {
/* Non-primitive: needs a value register plus a register with the type. */
1093 src = ra_alloc1(as, ir->op2, allow);
1094 rset_clear(allow, src);
1095 if (irt_isinteger(ir->t))
1096 type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
1097 else
1098 type = ra_allock(as, irt_toitype(ir->t), allow);
1099 } else {
/* Primitive: the whole tagged word is a constant and is stored directly. */
1100 tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
1101 }
1102 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
1103 A64I_STRx);
1104 if (ofs & FUSE_REG)
1105 emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1106 else
1107 emit_lso(as, A64I_STRx, tmp, idx, ofs);
/* Only when a value register exists: tmp = type combined with value. */
1108 if (ra_hasreg(src)) {
1109 if (irt_isinteger(ir->t)) {
/* Integer: pre-shifted type constant + zero-extended 32 bit value. */
1110 emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
1111 } else {
/* Other types: value + (type << 47). */
1112 emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
1113 }
1114 }
1115 }
1116 }
1117}
1118
/* Load a stack slot. ir->op1 is the slot number; the byte offset from the
** BASE register is 8*(op1-2). ir->op2 carries the IRSLOAD_* flags:
** - IRSLOAD_TYPECHECK: emit a guarded type check on the loaded 64 bit word.
** - IRSLOAD_CONVERT: convert between number and integer after the load.
** - IRSLOAD_PARENT must not be set here (asserted).
** A local copy t of the result type is used so the type check can be done
** against the type actually stored in the slot.
*/
1119static void asm_sload(ASMState *as, IRIns *ir)
1120{
1121 int32_t ofs = 8*((int32_t)ir->op1-2);
1122 IRType1 t = ir->t;
1123 Reg dest = RID_NONE, base;
1124 RegSet allow = RSET_GPR;
1125 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
1126 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
1127 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
/* Guarded number-to-integer conversion is delegated to asm_tointg(). */
1128 dest = ra_scratch(as, RSET_FPR);
1129 asm_tointg(as, ir, dest);
1130 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1131 } else if (ra_used(ir)) {
1132 Reg tmp = RID_NONE;
1133 if ((ir->op2 & IRSLOAD_CONVERT))
1134 tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
1135 lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
1136 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
1137 base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
1138 if (irt_isaddr(t)) {
/* Address types: mask the loaded word with LJ_GCVMASK. */
1139 emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1140 } else if ((ir->op2 & IRSLOAD_CONVERT)) {
1141 if (irt_isint(t)) {
1142 emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
1143 /* If value is already loaded for type check, move it to FPR. */
1144 if ((ir->op2 & IRSLOAD_TYPECHECK))
1145 emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
1146 else
1147 dest = tmp;
1148 t.irt = IRT_NUM; /* Check for original type. */
1149 } else {
1150 emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
1151 dest = tmp;
1152 t.irt = IRT_INT; /* Check for original type. */
1153 }
1154 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
/* Integer loaded as a full word for the check: keep only the low 32 bits. */
1155 emit_dm(as, A64I_MOVw, dest, dest);
1156 }
1157 goto dotypecheck;
1158 }
/* Reached only if base was not allocated above (guarded convert or unused). */
1159 base = ra_alloc1(as, REF_BASE, allow);
1160dotypecheck:
1161 rset_clear(allow, base);
1162 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1163 Reg tmp;
/* Reuse dest for the 64 bit load if it is a GPR, else take a scratch GPR. */
1164 if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
1165 tmp = dest;
1166 } else {
1167 tmp = ra_scratch(as, allow);
1168 rset_clear(allow, tmp);
1169 }
1170 if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
1171 emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1172 /* Need type check, even if the load result is unused. */
1173 asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
1174 if (irt_type(t) >= IRT_NUM) {
1175 lua_assert(irt_isinteger(t) || irt_isnum(t));
/* Compare the upper 32 bits of the word against LJ_TISNUM << 15. */
1176 emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1177 ra_allock(as, LJ_TISNUM << 15, allow), tmp);
1178 } else if (irt_isnil(t)) {
/* nil: CMN tmp, #1. */
1179 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1180 } else if (irt_ispri(t)) {
/* Other primitives: compare against the complete constant word. */
1181 emit_nm(as, A64I_CMPx,
1182 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
1183 } else {
/* Remaining types: extract the tag with ASR #47 and compare via CMN. */
1184 Reg type = ra_scratch(as, allow);
1185 emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
1186 emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
1187 }
1188 emit_lso(as, A64I_LDRx, tmp, base, ofs);
1189 return;
1190 }
/* No type check: plain load with a width matching the type (if used). */
1191 if (ra_hasreg(dest)) {
1192 emit_lso(as, irt_isnum(t) ? A64I_LDRd :
1193 (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
1194 ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
1195 }
1196}
1197
1198/* -- Allocations --------------------------------------------------------- */
1199
1200#if LJ_HASFFI
/* Allocate a cdata object for IR_CNEW/IR_CNEWI.
** ir->op1 is a constant holding the CTypeID. Three cases:
** - IR_CNEWI: allocate via lj_mem_newgco and store the 4 or 8 byte value
**   ir->op2 right after the GCcdata header.
** - ir->op2 != REF_NIL: VLA/VLS/aligned cdata, allocated by a call to
**   lj_cdata_newv(L, id, sz, align); returns early.
** - otherwise: fixed-size cdata via lj_mem_newgco(L, sz+sizeof(GCcdata)).
** For the lj_mem_newgco cases the gct and ctypeid fields are initialized
** inline. Every call bumps as->gcsteps.
*/
1201static void asm_cnew(ASMState *as, IRIns *ir)
1202{
1203 CTState *cts = ctype_ctsG(J2G(as->J));
1204 CTypeID id = (CTypeID)IR(ir->op1)->i;
1205 CTSize sz;
1206 CTInfo info = lj_ctype_info(cts, id, &sz);
1207 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1208 IRRef args[4];
1209 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1210 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1211
1212 as->gcsteps++;
1213 asm_setupresult(as, ir, ci); /* GCcdata * */
1214 /* Initialize immutable cdata object. */
1215 if (ir->o == IR_CNEWI) {
1216 int32_t ofs = sizeof(GCcdata);
1217 Reg r = ra_alloc1(as, ir->op2, allow);
1218 lua_assert(sz == 4 || sz == 8);
1219 emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
1220 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1221 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1222 args[0] = ASMREF_L; /* lua_State *L */
1223 args[1] = ir->op1; /* CTypeID id */
1224 args[2] = ir->op2; /* CTSize sz */
1225 args[3] = ASMREF_TMP1; /* CTSize align */
1226 asm_gencall(as, ci, args);
1227 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1228 return;
1229 }
1230
1231 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1232 {
/* IDs below 65536 are materialized with MOVZ into X1, others via ra_allock. */
1233 Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
1234 emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1235 emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1236 emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
1237 if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
1238 }
1239 args[0] = ASMREF_L; /* lua_State *L */
1240 args[1] = ASMREF_TMP1; /* MSize size */
1241 asm_gencall(as, ci, args);
/* The size argument is the payload size plus the GCcdata header. */
1242 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1243 ra_releasetmp(as, ASMREF_TMP1));
1244}
1245#else
1246#define asm_cnew(as, ir) ((void)0)
1247#endif
1248
1249/* -- Write barriers ------------------------------------------------------ */
1250
/* Table write barrier for the table in ir->op1.
** The emitted sequence loads the table's marked byte and tests LJ_GC_BLACK.
** If the bit is clear it branches to l_end. Otherwise it clears the bit,
** stores the table to g->gc.grayagain, and stores the previous grayagain
** value to the table's gclist field.
** NOTE(review): the emit_* calls below read bottom-up in execution order,
** assuming this backend emits machine code backwards (l_end is taken
** first, yet is the branch target at the end of the sequence) -- confirm.
*/
1251static void asm_tbar(ASMState *as, IRIns *ir)
1252{
1253 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1254 Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
/* gr holds the global_State pointer; it must differ from tab and link. */
1255 Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
1256 rset_exclude(rset_exclude(RSET_GPR, tab), link));
1257 Reg mark = RID_TMP;
1258 MCLabel l_end = emit_label(as);
1259 emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
1260 emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1261 emit_lso(as, A64I_STRx, tab, gr,
1262 (int32_t)offsetof(global_State, gc.grayagain));
1263 emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
1264 emit_lso(as, A64I_LDRx, link, gr,
1265 (int32_t)offsetof(global_State, gc.grayagain));
1266 emit_cond_branch(as, CC_EQ, l_end);
1267 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
1268 emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1269}
1270
/* Object write barrier. Only upvalues are supported: ir->op1 must be an
** IR_UREFC (asserted) and ir->op2 is the stored value.
** The emitted code loads the marked byte of the value and of the upvalue.
** It skips to l_end if the value has none of the LJ_GC_WHITES bits set or
** the upvalue does not have LJ_GC_BLACK set. Otherwise it calls
** lj_gc_barrieruv(g, tv). All scratch registers are evicted up front
** because of the call.
*/
1271static void asm_obar(ASMState *as, IRIns *ir)
1272{
1273 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
1274 IRRef args[2];
1275 MCLabel l_end;
1276 RegSet allow = RSET_GPR;
1277 Reg obj, val, tmp;
1278 /* No need for other object barriers (yet). */
1279 lua_assert(IR(ir->op1)->o == IR_UREFC);
1280 ra_evictset(as, RSET_SCRATCH);
1281 l_end = emit_label(as);
1282 args[0] = ASMREF_TMP1; /* global_State *g */
1283 args[1] = ir->op1; /* TValue *tv */
1284 asm_gencall(as, ci, args);
1285 ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1) );
/* obj is the register already assigned to the UREFC result (TValue *). */
1286 obj = IR(ir->op1)->r;
1287 tmp = ra_scratch(as, rset_exclude(allow, obj));
1288 emit_cond_branch(as, CC_EQ, l_end);
1289 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
1290 emit_cond_branch(as, CC_EQ, l_end);
1291 emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
1292 val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
/* obj points at the tv field, so rebase the offset to reach 'marked'. */
1293 emit_lso(as, A64I_LDRB, tmp, obj,
1294 (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
1295 emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
1296}
1297
1298/* -- Arithmetic and logic operations ------------------------------------- */
1299
1300static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
1301{
1302 Reg dest = ra_dest(as, ir, RSET_FPR);
1303 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1304 right = (left >> 8); left &= 255;
1305 emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
1306}
1307
1308static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
1309{
1310 Reg dest = ra_dest(as, ir, RSET_FPR);
1311 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1312 emit_dn(as, ai, (dest & 31), (left & 31));
1313}
1314
1315static void asm_fpmath(ASMState *as, IRIns *ir)
1316{
1317 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1318 if (fpm == IRFPM_SQRT) {
1319 asm_fpunary(as, ir, A64I_FSQRTd);
1320 } else if (fpm <= IRFPM_TRUNC) {
1321 asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
1322 fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
1323 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1324 return;
1325 } else {
1326 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1327 }
1328}
1329
1330static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
1331{
1332 IRIns *ir;
1333 if (irref_isk(rref))
1334 return 0; /* Don't swap constants to the left. */
1335 if (irref_isk(lref))
1336 return 1; /* But swap constants to the right. */
1337 ir = IR(rref);
1338 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
1339 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1340 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1341 return 0; /* Don't swap fusable operands to the left. */
1342 ir = IR(lref);
1343 if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
1344 (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1345 (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1346 return 1; /* But swap fusable operands to the right. */
1347 return 0; /* Otherwise don't swap. */
1348}
1349
/* Emit a two-operand integer instruction 'ai' for ir.
** Operands may be swapped (see asm_swapops) unless the base opcode is SUBw,
** ignoring the flag-setting bit A64I_S. The right operand is fused into
** the instruction via asm_fuseopm(). 64 bit types get the A64I_X bit.
** Guarded instructions set flags and emit an overflow (CC_VS) guard.
*/
1350static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
1351{
1352 IRRef lref = ir->op1, rref = ir->op2;
1353 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1354 uint32_t m;
1355 if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
1356 IRRef tmp = lref; lref = rref; rref = tmp;
1357 }
1358 left = ra_hintalloc(as, lref, dest, RSET_GPR);
1359 if (irt_is64(ir->t)) ai |= A64I_X;
/* m encodes the (possibly fused) right operand; it is XORed into ai below. */
1360 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1361 if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */
1362 asm_guardcc(as, CC_VS);
1363 ai |= A64I_S;
1364 }
1365 emit_dn(as, ai^m, dest, left);
1366}
1367
1368static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
1369{
1370 if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
1371 as->flagmcp = NULL;
1372 as->mcp++;
1373 ai |= A64I_S;
1374 }
1375 asm_intop(as, ir, ai);
1376}
1377
1378static void asm_intneg(ASMState *as, IRIns *ir)
1379{
1380 Reg dest = ra_dest(as, ir, RSET_GPR);
1381 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1382 emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
1383}
1384
1385/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
/* Integer multiplication.
** Unguarded: a single MULw/MULx.
** Guarded (IR_MULOV): SMULL produces a 64 bit product in dest. The upper
** 32 bits are then compared against the sign of the lower 32 bits
** (dest ASR 31) and the guard is taken on mismatch (CC_NE). Finally dest
** is reduced to its low 32 bits with MOVw.
** NOTE(review): the emit_* calls read bottom-up in execution order,
** assuming backwards code emission -- confirm.
*/
1386static void asm_intmul(ASMState *as, IRIns *ir)
1387{
1388 Reg dest = ra_dest(as, ir, RSET_GPR);
1389 Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
1390 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1391 if (irt_isguard(ir->t)) { /* IR_MULOV */
1392 asm_guardcc(as, CC_NE);
1393 emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
1394 emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
1395 emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
1396 emit_dnm(as, A64I_SMULL, dest, right, left);
1397 } else {
1398 emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
1399 }
1400}
1401
1402static void asm_add(ASMState *as, IRIns *ir)
1403{
1404 if (irt_isnum(ir->t)) {
1405 if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
1406 asm_fparith(as, ir, A64I_FADDd);
1407 return;
1408 }
1409 asm_intop_s(as, ir, A64I_ADDw);
1410}
1411
1412static void asm_sub(ASMState *as, IRIns *ir)
1413{
1414 if (irt_isnum(ir->t)) {
1415 if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
1416 asm_fparith(as, ir, A64I_FSUBd);
1417 return;
1418 }
1419 asm_intop_s(as, ir, A64I_SUBw);
1420}
1421
1422static void asm_mul(ASMState *as, IRIns *ir)
1423{
1424 if (irt_isnum(ir->t)) {
1425 asm_fparith(as, ir, A64I_FMULd);
1426 return;
1427 }
1428 asm_intmul(as, ir);
1429}
1430
1431static void asm_div(ASMState *as, IRIns *ir)
1432{
1433#if LJ_HASFFI
1434 if (!irt_isnum(ir->t))
1435 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1436 IRCALL_lj_carith_divu64);
1437 else
1438#endif
1439 asm_fparith(as, ir, A64I_FDIVd);
1440}
1441
1442static void asm_pow(ASMState *as, IRIns *ir)
1443{
1444#if LJ_HASFFI
1445 if (!irt_isnum(ir->t))
1446 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1447 IRCALL_lj_carith_powu64);
1448 else
1449#endif
1450 asm_callid(as, ir, IRCALL_lj_vm_powi);
1451}
1452
/* The overflow-checking variants share the plain handlers. The guard is
** added by asm_intop()/asm_intmul() when the instruction type is a guard.
*/
1453#define asm_addov(as, ir) asm_add(as, ir)
1454#define asm_subov(as, ir) asm_sub(as, ir)
1455#define asm_mulov(as, ir) asm_mul(as, ir)
1456
/* abs is a single FP instruction; atan2 and ldexp are calls. */
1457#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
1458#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1459#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1460
1461static void asm_mod(ASMState *as, IRIns *ir)
1462{
1463#if LJ_HASFFI
1464 if (!irt_isint(ir->t))
1465 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1466 IRCALL_lj_carith_modu64);
1467 else
1468#endif
1469 asm_callid(as, ir, IRCALL_lj_vm_modi);
1470}
1471
1472static void asm_neg(ASMState *as, IRIns *ir)
1473{
1474 if (irt_isnum(ir->t)) {
1475 asm_fpunary(as, ir, A64I_FNEGd);
1476 return;
1477 }
1478 asm_intneg(as, ir);
1479}
1480
1481static void asm_band(ASMState *as, IRIns *ir)
1482{
1483 A64Ins ai = A64I_ANDw;
1484 if (asm_fuseandshift(as, ir))
1485 return;
1486 if (as->flagmcp == as->mcp) {
1487 /* Try to drop cmp r, #0. */
1488 as->flagmcp = NULL;
1489 as->mcp++;
1490 ai = A64I_ANDSw;
1491 }
1492 asm_intop(as, ir, ai);
1493}
1494
/* Shared handler for bitwise OR/XOR with base opcode 'ai'.
** If one operand is a fusable IR_BNOT and the other is not a constant, the
** inverted-operand form of the instruction is used (A64I_ON is set). The
** BNOT's own operand is then fused as the right operand. Otherwise this
** defers to asm_intop().
*/
1495static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
1496{
1497 IRRef lref = ir->op1, rref = ir->op2;
1498 IRIns *irl = IR(lref), *irr = IR(rref);
1499 if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
1500 (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
1501 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1502 uint32_t m;
/* Make sure the BNOT ends up as the right operand. */
1503 if (irl->o == IR_BNOT) {
1504 IRRef tmp = lref; lref = rref; rref = tmp;
1505 }
1506 left = ra_alloc1(as, lref, RSET_GPR);
1507 ai |= A64I_ON;
1508 if (irt_is64(ir->t)) ai |= A64I_X;
/* Fuse the operand of the BNOT, not the BNOT itself. */
1509 m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
1510 emit_dn(as, ai^m, dest, left);
1511 } else {
1512 asm_intop(as, ir, ai);
1513 }
1514}
1515
1516static void asm_bor(ASMState *as, IRIns *ir)
1517{
1518 if (asm_fuseorshift(as, ir))
1519 return;
1520 asm_borbxor(as, ir, A64I_ORRw);
1521}
1522
/* Bitwise XOR uses the shared OR/XOR handler with the EOR opcode. */
1523#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw)
1524
/* Bitwise NOT: MVN with the operand fused via asm_fuseopm().
** The operand is fused using the 32 bit opcode; the A64I_X bit for 64 bit
** types is only added afterwards, right before emitting.
*/
1525static void asm_bnot(ASMState *as, IRIns *ir)
1526{
1527 A64Ins ai = A64I_MVNw;
1528 Reg dest = ra_dest(as, ir, RSET_GPR);
1529 uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1530 if (irt_is64(ir->t)) ai |= A64I_X;
1531 emit_d(as, ai^m, dest);
1532}
1533
1534static void asm_bswap(ASMState *as, IRIns *ir)
1535{
1536 Reg dest = ra_dest(as, ir, RSET_GPR);
1537 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1538 emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
1539}
1540
/* Shared handler for shifts and rotates.
** ai is the 32 bit bitfield/extract opcode used for constant shift counts.
** sh selects the shift kind and also the variable-shift encoding.
** Constant counts are masked to the operand width (31 or 63). A constant
** right shift of a fusable constant left shift is merged into a single
** bitfield instruction. Variable counts use the register shift form.
*/
1541static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
1542{
1543 int32_t shmask = irt_is64(ir->t) ? 63 : 31;
1544 if (irref_isk(ir->op2)) { /* Constant shifts. */
1545 Reg left, dest = ra_dest(as, ir, RSET_GPR);
1546 int32_t shift = (IR(ir->op2)->i & shmask);
1547 IRIns *irl = IR(ir->op1);
/* Switch to the 64 bit encoding by adding the UBFMx-UBFMw delta.
** NOTE(review): presumably the same delta is valid for SBFM/EXTR -- confirm.
*/
1548 if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;
1549
1550 /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
1551 if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
1552 if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
1553 int32_t shift2 = (IR(irl->op2)->i & shmask);
1554 shift = ((shift - shift2) & shmask);
1555 shmask -= shift2;
/* From here on the source operand is taken from the fused BSHL. */
1556 ir = irl;
1557 }
1558 }
1559
1560 left = ra_alloc1(as, ir->op1, RSET_GPR);
1561 switch (sh) {
1562 case A64SH_LSL:
1563 emit_dn(as, ai | A64F_IMMS(shmask-shift) |
1564 A64F_IMMR((shmask-shift+1)&shmask), dest, left);
1565 break;
1566 case A64SH_LSR: case A64SH_ASR:
1567 emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
1568 break;
1569 case A64SH_ROR:
/* Rotate: the same register is passed as both source operands. */
1570 emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
1571 break;
1572 }
1573 } else { /* Variable-length shifts. */
1574 Reg dest = ra_dest(as, ir, RSET_GPR);
1575 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1576 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1577 emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
1578 }
1579}
1580
/* Shift/rotate handlers: each passes the 32 bit opcode used for constant
** counts plus the shift kind to asm_bitshift(). Rotate-left has no handler
** and asserts if it is ever reached.
*/
1581#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
1582#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
1583#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
1584#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
1585#define asm_brol(as, ir) lua_assert(0)
1586
1587static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
1588{
1589 Reg dest = ra_dest(as, ir, RSET_GPR);
1590 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1591 Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1592 emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
1593 emit_nm(as, A64I_CMPw, left, right);
1594}
1595
1596static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
1597{
1598 Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
1599 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1600 right = ((left >> 8) & 31); left &= 31;
1601 emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right);
1602 emit_nm(as, A64I_FCMPd, left, right);
1603}
1604
1605static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
1606{
1607 if (irt_isnum(ir->t))
1608 asm_fpmin_max(as, ir, fcc);
1609 else
1610 asm_intmin_max(as, ir, cc);
1611}
1612
/* max/min handlers: first condition is for integers, second for FP. */
1613#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_HI)
1614#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_LO)
1615
1616/* -- Comparisons --------------------------------------------------------- */
1617
1618/* Map of comparisons to flags. ORDER IR. */
/* Indexed by the IR opcode of the comparison. Each byte holds two condition
** codes, both passed to asm_guardcc():
** - low nibble:  used by asm_intcomp() (asm_compmap[ir->o] & 15).
** - high nibble: used by asm_fpcomp()  (asm_compmap[ir->o] >> 4).
** The stored codes are the inverse of the comparison named in the comment.
** The 'x' column marks the opcodes for which asm_fpcomp() swaps operands.
** NOTE(review): this matches its swp formula if IR_LT == 0 -- confirm.
*/
1619static const uint8_t asm_compmap[IR_ABC+1] = {
1620 /* op FP swp int cc FP cc */
1621 /* LT */ CC_GE + (CC_HS << 4),
1622 /* GE x */ CC_LT + (CC_HI << 4),
1623 /* LE */ CC_GT + (CC_HI << 4),
1624 /* GT x */ CC_LE + (CC_HS << 4),
1625 /* ULT x */ CC_HS + (CC_LS << 4),
1626 /* UGE */ CC_LO + (CC_LO << 4),
1627 /* ULE x */ CC_HI + (CC_LO << 4),
1628 /* UGT */ CC_LS + (CC_LS << 4),
1629 /* EQ */ CC_NE + (CC_NE << 4),
1630 /* NE */ CC_EQ + (CC_EQ << 4),
1631 /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */
1632};
1633
1634/* FP comparisons. */
/* Emits FCMP followed by a guard using the FP condition from asm_compmap.
** For some opcodes the operands are swapped (swp, computed from the opcode
** bits). An unswapped compare against a constant whose bit pattern is zero
** uses the compare-with-zero form and needs no second register.
*/
1635static void asm_fpcomp(ASMState *as, IRIns *ir)
1636{
1637 Reg left, right;
1638 A64Ins ai;
1639 int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
/* The u64 check matches only the all-zero bit pattern. */
1640 if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
1641 left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
1642 right = 0;
1643 ai = A64I_FCMPZd;
1644 } else {
/* ra_alloc2() packs op1's register in the low byte, op2's above it. */
1645 left = ra_alloc2(as, ir, RSET_FPR);
1646 if (swp) {
1647 right = (left & 31); left = ((left >> 8) & 31);
1648 } else {
1649 right = ((left >> 8) & 31); left &= 31;
1650 }
1651 ai = A64I_FCMPd;
1652 }
1653 asm_guardcc(as, (asm_compmap[ir->o] >> 4));
1654 emit_nm(as, ai, left, right);
1655}
1656
1657/* Integer comparisons. */
/* Emits a compare plus guard for an integer/address comparison. The
** condition comes from the low nibble of asm_compmap and is adjusted when
** operands are swapped. Comparisons against constant zero get special
** treatment:
** - GE/LT are turned into PL/MI.
** - If the left operand is the directly preceding, otherwise unused
**   IR_BAND, the AND and compare become a TST, or a TBZ/TBNZ when the mask
**   is a single bit and the condition is EQ/NE.
** - EQ/NE against zero become CBZ/CBNZ.
** - Otherwise, for suitable conditions, as->flagmcp records the compare so
**   the previous instruction may absorb it (see asm_intop_s/asm_band).
*/
1658static void asm_intcomp(ASMState *as, IRIns *ir)
1659{
1660 A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
1661 A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
1662 IRRef lref = ir->op1, rref = ir->op2;
1663 Reg left;
1664 uint32_t m;
1665 int cmpprev0 = 0;
1666 lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
1667 irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
1668 if (asm_swapops(as, lref, rref)) {
1669 IRRef tmp = lref; lref = rref; rref = tmp;
1670 if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
1671 else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
1672 }
/* Remember the condition before the PL/MI substitution below. */
1673 oldcc = cc;
1674 if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
1675 IRIns *irl = IR(lref);
1676 if (cc == CC_GE) cc = CC_PL;
1677 else if (cc == CC_LT) cc = CC_MI;
1678 else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */
/* True if the left operand is the IR instruction right before this one. */
1679 cmpprev0 = (irl+1 == ir);
1680 /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
1681 if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
1682 IRRef blref = irl->op1, brref = irl->op2;
1683 uint32_t m2 = 0;
1684 Reg bleft;
1685 if (asm_swapops(as, blref, brref)) {
1686 Reg tmp = blref; blref = brref; brref = tmp;
1687 }
1688 if (irref_isk(brref)) {
1689 uint64_t k = get_k64val(IR(brref));
/* k is a non-zero power of two: test that single bit directly. */
1690 if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
1691 asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
1692 ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
1693 return;
1694 }
/* Try to encode the mask as a logical immediate; 0 means not encodable. */
1695 m2 = emit_isk13(k, irt_is64(irl->t));
1696 }
1697 bleft = ra_alloc1(as, blref, RSET_GPR);
1698 ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
1699 if (!m2)
1700 m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
1701 asm_guardcc(as, cc);
1702 emit_n(as, ai^m2, bleft);
1703 return;
1704 }
1705 if (cc == CC_EQ || cc == CC_NE) {
1706 /* Combine cmp-bcc into cbz/cbnz. */
1707 ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
1708 if (irt_is64(ir->t)) ai |= A64I_X;
1709 asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
1710 return;
1711 }
1712 }
1713nocombine:
1714 left = ra_alloc1(as, lref, RSET_GPR);
1715 m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1716 asm_guardcc(as, cc);
1717 emit_n(as, ai^m, left);
1718 /* Signed comparison with zero and referencing previous ins? */
1719 if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
1720 as->flagmcp = as->mcp; /* Allow elimination of the compare. */
1721}
1722
1723static void asm_comp(ASMState *as, IRIns *ir)
1724{
1725 if (irt_isnum(ir->t))
1726 asm_fpcomp(as, ir);
1727 else
1728 asm_intcomp(as, ir);
1729}
1730
/* Equality comparisons share the generic comparison handler. */
1731#define asm_equal(as, ir) asm_comp(as, ir)
1732
1733/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
1734
1735/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1736static void asm_hiop(ASMState *as, IRIns *ir)
1737{
1738 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on 64 bit. */
1739}
1740
1741/* -- Profiling ----------------------------------------------------------- */
1742
/* Profiler check: loads the hookmask byte of the global state, tests the
** HOOK_PROFILE bit and emits a guard on CC_NE. ir is unused.
** NOTE(review): presumably the guard exits the trace when the bit is set --
** confirm against asm_guardcc().
*/
1743static void asm_prof(ASMState *as, IRIns *ir)
1744{
/* HOOK_PROFILE must be encodable as a logical immediate (asserted). */
1745 uint32_t k = emit_isk13(HOOK_PROFILE, 0);
1746 lua_assert(k != 0);
1747 UNUSED(ir);
1748 asm_guardcc(as, CC_NE);
1749 emit_n(as, A64I_TSTw^k, RID_TMP);
1750 emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1751}
1752
1753/* -- Stack handling ------------------------------------------------------ */
1754
1755/* Check Lua stack size for overflow. Use exit handler as fallback. */
/* Emitted check: load cur_L, load L->maxstack, subtract the base pointer,
** compare with 8*topslot and branch (CC_LS) to the exit stub of 'exitno'.
** irp is the parent's BASE instruction for side traces, or NULL (then
** RID_BASE is used). For side traces the base may have to be reloaded from
** its spill slot into a register picked from 'allow'. If 'allow' is empty,
** RID_RET is borrowed and saved/restored around the check via [sp].
** NOTE(review): the emit_* calls read bottom-up in execution order (the
** "Restore" load is emitted before the "Save" store) -- confirm.
*/
1756static void asm_stack_check(ASMState *as, BCReg topslot,
1757 IRIns *irp, RegSet allow, ExitNo exitno)
1758{
1759 Reg pbase;
1760 uint32_t k;
1761 if (irp) {
1762 if (!ra_hasspill(irp->s)) {
1763 pbase = irp->r;
1764 lua_assert(ra_hasreg(pbase));
1765 } else if (allow) {
1766 pbase = rset_pickbot(allow);
1767 } else {
1768 pbase = RID_RET;
1769 emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
1770 }
1771 } else {
1772 pbase = RID_BASE;
1773 }
1774 emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
/* 8*topslot must be encodable as a 12 bit immediate (asserted). */
1775 k = emit_isk12((8*topslot));
1776 lua_assert(k);
1777 emit_n(as, A64I_CMPx^k, RID_TMP);
1778 emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
1779 emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
1780 (int32_t)offsetof(lua_State, maxstack));
1781 if (irp) { /* Must not spill arbitrary registers in head of side trace. */
1782 if (ra_hasspill(irp->s))
1783 emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
1784 emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
1785 if (ra_hasspill(irp->s) && !allow)
1786 emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
1787 } else {
1788 emit_getgl(as, RID_TMP, cur_L);
1789 }
1790}
1791
1792/* Restore Lua stack from on-trace state. */
/* Walks all entries of the snapshot and stores each referenced value to its
** stack slot relative to RID_BASE. Entries flagged SNAP_NORESTORE are
** skipped. Numbers are stored from an FPR; everything else goes through
** asm_tvstore64(). checkmclim() is called after every entry.
*/
1793static void asm_stack_restore(ASMState *as, SnapShot *snap)
1794{
1795 SnapEntry *map = &as->T->snapmap[snap->mapofs];
1796#ifdef LUA_USE_ASSERT
1797 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
1798#endif
1799 MSize n, nent = snap->nent;
1800 /* Store the value of all modified slots to the Lua stack. */
1801 for (n = 0; n < nent; n++) {
1802 SnapEntry sn = map[n];
1803 BCReg s = snap_slot(sn);
/* Byte offset of slot s relative to BASE. */
1804 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1805 IRRef ref = snap_ref(sn);
1806 IRIns *ir = IR(ref);
1807 if ((sn & SNAP_NORESTORE))
1808 continue;
1809 if (irt_isnum(ir->t)) {
1810 Reg src = ra_alloc1(as, ref, RSET_FPR);
1811 emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
1812 } else {
1813 asm_tvstore64(as, RID_BASE, ofs, ref);
1814 }
1815 checkmclim(as);
1816 }
/* The snapshot entries must end exactly where flinks points. */
1817 lua_assert(map + nent == flinks);
1818}
1819
1820/* -- GC handling --------------------------------------------------------- */
1821
1822/* Check GC threshold and do one or more GC steps. */
/* Emitted sequence: load the global_State pointer, load gc.total and
** gc.threshold, compare them and branch (CC_LS) to l_end. Otherwise call
** lj_gc_step_jit(g, as->gcsteps) and guard on its result in RID_RET.
** All scratch registers are evicted up front because of the call.
** as->gcsteps is reset to 0 afterwards.
** NOTE(review): the emit_* calls read bottom-up in execution order,
** assuming backwards code emission -- confirm.
*/
1823static void asm_gc_check(ASMState *as)
1824{
1825 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
1826 IRRef args[2];
1827 MCLabel l_end;
1828 Reg tmp1, tmp2;
1829 ra_evictset(as, RSET_SCRATCH);
1830 l_end = emit_label(as);
1831 /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1832 asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
1833 args[0] = ASMREF_TMP1; /* global_State *g */
1834 args[1] = ASMREF_TMP2; /* MSize steps */
1835 asm_gencall(as, ci, args);
1836 tmp1 = ra_releasetmp(as, ASMREF_TMP1);
1837 tmp2 = ra_releasetmp(as, ASMREF_TMP2);
1838 emit_loadi(as, tmp2, as->gcsteps);
1839 /* Jump around GC step if GC total < GC threshold. */
1840 emit_cond_branch(as, CC_LS, l_end);
1841 emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
/* tmp2 holds the threshold for the compare before it gets the step count. */
1842 emit_lso(as, A64I_LDRx, tmp2, tmp1,
1843 (int32_t)offsetof(global_State, gc.threshold));
1844 emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
1845 (int32_t)offsetof(global_State, gc.total));
1846 ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
1847 as->gcsteps = 0;
1848 checkmclim(as);
1849}
1850
1851/* -- Loop handling ------------------------------------------------------- */
1852
1853/* Fixup the loop branch. */
1854static void asm_loop_fixup(ASMState *as)
1855{
1856 MCode *p = as->mctop;
1857 MCode *target = as->mcp;
1858 if (as->loopinv) { /* Inverted loop branch? */
1859 uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
1860 ptrdiff_t delta = target - (p - 2);
1861 /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
1862 p[-2] |= ((uint32_t)delta & mask) << 5;
1863 } else {
1864 ptrdiff_t delta = target - (p - 1);
1865 p[-1] = A64I_B | A64F_S26(delta);
1866 }
1867}
1868
1869/* -- Head of trace ------------------------------------------------------- */
1870
1871/* Reload L register from g->cur_L. */
1872static void asm_head_lreg(ASMState *as)
1873{
1874 IRIns *ir = IR(ASMREF_L);
1875 if (ra_used(ir)) {
1876 Reg r = ra_dest(as, ir, RSET_GPR);
1877 emit_getgl(as, r, cur_L);
1878 ra_evictk(as);
1879 }
1880}
1881
1882/* Coalesce BASE register for a root trace. */
/* First reloads L if needed. If BASE currently lives in a register that is
** in the modified set, or BASE is marked, it is spilled. Finally BASE is
** bound to the fixed register RID_BASE.
*/
1883static void asm_head_root_base(ASMState *as)
1884{
1885 IRIns *ir;
1886 asm_head_lreg(as);
1887 ir = IR(REF_BASE);
1888 if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
1889 ra_spill(as, ir);
1890 ra_destreg(as, ir, RID_BASE);
1891}
1892
1893/* Coalesce BASE register for a side trace. */
/* irp is the parent's BASE instruction. Returns 'allow' with the register
** chosen for BASE removed. If the parent spilled BASE, any register from
** 'allow' is used. Otherwise BASE is bound to the parent's register; if a
** different reference currently occupies it, that reference is restored
** first.
*/
1894static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
1895{
1896 IRIns *ir;
1897 asm_head_lreg(as);
1898 ir = IR(REF_BASE);
1899 if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
1900 ra_spill(as, ir);
1901 if (ra_hasspill(irp->s)) {
1902 rset_clear(allow, ra_dest(as, ir, allow));
1903 } else {
1904 Reg r = irp->r;
1905 lua_assert(ra_hasreg(r));
1906 rset_clear(allow, r);
/* r is in use by another reference: restore that reference first. */
1907 if (r != ir->r && !rset_test(as->freeset, r))
1908 ra_restore(as, regcost_ref(as->cost[r]));
1909 ra_destreg(as, ir, r);
1910 }
1911 return allow;
1912}
1913
1914/* -- Tail of trace ------------------------------------------------------- */
1915
1916/* Fixup the tail code. */
/* Patches the two slots at the top of the machine code.
** lnk is the trace to link to, or 0 to exit to the interpreter.
** With no stack adjustment, a NOP is written into the branch slot, the top
** is lowered by one and the branch moves into the slot below. Otherwise an
** ADD to sp goes into the slot below the branch.
*/
1917static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1918{
1919 MCode *p = as->mctop;
1920 MCode *target;
1921 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1922 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1923 if (spadj == 0) {
/* The NOP is stored already converted with A64I_LE(), unlike the writes below. */
1924 *--p = A64I_LE(A64I_NOP);
1925 as->mctop = p;
1926 } else {
1927 /* Patch stack adjustment. */
/* spadj must be encodable as a 12 bit immediate (asserted). */
1928 uint32_t k = emit_isk12(spadj);
1929 lua_assert(k);
1930 p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
1931 }
1932 /* Patch exit branch. */
1933 target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
/* Displacement is relative to the branch itself at p-1, hence the +1. */
1934 p[-1] = A64I_B | A64F_S26((target-p)+1);
1935}
1936
1937/* Prepare tail of code. */
1938static void asm_tail_prep(ASMState *as)
1939{
1940 MCode *p = as->mctop - 1; /* Leave room for exit branch. */
1941 if (as->loopref) {
1942 as->invmcp = as->mcp = p;
1943 } else {
1944 as->mcp = p-1; /* Leave room for stack pointer adjustment. */
1945 as->invmcp = NULL;
1946 }
1947 *p = 0; /* Prevent load/store merging. */
1948}
1949
1950/* -- Trace setup --------------------------------------------------------- */
1951
1952/* Ensure there are enough stack slots for call arguments. */
1953static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1954{
1955 IRRef args[CCI_NARGS_MAX*2];
1956 uint32_t i, nargs = CCI_XNARGS(ci);
1957 int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1958 asm_collectargs(as, ir, ci, args);
1959 for (i = 0; i < nargs; i++) {
1960 if (args[i] && irt_isfp(IR(args[i])->t)) {
1961 if (nfpr > 0) nfpr--; else nslots += 2;
1962 } else {
1963 if (ngpr > 0) ngpr--; else nslots += 2;
1964 }
1965 }
1966 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
1967 as->evenspill = nslots;
1968 return REGSP_HINT(RID_RET);
1969}
1970
1971static void asm_setup_target(ASMState *as)
1972{
1973 /* May need extra exit for asm_stack_check on side traces. */
1974 asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
1975}
1976
1977#if LJ_BE
1978/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
1979static void asm_mcode_fixup(MCode *mcode, MSize size)
1980{
1981 MCode *pe = (MCode *)((char *)mcode + size);
1982 while (mcode < pe) {
1983 MCode ins = *mcode;
1984 *mcode++ = lj_bswap(ins);
1985 }
1986}
1987#define LJ_TARGET_MCODE_FIXUP 1
1988#endif
1989
1990/* -- Trace patching ------------------------------------------------------ */
1991
1992/* Patch exit jumps of existing machine code to a new target. */
1993void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1994{
1995 MCode *p = T->mcode;
1996 MCode *pe = (MCode *)((char *)p + T->szmcode);
1997 MCode *cstart = NULL;
1998 MCode *mcarea = lj_mcode_patch(J, p, 0);
1999 MCode *px = exitstub_trace_addr(T, exitno);
2000 /* Note: this assumes a trace exit is only ever patched once. */
2001 for (; p < pe; p++) {
2002 /* Look for exitstub branch, replace with branch to target. */
2003 ptrdiff_t delta = target - p;
2004 MCode ins = A64I_LE(*p);
2005 if ((ins & 0xff000000u) == 0x54000000u &&
2006 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2007 /* Patch bcc, if within range. */
2008 if (A64F_S_OK(delta, 19)) {
2009 *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
2010 if (!cstart) cstart = p;
2011 }
2012 } else if ((ins & 0xfc000000u) == 0x14000000u &&
2013 ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
2014 /* Patch b. */
2015 lua_assert(A64F_S_OK(delta, 26));
2016 *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
2017 if (!cstart) cstart = p;
2018 } else if ((ins & 0x7e000000u) == 0x34000000u &&
2019 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2020 /* Patch cbz/cbnz, if within range. */
2021 if (A64F_S_OK(delta, 19)) {
2022 *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
2023 if (!cstart) cstart = p;
2024 }
2025 } else if ((ins & 0x7e000000u) == 0x36000000u &&
2026 ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
2027 /* Patch tbz/tbnz, if within range. */
2028 if (A64F_S_OK(delta, 14)) {
2029 *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
2030 if (!cstart) cstart = p;
2031 }
2032 }
2033 }
2034 { /* Always patch long-range branch in exit stub itself. */
2035 ptrdiff_t delta = target - px;
2036 lua_assert(A64F_S_OK(delta, 26));
2037 *px = A64I_B | A64F_S26(delta);
2038 if (!cstart) cstart = px;
2039 }
2040 lj_mcode_sync(cstart, px+1);
2041 lj_mcode_patch(J, mcarea, 1);
2042}
2043
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 190a55eb..9309b781 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
23{ 23{
24 Reg r = IR(ref)->r; 24 Reg r = IR(ref)->r;
25 if (ra_noreg(r)) { 25 if (ra_noreg(r)) {
26 if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) 26 if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0)
27 return RID_ZERO; 27 return RID_ZERO;
28 r = ra_allocref(as, ref, allow); 28 r = ra_allocref(as, ref, allow);
29 } else { 29 } else {
@@ -101,7 +101,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
101 as->invmcp = NULL; 101 as->invmcp = NULL;
102 as->loopinv = 1; 102 as->loopinv = 1;
103 as->mcp = p+1; 103 as->mcp = p+1;
104#if !LJ_TARGET_MIPSR6
104 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ 105 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */
106#else
107 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u :
108 (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */
109#endif
105 target = p; /* Patch target later in asm_loop_fixup. */ 110 target = p; /* Patch target later in asm_loop_fixup. */
106 } 111 }
107 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); 112 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
@@ -165,9 +170,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
165 } else if (ir->o == IR_UREFC) { 170 } else if (ir->o == IR_UREFC) {
166 if (irref_isk(ir->op1)) { 171 if (irref_isk(ir->op1)) {
167 GCfunc *fn = ir_kfunc(IR(ir->op1)); 172 GCfunc *fn = ir_kfunc(IR(ir->op1));
168 int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); 173 intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
169 int32_t jgl = (intptr_t)J2G(as->J); 174 intptr_t jgl = (intptr_t)J2G(as->J);
170 if ((uint32_t)(ofs-jgl) < 65536) { 175 if ((uintptr_t)(ofs-jgl) < 65536) {
171 *ofsp = ofs-jgl-32768; 176 *ofsp = ofs-jgl-32768;
172 return RID_JGL; 177 return RID_JGL;
173 } else { 178 } else {
@@ -189,20 +194,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
189 Reg base; 194 Reg base;
190 if (ra_noreg(ir->r) && canfuse(as, ir)) { 195 if (ra_noreg(ir->r) && canfuse(as, ir)) {
191 if (ir->o == IR_ADD) { 196 if (ir->o == IR_ADD) {
192 int32_t ofs2; 197 intptr_t ofs2;
193 if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { 198 if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)),
199 checki16(ofs2))) {
194 ref = ir->op1; 200 ref = ir->op1;
195 ofs = ofs2; 201 ofs = (int32_t)ofs2;
196 } 202 }
197 } else if (ir->o == IR_STRREF) { 203 } else if (ir->o == IR_STRREF) {
198 int32_t ofs2 = 65536; 204 intptr_t ofs2 = 65536;
199 lua_assert(ofs == 0); 205 lua_assert(ofs == 0);
200 ofs = (int32_t)sizeof(GCstr); 206 ofs = (int32_t)sizeof(GCstr);
201 if (irref_isk(ir->op2)) { 207 if (irref_isk(ir->op2)) {
202 ofs2 = ofs + IR(ir->op2)->i; 208 ofs2 = ofs + get_kval(IR(ir->op2));
203 ref = ir->op1; 209 ref = ir->op1;
204 } else if (irref_isk(ir->op1)) { 210 } else if (irref_isk(ir->op1)) {
205 ofs2 = ofs + IR(ir->op1)->i; 211 ofs2 = ofs + get_kval(IR(ir->op1));
206 ref = ir->op2; 212 ref = ir->op2;
207 } 213 }
208 if (!checki16(ofs2)) { 214 if (!checki16(ofs2)) {
@@ -210,7 +216,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
210 Reg right, left = ra_alloc2(as, ir, allow); 216 Reg right, left = ra_alloc2(as, ir, allow);
211 right = (left >> 8); left &= 255; 217 right = (left >> 8); left &= 255;
212 emit_hsi(as, mi, rt, RID_TMP, ofs); 218 emit_hsi(as, mi, rt, RID_TMP, ofs);
213 emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); 219 emit_dst(as, MIPSI_AADDU, RID_TMP, left, right);
214 return; 220 return;
215 } 221 }
216 ofs = ofs2; 222 ofs = ofs2;
@@ -225,29 +231,41 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
225/* Generate a call to a C function. */ 231/* Generate a call to a C function. */
226static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 232static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
227{ 233{
228 uint32_t n, nargs = CCI_NARGS(ci); 234 uint32_t n, nargs = CCI_XNARGS(ci);
229 int32_t ofs = 16; 235 int32_t ofs = LJ_32 ? 16 : 0;
236#if LJ_SOFTFP
237 Reg gpr = REGARG_FIRSTGPR;
238#else
230 Reg gpr, fpr = REGARG_FIRSTFPR; 239 Reg gpr, fpr = REGARG_FIRSTFPR;
240#endif
231 if ((void *)ci->func) 241 if ((void *)ci->func)
232 emit_call(as, (void *)ci->func); 242 emit_call(as, (void *)ci->func, 1);
243#if !LJ_SOFTFP
233 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) 244 for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
234 as->cost[gpr] = REGCOST(~0u, ASMREF_L); 245 as->cost[gpr] = REGCOST(~0u, ASMREF_L);
235 gpr = REGARG_FIRSTGPR; 246 gpr = REGARG_FIRSTGPR;
247#endif
236 for (n = 0; n < nargs; n++) { /* Setup args. */ 248 for (n = 0; n < nargs; n++) { /* Setup args. */
237 IRRef ref = args[n]; 249 IRRef ref = args[n];
238 if (ref) { 250 if (ref) {
239 IRIns *ir = IR(ref); 251 IRIns *ir = IR(ref);
252#if !LJ_SOFTFP
240 if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && 253 if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
241 !(ci->flags & CCI_VARARG)) { 254 !(ci->flags & CCI_VARARG)) {
242 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 255 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
243 ra_leftov(as, fpr, ref); 256 ra_leftov(as, fpr, ref);
244 fpr += 2; 257 fpr += LJ_32 ? 2 : 1;
245 gpr += irt_isnum(ir->t) ? 2 : 1; 258 gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1;
246 } else { 259 } else
260#endif
261 {
262#if LJ_32 && !LJ_SOFTFP
247 fpr = REGARG_LASTFPR+1; 263 fpr = REGARG_LASTFPR+1;
248 if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; 264#endif
265 if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
249 if (gpr <= REGARG_LASTGPR) { 266 if (gpr <= REGARG_LASTGPR) {
250 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 267 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
268#if !LJ_SOFTFP
251 if (irt_isfp(ir->t)) { 269 if (irt_isfp(ir->t)) {
252 RegSet of = as->freeset; 270 RegSet of = as->freeset;
253 Reg r; 271 Reg r;
@@ -256,31 +274,55 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
256 r = ra_alloc1(as, ref, RSET_FPR); 274 r = ra_alloc1(as, ref, RSET_FPR);
257 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); 275 as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
258 if (irt_isnum(ir->t)) { 276 if (irt_isnum(ir->t)) {
277#if LJ_32
259 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); 278 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
260 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); 279 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
261 lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ 280 lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */
262 gpr += 2; 281 gpr += 2;
282#else
283 emit_tg(as, MIPSI_DMFC1, gpr, r);
284 gpr++; fpr++;
285#endif
263 } else if (irt_isfloat(ir->t)) { 286 } else if (irt_isfloat(ir->t)) {
264 emit_tg(as, MIPSI_MFC1, gpr, r); 287 emit_tg(as, MIPSI_MFC1, gpr, r);
265 gpr++; 288 gpr++;
289#if LJ_64
290 fpr++;
291#endif
266 } 292 }
267 } else { 293 } else
294#endif
295 {
268 ra_leftov(as, gpr, ref); 296 ra_leftov(as, gpr, ref);
269 gpr++; 297 gpr++;
298#if LJ_64 && !LJ_SOFTFP
299 fpr++;
300#endif
270 } 301 }
271 } else { 302 } else {
272 Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 303 Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
304#if LJ_32
273 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; 305 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
274 emit_spstore(as, ir, r, ofs); 306 emit_spstore(as, ir, r, ofs);
275 ofs += irt_isnum(ir->t) ? 8 : 4; 307 ofs += irt_isnum(ir->t) ? 8 : 4;
308#else
309 emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0));
310 ofs += 8;
311#endif
276 } 312 }
277 } 313 }
278 } else { 314 } else {
315#if !LJ_SOFTFP
279 fpr = REGARG_LASTFPR+1; 316 fpr = REGARG_LASTFPR+1;
280 if (gpr <= REGARG_LASTGPR) 317#endif
318 if (gpr <= REGARG_LASTGPR) {
281 gpr++; 319 gpr++;
282 else 320#if LJ_64 && !LJ_SOFTFP
283 ofs += 4; 321 fpr++;
322#endif
323 } else {
324 ofs += LJ_32 ? 4 : 8;
325 }
284 } 326 }
285 checkmclim(as); 327 checkmclim(as);
286 } 328 }
@@ -290,50 +332,57 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
290static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) 332static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
291{ 333{
292 RegSet drop = RSET_SCRATCH; 334 RegSet drop = RSET_SCRATCH;
335#if LJ_32
293 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 336 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
337#endif
338#if !LJ_SOFTFP
294 if ((ci->flags & CCI_NOFPRCLOBBER)) 339 if ((ci->flags & CCI_NOFPRCLOBBER))
295 drop &= ~RSET_FPR; 340 drop &= ~RSET_FPR;
341#endif
296 if (ra_hasreg(ir->r)) 342 if (ra_hasreg(ir->r))
297 rset_clear(drop, ir->r); /* Dest reg handled below. */ 343 rset_clear(drop, ir->r); /* Dest reg handled below. */
344#if LJ_32
298 if (hiop && ra_hasreg((ir+1)->r)) 345 if (hiop && ra_hasreg((ir+1)->r))
299 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ 346 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
347#endif
300 ra_evictset(as, drop); /* Evictions must be performed first. */ 348 ra_evictset(as, drop); /* Evictions must be performed first. */
301 if (ra_used(ir)) { 349 if (ra_used(ir)) {
302 lua_assert(!irt_ispri(ir->t)); 350 lua_assert(!irt_ispri(ir->t));
303 if (irt_isfp(ir->t)) { 351 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
304 if ((ci->flags & CCI_CASTU64)) { 352 if ((ci->flags & CCI_CASTU64)) {
305 int32_t ofs = sps_scale(ir->s); 353 int32_t ofs = sps_scale(ir->s);
306 Reg dest = ir->r; 354 Reg dest = ir->r;
307 if (ra_hasreg(dest)) { 355 if (ra_hasreg(dest)) {
308 ra_free(as, dest); 356 ra_free(as, dest);
309 ra_modified(as, dest); 357 ra_modified(as, dest);
358#if LJ_32
310 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); 359 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
311 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); 360 emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
361#else
362 emit_tg(as, MIPSI_DMTC1, RID_RET, dest);
363#endif
312 } 364 }
313 if (ofs) { 365 if (ofs) {
366#if LJ_32
314 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); 367 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
315 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); 368 emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
369#else
370 emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs);
371#endif
316 } 372 }
317 } else { 373 } else {
318 ra_destreg(as, ir, RID_FPRET); 374 ra_destreg(as, ir, RID_FPRET);
319 } 375 }
376#if LJ_32
320 } else if (hiop) { 377 } else if (hiop) {
321 ra_destpair(as, ir); 378 ra_destpair(as, ir);
379#endif
322 } else { 380 } else {
323 ra_destreg(as, ir, RID_RET); 381 ra_destreg(as, ir, RID_RET);
324 } 382 }
325 } 383 }
326} 384}
327 385
328static void asm_call(ASMState *as, IRIns *ir)
329{
330 IRRef args[CCI_NARGS_MAX];
331 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
332 asm_collectargs(as, ir, ci, args);
333 asm_setupresult(as, ir, ci);
334 asm_gencall(as, ci, args);
335}
336
337static void asm_callx(ASMState *as, IRIns *ir) 386static void asm_callx(ASMState *as, IRIns *ir)
338{ 387{
339 IRRef args[CCI_NARGS_MAX*2]; 388 IRRef args[CCI_NARGS_MAX*2];
@@ -346,7 +395,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
346 func = ir->op2; irf = IR(func); 395 func = ir->op2; irf = IR(func);
347 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 396 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
348 if (irref_isk(func)) { /* Call to constant address. */ 397 if (irref_isk(func)) { /* Call to constant address. */
349 ci.func = (ASMFunction)(void *)(irf->i); 398 ci.func = (ASMFunction)(void *)get_kval(irf);
350 } else { /* Need specific register for indirect calls. */ 399 } else { /* Need specific register for indirect calls. */
351 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); 400 Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
352 MCode *p = as->mcp; 401 MCode *p = as->mcp;
@@ -361,27 +410,23 @@ static void asm_callx(ASMState *as, IRIns *ir)
361 asm_gencall(as, &ci, args); 410 asm_gencall(as, &ci, args);
362} 411}
363 412
364static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) 413#if !LJ_SOFTFP
365{
366 const CCallInfo *ci = &lj_ir_callinfo[id];
367 IRRef args[2];
368 args[0] = ir->op1;
369 args[1] = ir->op2;
370 asm_setupresult(as, ir, ci);
371 asm_gencall(as, ci, args);
372}
373
374static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) 414static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
375{ 415{
376 /* The modified regs must match with the *.dasc implementation. */ 416 /* The modified regs must match with the *.dasc implementation. */
377 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| 417 RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
378 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); 418 RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
419#if LJ_TARGET_MIPSR6
420 |RID2RSET(RID_F21)
421#endif
422 ;
379 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); 423 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
380 ra_evictset(as, drop); 424 ra_evictset(as, drop);
381 ra_destreg(as, ir, RID_FPRET); 425 ra_destreg(as, ir, RID_FPRET);
382 emit_call(as, (void *)lj_ir_callinfo[id].func); 426 emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
383 ra_leftov(as, REGARG_FIRSTFPR, ir->op1); 427 ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
384} 428}
429#endif
385 430
386/* -- Returns ------------------------------------------------------------- */ 431/* -- Returns ------------------------------------------------------------- */
387 432
@@ -390,25 +435,31 @@ static void asm_retf(ASMState *as, IRIns *ir)
390{ 435{
391 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 436 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
392 void *pc = ir_kptr(IR(ir->op2)); 437 void *pc = ir_kptr(IR(ir->op2));
393 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 438 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
394 as->topslot -= (BCReg)delta; 439 as->topslot -= (BCReg)delta;
395 if ((int32_t)as->topslot < 0) as->topslot = 0; 440 if ((int32_t)as->topslot < 0) as->topslot = 0;
396 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 441 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
397 emit_setgl(as, base, jit_base); 442 emit_setgl(as, base, jit_base);
398 emit_addptr(as, base, -8*delta); 443 emit_addptr(as, base, -8*delta);
399 asm_guard(as, MIPSI_BNE, RID_TMP, 444 asm_guard(as, MIPSI_BNE, RID_TMP,
400 ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); 445 ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
401 emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); 446 emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
402} 447}
403 448
404/* -- Type conversions ---------------------------------------------------- */ 449/* -- Type conversions ---------------------------------------------------- */
405 450
451#if !LJ_SOFTFP
406static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 452static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
407{ 453{
408 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 454 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
409 Reg dest = ra_dest(as, ir, RSET_GPR); 455 Reg dest = ra_dest(as, ir, RSET_GPR);
456#if !LJ_TARGET_MIPSR6
410 asm_guard(as, MIPSI_BC1F, 0, 0); 457 asm_guard(as, MIPSI_BC1F, 0, 0);
411 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); 458 emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
459#else
460 asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31));
461 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left);
462#endif
412 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); 463 emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
413 emit_tg(as, MIPSI_MFC1, dest, tmp); 464 emit_tg(as, MIPSI_MFC1, dest, tmp);
414 emit_fg(as, MIPSI_CVT_W_D, tmp, left); 465 emit_fg(as, MIPSI_CVT_W_D, tmp, left);
@@ -424,15 +475,53 @@ static void asm_tobit(ASMState *as, IRIns *ir)
424 emit_tg(as, MIPSI_MFC1, dest, tmp); 475 emit_tg(as, MIPSI_MFC1, dest, tmp);
425 emit_fgh(as, MIPSI_ADD_D, tmp, left, right); 476 emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
426} 477}
478#elif LJ_64 /* && LJ_SOFTFP */
479static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
480{
481 /* The modified regs must match with the *.dasc implementation. */
482 RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
483 RID2RSET(RID_R1)|RID2RSET(RID_R12);
484 if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
485 ra_evictset(as, drop);
486 /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
487 ra_destreg(as, ir, RID_RET);
488 asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
489 emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
490 if (r == RID_NONE)
491 ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
492 else if (r != REGARG_FIRSTGPR)
493 emit_move(as, REGARG_FIRSTGPR, r);
494}
495
496static void asm_tobit(ASMState *as, IRIns *ir)
497{
498 Reg dest = ra_dest(as, ir, RSET_GPR);
499 emit_dta(as, MIPSI_SLL, dest, dest, 0);
500 asm_callid(as, ir, IRCALL_lj_vm_tobit);
501}
502#endif
427 503
428static void asm_conv(ASMState *as, IRIns *ir) 504static void asm_conv(ASMState *as, IRIns *ir)
429{ 505{
430 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 506 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
507#if !LJ_SOFTFP32
431 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 508 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
509#endif
510#if LJ_64
511 int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
512#endif
432 IRRef lref = ir->op1; 513 IRRef lref = ir->op1;
433 lua_assert(irt_type(ir->t) != st); 514#if LJ_32
434 lua_assert(!(irt_isint64(ir->t) || 515 lua_assert(!(irt_isint64(ir->t) ||
435 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 516 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
517#endif
518#if LJ_SOFTFP32
519 /* FP conversions are handled by SPLIT. */
520 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
521 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
522#else
523 lua_assert(irt_type(ir->t) != st);
524#if !LJ_SOFTFP
436 if (irt_isfp(ir->t)) { 525 if (irt_isfp(ir->t)) {
437 Reg dest = ra_dest(as, ir, RSET_FPR); 526 Reg dest = ra_dest(as, ir, RSET_FPR);
438 if (stfp) { /* FP to FP conversion. */ 527 if (stfp) { /* FP to FP conversion. */
@@ -448,16 +537,44 @@ static void asm_conv(ASMState *as, IRIns *ir)
448 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); 537 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
449 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 538 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
450 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 539 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
451 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), 540 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
452 RSET_GPR);
453 emit_tg(as, MIPSI_MTC1, RID_TMP, dest); 541 emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
454 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); 542 emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
455 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 543 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
544#if LJ_64
545 } else if(st == IRT_U64) { /* U64 to FP conversion. */
546 /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
547 Reg left = ra_alloc1(as, lref, RSET_GPR);
548 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
549 MCLabel l_end = emit_label(as);
550 if (irt_isfloat(ir->t)) {
551 emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp);
552 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63],
553 rset_exclude(RSET_GPR, left));
554 emit_fg(as, MIPSI_CVT_S_L, dest, dest);
555 } else {
556 emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
557 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63],
558 rset_exclude(RSET_GPR, left));
559 emit_fg(as, MIPSI_CVT_D_L, dest, dest);
560 }
561 emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end);
562 emit_tg(as, MIPSI_DMTC1, RID_TMP, dest);
563 emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0);
564#endif
456 } else { /* Integer to FP conversion. */ 565 } else { /* Integer to FP conversion. */
457 Reg left = ra_alloc1(as, lref, RSET_GPR); 566 Reg left = ra_alloc1(as, lref, RSET_GPR);
567#if LJ_32
458 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, 568 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
459 dest, dest); 569 dest, dest);
460 emit_tg(as, MIPSI_MTC1, left, dest); 570 emit_tg(as, MIPSI_MTC1, left, dest);
571#else
572 MIPSIns mi = irt_isfloat(ir->t) ?
573 (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) :
574 (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W);
575 emit_fg(as, mi, dest, dest);
576 emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest);
577#endif
461 } 578 }
462 } else if (stfp) { /* FP to integer conversion. */ 579 } else if (stfp) { /* FP to integer conversion. */
463 if (irt_isguard(ir->t)) { 580 if (irt_isguard(ir->t)) {
@@ -468,7 +585,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
468 Reg dest = ra_dest(as, ir, RSET_GPR); 585 Reg dest = ra_dest(as, ir, RSET_GPR);
469 Reg left = ra_alloc1(as, lref, RSET_FPR); 586 Reg left = ra_alloc1(as, lref, RSET_FPR);
470 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); 587 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
471 if (irt_isu32(ir->t)) { 588 if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
472 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ 589 /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
473 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); 590 emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
474 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); 591 emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@@ -479,25 +596,111 @@ static void asm_conv(ASMState *as, IRIns *ir)
479 tmp, left, tmp); 596 tmp, left, tmp);
480 if (st == IRT_FLOAT) 597 if (st == IRT_FLOAT)
481 emit_lsptr(as, MIPSI_LWC1, (tmp & 31), 598 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
482 (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), 599 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
483 RSET_GPR);
484 else 600 else
485 emit_lsptr(as, MIPSI_LDC1, (tmp & 31), 601 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
486 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), 602 (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
487 RSET_GPR); 603#if LJ_64
604 } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
605 MCLabel l_end;
606 emit_tg(as, MIPSI_DMFC1, dest, tmp);
607 l_end = emit_label(as);
608 /* For inputs >= 2^63 add -2^64 and convert again. */
609 if (st == IRT_NUM) {
610 emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
611 emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
612 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
613 (void *)&as->J->k64[LJ_K64_M2P64],
614 rset_exclude(RSET_GPR, dest));
615 emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
616#if !LJ_TARGET_MIPSR6
617 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
618 emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
619#else
620 emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
621 emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
622#endif
623 emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
624 (void *)&as->J->k64[LJ_K64_2P63],
625 rset_exclude(RSET_GPR, dest));
626 } else {
627 emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
628 emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
629 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
630 (void *)&as->J->k32[LJ_K32_M2P64],
631 rset_exclude(RSET_GPR, dest));
632 emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
633#if !LJ_TARGET_MIPSR6
634 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
635 emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
636#else
637 emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
638 emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
639#endif
640 emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
641 (void *)&as->J->k32[LJ_K32_2P63],
642 rset_exclude(RSET_GPR, dest));
643 }
644#endif
488 } else { 645 } else {
646#if LJ_32
489 emit_tg(as, MIPSI_MFC1, dest, tmp); 647 emit_tg(as, MIPSI_MFC1, dest, tmp);
490 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, 648 emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
491 tmp, left); 649 tmp, left);
650#else
651 MIPSIns mi = irt_is64(ir->t) ?
652 (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
653 (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
654 emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
655 emit_fg(as, mi, left, left);
656#endif
492 } 657 }
493 } 658 }
494 } else { 659 } else
660#else
661 if (irt_isfp(ir->t)) {
662#if LJ_64 && LJ_HASFFI
663 if (stfp) { /* FP to FP conversion. */
664 asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
665 IRCALL_softfp_d2f);
666 } else { /* Integer to FP conversion. */
667 IRCallID cid = ((IRT_IS64 >> st) & 1) ?
668 (irt_isnum(ir->t) ?
669 (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
670 (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
671 (irt_isnum(ir->t) ?
672 (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
673 (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
674 asm_callid(as, ir, cid);
675 }
676#else
677 asm_callid(as, ir, IRCALL_softfp_i2d);
678#endif
679 } else if (stfp) { /* FP to integer conversion. */
680 if (irt_isguard(ir->t)) {
681 /* Checked conversions are only supported from number to int. */
682 lua_assert(irt_isint(ir->t) && st == IRT_NUM);
683 asm_tointg(as, ir, RID_NONE);
684 } else {
685 IRCallID cid = irt_is64(ir->t) ?
686 ((st == IRT_NUM) ?
687 (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
688 (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
689 ((st == IRT_NUM) ?
690 (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
691 (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
692 asm_callid(as, ir, cid);
693 }
694 } else
695#endif
696#endif
697 {
495 Reg dest = ra_dest(as, ir, RSET_GPR); 698 Reg dest = ra_dest(as, ir, RSET_GPR);
496 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 699 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
497 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 700 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
498 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); 701 lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
499 if ((ir->op2 & IRCONV_SEXT)) { 702 if ((ir->op2 & IRCONV_SEXT)) {
500 if ((as->flags & JIT_F_MIPS32R2)) { 703 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
501 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); 704 emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
502 } else { 705 } else {
503 uint32_t shift = st == IRT_I8 ? 24 : 16; 706 uint32_t shift = st == IRT_I8 ? 24 : 16;
@@ -509,49 +712,108 @@ static void asm_conv(ASMState *as, IRIns *ir)
509 (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); 712 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
510 } 713 }
511 } else { /* 32/64 bit integer conversions. */ 714 } else { /* 32/64 bit integer conversions. */
715#if LJ_32
512 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ 716 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
513 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ 717 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
718#else
719 if (irt_is64(ir->t)) {
720 if (st64) {
721 /* 64/64 bit no-op (cast)*/
722 ra_leftov(as, dest, lref);
723 } else {
724 Reg left = ra_alloc1(as, lref, RSET_GPR);
725 if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
726 emit_dta(as, MIPSI_SLL, dest, left, 0);
727 } else { /* 32 to 64 bit zero extension. */
728 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
729 }
730 }
731 } else {
732 if (st64) {
733 /* This is either a 32 bit reg/reg mov which zeroes the hiword
734 ** or a load of the loword from a 64 bit address.
735 */
736 Reg left = ra_alloc1(as, lref, RSET_GPR);
737 emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
738 } else { /* 32/32 bit no-op (cast). */
739 /* Do nothing, but may need to move regs. */
740 ra_leftov(as, dest, lref);
741 }
742 }
743#endif
514 } 744 }
515 } 745 }
516} 746}
517 747
518#if LJ_HASFFI
519static void asm_conv64(ASMState *as, IRIns *ir)
520{
521 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
522 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
523 IRCallID id;
524 const CCallInfo *ci;
525 IRRef args[2];
526 args[LJ_BE?0:1] = ir->op1;
527 args[LJ_BE?1:0] = (ir-1)->op1;
528 if (st == IRT_NUM || st == IRT_FLOAT) {
529 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
530 ir--;
531 } else {
532 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
533 }
534 ci = &lj_ir_callinfo[id];
535 asm_setupresult(as, ir, ci);
536 asm_gencall(as, ci, args);
537}
538#endif
539
540static void asm_strto(ASMState *as, IRIns *ir) 748static void asm_strto(ASMState *as, IRIns *ir)
541{ 749{
542 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 750 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
543 IRRef args[2]; 751 IRRef args[2];
752 int32_t ofs = 0;
753#if LJ_SOFTFP32
754 ra_evictset(as, RSET_SCRATCH);
755 if (ra_used(ir)) {
756 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
757 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
758 int i;
759 for (i = 0; i < 2; i++) {
760 Reg r = (ir+i)->r;
761 if (ra_hasreg(r)) {
762 ra_free(as, r);
763 ra_modified(as, r);
764 emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
765 }
766 }
767 ofs = sps_scale(ir->s & ~1);
768 } else {
769 Reg rhi = ra_dest(as, ir+1, RSET_GPR);
770 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
771 emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4));
772 emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0));
773 }
774 }
775#else
544 RegSet drop = RSET_SCRATCH; 776 RegSet drop = RSET_SCRATCH;
545 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ 777 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
546 ra_evictset(as, drop); 778 ra_evictset(as, drop);
779 ofs = sps_scale(ir->s);
780#endif
547 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ 781 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
548 args[0] = ir->op1; /* GCstr *str */ 782 args[0] = ir->op1; /* GCstr *str */
549 args[1] = ASMREF_TMP1; /* TValue *n */ 783 args[1] = ASMREF_TMP1; /* TValue *n */
550 asm_gencall(as, ci, args); 784 asm_gencall(as, ci, args);
551 /* Store the result to the spill slot or temp slots. */ 785 /* Store the result to the spill slot or temp slots. */
552 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), 786 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1),
553 RID_SP, sps_scale(ir->s)); 787 RID_SP, ofs);
788}
789
790/* -- Memory references --------------------------------------------------- */
791
792#if LJ_64
793/* Store tagged value for ref at base+ofs. */
794static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
795{
796 RegSet allow = rset_exclude(RSET_GPR, base);
797 IRIns *ir = IR(ref);
798 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
799 if (irref_isk(ref)) {
800 TValue k;
801 lj_ir_kvalue(as->J->L, &k, ir);
802 emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
803 } else {
804 Reg src = ra_alloc1(as, ref, allow);
805 Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47,
806 rset_exclude(allow, src));
807 emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs);
808 if (irt_isinteger(ir->t)) {
809 emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type);
810 emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0);
811 } else {
812 emit_dst(as, MIPSI_DADDU, RID_TMP, src, type);
813 }
814 }
554} 815}
816#endif
555 817
556/* Get pointer to TValue. */ 818/* Get pointer to TValue. */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 819static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
@@ -559,44 +821,32 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
559 IRIns *ir = IR(ref); 821 IRIns *ir = IR(ref);
560 if (irt_isnum(ir->t)) { 822 if (irt_isnum(ir->t)) {
561 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ 823 if (irref_isk(ref)) /* Use the number constant itself as a TValue. */
562 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); 824 ra_allockreg(as, igcptr(ir_knum(ir)), dest);
563 else /* Otherwise force a spill and use the spill slot. */ 825 else /* Otherwise force a spill and use the spill slot. */
564 emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); 826 emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
565 } else { 827 } else {
566 /* Otherwise use g->tmptv to hold the TValue. */ 828 /* Otherwise use g->tmptv to hold the TValue. */
829#if LJ_32
567 RegSet allow = rset_exclude(RSET_GPR, dest); 830 RegSet allow = rset_exclude(RSET_GPR, dest);
568 Reg type; 831 Reg type;
569 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 832 emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, (int32_t)(offsetof(global_State, tmptv)-32768));
570 if (!irt_ispri(ir->t)) { 833 if (!irt_ispri(ir->t)) {
571 Reg src = ra_alloc1(as, ref, allow); 834 Reg src = ra_alloc1(as, ref, allow);
572 emit_setgl(as, src, tmptv.gcr); 835 emit_setgl(as, src, tmptv.gcr);
573 } 836 }
574 type = ra_allock(as, irt_toitype(ir->t), allow); 837 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
838 type = ra_alloc1(as, ref+1, allow);
839 else
840 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
575 emit_setgl(as, type, tmptv.it); 841 emit_setgl(as, type, tmptv.it);
842#else
843 asm_tvstore64(as, dest, 0, ref);
844 emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL,
845 (int32_t)(offsetof(global_State, tmptv)-32768));
846#endif
576 } 847 }
577} 848}
578 849
579static void asm_tostr(ASMState *as, IRIns *ir)
580{
581 IRRef args[2];
582 args[0] = ASMREF_L;
583 as->gcsteps++;
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
586 args[1] = ASMREF_TMP1; /* const lua_Number * */
587 asm_setupresult(as, ir, ci); /* GCstr * */
588 asm_gencall(as, ci, args);
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
590 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 }
596}
597
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 850static void asm_aref(ASMState *as, IRIns *ir)
601{ 851{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 852 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -608,14 +858,18 @@ static void asm_aref(ASMState *as, IRIns *ir)
608 ofs += 8*IR(ir->op2)->i; 858 ofs += 8*IR(ir->op2)->i;
609 if (checki16(ofs)) { 859 if (checki16(ofs)) {
610 base = ra_alloc1(as, refa, RSET_GPR); 860 base = ra_alloc1(as, refa, RSET_GPR);
611 emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); 861 emit_tsi(as, MIPSI_AADDIU, dest, base, ofs);
612 return; 862 return;
613 } 863 }
614 } 864 }
615 base = ra_alloc1(as, ir->op1, RSET_GPR); 865 base = ra_alloc1(as, ir->op1, RSET_GPR);
616 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); 866 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
617 emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); 867#if !LJ_TARGET_MIPSR6
868 emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
618 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); 869 emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
870#else
871 emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base);
872#endif
619} 873}
620 874
621/* Inlined hash lookup. Specialized for key type and for const keys. 875/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -626,51 +880,109 @@ static void asm_aref(ASMState *as, IRIns *ir)
626** } while ((n = nextnode(n))); 880** } while ((n = nextnode(n)));
627** return niltv(L); 881** return niltv(L);
628*/ 882*/
629static void asm_href(ASMState *as, IRIns *ir) 883static void asm_href(ASMState *as, IRIns *ir, IROp merge)
630{ 884{
631 RegSet allow = RSET_GPR; 885 RegSet allow = RSET_GPR;
632 int destused = ra_used(ir); 886 int destused = ra_used(ir);
633 Reg dest = ra_dest(as, ir, allow); 887 Reg dest = ra_dest(as, ir, allow);
634 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 888 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
635 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; 889 Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
890#if LJ_64
891 Reg cmp64 = RID_NONE;
892#endif
636 IRRef refkey = ir->op2; 893 IRRef refkey = ir->op2;
637 IRIns *irkey = IR(refkey); 894 IRIns *irkey = IR(refkey);
895 int isk = irref_isk(refkey);
638 IRType1 kt = irkey->t; 896 IRType1 kt = irkey->t;
639 uint32_t khash; 897 uint32_t khash;
640 MCLabel l_end, l_loop, l_next; 898 MCLabel l_end, l_loop, l_next;
641 899
642 rset_clear(allow, tab); 900 rset_clear(allow, tab);
643 if (irt_isnum(kt)) { 901#if LJ_SOFTFP32
902 if (!isk) {
903 key = ra_alloc1(as, refkey, allow);
904 rset_clear(allow, key);
905 if (irkey[1].o == IR_HIOP) {
906 if (ra_hasreg((irkey+1)->r)) {
907 type = tmpnum = (irkey+1)->r;
908 tmp1 = ra_scratch(as, allow);
909 rset_clear(allow, tmp1);
910 ra_noweak(as, tmpnum);
911 } else {
912 type = tmpnum = ra_allocref(as, refkey+1, allow);
913 }
914 rset_clear(allow, tmpnum);
915 } else {
916 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
917 rset_clear(allow, type);
918 }
919 }
920#else
921 if (!LJ_SOFTFP && irt_isnum(kt)) {
644 key = ra_alloc1(as, refkey, RSET_FPR); 922 key = ra_alloc1(as, refkey, RSET_FPR);
645 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); 923 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
646 } else if (!irt_ispri(kt)) { 924 } else if (!irt_ispri(kt)) {
647 key = ra_alloc1(as, refkey, allow); 925 key = ra_alloc1(as, refkey, allow);
648 rset_clear(allow, key); 926 rset_clear(allow, key);
649 type = ra_allock(as, irt_toitype(irkey->t), allow); 927#if LJ_32
928 type = ra_allock(as, (int32_t)irt_toitype(irkey->t), allow);
650 rset_clear(allow, type); 929 rset_clear(allow, type);
930#endif
651 } 931 }
932#endif
652 tmp2 = ra_scratch(as, allow); 933 tmp2 = ra_scratch(as, allow);
653 rset_clear(allow, tmp2); 934 rset_clear(allow, tmp2);
935#if LJ_64
936 if (LJ_SOFTFP || !irt_isnum(kt)) {
937 /* Allocate cmp64 register used for 64-bit comparisons */
938 if (LJ_SOFTFP && irt_isnum(kt)) {
939 cmp64 = key;
940 } else if (!isk && irt_isaddr(kt)) {
941 cmp64 = tmp2;
942 } else {
943 int64_t k;
944 if (isk && irt_isaddr(kt)) {
945 k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
946 } else {
947 lua_assert(irt_ispri(kt) && !irt_isnil(kt));
948 k = ~((int64_t)~irt_toitype(ir->t) << 47);
949 }
950 cmp64 = ra_allock(as, k, allow);
951 rset_clear(allow, cmp64);
952 }
953 }
954#endif
654 955
655 /* Key not found in chain: load niltv. */ 956 /* Key not found in chain: jump to exit (if merged) or load niltv. */
656 l_end = emit_label(as); 957 l_end = emit_label(as);
657 if (destused) 958 as->invmcp = NULL;
959 if (merge == IR_NE)
960 asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
961 else if (destused)
658 emit_loada(as, dest, niltvg(J2G(as->J))); 962 emit_loada(as, dest, niltvg(J2G(as->J)));
659 else
660 *--as->mcp = MIPSI_NOP;
661 /* Follow hash chain until the end. */ 963 /* Follow hash chain until the end. */
662 emit_move(as, dest, tmp1); 964 emit_move(as, dest, tmp1);
663 l_loop = --as->mcp; 965 l_loop = --as->mcp;
664 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); 966 emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next));
665 l_next = emit_label(as); 967 l_next = emit_label(as);
666 968
667 /* Type and value comparison. */ 969 /* Type and value comparison. */
668 if (irt_isnum(kt)) { 970 if (merge == IR_EQ) { /* Must match asm_guard(). */
971 emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
972 l_end = asm_exitstub_addr(as);
973 }
974 if (!LJ_SOFTFP && irt_isnum(kt)) {
975#if !LJ_TARGET_MIPSR6
669 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); 976 emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
670 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); 977 emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
671 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 978#else
979 emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end);
980 emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key);
981#endif
982 *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
672 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); 983 emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
673 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); 984 emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
985#if LJ_32
674 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); 986 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
675 } else { 987 } else {
676 if (irt_ispri(kt)) { 988 if (irt_ispri(kt)) {
@@ -683,24 +995,39 @@ static void asm_href(ASMState *as, IRIns *ir)
683 } 995 }
684 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); 996 emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
685 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); 997 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
998#else
999 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
1000 emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
1001 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
1002 } else {
1003 emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
1004 emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
1005 }
1006 *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
1007 if (!isk && irt_isaddr(kt)) {
1008 type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
1009 emit_dst(as, MIPSI_DADDU, tmp2, key, type);
1010 rset_clear(allow, type);
1011 }
1012#endif
686 1013
687 /* Load main position relative to tab->node into dest. */ 1014 /* Load main position relative to tab->node into dest. */
688 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 1015 khash = isk ? ir_khash(irkey) : 1;
689 if (khash == 0) { 1016 if (khash == 0) {
690 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 1017 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
691 } else { 1018 } else {
692 Reg tmphash = tmp1; 1019 Reg tmphash = tmp1;
693 if (irref_isk(refkey)) 1020 if (isk)
694 tmphash = ra_allock(as, khash, allow); 1021 tmphash = ra_allock(as, khash, allow);
695 emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); 1022 emit_dst(as, MIPSI_AADDU, dest, dest, tmp1);
696 lua_assert(sizeof(Node) == 24); 1023 lua_assert(sizeof(Node) == 24);
697 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); 1024 emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
698 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); 1025 emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
699 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); 1026 emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
700 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); 1027 emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
701 emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); 1028 emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
702 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 1029 emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
703 if (irref_isk(refkey)) { 1030 if (isk) {
704 /* Nothing to do. */ 1031 /* Nothing to do. */
705 } else if (irt_isstr(kt)) { 1032 } else if (irt_isstr(kt)) {
706 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); 1033 emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -710,9 +1037,10 @@ static void asm_href(ASMState *as, IRIns *ir)
710 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); 1037 emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
711 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); 1038 emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
712 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); 1039 emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
713 if (irt_isnum(kt)) { 1040#if LJ_32
1041 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
714 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); 1042 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
715 if ((as->flags & JIT_F_MIPS32R2)) { 1043 if ((as->flags & JIT_F_MIPSXXR2)) {
716 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); 1044 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
717 } else { 1045 } else {
718 emit_dst(as, MIPSI_OR, dest, dest, tmp1); 1046 emit_dst(as, MIPSI_OR, dest, dest, tmp1);
@@ -720,13 +1048,35 @@ static void asm_href(ASMState *as, IRIns *ir)
720 emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); 1048 emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
721 } 1049 }
722 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); 1050 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
1051#if LJ_SOFTFP
1052 emit_ds(as, MIPSI_MOVE, tmp1, type);
1053 emit_ds(as, MIPSI_MOVE, tmp2, key);
1054#else
723 emit_tg(as, MIPSI_MFC1, tmp2, key); 1055 emit_tg(as, MIPSI_MFC1, tmp2, key);
724 emit_tg(as, MIPSI_MFC1, tmp1, key+1); 1056 emit_tg(as, MIPSI_MFC1, tmp1, key+1);
1057#endif
725 } else { 1058 } else {
726 emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); 1059 emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
727 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); 1060 emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
728 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); 1061 emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
729 } 1062 }
1063#else
1064 emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
1065 emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
1066 if (irt_isnum(kt)) {
1067 emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
1068 emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
1069 emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
1070#if !LJ_SOFTFP
1071 emit_tg(as, MIPSI_DMFC1, tmp1, key);
1072#endif
1073 } else {
1074 checkmclim(as);
1075 emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
1076 emit_dta(as, MIPSI_SLL, tmp2, key, 0);
1077 emit_dst(as, MIPSI_DADDU, tmp1, key, type);
1078 }
1079#endif
730 } 1080 }
731 } 1081 }
732} 1082}
@@ -739,17 +1089,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
739 int32_t kofs = ofs + (int32_t)offsetof(Node, key); 1089 int32_t kofs = ofs + (int32_t)offsetof(Node, key);
740 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; 1090 Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
741 Reg node = ra_alloc1(as, ir->op1, RSET_GPR); 1091 Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
742 Reg key = RID_NONE, type = RID_TMP, idx = node;
743 RegSet allow = rset_exclude(RSET_GPR, node); 1092 RegSet allow = rset_exclude(RSET_GPR, node);
1093 Reg idx = node;
1094#if LJ_32
1095 Reg key = RID_NONE, type = RID_TMP;
744 int32_t lo, hi; 1096 int32_t lo, hi;
1097#else
1098 Reg key = ra_scratch(as, allow);
1099 int64_t k;
1100#endif
745 lua_assert(ofs % sizeof(Node) == 0); 1101 lua_assert(ofs % sizeof(Node) == 0);
746 if (ofs > 32736) { 1102 if (ofs > 32736) {
747 idx = dest; 1103 idx = dest;
748 rset_clear(allow, dest); 1104 rset_clear(allow, dest);
749 kofs = (int32_t)offsetof(Node, key); 1105 kofs = (int32_t)offsetof(Node, key);
750 } else if (ra_hasreg(dest)) { 1106 } else if (ra_hasreg(dest)) {
751 emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); 1107 emit_tsi(as, MIPSI_AADDIU, dest, node, ofs);
752 } 1108 }
1109#if LJ_32
753 if (!irt_ispri(irkey->t)) { 1110 if (!irt_ispri(irkey->t)) {
754 key = ra_scratch(as, allow); 1111 key = ra_scratch(as, allow);
755 rset_clear(allow, key); 1112 rset_clear(allow, key);
@@ -768,22 +1125,20 @@ nolo:
768 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); 1125 asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
769 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); 1126 if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
770 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); 1127 emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
771 if (ofs > 32736) 1128#else
772 emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); 1129 if (irt_ispri(irkey->t)) {
773} 1130 lua_assert(!irt_isnil(irkey->t));
774 1131 k = ~((int64_t)~irt_toitype(irkey->t) << 47);
775static void asm_newref(ASMState *as, IRIns *ir) 1132 } else if (irt_isnum(irkey->t)) {
776{ 1133 k = (int64_t)ir_knum(irkey)->u64;
777 if (ir->r != RID_SINK) { 1134 } else {
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 1135 k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
779 IRRef args[3];
780 args[0] = ASMREF_L; /* lua_State *L */
781 args[1] = ir->op1; /* GCtab *t */
782 args[2] = ASMREF_TMP1; /* cTValue *key */
783 asm_setupresult(as, ir, ci); /* TValue * */
784 asm_gencall(as, ci, args);
785 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
786 } 1136 }
1137 asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow));
1138 emit_tsi(as, MIPSI_LD, key, idx, kofs);
1139#endif
1140 if (ofs > 32736)
1141 emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow));
787} 1142}
788 1143
789static void asm_uref(ASMState *as, IRIns *ir) 1144static void asm_uref(ASMState *as, IRIns *ir)
@@ -792,19 +1147,19 @@ static void asm_uref(ASMState *as, IRIns *ir)
792 if (irref_isk(ir->op1)) { 1147 if (irref_isk(ir->op1)) {
793 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1148 GCfunc *fn = ir_kfunc(IR(ir->op1));
794 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1149 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
795 emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); 1150 emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
796 } else { 1151 } else {
797 Reg uv = ra_scratch(as, RSET_GPR); 1152 Reg uv = ra_scratch(as, RSET_GPR);
798 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1153 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
799 if (ir->o == IR_UREFC) { 1154 if (ir->o == IR_UREFC) {
800 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1155 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
801 emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); 1156 emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
802 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); 1157 emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
803 } else { 1158 } else {
804 emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); 1159 emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
805 } 1160 }
806 emit_tsi(as, MIPSI_LW, uv, func, 1161 emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
807 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); 1162 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
808 } 1163 }
809} 1164}
810 1165
@@ -816,6 +1171,7 @@ static void asm_fref(ASMState *as, IRIns *ir)
816 1171
817static void asm_strref(ASMState *as, IRIns *ir) 1172static void asm_strref(ASMState *as, IRIns *ir)
818{ 1173{
1174#if LJ_32
819 Reg dest = ra_dest(as, ir, RSET_GPR); 1175 Reg dest = ra_dest(as, ir, RSET_GPR);
820 IRRef ref = ir->op2, refk = ir->op1; 1176 IRRef ref = ir->op2, refk = ir->op1;
821 int32_t ofs = (int32_t)sizeof(GCstr); 1177 int32_t ofs = (int32_t)sizeof(GCstr);
@@ -847,6 +1203,20 @@ static void asm_strref(ASMState *as, IRIns *ir)
847 else 1203 else
848 emit_dst(as, MIPSI_ADDU, dest, r, 1204 emit_dst(as, MIPSI_ADDU, dest, r,
849 ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); 1205 ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
1206#else
1207 RegSet allow = RSET_GPR;
1208 Reg dest = ra_dest(as, ir, allow);
1209 Reg base = ra_alloc1(as, ir->op1, allow);
1210 IRIns *irr = IR(ir->op2);
1211 int32_t ofs = sizeof(GCstr);
1212 rset_clear(allow, base);
1213 if (irref_isk(ir->op2) && checki16(ofs + irr->i)) {
1214 emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i);
1215 } else {
1216 emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs);
1217 emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow));
1218 }
1219#endif
850} 1220}
851 1221
852/* -- Loads and stores ---------------------------------------------------- */ 1222/* -- Loads and stores ---------------------------------------------------- */
@@ -858,9 +1228,11 @@ static MIPSIns asm_fxloadins(IRIns *ir)
858 case IRT_U8: return MIPSI_LBU; 1228 case IRT_U8: return MIPSI_LBU;
859 case IRT_I16: return MIPSI_LH; 1229 case IRT_I16: return MIPSI_LH;
860 case IRT_U16: return MIPSI_LHU; 1230 case IRT_U16: return MIPSI_LHU;
861 case IRT_NUM: return MIPSI_LDC1; 1231 case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_LDC1;
862 case IRT_FLOAT: return MIPSI_LWC1; 1232 /* fallthrough */
863 default: return MIPSI_LW; 1233 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
1234 /* fallthrough */
1235 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
864 } 1236 }
865} 1237}
866 1238
@@ -869,26 +1241,34 @@ static MIPSIns asm_fxstoreins(IRIns *ir)
869 switch (irt_type(ir->t)) { 1241 switch (irt_type(ir->t)) {
870 case IRT_I8: case IRT_U8: return MIPSI_SB; 1242 case IRT_I8: case IRT_U8: return MIPSI_SB;
871 case IRT_I16: case IRT_U16: return MIPSI_SH; 1243 case IRT_I16: case IRT_U16: return MIPSI_SH;
872 case IRT_NUM: return MIPSI_SDC1; 1244 case IRT_NUM: lua_assert(!LJ_SOFTFP32); if (!LJ_SOFTFP) return MIPSI_SDC1;
873 case IRT_FLOAT: return MIPSI_SWC1; 1245 /* fallthrough */
874 default: return MIPSI_SW; 1246 case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
1247 /* fallthrough */
1248 default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
875 } 1249 }
876} 1250}
877 1251
878static void asm_fload(ASMState *as, IRIns *ir) 1252static void asm_fload(ASMState *as, IRIns *ir)
879{ 1253{
880 Reg dest = ra_dest(as, ir, RSET_GPR); 1254 Reg dest = ra_dest(as, ir, RSET_GPR);
881 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
882 MIPSIns mi = asm_fxloadins(ir); 1255 MIPSIns mi = asm_fxloadins(ir);
1256 Reg idx;
883 int32_t ofs; 1257 int32_t ofs;
884 if (ir->op2 == IRFL_TAB_ARRAY) { 1258 if (ir->op1 == REF_NIL) {
885 ofs = asm_fuseabase(as, ir->op1); 1259 idx = RID_JGL;
886 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 1260 ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
887 emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); 1261 } else {
888 return; 1262 idx = ra_alloc1(as, ir->op1, RSET_GPR);
1263 if (ir->op2 == IRFL_TAB_ARRAY) {
1264 ofs = asm_fuseabase(as, ir->op1);
1265 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
1266 emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs);
1267 return;
1268 }
889 } 1269 }
1270 ofs = field_ofs[ir->op2];
890 } 1271 }
891 ofs = field_ofs[ir->op2];
892 lua_assert(!irt_isfp(ir->t)); 1272 lua_assert(!irt_isfp(ir->t));
893 emit_tsi(as, mi, dest, idx, ofs); 1273 emit_tsi(as, mi, dest, idx, ofs);
894} 1274}
@@ -908,43 +1288,79 @@ static void asm_fstore(ASMState *as, IRIns *ir)
908 1288
909static void asm_xload(ASMState *as, IRIns *ir) 1289static void asm_xload(ASMState *as, IRIns *ir)
910{ 1290{
911 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 1291 Reg dest = ra_dest(as, ir,
912 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 1292 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
1293 lua_assert(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED));
913 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 1294 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
914} 1295}
915 1296
916static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 1297static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
917{ 1298{
918 if (ir->r != RID_SINK) { 1299 if (ir->r != RID_SINK) {
919 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 1300 Reg src = ra_alloc1z(as, ir->op2,
1301 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
920 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 1302 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
921 rset_exclude(RSET_GPR, src), ofs); 1303 rset_exclude(RSET_GPR, src), ofs);
922 } 1304 }
923} 1305}
924 1306
1307#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
1308
925static void asm_ahuvload(ASMState *as, IRIns *ir) 1309static void asm_ahuvload(ASMState *as, IRIns *ir)
926{ 1310{
927 IRType1 t = ir->t; 1311 int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
928 Reg dest = RID_NONE, type = RID_TMP, idx; 1312 Reg dest = RID_NONE, type = RID_TMP, idx;
929 RegSet allow = RSET_GPR; 1313 RegSet allow = RSET_GPR;
930 int32_t ofs = 0; 1314 int32_t ofs = 0;
1315 IRType1 t = ir->t;
1316 if (hiop) {
1317 t.irt = IRT_NUM;
1318 if (ra_used(ir+1)) {
1319 type = ra_dest(as, ir+1, allow);
1320 rset_clear(allow, type);
1321 }
1322 }
931 if (ra_used(ir)) { 1323 if (ra_used(ir)) {
932 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1324 lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
933 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1325 irt_isint(ir->t) || irt_isaddr(ir->t));
1326 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
934 rset_clear(allow, dest); 1327 rset_clear(allow, dest);
1328#if LJ_64
1329 if (irt_isaddr(t))
1330 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1331 else if (irt_isint(t))
1332 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1333#endif
935 } 1334 }
936 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1335 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
937 rset_clear(allow, idx); 1336 rset_clear(allow, idx);
938 if (irt_isnum(t)) { 1337 if (irt_isnum(t)) {
939 asm_guard(as, MIPSI_BEQ, type, RID_ZERO); 1338 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
940 emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); 1339 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
941 if (ra_hasreg(dest))
942 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
943 } else { 1340 } else {
944 asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); 1341 asm_guard(as, MIPSI_BNE, type,
945 if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); 1342 ra_allock(as, (int32_t)irt_toitype(t), allow));
1343 }
1344#if LJ_32
1345 if (ra_hasreg(dest)) {
1346 if (!LJ_SOFTFP && irt_isnum(t))
1347 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1348 else
1349 emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
946 } 1350 }
947 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); 1351 emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
1352#else
1353 if (ra_hasreg(dest)) {
1354 if (!LJ_SOFTFP && irt_isnum(t)) {
1355 emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
1356 dest = type;
1357 }
1358 } else {
1359 dest = type;
1360 }
1361 emit_dta(as, MIPSI_DSRA32, type, dest, 15);
1362 emit_tsi(as, MIPSI_LD, dest, idx, ofs);
1363#endif
948} 1364}
949 1365
950static void asm_ahustore(ASMState *as, IRIns *ir) 1366static void asm_ahustore(ASMState *as, IRIns *ir)
@@ -954,81 +1370,176 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
954 int32_t ofs = 0; 1370 int32_t ofs = 0;
955 if (ir->r == RID_SINK) 1371 if (ir->r == RID_SINK)
956 return; 1372 return;
957 if (irt_isnum(ir->t)) { 1373 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
958 src = ra_alloc1(as, ir->op2, RSET_FPR); 1374 src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
1375 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1376 emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
959 } else { 1377 } else {
1378#if LJ_32
960 if (!irt_ispri(ir->t)) { 1379 if (!irt_ispri(ir->t)) {
961 src = ra_alloc1(as, ir->op2, allow); 1380 src = ra_alloc1(as, ir->op2, allow);
962 rset_clear(allow, src); 1381 rset_clear(allow, src);
963 } 1382 }
964 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1383 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1384 type = ra_alloc1(as, (ir+1)->op2, allow);
1385 else
1386 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
965 rset_clear(allow, type); 1387 rset_clear(allow, type);
966 } 1388 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
967 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
968 if (irt_isnum(ir->t)) {
969 emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
970 } else {
971 if (ra_hasreg(src)) 1389 if (ra_hasreg(src))
972 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); 1390 emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
973 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); 1391 emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
1392#else
1393 Reg tmp = RID_TMP;
1394 if (irt_ispri(ir->t)) {
1395 tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
1396 rset_clear(allow, tmp);
1397 } else {
1398 src = ra_alloc1(as, ir->op2, allow);
1399 rset_clear(allow, src);
1400 type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
1401 rset_clear(allow, type);
1402 }
1403 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
1404 emit_tsi(as, MIPSI_SD, tmp, idx, ofs);
1405 if (ra_hasreg(src)) {
1406 if (irt_isinteger(ir->t)) {
1407 emit_dst(as, MIPSI_DADDU, tmp, tmp, type);
1408 emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0);
1409 } else {
1410 emit_dst(as, MIPSI_DADDU, tmp, src, type);
1411 }
1412 }
1413#endif
974 } 1414 }
975} 1415}
976 1416
977static void asm_sload(ASMState *as, IRIns *ir) 1417static void asm_sload(ASMState *as, IRIns *ir)
978{ 1418{
979 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
980 IRType1 t = ir->t;
981 Reg dest = RID_NONE, type = RID_NONE, base; 1419 Reg dest = RID_NONE, type = RID_NONE, base;
982 RegSet allow = RSET_GPR; 1420 RegSet allow = RSET_GPR;
1421 IRType1 t = ir->t;
1422#if LJ_32
1423 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1424 int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
1425 if (hiop)
1426 t.irt = IRT_NUM;
1427#else
1428 int32_t ofs = 8*((int32_t)ir->op1-2);
1429#endif
983 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1430 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
984 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1431 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
985 lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1432#if LJ_SOFTFP32
1433 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
1434 if (hiop && ra_used(ir+1)) {
1435 type = ra_dest(as, ir+1, allow);
1436 rset_clear(allow, type);
1437 }
1438#else
986 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1439 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
987 dest = ra_scratch(as, RSET_FPR); 1440 dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
988 asm_tointg(as, ir, dest); 1441 asm_tointg(as, ir, dest);
989 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1442 t.irt = IRT_NUM; /* Continue with a regular number type check. */
990 } else if (ra_used(ir)) { 1443 } else
991 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1444#endif
992 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1445 if (ra_used(ir)) {
1446 lua_assert((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
1447 irt_isint(ir->t) || irt_isaddr(ir->t));
1448 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
993 rset_clear(allow, dest); 1449 rset_clear(allow, dest);
994 base = ra_alloc1(as, REF_BASE, allow); 1450 base = ra_alloc1(as, REF_BASE, allow);
995 rset_clear(allow, base); 1451 rset_clear(allow, base);
996 if ((ir->op2 & IRSLOAD_CONVERT)) { 1452 if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
997 if (irt_isint(t)) { 1453 if (irt_isint(t)) {
998 Reg tmp = ra_scratch(as, RSET_FPR); 1454 Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
1455#if LJ_SOFTFP
1456 ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
1457 ra_destreg(as, ir, RID_RET);
1458 emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
1459 if (tmp != REGARG_FIRSTGPR)
1460 emit_move(as, REGARG_FIRSTGPR, tmp);
1461#else
999 emit_tg(as, MIPSI_MFC1, dest, tmp); 1462 emit_tg(as, MIPSI_MFC1, dest, tmp);
1000 emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); 1463 emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
1464#endif
1001 dest = tmp; 1465 dest = tmp;
1002 t.irt = IRT_NUM; /* Check for original type. */ 1466 t.irt = IRT_NUM; /* Check for original type. */
1003 } else { 1467 } else {
1004 Reg tmp = ra_scratch(as, RSET_GPR); 1468 Reg tmp = ra_scratch(as, RSET_GPR);
1469#if LJ_SOFTFP
1470 ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
1471 ra_destreg(as, ir, RID_RET);
1472 emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
1473 emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
1474#else
1005 emit_fg(as, MIPSI_CVT_D_W, dest, dest); 1475 emit_fg(as, MIPSI_CVT_D_W, dest, dest);
1006 emit_tg(as, MIPSI_MTC1, tmp, dest); 1476 emit_tg(as, MIPSI_MTC1, tmp, dest);
1477#endif
1007 dest = tmp; 1478 dest = tmp;
1008 t.irt = IRT_INT; /* Check for original type. */ 1479 t.irt = IRT_INT; /* Check for original type. */
1009 } 1480 }
1010 } 1481 }
1482#if LJ_64
1483 else if (irt_isaddr(t)) {
1484 /* Clear type from pointers. */
1485 emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
1486 } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
1487 /* Sign-extend integers. */
1488 emit_dta(as, MIPSI_SLL, dest, dest, 0);
1489 }
1490#endif
1011 goto dotypecheck; 1491 goto dotypecheck;
1012 } 1492 }
1013 base = ra_alloc1(as, REF_BASE, allow); 1493 base = ra_alloc1(as, REF_BASE, allow);
1014 rset_clear(allow, base); 1494 rset_clear(allow, base);
1015dotypecheck: 1495dotypecheck:
1016 if (irt_isnum(t)) { 1496#if LJ_32
1017 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1497 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1018 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); 1498 if (ra_noreg(type))
1019 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1020 type = RID_TMP; 1499 type = RID_TMP;
1021 } 1500 if (irt_isnum(t)) {
1022 if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); 1501 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1023 } else { 1502 emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
1024 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1503 } else {
1025 Reg ktype = ra_allock(as, irt_toitype(t), allow); 1504 Reg ktype = ra_allock(as, irt_toitype(t), allow);
1026 asm_guard(as, MIPSI_BNE, RID_TMP, ktype); 1505 asm_guard(as, MIPSI_BNE, type, ktype);
1027 type = RID_TMP; 1506 }
1507 }
1508 if (ra_hasreg(dest)) {
1509 if (!LJ_SOFTFP && irt_isnum(t))
1510 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1511 else
1512 emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
1513 }
1514 if (ra_hasreg(type))
1515 emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
1516#else
1517 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1518 type = dest < RID_MAX_GPR ? dest : RID_TMP;
1519 if (irt_ispri(t)) {
1520 asm_guard(as, MIPSI_BNE, type,
1521 ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
1522 } else {
1523 if (irt_isnum(t)) {
1524 asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
1525 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
1526 if (!LJ_SOFTFP && ra_hasreg(dest))
1527 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1528 } else {
1529 asm_guard(as, MIPSI_BNE, RID_TMP,
1530 ra_allock(as, (int32_t)irt_toitype(t), allow));
1531 }
1532 emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15);
1028 } 1533 }
1029 if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); 1534 emit_tsi(as, MIPSI_LD, type, base, ofs);
1535 } else if (ra_hasreg(dest)) {
1536 if (!LJ_SOFTFP && irt_isnum(t))
1537 emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
1538 else
1539 emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
1540 ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0));
1030 } 1541 }
1031 if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); 1542#endif
1032} 1543}
1033 1544
1034/* -- Allocations --------------------------------------------------------- */ 1545/* -- Allocations --------------------------------------------------------- */
@@ -1037,19 +1548,15 @@ dotypecheck:
1037static void asm_cnew(ASMState *as, IRIns *ir) 1548static void asm_cnew(ASMState *as, IRIns *ir)
1038{ 1549{
1039 CTState *cts = ctype_ctsG(J2G(as->J)); 1550 CTState *cts = ctype_ctsG(J2G(as->J));
1040 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1551 CTypeID id = (CTypeID)IR(ir->op1)->i;
1041 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1552 CTSize sz;
1042 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1553 CTInfo info = lj_ctype_info(cts, id, &sz);
1043 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1554 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1044 IRRef args[2]; 1555 IRRef args[4];
1045 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1046 RegSet drop = RSET_SCRATCH; 1556 RegSet drop = RSET_SCRATCH;
1047 lua_assert(sz != CTSIZE_INVALID); 1557 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1048 1558
1049 args[0] = ASMREF_L; /* lua_State *L */
1050 args[1] = ASMREF_TMP1; /* MSize size */
1051 as->gcsteps++; 1559 as->gcsteps++;
1052
1053 if (ra_hasreg(ir->r)) 1560 if (ra_hasreg(ir->r))
1054 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1561 rset_clear(drop, ir->r); /* Dest reg handled below. */
1055 ra_evictset(as, drop); 1562 ra_evictset(as, drop);
@@ -1058,8 +1565,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1058 1565
1059 /* Initialize immutable cdata object. */ 1566 /* Initialize immutable cdata object. */
1060 if (ir->o == IR_CNEWI) { 1567 if (ir->o == IR_CNEWI) {
1568 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1569#if LJ_32
1061 int32_t ofs = sizeof(GCcdata); 1570 int32_t ofs = sizeof(GCcdata);
1062 lua_assert(sz == 4 || sz == 8);
1063 if (sz == 8) { 1571 if (sz == 8) {
1064 ofs += 4; 1572 ofs += 4;
1065 lua_assert((ir+1)->o == IR_HIOP); 1573 lua_assert((ir+1)->o == IR_HIOP);
@@ -1072,12 +1580,29 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1072 if (ofs == sizeof(GCcdata)) break; 1580 if (ofs == sizeof(GCcdata)) break;
1073 ofs -= 4; if (LJ_BE) ir++; else ir--; 1581 ofs -= 4; if (LJ_BE) ir++; else ir--;
1074 } 1582 }
1583#else
1584 emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow),
1585 RID_RET, sizeof(GCcdata));
1586#endif
1587 lua_assert(sz == 4 || sz == 8);
1588 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1589 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1590 args[0] = ASMREF_L; /* lua_State *L */
1591 args[1] = ir->op1; /* CTypeID id */
1592 args[2] = ir->op2; /* CTSize sz */
1593 args[3] = ASMREF_TMP1; /* CTSize align */
1594 asm_gencall(as, ci, args);
1595 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1596 return;
1075 } 1597 }
1598
1076 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1599 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1077 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1600 emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1078 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1601 emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1079 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); 1602 emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
1080 emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1603 emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1604 args[0] = ASMREF_L; /* lua_State *L */
1605 args[1] = ASMREF_TMP1; /* MSize size */
1081 asm_gencall(as, ci, args); 1606 asm_gencall(as, ci, args);
1082 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1607 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1083 ra_releasetmp(as, ASMREF_TMP1)); 1608 ra_releasetmp(as, ASMREF_TMP1));
@@ -1094,7 +1619,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1094 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1619 Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1095 Reg link = RID_TMP; 1620 Reg link = RID_TMP;
1096 MCLabel l_end = emit_label(as); 1621 MCLabel l_end = emit_label(as);
1097 emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); 1622 emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist));
1098 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); 1623 emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
1099 emit_setgl(as, tab, gc.grayagain); 1624 emit_setgl(as, tab, gc.grayagain);
1100 emit_getgl(as, link, gc.grayagain); 1625 emit_getgl(as, link, gc.grayagain);
@@ -1117,7 +1642,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1117 args[0] = ASMREF_TMP1; /* global_State *g */ 1642 args[0] = ASMREF_TMP1; /* global_State *g */
1118 args[1] = ir->op1; /* TValue *tv */ 1643 args[1] = ir->op1; /* TValue *tv */
1119 asm_gencall(as, ci, args); 1644 asm_gencall(as, ci, args);
1120 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 1645 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1121 obj = IR(ir->op1)->r; 1646 obj = IR(ir->op1)->r;
1122 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); 1647 tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
1123 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); 1648 emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
@@ -1132,6 +1657,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1132 1657
1133/* -- Arithmetic and logic operations ------------------------------------- */ 1658/* -- Arithmetic and logic operations ------------------------------------- */
1134 1659
1660#if !LJ_SOFTFP
1135static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) 1661static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
1136{ 1662{
1137 Reg dest = ra_dest(as, ir, RSET_FPR); 1663 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1146,83 +1672,180 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
1146 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); 1672 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1147 emit_fg(as, mi, dest, left); 1673 emit_fg(as, mi, dest, left);
1148} 1674}
1675#endif
1149 1676
1150static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1677#if !LJ_SOFTFP32
1151{ 1678static void asm_fpmath(ASMState *as, IRIns *ir)
1152 IRIns *irp = IR(ir->op1); 1679{
1153 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1680 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1154 IRIns *irpp = IR(irp->op1); 1681 return;
1155 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1682#if !LJ_SOFTFP
1156 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1683 if (ir->op2 <= IRFPM_TRUNC)
1157 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1684 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1158 IRRef args[2]; 1685 else if (ir->op2 == IRFPM_SQRT)
1159 args[0] = irpp->op1; 1686 asm_fpunary(as, ir, MIPSI_SQRT_D);
1160 args[1] = irp->op2; 1687 else
1161 asm_setupresult(as, ir, ci); 1688#endif
1162 asm_gencall(as, ci, args); 1689 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1163 return 1;
1164 }
1165 }
1166 return 0;
1167} 1690}
1691#endif
1692
1693#if !LJ_SOFTFP
1694#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
1695#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
1696#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
1697#elif LJ_64 /* && LJ_SOFTFP */
1698#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
1699#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
1700#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
1701#endif
1168 1702
1169static void asm_add(ASMState *as, IRIns *ir) 1703static void asm_add(ASMState *as, IRIns *ir)
1170{ 1704{
1171 if (irt_isnum(ir->t)) { 1705 IRType1 t = ir->t;
1172 asm_fparith(as, ir, MIPSI_ADD_D); 1706#if !LJ_SOFTFP32
1173 } else { 1707 if (irt_isnum(t)) {
1708 asm_fpadd(as, ir);
1709 } else
1710#endif
1711 {
1712 /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */
1174 Reg dest = ra_dest(as, ir, RSET_GPR); 1713 Reg dest = ra_dest(as, ir, RSET_GPR);
1175 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1714 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1176 if (irref_isk(ir->op2)) { 1715 if (irref_isk(ir->op2)) {
1177 int32_t k = IR(ir->op2)->i; 1716 intptr_t k = get_kval(IR(ir->op2));
1178 if (checki16(k)) { 1717 if (checki16(k)) {
1179 emit_tsi(as, MIPSI_ADDIU, dest, left, k); 1718 emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest,
1719 left, k);
1180 return; 1720 return;
1181 } 1721 }
1182 } 1722 }
1183 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); 1723 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1184 emit_dst(as, MIPSI_ADDU, dest, left, right); 1724 emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest,
1725 left, right);
1185 } 1726 }
1186} 1727}
1187 1728
1188static void asm_sub(ASMState *as, IRIns *ir) 1729static void asm_sub(ASMState *as, IRIns *ir)
1189{ 1730{
1731#if !LJ_SOFTFP32
1190 if (irt_isnum(ir->t)) { 1732 if (irt_isnum(ir->t)) {
1191 asm_fparith(as, ir, MIPSI_SUB_D); 1733 asm_fpsub(as, ir);
1192 } else { 1734 } else
1735#endif
1736 {
1193 Reg dest = ra_dest(as, ir, RSET_GPR); 1737 Reg dest = ra_dest(as, ir, RSET_GPR);
1194 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1738 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1195 right = (left >> 8); left &= 255; 1739 right = (left >> 8); left &= 255;
1196 emit_dst(as, MIPSI_SUBU, dest, left, right); 1740 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1741 left, right);
1197 } 1742 }
1198} 1743}
1199 1744
1200static void asm_mul(ASMState *as, IRIns *ir) 1745static void asm_mul(ASMState *as, IRIns *ir)
1201{ 1746{
1747#if !LJ_SOFTFP32
1202 if (irt_isnum(ir->t)) { 1748 if (irt_isnum(ir->t)) {
1203 asm_fparith(as, ir, MIPSI_MUL_D); 1749 asm_fpmul(as, ir);
1204 } else { 1750 } else
1751#endif
1752 {
1205 Reg dest = ra_dest(as, ir, RSET_GPR); 1753 Reg dest = ra_dest(as, ir, RSET_GPR);
1206 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 1754 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1207 right = (left >> 8); left &= 255; 1755 right = (left >> 8); left &= 255;
1208 emit_dst(as, MIPSI_MUL, dest, left, right); 1756 if (LJ_64 && irt_is64(ir->t)) {
1757#if !LJ_TARGET_MIPSR6
1758 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1759 emit_dst(as, MIPSI_DMULT, 0, left, right);
1760#else
1761 emit_dst(as, MIPSI_DMUL, dest, left, right);
1762#endif
1763 } else {
1764 emit_dst(as, MIPSI_MUL, dest, left, right);
1765 }
1209 } 1766 }
1210} 1767}
1211 1768
1769static void asm_mod(ASMState *as, IRIns *ir)
1770{
1771#if LJ_64 && LJ_HASFFI
1772 if (!irt_isint(ir->t))
1773 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1774 IRCALL_lj_carith_modu64);
1775 else
1776#endif
1777 asm_callid(as, ir, IRCALL_lj_vm_modi);
1778}
1779
1780#if !LJ_SOFTFP32
1781static void asm_pow(ASMState *as, IRIns *ir)
1782{
1783#if LJ_64 && LJ_HASFFI
1784 if (!irt_isnum(ir->t))
1785 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1786 IRCALL_lj_carith_powu64);
1787 else
1788#endif
1789 asm_callid(as, ir, IRCALL_lj_vm_powi);
1790}
1791
1792static void asm_div(ASMState *as, IRIns *ir)
1793{
1794#if LJ_64 && LJ_HASFFI
1795 if (!irt_isnum(ir->t))
1796 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1797 IRCALL_lj_carith_divu64);
1798 else
1799#endif
1800#if !LJ_SOFTFP
1801 asm_fparith(as, ir, MIPSI_DIV_D);
1802#else
1803 asm_callid(as, ir, IRCALL_softfp_div);
1804#endif
1805}
1806#endif
1807
1212static void asm_neg(ASMState *as, IRIns *ir) 1808static void asm_neg(ASMState *as, IRIns *ir)
1213{ 1809{
1810#if !LJ_SOFTFP
1214 if (irt_isnum(ir->t)) { 1811 if (irt_isnum(ir->t)) {
1215 asm_fpunary(as, ir, MIPSI_NEG_D); 1812 asm_fpunary(as, ir, MIPSI_NEG_D);
1216 } else { 1813 } else
1814#elif LJ_64 /* && LJ_SOFTFP */
1815 if (irt_isnum(ir->t)) {
1816 Reg dest = ra_dest(as, ir, RSET_GPR);
1817 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1818 emit_dst(as, MIPSI_XOR, dest, left,
1819 ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
1820 } else
1821#endif
1822 {
1217 Reg dest = ra_dest(as, ir, RSET_GPR); 1823 Reg dest = ra_dest(as, ir, RSET_GPR);
1218 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1824 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1219 emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); 1825 emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
1826 RID_ZERO, left);
1220 } 1827 }
1221} 1828}
1222 1829
1830#if !LJ_SOFTFP
1831#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
1832#elif LJ_64 /* && LJ_SOFTFP */
1833static void asm_abs(ASMState *as, IRIns *ir)
1834{
1835 Reg dest = ra_dest(as, ir, RSET_GPR);
1836 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1837 emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
1838}
1839#endif
1840
1841#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1842#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1843
1223static void asm_arithov(ASMState *as, IRIns *ir) 1844static void asm_arithov(ASMState *as, IRIns *ir)
1224{ 1845{
1846 /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
1225 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); 1847 Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
1848 lua_assert(!irt_is64(ir->t));
1226 if (irref_isk(ir->op2)) { 1849 if (irref_isk(ir->op2)) {
1227 int k = IR(ir->op2)->i; 1850 int k = IR(ir->op2)->i;
1228 if (ir->o == IR_SUBOV) k = -k; 1851 if (ir->o == IR_SUBOV) k = -k;
@@ -1253,16 +1876,29 @@ static void asm_arithov(ASMState *as, IRIns *ir)
1253 emit_move(as, RID_TMP, dest == left ? left : right); 1876 emit_move(as, RID_TMP, dest == left ? left : right);
1254} 1877}
1255 1878
1879#define asm_addov(as, ir) asm_arithov(as, ir)
1880#define asm_subov(as, ir) asm_arithov(as, ir)
1881
1256static void asm_mulov(ASMState *as, IRIns *ir) 1882static void asm_mulov(ASMState *as, IRIns *ir)
1257{ 1883{
1258#if LJ_DUALNUM 1884 Reg dest = ra_dest(as, ir, RSET_GPR);
1259#error "NYI: MULOV" 1885 Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
1886 right = (left >> 8); left &= 255;
1887 tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
1888 right), dest));
1889 asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
1890 emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
1891#if !LJ_TARGET_MIPSR6
1892 emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
1893 emit_dst(as, MIPSI_MFLO, dest, 0, 0);
1894 emit_dst(as, MIPSI_MULT, 0, left, right);
1260#else 1895#else
1261 UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ 1896 emit_dst(as, MIPSI_MUL, dest, left, right);
1897 emit_dst(as, MIPSI_MUH, tmp, left, right);
1262#endif 1898#endif
1263} 1899}
1264 1900
1265#if LJ_HASFFI 1901#if LJ_32 && LJ_HASFFI
1266static void asm_add64(ASMState *as, IRIns *ir) 1902static void asm_add64(ASMState *as, IRIns *ir)
1267{ 1903{
1268 Reg dest = ra_dest(as, ir, RSET_GPR); 1904 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1346,7 +1982,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1346} 1982}
1347#endif 1983#endif
1348 1984
1349static void asm_bitnot(ASMState *as, IRIns *ir) 1985static void asm_bnot(ASMState *as, IRIns *ir)
1350{ 1986{
1351 Reg left, right, dest = ra_dest(as, ir, RSET_GPR); 1987 Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
1352 IRIns *irl = IR(ir->op1); 1988 IRIns *irl = IR(ir->op1);
@@ -1360,11 +1996,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
1360 emit_dst(as, MIPSI_NOR, dest, left, right); 1996 emit_dst(as, MIPSI_NOR, dest, left, right);
1361} 1997}
1362 1998
1363static void asm_bitswap(ASMState *as, IRIns *ir) 1999static void asm_bswap(ASMState *as, IRIns *ir)
1364{ 2000{
1365 Reg dest = ra_dest(as, ir, RSET_GPR); 2001 Reg dest = ra_dest(as, ir, RSET_GPR);
1366 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 2002 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1367 if ((as->flags & JIT_F_MIPS32R2)) { 2003#if LJ_32
2004 if ((as->flags & JIT_F_MIPSXXR2)) {
1368 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); 2005 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
1369 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); 2006 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
1370 } else { 2007 } else {
@@ -1379,6 +2016,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1379 emit_dta(as, MIPSI_SRL, tmp, left, 24); 2016 emit_dta(as, MIPSI_SRL, tmp, left, 24);
1380 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); 2017 emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
1381 } 2018 }
2019#else
2020 if (irt_is64(ir->t)) {
2021 emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP);
2022 emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left);
2023 } else {
2024 emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
2025 emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
2026 }
2027#endif
1382} 2028}
1383 2029
1384static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 2030static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
@@ -1386,7 +2032,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1386 Reg dest = ra_dest(as, ir, RSET_GPR); 2032 Reg dest = ra_dest(as, ir, RSET_GPR);
1387 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 2033 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1388 if (irref_isk(ir->op2)) { 2034 if (irref_isk(ir->op2)) {
1389 int32_t k = IR(ir->op2)->i; 2035 intptr_t k = get_kval(IR(ir->op2));
1390 if (checku16(k)) { 2036 if (checku16(k)) {
1391 emit_tsi(as, mik, dest, left, k); 2037 emit_tsi(as, mik, dest, left, k);
1392 return; 2038 return;
@@ -1396,22 +2042,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1396 emit_dst(as, mi, dest, left, right); 2042 emit_dst(as, mi, dest, left, right);
1397} 2043}
1398 2044
2045#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
2046#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
2047#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
2048
1399static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) 2049static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
1400{ 2050{
1401 Reg dest = ra_dest(as, ir, RSET_GPR); 2051 Reg dest = ra_dest(as, ir, RSET_GPR);
1402 if (irref_isk(ir->op2)) { /* Constant shifts. */ 2052 if (irref_isk(ir->op2)) { /* Constant shifts. */
1403 uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); 2053 uint32_t shift = (uint32_t)IR(ir->op2)->i;
1404 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); 2054 if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D;
2055 emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR),
2056 (shift & 31));
1405 } else { 2057 } else {
1406 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2058 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1407 right = (left >> 8); left &= 255; 2059 right = (left >> 8); left &= 255;
2060 if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV;
1408 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ 2061 emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */
1409 } 2062 }
1410} 2063}
1411 2064
1412static void asm_bitror(ASMState *as, IRIns *ir) 2065#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
2066#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
2067#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
2068#define asm_brol(as, ir) lua_assert(0)
2069
2070static void asm_bror(ASMState *as, IRIns *ir)
1413{ 2071{
1414 if ((as->flags & JIT_F_MIPS32R2)) { 2072 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
1415 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); 2073 asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
1416 } else { 2074 } else {
1417 Reg dest = ra_dest(as, ir, RSET_GPR); 2075 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1430,12 +2088,38 @@ static void asm_bitror(ASMState *as, IRIns *ir)
1430 } 2088 }
1431} 2089}
1432 2090
2091#if LJ_SOFTFP
2092static void asm_sfpmin_max(ASMState *as, IRIns *ir)
2093{
2094 CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
2095#if LJ_64
2096 IRRef args[2];
2097 args[0] = ir->op1;
2098 args[1] = ir->op2;
2099#else
2100 IRRef args[4];
2101 args[0^LJ_BE] = ir->op1;
2102 args[1^LJ_BE] = (ir+1)->op1;
2103 args[2^LJ_BE] = ir->op2;
2104 args[3^LJ_BE] = (ir+1)->op2;
2105#endif
2106 asm_setupresult(as, ir, &ci);
2107 emit_call(as, (void *)ci.func, 0);
2108 ci.func = NULL;
2109 asm_gencall(as, &ci, args);
2110}
2111#endif
2112
1433static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 2113static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1434{ 2114{
1435 if (irt_isnum(ir->t)) { 2115 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2116#if LJ_SOFTFP
2117 asm_sfpmin_max(as, ir);
2118#else
1436 Reg dest = ra_dest(as, ir, RSET_FPR); 2119 Reg dest = ra_dest(as, ir, RSET_FPR);
1437 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2120 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1438 right = (left >> 8); left &= 255; 2121 right = (left >> 8); left &= 255;
2122#if !LJ_TARGET_MIPSR6
1439 if (dest == left) { 2123 if (dest == left) {
1440 emit_fg(as, MIPSI_MOVT_D, dest, right); 2124 emit_fg(as, MIPSI_MOVT_D, dest, right);
1441 } else { 2125 } else {
@@ -1443,42 +2127,143 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1443 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); 2127 if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
1444 } 2128 }
1445 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); 2129 emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
2130#else
2131 emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right);
2132#endif
2133#endif
1446 } else { 2134 } else {
1447 Reg dest = ra_dest(as, ir, RSET_GPR); 2135 Reg dest = ra_dest(as, ir, RSET_GPR);
1448 Reg right, left = ra_alloc2(as, ir, RSET_GPR); 2136 Reg right, left = ra_alloc2(as, ir, RSET_GPR);
1449 right = (left >> 8); left &= 255; 2137 right = (left >> 8); left &= 255;
1450 if (dest == left) { 2138 if (left == right) {
1451 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); 2139 if (dest != left) emit_move(as, dest, left);
1452 } else { 2140 } else {
1453 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); 2141#if !LJ_TARGET_MIPSR6
1454 if (dest != right) emit_move(as, dest, right); 2142 if (dest == left) {
2143 emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
2144 } else {
2145 emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
2146 if (dest != right) emit_move(as, dest, right);
2147 }
2148#else
2149 emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
2150 if (dest != right) {
2151 emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP);
2152 emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP);
2153 } else {
2154 emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP);
2155 emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP);
2156 }
2157#endif
2158 emit_dst(as, MIPSI_SLT, RID_TMP,
2159 ismax ? left : right, ismax ? right : left);
1455 } 2160 }
1456 emit_dst(as, MIPSI_SLT, RID_TMP,
1457 ismax ? left : right, ismax ? right : left);
1458 } 2161 }
1459} 2162}
1460 2163
2164#define asm_min(as, ir) asm_min_max(as, ir, 0)
2165#define asm_max(as, ir) asm_min_max(as, ir, 1)
2166
1461/* -- Comparisons --------------------------------------------------------- */ 2167/* -- Comparisons --------------------------------------------------------- */
1462 2168
2169#if LJ_SOFTFP
2170/* SFP comparisons. */
2171static void asm_sfpcomp(ASMState *as, IRIns *ir)
2172{
2173 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
2174 RegSet drop = RSET_SCRATCH;
2175 Reg r;
2176#if LJ_64
2177 IRRef args[2];
2178 args[0] = ir->op1;
2179 args[1] = ir->op2;
2180#else
2181 IRRef args[4];
2182 args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
2183 args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
2184#endif
2185
2186 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
2187 if (!rset_test(as->freeset, r) &&
2188 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
2189 rset_clear(drop, r);
2190 }
2191 ra_evictset(as, drop);
2192
2193 asm_setupresult(as, ir, ci);
2194
2195 switch ((IROp)ir->o) {
2196 case IR_LT:
2197 asm_guard(as, MIPSI_BGEZ, RID_RET, 0);
2198 break;
2199 case IR_ULT:
2200 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2201 emit_loadi(as, RID_TMP, 1);
2202 asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);
2203 break;
2204 case IR_GE:
2205 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2206 emit_loadi(as, RID_TMP, 2);
2207 asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
2208 break;
2209 case IR_LE:
2210 asm_guard(as, MIPSI_BGTZ, RID_RET, 0);
2211 break;
2212 case IR_GT:
2213 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2214 emit_loadi(as, RID_TMP, 2);
2215 asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
2216 break;
2217 case IR_UGE:
2218 asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
2219 break;
2220 case IR_ULE:
2221 asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
2222 emit_loadi(as, RID_TMP, 1);
2223 break;
2224 case IR_UGT: case IR_ABC:
2225 asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
2226 break;
2227 case IR_EQ: case IR_NE:
2228 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO);
2229 default:
2230 break;
2231 }
2232 asm_gencall(as, ci, args);
2233}
2234#endif
2235
1463static void asm_comp(ASMState *as, IRIns *ir) 2236static void asm_comp(ASMState *as, IRIns *ir)
1464{ 2237{
1465 /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ 2238 /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
1466 IROp op = ir->o; 2239 IROp op = ir->o;
1467 if (irt_isnum(ir->t)) { 2240 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2241#if LJ_SOFTFP
2242 asm_sfpcomp(as, ir);
2243#else
2244#if !LJ_TARGET_MIPSR6
1468 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 2245 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1469 right = (left >> 8); left &= 255; 2246 right = (left >> 8); left &= 255;
1470 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2247 asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1471 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); 2248 emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
2249#else
2250 Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR);
2251 right = (left >> 8); left &= 255;
2252 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2253 asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2254 emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right);
2255#endif
2256#endif
1472 } else { 2257 } else {
1473 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); 2258 Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
1474 if (op == IR_ABC) op = IR_UGT; 2259 if (op == IR_ABC) op = IR_UGT;
1475 if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { 2260 if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) {
1476 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : 2261 MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
1477 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); 2262 ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
1478 asm_guard(as, mi, left, 0); 2263 asm_guard(as, mi, left, 0);
1479 } else { 2264 } else {
1480 if (irref_isk(ir->op2)) { 2265 if (irref_isk(ir->op2)) {
1481 int32_t k = IR(ir->op2)->i; 2266 intptr_t k = get_kval(IR(ir->op2));
1482 if ((op&2)) k++; 2267 if ((op&2)) k++;
1483 if (checki16(k)) { 2268 if (checki16(k)) {
1484 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); 2269 asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1495,19 +2280,28 @@ static void asm_comp(ASMState *as, IRIns *ir)
1495 } 2280 }
1496} 2281}
1497 2282
1498static void asm_compeq(ASMState *as, IRIns *ir) 2283static void asm_equal(ASMState *as, IRIns *ir)
1499{ 2284{
1500 Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); 2285 Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
2286 RSET_FPR : RSET_GPR);
1501 right = (left >> 8); left &= 255; 2287 right = (left >> 8); left &= 255;
1502 if (irt_isnum(ir->t)) { 2288 if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
2289#if LJ_SOFTFP
2290 asm_sfpcomp(as, ir);
2291#elif !LJ_TARGET_MIPSR6
1503 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); 2292 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
1504 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); 2293 emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
2294#else
2295 Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
2296 asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
2297 emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right);
2298#endif
1505 } else { 2299 } else {
1506 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); 2300 asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
1507 } 2301 }
1508} 2302}
1509 2303
1510#if LJ_HASFFI 2304#if LJ_32 && LJ_HASFFI
1511/* 64 bit integer comparisons. */ 2305/* 64 bit integer comparisons. */
1512static void asm_comp64(ASMState *as, IRIns *ir) 2306static void asm_comp64(ASMState *as, IRIns *ir)
1513{ 2307{
@@ -1549,41 +2343,79 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
1549/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 2343/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1550static void asm_hiop(ASMState *as, IRIns *ir) 2344static void asm_hiop(ASMState *as, IRIns *ir)
1551{ 2345{
1552#if LJ_HASFFI 2346#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
1553 /* HIOP is marked as a store because it needs its own DCE logic. */ 2347 /* HIOP is marked as a store because it needs its own DCE logic. */
1554 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2348 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1555 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2349 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1556 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2350 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1557 as->curins--; /* Always skip the CONV. */ 2351 as->curins--; /* Always skip the CONV. */
2352#if LJ_HASFFI && !LJ_SOFTFP
1558 if (usehi || uselo) 2353 if (usehi || uselo)
1559 asm_conv64(as, ir); 2354 asm_conv64(as, ir);
1560 return; 2355 return;
2356#endif
1561 } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ 2357 } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */
1562 as->curins--; /* Always skip the loword comparison. */ 2358 as->curins--; /* Always skip the loword comparison. */
2359#if LJ_SOFTFP
2360 if (!irt_isint(ir->t)) {
2361 asm_sfpcomp(as, ir-1);
2362 return;
2363 }
2364#endif
2365#if LJ_HASFFI
1563 asm_comp64(as, ir); 2366 asm_comp64(as, ir);
2367#endif
1564 return; 2368 return;
1565 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2369 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1566 as->curins--; /* Always skip the loword comparison. */ 2370 as->curins--; /* Always skip the loword comparison. */
2371#if LJ_SOFTFP
2372 if (!irt_isint(ir->t)) {
2373 asm_sfpcomp(as, ir-1);
2374 return;
2375 }
2376#endif
2377#if LJ_HASFFI
1567 asm_comp64eq(as, ir); 2378 asm_comp64eq(as, ir);
2379#endif
1568 return; 2380 return;
2381#if LJ_SOFTFP
2382 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
2383 as->curins--; /* Always skip the loword min/max. */
2384 if (uselo || usehi)
2385 asm_sfpmin_max(as, ir-1);
2386 return;
2387#endif
1569 } else if ((ir-1)->o == IR_XSTORE) { 2388 } else if ((ir-1)->o == IR_XSTORE) {
1570 as->curins--; /* Handle both stores here. */ 2389 as->curins--; /* Handle both stores here. */
1571 if ((ir-1)->r != RID_SINK) { 2390 if ((ir-1)->r != RID_SINK) {
1572 asm_xstore(as, ir, LJ_LE ? 4 : 0); 2391 asm_xstore_(as, ir, LJ_LE ? 4 : 0);
1573 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 2392 asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
1574 } 2393 }
1575 return; 2394 return;
1576 } 2395 }
1577 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 2396 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1578 switch ((ir-1)->o) { 2397 switch ((ir-1)->o) {
2398#if LJ_HASFFI
1579 case IR_ADD: as->curins--; asm_add64(as, ir); break; 2399 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1580 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 2400 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1581 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 2401 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
2402#endif
2403#if LJ_SOFTFP
2404 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2405 case IR_STRTO:
2406 if (!uselo)
2407 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
2408 break;
2409#endif
1582 case IR_CALLN: 2410 case IR_CALLN:
2411 case IR_CALLS:
1583 case IR_CALLXS: 2412 case IR_CALLXS:
1584 if (!uselo) 2413 if (!uselo)
1585 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 2414 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1586 break; 2415 break;
2416#if LJ_SOFTFP
2417 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
2418#endif
1587 case IR_CNEWI: 2419 case IR_CNEWI:
1588 /* Nothing to do here. Handled by lo op itself. */ 2420 /* Nothing to do here. Handled by lo op itself. */
1589 break; 2421 break;
@@ -1594,6 +2426,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1594#endif 2426#endif
1595} 2427}
1596 2428
2429/* -- Profiling ----------------------------------------------------------- */
2430
2431static void asm_prof(ASMState *as, IRIns *ir)
2432{
2433 UNUSED(ir);
2434 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
2435 emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
2436 emit_lsglptr(as, MIPSI_LBU, RID_TMP,
2437 (int32_t)offsetof(global_State, hookmask));
2438}
2439
1597/* -- Stack handling ------------------------------------------------------ */ 2440/* -- Stack handling ------------------------------------------------------ */
1598 2441
1599/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2442/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1604,46 +2447,67 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1604 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; 2447 Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
1605 ExitNo oldsnap = as->snapno; 2448 ExitNo oldsnap = as->snapno;
1606 rset_clear(allow, pbase); 2449 rset_clear(allow, pbase);
2450#if LJ_32
1607 tmp = allow ? rset_pickbot(allow) : 2451 tmp = allow ? rset_pickbot(allow) :
1608 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); 2452 (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
2453#else
2454 tmp = allow ? rset_pickbot(allow) : RID_RET;
2455#endif
1609 as->snapno = exitno; 2456 as->snapno = exitno;
1610 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); 2457 asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
1611 as->snapno = oldsnap; 2458 as->snapno = oldsnap;
1612 if (allow == RSET_EMPTY) /* Restore temp. register. */ 2459 if (allow == RSET_EMPTY) /* Restore temp. register. */
1613 emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); 2460 emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0);
1614 else 2461 else
1615 ra_modified(as, tmp); 2462 ra_modified(as, tmp);
1616 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); 2463 emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
1617 emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); 2464 emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase);
1618 emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); 2465 emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack));
1619 if (pbase == RID_TMP) 2466 if (pbase == RID_TMP)
1620 emit_getgl(as, RID_TMP, jit_base); 2467 emit_getgl(as, RID_TMP, jit_base);
1621 emit_getgl(as, tmp, jit_L); 2468 emit_getgl(as, tmp, cur_L);
1622 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2469 if (allow == RSET_EMPTY) /* Spill temp. register. */
1623 emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); 2470 emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0);
1624} 2471}
1625 2472
1626/* Restore Lua stack from on-trace state. */ 2473/* Restore Lua stack from on-trace state. */
1627static void asm_stack_restore(ASMState *as, SnapShot *snap) 2474static void asm_stack_restore(ASMState *as, SnapShot *snap)
1628{ 2475{
1629 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2476 SnapEntry *map = &as->T->snapmap[snap->mapofs];
1630 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2477#if LJ_32 || defined(LUA_USE_ASSERT)
2478 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2479#endif
1631 MSize n, nent = snap->nent; 2480 MSize n, nent = snap->nent;
1632 /* Store the value of all modified slots to the Lua stack. */ 2481 /* Store the value of all modified slots to the Lua stack. */
1633 for (n = 0; n < nent; n++) { 2482 for (n = 0; n < nent; n++) {
1634 SnapEntry sn = map[n]; 2483 SnapEntry sn = map[n];
1635 BCReg s = snap_slot(sn); 2484 BCReg s = snap_slot(sn);
1636 int32_t ofs = 8*((int32_t)s-1); 2485 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1637 IRRef ref = snap_ref(sn); 2486 IRRef ref = snap_ref(sn);
1638 IRIns *ir = IR(ref); 2487 IRIns *ir = IR(ref);
1639 if ((sn & SNAP_NORESTORE)) 2488 if ((sn & SNAP_NORESTORE))
1640 continue; 2489 continue;
1641 if (irt_isnum(ir->t)) { 2490 if (irt_isnum(ir->t)) {
2491#if LJ_SOFTFP32
2492 Reg tmp;
2493 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2494 lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
2495 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2496 emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2497 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2498 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2499 emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2500#elif LJ_SOFTFP /* && LJ_64 */
2501 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2502 emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
2503#else
1642 Reg src = ra_alloc1(as, ref, RSET_FPR); 2504 Reg src = ra_alloc1(as, ref, RSET_FPR);
1643 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); 2505 emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
2506#endif
1644 } else { 2507 } else {
1645 Reg type; 2508#if LJ_32
1646 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2509 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2510 Reg type;
1647 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); 2511 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
1648 if (!irt_ispri(ir->t)) { 2512 if (!irt_ispri(ir->t)) {
1649 Reg src = ra_alloc1(as, ref, allow); 2513 Reg src = ra_alloc1(as, ref, allow);
@@ -1653,10 +2517,17 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1653 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2517 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1654 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2518 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1655 type = ra_allock(as, (int32_t)(*flinks--), allow); 2519 type = ra_allock(as, (int32_t)(*flinks--), allow);
2520#if LJ_SOFTFP
2521 } else if ((sn & SNAP_SOFTFPNUM)) {
2522 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2523#endif
1656 } else { 2524 } else {
1657 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2525 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1658 } 2526 }
1659 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); 2527 emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
2528#else
2529 asm_tvstore64(as, RID_BASE, ofs, ref);
2530#endif
1660 } 2531 }
1661 checkmclim(as); 2532 checkmclim(as);
1662 } 2533 }
@@ -1680,7 +2551,7 @@ static void asm_gc_check(ASMState *as)
1680 args[0] = ASMREF_TMP1; /* global_State *g */ 2551 args[0] = ASMREF_TMP1; /* global_State *g */
1681 args[1] = ASMREF_TMP2; /* MSize steps */ 2552 args[1] = ASMREF_TMP2; /* MSize steps */
1682 asm_gencall(as, ci, args); 2553 asm_gencall(as, ci, args);
1683 emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); 2554 emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
1684 tmp = ra_releasetmp(as, ASMREF_TMP2); 2555 tmp = ra_releasetmp(as, ASMREF_TMP2);
1685 emit_loadi(as, tmp, as->gcsteps); 2556 emit_loadi(as, tmp, as->gcsteps);
1686 /* Jump around GC step if GC total < GC threshold. */ 2557 /* Jump around GC step if GC total < GC threshold. */
@@ -1755,7 +2626,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1755 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; 2626 MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
1756 int32_t spadj = as->T->spadjust; 2627 int32_t spadj = as->T->spadjust;
1757 MCode *p = as->mctop-1; 2628 MCode *p = as->mctop-1;
1758 *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; 2629 *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
1759 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); 2630 p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
1760} 2631}
1761 2632
@@ -1766,139 +2637,26 @@ static void asm_tail_prep(ASMState *as)
1766 as->invmcp = as->loopref ? as->mcp : NULL; 2637 as->invmcp = as->loopref ? as->mcp : NULL;
1767} 2638}
1768 2639
1769/* -- Instruction dispatch ------------------------------------------------ */
1770
1771/* Assemble a single instruction. */
1772static void asm_ir(ASMState *as, IRIns *ir)
1773{
1774 switch ((IROp)ir->o) {
1775 /* Miscellaneous ops. */
1776 case IR_LOOP: asm_loop(as); break;
1777 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1778 case IR_USE:
1779 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1780 case IR_PHI: asm_phi(as, ir); break;
1781 case IR_HIOP: asm_hiop(as, ir); break;
1782 case IR_GCSTEP: asm_gcstep(as, ir); break;
1783
1784 /* Guarded assertions. */
1785 case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
1786 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1787 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1788 case IR_ABC:
1789 asm_comp(as, ir);
1790 break;
1791
1792 case IR_RETF: asm_retf(as, ir); break;
1793
1794 /* Bit ops. */
1795 case IR_BNOT: asm_bitnot(as, ir); break;
1796 case IR_BSWAP: asm_bitswap(as, ir); break;
1797
1798 case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
1799 case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
1800 case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
1801
1802 case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
1803 case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
1804 case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
1805 case IR_BROL: lua_assert(0); break;
1806 case IR_BROR: asm_bitror(as, ir); break;
1807
1808 /* Arithmetic ops. */
1809 case IR_ADD: asm_add(as, ir); break;
1810 case IR_SUB: asm_sub(as, ir); break;
1811 case IR_MUL: asm_mul(as, ir); break;
1812 case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
1813 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
1814 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
1815 case IR_NEG: asm_neg(as, ir); break;
1816
1817 case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
1818 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
1819 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
1820 case IR_MIN: asm_min_max(as, ir, 0); break;
1821 case IR_MAX: asm_min_max(as, ir, 1); break;
1822 case IR_FPMATH:
1823 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1824 break;
1825 if (ir->op2 <= IRFPM_TRUNC)
1826 asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
1827 else if (ir->op2 == IRFPM_SQRT)
1828 asm_fpunary(as, ir, MIPSI_SQRT_D);
1829 else
1830 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1831 break;
1832
1833 /* Overflow-checking arithmetic ops. */
1834 case IR_ADDOV: asm_arithov(as, ir); break;
1835 case IR_SUBOV: asm_arithov(as, ir); break;
1836 case IR_MULOV: asm_mulov(as, ir); break;
1837
1838 /* Memory references. */
1839 case IR_AREF: asm_aref(as, ir); break;
1840 case IR_HREF: asm_href(as, ir); break;
1841 case IR_HREFK: asm_hrefk(as, ir); break;
1842 case IR_NEWREF: asm_newref(as, ir); break;
1843 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
1844 case IR_FREF: asm_fref(as, ir); break;
1845 case IR_STRREF: asm_strref(as, ir); break;
1846
1847 /* Loads and stores. */
1848 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1849 asm_ahuvload(as, ir);
1850 break;
1851 case IR_FLOAD: asm_fload(as, ir); break;
1852 case IR_XLOAD: asm_xload(as, ir); break;
1853 case IR_SLOAD: asm_sload(as, ir); break;
1854
1855 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
1856 case IR_FSTORE: asm_fstore(as, ir); break;
1857 case IR_XSTORE: asm_xstore(as, ir, 0); break;
1858
1859 /* Allocations. */
1860 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
1861 case IR_TNEW: asm_tnew(as, ir); break;
1862 case IR_TDUP: asm_tdup(as, ir); break;
1863 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
1864
1865 /* Write barriers. */
1866 case IR_TBAR: asm_tbar(as, ir); break;
1867 case IR_OBAR: asm_obar(as, ir); break;
1868
1869 /* Type conversions. */
1870 case IR_CONV: asm_conv(as, ir); break;
1871 case IR_TOBIT: asm_tobit(as, ir); break;
1872 case IR_TOSTR: asm_tostr(as, ir); break;
1873 case IR_STRTO: asm_strto(as, ir); break;
1874
1875 /* Calls. */
1876 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
1877 case IR_CALLXS: asm_callx(as, ir); break;
1878 case IR_CARG: break;
1879
1880 default:
1881 setintV(&as->J->errinfo, ir->o);
1882 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
1883 break;
1884 }
1885}
1886
1887/* -- Trace setup --------------------------------------------------------- */ 2640/* -- Trace setup --------------------------------------------------------- */
1888 2641
1889/* Ensure there are enough stack slots for call arguments. */ 2642/* Ensure there are enough stack slots for call arguments. */
1890static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2643static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1891{ 2644{
1892 IRRef args[CCI_NARGS_MAX*2]; 2645 IRRef args[CCI_NARGS_MAX*2];
1893 uint32_t i, nargs = (int)CCI_NARGS(ci); 2646 uint32_t i, nargs = CCI_XNARGS(ci);
2647#if LJ_32
1894 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2648 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2649#else
2650 int nslots = 0, ngpr = REGARG_NUMGPR;
2651#endif
1895 asm_collectargs(as, ir, ci, args); 2652 asm_collectargs(as, ir, ci, args);
1896 for (i = 0; i < nargs; i++) { 2653 for (i = 0; i < nargs; i++) {
1897 if (args[i] && irt_isfp(IR(args[i])->t) && 2654#if LJ_32
2655 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
1898 nfpr > 0 && !(ci->flags & CCI_VARARG)) { 2656 nfpr > 0 && !(ci->flags & CCI_VARARG)) {
1899 nfpr--; 2657 nfpr--;
1900 ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; 2658 ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
1901 } else if (args[i] && irt_isnum(IR(args[i])->t)) { 2659 } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {
1902 nfpr = 0; 2660 nfpr = 0;
1903 ngpr = ngpr & ~1; 2661 ngpr = ngpr & ~1;
1904 if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; 2662 if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
@@ -1906,6 +2664,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1906 nfpr = 0; 2664 nfpr = 0;
1907 if (ngpr > 0) ngpr--; else nslots++; 2665 if (ngpr > 0) ngpr--; else nslots++;
1908 } 2666 }
2667#else
2668 if (ngpr > 0) ngpr--; else nslots += 2;
2669#endif
1909 } 2670 }
1910 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2671 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
1911 as->evenspill = nslots; 2672 as->evenspill = nslots;
@@ -1936,7 +2697,12 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1936 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && 2697 if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
1937 ((p[-1] & 0xf0000000u) == MIPSI_BEQ || 2698 ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
1938 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || 2699 (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
1939 (p[-1] & 0xffe00000u) == MIPSI_BC1F)) { 2700#if !LJ_TARGET_MIPSR6
2701 (p[-1] & 0xffe00000u) == MIPSI_BC1F
2702#else
2703 (p[-1] & 0xff600000u) == MIPSI_BC1EQZ
2704#endif
2705 )) {
1940 ptrdiff_t delta = target - p; 2706 ptrdiff_t delta = target - p;
1941 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ 2707 if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
1942 patchbranch: 2708 patchbranch:
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index dc092db2..8fa8c8ef 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
226 emit_tab(as, pi, rt, left, right); 226 emit_tab(as, pi, rt, left, right);
227} 227}
228 228
229#if !LJ_SOFTFP
229/* Fuse to multiply-add/sub instruction. */ 230/* Fuse to multiply-add/sub instruction. */
230static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) 231static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
231{ 232{
@@ -245,21 +246,26 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
245 } 246 }
246 return 0; 247 return 0;
247} 248}
249#endif
248 250
249/* -- Calls --------------------------------------------------------------- */ 251/* -- Calls --------------------------------------------------------------- */
250 252
251/* Generate a call to a C function. */ 253/* Generate a call to a C function. */
252static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 254static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
253{ 255{
254 uint32_t n, nargs = CCI_NARGS(ci); 256 uint32_t n, nargs = CCI_XNARGS(ci);
255 int32_t ofs = 8; 257 int32_t ofs = 8;
256 Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; 258 Reg gpr = REGARG_FIRSTGPR;
259#if !LJ_SOFTFP
260 Reg fpr = REGARG_FIRSTFPR;
261#endif
257 if ((void *)ci->func) 262 if ((void *)ci->func)
258 emit_call(as, (void *)ci->func); 263 emit_call(as, (void *)ci->func);
259 for (n = 0; n < nargs; n++) { /* Setup args. */ 264 for (n = 0; n < nargs; n++) { /* Setup args. */
260 IRRef ref = args[n]; 265 IRRef ref = args[n];
261 if (ref) { 266 if (ref) {
262 IRIns *ir = IR(ref); 267 IRIns *ir = IR(ref);
268#if !LJ_SOFTFP
263 if (irt_isfp(ir->t)) { 269 if (irt_isfp(ir->t)) {
264 if (fpr <= REGARG_LASTFPR) { 270 if (fpr <= REGARG_LASTFPR) {
265 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ 271 lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
271 emit_spstore(as, ir, r, ofs); 277 emit_spstore(as, ir, r, ofs);
272 ofs += irt_isnum(ir->t) ? 8 : 4; 278 ofs += irt_isnum(ir->t) ? 8 : 4;
273 } 279 }
274 } else { 280 } else
281#endif
282 {
275 if (gpr <= REGARG_LASTGPR) { 283 if (gpr <= REGARG_LASTGPR) {
276 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ 284 lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
277 ra_leftov(as, gpr, ref); 285 ra_leftov(as, gpr, ref);
@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
290 } 298 }
291 checkmclim(as); 299 checkmclim(as);
292 } 300 }
301#if !LJ_SOFTFP
293 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ 302 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
294 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); 303 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
304#endif
295} 305}
296 306
297/* Setup result reg/sp for call. Evict scratch regs. */ 307/* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
299{ 309{
300 RegSet drop = RSET_SCRATCH; 310 RegSet drop = RSET_SCRATCH;
301 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); 311 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
312#if !LJ_SOFTFP
302 if ((ci->flags & CCI_NOFPRCLOBBER)) 313 if ((ci->flags & CCI_NOFPRCLOBBER))
303 drop &= ~RSET_FPR; 314 drop &= ~RSET_FPR;
315#endif
304 if (ra_hasreg(ir->r)) 316 if (ra_hasreg(ir->r))
305 rset_clear(drop, ir->r); /* Dest reg handled below. */ 317 rset_clear(drop, ir->r); /* Dest reg handled below. */
306 if (hiop && ra_hasreg((ir+1)->r)) 318 if (hiop && ra_hasreg((ir+1)->r))
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
308 ra_evictset(as, drop); /* Evictions must be performed first. */ 320 ra_evictset(as, drop); /* Evictions must be performed first. */
309 if (ra_used(ir)) { 321 if (ra_used(ir)) {
310 lua_assert(!irt_ispri(ir->t)); 322 lua_assert(!irt_ispri(ir->t));
311 if (irt_isfp(ir->t)) { 323 if (!LJ_SOFTFP && irt_isfp(ir->t)) {
312 if ((ci->flags & CCI_CASTU64)) { 324 if ((ci->flags & CCI_CASTU64)) {
313 /* Use spill slot or temp slots. */ 325 /* Use spill slot or temp slots. */
314 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; 326 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -323,23 +335,16 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
323 } else { 335 } else {
324 ra_destreg(as, ir, RID_FPRET); 336 ra_destreg(as, ir, RID_FPRET);
325 } 337 }
338#if LJ_32
326 } else if (hiop) { 339 } else if (hiop) {
327 ra_destpair(as, ir); 340 ra_destpair(as, ir);
341#endif
328 } else { 342 } else {
329 ra_destreg(as, ir, RID_RET); 343 ra_destreg(as, ir, RID_RET);
330 } 344 }
331 } 345 }
332} 346}
333 347
334static void asm_call(ASMState *as, IRIns *ir)
335{
336 IRRef args[CCI_NARGS_MAX];
337 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
338 asm_collectargs(as, ir, ci, args);
339 asm_setupresult(as, ir, ci);
340 asm_gencall(as, ci, args);
341}
342
343static void asm_callx(ASMState *as, IRIns *ir) 348static void asm_callx(ASMState *as, IRIns *ir)
344{ 349{
345 IRRef args[CCI_NARGS_MAX*2]; 350 IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +357,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
352 func = ir->op2; irf = IR(func); 357 func = ir->op2; irf = IR(func);
353 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } 358 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
354 if (irref_isk(func)) { /* Call to constant address. */ 359 if (irref_isk(func)) { /* Call to constant address. */
355 ci.func = (ASMFunction)(void *)(irf->i); 360 ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
356 } else { /* Need a non-argument register for indirect calls. */ 361 } else { /* Need a non-argument register for indirect calls. */
357 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); 362 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
358 Reg freg = ra_alloc1(as, func, allow); 363 Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +368,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
363 asm_gencall(as, &ci, args); 368 asm_gencall(as, &ci, args);
364} 369}
365 370
366static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
367{
368 const CCallInfo *ci = &lj_ir_callinfo[id];
369 IRRef args[2];
370 args[0] = ir->op1;
371 args[1] = ir->op2;
372 asm_setupresult(as, ir, ci);
373 asm_gencall(as, ci, args);
374}
375
376/* -- Returns ------------------------------------------------------------- */ 371/* -- Returns ------------------------------------------------------------- */
377 372
378/* Return to lower frame. Guard that it goes to the right spot. */ 373/* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +375,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
380{ 375{
381 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 376 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
382 void *pc = ir_kptr(IR(ir->op2)); 377 void *pc = ir_kptr(IR(ir->op2));
383 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 378 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
384 as->topslot -= (BCReg)delta; 379 as->topslot -= (BCReg)delta;
385 if ((int32_t)as->topslot < 0) as->topslot = 0; 380 if ((int32_t)as->topslot < 0) as->topslot = 0;
386 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 381 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -394,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
394 389
395/* -- Type conversions ---------------------------------------------------- */ 390/* -- Type conversions ---------------------------------------------------- */
396 391
392#if !LJ_SOFTFP
397static void asm_tointg(ASMState *as, IRIns *ir, Reg left) 393static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
398{ 394{
399 RegSet allow = RSET_FPR; 395 RegSet allow = RSET_FPR;
@@ -410,8 +406,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
410 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); 406 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
411 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 407 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
412 emit_lsptr(as, PPCI_LFS, (fbias & 31), 408 emit_lsptr(as, PPCI_LFS, (fbias & 31),
413 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), 409 (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
414 RSET_GPR);
415 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 410 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
416 emit_fb(as, PPCI_FCTIWZ, tmp, left); 411 emit_fb(as, PPCI_FCTIWZ, tmp, left);
417} 412}
@@ -427,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIns *ir)
427 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 422 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
428 emit_fab(as, PPCI_FADD, tmp, left, right); 423 emit_fab(as, PPCI_FADD, tmp, left, right);
429} 424}
425#endif
430 426
431static void asm_conv(ASMState *as, IRIns *ir) 427static void asm_conv(ASMState *as, IRIns *ir)
432{ 428{
433 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); 429 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
430#if !LJ_SOFTFP
434 int stfp = (st == IRT_NUM || st == IRT_FLOAT); 431 int stfp = (st == IRT_NUM || st == IRT_FLOAT);
432#endif
435 IRRef lref = ir->op1; 433 IRRef lref = ir->op1;
436 lua_assert(irt_type(ir->t) != st);
437 lua_assert(!(irt_isint64(ir->t) || 434 lua_assert(!(irt_isint64(ir->t) ||
438 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ 435 (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
436#if LJ_SOFTFP
437 /* FP conversions are handled by SPLIT. */
438 lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
439 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
440#else
441 lua_assert(irt_type(ir->t) != st);
439 if (irt_isfp(ir->t)) { 442 if (irt_isfp(ir->t)) {
440 Reg dest = ra_dest(as, ir, RSET_FPR); 443 Reg dest = ra_dest(as, ir, RSET_FPR);
441 if (stfp) { /* FP to FP conversion. */ 444 if (stfp) { /* FP to FP conversion. */
@@ -450,13 +453,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
450 Reg left = ra_alloc1(as, lref, allow); 453 Reg left = ra_alloc1(as, lref, allow);
451 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); 454 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
452 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); 455 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
453 const float *kbias;
454 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); 456 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
455 emit_fab(as, PPCI_FSUB, dest, dest, fbias); 457 emit_fab(as, PPCI_FSUB, dest, dest, fbias);
456 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); 458 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
457 kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); 459 emit_lsptr(as, PPCI_LFS, (fbias & 31),
458 if (st == IRT_U32) kbias++; 460 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
459 emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
460 rset_clear(allow, hibias)); 461 rset_clear(allow, hibias));
461 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, 462 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
462 RID_SP, SPOFS_TMPLO); 463 RID_SP, SPOFS_TMPLO);
@@ -489,15 +490,16 @@ static void asm_conv(ASMState *as, IRIns *ir)
489 emit_fb(as, PPCI_FCTIWZ, tmp, tmp); 490 emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
490 emit_fab(as, PPCI_FSUB, tmp, left, tmp); 491 emit_fab(as, PPCI_FSUB, tmp, left, tmp);
491 emit_lsptr(as, PPCI_LFS, (tmp & 31), 492 emit_lsptr(as, PPCI_LFS, (tmp & 31),
492 (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), 493 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
493 RSET_GPR);
494 } else { 494 } else {
495 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 495 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
496 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); 496 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
497 emit_fb(as, PPCI_FCTIWZ, tmp, left); 497 emit_fb(as, PPCI_FCTIWZ, tmp, left);
498 } 498 }
499 } 499 }
500 } else { 500 } else
501#endif
502 {
501 Reg dest = ra_dest(as, ir, RSET_GPR); 503 Reg dest = ra_dest(as, ir, RSET_GPR);
502 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 504 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
503 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); 505 Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -513,46 +515,50 @@ static void asm_conv(ASMState *as, IRIns *ir)
513 } 515 }
514} 516}
515 517
516#if LJ_HASFFI
517static void asm_conv64(ASMState *as, IRIns *ir)
518{
519 IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
520 IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
521 IRCallID id;
522 const CCallInfo *ci;
523 IRRef args[2];
524 args[0] = ir->op1;
525 args[1] = (ir-1)->op1;
526 if (st == IRT_NUM || st == IRT_FLOAT) {
527 id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
528 ir--;
529 } else {
530 id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
531 }
532 ci = &lj_ir_callinfo[id];
533 asm_setupresult(as, ir, ci);
534 asm_gencall(as, ci, args);
535}
536#endif
537
538static void asm_strto(ASMState *as, IRIns *ir) 518static void asm_strto(ASMState *as, IRIns *ir)
539{ 519{
540 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; 520 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
541 IRRef args[2]; 521 IRRef args[2];
542 int32_t ofs; 522 int32_t ofs = SPOFS_TMP;
523#if LJ_SOFTFP
524 ra_evictset(as, RSET_SCRATCH);
525 if (ra_used(ir)) {
526 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
527 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
528 int i;
529 for (i = 0; i < 2; i++) {
530 Reg r = (ir+i)->r;
531 if (ra_hasreg(r)) {
532 ra_free(as, r);
533 ra_modified(as, r);
534 emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
535 }
536 }
537 ofs = sps_scale(ir->s & ~1);
538 } else {
539 Reg rhi = ra_dest(as, ir+1, RSET_GPR);
540 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
541 emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
542 emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
543 }
544 }
545#else
543 RegSet drop = RSET_SCRATCH; 546 RegSet drop = RSET_SCRATCH;
544 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ 547 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
545 ra_evictset(as, drop); 548 ra_evictset(as, drop);
549 if (ir->s) ofs = sps_scale(ir->s);
550#endif
546 asm_guardcc(as, CC_EQ); 551 asm_guardcc(as, CC_EQ);
547 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ 552 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
548 args[0] = ir->op1; /* GCstr *str */ 553 args[0] = ir->op1; /* GCstr *str */
549 args[1] = ASMREF_TMP1; /* TValue *n */ 554 args[1] = ASMREF_TMP1; /* TValue *n */
550 asm_gencall(as, ci, args); 555 asm_gencall(as, ci, args);
551 /* Store the result to the spill slot or temp slots. */ 556 /* Store the result to the spill slot or temp slots. */
552 ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
553 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); 557 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
554} 558}
555 559
560/* -- Memory references --------------------------------------------------- */
561
556/* Get pointer to TValue. */ 562/* Get pointer to TValue. */
557static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) 563static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
558{ 564{
@@ -566,37 +572,19 @@ static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
566 /* Otherwise use g->tmptv to hold the TValue. */ 572 /* Otherwise use g->tmptv to hold the TValue. */
567 RegSet allow = rset_exclude(RSET_GPR, dest); 573 RegSet allow = rset_exclude(RSET_GPR, dest);
568 Reg type; 574 Reg type;
569 emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); 575 emit_tai(as, PPCI_ADDI, dest, RID_JGL, (int32_t)offsetof(global_State, tmptv)-32768);
570 if (!irt_ispri(ir->t)) { 576 if (!irt_ispri(ir->t)) {
571 Reg src = ra_alloc1(as, ref, allow); 577 Reg src = ra_alloc1(as, ref, allow);
572 emit_setgl(as, src, tmptv.gcr); 578 emit_setgl(as, src, tmptv.gcr);
573 } 579 }
574 type = ra_allock(as, irt_toitype(ir->t), allow); 580 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
581 type = ra_alloc1(as, ref+1, allow);
582 else
583 type = ra_allock(as, irt_toitype(ir->t), allow);
575 emit_setgl(as, type, tmptv.it); 584 emit_setgl(as, type, tmptv.it);
576 } 585 }
577} 586}
578 587
579static void asm_tostr(ASMState *as, IRIns *ir)
580{
581 IRRef args[2];
582 args[0] = ASMREF_L;
583 as->gcsteps++;
584 if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
585 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
586 args[1] = ASMREF_TMP1; /* const lua_Number * */
587 asm_setupresult(as, ir, ci); /* GCstr * */
588 asm_gencall(as, ci, args);
589 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
590 } else {
591 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
592 args[1] = ir->op1; /* int32_t k */
593 asm_setupresult(as, ir, ci); /* GCstr * */
594 asm_gencall(as, ci, args);
595 }
596}
597
598/* -- Memory references --------------------------------------------------- */
599
600static void asm_aref(ASMState *as, IRIns *ir) 588static void asm_aref(ASMState *as, IRIns *ir)
601{ 589{
602 Reg dest = ra_dest(as, ir, RSET_GPR); 590 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -636,11 +624,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
636 Reg tisnum = RID_NONE, tmpnum = RID_NONE; 624 Reg tisnum = RID_NONE, tmpnum = RID_NONE;
637 IRRef refkey = ir->op2; 625 IRRef refkey = ir->op2;
638 IRIns *irkey = IR(refkey); 626 IRIns *irkey = IR(refkey);
627 int isk = irref_isk(refkey);
639 IRType1 kt = irkey->t; 628 IRType1 kt = irkey->t;
640 uint32_t khash; 629 uint32_t khash;
641 MCLabel l_end, l_loop, l_next; 630 MCLabel l_end, l_loop, l_next;
642 631
643 rset_clear(allow, tab); 632 rset_clear(allow, tab);
633#if LJ_SOFTFP
634 if (!isk) {
635 key = ra_alloc1(as, refkey, allow);
636 rset_clear(allow, key);
637 if (irkey[1].o == IR_HIOP) {
638 if (ra_hasreg((irkey+1)->r)) {
639 tmpnum = (irkey+1)->r;
640 ra_noweak(as, tmpnum);
641 } else {
642 tmpnum = ra_allocref(as, refkey+1, allow);
643 }
644 rset_clear(allow, tmpnum);
645 }
646 }
647#else
644 if (irt_isnum(kt)) { 648 if (irt_isnum(kt)) {
645 key = ra_alloc1(as, refkey, RSET_FPR); 649 key = ra_alloc1(as, refkey, RSET_FPR);
646 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); 650 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -650,6 +654,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
650 key = ra_alloc1(as, refkey, allow); 654 key = ra_alloc1(as, refkey, allow);
651 rset_clear(allow, key); 655 rset_clear(allow, key);
652 } 656 }
657#endif
653 tmp2 = ra_scratch(as, allow); 658 tmp2 = ra_scratch(as, allow);
654 rset_clear(allow, tmp2); 659 rset_clear(allow, tmp2);
655 660
@@ -672,7 +677,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
672 asm_guardcc(as, CC_EQ); 677 asm_guardcc(as, CC_EQ);
673 else 678 else
674 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); 679 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
675 if (irt_isnum(kt)) { 680 if (!LJ_SOFTFP && irt_isnum(kt)) {
676 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); 681 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
677 emit_condbranch(as, PPCI_BC, CC_GE, l_next); 682 emit_condbranch(as, PPCI_BC, CC_GE, l_next);
678 emit_ab(as, PPCI_CMPLW, tmp1, tisnum); 683 emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -682,7 +687,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
682 emit_ab(as, PPCI_CMPW, tmp2, key); 687 emit_ab(as, PPCI_CMPW, tmp2, key);
683 emit_condbranch(as, PPCI_BC, CC_NE, l_next); 688 emit_condbranch(as, PPCI_BC, CC_NE, l_next);
684 } 689 }
685 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); 690 if (LJ_SOFTFP && ra_hasreg(tmpnum))
691 emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
692 else
693 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
686 if (!irt_ispri(kt)) 694 if (!irt_ispri(kt))
687 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); 695 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
688 } 696 }
@@ -691,19 +699,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
691 (((char *)as->mcp-(char *)l_loop) & 0xffffu); 699 (((char *)as->mcp-(char *)l_loop) & 0xffffu);
692 700
693 /* Load main position relative to tab->node into dest. */ 701 /* Load main position relative to tab->node into dest. */
694 khash = irref_isk(refkey) ? ir_khash(irkey) : 1; 702 khash = isk ? ir_khash(irkey) : 1;
695 if (khash == 0) { 703 if (khash == 0) {
696 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); 704 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
697 } else { 705 } else {
698 Reg tmphash = tmp1; 706 Reg tmphash = tmp1;
699 if (irref_isk(refkey)) 707 if (isk)
700 tmphash = ra_allock(as, khash, allow); 708 tmphash = ra_allock(as, khash, allow);
701 emit_tab(as, PPCI_ADD, dest, dest, tmp1); 709 emit_tab(as, PPCI_ADD, dest, dest, tmp1);
702 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); 710 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
703 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); 711 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
704 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); 712 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
705 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); 713 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
706 if (irref_isk(refkey)) { 714 if (isk) {
707 /* Nothing to do. */ 715 /* Nothing to do. */
708 } else if (irt_isstr(kt)) { 716 } else if (irt_isstr(kt)) {
709 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); 717 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -713,13 +721,19 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
713 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); 721 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
714 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); 722 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
715 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); 723 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
716 if (irt_isnum(kt)) { 724 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
725#if LJ_SOFTFP
726 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
727 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
728 emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
729#else
717 int32_t ofs = ra_spill(as, irkey); 730 int32_t ofs = ra_spill(as, irkey);
718 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); 731 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
719 emit_rotlwi(as, dest, tmp1, HASH_ROT1); 732 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
720 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); 733 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
721 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); 734 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
722 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); 735 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
736#endif
723 } else { 737 } else {
724 emit_asb(as, PPCI_XOR, tmp2, key, tmp1); 738 emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
725 emit_rotlwi(as, dest, tmp1, HASH_ROT1); 739 emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -773,20 +787,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
773 } 787 }
774} 788}
775 789
776static void asm_newref(ASMState *as, IRIns *ir)
777{
778 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
779 IRRef args[3];
780 if (ir->r == RID_SINK)
781 return;
782 args[0] = ASMREF_L; /* lua_State *L */
783 args[1] = ir->op1; /* GCtab *t */
784 args[2] = ASMREF_TMP1; /* cTValue *key */
785 asm_setupresult(as, ir, ci); /* TValue * */
786 asm_gencall(as, ci, args);
787 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
788}
789
790static void asm_uref(ASMState *as, IRIns *ir) 790static void asm_uref(ASMState *as, IRIns *ir)
791{ 791{
792 Reg dest = ra_dest(as, ir, RSET_GPR); 792 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -860,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
860 case IRT_U8: return PPCI_LBZ; 860 case IRT_U8: return PPCI_LBZ;
861 case IRT_I16: return PPCI_LHA; 861 case IRT_I16: return PPCI_LHA;
862 case IRT_U16: return PPCI_LHZ; 862 case IRT_U16: return PPCI_LHZ;
863 case IRT_NUM: return PPCI_LFD; 863 case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
864 case IRT_FLOAT: return PPCI_LFS; 864 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
865 default: return PPCI_LWZ; 865 default: return PPCI_LWZ;
866 } 866 }
867} 867}
@@ -871,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
871 switch (irt_type(ir->t)) { 871 switch (irt_type(ir->t)) {
872 case IRT_I8: case IRT_U8: return PPCI_STB; 872 case IRT_I8: case IRT_U8: return PPCI_STB;
873 case IRT_I16: case IRT_U16: return PPCI_STH; 873 case IRT_I16: case IRT_U16: return PPCI_STH;
874 case IRT_NUM: return PPCI_STFD; 874 case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
875 case IRT_FLOAT: return PPCI_STFS; 875 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
876 default: return PPCI_STW; 876 default: return PPCI_STW;
877 } 877 }
878} 878}
@@ -880,17 +880,23 @@ static PPCIns asm_fxstoreins(IRIns *ir)
880static void asm_fload(ASMState *as, IRIns *ir) 880static void asm_fload(ASMState *as, IRIns *ir)
881{ 881{
882 Reg dest = ra_dest(as, ir, RSET_GPR); 882 Reg dest = ra_dest(as, ir, RSET_GPR);
883 Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
884 PPCIns pi = asm_fxloadins(ir); 883 PPCIns pi = asm_fxloadins(ir);
884 Reg idx;
885 int32_t ofs; 885 int32_t ofs;
886 if (ir->op2 == IRFL_TAB_ARRAY) { 886 if (ir->op1 == REF_NIL) {
887 ofs = asm_fuseabase(as, ir->op1); 887 idx = RID_JGL;
888 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ 888 ofs = (ir->op2 << 2) - 32768;
889 emit_tai(as, PPCI_ADDI, dest, idx, ofs); 889 } else {
890 return; 890 idx = ra_alloc1(as, ir->op1, RSET_GPR);
891 if (ir->op2 == IRFL_TAB_ARRAY) {
892 ofs = asm_fuseabase(as, ir->op1);
893 if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
894 emit_tai(as, PPCI_ADDI, dest, idx, ofs);
895 return;
896 }
891 } 897 }
898 ofs = field_ofs[ir->op2];
892 } 899 }
893 ofs = field_ofs[ir->op2];
894 lua_assert(!irt_isi8(ir->t)); 900 lua_assert(!irt_isi8(ir->t));
895 emit_tai(as, pi, dest, idx, ofs); 901 emit_tai(as, pi, dest, idx, ofs);
896} 902}
@@ -909,14 +915,15 @@ static void asm_fstore(ASMState *as, IRIns *ir)
909 915
910static void asm_xload(ASMState *as, IRIns *ir) 916static void asm_xload(ASMState *as, IRIns *ir)
911{ 917{
912 Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 918 Reg dest = ra_dest(as, ir,
919 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
913 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); 920 lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
914 if (irt_isi8(ir->t)) 921 if (irt_isi8(ir->t))
915 emit_as(as, PPCI_EXTSB, dest, dest); 922 emit_as(as, PPCI_EXTSB, dest, dest);
916 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); 923 asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
917} 924}
918 925
919static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 926static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
920{ 927{
921 IRIns *irb; 928 IRIns *irb;
922 if (ir->r == RID_SINK) 929 if (ir->r == RID_SINK)
@@ -927,22 +934,34 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
927 Reg src = ra_alloc1(as, irb->op1, RSET_GPR); 934 Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
928 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); 935 asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
929 } else { 936 } else {
930 Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 937 Reg src = ra_alloc1(as, ir->op2,
938 (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
931 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 939 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
932 rset_exclude(RSET_GPR, src), ofs); 940 rset_exclude(RSET_GPR, src), ofs);
933 } 941 }
934} 942}
935 943
944#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
945
936static void asm_ahuvload(ASMState *as, IRIns *ir) 946static void asm_ahuvload(ASMState *as, IRIns *ir)
937{ 947{
938 IRType1 t = ir->t; 948 IRType1 t = ir->t;
939 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; 949 Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
940 RegSet allow = RSET_GPR; 950 RegSet allow = RSET_GPR;
941 int32_t ofs = AHUREF_LSX; 951 int32_t ofs = AHUREF_LSX;
952 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
953 t.irt = IRT_NUM;
954 if (ra_used(ir+1)) {
955 type = ra_dest(as, ir+1, allow);
956 rset_clear(allow, type);
957 }
958 ofs = 0;
959 }
942 if (ra_used(ir)) { 960 if (ra_used(ir)) {
943 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 961 lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
944 if (!irt_isnum(t)) ofs = 0; 962 irt_isint(ir->t) || irt_isaddr(ir->t));
945 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 963 if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
964 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
946 rset_clear(allow, dest); 965 rset_clear(allow, dest);
947 } 966 }
948 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 967 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
@@ -951,12 +970,13 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
951 asm_guardcc(as, CC_GE); 970 asm_guardcc(as, CC_GE);
952 emit_ab(as, PPCI_CMPLW, type, tisnum); 971 emit_ab(as, PPCI_CMPLW, type, tisnum);
953 if (ra_hasreg(dest)) { 972 if (ra_hasreg(dest)) {
954 if (ofs == AHUREF_LSX) { 973 if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
955 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, 974 tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
956 (idx&255)), (idx>>8))); 975 (idx&255)), (idx>>8)));
957 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); 976 emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
958 } else { 977 } else {
959 emit_fai(as, PPCI_LFD, dest, idx, ofs); 978 emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
979 ofs+4*LJ_SOFTFP);
960 } 980 }
961 } 981 }
962 } else { 982 } else {
@@ -979,7 +999,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
979 int32_t ofs = AHUREF_LSX; 999 int32_t ofs = AHUREF_LSX;
980 if (ir->r == RID_SINK) 1000 if (ir->r == RID_SINK)
981 return; 1001 return;
982 if (irt_isnum(ir->t)) { 1002 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
983 src = ra_alloc1(as, ir->op2, RSET_FPR); 1003 src = ra_alloc1(as, ir->op2, RSET_FPR);
984 } else { 1004 } else {
985 if (!irt_ispri(ir->t)) { 1005 if (!irt_ispri(ir->t)) {
@@ -987,11 +1007,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
987 rset_clear(allow, src); 1007 rset_clear(allow, src);
988 ofs = 0; 1008 ofs = 0;
989 } 1009 }
990 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 1010 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
1011 type = ra_alloc1(as, (ir+1)->op2, allow);
1012 else
1013 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
991 rset_clear(allow, type); 1014 rset_clear(allow, type);
992 } 1015 }
993 idx = asm_fuseahuref(as, ir->op1, &ofs, allow); 1016 idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
994 if (irt_isnum(ir->t)) { 1017 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
995 if (ofs == AHUREF_LSX) { 1018 if (ofs == AHUREF_LSX) {
996 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); 1019 emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
997 emit_slwi(as, RID_TMP, (idx>>8), 3); 1020 emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -1016,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIns *ir)
1016 IRType1 t = ir->t; 1039 IRType1 t = ir->t;
1017 Reg dest = RID_NONE, type = RID_NONE, base; 1040 Reg dest = RID_NONE, type = RID_NONE, base;
1018 RegSet allow = RSET_GPR; 1041 RegSet allow = RSET_GPR;
1042 int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
1043 if (hiop)
1044 t.irt = IRT_NUM;
1019 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1045 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
1020 lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); 1046 lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
1021 lua_assert(LJ_DUALNUM || 1047 lua_assert(LJ_DUALNUM ||
1022 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); 1048 !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
1049#if LJ_SOFTFP
1050 lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
1051 if (hiop && ra_used(ir+1)) {
1052 type = ra_dest(as, ir+1, allow);
1053 rset_clear(allow, type);
1054 }
1055#else
1023 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { 1056 if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1024 dest = ra_scratch(as, RSET_FPR); 1057 dest = ra_scratch(as, RSET_FPR);
1025 asm_tointg(as, ir, dest); 1058 asm_tointg(as, ir, dest);
1026 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1059 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1027 } else if (ra_used(ir)) { 1060 } else
1061#endif
1062 if (ra_used(ir)) {
1028 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1063 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1029 dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); 1064 dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
1030 rset_clear(allow, dest); 1065 rset_clear(allow, dest);
1031 base = ra_alloc1(as, REF_BASE, allow); 1066 base = ra_alloc1(as, REF_BASE, allow);
1032 rset_clear(allow, base); 1067 rset_clear(allow, base);
1033 if ((ir->op2 & IRSLOAD_CONVERT)) { 1068 if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
1034 if (irt_isint(t)) { 1069 if (irt_isint(t)) {
1035 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); 1070 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
1036 dest = ra_scratch(as, RSET_FPR); 1071 dest = ra_scratch(as, RSET_FPR);
@@ -1044,7 +1079,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
1044 emit_fab(as, PPCI_FSUB, dest, dest, fbias); 1079 emit_fab(as, PPCI_FSUB, dest, dest, fbias);
1045 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); 1080 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
1046 emit_lsptr(as, PPCI_LFS, (fbias & 31), 1081 emit_lsptr(as, PPCI_LFS, (fbias & 31),
1047 (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), 1082 (void *)&as->J->k32[LJ_K32_2P52_2P31],
1048 rset_clear(allow, hibias)); 1083 rset_clear(allow, hibias));
1049 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); 1084 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
1050 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); 1085 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1062,10 +1097,13 @@ dotypecheck:
1062 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1097 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1063 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); 1098 Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
1064 asm_guardcc(as, CC_GE); 1099 asm_guardcc(as, CC_GE);
1065 emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); 1100#if !LJ_SOFTFP
1066 type = RID_TMP; 1101 type = RID_TMP;
1102#endif
1103 emit_ab(as, PPCI_CMPLW, type, tisnum);
1067 } 1104 }
1068 if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); 1105 if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
1106 base, ofs-(LJ_SOFTFP?0:4));
1069 } else { 1107 } else {
1070 if ((ir->op2 & IRSLOAD_TYPECHECK)) { 1108 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1071 asm_guardcc(as, CC_NE); 1109 asm_guardcc(as, CC_NE);
@@ -1083,19 +1121,15 @@ dotypecheck:
1083static void asm_cnew(ASMState *as, IRIns *ir) 1121static void asm_cnew(ASMState *as, IRIns *ir)
1084{ 1122{
1085 CTState *cts = ctype_ctsG(J2G(as->J)); 1123 CTState *cts = ctype_ctsG(J2G(as->J));
1086 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1124 CTypeID id = (CTypeID)IR(ir->op1)->i;
1087 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1125 CTSize sz;
1088 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1126 CTInfo info = lj_ctype_info(cts, id, &sz);
1089 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1127 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1090 IRRef args[2]; 1128 IRRef args[4];
1091 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1092 RegSet drop = RSET_SCRATCH; 1129 RegSet drop = RSET_SCRATCH;
1093 lua_assert(sz != CTSIZE_INVALID); 1130 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1094 1131
1095 args[0] = ASMREF_L; /* lua_State *L */
1096 args[1] = ASMREF_TMP1; /* MSize size */
1097 as->gcsteps++; 1132 as->gcsteps++;
1098
1099 if (ra_hasreg(ir->r)) 1133 if (ra_hasreg(ir->r))
1100 rset_clear(drop, ir->r); /* Dest reg handled below. */ 1134 rset_clear(drop, ir->r); /* Dest reg handled below. */
1101 ra_evictset(as, drop); 1135 ra_evictset(as, drop);
@@ -1104,6 +1138,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1104 1138
1105 /* Initialize immutable cdata object. */ 1139 /* Initialize immutable cdata object. */
1106 if (ir->o == IR_CNEWI) { 1140 if (ir->o == IR_CNEWI) {
1141 RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1107 int32_t ofs = sizeof(GCcdata); 1142 int32_t ofs = sizeof(GCcdata);
1108 lua_assert(sz == 4 || sz == 8); 1143 lua_assert(sz == 4 || sz == 8);
1109 if (sz == 8) { 1144 if (sz == 8) {
@@ -1117,12 +1152,24 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1117 if (ofs == sizeof(GCcdata)) break; 1152 if (ofs == sizeof(GCcdata)) break;
1118 ofs -= 4; ir++; 1153 ofs -= 4; ir++;
1119 } 1154 }
1155 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1156 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1157 args[0] = ASMREF_L; /* lua_State *L */
1158 args[1] = ir->op1; /* CTypeID id */
1159 args[2] = ir->op2; /* CTSize sz */
1160 args[3] = ASMREF_TMP1; /* CTSize align */
1161 asm_gencall(as, ci, args);
1162 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1163 return;
1120 } 1164 }
1165
1121 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ 1166 /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1122 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); 1167 emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
1123 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); 1168 emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
1124 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); 1169 emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
1125 emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ 1170 emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
1171 args[0] = ASMREF_L; /* lua_State *L */
1172 args[1] = ASMREF_TMP1; /* MSize size */
1126 asm_gencall(as, ci, args); 1173 asm_gencall(as, ci, args);
1127 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), 1174 ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1128 ra_releasetmp(as, ASMREF_TMP1)); 1175 ra_releasetmp(as, ASMREF_TMP1));
@@ -1178,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
1178 1225
1179/* -- Arithmetic and logic operations ------------------------------------- */ 1226/* -- Arithmetic and logic operations ------------------------------------- */
1180 1227
1228#if !LJ_SOFTFP
1181static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) 1229static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
1182{ 1230{
1183 Reg dest = ra_dest(as, ir, RSET_FPR); 1231 Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1196,31 +1244,26 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
1196 emit_fb(as, pi, dest, left); 1244 emit_fb(as, pi, dest, left);
1197} 1245}
1198 1246
1199static int asm_fpjoin_pow(ASMState *as, IRIns *ir) 1247static void asm_fpmath(ASMState *as, IRIns *ir)
1200{ 1248{
1201 IRIns *irp = IR(ir->op1); 1249 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
1202 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { 1250 return;
1203 IRIns *irpp = IR(irp->op1); 1251 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
1204 if (irpp == ir-2 && irpp->o == IR_FPMATH && 1252 asm_fpunary(as, ir, PPCI_FSQRT);
1205 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { 1253 else
1206 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; 1254 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
1207 IRRef args[2];
1208 args[0] = irpp->op1;
1209 args[1] = irp->op2;
1210 asm_setupresult(as, ir, ci);
1211 asm_gencall(as, ci, args);
1212 return 1;
1213 }
1214 }
1215 return 0;
1216} 1255}
1256#endif
1217 1257
1218static void asm_add(ASMState *as, IRIns *ir) 1258static void asm_add(ASMState *as, IRIns *ir)
1219{ 1259{
1260#if !LJ_SOFTFP
1220 if (irt_isnum(ir->t)) { 1261 if (irt_isnum(ir->t)) {
1221 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) 1262 if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
1222 asm_fparith(as, ir, PPCI_FADD); 1263 asm_fparith(as, ir, PPCI_FADD);
1223 } else { 1264 } else
1265#endif
1266 {
1224 Reg dest = ra_dest(as, ir, RSET_GPR); 1267 Reg dest = ra_dest(as, ir, RSET_GPR);
1225 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1268 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1226 PPCIns pi; 1269 PPCIns pi;
@@ -1259,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns *ir)
1259 1302
1260static void asm_sub(ASMState *as, IRIns *ir) 1303static void asm_sub(ASMState *as, IRIns *ir)
1261{ 1304{
1305#if !LJ_SOFTFP
1262 if (irt_isnum(ir->t)) { 1306 if (irt_isnum(ir->t)) {
1263 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) 1307 if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
1264 asm_fparith(as, ir, PPCI_FSUB); 1308 asm_fparith(as, ir, PPCI_FSUB);
1265 } else { 1309 } else
1310#endif
1311 {
1266 PPCIns pi = PPCI_SUBF; 1312 PPCIns pi = PPCI_SUBF;
1267 Reg dest = ra_dest(as, ir, RSET_GPR); 1313 Reg dest = ra_dest(as, ir, RSET_GPR);
1268 Reg left, right; 1314 Reg left, right;
@@ -1288,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
1288 1334
1289static void asm_mul(ASMState *as, IRIns *ir) 1335static void asm_mul(ASMState *as, IRIns *ir)
1290{ 1336{
1337#if !LJ_SOFTFP
1291 if (irt_isnum(ir->t)) { 1338 if (irt_isnum(ir->t)) {
1292 asm_fparith(as, ir, PPCI_FMUL); 1339 asm_fparith(as, ir, PPCI_FMUL);
1293 } else { 1340 } else
1341#endif
1342 {
1294 PPCIns pi = PPCI_MULLW; 1343 PPCIns pi = PPCI_MULLW;
1295 Reg dest = ra_dest(as, ir, RSET_GPR); 1344 Reg dest = ra_dest(as, ir, RSET_GPR);
1296 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); 1345 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1312,11 +1361,18 @@ static void asm_mul(ASMState *as, IRIns *ir)
1312 } 1361 }
1313} 1362}
1314 1363
1364#define asm_div(as, ir) asm_fparith(as, ir, PPCI_FDIV)
1365#define asm_mod(as, ir) asm_callid(as, ir, IRCALL_lj_vm_modi)
1366#define asm_pow(as, ir) asm_callid(as, ir, IRCALL_lj_vm_powi)
1367
1315static void asm_neg(ASMState *as, IRIns *ir) 1368static void asm_neg(ASMState *as, IRIns *ir)
1316{ 1369{
1370#if !LJ_SOFTFP
1317 if (irt_isnum(ir->t)) { 1371 if (irt_isnum(ir->t)) {
1318 asm_fpunary(as, ir, PPCI_FNEG); 1372 asm_fpunary(as, ir, PPCI_FNEG);
1319 } else { 1373 } else
1374#endif
1375 {
1320 Reg dest, left; 1376 Reg dest, left;
1321 PPCIns pi = PPCI_NEG; 1377 PPCIns pi = PPCI_NEG;
1322 if (as->flagmcp == as->mcp) { 1378 if (as->flagmcp == as->mcp) {
@@ -1330,6 +1386,10 @@ static void asm_neg(ASMState *as, IRIns *ir)
1330 } 1386 }
1331} 1387}
1332 1388
1389#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
1390#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1391#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
1392
1333static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) 1393static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1334{ 1394{
1335 Reg dest, left, right; 1395 Reg dest, left, right;
@@ -1345,6 +1405,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
1345 emit_tab(as, pi|PPCF_DOT, dest, left, right); 1405 emit_tab(as, pi|PPCF_DOT, dest, left, right);
1346} 1406}
1347 1407
1408#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
1409#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
1410#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
1411
1348#if LJ_HASFFI 1412#if LJ_HASFFI
1349static void asm_add64(ASMState *as, IRIns *ir) 1413static void asm_add64(ASMState *as, IRIns *ir)
1350{ 1414{
@@ -1424,7 +1488,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
1424} 1488}
1425#endif 1489#endif
1426 1490
1427static void asm_bitnot(ASMState *as, IRIns *ir) 1491static void asm_bnot(ASMState *as, IRIns *ir)
1428{ 1492{
1429 Reg dest, left, right; 1493 Reg dest, left, right;
1430 PPCIns pi = PPCI_NOR; 1494 PPCIns pi = PPCI_NOR;
@@ -1451,7 +1515,7 @@ nofuse:
1451 emit_asb(as, pi, dest, left, right); 1515 emit_asb(as, pi, dest, left, right);
1452} 1516}
1453 1517
1454static void asm_bitswap(ASMState *as, IRIns *ir) 1518static void asm_bswap(ASMState *as, IRIns *ir)
1455{ 1519{
1456 Reg dest = ra_dest(as, ir, RSET_GPR); 1520 Reg dest = ra_dest(as, ir, RSET_GPR);
1457 IRIns *irx; 1521 IRIns *irx;
@@ -1472,32 +1536,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1472 } 1536 }
1473} 1537}
1474 1538
1475static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1476{
1477 Reg dest = ra_dest(as, ir, RSET_GPR);
1478 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1479 if (irref_isk(ir->op2)) {
1480 int32_t k = IR(ir->op2)->i;
1481 Reg tmp = left;
1482 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1483 if (!checku16(k)) {
1484 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1485 if ((k & 0xffff) == 0) return;
1486 }
1487 emit_asi(as, pik, dest, left, k);
1488 return;
1489 }
1490 }
1491 /* May fail due to spills/restores above, but simplifies the logic. */
1492 if (as->flagmcp == as->mcp) {
1493 as->flagmcp = NULL;
1494 as->mcp++;
1495 pi |= PPCF_DOT;
1496 }
1497 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1498 emit_asb(as, pi, dest, left, right);
1499}
1500
1501/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ 1539/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
1502static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) 1540static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
1503{ 1541{
@@ -1528,7 +1566,7 @@ nofuse:
1528 *--as->mcp = pi | PPCF_T(left); 1566 *--as->mcp = pi | PPCF_T(left);
1529} 1567}
1530 1568
1531static void asm_bitand(ASMState *as, IRIns *ir) 1569static void asm_band(ASMState *as, IRIns *ir)
1532{ 1570{
1533 Reg dest, left, right; 1571 Reg dest, left, right;
1534 IRRef lref = ir->op1; 1572 IRRef lref = ir->op1;
@@ -1583,6 +1621,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
1583 emit_asb(as, PPCI_AND ^ dot, dest, left, right); 1621 emit_asb(as, PPCI_AND ^ dot, dest, left, right);
1584} 1622}
1585 1623
1624static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1625{
1626 Reg dest = ra_dest(as, ir, RSET_GPR);
1627 Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1628 if (irref_isk(ir->op2)) {
1629 int32_t k = IR(ir->op2)->i;
1630 Reg tmp = left;
1631 if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
1632 if (!checku16(k)) {
1633 emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
1634 if ((k & 0xffff) == 0) return;
1635 }
1636 emit_asi(as, pik, dest, left, k);
1637 return;
1638 }
1639 }
1640 /* May fail due to spills/restores above, but simplifies the logic. */
1641 if (as->flagmcp == as->mcp) {
1642 as->flagmcp = NULL;
1643 as->mcp++;
1644 pi |= PPCF_DOT;
1645 }
1646 right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1647 emit_asb(as, pi, dest, left, right);
1648}
1649
1650#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
1651#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
1652
1586static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) 1653static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1587{ 1654{
1588 Reg dest, left; 1655 Reg dest, left;
@@ -1608,9 +1675,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
1608 } 1675 }
1609} 1676}
1610 1677
1678#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
1679#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
1680#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
1681#define asm_brol(as, ir) \
1682 asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
1683 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
1684#define asm_bror(as, ir) lua_assert(0)
1685
1686#if LJ_SOFTFP
1687static void asm_sfpmin_max(ASMState *as, IRIns *ir)
1688{
1689 CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
1690 IRRef args[4];
1691 MCLabel l_right, l_end;
1692 Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
1693 Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
1694 Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
1695 PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
1696 righthi = (lefthi >> 8); lefthi &= 255;
1697 rightlo = (leftlo >> 8); leftlo &= 255;
1698 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1699 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1700 l_end = emit_label(as);
1701 if (desthi != righthi) emit_mr(as, desthi, righthi);
1702 if (destlo != rightlo) emit_mr(as, destlo, rightlo);
1703 l_right = emit_label(as);
1704 if (l_end != l_right) emit_jmp(as, l_end);
1705 if (desthi != lefthi) emit_mr(as, desthi, lefthi);
1706 if (destlo != leftlo) emit_mr(as, destlo, leftlo);
1707 if (l_right == as->mcp+1) {
1708 cond ^= 4; l_right = l_end; ++as->mcp;
1709 }
1710 emit_condbranch(as, PPCI_BC, cond, l_right);
1711 ra_evictset(as, RSET_SCRATCH);
1712 emit_cmpi(as, RID_RET, 1);
1713 asm_gencall(as, &ci, args);
1714}
1715#endif
1716
1611static void asm_min_max(ASMState *as, IRIns *ir, int ismax) 1717static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1612{ 1718{
1613 if (irt_isnum(ir->t)) { 1719 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1614 Reg dest = ra_dest(as, ir, RSET_FPR); 1720 Reg dest = ra_dest(as, ir, RSET_FPR);
1615 Reg tmp = dest; 1721 Reg tmp = dest;
1616 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1722 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1638,6 +1744,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
1638 } 1744 }
1639} 1745}
1640 1746
1747#define asm_min(as, ir) asm_min_max(as, ir, 0)
1748#define asm_max(as, ir) asm_min_max(as, ir, 1)
1749
1641/* -- Comparisons --------------------------------------------------------- */ 1750/* -- Comparisons --------------------------------------------------------- */
1642 1751
1643#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ 1752#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1695,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
1695static void asm_comp(ASMState *as, IRIns *ir) 1804static void asm_comp(ASMState *as, IRIns *ir)
1696{ 1805{
1697 PPCCC cc = asm_compmap[ir->o]; 1806 PPCCC cc = asm_compmap[ir->o];
1698 if (irt_isnum(ir->t)) { 1807 if (!LJ_SOFTFP && irt_isnum(ir->t)) {
1699 Reg right, left = ra_alloc2(as, ir, RSET_FPR); 1808 Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1700 right = (left >> 8); left &= 255; 1809 right = (left >> 8); left &= 255;
1701 asm_guardcc(as, (cc >> 4)); 1810 asm_guardcc(as, (cc >> 4));
@@ -1714,6 +1823,46 @@ static void asm_comp(ASMState *as, IRIns *ir)
1714 } 1823 }
1715} 1824}
1716 1825
1826#define asm_equal(as, ir) asm_comp(as, ir)
1827
1828#if LJ_SOFTFP
1829/* SFP comparisons. */
1830static void asm_sfpcomp(ASMState *as, IRIns *ir)
1831{
1832 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
1833 RegSet drop = RSET_SCRATCH;
1834 Reg r;
1835 IRRef args[4];
1836 args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
1837 args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
1838
1839 for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
1840 if (!rset_test(as->freeset, r) &&
1841 regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
1842 rset_clear(drop, r);
1843 }
1844 ra_evictset(as, drop);
1845 asm_setupresult(as, ir, ci);
1846 switch ((IROp)ir->o) {
1847 case IR_ULT:
1848 asm_guardcc(as, CC_EQ);
1849 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1850 case IR_ULE:
1851 asm_guardcc(as, CC_EQ);
1852 emit_ai(as, PPCI_CMPWI, RID_RET, 1);
1853 break;
1854 case IR_GE: case IR_GT:
1855 asm_guardcc(as, CC_EQ);
1856 emit_ai(as, PPCI_CMPWI, RID_RET, 2);
1857 default:
1858 asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
1859 emit_ai(as, PPCI_CMPWI, RID_RET, 0);
1860 break;
1861 }
1862 asm_gencall(as, ci, args);
1863}
1864#endif
1865
1717#if LJ_HASFFI 1866#if LJ_HASFFI
1718/* 64 bit integer comparisons. */ 1867/* 64 bit integer comparisons. */
1719static void asm_comp64(ASMState *as, IRIns *ir) 1868static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1743,37 +1892,67 @@ static void asm_comp64(ASMState *as, IRIns *ir)
1743/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ 1892/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1744static void asm_hiop(ASMState *as, IRIns *ir) 1893static void asm_hiop(ASMState *as, IRIns *ir)
1745{ 1894{
1746#if LJ_HASFFI 1895#if LJ_HASFFI || LJ_SOFTFP
1747 /* HIOP is marked as a store because it needs its own DCE logic. */ 1896 /* HIOP is marked as a store because it needs its own DCE logic. */
1748 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 1897 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
1749 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 1898 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
1750 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 1899 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
1751 as->curins--; /* Always skip the CONV. */ 1900 as->curins--; /* Always skip the CONV. */
1901#if LJ_HASFFI && !LJ_SOFTFP
1752 if (usehi || uselo) 1902 if (usehi || uselo)
1753 asm_conv64(as, ir); 1903 asm_conv64(as, ir);
1754 return; 1904 return;
1905#endif
1755 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 1906 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
1756 as->curins--; /* Always skip the loword comparison. */ 1907 as->curins--; /* Always skip the loword comparison. */
1908#if LJ_SOFTFP
1909 if (!irt_isint(ir->t)) {
1910 asm_sfpcomp(as, ir-1);
1911 return;
1912 }
1913#endif
1914#if LJ_HASFFI
1757 asm_comp64(as, ir); 1915 asm_comp64(as, ir);
1916#endif
1917 return;
1918#if LJ_SOFTFP
1919 } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
1920 as->curins--; /* Always skip the loword min/max. */
1921 if (uselo || usehi)
1922 asm_sfpmin_max(as, ir-1);
1758 return; 1923 return;
1924#endif
1759 } else if ((ir-1)->o == IR_XSTORE) { 1925 } else if ((ir-1)->o == IR_XSTORE) {
1760 as->curins--; /* Handle both stores here. */ 1926 as->curins--; /* Handle both stores here. */
1761 if ((ir-1)->r != RID_SINK) { 1927 if ((ir-1)->r != RID_SINK) {
1762 asm_xstore(as, ir, 0); 1928 asm_xstore_(as, ir, 0);
1763 asm_xstore(as, ir-1, 4); 1929 asm_xstore_(as, ir-1, 4);
1764 } 1930 }
1765 return; 1931 return;
1766 } 1932 }
1767 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1933 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
1768 switch ((ir-1)->o) { 1934 switch ((ir-1)->o) {
1935#if LJ_HASFFI
1769 case IR_ADD: as->curins--; asm_add64(as, ir); break; 1936 case IR_ADD: as->curins--; asm_add64(as, ir); break;
1770 case IR_SUB: as->curins--; asm_sub64(as, ir); break; 1937 case IR_SUB: as->curins--; asm_sub64(as, ir); break;
1771 case IR_NEG: as->curins--; asm_neg64(as, ir); break; 1938 case IR_NEG: as->curins--; asm_neg64(as, ir); break;
1939#endif
1940#if LJ_SOFTFP
1941 case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
1942 case IR_STRTO:
1943 if (!uselo)
1944 ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
1945 break;
1946#endif
1772 case IR_CALLN: 1947 case IR_CALLN:
1948 case IR_CALLS:
1773 case IR_CALLXS: 1949 case IR_CALLXS:
1774 if (!uselo) 1950 if (!uselo)
1775 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ 1951 ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
1776 break; 1952 break;
1953#if LJ_SOFTFP
1954 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
1955#endif
1777 case IR_CNEWI: 1956 case IR_CNEWI:
1778 /* Nothing to do here. Handled by lo op itself. */ 1957 /* Nothing to do here. Handled by lo op itself. */
1779 break; 1958 break;
@@ -1784,6 +1963,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1784#endif 1963#endif
1785} 1964}
1786 1965
1966/* -- Profiling ----------------------------------------------------------- */
1967
1968static void asm_prof(ASMState *as, IRIns *ir)
1969{
1970 UNUSED(ir);
1971 asm_guardcc(as, CC_NE);
1972 emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
1973 emit_lsglptr(as, PPCI_LBZ, RID_TMP,
1974 (int32_t)offsetof(global_State, hookmask));
1975}
1976
1787/* -- Stack handling ------------------------------------------------------ */ 1977/* -- Stack handling ------------------------------------------------------ */
1788 1978
1789/* Check Lua stack size for overflow. Use exit handler as fallback. */ 1979/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -1805,7 +1995,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1805 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); 1995 emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
1806 if (pbase == RID_TMP) 1996 if (pbase == RID_TMP)
1807 emit_getgl(as, RID_TMP, jit_base); 1997 emit_getgl(as, RID_TMP, jit_base);
1808 emit_getgl(as, tmp, jit_L); 1998 emit_getgl(as, tmp, cur_L);
1809 if (allow == RSET_EMPTY) /* Spill temp. register. */ 1999 if (allow == RSET_EMPTY) /* Spill temp. register. */
1810 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); 2000 emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
1811} 2001}
@@ -1826,8 +2016,19 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1826 if ((sn & SNAP_NORESTORE)) 2016 if ((sn & SNAP_NORESTORE))
1827 continue; 2017 continue;
1828 if (irt_isnum(ir->t)) { 2018 if (irt_isnum(ir->t)) {
2019#if LJ_SOFTFP
2020 Reg tmp;
2021 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
2022 lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
2023 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
2024 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
2025 if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
2026 tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
2027 emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
2028#else
1829 Reg src = ra_alloc1(as, ref, RSET_FPR); 2029 Reg src = ra_alloc1(as, ref, RSET_FPR);
1830 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); 2030 emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
2031#endif
1831 } else { 2032 } else {
1832 Reg type; 2033 Reg type;
1833 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); 2034 RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
@@ -1840,6 +2041,10 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
1840 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2041 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
1841 if (s == 0) continue; /* Do not overwrite link to previous frame. */ 2042 if (s == 0) continue; /* Do not overwrite link to previous frame. */
1842 type = ra_allock(as, (int32_t)(*flinks--), allow); 2043 type = ra_allock(as, (int32_t)(*flinks--), allow);
2044#if LJ_SOFTFP
2045 } else if ((sn & SNAP_SOFTFPNUM)) {
2046 type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
2047#endif
1843 } else { 2048 } else {
1844 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); 2049 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
1845 } 2050 }
@@ -1966,147 +2171,25 @@ static void asm_tail_prep(ASMState *as)
1966 } 2171 }
1967} 2172}
1968 2173
1969/* -- Instruction dispatch ------------------------------------------------ */
1970
1971/* Assemble a single instruction. */
1972static void asm_ir(ASMState *as, IRIns *ir)
1973{
1974 switch ((IROp)ir->o) {
1975 /* Miscellaneous ops. */
1976 case IR_LOOP: asm_loop(as); break;
1977 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
1978 case IR_USE:
1979 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
1980 case IR_PHI: asm_phi(as, ir); break;
1981 case IR_HIOP: asm_hiop(as, ir); break;
1982 case IR_GCSTEP: asm_gcstep(as, ir); break;
1983
1984 /* Guarded assertions. */
1985 case IR_EQ: case IR_NE:
1986 if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
1987 as->curins--;
1988 asm_href(as, ir-1, (IROp)ir->o);
1989 break;
1990 }
1991 /* fallthrough */
1992 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
1993 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
1994 case IR_ABC:
1995 asm_comp(as, ir);
1996 break;
1997
1998 case IR_RETF: asm_retf(as, ir); break;
1999
2000 /* Bit ops. */
2001 case IR_BNOT: asm_bitnot(as, ir); break;
2002 case IR_BSWAP: asm_bitswap(as, ir); break;
2003
2004 case IR_BAND: asm_bitand(as, ir); break;
2005 case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
2006 case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
2007
2008 case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
2009 case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
2010 case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
2011 case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
2012 PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
2013 case IR_BROR: lua_assert(0); break;
2014
2015 /* Arithmetic ops. */
2016 case IR_ADD: asm_add(as, ir); break;
2017 case IR_SUB: asm_sub(as, ir); break;
2018 case IR_MUL: asm_mul(as, ir); break;
2019 case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
2020 case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
2021 case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
2022 case IR_NEG: asm_neg(as, ir); break;
2023
2024 case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
2025 case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
2026 case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
2027 case IR_MIN: asm_min_max(as, ir, 0); break;
2028 case IR_MAX: asm_min_max(as, ir, 1); break;
2029 case IR_FPMATH:
2030 if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
2031 break;
2032 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
2033 asm_fpunary(as, ir, PPCI_FSQRT);
2034 else
2035 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
2036 break;
2037
2038 /* Overflow-checking arithmetic ops. */
2039 case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
2040 case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
2041 case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
2042
2043 /* Memory references. */
2044 case IR_AREF: asm_aref(as, ir); break;
2045 case IR_HREF: asm_href(as, ir, 0); break;
2046 case IR_HREFK: asm_hrefk(as, ir); break;
2047 case IR_NEWREF: asm_newref(as, ir); break;
2048 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2049 case IR_FREF: asm_fref(as, ir); break;
2050 case IR_STRREF: asm_strref(as, ir); break;
2051
2052 /* Loads and stores. */
2053 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2054 asm_ahuvload(as, ir);
2055 break;
2056 case IR_FLOAD: asm_fload(as, ir); break;
2057 case IR_XLOAD: asm_xload(as, ir); break;
2058 case IR_SLOAD: asm_sload(as, ir); break;
2059
2060 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2061 case IR_FSTORE: asm_fstore(as, ir); break;
2062 case IR_XSTORE: asm_xstore(as, ir, 0); break;
2063
2064 /* Allocations. */
2065 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2066 case IR_TNEW: asm_tnew(as, ir); break;
2067 case IR_TDUP: asm_tdup(as, ir); break;
2068 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2069
2070 /* Write barriers. */
2071 case IR_TBAR: asm_tbar(as, ir); break;
2072 case IR_OBAR: asm_obar(as, ir); break;
2073
2074 /* Type conversions. */
2075 case IR_CONV: asm_conv(as, ir); break;
2076 case IR_TOBIT: asm_tobit(as, ir); break;
2077 case IR_TOSTR: asm_tostr(as, ir); break;
2078 case IR_STRTO: asm_strto(as, ir); break;
2079
2080 /* Calls. */
2081 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2082 case IR_CALLXS: asm_callx(as, ir); break;
2083 case IR_CARG: break;
2084
2085 default:
2086 setintV(&as->J->errinfo, ir->o);
2087 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2088 break;
2089 }
2090}
2091
2092/* -- Trace setup --------------------------------------------------------- */ 2174/* -- Trace setup --------------------------------------------------------- */
2093 2175
2094/* Ensure there are enough stack slots for call arguments. */ 2176/* Ensure there are enough stack slots for call arguments. */
2095static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) 2177static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2096{ 2178{
2097 IRRef args[CCI_NARGS_MAX*2]; 2179 IRRef args[CCI_NARGS_MAX*2];
2098 uint32_t i, nargs = (int)CCI_NARGS(ci); 2180 uint32_t i, nargs = CCI_XNARGS(ci);
2099 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; 2181 int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
2100 asm_collectargs(as, ir, ci, args); 2182 asm_collectargs(as, ir, ci, args);
2101 for (i = 0; i < nargs; i++) 2183 for (i = 0; i < nargs; i++)
2102 if (args[i] && irt_isfp(IR(args[i])->t)) { 2184 if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
2103 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; 2185 if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
2104 } else { 2186 } else {
2105 if (ngpr > 0) ngpr--; else nslots++; 2187 if (ngpr > 0) ngpr--; else nslots++;
2106 } 2188 }
2107 if (nslots > as->evenspill) /* Leave room for args in stack slots. */ 2189 if (nslots > as->evenspill) /* Leave room for args in stack slots. */
2108 as->evenspill = nslots; 2190 as->evenspill = nslots;
2109 return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); 2191 return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
2192 REGSP_HINT(RID_RET);
2110} 2193}
2111 2194
2112static void asm_setup_target(ASMState *as) 2195static void asm_setup_target(ASMState *as)
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 2c38d1ec..21b510ca 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -21,12 +21,14 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
21 } 21 }
22 /* Push the high byte of the exitno for each exit stub group. */ 22 /* Push the high byte of the exitno for each exit stub group. */
23 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); 23 *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
24#if !LJ_GC64
24 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ 25 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */
25 *mxp++ = XI_MOVmi; 26 *mxp++ = XI_MOVmi;
26 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); 27 *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
27 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); 28 *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
28 *mxp++ = 2*sizeof(void *); 29 *mxp++ = 2*sizeof(void *);
29 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; 30 *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
31#endif
30 /* Jump to exit handler which fills in the ExitState. */ 32 /* Jump to exit handler which fills in the ExitState. */
31 *mxp++ = XI_JMP; mxp += 4; 33 *mxp++ = XI_JMP; mxp += 4;
32 *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); 34 *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
@@ -62,10 +64,14 @@ static void asm_guardcc(ASMState *as, int cc)
62 target = p; 64 target = p;
63 cc ^= 1; 65 cc ^= 1;
64 if (as->realign) { 66 if (as->realign) {
67 if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
68 as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */
65 emit_sjcc(as, cc, target); 69 emit_sjcc(as, cc, target);
66 return; 70 return;
67 } 71 }
68 } 72 }
73 if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
74 as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */
69 emit_jcc(as, cc, target); 75 emit_jcc(as, cc, target);
70} 76}
71 77
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
79{ 85{
80 if (irref_isk(ref)) { 86 if (irref_isk(ref)) {
81 IRIns *ir = IR(ref); 87 IRIns *ir = IR(ref);
88#if LJ_GC64
89 if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
90 *k = ir->i;
91 return 1;
92 } else if (checki32((int64_t)ir_k64(ir)->u64)) {
93 *k = (int32_t)ir_k64(ir)->u64;
94 return 1;
95 }
96#else
82 if (ir->o != IR_KINT64) { 97 if (ir->o != IR_KINT64) {
83 *k = ir->i; 98 *k = ir->i;
84 return 1; 99 return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
86 *k = (int32_t)ir_kint64(ir)->u64; 101 *k = (int32_t)ir_kint64(ir)->u64;
87 return 1; 102 return 1;
88 } 103 }
104#endif
89 } 105 }
90 return 0; 106 return 0;
91} 107}
@@ -185,9 +201,19 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
185 if (irref_isk(ir->op1)) { 201 if (irref_isk(ir->op1)) {
186 GCfunc *fn = ir_kfunc(IR(ir->op1)); 202 GCfunc *fn = ir_kfunc(IR(ir->op1));
187 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; 203 GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
204#if LJ_GC64
205 int64_t ofs = dispofs(as, &uv->tv);
206 if (checki32(ofs) && checki32(ofs+4)) {
207 as->mrm.ofs = (int32_t)ofs;
208 as->mrm.base = RID_DISPATCH;
209 as->mrm.idx = RID_NONE;
210 return;
211 }
212#else
188 as->mrm.ofs = ptr2addr(&uv->tv); 213 as->mrm.ofs = ptr2addr(&uv->tv);
189 as->mrm.base = as->mrm.idx = RID_NONE; 214 as->mrm.base = as->mrm.idx = RID_NONE;
190 return; 215 return;
216#endif
191 } 217 }
192 break; 218 break;
193 default: 219 default:
@@ -205,14 +231,40 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
205static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) 231static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
206{ 232{
207 lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); 233 lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
208 as->mrm.ofs = field_ofs[ir->op2];
209 as->mrm.idx = RID_NONE; 234 as->mrm.idx = RID_NONE;
235 if (ir->op1 == REF_NIL) {
236#if LJ_GC64
237 as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
238 as->mrm.base = RID_DISPATCH;
239#else
240 as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J));
241 as->mrm.base = RID_NONE;
242#endif
243 return;
244 }
245 as->mrm.ofs = field_ofs[ir->op2];
210 if (irref_isk(ir->op1)) { 246 if (irref_isk(ir->op1)) {
211 as->mrm.ofs += IR(ir->op1)->i; 247 IRIns *op1 = IR(ir->op1);
248#if LJ_GC64
249 if (ir->op1 == REF_NIL) {
250 as->mrm.ofs -= GG_OFS(dispatch);
251 as->mrm.base = RID_DISPATCH;
252 return;
253 } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
254 intptr_t ofs = dispofs(as, ir_kptr(op1));
255 if (checki32(as->mrm.ofs + ofs)) {
256 as->mrm.ofs += (int32_t)ofs;
257 as->mrm.base = RID_DISPATCH;
258 return;
259 }
260 }
261#else
262 as->mrm.ofs += op1->i;
212 as->mrm.base = RID_NONE; 263 as->mrm.base = RID_NONE;
213 } else { 264 return;
214 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); 265#endif
215 } 266 }
267 as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
216} 268}
217 269
218/* Fuse string reference into memory operand. */ 270/* Fuse string reference into memory operand. */
@@ -223,7 +275,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
223 as->mrm.base = as->mrm.idx = RID_NONE; 275 as->mrm.base = as->mrm.idx = RID_NONE;
224 as->mrm.scale = XM_SCALE1; 276 as->mrm.scale = XM_SCALE1;
225 as->mrm.ofs = sizeof(GCstr); 277 as->mrm.ofs = sizeof(GCstr);
226 if (irref_isk(ir->op1)) { 278 if (!LJ_GC64 && irref_isk(ir->op1)) {
227 as->mrm.ofs += IR(ir->op1)->i; 279 as->mrm.ofs += IR(ir->op1)->i;
228 } else { 280 } else {
229 Reg r = ra_alloc1(as, ir->op1, allow); 281 Reg r = ra_alloc1(as, ir->op1, allow);
@@ -255,10 +307,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
255 IRIns *ir = IR(ref); 307 IRIns *ir = IR(ref);
256 as->mrm.idx = RID_NONE; 308 as->mrm.idx = RID_NONE;
257 if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { 309 if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
310#if LJ_GC64
311 intptr_t ofs = dispofs(as, ir_kptr(ir));
312 if (checki32(ofs)) {
313 as->mrm.ofs = (int32_t)ofs;
314 as->mrm.base = RID_DISPATCH;
315 return;
316 }
317 } if (0) {
318#else
258 as->mrm.ofs = ir->i; 319 as->mrm.ofs = ir->i;
259 as->mrm.base = RID_NONE; 320 as->mrm.base = RID_NONE;
260 } else if (ir->o == IR_STRREF) { 321 } else if (ir->o == IR_STRREF) {
261 asm_fusestrref(as, ir, allow); 322 asm_fusestrref(as, ir, allow);
323#endif
262 } else { 324 } else {
263 as->mrm.ofs = 0; 325 as->mrm.ofs = 0;
264 if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { 326 if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -301,7 +363,46 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
301 } 363 }
302} 364}
303 365
304/* Fuse load into memory operand. */ 366/* Fuse load of 64 bit IR constant into memory operand. */
367static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
368{
369 const uint64_t *k = &ir_k64(ir)->u64;
370 if (!LJ_GC64 || checki32((intptr_t)k)) {
371 as->mrm.ofs = ptr2addr(k);
372 as->mrm.base = RID_NONE;
373#if LJ_GC64
374 } else if (checki32(dispofs(as, k))) {
375 as->mrm.ofs = (int32_t)dispofs(as, k);
376 as->mrm.base = RID_DISPATCH;
377 } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
378 checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
379 as->mrm.ofs = (int32_t)mcpofs(as, k);
380 as->mrm.base = RID_RIP;
381 } else {
382 if (ir->i) {
383 lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
384 } else {
385 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
386 *(uint64_t*)as->mcbot = *k;
387 ir->i = (int32_t)(as->mctop - as->mcbot);
388 as->mcbot += 8;
389 as->mclim = as->mcbot + MCLIM_REDZONE;
390 lj_mcode_commitbot(as->J, as->mcbot);
391 }
392 as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
393 as->mrm.base = RID_RIP;
394#endif
395 }
396 as->mrm.idx = RID_NONE;
397 return RID_MRM;
398}
399
400/* Fuse load into memory operand.
401**
402** Important caveat: this may emit RIP-relative loads! So don't place any
403** code emitters between this function and the use of its result.
404** The only permitted exception is asm_guardcc().
405*/
305static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) 406static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
306{ 407{
307 IRIns *ir = IR(ref); 408 IRIns *ir = IR(ref);
@@ -320,26 +421,35 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
320 if (ir->o == IR_KNUM) { 421 if (ir->o == IR_KNUM) {
321 RegSet avail = as->freeset & ~as->modset & RSET_FPR; 422 RegSet avail = as->freeset & ~as->modset & RSET_FPR;
322 lua_assert(allow != RSET_EMPTY); 423 lua_assert(allow != RSET_EMPTY);
323 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ 424 if (!(avail & (avail-1))) /* Fuse if less than two regs available. */
324 as->mrm.ofs = ptr2addr(ir_knum(ir)); 425 return asm_fuseloadk64(as, ir);
325 as->mrm.base = as->mrm.idx = RID_NONE;
326 return RID_MRM;
327 }
328 } else if (ref == REF_BASE || ir->o == IR_KINT64) { 426 } else if (ref == REF_BASE || ir->o == IR_KINT64) {
329 RegSet avail = as->freeset & ~as->modset & RSET_GPR; 427 RegSet avail = as->freeset & ~as->modset & RSET_GPR;
330 lua_assert(allow != RSET_EMPTY); 428 lua_assert(allow != RSET_EMPTY);
331 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ 429 if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
332 as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir)); 430 if (ref == REF_BASE) {
333 as->mrm.base = as->mrm.idx = RID_NONE; 431#if LJ_GC64
334 return RID_MRM; 432 as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base);
433 as->mrm.base = RID_DISPATCH;
434#else
435 as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
436 as->mrm.base = RID_NONE;
437#endif
438 as->mrm.idx = RID_NONE;
439 return RID_MRM;
440 } else {
441 return asm_fuseloadk64(as, ir);
442 }
335 } 443 }
336 } else if (mayfuse(as, ref)) { 444 } else if (mayfuse(as, ref)) {
337 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; 445 RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
338 if (ir->o == IR_SLOAD) { 446 if (ir->o == IR_SLOAD) {
339 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && 447 if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
340 noconflict(as, ref, IR_RETF, 0)) { 448 noconflict(as, ref, IR_RETF, 0) &&
449 !(LJ_GC64 && irt_isaddr(ir->t))) {
341 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); 450 as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
342 as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); 451 as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
452 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
343 as->mrm.idx = RID_NONE; 453 as->mrm.idx = RID_NONE;
344 return RID_MRM; 454 return RID_MRM;
345 } 455 }
@@ -351,7 +461,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
351 return RID_MRM; 461 return RID_MRM;
352 } 462 }
353 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { 463 } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
354 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { 464 if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
465 !(LJ_GC64 && irt_isaddr(ir->t))) {
355 asm_fuseahuref(as, ir->op1, xallow); 466 asm_fuseahuref(as, ir->op1, xallow);
356 return RID_MRM; 467 return RID_MRM;
357 } 468 }
@@ -364,11 +475,15 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
364 asm_fusexref(as, ir->op1, xallow); 475 asm_fusexref(as, ir->op1, xallow);
365 return RID_MRM; 476 return RID_MRM;
366 } 477 }
367 } else if (ir->o == IR_VLOAD) { 478 } else if (ir->o == IR_VLOAD && !(LJ_GC64 && irt_isaddr(ir->t))) {
368 asm_fuseahuref(as, ir->op1, xallow); 479 asm_fuseahuref(as, ir->op1, xallow);
369 return RID_MRM; 480 return RID_MRM;
370 } 481 }
371 } 482 }
483 if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) {
484 asm_fusefref(as, ir, RSET_EMPTY);
485 return RID_MRM;
486 }
372 if (!(as->freeset & allow) && !emit_canremat(ref) && 487 if (!(as->freeset & allow) && !emit_canremat(ref) &&
373 (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) 488 (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
374 goto fusespill; 489 goto fusespill;
@@ -392,7 +507,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
392/* Count the required number of stack slots for a call. */ 507/* Count the required number of stack slots for a call. */
393static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) 508static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
394{ 509{
395 uint32_t i, nargs = CCI_NARGS(ci); 510 uint32_t i, nargs = CCI_XNARGS(ci);
396 int nslots = 0; 511 int nslots = 0;
397#if LJ_64 512#if LJ_64
398 if (LJ_ABI_WIN) { 513 if (LJ_ABI_WIN) {
@@ -425,7 +540,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
425/* Generate a call to a C function. */ 540/* Generate a call to a C function. */
426static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) 541static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
427{ 542{
428 uint32_t n, nargs = CCI_NARGS(ci); 543 uint32_t n, nargs = CCI_XNARGS(ci);
429 int32_t ofs = STACKARG_OFS; 544 int32_t ofs = STACKARG_OFS;
430#if LJ_64 545#if LJ_64
431 uint32_t gprs = REGARG_GPRS; 546 uint32_t gprs = REGARG_GPRS;
@@ -485,8 +600,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
485 if (r) { /* Argument is in a register. */ 600 if (r) { /* Argument is in a register. */
486 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { 601 if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
487#if LJ_64 602#if LJ_64
488 if (ir->o == IR_KINT64) 603 if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64)
489 emit_loadu64(as, r, ir_kint64(ir)->u64); 604 emit_loadu64(as, r, ir_k64(ir)->u64);
490 else 605 else
491#endif 606#endif
492 emit_loadi(as, r, ir->i); 607 emit_loadi(as, r, ir->i);
@@ -560,7 +675,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
560 if (ra_hasreg(dest)) { 675 if (ra_hasreg(dest)) {
561 ra_free(as, dest); 676 ra_free(as, dest);
562 ra_modified(as, dest); 677 ra_modified(as, dest);
563 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 678 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
564 dest, RID_ESP, ofs); 679 dest, RID_ESP, ofs);
565 } 680 }
566 if ((ci->flags & CCI_CASTU64)) { 681 if ((ci->flags & CCI_CASTU64)) {
@@ -584,15 +699,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
584 } 699 }
585} 700}
586 701
587static void asm_call(ASMState *as, IRIns *ir)
588{
589 IRRef args[CCI_NARGS_MAX];
590 const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
591 asm_collectargs(as, ir, ci, args);
592 asm_setupresult(as, ir, ci);
593 asm_gencall(as, ci, args);
594}
595
596/* Return a constant function pointer or NULL for indirect calls. */ 702/* Return a constant function pointer or NULL for indirect calls. */
597static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) 703static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
598{ 704{
@@ -651,15 +757,23 @@ static void asm_callx(ASMState *as, IRIns *ir)
651static void asm_retf(ASMState *as, IRIns *ir) 757static void asm_retf(ASMState *as, IRIns *ir)
652{ 758{
653 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); 759 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
760#if LJ_FR2
761 Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
762#endif
654 void *pc = ir_kptr(IR(ir->op2)); 763 void *pc = ir_kptr(IR(ir->op2));
655 int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); 764 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
656 as->topslot -= (BCReg)delta; 765 as->topslot -= (BCReg)delta;
657 if ((int32_t)as->topslot < 0) as->topslot = 0; 766 if ((int32_t)as->topslot < 0) as->topslot = 0;
658 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 767 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
659 emit_setgl(as, base, jit_base); 768 emit_setgl(as, base, jit_base);
660 emit_addptr(as, base, -8*delta); 769 emit_addptr(as, base, -8*delta);
661 asm_guardcc(as, CC_NE); 770 asm_guardcc(as, CC_NE);
771#if LJ_FR2
772 emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
773 emit_loadu64(as, rpc, u64ptr(pc));
774#else
662 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); 775 emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
776#endif
663} 777}
664 778
665/* -- Type conversions ---------------------------------------------------- */ 779/* -- Type conversions ---------------------------------------------------- */
@@ -672,8 +786,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
672 asm_guardcc(as, CC_NE); 786 asm_guardcc(as, CC_NE);
673 emit_rr(as, XO_UCOMISD, left, tmp); 787 emit_rr(as, XO_UCOMISD, left, tmp);
674 emit_rr(as, XO_CVTSI2SD, tmp, dest); 788 emit_rr(as, XO_CVTSI2SD, tmp, dest);
675 if (!(as->flags & JIT_F_SPLIT_XMM)) 789 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
676 emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
677 emit_rr(as, XO_CVTTSD2SI, dest, left); 790 emit_rr(as, XO_CVTTSD2SI, dest, left);
678 /* Can't fuse since left is needed twice. */ 791 /* Can't fuse since left is needed twice. */
679} 792}
@@ -684,8 +797,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
684 Reg tmp = ra_noreg(IR(ir->op1)->r) ? 797 Reg tmp = ra_noreg(IR(ir->op1)->r) ?
685 ra_alloc1(as, ir->op1, RSET_FPR) : 798 ra_alloc1(as, ir->op1, RSET_FPR) :
686 ra_scratch(as, RSET_FPR); 799 ra_scratch(as, RSET_FPR);
687 Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); 800 Reg right;
688 emit_rr(as, XO_MOVDto, tmp, dest); 801 emit_rr(as, XO_MOVDto, tmp, dest);
802 right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
689 emit_mrm(as, XO_ADDSD, tmp, right); 803 emit_mrm(as, XO_ADDSD, tmp, right);
690 ra_left(as, tmp, ir->op1); 804 ra_left(as, tmp, ir->op1);
691} 805}
@@ -706,13 +820,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
706 if (left == dest) return; /* Avoid the XO_XORPS. */ 820 if (left == dest) return; /* Avoid the XO_XORPS. */
707 } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ 821 } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
708 /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ 822 /* number = (2^52+2^51 .. u32) - (2^52+2^51) */
709 cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); 823 cTValue *k = &as->J->k64[LJ_K64_TOBIT];
710 Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); 824 Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
711 if (irt_isfloat(ir->t)) 825 if (irt_isfloat(ir->t))
712 emit_rr(as, XO_CVTSD2SS, dest, dest); 826 emit_rr(as, XO_CVTSD2SS, dest, dest);
713 emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ 827 emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
714 emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ 828 emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
715 emit_loadn(as, bias, k); 829 emit_rma(as, XO_MOVSD, bias, k);
716 emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); 830 emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
717 return; 831 return;
718 } else { /* Integer to FP conversion. */ 832 } else { /* Integer to FP conversion. */
@@ -721,7 +835,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
721 asm_fuseloadm(as, lref, RSET_GPR, st64); 835 asm_fuseloadm(as, lref, RSET_GPR, st64);
722 if (LJ_64 && st == IRT_U64) { 836 if (LJ_64 && st == IRT_U64) {
723 MCLabel l_end = emit_label(as); 837 MCLabel l_end = emit_label(as);
724 const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); 838 cTValue *k = &as->J->k64[LJ_K64_2P64];
725 emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ 839 emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
726 emit_sjcc(as, CC_NS, l_end); 840 emit_sjcc(as, CC_NS, l_end);
727 emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ 841 emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
@@ -729,8 +843,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
729 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, 843 emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
730 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); 844 dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
731 } 845 }
732 if (!(as->flags & JIT_F_SPLIT_XMM)) 846 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
733 emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
734 } else if (stfp) { /* FP to integer conversion. */ 847 } else if (stfp) { /* FP to integer conversion. */
735 if (irt_isguard(ir->t)) { 848 if (irt_isguard(ir->t)) {
736 /* Checked conversions are only supported from number to int. */ 849 /* Checked conversions are only supported from number to int. */
@@ -738,9 +851,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
738 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); 851 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
739 } else { 852 } else {
740 Reg dest = ra_dest(as, ir, RSET_GPR); 853 Reg dest = ra_dest(as, ir, RSET_GPR);
741 x86Op op = st == IRT_NUM ? 854 x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
742 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
743 ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
744 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { 855 if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
745 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ 856 /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
746 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ 857 /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -751,23 +862,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
751 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); 862 emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
752 emit_rr(as, op, dest|REX_64, tmp); 863 emit_rr(as, op, dest|REX_64, tmp);
753 if (st == IRT_NUM) 864 if (st == IRT_NUM)
754 emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, 865 emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
755 LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
756 else 866 else
757 emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, 867 emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
758 LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
759 emit_sjcc(as, CC_NS, l_end); 868 emit_sjcc(as, CC_NS, l_end);
760 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ 869 emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
761 emit_rr(as, op, dest|REX_64, tmp); 870 emit_rr(as, op, dest|REX_64, tmp);
762 ra_left(as, tmp, lref); 871 ra_left(as, tmp, lref);
763 } else { 872 } else {
764 Reg left = asm_fuseload(as, lref, RSET_FPR);
765 if (LJ_64 && irt_isu32(ir->t)) 873 if (LJ_64 && irt_isu32(ir->t))
766 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ 874 emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
767 emit_mrm(as, op, 875 emit_mrm(as, op,
768 dest|((LJ_64 && 876 dest|((LJ_64 &&
769 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), 877 (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
770 left); 878 asm_fuseload(as, lref, RSET_FPR));
771 } 879 }
772 } 880 }
773 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ 881 } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
@@ -834,16 +942,14 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
834 if (ra_hasreg(dest)) { 942 if (ra_hasreg(dest)) {
835 ra_free(as, dest); 943 ra_free(as, dest);
836 ra_modified(as, dest); 944 ra_modified(as, dest);
837 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, 945 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
838 dest, RID_ESP, ofs);
839 } 946 }
840 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, 947 emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
841 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); 948 irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
842 if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { 949 if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
843 /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ 950 /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
844 MCLabel l_end = emit_label(as); 951 MCLabel l_end = emit_label(as);
845 emit_rma(as, XO_FADDq, XOg_FADDq, 952 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
846 lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
847 emit_sjcc(as, CC_NS, l_end); 953 emit_sjcc(as, CC_NS, l_end);
848 emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ 954 emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
849 } else { 955 } else {
@@ -863,7 +969,6 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
863 Reg lo, hi; 969 Reg lo, hi;
864 lua_assert(st == IRT_NUM || st == IRT_FLOAT); 970 lua_assert(st == IRT_NUM || st == IRT_FLOAT);
865 lua_assert(dt == IRT_I64 || dt == IRT_U64); 971 lua_assert(dt == IRT_I64 || dt == IRT_U64);
866 lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
867 hi = ra_dest(as, ir, RSET_GPR); 972 hi = ra_dest(as, ir, RSET_GPR);
868 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); 973 lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
869 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); 974 if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -884,8 +989,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
884 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); 989 emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
885 else 990 else
886 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); 991 emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
887 emit_rma(as, XO_FADDq, XOg_FADDq, 992 emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
888 lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
889 emit_sjcc(as, CC_NS, l_pop); 993 emit_sjcc(as, CC_NS, l_pop);
890 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ 994 emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
891 } 995 }
@@ -906,6 +1010,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
906 st == IRT_NUM ? XOg_FLDq: XOg_FLDd, 1010 st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
907 asm_fuseload(as, ir->op1, RSET_EMPTY)); 1011 asm_fuseload(as, ir->op1, RSET_EMPTY));
908} 1012}
1013
1014static void asm_conv64(ASMState *as, IRIns *ir)
1015{
1016 if (irt_isfp(ir->t))
1017 asm_conv_fp_int64(as, ir);
1018 else
1019 asm_conv_int64_fp(as, ir);
1020}
909#endif 1021#endif
910 1022
911static void asm_strto(ASMState *as, IRIns *ir) 1023static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,54 +1039,60 @@ static void asm_strto(ASMState *as, IRIns *ir)
927 RID_ESP, sps_scale(ir->s)); 1039 RID_ESP, sps_scale(ir->s));
928} 1040}
929 1041
930static void asm_tostr(ASMState *as, IRIns *ir) 1042/* -- Memory references --------------------------------------------------- */
1043
1044/* Get pointer to TValue. */
1045static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
931{ 1046{
932 IRIns *irl = IR(ir->op1); 1047 IRIns *ir = IR(ref);
933 IRRef args[2]; 1048 if (irt_isnum(ir->t)) {
934 args[0] = ASMREF_L; 1049 /* For numbers use the constant itself or a spill slot as a TValue. */
935 as->gcsteps++; 1050 if (irref_isk(ref))
936 if (irt_isnum(irl->t)) { 1051 emit_loada(as, dest, ir_knum(ir));
937 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; 1052 else
938 args[1] = ASMREF_TMP1; /* const lua_Number * */ 1053 emit_rmro(as, XO_LEA, dest|REX_64, RID_ESP, ra_spill(as, ir));
939 asm_setupresult(as, ir, ci); /* GCstr * */
940 asm_gencall(as, ci, args);
941 emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
942 RID_ESP, ra_spill(as, irl));
943 } else { 1054 } else {
944 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; 1055 /* Otherwise use g->tmptv to hold the TValue. */
945 args[1] = ir->op1; /* int32_t k */ 1056#if LJ_GC64
946 asm_setupresult(as, ir, ci); /* GCstr * */ 1057 if (irref_isk(ref)) {
947 asm_gencall(as, ci, args); 1058 TValue k;
1059 lj_ir_kvalue(as->J->L, &k, ir);
1060 emit_movmroi(as, dest, 4, k.u32.hi);
1061 emit_movmroi(as, dest, 0, k.u32.lo);
1062 } else {
1063 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1064 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1065 if (irt_is64(ir->t)) {
1066 emit_u32(as, irt_toitype(ir->t) << 15);
1067 emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
1068 } else {
1069 /* Currently, no caller passes integers that might end up here. */
1070 emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
1071 }
1072 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1073 }
1074#else
1075 if (!irref_isk(ref)) {
1076 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
1077 emit_movtomro(as, REX_64IR(ir, src), dest, 0);
1078 } else if (!irt_ispri(ir->t)) {
1079 emit_movmroi(as, dest, 0, ir->i);
1080 }
1081 if (!(LJ_64 && irt_islightud(ir->t)))
1082 emit_movmroi(as, dest, 4, irt_toitype(ir->t));
1083#endif
1084 emit_loada(as, dest, &J2G(as->J)->tmptv);
948 } 1085 }
949} 1086}
950 1087
951/* -- Memory references --------------------------------------------------- */
952
953static void asm_aref(ASMState *as, IRIns *ir) 1088static void asm_aref(ASMState *as, IRIns *ir)
954{ 1089{
955 Reg dest = ra_dest(as, ir, RSET_GPR); 1090 Reg dest = ra_dest(as, ir, RSET_GPR);
956 asm_fusearef(as, ir, RSET_GPR); 1091 asm_fusearef(as, ir, RSET_GPR);
957 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) 1092 if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
958 emit_mrm(as, XO_LEA, dest, RID_MRM); 1093 emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
959 else if (as->mrm.base != dest) 1094 else if (as->mrm.base != dest)
960 emit_rr(as, XO_MOV, dest, as->mrm.base); 1095 emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
961}
962
963/* Merge NE(HREF, niltv) check. */
964static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
965{
966 /* Assumes nothing else generates NE of HREF. */
967 if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
968 ra_hasreg(ir->r)) {
969 MCode *p = as->mcp;
970 p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
971 /* Ensure no loop branch inversion happened. */
972 if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
973 as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
974 return p + *(int32_t *)(p-4); /* Return exit address. */
975 }
976 }
977 return NULL;
978} 1096}
979 1097
980/* Inlined hash lookup. Specialized for key type and for const keys. 1098/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -985,10 +1103,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
985** } while ((n = nextnode(n))); 1103** } while ((n = nextnode(n)));
986** return niltv(L); 1104** return niltv(L);
987*/ 1105*/
988static void asm_href(ASMState *as, IRIns *ir) 1106static void asm_href(ASMState *as, IRIns *ir, IROp merge)
989{ 1107{
990 MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
991 RegSet allow = RSET_GPR; 1108 RegSet allow = RSET_GPR;
1109 int destused = ra_used(ir);
992 Reg dest = ra_dest(as, ir, allow); 1110 Reg dest = ra_dest(as, ir, allow);
993 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); 1111 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
994 Reg key = RID_NONE, tmp = RID_NONE; 1112 Reg key = RID_NONE, tmp = RID_NONE;
@@ -1001,28 +1119,26 @@ static void asm_href(ASMState *as, IRIns *ir)
1001 if (!isk) { 1119 if (!isk) {
1002 rset_clear(allow, tab); 1120 rset_clear(allow, tab);
1003 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); 1121 key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
1004 if (!irt_isstr(kt)) 1122 if (LJ_GC64 || !irt_isstr(kt))
1005 tmp = ra_scratch(as, rset_exclude(allow, key)); 1123 tmp = ra_scratch(as, rset_exclude(allow, key));
1006 } 1124 }
1007 1125
1008 /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ 1126 /* Key not found in chain: jump to exit (if merged) or load niltv. */
1009 l_end = emit_label(as); 1127 l_end = emit_label(as);
1010 if (nilexit && ir[1].o == IR_NE) { 1128 if (merge == IR_NE)
1011 emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ 1129 asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
1012 nilexit = NULL; 1130 else if (destused)
1013 } else {
1014 emit_loada(as, dest, niltvg(J2G(as->J))); 1131 emit_loada(as, dest, niltvg(J2G(as->J)));
1015 }
1016 1132
1017 /* Follow hash chain until the end. */ 1133 /* Follow hash chain until the end. */
1018 l_loop = emit_sjcc_label(as, CC_NZ); 1134 l_loop = emit_sjcc_label(as, CC_NZ);
1019 emit_rr(as, XO_TEST, dest, dest); 1135 emit_rr(as, XO_TEST, dest|REX_GC64, dest);
1020 emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); 1136 emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
1021 l_next = emit_label(as); 1137 l_next = emit_label(as);
1022 1138
1023 /* Type and value comparison. */ 1139 /* Type and value comparison. */
1024 if (nilexit) 1140 if (merge == IR_EQ)
1025 emit_jcc(as, CC_E, nilexit); 1141 asm_guardcc(as, CC_E);
1026 else 1142 else
1027 emit_sjcc(as, CC_E, l_end); 1143 emit_sjcc(as, CC_E, l_end);
1028 if (irt_isnum(kt)) { 1144 if (irt_isnum(kt)) {
@@ -1038,7 +1154,7 @@ static void asm_href(ASMState *as, IRIns *ir)
1038 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); 1154 emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
1039 emit_sjcc(as, CC_AE, l_next); 1155 emit_sjcc(as, CC_AE, l_next);
1040 /* The type check avoids NaN penalties and complaints from Valgrind. */ 1156 /* The type check avoids NaN penalties and complaints from Valgrind. */
1041#if LJ_64 1157#if LJ_64 && !LJ_GC64
1042 emit_u32(as, LJ_TISNUM); 1158 emit_u32(as, LJ_TISNUM);
1043 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); 1159 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1044#else 1160#else
@@ -1046,10 +1162,28 @@ static void asm_href(ASMState *as, IRIns *ir)
1046 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); 1162 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1047#endif 1163#endif
1048 } 1164 }
1049#if LJ_64 1165#if LJ_64 && !LJ_GC64
1050 } else if (irt_islightud(kt)) { 1166 } else if (irt_islightud(kt)) {
1051 emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); 1167 emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
1052#endif 1168#endif
1169#if LJ_GC64
1170 } else if (irt_isaddr(kt)) {
1171 if (isk) {
1172 TValue k;
1173 k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
1174 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
1175 k.u32.lo);
1176 emit_sjcc(as, CC_NE, l_next);
1177 emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
1178 k.u32.hi);
1179 } else {
1180 emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
1181 }
1182 } else {
1183 lua_assert(irt_ispri(kt) && !irt_isnil(kt));
1184 emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
1185 emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
1186#else
1053 } else { 1187 } else {
1054 if (!irt_ispri(kt)) { 1188 if (!irt_ispri(kt)) {
1055 lua_assert(irt_isaddr(kt)); 1189 lua_assert(irt_isaddr(kt));
@@ -1063,16 +1197,23 @@ static void asm_href(ASMState *as, IRIns *ir)
1063 lua_assert(!irt_isnil(kt)); 1197 lua_assert(!irt_isnil(kt));
1064 emit_i8(as, irt_toitype(kt)); 1198 emit_i8(as, irt_toitype(kt));
1065 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); 1199 emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
1200#endif
1066 } 1201 }
1067 emit_sfixup(as, l_loop); 1202 emit_sfixup(as, l_loop);
1068 checkmclim(as); 1203 checkmclim(as);
1204#if LJ_GC64
1205 if (!isk && irt_isaddr(kt)) {
1206 emit_rr(as, XO_OR, tmp|REX_64, key);
1207 emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
1208 }
1209#endif
1069 1210
1070 /* Load main position relative to tab->node into dest. */ 1211 /* Load main position relative to tab->node into dest. */
1071 khash = isk ? ir_khash(irkey) : 1; 1212 khash = isk ? ir_khash(irkey) : 1;
1072 if (khash == 0) { 1213 if (khash == 0) {
1073 emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); 1214 emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
1074 } else { 1215 } else {
1075 emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); 1216 emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
1076 if ((as->flags & JIT_F_PREFER_IMUL)) { 1217 if ((as->flags & JIT_F_PREFER_IMUL)) {
1077 emit_i8(as, sizeof(Node)); 1218 emit_i8(as, sizeof(Node));
1078 emit_rr(as, XO_IMULi8, dest, dest); 1219 emit_rr(as, XO_IMULi8, dest, dest);
@@ -1107,7 +1248,19 @@ static void asm_href(ASMState *as, IRIns *ir)
1107#endif 1248#endif
1108 } else { 1249 } else {
1109 emit_rr(as, XO_MOV, tmp, key); 1250 emit_rr(as, XO_MOV, tmp, key);
1251#if LJ_GC64
1252 checkmclim(as);
1253 emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
1254 if ((as->flags & JIT_F_BMI2)) {
1255 emit_i8(as, 32);
1256 emit_mrm(as, XV_RORX|VEX_64, dest, key);
1257 } else {
1258 emit_shifti(as, XOg_SHR|REX_64, dest, 32);
1259 emit_rr(as, XO_MOV, dest|REX_64, key|REX_64);
1260 }
1261#else
1110 emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); 1262 emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
1263#endif
1111 } 1264 }
1112 } 1265 }
1113 } 1266 }
@@ -1127,11 +1280,11 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1127 if (ra_hasreg(dest)) { 1280 if (ra_hasreg(dest)) {
1128 if (ofs != 0) { 1281 if (ofs != 0) {
1129 if (dest == node && !(as->flags & JIT_F_LEA_AGU)) 1282 if (dest == node && !(as->flags & JIT_F_LEA_AGU))
1130 emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); 1283 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
1131 else 1284 else
1132 emit_rmro(as, XO_LEA, dest, node, ofs); 1285 emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
1133 } else if (dest != node) { 1286 } else if (dest != node) {
1134 emit_rr(as, XO_MOV, dest, node); 1287 emit_rr(as, XO_MOV, dest|REX_GC64, node);
1135 } 1288 }
1136 } 1289 }
1137 asm_guardcc(as, CC_NE); 1290 asm_guardcc(as, CC_NE);
@@ -1143,13 +1296,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1143 lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); 1296 lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
1144 /* Assumes -0.0 is already canonicalized to +0.0. */ 1297 /* Assumes -0.0 is already canonicalized to +0.0. */
1145 emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : 1298 emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
1299#if LJ_GC64
1300 ((uint64_t)irt_toitype(irkey->t) << 47) |
1301 (uint64_t)ir_kgc(irkey));
1302#else
1146 ((uint64_t)irt_toitype(irkey->t) << 32) | 1303 ((uint64_t)irt_toitype(irkey->t) << 32) |
1147 (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); 1304 (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
1305#endif
1148 } else { 1306 } else {
1149 lua_assert(!irt_isnil(irkey->t)); 1307 lua_assert(!irt_isnil(irkey->t));
1308#if LJ_GC64
1309 emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
1310 emit_rmro(as, XO_ARITHi, XOg_CMP, node,
1311 ofs + (int32_t)offsetof(Node, key.it));
1312#else
1150 emit_i8(as, irt_toitype(irkey->t)); 1313 emit_i8(as, irt_toitype(irkey->t));
1151 emit_rmro(as, XO_ARITHi8, XOg_CMP, node, 1314 emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
1152 ofs + (int32_t)offsetof(Node, key.it)); 1315 ofs + (int32_t)offsetof(Node, key.it));
1316#endif
1153 } 1317 }
1154#else 1318#else
1155 l_exit = emit_label(as); 1319 l_exit = emit_label(as);
@@ -1178,61 +1342,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
1178#endif 1342#endif
1179} 1343}
1180 1344
1181static void asm_newref(ASMState *as, IRIns *ir)
1182{
1183 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
1184 IRRef args[3];
1185 IRIns *irkey;
1186 Reg tmp;
1187 if (ir->r == RID_SINK)
1188 return;
1189 args[0] = ASMREF_L; /* lua_State *L */
1190 args[1] = ir->op1; /* GCtab *t */
1191 args[2] = ASMREF_TMP1; /* cTValue *key */
1192 asm_setupresult(as, ir, ci); /* TValue * */
1193 asm_gencall(as, ci, args);
1194 tmp = ra_releasetmp(as, ASMREF_TMP1);
1195 irkey = IR(ir->op2);
1196 if (irt_isnum(irkey->t)) {
1197 /* For numbers use the constant itself or a spill slot as a TValue. */
1198 if (irref_isk(ir->op2))
1199 emit_loada(as, tmp, ir_knum(irkey));
1200 else
1201 emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
1202 } else {
1203 /* Otherwise use g->tmptv to hold the TValue. */
1204 if (!irref_isk(ir->op2)) {
1205 Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
1206 emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
1207 } else if (!irt_ispri(irkey->t)) {
1208 emit_movmroi(as, tmp, 0, irkey->i);
1209 }
1210 if (!(LJ_64 && irt_islightud(irkey->t)))
1211 emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
1212 emit_loada(as, tmp, &J2G(as->J)->tmptv);
1213 }
1214}
1215
1216static void asm_uref(ASMState *as, IRIns *ir) 1345static void asm_uref(ASMState *as, IRIns *ir)
1217{ 1346{
1218 Reg dest = ra_dest(as, ir, RSET_GPR); 1347 Reg dest = ra_dest(as, ir, RSET_GPR);
1219 if (irref_isk(ir->op1)) { 1348 if (irref_isk(ir->op1)) {
1220 GCfunc *fn = ir_kfunc(IR(ir->op1)); 1349 GCfunc *fn = ir_kfunc(IR(ir->op1));
1221 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; 1350 MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
1222 emit_rma(as, XO_MOV, dest, v); 1351 emit_rma(as, XO_MOV, dest|REX_GC64, v);
1223 } else { 1352 } else {
1224 Reg uv = ra_scratch(as, RSET_GPR); 1353 Reg uv = ra_scratch(as, RSET_GPR);
1225 Reg func = ra_alloc1(as, ir->op1, RSET_GPR); 1354 Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
1226 if (ir->o == IR_UREFC) { 1355 if (ir->o == IR_UREFC) {
1227 emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); 1356 emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
1228 asm_guardcc(as, CC_NE); 1357 asm_guardcc(as, CC_NE);
1229 emit_i8(as, 1); 1358 emit_i8(as, 1);
1230 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); 1359 emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
1231 } else { 1360 } else {
1232 emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); 1361 emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
1233 } 1362 }
1234 emit_rmro(as, XO_MOV, uv, func, 1363 emit_rmro(as, XO_MOV, uv|REX_GC64, func,
1235 (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); 1364 (int32_t)offsetof(GCfuncL, uvptr) +
1365 (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
1236 } 1366 }
1237} 1367}
1238 1368
@@ -1250,9 +1380,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
1250 if (as->mrm.base == RID_NONE) 1380 if (as->mrm.base == RID_NONE)
1251 emit_loadi(as, dest, as->mrm.ofs); 1381 emit_loadi(as, dest, as->mrm.ofs);
1252 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) 1382 else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
1253 emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); 1383 emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
1254 else 1384 else
1255 emit_mrm(as, XO_LEA, dest, RID_MRM); 1385 emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
1256} 1386}
1257 1387
1258/* -- Loads and stores ---------------------------------------------------- */ 1388/* -- Loads and stores ---------------------------------------------------- */
@@ -1271,7 +1401,7 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1271 case IRT_U8: xo = XO_MOVZXb; break; 1401 case IRT_U8: xo = XO_MOVZXb; break;
1272 case IRT_I16: xo = XO_MOVSXw; break; 1402 case IRT_I16: xo = XO_MOVSXw; break;
1273 case IRT_U16: xo = XO_MOVZXw; break; 1403 case IRT_U16: xo = XO_MOVZXw; break;
1274 case IRT_NUM: xo = XMM_MOVRM(as); break; 1404 case IRT_NUM: xo = XO_MOVSD; break;
1275 case IRT_FLOAT: xo = XO_MOVSS; break; 1405 case IRT_FLOAT: xo = XO_MOVSS; break;
1276 default: 1406 default:
1277 if (LJ_64 && irt_is64(ir->t)) 1407 if (LJ_64 && irt_is64(ir->t))
@@ -1284,6 +1414,9 @@ static void asm_fxload(ASMState *as, IRIns *ir)
1284 emit_mrm(as, xo, dest, RID_MRM); 1414 emit_mrm(as, xo, dest, RID_MRM);
1285} 1415}
1286 1416
1417#define asm_fload(as, ir) asm_fxload(as, ir)
1418#define asm_xload(as, ir) asm_fxload(as, ir)
1419
1287static void asm_fxstore(ASMState *as, IRIns *ir) 1420static void asm_fxstore(ASMState *as, IRIns *ir)
1288{ 1421{
1289 RegSet allow = RSET_GPR; 1422 RegSet allow = RSET_GPR;
@@ -1318,7 +1451,7 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1318 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; 1451 case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
1319 case IRT_NUM: xo = XO_MOVSDto; break; 1452 case IRT_NUM: xo = XO_MOVSDto; break;
1320 case IRT_FLOAT: xo = XO_MOVSSto; break; 1453 case IRT_FLOAT: xo = XO_MOVSSto; break;
1321#if LJ_64 1454#if LJ_64 && !LJ_GC64
1322 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ 1455 case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
1323#endif 1456#endif
1324 default: 1457 default:
@@ -1347,7 +1480,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1347 } 1480 }
1348} 1481}
1349 1482
1350#if LJ_64 1483#define asm_fstore(as, ir) asm_fxstore(as, ir)
1484#define asm_xstore(as, ir) asm_fxstore(as, ir)
1485
1486#if LJ_64 && !LJ_GC64
1351static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) 1487static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1352{ 1488{
1353 if (ra_used(ir) || typecheck) { 1489 if (ra_used(ir) || typecheck) {
@@ -1369,9 +1505,12 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
1369 1505
1370static void asm_ahuvload(ASMState *as, IRIns *ir) 1506static void asm_ahuvload(ASMState *as, IRIns *ir)
1371{ 1507{
1508#if LJ_GC64
1509 Reg tmp = RID_NONE;
1510#endif
1372 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || 1511 lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1373 (LJ_DUALNUM && irt_isint(ir->t))); 1512 (LJ_DUALNUM && irt_isint(ir->t)));
1374#if LJ_64 1513#if LJ_64 && !LJ_GC64
1375 if (irt_islightud(ir->t)) { 1514 if (irt_islightud(ir->t)) {
1376 Reg dest = asm_load_lightud64(as, ir, 1); 1515 Reg dest = asm_load_lightud64(as, ir, 1);
1377 if (ra_hasreg(dest)) { 1516 if (ra_hasreg(dest)) {
@@ -1385,20 +1524,64 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1385 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; 1524 RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1386 Reg dest = ra_dest(as, ir, allow); 1525 Reg dest = ra_dest(as, ir, allow);
1387 asm_fuseahuref(as, ir->op1, RSET_GPR); 1526 asm_fuseahuref(as, ir->op1, RSET_GPR);
1388 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); 1527#if LJ_GC64
1528 if (irt_isaddr(ir->t)) {
1529 emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1530 asm_guardcc(as, CC_NE);
1531 emit_i8(as, irt_toitype(ir->t));
1532 emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1533 emit_i8(as, XI_O16);
1534 if ((as->flags & JIT_F_BMI2)) {
1535 emit_i8(as, 47);
1536 emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
1537 } else {
1538 emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1539 emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
1540 }
1541 return;
1542 } else
1543#endif
1544 emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
1389 } else { 1545 } else {
1390 asm_fuseahuref(as, ir->op1, RSET_GPR); 1546 RegSet gpr = RSET_GPR;
1547#if LJ_GC64
1548 if (irt_isaddr(ir->t)) {
1549 tmp = ra_scratch(as, RSET_GPR);
1550 gpr = rset_exclude(gpr, tmp);
1551 }
1552#endif
1553 asm_fuseahuref(as, ir->op1, gpr);
1391 } 1554 }
1392 /* Always do the type check, even if the load result is unused. */ 1555 /* Always do the type check, even if the load result is unused. */
1393 as->mrm.ofs += 4; 1556 as->mrm.ofs += 4;
1394 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); 1557 asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
1395 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { 1558 if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
1396 lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); 1559 lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
1560#if LJ_GC64
1561 emit_u32(as, LJ_TISNUM << 15);
1562#else
1397 emit_u32(as, LJ_TISNUM); 1563 emit_u32(as, LJ_TISNUM);
1564#endif
1398 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); 1565 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1566#if LJ_GC64
1567 } else if (irt_isaddr(ir->t)) {
1568 as->mrm.ofs -= 4;
1569 emit_i8(as, irt_toitype(ir->t));
1570 emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
1571 emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1572 emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
1573 } else if (irt_isnil(ir->t)) {
1574 as->mrm.ofs -= 4;
1575 emit_i8(as, -1);
1576 emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
1577 } else {
1578 emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
1579 emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
1580#else
1399 } else { 1581 } else {
1400 emit_i8(as, irt_toitype(ir->t)); 1582 emit_i8(as, irt_toitype(ir->t));
1401 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); 1583 emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
1584#endif
1402 } 1585 }
1403} 1586}
1404 1587
@@ -1410,12 +1593,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1410 Reg src = ra_alloc1(as, ir->op2, RSET_FPR); 1593 Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1411 asm_fuseahuref(as, ir->op1, RSET_GPR); 1594 asm_fuseahuref(as, ir->op1, RSET_GPR);
1412 emit_mrm(as, XO_MOVSDto, src, RID_MRM); 1595 emit_mrm(as, XO_MOVSDto, src, RID_MRM);
1413#if LJ_64 1596#if LJ_64 && !LJ_GC64
1414 } else if (irt_islightud(ir->t)) { 1597 } else if (irt_islightud(ir->t)) {
1415 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); 1598 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1416 asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); 1599 asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
1417 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); 1600 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1418#endif 1601#endif
1602#if LJ_GC64
1603 } else if (irref_isk(ir->op2)) {
1604 TValue k;
1605 lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
1606 asm_fuseahuref(as, ir->op1, RSET_GPR);
1607 if (tvisnil(&k)) {
1608 emit_i32(as, -1);
1609 emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
1610 } else {
1611 emit_u32(as, k.u32.lo);
1612 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1613 as->mrm.ofs += 4;
1614 emit_u32(as, k.u32.hi);
1615 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1616 }
1617#endif
1419 } else { 1618 } else {
1420 IRIns *irr = IR(ir->op2); 1619 IRIns *irr = IR(ir->op2);
1421 RegSet allow = RSET_GPR; 1620 RegSet allow = RSET_GPR;
@@ -1426,6 +1625,17 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1426 } 1625 }
1427 asm_fuseahuref(as, ir->op1, allow); 1626 asm_fuseahuref(as, ir->op1, allow);
1428 if (ra_hasreg(src)) { 1627 if (ra_hasreg(src)) {
1628#if LJ_GC64
1629 if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
1630 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
1631 as->mrm.ofs += 4;
1632 emit_u32(as, irt_toitype(ir->t) << 15);
1633 emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
1634 as->mrm.ofs -= 4;
1635 emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
1636 return;
1637 }
1638#endif
1429 emit_mrm(as, XO_MOVto, src, RID_MRM); 1639 emit_mrm(as, XO_MOVto, src, RID_MRM);
1430 } else if (!irt_ispri(irr->t)) { 1640 } else if (!irt_ispri(irr->t)) {
1431 lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); 1641 lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
@@ -1433,14 +1643,20 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
1433 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 1643 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1434 } 1644 }
1435 as->mrm.ofs += 4; 1645 as->mrm.ofs += 4;
1646#if LJ_GC64
1647 lua_assert(LJ_DUALNUM && irt_isinteger(ir->t));
1648 emit_i32(as, LJ_TNUMX << 15);
1649#else
1436 emit_i32(as, (int32_t)irt_toitype(ir->t)); 1650 emit_i32(as, (int32_t)irt_toitype(ir->t));
1651#endif
1437 emit_mrm(as, XO_MOVmi, 0, RID_MRM); 1652 emit_mrm(as, XO_MOVmi, 0, RID_MRM);
1438 } 1653 }
1439} 1654}
1440 1655
1441static void asm_sload(ASMState *as, IRIns *ir) 1656static void asm_sload(ASMState *as, IRIns *ir)
1442{ 1657{
1443 int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); 1658 int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
1659 (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
1444 IRType1 t = ir->t; 1660 IRType1 t = ir->t;
1445 Reg base; 1661 Reg base;
1446 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ 1662 lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
@@ -1451,9 +1667,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
1451 Reg left = ra_scratch(as, RSET_FPR); 1667 Reg left = ra_scratch(as, RSET_FPR);
1452 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ 1668 asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
1453 base = ra_alloc1(as, REF_BASE, RSET_GPR); 1669 base = ra_alloc1(as, REF_BASE, RSET_GPR);
1454 emit_rmro(as, XMM_MOVRM(as), left, base, ofs); 1670 emit_rmro(as, XO_MOVSD, left, base, ofs);
1455 t.irt = IRT_NUM; /* Continue with a regular number type check. */ 1671 t.irt = IRT_NUM; /* Continue with a regular number type check. */
1456#if LJ_64 1672#if LJ_64 && !LJ_GC64
1457 } else if (irt_islightud(t)) { 1673 } else if (irt_islightud(t)) {
1458 Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); 1674 Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
1459 if (ra_hasreg(dest)) { 1675 if (ra_hasreg(dest)) {
@@ -1469,11 +1685,39 @@ static void asm_sload(ASMState *as, IRIns *ir)
1469 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); 1685 lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
1470 if ((ir->op2 & IRSLOAD_CONVERT)) { 1686 if ((ir->op2 & IRSLOAD_CONVERT)) {
1471 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ 1687 t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
1472 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); 1688 emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
1473 } else if (irt_isnum(t)) {
1474 emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
1475 } else { 1689 } else {
1476 emit_rmro(as, XO_MOV, dest, base, ofs); 1690#if LJ_GC64
1691 if (irt_isaddr(t)) {
1692 /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
1693 **
1694 ** mov r64, [addr] rorx r64, [addr], 47
1695 ** ror r64, 47
1696 ** cmp r16, itype cmp r16, itype
1697 ** jne ->exit jne ->exit
1698 ** shr r64, 16 shr r64, 16
1699 */
1700 emit_shifti(as, XOg_SHR|REX_64, dest, 17);
1701 if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1702 asm_guardcc(as, CC_NE);
1703 emit_i8(as, irt_toitype(t));
1704 emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
1705 emit_i8(as, XI_O16);
1706 }
1707 if ((as->flags & JIT_F_BMI2)) {
1708 emit_i8(as, 47);
1709 emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
1710 } else {
1711 if ((ir->op2 & IRSLOAD_TYPECHECK))
1712 emit_shifti(as, XOg_ROR|REX_64, dest, 47);
1713 else
1714 emit_shifti(as, XOg_SHL|REX_64, dest, 17);
1715 emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
1716 }
1717 return;
1718 } else
1719#endif
1720 emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
1477 } 1721 }
1478 } else { 1722 } else {
1479 if (!(ir->op2 & IRSLOAD_TYPECHECK)) 1723 if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1485,11 +1729,42 @@ static void asm_sload(ASMState *as, IRIns *ir)
1485 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); 1729 asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
1486 if (LJ_64 && irt_type(t) >= IRT_NUM) { 1730 if (LJ_64 && irt_type(t) >= IRT_NUM) {
1487 lua_assert(irt_isinteger(t) || irt_isnum(t)); 1731 lua_assert(irt_isinteger(t) || irt_isnum(t));
1732#if LJ_GC64
1733 emit_u32(as, LJ_TISNUM << 15);
1734#else
1488 emit_u32(as, LJ_TISNUM); 1735 emit_u32(as, LJ_TISNUM);
1736#endif
1737 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1738#if LJ_GC64
1739 } else if (irt_isnil(t)) {
1740 /* LJ_GC64 type check for nil:
1741 **
1742 ** cmp qword [addr], -1
1743 ** jne ->exit
1744 */
1745 emit_i8(as, -1);
1746 emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
1747 } else if (irt_ispri(t)) {
1748 emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
1489 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); 1749 emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
1490 } else { 1750 } else {
1751 /* LJ_GC64 type check only:
1752 **
1753 ** mov r64, [addr]
1754 ** sar r64, 47
1755 ** cmp r32, itype
1756 ** jne ->exit
1757 */
1758 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
1759 emit_i8(as, irt_toitype(t));
1760 emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
1761 emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
1762 emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
1763#else
1764 } else {
1491 emit_i8(as, irt_toitype(t)); 1765 emit_i8(as, irt_toitype(t));
1492 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); 1766 emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
1767#endif
1493 } 1768 }
1494 } 1769 }
1495} 1770}
@@ -1500,15 +1775,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
1500static void asm_cnew(ASMState *as, IRIns *ir) 1775static void asm_cnew(ASMState *as, IRIns *ir)
1501{ 1776{
1502 CTState *cts = ctype_ctsG(J2G(as->J)); 1777 CTState *cts = ctype_ctsG(J2G(as->J));
1503 CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; 1778 CTypeID id = (CTypeID)IR(ir->op1)->i;
1504 CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? 1779 CTSize sz;
1505 lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; 1780 CTInfo info = lj_ctype_info(cts, id, &sz);
1506 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; 1781 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1507 IRRef args[2]; 1782 IRRef args[4];
1508 lua_assert(sz != CTSIZE_INVALID); 1783 lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1509 1784
1510 args[0] = ASMREF_L; /* lua_State *L */
1511 args[1] = ASMREF_TMP1; /* MSize size */
1512 as->gcsteps++; 1785 as->gcsteps++;
1513 asm_setupresult(as, ir, ci); /* GCcdata * */ 1786 asm_setupresult(as, ir, ci); /* GCcdata * */
1514 1787
@@ -1519,8 +1792,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1519 Reg r64 = sz == 8 ? REX_64 : 0; 1792 Reg r64 = sz == 8 ? REX_64 : 0;
1520 if (irref_isk(ir->op2)) { 1793 if (irref_isk(ir->op2)) {
1521 IRIns *irk = IR(ir->op2); 1794 IRIns *irk = IR(ir->op2);
1522 uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : 1795 uint64_t k = (irk->o == IR_KINT64 ||
1523 (uint64_t)(uint32_t)irk->i; 1796 (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ?
1797 ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i;
1524 if (sz == 4 || checki32((int64_t)k)) { 1798 if (sz == 4 || checki32((int64_t)k)) {
1525 emit_i32(as, (int32_t)k); 1799 emit_i32(as, (int32_t)k);
1526 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); 1800 emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
@@ -1551,15 +1825,26 @@ static void asm_cnew(ASMState *as, IRIns *ir)
1551 } while (1); 1825 } while (1);
1552#endif 1826#endif
1553 lua_assert(sz == 4 || sz == 8); 1827 lua_assert(sz == 4 || sz == 8);
1828 } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
1829 ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1830 args[0] = ASMREF_L; /* lua_State *L */
1831 args[1] = ir->op1; /* CTypeID id */
1832 args[2] = ir->op2; /* CTSize sz */
1833 args[3] = ASMREF_TMP1; /* CTSize align */
1834 asm_gencall(as, ci, args);
1835 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1836 return;
1554 } 1837 }
1555 1838
1556 /* Combine initialization of marked, gct and ctypeid. */ 1839 /* Combine initialization of marked, gct and ctypeid. */
1557 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); 1840 emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
1558 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, 1841 emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
1559 (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); 1842 (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
1560 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); 1843 emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
1561 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); 1844 emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
1562 1845
1846 args[0] = ASMREF_L; /* lua_State *L */
1847 args[1] = ASMREF_TMP1; /* MSize size */
1563 asm_gencall(as, ci, args); 1848 asm_gencall(as, ci, args);
1564 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); 1849 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
1565} 1850}
@@ -1574,7 +1859,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
1574 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); 1859 Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1575 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); 1860 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1576 MCLabel l_end = emit_label(as); 1861 MCLabel l_end = emit_label(as);
1577 emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); 1862 emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
1578 emit_setgl(as, tab, gc.grayagain); 1863 emit_setgl(as, tab, gc.grayagain);
1579 emit_getgl(as, tmp, gc.grayagain); 1864 emit_getgl(as, tmp, gc.grayagain);
1580 emit_i8(as, ~LJ_GC_BLACK); 1865 emit_i8(as, ~LJ_GC_BLACK);
@@ -1637,36 +1922,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
1637 } 1922 }
1638} 1923}
1639 1924
1640/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
1641static int fpmjoin_pow(ASMState *as, IRIns *ir)
1642{
1643 IRIns *irp = IR(ir->op1);
1644 if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
1645 IRIns *irpp = IR(irp->op1);
1646 if (irpp == ir-2 && irpp->o == IR_FPMATH &&
1647 irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
1648 /* The modified regs must match with the *.dasc implementation. */
1649 RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
1650 IRIns *irx;
1651 if (ra_hasreg(ir->r))
1652 rset_clear(drop, ir->r); /* Dest reg handled below. */
1653 ra_evictset(as, drop);
1654 ra_destreg(as, ir, RID_XMM0);
1655 emit_call(as, lj_vm_pow_sse);
1656 irx = IR(irpp->op1);
1657 if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
1658 irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
1659 ra_left(as, RID_XMM0, irpp->op1);
1660 ra_left(as, RID_XMM1, irp->op2);
1661 return 1;
1662 }
1663 }
1664 return 0;
1665}
1666
1667static void asm_fpmath(ASMState *as, IRIns *ir) 1925static void asm_fpmath(ASMState *as, IRIns *ir)
1668{ 1926{
1669 IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; 1927 IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1670 if (fpm == IRFPM_SQRT) { 1928 if (fpm == IRFPM_SQRT) {
1671 Reg dest = ra_dest(as, ir, RSET_FPR); 1929 Reg dest = ra_dest(as, ir, RSET_FPR);
1672 Reg left = asm_fuseload(as, ir->op1, RSET_FPR); 1930 Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1697,51 +1955,29 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
1697 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); 1955 fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
1698 ra_left(as, RID_XMM0, ir->op1); 1956 ra_left(as, RID_XMM0, ir->op1);
1699 } 1957 }
1700 } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { 1958 } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1701 /* Rejoined to pow(). */ 1959 /* Rejoined to pow(). */
1702 } else { /* Handle x87 ops. */ 1960 } else {
1703 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ 1961 asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1704 Reg dest = ir->r; 1962 }
1705 if (ra_hasreg(dest)) { 1963}
1706 ra_free(as, dest); 1964
1707 ra_modified(as, dest); 1965#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2)
1708 emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); 1966
1709 } 1967static void asm_ldexp(ASMState *as, IRIns *ir)
1710 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); 1968{
1711 switch (fpm) { /* st0 = lj_vm_*(st0) */ 1969 int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
1712 case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; 1970 Reg dest = ir->r;
1713 case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; 1971 if (ra_hasreg(dest)) {
1714 case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; 1972 ra_free(as, dest);
1715 case IRFPM_COS: emit_x87op(as, XI_FCOS); break; 1973 ra_modified(as, dest);
1716 case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; 1974 emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
1717 case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
1718 /* Note: the use of fyl2xp1 would be pointless here. When computing
1719 ** log(1.0+eps) the precision is already lost after 1.0 is added.
1720 ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
1721 */
1722 emit_x87op(as, XI_FYL2X); break;
1723 case IRFPM_OTHER:
1724 switch (ir->o) {
1725 case IR_ATAN2:
1726 emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
1727 case IR_LDEXP:
1728 emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
1729 default: lua_assert(0); break;
1730 }
1731 break;
1732 default: lua_assert(0); break;
1733 }
1734 asm_x87load(as, ir->op1);
1735 switch (fpm) {
1736 case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
1737 case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
1738 case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
1739 case IRFPM_OTHER:
1740 if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
1741 break;
1742 default: break;
1743 }
1744 } 1975 }
1976 emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
1977 emit_x87op(as, XI_FPOP1);
1978 emit_x87op(as, XI_FSCALE);
1979 asm_x87load(as, ir->op1);
1980 asm_x87load(as, ir->op2);
1745} 1981}
1746 1982
1747static void asm_fppowi(ASMState *as, IRIns *ir) 1983static void asm_fppowi(ASMState *as, IRIns *ir)
@@ -1757,26 +1993,15 @@ static void asm_fppowi(ASMState *as, IRIns *ir)
1757 ra_left(as, RID_EAX, ir->op2); 1993 ra_left(as, RID_EAX, ir->op2);
1758} 1994}
1759 1995
1760#if LJ_64 && LJ_HASFFI 1996static void asm_pow(ASMState *as, IRIns *ir)
1761static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
1762{ 1997{
1763 const CCallInfo *ci = &lj_ir_callinfo[id]; 1998#if LJ_64 && LJ_HASFFI
1764 IRRef args[2]; 1999 if (!irt_isnum(ir->t))
1765 args[0] = ir->op1; 2000 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1766 args[1] = ir->op2; 2001 IRCALL_lj_carith_powu64);
1767 asm_setupresult(as, ir, ci); 2002 else
1768 asm_gencall(as, ci, args);
1769}
1770#endif 2003#endif
1771 2004 asm_fppowi(as, ir);
1772static void asm_intmod(ASMState *as, IRIns *ir)
1773{
1774 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
1775 IRRef args[2];
1776 args[0] = ir->op1;
1777 args[1] = ir->op2;
1778 asm_setupresult(as, ir, ci);
1779 asm_gencall(as, ci, args);
1780} 2005}
1781 2006
1782static int asm_swapops(ASMState *as, IRIns *ir) 2007static int asm_swapops(ASMState *as, IRIns *ir)
@@ -1959,6 +2184,44 @@ static void asm_add(ASMState *as, IRIns *ir)
1959 asm_intarith(as, ir, XOg_ADD); 2184 asm_intarith(as, ir, XOg_ADD);
1960} 2185}
1961 2186
2187static void asm_sub(ASMState *as, IRIns *ir)
2188{
2189 if (irt_isnum(ir->t))
2190 asm_fparith(as, ir, XO_SUBSD);
2191 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2192 asm_intarith(as, ir, XOg_SUB);
2193}
2194
2195static void asm_mul(ASMState *as, IRIns *ir)
2196{
2197 if (irt_isnum(ir->t))
2198 asm_fparith(as, ir, XO_MULSD);
2199 else
2200 asm_intarith(as, ir, XOg_X_IMUL);
2201}
2202
2203static void asm_div(ASMState *as, IRIns *ir)
2204{
2205#if LJ_64 && LJ_HASFFI
2206 if (!irt_isnum(ir->t))
2207 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2208 IRCALL_lj_carith_divu64);
2209 else
2210#endif
2211 asm_fparith(as, ir, XO_DIVSD);
2212}
2213
2214static void asm_mod(ASMState *as, IRIns *ir)
2215{
2216#if LJ_64 && LJ_HASFFI
2217 if (!irt_isint(ir->t))
2218 asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2219 IRCALL_lj_carith_modu64);
2220 else
2221#endif
2222 asm_callid(as, ir, IRCALL_lj_vm_modi);
2223}
2224
1962static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) 2225static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1963{ 2226{
1964 Reg dest = ra_dest(as, ir, RSET_GPR); 2227 Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1966,7 +2229,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
1966 ra_left(as, dest, ir->op1); 2229 ra_left(as, dest, ir->op1);
1967} 2230}
1968 2231
1969static void asm_min_max(ASMState *as, IRIns *ir, int cc) 2232static void asm_neg(ASMState *as, IRIns *ir)
2233{
2234 if (irt_isnum(ir->t))
2235 asm_fparith(as, ir, XO_XORPS);
2236 else
2237 asm_neg_not(as, ir, XOg_NEG);
2238}
2239
2240#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
2241
2242static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
1970{ 2243{
1971 Reg right, dest = ra_dest(as, ir, RSET_GPR); 2244 Reg right, dest = ra_dest(as, ir, RSET_GPR);
1972 IRRef lref = ir->op1, rref = ir->op2; 2245 IRRef lref = ir->op1, rref = ir->op2;
@@ -1977,7 +2250,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
1977 ra_left(as, dest, lref); 2250 ra_left(as, dest, lref);
1978} 2251}
1979 2252
1980static void asm_bitswap(ASMState *as, IRIns *ir) 2253static void asm_min(ASMState *as, IRIns *ir)
2254{
2255 if (irt_isnum(ir->t))
2256 asm_fparith(as, ir, XO_MINSD);
2257 else
2258 asm_intmin_max(as, ir, CC_G);
2259}
2260
2261static void asm_max(ASMState *as, IRIns *ir)
2262{
2263 if (irt_isnum(ir->t))
2264 asm_fparith(as, ir, XO_MAXSD);
2265 else
2266 asm_intmin_max(as, ir, CC_L);
2267}
2268
2269/* Note: don't use LEA for overflow-checking arithmetic! */
2270#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
2271#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
2272#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
2273
2274#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
2275
2276static void asm_bswap(ASMState *as, IRIns *ir)
1981{ 2277{
1982 Reg dest = ra_dest(as, ir, RSET_GPR); 2278 Reg dest = ra_dest(as, ir, RSET_GPR);
1983 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), 2279 as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1985,7 +2281,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
1985 ra_left(as, dest, ir->op1); 2281 ra_left(as, dest, ir->op1);
1986} 2282}
1987 2283
1988static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) 2284#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
2285#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
2286#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
2287
2288static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
1989{ 2289{
1990 IRRef rref = ir->op2; 2290 IRRef rref = ir->op2;
1991 IRIns *irr = IR(rref); 2291 IRIns *irr = IR(rref);
@@ -1994,11 +2294,27 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
1994 int shift; 2294 int shift;
1995 dest = ra_dest(as, ir, RSET_GPR); 2295 dest = ra_dest(as, ir, RSET_GPR);
1996 shift = irr->i & (irt_is64(ir->t) ? 63 : 31); 2296 shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
2297 if (!xv && shift && (as->flags & JIT_F_BMI2)) {
2298 Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
2299 if (left != dest) { /* BMI2 rotate right by constant. */
2300 emit_i8(as, xs == XOg_ROL ? -shift : shift);
2301 emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
2302 return;
2303 }
2304 }
1997 switch (shift) { 2305 switch (shift) {
1998 case 0: break; 2306 case 0: break;
1999 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; 2307 case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
2000 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; 2308 default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
2001 } 2309 }
2310 } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
2311 Reg left, right;
2312 dest = ra_dest(as, ir, RSET_GPR);
2313 right = ra_alloc1(as, rref, RSET_GPR);
2314 left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
2315 irt_is64(ir->t));
2316 emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
2317 return;
2002 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ 2318 } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
2003 Reg right; 2319 Reg right;
2004 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); 2320 dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
@@ -2024,6 +2340,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
2024 */ 2340 */
2025} 2341}
2026 2342
2343#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
2344#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
2345#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
2346#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
2347#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
2348
2027/* -- Comparisons --------------------------------------------------------- */ 2349/* -- Comparisons --------------------------------------------------------- */
2028 2350
2029/* Virtual flags for unordered FP comparisons. */ 2351/* Virtual flags for unordered FP comparisons. */
@@ -2050,8 +2372,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
2050}; 2372};
2051 2373
2052/* FP and integer comparisons. */ 2374/* FP and integer comparisons. */
2053static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) 2375static void asm_comp(ASMState *as, IRIns *ir)
2054{ 2376{
2377 uint32_t cc = asm_compmap[ir->o];
2055 if (irt_isnum(ir->t)) { 2378 if (irt_isnum(ir->t)) {
2056 IRRef lref = ir->op1; 2379 IRRef lref = ir->op1;
2057 IRRef rref = ir->op2; 2380 IRRef rref = ir->op2;
@@ -2072,7 +2395,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2072 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ 2395 cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
2073 } 2396 }
2074 left = ra_alloc1(as, lref, RSET_FPR); 2397 left = ra_alloc1(as, lref, RSET_FPR);
2075 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2076 l_around = emit_label(as); 2398 l_around = emit_label(as);
2077 asm_guardcc(as, cc >> 4); 2399 asm_guardcc(as, cc >> 4);
2078 if (cc & VCC_P) { /* Extra CC_P branch required? */ 2400 if (cc & VCC_P) { /* Extra CC_P branch required? */
@@ -2089,6 +2411,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2089 emit_jcc(as, CC_P, as->mcp); 2411 emit_jcc(as, CC_P, as->mcp);
2090 } 2412 }
2091 } 2413 }
2414 right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
2092 emit_mrm(as, XO_UCOMISD, left, right); 2415 emit_mrm(as, XO_UCOMISD, left, right);
2093 } else { 2416 } else {
2094 IRRef lref = ir->op1, rref = ir->op2; 2417 IRRef lref = ir->op1, rref = ir->op2;
@@ -2206,6 +2529,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
2206 } 2529 }
2207} 2530}
2208 2531
2532#define asm_equal(as, ir) asm_comp(as, ir)
2533
2209#if LJ_32 && LJ_HASFFI 2534#if LJ_32 && LJ_HASFFI
2210/* 64 bit integer comparisons in 32 bit mode. */ 2535/* 64 bit integer comparisons in 32 bit mode. */
2211static void asm_comp_int64(ASMState *as, IRIns *ir) 2536static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2288,13 +2613,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2288 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ 2613 int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
2289 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; 2614 if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
2290 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ 2615 if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
2291 if (usehi || uselo) {
2292 if (irt_isfp(ir->t))
2293 asm_conv_fp_int64(as, ir);
2294 else
2295 asm_conv_int64_fp(as, ir);
2296 }
2297 as->curins--; /* Always skip the CONV. */ 2616 as->curins--; /* Always skip the CONV. */
2617 if (usehi || uselo)
2618 asm_conv64(as, ir);
2298 return; 2619 return;
2299 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ 2620 } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
2300 asm_comp_int64(as, ir); 2621 asm_comp_int64(as, ir);
@@ -2343,6 +2664,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2343#endif 2664#endif
2344} 2665}
2345 2666
2667/* -- Profiling ----------------------------------------------------------- */
2668
2669static void asm_prof(ASMState *as, IRIns *ir)
2670{
2671 UNUSED(ir);
2672 asm_guardcc(as, CC_NE);
2673 emit_i8(as, HOOK_PROFILE);
2674 emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
2675}
2676
2346/* -- Stack handling ------------------------------------------------------ */ 2677/* -- Stack handling ------------------------------------------------------ */
2347 2678
2348/* Check Lua stack size for overflow. Use exit handler as fallback. */ 2679/* Check Lua stack size for overflow. Use exit handler as fallback. */
@@ -2357,14 +2688,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2357 emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); 2688 emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
2358 else 2689 else
2359 ra_modified(as, r); 2690 ra_modified(as, r);
2360 emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); 2691 emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
2361 if (ra_hasreg(pbase) && pbase != r) 2692 if (ra_hasreg(pbase) && pbase != r)
2362 emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); 2693 emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
2363 else 2694 else
2695#if LJ_GC64
2696 emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
2697 (int32_t)dispofs(as, &J2G(as->J)->jit_base));
2698#else
2364 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, 2699 emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
2365 ptr2addr(&J2G(as->J)->jit_base)); 2700 ptr2addr(&J2G(as->J)->jit_base));
2366 emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); 2701#endif
2367 emit_getgl(as, r, jit_L); 2702 emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
2703 emit_getgl(as, r, cur_L);
2368 if (allow == RSET_EMPTY) /* Spill temp. register. */ 2704 if (allow == RSET_EMPTY) /* Spill temp. register. */
2369 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); 2705 emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
2370} 2706}
@@ -2373,13 +2709,15 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
2373static void asm_stack_restore(ASMState *as, SnapShot *snap) 2709static void asm_stack_restore(ASMState *as, SnapShot *snap)
2374{ 2710{
2375 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2711 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2376 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; 2712#if !LJ_FR2 || defined(LUA_USE_ASSERT)
2713 SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
2714#endif
2377 MSize n, nent = snap->nent; 2715 MSize n, nent = snap->nent;
2378 /* Store the value of all modified slots to the Lua stack. */ 2716 /* Store the value of all modified slots to the Lua stack. */
2379 for (n = 0; n < nent; n++) { 2717 for (n = 0; n < nent; n++) {
2380 SnapEntry sn = map[n]; 2718 SnapEntry sn = map[n];
2381 BCReg s = snap_slot(sn); 2719 BCReg s = snap_slot(sn);
2382 int32_t ofs = 8*((int32_t)s-1); 2720 int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
2383 IRRef ref = snap_ref(sn); 2721 IRRef ref = snap_ref(sn);
2384 IRIns *ir = IR(ref); 2722 IRIns *ir = IR(ref);
2385 if ((sn & SNAP_NORESTORE)) 2723 if ((sn & SNAP_NORESTORE))
@@ -2392,16 +2730,44 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
2392 (LJ_DUALNUM && irt_isinteger(ir->t))); 2730 (LJ_DUALNUM && irt_isinteger(ir->t)));
2393 if (!irref_isk(ref)) { 2731 if (!irref_isk(ref)) {
2394 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); 2732 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
2733#if LJ_GC64
2734 if (irt_is64(ir->t)) {
2735 /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
2736 emit_u32(as, irt_toitype(ir->t) << 15);
2737 emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
2738 } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
2739 emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
2740 } else {
2741 emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
2742 }
2743#endif
2395 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); 2744 emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
2745#if LJ_GC64
2746 } else {
2747 TValue k;
2748 lj_ir_kvalue(as->J->L, &k, ir);
2749 if (tvisnil(&k)) {
2750 emit_i32(as, -1);
2751 emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
2752 } else {
2753 emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
2754 emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
2755 }
2756#else
2396 } else if (!irt_ispri(ir->t)) { 2757 } else if (!irt_ispri(ir->t)) {
2397 emit_movmroi(as, RID_BASE, ofs, ir->i); 2758 emit_movmroi(as, RID_BASE, ofs, ir->i);
2759#endif
2398 } 2760 }
2399 if ((sn & (SNAP_CONT|SNAP_FRAME))) { 2761 if ((sn & (SNAP_CONT|SNAP_FRAME))) {
2762#if !LJ_FR2
2400 if (s != 0) /* Do not overwrite link to previous frame. */ 2763 if (s != 0) /* Do not overwrite link to previous frame. */
2401 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); 2764 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
2765#endif
2766#if !LJ_GC64
2402 } else { 2767 } else {
2403 if (!(LJ_64 && irt_islightud(ir->t))) 2768 if (!(LJ_64 && irt_islightud(ir->t)))
2404 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); 2769 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
2770#endif
2405 } 2771 }
2406 } 2772 }
2407 checkmclim(as); 2773 checkmclim(as);
@@ -2427,11 +2793,15 @@ static void asm_gc_check(ASMState *as)
2427 args[1] = ASMREF_TMP2; /* MSize steps */ 2793 args[1] = ASMREF_TMP2; /* MSize steps */
2428 asm_gencall(as, ci, args); 2794 asm_gencall(as, ci, args);
2429 tmp = ra_releasetmp(as, ASMREF_TMP1); 2795 tmp = ra_releasetmp(as, ASMREF_TMP1);
2796#if LJ_GC64
2797 emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
2798#else
2430 emit_loada(as, tmp, J2G(as->J)); 2799 emit_loada(as, tmp, J2G(as->J));
2800#endif
2431 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); 2801 emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
2432 /* Jump around GC step if GC total < GC threshold. */ 2802 /* Jump around GC step if GC total < GC threshold. */
2433 emit_sjcc(as, CC_B, l_end); 2803 emit_sjcc(as, CC_B, l_end);
2434 emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); 2804 emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
2435 emit_getgl(as, tmp, gc.total); 2805 emit_getgl(as, tmp, gc.total);
2436 as->gcsteps = 0; 2806 as->gcsteps = 0;
2437 checkmclim(as); 2807 checkmclim(as);
@@ -2496,7 +2866,7 @@ static void asm_head_root_base(ASMState *as)
2496 if (rset_test(as->modset, r) || irt_ismarked(ir->t)) 2866 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
2497 ir->r = RID_INIT; /* No inheritance for modified BASE register. */ 2867 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
2498 if (r != RID_BASE) 2868 if (r != RID_BASE)
2499 emit_rr(as, XO_MOV, r, RID_BASE); 2869 emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
2500 } 2870 }
2501} 2871}
2502 2872
@@ -2512,8 +2882,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
2512 if (irp->r == r) { 2882 if (irp->r == r) {
2513 rset_clear(allow, r); /* Mark same BASE register as coalesced. */ 2883 rset_clear(allow, r); /* Mark same BASE register as coalesced. */
2514 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { 2884 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
2885 /* Move from coalesced parent reg. */
2515 rset_clear(allow, irp->r); 2886 rset_clear(allow, irp->r);
2516 emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ 2887 emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
2517 } else { 2888 } else {
2518 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ 2889 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
2519 } 2890 }
@@ -2592,163 +2963,6 @@ static void asm_tail_prep(ASMState *as)
2592 } 2963 }
2593} 2964}
2594 2965
2595/* -- Instruction dispatch ------------------------------------------------ */
2596
2597/* Assemble a single instruction. */
2598static void asm_ir(ASMState *as, IRIns *ir)
2599{
2600 switch ((IROp)ir->o) {
2601 /* Miscellaneous ops. */
2602 case IR_LOOP: asm_loop(as); break;
2603 case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
2604 case IR_USE:
2605 ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
2606 case IR_PHI: asm_phi(as, ir); break;
2607 case IR_HIOP: asm_hiop(as, ir); break;
2608 case IR_GCSTEP: asm_gcstep(as, ir); break;
2609
2610 /* Guarded assertions. */
2611 case IR_LT: case IR_GE: case IR_LE: case IR_GT:
2612 case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
2613 case IR_EQ: case IR_NE: case IR_ABC:
2614 asm_comp(as, ir, asm_compmap[ir->o]);
2615 break;
2616
2617 case IR_RETF: asm_retf(as, ir); break;
2618
2619 /* Bit ops. */
2620 case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
2621 case IR_BSWAP: asm_bitswap(as, ir); break;
2622
2623 case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
2624 case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
2625 case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
2626
2627 case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
2628 case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
2629 case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
2630 case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
2631 case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
2632
2633 /* Arithmetic ops. */
2634 case IR_ADD: asm_add(as, ir); break;
2635 case IR_SUB:
2636 if (irt_isnum(ir->t))
2637 asm_fparith(as, ir, XO_SUBSD);
2638 else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
2639 asm_intarith(as, ir, XOg_SUB);
2640 break;
2641 case IR_MUL:
2642 if (irt_isnum(ir->t))
2643 asm_fparith(as, ir, XO_MULSD);
2644 else
2645 asm_intarith(as, ir, XOg_X_IMUL);
2646 break;
2647 case IR_DIV:
2648#if LJ_64 && LJ_HASFFI
2649 if (!irt_isnum(ir->t))
2650 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
2651 IRCALL_lj_carith_divu64);
2652 else
2653#endif
2654 asm_fparith(as, ir, XO_DIVSD);
2655 break;
2656 case IR_MOD:
2657#if LJ_64 && LJ_HASFFI
2658 if (!irt_isint(ir->t))
2659 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
2660 IRCALL_lj_carith_modu64);
2661 else
2662#endif
2663 asm_intmod(as, ir);
2664 break;
2665
2666 case IR_NEG:
2667 if (irt_isnum(ir->t))
2668 asm_fparith(as, ir, XO_XORPS);
2669 else
2670 asm_neg_not(as, ir, XOg_NEG);
2671 break;
2672 case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
2673
2674 case IR_MIN:
2675 if (irt_isnum(ir->t))
2676 asm_fparith(as, ir, XO_MINSD);
2677 else
2678 asm_min_max(as, ir, CC_G);
2679 break;
2680 case IR_MAX:
2681 if (irt_isnum(ir->t))
2682 asm_fparith(as, ir, XO_MAXSD);
2683 else
2684 asm_min_max(as, ir, CC_L);
2685 break;
2686
2687 case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
2688 asm_fpmath(as, ir);
2689 break;
2690 case IR_POW:
2691#if LJ_64 && LJ_HASFFI
2692 if (!irt_isnum(ir->t))
2693 asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
2694 IRCALL_lj_carith_powu64);
2695 else
2696#endif
2697 asm_fppowi(as, ir);
2698 break;
2699
2700 /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
2701 case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
2702 case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
2703 case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
2704
2705 /* Memory references. */
2706 case IR_AREF: asm_aref(as, ir); break;
2707 case IR_HREF: asm_href(as, ir); break;
2708 case IR_HREFK: asm_hrefk(as, ir); break;
2709 case IR_NEWREF: asm_newref(as, ir); break;
2710 case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
2711 case IR_FREF: asm_fref(as, ir); break;
2712 case IR_STRREF: asm_strref(as, ir); break;
2713
2714 /* Loads and stores. */
2715 case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
2716 asm_ahuvload(as, ir);
2717 break;
2718 case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
2719 case IR_SLOAD: asm_sload(as, ir); break;
2720
2721 case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
2722 case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
2723
2724 /* Allocations. */
2725 case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
2726 case IR_TNEW: asm_tnew(as, ir); break;
2727 case IR_TDUP: asm_tdup(as, ir); break;
2728 case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
2729
2730 /* Write barriers. */
2731 case IR_TBAR: asm_tbar(as, ir); break;
2732 case IR_OBAR: asm_obar(as, ir); break;
2733
2734 /* Type conversions. */
2735 case IR_TOBIT: asm_tobit(as, ir); break;
2736 case IR_CONV: asm_conv(as, ir); break;
2737 case IR_TOSTR: asm_tostr(as, ir); break;
2738 case IR_STRTO: asm_strto(as, ir); break;
2739
2740 /* Calls. */
2741 case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
2742 case IR_CALLXS: asm_callx(as, ir); break;
2743 case IR_CARG: break;
2744
2745 default:
2746 setintV(&as->J->errinfo, ir->o);
2747 lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
2748 break;
2749 }
2750}
2751
2752/* -- Trace setup --------------------------------------------------------- */ 2966/* -- Trace setup --------------------------------------------------------- */
2753 2967
2754/* Ensure there are enough stack slots for call arguments. */ 2968/* Ensure there are enough stack slots for call arguments. */
@@ -2771,6 +2985,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
2771static void asm_setup_target(ASMState *as) 2985static void asm_setup_target(ASMState *as)
2772{ 2986{
2773 asm_exitstub_setup(as, as->T->nsnap); 2987 asm_exitstub_setup(as, as->T->nsnap);
2988 as->mrm.base = 0;
2774} 2989}
2775 2990
2776/* -- Trace patching ------------------------------------------------------ */ 2991/* -- Trace patching ------------------------------------------------------ */
@@ -2883,13 +3098,19 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2883 MSize len = T->szmcode; 3098 MSize len = T->szmcode;
2884 MCode *px = exitstub_addr(J, exitno) - 6; 3099 MCode *px = exitstub_addr(J, exitno) - 6;
2885 MCode *pe = p+len-6; 3100 MCode *pe = p+len-6;
2886 uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); 3101#if LJ_GC64
3102 uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
3103#else
3104 uint32_t statei = u32ptr(&J2G(J)->vmstate);
3105#endif
2887 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) 3106 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
2888 *(int32_t *)(p+len-4) = jmprel(p+len, target); 3107 *(int32_t *)(p+len-4) = jmprel(p+len, target);
2889 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ 3108 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
2890 for (; p < pe; p += asm_x86_inslen(p)) 3109 for (; p < pe; p += asm_x86_inslen(p)) {
2891 if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) 3110 intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
3111 if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
2892 break; 3112 break;
3113 }
2893 lua_assert(p < pe); 3114 lua_assert(p < pe);
2894 for (; p < pe; p += asm_x86_inslen(p)) 3115 for (; p < pe; p += asm_x86_inslen(p))
2895 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) 3116 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 8fd7a2ed..44c78f83 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
89 _(ISFC, dst, ___, var, ___) \ 89 _(ISFC, dst, ___, var, ___) \
90 _(IST, ___, ___, var, ___) \ 90 _(IST, ___, ___, var, ___) \
91 _(ISF, ___, ___, var, ___) \ 91 _(ISF, ___, ___, var, ___) \
92 _(ISTYPE, var, ___, lit, ___) \
93 _(ISNUM, var, ___, lit, ___) \
92 \ 94 \
93 /* Unary ops. */ \ 95 /* Unary ops. */ \
94 _(MOV, dst, ___, var, ___) \ 96 _(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
143 _(TGETV, dst, var, var, index) \ 145 _(TGETV, dst, var, var, index) \
144 _(TGETS, dst, var, str, index) \ 146 _(TGETS, dst, var, str, index) \
145 _(TGETB, dst, var, lit, index) \ 147 _(TGETB, dst, var, lit, index) \
148 _(TGETR, dst, var, var, index) \
146 _(TSETV, var, var, var, newindex) \ 149 _(TSETV, var, var, var, newindex) \
147 _(TSETS, var, var, str, newindex) \ 150 _(TSETS, var, var, str, newindex) \
148 _(TSETB, var, var, lit, newindex) \ 151 _(TSETB, var, var, lit, newindex) \
149 _(TSETM, base, ___, num, newindex) \ 152 _(TSETM, base, ___, num, newindex) \
153 _(TSETR, var, var, var, newindex) \
150 \ 154 \
151 /* Calls and vararg handling. T = tail call. */ \ 155 /* Calls and vararg handling. T = tail call. */ \
152 _(CALLM, base, lit, lit, call) \ 156 _(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index ad564619..f458d41f 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,14 +36,15 @@
36/* If you perform *any* kind of private modifications to the bytecode itself 36/* If you perform *any* kind of private modifications to the bytecode itself
37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. 37** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
38*/ 38*/
39#define BCDUMP_VERSION 1 39#define BCDUMP_VERSION 2
40 40
41/* Compatibility flags. */ 41/* Compatibility flags. */
42#define BCDUMP_F_BE 0x01 42#define BCDUMP_F_BE 0x01
43#define BCDUMP_F_STRIP 0x02 43#define BCDUMP_F_STRIP 0x02
44#define BCDUMP_F_FFI 0x04 44#define BCDUMP_F_FFI 0x04
45#define BCDUMP_F_FR2 0x08
45 46
46#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) 47#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
47 48
48/* Type codes for the GC constants of a prototype. Plus length for strings. */ 49/* Type codes for the GC constants of a prototype. Plus length for strings. */
49enum { 50enum {
@@ -61,6 +62,7 @@ enum {
61 62
62LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, 63LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
63 void *data, int strip); 64 void *data, int strip);
65LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
64LJ_FUNC GCproto *lj_bcread(LexState *ls); 66LJ_FUNC GCproto *lj_bcread(LexState *ls);
65 67
66#endif 68#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index b88794eb..1585272f 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_buf.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
14#include "lj_bc.h" 15#include "lj_bc.h"
@@ -20,6 +21,7 @@
20#include "lj_lex.h" 21#include "lj_lex.h"
21#include "lj_bcdump.h" 22#include "lj_bcdump.h"
22#include "lj_state.h" 23#include "lj_state.h"
24#include "lj_strfmt.h"
23 25
24/* Reuse some lexer fields for our own purposes. */ 26/* Reuse some lexer fields for our own purposes. */
25#define bcread_flags(ls) ls->level 27#define bcread_flags(ls) ls->level
@@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
38 const char *name = ls->chunkarg; 40 const char *name = ls->chunkarg;
39 if (*name == BCDUMP_HEAD1) name = "(binary)"; 41 if (*name == BCDUMP_HEAD1) name = "(binary)";
40 else if (*name == '@' || *name == '=') name++; 42 else if (*name == '@' || *name == '=') name++;
41 lj_str_pushf(L, "%s: %s", name, err2msg(em)); 43 lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
42 lj_err_throw(L, LUA_ERRSYNTAX); 44 lj_err_throw(L, LUA_ERRSYNTAX);
43} 45}
44 46
45/* Resize input buffer. */ 47/* Refill buffer. */
46static void bcread_resize(LexState *ls, MSize len)
47{
48 if (ls->sb.sz < len) {
49 MSize sz = ls->sb.sz * 2;
50 while (len > sz) sz = sz * 2;
51 lj_str_resizebuf(ls->L, &ls->sb, sz);
52 /* Caveat: this may change ls->sb.buf which may affect ls->p. */
53 }
54}
55
56/* Refill buffer if needed. */
57static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) 48static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
58{ 49{
59 lua_assert(len != 0); 50 lua_assert(len != 0);
60 if (len > LJ_MAX_MEM || ls->current < 0) 51 if (len > LJ_MAX_BUF || ls->c < 0)
61 bcread_error(ls, LJ_ERR_BCBAD); 52 bcread_error(ls, LJ_ERR_BCBAD);
62 do { 53 do {
63 const char *buf; 54 const char *buf;
64 size_t size; 55 size_t sz;
65 if (ls->n) { /* Copy remainder to buffer. */ 56 char *p = sbufB(&ls->sb);
66 if (ls->sb.n) { /* Move down in buffer. */ 57 MSize n = (MSize)(ls->pe - ls->p);
67 lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); 58 if (n) { /* Copy remainder to buffer. */
68 if (ls->n != ls->sb.n) 59 if (sbuflen(&ls->sb)) { /* Move down in buffer. */
69 memmove(ls->sb.buf, ls->p, ls->n); 60 lua_assert(ls->pe == sbufP(&ls->sb));
61 if (ls->p != p) memmove(p, ls->p, n);
70 } else { /* Copy from buffer provided by reader. */ 62 } else { /* Copy from buffer provided by reader. */
71 bcread_resize(ls, len); 63 p = lj_buf_need(&ls->sb, len);
72 memcpy(ls->sb.buf, ls->p, ls->n); 64 memcpy(p, ls->p, n);
73 } 65 }
74 ls->p = ls->sb.buf; 66 ls->p = p;
67 ls->pe = p + n;
75 } 68 }
76 ls->sb.n = ls->n; 69 setsbufP(&ls->sb, p + n);
77 buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ 70 buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
78 if (buf == NULL || size == 0) { /* EOF? */ 71 if (buf == NULL || sz == 0) { /* EOF? */
79 if (need) bcread_error(ls, LJ_ERR_BCBAD); 72 if (need) bcread_error(ls, LJ_ERR_BCBAD);
80 ls->current = -1; /* Only bad if we get called again. */ 73 ls->c = -1; /* Only bad if we get called again. */
81 break; 74 break;
82 } 75 }
83 if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L); 76 if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
84 if (ls->sb.n) { /* Append to buffer. */ 77 if (n) { /* Append to buffer. */
85 MSize n = ls->sb.n + (MSize)size; 78 n += (MSize)sz;
86 bcread_resize(ls, n < len ? len : n); 79 p = lj_buf_need(&ls->sb, n < len ? len : n);
87 memcpy(ls->sb.buf + ls->sb.n, buf, size); 80 memcpy(sbufP(&ls->sb), buf, sz);
88 ls->n = ls->sb.n = n; 81 setsbufP(&ls->sb, p + n);
89 ls->p = ls->sb.buf; 82 ls->p = p;
83 ls->pe = p + n;
90 } else { /* Return buffer provided by reader. */ 84 } else { /* Return buffer provided by reader. */
91 ls->n = (MSize)size;
92 ls->p = buf; 85 ls->p = buf;
86 ls->pe = buf + sz;
93 } 87 }
94 } while (ls->n < len); 88 } while ((MSize)(ls->pe - ls->p) < len);
95} 89}
96 90
97/* Need a certain number of bytes. */ 91/* Need a certain number of bytes. */
98static LJ_AINLINE void bcread_need(LexState *ls, MSize len) 92static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
99{ 93{
100 if (LJ_UNLIKELY(ls->n < len)) 94 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
101 bcread_fill(ls, len, 1); 95 bcread_fill(ls, len, 1);
102} 96}
103 97
104/* Want to read up to a certain number of bytes, but may need less. */ 98/* Want to read up to a certain number of bytes, but may need less. */
105static LJ_AINLINE void bcread_want(LexState *ls, MSize len) 99static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
106{ 100{
107 if (LJ_UNLIKELY(ls->n < len)) 101 if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
108 bcread_fill(ls, len, 0); 102 bcread_fill(ls, len, 0);
109} 103}
110 104
111#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
112#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
113
114/* Return memory block from buffer. */ 105/* Return memory block from buffer. */
115static uint8_t *bcread_mem(LexState *ls, MSize len) 106static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
116{ 107{
117 uint8_t *p = (uint8_t *)ls->p; 108 uint8_t *p = (uint8_t *)ls->p;
118 bcread_consume(ls, len); 109 ls->p += len;
119 ls->p = (char *)p + len; 110 lua_assert(ls->p <= ls->pe);
120 return p; 111 return p;
121} 112}
122 113
@@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
129/* Read byte from buffer. */ 120/* Read byte from buffer. */
130static LJ_AINLINE uint32_t bcread_byte(LexState *ls) 121static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
131{ 122{
132 bcread_dec(ls); 123 lua_assert(ls->p < ls->pe);
133 return (uint32_t)(uint8_t)*ls->p++; 124 return (uint32_t)(uint8_t)*ls->p++;
134} 125}
135 126
136/* Read ULEB128 value from buffer. */ 127/* Read ULEB128 value from buffer. */
137static uint32_t bcread_uleb128(LexState *ls) 128static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
138{ 129{
139 const uint8_t *p = (const uint8_t *)ls->p; 130 uint32_t v = lj_buf_ruleb128(&ls->p);
140 uint32_t v = *p++; 131 lua_assert(ls->p <= ls->pe);
141 if (LJ_UNLIKELY(v >= 0x80)) {
142 int sh = 0;
143 v &= 0x7f;
144 do {
145 v |= ((*p & 0x7f) << (sh += 7));
146 bcread_dec(ls);
147 } while (*p++ >= 0x80);
148 }
149 bcread_dec(ls);
150 ls->p = (char *)p;
151 return v; 132 return v;
152} 133}
153 134
@@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
161 v &= 0x3f; 142 v &= 0x3f;
162 do { 143 do {
163 v |= ((*p & 0x7f) << (sh += 7)); 144 v |= ((*p & 0x7f) << (sh += 7));
164 bcread_dec(ls);
165 } while (*p++ >= 0x80); 145 } while (*p++ >= 0x80);
166 } 146 }
167 bcread_dec(ls);
168 ls->p = (char *)p; 147 ls->p = (char *)p;
148 lua_assert(ls->p <= ls->pe);
169 return v; 149 return v;
170} 150}
171 151
@@ -213,7 +193,7 @@ static void bcread_ktabk(LexState *ls, TValue *o)
213 o->u32.hi = bcread_uleb128(ls); 193 o->u32.hi = bcread_uleb128(ls);
214 } else { 194 } else {
215 lua_assert(tp <= BCDUMP_KTAB_TRUE); 195 lua_assert(tp <= BCDUMP_KTAB_TRUE);
216 setitype(o, ~tp); 196 setpriV(o, ~tp);
217 } 197 }
218} 198}
219 199
@@ -327,25 +307,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
327} 307}
328 308
329/* Read a prototype. */ 309/* Read a prototype. */
330static GCproto *bcread_proto(LexState *ls) 310GCproto *lj_bcread_proto(LexState *ls)
331{ 311{
332 GCproto *pt; 312 GCproto *pt;
333 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; 313 MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
334 MSize ofsk, ofsuv, ofsdbg; 314 MSize ofsk, ofsuv, ofsdbg;
335 MSize sizedbg = 0; 315 MSize sizedbg = 0;
336 BCLine firstline = 0, numline = 0; 316 BCLine firstline = 0, numline = 0;
337 MSize len, startn;
338
339 /* Read length. */
340 if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
341 ls->n--; ls->p++;
342 return NULL;
343 }
344 bcread_want(ls, 5);
345 len = bcread_uleb128(ls);
346 if (!len) return NULL; /* EOF */
347 bcread_need(ls, len);
348 startn = ls->n;
349 317
350 /* Read prototype header. */ 318 /* Read prototype header. */
351 flags = bcread_byte(ls); 319 flags = bcread_byte(ls);
@@ -414,9 +382,6 @@ static GCproto *bcread_proto(LexState *ls)
414 setmref(pt->uvinfo, NULL); 382 setmref(pt->uvinfo, NULL);
415 setmref(pt->varinfo, NULL); 383 setmref(pt->varinfo, NULL);
416 } 384 }
417
418 if (len != startn - ls->n)
419 bcread_error(ls, LJ_ERR_BCBAD);
420 return pt; 385 return pt;
421} 386}
422 387
@@ -430,6 +395,7 @@ static int bcread_header(LexState *ls)
430 bcread_byte(ls) != BCDUMP_VERSION) return 0; 395 bcread_byte(ls) != BCDUMP_VERSION) return 0;
431 bcread_flags(ls) = flags = bcread_uleb128(ls); 396 bcread_flags(ls) = flags = bcread_uleb128(ls);
432 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; 397 if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
398 if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
433 if ((flags & BCDUMP_F_FFI)) { 399 if ((flags & BCDUMP_F_FFI)) {
434#if LJ_HASFFI 400#if LJ_HASFFI
435 lua_State *L = ls->L; 401 lua_State *L = ls->L;
@@ -456,19 +422,33 @@ static int bcread_header(LexState *ls)
456GCproto *lj_bcread(LexState *ls) 422GCproto *lj_bcread(LexState *ls)
457{ 423{
458 lua_State *L = ls->L; 424 lua_State *L = ls->L;
459 lua_assert(ls->current == BCDUMP_HEAD1); 425 lua_assert(ls->c == BCDUMP_HEAD1);
460 bcread_savetop(L, ls, L->top); 426 bcread_savetop(L, ls, L->top);
461 lj_str_resetbuf(&ls->sb); 427 lj_buf_reset(&ls->sb);
462 /* Check for a valid bytecode dump header. */ 428 /* Check for a valid bytecode dump header. */
463 if (!bcread_header(ls)) 429 if (!bcread_header(ls))
464 bcread_error(ls, LJ_ERR_BCFMT); 430 bcread_error(ls, LJ_ERR_BCFMT);
465 for (;;) { /* Process all prototypes in the bytecode dump. */ 431 for (;;) { /* Process all prototypes in the bytecode dump. */
466 GCproto *pt = bcread_proto(ls); 432 GCproto *pt;
467 if (!pt) break; 433 MSize len;
434 const char *startp;
435 /* Read length. */
436 if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
437 ls->p++;
438 break;
439 }
440 bcread_want(ls, 5);
441 len = bcread_uleb128(ls);
442 if (!len) break; /* EOF */
443 bcread_need(ls, len);
444 startp = ls->p;
445 pt = lj_bcread_proto(ls);
446 if (ls->p != startp + len)
447 bcread_error(ls, LJ_ERR_BCBAD);
468 setprotoV(L, L->top, pt); 448 setprotoV(L, L->top, pt);
469 incr_top(L); 449 incr_top(L);
470 } 450 }
471 if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) 451 if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
472 bcread_error(ls, LJ_ERR_BCBAD); 452 bcread_error(ls, LJ_ERR_BCBAD);
473 /* Pop off last prototype. */ 453 /* Pop off last prototype. */
474 L->top--; 454 L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index f57fcfd6..dd38289e 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_str.h" 11#include "lj_buf.h"
12#include "lj_bc.h" 12#include "lj_bc.h"
13#if LJ_HASFFI 13#if LJ_HASFFI
14#include "lj_ctype.h" 14#include "lj_ctype.h"
@@ -17,13 +17,13 @@
17#include "lj_dispatch.h" 17#include "lj_dispatch.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#endif 19#endif
20#include "lj_strfmt.h"
20#include "lj_bcdump.h" 21#include "lj_bcdump.h"
21#include "lj_vm.h" 22#include "lj_vm.h"
22 23
23/* Context for bytecode writer. */ 24/* Context for bytecode writer. */
24typedef struct BCWriteCtx { 25typedef struct BCWriteCtx {
25 SBuf sb; /* Output buffer. */ 26 SBuf sb; /* Output buffer. */
26 lua_State *L; /* Lua state. */
27 GCproto *pt; /* Root prototype. */ 27 GCproto *pt; /* Root prototype. */
28 lua_Writer wfunc; /* Writer callback. */ 28 lua_Writer wfunc; /* Writer callback. */
29 void *wdata; /* Writer callback data. */ 29 void *wdata; /* Writer callback data. */
@@ -31,85 +31,44 @@ typedef struct BCWriteCtx {
31 int status; /* Status from writer callback. */ 31 int status; /* Status from writer callback. */
32} BCWriteCtx; 32} BCWriteCtx;
33 33
34/* -- Output buffer handling ---------------------------------------------- */
35
36/* Resize buffer if needed. */
37static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
38{
39 MSize sz = ctx->sb.sz * 2;
40 while (ctx->sb.n + len > sz) sz = sz * 2;
41 lj_str_resizebuf(ctx->L, &ctx->sb, sz);
42}
43
44/* Need a certain amount of buffer space. */
45static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
46{
47 if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
48 bcwrite_resize(ctx, len);
49}
50
51/* Add memory block to buffer. */
52static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
53{
54 uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
55 MSize i;
56 ctx->sb.n += len;
57 for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
58}
59
60/* Add byte to buffer. */
61static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
62{
63 ctx->sb.buf[ctx->sb.n++] = b;
64}
65
66/* Add ULEB128 value to buffer. */
67static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
68{
69 MSize n = ctx->sb.n;
70 uint8_t *p = (uint8_t *)ctx->sb.buf;
71 for (; v >= 0x80; v >>= 7)
72 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
73 p[n++] = (uint8_t)v;
74 ctx->sb.n = n;
75}
76
77/* -- Bytecode writer ----------------------------------------------------- */ 34/* -- Bytecode writer ----------------------------------------------------- */
78 35
79/* Write a single constant key/value of a template table. */ 36/* Write a single constant key/value of a template table. */
80static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) 37static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
81{ 38{
82 bcwrite_need(ctx, 1+10); 39 char *p = lj_buf_more(&ctx->sb, 1+10);
83 if (tvisstr(o)) { 40 if (tvisstr(o)) {
84 const GCstr *str = strV(o); 41 const GCstr *str = strV(o);
85 MSize len = str->len; 42 MSize len = str->len;
86 bcwrite_need(ctx, 5+len); 43 p = lj_buf_more(&ctx->sb, 5+len);
87 bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); 44 p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
88 bcwrite_block(ctx, strdata(str), len); 45 p = lj_buf_wmem(p, strdata(str), len);
89 } else if (tvisint(o)) { 46 } else if (tvisint(o)) {
90 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 47 *p++ = BCDUMP_KTAB_INT;
91 bcwrite_uleb128(ctx, intV(o)); 48 p = lj_strfmt_wuleb128(p, intV(o));
92 } else if (tvisnum(o)) { 49 } else if (tvisnum(o)) {
93 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ 50 if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
94 lua_Number num = numV(o); 51 lua_Number num = numV(o);
95 int32_t k = lj_num2int(num); 52 int32_t k = lj_num2int(num);
96 if (num == (lua_Number)k) { /* -0 is never a constant. */ 53 if (num == (lua_Number)k) { /* -0 is never a constant. */
97 bcwrite_byte(ctx, BCDUMP_KTAB_INT); 54 *p++ = BCDUMP_KTAB_INT;
98 bcwrite_uleb128(ctx, k); 55 p = lj_strfmt_wuleb128(p, k);
56 setsbufP(&ctx->sb, p);
99 return; 57 return;
100 } 58 }
101 } 59 }
102 bcwrite_byte(ctx, BCDUMP_KTAB_NUM); 60 *p++ = BCDUMP_KTAB_NUM;
103 bcwrite_uleb128(ctx, o->u32.lo); 61 p = lj_strfmt_wuleb128(p, o->u32.lo);
104 bcwrite_uleb128(ctx, o->u32.hi); 62 p = lj_strfmt_wuleb128(p, o->u32.hi);
105 } else { 63 } else {
106 lua_assert(tvispri(o)); 64 lua_assert(tvispri(o));
107 bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); 65 *p++ = BCDUMP_KTAB_NIL+~itype(o);
108 } 66 }
67 setsbufP(&ctx->sb, p);
109} 68}
110 69
111/* Write a template table. */ 70/* Write a template table. */
112static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) 71static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
113{ 72{
114 MSize narray = 0, nhash = 0; 73 MSize narray = 0, nhash = 0;
115 if (t->asize > 0) { /* Determine max. length of array part. */ 74 if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +86,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
127 nhash += !tvisnil(&node[i].val); 86 nhash += !tvisnil(&node[i].val);
128 } 87 }
129 /* Write number of array slots and hash slots. */ 88 /* Write number of array slots and hash slots. */
130 bcwrite_uleb128(ctx, narray); 89 p = lj_strfmt_wuleb128(p, narray);
131 bcwrite_uleb128(ctx, nhash); 90 p = lj_strfmt_wuleb128(p, nhash);
91 setsbufP(&ctx->sb, p);
132 if (narray) { /* Write array entries (may contain nil). */ 92 if (narray) { /* Write array entries (may contain nil). */
133 MSize i; 93 MSize i;
134 TValue *o = tvref(t->array); 94 TValue *o = tvref(t->array);
@@ -155,6 +115,7 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
155 for (i = 0; i < sizekgc; i++, kr++) { 115 for (i = 0; i < sizekgc; i++, kr++) {
156 GCobj *o = gcref(*kr); 116 GCobj *o = gcref(*kr);
157 MSize tp, need = 1; 117 MSize tp, need = 1;
118 char *p;
158 /* Determine constant type and needed size. */ 119 /* Determine constant type and needed size. */
159 if (o->gch.gct == ~LJ_TSTR) { 120 if (o->gch.gct == ~LJ_TSTR) {
160 tp = BCDUMP_KGC_STR + gco2str(o)->len; 121 tp = BCDUMP_KGC_STR + gco2str(o)->len;
@@ -181,24 +142,26 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
181 need = 1+2*5; 142 need = 1+2*5;
182 } 143 }
183 /* Write constant type. */ 144 /* Write constant type. */
184 bcwrite_need(ctx, need); 145 p = lj_buf_more(&ctx->sb, need);
185 bcwrite_uleb128(ctx, tp); 146 p = lj_strfmt_wuleb128(p, tp);
186 /* Write constant data (if any). */ 147 /* Write constant data (if any). */
187 if (tp >= BCDUMP_KGC_STR) { 148 if (tp >= BCDUMP_KGC_STR) {
188 bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); 149 p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
189 } else if (tp == BCDUMP_KGC_TAB) { 150 } else if (tp == BCDUMP_KGC_TAB) {
190 bcwrite_ktab(ctx, gco2tab(o)); 151 bcwrite_ktab(ctx, p, gco2tab(o));
152 continue;
191#if LJ_HASFFI 153#if LJ_HASFFI
192 } else if (tp != BCDUMP_KGC_CHILD) { 154 } else if (tp != BCDUMP_KGC_CHILD) {
193 cTValue *p = (TValue *)cdataptr(gco2cd(o)); 155 cTValue *q = (TValue *)cdataptr(gco2cd(o));
194 bcwrite_uleb128(ctx, p[0].u32.lo); 156 p = lj_strfmt_wuleb128(p, q[0].u32.lo);
195 bcwrite_uleb128(ctx, p[0].u32.hi); 157 p = lj_strfmt_wuleb128(p, q[0].u32.hi);
196 if (tp == BCDUMP_KGC_COMPLEX) { 158 if (tp == BCDUMP_KGC_COMPLEX) {
197 bcwrite_uleb128(ctx, p[1].u32.lo); 159 p = lj_strfmt_wuleb128(p, q[1].u32.lo);
198 bcwrite_uleb128(ctx, p[1].u32.hi); 160 p = lj_strfmt_wuleb128(p, q[1].u32.hi);
199 } 161 }
200#endif 162#endif
201 } 163 }
164 setsbufP(&ctx->sb, p);
202 } 165 }
203} 166}
204 167
@@ -207,7 +170,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
207{ 170{
208 MSize i, sizekn = pt->sizekn; 171 MSize i, sizekn = pt->sizekn;
209 cTValue *o = mref(pt->k, TValue); 172 cTValue *o = mref(pt->k, TValue);
210 bcwrite_need(ctx, 10*sizekn); 173 char *p = lj_buf_more(&ctx->sb, 10*sizekn);
211 for (i = 0; i < sizekn; i++, o++) { 174 for (i = 0; i < sizekn; i++, o++) {
212 int32_t k; 175 int32_t k;
213 if (tvisint(o)) { 176 if (tvisint(o)) {
@@ -220,55 +183,55 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
220 k = lj_num2int(num); 183 k = lj_num2int(num);
221 if (num == (lua_Number)k) { /* -0 is never a constant. */ 184 if (num == (lua_Number)k) { /* -0 is never a constant. */
222 save_int: 185 save_int:
223 bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); 186 p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
224 if (k < 0) { 187 if (k < 0)
225 char *p = &ctx->sb.buf[ctx->sb.n-1]; 188 p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
226 *p = (*p & 7) | ((k>>27) & 0x18);
227 }
228 continue; 189 continue;
229 } 190 }
230 } 191 }
231 bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); 192 p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
232 if (o->u32.lo >= 0x80000000u) { 193 if (o->u32.lo >= 0x80000000u)
233 char *p = &ctx->sb.buf[ctx->sb.n-1]; 194 p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
234 *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); 195 p = lj_strfmt_wuleb128(p, o->u32.hi);
235 }
236 bcwrite_uleb128(ctx, o->u32.hi);
237 } 196 }
238 } 197 }
198 setsbufP(&ctx->sb, p);
239} 199}
240 200
241/* Write bytecode instructions. */ 201/* Write bytecode instructions. */
/*
** Each row below shows the old text first and the new text second.
** Both versions copy nbc = pt->sizebc-1 instructions, starting at
** proto_bc(pt)+1, into the output buffer.
** With LJ_HASJIT, if PROTO_ILOOP is set or pt->trace is non-zero, the copy
** (not the prototype) is patched: IFORL/IITERL/ILOOP/JFORI opcodes are
** rewritten as op-BC_IFORL+BC_FORL, and JFORL/JITERL/JLOOP instructions are
** overwritten with the startins of the trace numbered by their D operand.
** Old version: returns nothing and appends through bcwrite_block(ctx, ...).
** New version: writes at the caller-supplied p, performs no space check of
** its own, and returns the pointer just past the copied instructions.
*/
242static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) 202static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
243{ 203{
244 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ 204 MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
245#if LJ_HASJIT 205#if LJ_HASJIT
246 uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; 206 uint8_t *q = (uint8_t *)p;
247#endif 207#endif
248 bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); 208 p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
209 UNUSED(ctx);
249#if LJ_HASJIT 210#if LJ_HASJIT
250 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ 211 /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
251 if ((pt->flags & PROTO_ILOOP) || pt->trace) { 212 if ((pt->flags & PROTO_ILOOP) || pt->trace) {
252 jit_State *J = L2J(ctx->L); 213 jit_State *J = L2J(sbufL(&ctx->sb));
253 MSize i;
254 for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { 215 for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
255 BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; 216 BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
256 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || 217 if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
257 op == BC_JFORI) { 218 op == BC_JFORI) {
258 p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); 219 q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
259 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { 220 } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
260 BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); 221 BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
261 memcpy(p, &traceref(J, rd)->startins, 4); 222 memcpy(q, &traceref(J, rd)->startins, 4);
262 } 223 }
263 } 224 }
264 } 225 }
265#endif 226#endif
227 return p;
266} 228}
267 229
268/* Write prototype. */ 230/* Write prototype. */
269static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) 231static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
270{ 232{
271 MSize sizedbg = 0; 233 MSize sizedbg = 0;
234 char *p;
272 235
273 /* Recursively write children of prototype. */ 236 /* Recursively write children of prototype. */
274 if ((pt->flags & PROTO_CHILD)) { 237 if ((pt->flags & PROTO_CHILD)) {
@@ -282,31 +245,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
282 } 245 }
283 246
284 /* Start writing the prototype info to a buffer. */ 247 /* Start writing the prototype info to a buffer. */
285 lj_str_resetbuf(&ctx->sb); 248 p = lj_buf_need(&ctx->sb,
286 ctx->sb.n = 5; /* Leave room for final size. */ 249 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
287 bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); 250 p += 5; /* Leave room for final size. */
288 251
289 /* Write prototype header. */ 252 /* Write prototype header. */
290 bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); 253 *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
291 bcwrite_byte(ctx, pt->numparams); 254 *p++ = pt->numparams;
292 bcwrite_byte(ctx, pt->framesize); 255 *p++ = pt->framesize;
293 bcwrite_byte(ctx, pt->sizeuv); 256 *p++ = pt->sizeuv;
294 bcwrite_uleb128(ctx, pt->sizekgc); 257 p = lj_strfmt_wuleb128(p, pt->sizekgc);
295 bcwrite_uleb128(ctx, pt->sizekn); 258 p = lj_strfmt_wuleb128(p, pt->sizekn);
296 bcwrite_uleb128(ctx, pt->sizebc-1); 259 p = lj_strfmt_wuleb128(p, pt->sizebc-1);
297 if (!ctx->strip) { 260 if (!ctx->strip) {
298 if (proto_lineinfo(pt)) 261 if (proto_lineinfo(pt))
299 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); 262 sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
300 bcwrite_uleb128(ctx, sizedbg); 263 p = lj_strfmt_wuleb128(p, sizedbg);
301 if (sizedbg) { 264 if (sizedbg) {
302 bcwrite_uleb128(ctx, pt->firstline); 265 p = lj_strfmt_wuleb128(p, pt->firstline);
303 bcwrite_uleb128(ctx, pt->numline); 266 p = lj_strfmt_wuleb128(p, pt->numline);
304 } 267 }
305 } 268 }
306 269
307 /* Write bytecode instructions and upvalue refs. */ 270 /* Write bytecode instructions and upvalue refs. */
308 bcwrite_bytecode(ctx, pt); 271 p = bcwrite_bytecode(ctx, p, pt);
309 bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); 272 p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
273 setsbufP(&ctx->sb, p);
310 274
311 /* Write constants. */ 275 /* Write constants. */
312 bcwrite_kgc(ctx, pt); 276 bcwrite_kgc(ctx, pt);
@@ -314,18 +278,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
314 278
315 /* Write debug info, if not stripped. */ 279 /* Write debug info, if not stripped. */
316 if (sizedbg) { 280 if (sizedbg) {
317 bcwrite_need(ctx, sizedbg); 281 p = lj_buf_more(&ctx->sb, sizedbg);
318 bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); 282 p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
283 setsbufP(&ctx->sb, p);
319 } 284 }
320 285
321 /* Pass buffer to writer function. */ 286 /* Pass buffer to writer function. */
322 if (ctx->status == 0) { 287 if (ctx->status == 0) {
323 MSize n = ctx->sb.n - 5; 288 MSize n = sbuflen(&ctx->sb) - 5;
324 MSize nn = (lj_fls(n)+8)*9 >> 6; 289 MSize nn = (lj_fls(n)+8)*9 >> 6;
325 ctx->sb.n = 5 - nn; 290 char *q = sbufB(&ctx->sb) + (5 - nn);
326 bcwrite_uleb128(ctx, n); /* Fill in final size. */ 291 p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
327 lua_assert(ctx->sb.n == 5); 292 lua_assert(p == sbufB(&ctx->sb) + 5);
328 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); 293 ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
329 } 294 }
330} 295}
331 296
@@ -335,20 +300,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
335 GCstr *chunkname = proto_chunkname(ctx->pt); 300 GCstr *chunkname = proto_chunkname(ctx->pt);
336 const char *name = strdata(chunkname); 301 const char *name = strdata(chunkname);
337 MSize len = chunkname->len; 302 MSize len = chunkname->len;
338 lj_str_resetbuf(&ctx->sb); 303 char *p = lj_buf_need(&ctx->sb, 5+5+len);
339 bcwrite_need(ctx, 5+5+len); 304 *p++ = BCDUMP_HEAD1;
340 bcwrite_byte(ctx, BCDUMP_HEAD1); 305 *p++ = BCDUMP_HEAD2;
341 bcwrite_byte(ctx, BCDUMP_HEAD2); 306 *p++ = BCDUMP_HEAD3;
342 bcwrite_byte(ctx, BCDUMP_HEAD3); 307 *p++ = BCDUMP_VERSION;
343 bcwrite_byte(ctx, BCDUMP_VERSION); 308 *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
344 bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + 309 LJ_BE*BCDUMP_F_BE +
345 (LJ_BE ? BCDUMP_F_BE : 0) + 310 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
346 ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); 311 LJ_FR2*BCDUMP_F_FR2;
347 if (!ctx->strip) { 312 if (!ctx->strip) {
348 bcwrite_uleb128(ctx, len); 313 p = lj_strfmt_wuleb128(p, len);
349 bcwrite_block(ctx, name, len); 314 p = lj_buf_wmem(p, name, len);
350 } 315 }
351 ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); 316 ctx->status = ctx->wfunc(sbufL(&ctx->sb), sbufB(&ctx->sb),
317 (MSize)(p - sbufB(&ctx->sb)), ctx->wdata);
352} 318}
353 319
354/* Write footer of bytecode dump. */ 320/* Write footer of bytecode dump. */
@@ -356,7 +322,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
356{ 322{
357 if (ctx->status == 0) { 323 if (ctx->status == 0) {
358 uint8_t zero = 0; 324 uint8_t zero = 0;
359 ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); 325 ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
360 } 326 }
361} 327}
362 328
@@ -364,8 +330,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
364static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) 330static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
365{ 331{
366 BCWriteCtx *ctx = (BCWriteCtx *)ud; 332 BCWriteCtx *ctx = (BCWriteCtx *)ud;
367 UNUSED(dummy); 333 UNUSED(L); UNUSED(dummy);
368 lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ 334 lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
369 bcwrite_header(ctx); 335 bcwrite_header(ctx);
370 bcwrite_proto(ctx, ctx->pt); 336 bcwrite_proto(ctx, ctx->pt);
371 bcwrite_footer(ctx); 337 bcwrite_footer(ctx);
@@ -378,16 +344,15 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
378{ 344{
379 BCWriteCtx ctx; 345 BCWriteCtx ctx;
380 int status; 346 int status;
381 ctx.L = L;
382 ctx.pt = pt; 347 ctx.pt = pt;
383 ctx.wfunc = writer; 348 ctx.wfunc = writer;
384 ctx.wdata = data; 349 ctx.wdata = data;
385 ctx.strip = strip; 350 ctx.strip = strip;
386 ctx.status = 0; 351 ctx.status = 0;
387 lj_str_initbuf(&ctx.sb); 352 lj_buf_init(L, &ctx.sb);
388 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); 353 status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
389 if (status == 0) status = ctx.status; 354 if (status == 0) status = ctx.status;
390 lj_str_freebuf(G(ctx.L), &ctx.sb); 355 lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
391 return status; 356 return status;
392} 357}
393 358
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..c8778016
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,232 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_buf_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_err.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_tab.h"
15#include "lj_strfmt.h"
16
17/* -- Buffer management --------------------------------------------------- */
18
19static void buf_grow(SBuf *sb, MSize sz)
20{
21 MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
22 char *b;
23 if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
24 while (nsz < sz) nsz += nsz;
25 b = (char *)lj_mem_realloc(sbufL(sb), sbufB(sb), osz, nsz);
26 setmref(sb->b, b);
27 setmref(sb->p, b + len);
28 setmref(sb->e, b + nsz);
29}
30
31LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
32{
33 lua_assert(sz > sbufsz(sb));
34 if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
35 lj_err_mem(sbufL(sb));
36 buf_grow(sb, sz);
37 return sbufB(sb);
38}
39
40LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
41{
42 MSize len = sbuflen(sb);
43 lua_assert(sz > sbufleft(sb));
44 if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
45 lj_err_mem(sbufL(sb));
46 buf_grow(sb, len + sz);
47 return sbufP(sb);
48}
49
50void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
51{
52 char *b = sbufB(sb);
53 MSize osz = (MSize)(sbufE(sb) - b);
54 if (osz > 2*LJ_MIN_SBUF) {
55 MSize n = (MSize)(sbufP(sb) - b);
56 b = lj_mem_realloc(L, b, osz, (osz >> 1));
57 setmref(sb->b, b);
58 setmref(sb->p, b + n);
59 setmref(sb->e, b + (osz >> 1));
60 }
61}
62
63char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
64{
65 SBuf *sb = &G(L)->tmpbuf;
66 setsbufL(sb, L);
67 return lj_buf_need(sb, sz);
68}
69
70/* -- Low-level buffer put operations ------------------------------------- */
71
72SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
73{
74 char *p = lj_buf_more(sb, len);
75 p = lj_buf_wmem(p, q, len);
76 setsbufP(sb, p);
77 return sb;
78}
79
80SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
81{
82 char *p = lj_buf_more(sb, 1);
83 *p++ = (char)c;
84 setsbufP(sb, p);
85 return sb;
86}
87
88SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
89{
90 MSize len = s->len;
91 char *p = lj_buf_more(sb, len);
92 p = lj_buf_wmem(p, strdata(s), len);
93 setsbufP(sb, p);
94 return sb;
95}
96
97/* -- High-level buffer put operations ------------------------------------ */
98
99SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
100{
101 MSize len = s->len;
102 char *p = lj_buf_more(sb, len), *e = p+len;
103 const char *q = strdata(s)+len-1;
104 while (p < e)
105 *p++ = *q--;
106 setsbufP(sb, p);
107 return sb;
108}
109
110SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
111{
112 MSize len = s->len;
113 char *p = lj_buf_more(sb, len), *e = p+len;
114 const char *q = strdata(s);
115 for (; p < e; p++, q++) {
116 uint32_t c = *(unsigned char *)q;
117#if LJ_TARGET_PPC
118 *p = c + ((c >= 'A' && c <= 'Z') << 5);
119#else
120 if (c >= 'A' && c <= 'Z') c += 0x20;
121 *p = c;
122#endif
123 }
124 setsbufP(sb, p);
125 return sb;
126}
127
128SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
129{
130 MSize len = s->len;
131 char *p = lj_buf_more(sb, len), *e = p+len;
132 const char *q = strdata(s);
133 for (; p < e; p++, q++) {
134 uint32_t c = *(unsigned char *)q;
135#if LJ_TARGET_PPC
136 *p = c - ((c >= 'a' && c <= 'z') << 5);
137#else
138 if (c >= 'a' && c <= 'z') c -= 0x20;
139 *p = c;
140#endif
141 }
142 setsbufP(sb, p);
143 return sb;
144}
145
146SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
147{
148 MSize len = s->len;
149 if (rep > 0 && len) {
150 uint64_t tlen = (uint64_t)rep * len;
151 char *p;
152 if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
153 lj_err_mem(sbufL(sb));
154 p = lj_buf_more(sb, (MSize)tlen);
155 if (len == 1) { /* Optimize a common case. */
156 uint32_t c = strdata(s)[0];
157 do { *p++ = c; } while (--rep > 0);
158 } else {
159 const char *e = strdata(s) + len;
160 do {
161 const char *q = strdata(s);
162 do { *p++ = *q++; } while (q < e);
163 } while (--rep > 0);
164 }
165 setsbufP(sb, p);
166 }
167 return sb;
168}
169
/*
** Append the elements t[i] .. t[e] to the buffer, with sep (if non-NULL)
** written between consecutive elements but not after the last one.
** Elements may be strings, integers or numbers; numbers are formatted with
** STRFMT_G14. If i > e nothing is appended.
** Returns sb on success. If an element is missing (lj_tab_getint() returns
** NULL) or has any other type, NULL is returned and the failing index is
** stored in the buffer's write-position field in place of a pointer, so the
** buffer must not be used as a buffer again until that field is restored.
*/
170SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
171{
172 MSize seplen = sep ? sep->len : 0;
173 if (i <= e) {
174 for (;;) {
175 cTValue *o = lj_tab_getint(t, i);
176 char *p;
177 if (!o) {
178 badtype: /* Error: bad element type. */
179 setsbufP(sb, (void *)(intptr_t)i); /* Store failing index. */
180 return NULL;
181 } else if (tvisstr(o)) {
182 MSize len = strV(o)->len;
/* Every branch reserves seplen extra bytes, so the separator write below
** needs no further space check. */
183 p = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
184 } else if (tvisint(o)) {
185 p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
186 } else if (tvisnum(o)) {
/* lj_strfmt_putfnum() appends to sb itself; lj_buf_more() then returns the
** write position behind the formatted number. */
187 p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
188 } else {
189 goto badtype;
190 }
/* Last element: commit the write position without a trailing separator. */
191 if (i++ == e) {
192 setsbufP(sb, p);
193 break;
194 }
195 if (seplen) p = lj_buf_wmem(p, strdata(sep), seplen);
196 setsbufP(sb, p);
197 }
198 }
199 return sb;
200}
201
202/* -- Miscellaneous buffer operations ------------------------------------- */
203
204GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
205{
206 return lj_str_new(sbufL(sb), sbufB(sb), sbuflen(sb));
207}
208
209/* Concatenate two strings. */
210GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
211{
212 MSize len1 = s1->len, len2 = s2->len;
213 char *buf = lj_buf_tmp(L, len1 + len2);
214 memcpy(buf, strdata(s1), len1);
215 memcpy(buf+len1, strdata(s2), len2);
216 return lj_str_new(L, buf, len1 + len2);
217}
218
219/* Read ULEB128 from buffer. */
220uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
221{
222 const uint8_t *p = (const uint8_t *)*pp;
223 uint32_t v = *p++;
224 if (LJ_UNLIKELY(v >= 0x80)) {
225 int sh = 0;
226 v &= 0x7f;
227 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
228 }
229 *pp = (const char *)p;
230 return v;
231}
232
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..dab13bd2
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,103 @@
1/*
2** Buffer handling.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_BUF_H
7#define _LJ_BUF_H
8
9#include "lj_obj.h"
10#include "lj_gc.h"
11#include "lj_str.h"
12
13/* Resizable string buffers. Struct definition in lj_obj.h. */
/* Field readers: b = start of the allocation, p = current write position,
** e = end of the allocation, L = lua_State associated with the buffer. */
14#define sbufB(sb) (mref((sb)->b, char))
15#define sbufP(sb) (mref((sb)->p, char))
16#define sbufE(sb) (mref((sb)->e, char))
17#define sbufL(sb) (mref((sb)->L, lua_State))
/* Derived sizes: total capacity (e-b), bytes written so far (p-b) and
** bytes still free behind the write position (e-p). */
18#define sbufsz(sb) ((MSize)(sbufE((sb)) - sbufB((sb))))
19#define sbuflen(sb) ((MSize)(sbufP((sb)) - sbufB((sb))))
20#define sbufleft(sb) ((MSize)(sbufE((sb)) - sbufP((sb))))
/* Field writers for the write position and the associated lua_State. */
21#define setsbufP(sb, q) (setmref((sb)->p, (q)))
22#define setsbufL(sb, l) (setmref((sb)->L, (l)))
23
24/* Buffer management */
/* lj_buf_need2/lj_buf_more2 are the out-of-line growth paths used by the
** inline lj_buf_need()/lj_buf_more() in this header. */
25LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
26LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
27LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
28LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
29
30static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
31{
32 setsbufL(sb, L);
33 setmref(sb->p, NULL); setmref(sb->e, NULL); setmref(sb->b, NULL);
34}
35
/* Empty the buffer: the write position p is set back to the start b.
** The allocation (b and e) is left untouched. */
36static LJ_AINLINE void lj_buf_reset(SBuf *sb)
37{
38 setmrefr(sb->p, sb->b);
39}
40
41static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
42{
43 SBuf *sb = &G(L)->tmpbuf;
44 setsbufL(sb, L);
45 lj_buf_reset(sb);
46 return sb;
47}
48
49static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
50{
51 lj_mem_free(g, sbufB(sb), sbufsz(sb));
52}
53
54static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
55{
56 if (LJ_UNLIKELY(sz > sbufsz(sb)))
57 return lj_buf_need2(sb, sz);
58 return sbufB(sb);
59}
60
61static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
62{
63 if (LJ_UNLIKELY(sz > sbufleft(sb)))
64 return lj_buf_more2(sb, sz);
65 return sbufP(sb);
66}
67
68/* Low-level buffer put operations */
/* Each of these appends to sb (raw bytes, one character, or the contents of
** a string) and is declared to return an SBuf pointer. */
69LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
70LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
71LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
72
73static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
74{
75 return (char *)memcpy(p, q, len) + len;
76}
77
78static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
79{
80 char *p = lj_buf_more(sb, 1);
81 *p++ = (char)c;
82 setsbufP(sb, p);
83}
84
85/* High-level buffer put operations */
/* Append a transformed string (reversed, lower-cased, upper-cased or
** repeated) or a range of table elements joined by a separator. */
86LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
87LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
88LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
89LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
/* lj_buf_puttab() is the only put operation here that can return NULL
** (bad element); see its definition for where the failing index is kept. */
90LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
91 int32_t i, int32_t e);
92
93/* Miscellaneous buffer operations */
/* Buffer-to-string conversion, two-string concatenation and a ULEB128
** reader that advances the caller's read pointer. */
94LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
95LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
96LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
97
98static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
99{
100 return lj_str_new(L, sbufB(sb), sbuflen(sb));
101}
102
103#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index b33b1f36..cb7e8db6 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_tab.h" 12#include "lj_tab.h"
13#include "lj_meta.h" 13#include "lj_meta.h"
14#include "lj_ir.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_cconv.h" 16#include "lj_cconv.h"
16#include "lj_cdata.h" 17#include "lj_cdata.h"
17#include "lj_carith.h" 18#include "lj_carith.h"
19#include "lj_strscan.h"
18 20
19/* -- C data arithmetic --------------------------------------------------- */ 21/* -- C data arithmetic --------------------------------------------------- */
20 22
@@ -281,6 +283,79 @@ int lj_carith_len(lua_State *L)
281 return lj_carith_meta(L, cts, &ca, MM_len); 283 return lj_carith_meta(L, cts, &ca, MM_len);
282} 284}
283 285
286/* -- 64 bit bit operations helpers --------------------------------------- */
287
/*
** B64DEF(name) expands to the signature of lj_carith_<name>(x, sh):
** static and always-inline when LJ_64 is set, an external non-inlined
** function otherwise. Every helper masks the shift count with 63 first.
*/
288#if LJ_64
289#define B64DEF(name) \
290 static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
291#else
292/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
293#define B64DEF(name) \
294 uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
295#endif
296
/* shl64/shr64: logical shifts; sar64: shift of the value reinterpreted as
** int64_t; rol64/ror64: delegate to lj_rol()/lj_ror(). */
297B64DEF(shl64) { return x << (sh&63); }
298B64DEF(shr64) { return x >> (sh&63); }
299B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
300B64DEF(rol64) { return lj_rol(x, (sh&63)); }
301B64DEF(ror64) { return lj_ror(x, (sh&63)); }
302
303#undef B64DEF
304
305uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
306{
307 switch (op) {
308 case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
309 case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
310 case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
311 case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
312 case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
313 default: lua_assert(0); break;
314 }
315 return x;
316}
317
318/* Equivalent to lj_lib_checkbit(), but handles cdata. */
/*
** Fetch argument narg (1-based, relative to L->base) as a 64 bit value.
** - Missing argument, or a value that is neither a number, a cdata nor a
**   string accepted by lj_strscan_number(): raises an argument type error
**   (LUA_TNUMBER expected).
** - cdata: converted with lj_cconv_ct_ct() to the C type selected by *id.
** - number or numeric string: converted to a 32 bit value and returned
**   zero-extended (cast through uint32_t).
** *id is read and written: it is set to CTID_UINT64 when the cdata matches
** the 8 byte unsigned test below, otherwise to CTID_INT64 if it was still
** zero. It is left unchanged for plain numbers and strings. The caller must
** initialize it before the first call.
*/
319uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
320{
321 TValue *o = L->base + narg-1;
322 if (o >= L->top) {
323 err:
324 lj_err_argt(L, narg, LUA_TNUMBER);
325 } else if (LJ_LIKELY(tvisnumber(o))) {
326 /* Handled below. */
327 } else if (tviscdata(o)) {
328 CTState *cts = ctype_cts(L);
329 uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
330 CTypeID sid = cdataV(o)->ctypeid;
331 CType *s = ctype_get(cts, sid);
332 uint64_t x;
/* Reference cdata: load the stored pointer and continue with the type id
** taken from the reference's info word. */
333 if (ctype_isref(s->info)) {
334 sp = *(void **)sp;
335 sid = ctype_cid(s->info);
336 }
337 s = ctype_raw(cts, sid);
338 if (ctype_isenum(s->info)) s = ctype_child(cts, s);
/* Presumably matches only an unsigned, non-bool, non-FP numeric type of
** size 8 (judging by the flag names -- confirm against lj_ctype.h). */
339 if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
340 CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
341 *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
342 else if (!*id)
343 *id = CTID_INT64; /* Use int64_t, unless already set. */
344 lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
345 (uint8_t *)&x, sp, CCF_ARG(narg));
346 return x;
/* String argument: lj_strscan_number() is given the slot o as its output,
** so a successful scan leaves a number in the stack slot. */
347 } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
348 goto err;
349 }
350 if (LJ_LIKELY(tvisint(o))) {
351 return (uint32_t)intV(o);
352 } else {
353 int32_t i = lj_num2bit(numV(o));
/* With LJ_DUALNUM the converted integer is written back to the slot. */
354 if (LJ_DUALNUM) setintV(o, i);
355 return (uint32_t)i;
356 }
357}
358
284/* -- 64 bit integer arithmetic helpers ----------------------------------- */ 359/* -- 64 bit integer arithmetic helpers ----------------------------------- */
285 360
286#if LJ_32 && LJ_HASJIT 361#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index bac3e1a4..af6225ae 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -13,6 +13,16 @@
13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); 13LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
14LJ_FUNC int lj_carith_len(lua_State *L); 14LJ_FUNC int lj_carith_len(lua_State *L);
15 15
/* The individual 64 bit shift/rotate helpers are only declared for LJ_32;
** lj_carith.c defines them as static inline functions when LJ_64 is set. */
16#if LJ_32
17LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
18LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
19LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
20LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
21LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
22#endif
/* Dispatcher over the five helpers (op is relative to IR_BSHL) and the
** cdata-aware argument checker; both are declared unconditionally. */
23LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
24LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
25
16#if LJ_32 && LJ_HASJIT 26#if LJ_32 && LJ_HASJIT
17LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); 27LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
18#endif 28#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index fe1e0a3a..a9b81aa5 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -291,56 +290,84 @@
291#define CCALL_HANDLE_RET \ 290#define CCALL_HANDLE_RET \
292 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; 291 if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
293 292
294#elif LJ_TARGET_PPC 293#elif LJ_TARGET_ARM64
295/* -- PPC calling conventions --------------------------------------------- */ 294/* -- ARM64 calling conventions ------------------------------------------- */
296 295
297#define CCALL_HANDLE_STRUCTRET \ 296#define CCALL_HANDLE_STRUCTRET \
298 cc->retref = 1; /* Return all structs by reference. */ \ 297 cc->retref = !ccall_classify_struct(cts, ctr); \
299 cc->gpr[ngpr++] = (GPRArg)dp; 298 if (cc->retref) cc->retp = dp;
299
300#define CCALL_HANDLE_STRUCTRET2 \
301 unsigned int cl = ccall_classify_struct(cts, ctr); \
302 if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
303 CTSize i = (cl >> 8) - 1; \
304 do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
305 } else { \
306 if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
307 memcpy(dp, sp, ctr->size); \
308 }
300 309
301#define CCALL_HANDLE_COMPLEXRET \ 310#define CCALL_HANDLE_COMPLEXRET \
302 /* Complex values are returned in 2 or 4 GPRs. */ \ 311 /* Complex values are returned in one or two FPRs. */ \
303 cc->retref = 0; 312 cc->retref = 0;
304 313
305#define CCALL_HANDLE_COMPLEXRET2 \ 314#define CCALL_HANDLE_COMPLEXRET2 \
306 memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ 315 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
316 ((float *)dp)[0] = cc->fpr[0].f; \
317 ((float *)dp)[1] = cc->fpr[1].f; \
318 } else { /* Copy complex double from FPRs. */ \
319 ((double *)dp)[0] = cc->fpr[0].d; \
320 ((double *)dp)[1] = cc->fpr[1].d; \
321 }
307 322
308#define CCALL_HANDLE_STRUCTARG \ 323#define CCALL_HANDLE_STRUCTARG \
309 rp = cdataptr(lj_cdata_new(cts, did, sz)); \ 324 unsigned int cl = ccall_classify_struct(cts, d); \
310 sz = CTSIZE_PTR; /* Pass all structs by reference. */ 325 if (cl == 0) { /* Pass struct by reference. */ \
326 rp = cdataptr(lj_cdata_new(cts, did, sz)); \
327 sz = CTSIZE_PTR; \
328 } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \
329 isfp = (cl & 4) ? 2 : 1; \
330 } /* else: Pass struct in GPRs or on stack. */
311 331
312#define CCALL_HANDLE_COMPLEXARG \ 332#define CCALL_HANDLE_COMPLEXARG \
313 /* Pass complex by value in 2 or 4 GPRs. */ 333 /* Pass complex by value in separate (!) FPRs or on stack. */ \
334 isfp = sz == 2*sizeof(float) ? 2 : 1;
314 335
315#define CCALL_HANDLE_REGARG \ 336#define CCALL_HANDLE_REGARG \
316 if (isfp) { /* Try to pass argument in FPRs. */ \ 337 if (LJ_TARGET_IOS && isva) { \
317 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 338 /* IOS: All variadic arguments are on the stack. */ \
339 } else if (isfp) { /* Try to pass argument in FPRs. */ \
340 int n2 = ctype_isvector(d->info) ? 1 : n*isfp; \
341 if (nfpr + n2 <= CCALL_NARG_FPR) { \
318 dp = &cc->fpr[nfpr]; \ 342 dp = &cc->fpr[nfpr]; \
319 nfpr += 1; \ 343 nfpr += n2; \
320 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
321 goto done; \ 344 goto done; \
345 } else { \
346 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
347 if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
322 } \ 348 } \
323 } else { /* Try to pass argument in GPRs. */ \ 349 } else { /* Try to pass argument in GPRs. */ \
324 if (n > 1) { \ 350 if (!LJ_TARGET_IOS && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
325 lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ 351 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
326 if (ctype_isinteger(d->info)) \
327 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
328 else if (ngpr + n > maxgpr) \
329 ngpr = maxgpr; /* Prevent reordering. */ \
330 } \
331 if (ngpr + n <= maxgpr) { \ 352 if (ngpr + n <= maxgpr) { \
332 dp = &cc->gpr[ngpr]; \ 353 dp = &cc->gpr[ngpr]; \
333 ngpr += n; \ 354 ngpr += n; \
334 goto done; \ 355 goto done; \
356 } else { \
357 ngpr = maxgpr; /* Prevent reordering. */ \
358 if (LJ_TARGET_IOS && d->size < 8) goto err_nyi; \
335 } \ 359 } \
336 } 360 }
337 361
362#if LJ_BE
338#define CCALL_HANDLE_RET \ 363#define CCALL_HANDLE_RET \
339 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 364 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
340 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ 365 sp = (uint8_t *)&cc->fpr[0].f;
366#endif
341 367
342#elif LJ_TARGET_PPCSPE 368
343/* -- PPC/SPE calling conventions ----------------------------------------- */ 369#elif LJ_TARGET_PPC
370/* -- PPC calling conventions --------------------------------------------- */
344 371
345#define CCALL_HANDLE_STRUCTRET \ 372#define CCALL_HANDLE_STRUCTRET \
346 cc->retref = 1; /* Return all structs by reference. */ \ 373 cc->retref = 1; /* Return all structs by reference. */ \
@@ -360,12 +387,12 @@
360#define CCALL_HANDLE_COMPLEXARG \ 387#define CCALL_HANDLE_COMPLEXARG \
361 /* Pass complex by value in 2 or 4 GPRs. */ 388 /* Pass complex by value in 2 or 4 GPRs. */
362 389
363/* PPC/SPE has a softfp ABI. */ 390#define CCALL_HANDLE_GPR \
364#define CCALL_HANDLE_REGARG \ 391 /* Try to pass argument in GPRs. */ \
365 if (n > 1) { /* Doesn't fit in a single GPR? */ \ 392 if (n > 1) { \
366 lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \ 393 lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
367 if (n == 2) \ 394 if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
368 ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \ 395 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
369 else if (ngpr + n > maxgpr) \ 396 else if (ngpr + n > maxgpr) \
370 ngpr = maxgpr; /* Prevent reordering. */ \ 397 ngpr = maxgpr; /* Prevent reordering. */ \
371 } \ 398 } \
@@ -373,10 +400,32 @@
373 dp = &cc->gpr[ngpr]; \ 400 dp = &cc->gpr[ngpr]; \
374 ngpr += n; \ 401 ngpr += n; \
375 goto done; \ 402 goto done; \
403 } \
404
405#if LJ_ABI_SOFTFP
406#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
407#else
408#define CCALL_HANDLE_REGARG \
409 if (isfp) { /* Try to pass argument in FPRs. */ \
410 if (nfpr + 1 <= CCALL_NARG_FPR) { \
411 dp = &cc->fpr[nfpr]; \
412 nfpr += 1; \
413 d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
414 goto done; \
415 } \
416 } else { \
417 CCALL_HANDLE_GPR \
376 } 418 }
419#endif
377 420
378#elif LJ_TARGET_MIPS 421#if !LJ_ABI_SOFTFP
379/* -- MIPS calling conventions -------------------------------------------- */ 422#define CCALL_HANDLE_RET \
423 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
424 ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
425#endif
426
427#elif LJ_TARGET_MIPS32
428/* -- MIPS o32 calling conventions ---------------------------------------- */
380 429
381#define CCALL_HANDLE_STRUCTRET \ 430#define CCALL_HANDLE_STRUCTRET \
382 cc->retref = 1; /* Return all structs by reference. */ \ 431 cc->retref = 1; /* Return all structs by reference. */ \
@@ -386,6 +435,18 @@
386 /* Complex values are returned in 1 or 2 FPRs. */ \ 435 /* Complex values are returned in 1 or 2 FPRs. */ \
387 cc->retref = 0; 436 cc->retref = 0;
388 437
438#if LJ_ABI_SOFTFP
439#define CCALL_HANDLE_COMPLEXRET2 \
440 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
441 ((intptr_t *)dp)[0] = cc->gpr[0]; \
442 ((intptr_t *)dp)[1] = cc->gpr[1]; \
443 } else { /* Copy complex double from GPRs. */ \
444 ((intptr_t *)dp)[0] = cc->gpr[0]; \
445 ((intptr_t *)dp)[1] = cc->gpr[1]; \
446 ((intptr_t *)dp)[2] = cc->gpr[2]; \
447 ((intptr_t *)dp)[3] = cc->gpr[3]; \
448 }
449#else
389#define CCALL_HANDLE_COMPLEXRET2 \ 450#define CCALL_HANDLE_COMPLEXRET2 \
390 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ 451 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
391 ((float *)dp)[0] = cc->fpr[0].f; \ 452 ((float *)dp)[0] = cc->fpr[0].f; \
@@ -394,6 +455,7 @@
394 ((double *)dp)[0] = cc->fpr[0].d; \ 455 ((double *)dp)[0] = cc->fpr[0].d; \
395 ((double *)dp)[1] = cc->fpr[1].d; \ 456 ((double *)dp)[1] = cc->fpr[1].d; \
396 } 457 }
458#endif
397 459
398#define CCALL_HANDLE_STRUCTARG \ 460#define CCALL_HANDLE_STRUCTARG \
399 /* Pass all structs by value in registers and/or on the stack. */ 461 /* Pass all structs by value in registers and/or on the stack. */
@@ -401,6 +463,22 @@
401#define CCALL_HANDLE_COMPLEXARG \ 463#define CCALL_HANDLE_COMPLEXARG \
402 /* Pass complex by value in 2 or 4 GPRs. */ 464 /* Pass complex by value in 2 or 4 GPRs. */
403 465
466#define CCALL_HANDLE_GPR \
467 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
468 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
469 if (ngpr < maxgpr) { \
470 dp = &cc->gpr[ngpr]; \
471 if (ngpr + n > maxgpr) { \
472 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
473 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
474 ngpr = maxgpr; \
475 } else { \
476 ngpr += n; \
477 } \
478 goto done; \
479 }
480
481#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
404#define CCALL_HANDLE_REGARG \ 482#define CCALL_HANDLE_REGARG \
405 if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ 483 if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
406 /* Try to pass argument in FPRs. */ \ 484 /* Try to pass argument in FPRs. */ \
@@ -409,25 +487,91 @@
409 goto done; \ 487 goto done; \
410 } else { /* Try to pass argument in GPRs. */ \ 488 } else { /* Try to pass argument in GPRs. */ \
411 nfpr = CCALL_NARG_FPR; \ 489 nfpr = CCALL_NARG_FPR; \
412 if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ 490 CCALL_HANDLE_GPR \
413 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 491 }
414 if (ngpr < maxgpr) { \ 492#else /* MIPS32 soft-float */
415 dp = &cc->gpr[ngpr]; \ 493#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
416 if (ngpr + n > maxgpr) { \ 494#endif
417 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ 495
418 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ 496#if !LJ_ABI_SOFTFP
419 ngpr = maxgpr; \ 497/* On MIPS64 soft-float, position of float return values is endian-dependant. */
420 } else { \ 498#define CCALL_HANDLE_RET \
421 ngpr += n; \ 499 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
422 } \ 500 sp = (uint8_t *)&cc->fpr[0].f;
423 goto done; \ 501#endif
424 } \ 502
503#elif LJ_TARGET_MIPS64
504/* -- MIPS n64 calling conventions ---------------------------------------- */
505
506#define CCALL_HANDLE_STRUCTRET \
507 cc->retref = !(sz <= 16); \
508 if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
509
510#define CCALL_HANDLE_STRUCTRET2 \
511 ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
512
513#define CCALL_HANDLE_COMPLEXRET \
514 /* Complex values are returned in 1 or 2 FPRs. */ \
515 cc->retref = 0;
516
517#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
518
519#define CCALL_HANDLE_COMPLEXRET2 \
520 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
521 ((intptr_t *)dp)[0] = cc->gpr[0]; \
522 } else { /* Copy complex double from GPRs. */ \
523 ((intptr_t *)dp)[0] = cc->gpr[0]; \
524 ((intptr_t *)dp)[1] = cc->gpr[1]; \
525 }
526
527#define CCALL_HANDLE_COMPLEXARG \
528 /* Pass complex by value in 2 or 4 GPRs. */
529
530/* Position of soft-float 'float' return value depends on endianess. */
531#define CCALL_HANDLE_RET \
532 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
533 sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
534
535#else /* MIPS64 hard-float */
536
537#define CCALL_HANDLE_COMPLEXRET2 \
538 if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
539 ((float *)dp)[0] = cc->fpr[0].f; \
540 ((float *)dp)[1] = cc->fpr[1].f; \
541 } else { /* Copy complex double from FPRs. */ \
542 ((double *)dp)[0] = cc->fpr[0].d; \
543 ((double *)dp)[1] = cc->fpr[1].d; \
544 }
545
546#define CCALL_HANDLE_COMPLEXARG \
547 if (sz == 2*sizeof(float)) { \
548 isfp = 2; \
549 if (ngpr < maxgpr) \
550 sz *= 2; \
425 } 551 }
426 552
427#define CCALL_HANDLE_RET \ 553#define CCALL_HANDLE_RET \
428 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 554 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
429 sp = (uint8_t *)&cc->fpr[0].f; 555 sp = (uint8_t *)&cc->fpr[0].f;
430 556
557#endif
558
559#define CCALL_HANDLE_STRUCTARG \
560 /* Pass all structs by value in registers and/or on the stack. */
561
562#define CCALL_HANDLE_REGARG \
563 if (ngpr < maxgpr) { \
564 dp = &cc->gpr[ngpr]; \
565 if (ngpr + n > maxgpr) { \
566 nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
567 if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
568 ngpr = maxgpr; \
569 } else { \
570 ngpr += n; \
571 } \
572 goto done; \
573 }
574
431#else 575#else
432#error "Missing calling convention definitions for this architecture" 576#error "Missing calling convention definitions for this architecture"
433#endif 577#endif
@@ -621,6 +765,125 @@ noth: /* Not a homogeneous float/double aggregate. */
621 765
622#endif 766#endif
623 767
768/* -- ARM64 ABI struct classification ------------------------------------- */
769
770#if LJ_TARGET_ARM64
771
772/* Classify a struct based on its fields. */
773static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
774{
775 CTSize sz = ct->size;
776 unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
777 while (ct->sib) {
778 CType *sct;
779 ct = ctype_get(cts, ct->sib);
780 if (ctype_isfield(ct->info)) {
781 sct = ctype_rawchild(cts, ct);
782 if (ctype_isfp(sct->info)) {
783 r |= sct->size;
784 if (!isu) n++; else if (n == 0) n = 1;
785 } else if (ctype_iscomplex(sct->info)) {
786 r |= (sct->size >> 1);
787 if (!isu) n += 2; else if (n < 2) n = 2;
788 } else if (ctype_isstruct(sct->info)) {
789 goto substruct;
790 } else {
791 goto noth;
792 }
793 } else if (ctype_isbitfield(ct->info)) {
794 goto noth;
795 } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
796 sct = ctype_rawchild(cts, ct);
797 substruct:
798 if (sct->size > 0) {
799 unsigned int s = ccall_classify_struct(cts, sct);
800 if (s <= 1) goto noth;
801 r |= (s & 255);
802 if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
803 }
804 }
805 }
806 if ((r == 4 || r == 8) && n <= 4)
807 return r + (n << 8);
808noth: /* Not a homogeneous float/double aggregate. */
809 return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
810}
811
812#endif
813
814/* -- MIPS64 ABI struct classification ---------------------------- */
815
816#if LJ_TARGET_MIPS64
817
818#define FTYPE_FLOAT 1
819#define FTYPE_DOUBLE 2
820
821/* Classify FP fields (max. 2) and their types. */
822static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
823{
824 int n = 0, ft = 0;
825 if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
826 goto noth;
827 while (ct->sib) {
828 CType *sct;
829 ct = ctype_get(cts, ct->sib);
830 if (n == 2) {
831 goto noth;
832 } else if (ctype_isfield(ct->info)) {
833 sct = ctype_rawchild(cts, ct);
834 if (ctype_isfp(sct->info)) {
835 ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
836 n++;
837 } else {
838 goto noth;
839 }
840 } else if (ctype_isbitfield(ct->info) ||
841 ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
842 goto noth;
843 }
844 }
845 if (n <= 2)
846 return ft;
847noth: /* Not a homogeneous float/double aggregate. */
848 return 0; /* Struct is in GPRs. */
849}
850
851static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
852 int ft)
853{
854 if (LJ_ABI_SOFTFP ? ft :
855 ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
856 int i, ofs = 0;
857 for (i = 0; ft != 0; i++, ft >>= 2) {
858 if ((ft & 3) == FTYPE_FLOAT) {
859#if LJ_ABI_SOFTFP
860 /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
861 memcpy((uint8_t *)dp + ofs,
862 (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
863#else
864 *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
865#endif
866 ofs += 4;
867 } else {
868 ofs = (ofs + 7) & ~7; /* 64 bit alignment. */
869#if LJ_ABI_SOFTFP
870 *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
871#else
872 *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
873#endif
874 ofs += 8;
875 }
876 }
877 } else {
878#if !LJ_ABI_SOFTFP
879 if (ft) sp = (uint8_t *)&cc->fpr[0];
880#endif
881 memcpy(dp, sp, ctr->size);
882 }
883}
884
885#endif
886
624/* -- Common C call handling ---------------------------------------------- */ 887/* -- Common C call handling ---------------------------------------------- */
625 888
626/* Infer the destination CTypeID for a vararg argument. */ 889/* Infer the destination CTypeID for a vararg argument. */
@@ -788,6 +1051,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
788 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : 1051 *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
789 (int32_t)*(int16_t *)dp; 1052 (int32_t)*(int16_t *)dp;
790 } 1053 }
1054#if LJ_TARGET_ARM64 && LJ_BE
1055 if (isfp && d->size == sizeof(float))
1056 ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
1057#endif
1058#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
1059 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
1060#if LJ_TARGET_MIPS64
1061 || (isfp && nsp == 0)
1062#endif
1063 ) && d->size <= 4) {
1064 *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
1065 }
1066#endif
791#if LJ_TARGET_X64 && LJ_ABI_WIN 1067#if LJ_TARGET_X64 && LJ_ABI_WIN
792 if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ 1068 if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
793 if (nfpr == ngpr) 1069 if (nfpr == ngpr)
@@ -803,13 +1079,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
803 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ 1079 cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
804 cc->fpr[nfpr-2].d[1] = 0; 1080 cc->fpr[nfpr-2].d[1] = 0;
805 } 1081 }
1082#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
1083 if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
1084 /* Split float HFA or complex float into separate registers. */
1085 CTSize i = (sz >> 2) - 1;
1086 do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
1087 }
806#else 1088#else
807 UNUSED(isfp); 1089 UNUSED(isfp);
808#endif 1090#endif
809 } 1091 }
810 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ 1092 if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
811 1093
812#if LJ_TARGET_X64 || LJ_TARGET_PPC 1094#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
813 cc->nfpr = nfpr; /* Required for vararg functions. */ 1095 cc->nfpr = nfpr; /* Required for vararg functions. */
814#endif 1096#endif
815 cc->nsp = nsp; 1097 cc->nsp = nsp;
@@ -844,7 +1126,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
844 CCALL_HANDLE_COMPLEXRET2 1126 CCALL_HANDLE_COMPLEXRET2
845 return 1; /* One GC step. */ 1127 return 1; /* One GC step. */
846 } 1128 }
847 if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) 1129 if (LJ_BE && ctr->size < CTSIZE_PTR &&
1130 (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
848 sp += (CTSIZE_PTR - ctr->size); 1131 sp += (CTSIZE_PTR - ctr->size);
849#if CCALL_NUM_FPR 1132#if CCALL_NUM_FPR
850 if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) 1133 if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index 5f6d5101..8b3abdf9 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -68,35 +68,56 @@ typedef union FPRArg {
68 float f[2]; 68 float f[2];
69} FPRArg; 69} FPRArg;
70 70
71#elif LJ_TARGET_PPC 71#elif LJ_TARGET_ARM64
72 72
73#define CCALL_NARG_GPR 8 73#define CCALL_NARG_GPR 8
74#define CCALL_NRET_GPR 2
74#define CCALL_NARG_FPR 8 75#define CCALL_NARG_FPR 8
76#define CCALL_NRET_FPR 4
77#define CCALL_SPS_FREE 0
78
79typedef intptr_t GPRArg;
80typedef union FPRArg {
81 double d;
82 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
83 struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
84} FPRArg;
85
86#elif LJ_TARGET_PPC
87
88#define CCALL_NARG_GPR 8
89#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
75#define CCALL_NRET_GPR 4 /* For complex double. */ 90#define CCALL_NRET_GPR 4 /* For complex double. */
76#define CCALL_NRET_FPR 1 91#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
77#define CCALL_SPS_EXTRA 4 92#define CCALL_SPS_EXTRA 4
78#define CCALL_SPS_FREE 0 93#define CCALL_SPS_FREE 0
79 94
80typedef intptr_t GPRArg; 95typedef intptr_t GPRArg;
81typedef double FPRArg; 96typedef double FPRArg;
82 97
83#elif LJ_TARGET_PPCSPE 98#elif LJ_TARGET_MIPS32
84 99
85#define CCALL_NARG_GPR 8 100#define CCALL_NARG_GPR 4
86#define CCALL_NARG_FPR 0 101#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
87#define CCALL_NRET_GPR 4 /* For softfp complex double. */ 102#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
88#define CCALL_NRET_FPR 0 103#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
89#define CCALL_SPS_FREE 0 /* NYI */ 104#define CCALL_SPS_EXTRA 7
105#define CCALL_SPS_FREE 1
90 106
91typedef intptr_t GPRArg; 107typedef intptr_t GPRArg;
108typedef union FPRArg {
109 double d;
110 struct { LJ_ENDIAN_LOHI(float f; , float g;) };
111} FPRArg;
92 112
93#elif LJ_TARGET_MIPS 113#elif LJ_TARGET_MIPS64
94 114
95#define CCALL_NARG_GPR 4 115/* FP args are positional and overlay the GPR array. */
96#define CCALL_NARG_FPR 2 116#define CCALL_NARG_GPR 8
117#define CCALL_NARG_FPR 0
97#define CCALL_NRET_GPR 2 118#define CCALL_NRET_GPR 2
98#define CCALL_NRET_FPR 2 119#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
99#define CCALL_SPS_EXTRA 7 120#define CCALL_SPS_EXTRA 3
100#define CCALL_SPS_FREE 1 121#define CCALL_SPS_FREE 1
101 122
102typedef intptr_t GPRArg; 123typedef intptr_t GPRArg;
@@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
145 uint8_t nfpr; /* Number of arguments in FPRs. */ 166 uint8_t nfpr; /* Number of arguments in FPRs. */
146#elif LJ_TARGET_X86 167#elif LJ_TARGET_X86
147 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ 168 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
169#elif LJ_TARGET_ARM64
170 void *retp; /* Aggregate return pointer in x8. */
148#elif LJ_TARGET_PPC 171#elif LJ_TARGET_PPC
149 uint8_t nfpr; /* Number of arguments in FPRs. */ 172 uint8_t nfpr; /* Number of arguments in FPRs. */
150#endif 173#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 26377f82..4edd8a35 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -27,7 +27,7 @@
27 27
28#if LJ_OS_NOJIT 28#if LJ_OS_NOJIT
29 29
30/* Disabled callback support. */ 30/* Callbacks disabled. */
31#define CALLBACK_SLOT2OFS(slot) (0*(slot)) 31#define CALLBACK_SLOT2OFS(slot) (0*(slot))
32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) 32#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
33#define CALLBACK_MAX_SLOT 0 33#define CALLBACK_MAX_SLOT 0
@@ -35,7 +35,7 @@
35#elif LJ_TARGET_X86ORX64 35#elif LJ_TARGET_X86ORX64
36 36
37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) 37#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
38#define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) 38#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
39 39
40#define CALLBACK_SLOT2OFS(slot) \ 40#define CALLBACK_SLOT2OFS(slot) \
41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) 41 (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
54#elif LJ_TARGET_ARM 54#elif LJ_TARGET_ARM
55 55
56#define CALLBACK_MCODE_HEAD 32 56#define CALLBACK_MCODE_HEAD 32
57#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 57
58#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 58#elif LJ_TARGET_ARM64
59#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 59
60#define CALLBACK_MCODE_HEAD 32
60 61
61#elif LJ_TARGET_PPC 62#elif LJ_TARGET_PPC
62 63
63#define CALLBACK_MCODE_HEAD 24 64#define CALLBACK_MCODE_HEAD 24
64#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
65#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
66#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
67 65
68#elif LJ_TARGET_MIPS 66#elif LJ_TARGET_MIPS32
69 67
70#define CALLBACK_MCODE_HEAD 24 68#define CALLBACK_MCODE_HEAD 20
71#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) 69
72#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) 70#elif LJ_TARGET_MIPS64
73#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) 71
72#define CALLBACK_MCODE_HEAD 52
74 73
75#else 74#else
76 75
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
81 80
82#endif 81#endif
83 82
83#ifndef CALLBACK_SLOT2OFS
84#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
85#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
86#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
87#endif
88
84/* Convert callback slot number to callback function pointer. */ 89/* Convert callback slot number to callback function pointer. */
85static void *callback_slot2ptr(CTState *cts, MSize slot) 90static void *callback_slot2ptr(CTState *cts, MSize slot)
86{ 91{
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
119 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ 124 /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
120 *p++ = XI_PUSH + RID_EBP; 125 *p++ = XI_PUSH + RID_EBP;
121 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); 126 *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
127#if LJ_GC64
128 *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
129 *(uint64_t *)p = (uint64_t)(g); p += 8;
130#else
122 *p++ = XI_MOVri | RID_EBP; 131 *p++ = XI_MOVri | RID_EBP;
123 *(int32_t *)p = i32ptr(g); p += 4; 132 *(int32_t *)p = i32ptr(g); p += 4;
133#endif
124#if LJ_64 134#if LJ_64
125 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ 135 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
126 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; 136 *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
@@ -157,6 +167,26 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
157 } 167 }
158 lua_assert(p - page <= CALLBACK_MCODE_SIZE); 168 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
159} 169}
170#elif LJ_TARGET_ARM64
171static void callback_mcode_init(global_State *g, uint32_t *page)
172{
173 uint32_t *p = page;
174 void *target = (void *)lj_vm_ffi_callback;
175 MSize slot;
176 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
177 *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
178 *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
179 *p++ = A64I_LE(A64I_NOP);
180 ((void **)p)[0] = target;
181 ((void **)p)[1] = g;
182 p += 4;
183 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
184 *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
185 *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
186 p++;
187 }
188 lua_assert(p - page <= CALLBACK_MCODE_SIZE);
189}
160#elif LJ_TARGET_PPC 190#elif LJ_TARGET_PPC
161static void callback_mcode_init(global_State *g, uint32_t *page) 191static void callback_mcode_init(global_State *g, uint32_t *page)
162{ 192{
@@ -180,14 +210,27 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
180static void callback_mcode_init(global_State *g, uint32_t *page) 210static void callback_mcode_init(global_State *g, uint32_t *page)
181{ 211{
182 uint32_t *p = page; 212 uint32_t *p = page;
183 void *target = (void *)lj_vm_ffi_callback; 213 uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
214 uintptr_t ug = (uintptr_t)(void *)g;
184 MSize slot; 215 MSize slot;
185 *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; 216#if LJ_TARGET_MIPS32
186 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); 217 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
187 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); 218 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
188 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); 219#else
220 *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
221 *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
222 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
223 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
224 *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
225 *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
226 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
227 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
228 *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
229 *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
230#endif
231 *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
189 *p++ = MIPSI_JR | MIPSF_S(RID_R3); 232 *p++ = MIPSI_JR | MIPSF_S(RID_R3);
190 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); 233 *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
191 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { 234 for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
192 *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); 235 *p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
193 p++; 236 p++;
@@ -224,7 +267,7 @@ static void callback_mcode_new(CTState *cts)
224 if (CALLBACK_MAX_SLOT == 0) 267 if (CALLBACK_MAX_SLOT == 0)
225 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 268 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
226#if LJ_TARGET_WINDOWS 269#if LJ_TARGET_WINDOWS
227 p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); 270 p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
228 if (!p) 271 if (!p)
229 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); 272 lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
230#elif LJ_TARGET_POSIX 273#elif LJ_TARGET_POSIX
@@ -242,7 +285,7 @@ static void callback_mcode_new(CTState *cts)
242#if LJ_TARGET_WINDOWS 285#if LJ_TARGET_WINDOWS
243 { 286 {
244 DWORD oprot; 287 DWORD oprot;
245 VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); 288 LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
246 } 289 }
247#elif LJ_TARGET_POSIX 290#elif LJ_TARGET_POSIX
248 mprotect(p, sz, (PROT_READ|PROT_EXEC)); 291 mprotect(p, sz, (PROT_READ|PROT_EXEC));
@@ -351,33 +394,77 @@ void lj_ccallback_mcode_free(CTState *cts)
351 goto done; \ 394 goto done; \
352 } CALLBACK_HANDLE_REGARG_FP2 395 } CALLBACK_HANDLE_REGARG_FP2
353 396
354#elif LJ_TARGET_PPC 397#elif LJ_TARGET_ARM64
355 398
356#define CALLBACK_HANDLE_REGARG \ 399#define CALLBACK_HANDLE_REGARG \
357 if (isfp) { \ 400 if (isfp) { \
358 if (nfpr + 1 <= CCALL_NARG_FPR) { \ 401 if (nfpr + n <= CCALL_NARG_FPR) { \
359 sp = &cts->cb.fpr[nfpr++]; \ 402 sp = &cts->cb.fpr[nfpr]; \
360 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ 403 nfpr += n; \
361 goto done; \ 404 goto done; \
405 } else { \
406 nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
362 } \ 407 } \
363 } else { /* Try to pass argument in GPRs. */ \ 408 } else { \
364 if (n > 1) { \ 409 if (!LJ_TARGET_IOS && n > 1) \
365 lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ 410 ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
366 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
367 } \
368 if (ngpr + n <= maxgpr) { \ 411 if (ngpr + n <= maxgpr) { \
369 sp = &cts->cb.gpr[ngpr]; \ 412 sp = &cts->cb.gpr[ngpr]; \
370 ngpr += n; \ 413 ngpr += n; \
371 goto done; \ 414 goto done; \
415 } else { \
416 ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \
417 } \
418 }
419
420#elif LJ_TARGET_PPC
421
422#define CALLBACK_HANDLE_GPR \
423 if (n > 1) { \
424 lua_assert(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
425 ctype_isinteger(cta->info)) && n == 2); /* int64_t. */ \
426 ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
427 } \
428 if (ngpr + n <= maxgpr) { \
429 sp = &cts->cb.gpr[ngpr]; \
430 ngpr += n; \
431 goto done; \
432 }
433
434#if LJ_ABI_SOFTFP
435#define CALLBACK_HANDLE_REGARG \
436 CALLBACK_HANDLE_GPR \
437 UNUSED(isfp);
438#else
439#define CALLBACK_HANDLE_REGARG \
440 if (isfp) { \
441 if (nfpr + 1 <= CCALL_NARG_FPR) { \
442 sp = &cts->cb.fpr[nfpr++]; \
443 cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
444 goto done; \
372 } \ 445 } \
446 } else { /* Try to pass argument in GPRs. */ \
447 CALLBACK_HANDLE_GPR \
373 } 448 }
449#endif
374 450
451#if !LJ_ABI_SOFTFP
375#define CALLBACK_HANDLE_RET \ 452#define CALLBACK_HANDLE_RET \
376 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 453 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
377 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ 454 *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
455#endif
378 456
379#elif LJ_TARGET_MIPS 457#elif LJ_TARGET_MIPS32
380 458
459#define CALLBACK_HANDLE_GPR \
460 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
461 if (ngpr + n <= maxgpr) { \
462 sp = &cts->cb.gpr[ngpr]; \
463 ngpr += n; \
464 goto done; \
465 }
466
467#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
381#define CALLBACK_HANDLE_REGARG \ 468#define CALLBACK_HANDLE_REGARG \
382 if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ 469 if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \
383 sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ 470 sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -385,13 +472,36 @@ void lj_ccallback_mcode_free(CTState *cts)
385 goto done; \ 472 goto done; \
386 } else { /* Try to pass argument in GPRs. */ \ 473 } else { /* Try to pass argument in GPRs. */ \
387 nfpr = CCALL_NARG_FPR; \ 474 nfpr = CCALL_NARG_FPR; \
388 if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ 475 CALLBACK_HANDLE_GPR \
389 if (ngpr + n <= maxgpr) { \ 476 }
390 sp = &cts->cb.gpr[ngpr]; \ 477#else /* MIPS32 soft-float */
391 ngpr += n; \ 478#define CALLBACK_HANDLE_REGARG \
392 goto done; \ 479 CALLBACK_HANDLE_GPR \
393 } \ 480 UNUSED(isfp);
481#endif
482
483#define CALLBACK_HANDLE_RET \
484 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
485 ((float *)dp)[1] = *(float *)dp;
486
487#elif LJ_TARGET_MIPS64
488
489#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
490#define CALLBACK_HANDLE_REGARG \
491 if (ngpr + n <= maxgpr) { \
492 sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
493 ngpr += n; \
494 goto done; \
394 } 495 }
496#else /* MIPS64 soft-float */
497#define CALLBACK_HANDLE_REGARG \
498 if (ngpr + n <= maxgpr) { \
499 UNUSED(isfp); \
500 sp = (void*) &cts->cb.gpr[ngpr]; \
501 ngpr += n; \
502 goto done; \
503 }
504#endif
395 505
396#define CALLBACK_HANDLE_RET \ 506#define CALLBACK_HANDLE_RET \
397 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ 507 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -411,6 +521,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
411 int gcsteps = 0; 521 int gcsteps = 0;
412 CType *ct; 522 CType *ct;
413 GCfunc *fn; 523 GCfunc *fn;
524 int fntp;
414 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; 525 MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
415#if CCALL_NARG_FPR 526#if CCALL_NARG_FPR
416 MSize nfpr = 0; 527 MSize nfpr = 0;
@@ -421,18 +532,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
421 532
422 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { 533 if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
423 ct = ctype_get(cts, id); 534 ct = ctype_get(cts, id);
424 rid = ctype_cid(ct->info); 535 rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */
425 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); 536 fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
537 fntp = LJ_TFUNC;
426 } else { /* Must set up frame first, before throwing the error. */ 538 } else { /* Must set up frame first, before throwing the error. */
427 ct = NULL; 539 ct = NULL;
428 rid = 0; 540 rid = 0;
429 fn = (GCfunc *)L; 541 fn = (GCfunc *)L;
542 fntp = LJ_TTHREAD;
543 }
544 /* Continuation returns from callback. */
545 if (LJ_FR2) {
546 (o++)->u64 = LJ_CONT_FFI_CALLBACK;
547 (o++)->u64 = rid;
548 o++;
549 } else {
550 o->u32.lo = LJ_CONT_FFI_CALLBACK;
551 o->u32.hi = rid;
552 o++;
430 } 553 }
431 o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ 554 setframe_gc(o, obj2gco(fn), fntp);
432 o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ 555 setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
433 o++;
434 setframe_gc(o, obj2gco(fn));
435 setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
436 L->top = L->base = ++o; 556 L->top = L->base = ++o;
437 if (!ct) 557 if (!ct)
438 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); 558 lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -474,7 +594,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
474 nsp += n; 594 nsp += n;
475 595
476 done: 596 done:
477 if (LJ_BE && cta->size < CTSIZE_PTR) 597 if (LJ_BE && cta->size < CTSIZE_PTR
598#if LJ_TARGET_MIPS64
599 && !(isfp && nsp)
600#endif
601 )
478 sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); 602 sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
479 gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); 603 gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
480 } 604 }
@@ -483,9 +607,14 @@ static void callback_conv_args(CTState *cts, lua_State *L)
483 L->top = o; 607 L->top = o;
484#if LJ_TARGET_X86 608#if LJ_TARGET_X86
485 /* Store stack adjustment for returns from non-cdecl callbacks. */ 609 /* Store stack adjustment for returns from non-cdecl callbacks. */
486 if (ctype_cconv(ct->info) != CTCC_CDECL) 610 if (ctype_cconv(ct->info) != CTCC_CDECL) {
611#if LJ_FR2
612 (L->base-3)->u64 |= (nsp << (16+2));
613#else
487 (L->base-2)->u32.hi |= (nsp << (16+2)); 614 (L->base-2)->u32.hi |= (nsp << (16+2));
488#endif 615#endif
616 }
617#endif
489 while (gcsteps-- > 0) 618 while (gcsteps-- > 0)
490 lj_gc_check(L); 619 lj_gc_check(L);
491} 620}
@@ -493,7 +622,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
493/* Convert Lua object to callback result. */ 622/* Convert Lua object to callback result. */
494static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) 623static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
495{ 624{
625#if LJ_FR2
626 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
627#else
496 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); 628 CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
629#endif
497#if LJ_TARGET_X86 630#if LJ_TARGET_X86
498 cts->cb.gpr[2] = 0; 631 cts->cb.gpr[2] = 0;
499#endif 632#endif
@@ -503,6 +636,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
503 if (ctype_isfp(ctr->info)) 636 if (ctype_isfp(ctr->info))
504 dp = (uint8_t *)&cts->cb.fpr[0]; 637 dp = (uint8_t *)&cts->cb.fpr[0];
505#endif 638#endif
639#if LJ_TARGET_ARM64 && LJ_BE
640 if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
641 dp = (uint8_t *)&cts->cb.fpr[0].f[1];
642#endif
506 lj_cconv_ct_tv(cts, ctr, dp, o, 0); 643 lj_cconv_ct_tv(cts, ctr, dp, o, 0);
507#ifdef CALLBACK_HANDLE_RET 644#ifdef CALLBACK_HANDLE_RET
508 CALLBACK_HANDLE_RET 645 CALLBACK_HANDLE_RET
@@ -516,6 +653,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
516 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : 653 *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
517 (int32_t)*(int16_t *)dp; 654 (int32_t)*(int16_t *)dp;
518 } 655 }
656#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
657 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
658 if (ctr->size <= 4 &&
659 (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
660 *(int64_t *)dp = (int64_t)*(int32_t *)dp;
661#endif
519#if LJ_TARGET_X86 662#if LJ_TARGET_X86
520 if (ctype_isfp(ctr->info)) 663 if (ctype_isfp(ctr->info))
521 cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; 664 cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
@@ -529,7 +672,7 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
529 lua_State *L = cts->L; 672 lua_State *L = cts->L;
530 global_State *g = cts->g; 673 global_State *g = cts->g;
531 lua_assert(L != NULL); 674 lua_assert(L != NULL);
532 if (gcref(g->jit_L)) { 675 if (tvref(g->jit_base)) {
533 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); 676 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
534 if (g->panic) g->panic(L); 677 if (g->panic) g->panic(L);
535 exit(EXIT_FAILURE); 678 exit(EXIT_FAILURE);
@@ -562,9 +705,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
562 } 705 }
563 callback_conv_result(cts, L, o); 706 callback_conv_result(cts, L, o);
564 /* Finally drop C frame and continuation frame. */ 707 /* Finally drop C frame and continuation frame. */
565 L->cframe = cframe_prev(L->cframe); 708 L->top -= 2+2*LJ_FR2;
566 L->top -= 2;
567 L->base = obase; 709 L->base = obase;
710 L->cframe = cframe_prev(L->cframe);
568 cts->cb.slot = 0; /* Blacklist C function that called the callback. */ 711 cts->cb.slot = 0; /* Blacklist C function that called the callback. */
569} 712}
570 713
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 99776b0e..03ed0ce2 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -448,8 +448,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
448 setintV(o, (int32_t)val); 448 setintV(o, (int32_t)val);
449 } 449 }
450 } else { 450 } else {
451 uint32_t b = (val >> pos) & 1;
451 lua_assert(bsz == 1); 452 lua_assert(bsz == 1);
452 setboolV(o, (val >> pos) & 1); 453 setboolV(o, b);
454 setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */
453 } 455 }
454 return 0; /* No GC step needed. */ 456 return 0; /* No GC step needed. */
455} 457}
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 4aeb0ce3..10d9423d 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h"
13#include "lj_tab.h" 12#include "lj_tab.h"
14#include "lj_ctype.h" 13#include "lj_ctype.h"
15#include "lj_cconv.h" 14#include "lj_cconv.h"
@@ -27,12 +26,12 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
27} 26}
28 27
29/* Allocate variable-sized or specially aligned C data object. */ 28/* Allocate variable-sized or specially aligned C data object. */
30GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) 29GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
31{ 30{
32 global_State *g; 31 global_State *g;
33 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + 32 MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
34 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); 33 (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
35 char *p = lj_mem_newt(cts->L, extra + sz, char); 34 char *p = lj_mem_newt(L, extra + sz, char);
36 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); 35 uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
37 uintptr_t almask = (1u << align) - 1u; 36 uintptr_t almask = (1u << align) - 1u;
38 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); 37 GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
@@ -40,7 +39,7 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
40 cdatav(cd)->offset = (uint16_t)((char *)cd - p); 39 cdatav(cd)->offset = (uint16_t)((char *)cd - p);
41 cdatav(cd)->extra = extra; 40 cdatav(cd)->extra = extra;
42 cdatav(cd)->len = sz; 41 cdatav(cd)->len = sz;
43 g = cts->g; 42 g = G(L);
44 setgcrefr(cd->nextgc, g->gc.root); 43 setgcrefr(cd->nextgc, g->gc.root);
45 setgcref(g->gc.root, obj2gco(cd)); 44 setgcref(g->gc.root, obj2gco(cd));
46 newwhite(g, obj2gco(cd)); 45 newwhite(g, obj2gco(cd));
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
50 return cd; 49 return cd;
51} 50}
52 51
52/* Allocate arbitrary C data object. */
53GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
54{
55 if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
56 return lj_cdata_new(cts, id, sz);
57 else
58 return lj_cdata_newv(cts->L, id, sz, ctype_align(info));
59}
60
53/* Free a C data object. */ 61/* Free a C data object. */
54void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) 62void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
55{ 63{
@@ -76,21 +84,22 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
76 } 84 }
77} 85}
78 86
79TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) 87void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
80{ 88{
81 global_State *g = G(L); 89 GCtab *t = ctype_ctsG(G(L))->finalizer;
82 GCtab *t = ctype_ctsG(g)->finalizer;
83 if (gcref(t->metatable)) { 90 if (gcref(t->metatable)) {
84 /* Add cdata to finalizer table, if still enabled. */ 91 /* Add cdata to finalizer table, if still enabled. */
85 TValue *tv, tmp; 92 TValue *tv, tmp;
86 setcdataV(L, &tmp, cd); 93 setcdataV(L, &tmp, cd);
87 lj_gc_anybarriert(L, t); 94 lj_gc_anybarriert(L, t);
88 tv = lj_tab_set(L, t, &tmp); 95 tv = lj_tab_set(L, t, &tmp);
89 cd->marked |= LJ_GC_CDATA_FIN; 96 if (it == LJ_TNIL) {
90 return tv; 97 setnilV(tv);
91 } else { 98 cd->marked &= ~LJ_GC_CDATA_FIN;
92 /* Otherwise return dummy TValue. */ 99 } else {
93 return &g->tmptv; 100 setgcV(L, tv, obj, it);
101 cd->marked |= LJ_GC_CDATA_FIN;
102 }
94 } 103 }
95} 104}
96 105
@@ -123,7 +132,12 @@ collect_attrib:
123 idx = (ptrdiff_t)intV(key); 132 idx = (ptrdiff_t)intV(key);
124 goto integer_key; 133 goto integer_key;
125 } else if (tvisnum(key)) { /* Numeric key. */ 134 } else if (tvisnum(key)) { /* Numeric key. */
126 idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); 135#ifdef _MSC_VER
136 /* Workaround for MSVC bug. */
137 volatile
138#endif
139 lua_Number n = numV(key);
140 idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
127 integer_key: 141 integer_key:
128 if (ctype_ispointer(ct->info)) { 142 if (ctype_ispointer(ct->info)) {
129 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ 143 CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 2ce90bdf..c1089e64 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -58,11 +58,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
58} 58}
59 59
60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); 60LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
61LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, 61LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
62 CTSize align); 62 CTSize align);
63LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz,
64 CTInfo info);
63 65
64LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); 66LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
65LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); 67LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
68 uint32_t it);
66 69
67LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, 70LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
68 uint8_t **pp, CTInfo *qual); 71 uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index df20aca3..2ea6ff45 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
16#include "lj_cconv.h" 16#include "lj_cconv.h"
17#include "lj_cdata.h" 17#include "lj_cdata.h"
18#include "lj_clib.h" 18#include "lj_clib.h"
19#include "lj_strfmt.h"
19 20
20/* -- OS-specific functions ----------------------------------------------- */ 21/* -- OS-specific functions ----------------------------------------------- */
21 22
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
61#endif 62#endif
62 ) { 63 ) {
63 if (!strchr(name, '.')) { 64 if (!strchr(name, '.')) {
64 name = lj_str_pushf(L, CLIB_SOEXT, name); 65 name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
65 L->top--; 66 L->top--;
66#if LJ_TARGET_CYGWIN 67#if LJ_TARGET_CYGWIN
67 } else { 68 } else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
70 } 71 }
71 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && 72 if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
72 name[2] == CLIB_SOPREFIX[2])) { 73 name[2] == CLIB_SOPREFIX[2])) {
73 name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); 74 name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
74 L->top--; 75 L->top--;
75 } 76 }
76 } 77 }
@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
158/* Default libraries. */ 159/* Default libraries. */
159enum { 160enum {
160 CLIB_HANDLE_EXE, 161 CLIB_HANDLE_EXE,
162#if !LJ_TARGET_UWP
161 CLIB_HANDLE_DLL, 163 CLIB_HANDLE_DLL,
162 CLIB_HANDLE_CRT, 164 CLIB_HANDLE_CRT,
163 CLIB_HANDLE_KERNEL32, 165 CLIB_HANDLE_KERNEL32,
164 CLIB_HANDLE_USER32, 166 CLIB_HANDLE_USER32,
165 CLIB_HANDLE_GDI32, 167 CLIB_HANDLE_GDI32,
168#endif
166 CLIB_HANDLE_MAX 169 CLIB_HANDLE_MAX
167}; 170};
168 171
@@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
172 const char *name) 175 const char *name)
173{ 176{
174 DWORD err = GetLastError(); 177 DWORD err = GetLastError();
178#if LJ_TARGET_XBOXONE
179 wchar_t wbuf[128];
180 char buf[128*2];
181 if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
182 NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) ||
183 !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL))
184#else
175 char buf[128]; 185 char buf[128];
176 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, 186 if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
177 NULL, err, 0, buf, sizeof(buf), NULL)) 187 NULL, err, 0, buf, sizeof(buf), NULL))
188#endif
178 buf[0] = '\0'; 189 buf[0] = '\0';
179 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); 190 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
180} 191}
181 192
182static int clib_needext(const char *s) 193static int clib_needext(const char *s)
@@ -191,7 +202,7 @@ static int clib_needext(const char *s)
191static const char *clib_extname(lua_State *L, const char *name) 202static const char *clib_extname(lua_State *L, const char *name)
192{ 203{
193 if (clib_needext(name)) { 204 if (clib_needext(name)) {
194 name = lj_str_pushf(L, "%s.dll", name); 205 name = lj_strfmt_pushf(L, "%s.dll", name);
195 L->top--; 206 L->top--;
196 } 207 }
197 return name; 208 return name;
@@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name)
200static void *clib_loadlib(lua_State *L, const char *name, int global) 211static void *clib_loadlib(lua_State *L, const char *name, int global)
201{ 212{
202 DWORD oldwerr = GetLastError(); 213 DWORD oldwerr = GetLastError();
203 void *h = (void *)LoadLibraryA(clib_extname(L, name)); 214 void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
204 if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); 215 if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
205 SetLastError(oldwerr); 216 SetLastError(oldwerr);
206 UNUSED(global); 217 UNUSED(global);
@@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
210static void clib_unloadlib(CLibrary *cl) 221static void clib_unloadlib(CLibrary *cl)
211{ 222{
212 if (cl->handle == CLIB_DEFHANDLE) { 223 if (cl->handle == CLIB_DEFHANDLE) {
224#if !LJ_TARGET_UWP
213 MSize i; 225 MSize i;
214 for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { 226 for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
215 void *h = clib_def_handle[i]; 227 void *h = clib_def_handle[i];
@@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl)
218 FreeLibrary((HINSTANCE)h); 230 FreeLibrary((HINSTANCE)h);
219 } 231 }
220 } 232 }
233#endif
221 } else if (cl->handle) { 234 } else if (cl->handle) {
222 FreeLibrary((HINSTANCE)cl->handle); 235 FreeLibrary((HINSTANCE)cl->handle);
223 } 236 }
224} 237}
225 238
239#if LJ_TARGET_UWP
240EXTERN_C IMAGE_DOS_HEADER __ImageBase;
241#endif
242
226static void *clib_getsym(CLibrary *cl, const char *name) 243static void *clib_getsym(CLibrary *cl, const char *name)
227{ 244{
228 void *p = NULL; 245 void *p = NULL;
@@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
231 for (i = 0; i < CLIB_HANDLE_MAX; i++) { 248 for (i = 0; i < CLIB_HANDLE_MAX; i++) {
232 HINSTANCE h = (HINSTANCE)clib_def_handle[i]; 249 HINSTANCE h = (HINSTANCE)clib_def_handle[i];
233 if (!(void *)h) { /* Resolve default library handles (once). */ 250 if (!(void *)h) { /* Resolve default library handles (once). */
251#if LJ_TARGET_UWP
252 h = (HINSTANCE)&__ImageBase;
253#else
234 switch (i) { 254 switch (i) {
235 case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; 255 case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break;
236 case CLIB_HANDLE_DLL: 256 case CLIB_HANDLE_DLL:
@@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
241 GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, 261 GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
242 (const char *)&_fmode, &h); 262 (const char *)&_fmode, &h);
243 break; 263 break;
244 case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break; 264 case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
245 case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break; 265 case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
246 case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break; 266 case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
247 } 267 }
248 if (!h) continue; 268 if (!h) continue;
269#endif
249 clib_def_handle[i] = (void *)h; 270 clib_def_handle[i] = (void *)h;
250 } 271 }
251 p = (void *)GetProcAddress(h, name); 272 p = (void *)GetProcAddress(h, name);
@@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
264LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, 285LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
265 const char *name) 286 const char *name)
266{ 287{
267 lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); 288 lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
268} 289}
269 290
270static void *clib_loadlib(lua_State *L, const char *name, int global) 291static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -348,7 +369,7 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
348 CTInfo cconv = ctype_cconv(ct->info); 369 CTInfo cconv = ctype_cconv(ct->info);
349 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { 370 if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
350 CTSize sz = clib_func_argsize(cts, ct); 371 CTSize sz = clib_func_argsize(cts, ct);
351 const char *symd = lj_str_pushf(L, 372 const char *symd = lj_strfmt_pushf(L,
352 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", 373 cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
353 sym, sz); 374 sym, sz);
354 L->top--; 375 L->top--;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index 50bb76ad..70b82af3 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
9 9
10#include "lj_gc.h" 10#include "lj_gc.h"
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_ctype.h" 13#include "lj_ctype.h"
14#include "lj_cparse.h" 14#include "lj_cparse.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_vm.h" 16#include "lj_vm.h"
17#include "lj_char.h" 17#include "lj_char.h"
18#include "lj_strscan.h" 18#include "lj_strscan.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** Important note: this is NOT a validating C parser! This is a minimal 22** Important note: this is NOT a validating C parser! This is a minimal
@@ -27,6 +28,24 @@
27** If in doubt, please check the input against your favorite C compiler. 28** If in doubt, please check the input against your favorite C compiler.
28*/ 29*/
29 30
31/* -- Miscellaneous ------------------------------------------------------- */
32
33/* Match string against a C literal. */
34#define cp_str_is(str, k) \
35 ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1))
36
37/* Check string against a linear list of matches. */
38int lj_cparse_case(GCstr *str, const char *match)
39{
40 MSize len;
41 int n;
42 for (n = 0; (len = (MSize)*match++); n++, match += len) {
43 if (str->len == len && !memcmp(match, strdata(str), len))
44 return n;
45 }
46 return -1;
47}
48
30/* -- C lexer ------------------------------------------------------------- */ 49/* -- C lexer ------------------------------------------------------------- */
31 50
32/* C lexer token names. */ 51/* C lexer token names. */
@@ -46,9 +65,9 @@ static const char *cp_tok2str(CPState *cp, CPToken tok)
46 if (tok > CTOK_OFS) 65 if (tok > CTOK_OFS)
47 return ctoknames[tok-CTOK_OFS-1]; 66 return ctoknames[tok-CTOK_OFS-1];
48 else if (!lj_char_iscntrl(tok)) 67 else if (!lj_char_iscntrl(tok))
49 return lj_str_pushf(cp->L, "%c", tok); 68 return lj_strfmt_pushf(cp->L, "%c", tok);
50 else 69 else
51 return lj_str_pushf(cp->L, "char(%d)", tok); 70 return lj_strfmt_pushf(cp->L, "char(%d)", tok);
52} 71}
53 72
54/* End-of-line? */ 73/* End-of-line? */
@@ -85,24 +104,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
85 return cp_get(cp); 104 return cp_get(cp);
86} 105}
87 106
88/* Grow save buffer. */
89static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
90{
91 MSize newsize;
92 if (cp->sb.sz >= CPARSE_MAX_BUF/2)
93 cp_err(cp, LJ_ERR_XELEM);
94 newsize = cp->sb.sz * 2;
95 lj_str_resizebuf(cp->L, &cp->sb, newsize);
96 cp->sb.buf[cp->sb.n++] = (char)c;
97}
98
99/* Save character in buffer. */ 107/* Save character in buffer. */
100static LJ_AINLINE void cp_save(CPState *cp, CPChar c) 108static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
101{ 109{
102 if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) 110 lj_buf_putb(&cp->sb, c);
103 cp_save_grow(cp, c);
104 else
105 cp->sb.buf[cp->sb.n++] = (char)c;
106} 111}
107 112
108/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ 113/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +127,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
122 tokstr = NULL; 127 tokstr = NULL;
123 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || 128 } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
124 tok >= CTOK_FIRSTDECL) { 129 tok >= CTOK_FIRSTDECL) {
125 if (cp->sb.n == 0) cp_save(cp, '$'); 130 if (sbufP(&cp->sb) == sbufB(&cp->sb)) cp_save(cp, '$');
126 cp_save(cp, '\0'); 131 cp_save(cp, '\0');
127 tokstr = cp->sb.buf; 132 tokstr = sbufB(&cp->sb);
128 } else { 133 } else {
129 tokstr = cp_tok2str(cp, tok); 134 tokstr = cp_tok2str(cp, tok);
130 } 135 }
131 L = cp->L; 136 L = cp->L;
132 va_start(argp, em); 137 va_start(argp, em);
133 msg = lj_str_pushvf(L, err2msg(em), argp); 138 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
134 va_end(argp); 139 va_end(argp);
135 if (tokstr) 140 if (tokstr)
136 msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); 141 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
137 if (cp->linenumber > 1) 142 if (cp->linenumber > 1)
138 msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); 143 msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
139 lj_err_callermsg(L, msg); 144 lj_err_callermsg(L, msg);
140} 145}
141 146
@@ -164,7 +169,7 @@ static CPToken cp_number(CPState *cp)
164 TValue o; 169 TValue o;
165 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 170 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
166 cp_save(cp, '\0'); 171 cp_save(cp, '\0');
167 fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); 172 fmt = lj_strscan_scan((const uint8_t *)sbufB(&cp->sb), &o, STRSCAN_OPT_C);
168 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; 173 if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
169 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; 174 else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
170 else if (!(cp->mode & CPARSE_MODE_SKIP)) 175 else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +182,7 @@ static CPToken cp_number(CPState *cp)
177static CPToken cp_ident(CPState *cp) 182static CPToken cp_ident(CPState *cp)
178{ 183{
179 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); 184 do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
180 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 185 cp->str = lj_buf_str(cp->L, &cp->sb);
181 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); 186 cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
182 if (ctype_type(cp->ct->info) == CT_KW) 187 if (ctype_type(cp->ct->info) == CT_KW)
183 return ctype_cid(cp->ct->info); 188 return ctype_cid(cp->ct->info);
@@ -263,11 +268,11 @@ static CPToken cp_string(CPState *cp)
263 } 268 }
264 cp_get(cp); 269 cp_get(cp);
265 if (delim == '"') { 270 if (delim == '"') {
266 cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); 271 cp->str = lj_buf_str(cp->L, &cp->sb);
267 return CTOK_STRING; 272 return CTOK_STRING;
268 } else { 273 } else {
269 if (cp->sb.n != 1) cp_err_token(cp, '\''); 274 if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
270 cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; 275 cp->val.i32 = (int32_t)(char)*sbufB(&cp->sb);
271 cp->val.id = CTID_INT32; 276 cp->val.id = CTID_INT32;
272 return CTOK_INTEGER; 277 return CTOK_INTEGER;
273 } 278 }
@@ -296,7 +301,7 @@ static void cp_comment_cpp(CPState *cp)
296/* Lexical scanner for C. Only a minimal subset is implemented. */ 301/* Lexical scanner for C. Only a minimal subset is implemented. */
297static CPToken cp_next_(CPState *cp) 302static CPToken cp_next_(CPState *cp)
298{ 303{
299 lj_str_resetbuf(&cp->sb); 304 lj_buf_reset(&cp->sb);
300 for (;;) { 305 for (;;) {
301 if (lj_char_isident(cp->c)) 306 if (lj_char_isident(cp->c))
302 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); 307 return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -385,8 +390,7 @@ static void cp_init(CPState *cp)
385 cp->depth = 0; 390 cp->depth = 0;
386 cp->curpack = 0; 391 cp->curpack = 0;
387 cp->packstack[0] = 255; 392 cp->packstack[0] = 255;
388 lj_str_initbuf(&cp->sb); 393 lj_buf_init(cp->L, &cp->sb);
389 lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
390 lua_assert(cp->p != NULL); 394 lua_assert(cp->p != NULL);
391 cp_get(cp); /* Read-ahead first char. */ 395 cp_get(cp); /* Read-ahead first char. */
392 cp->tok = 0; 396 cp->tok = 0;
@@ -398,7 +402,7 @@ static void cp_init(CPState *cp)
398static void cp_cleanup(CPState *cp) 402static void cp_cleanup(CPState *cp)
399{ 403{
400 global_State *g = G(cp->L); 404 global_State *g = G(cp->L);
401 lj_str_freebuf(g, &cp->sb); 405 lj_buf_free(g, &cp->sb);
402} 406}
403 407
404/* Check and consume optional token. */ 408/* Check and consume optional token. */
@@ -953,8 +957,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
953 957
954/* -- C declaration parser ------------------------------------------------ */ 958/* -- C declaration parser ------------------------------------------------ */
955 959
956#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
957
958/* Reset declaration state to declaration specifier. */ 960/* Reset declaration state to declaration specifier. */
959static void cp_decl_reset(CPDecl *decl) 961static void cp_decl_reset(CPDecl *decl)
960{ 962{
@@ -1031,7 +1033,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
1031 if (cp->tok == CTOK_STRING) { 1033 if (cp->tok == CTOK_STRING) {
1032 GCstr *str = cp->str; 1034 GCstr *str = cp->str;
1033 while (cp_next(cp) == CTOK_STRING) { 1035 while (cp_next(cp) == CTOK_STRING) {
1034 lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); 1036 lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
1035 cp->L->top--; 1037 cp->L->top--;
1036 str = strV(cp->L->top); 1038 str = strV(cp->L->top);
1037 } 1039 }
@@ -1083,44 +1085,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
1083 if (cp->tok == CTOK_IDENT) { 1085 if (cp->tok == CTOK_IDENT) {
1084 GCstr *attrstr = cp->str; 1086 GCstr *attrstr = cp->str;
1085 cp_next(cp); 1087 cp_next(cp);
1086 switch (attrstr->hash) { 1088 switch (lj_cparse_case(attrstr,
1087 case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ 1089 "\007aligned" "\013__aligned__"
1090 "\006packed" "\012__packed__"
1091 "\004mode" "\010__mode__"
1092 "\013vector_size" "\017__vector_size__"
1093#if LJ_TARGET_X86
1094 "\007regparm" "\013__regparm__"
1095 "\005cdecl" "\011__cdecl__"
1096 "\010thiscall" "\014__thiscall__"
1097 "\010fastcall" "\014__fastcall__"
1098 "\007stdcall" "\013__stdcall__"
1099 "\012sseregparm" "\016__sseregparm__"
1100#endif
1101 )) {
1102 case 0: case 1: /* aligned */
1088 cp_decl_align(cp, decl); 1103 cp_decl_align(cp, decl);
1089 break; 1104 break;
1090 case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ 1105 case 2: case 3: /* packed */
1091 decl->attr |= CTFP_PACKED; 1106 decl->attr |= CTFP_PACKED;
1092 break; 1107 break;
1093 case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ 1108 case 4: case 5: /* mode */
1094 cp_decl_mode(cp, decl); 1109 cp_decl_mode(cp, decl);
1095 break; 1110 break;
1096 case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ 1111 case 6: case 7: /* vector_size */
1097 { 1112 {
1098 CTSize vsize = cp_decl_sizeattr(cp); 1113 CTSize vsize = cp_decl_sizeattr(cp);
1099 if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); 1114 if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize));
1100 } 1115 }
1101 break; 1116 break;
1102#if LJ_TARGET_X86 1117#if LJ_TARGET_X86
1103 case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ 1118 case 8: case 9: /* regparm */
1104 CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); 1119 CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp));
1105 decl->fattr |= CTFP_CCONV; 1120 decl->fattr |= CTFP_CCONV;
1106 break; 1121 break;
1107 case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ 1122 case 10: case 11: /* cdecl */
1108 CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); 1123 CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL);
1109 decl->fattr |= CTFP_CCONV; 1124 decl->fattr |= CTFP_CCONV;
1110 break; 1125 break;
1111 case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ 1126 case 12: case 13: /* thiscall */
1112 CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); 1127 CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL);
1113 decl->fattr |= CTFP_CCONV; 1128 decl->fattr |= CTFP_CCONV;
1114 break; 1129 break;
1115 case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ 1130 case 14: case 15: /* fastcall */
1116 CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); 1131 CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL);
1117 decl->fattr |= CTFP_CCONV; 1132 decl->fattr |= CTFP_CCONV;
1118 break; 1133 break;
1119 case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ 1134 case 16: case 17: /* stdcall */
1120 CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); 1135 CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL);
1121 decl->fattr |= CTFP_CCONV; 1136 decl->fattr |= CTFP_CCONV;
1122 break; 1137 break;
1123 case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ 1138 case 18: case 19: /* sseregparm */
1124 decl->fattr |= CTF_SSEREGPARM; 1139 decl->fattr |= CTF_SSEREGPARM;
1125 decl->fattr |= CTFP_CCONV; 1140 decl->fattr |= CTFP_CCONV;
1126 break; 1141 break;
@@ -1152,16 +1167,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl)
1152 while (cp->tok == CTOK_IDENT) { 1167 while (cp->tok == CTOK_IDENT) {
1153 GCstr *attrstr = cp->str; 1168 GCstr *attrstr = cp->str;
1154 cp_next(cp); 1169 cp_next(cp);
1155 switch (attrstr->hash) { 1170 if (cp_str_is(attrstr, "align")) {
1156 case H_(bc2395fa,98f267f8): /* align */
1157 cp_decl_align(cp, decl); 1171 cp_decl_align(cp, decl);
1158 break; 1172 } else { /* Ignore all other attributes. */
1159 default: /* Ignore all other attributes. */
1160 if (cp_opt(cp, '(')) { 1173 if (cp_opt(cp, '(')) {
1161 while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); 1174 while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp);
1162 cp_check(cp, ')'); 1175 cp_check(cp, ')');
1163 } 1176 }
1164 break;
1165 } 1177 }
1166 } 1178 }
1167 cp_check(cp, ')'); 1179 cp_check(cp, ')');
@@ -1741,17 +1753,16 @@ static CTypeID cp_decl_abstract(CPState *cp)
1741static void cp_pragma(CPState *cp, BCLine pragmaline) 1753static void cp_pragma(CPState *cp, BCLine pragmaline)
1742{ 1754{
1743 cp_next(cp); 1755 cp_next(cp);
1744 if (cp->tok == CTOK_IDENT && 1756 if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) {
1745 cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
1746 cp_next(cp); 1757 cp_next(cp);
1747 cp_check(cp, '('); 1758 cp_check(cp, '(');
1748 if (cp->tok == CTOK_IDENT) { 1759 if (cp->tok == CTOK_IDENT) {
1749 if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ 1760 if (cp_str_is(cp->str, "push")) {
1750 if (cp->curpack < CPARSE_MAX_PACKSTACK) { 1761 if (cp->curpack < CPARSE_MAX_PACKSTACK) {
1751 cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; 1762 cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
1752 cp->curpack++; 1763 cp->curpack++;
1753 } 1764 }
1754 } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ 1765 } else if (cp_str_is(cp->str, "pop")) {
1755 if (cp->curpack > 0) cp->curpack--; 1766 if (cp->curpack > 0) cp->curpack--;
1756 } else { 1767 } else {
1757 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); 1768 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
@@ -1773,6 +1784,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
1773 } 1784 }
1774} 1785}
1775 1786
1787/* Handle line number. */
1788static void cp_line(CPState *cp, BCLine hashline)
1789{
1790 BCLine newline = cp->val.u32;
1791 /* TODO: Handle file name and include it in error messages. */
1792 while (cp->tok != CTOK_EOF && cp->linenumber == hashline)
1793 cp_next(cp);
1794 cp->linenumber = newline;
1795}
1796
1776/* Parse multiple C declarations of types or extern identifiers. */ 1797/* Parse multiple C declarations of types or extern identifiers. */
1777static void cp_decl_multi(CPState *cp) 1798static void cp_decl_multi(CPState *cp)
1778{ 1799{
@@ -1785,12 +1806,21 @@ static void cp_decl_multi(CPState *cp)
1785 continue; 1806 continue;
1786 } 1807 }
1787 if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ 1808 if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */
1788 BCLine pragmaline = cp->linenumber; 1809 BCLine hashline = cp->linenumber;
1789 if (!(cp_next(cp) == CTOK_IDENT && 1810 CPToken tok = cp_next(cp);
1790 cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */ 1811 if (tok == CTOK_INTEGER) {
1812 cp_line(cp, hashline);
1813 continue;
1814 } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) {
1815 if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok);
1816 cp_line(cp, hashline);
1817 continue;
1818 } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) {
1819 cp_pragma(cp, hashline);
1820 continue;
1821 } else {
1791 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); 1822 cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
1792 cp_pragma(cp, pragmaline); 1823 }
1793 continue;
1794 } 1824 }
1795 scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); 1825 scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC);
1796 if ((cp->tok == ';' || cp->tok == CTOK_EOF) && 1826 if ((cp->tok == ';' || cp->tok == CTOK_EOF) &&
@@ -1856,8 +1886,6 @@ static void cp_decl_single(CPState *cp)
1856 if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); 1886 if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF);
1857} 1887}
1858 1888
1859#undef H_
1860
1861/* ------------------------------------------------------------------------ */ 1889/* ------------------------------------------------------------------------ */
1862 1890
1863/* Protected callback for C parser. */ 1891/* Protected callback for C parser. */
diff --git a/src/lj_cparse.h b/src/lj_cparse.h
index 87eb3ff4..5f667a7c 100644
--- a/src/lj_cparse.h
+++ b/src/lj_cparse.h
@@ -60,6 +60,8 @@ typedef struct CPState {
60 60
61LJ_FUNC int lj_cparse(CPState *cp); 61LJ_FUNC int lj_cparse(CPState *cp);
62 62
63LJ_FUNC int lj_cparse_case(GCstr *str, const char *match);
64
63#endif 65#endif
64 66
65#endif 67#endif
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 99344b79..6e999cc9 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
11#if LJ_HASJIT && LJ_HASFFI 11#if LJ_HASJIT && LJ_HASFFI
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h"
15#include "lj_tab.h" 14#include "lj_tab.h"
16#include "lj_frame.h" 15#include "lj_frame.h"
17#include "lj_ctype.h" 16#include "lj_ctype.h"
18#include "lj_cdata.h" 17#include "lj_cdata.h"
19#include "lj_cparse.h" 18#include "lj_cparse.h"
20#include "lj_cconv.h" 19#include "lj_cconv.h"
20#include "lj_carith.h"
21#include "lj_clib.h" 21#include "lj_clib.h"
22#include "lj_ccall.h" 22#include "lj_ccall.h"
23#include "lj_ff.h" 23#include "lj_ff.h"
@@ -31,6 +31,7 @@
31#include "lj_snap.h" 31#include "lj_snap.h"
32#include "lj_crecord.h" 32#include "lj_crecord.h"
33#include "lj_dispatch.h" 33#include "lj_dispatch.h"
34#include "lj_strfmt.h"
34 35
35/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
36#define IR(ref) (&J->cur.ir[(ref)]) 37#define IR(ref) (&J->cur.ir[(ref)])
@@ -211,7 +212,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
211 ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); 212 ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
212 ml[i].trofs = trofs; 213 ml[i].trofs = trofs;
213 i++; 214 i++;
214 rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; 215 rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
215 if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ 216 if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
216 rwin = 0; 217 rwin = 0;
217 for ( ; j < i; j++) { 218 for ( ; j < i; j++) {
@@ -441,7 +442,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
441 /* fallthrough */ 442 /* fallthrough */
442 case CCX(I, F): 443 case CCX(I, F):
443 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; 444 if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
444 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); 445 sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
445 goto xstore; 446 goto xstore;
446 case CCX(I, P): 447 case CCX(I, P):
447 case CCX(I, A): 448 case CCX(I, A):
@@ -521,7 +522,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
521 if (st == IRT_CDATA) goto err_nyi; 522 if (st == IRT_CDATA) goto err_nyi;
522 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ 523 /* The signed conversion is cheaper. x64 really has 47 bit pointers. */
523 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, 524 sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
524 st, IRCONV_TRUNC|IRCONV_ANY); 525 st, IRCONV_ANY);
525 goto xstore; 526 goto xstore;
526 527
527 /* Destination is an array. */ 528 /* Destination is an array. */
@@ -640,12 +641,23 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
640 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); 641 sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
641 sid = CTID_A_CCHAR; 642 sid = CTID_A_CCHAR;
642 } 643 }
643 } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ 644 } else if (tref_islightud(sp)) {
645#if LJ_64
646 sp = emitir(IRT(IR_BAND, IRT_P64), sp,
647 lj_ir_kint64(J, U64x(00007fff,ffffffff)));
648#endif
649 } else { /* NYI: tref_istab(sp). */
644 IRType t; 650 IRType t;
645 sid = argv2cdata(J, sp, sval)->ctypeid; 651 sid = argv2cdata(J, sp, sval)->ctypeid;
646 s = ctype_raw(cts, sid); 652 s = ctype_raw(cts, sid);
647 svisnz = cdataptr(cdataV(sval)); 653 svisnz = cdataptr(cdataV(sval));
648 t = crec_ct2irt(cts, s); 654 if (ctype_isfunc(s->info)) {
655 sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
656 s = ctype_get(cts, sid);
657 t = IRT_PTR;
658 } else {
659 t = crec_ct2irt(cts, s);
660 }
649 if (ctype_isptr(s->info)) { 661 if (ctype_isptr(s->info)) {
650 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); 662 sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
651 if (ctype_isref(s->info)) { 663 if (ctype_isref(s->info)) {
@@ -700,6 +712,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
700 return tr; 712 return tr;
701} 713}
702 714
715/* Tailcall to function. */
716static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
717{
718 TRef kfunc = lj_ir_kfunc(J, funcV(tv));
719#if LJ_FR2
720 J->base[-2] = kfunc;
721 J->base[-1] = TREF_FRAME;
722#else
723 J->base[-1] = kfunc | TREF_FRAME;
724#endif
725 rd->nres = -1; /* Pending tailcall. */
726}
727
703/* Record ctype __index/__newindex metamethods. */ 728/* Record ctype __index/__newindex metamethods. */
704static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, 729static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
705 RecordFFData *rd) 730 RecordFFData *rd)
@@ -709,8 +734,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
709 if (!tv) 734 if (!tv)
710 lj_trace_err(J, LJ_TRERR_BADTYPE); 735 lj_trace_err(J, LJ_TRERR_BADTYPE);
711 if (tvisfunc(tv)) { 736 if (tvisfunc(tv)) {
712 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 737 crec_tailcall(J, rd, tv);
713 rd->nres = -1; /* Pending tailcall. */
714 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { 738 } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
715 /* Specialize to result of __index lookup. */ 739 /* Specialize to result of __index lookup. */
716 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); 740 cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@@ -727,6 +751,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
727 } 751 }
728} 752}
729 753
754/* Record bitfield load/store. */
755static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info)
756{
757 IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0);
758 TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0);
759 CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz;
760 lua_assert(t <= IRT_U32); /* NYI: 64 bit bitfields. */
761 if (rd->data == 0) { /* __index metamethod. */
762 if ((info & CTF_BOOL)) {
763 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos))));
764 /* Assume not equal to zero. Fixup and emit pending guard later. */
765 lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0));
766 J->postproc = LJ_POST_FIXGUARD;
767 tr = TREF_TRUE;
768 } else if (!(info & CTF_UNSIGNED)) {
769 tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos));
770 tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift));
771 } else {
772 lua_assert(bsz < 32); /* Full-size fields cannot end up here. */
773 tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos));
774 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1)));
775 /* We can omit the U32 to NUM conversion, since bsz < 32. */
776 }
777 J->base[0] = tr;
778 } else { /* __newindex metamethod. */
779 CTState *cts = ctype_ctsG(J2G(J));
780 CType *ct = ctype_get(cts,
781 (info & CTF_BOOL) ? CTID_BOOL :
782 (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32);
783 int32_t mask = (int32_t)(((1u << bsz)-1) << pos);
784 TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]);
785 sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos));
786 /* Use of the target type avoids forwarding conversions. */
787 sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask));
788 tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask));
789 tr = emitir(IRT(IR_BOR, t), tr, sp);
790 emitir(IRT(IR_XSTORE, t), ptr, tr);
791 rd->nres = 0;
792 J->needsnap = 1;
793 }
794}
795
730void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) 796void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
731{ 797{
732 TRef idx, ptr = J->base[0]; 798 TRef idx, ptr = J->base[0];
@@ -801,6 +867,7 @@ again:
801 CType *fct; 867 CType *fct;
802 fct = lj_ctype_getfield(cts, ct, name, &fofs); 868 fct = lj_ctype_getfield(cts, ct, name, &fofs);
803 if (fct) { 869 if (fct) {
870 ofs += (ptrdiff_t)fofs;
804 /* Always specialize to the field name. */ 871 /* Always specialize to the field name. */
805 emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); 872 emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name));
806 if (ctype_isconstval(fct->info)) { 873 if (ctype_isconstval(fct->info)) {
@@ -812,12 +879,14 @@ again:
812 J->base[0] = lj_ir_kint(J, (int32_t)fct->size); 879 J->base[0] = lj_ir_kint(J, (int32_t)fct->size);
813 return; /* Interpreter will throw for newindex. */ 880 return; /* Interpreter will throw for newindex. */
814 } else if (ctype_isbitfield(fct->info)) { 881 } else if (ctype_isbitfield(fct->info)) {
815 lj_trace_err(J, LJ_TRERR_NYICONV); 882 if (ofs)
883 ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
884 crec_index_bf(J, rd, ptr, fct->info);
885 return;
816 } else { 886 } else {
817 lua_assert(ctype_isfield(fct->info)); 887 lua_assert(ctype_isfield(fct->info));
818 sid = ctype_cid(fct->info); 888 sid = ctype_cid(fct->info);
819 } 889 }
820 ofs += (ptrdiff_t)fofs;
821 } 890 }
822 } else if (ctype_iscomplex(ct->info)) { 891 } else if (ctype_iscomplex(ct->info)) {
823 if (name->len == 2 && 892 if (name->len == 2 &&
@@ -867,21 +936,17 @@ again:
867} 936}
868 937
869/* Record setting a finalizer. */ 938/* Record setting a finalizer. */
870static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) 939static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
871{ 940{
872 TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); 941 if (tvisgcv(fin)) {
873 TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); 942 if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
874 if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } 943 } else if (tvisnil(fin)) {
875 if (tvisfunc(fin)) { 944 trfin = lj_ir_kptr(J, NULL);
876 emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
877 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
878 } else if (tviscdata(fin)) {
879 emitir(IRT(IR_XSTORE, IRT_P32), trlo,
880 lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
881 emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
882 } else { 945 } else {
883 lj_trace_err(J, LJ_TRERR_BADTYPE); 946 lj_trace_err(J, LJ_TRERR_BADTYPE);
884 } 947 }
948 lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
949 trfin, lj_ir_kint(J, (int32_t)itype(fin)));
885 J->needsnap = 1; 950 J->needsnap = 1;
886} 951}
887 952
@@ -892,10 +957,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
892 CTSize sz; 957 CTSize sz;
893 CTInfo info = lj_ctype_info(cts, id, &sz); 958 CTInfo info = lj_ctype_info(cts, id, &sz);
894 CType *d = ctype_raw(cts, id); 959 CType *d = ctype_raw(cts, id);
895 TRef trid; 960 TRef trcd, trid = lj_ir_kint(J, id);
896 if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) 961 cTValue *fin;
897 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
898 trid = lj_ir_kint(J, id);
899 /* Use special instruction to box pointer or 32/64 bit integer. */ 962 /* Use special instruction to box pointer or 32/64 bit integer. */
900 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { 963 if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
901 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : 964 TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -903,11 +966,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
903 sz == 4 ? lj_ir_kint(J, 0) : 966 sz == 4 ? lj_ir_kint(J, 0) :
904 (lj_needsplit(J), lj_ir_kint64(J, 0)); 967 (lj_needsplit(J), lj_ir_kint64(J, 0));
905 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); 968 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
969 return;
906 } else { 970 } else {
907 TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); 971 TRef trsz = TREF_NIL;
908 cTValue *fin; 972 if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
909 J->base[0] = trcd; 973 CTSize sz0, sz1;
910 if (J->base[1] && !J->base[2] && 974 if (!J->base[1] || J->base[2])
975 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
976 trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
977 J->base[1], &rd->argv[1]);
978 sz0 = lj_ctype_vlsize(cts, d, 0);
979 sz1 = lj_ctype_vlsize(cts, d, 1);
980 trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
981 trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
982 J->base[1] = 0; /* Simplify logic below. */
983 } else if (ctype_align(info) > CT_MEMALIGN) {
984 trsz = lj_ir_kint(J, sz);
985 }
986 trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
987 if (sz > 128 || (info & CTF_VLA)) {
988 TRef dp;
989 CTSize align;
990 special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
991 if (J->base[1])
992 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
993 dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
994 if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
995 align = ctype_align(info);
996 if (align < CT_MEMALIGN) align = CT_MEMALIGN;
997 crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
998 } else if (J->base[1] && !J->base[2] &&
911 !lj_cconv_multi_init(cts, d, &rd->argv[1])) { 999 !lj_cconv_multi_init(cts, d, &rd->argv[1])) {
912 goto single_init; 1000 goto single_init;
913 } else if (ctype_isarray(d->info)) { 1001 } else if (ctype_isarray(d->info)) {
@@ -918,8 +1006,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
918 TValue *sval = &tv; 1006 TValue *sval = &tv;
919 MSize i; 1007 MSize i;
920 tv.u64 = 0; 1008 tv.u64 = 0;
921 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) 1009 if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
922 lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ 1010 esize * CREC_FILL_MAXUNROLL < sz)
1011 goto special;
923 for (i = 1, ofs = 0; ofs < sz; ofs += esize) { 1012 for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
924 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, 1013 TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
925 lj_ir_kintp(J, ofs + sizeof(GCcdata))); 1014 lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -976,11 +1065,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
976 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); 1065 crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
977 } 1066 }
978 } 1067 }
979 /* Handle __gc metamethod. */
980 fin = lj_ctype_meta(cts, id, MM_gc);
981 if (fin)
982 crec_finalizer(J, trcd, fin);
983 } 1068 }
1069 J->base[0] = trcd;
1070 /* Handle __gc metamethod. */
1071 fin = lj_ctype_meta(cts, id, MM_gc);
1072 if (fin)
1073 crec_finalizer(J, trcd, 0, fin);
984} 1074}
985 1075
986/* Record argument conversions. */ 1076/* Record argument conversions. */
@@ -1040,7 +1130,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
1040 else 1130 else
1041 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); 1131 tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
1042 } 1132 }
1043 } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { 1133 } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) {
1044 lj_needsplit(J); 1134 lj_needsplit(J);
1045 } 1135 }
1046#if LJ_TARGET_X86 1136#if LJ_TARGET_X86
@@ -1086,20 +1176,20 @@ static void crec_snap_caller(jit_State *J)
1086 lua_State *L = J->L; 1176 lua_State *L = J->L;
1087 TValue *base = L->base, *top = L->top; 1177 TValue *base = L->base, *top = L->top;
1088 const BCIns *pc = J->pc; 1178 const BCIns *pc = J->pc;
1089 TRef ftr = J->base[-1]; 1179 TRef ftr = J->base[-1-LJ_FR2];
1090 ptrdiff_t delta; 1180 ptrdiff_t delta;
1091 if (!frame_islua(base-1) || J->framedepth <= 0) 1181 if (!frame_islua(base-1) || J->framedepth <= 0)
1092 lj_trace_err(J, LJ_TRERR_NYICALL); 1182 lj_trace_err(J, LJ_TRERR_NYICALL);
1093 J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); 1183 J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
1094 L->top = base; L->base = base - delta; 1184 L->top = base; L->base = base - delta;
1095 J->base[-1] = TREF_FALSE; 1185 J->base[-1-LJ_FR2] = TREF_FALSE;
1096 J->base -= delta; J->baseslot -= (BCReg)delta; 1186 J->base -= delta; J->baseslot -= (BCReg)delta;
1097 J->maxslot = (BCReg)delta; J->framedepth--; 1187 J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
1098 lj_snap_add(J); 1188 lj_snap_add(J);
1099 L->base = base; L->top = top; 1189 L->base = base; L->top = top;
1100 J->framedepth++; J->maxslot = 1; 1190 J->framedepth++; J->maxslot = 1;
1101 J->base += delta; J->baseslot += (BCReg)delta; 1191 J->base += delta; J->baseslot += (BCReg)delta;
1102 J->base[-1] = ftr; J->pc = pc; 1192 J->base[-1-LJ_FR2] = ftr; J->pc = pc;
1103} 1193}
1104 1194
1105/* Record function call. */ 1195/* Record function call. */
@@ -1191,8 +1281,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
1191 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); 1281 tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
1192 if (tv) { 1282 if (tv) {
1193 if (tvisfunc(tv)) { 1283 if (tvisfunc(tv)) {
1194 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1284 crec_tailcall(J, rd, tv);
1195 rd->nres = -1; /* Pending tailcall. */
1196 return; 1285 return;
1197 } 1286 }
1198 } else if (mm == MM_new) { 1287 } else if (mm == MM_new) {
@@ -1233,7 +1322,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
1233 for (i = 0; i < 2; i++) { 1322 for (i = 0; i < 2; i++) {
1234 IRType st = tref_type(sp[i]); 1323 IRType st = tref_type(sp[i]);
1235 if (st == IRT_NUM || st == IRT_FLOAT) 1324 if (st == IRT_NUM || st == IRT_FLOAT)
1236 sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); 1325 sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
1237 else if (!(st == IRT_I64 || st == IRT_U64)) 1326 else if (!(st == IRT_I64 || st == IRT_U64))
1238 sp[i] = emitconv(sp[i], dt, IRT_INT, 1327 sp[i] = emitconv(sp[i], dt, IRT_INT,
1239 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); 1328 (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1302,15 +1391,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
1302 CTypeID id; 1391 CTypeID id;
1303#if LJ_64 1392#if LJ_64
1304 if (t == IRT_NUM || t == IRT_FLOAT) 1393 if (t == IRT_NUM || t == IRT_FLOAT)
1305 tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); 1394 tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
1306 else if (!(t == IRT_I64 || t == IRT_U64)) 1395 else if (!(t == IRT_I64 || t == IRT_U64))
1307 tr = emitconv(tr, IRT_INTP, IRT_INT, 1396 tr = emitconv(tr, IRT_INTP, IRT_INT,
1308 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); 1397 ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
1309#else 1398#else
1310 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { 1399 if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
1311 tr = emitconv(tr, IRT_INTP, t, 1400 tr = emitconv(tr, IRT_INTP, t,
1312 (t == IRT_NUM || t == IRT_FLOAT) ? 1401 (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
1313 IRCONV_TRUNC|IRCONV_ANY : 0);
1314 } 1402 }
1315#endif 1403#endif
1316 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); 1404 tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1342,8 +1430,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
1342 } 1430 }
1343 if (tv) { 1431 if (tv) {
1344 if (tvisfunc(tv)) { 1432 if (tvisfunc(tv)) {
1345 J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; 1433 crec_tailcall(J, rd, tv);
1346 rd->nres = -1; /* Pending tailcall. */
1347 return 0; 1434 return 0;
1348 } /* NYI: non-function metamethods. */ 1435 } /* NYI: non-function metamethods. */
1349 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ 1436 } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */
@@ -1453,8 +1540,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
1453 !irt_isguard(J->guardemit)) { 1540 !irt_isguard(J->guardemit)) {
1454 const BCIns *pc = frame_contpc(J->L->base-1) - 1; 1541 const BCIns *pc = frame_contpc(J->L->base-1) - 1;
1455 if (bc_op(*pc) <= BC_ISNEP) { 1542 if (bc_op(*pc) <= BC_ISNEP) {
1456 setframe_pc(&J2G(J)->tmptv, pc); 1543 J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
1457 J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
1458 J->postproc = LJ_POST_FIXCOMP; 1544 J->postproc = LJ_POST_FIXCOMP;
1459 } 1545 }
1460 } 1546 }
@@ -1643,7 +1729,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
1643void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) 1729void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
1644{ 1730{
1645 argv2cdata(J, J->base[0], &rd->argv[0]); 1731 argv2cdata(J, J->base[0], &rd->argv[0]);
1646 crec_finalizer(J, J->base[0], &rd->argv[1]); 1732 if (!J->base[1])
1733 lj_trace_err(J, LJ_TRERR_BADTYPE);
1734 crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
1735}
1736
1737/* -- 64 bit bit.* library functions -------------------------------------- */
1738
1739/* Determine bit operation type from argument type. */
1740static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
1741{
1742 if (tviscdata(tv)) {
1743 CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
1744 if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
1745 if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
1746 CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
1747 return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
1748 return CTID_INT64; /* Otherwise use int64_t. */
1749 }
1750 return 0; /* Use regular 32 bit ops. */
1751}
1752
1753void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
1754{
1755 CTState *cts = ctype_ctsG(J2G(J));
1756 TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1757 J->base[0], &rd->argv[0]);
1758 if (!tref_isinteger(tr))
1759 tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
1760 J->base[0] = tr;
1761}
1762
1763int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
1764{
1765 CTState *cts = ctype_ctsG(J2G(J));
1766 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1767 if (id) {
1768 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1769 tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
1770 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1771 return 1;
1772 }
1773 return 0;
1774}
1775
1776int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
1777{
1778 CTState *cts = ctype_ctsG(J2G(J));
1779 CTypeID id = 0;
1780 MSize i;
1781 for (i = 0; J->base[i] != 0; i++) {
1782 CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
1783 if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
1784 }
1785 if (id) {
1786 CType *ct = ctype_get(cts, id);
1787 uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
1788 TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
1789 for (i = 1; J->base[i] != 0; i++) {
1790 TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
1791 tr = emitir(ot, tr, tr2);
1792 }
1793 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1794 return 1;
1795 }
1796 return 0;
1797}
1798
1799int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
1800{
1801 CTState *cts = ctype_ctsG(J2G(J));
1802 CTypeID id;
1803 TRef tsh = 0;
1804 if (J->base[0] && tref_iscdata(J->base[1])) {
1805 tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
1806 J->base[1], &rd->argv[1]);
1807 if (!tref_isinteger(tsh))
1808 tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
1809 J->base[1] = tsh;
1810 }
1811 id = crec_bit64_type(cts, &rd->argv[0]);
1812 if (id) {
1813 TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1814 uint32_t op = rd->data;
1815 if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
1816 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1817 !tref_isk(tsh))
1818 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
1819#ifdef LJ_TARGET_UNIFYROT
1820 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
1821 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
1822 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
1823 }
1824#endif
1825 tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
1826 J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
1827 return 1;
1828 }
1829 return 0;
1830}
1831
1832TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
1833{
1834 CTState *cts = ctype_ctsG(J2G(J));
1835 CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
1836 TRef tr, trsf = J->base[1];
1837 SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
1838 int32_t n;
1839 if (trsf) {
1840 CTypeID id2 = 0;
1841 n = (int32_t)lj_carith_check64(J->L, 2, &id2);
1842 if (id2)
1843 trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
1844 else
1845 trsf = lj_opt_narrow_tobit(J, trsf);
1846 emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
1847 } else {
1848 n = id ? 16 : 8;
1849 }
1850 if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
1851 sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
1852 if (id) {
1853 tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
1854 if (n < 16)
1855 tr = emitir(IRT(IR_BAND, IRT_U64), tr,
1856 lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
1857 } else {
1858 tr = lj_opt_narrow_tobit(J, J->base[0]);
1859 if (n < 8)
1860 tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
1861 tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
1862 lj_needsplit(J);
1863 }
1864 return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
1647} 1865}
1648 1866
1649/* -- Miscellaneous library functions ------------------------------------- */ 1867/* -- Miscellaneous library functions ------------------------------------- */
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index 941c8adb..4a8465ad 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,6 +25,13 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); 25LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); 26LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); 27LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
28
29LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
30LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
31LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
32LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
33LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
34
28LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); 35LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
29#endif 36#endif
30 37
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index a338e8e6..7e96e1bc 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,8 +11,10 @@
11#include "lj_err.h" 11#include "lj_err.h"
12#include "lj_str.h" 12#include "lj_str.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_strfmt.h"
14#include "lj_ctype.h" 15#include "lj_ctype.h"
15#include "lj_ccallback.h" 16#include "lj_ccallback.h"
17#include "lj_buf.h"
16 18
17/* -- C type definitions -------------------------------------------------- */ 19/* -- C type definitions -------------------------------------------------- */
18 20
@@ -37,6 +39,8 @@
37 _("uint64_t", UINT64) \ 39 _("uint64_t", UINT64) \
38 _("intptr_t", INT_PSZ) \ 40 _("intptr_t", INT_PSZ) \
39 _("uintptr_t", UINT_PSZ) \ 41 _("uintptr_t", UINT_PSZ) \
42 /* From POSIX. */ \
43 _("ssize_t", INT_PSZ) \
40 /* End of typedef list. */ 44 /* End of typedef list. */
41 45
42/* Keywords (only the ones we actually care for). */ 46/* Keywords (only the ones we actually care for). */
@@ -568,19 +572,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
568/* Convert complex to string with 'i' or 'I' suffix. */ 572/* Convert complex to string with 'i' or 'I' suffix. */
569GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) 573GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
570{ 574{
571 char buf[2*LJ_STR_NUMBUF+2+1]; 575 SBuf *sb = lj_buf_tmp_(L);
572 TValue re, im; 576 TValue re, im;
573 size_t len;
574 if (size == 2*sizeof(double)) { 577 if (size == 2*sizeof(double)) {
575 re.n = *(double *)sp; im.n = ((double *)sp)[1]; 578 re.n = *(double *)sp; im.n = ((double *)sp)[1];
576 } else { 579 } else {
577 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; 580 re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
578 } 581 }
579 len = lj_str_bufnum(buf, &re); 582 lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
580 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; 583 if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
581 len += lj_str_bufnum(buf+len, &im); 584 lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
582 buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; 585 lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i');
583 return lj_str_new(L, buf, len+1); 586 return lj_buf_str(L, sb);
584} 587}
585 588
586/* -- C type state -------------------------------------------------------- */ 589/* -- C type state -------------------------------------------------------- */
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 8066214f..73cefef8 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -263,7 +263,7 @@ typedef struct CTState {
263/* -- Predefined types ---------------------------------------------------- */ 263/* -- Predefined types ---------------------------------------------------- */
264 264
265/* Target-dependent types. */ 265/* Target-dependent types. */
266#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE 266#if LJ_TARGET_PPC
267#define CTTYDEFP(_) \ 267#define CTTYDEFP(_) \
268 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) 268 _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2))
269#else 269#else
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 04fecfaf..70f77c74 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,12 +9,12 @@
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_debug.h" 11#include "lj_debug.h"
12#include "lj_str.h" 12#include "lj_buf.h"
13#include "lj_tab.h" 13#include "lj_tab.h"
14#include "lj_state.h" 14#include "lj_state.h"
15#include "lj_frame.h" 15#include "lj_frame.h"
16#include "lj_bc.h" 16#include "lj_bc.h"
17#include "lj_vm.h" 17#include "lj_strfmt.h"
18#if LJ_HASJIT 18#if LJ_HASJIT
19#include "lj_jit.h" 19#include "lj_jit.h"
20#endif 20#endif
@@ -24,11 +24,11 @@
24/* Get frame corresponding to a level. */ 24/* Get frame corresponding to a level. */
25cTValue *lj_debug_frame(lua_State *L, int level, int *size) 25cTValue *lj_debug_frame(lua_State *L, int level, int *size)
26{ 26{
27 cTValue *frame, *nextframe, *bot = tvref(L->stack); 27 cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
28 /* Traverse frames backwards. */ 28 /* Traverse frames backwards. */
29 for (nextframe = frame = L->base-1; frame > bot; ) { 29 for (nextframe = frame = L->base-1; frame > bot; ) {
30 if (frame_gc(frame) == obj2gco(L)) 30 if (frame_gc(frame) == obj2gco(L))
31 level++; /* Skip dummy frames. See lj_meta_call(). */ 31 level++; /* Skip dummy frames. See lj_err_optype_call(). */
32 if (level-- == 0) { 32 if (level-- == 0) {
33 *size = (int)(nextframe - frame); 33 *size = (int)(nextframe - frame);
34 return frame; /* Level found. */ 34 return frame; /* Level found. */
@@ -87,8 +87,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
87 if (frame_islua(f)) { 87 if (frame_islua(f)) {
88 f = frame_prevl(f); 88 f = frame_prevl(f);
89 } else { 89 } else {
90 if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && 90 if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
91 (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
92 cf = cframe_raw(cframe_prev(cf)); 91 cf = cframe_raw(cframe_prev(cf));
93 f = frame_prevd(f); 92 f = frame_prevd(f);
94 } 93 }
@@ -142,38 +141,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
142 141
143/* -- Variable names ------------------------------------------------------ */ 142/* -- Variable names ------------------------------------------------------ */
144 143
145/* Read ULEB128 value. */
146static uint32_t debug_read_uleb128(const uint8_t **pp)
147{
148 const uint8_t *p = *pp;
149 uint32_t v = *p++;
150 if (LJ_UNLIKELY(v >= 0x80)) {
151 int sh = 0;
152 v &= 0x7f;
153 do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
154 }
155 *pp = p;
156 return v;
157}
158
159/* Get name of a local variable from slot number and PC. */ 144/* Get name of a local variable from slot number and PC. */
160static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) 145static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
161{ 146{
162 const uint8_t *p = proto_varinfo(pt); 147 const char *p = (const char *)proto_varinfo(pt);
163 if (p) { 148 if (p) {
164 BCPos lastpc = 0; 149 BCPos lastpc = 0;
165 for (;;) { 150 for (;;) {
166 const char *name = (const char *)p; 151 const char *name = p;
167 uint32_t vn = *p++; 152 uint32_t vn = *(const uint8_t *)p;
168 BCPos startpc, endpc; 153 BCPos startpc, endpc;
169 if (vn < VARNAME__MAX) { 154 if (vn < VARNAME__MAX) {
170 if (vn == VARNAME_END) break; /* End of varinfo. */ 155 if (vn == VARNAME_END) break; /* End of varinfo. */
171 } else { 156 } else {
172 while (*p++) ; /* Skip over variable name string. */ 157 do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */
173 } 158 }
174 lastpc = startpc = lastpc + debug_read_uleb128(&p); 159 p++;
160 lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
175 if (startpc > pc) break; 161 if (startpc > pc) break;
176 endpc = startpc + debug_read_uleb128(&p); 162 endpc = startpc + lj_buf_ruleb128(&p);
177 if (pc < endpc && slot-- == 0) { 163 if (pc < endpc && slot-- == 0) {
178 if (vn < VARNAME__MAX) { 164 if (vn < VARNAME__MAX) {
179#define VARNAMESTR(name, str) str "\0" 165#define VARNAMESTR(name, str) str "\0"
@@ -198,7 +184,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
198 TValue *nextframe = size ? frame + size : NULL; 184 TValue *nextframe = size ? frame + size : NULL;
199 GCfunc *fn = frame_func(frame); 185 GCfunc *fn = frame_func(frame);
200 BCPos pc = debug_framepc(L, fn, nextframe); 186 BCPos pc = debug_framepc(L, fn, nextframe);
201 if (!nextframe) nextframe = L->top; 187 if (!nextframe) nextframe = L->top+LJ_FR2;
202 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ 188 if ((int)slot1 < 0) { /* Negative slot number is for varargs. */
203 if (pc != NO_BCPOS) { 189 if (pc != NO_BCPOS) {
204 GCproto *pt = funcproto(fn); 190 GCproto *pt = funcproto(fn);
@@ -208,7 +194,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
208 nextframe = frame; 194 nextframe = frame;
209 frame = frame_prevd(frame); 195 frame = frame_prevd(frame);
210 } 196 }
211 if (frame + slot1 < nextframe) { 197 if (frame + slot1+LJ_FR2 < nextframe) {
212 *name = "(*vararg)"; 198 *name = "(*vararg)";
213 return frame+slot1; 199 return frame+slot1;
214 } 200 }
@@ -219,7 +205,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
219 if (pc != NO_BCPOS && 205 if (pc != NO_BCPOS &&
220 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) 206 (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
221 ; 207 ;
222 else if (slot1 > 0 && frame + slot1 < nextframe) 208 else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
223 *name = "(*temporary)"; 209 *name = "(*temporary)";
224 return frame+slot1; 210 return frame+slot1;
225} 211}
@@ -282,7 +268,7 @@ restart:
282 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); 268 *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
283 if (ip > proto_bc(pt)) { 269 if (ip > proto_bc(pt)) {
284 BCIns insp = ip[-1]; 270 BCIns insp = ip[-1];
285 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && 271 if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
286 bc_d(insp) == bc_b(ins)) 272 bc_d(insp) == bc_b(ins))
287 return "method"; 273 return "method";
288 } 274 }
@@ -299,12 +285,12 @@ restart:
299} 285}
300 286
301/* Deduce function name from caller of a frame. */ 287/* Deduce function name from caller of a frame. */
302const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) 288const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
303{ 289{
304 TValue *pframe; 290 cTValue *pframe;
305 GCfunc *fn; 291 GCfunc *fn;
306 BCPos pc; 292 BCPos pc;
307 if (frame <= tvref(L->stack)) 293 if (frame <= tvref(L->stack)+LJ_FR2)
308 return NULL; 294 return NULL;
309 if (frame_isvarg(frame)) 295 if (frame_isvarg(frame))
310 frame = frame_prevd(frame); 296 frame = frame_prevd(frame);
@@ -330,7 +316,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
330/* -- Source code locations ----------------------------------------------- */ 316/* -- Source code locations ----------------------------------------------- */
331 317
332/* Generate shortened source name. */ 318/* Generate shortened source name. */
333void lj_debug_shortname(char *out, GCstr *str) 319void lj_debug_shortname(char *out, GCstr *str, BCLine line)
334{ 320{
335 const char *src = strdata(str); 321 const char *src = strdata(str);
336 if (*src == '=') { 322 if (*src == '=') {
@@ -344,11 +330,11 @@ void lj_debug_shortname(char *out, GCstr *str)
344 *out++ = '.'; *out++ = '.'; *out++ = '.'; 330 *out++ = '.'; *out++ = '.'; *out++ = '.';
345 } 331 }
346 strcpy(out, src); 332 strcpy(out, src);
347 } else { /* Output [string "string"]. */ 333 } else { /* Output [string "string"] or [builtin:name]. */
348 size_t len; /* Length, up to first control char. */ 334 size_t len; /* Length, up to first control char. */
349 for (len = 0; len < LUA_IDSIZE-12; len++) 335 for (len = 0; len < LUA_IDSIZE-12; len++)
350 if (((const unsigned char *)src)[len] < ' ') break; 336 if (((const unsigned char *)src)[len] < ' ') break;
351 strcpy(out, "[string \""); out += 9; 337 strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
352 if (src[len] != '\0') { /* Must truncate? */ 338 if (src[len] != '\0') { /* Must truncate? */
353 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; 339 if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
354 strncpy(out, src, len); out += len; 340 strncpy(out, src, len); out += len;
@@ -356,7 +342,7 @@ void lj_debug_shortname(char *out, GCstr *str)
356 } else { 342 } else {
357 strcpy(out, src); out += len; 343 strcpy(out, src); out += len;
358 } 344 }
359 strcpy(out, "\"]"); 345 strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
360 } 346 }
361} 347}
362 348
@@ -369,14 +355,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
369 if (isluafunc(fn)) { 355 if (isluafunc(fn)) {
370 BCLine line = debug_frameline(L, fn, nextframe); 356 BCLine line = debug_frameline(L, fn, nextframe);
371 if (line >= 0) { 357 if (line >= 0) {
358 GCproto *pt = funcproto(fn);
372 char buf[LUA_IDSIZE]; 359 char buf[LUA_IDSIZE];
373 lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); 360 lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
374 lj_str_pushf(L, "%s:%d: %s", buf, line, msg); 361 lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
375 return; 362 return;
376 } 363 }
377 } 364 }
378 } 365 }
379 lj_str_pushf(L, "%s", msg); 366 lj_strfmt_pushf(L, "%s", msg);
380} 367}
381 368
382/* Push location string for a bytecode position to Lua stack. */ 369/* Push location string for a bytecode position to Lua stack. */
@@ -386,20 +373,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
386 const char *s = strdata(name); 373 const char *s = strdata(name);
387 MSize i, len = name->len; 374 MSize i, len = name->len;
388 BCLine line = lj_debug_line(pt, pc); 375 BCLine line = lj_debug_line(pt, pc);
389 if (*s == '@') { 376 if (pt->firstline == ~(BCLine)0) {
377 lj_strfmt_pushf(L, "builtin:%s", s);
378 } else if (*s == '@') {
390 s++; len--; 379 s++; len--;
391 for (i = len; i > 0; i--) 380 for (i = len; i > 0; i--)
392 if (s[i] == '/' || s[i] == '\\') { 381 if (s[i] == '/' || s[i] == '\\') {
393 s += i+1; 382 s += i+1;
394 break; 383 break;
395 } 384 }
396 lj_str_pushf(L, "%s:%d", s, line); 385 lj_strfmt_pushf(L, "%s:%d", s, line);
397 } else if (len > 40) { 386 } else if (len > 40) {
398 lj_str_pushf(L, "%p:%d", pt, line); 387 lj_strfmt_pushf(L, "%p:%d", pt, line);
399 } else if (*s == '=') { 388 } else if (*s == '=') {
400 lj_str_pushf(L, "%s:%d", s+1, line); 389 lj_strfmt_pushf(L, "%s:%d", s+1, line);
401 } else { 390 } else {
402 lj_str_pushf(L, "\"%s\":%d", s, line); 391 lj_strfmt_pushf(L, "\"%s\":%d", s, line);
403 } 392 }
404} 393}
405 394
@@ -462,7 +451,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
462 BCLine firstline = pt->firstline; 451 BCLine firstline = pt->firstline;
463 GCstr *name = proto_chunkname(pt); 452 GCstr *name = proto_chunkname(pt);
464 ar->source = strdata(name); 453 ar->source = strdata(name);
465 lj_debug_shortname(ar->short_src, name); 454 lj_debug_shortname(ar->short_src, name, pt->firstline);
466 ar->linedefined = (int)firstline; 455 ar->linedefined = (int)firstline;
467 ar->lastlinedefined = (int)(firstline + pt->numline); 456 ar->lastlinedefined = (int)(firstline + pt->numline);
468 ar->what = (firstline || !pt->numline) ? "Lua" : "main"; 457 ar->what = (firstline || !pt->numline) ? "Lua" : "main";
@@ -552,6 +541,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
552 } 541 }
553} 542}
554 543
544#if LJ_HASPROFILE
545/* Put the chunkname into a buffer. */
546static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
547{
548 GCstr *name = proto_chunkname(pt);
549 const char *p = strdata(name);
550 if (pt->firstline == ~(BCLine)0) {
551 lj_buf_putmem(sb, "[builtin:", 9);
552 lj_buf_putstr(sb, name);
553 lj_buf_putb(sb, ']');
554 return 0;
555 }
556 if (*p == '=' || *p == '@') {
557 MSize len = name->len-1;
558 p++;
559 if (pathstrip) {
560 int i;
561 for (i = len-1; i >= 0; i--)
562 if (p[i] == '/' || p[i] == '\\') {
563 len -= i+1;
564 p = p+i+1;
565 break;
566 }
567 }
568 lj_buf_putmem(sb, p, len);
569 } else {
570 lj_buf_putmem(sb, "[string]", 8);
571 }
572 return 1;
573}
574
575/* Put a compact stack dump into a buffer. */
576void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
577{
578 int level = 0, dir = 1, pathstrip = 1;
579 MSize lastlen = 0;
580 if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */
581 while (level != depth) { /* Loop through all frame. */
582 int size;
583 cTValue *frame = lj_debug_frame(L, level, &size);
584 if (frame) {
585 cTValue *nextframe = size ? frame+size : NULL;
586 GCfunc *fn = frame_func(frame);
587 const uint8_t *p = (const uint8_t *)fmt;
588 int c;
589 while ((c = *p++)) {
590 switch (c) {
591 case 'p': /* Preserve full path. */
592 pathstrip = 0;
593 break;
594 case 'F': case 'f': { /* Dump function name. */
595 const char *name;
596 const char *what = lj_debug_funcname(L, frame, &name);
597 if (what) {
598 if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */
599 GCproto *pt = funcproto(fn);
600 if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */
601 debug_putchunkname(sb, pt, pathstrip);
602 lj_buf_putb(sb, ':');
603 }
604 }
605 lj_buf_putmem(sb, name, (MSize)strlen(name));
606 break;
607 } /* else: can't derive a name, dump module:line. */
608 }
609 /* fallthrough */
610 case 'l': /* Dump module:line. */
611 if (isluafunc(fn)) {
612 GCproto *pt = funcproto(fn);
613 if (debug_putchunkname(sb, pt, pathstrip)) {
614 /* Regular Lua function. */
615 BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
616 pt->firstline;
617 lj_buf_putb(sb, ':');
618 lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
619 }
620 } else if (isffunc(fn)) { /* Dump numbered builtins. */
621 lj_buf_putmem(sb, "[builtin#", 9);
622 lj_strfmt_putint(sb, fn->c.ffid);
623 lj_buf_putb(sb, ']');
624 } else { /* Dump C function address. */
625 lj_buf_putb(sb, '@');
626 lj_strfmt_putptr(sb, fn->c.f);
627 }
628 break;
629 case 'Z': /* Zap trailing separator. */
630 lastlen = sbuflen(sb);
631 break;
632 default:
633 lj_buf_putb(sb, c);
634 break;
635 }
636 }
637 } else if (dir == 1) {
638 break;
639 } else {
640 level -= size; /* Reverse frame order: quickly skip missing level. */
641 }
642 level += dir;
643 }
644 if (lastlen)
645 setsbufP(sb, sbufB(sb) + lastlen); /* Zap trailing separator. */
646}
647#endif
648
555/* Number of frames for the leading and trailing part of a traceback. */ 649/* Number of frames for the leading and trailing part of a traceback. */
556#define TRACEBACK_LEVELS1 12 650#define TRACEBACK_LEVELS1 12
557#define TRACEBACK_LEVELS2 10 651#define TRACEBACK_LEVELS2 10
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 75ea927c..cc7e93d2 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -32,14 +32,18 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx);
32LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); 32LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp);
33LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, 33LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
34 BCReg slot, const char **name); 34 BCReg slot, const char **name);
35LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, 35LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
36 const char **name); 36 const char **name);
37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); 37LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, 38LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
39 cTValue *frame, cTValue *nextframe); 39 cTValue *frame, cTValue *nextframe);
40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); 40LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
41LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, 41LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
42 int ext); 42 int ext);
43#if LJ_HASPROFILE
44LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
45 int depth);
46#endif
43 47
44/* Fixed internal variable names. */ 48/* Fixed internal variable names. */
45#define VARNAMEDEF(_) \ 49#define VARNAMEDEF(_) \
diff --git a/src/lj_def.h b/src/lj_def.h
index b5e26d69..75aaeb79 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
46#include <stdlib.h> 46#include <stdlib.h>
47 47
48/* Various VM limits. */ 48/* Various VM limits. */
49#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ 49#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */
50#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */
51/* Max. total memory allocation. */
52#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
50#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ 53#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
51#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ 54#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */
52#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ 55#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
56#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
53 57
54#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ 58#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
55#define LJ_MAX_HBITS 26 /* Max. hash bits. */ 59#define LJ_MAX_HBITS 26 /* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
57#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ 61#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
58#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ 62#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
59 63
60#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ 64#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
61#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ 65#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
62#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ 66#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
63#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ 67#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
65#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ 69#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
66 70
67#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ 71#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
68#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ 72#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
69 73
70#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ 74#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
71 75
@@ -76,7 +80,6 @@ typedef unsigned int uintptr_t;
76#define LJ_MIN_SBUF 32 /* Min. string buffer length. */ 80#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
77#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ 81#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
78#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ 82#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
79#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
80 83
81/* JIT compiler limits. */ 84/* JIT compiler limits. */
82#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ 85#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
@@ -91,6 +94,9 @@ typedef unsigned int uintptr_t;
91#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) 94#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
92#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) 95#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
93#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) 96#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
97#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
98#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
99#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p))
94 100
95#define checki8(x) ((x) == (int32_t)(int8_t)(x)) 101#define checki8(x) ((x) == (int32_t)(int8_t)(x))
96#define checku8(x) ((x) == (int32_t)(uint8_t)(x)) 102#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
@@ -99,6 +105,8 @@ typedef unsigned int uintptr_t;
99#define checki32(x) ((x) == (int32_t)(x)) 105#define checki32(x) ((x) == (int32_t)(x))
100#define checku32(x) ((x) == (uint32_t)(x)) 106#define checku32(x) ((x) == (uint32_t)(x))
101#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) 107#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
108#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
109#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr32((x)) :1)
102 110
103/* Every half-decent C compiler transforms this into a rotate instruction. */ 111/* Every half-decent C compiler transforms this into a rotate instruction. */
104#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) 112#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 644e9028..8553438c 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10#include "lj_err.h" 10#include "lj_err.h"
11#include "lj_buf.h"
11#include "lj_func.h" 12#include "lj_func.h"
12#include "lj_str.h" 13#include "lj_str.h"
13#include "lj_tab.h" 14#include "lj_tab.h"
@@ -17,6 +18,7 @@
17#include "lj_frame.h" 18#include "lj_frame.h"
18#include "lj_bc.h" 19#include "lj_bc.h"
19#include "lj_ff.h" 20#include "lj_ff.h"
21#include "lj_strfmt.h"
20#if LJ_HASJIT 22#if LJ_HASJIT
21#include "lj_jit.h" 23#include "lj_jit.h"
22#endif 24#endif
@@ -25,6 +27,9 @@
25#endif 27#endif
26#include "lj_trace.h" 28#include "lj_trace.h"
27#include "lj_dispatch.h" 29#include "lj_dispatch.h"
30#if LJ_HASPROFILE
31#include "lj_profile.h"
32#endif
28#include "lj_vm.h" 33#include "lj_vm.h"
29#include "luajit.h" 34#include "luajit.h"
30 35
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
37#include <math.h> 42#include <math.h>
38LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, 43LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
39 lua_State *co); 44 lua_State *co);
45#if !LJ_HASJIT
46#define lj_dispatch_stitch lj_dispatch_ins
47#endif
48#if !LJ_HASPROFILE
49#define lj_dispatch_profile lj_dispatch_ins
50#endif
40 51
41#define GOTFUNC(name) (ASMFunction)name, 52#define GOTFUNC(name) (ASMFunction)name,
42static const ASMFunction dispatch_got[] = { 53static const ASMFunction dispatch_got[] = {
@@ -64,7 +75,7 @@ void lj_dispatch_init(GG_State *GG)
64 for (i = 0; i < GG_NUM_ASMFF; i++) 75 for (i = 0; i < GG_NUM_ASMFF; i++)
65 GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); 76 GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
66#if LJ_TARGET_MIPS 77#if LJ_TARGET_MIPS
67 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); 78 memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
68#endif 79#endif
69} 80}
70 81
@@ -82,11 +93,12 @@ void lj_dispatch_init_hotcount(global_State *g)
82#endif 93#endif
83 94
84/* Internal dispatch mode bits. */ 95/* Internal dispatch mode bits. */
85#define DISPMODE_JIT 0x01 /* JIT compiler on. */ 96#define DISPMODE_CALL 0x01 /* Override call dispatch. */
86#define DISPMODE_REC 0x02 /* Recording active. */ 97#define DISPMODE_RET 0x02 /* Override return dispatch. */
87#define DISPMODE_INS 0x04 /* Override instruction dispatch. */ 98#define DISPMODE_INS 0x04 /* Override instruction dispatch. */
88#define DISPMODE_CALL 0x08 /* Override call dispatch. */ 99#define DISPMODE_JIT 0x10 /* JIT compiler on. */
89#define DISPMODE_RET 0x10 /* Override return dispatch. */ 100#define DISPMODE_REC 0x20 /* Recording active. */
101#define DISPMODE_PROF 0x40 /* Profiling active. */
90 102
91/* Update dispatch table depending on various flags. */ 103/* Update dispatch table depending on various flags. */
92void lj_dispatch_update(global_State *g) 104void lj_dispatch_update(global_State *g)
@@ -98,6 +110,9 @@ void lj_dispatch_update(global_State *g)
98 mode |= G2J(g)->state != LJ_TRACE_IDLE ? 110 mode |= G2J(g)->state != LJ_TRACE_IDLE ?
99 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; 111 (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
100#endif 112#endif
113#if LJ_HASPROFILE
114 mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
115#endif
101 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; 116 mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
102 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; 117 mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
103 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; 118 mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
@@ -126,9 +141,9 @@ void lj_dispatch_update(global_State *g)
126 disp[GG_LEN_DDISP+BC_LOOP] = f_loop; 141 disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
127 142
128 /* Set dynamic instruction dispatch. */ 143 /* Set dynamic instruction dispatch. */
129 if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { 144 if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
130 /* Need to update the whole table. */ 145 /* Need to update the whole table. */
131 if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ 146 if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */
132 /* Copy static dispatch table to dynamic dispatch table. */ 147 /* Copy static dispatch table to dynamic dispatch table. */
133 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); 148 memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
134 /* Overwrite with dynamic return dispatch. */ 149 /* Overwrite with dynamic return dispatch. */
@@ -140,12 +155,13 @@ void lj_dispatch_update(global_State *g)
140 } 155 }
141 } else { 156 } else {
142 /* The recording dispatch also checks for hooks. */ 157 /* The recording dispatch also checks for hooks. */
143 ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; 158 ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
159 (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
144 uint32_t i; 160 uint32_t i;
145 for (i = 0; i < GG_LEN_SDISP; i++) 161 for (i = 0; i < GG_LEN_SDISP; i++)
146 disp[i] = f; 162 disp[i] = f;
147 } 163 }
148 } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { 164 } else if (!(mode & DISPMODE_INS)) {
149 /* Otherwise set dynamic counting ins. */ 165 /* Otherwise set dynamic counting ins. */
150 disp[BC_FORL] = f_forl; 166 disp[BC_FORL] = f_forl;
151 disp[BC_ITERL] = f_iterl; 167 disp[BC_ITERL] = f_iterl;
@@ -251,7 +267,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
251 case LUAJIT_MODE_FUNC: 267 case LUAJIT_MODE_FUNC:
252 case LUAJIT_MODE_ALLFUNC: 268 case LUAJIT_MODE_ALLFUNC:
253 case LUAJIT_MODE_ALLSUBFUNC: { 269 case LUAJIT_MODE_ALLSUBFUNC: {
254 cTValue *tv = idx == 0 ? frame_prev(L->base-1) : 270 cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 :
255 idx > 0 ? L->base + (idx-1) : L->top + idx; 271 idx > 0 ? L->base + (idx-1) : L->top + idx;
256 GCproto *pt; 272 GCproto *pt;
257 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) 273 if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn))
@@ -352,10 +368,19 @@ static void callhook(lua_State *L, int event, BCLine line)
352 /* Top frame, nextframe = NULL. */ 368 /* Top frame, nextframe = NULL. */
353 ar.i_ci = (int)((L->base-1) - tvref(L->stack)); 369 ar.i_ci = (int)((L->base-1) - tvref(L->stack));
354 lj_state_checkstack(L, 1+LUA_MINSTACK); 370 lj_state_checkstack(L, 1+LUA_MINSTACK);
371#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
372 lj_profile_hook_enter(g);
373#else
355 hook_enter(g); 374 hook_enter(g);
375#endif
356 hookf(L, &ar); 376 hookf(L, &ar);
357 lua_assert(hook_active(g)); 377 lua_assert(hook_active(g));
378 setgcref(g->cur_L, obj2gco(L));
379#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
380 lj_profile_hook_leave(g);
381#else
358 hook_leave(g); 382 hook_leave(g);
383#endif
359 } 384 }
360} 385}
361 386
@@ -368,7 +393,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
368 if (bc_op(ins) == BC_UCLO) 393 if (bc_op(ins) == BC_UCLO)
369 ins = pc[bc_j(ins)]; 394 ins = pc[bc_j(ins)];
370 switch (bc_op(ins)) { 395 switch (bc_op(ins)) {
371 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; 396 case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
372 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; 397 case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
373 case BC_TSETM: return bc_a(ins) + nres-1; 398 case BC_TSETM: return bc_a(ins) + nres-1;
374 default: return pt->framesize; 399 default: return pt->framesize;
@@ -492,3 +517,41 @@ out:
492 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ 517 return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
493} 518}
494 519
520#if LJ_HASJIT
521/* Stitch a new trace. */
/* Runs with the saved PC of the current C frame temporarily replaced by pc:
** L->top is recomputed via cur_topslot() for the instruction at pc+1,
** lj_trace_stitch() is called with pc-1, and afterwards the previously saved
** PC is written back. ERRNO_SAVE/ERRNO_RESTORE bracket the whole body.
*/
522void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
523{
524 ERRNO_SAVE
525 lua_State *L = J->L;
526 void *cf = cframe_raw(L->cframe);
527 const BCIns *oldpc = cframe_pc(cf);
528 setcframe_pc(cf, pc);
529 /* Before dispatch, have to bias PC by 1. */
530 L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
531 lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
532 setcframe_pc(cf, oldpc);
533 ERRNO_RESTORE
534}
535#endif
536
537#if LJ_HASPROFILE
538/* Profile dispatch. */
/* Sets the saved PC of the current C frame to pc and L->top to the top slot
** computed by cur_topslot() for the current prototype, then calls
** lj_profile_interpreter(). On return the old saved PC is restored, g->cur_L
** is pointed back at L and the VM state is set to INTERP.
** ERRNO_SAVE/ERRNO_RESTORE bracket the whole body.
*/
539void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
540{
541 ERRNO_SAVE
542 GCfunc *fn = curr_func(L);
543 GCproto *pt = funcproto(fn);
544 void *cf = cframe_raw(L->cframe);
545 const BCIns *oldpc = cframe_pc(cf);
546 global_State *g;
547 setcframe_pc(cf, pc);
548 L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
549 lj_profile_interpreter(L);
550 setcframe_pc(cf, oldpc);
551 g = G(L);
552 setgcref(g->cur_L, obj2gco(L));
553 setvmstate(g, INTERP);
554 ERRNO_RESTORE
555}
556#endif
557
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 17bf93da..4ea6e85d 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -14,6 +14,22 @@
14 14
15#if LJ_TARGET_MIPS 15#if LJ_TARGET_MIPS
16/* Need our own global offset table for the dreaded MIPS calling conventions. */ 16/* Need our own global offset table for the dreaded MIPS calling conventions. */
17
18#ifndef _LJ_VM_H
19LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b);
20#endif
21
22#if LJ_SOFTFP
23#ifndef _LJ_IRCALL_H
24extern double __adddf3(double a, double b);
25extern double __subdf3(double a, double b);
26extern double __muldf3(double a, double b);
27extern double __divdf3(double a, double b);
28#endif
29#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3)
30#else
31#define SFGOTDEF(_)
32#endif
17#if LJ_HASJIT 33#if LJ_HASJIT
18#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) 34#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot)
19#else 35#else
@@ -28,16 +44,19 @@
28#define GOTDEF(_) \ 44#define GOTDEF(_) \
29 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ 45 _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
30 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ 46 _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
31 _(pow) _(fmod) _(ldexp) \ 47 _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
32 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \ 48 _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
49 _(lj_dispatch_profile) _(lj_err_throw) \
33 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ 50 _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
34 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ 51 _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
35 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ 52 _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
36 _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ 53 _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
37 _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ 54 _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \
38 _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ 55 _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
39 _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ 56 _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
40 JITGOTDEF(_) FFIGOTDEF(_) 57 _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
58 _(lj_buf_putstr_upper) _(lj_buf_tostr) \
59 JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
41 60
42enum { 61enum {
43#define GOTENUM(name) LJ_GOT_##name, 62#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +79,7 @@ typedef uint16_t HotCount;
60#define HOTCOUNT_CALL 1 79#define HOTCOUNT_CALL 1
61 80
62/* This solves a circular dependency problem -- bump as needed. Sigh. */ 81/* This solves a circular dependency problem -- bump as needed. Sigh. */
63#define GG_NUM_ASMFF 62 82#define GG_NUM_ASMFF 57
64 83
65#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) 84#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
66#define GG_LEN_SDISP BC_FUNCF 85#define GG_LEN_SDISP BC_FUNCF
@@ -88,6 +107,7 @@ typedef struct GG_State {
88#define J2G(J) (&J2GG(J)->g) 107#define J2G(J) (&J2GG(J)->g)
89#define G2J(gl) (&G2GG(gl)->J) 108#define G2J(gl) (&G2GG(gl)->J)
90#define L2J(L) (&L2GG(L)->J) 109#define L2J(L) (&L2GG(L)->J)
110#define GG_G2J (GG_OFS(J) - GG_OFS(g))
91#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) 111#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
92#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) 112#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
93#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) 113#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
@@ -109,7 +129,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
109/* Instruction dispatch callback for hooks or when recording. */ 129/* Instruction dispatch callback for hooks or when recording. */
110LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); 130LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
111LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); 131LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
112LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); 132#if LJ_HASJIT
133LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
134#endif
135#if LJ_HASPROFILE
136LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
137#endif
113 138
114#if LJ_HASFFI && !defined(_BUILDVM_H) 139#if LJ_HASFFI && !defined(_BUILDVM_H)
115/* Save/restore errno and GetLastError() around hooks, exits and recording. */ 140/* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 6a136e51..25561549 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -207,7 +207,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
207 207
208#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 208#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
209 209
210static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 210static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
211 211
212/* Get/set from constant pointer. */ 212/* Get/set from constant pointer. */
213static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) 213static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
@@ -219,8 +219,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
219 219
220#if !LJ_SOFTFP 220#if !LJ_SOFTFP
221/* Load a number constant into an FPR. */ 221/* Load a number constant into an FPR. */
222static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 222static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
223{ 223{
224 cTValue *tv = ir_knum(ir);
224 int32_t i; 225 int32_t i;
225 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { 226 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
226 uint32_t hi = tv->u32.hi; 227 uint32_t hi = tv->u32.hi;
@@ -308,30 +309,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
308 emit_dm(as, ARMI_MOV, dst, src); 309 emit_dm(as, ARMI_MOV, dst, src);
309} 310}
310 311
311/* Generic load of register from stack slot. */ 312/* Generic load of register with base and (small) offset address. */
312static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 313static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
313{ 314{
314#if LJ_SOFTFP 315#if LJ_SOFTFP
315 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 316 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
316#else 317#else
317 if (r >= RID_MAX_GPR) 318 if (r >= RID_MAX_GPR)
318 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); 319 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
319 else 320 else
320#endif 321#endif
321 emit_lso(as, ARMI_LDR, r, RID_SP, ofs); 322 emit_lso(as, ARMI_LDR, r, base, ofs);
322} 323}
323 324
324/* Generic store of register to stack slot. */ 325/* Generic store of register with base and (small) offset address. */
325static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 326static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
326{ 327{
327#if LJ_SOFTFP 328#if LJ_SOFTFP
328 lua_assert(!irt_isnum(ir->t)); UNUSED(ir); 329 lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
329#else 330#else
330 if (r >= RID_MAX_GPR) 331 if (r >= RID_MAX_GPR)
331 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); 332 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
332 else 333 else
333#endif 334#endif
334 emit_lso(as, ARMI_STR, r, RID_SP, ofs); 335 emit_lso(as, ARMI_STR, r, base, ofs);
335} 336}
336 337
337/* Emit an arithmetic/logic operation with a constant operand. */ 338/* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
new file mode 100644
index 00000000..f09c0f3a
--- /dev/null
+++ b/src/lj_emit_arm64.h
@@ -0,0 +1,419 @@
1/*
2** ARM64 instruction emitter.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4**
5** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6** Sponsored by Cisco Systems, Inc.
7*/
8
9/* -- Constant encoding --------------------------------------------------- */
10
/* Return the value of a constant IR instruction as a 64 bit integer:
** the stored u64 for KINT64, the object/pointer cast to uint64_t for
** KGC, KPTR and KKPTR, and the (sign-extended) 32 bit ir->i otherwise.
** The fallback case asserts that the instruction is KINT or KNULL.
*/
11static uint64_t get_k64val(IRIns *ir)
12{
13 if (ir->o == IR_KINT64) {
14 return ir_kint64(ir)->u64;
15 } else if (ir->o == IR_KGC) {
16 return (uint64_t)ir_kgc(ir);
17 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
18 return (uint64_t)ir_kptr(ir);
19 } else {
20 lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
21 return ir->i; /* Sign-extended. */
22 }
23}
24
25/* Encode constant in K12 format for data processing instructions. */
/* Returns the K12 operand bits for n, or 0 if n is not encodable.
** The magnitude of n must fit in 12 bits, optionally shifted left by 12.
** For negative n the flag 0x40000000 is included in the result; callers
** XOR the result into the opcode (see emit_opk/emit_kdelta).
** The magnitude is computed in unsigned arithmetic, because -n would be a
** signed overflow (undefined behaviour) for the most negative int64_t.
*/
26static uint32_t emit_isk12(int64_t n)
27{
28 uint64_t k = (n < 0) ? ~(uint64_t)n+1u : (uint64_t)n;
29 uint32_t m = (n < 0) ? 0x40000000 : 0;
30 if (k < 0x1000) {
31 return A64I_K12|m|A64F_U12(k);
32 } else if ((k & 0xfff000) == k) {
33 return A64I_K12|m|0x400000|A64F_U12(k>>12);
34 }
35 return 0;
36}
37
38#define emit_clz64(n) __builtin_clzll(n)
39#define emit_ctz64(n) __builtin_ctzll(n)
40
41/* Encode constant in K13 format for logical data processing instructions. */
/* Returns the K13 operand bits for n, or 0 if n cannot be encoded.
** A non-zero is64 allows a 64 bit repeat width; otherwise only the low
** 32 bits of n are considered. The value is first normalized so that bit 0
** is clear (inverting it if needed), then reduced to its smallest repeating
** element of width w, which must consist of a single contiguous run of ones.
*/
42static uint32_t emit_isk13(uint64_t n, int is64)
43{
44 int inv = 0, w = 128, lz, tz;
45 if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */
46 if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */
47 do { /* Find the repeat width. */
48 if (is64 && (uint32_t)(n^(n>>32))) break;
49 n = (uint32_t)n;
50 if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */
51 w = 32; if ((n^(n>>16)) & 0xffff) break;
52 n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
53 n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
54 n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
55 n = n & 0x3; w = 2;
56 } while (0);
57 lz = emit_clz64(n);
58 tz = emit_ctz64(n);
59 if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
60 if (inv)
61 return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
62 else
63 return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
64}
65
/* Check whether the 64 bit pattern n can be expressed as an 8 bit immediate.
** Requires the low 48 bits of n to be zero and bits 54..62 to be either
** 0x100 or 0x0ff. Returns the immediate (bit 63 of n in bit 7, bits 48..54
** of n in bits 0..6) or ~0u if n does not qualify.
** emit_loadk64 passes the result to A64F_FP8 for an FMOV.
*/
66static uint32_t emit_isfpk64(uint64_t n)
67{
68 uint64_t etop9 = ((n >> 54) & 0x1ff);
69 if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
70 return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
71 }
72 return ~0u;
73}
74
75/* -- Emit basic instructions --------------------------------------------- */
76
/* Each of the following helpers stores exactly one instruction word: as->mcp
** is decremented first and the word is written to the new position, i.e.
** machine code is emitted backwards. The function name lists the register
** fields (d, n, m, a) that are OR'ed into the opcode ai.
*/
77static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
78{
79 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
80}
81
82static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
83{
84 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
85}
86
87static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
88{
89 *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
90}
91
92static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
93{
94 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
95}
96
97static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
98{
99 *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
100}
101
102static void emit_d(ASMState *as, A64Ins ai, Reg rd)
103{
104 *--as->mcp = ai | A64F_D(rd);
105}
106
107static void emit_n(ASMState *as, A64Ins ai, Reg rn)
108{
109 *--as->mcp = ai | A64F_N(rn);
110}
111
/* Classify a load/store offset for the instruction ai.
** The access size scale is taken from bits 30..31 of ai.
** Returns 1 if ofs is non-negative, a multiple of (1<<scale) and below
** (4096<<scale); returns -1 if ofs is negative or misaligned but within
** -256..255; returns 0 if neither holds.
** emit_lso uses the scaled U12 form for 1 and the S9 form otherwise.
*/
112static int emit_checkofs(A64Ins ai, int64_t ofs)
113{
114 int scale = (ai >> 30) & 3;
115 if (ofs < 0 || (ofs & ((1<<scale)-1))) {
116 return (ofs >= -256 && ofs <= 255) ? -1 : 0;
117 } else {
118 return (ofs < (4096<<scale)) ? 1 : 0;
119 }
120}
121
/* Emit a load/store of rd at address rn+ofs. The offset must be accepted by
** emit_checkofs() (asserted). If the most recently emitted instruction is the
** same kind of access with the same base register at the directly adjacent
** offset, that instruction is rewritten in place into a pair access instead
** of emitting a second instruction.
**
** The pair form only has a 7 bit signed offset field, scaled by the access
** size. Hence the pair offset must be a multiple of the access size (the
** unscaled S9 form also matches misaligned offsets, which would otherwise be
** silently truncated) and the shifted offset must be masked to 7 bits (a
** negative offset would otherwise set all higher bits of the instruction).
*/
122static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
123{
124 int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
125 lua_assert(ot);
126 /* Combine LDR/STR pairs to LDP/STP. */
127 if ((sc == 2 || sc == 3) &&
128 (!(ai & 0x400000) || rd != rn) &&
129 as->mcp != as->mcloop) {
130 uint32_t prev = *as->mcp & ~A64F_D(31);
131 int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
132 A64Ins aip;
133 if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
134 prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
135 aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
136 } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
137 prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
138 aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
139 ofsm = ofs;
140 } else {
141 goto nopair;
142 }
143 if (!(ofsm & ((1<<sc)-1)) && ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
144 *as->mcp = aip | A64F_N(rn) | (((ofsm >> sc) & 0x7f) << 15) |
145 (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
146 return;
147 }
148 }
149nopair:
150 if (ot == 1)
151 *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
152 else
153 *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
154}
155
156/* -- Emit loads/stores --------------------------------------------------- */
157
158/* Prefer rematerialization of BASE/L from global_State over spills. */
159#define emit_canremat(ref) ((ref) <= ASMREF_L)
160
161/* Try to find an N-step delta relative to other consts with N < lim. */
/* Scans all GPRs that are currently not free and hold a constant
** (ref < REF_TRUE). If the wanted constant k equals such a constant, a
** register move is emitted; if it differs by a K12-encodable amount, a
** single ADD/SUB with immediate is emitted. Returns 1 if an instruction was
** emitted and 0 otherwise. Nothing is attempted for lim <= 1.
** The magnitude of a negative delta is computed in unsigned arithmetic,
** because -delta would overflow for the most negative int64_t.
*/
162static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
163{
164 RegSet work = ~as->freeset & RSET_GPR;
165 if (lim <= 1) return 0; /* Can't beat that. */
166 while (work) {
167 Reg r = rset_picktop(work);
168 IRRef ref = regcost_ref(as->cost[r]);
169 lua_assert(r != rd);
170 if (ref < REF_TRUE) {
171 uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
172 get_k64val(IR(ref));
173 int64_t delta = (int64_t)(k - kx);
174 if (delta == 0) {
175 emit_dm(as, A64I_MOVx, rd, r);
176 return 1;
177 } else {
178 uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
179 if (k12) {
180 emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
181 return 1;
182 }
183 /* Do other ops or multi-step deltas pay off? Probably not.
184 ** E.g. XOR rarely helps with pointer consts.
185 */
186 }
187 }
188 rset_clear(work, r);
189 }
190 return 0; /* Failed. */
191}
192
/* Load the constant u64 into GPR rd. is64 is combined directly into the
** opcodes; the emit_loadi/emit_loadu64 macros pass 0 or A64I_X.
** Strategies, in order of preference:
** 1. A single ORR with a K13 bitmask immediate.
** 2. A move or ADD/SUB relative to a constant already held in another
**    register (emit_kdelta), if that needs fewer instructions.
** 3. One MOVZ or MOVN for the lowest non-trivial 16 bit fragment plus one
**    MOVK for every other fragment that is not already correct.
** Instructions are emitted backwards, so the MOVZ/MOVN emitted last ends up
** in front of the MOVKs.
*/
193static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
194{
195 uint32_t k13 = emit_isk13(u64, is64);
196 if (k13) { /* Can the constant be represented as a bitmask immediate? */
197 emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
198 } else {
199 int i, zeros = 0, ones = 0, neg;
200 if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
201 /* Count homogeneous 16 bit fragments. */
202 for (i = 0; i < 4; i++) {
203 uint64_t frag = (u64 >> i*16) & 0xffff;
204 zeros += (frag == 0);
205 ones += (frag == 0xffff);
206 }
207 neg = ones > zeros; /* Use MOVN if it pays off. */
208 if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
209 int shift = 0, lshift = 0;
210 uint64_t n64 = neg ? ~u64 : u64;
211 if (n64 != 0) {
212 /* Find first/last fragment to be filled. */
213 shift = (63-emit_clz64(n64)) & ~15;
214 lshift = emit_ctz64(n64) & ~15;
215 }
216 /* MOVK requires the original value (u64). */
217 while (shift > lshift) {
218 uint32_t u16 = (u64 >> shift) & 0xffff;
219 /* Skip fragments that are correctly filled by MOVN/MOVZ. */
220 if (u16 != (neg ? 0xffff : 0))
221 emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
222 shift -= 16;
223 }
224 /* But MOVN needs an inverted value (n64). */
225 emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
226 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
227 }
228 }
229}
230
231/* Load a 32 bit constant into a GPR. */
232#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)
233
234/* Load a 64 bit constant into a GPR. */
235#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)
236
237#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))
238
239#define glofs(as, k) \
240 ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
241#define mcpofs(as, k) \
242 ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
243#define checkmcpofs(as, k) \
244 (A64F_S_OK(mcpofs(as, k)>>2, 19))
245
246static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
247
248/* Get/set from constant pointer. */
/* Emit the load/store ai of register r from/to the constant address p.
** If bit 0x00400000 of ai is set (presumably the load bit -- confirm against
** the A64Ins definitions) and p is within range of a PC-relative literal
** load, A64I_LDRLx is used. Otherwise the access is made relative to RID_GL
** if the offset is encodable. As a last resort a register holding p with the
** low 15 bits cleared is allocated (excluding r) and the low 15 bits of p
** become the offset.
*/
249static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
250{
251 /* First, check if ip + offset is in range. */
252 if ((ai & 0x00400000) && checkmcpofs(as, p)) {
253 emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
254 } else {
255 Reg base = RID_GL; /* Next, try GL + offset. */
256 int64_t ofs = glofs(as, p);
257 if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */
258 int64_t i64 = i64ptr(p);
259 base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
260 ofs = i64 & 0x7fffull;
261 }
262 emit_lso(as, ai, r, base, ofs);
263 }
264}
265
266/* Load 64 bit IR constant into register. */
/* r may be a GPR or an FPR (r >= RID_MAX_GPR). For an FPR an FMOV with an
** 8 bit immediate is used if emit_isfpk64() accepts the value. Otherwise the
** constant is loaded from memory relative to RID_GL if its offset is
** encodable. Failing that, it is loaded with a PC-relative literal load or
** materialized with emit_loadu64(); for an FPR destination this goes through
** RID_TMP followed by an FMOV (emitted first, because code is emitted
** backwards).
*/
267static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
268{
269 const uint64_t *k = &ir_k64(ir)->u64;
270 int64_t ofs;
271 if (r >= RID_MAX_GPR) {
272 uint32_t fpk = emit_isfpk64(*k);
273 if (fpk != ~0u) {
274 emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
275 return;
276 }
277 }
278 ofs = glofs(as, k);
279 if (emit_checkofs(A64I_LDRx, ofs)) {
280 emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
281 (r & 31), RID_GL, ofs);
282 } else {
283 if (r >= RID_MAX_GPR) {
284 emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
285 r = RID_TMP;
286 }
287 if (checkmcpofs(as, k))
288 emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
289 else
290 emit_loadu64(as, r, *k);
291 }
292}
293
294/* Get/set global_State fields. */
295#define emit_getgl(as, r, field) \
296 emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
297#define emit_setgl(as, r, field) \
298 emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)
299
300/* Trace number is determined from pc of exit instruction. */
301#define emit_setvmstate(as, i) UNUSED(i)
302
303/* -- Emit control-flow instructions -------------------------------------- */
304
305/* Label for internal jumps. */
306typedef MCode *MCLabel;
307
308/* Return label pointing to current PC. */
309#define emit_label(as) ((as)->mcp)
310
/* All branch emitters below reserve one instruction slot by decrementing
** as->mcp and encode the distance from that slot to target, measured in
** MCode units (pointer difference). The distance is only range-checked by
** an assertion.
*/

/* Conditional branch (A64I_BCC) on condition cond; 19 bit signed delta. */
311static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
312{
313 MCode *p = --as->mcp;
314 ptrdiff_t delta = target - p;
315 lua_assert(A64F_S_OK(delta, 19));
316 *p = A64I_BCC | A64F_S19(delta) | cond;
317}
318
/* Branch instruction ai with a 26 bit signed delta. */
319static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
320{
321 MCode *p = --as->mcp;
322 ptrdiff_t delta = target - p;
323 lua_assert(A64F_S_OK(delta, 26));
324 *p = ai | A64F_S26(delta);
325}
326
/* Branch instruction ai testing one bit of register r; 14 bit signed delta.
** For bit numbers above 31 A64I_X is set and only the low 5 bits of the bit
** number go into the A64F_BIT field.
*/
327static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
328{
329 MCode *p = --as->mcp;
330 ptrdiff_t delta = target - p;
331 lua_assert(bit < 63 && A64F_S_OK(delta, 14));
332 if (bit > 31) ai |= A64I_X;
333 *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
334}
335
/* Branch instruction ai operating on register r; 19 bit signed delta. */
336static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
337{
338 MCode *p = --as->mcp;
339 ptrdiff_t delta = target - p;
340 lua_assert(A64F_S_OK(delta, 19));
341 *p = ai | A64F_S19(delta) | r;
342}
343
344#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
345
/* Emit a call to target. A direct BL is used if the byte distance from the
** reserved instruction slot, divided by 4, fits into 26 signed bits.
** Otherwise the target address is placed in a register allocated from
** RID_X8 up to RID_MAX_GPR (minus RSET_FIXED) and a BLR through that
** register is emitted.
*/
346static void emit_call(ASMState *as, void *target)
347{
348 MCode *p = --as->mcp;
349 ptrdiff_t delta = (char *)target - (char *)p;
350 if (A64F_S_OK(delta>>2, 26)) {
351 *p = A64I_BL | A64F_S26(delta>>2);
352 } else { /* Target out of range: need indirect call. But don't use R0-R7. */
353 Reg r = ra_allock(as, i64ptr(target),
354 RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
355 *p = A64I_BLR | A64F_N(r);
356 }
357}
358
359/* -- Emit generic operations --------------------------------------------- */
360
361/* Generic move between two regs. */
/* For an FPR destination (dst >= RID_MAX_GPR) a single FMOV is emitted,
** double or single precision depending on the IR type of ir.
** For a GPR destination the most recently emitted instruction is inspected
** first (unless as->mcp is at as->mcloop): if it matches the load/store
** pattern below and uses dst as its N field (or as its D field in a store),
** that field is rewritten to src. Then the MOV itself is emitted.
** NOTE(review): both rewrites are computed from the unmodified word 'ins',
** so if both conditions hold only the D rewrite is kept.
*/
362static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
363{
364 if (dst >= RID_MAX_GPR) {
365 emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
366 (dst & 31), (src & 31));
367 return;
368 }
369 if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */
370 MCode ins = *as->mcp, swp = (src^dst);
371 if ((ins & 0xbf800000) == 0xb9000000) {
372 if (!((ins ^ (dst << 5)) & 0x000003e0))
373 *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */
374 if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
375 *as->mcp = ins ^ swp; /* Swap D in store. */
376 }
377 }
378 emit_dm(as, A64I_MOVx, dst, src);
379}
380
381/* Generic load of register with base and (small) offset address. */
/* The instruction is selected from the register class and the IR type of ir:
** LDRd/LDRs for FPRs (number vs. other types), LDRx/LDRw for GPRs (64 bit
** vs. other types). The offset must be encodable by emit_lso().
*/
382static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
383{
384 if (r >= RID_MAX_GPR)
385 emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
386 else
387 emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
388}
389
390/* Generic store of register with base and (small) offset address. */
/* Store counterpart of emit_loadofs(): STRd/STRs for FPRs, STRx/STRw for
** GPRs, selected the same way.
*/
391static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
392{
393 if (r >= RID_MAX_GPR)
394 emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
395 else
396 emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
397}
398
399/* Emit an arithmetic operation with a constant operand. */
/* Emits 'dest = src <ai> i'. If i is encodable as a K12 immediate, the
** encoding is XOR'ed into ai and the immediate form is emitted. Otherwise
** the constant is placed in a register allocated from allow and the
** three-register form is emitted.
*/
400static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
401 int32_t i, RegSet allow)
402{
403 uint32_t k = emit_isk12(i);
404 if (k)
405 emit_dn(as, ai^k, dest, src);
406 else
407 emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
408}
409
410/* Add offset to pointer. */
/* Adds the signed byte offset ofs to register r; nothing is emitted for 0.
** Negative offsets are turned into a subtraction of the magnitude, except
** for the most negative int32_t: its negation would overflow, so it is
** added unchanged instead.
*/
411static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
412{
413 int neg = (ofs < 0 && (uint32_t)ofs != 0x80000000u); /* -ofs must not overflow. */
414 if (ofs) emit_opk(as, neg ? A64I_SUBx : A64I_ADDx, r, r,
415 neg ? -ofs : ofs, rset_exclude(RSET_GPR, r));
416}
417
418#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
419
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index f3dcd1dd..bdabcf16 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -3,6 +3,30 @@
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/ 4*/
5 5
6#if LJ_64
/* Return the value of a constant IR instruction as an intptr_t: the stored
** u64 for KINT64, the object/pointer for KGC, KPTR and KKPTR, the raw u64
** bits of a KNUM in soft-float builds, and the (sign-extended) 32 bit ir->i
** otherwise. The fallback case asserts that the instruction is KINT or KNULL.
*/
7static intptr_t get_k64val(IRIns *ir)
8{
9 if (ir->o == IR_KINT64) {
10 return (intptr_t)ir_kint64(ir)->u64;
11 } else if (ir->o == IR_KGC) {
12 return (intptr_t)ir_kgc(ir);
13 } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
14 return (intptr_t)ir_kptr(ir);
15 } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
16 return (intptr_t)ir_knum(ir)->u64;
17 } else {
18 lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
19 return ir->i; /* Sign-extended. */
20 }
21}
22#endif
23
24#if LJ_64
25#define get_kval(ir) get_k64val(ir)
26#else
27#define get_kval(ir) ((ir)->i)
28#endif
29
6/* -- Emit basic instructions --------------------------------------------- */ 30/* -- Emit basic instructions --------------------------------------------- */
7 31
8static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) 32static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
@@ -35,7 +59,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
35 59
36static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) 60static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
37{ 61{
38 if ((as->flags & JIT_F_MIPS32R2)) { 62 if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
39 emit_dta(as, MIPSI_ROTR, dest, src, shift); 63 emit_dta(as, MIPSI_ROTR, dest, src, shift);
40 } else { 64 } else {
41 emit_dst(as, MIPSI_OR, dest, dest, tmp); 65 emit_dst(as, MIPSI_OR, dest, dest, tmp);
@@ -44,13 +68,21 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
44 } 68 }
45} 69}
46 70
71#if LJ_64
/* Emit one instruction word consisting of opcode mi, registers rt and rs
** and the two fields msb and lsb (encoded via MIPSF_M and MIPSF_L).
** The word is stored below as->mcp, i.e. code is emitted backwards.
*/
72static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
73 uint32_t lsb)
74{
75 *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb);
76}
77#endif
78
47/* -- Emit loads/stores --------------------------------------------------- */ 79/* -- Emit loads/stores --------------------------------------------------- */
48 80
49/* Prefer rematerialization of BASE/L from global_State over spills. */ 81/* Prefer rematerialization of BASE/L from global_State over spills. */
50#define emit_canremat(ref) ((ref) <= REF_BASE) 82#define emit_canremat(ref) ((ref) <= REF_BASE)
51 83
52/* Try to find a one step delta relative to another constant. */ 84/* Try to find a one step delta relative to another constant. */
53static int emit_kdelta1(ASMState *as, Reg t, int32_t i) 85static int emit_kdelta1(ASMState *as, Reg t, intptr_t i)
54{ 86{
55 RegSet work = ~as->freeset & RSET_GPR; 87 RegSet work = ~as->freeset & RSET_GPR;
56 while (work) { 88 while (work) {
@@ -58,9 +90,10 @@ static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
58 IRRef ref = regcost_ref(as->cost[r]); 90 IRRef ref = regcost_ref(as->cost[r]);
59 lua_assert(r != t); 91 lua_assert(r != t);
60 if (ref < ASMREF_L) { 92 if (ref < ASMREF_L) {
61 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); 93 intptr_t delta = (intptr_t)((uintptr_t)i -
94 (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(IR(ref))));
62 if (checki16(delta)) { 95 if (checki16(delta)) {
63 emit_tsi(as, MIPSI_ADDIU, t, r, delta); 96 emit_tsi(as, MIPSI_AADDIU, t, r, delta);
64 return 1; 97 return 1;
65 } 98 }
66 } 99 }
@@ -76,8 +109,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
76 emit_ti(as, MIPSI_LI, r, i); 109 emit_ti(as, MIPSI_LI, r, i);
77 } else { 110 } else {
78 if ((i & 0xffff)) { 111 if ((i & 0xffff)) {
79 int32_t jgl = i32ptr(J2G(as->J)); 112 intptr_t jgl = (intptr_t)(void *)J2G(as->J);
80 if ((uint32_t)(i-jgl) < 65536) { 113 if ((uintptr_t)(i-jgl) < 65536) {
81 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); 114 emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
82 return; 115 return;
83 } else if (emit_kdelta1(as, r, i)) { 116 } else if (emit_kdelta1(as, r, i)) {
@@ -92,16 +125,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
92 } 125 }
93} 126}
94 127
128#if LJ_64
129/* Load a 64 bit constant into a GPR. */
/* Strategies, in order:
** 1. Values that fit a signed 32 bit integer are delegated to emit_loadi().
** 2. Values less than 65536 above the address of the global state are
**    formed with one DADDIU relative to RID_JGL (with a bias of 32768).
** 3. A one-step delta to a constant in another register (emit_kdelta1).
** 4. Generic sequence. As code is emitted backwards, it executes as: load
**    the upper 32 bits, shift left and OR in bits 16..31 (or shift by 32 at
**    once if they are zero), then OR in bits 0..15 if they are non-zero.
*/
130static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
131{
132 if (checki32((int64_t)u64)) {
133 emit_loadi(as, r, (int32_t)u64);
134 } else {
135 uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J);
136 if (delta < 65536) {
137 emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768));
138 } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
139 return;
140 } else {
141 /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */
142 if ((u64 & 0xffff)) {
143 emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
144 }
145 if (((u64 >> 16) & 0xffff)) {
146 emit_dta(as, MIPSI_DSLL, r, r, 16);
147 emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff);
148 emit_dta(as, MIPSI_DSLL, r, r, 16);
149 } else {
150 emit_dta(as, MIPSI_DSLL32, r, r, 0);
151 }
152 emit_loadi(as, r, (int32_t)(u64 >> 32));
153 }
154 /* TODO: There are probably more optimization opportunities. */
155 }
156}
157
158#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
159#else
95#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 160#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
161#endif
96 162
97static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 163static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
98static void ra_allockreg(ASMState *as, int32_t k, Reg r); 164static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
99 165
100/* Get/set from constant pointer. */ 166/* Get/set from constant pointer. */
101static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) 167static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
102{ 168{
103 int32_t jgl = i32ptr(J2G(as->J)); 169 intptr_t jgl = (intptr_t)(J2G(as->J));
104 int32_t i = i32ptr(p); 170 intptr_t i = (intptr_t)(p);
105 Reg base; 171 Reg base;
106 if ((uint32_t)(i-jgl) < 65536) { 172 if ((uint32_t)(i-jgl) < 65536) {
107 i = i-jgl-32768; 173 i = i-jgl-32768;
@@ -112,8 +178,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
112 emit_tsi(as, mi, r, base, i); 178 emit_tsi(as, mi, r, base, i);
113} 179}
114 180
115#define emit_loadn(as, r, tv) \ 181#if LJ_64
116 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) 182static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
183{
184 const uint64_t *k = &ir_k64(ir)->u64;
185 Reg r64 = r;
186 if (rset_test(RSET_FPR, r)) {
187 r64 = RID_TMP;
188 emit_tg(as, MIPSI_DMTC1, r64, r);
189 }
190 if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
191 emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0);
192 else
193 emit_loadu64(as, r64, *k);
194}
195#else
196#define emit_loadk64(as, r, ir) \
197 emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
198#endif
117 199
118/* Get/set global_State fields. */ 200/* Get/set global_State fields. */
119static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) 201static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@@ -122,9 +204,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
122} 204}
123 205
124#define emit_getgl(as, r, field) \ 206#define emit_getgl(as, r, field) \
125 emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) 207 emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field))
126#define emit_setgl(as, r, field) \ 208#define emit_setgl(as, r, field) \
127 emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) 209 emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field))
128 210
129/* Trace number is determined from per-trace exit stubs. */ 211/* Trace number is determined from per-trace exit stubs. */
130#define emit_setvmstate(as, i) UNUSED(i) 212#define emit_setvmstate(as, i) UNUSED(i)
@@ -152,16 +234,31 @@ static void emit_jmp(ASMState *as, MCode *target)
152 emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); 234 emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target));
153} 235}
154 236
155static void emit_call(ASMState *as, void *target) 237static void emit_call(ASMState *as, void *target, int needcfa)
156{ 238{
157 MCode *p = as->mcp; 239 MCode *p = as->mcp;
158 *--p = MIPSI_NOP; 240#if LJ_TARGET_MIPSR6
159 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) 241 ptrdiff_t delta = (char *)target - (char *)p;
242 if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */
243 *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu);
244 as->mcp = p;
245 return;
246 }
247#endif
248 *--p = MIPSI_NOP; /* Delay slot. */
249 if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
250#if !LJ_TARGET_MIPSR6
251 *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
252 (((uintptr_t)target >>2) & 0x03ffffffu);
253#else
160 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); 254 *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
161 else /* Target out of range: need indirect call. */ 255#endif
256 } else { /* Target out of range: need indirect call. */
162 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); 257 *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
258 needcfa = 1;
259 }
163 as->mcp = p; 260 as->mcp = p;
164 ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); 261 if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
165} 262}
166 263
167/* -- Emit generic operations --------------------------------------------- */ 264/* -- Emit generic operations --------------------------------------------- */
@@ -178,24 +275,24 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
178 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); 275 emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
179} 276}
180 277
181/* Generic load of register from stack slot. */ 278/* Generic load of register with base and (small) offset address. */
182static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 279static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
183{ 280{
184 if (r < RID_MAX_GPR) 281 if (r < RID_MAX_GPR)
185 emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); 282 emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs);
186 else 283 else
187 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, 284 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
188 (r & 31), RID_SP, ofs); 285 (r & 31), base, ofs);
189} 286}
190 287
191/* Generic store of register to stack slot. */ 288/* Generic store of register with base and (small) offset address. */
192static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 289static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
193{ 290{
194 if (r < RID_MAX_GPR) 291 if (r < RID_MAX_GPR)
195 emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); 292 emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs);
196 else 293 else
197 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, 294 emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
198 (r&31), RID_SP, ofs); 295 (r&31), base, ofs);
199} 296}
200 297
201/* Add offset to pointer. */ 298/* Add offset to pointer. */
@@ -203,7 +300,7 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
203{ 300{
204 if (ofs) { 301 if (ofs) {
205 lua_assert(checki16(ofs)); 302 lua_assert(checki16(ofs));
206 emit_tsi(as, MIPSI_ADDIU, r, r, ofs); 303 emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
207 } 304 }
208} 305}
209 306
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index e993d294..69765528 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
98 98
99#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) 99#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
100 100
101static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); 101static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
102 102
103/* Get/set from constant pointer. */ 103/* Get/set from constant pointer. */
104static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) 104static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
115 emit_tai(as, pi, r, base, i); 115 emit_tai(as, pi, r, base, i);
116} 116}
117 117
118#define emit_loadn(as, r, tv) \ 118#define emit_loadk64(as, r, ir) \
119 emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) 119 emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
120 120
121/* Get/set global_State fields. */ 121/* Get/set global_State fields. */
122static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) 122static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
186 emit_fb(as, PPCI_FMR, dst, src); 186 emit_fb(as, PPCI_FMR, dst, src);
187} 187}
188 188
189/* Generic load of register from stack slot. */ 189/* Generic load of register with base and (small) offset address. */
190static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 190static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
191{ 191{
192 if (r < RID_MAX_GPR) 192 if (r < RID_MAX_GPR)
193 emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); 193 emit_tai(as, PPCI_LWZ, r, base, ofs);
194 else 194 else
195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); 195 emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
196} 196}
197 197
198/* Generic store of register to stack slot. */ 198/* Generic store of register with base and (small) offset address. */
199static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 199static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
200{ 200{
201 if (r < RID_MAX_GPR) 201 if (r < RID_MAX_GPR)
202 emit_tai(as, PPCI_STW, r, RID_SP, ofs); 202 emit_tai(as, PPCI_STW, r, base, ofs);
203 else 203 else
204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); 204 emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
205} 205}
206 206
207/* Emit a compare (for equality) with a constant operand. */ 207/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index 7bbc695b..bc4391a0 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,17 @@
13 if (rex != 0x40) *--(p) = rex; } 13 if (rex != 0x40) *--(p) = rex; }
14#define FORCE_REX 0x200 14#define FORCE_REX 0x200
15#define REX_64 (FORCE_REX|0x080000) 15#define REX_64 (FORCE_REX|0x080000)
16#define VEX_64 0x800000
16#else 17#else
17#define REXRB(p, rr, rb) ((void)0) 18#define REXRB(p, rr, rb) ((void)0)
18#define FORCE_REX 0 19#define FORCE_REX 0
19#define REX_64 0 20#define REX_64 0
21#define VEX_64 0
22#endif
23#if LJ_GC64
24#define REX_GC64 REX_64
25#else
26#define REX_GC64 0
20#endif 27#endif
21 28
22#define emit_i8(as, i) (*--as->mcp = (MCode)(i)) 29#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -31,6 +38,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
31 MCode *p, int delta) 38 MCode *p, int delta)
32{ 39{
33 int n = (int8_t)xo; 40 int n = (int8_t)xo;
41 if (n == -60) { /* VEX-encoded instruction */
42#if LJ_64
43 xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
44#endif
45 *(uint32_t *)(p+delta-5) = (uint32_t)xo;
46 return p+delta-5;
47 }
34#if defined(__GNUC__) 48#if defined(__GNUC__)
35 if (__builtin_constant_p(xo) && n == -2) 49 if (__builtin_constant_p(xo) && n == -2)
36 p[delta-2] = (MCode)(xo >> 24); 50 p[delta-2] = (MCode)(xo >> 24);
@@ -85,26 +99,17 @@ static int32_t ptr2addr(const void *p)
85#define ptr2addr(p) (i32ptr((p))) 99#define ptr2addr(p) (i32ptr((p)))
86#endif 100#endif
87 101
88/* op r, [addr] */
89static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
90{
91 MCode *p = as->mcp;
92 *(int32_t *)(p-4) = ptr2addr(addr);
93#if LJ_64
94 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
95 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
96#else
97 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
98#endif
99}
100
101/* op r, [base+ofs] */ 102/* op r, [base+ofs] */
102static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) 103static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
103{ 104{
104 MCode *p = as->mcp; 105 MCode *p = as->mcp;
105 x86Mode mode; 106 x86Mode mode;
106 if (ra_hasreg(rb)) { 107 if (ra_hasreg(rb)) {
107 if (ofs == 0 && (rb&7) != RID_EBP) { 108 if (LJ_GC64 && rb == RID_RIP) {
109 mode = XM_OFS0;
110 p -= 4;
111 *(int32_t *)p = ofs;
112 } else if (ofs == 0 && (rb&7) != RID_EBP) {
108 mode = XM_OFS0; 113 mode = XM_OFS0;
109 } else if (checki8(ofs)) { 114 } else if (checki8(ofs)) {
110 *--p = (MCode)ofs; 115 *--p = (MCode)ofs;
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
202 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); 207 *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
203 rb = RID_ESP; 208 rb = RID_ESP;
204#endif 209#endif
210 } else if (LJ_GC64 && rb == RID_RIP) {
211 lua_assert(as->mrm.idx == RID_NONE);
212 mode = XM_OFS0;
213 p -= 4;
214 *(int32_t *)p = as->mrm.ofs;
205 } else { 215 } else {
206 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { 216 if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
207 mode = XM_OFS0; 217 mode = XM_OFS0;
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
241 251
242/* -- Emit loads/stores --------------------------------------------------- */ 252/* -- Emit loads/stores --------------------------------------------------- */
243 253
244/* Instruction selection for XMM moves. */
245#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
246#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
247
248/* mov [base+ofs], i */ 254/* mov [base+ofs], i */
249static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) 255static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
250{ 256{
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
259/* Get/set global_State fields. */ 265/* Get/set global_State fields. */
260#define emit_opgl(as, xo, r, field) \ 266#define emit_opgl(as, xo, r, field) \
261 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) 267 emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
262#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) 268#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
263#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) 269#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
264 270
265#define emit_setvmstate(as, i) \ 271#define emit_setvmstate(as, i) \
266 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) 272 (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
285 } 291 }
286} 292}
287 293
294#if LJ_GC64
295#define dispofs(as, k) \
296 ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
297#define mcpofs(as, k) \
298 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
299#define mctopofs(as, k) \
300 ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
301/* mov r, addr */
302#define emit_loada(as, r, addr) \
303 emit_loadu64(as, (r), (uintptr_t)(addr))
304#else
288/* mov r, addr */ 305/* mov r, addr */
289#define emit_loada(as, r, addr) \ 306#define emit_loada(as, r, addr) \
290 emit_loadi(as, (r), ptr2addr((addr))) 307 emit_loadi(as, (r), ptr2addr((addr)))
308#endif
291 309
292#if LJ_64 310#if LJ_64
293/* mov r, imm64 or shorter 32 bit extended load. */ 311/* mov r, imm64 or shorter 32 bit extended load. */
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
299 MCode *p = as->mcp; 317 MCode *p = as->mcp;
300 *(int32_t *)(p-4) = (int32_t)u64; 318 *(int32_t *)(p-4) = (int32_t)u64;
301 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); 319 as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
320#if LJ_GC64
321 } else if (checki32(dispofs(as, u64))) {
322 emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
323 } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
324 /* Since as->realign assumes the code size doesn't change, check
325 ** RIP-relative addressing reachability for both as->mcp and as->mctop.
326 */
327 emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
328#endif
302 } else { /* Full-size 64 bit load. */ 329 } else { /* Full-size 64 bit load. */
303 MCode *p = as->mcp; 330 MCode *p = as->mcp;
304 *(uint64_t *)(p-8) = u64; 331 *(uint64_t *)(p-8) = u64;
@@ -310,13 +337,89 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
310} 337}
311#endif 338#endif
312 339
313/* movsd r, [&tv->n] / xorps r, r */ 340/* op r, [addr] */
314static void emit_loadn(ASMState *as, Reg r, cTValue *tv) 341static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
315{ 342{
316 if (tvispzero(tv)) /* Use xor only for +0. */ 343#if LJ_GC64
317 emit_rr(as, XO_XORPS, r, r); 344 if (checki32(dispofs(as, addr))) {
318 else 345 emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
319 emit_rma(as, XMM_MOVRM(as), r, &tv->n); 346 } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
347 emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
348 } else if (!checki32((intptr_t)addr)) {
349 Reg ra = (rr & 15);
350 if (xo != XO_MOV) {
351 /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
352 uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
353 uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
354 ra = RID_DISPATCH;
355 if (checku32(dispaddr)) {
356 emit_loadi(as, ra, (int32_t)dispaddr);
357 } else { /* Full-size 64 bit load. */
358 MCode *p = as->mcp;
359 *(uint64_t *)(p-8) = dispaddr;
360 p[-9] = (MCode)(XI_MOVri+(ra&7));
361 p[-10] = 0x48 + ((ra>>3)&1);
362 p -= 10;
363 as->mcp = p;
364 }
365 if (xo == XO_GROUP3b) emit_i8(as, i8);
366 }
367 emit_rmro(as, xo, rr, ra, 0);
368 emit_loadu64(as, ra, (uintptr_t)addr);
369 } else
370#endif
371 {
372 MCode *p = as->mcp;
373 *(int32_t *)(p-4) = ptr2addr(addr);
374#if LJ_64
375 p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
376 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
377#else
378 as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
379#endif
380 }
381}
382
383/* Load 64 bit IR constant into register. */
384static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
385{
386 Reg r64;
387 x86Op xo;
388 const uint64_t *k = &ir_k64(ir)->u64;
389 if (rset_test(RSET_FPR, r)) {
390 r64 = r;
391 xo = XO_MOVSD;
392 } else {
393 r64 = r | REX_64;
394 xo = XO_MOV;
395 }
396 if (*k == 0) {
397 emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
398#if LJ_GC64
399 } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
400 (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
401 emit_rma(as, xo, r64, k);
402 } else {
403 if (ir->i) {
404 lua_assert(*k == *(uint64_t*)(as->mctop - ir->i));
405 } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
406 emit_loadu64(as, r, *k);
407 return;
408 } else {
409 /* If all else fails, add the FP constant at the MCode area bottom. */
410 while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
411 *(uint64_t *)as->mcbot = *k;
412 ir->i = (int32_t)(as->mctop - as->mcbot);
413 as->mcbot += 8;
414 as->mclim = as->mcbot + MCLIM_REDZONE;
415 lj_mcode_commitbot(as->J, as->mcbot);
416 }
417 emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
418#else
419 } else {
420 emit_rma(as, xo, r64, k);
421#endif
422 }
320} 423}
321 424
322/* -- Emit control-flow instructions -------------------------------------- */ 425/* -- Emit control-flow instructions -------------------------------------- */
@@ -418,8 +521,10 @@ static void emit_call_(ASMState *as, MCode *target)
418/* Use 64 bit operations to handle 64 bit IR types. */ 521/* Use 64 bit operations to handle 64 bit IR types. */
419#if LJ_64 522#if LJ_64
420#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) 523#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
524#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
421#else 525#else
422#define REX_64IR(ir, r) (r) 526#define REX_64IR(ir, r) (r)
527#define VEX_64IR(ir, r) (r)
423#endif 528#endif
424 529
425/* Generic move between two regs. */ 530/* Generic move between two regs. */
@@ -429,25 +534,25 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
429 if (dst < RID_MAX_GPR) 534 if (dst < RID_MAX_GPR)
430 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); 535 emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
431 else 536 else
432 emit_rr(as, XMM_MOVRR(as), dst, src); 537 emit_rr(as, XO_MOVAPS, dst, src);
433} 538}
434 539
435/* Generic load of register from stack slot. */ 540/* Generic load of register with base and (small) offset address. */
436static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 541static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
437{ 542{
438 if (r < RID_MAX_GPR) 543 if (r < RID_MAX_GPR)
439 emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); 544 emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
440 else 545 else
441 emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); 546 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
442} 547}
443 548
444/* Generic store of register to stack slot. */ 549/* Generic store of register with base and (small) offset address. */
445static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) 550static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
446{ 551{
447 if (r < RID_MAX_GPR) 552 if (r < RID_MAX_GPR)
448 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); 553 emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
449 else 554 else
450 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); 555 emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
451} 556}
452 557
453/* Add offset to pointer. */ 558/* Add offset to pointer. */
@@ -455,9 +560,9 @@ static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
455{ 560{
456 if (ofs) { 561 if (ofs) {
457 if ((as->flags & JIT_F_LEA_AGU)) 562 if ((as->flags & JIT_F_LEA_AGU))
458 emit_rmro(as, XO_LEA, r, r, ofs); 563 emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
459 else 564 else
460 emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); 565 emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
461 } 566 }
462} 567}
463 568
diff --git a/src/lj_err.c b/src/lj_err.c
index caa7487f..52498932 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
16#include "lj_ff.h" 16#include "lj_ff.h"
17#include "lj_trace.h" 17#include "lj_trace.h"
18#include "lj_vm.h" 18#include "lj_vm.h"
19#include "lj_strfmt.h"
19 20
20/* 21/*
21** LuaJIT can either use internal or external frame unwinding: 22** LuaJIT can either use internal or external frame unwinding:
@@ -45,7 +46,8 @@
45** the wrapper function feature. Lua errors thrown through C++ frames 46** the wrapper function feature. Lua errors thrown through C++ frames
46** cannot be caught by C++ code and C++ destructors are not run. 47** cannot be caught by C++ code and C++ destructors are not run.
47** 48**
48** EXT is the default on x64 systems, INT is the default on all other systems. 49** EXT is the default on x64 systems and on Windows, INT is the default on all
50** other systems.
49** 51**
50** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack 52** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
51** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled 53** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
@@ -54,7 +56,6 @@
54** and all C libraries that have callbacks which may be used to call back 56** and all C libraries that have callbacks which may be used to call back
55** into Lua. C++ code must *not* be compiled with -fno-exceptions. 57** into Lua. C++ code must *not* be compiled with -fno-exceptions.
56** 58**
57** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
58** EXT is mandatory on WIN64 since the calling convention has an abundance 59** EXT is mandatory on WIN64 since the calling convention has an abundance
59** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). 60** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
60** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). 61** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
@@ -62,7 +63,7 @@
62 63
63#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND 64#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
64#define LJ_UNWIND_EXT 1 65#define LJ_UNWIND_EXT 1
65#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS 66#elif LJ_TARGET_WINDOWS
66#define LJ_UNWIND_EXT 1 67#define LJ_UNWIND_EXT 1
67#endif 68#endif
68 69
@@ -98,14 +99,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
98 TValue *top = restorestack(L, -nres); 99 TValue *top = restorestack(L, -nres);
99 if (frame < top) { /* Frame reached? */ 100 if (frame < top) { /* Frame reached? */
100 if (errcode) { 101 if (errcode) {
101 L->cframe = cframe_prev(cf);
102 L->base = frame+1; 102 L->base = frame+1;
103 L->cframe = cframe_prev(cf);
103 unwindstack(L, top); 104 unwindstack(L, top);
104 } 105 }
105 return cf; 106 return cf;
106 } 107 }
107 } 108 }
108 if (frame <= tvref(L->stack)) 109 if (frame <= tvref(L->stack)+LJ_FR2)
109 break; 110 break;
110 switch (frame_typep(frame)) { 111 switch (frame_typep(frame)) {
111 case FRAME_LUA: /* Lua frame. */ 112 case FRAME_LUA: /* Lua frame. */
@@ -113,14 +114,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
113 frame = frame_prevl(frame); 114 frame = frame_prevl(frame);
114 break; 115 break;
115 case FRAME_C: /* C frame. */ 116 case FRAME_C: /* C frame. */
116#if LJ_HASFFI
117 unwind_c: 117 unwind_c:
118#endif
119#if LJ_UNWIND_EXT 118#if LJ_UNWIND_EXT
120 if (errcode) { 119 if (errcode) {
121 L->cframe = cframe_prev(cf);
122 L->base = frame_prevd(frame) + 1; 120 L->base = frame_prevd(frame) + 1;
123 unwindstack(L, frame); 121 L->cframe = cframe_prev(cf);
122 unwindstack(L, frame - LJ_FR2);
124 } else if (cf != stopcf) { 123 } else if (cf != stopcf) {
125 cf = cframe_prev(cf); 124 cf = cframe_prev(cf);
126 frame = frame_prevd(frame); 125 frame = frame_prevd(frame);
@@ -143,16 +142,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
143 return cf; 142 return cf;
144 } 143 }
145 if (errcode) { 144 if (errcode) {
146 L->cframe = cframe_prev(cf);
147 L->base = frame_prevd(frame) + 1; 145 L->base = frame_prevd(frame) + 1;
148 unwindstack(L, frame); 146 L->cframe = cframe_prev(cf);
147 unwindstack(L, frame - LJ_FR2);
149 } 148 }
150 return cf; 149 return cf;
151 case FRAME_CONT: /* Continuation frame. */ 150 case FRAME_CONT: /* Continuation frame. */
152#if LJ_HASFFI 151 if (frame_iscont_fficb(frame))
153 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
154 goto unwind_c; 152 goto unwind_c;
155#endif
156 /* fallthrough */ 153 /* fallthrough */
157 case FRAME_VARG: /* Vararg frame. */ 154 case FRAME_VARG: /* Vararg frame. */
158 frame = frame_prevd(frame); 155 frame = frame_prevd(frame);
@@ -166,8 +163,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
166 } 163 }
167 if (frame_typep(frame) == FRAME_PCALL) 164 if (frame_typep(frame) == FRAME_PCALL)
168 hook_leave(G(L)); 165 hook_leave(G(L));
169 L->cframe = cf;
170 L->base = frame_prevd(frame) + 1; 166 L->base = frame_prevd(frame) + 1;
167 L->cframe = cf;
171 unwindstack(L, L->base); 168 unwindstack(L, L->base);
172 } 169 }
173 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); 170 return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -175,8 +172,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
175 } 172 }
176 /* No C frame. */ 173 /* No C frame. */
177 if (errcode) { 174 if (errcode) {
175 L->base = tvref(L->stack)+1+LJ_FR2;
178 L->cframe = NULL; 176 L->cframe = NULL;
179 L->base = tvref(L->stack)+1;
180 unwindstack(L, L->base); 177 unwindstack(L, L->base);
181 if (G(L)->panic) 178 if (G(L)->panic)
182 G(L)->panic(L); 179 G(L)->panic(L);
@@ -194,13 +191,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
194** since various OS, distros and compilers mess up the header installation. 191** since various OS, distros and compilers mess up the header installation.
195*/ 192*/
196 193
197typedef struct _Unwind_Exception
198{
199 uint64_t exclass;
200 void (*excleanup)(int, struct _Unwind_Exception *);
201 uintptr_t p1, p2;
202} __attribute__((__aligned__)) _Unwind_Exception;
203
204typedef struct _Unwind_Context _Unwind_Context; 194typedef struct _Unwind_Context _Unwind_Context;
205 195
206#define _URC_OK 0 196#define _URC_OK 0
@@ -210,8 +200,20 @@ typedef struct _Unwind_Context _Unwind_Context;
210#define _URC_CONTINUE_UNWIND 8 200#define _URC_CONTINUE_UNWIND 8
211#define _URC_FAILURE 9 201#define _URC_FAILURE 9
212 202
203#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
204#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
205#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
206#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
207
213#if !LJ_TARGET_ARM 208#if !LJ_TARGET_ARM
214 209
210typedef struct _Unwind_Exception
211{
212 uint64_t exclass;
213 void (*excleanup)(int, struct _Unwind_Exception *);
214 uintptr_t p1, p2;
215} __attribute__((__aligned__)) _Unwind_Exception;
216
215extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); 217extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
216extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); 218extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
217extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); 219extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
@@ -223,11 +225,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *);
223#define _UA_HANDLER_FRAME 4 225#define _UA_HANDLER_FRAME 4
224#define _UA_FORCE_UNWIND 8 226#define _UA_FORCE_UNWIND 8
225 227
226#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
227#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
228#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
229#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
230
231/* DWARF2 personality handler referenced from interpreter .eh_frame. */ 228/* DWARF2 personality handler referenced from interpreter .eh_frame. */
232LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, 229LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
233 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) 230 uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
@@ -301,10 +298,22 @@ static void err_raise_ext(int errcode)
301} 298}
302#endif 299#endif
303 300
304#else 301#else /* LJ_TARGET_ARM */
305 302
306extern void _Unwind_DeleteException(void *); 303#define _US_VIRTUAL_UNWIND_FRAME 0
307extern int __gnu_unwind_frame (void *, _Unwind_Context *); 304#define _US_UNWIND_FRAME_STARTING 1
305#define _US_ACTION_MASK 3
306#define _US_FORCE_UNWIND 8
307
308typedef struct _Unwind_Control_Block _Unwind_Control_Block;
309
310struct _Unwind_Control_Block {
311 uint64_t exclass;
312 uint32_t misc[20];
313};
314
315extern int _Unwind_RaiseException(_Unwind_Control_Block *);
316extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *);
308extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); 317extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
309extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); 318extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
310 319
@@ -320,35 +329,58 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v)
320 _Unwind_VRS_Set(ctx, 0, r, 0, &v); 329 _Unwind_VRS_Set(ctx, 0, r, 0, &v);
321} 330}
322 331
323#define _US_VIRTUAL_UNWIND_FRAME 0 332extern void lj_vm_unwind_ext(void);
324#define _US_UNWIND_FRAME_STARTING 1
325#define _US_ACTION_MASK 3
326#define _US_FORCE_UNWIND 8
327 333
328/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ 334/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
329LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) 335LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
336 _Unwind_Context *ctx)
330{ 337{
331 void *cf = (void *)_Unwind_GetGR(ctx, 13); 338 void *cf = (void *)_Unwind_GetGR(ctx, 13);
332 lua_State *L = cframe_L(cf); 339 lua_State *L = cframe_L(cf);
333 if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { 340 int errcode;
334 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); 341
342 switch ((state & _US_ACTION_MASK)) {
343 case _US_VIRTUAL_UNWIND_FRAME:
344 if ((state & _US_FORCE_UNWIND)) break;
335 return _URC_HANDLER_FOUND; 345 return _URC_HANDLER_FOUND;
336 } 346 case _US_UNWIND_FRAME_STARTING:
337 if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { 347 if (LJ_UEXCLASS_CHECK(ucb->exclass)) {
338 _Unwind_DeleteException(ucb); 348 errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass);
339 _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); 349 } else {
340 _Unwind_SetGR(ctx, 0, (uint32_t)L); 350 errcode = LUA_ERRRUN;
341 _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); 351 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
352 }
353 cf = err_unwind(L, cf, errcode);
354 if ((state & _US_FORCE_UNWIND) || cf == NULL) break;
355 _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext);
356 _Unwind_SetGR(ctx, 0, (uint32_t)ucb);
357 _Unwind_SetGR(ctx, 1, (uint32_t)errcode);
358 _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ?
359 (uint32_t)lj_vm_unwind_ff_eh :
360 (uint32_t)lj_vm_unwind_c_eh);
342 return _URC_INSTALL_CONTEXT; 361 return _URC_INSTALL_CONTEXT;
362 default:
363 return _URC_FAILURE;
343 } 364 }
344 if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) 365 if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
345 return _URC_FAILURE; 366 return _URC_FAILURE;
346 return _URC_CONTINUE_UNWIND; 367 return _URC_CONTINUE_UNWIND;
347} 368}
348 369
370#if LJ_UNWIND_EXT
371static __thread _Unwind_Control_Block static_uex;
372
373static void err_raise_ext(int errcode)
374{
375 memset(&static_uex, 0, sizeof(static_uex));
376 static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
377 _Unwind_RaiseException(&static_uex);
378}
349#endif 379#endif
350 380
351#elif LJ_TARGET_X64 && LJ_ABI_WIN 381#endif /* LJ_TARGET_ARM */
382
383#elif LJ_ABI_WIN
352 384
353/* 385/*
354** Someone in Redmond owes me several days of my life. A lot of this is 386** Someone in Redmond owes me several days of my life. A lot of this is
@@ -366,6 +398,7 @@ LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx)
366#define WIN32_LEAN_AND_MEAN 398#define WIN32_LEAN_AND_MEAN
367#include <windows.h> 399#include <windows.h>
368 400
401#if LJ_TARGET_X64
369/* Taken from: http://www.nynaeve.net/?p=99 */ 402/* Taken from: http://www.nynaeve.net/?p=99 */
370typedef struct UndocumentedDispatcherContext { 403typedef struct UndocumentedDispatcherContext {
371 ULONG64 ControlPc; 404 ULONG64 ControlPc;
@@ -380,11 +413,14 @@ typedef struct UndocumentedDispatcherContext {
380 ULONG ScopeIndex; 413 ULONG ScopeIndex;
381 ULONG Fill0; 414 ULONG Fill0;
382} UndocumentedDispatcherContext; 415} UndocumentedDispatcherContext;
416#else
417typedef void *UndocumentedDispatcherContext;
418#endif
383 419
384/* Another wild guess. */ 420/* Another wild guess. */
385extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); 421extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
386 422
387#ifdef MINGW_SDK_INIT 423#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
388/* Workaround for broken MinGW64 declaration. */ 424/* Workaround for broken MinGW64 declaration. */
389VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); 425VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
390#define RtlUnwindEx RtlUnwindEx_FIXED 426#define RtlUnwindEx RtlUnwindEx_FIXED
@@ -398,10 +434,15 @@ VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
398#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) 434#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
399#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) 435#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
400 436
401/* Win64 exception handler for interpreter frame. */ 437/* Windows exception handler for interpreter frame. */
402LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, 438LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
403 void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) 439 void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
404{ 440{
441#if LJ_TARGET_X64
442 void *cf = f;
443#else
444 void *cf = (char *)f - CFRAME_OFS_SEH;
445#endif
405 lua_State *L = cframe_L(cf); 446 lua_State *L = cframe_L(cf);
406 int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? 447 int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
407 LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; 448 LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
@@ -419,8 +460,9 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
419 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); 460 setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
420 } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { 461 } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
421 /* Don't catch access violations etc. */ 462 /* Don't catch access violations etc. */
422 return ExceptionContinueSearch; 463 return 1; /* ExceptionContinueSearch */
423 } 464 }
465#if LJ_TARGET_X64
424 /* Unwind the stack and call all handlers for all lower C frames 466 /* Unwind the stack and call all handlers for all lower C frames
425 ** (including ourselves) again with EH_UNWINDING set. Then set 467 ** (including ourselves) again with EH_UNWINDING set. Then set
426 ** rsp = cf, rax = errcode and jump to the specified target. 468 ** rsp = cf, rax = errcode and jump to the specified target.
@@ -430,9 +472,21 @@ LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
430 lj_vm_unwind_c_eh), 472 lj_vm_unwind_c_eh),
431 rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); 473 rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
432 /* RtlUnwindEx should never return. */ 474 /* RtlUnwindEx should never return. */
475#else
476 UNUSED(ctx);
477 UNUSED(dispatch);
478 /* Call all handlers for all lower C frames (including ourselves) again
479 ** with EH_UNWINDING set. Then call the specified function, passing cf
480 ** and errcode.
481 */
482 lj_vm_rtlunwind(cf, (void *)rec,
483 (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
484 (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
485 /* lj_vm_rtlunwind does not return. */
486#endif
433 } 487 }
434 } 488 }
435 return ExceptionContinueSearch; 489 return 1; /* ExceptionContinueSearch */
436} 490}
437 491
438/* Raise Windows exception. */ 492/* Raise Windows exception. */
@@ -450,8 +504,8 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
450{ 504{
451 global_State *g = G(L); 505 global_State *g = G(L);
452 lj_trace_abort(g); 506 lj_trace_abort(g);
453 setgcrefnull(g->jit_L); 507 setmref(g->jit_base, NULL);
454 L->status = 0; 508 L->status = LUA_OK;
455#if LJ_UNWIND_EXT 509#if LJ_UNWIND_EXT
456 err_raise_ext(errcode); 510 err_raise_ext(errcode);
457 /* 511 /*
@@ -495,7 +549,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
495/* Find error function for runtime errors. Requires an extra stack traversal. */ 549/* Find error function for runtime errors. Requires an extra stack traversal. */
496static ptrdiff_t finderrfunc(lua_State *L) 550static ptrdiff_t finderrfunc(lua_State *L)
497{ 551{
498 cTValue *frame = L->base-1, *bot = tvref(L->stack); 552 cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
499 void *cf = L->cframe; 553 void *cf = L->cframe;
500 while (frame > bot && cf) { 554 while (frame > bot && cf) {
501 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ 555 while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
@@ -519,10 +573,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
519 frame = frame_prevd(frame); 573 frame = frame_prevd(frame);
520 break; 574 break;
521 case FRAME_CONT: 575 case FRAME_CONT:
522#if LJ_HASFFI 576 if (frame_iscont_fficb(frame))
523 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
524 cf = cframe_prev(cf); 577 cf = cframe_prev(cf);
525#endif
526 frame = frame_prevd(frame); 578 frame = frame_prevd(frame);
527 break; 579 break;
528 case FRAME_CP: 580 case FRAME_CP:
@@ -533,8 +585,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
533 break; 585 break;
534 case FRAME_PCALL: 586 case FRAME_PCALL:
535 case FRAME_PCALLH: 587 case FRAME_PCALLH:
536 if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ 588 if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
537 return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ 589 return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
538 return 0; 590 return 0;
539 default: 591 default:
540 lua_assert(0); 592 lua_assert(0);
@@ -557,8 +609,9 @@ LJ_NOINLINE void lj_err_run(lua_State *L)
557 lj_err_throw(L, LUA_ERRERR); 609 lj_err_throw(L, LUA_ERRERR);
558 } 610 }
559 L->status = LUA_ERRERR; 611 L->status = LUA_ERRERR;
560 copyTV(L, top, top-1); 612 copyTV(L, top+LJ_FR2, top-1);
561 copyTV(L, top-1, errfunc); 613 copyTV(L, top-1, errfunc);
614 if (LJ_FR2) setnilV(top++);
562 L->top = top+1; 615 L->top = top+1;
563 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ 616 lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
564 } 617 }
@@ -572,7 +625,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
572 va_list argp; 625 va_list argp;
573 va_start(argp, em); 626 va_start(argp, em);
574 if (curr_funcisL(L)) L->top = curr_topL(L); 627 if (curr_funcisL(L)) L->top = curr_topL(L);
575 msg = lj_str_pushvf(L, err2msg(em), argp); 628 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
576 va_end(argp); 629 va_end(argp);
577 lj_debug_addloc(L, msg, L->base-1, NULL); 630 lj_debug_addloc(L, msg, L->base-1, NULL);
578 lj_err_run(L); 631 lj_err_run(L);
@@ -590,11 +643,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
590{ 643{
591 char buff[LUA_IDSIZE]; 644 char buff[LUA_IDSIZE];
592 const char *msg; 645 const char *msg;
593 lj_debug_shortname(buff, src); 646 lj_debug_shortname(buff, src, line);
594 msg = lj_str_pushvf(L, err2msg(em), argp); 647 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
595 msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); 648 msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
596 if (tok) 649 if (tok)
597 lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); 650 lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
598 lj_err_throw(L, LUA_ERRSYNTAX); 651 lj_err_throw(L, LUA_ERRSYNTAX);
599} 652}
600 653
@@ -633,8 +686,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
633 const BCIns *pc = cframe_Lpc(L); 686 const BCIns *pc = cframe_Lpc(L);
634 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { 687 if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
635 const char *tname = lj_typename(o); 688 const char *tname = lj_typename(o);
689 if (LJ_FR2) o++;
636 setframe_pc(o, pc); 690 setframe_pc(o, pc);
637 setframe_gc(o, obj2gco(L)); 691 setframe_gc(o, obj2gco(L), LJ_TTHREAD);
638 L->top = L->base = o+1; 692 L->top = L->base = o+1;
639 err_msgv(L, LJ_ERR_BADCALL, tname); 693 err_msgv(L, LJ_ERR_BADCALL, tname);
640 } 694 }
@@ -649,13 +703,10 @@ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
649 if (frame_islua(frame)) { 703 if (frame_islua(frame)) {
650 pframe = frame_prevl(frame); 704 pframe = frame_prevl(frame);
651 } else if (frame_iscont(frame)) { 705 } else if (frame_iscont(frame)) {
652#if LJ_HASFFI 706 if (frame_iscont_fficb(frame)) {
653 if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
654 pframe = frame; 707 pframe = frame;
655 frame = NULL; 708 frame = NULL;
656 } else 709 } else {
657#endif
658 {
659 pframe = frame_prevd(frame); 710 pframe = frame_prevd(frame);
660#if LJ_HASFFI 711#if LJ_HASFFI
661 /* Remove frame for FFI metamethods. */ 712 /* Remove frame for FFI metamethods. */
@@ -678,7 +729,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
678 const char *msg; 729 const char *msg;
679 va_list argp; 730 va_list argp;
680 va_start(argp, em); 731 va_start(argp, em);
681 msg = lj_str_pushvf(L, err2msg(em), argp); 732 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
682 va_end(argp); 733 va_end(argp);
683 lj_err_callermsg(L, msg); 734 lj_err_callermsg(L, msg);
684} 735}
@@ -698,9 +749,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
698 if (narg < 0 && narg > LUA_REGISTRYINDEX) 749 if (narg < 0 && narg > LUA_REGISTRYINDEX)
699 narg = (int)(L->top - L->base) + narg + 1; 750 narg = (int)(L->top - L->base) + narg + 1;
700 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ 751 if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
701 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); 752 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
702 else 753 else
703 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); 754 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
704 lj_err_callermsg(L, msg); 755 lj_err_callermsg(L, msg);
705} 756}
706 757
@@ -710,7 +761,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
710 const char *msg; 761 const char *msg;
711 va_list argp; 762 va_list argp;
712 va_start(argp, em); 763 va_start(argp, em);
713 msg = lj_str_pushvf(L, err2msg(em), argp); 764 msg = lj_strfmt_pushvf(L, err2msg(em), argp);
714 va_end(argp); 765 va_end(argp);
715 err_argmsg(L, narg, msg); 766 err_argmsg(L, narg, msg);
716} 767}
@@ -740,7 +791,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
740 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; 791 TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
741 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; 792 tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
742 } 793 }
743 msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); 794 msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
744 err_argmsg(L, narg, msg); 795 err_argmsg(L, narg, msg);
745} 796}
746 797
@@ -790,7 +841,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
790 const char *msg; 841 const char *msg;
791 va_list argp; 842 va_list argp;
792 va_start(argp, fmt); 843 va_start(argp, fmt);
793 msg = lj_str_pushvf(L, fmt, argp); 844 msg = lj_strfmt_pushvf(L, fmt, argp);
794 va_end(argp); 845 va_end(argp);
795 lj_err_callermsg(L, msg); 846 lj_err_callermsg(L, msg);
796 return 0; /* unreachable */ 847 return 0; /* unreachable */
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 35b5edd5..efb7c3f3 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex")
96ERRDEF(STRCAPI, "invalid capture index") 96ERRDEF(STRCAPI, "invalid capture index")
97ERRDEF(STRCAPN, "too many captures") 97ERRDEF(STRCAPN, "too many captures")
98ERRDEF(STRCAPU, "unfinished capture") 98ERRDEF(STRCAPU, "unfinished capture")
99ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) 99ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
100ERRDEF(STRFMTR, "invalid format (repeated flags)")
101ERRDEF(STRFMTW, "invalid format (width or precision too long)")
102ERRDEF(STRGSRV, "invalid replacement value (a %s)") 100ERRDEF(STRGSRV, "invalid replacement value (a %s)")
103ERRDEF(BADMODN, "name conflict for module " LUA_QS) 101ERRDEF(BADMODN, "name conflict for module " LUA_QS)
104#if LJ_HASJIT 102#if LJ_HASJIT
@@ -118,7 +116,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
118/* Lexer/parser errors. */ 116/* Lexer/parser errors. */
119ERRDEF(XMODE, "attempt to load chunk with wrong mode") 117ERRDEF(XMODE, "attempt to load chunk with wrong mode")
120ERRDEF(XNEAR, "%s near " LUA_QS) 118ERRDEF(XNEAR, "%s near " LUA_QS)
121ERRDEF(XELEM, "lexical element too long")
122ERRDEF(XLINES, "chunk has too many lines") 119ERRDEF(XLINES, "chunk has too many lines")
123ERRDEF(XLEVELS, "chunk has too many syntax levels") 120ERRDEF(XLEVELS, "chunk has too many syntax levels")
124ERRDEF(XNUMBER, "malformed number") 121ERRDEF(XNUMBER, "malformed number")
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index 1d428590..5282217f 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -27,6 +27,7 @@
27#include "lj_dispatch.h" 27#include "lj_dispatch.h"
28#include "lj_vm.h" 28#include "lj_vm.h"
29#include "lj_strscan.h" 29#include "lj_strscan.h"
30#include "lj_strfmt.h"
30 31
31/* Some local macros to save typing. Undef'd at the end. */ 32/* Some local macros to save typing. Undef'd at the end. */
32#define IR(ref) (&J->cur.ir[(ref)]) 33#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +80,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
79 GCstr *s; 80 GCstr *s;
80 if (!tvisnumber(o)) 81 if (!tvisnumber(o))
81 lj_trace_err(J, LJ_TRERR_BADTYPE); 82 lj_trace_err(J, LJ_TRERR_BADTYPE);
82 if (tvisint(o)) 83 s = lj_strfmt_number(J->L, o);
83 s = lj_str_fromint(J->L, intV(o));
84 else
85 s = lj_str_fromnum(J->L, &o->n);
86 setstrV(J->L, o, s); 84 setstrV(J->L, o, s);
87 return s; 85 return s;
88 } 86 }
@@ -98,27 +96,90 @@ static ptrdiff_t results_wanted(jit_State *J)
98 return -1; 96 return -1;
99} 97}
100 98
101/* Throw error for unsupported variant of fast function. */ 99/* Trace stitching: add continuation below frame to start a new trace. */
102LJ_NORET static void recff_nyiu(jit_State *J) 100static void recff_stitch(jit_State *J)
103{ 101{
104 setfuncV(J->L, &J->errinfo, J->fn); 102 ASMFunction cont = lj_cont_stitch;
105 lj_trace_err_info(J, LJ_TRERR_NYIFFU); 103 lua_State *L = J->L;
104 TValue *base = L->base;
105 BCReg nslot = J->maxslot + 1 + LJ_FR2;
106 TValue *nframe = base + 1 + LJ_FR2;
107 const BCIns *pc = frame_pc(base-1);
108 TValue *pframe = frame_prevl(base-1);
109
110 /* Move func + args up in Lua stack and insert continuation. */
111 memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
112 setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
113 setcont(base-LJ_FR2, cont);
114 setframe_pc(base, pc);
115 setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
116 L->base += 2 + LJ_FR2;
117 L->top += 2 + LJ_FR2;
118
119 /* Ditto for the IR. */
120 memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
121#if LJ_FR2
122 J->base[2] = TREF_FRAME;
123 J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
124 J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
125#else
126 J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
127#endif
128 J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
129 J->base += 2 + LJ_FR2;
130 J->baseslot += 2 + LJ_FR2;
131 J->framedepth++;
132
133 lj_record_stop(J, LJ_TRLINK_STITCH, 0);
134
135 /* Undo Lua stack changes. */
136 memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
137 setframe_pc(base-1, pc);
138 L->base -= 2 + LJ_FR2;
139 L->top -= 2 + LJ_FR2;
106} 140}
107 141
108/* Fallback handler for all fast functions that are not recorded (yet). */ 142/* Fallback handler for fast functions that are not recorded (yet). */
109static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) 143static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
110{ 144{
111 setfuncV(J->L, &J->errinfo, J->fn); 145 if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
112 lj_trace_err_info(J, LJ_TRERR_NYIFF); 146 lj_trace_err_info(J, LJ_TRERR_TRACEUV);
113 UNUSED(rd); 147 } else {
148 /* Can only stitch from Lua call. */
149 if (J->framedepth && frame_islua(J->L->base-1)) {
150 BCOp op = bc_op(*frame_pc(J->L->base-1));
151 /* Stitched trace cannot start with *M op with variable # of args. */
152 if (!(op == BC_CALLM || op == BC_CALLMT ||
153 op == BC_RETM || op == BC_TSETM)) {
154 switch (J->fn->c.ffid) {
155 case FF_error:
156 case FF_debug_sethook:
157 case FF_jit_flush:
158 break; /* Don't stitch across special builtins. */
159 default:
160 recff_stitch(J); /* Use trace stitching. */
161 rd->nres = -1;
162 return;
163 }
164 }
165 }
166 /* Otherwise stop trace and return to interpreter. */
167 lj_record_stop(J, LJ_TRLINK_RETURN, 0);
168 rd->nres = -1;
169 }
114} 170}
115 171
116/* C functions can have arbitrary side-effects and are not recorded (yet). */ 172/* Fallback handler for unsupported variants of fast functions. */
117static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) 173#define recff_nyiu recff_nyi
174
175/* Must stop the trace for classic C functions with arbitrary side-effects. */
176#define recff_c recff_nyi
177
178/* Emit BUFHDR for the global temporary buffer. */
179static TRef recff_bufhdr(jit_State *J)
118{ 180{
119 setfuncV(J->L, &J->errinfo, J->fn); 181 return emitir(IRT(IR_BUFHDR, IRT_PGC),
120 lj_trace_err_info(J, LJ_TRERR_NYICF); 182 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
121 UNUSED(rd);
122} 183}
123 184
124/* -- Base library fast functions ----------------------------------------- */ 185/* -- Base library fast functions ----------------------------------------- */
@@ -135,7 +196,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
135 uint32_t t; 196 uint32_t t;
136 if (tvisnumber(&rd->argv[0])) 197 if (tvisnumber(&rd->argv[0]))
137 t = ~LJ_TNUMX; 198 t = ~LJ_TNUMX;
138 else if (LJ_64 && tvislightud(&rd->argv[0])) 199 else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
139 t = ~LJ_TLIGHTUD; 200 t = ~LJ_TLIGHTUD;
140 else 201 else
141 t = ~itype(&rd->argv[0]); 202 t = ~itype(&rd->argv[0]);
@@ -167,7 +228,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
167 ix.tab = tr; 228 ix.tab = tr;
168 copyTV(J->L, &ix.tabv, &rd->argv[0]); 229 copyTV(J->L, &ix.tabv, &rd->argv[0]);
169 lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ 230 lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
170 fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); 231 fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
171 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; 232 mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
172 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); 233 emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
173 if (!tref_isnil(mt)) 234 if (!tref_isnil(mt))
@@ -233,7 +294,7 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
233 if (strV(tv)->len == 1) { 294 if (strV(tv)->len == 1) {
234 emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); 295 emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
235 } else { 296 } else {
236 TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); 297 TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
237 TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); 298 TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
238 emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); 299 emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
239 } 300 }
@@ -263,7 +324,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
263 J->base[i] = J->base[start+i]; 324 J->base[i] = J->base[start+i];
264 } /* else: Interpreter will throw. */ 325 } /* else: Interpreter will throw. */
265 } else { 326 } else {
266 recff_nyiu(J); 327 recff_nyiu(J, rd);
328 return;
267 } 329 }
268 } /* else: Interpreter will throw. */ 330 } /* else: Interpreter will throw. */
269} 331}
@@ -274,14 +336,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
274 TRef base = J->base[1]; 336 TRef base = J->base[1];
275 if (tr && !tref_isnil(base)) { 337 if (tr && !tref_isnil(base)) {
276 base = lj_opt_narrow_toint(J, base); 338 base = lj_opt_narrow_toint(J, base);
277 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) 339 if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
278 recff_nyiu(J); 340 recff_nyiu(J, rd);
341 return;
342 }
279 } 343 }
280 if (tref_isnumber_str(tr)) { 344 if (tref_isnumber_str(tr)) {
281 if (tref_isstr(tr)) { 345 if (tref_isstr(tr)) {
282 TValue tmp; 346 TValue tmp;
283 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) 347 if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
284 recff_nyiu(J); /* Would need an inverted STRTO for this case. */ 348 recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
349 return;
350 }
285 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); 351 tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
286 } 352 }
287#if LJ_HASFFI 353#if LJ_HASFFI
@@ -313,10 +379,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
313 int errcode; 379 int errcode;
314 TValue argv0; 380 TValue argv0;
315 /* Temporarily insert metamethod below object. */ 381 /* Temporarily insert metamethod below object. */
316 J->base[1] = J->base[0]; 382 J->base[1+LJ_FR2] = J->base[0];
317 J->base[0] = ix.mobj; 383 J->base[0] = ix.mobj;
318 copyTV(J->L, &argv0, &rd->argv[0]); 384 copyTV(J->L, &argv0, &rd->argv[0]);
319 copyTV(J->L, &rd->argv[1], &rd->argv[0]); 385 copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
320 copyTV(J->L, &rd->argv[0], &ix.mobjv); 386 copyTV(J->L, &rd->argv[0], &ix.mobjv);
321 /* Need to protect lj_record_tailcall because it may throw. */ 387 /* Need to protect lj_record_tailcall because it may throw. */
322 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); 388 errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@@ -336,13 +402,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
336 if (tref_isstr(tr)) { 402 if (tref_isstr(tr)) {
337 /* Ignore __tostring in the string base metatable. */ 403 /* Ignore __tostring in the string base metatable. */
338 /* Pass on result in J->base[0]. */ 404 /* Pass on result in J->base[0]. */
339 } else if (!recff_metacall(J, rd, MM_tostring)) { 405 } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
340 if (tref_isnumber(tr)) { 406 if (tref_isnumber(tr)) {
341 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 407 J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
408 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
342 } else if (tref_ispri(tr)) { 409 } else if (tref_ispri(tr)) {
343 J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); 410 J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
344 } else { 411 } else {
345 recff_nyiu(J); 412 recff_nyiu(J, rd);
413 return;
346 } 414 }
347 } 415 }
348} 416}
@@ -364,15 +432,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
364 } /* else: Interpreter will throw. */ 432 } /* else: Interpreter will throw. */
365} 433}
366 434
367static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) 435static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
368{ 436{
369 TRef tr = J->base[0]; 437 TRef tr = J->base[0];
370 if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && 438 if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) &&
371 recff_metacall(J, rd, MM_ipairs))) { 439 recff_metacall(J, rd, MM_pairs + rd->data))) {
372 if (tref_istab(tr)) { 440 if (tref_istab(tr)) {
373 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); 441 J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
374 J->base[1] = tr; 442 J->base[1] = tr;
375 J->base[2] = lj_ir_kint(J, 0); 443 J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
376 rd->nres = 3; 444 rd->nres = 3;
377 } /* else: Interpreter will throw. */ 445 } /* else: Interpreter will throw. */
378 } 446 }
@@ -381,6 +449,10 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
381static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) 449static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
382{ 450{
383 if (J->maxslot >= 1) { 451 if (J->maxslot >= 1) {
452#if LJ_FR2
453 /* Shift function arguments up. */
454 memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
455#endif
384 lj_record_call(J, 0, J->maxslot - 1); 456 lj_record_call(J, 0, J->maxslot - 1);
385 rd->nres = -1; /* Pending call. */ 457 rd->nres = -1; /* Pending call. */
386 } /* else: Interpreter will throw. */ 458 } /* else: Interpreter will throw. */
@@ -406,6 +478,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
406 copyTV(J->L, &argv1, &rd->argv[1]); 478 copyTV(J->L, &argv1, &rd->argv[1]);
407 copyTV(J->L, &rd->argv[0], &argv1); 479 copyTV(J->L, &rd->argv[0], &argv1);
408 copyTV(J->L, &rd->argv[1], &argv0); 480 copyTV(J->L, &rd->argv[1], &argv0);
481#if LJ_FR2
482 /* Shift function arguments up. */
483 memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
484#endif
409 /* Need to protect lj_record_call because it may throw. */ 485 /* Need to protect lj_record_call because it may throw. */
410 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); 486 errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
411 /* Always undo Lua stack swap to avoid confusing the interpreter. */ 487 /* Always undo Lua stack swap to avoid confusing the interpreter. */
@@ -417,12 +493,24 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
417 } /* else: Interpreter will throw. */ 493 } /* else: Interpreter will throw. */
418} 494}
419 495
496static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
497{
498 TRef tr = J->base[0];
499 /* Only support getfenv(0) for now. */
500 if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
501 TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
502 J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
503 return;
504 }
505 recff_nyiu(J, rd);
506}
507
420/* -- Math library fast functions ----------------------------------------- */ 508/* -- Math library fast functions ----------------------------------------- */
421 509
422static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) 510static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
423{ 511{
424 TRef tr = lj_ir_tonum(J, J->base[0]); 512 TRef tr = lj_ir_tonum(J, J->base[0]);
425 J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); 513 J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
426 UNUSED(rd); 514 UNUSED(rd);
427} 515}
428 516
@@ -529,14 +617,6 @@ static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
529 rd->nres = 2; 617 rd->nres = 2;
530} 618}
531 619
532static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
533{
534 TRef tr = lj_ir_tonum(J, J->base[0]);
535 TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
536 J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
537 UNUSED(rd);
538}
539
540static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) 620static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
541{ 621{
542 J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], 622 J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
@@ -591,48 +671,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
591 671
592/* -- Bit library fast functions ------------------------------------------ */ 672/* -- Bit library fast functions ------------------------------------------ */
593 673
594/* Record unary bit.tobit, bit.bnot, bit.bswap. */ 674/* Record bit.tobit. */
675static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
676{
677 TRef tr = J->base[0];
678#if LJ_HASFFI
679 if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
680#endif
681 J->base[0] = lj_opt_narrow_tobit(J, tr);
682 UNUSED(rd);
683}
684
685/* Record unary bit.bnot, bit.bswap. */
595static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) 686static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
596{ 687{
597 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 688#if LJ_HASFFI
598 J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); 689 if (recff_bit64_unary(J, rd))
690 return;
691#endif
692 J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
599} 693}
600 694
601/* Record N-ary bit.band, bit.bor, bit.bxor. */ 695/* Record N-ary bit.band, bit.bor, bit.bxor. */
602static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) 696static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
603{ 697{
604 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 698#if LJ_HASFFI
605 uint32_t op = rd->data; 699 if (recff_bit64_nary(J, rd))
606 BCReg i; 700 return;
607 for (i = 1; J->base[i] != 0; i++) 701#endif
608 tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); 702 {
609 J->base[0] = tr; 703 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
704 uint32_t ot = IRTI(rd->data);
705 BCReg i;
706 for (i = 1; J->base[i] != 0; i++)
707 tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
708 J->base[0] = tr;
709 }
610} 710}
611 711
612/* Record bit shifts. */ 712/* Record bit shifts. */
613static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) 713static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
614{ 714{
615 TRef tr = lj_opt_narrow_tobit(J, J->base[0]); 715#if LJ_HASFFI
616 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); 716 if (recff_bit64_shift(J, rd))
617 IROp op = (IROp)rd->data; 717 return;
618 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && 718#endif
619 !tref_isk(tsh)) 719 {
620 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); 720 TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
721 TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
722 IROp op = (IROp)rd->data;
723 if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
724 !tref_isk(tsh))
725 tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
621#ifdef LJ_TARGET_UNIFYROT 726#ifdef LJ_TARGET_UNIFYROT
622 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { 727 if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
623 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; 728 op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
624 tsh = emitir(IRTI(IR_NEG), tsh, tsh); 729 tsh = emitir(IRTI(IR_NEG), tsh, tsh);
730 }
731#endif
732 J->base[0] = emitir(IRTI(op), tr, tsh);
625 } 733 }
734}
735
736static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
737{
738#if LJ_HASFFI
739 TRef hdr = recff_bufhdr(J);
740 TRef tr = recff_bit64_tohex(J, rd, hdr);
741 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
742#else
743 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
626#endif 744#endif
627 J->base[0] = emitir(IRTI(op), tr, tsh);
628} 745}
629 746
630/* -- String library fast functions --------------------------------------- */ 747/* -- String library fast functions --------------------------------------- */
631 748
632static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) 749/* Specialize to relative starting position for string. */
750static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
751 TRef trlen, TRef tr0)
633{ 752{
634 J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); 753 int32_t start = *st;
635 UNUSED(rd); 754 if (start < 0) {
755 emitir(IRTGI(IR_LT), tr, tr0);
756 tr = emitir(IRTI(IR_ADD), trlen, tr);
757 start = start + (int32_t)s->len;
758 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
759 if (start < 0) {
760 tr = tr0;
761 start = 0;
762 }
763 } else if (start == 0) {
764 emitir(IRTGI(IR_EQ), tr, tr0);
765 tr = tr0;
766 } else {
767 tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
768 emitir(IRTGI(IR_GE), tr, tr0);
769 start--;
770 }
771 *st = start;
772 return tr;
636} 773}
637 774
638/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ 775/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -679,39 +816,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
679 } else if ((MSize)end <= str->len) { 816 } else if ((MSize)end <= str->len) {
680 emitir(IRTGI(IR_ULE), trend, trlen); 817 emitir(IRTGI(IR_ULE), trend, trlen);
681 } else { 818 } else {
682 emitir(IRTGI(IR_GT), trend, trlen); 819 emitir(IRTGI(IR_UGT), trend, trlen);
683 end = (int32_t)str->len; 820 end = (int32_t)str->len;
684 trend = trlen; 821 trend = trlen;
685 } 822 }
686 if (start < 0) { 823 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
687 emitir(IRTGI(IR_LT), trstart, tr0);
688 trstart = emitir(IRTI(IR_ADD), trlen, trstart);
689 start = start+(int32_t)str->len;
690 emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
691 if (start < 0) {
692 trstart = tr0;
693 start = 0;
694 }
695 } else {
696 if (start == 0) {
697 emitir(IRTGI(IR_EQ), trstart, tr0);
698 trstart = tr0;
699 } else {
700 trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
701 emitir(IRTGI(IR_GE), trstart, tr0);
702 start--;
703 }
704 }
705 if (rd->data) { /* Return string.sub result. */ 824 if (rd->data) { /* Return string.sub result. */
706 if (end - start >= 0) { 825 if (end - start >= 0) {
707 /* Also handle empty range here, to avoid extra traces. */ 826 /* Also handle empty range here, to avoid extra traces. */
708 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); 827 TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
709 emitir(IRTGI(IR_GE), trslen, tr0); 828 emitir(IRTGI(IR_GE), trslen, tr0);
710 trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); 829 trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
711 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); 830 J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
712 } else { /* Range underflow: return empty string. */ 831 } else { /* Range underflow: return empty string. */
713 emitir(IRTGI(IR_LT), trend, trstart); 832 emitir(IRTGI(IR_LT), trend, trstart);
714 J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); 833 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
715 } 834 }
716 } else { /* Return string.byte result(s). */ 835 } else { /* Return string.byte result(s). */
717 ptrdiff_t i, len = end - start; 836 ptrdiff_t i, len = end - start;
@@ -723,7 +842,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
723 rd->nres = len; 842 rd->nres = len;
724 for (i = 0; i < len; i++) { 843 for (i = 0; i < len; i++) {
725 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); 844 TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
726 tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); 845 tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
727 J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); 846 J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
728 } 847 }
729 } else { /* Empty range or range underflow: return no results. */ 848 } else { /* Empty range or range underflow: return no results. */
@@ -733,48 +852,203 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
733 } 852 }
734} 853}
735 854
736/* -- Table library fast functions ---------------------------------------- */ 855static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
737
738static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
739{ 856{
740 if (tref_istab(J->base[0])) 857 TRef k255 = lj_ir_kint(J, 255);
741 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); 858 BCReg i;
742 /* else: Interpreter will throw. */ 859 for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
860 TRef tr = lj_opt_narrow_toint(J, J->base[i]);
861 emitir(IRTGI(IR_ULE), tr, k255);
862 J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
863 }
864 if (i > 1) { /* Concatenate the strings, if there's more than one. */
865 TRef hdr = recff_bufhdr(J), tr = hdr;
866 for (i = 0; J->base[i] != 0; i++)
867 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
868 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
869 } else if (i == 0) {
870 J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
871 }
743 UNUSED(rd); 872 UNUSED(rd);
744} 873}
745 874
746static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) 875static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
747{ 876{
748 TRef tab = J->base[0]; 877 TRef str = lj_ir_tostr(J, J->base[0]);
749 rd->nres = 0; 878 TRef rep = lj_opt_narrow_toint(J, J->base[1]);
750 if (tref_istab(tab)) { 879 TRef hdr, tr, str2 = 0;
751 if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ 880 if (!tref_isnil(J->base[2])) {
752 TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); 881 TRef sep = lj_ir_tostr(J, J->base[2]);
753 GCtab *t = tabV(&rd->argv[0]); 882 int32_t vrep = argv2int(J, &rd->argv[1]);
754 MSize len = lj_tab_len(t); 883 emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
755 emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); 884 if (vrep > 1) {
756 if (len) { 885 TRef hdr2 = recff_bufhdr(J);
757 RecordIndex ix; 886 TRef tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), hdr2, sep);
758 ix.tab = tab; 887 tr2 = emitir(IRT(IR_BUFPUT, IRT_PGC), tr2, str);
759 ix.key = trlen; 888 str2 = emitir(IRT(IR_BUFSTR, IRT_STR), tr2, hdr2);
760 settabV(J->L, &ix.tabv, t); 889 }
761 setintV(&ix.keyv, len); 890 }
762 ix.idxchain = 0; 891 tr = hdr = recff_bufhdr(J);
763 if (results_wanted(J) != 0) { /* Specialize load only if needed. */ 892 if (str2) {
764 ix.val = 0; 893 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, str);
765 J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ 894 str = str2;
766 rd->nres = 1; 895 rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
767 /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ 896 }
768 } 897 tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
769 ix.val = TREF_NIL; 898 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
770 lj_record_idx(J, &ix); /* Remove value. */ 899}
900
901static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
902{
903 TRef str = lj_ir_tostr(J, J->base[0]);
904 TRef hdr = recff_bufhdr(J);
905 TRef tr = lj_ir_call(J, rd->data, hdr, str);
906 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
907}
908
909static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
910{
911 TRef trstr = lj_ir_tostr(J, J->base[0]);
912 TRef trpat = lj_ir_tostr(J, J->base[1]);
913 TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
914 TRef tr0 = lj_ir_kint(J, 0);
915 TRef trstart;
916 GCstr *str = argv2str(J, &rd->argv[0]);
917 GCstr *pat = argv2str(J, &rd->argv[1]);
918 int32_t start;
919 J->needsnap = 1;
920 if (tref_isnil(J->base[2])) {
921 trstart = lj_ir_kint(J, 1);
922 start = 1;
923 } else {
924 trstart = lj_opt_narrow_toint(J, J->base[2]);
925 start = argv2int(J, &rd->argv[2]);
926 }
927 trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
928 if ((MSize)start <= str->len) {
929 emitir(IRTGI(IR_ULE), trstart, trlen);
930 } else {
931 emitir(IRTGI(IR_UGT), trstart, trlen);
932#if LJ_52
933 J->base[0] = TREF_NIL;
934 return;
935#else
936 trstart = trlen;
937 start = str->len;
938#endif
939 }
940 /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
941 if ((J->base[2] && tref_istruecond(J->base[3])) ||
942 (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
943 !lj_str_haspattern(pat))) { /* Search for fixed string. */
944 TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
945 TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
946 TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
947 TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
948 TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
949 TRef trp0 = lj_ir_kkptr(J, NULL);
950 if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
951 str->len-(MSize)start, pat->len)) {
952 TRef pos;
953 emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
954 /* Recompute offset. trsptr may not point into trstr after folding. */
955 pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart);
956 J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
957 J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
958 rd->nres = 2;
959 } else {
960 emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
961 J->base[0] = TREF_NIL;
962 }
963 } else { /* Search for pattern. */
964 recff_nyiu(J, rd);
965 return;
966 }
967}
968
969static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
970{
971 TRef trfmt = lj_ir_tostr(J, J->base[0]);
972 GCstr *fmt = argv2str(J, &rd->argv[0]);
973 int arg = 1;
974 TRef hdr, tr;
975 FormatState fs;
976 SFormat sf;
977 /* Specialize to the format string. */
978 emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
979 tr = hdr = recff_bufhdr(J);
980 lj_strfmt_init(&fs, strdata(fmt), fmt->len);
981 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
982 TRef tra = sf == STRFMT_LIT ? 0 : J->base[arg++];
983 TRef trsf = lj_ir_kint(J, (int32_t)sf);
984 IRCallID id;
985 switch (STRFMT_TYPE(sf)) {
986 case STRFMT_LIT:
987 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
988 lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
989 break;
990 case STRFMT_INT:
991 id = IRCALL_lj_strfmt_putfnum_int;
992 handle_int:
993 if (!tref_isinteger(tra))
994 goto handle_num;
995 if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
996 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
997 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
998 } else {
999#if LJ_HASFFI
1000 tra = emitir(IRT(IR_CONV, IRT_U64), tra,
1001 (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
1002 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
1003 lj_needsplit(J);
1004#else
1005 recff_nyiu(J, rd); /* Don't bother working around this NYI. */
1006 return;
1007#endif
1008 }
1009 break;
1010 case STRFMT_UINT:
1011 id = IRCALL_lj_strfmt_putfnum_uint;
1012 goto handle_int;
1013 case STRFMT_NUM:
1014 id = IRCALL_lj_strfmt_putfnum;
1015 handle_num:
1016 tra = lj_ir_tonum(J, tra);
1017 tr = lj_ir_call(J, id, tr, trsf, tra);
1018 if (LJ_SOFTFP32) lj_needsplit(J);
1019 break;
1020 case STRFMT_STR:
1021 if (!tref_isstr(tra)) {
1022 recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
1023 return;
771 } 1024 }
772 } else { /* Complex case: remove in the middle. */ 1025 if (sf == STRFMT_STR) /* Shortcut for plain %s. */
773 recff_nyiu(J); 1026 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, tra);
1027 else if ((sf & STRFMT_T_QUOTED))
1028 tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
1029 else
1030 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
1031 break;
1032 case STRFMT_CHAR:
1033 tra = lj_opt_narrow_toint(J, tra);
1034 if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
1035 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr,
1036 emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
1037 else
1038 tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
1039 break;
1040 case STRFMT_PTR: /* NYI */
1041 case STRFMT_ERR:
1042 default:
1043 recff_nyiu(J, rd);
1044 return;
774 } 1045 }
775 } /* else: Interpreter will throw. */ 1046 }
1047 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
776} 1048}
777 1049
1050/* -- Table library fast functions ---------------------------------------- */
1051
778static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) 1052static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
779{ 1053{
780 RecordIndex ix; 1054 RecordIndex ix;
@@ -791,11 +1065,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
791 ix.idxchain = 0; 1065 ix.idxchain = 0;
792 lj_record_idx(J, &ix); /* Set new value. */ 1066 lj_record_idx(J, &ix); /* Set new value. */
793 } else { /* Complex case: insert in the middle. */ 1067 } else { /* Complex case: insert in the middle. */
794 recff_nyiu(J); 1068 recff_nyiu(J, rd);
1069 return;
795 } 1070 }
796 } /* else: Interpreter will throw. */ 1071 } /* else: Interpreter will throw. */
797} 1072}
798 1073
1074static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
1075{
1076 TRef tab = J->base[0];
1077 if (tref_istab(tab)) {
1078 TRef sep = !tref_isnil(J->base[1]) ?
1079 lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
1080 TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
1081 lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
1082 TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
1083 lj_opt_narrow_toint(J, J->base[3]) :
1084 lj_ir_call(J, IRCALL_lj_tab_len, tab);
1085 TRef hdr = recff_bufhdr(J);
1086 TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
1087 emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
1088 J->base[0] = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1089 } /* else: Interpreter will throw. */
1090 UNUSED(rd);
1091}
1092
1093static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
1094{
1095 TRef tra = lj_opt_narrow_toint(J, J->base[0]);
1096 TRef trh = lj_opt_narrow_toint(J, J->base[1]);
1097 J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
1098 UNUSED(rd);
1099}
1100
1101static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
1102{
1103 TRef tr = J->base[0];
1104 if (tref_istab(tr)) {
1105 rd->nres = 0;
1106 lj_ir_call(J, IRCALL_lj_tab_clear, tr);
1107 J->needsnap = 1;
1108 } /* else: Interpreter will throw. */
1109}
1110
799/* -- I/O library fast functions ------------------------------------------ */ 1111/* -- I/O library fast functions ------------------------------------------ */
800 1112
801/* Get FILE* for I/O function. Any I/O error aborts recording, so there's 1113/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -805,8 +1117,13 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
805{ 1117{
806 TRef tr, ud, fp; 1118 TRef tr, ud, fp;
807 if (id) { /* io.func() */ 1119 if (id) { /* io.func() */
1120#if LJ_GC64
1121 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
1122 ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
1123#else
808 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); 1124 tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
809 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); 1125 ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
1126#endif
810 } else { /* fp:method() */ 1127 } else { /* fp:method() */
811 ud = J->base[0]; 1128 ud = J->base[0];
812 if (!tref_isudata(ud)) 1129 if (!tref_isudata(ud))
@@ -828,10 +1145,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
828 ptrdiff_t i = rd->data == 0 ? 1 : 0; 1145 ptrdiff_t i = rd->data == 0 ? 1 : 0;
829 for (; J->base[i]; i++) { 1146 for (; J->base[i]; i++) {
830 TRef str = lj_ir_tostr(J, J->base[i]); 1147 TRef str = lj_ir_tostr(J, J->base[i]);
831 TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); 1148 TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
832 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); 1149 TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
833 if (tref_isk(len) && IR(tref_ref(len))->i == 1) { 1150 if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
834 TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); 1151 IRIns *irs = IR(tref_ref(str));
1152 TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
1153 irs->op1 :
1154 emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
835 tr = lj_ir_call(J, IRCALL_fputc, tr, fp); 1155 tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
836 if (results_wanted(J) != 0) /* Check result only if not ignored. */ 1156 if (results_wanted(J) != 0) /* Check result only if not ignored. */
837 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); 1157 emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -853,6 +1173,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
853 J->base[0] = TREF_TRUE; 1173 J->base[0] = TREF_TRUE;
854} 1174}
855 1175
1176/* -- Debug library fast functions ---------------------------------------- */
1177
1178static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
1179{
1180 GCtab *mt;
1181 TRef mtref;
1182 TRef tr = J->base[0];
1183 if (tref_istab(tr)) {
1184 mt = tabref(tabV(&rd->argv[0])->metatable);
1185 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
1186 } else if (tref_isudata(tr)) {
1187 mt = tabref(udataV(&rd->argv[0])->metatable);
1188 mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
1189 } else {
1190 mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
1191 J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
1192 return;
1193 }
1194 emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
1195 J->base[0] = mt ? mtref : TREF_NIL;
1196}
1197
856/* -- Record calls to fast functions -------------------------------------- */ 1198/* -- Record calls to fast functions -------------------------------------- */
857 1199
858#include "lj_recdef.h" 1200#include "lj_recdef.h"
diff --git a/src/lj_frame.h b/src/lj_frame.h
index e78a28a5..599a2d1c 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -11,7 +11,16 @@
11 11
12/* -- Lua stack frame ----------------------------------------------------- */ 12/* -- Lua stack frame ----------------------------------------------------- */
13 13
14/* Frame type markers in callee function slot (callee base-1). */ 14/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned:
15**
16** PC 00 Lua frame
17** delta 001 C frame
18** delta 010 Continuation frame
19** delta 011 Lua vararg frame
20** delta 101 cpcall() frame
21** delta 110 ff pcall() frame
22** delta 111 ff pcall() frame with active hook
23*/
15enum { 24enum {
16 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, 25 FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
17 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH 26 FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
21#define FRAME_TYPEP (FRAME_TYPE|FRAME_P) 30#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
22 31
23/* Macros to access and modify Lua frames. */ 32/* Macros to access and modify Lua frames. */
33#if LJ_FR2
34/* Two-slot frame info, required for 64 bit PC/GCRef:
35**
36** base-2 base-1 | base base+1 ...
37** [func PC/delta/ft] | [slots ...]
38** ^-- frame | ^-- base ^-- top
39**
40** Continuation frames:
41**
42** base-4 base-3 base-2 base-1 | base base+1 ...
43** [cont PC ] [func PC/delta/ft] | [slots ...]
44** ^-- frame | ^-- base ^-- top
45*/
46#define frame_gc(f) (gcval((f)-1))
47#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
48#define frame_pc(f) ((const BCIns *)frame_ftsz(f))
49#define setframe_gc(f, p, tp) (setgcVraw((f)-1, (p), (tp)))
50#define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
51#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
52#else
53/* One-slot frame info, sufficient for 32 bit PC/GCRef:
54**
55** base-1 | base base+1 ...
56** lo hi |
57** [func | PC/delta/ft] | [slots ...]
58** ^-- frame | ^-- base ^-- top
59**
60** Continuation frames:
61**
62** base-2 base-1 | base base+1 ...
63** lo hi lo hi |
64** [cont | PC] [func | PC/delta/ft] | [slots ...]
65** ^-- frame | ^-- base ^-- top
66*/
24#define frame_gc(f) (gcref((f)->fr.func)) 67#define frame_gc(f) (gcref((f)->fr.func))
25#define frame_func(f) (&frame_gc(f)->fn) 68#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz)
26#define frame_ftsz(f) ((f)->fr.tp.ftsz) 69#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
70#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp))
71#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz))
72#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
73#endif
27 74
28#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) 75#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
29#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) 76#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
@@ -33,33 +80,53 @@ enum {
33#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) 80#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
34#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) 81#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
35 82
36#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) 83#define frame_func(f) (&frame_gc(f)->fn)
84#define frame_delta(f) (frame_ftsz(f) >> 3)
85#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
86
87enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
88
89#if LJ_FR2
90#define frame_contpc(f) (frame_pc((f)-2))
91#define frame_contv(f) (((f)-3)->u64)
92#else
37#define frame_contpc(f) (frame_pc((f)-1)) 93#define frame_contpc(f) (frame_pc((f)-1))
38#if LJ_64 94#define frame_contv(f) (((f)-1)->u32.lo)
95#endif
96#if LJ_FR2
97#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64)
98#elif LJ_64
39#define frame_contf(f) \ 99#define frame_contf(f) \
40 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ 100 ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
41 (intptr_t)(int32_t)((f)-1)->u32.lo)) 101 (intptr_t)(int32_t)((f)-1)->u32.lo))
42#else 102#else
43#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) 103#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
44#endif 104#endif
45#define frame_delta(f) (frame_ftsz(f) >> 3) 105#define frame_iscont_fficb(f) \
46#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) 106 (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
47 107
48#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) 108#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
49#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) 109#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
50#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) 110#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
51/* Note: this macro does not skip over FRAME_VARG. */ 111/* Note: this macro does not skip over FRAME_VARG. */
52 112
53#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
54#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz))
55#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
56
57/* -- C stack frame ------------------------------------------------------- */ 113/* -- C stack frame ------------------------------------------------------- */
58 114
59/* Macros to access and modify the C stack frame chain. */ 115/* Macros to access and modify the C stack frame chain. */
60 116
61/* These definitions must match with the arch-specific *.dasc files. */ 117/* These definitions must match with the arch-specific *.dasc files. */
62#if LJ_TARGET_X86 118#if LJ_TARGET_X86
119#if LJ_ABI_WIN
120#define CFRAME_OFS_ERRF (19*4)
121#define CFRAME_OFS_NRES (18*4)
122#define CFRAME_OFS_PREV (17*4)
123#define CFRAME_OFS_L (16*4)
124#define CFRAME_OFS_SEH (9*4)
125#define CFRAME_OFS_PC (6*4)
126#define CFRAME_OFS_MULTRES (5*4)
127#define CFRAME_SIZE (16*4)
128#define CFRAME_SHIFT_MULTRES 0
129#else
63#define CFRAME_OFS_ERRF (15*4) 130#define CFRAME_OFS_ERRF (15*4)
64#define CFRAME_OFS_NRES (14*4) 131#define CFRAME_OFS_NRES (14*4)
65#define CFRAME_OFS_PREV (13*4) 132#define CFRAME_OFS_PREV (13*4)
@@ -68,24 +135,41 @@ enum {
68#define CFRAME_OFS_MULTRES (5*4) 135#define CFRAME_OFS_MULTRES (5*4)
69#define CFRAME_SIZE (12*4) 136#define CFRAME_SIZE (12*4)
70#define CFRAME_SHIFT_MULTRES 0 137#define CFRAME_SHIFT_MULTRES 0
138#endif
71#elif LJ_TARGET_X64 139#elif LJ_TARGET_X64
72#if LJ_ABI_WIN 140#if LJ_ABI_WIN
73#define CFRAME_OFS_PREV (13*8) 141#define CFRAME_OFS_PREV (13*8)
142#if LJ_GC64
143#define CFRAME_OFS_PC (12*8)
144#define CFRAME_OFS_L (11*8)
145#define CFRAME_OFS_ERRF (21*4)
146#define CFRAME_OFS_NRES (20*4)
147#define CFRAME_OFS_MULTRES (8*4)
148#else
74#define CFRAME_OFS_PC (25*4) 149#define CFRAME_OFS_PC (25*4)
75#define CFRAME_OFS_L (24*4) 150#define CFRAME_OFS_L (24*4)
76#define CFRAME_OFS_ERRF (23*4) 151#define CFRAME_OFS_ERRF (23*4)
77#define CFRAME_OFS_NRES (22*4) 152#define CFRAME_OFS_NRES (22*4)
78#define CFRAME_OFS_MULTRES (21*4) 153#define CFRAME_OFS_MULTRES (21*4)
154#endif
79#define CFRAME_SIZE (10*8) 155#define CFRAME_SIZE (10*8)
80#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) 156#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
81#define CFRAME_SHIFT_MULTRES 0 157#define CFRAME_SHIFT_MULTRES 0
82#else 158#else
83#define CFRAME_OFS_PREV (4*8) 159#define CFRAME_OFS_PREV (4*8)
160#if LJ_GC64
161#define CFRAME_OFS_PC (3*8)
162#define CFRAME_OFS_L (2*8)
163#define CFRAME_OFS_ERRF (3*4)
164#define CFRAME_OFS_NRES (2*4)
165#define CFRAME_OFS_MULTRES (0*4)
166#else
84#define CFRAME_OFS_PC (7*4) 167#define CFRAME_OFS_PC (7*4)
85#define CFRAME_OFS_L (6*4) 168#define CFRAME_OFS_L (6*4)
86#define CFRAME_OFS_ERRF (5*4) 169#define CFRAME_OFS_ERRF (5*4)
87#define CFRAME_OFS_NRES (4*4) 170#define CFRAME_OFS_NRES (4*4)
88#define CFRAME_OFS_MULTRES (1*4) 171#define CFRAME_OFS_MULTRES (1*4)
172#endif
89#if LJ_NO_UNWIND 173#if LJ_NO_UNWIND
90#define CFRAME_SIZE (12*8) 174#define CFRAME_SIZE (12*8)
91#else 175#else
@@ -107,6 +191,15 @@ enum {
107#define CFRAME_SIZE 64 191#define CFRAME_SIZE 64
108#endif 192#endif
109#define CFRAME_SHIFT_MULTRES 3 193#define CFRAME_SHIFT_MULTRES 3
194#elif LJ_TARGET_ARM64
195#define CFRAME_OFS_ERRF 196
196#define CFRAME_OFS_NRES 200
197#define CFRAME_OFS_PREV 160
198#define CFRAME_OFS_L 176
199#define CFRAME_OFS_PC 168
200#define CFRAME_OFS_MULTRES 192
201#define CFRAME_SIZE 208
202#define CFRAME_SHIFT_MULTRES 3
110#elif LJ_TARGET_PPC 203#elif LJ_TARGET_PPC
111#if LJ_TARGET_XBOX360 204#if LJ_TARGET_XBOX360
112#define CFRAME_OFS_ERRF 424 205#define CFRAME_OFS_ERRF 424
@@ -117,7 +210,7 @@ enum {
117#define CFRAME_OFS_MULTRES 408 210#define CFRAME_OFS_MULTRES 408
118#define CFRAME_SIZE 384 211#define CFRAME_SIZE 384
119#define CFRAME_SHIFT_MULTRES 3 212#define CFRAME_SHIFT_MULTRES 3
120#elif LJ_ARCH_PPC64 213#elif LJ_ARCH_PPC32ON64
121#define CFRAME_OFS_ERRF 472 214#define CFRAME_OFS_ERRF 472
122#define CFRAME_OFS_NRES 468 215#define CFRAME_OFS_NRES 468
123#define CFRAME_OFS_PREV 448 216#define CFRAME_OFS_PREV 448
@@ -133,26 +226,43 @@ enum {
133#define CFRAME_OFS_L 36 226#define CFRAME_OFS_L 36
134#define CFRAME_OFS_PC 32 227#define CFRAME_OFS_PC 32
135#define CFRAME_OFS_MULTRES 28 228#define CFRAME_OFS_MULTRES 28
136#define CFRAME_SIZE 272 229#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
137#define CFRAME_SHIFT_MULTRES 3 230#define CFRAME_SHIFT_MULTRES 3
138#endif 231#endif
139#elif LJ_TARGET_PPCSPE 232#elif LJ_TARGET_MIPS32
140#define CFRAME_OFS_ERRF 28 233#if LJ_ARCH_HASFPU
141#define CFRAME_OFS_NRES 24
142#define CFRAME_OFS_PREV 20
143#define CFRAME_OFS_L 16
144#define CFRAME_OFS_PC 12
145#define CFRAME_OFS_MULTRES 8
146#define CFRAME_SIZE 184
147#define CFRAME_SHIFT_MULTRES 3
148#elif LJ_TARGET_MIPS
149#define CFRAME_OFS_ERRF 124 234#define CFRAME_OFS_ERRF 124
150#define CFRAME_OFS_NRES 120 235#define CFRAME_OFS_NRES 120
151#define CFRAME_OFS_PREV 116 236#define CFRAME_OFS_PREV 116
152#define CFRAME_OFS_L 112 237#define CFRAME_OFS_L 112
238#define CFRAME_SIZE 112
239#else
240#define CFRAME_OFS_ERRF 76
241#define CFRAME_OFS_NRES 72
242#define CFRAME_OFS_PREV 68
243#define CFRAME_OFS_L 64
244#define CFRAME_SIZE 64
245#endif
153#define CFRAME_OFS_PC 20 246#define CFRAME_OFS_PC 20
154#define CFRAME_OFS_MULTRES 16 247#define CFRAME_OFS_MULTRES 16
155#define CFRAME_SIZE 112 248#define CFRAME_SHIFT_MULTRES 3
249#elif LJ_TARGET_MIPS64
250#if LJ_ARCH_HASFPU
251#define CFRAME_OFS_ERRF 188
252#define CFRAME_OFS_NRES 184
253#define CFRAME_OFS_PREV 176
254#define CFRAME_OFS_L 168
255#define CFRAME_OFS_PC 160
256#define CFRAME_SIZE 192
257#else
258#define CFRAME_OFS_ERRF 124
259#define CFRAME_OFS_NRES 120
260#define CFRAME_OFS_PREV 112
261#define CFRAME_OFS_L 104
262#define CFRAME_OFS_PC 96
263#define CFRAME_SIZE 128
264#endif
265#define CFRAME_OFS_MULTRES 0
156#define CFRAME_SHIFT_MULTRES 3 266#define CFRAME_SHIFT_MULTRES 3
157#else 267#else
158#error "Missing CFRAME_* definitions for this architecture" 268#error "Missing CFRAME_* definitions for this architecture"
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 86fcd6eb..81439aab 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -24,6 +25,7 @@
24#include "lj_cdata.h" 25#include "lj_cdata.h"
25#endif 26#endif
26#include "lj_trace.h" 27#include "lj_trace.h"
28#include "lj_dispatch.h"
27#include "lj_vm.h" 29#include "lj_vm.h"
28 30
29#define GCSTEPSIZE 1024u 31#define GCSTEPSIZE 1024u
@@ -68,7 +70,7 @@ static void gc_mark(global_State *g, GCobj *o)
68 gray2black(o); /* Closed upvalues are never gray. */ 70 gray2black(o); /* Closed upvalues are never gray. */
69 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { 71 } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
70 lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || 72 lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
71 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); 73 gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE);
72 setgcrefr(o->gch.gclist, g->gc.gray); 74 setgcrefr(o->gch.gclist, g->gc.gray);
73 setgcref(g->gc.gray, o); 75 setgcref(g->gc.gray, o);
74 } 76 }
@@ -244,6 +246,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
244 IRIns *ir = &T->ir[ref]; 246 IRIns *ir = &T->ir[ref];
245 if (ir->o == IR_KGC) 247 if (ir->o == IR_KGC)
246 gc_markobj(g, ir_kgc(ir)); 248 gc_markobj(g, ir_kgc(ir));
249 if (irt_is64(ir->t) && ir->o != IR_KNULL)
250 ref++;
247 } 251 }
248 if (T->link) gc_marktrace(g, T->link); 252 if (T->link) gc_marktrace(g, T->link);
249 if (T->nextroot) gc_marktrace(g, T->nextroot); 253 if (T->nextroot) gc_marktrace(g, T->nextroot);
@@ -274,12 +278,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
274{ 278{
275 TValue *frame, *top = th->top-1, *bot = tvref(th->stack); 279 TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
276 /* Note: extra vararg frame not skipped, marks function twice (harmless). */ 280 /* Note: extra vararg frame not skipped, marks function twice (harmless). */
277 for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { 281 for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
278 GCfunc *fn = frame_func(frame); 282 GCfunc *fn = frame_func(frame);
279 TValue *ftop = frame; 283 TValue *ftop = frame;
280 if (isluafunc(fn)) ftop += funcproto(fn)->framesize; 284 if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
281 if (ftop > top) top = ftop; 285 if (ftop > top) top = ftop;
282 gc_markobj(g, fn); /* Need to mark hidden function (or L). */ 286 if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */
283 } 287 }
284 top++; /* Correct bias of -1 (frame == base-1). */ 288 top++; /* Correct bias of -1 (frame == base-1). */
285 if (top > tvref(th->maxstack)) top = tvref(th->maxstack); 289 if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -290,7 +294,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
290static void gc_traverse_thread(global_State *g, lua_State *th) 294static void gc_traverse_thread(global_State *g, lua_State *th)
291{ 295{
292 TValue *o, *top = th->top; 296 TValue *o, *top = th->top;
293 for (o = tvref(th->stack)+1; o < top; o++) 297 for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
294 gc_marktv(g, o); 298 gc_marktv(g, o);
295 if (g->gc.state == GCSatomic) { 299 if (g->gc.state == GCSatomic) {
296 top = tvref(th->stack) + th->stacksize; 300 top = tvref(th->stack) + th->stacksize;
@@ -355,15 +359,6 @@ static size_t gc_propagate_gray(global_State *g)
355 359
356/* -- Sweep phase --------------------------------------------------------- */ 360/* -- Sweep phase --------------------------------------------------------- */
357 361
358/* Try to shrink some common data structures. */
359static void gc_shrink(global_State *g, lua_State *L)
360{
361 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
362 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
363 if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
364 lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
365}
366
367/* Type of GC free functions. */ 362/* Type of GC free functions. */
368typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); 363typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
369 364
@@ -389,7 +384,7 @@ static const GCFreeFunc gc_freefunc[] = {
389}; 384};
390 385
391/* Full sweep of a GC list. */ 386/* Full sweep of a GC list. */
392#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) 387#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0)
393 388
394/* Partial sweep of a GC list. */ 389/* Partial sweep of a GC list. */
395static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) 390static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -467,18 +462,21 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
467{ 462{
468 /* Save and restore lots of state around the __gc callback. */ 463 /* Save and restore lots of state around the __gc callback. */
469 uint8_t oldh = hook_save(g); 464 uint8_t oldh = hook_save(g);
470 MSize oldt = g->gc.threshold; 465 GCSize oldt = g->gc.threshold;
471 int errcode; 466 int errcode;
472 TValue *top; 467 TValue *top;
473 lj_trace_abort(g); 468 lj_trace_abort(g);
474 top = L->top;
475 L->top = top+2;
476 hook_entergc(g); /* Disable hooks and new traces during __gc. */ 469 hook_entergc(g); /* Disable hooks and new traces during __gc. */
470 if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
477 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ 471 g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
478 copyTV(L, top, mo); 472 top = L->top;
479 setgcV(L, top+1, o, ~o->gch.gct); 473 copyTV(L, top++, mo);
480 errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ 474 if (LJ_FR2) setnilV(top++);
475 setgcV(L, top, o, ~o->gch.gct);
476 L->top = top+1;
477 errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
481 hook_restore(g, oldh); 478 hook_restore(g, oldh);
479 if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
482 g->gc.threshold = oldt; /* Restore GC threshold. */ 480 g->gc.threshold = oldt; /* Restore GC threshold. */
483 if (errcode) 481 if (errcode)
484 lj_err_throw(L, errcode); /* Propagate errors. */ 482 lj_err_throw(L, errcode); /* Propagate errors. */
@@ -490,7 +488,7 @@ static void gc_finalize(lua_State *L)
490 global_State *g = G(L); 488 global_State *g = G(L);
491 GCobj *o = gcnext(gcref(g->gc.mmudata)); 489 GCobj *o = gcnext(gcref(g->gc.mmudata));
492 cTValue *mo; 490 cTValue *mo;
493 lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ 491 lua_assert(tvref(g->jit_base) == NULL); /* Must not be called on trace. */
494 /* Unchain from list of userdata to be finalized. */ 492 /* Unchain from list of userdata to be finalized. */
495 if (o == gcref(g->gc.mmudata)) 493 if (o == gcref(g->gc.mmudata))
496 setgcrefnull(g->gc.mmudata); 494 setgcrefnull(g->gc.mmudata);
@@ -599,11 +597,13 @@ static void atomic(global_State *g, lua_State *L)
599 /* All marking done, clear weak tables. */ 597 /* All marking done, clear weak tables. */
600 gc_clearweak(gcref(g->gc.weak)); 598 gc_clearweak(gcref(g->gc.weak));
601 599
600 lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
601
602 /* Prepare for sweep phase. */ 602 /* Prepare for sweep phase. */
603 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ 603 g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
604 g->strempty.marked = g->gc.currentwhite; 604 g->strempty.marked = g->gc.currentwhite;
605 setmref(g->gc.sweep, &g->gc.root); 605 setmref(g->gc.sweep, &g->gc.root);
606 g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ 606 g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */
607} 607}
608 608
609/* GC state machine. Returns a cost estimate for each step performed. */ 609/* GC state machine. Returns a cost estimate for each step performed. */
@@ -620,14 +620,14 @@ static size_t gc_onestep(lua_State *L)
620 g->gc.state = GCSatomic; /* End of mark phase. */ 620 g->gc.state = GCSatomic; /* End of mark phase. */
621 return 0; 621 return 0;
622 case GCSatomic: 622 case GCSatomic:
623 if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ 623 if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */
624 return LJ_MAX_MEM; 624 return LJ_MAX_MEM;
625 atomic(g, L); 625 atomic(g, L);
626 g->gc.state = GCSsweepstring; /* Start of sweep phase. */ 626 g->gc.state = GCSsweepstring; /* Start of sweep phase. */
627 g->gc.sweepstr = 0; 627 g->gc.sweepstr = 0;
628 return 0; 628 return 0;
629 case GCSsweepstring: { 629 case GCSsweepstring: {
630 MSize old = g->gc.total; 630 GCSize old = g->gc.total;
631 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ 631 gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
632 if (g->gc.sweepstr > g->strmask) 632 if (g->gc.sweepstr > g->strmask)
633 g->gc.state = GCSsweep; /* All string hash chains sweeped. */ 633 g->gc.state = GCSsweep; /* All string hash chains sweeped. */
@@ -636,12 +636,13 @@ static size_t gc_onestep(lua_State *L)
636 return GCSWEEPCOST; 636 return GCSWEEPCOST;
637 } 637 }
638 case GCSsweep: { 638 case GCSsweep: {
639 MSize old = g->gc.total; 639 GCSize old = g->gc.total;
640 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); 640 setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
641 lua_assert(old >= g->gc.total); 641 lua_assert(old >= g->gc.total);
642 g->gc.estimate -= old - g->gc.total; 642 g->gc.estimate -= old - g->gc.total;
643 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { 643 if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
644 gc_shrink(g, L); 644 if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
645 lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
645 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ 646 if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
646 g->gc.state = GCSfinalize; 647 g->gc.state = GCSfinalize;
647#if LJ_HASFFI 648#if LJ_HASFFI
@@ -656,7 +657,7 @@ static size_t gc_onestep(lua_State *L)
656 } 657 }
657 case GCSfinalize: 658 case GCSfinalize:
658 if (gcref(g->gc.mmudata) != NULL) { 659 if (gcref(g->gc.mmudata) != NULL) {
659 if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ 660 if (tvref(g->jit_base)) /* Don't call finalizers on trace. */
660 return LJ_MAX_MEM; 661 return LJ_MAX_MEM;
661 gc_finalize(L); /* Finalize one userdata object. */ 662 gc_finalize(L); /* Finalize one userdata object. */
662 if (g->gc.estimate > GCFINALIZECOST) 663 if (g->gc.estimate > GCFINALIZECOST)
@@ -679,7 +680,7 @@ static size_t gc_onestep(lua_State *L)
679int LJ_FASTCALL lj_gc_step(lua_State *L) 680int LJ_FASTCALL lj_gc_step(lua_State *L)
680{ 681{
681 global_State *g = G(L); 682 global_State *g = G(L);
682 MSize lim; 683 GCSize lim;
683 int32_t ostate = g->vmstate; 684 int32_t ostate = g->vmstate;
684 setvmstate(g, GC); 685 setvmstate(g, GC);
685 lim = (GCSTEPSIZE/100) * g->gc.stepmul; 686 lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -688,13 +689,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
688 if (g->gc.total > g->gc.threshold) 689 if (g->gc.total > g->gc.threshold)
689 g->gc.debt += g->gc.total - g->gc.threshold; 690 g->gc.debt += g->gc.total - g->gc.threshold;
690 do { 691 do {
691 lim -= (MSize)gc_onestep(L); 692 lim -= (GCSize)gc_onestep(L);
692 if (g->gc.state == GCSpause) { 693 if (g->gc.state == GCSpause) {
693 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; 694 g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
694 g->vmstate = ostate; 695 g->vmstate = ostate;
695 return 1; /* Finished a GC cycle. */ 696 return 1; /* Finished a GC cycle. */
696 } 697 }
697 } while ((int32_t)lim > 0); 698 } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
698 if (g->gc.debt < GCSTEPSIZE) { 699 if (g->gc.debt < GCSTEPSIZE) {
699 g->gc.threshold = g->gc.total + GCSTEPSIZE; 700 g->gc.threshold = g->gc.total + GCSTEPSIZE;
700 g->vmstate = ostate; 701 g->vmstate = ostate;
@@ -718,8 +719,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
718/* Perform multiple GC steps. Called from JIT-compiled code. */ 719/* Perform multiple GC steps. Called from JIT-compiled code. */
719int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) 720int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
720{ 721{
721 lua_State *L = gco2th(gcref(g->jit_L)); 722 lua_State *L = gco2th(gcref(g->cur_L));
722 L->base = mref(G(L)->jit_base, TValue); 723 L->base = tvref(G(L)->jit_base);
723 L->top = curr_topL(L); 724 L->top = curr_topL(L);
724 while (steps-- > 0 && lj_gc_step(L) == 0) 725 while (steps-- > 0 && lj_gc_step(L) == 0)
725 ; 726 ;
@@ -813,7 +814,7 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
813/* -- Allocator ----------------------------------------------------------- */ 814/* -- Allocator ----------------------------------------------------------- */
814 815
815/* Call pluggable memory allocator to allocate or resize a fragment. */ 816/* Call pluggable memory allocator to allocate or resize a fragment. */
816void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) 817void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
817{ 818{
818 global_State *g = G(L); 819 global_State *g = G(L);
819 lua_assert((osz == 0) == (p == NULL)); 820 lua_assert((osz == 0) == (p == NULL));
@@ -821,19 +822,19 @@ void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
821 if (p == NULL && nsz > 0) 822 if (p == NULL && nsz > 0)
822 lj_err_mem(L); 823 lj_err_mem(L);
823 lua_assert((nsz == 0) == (p == NULL)); 824 lua_assert((nsz == 0) == (p == NULL));
824 lua_assert(checkptr32(p)); 825 lua_assert(checkptrGC(p));
825 g->gc.total = (g->gc.total - osz) + nsz; 826 g->gc.total = (g->gc.total - osz) + nsz;
826 return p; 827 return p;
827} 828}
828 829
829/* Allocate new GC object and link it to the root set. */ 830/* Allocate new GC object and link it to the root set. */
830void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) 831void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
831{ 832{
832 global_State *g = G(L); 833 global_State *g = G(L);
833 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); 834 GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
834 if (o == NULL) 835 if (o == NULL)
835 lj_err_mem(L); 836 lj_err_mem(L);
836 lua_assert(checkptr32(o)); 837 lua_assert(checkptrGC(o));
837 g->gc.total += size; 838 g->gc.total += size;
838 setgcrefr(o->gch.nextgc, g->gc.root); 839 setgcrefr(o->gch.nextgc, g->gc.root);
839 setgcref(g->gc.root, o); 840 setgcref(g->gc.root, o);
diff --git a/src/lj_gc.h b/src/lj_gc.h
index e42dbcf0..1725c639 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -107,8 +107,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
107 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } 107 lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
108 108
109/* Allocator. */ 109/* Allocator. */
110LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); 110LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
111LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); 111LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
112LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, 112LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
113 MSize *szp, MSize lim, MSize esz); 113 MSize *szp, MSize lim, MSize esz);
114 114
@@ -116,13 +116,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
116 116
117static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) 117static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
118{ 118{
119 g->gc.total -= (MSize)osize; 119 g->gc.total -= (GCSize)osize;
120 g->allocf(g->allocd, p, osize, 0); 120 g->allocf(g->allocd, p, osize, 0);
121} 121}
122 122
123#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) 123#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
124#define lj_mem_reallocvec(L, p, on, n, t) \ 124#define lj_mem_reallocvec(L, p, on, n, t) \
125 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) 125 ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
126#define lj_mem_growvec(L, p, n, m, t) \ 126#define lj_mem_growvec(L, p, n, m, t) \
127 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) 127 ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
128#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) 128#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index c2a9e901..a20d9ae2 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_frame.h" 16#include "lj_frame.h"
17#include "lj_buf.h"
18#include "lj_strfmt.h"
17#include "lj_jit.h" 19#include "lj_jit.h"
18#include "lj_dispatch.h" 20#include "lj_dispatch.h"
19 21
@@ -294,6 +296,9 @@ enum {
294#elif LJ_TARGET_ARM 296#elif LJ_TARGET_ARM
295 DW_REG_SP = 13, 297 DW_REG_SP = 13,
296 DW_REG_RA = 14, 298 DW_REG_RA = 14,
299#elif LJ_TARGET_ARM64
300 DW_REG_SP = 31,
301 DW_REG_RA = 30,
297#elif LJ_TARGET_PPC 302#elif LJ_TARGET_PPC
298 DW_REG_SP = 1, 303 DW_REG_SP = 1,
299 DW_REG_RA = 65, 304 DW_REG_RA = 65,
@@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = {
372 .machine = 62, 377 .machine = 62,
373#elif LJ_TARGET_ARM 378#elif LJ_TARGET_ARM
374 .machine = 40, 379 .machine = 40,
380#elif LJ_TARGET_ARM64
381 .machine = 183,
375#elif LJ_TARGET_PPC 382#elif LJ_TARGET_PPC
376 .machine = 20, 383 .machine = 20,
377#elif LJ_TARGET_MIPS 384#elif LJ_TARGET_MIPS
@@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
428 *ctx->p++ = '0' + n; 435 *ctx->p++ = '0' + n;
429} 436}
430 437
431/* Add a ULEB128 value. */
432static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
433{
434 uint8_t *p = ctx->p;
435 for (; v >= 0x80; v >>= 7)
436 *p++ = (uint8_t)((v & 0x7f) | 0x80);
437 *p++ = (uint8_t)v;
438 ctx->p = p;
439}
440
441/* Add a SLEB128 value. */ 438/* Add a SLEB128 value. */
442static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) 439static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
443{ 440{
@@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
454#define DU16(x) (*(uint16_t *)p = (x), p += 2) 451#define DU16(x) (*(uint16_t *)p = (x), p += 2)
455#define DU32(x) (*(uint32_t *)p = (x), p += 4) 452#define DU32(x) (*(uint32_t *)p = (x), p += 4)
456#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) 453#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
457#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) 454#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
458#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) 455#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
459#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) 456#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
460#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop 457#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
@@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
564 DB(DW_CFA_offset|DW_REG_15); DUV(4); 561 DB(DW_CFA_offset|DW_REG_15); DUV(4);
565 DB(DW_CFA_offset|DW_REG_14); DUV(5); 562 DB(DW_CFA_offset|DW_REG_14); DUV(5);
566 /* Extra registers saved for JIT-compiled code. */ 563 /* Extra registers saved for JIT-compiled code. */
567 DB(DW_CFA_offset|DW_REG_13); DUV(9); 564 DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9);
568 DB(DW_CFA_offset|DW_REG_12); DUV(10); 565 DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10);
569#elif LJ_TARGET_ARM 566#elif LJ_TARGET_ARM
570 { 567 {
571 int i; 568 int i;
572 for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } 569 for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); }
573 } 570 }
571#elif LJ_TARGET_ARM64
572 {
573 int i;
574 DB(DW_CFA_offset|31); DUV(2);
575 for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); }
576 for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); }
577 }
574#elif LJ_TARGET_PPC 578#elif LJ_TARGET_PPC
575 { 579 {
576 int i; 580 int i;
@@ -727,6 +731,20 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
727 731
728/* -- Interface to GDB JIT API -------------------------------------------- */ 732/* -- Interface to GDB JIT API -------------------------------------------- */
729 733
734static int gdbjit_lock;
735
736static void gdbjit_lock_acquire()
737{
738 while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
739 /* Just spin; futexes or pthreads aren't worth the portability cost. */
740 }
741}
742
743static void gdbjit_lock_release()
744{
745 __sync_lock_release(&gdbjit_lock);
746}
747
730/* Add new entry to GDB JIT symbol chain. */ 748/* Add new entry to GDB JIT symbol chain. */
731static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) 749static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
732{ 750{
@@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
738 ctx->T->gdbjit_entry = (void *)eo; 756 ctx->T->gdbjit_entry = (void *)eo;
739 /* Link new entry to chain and register it. */ 757 /* Link new entry to chain and register it. */
740 eo->entry.prev_entry = NULL; 758 eo->entry.prev_entry = NULL;
759 gdbjit_lock_acquire();
741 eo->entry.next_entry = __jit_debug_descriptor.first_entry; 760 eo->entry.next_entry = __jit_debug_descriptor.first_entry;
742 if (eo->entry.next_entry) 761 if (eo->entry.next_entry)
743 eo->entry.next_entry->prev_entry = &eo->entry; 762 eo->entry.next_entry->prev_entry = &eo->entry;
@@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
747 __jit_debug_descriptor.relevant_entry = &eo->entry; 766 __jit_debug_descriptor.relevant_entry = &eo->entry;
748 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; 767 __jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
749 __jit_debug_register_code(); 768 __jit_debug_register_code();
769 gdbjit_lock_release();
750} 770}
751 771
752/* Add debug info for newly compiled trace and notify GDB. */ 772/* Add debug info for newly compiled trace and notify GDB. */
@@ -778,6 +798,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
778{ 798{
779 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; 799 GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
780 if (eo) { 800 if (eo) {
801 gdbjit_lock_acquire();
781 if (eo->entry.prev_entry) 802 if (eo->entry.prev_entry)
782 eo->entry.prev_entry->next_entry = eo->entry.next_entry; 803 eo->entry.prev_entry->next_entry = eo->entry.next_entry;
783 else 804 else
@@ -787,6 +808,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
787 __jit_debug_descriptor.relevant_entry = &eo->entry; 808 __jit_debug_descriptor.relevant_entry = &eo->entry;
788 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; 809 __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
789 __jit_debug_register_code(); 810 __jit_debug_register_code();
811 gdbjit_lock_release();
790 lj_mem_free(J2G(J), eo, eo->sz); 812 lj_mem_free(J2G(J), eo, eo->sz);
791 } 813 }
792} 814}
diff --git a/src/lj_ir.c b/src/lj_ir.c
index 38f289cb..1dd25f23 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_gc.h" 17#include "lj_gc.h"
18#include "lj_buf.h"
18#include "lj_str.h" 19#include "lj_str.h"
19#include "lj_tab.h" 20#include "lj_tab.h"
20#include "lj_ir.h" 21#include "lj_ir.h"
@@ -29,6 +30,7 @@
29#endif 30#endif
30#include "lj_vm.h" 31#include "lj_vm.h"
31#include "lj_strscan.h" 32#include "lj_strscan.h"
33#include "lj_strfmt.h"
32#include "lj_lib.h" 34#include "lj_lib.h"
33 35
34/* Some local macros to save typing. Undef'd at the end. */ 36/* Some local macros to save typing. Undef'd at the end. */
@@ -89,7 +91,7 @@ static void lj_ir_growbot(jit_State *J)
89 IRIns *baseir = J->irbuf + J->irbotlim; 91 IRIns *baseir = J->irbuf + J->irbotlim;
90 MSize szins = J->irtoplim - J->irbotlim; 92 MSize szins = J->irtoplim - J->irbotlim;
91 lua_assert(szins != 0); 93 lua_assert(szins != 0);
92 lua_assert(J->cur.nk == J->irbotlim); 94 lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
93 if (J->cur.nins + (szins >> 1) < J->irtoplim) { 95 if (J->cur.nins + (szins >> 1) < J->irtoplim) {
94 /* More than half of the buffer is free on top: shift up by a quarter. */ 96 /* More than half of the buffer is free on top: shift up by a quarter. */
95 MSize ofs = szins >> 2; 97 MSize ofs = szins >> 2;
@@ -143,6 +145,16 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
143 return emitir(CCI_OPTYPE(ci), tr, id); 145 return emitir(CCI_OPTYPE(ci), tr, id);
144} 146}
145 147
148/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
149LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
150{
151 lua_assert((ofs & 3) == 0);
152 ofs >>= 2;
153 lua_assert(ofs >= IRFL__MAX && ofs <= 0x3ff); /* 10 bit FOLD key limit. */
154 lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
155 return lj_opt_fold(J);
156}
157
146/* -- Interning of constants ---------------------------------------------- */ 158/* -- Interning of constants ---------------------------------------------- */
147 159
148/* 160/*
@@ -163,6 +175,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
163 return ref; 175 return ref;
164} 176}
165 177
178/* Get ref of next 64 bit IR constant and optionally grow IR.
179** Note: this may invalidate all IRIns *!
180*/
181static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
182{
183 IRRef ref = J->cur.nk - 2;
184 lua_assert(J->state != LJ_TRACE_ASM);
185 if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
186 J->cur.nk = ref;
187 return ref;
188}
189
190#if LJ_GC64
191#define ir_nextkgc ir_nextk64
192#else
193#define ir_nextkgc ir_nextk
194#endif
195
166/* Intern int32_t constant. */ 196/* Intern int32_t constant. */
167TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) 197TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
168{ 198{
@@ -182,79 +212,21 @@ found:
182 return TREF(ref, IRT_INT); 212 return TREF(ref, IRT_INT);
183} 213}
184 214
185/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the 215/* Intern 64 bit constant, given by its 64 bit pattern. */
186** 64 bit constant. The constants themselves are stored in a chained array 216TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
187** and shared across traces.
188**
189** Rationale for choosing this data structure:
190** - The address of the constants is embedded in the generated machine code
191** and must never move. A resizable array or hash table wouldn't work.
192** - Most apps need very few non-32 bit integer constants (less than a dozen).
193** - Linear search is hard to beat in terms of speed and low complexity.
194*/
195typedef struct K64Array {
196 MRef next; /* Pointer to next list. */
197 MSize numk; /* Number of used elements in this array. */
198 TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
199} K64Array;
200
201/* Free all chained arrays. */
202void lj_ir_k64_freeall(jit_State *J)
203{
204 K64Array *k;
205 for (k = mref(J->k64, K64Array); k; ) {
206 K64Array *next = mref(k->next, K64Array);
207 lj_mem_free(J2G(J), k, sizeof(K64Array));
208 k = next;
209 }
210}
211
212/* Find 64 bit constant in chained array or add it. */
213cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
214{
215 K64Array *k, *kp = NULL;
216 TValue *ntv;
217 MSize idx;
218 /* Search for the constant in the whole chain of arrays. */
219 for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
220 kp = k; /* Remember previous element in list. */
221 for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
222 TValue *tv = &k->k[idx];
223 if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
224 return tv;
225 }
226 }
227 /* Constant was not found, need to add it. */
228 if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
229 K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
230 setmref(kn->next, NULL);
231 kn->numk = 0;
232 if (kp)
233 setmref(kp->next, kn); /* Chain to the end of the list. */
234 else
235 setmref(J->k64, kn); /* Link first array. */
236 kp = kn;
237 }
238 ntv = &kp->k[kp->numk++]; /* Add to current array. */
239 ntv->u64 = u64;
240 return ntv;
241}
242
243/* Intern 64 bit constant, given by its address. */
244TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
245{ 217{
246 IRIns *ir, *cir = J->cur.ir; 218 IRIns *ir, *cir = J->cur.ir;
247 IRRef ref; 219 IRRef ref;
248 IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; 220 IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
249 for (ref = J->chain[op]; ref; ref = cir[ref].prev) 221 for (ref = J->chain[op]; ref; ref = cir[ref].prev)
250 if (ir_k64(&cir[ref]) == tv) 222 if (ir_k64(&cir[ref])->u64 == u64)
251 goto found; 223 goto found;
252 ref = ir_nextk(J); 224 ref = ir_nextk64(J);
253 ir = IR(ref); 225 ir = IR(ref);
254 lua_assert(checkptr32(tv)); 226 ir[1].tv.u64 = u64;
255 setmref(ir->ptr, tv);
256 ir->t.irt = t; 227 ir->t.irt = t;
257 ir->o = op; 228 ir->o = op;
229 ir->op12 = 0;
258 ir->prev = J->chain[op]; 230 ir->prev = J->chain[op];
259 J->chain[op] = (IRRef1)ref; 231 J->chain[op] = (IRRef1)ref;
260found: 232found:
@@ -264,13 +236,13 @@ found:
264/* Intern FP constant, given by its 64 bit pattern. */ 236/* Intern FP constant, given by its 64 bit pattern. */
265TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) 237TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
266{ 238{
267 return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); 239 return lj_ir_k64(J, IR_KNUM, u64);
268} 240}
269 241
270/* Intern 64 bit integer constant. */ 242/* Intern 64 bit integer constant. */
271TRef lj_ir_kint64(jit_State *J, uint64_t u64) 243TRef lj_ir_kint64(jit_State *J, uint64_t u64)
272{ 244{
273 return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); 245 return lj_ir_k64(J, IR_KINT64, u64);
274} 246}
275 247
276/* Check whether a number is int and return it. -0 is NOT considered an int. */ 248/* Check whether a number is int and return it. -0 is NOT considered an int. */
@@ -309,10 +281,11 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
309 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) 281 for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
310 if (ir_kgc(&cir[ref]) == o) 282 if (ir_kgc(&cir[ref]) == o)
311 goto found; 283 goto found;
312 ref = ir_nextk(J); 284 ref = ir_nextkgc(J);
313 ir = IR(ref); 285 ir = IR(ref);
314 /* NOBARRIER: Current trace is a GC root. */ 286 /* NOBARRIER: Current trace is a GC root. */
315 setgcref(ir->gcr, o); 287 ir->op12 = 0;
288 setgcref(ir[LJ_GC64].gcr, o);
316 ir->t.irt = (uint8_t)t; 289 ir->t.irt = (uint8_t)t;
317 ir->o = IR_KGC; 290 ir->o = IR_KGC;
318 ir->prev = J->chain[IR_KGC]; 291 ir->prev = J->chain[IR_KGC];
@@ -321,24 +294,44 @@ found:
321 return TREF(ref, t); 294 return TREF(ref, t);
322} 295}
323 296
324/* Intern 32 bit pointer constant. */ 297/* Allocate GCtrace constant placeholder (no interning). */
298TRef lj_ir_ktrace(jit_State *J)
299{
300 IRRef ref = ir_nextkgc(J);
301 IRIns *ir = IR(ref);
302 lua_assert(irt_toitype_(IRT_P64) == LJ_TTRACE);
303 ir->t.irt = IRT_P64;
304 ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
305 ir->op12 = 0;
306 ir->prev = 0;
307 return TREF(ref, IRT_P64);
308}
309
310/* Intern pointer constant. */
325TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) 311TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
326{ 312{
327 IRIns *ir, *cir = J->cur.ir; 313 IRIns *ir, *cir = J->cur.ir;
328 IRRef ref; 314 IRRef ref;
329 lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); 315#if LJ_64 && !LJ_GC64
316 lua_assert((void *)(uintptr_t)u32ptr(ptr) == ptr);
317#endif
330 for (ref = J->chain[op]; ref; ref = cir[ref].prev) 318 for (ref = J->chain[op]; ref; ref = cir[ref].prev)
331 if (mref(cir[ref].ptr, void) == ptr) 319 if (ir_kptr(&cir[ref]) == ptr)
332 goto found; 320 goto found;
321#if LJ_GC64
322 ref = ir_nextk64(J);
323#else
333 ref = ir_nextk(J); 324 ref = ir_nextk(J);
325#endif
334 ir = IR(ref); 326 ir = IR(ref);
335 setmref(ir->ptr, ptr); 327 ir->op12 = 0;
336 ir->t.irt = IRT_P32; 328 setmref(ir[LJ_GC64].ptr, ptr);
329 ir->t.irt = IRT_PGC;
337 ir->o = op; 330 ir->o = op;
338 ir->prev = J->chain[op]; 331 ir->prev = J->chain[op];
339 J->chain[op] = (IRRef1)ref; 332 J->chain[op] = (IRRef1)ref;
340found: 333found:
341 return TREF(ref, IRT_P32); 334 return TREF(ref, IRT_PGC);
342} 335}
343 336
344/* Intern typed NULL constant. */ 337/* Intern typed NULL constant. */
@@ -390,12 +383,11 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
390 UNUSED(L); 383 UNUSED(L);
391 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ 384 lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
392 switch (ir->o) { 385 switch (ir->o) {
393 case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; 386 case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
394 case IR_KINT: setintV(tv, ir->i); break; 387 case IR_KINT: setintV(tv, ir->i); break;
395 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; 388 case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
396 case IR_KPTR: case IR_KKPTR: case IR_KNULL: 389 case IR_KPTR: case IR_KKPTR: setlightudV(tv, ir_kptr(ir)); break;
397 setlightudV(tv, mref(ir->ptr, void)); 390 case IR_KNULL: setlightudV(tv, NULL); break;
398 break;
399 case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; 391 case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
400#if LJ_HASFFI 392#if LJ_HASFFI
401 case IR_KINT64: { 393 case IR_KINT64: {
@@ -443,7 +435,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
443 if (!tref_isstr(tr)) { 435 if (!tref_isstr(tr)) {
444 if (!tref_isnumber(tr)) 436 if (!tref_isnumber(tr))
445 lj_trace_err(J, LJ_TRERR_BADTYPE); 437 lj_trace_err(J, LJ_TRERR_BADTYPE);
446 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); 438 tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
439 tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
447 } 440 }
448 return tr; 441 return tr;
449} 442}
diff --git a/src/lj_ir.h b/src/lj_ir.h
index f91d6d0e..6bbe0a33 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
40 _(USE, S , ref, ___) \ 40 _(USE, S , ref, ___) \
41 _(PHI, S , ref, ref) \ 41 _(PHI, S , ref, ref) \
42 _(RENAME, S , ref, lit) \ 42 _(RENAME, S , ref, lit) \
43 _(PROF, S , ___, ___) \
43 \ 44 \
44 /* Constants. */ \ 45 /* Constants. */ \
45 _(KPRI, N , ___, ___) \ 46 _(KPRI, N , ___, ___) \
@@ -96,6 +97,7 @@
96 _(UREFC, LW, ref, lit) \ 97 _(UREFC, LW, ref, lit) \
97 _(FREF, R , ref, lit) \ 98 _(FREF, R , ref, lit) \
98 _(STRREF, N , ref, ref) \ 99 _(STRREF, N , ref, ref) \
100 _(LREF, L , ___, ___) \
99 \ 101 \
100 /* Loads and Stores. These must be in the same order. */ \ 102 /* Loads and Stores. These must be in the same order. */ \
101 _(ALOAD, L , ref, ___) \ 103 _(ALOAD, L , ref, ___) \
@@ -120,6 +122,11 @@
120 _(CNEW, AW, ref, ref) \ 122 _(CNEW, AW, ref, ref) \
121 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ 123 _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
122 \ 124 \
125 /* Buffer operations. */ \
126 _(BUFHDR, L , ref, lit) \
127 _(BUFPUT, L , ref, ref) \
128 _(BUFSTR, A , ref, ref) \
129 \
123 /* Barriers. */ \ 130 /* Barriers. */ \
124 _(TBAR, S , ref, ___) \ 131 _(TBAR, S , ref, ___) \
125 _(OBAR, S , ref, ref) \ 132 _(OBAR, S , ref, ref) \
@@ -128,11 +135,12 @@
128 /* Type conversions. */ \ 135 /* Type conversions. */ \
129 _(CONV, NW, ref, lit) \ 136 _(CONV, NW, ref, lit) \
130 _(TOBIT, N , ref, ref) \ 137 _(TOBIT, N , ref, ref) \
131 _(TOSTR, N , ref, ___) \ 138 _(TOSTR, N , ref, lit) \
132 _(STRTO, N , ref, ___) \ 139 _(STRTO, N , ref, ___) \
133 \ 140 \
134 /* Calls. */ \ 141 /* Calls. */ \
135 _(CALLN, N , ref, lit) \ 142 _(CALLN, N , ref, lit) \
143 _(CALLA, A , ref, lit) \
136 _(CALLL, L , ref, lit) \ 144 _(CALLL, L , ref, lit) \
137 _(CALLS, S , ref, lit) \ 145 _(CALLS, S , ref, lit) \
138 _(CALLXS, S , ref, ref) \ 146 _(CALLXS, S , ref, ref) \
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
186 _(STR_LEN, offsetof(GCstr, len)) \ 194 _(STR_LEN, offsetof(GCstr, len)) \
187 _(FUNC_ENV, offsetof(GCfunc, l.env)) \ 195 _(FUNC_ENV, offsetof(GCfunc, l.env)) \
188 _(FUNC_PC, offsetof(GCfunc, l.pc)) \ 196 _(FUNC_PC, offsetof(GCfunc, l.pc)) \
197 _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
198 _(THREAD_ENV, offsetof(lua_State, env)) \
189 _(TAB_META, offsetof(GCtab, metatable)) \ 199 _(TAB_META, offsetof(GCtab, metatable)) \
190 _(TAB_ARRAY, offsetof(GCtab, array)) \ 200 _(TAB_ARRAY, offsetof(GCtab, array)) \
191 _(TAB_NODE, offsetof(GCtab, node)) \ 201 _(TAB_NODE, offsetof(GCtab, node)) \
@@ -210,7 +220,7 @@ IRFLDEF(FLENUM)
210 220
211/* SLOAD mode bits, stored in op2. */ 221/* SLOAD mode bits, stored in op2. */
212#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ 222#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
213#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ 223#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
214#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ 224#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
215#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ 225#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
216#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ 226#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
@@ -221,13 +231,16 @@ IRFLDEF(FLENUM)
221#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ 231#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
222#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ 232#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
223 233
234/* BUFHDR mode, stored in op2. */
235#define IRBUFHDR_RESET 0 /* Reset buffer. */
236#define IRBUFHDR_APPEND 1 /* Append to buffer. */
237
224/* CONV mode, stored in op2. */ 238/* CONV mode, stored in op2. */
225#define IRCONV_SRCMASK 0x001f /* Source IRType. */ 239#define IRCONV_SRCMASK 0x001f /* Source IRType. */
226#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */ 240#define IRCONV_DSTMASK 0x03e0 /* Dest. IRType (also in ir->t). */
227#define IRCONV_DSH 5 241#define IRCONV_DSH 5
228#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) 242#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
229#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) 243#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
230#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
231#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ 244#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
232#define IRCONV_MODEMASK 0x0fff 245#define IRCONV_MODEMASK 0x0fff
233#define IRCONV_CONVMASK 0xf000 246#define IRCONV_CONVMASK 0xf000
@@ -238,6 +251,11 @@ IRFLDEF(FLENUM)
238#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ 251#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
239#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ 252#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
240 253
254/* TOSTR mode, stored in op2. */
255#define IRTOSTR_INT 0 /* Convert integer to string. */
256#define IRTOSTR_NUM 1 /* Convert number to string. */
257#define IRTOSTR_CHAR 2 /* Convert char value to string. */
258
241/* -- IR operands --------------------------------------------------------- */ 259/* -- IR operands --------------------------------------------------------- */
242 260
243/* IR operand mode (2 bit). */ 261/* IR operand mode (2 bit). */
@@ -276,7 +294,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
276 294
277/* -- IR instruction types ------------------------------------------------ */ 295/* -- IR instruction types ------------------------------------------------ */
278 296
279/* Map of itypes to non-negative numbers. ORDER LJ_T. 297#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
298
299/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
280** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for 300** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
281** IRT_P32 and IRT_P64, which never escape the IR. 301** IRT_P32 and IRT_P64, which never escape the IR.
282** The various integers are only used in the IR and can only escape to 302** The various integers are only used in the IR and can only escape to
@@ -284,12 +304,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
284** contiguous and next to IRT_NUM (see the typerange macros below). 304** contiguous and next to IRT_NUM (see the typerange macros below).
285*/ 305*/
286#define IRTDEF(_) \ 306#define IRTDEF(_) \
287 _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ 307 _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
288 _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ 308 _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
289 _(TAB, 4) _(UDATA, 4) \ 309 _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
310 _(UDATA, IRTSIZE_PGC) \
290 _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ 311 _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
291 _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ 312 _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
292 _(SOFTFP, 4) /* There is room for 9 more types. */ 313 _(SOFTFP, 4) /* There is room for 8 more types. */
293 314
294/* IR result type and flags (8 bit). */ 315/* IR result type and flags (8 bit). */
295typedef enum { 316typedef enum {
@@ -300,6 +321,8 @@ IRTDEF(IRTENUM)
300 321
301 /* Native pointer type and the corresponding integer type. */ 322 /* Native pointer type and the corresponding integer type. */
302 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, 323 IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
324 IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
325 IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
303 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, 326 IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
304 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, 327 IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
305 328
@@ -353,7 +376,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
353#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) 376#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
354#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) 377#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
355 378
356#if LJ_64 379#if LJ_GC64
380/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
381#define IRT_IS64 \
382 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
383 (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
384 (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
385 (1u<<IRT_NIL))
386#elif LJ_64
357#define IRT_IS64 \ 387#define IRT_IS64 \
358 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) 388 ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
359#else 389#else
@@ -374,7 +404,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
374 return IRT_INT; 404 return IRT_INT;
375 else if (tvisnum(tv)) 405 else if (tvisnum(tv))
376 return IRT_NUM; 406 return IRT_NUM;
377#if LJ_64 407#if LJ_64 && !LJ_GC64
378 else if (tvislightud(tv)) 408 else if (tvislightud(tv))
379 return IRT_LIGHTUD; 409 return IRT_LIGHTUD;
380#endif 410#endif
@@ -384,7 +414,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
384 414
385static LJ_AINLINE uint32_t irt_toitype_(IRType t) 415static LJ_AINLINE uint32_t irt_toitype_(IRType t)
386{ 416{
387 lua_assert(!LJ_64 || t != IRT_LIGHTUD); 417 lua_assert(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD);
388 if (LJ_DUALNUM && t > IRT_NUM) { 418 if (LJ_DUALNUM && t > IRT_NUM) {
389 return LJ_TISNUM; 419 return LJ_TISNUM;
390 } else { 420 } else {
@@ -464,6 +494,7 @@ typedef uint32_t TRef;
464#define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) 494#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
465#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) 495#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
466#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) 496#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
497#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD))
467#define tref_isstr(tr) (tref_istype((tr), IRT_STR)) 498#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
468#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) 499#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
469#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) 500#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA))
@@ -496,7 +527,9 @@ typedef uint32_t TRef;
496** +-------+-------+---+---+---+---+ 527** +-------+-------+---+---+---+---+
497** | op1 | op2 | t | o | r | s | 528** | op1 | op2 | t | o | r | s |
498** +-------+-------+---+---+---+---+ 529** +-------+-------+---+---+---+---+
499** | op12/i/gco | ot | prev | (alternative fields in union) 530** | op12/i/gco32 | ot | prev | (alternative fields in union)
531** +-------+-------+---+---+---+---+
532** | TValue/gco64 | (2nd IR slot for 64 bit constants)
500** +---------------+-------+-------+ 533** +---------------+-------+-------+
501** 32 16 16 534** 32 16 16
502** 535**
@@ -524,21 +557,27 @@ typedef union IRIns {
524 ) 557 )
525 }; 558 };
526 int32_t i; /* 32 bit signed integer literal (overlaps op12). */ 559 int32_t i; /* 32 bit signed integer literal (overlaps op12). */
527 GCRef gcr; /* GCobj constant (overlaps op12). */ 560 GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
528 MRef ptr; /* Pointer constant (overlaps op12). */ 561 MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
562 TValue tv; /* TValue constant (overlaps entire slot). */
529} IRIns; 563} IRIns;
530 564
531#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) 565#define ir_isk64(ir) \
566 ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
567 (LJ_GC64 && \
568 ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
569
570#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
532#define ir_kstr(ir) (gco2str(ir_kgc((ir)))) 571#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
533#define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) 572#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
534#define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) 573#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
535#define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) 574#define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
536#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) 575#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
537#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) 576#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
538#define ir_k64(ir) \ 577#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
539 check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
540#define ir_kptr(ir) \ 578#define ir_kptr(ir) \
541 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) 579 check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
580 mref((ir)[LJ_GC64].ptr, void))
542 581
543/* A store or any other op with a non-weak guard has a side-effect. */ 582/* A store or any other op with a non-weak guard has a side-effect. */
544static LJ_AINLINE int ir_sideeff(IRIns *ir) 583static LJ_AINLINE int ir_sideeff(IRIns *ir)
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index aae9adbb..f4f3f781 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,7 +16,7 @@ typedef struct CCallInfo {
16 uint32_t flags; /* Number of arguments and flags. */ 16 uint32_t flags; /* Number of arguments and flags. */
17} CCallInfo; 17} CCallInfo;
18 18
19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ 19#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
20#define CCI_NARGS_MAX 32 /* Max. # of args. */ 20#define CCI_NARGS_MAX 32 /* Max. # of args. */
21 21
22#define CCI_OTSHIFT 16 22#define CCI_OTSHIFT 16
@@ -25,6 +25,7 @@ typedef struct CCallInfo {
25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ 25#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
26 26
27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) 27#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
28#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT)
28#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) 29#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
29#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) 30#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
30#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) 31#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
@@ -45,6 +46,17 @@ typedef struct CCallInfo {
45#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ 46#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
46#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ 47#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
47 48
49/* Extra args for SOFTFP, SPLIT 64 bit. */
50#define CCI_XARGS_SHIFT 14
51#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
52#define CCI_XA (1u << CCI_XARGS_SHIFT)
53
54#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
55#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
56#else
57#define CCI_XNARGS(ci) CCI_NARGS((ci))
58#endif
59
48/* Helpers for conditional function definitions. */ 60/* Helpers for conditional function definitions. */
49#define IRCALLCOND_ANY(x) x 61#define IRCALLCOND_ANY(x) x
50 62
@@ -66,6 +78,18 @@ typedef struct CCallInfo {
66#define IRCALLCOND_SOFTFP_FFI(x) NULL 78#define IRCALLCOND_SOFTFP_FFI(x) NULL
67#endif 79#endif
68 80
81#if LJ_SOFTFP && LJ_TARGET_MIPS
82#define IRCALLCOND_SOFTFP_MIPS(x) x
83#else
84#define IRCALLCOND_SOFTFP_MIPS(x) NULL
85#endif
86
87#if LJ_SOFTFP && LJ_TARGET_MIPS64
88#define IRCALLCOND_SOFTFP_MIPS64(x) x
89#else
90#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
91#endif
92
69#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) 93#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
70 94
71#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) 95#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
@@ -87,92 +111,135 @@ typedef struct CCallInfo {
87#endif 111#endif
88 112
89#if LJ_SOFTFP 113#if LJ_SOFTFP
90#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ 114#define XA_FP CCI_XA
115#define XA2_FP (CCI_XA+CCI_XA)
91#else 116#else
92#define ARG1_FP 1 117#define XA_FP 0
118#define XA2_FP 0
119#endif
120
121#if LJ_SOFTFP32
122#define XA_FP32 CCI_XA
123#define XA2_FP32 (CCI_XA+CCI_XA)
124#else
125#define XA_FP32 0
126#define XA2_FP32 0
93#endif 127#endif
94 128
95#if LJ_32 129#if LJ_32
96#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ 130#define XA_64 CCI_XA
131#define XA2_64 (CCI_XA+CCI_XA)
97#else 132#else
98#define ARG2_64 2 133#define XA_64 0
134#define XA2_64 0
99#endif 135#endif
100 136
101/* Function definitions for CALL* instructions. */ 137/* Function definitions for CALL* instructions. */
102#define IRCALLDEF(_) \ 138#define IRCALLDEF(_) \
103 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ 139 _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
140 _(ANY, lj_str_find, 4, N, PGC, 0) \
104 _(ANY, lj_str_new, 3, S, STR, CCI_L) \ 141 _(ANY, lj_str_new, 3, S, STR, CCI_L) \
105 _(ANY, lj_strscan_num, 2, FN, INT, 0) \ 142 _(ANY, lj_strscan_num, 2, FN, INT, 0) \
106 _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ 143 _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L) \
107 _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ 144 _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L) \
145 _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L) \
146 _(ANY, lj_strfmt_putint, 2, FL, PGC, 0) \
147 _(ANY, lj_strfmt_putnum, 2, FL, PGC, 0) \
148 _(ANY, lj_strfmt_putquoted, 2, FL, PGC, 0) \
149 _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64) \
150 _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP) \
151 _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP) \
152 _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP) \
153 _(ANY, lj_strfmt_putfstr, 3, L, PGC, 0) \
154 _(ANY, lj_strfmt_putfchar, 3, L, PGC, 0) \
155 _(ANY, lj_buf_putmem, 3, S, PGC, 0) \
156 _(ANY, lj_buf_putstr, 2, FL, PGC, 0) \
157 _(ANY, lj_buf_putchar, 2, FL, PGC, 0) \
158 _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, 0) \
159 _(ANY, lj_buf_putstr_lower, 2, FL, PGC, 0) \
160 _(ANY, lj_buf_putstr_upper, 2, FL, PGC, 0) \
161 _(ANY, lj_buf_putstr_rep, 3, L, PGC, 0) \
162 _(ANY, lj_buf_puttab, 5, L, PGC, 0) \
163 _(ANY, lj_buf_tostr, 1, FL, STR, 0) \
164 _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L) \
108 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ 165 _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
109 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ 166 _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
110 _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ 167 _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
168 _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L) \
111 _(ANY, lj_tab_len, 1, FL, INT, 0) \ 169 _(ANY, lj_tab_len, 1, FL, INT, 0) \
112 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ 170 _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
113 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ 171 _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
114 _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ 172 _(ANY, lj_mem_newgco, 2, FS, PGC, CCI_L) \
115 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ 173 _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \
116 _(ANY, lj_vm_modi, 2, FN, INT, 0) \ 174 _(ANY, lj_vm_modi, 2, FN, INT, 0) \
117 _(ANY, sinh, ARG1_FP, N, NUM, 0) \ 175 _(ANY, sinh, 1, N, NUM, XA_FP) \
118 _(ANY, cosh, ARG1_FP, N, NUM, 0) \ 176 _(ANY, cosh, 1, N, NUM, XA_FP) \
119 _(ANY, tanh, ARG1_FP, N, NUM, 0) \ 177 _(ANY, tanh, 1, N, NUM, XA_FP) \
120 _(ANY, fputc, 2, S, INT, 0) \ 178 _(ANY, fputc, 2, S, INT, 0) \
121 _(ANY, fwrite, 4, S, INT, 0) \ 179 _(ANY, fwrite, 4, S, INT, 0) \
122 _(ANY, fflush, 1, S, INT, 0) \ 180 _(ANY, fflush, 1, S, INT, 0) \
123 /* ORDER FPM */ \ 181 /* ORDER FPM */ \
124 _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ 182 _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
125 _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ 183 _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
126 _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ 184 _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
127 _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ 185 _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
128 _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ 186 _(ANY, exp, 1, N, NUM, XA_FP) \
129 _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ 187 _(ANY, lj_vm_exp2, 1, N, NUM, XA_FP) \
130 _(FPMATH, log, ARG1_FP, N, NUM, 0) \ 188 _(ANY, log, 1, N, NUM, XA_FP) \
131 _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ 189 _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
132 _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ 190 _(ANY, log10, 1, N, NUM, XA_FP) \
133 _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ 191 _(ANY, sin, 1, N, NUM, XA_FP) \
134 _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ 192 _(ANY, cos, 1, N, NUM, XA_FP) \
135 _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ 193 _(ANY, tan, 1, N, NUM, XA_FP) \
136 _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ 194 _(ANY, lj_vm_powi, 2, N, NUM, XA_FP) \
137 _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ 195 _(ANY, pow, 2, N, NUM, XA2_FP) \
138 _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ 196 _(ANY, atan2, 2, N, NUM, XA2_FP) \
139 _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ 197 _(ANY, ldexp, 2, N, NUM, XA_FP) \
140 _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ 198 _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
141 _(SOFTFP, softfp_add, 4, N, NUM, 0) \ 199 _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
142 _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ 200 _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
143 _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ 201 _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
144 _(SOFTFP, softfp_div, 4, N, NUM, 0) \ 202 _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
145 _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ 203 _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
146 _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ 204 _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
147 _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ 205 _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
206 _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
207 _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
208 _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
148 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ 209 _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
149 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ 210 _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
150 _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ 211 _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
151 _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ 212 _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
152 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ 213 _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
153 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ 214 _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
154 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ 215 _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
155 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ 216 _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
156 _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ 217 _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
157 _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ 218 _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
158 _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ 219 _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
159 _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ 220 _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
160 _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ 221 _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
161 _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ 222 _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
162 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ 223 _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
163 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ 224 _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
164 _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 225 _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
165 _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 226 _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
166 _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 227 _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
167 _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 228 _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
168 _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ 229 _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
169 _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ 230 _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
170 _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ 231 _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
171 _(FFI, strlen, 1, L, INTP, 0) \ 232 _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
172 _(FFI, memcpy, 3, S, PTR, 0) \ 233 _(FFI, strlen, 1, L, INTP, 0) \
173 _(FFI, memset, 3, S, PTR, 0) \ 234 _(FFI, memcpy, 3, S, PTR, 0) \
174 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ 235 _(FFI, memset, 3, S, PTR, 0) \
175 _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) 236 _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
237 _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
238 _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
239 _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
240 _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
241 _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
242 _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
176 \ 243 \
177 /* End of list. */ 244 /* End of list. */
178 245
@@ -220,6 +287,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
220#define fp64_f2l __aeabi_f2lz 287#define fp64_f2l __aeabi_f2lz
221#define fp64_f2ul __aeabi_f2ulz 288#define fp64_f2ul __aeabi_f2ulz
222#endif 289#endif
290#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
291#define softfp_add __adddf3
292#define softfp_sub __subdf3
293#define softfp_mul __muldf3
294#define softfp_div __divdf3
295#define softfp_cmp __ledf2
296#define softfp_i2d __floatsidf
297#define softfp_d2i __fixdfsi
298#define softfp_ui2d __floatunsidf
299#define softfp_f2d __extendsfdf2
300#define softfp_d2ui __fixunsdfsi
301#define softfp_d2f __truncdfsf2
302#define softfp_i2f __floatsisf
303#define softfp_ui2f __floatunsisf
304#define softfp_f2i __fixsfsi
305#define softfp_f2ui __fixunssfsi
223#else 306#else
224#error "Missing soft-float definitions for target architecture" 307#error "Missing soft-float definitions for target architecture"
225#endif 308#endif
@@ -240,6 +323,10 @@ extern float softfp_ui2f(uint32_t a);
240extern int32_t softfp_f2i(float a); 323extern int32_t softfp_f2i(float a);
241extern uint32_t softfp_f2ui(float a); 324extern uint32_t softfp_f2ui(float a);
242#endif 325#endif
326#if LJ_TARGET_MIPS
327extern double lj_vm_sfmin(double a, double b);
328extern double lj_vm_sfmax(double a, double b);
329#endif
243#endif 330#endif
244 331
245#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) 332#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index cf5b4d1f..02d6b946 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
36 return ref; 36 return ref;
37} 37}
38 38
39LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
40
39/* Interning of constants. */ 41/* Interning of constants. */
40LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); 42LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
41LJ_FUNC void lj_ir_k64_freeall(jit_State *J); 43LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
42LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
43LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); 44LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); 45LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
46LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); 46LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
@@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
48LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); 48LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
49LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); 49LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
50LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); 50LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
51LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
51 52
52#if LJ_64 53#if LJ_64
53#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) 54#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
@@ -74,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
74#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) 75#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
75 76
76/* Special 128 bit SIMD constants. */ 77/* Special 128 bit SIMD constants. */
77#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) 78#define lj_ir_ksimd(J, idx) \
78#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) 79 lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
79 80
80/* Access to constants. */ 81/* Access to constants. */
81LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); 82LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
@@ -149,7 +150,7 @@ LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
149/* Optimization passes. */ 150/* Optimization passes. */
150LJ_FUNC void lj_opt_dce(jit_State *J); 151LJ_FUNC void lj_opt_dce(jit_State *J);
151LJ_FUNC int lj_opt_loop(jit_State *J); 152LJ_FUNC int lj_opt_loop(jit_State *J);
152#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 153#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
153LJ_FUNC void lj_opt_split(jit_State *J); 154LJ_FUNC void lj_opt_split(jit_State *J);
154#else 155#else
155#define lj_opt_split(J) UNUSED(J) 156#define lj_opt_split(J) UNUSED(J)
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 0e1c4827..f179f17f 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -14,18 +14,16 @@
14 14
15/* CPU-specific JIT engine flags. */ 15/* CPU-specific JIT engine flags. */
16#if LJ_TARGET_X86ORX64 16#if LJ_TARGET_X86ORX64
17#define JIT_F_CMOV 0x00000010 17#define JIT_F_SSE2 0x00000010
18#define JIT_F_SSE2 0x00000020 18#define JIT_F_SSE3 0x00000020
19#define JIT_F_SSE3 0x00000040 19#define JIT_F_SSE4_1 0x00000040
20#define JIT_F_SSE4_1 0x00000080 20#define JIT_F_PREFER_IMUL 0x00000080
21#define JIT_F_P4 0x00000100 21#define JIT_F_LEA_AGU 0x00000100
22#define JIT_F_PREFER_IMUL 0x00000200 22#define JIT_F_BMI2 0x00000200
23#define JIT_F_SPLIT_XMM 0x00000400
24#define JIT_F_LEA_AGU 0x00000800
25 23
26/* Names for the CPU-specific flags. Must match the order above. */ 24/* Names for the CPU-specific flags. Must match the order above. */
27#define JIT_F_CPU_FIRST JIT_F_CMOV 25#define JIT_F_CPU_FIRST JIT_F_SSE2
28#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" 26#define JIT_F_CPUSTRING "\4SSE2\4SSE3\6SSE4.1\3AMD\4ATOM\4BMI2"
29#elif LJ_TARGET_ARM 27#elif LJ_TARGET_ARM
30#define JIT_F_ARMV6_ 0x00000010 28#define JIT_F_ARMV6_ 0x00000010
31#define JIT_F_ARMV6T2_ 0x00000020 29#define JIT_F_ARMV6T2_ 0x00000020
@@ -48,11 +46,23 @@
48#define JIT_F_CPU_FIRST JIT_F_SQRT 46#define JIT_F_CPU_FIRST JIT_F_SQRT
49#define JIT_F_CPUSTRING "\4SQRT\5ROUND" 47#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
50#elif LJ_TARGET_MIPS 48#elif LJ_TARGET_MIPS
51#define JIT_F_MIPS32R2 0x00000010 49#define JIT_F_MIPSXXR2 0x00000010
52 50
53/* Names for the CPU-specific flags. Must match the order above. */ 51/* Names for the CPU-specific flags. Must match the order above. */
54#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 52#define JIT_F_CPU_FIRST JIT_F_MIPSXXR2
53#if LJ_TARGET_MIPS32
54#if LJ_TARGET_MIPSR6
55#define JIT_F_CPUSTRING "\010MIPS32R6"
56#else
55#define JIT_F_CPUSTRING "\010MIPS32R2" 57#define JIT_F_CPUSTRING "\010MIPS32R2"
58#endif
59#else
60#if LJ_TARGET_MIPSR6
61#define JIT_F_CPUSTRING "\010MIPS64R6"
62#else
63#define JIT_F_CPUSTRING "\010MIPS64R2"
64#endif
65#endif
56#else 66#else
57#define JIT_F_CPU_FIRST 0 67#define JIT_F_CPU_FIRST 0
58#define JIT_F_CPUSTRING "" 68#define JIT_F_CPUSTRING ""
@@ -100,6 +110,7 @@
100 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ 110 _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
101 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ 111 _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
102 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ 112 _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
113 _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
103 \ 114 \
104 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ 115 _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
105 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ 116 _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -186,14 +197,26 @@ LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
186#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) 197#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
187#define SNAP_TR(slot, tr) \ 198#define SNAP_TR(slot, tr) \
188 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) 199 (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
200#if !LJ_FR2
189#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) 201#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
202#endif
190#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) 203#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
191#define snap_ref(sn) ((sn) & 0xffff) 204#define snap_ref(sn) ((sn) & 0xffff)
192#define snap_slot(sn) ((BCReg)((sn) >> 24)) 205#define snap_slot(sn) ((BCReg)((sn) >> 24))
193#define snap_isframe(sn) ((sn) & SNAP_FRAME) 206#define snap_isframe(sn) ((sn) & SNAP_FRAME)
194#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
195#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) 207#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
196 208
209static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
210{
211#if LJ_FR2
212 uint64_t pcbase;
213 memcpy(&pcbase, sn, sizeof(uint64_t));
214 return (const BCIns *)(pcbase >> 8);
215#else
216 return (const BCIns *)(uintptr_t)*sn;
217#endif
218}
219
197/* Snapshot and exit numbers. */ 220/* Snapshot and exit numbers. */
198typedef uint32_t SnapNo; 221typedef uint32_t SnapNo;
199typedef uint32_t ExitNo; 222typedef uint32_t ExitNo;
@@ -211,7 +234,8 @@ typedef enum {
211 LJ_TRLINK_UPREC, /* Up-recursion. */ 234 LJ_TRLINK_UPREC, /* Up-recursion. */
212 LJ_TRLINK_DOWNREC, /* Down-recursion. */ 235 LJ_TRLINK_DOWNREC, /* Down-recursion. */
213 LJ_TRLINK_INTERP, /* Fallback to interpreter. */ 236 LJ_TRLINK_INTERP, /* Fallback to interpreter. */
214 LJ_TRLINK_RETURN /* Return to interpreter. */ 237 LJ_TRLINK_RETURN, /* Return to interpreter. */
238 LJ_TRLINK_STITCH /* Trace stitching. */
215} TraceLink; 239} TraceLink;
216 240
217/* Trace object. */ 241/* Trace object. */
@@ -219,6 +243,9 @@ typedef struct GCtrace {
219 GCHeader; 243 GCHeader;
220 uint16_t nsnap; /* Number of snapshots. */ 244 uint16_t nsnap; /* Number of snapshots. */
221 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ 245 IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
246#if LJ_GC64
247 uint32_t unused_gc64;
248#endif
222 GCRef gclist; 249 GCRef gclist;
223 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ 250 IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
224 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ 251 IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -294,6 +321,16 @@ typedef struct ScEvEntry {
294 uint8_t dir; /* Direction. 1: +, 0: -. */ 321 uint8_t dir; /* Direction. 1: +, 0: -. */
295} ScEvEntry; 322} ScEvEntry;
296 323
324/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
325typedef struct RBCHashEntry {
326 MRef pc; /* Bytecode PC. */
327 GCRef pt; /* Prototype. */
328 IRRef ref; /* IR reference. */
329} RBCHashEntry;
330
331/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
332#define RBCHASH_SLOTS 8
333
297/* 128 bit SIMD constants. */ 334/* 128 bit SIMD constants. */
298enum { 335enum {
299 LJ_KSIMD_ABS, 336 LJ_KSIMD_ABS,
@@ -301,12 +338,51 @@ enum {
301 LJ_KSIMD__MAX 338 LJ_KSIMD__MAX
302}; 339};
303 340
341enum {
342#if LJ_TARGET_X86ORX64
343 LJ_K64_TOBIT, /* 2^52 + 2^51 */
344 LJ_K64_2P64, /* 2^64 */
345 LJ_K64_M2P64, /* -2^64 */
346#if LJ_32
347 LJ_K64_M2P64_31, /* -2^64 or -2^31 */
348#else
349 LJ_K64_M2P64_31 = LJ_K64_M2P64,
350#endif
351#endif
352#if LJ_TARGET_MIPS
353 LJ_K64_2P31, /* 2^31 */
354#if LJ_64
355 LJ_K64_2P63, /* 2^63 */
356 LJ_K64_M2P64, /* -2^64 */
357#endif
358#endif
359 LJ_K64__MAX,
360};
361
362enum {
363#if LJ_TARGET_X86ORX64
364 LJ_K32_M2P64_31, /* -2^64 or -2^31 */
365#endif
366#if LJ_TARGET_PPC
367 LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
368 LJ_K32_2P52, /* 2^52 */
369#endif
370#if LJ_TARGET_PPC || LJ_TARGET_MIPS
371 LJ_K32_2P31, /* 2^31 */
372#endif
373#if LJ_TARGET_MIPS64
374 LJ_K32_2P63, /* 2^63 */
375 LJ_K32_M2P64, /* -2^64 */
376#endif
377 LJ_K32__MAX
378};
379
304/* Get 16 byte aligned pointer to SIMD constant. */ 380/* Get 16 byte aligned pointer to SIMD constant. */
305#define LJ_KSIMD(J, n) \ 381#define LJ_KSIMD(J, n) \
306 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) 382 ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
307 383
308/* Set/reset flag to activate the SPLIT pass for the current trace. */ 384/* Set/reset flag to activate the SPLIT pass for the current trace. */
309#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 385#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
310#define lj_needsplit(J) (J->needsplit = 1) 386#define lj_needsplit(J) (J->needsplit = 1)
311#define lj_resetsplit(J) (J->needsplit = 0) 387#define lj_resetsplit(J) (J->needsplit = 0)
312#else 388#else
@@ -317,13 +393,14 @@ enum {
317/* Fold state is used to fold instructions on-the-fly. */ 393/* Fold state is used to fold instructions on-the-fly. */
318typedef struct FoldState { 394typedef struct FoldState {
319 IRIns ins; /* Currently emitted instruction. */ 395 IRIns ins; /* Currently emitted instruction. */
320 IRIns left; /* Instruction referenced by left operand. */ 396 IRIns left[2]; /* Instruction referenced by left operand. */
321 IRIns right; /* Instruction referenced by right operand. */ 397 IRIns right[2]; /* Instruction referenced by right operand. */
322} FoldState; 398} FoldState;
323 399
324/* JIT compiler state. */ 400/* JIT compiler state. */
325typedef struct jit_State { 401typedef struct jit_State {
326 GCtrace cur; /* Current trace. */ 402 GCtrace cur; /* Current trace. */
403 GCtrace *curfinal; /* Final address of current trace (set during asm). */
327 404
328 lua_State *L; /* Current Lua state. */ 405 lua_State *L; /* Current Lua state. */
329 const BCIns *pc; /* Current PC. */ 406 const BCIns *pc; /* Current PC. */
@@ -353,8 +430,9 @@ typedef struct jit_State {
353 int32_t framedepth; /* Current frame depth. */ 430 int32_t framedepth; /* Current frame depth. */
354 int32_t retdepth; /* Return frame depth (count of RETF). */ 431 int32_t retdepth; /* Return frame depth (count of RETF). */
355 432
356 MRef k64; /* Pointer to chained array of 64 bit constants. */
357 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ 433 TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
434 TValue k64[LJ_K64__MAX]; /* Common 8 byte constants used by backends. */
435 uint32_t k32[LJ_K32__MAX]; /* Ditto for 4 byte constants. */
358 436
359 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ 437 IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
360 IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ 438 IRRef irtoplim; /* Upper limit of instuction buffer (biased). */
@@ -367,13 +445,15 @@ typedef struct jit_State {
367 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ 445 MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
368 446
369 PostProc postproc; /* Required post-processing after execution. */ 447 PostProc postproc; /* Required post-processing after execution. */
370#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) 448#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
371 int needsplit; /* Need SPLIT pass. */ 449 uint8_t needsplit; /* Need SPLIT pass. */
372#endif 450#endif
451 uint8_t retryrec; /* Retry recording. */
373 452
374 GCRef *trace; /* Array of traces. */ 453 GCRef *trace; /* Array of traces. */
375 TraceNo freetrace; /* Start of scan for next free trace. */ 454 TraceNo freetrace; /* Start of scan for next free trace. */
376 MSize sizetrace; /* Size of trace array. */ 455 MSize sizetrace; /* Size of trace array. */
456 IRRef1 ktrace; /* Reference to KGC with GCtrace. */
377 457
378 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ 458 IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
379 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ 459 TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
@@ -386,6 +466,10 @@ typedef struct jit_State {
386 uint32_t penaltyslot; /* Round-robin index into penalty slots. */ 466 uint32_t penaltyslot; /* Round-robin index into penalty slots. */
387 uint32_t prngstate; /* PRNG state. */ 467 uint32_t prngstate; /* PRNG state. */
388 468
469#ifdef LUAJIT_ENABLE_TABLE_BUMP
470 RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
471#endif
472
389 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ 473 BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
390 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ 474 uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
391 475
@@ -406,6 +490,12 @@ typedef struct jit_State {
406 size_t szallmcarea; /* Total size of all allocated mcode areas. */ 490 size_t szallmcarea; /* Total size of all allocated mcode areas. */
407 491
408 TValue errinfo; /* Additional info element for trace errors. */ 492 TValue errinfo; /* Additional info element for trace errors. */
493
494#if LJ_HASPROFILE
495 GCproto *prev_pt; /* Previous prototype. */
496 BCLine prev_line; /* Previous line. */
497 int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
498#endif
409} 499}
410#if LJ_TARGET_ARM 500#if LJ_TARGET_ARM
411LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */ 501LJ_ALIGN(16) /* For DISPATCH-relative addresses in assembler part. */
diff --git a/src/lj_lex.c b/src/lj_lex.c
index ca942583..05a2efc3 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#if LJ_HASFFI 17#if LJ_HASFFI
17#include "lj_tab.h" 18#include "lj_tab.h"
@@ -24,6 +25,7 @@
24#include "lj_parse.h" 25#include "lj_parse.h"
25#include "lj_char.h" 26#include "lj_char.h"
26#include "lj_strscan.h" 27#include "lj_strscan.h"
28#include "lj_strfmt.h"
27 29
28/* Lua lexer token names. */ 30/* Lua lexer token names. */
29static const char *const tokennames[] = { 31static const char *const tokennames[] = {
@@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2)
37 39
38/* -- Buffer handling ----------------------------------------------------- */ 40/* -- Buffer handling ----------------------------------------------------- */
39 41
40#define char2int(c) ((int)(uint8_t)(c)) 42#define LEX_EOF (-1)
41#define next(ls) \ 43#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
42 (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
43#define save_and_next(ls) (save(ls, ls->current), next(ls))
44#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
45#define END_OF_STREAM (-1)
46 44
47static int fillbuf(LexState *ls) 45/* Get more input from reader. */
46static LJ_NOINLINE LexChar lex_more(LexState *ls)
48{ 47{
49 size_t sz; 48 size_t sz;
50 const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); 49 const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
51 if (buf == NULL || sz == 0) return END_OF_STREAM; 50 if (p == NULL || sz == 0) return LEX_EOF;
52 if (sz >= LJ_MAX_MEM) { 51 if (sz >= LJ_MAX_BUF) {
53 if (sz != ~(size_t)0) lj_err_mem(ls->L); 52 if (sz != ~(size_t)0) lj_err_mem(ls->L);
53 sz = ~(uintptr_t)0 - (uintptr_t)p;
54 if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1;
54 ls->endmark = 1; 55 ls->endmark = 1;
55 } 56 }
56 ls->n = (MSize)sz - 1; 57 ls->pe = p + sz;
57 ls->p = buf; 58 ls->p = p + 1;
58 return char2int(*(ls->p++)); 59 return (LexChar)(uint8_t)p[0];
59} 60}
60 61
61static LJ_NOINLINE void save_grow(LexState *ls, int c) 62/* Get next character. */
63static LJ_AINLINE LexChar lex_next(LexState *ls)
62{ 64{
63 MSize newsize; 65 return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
64 if (ls->sb.sz >= LJ_MAX_STR/2)
65 lj_lex_error(ls, 0, LJ_ERR_XELEM);
66 newsize = ls->sb.sz * 2;
67 lj_str_resizebuf(ls->L, &ls->sb, newsize);
68 ls->sb.buf[ls->sb.n++] = (char)c;
69} 66}
70 67
71static LJ_AINLINE void save(LexState *ls, int c) 68/* Save character. */
69static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
72{ 70{
73 if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) 71 lj_buf_putb(&ls->sb, c);
74 save_grow(ls, c);
75 else
76 ls->sb.buf[ls->sb.n++] = (char)c;
77} 72}
78 73
79static void inclinenumber(LexState *ls) 74/* Save previous character and get next character. */
75static LJ_AINLINE LexChar lex_savenext(LexState *ls)
80{ 76{
81 int old = ls->current; 77 lex_save(ls, ls->c);
82 lua_assert(currIsNewline(ls)); 78 return lex_next(ls);
83 next(ls); /* skip `\n' or `\r' */ 79}
84 if (currIsNewline(ls) && ls->current != old) 80
85 next(ls); /* skip `\n\r' or `\r\n' */ 81/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
82static void lex_newline(LexState *ls)
83{
84 LexChar old = ls->c;
85 lua_assert(lex_iseol(ls));
86 lex_next(ls); /* Skip "\n" or "\r". */
87 if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
86 if (++ls->linenumber >= LJ_MAX_LINE) 88 if (++ls->linenumber >= LJ_MAX_LINE)
87 lj_lex_error(ls, ls->token, LJ_ERR_XLINES); 89 lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
88} 90}
89 91
90/* -- Scanner for terminals ----------------------------------------------- */ 92/* -- Scanner for terminals ----------------------------------------------- */
@@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls)
93static void lex_number(LexState *ls, TValue *tv) 95static void lex_number(LexState *ls, TValue *tv)
94{ 96{
95 StrScanFmt fmt; 97 StrScanFmt fmt;
96 int c, xp = 'e'; 98 LexChar c, xp = 'e';
97 lua_assert(lj_char_isdigit(ls->current)); 99 lua_assert(lj_char_isdigit(ls->c));
98 if ((c = ls->current) == '0') { 100 if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
99 save_and_next(ls); 101 xp = 'p';
100 if ((ls->current | 0x20) == 'x') xp = 'p'; 102 while (lj_char_isident(ls->c) || ls->c == '.' ||
101 } 103 ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
102 while (lj_char_isident(ls->current) || ls->current == '.' || 104 c = ls->c;
103 ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { 105 lex_savenext(ls);
104 c = ls->current;
105 save_and_next(ls);
106 } 106 }
107 save(ls, '\0'); 107 lex_save(ls, '\0');
108 fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, 108 fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv,
109 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | 109 (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
110 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); 110 (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
111 if (LJ_DUALNUM && fmt == STRSCAN_INT) { 111 if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -138,60 +138,60 @@ static void lex_number(LexState *ls, TValue *tv)
138 } 138 }
139} 139}
140 140
141static int skip_sep(LexState *ls) 141/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
142static int lex_skipeq(LexState *ls)
142{ 143{
143 int count = 0; 144 int count = 0;
144 int s = ls->current; 145 LexChar s = ls->c;
145 lua_assert(s == '[' || s == ']'); 146 lua_assert(s == '[' || s == ']');
146 save_and_next(ls); 147 while (lex_savenext(ls) == '=' && count < 0x20000000)
147 while (ls->current == '=' && count < 0x20000000) {
148 save_and_next(ls);
149 count++; 148 count++;
150 } 149 return (ls->c == s) ? count : (-count) - 1;
151 return (ls->current == s) ? count : (-count) - 1;
152} 150}
153 151
154static void read_long_string(LexState *ls, TValue *tv, int sep) 152/* Parse a long string or long comment (tv set to NULL). */
153static void lex_longstring(LexState *ls, TValue *tv, int sep)
155{ 154{
156 save_and_next(ls); /* skip 2nd `[' */ 155 lex_savenext(ls); /* Skip second '['. */
157 if (currIsNewline(ls)) /* string starts with a newline? */ 156 if (lex_iseol(ls)) /* Skip initial newline. */
158 inclinenumber(ls); /* skip it */ 157 lex_newline(ls);
159 for (;;) { 158 for (;;) {
160 switch (ls->current) { 159 switch (ls->c) {
161 case END_OF_STREAM: 160 case LEX_EOF:
162 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); 161 lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
163 break; 162 break;
164 case ']': 163 case ']':
165 if (skip_sep(ls) == sep) { 164 if (lex_skipeq(ls) == sep) {
166 save_and_next(ls); /* skip 2nd `]' */ 165 lex_savenext(ls); /* Skip second ']'. */
167 goto endloop; 166 goto endloop;
168 } 167 }
169 break; 168 break;
170 case '\n': 169 case '\n':
171 case '\r': 170 case '\r':
172 save(ls, '\n'); 171 lex_save(ls, '\n');
173 inclinenumber(ls); 172 lex_newline(ls);
174 if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ 173 if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
175 break; 174 break;
176 default: 175 default:
177 if (tv) save_and_next(ls); 176 lex_savenext(ls);
178 else next(ls);
179 break; 177 break;
180 } 178 }
181 } endloop: 179 } endloop:
182 if (tv) { 180 if (tv) {
183 GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), 181 GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
184 ls->sb.n - 2*(2 + (MSize)sep)); 182 sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
185 setstrV(ls->L, tv, str); 183 setstrV(ls->L, tv, str);
186 } 184 }
187} 185}
188 186
189static void read_string(LexState *ls, int delim, TValue *tv) 187/* Parse a string. */
188static void lex_string(LexState *ls, TValue *tv)
190{ 189{
191 save_and_next(ls); 190 LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
192 while (ls->current != delim) { 191 lex_savenext(ls);
193 switch (ls->current) { 192 while (ls->c != delim) {
194 case END_OF_STREAM: 193 switch (ls->c) {
194 case LEX_EOF:
195 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); 195 lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
196 continue; 196 continue;
197 case '\n': 197 case '\n':
@@ -199,7 +199,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
199 lj_lex_error(ls, TK_string, LJ_ERR_XSTR); 199 lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
200 continue; 200 continue;
201 case '\\': { 201 case '\\': {
202 int c = next(ls); /* Skip the '\\'. */ 202 LexChar c = lex_next(ls); /* Skip the '\\'. */
203 switch (c) { 203 switch (c) {
204 case 'a': c = '\a'; break; 204 case 'a': c = '\a'; break;
205 case 'b': c = '\b'; break; 205 case 'b': c = '\b'; break;
@@ -209,111 +209,139 @@ static void read_string(LexState *ls, int delim, TValue *tv)
209 case 't': c = '\t'; break; 209 case 't': c = '\t'; break;
210 case 'v': c = '\v'; break; 210 case 'v': c = '\v'; break;
211 case 'x': /* Hexadecimal escape '\xXX'. */ 211 case 'x': /* Hexadecimal escape '\xXX'. */
212 c = (next(ls) & 15u) << 4; 212 c = (lex_next(ls) & 15u) << 4;
213 if (!lj_char_isdigit(ls->current)) { 213 if (!lj_char_isdigit(ls->c)) {
214 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 214 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
215 c += 9 << 4; 215 c += 9 << 4;
216 } 216 }
217 c += (next(ls) & 15u); 217 c += (lex_next(ls) & 15u);
218 if (!lj_char_isdigit(ls->current)) { 218 if (!lj_char_isdigit(ls->c)) {
219 if (!lj_char_isxdigit(ls->current)) goto err_xesc; 219 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
220 c += 9; 220 c += 9;
221 } 221 }
222 break; 222 break;
223 case 'u': /* Unicode escape '\u{XX...}'. */
224 if (lex_next(ls) != '{') goto err_xesc;
225 lex_next(ls);
226 c = 0;
227 do {
228 c = (c << 4) | (ls->c & 15u);
229 if (!lj_char_isdigit(ls->c)) {
230 if (!lj_char_isxdigit(ls->c)) goto err_xesc;
231 c += 9;
232 }
233 if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */
234 } while (lex_next(ls) != '}');
235 if (c < 0x800) {
236 if (c < 0x80) break;
237 lex_save(ls, 0xc0 | (c >> 6));
238 } else {
239 if (c >= 0x10000) {
240 lex_save(ls, 0xf0 | (c >> 18));
241 lex_save(ls, 0x80 | ((c >> 12) & 0x3f));
242 } else {
243 if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */
244 lex_save(ls, 0xe0 | (c >> 12));
245 }
246 lex_save(ls, 0x80 | ((c >> 6) & 0x3f));
247 }
248 c = 0x80 | (c & 0x3f);
249 break;
223 case 'z': /* Skip whitespace. */ 250 case 'z': /* Skip whitespace. */
224 next(ls); 251 lex_next(ls);
225 while (lj_char_isspace(ls->current)) 252 while (lj_char_isspace(ls->c))
226 if (currIsNewline(ls)) inclinenumber(ls); else next(ls); 253 if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
227 continue; 254 continue;
228 case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; 255 case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
229 case '\\': case '\"': case '\'': break; 256 case '\\': case '\"': case '\'': break;
230 case END_OF_STREAM: continue; 257 case LEX_EOF: continue;
231 default: 258 default:
232 if (!lj_char_isdigit(c)) 259 if (!lj_char_isdigit(c))
233 goto err_xesc; 260 goto err_xesc;
234 c -= '0'; /* Decimal escape '\ddd'. */ 261 c -= '0'; /* Decimal escape '\ddd'. */
235 if (lj_char_isdigit(next(ls))) { 262 if (lj_char_isdigit(lex_next(ls))) {
236 c = c*10 + (ls->current - '0'); 263 c = c*10 + (ls->c - '0');
237 if (lj_char_isdigit(next(ls))) { 264 if (lj_char_isdigit(lex_next(ls))) {
238 c = c*10 + (ls->current - '0'); 265 c = c*10 + (ls->c - '0');
239 if (c > 255) { 266 if (c > 255) {
240 err_xesc: 267 err_xesc:
241 lj_lex_error(ls, TK_string, LJ_ERR_XESC); 268 lj_lex_error(ls, TK_string, LJ_ERR_XESC);
242 } 269 }
243 next(ls); 270 lex_next(ls);
244 } 271 }
245 } 272 }
246 save(ls, c); 273 lex_save(ls, c);
247 continue; 274 continue;
248 } 275 }
249 save(ls, c); 276 lex_save(ls, c);
250 next(ls); 277 lex_next(ls);
251 continue; 278 continue;
252 } 279 }
253 default: 280 default:
254 save_and_next(ls); 281 lex_savenext(ls);
255 break; 282 break;
256 } 283 }
257 } 284 }
258 save_and_next(ls); /* skip delimiter */ 285 lex_savenext(ls); /* Skip trailing delimiter. */
259 setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); 286 setstrV(ls->L, tv,
287 lj_parse_keepstr(ls, sbufB(&ls->sb)+1, sbuflen(&ls->sb)-2));
260} 288}
261 289
262/* -- Main lexical scanner ------------------------------------------------ */ 290/* -- Main lexical scanner ------------------------------------------------ */
263 291
264static int llex(LexState *ls, TValue *tv) 292/* Get next lexical token. */
293static LexToken lex_scan(LexState *ls, TValue *tv)
265{ 294{
266 lj_str_resetbuf(&ls->sb); 295 lj_buf_reset(&ls->sb);
267 for (;;) { 296 for (;;) {
268 if (lj_char_isident(ls->current)) { 297 if (lj_char_isident(ls->c)) {
269 GCstr *s; 298 GCstr *s;
270 if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ 299 if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
271 lex_number(ls, tv); 300 lex_number(ls, tv);
272 return TK_number; 301 return TK_number;
273 } 302 }
274 /* Identifier or reserved word. */ 303 /* Identifier or reserved word. */
275 do { 304 do {
276 save_and_next(ls); 305 lex_savenext(ls);
277 } while (lj_char_isident(ls->current)); 306 } while (lj_char_isident(ls->c));
278 s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); 307 s = lj_parse_keepstr(ls, sbufB(&ls->sb), sbuflen(&ls->sb));
279 setstrV(ls->L, tv, s); 308 setstrV(ls->L, tv, s);
280 if (s->reserved > 0) /* Reserved word? */ 309 if (s->reserved > 0) /* Reserved word? */
281 return TK_OFS + s->reserved; 310 return TK_OFS + s->reserved;
282 return TK_name; 311 return TK_name;
283 } 312 }
284 switch (ls->current) { 313 switch (ls->c) {
285 case '\n': 314 case '\n':
286 case '\r': 315 case '\r':
287 inclinenumber(ls); 316 lex_newline(ls);
288 continue; 317 continue;
289 case ' ': 318 case ' ':
290 case '\t': 319 case '\t':
291 case '\v': 320 case '\v':
292 case '\f': 321 case '\f':
293 next(ls); 322 lex_next(ls);
294 continue; 323 continue;
295 case '-': 324 case '-':
296 next(ls); 325 lex_next(ls);
297 if (ls->current != '-') return '-'; 326 if (ls->c != '-') return '-';
298 /* else is a comment */ 327 lex_next(ls);
299 next(ls); 328 if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
300 if (ls->current == '[') { 329 int sep = lex_skipeq(ls);
301 int sep = skip_sep(ls); 330 lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
302 lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
303 if (sep >= 0) { 331 if (sep >= 0) {
304 read_long_string(ls, NULL, sep); /* long comment */ 332 lex_longstring(ls, NULL, sep);
305 lj_str_resetbuf(&ls->sb); 333 lj_buf_reset(&ls->sb);
306 continue; 334 continue;
307 } 335 }
308 } 336 }
309 /* else short comment */ 337 /* Short comment "--.*\n". */
310 while (!currIsNewline(ls) && ls->current != END_OF_STREAM) 338 while (!lex_iseol(ls) && ls->c != LEX_EOF)
311 next(ls); 339 lex_next(ls);
312 continue; 340 continue;
313 case '[': { 341 case '[': {
314 int sep = skip_sep(ls); 342 int sep = lex_skipeq(ls);
315 if (sep >= 0) { 343 if (sep >= 0) {
316 read_long_string(ls, tv, sep); 344 lex_longstring(ls, tv, sep);
317 return TK_string; 345 return TK_string;
318 } else if (sep == -1) { 346 } else if (sep == -1) {
319 return '['; 347 return '[';
@@ -323,44 +351,43 @@ static int llex(LexState *ls, TValue *tv)
323 } 351 }
324 } 352 }
325 case '=': 353 case '=':
326 next(ls); 354 lex_next(ls);
327 if (ls->current != '=') return '='; else { next(ls); return TK_eq; } 355 if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
328 case '<': 356 case '<':
329 next(ls); 357 lex_next(ls);
330 if (ls->current != '=') return '<'; else { next(ls); return TK_le; } 358 if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
331 case '>': 359 case '>':
332 next(ls); 360 lex_next(ls);
333 if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } 361 if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
334 case '~': 362 case '~':
335 next(ls); 363 lex_next(ls);
336 if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } 364 if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
337 case ':': 365 case ':':
338 next(ls); 366 lex_next(ls);
339 if (ls->current != ':') return ':'; else { next(ls); return TK_label; } 367 if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
340 case '"': 368 case '"':
341 case '\'': 369 case '\'':
342 read_string(ls, ls->current, tv); 370 lex_string(ls, tv);
343 return TK_string; 371 return TK_string;
344 case '.': 372 case '.':
345 save_and_next(ls); 373 if (lex_savenext(ls) == '.') {
346 if (ls->current == '.') { 374 lex_next(ls);
347 next(ls); 375 if (ls->c == '.') {
348 if (ls->current == '.') { 376 lex_next(ls);
349 next(ls);
350 return TK_dots; /* ... */ 377 return TK_dots; /* ... */
351 } 378 }
352 return TK_concat; /* .. */ 379 return TK_concat; /* .. */
353 } else if (!lj_char_isdigit(ls->current)) { 380 } else if (!lj_char_isdigit(ls->c)) {
354 return '.'; 381 return '.';
355 } else { 382 } else {
356 lex_number(ls, tv); 383 lex_number(ls, tv);
357 return TK_number; 384 return TK_number;
358 } 385 }
359 case END_OF_STREAM: 386 case LEX_EOF:
360 return TK_eof; 387 return TK_eof;
361 default: { 388 default: {
362 int c = ls->current; 389 LexChar c = ls->c;
363 next(ls); 390 lex_next(ls);
364 return c; /* Single-char tokens (+ - / ...). */ 391 return c; /* Single-char tokens (+ - / ...). */
365 } 392 }
366 } 393 }
@@ -375,36 +402,33 @@ int lj_lex_setup(lua_State *L, LexState *ls)
375 int header = 0; 402 int header = 0;
376 ls->L = L; 403 ls->L = L;
377 ls->fs = NULL; 404 ls->fs = NULL;
378 ls->n = 0; 405 ls->pe = ls->p = NULL;
379 ls->p = NULL;
380 ls->vstack = NULL; 406 ls->vstack = NULL;
381 ls->sizevstack = 0; 407 ls->sizevstack = 0;
382 ls->vtop = 0; 408 ls->vtop = 0;
383 ls->bcstack = NULL; 409 ls->bcstack = NULL;
384 ls->sizebcstack = 0; 410 ls->sizebcstack = 0;
385 ls->token = 0; 411 ls->tok = 0;
386 ls->lookahead = TK_eof; /* No look-ahead token. */ 412 ls->lookahead = TK_eof; /* No look-ahead token. */
387 ls->linenumber = 1; 413 ls->linenumber = 1;
388 ls->lastline = 1; 414 ls->lastline = 1;
389 ls->endmark = 0; 415 ls->endmark = 0;
390 lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); 416 lex_next(ls); /* Read-ahead first char. */
391 next(ls); /* Read-ahead first char. */ 417 if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
392 if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && 418 (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
393 char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
394 ls->n -= 2;
395 ls->p += 2; 419 ls->p += 2;
396 next(ls); 420 lex_next(ls);
397 header = 1; 421 header = 1;
398 } 422 }
399 if (ls->current == '#') { /* Skip POSIX #! header line. */ 423 if (ls->c == '#') { /* Skip POSIX #! header line. */
400 do { 424 do {
401 next(ls); 425 lex_next(ls);
402 if (ls->current == END_OF_STREAM) return 0; 426 if (ls->c == LEX_EOF) return 0;
403 } while (!currIsNewline(ls)); 427 } while (!lex_iseol(ls));
404 inclinenumber(ls); 428 lex_newline(ls);
405 header = 1; 429 header = 1;
406 } 430 }
407 if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ 431 if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
408 if (header) { 432 if (header) {
409 /* 433 /*
410 ** Loading bytecode with an extra header is disabled for security 434 ** Loading bytecode with an extra header is disabled for security
@@ -426,55 +450,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
426 global_State *g = G(L); 450 global_State *g = G(L);
427 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); 451 lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
428 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); 452 lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
429 lj_str_freebuf(g, &ls->sb); 453 lj_buf_free(g, &ls->sb);
430} 454}
431 455
456/* Return next lexical token. */
432void lj_lex_next(LexState *ls) 457void lj_lex_next(LexState *ls)
433{ 458{
434 ls->lastline = ls->linenumber; 459 ls->lastline = ls->linenumber;
435 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ 460 if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
436 ls->token = llex(ls, &ls->tokenval); /* Get next token. */ 461 ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
437 } else { /* Otherwise return lookahead token. */ 462 } else { /* Otherwise return lookahead token. */
438 ls->token = ls->lookahead; 463 ls->tok = ls->lookahead;
439 ls->lookahead = TK_eof; 464 ls->lookahead = TK_eof;
440 ls->tokenval = ls->lookaheadval; 465 ls->tokval = ls->lookaheadval;
441 } 466 }
442} 467}
443 468
469/* Look ahead for the next token. */
444LexToken lj_lex_lookahead(LexState *ls) 470LexToken lj_lex_lookahead(LexState *ls)
445{ 471{
446 lua_assert(ls->lookahead == TK_eof); 472 lua_assert(ls->lookahead == TK_eof);
447 ls->lookahead = llex(ls, &ls->lookaheadval); 473 ls->lookahead = lex_scan(ls, &ls->lookaheadval);
448 return ls->lookahead; 474 return ls->lookahead;
449} 475}
450 476
451const char *lj_lex_token2str(LexState *ls, LexToken token) 477/* Convert token to string. */
478const char *lj_lex_token2str(LexState *ls, LexToken tok)
452{ 479{
453 if (token > TK_OFS) 480 if (tok > TK_OFS)
454 return tokennames[token-TK_OFS-1]; 481 return tokennames[tok-TK_OFS-1];
455 else if (!lj_char_iscntrl(token)) 482 else if (!lj_char_iscntrl(tok))
456 return lj_str_pushf(ls->L, "%c", token); 483 return lj_strfmt_pushf(ls->L, "%c", tok);
457 else 484 else
458 return lj_str_pushf(ls->L, "char(%d)", token); 485 return lj_strfmt_pushf(ls->L, "char(%d)", tok);
459} 486}
460 487
461void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) 488/* Lexer error. */
489void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
462{ 490{
463 const char *tok; 491 const char *tokstr;
464 va_list argp; 492 va_list argp;
465 if (token == 0) { 493 if (tok == 0) {
466 tok = NULL; 494 tokstr = NULL;
467 } else if (token == TK_name || token == TK_string || token == TK_number) { 495 } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
468 save(ls, '\0'); 496 lex_save(ls, '\0');
469 tok = ls->sb.buf; 497 tokstr = sbufB(&ls->sb);
470 } else { 498 } else {
471 tok = lj_lex_token2str(ls, token); 499 tokstr = lj_lex_token2str(ls, tok);
472 } 500 }
473 va_start(argp, em); 501 va_start(argp, em);
474 lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); 502 lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
475 va_end(argp); 503 va_end(argp);
476} 504}
477 505
506/* Initialize strings for reserved words. */
478void lj_lex_init(lua_State *L) 507void lj_lex_init(lua_State *L)
479{ 508{
480 uint32_t i; 509 uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index b1bc4876..8665aa2a 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
30 TK_RESERVED = TK_while - TK_OFS 30 TK_RESERVED = TK_while - TK_OFS
31}; 31};
32 32
33typedef int LexToken; 33typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
34typedef int LexToken; /* Lexical token. */
34 35
35/* Combined bytecode ins/line. Only used during bytecode generation. */ 36/* Combined bytecode ins/line. Only used during bytecode generation. */
36typedef struct BCInsLine { 37typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
51typedef struct LexState { 52typedef struct LexState {
52 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ 53 struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
53 struct lua_State *L; /* Lua state. */ 54 struct lua_State *L; /* Lua state. */
54 TValue tokenval; /* Current token value. */ 55 TValue tokval; /* Current token value. */
55 TValue lookaheadval; /* Lookahead token value. */ 56 TValue lookaheadval; /* Lookahead token value. */
56 int current; /* Current character (charint). */
57 LexToken token; /* Current token. */
58 LexToken lookahead; /* Lookahead token. */
59 MSize n; /* Bytes left in input buffer. */
60 const char *p; /* Current position in input buffer. */ 57 const char *p; /* Current position in input buffer. */
58 const char *pe; /* End of input buffer. */
59 LexChar c; /* Current character. */
60 LexToken tok; /* Current token. */
61 LexToken lookahead; /* Lookahead token. */
61 SBuf sb; /* String buffer for tokens. */ 62 SBuf sb; /* String buffer for tokens. */
62 lua_Reader rfunc; /* Reader callback. */ 63 lua_Reader rfunc; /* Reader callback. */
63 void *rdata; /* Reader callback data. */ 64 void *rdata; /* Reader callback data. */
@@ -79,8 +80,8 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
79LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); 80LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
80LJ_FUNC void lj_lex_next(LexState *ls); 81LJ_FUNC void lj_lex_next(LexState *ls);
81LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); 82LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
82LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); 83LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
83LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); 84LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
84LJ_FUNC void lj_lex_init(lua_State *L); 85LJ_FUNC void lj_lex_init(lua_State *L);
85 86
86#endif 87#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 0c91a1c8..56fb6555 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -18,6 +18,9 @@
18#include "lj_dispatch.h" 18#include "lj_dispatch.h"
19#include "lj_vm.h" 19#include "lj_vm.h"
20#include "lj_strscan.h" 20#include "lj_strscan.h"
21#include "lj_strfmt.h"
22#include "lj_lex.h"
23#include "lj_bcdump.h"
21#include "lj_lib.h" 24#include "lj_lib.h"
22 25
23/* -- Library initialization ---------------------------------------------- */ 26/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +46,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
43 return tabV(L->top-1); 46 return tabV(L->top-1);
44} 47}
45 48
49static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
50{
51 int len = *p++;
52 GCstr *name = lj_str_new(L, (const char *)p, len);
53 LexState ls;
54 GCproto *pt;
55 GCfunc *fn;
56 memset(&ls, 0, sizeof(ls));
57 ls.L = L;
58 ls.p = (const char *)(p+len);
59 ls.pe = (const char *)~(uintptr_t)0;
60 ls.c = -1;
61 ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
62 ls.chunkname = name;
63 pt = lj_bcread_proto(&ls);
64 pt->firstline = ~(BCLine)0;
65 fn = lj_func_newL_empty(L, pt, tabref(L->env));
66 /* NOBARRIER: See below for common barrier. */
67 setfuncV(L, lj_tab_setstr(L, tab, name), fn);
68 return (const uint8_t *)ls.p;
69}
70
46void lj_lib_register(lua_State *L, const char *libname, 71void lj_lib_register(lua_State *L, const char *libname,
47 const uint8_t *p, const lua_CFunction *cf) 72 const uint8_t *p, const lua_CFunction *cf)
48{ 73{
@@ -87,6 +112,9 @@ void lj_lib_register(lua_State *L, const char *libname,
87 ofn = fn; 112 ofn = fn;
88 } else { 113 } else {
89 switch (tag | len) { 114 switch (tag | len) {
115 case LIBINIT_LUA:
116 p = lib_read_lfunc(L, p, tab);
117 break;
90 case LIBINIT_SET: 118 case LIBINIT_SET:
91 L->top -= 2; 119 L->top -= 2;
92 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) 120 if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +148,37 @@ void lj_lib_register(lua_State *L, const char *libname,
120 } 148 }
121} 149}
122 150
151/* Push internal function on the stack. */
152GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
153{
154 GCfunc *fn;
155 lua_pushcclosure(L, f, n);
156 fn = funcV(L->top-1);
157 fn->c.ffid = (uint8_t)id;
158 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
159 return fn;
160}
161
162void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
163{
164 luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
165 lua_pushcfunction(L, f);
166 /* NOBARRIER: The function is new (marked white). */
167 setgcref(funcV(L->top-1)->c.env, obj2gco(env));
168 lua_setfield(L, -2, name);
169 L->top--;
170}
171
172int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
173{
174 GCfunc *fn = lj_lib_pushcf(L, cf, id);
175 GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */
176 setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);
177 lj_gc_anybarriert(L, t);
178 setfuncV(L, L->top++, fn);
179 return 1;
180}
181
123/* -- Type checks --------------------------------------------------------- */ 182/* -- Type checks --------------------------------------------------------- */
124 183
125TValue *lj_lib_checkany(lua_State *L, int narg) 184TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +196,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
137 if (LJ_LIKELY(tvisstr(o))) { 196 if (LJ_LIKELY(tvisstr(o))) {
138 return strV(o); 197 return strV(o);
139 } else if (tvisnumber(o)) { 198 } else if (tvisnumber(o)) {
140 GCstr *s = lj_str_fromnumber(L, o); 199 GCstr *s = lj_strfmt_number(L, o);
141 setstrV(L, o, s); 200 setstrV(L, o, s);
142 return s; 201 return s;
143 } 202 }
@@ -196,20 +255,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
196 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; 255 return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
197} 256}
198 257
199int32_t lj_lib_checkbit(lua_State *L, int narg)
200{
201 TValue *o = L->base + narg-1;
202 if (!(o < L->top && lj_strscan_numberobj(o)))
203 lj_err_argt(L, narg, LUA_TNUMBER);
204 if (LJ_LIKELY(tvisint(o))) {
205 return intV(o);
206 } else {
207 int32_t i = lj_num2bit(numV(o));
208 if (LJ_DUALNUM) setintV(o, i);
209 return i;
210 }
211}
212
213GCfunc *lj_lib_checkfunc(lua_State *L, int narg) 258GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
214{ 259{
215 TValue *o = L->base + narg-1; 260 TValue *o = L->base + narg-1;
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 754e7444..83778b83 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,15 +41,22 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); 41LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); 42LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); 43LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
44LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
45LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); 44LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
46LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); 45LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
47LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); 46LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
48LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); 47LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
49 48
50/* Avoid including lj_frame.h. */ 49/* Avoid including lj_frame.h. */
50#if LJ_GC64
51#define lj_lib_upvalue(L, n) \
52 (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
53#elif LJ_FR2
54#define lj_lib_upvalue(L, n) \
55 (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
56#else
51#define lj_lib_upvalue(L, n) \ 57#define lj_lib_upvalue(L, n) \
52 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) 58 (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
59#endif
53 60
54#if LJ_TARGET_WINDOWS 61#if LJ_TARGET_WINDOWS
55#define lj_lib_checkfpu(L) \ 62#define lj_lib_checkfpu(L) \
@@ -60,23 +67,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
60#define lj_lib_checkfpu(L) UNUSED(L) 67#define lj_lib_checkfpu(L) UNUSED(L)
61#endif 68#endif
62 69
63/* Push internal function on the stack. */ 70LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
64static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
65 int id, int n)
66{
67 GCfunc *fn;
68 lua_pushcclosure(L, f, n);
69 fn = funcV(L->top-1);
70 fn->c.ffid = (uint8_t)id;
71 setmref(fn->c.pc, &G(L)->bc_cfunc_int);
72}
73
74#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) 71#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0))
75 72
76/* Library function declarations. Scanned by buildvm. */ 73/* Library function declarations. Scanned by buildvm. */
77#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) 74#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
78#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) 75#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
79#define LJLIB_ASM_(name) 76#define LJLIB_ASM_(name)
77#define LJLIB_LUA(name)
80#define LJLIB_SET(name) 78#define LJLIB_SET(name)
81#define LJLIB_PUSH(arg) 79#define LJLIB_PUSH(arg)
82#define LJLIB_REC(handler) 80#define LJLIB_REC(handler)
@@ -88,6 +86,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
88 86
89LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, 87LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
90 const uint8_t *init, const lua_CFunction *cf); 88 const uint8_t *init, const lua_CFunction *cf);
89LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
90 GCtab *env);
91LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
92 const char *name);
91 93
92/* Library init data tags. */ 94/* Library init data tags. */
93#define LIBINIT_LENMASK 0x3f 95#define LIBINIT_LENMASK 0x3f
@@ -96,7 +98,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
96#define LIBINIT_ASM 0x40 98#define LIBINIT_ASM 0x40
97#define LIBINIT_ASM_ 0x80 99#define LIBINIT_ASM_ 0x80
98#define LIBINIT_STRING 0xc0 100#define LIBINIT_STRING 0xc0
99#define LIBINIT_MAXSTR 0x39 101#define LIBINIT_MAXSTR 0x38
102#define LIBINIT_LUA 0xf9
100#define LIBINIT_SET 0xfa 103#define LIBINIT_SET 0xfa
101#define LIBINIT_NUMBER 0xfb 104#define LIBINIT_NUMBER 0xfb
102#define LIBINIT_COPY 0xfc 105#define LIBINIT_COPY 0xfc
diff --git a/src/lj_load.c b/src/lj_load.c
index ec6f0aba..746bf428 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
15#include "lj_obj.h" 15#include "lj_obj.h"
16#include "lj_gc.h" 16#include "lj_gc.h"
17#include "lj_err.h" 17#include "lj_err.h"
18#include "lj_str.h" 18#include "lj_buf.h"
19#include "lj_func.h" 19#include "lj_func.h"
20#include "lj_frame.h" 20#include "lj_frame.h"
21#include "lj_vm.h" 21#include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
54 ls.rdata = data; 54 ls.rdata = data;
55 ls.chunkarg = chunkname ? chunkname : "?"; 55 ls.chunkarg = chunkname ? chunkname : "?";
56 ls.mode = mode; 56 ls.mode = mode;
57 lj_str_initbuf(&ls.sb); 57 lj_buf_init(L, &ls.sb);
58 status = lj_vm_cpcall(L, NULL, &ls, cpparser); 58 status = lj_vm_cpcall(L, NULL, &ls, cpparser);
59 lj_lex_cleanup(L, &ls); 59 lj_lex_cleanup(L, &ls);
60 lj_gc_check(L); 60 lj_gc_check(L);
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 02ade1d4..bc3e922f 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -66,8 +66,8 @@ void lj_mcode_sync(void *start, void *end)
66 66
67static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) 67static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
68{ 68{
69 void *p = VirtualAlloc((void *)hint, sz, 69 void *p = LJ_WIN_VALLOC((void *)hint, sz,
70 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); 70 MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
71 if (!p && !hint) 71 if (!p && !hint)
72 lj_trace_err(J, LJ_TRERR_MCODEAL); 72 lj_trace_err(J, LJ_TRERR_MCODEAL);
73 return p; 73 return p;
@@ -82,7 +82,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
82static int mcode_setprot(void *p, size_t sz, DWORD prot) 82static int mcode_setprot(void *p, size_t sz, DWORD prot)
83{ 83{
84 DWORD oprot; 84 DWORD oprot;
85 return !VirtualProtect(p, sz, prot, &oprot); 85 return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
86} 86}
87 87
88#elif LJ_TARGET_POSIX 88#elif LJ_TARGET_POSIX
@@ -221,8 +221,8 @@ static void *mcode_alloc(jit_State *J, size_t sz)
221 */ 221 */
222#if LJ_TARGET_MIPS 222#if LJ_TARGET_MIPS
223 /* Use the middle of the 256MB-aligned region. */ 223 /* Use the middle of the 256MB-aligned region. */
224 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + 224 uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
225 0x08000000u; 225 ~(uintptr_t)0x0fffffffu) + 0x08000000u;
226#else 226#else
227 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; 227 uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
228#endif 228#endif
@@ -255,7 +255,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
255/* All memory addresses are reachable by relative jumps. */ 255/* All memory addresses are reachable by relative jumps. */
256static void *mcode_alloc(jit_State *J, size_t sz) 256static void *mcode_alloc(jit_State *J, size_t sz)
257{ 257{
258#ifdef __OpenBSD__ 258#if defined(__OpenBSD__) || LJ_TARGET_UWP
259 /* Allow better executable memory allocation for OpenBSD W^X mode. */ 259 /* Allow better executable memory allocation for OpenBSD W^X mode. */
260 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); 260 void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
261 if (p && mcode_setprot(p, sz, MCPROT_GEN)) { 261 if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 6affc18b..7391ff00 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_meta.h" 18#include "lj_meta.h"
@@ -19,6 +20,8 @@
19#include "lj_bc.h" 20#include "lj_bc.h"
20#include "lj_vm.h" 21#include "lj_vm.h"
21#include "lj_strscan.h" 22#include "lj_strscan.h"
23#include "lj_strfmt.h"
24#include "lj_lib.h"
22 25
23/* -- Metamethod handling ------------------------------------------------- */ 26/* -- Metamethod handling ------------------------------------------------- */
24 27
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
77 TValue *base = L->base; 80 TValue *base = L->base;
78 TValue *top = L->top; 81 TValue *top = L->top;
79 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ 82 const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */
80 copyTV(L, base-1, tv); /* Replace frame with new object. */ 83 copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */
81 top->u32.lo = LJ_CONT_TAILCALL; 84 if (LJ_FR2)
82 setframe_pc(top, pc); 85 (top++)->u64 = LJ_CONT_TAILCALL;
83 setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ 86 else
84 setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); 87 top->u32.lo = LJ_CONT_TAILCALL;
85 L->base = L->top = top+2; 88 setframe_pc(top++, pc);
89 if (LJ_FR2) top++;
90 setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
91 setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
92 L->base = L->top = top+1;
86 /* 93 /*
87 ** before: [old_mo|PC] [... ...] 94 ** before: [old_mo|PC] [... ...]
88 ** ^base ^top 95 ** ^base ^top
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
113 */ 120 */
114 TValue *top = L->top; 121 TValue *top = L->top;
115 if (curr_funcisL(L)) top = curr_topL(L); 122 if (curr_funcisL(L)) top = curr_topL(L);
116 setcont(top, cont); /* Assembler VM stores PC in upper word. */ 123 setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */
117 copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ 124 if (LJ_FR2) setnilV(top++);
118 copyTV(L, top+2, a); 125 copyTV(L, top++, mo); /* Store metamethod and two arguments. */
119 copyTV(L, top+3, b); 126 if (LJ_FR2) setnilV(top++);
120 return top+2; /* Return new base. */ 127 copyTV(L, top, a);
128 copyTV(L, top+1, b);
129 return top; /* Return new base. */
121} 130}
122 131
123/* -- C helpers for some instructions, called from assembler VM ----------- */ 132/* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
225 } 234 }
226} 235}
227 236
228/* In-place coercion of a number to a string. */
229static LJ_AINLINE int tostring(lua_State *L, TValue *o)
230{
231 if (tvisstr(o)) {
232 return 1;
233 } else if (tvisnumber(o)) {
234 setstrV(L, o, lj_str_fromnumber(L, o));
235 return 1;
236 } else {
237 return 0;
238 }
239}
240
241/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ 237/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
242TValue *lj_meta_cat(lua_State *L, TValue *top, int left) 238TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
243{ 239{
244 int fromc = 0; 240 int fromc = 0;
245 if (left < 0) { left = -left; fromc = 1; } 241 if (left < 0) { left = -left; fromc = 1; }
246 do { 242 do {
247 int n = 1; 243 if (!(tvisstr(top) || tvisnumber(top)) ||
248 if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { 244 !(tvisstr(top-1) || tvisnumber(top-1))) {
249 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); 245 cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
250 if (tvisnil(mo)) { 246 if (tvisnil(mo)) {
251 mo = lj_meta_lookup(L, top, MM_concat); 247 mo = lj_meta_lookup(L, top, MM_concat);
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
266 ** after mm: [...][CAT stack ...] <--push-- [result] 262 ** after mm: [...][CAT stack ...] <--push-- [result]
267 ** next step: [...][CAT stack .............] 263 ** next step: [...][CAT stack .............]
268 */ 264 */
269 copyTV(L, top+2, top); /* Careful with the order of stack copies! */ 265 copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */
270 copyTV(L, top+1, top-1); 266 copyTV(L, top+2*LJ_FR2+1, top-1);
271 copyTV(L, top, mo); 267 copyTV(L, top+LJ_FR2, mo);
272 setcont(top-1, lj_cont_cat); 268 setcont(top-1, lj_cont_cat);
269 if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
273 return top+1; /* Trigger metamethod call. */ 270 return top+1; /* Trigger metamethod call. */
274 } else if (strV(top)->len == 0) { /* Shortcut. */
275 (void)tostring(L, top-1);
276 } else { 271 } else {
277 /* Pick as many strings as possible from the top and concatenate them: 272 /* Pick as many strings as possible from the top and concatenate them:
278 ** 273 **
@@ -281,27 +276,28 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
281 ** concat: [...][CAT stack ...] [result] 276 ** concat: [...][CAT stack ...] [result]
282 ** next step: [...][CAT stack ............] 277 ** next step: [...][CAT stack ............]
283 */ 278 */
284 MSize tlen = strV(top)->len; 279 TValue *e, *o = top;
285 char *buffer; 280 uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
286 int i; 281 SBuf *sb;
287 for (n = 1; n <= left && tostring(L, top-n); n++) { 282 do {
288 MSize len = strV(top-n)->len; 283 o--; tlen += tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM;
289 if (len >= LJ_MAX_STR - tlen) 284 } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
290 lj_err_msg(L, LJ_ERR_STROV); 285 if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
291 tlen += len; 286 sb = lj_buf_tmp_(L);
292 } 287 lj_buf_more(sb, (MSize)tlen);
293 buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); 288 for (e = top, top = o; o <= e; o++) {
294 n--; 289 if (tvisstr(o)) {
295 tlen = 0; 290 GCstr *s = strV(o);
296 for (i = n; i >= 0; i--) { 291 MSize len = s->len;
297 MSize len = strV(top-i)->len; 292 lj_buf_putmem(sb, strdata(s), len);
298 memcpy(buffer + tlen, strVdata(top-i), len); 293 } else if (tvisint(o)) {
299 tlen += len; 294 lj_strfmt_putint(sb, intV(o));
295 } else {
296 lj_strfmt_putfnum(sb, STRFMT_G14, numV(o));
297 }
300 } 298 }
301 setstrV(L, top-n, lj_str_new(L, buffer, tlen)); 299 setstrV(L, top, lj_buf_str(L, sb));
302 } 300 }
303 left -= n;
304 top -= n;
305 } while (left >= 1); 301 } while (left >= 1);
306 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { 302 if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
307 if (!fromc) L->top = curr_topL(L); 303 if (!fromc) L->top = curr_topL(L);
@@ -338,12 +334,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
338 return (TValue *)(intptr_t)ne; 334 return (TValue *)(intptr_t)ne;
339 } 335 }
340 top = curr_top(L); 336 top = curr_top(L);
341 setcont(top, ne ? lj_cont_condf : lj_cont_condt); 337 setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
342 copyTV(L, top+1, mo); 338 if (LJ_FR2) setnilV(top++);
339 copyTV(L, top++, mo);
340 if (LJ_FR2) setnilV(top++);
343 it = ~(uint32_t)o1->gch.gct; 341 it = ~(uint32_t)o1->gch.gct;
344 setgcV(L, top+2, o1, it); 342 setgcV(L, top, o1, it);
345 setgcV(L, top+3, o2, it); 343 setgcV(L, top+1, o2, it);
346 return top+2; /* Trigger metamethod call. */ 344 return top; /* Trigger metamethod call. */
347 } 345 }
348 return (TValue *)(intptr_t)ne; 346 return (TValue *)(intptr_t)ne;
349} 347}
@@ -366,7 +364,7 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
366 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; 364 o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
367 } else { 365 } else {
368 lua_assert(op == BC_ISEQP); 366 lua_assert(op == BC_ISEQP);
369 setitype(&tv, ~bc_d(ins)); 367 setpriV(&tv, ~bc_d(ins));
370 o2 = &tv; 368 o2 = &tv;
371 } 369 }
372 mo = lj_meta_lookup(L, o1mm, MM_eq); 370 mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -423,6 +421,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
423 } 421 }
424} 422}
425 423
424/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
425void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
426{
427 L->top = curr_topL(L);
428 ra++; tp--;
429 lua_assert(LJ_DUALNUM || tp != ~LJ_TNUMX); /* ISTYPE -> ISNUM broken. */
430 if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
431 else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
432 else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
433 else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
434}
435
426/* Helper for calls. __call metamethod. */ 436/* Helper for calls. __call metamethod. */
427void lj_meta_call(lua_State *L, TValue *func, TValue *top) 437void lj_meta_call(lua_State *L, TValue *func, TValue *top)
428{ 438{
@@ -430,7 +440,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
430 TValue *p; 440 TValue *p;
431 if (!tvisfunc(mo)) 441 if (!tvisfunc(mo))
432 lj_err_optype_call(L, func); 442 lj_err_optype_call(L, func);
433 for (p = top; p > func; p--) copyTV(L, p, p-1); 443 for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
444 if (LJ_FR2) copyTV(L, func+2, func);
434 copyTV(L, func, mo); 445 copyTV(L, func, mo);
435} 446}
436 447
diff --git a/src/lj_meta.h b/src/lj_meta.h
index bd911e94..d6d31924 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); 31LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); 32LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); 33LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
34LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
34LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); 35LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
35LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); 36LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
36 37
diff --git a/src/lj_obj.c b/src/lj_obj.c
index 9cdce625..5d16e0e5 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
20}; 20};
21 21
22/* Compare two objects without calling metamethods. */ 22/* Compare two objects without calling metamethods. */
23int lj_obj_equal(cTValue *o1, cTValue *o2) 23int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
24{ 24{
25 if (itype(o1) == itype(o2)) { 25 if (itype(o1) == itype(o2)) {
26 if (tvispri(o1)) 26 if (tvispri(o1))
@@ -33,3 +33,18 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
33 return numberVnum(o1) == numberVnum(o2); 33 return numberVnum(o1) == numberVnum(o2);
34} 34}
35 35
36/* Return pointer to object or its object data. */
37const void * LJ_FASTCALL lj_obj_ptr(cTValue *o)
38{
39 if (tvisudata(o))
40 return uddata(udataV(o));
41 else if (tvislightud(o))
42 return lightudV(o);
43 else if (LJ_HASFFI && tviscdata(o))
44 return cdataptr(cdataV(o));
45 else if (tvisgcv(o))
46 return gcV(o);
47 else
48 return NULL;
49}
50
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 5c3c88fc..7d582949 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -15,42 +15,75 @@
15 15
16/* -- Memory references (32 bit address space) ---------------------------- */ 16/* -- Memory references (32 bit address space) ---------------------------- */
17 17
18/* Memory size. */ 18/* Memory and GC object sizes. */
19typedef uint32_t MSize; 19typedef uint32_t MSize;
20#if LJ_GC64
21typedef uint64_t GCSize;
22#else
23typedef uint32_t GCSize;
24#endif
20 25
21/* Memory reference */ 26/* Memory reference */
22typedef struct MRef { 27typedef struct MRef {
28#if LJ_GC64
29 uint64_t ptr64; /* True 64 bit pointer. */
30#else
23 uint32_t ptr32; /* Pseudo 32 bit pointer. */ 31 uint32_t ptr32; /* Pseudo 32 bit pointer. */
32#endif
24} MRef; 33} MRef;
25 34
35#if LJ_GC64
36#define mref(r, t) ((t *)(void *)(r).ptr64)
37
38#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
39#define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
40#else
26#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) 41#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
27 42
28#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) 43#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
29#define setmrefr(r, v) ((r).ptr32 = (v).ptr32) 44#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
45#endif
30 46
31/* -- GC object references (32 bit address space) ------------------------- */ 47/* -- GC object references (32 bit address space) ------------------------- */
32 48
33/* GCobj reference */ 49/* GCobj reference */
34typedef struct GCRef { 50typedef struct GCRef {
51#if LJ_GC64
52 uint64_t gcptr64; /* True 64 bit pointer. */
53#else
35 uint32_t gcptr32; /* Pseudo 32 bit pointer. */ 54 uint32_t gcptr32; /* Pseudo 32 bit pointer. */
55#endif
36} GCRef; 56} GCRef;
37 57
38/* Common GC header for all collectable objects. */ 58/* Common GC header for all collectable objects. */
39#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct 59#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
40/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ 60/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
41 61
62#if LJ_GC64
63#define gcref(r) ((GCobj *)(r).gcptr64)
64#define gcrefp(r, t) ((t *)(void *)(r).gcptr64)
65#define gcrefu(r) ((r).gcptr64)
66#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64)
67
68#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch)
69#define setgcreft(r, gc, it) \
70 (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
71#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p))
72#define setgcrefnull(r) ((r).gcptr64 = 0)
73#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64)
74#else
42#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) 75#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
43#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) 76#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
44#define gcrefu(r) ((r).gcptr32) 77#define gcrefu(r) ((r).gcptr32)
45#define gcrefi(r) ((int32_t)(r).gcptr32)
46#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) 78#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
47#define gcnext(gc) (gcref((gc)->gch.nextgc))
48 79
49#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) 80#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
50#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
51#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) 81#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
52#define setgcrefnull(r) ((r).gcptr32 = 0) 82#define setgcrefnull(r) ((r).gcptr32 = 0)
53#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) 83#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
84#endif
85
86#define gcnext(gc) (gcref((gc)->gch.nextgc))
54 87
55/* IMPORTANT NOTE: 88/* IMPORTANT NOTE:
56** 89**
@@ -119,11 +152,12 @@ typedef int32_t BCLine; /* Bytecode line number. */
119/* Internal assembler functions. Never call these directly from C. */ 152/* Internal assembler functions. Never call these directly from C. */
120typedef void (*ASMFunction)(void); 153typedef void (*ASMFunction)(void);
121 154
122/* Resizable string buffer. Need this here, details in lj_str.h. */ 155/* Resizable string buffer. Need this here, details in lj_buf.h. */
123typedef struct SBuf { 156typedef struct SBuf {
124 char *buf; /* String buffer base. */ 157 MRef p; /* String buffer pointer. */
125 MSize n; /* String buffer length. */ 158 MRef e; /* String buffer end pointer. */
126 MSize sz; /* String buffer size. */ 159 MRef b; /* String buffer base. */
160 MRef L; /* lua_State, used for buffer resizing. */
127} SBuf; 161} SBuf;
128 162
129/* -- Tags and values ----------------------------------------------------- */ 163/* -- Tags and values ----------------------------------------------------- */
@@ -131,13 +165,23 @@ typedef struct SBuf {
131/* Frame link. */ 165/* Frame link. */
132typedef union { 166typedef union {
133 int32_t ftsz; /* Frame type and size of previous frame. */ 167 int32_t ftsz; /* Frame type and size of previous frame. */
134 MRef pcr; /* Overlaps PC for Lua frames. */ 168 MRef pcr; /* Or PC for Lua frames. */
135} FrameLink; 169} FrameLink;
136 170
137/* Tagged value. */ 171/* Tagged value. */
138typedef LJ_ALIGN(8) union TValue { 172typedef LJ_ALIGN(8) union TValue {
139 uint64_t u64; /* 64 bit pattern overlaps number. */ 173 uint64_t u64; /* 64 bit pattern overlaps number. */
140 lua_Number n; /* Number object overlaps split tag/value object. */ 174 lua_Number n; /* Number object overlaps split tag/value object. */
175#if LJ_GC64
176 GCRef gcr; /* GCobj reference with tag. */
177 int64_t it64;
178 struct {
179 LJ_ENDIAN_LOHI(
180 int32_t i; /* Integer value. */
181 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
182 )
183 };
184#else
141 struct { 185 struct {
142 LJ_ENDIAN_LOHI( 186 LJ_ENDIAN_LOHI(
143 union { 187 union {
@@ -147,12 +191,17 @@ typedef LJ_ALIGN(8) union TValue {
147 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ 191 , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
148 ) 192 )
149 }; 193 };
194#endif
195#if LJ_FR2
196 int64_t ftsz; /* Frame type and size of previous frame, or PC. */
197#else
150 struct { 198 struct {
151 LJ_ENDIAN_LOHI( 199 LJ_ENDIAN_LOHI(
152 GCRef func; /* Function for next frame (or dummy L). */ 200 GCRef func; /* Function for next frame (or dummy L). */
153 , FrameLink tp; /* Link to previous frame. */ 201 , FrameLink tp; /* Link to previous frame. */
154 ) 202 )
155 } fr; 203 } fr;
204#endif
156 struct { 205 struct {
157 LJ_ENDIAN_LOHI( 206 LJ_ENDIAN_LOHI(
158 uint32_t lo; /* Lower 32 bits of number. */ 207 uint32_t lo; /* Lower 32 bits of number. */
@@ -172,6 +221,8 @@ typedef const TValue cTValue;
172 221
173/* Internal object tags. 222/* Internal object tags.
174** 223**
224** Format for 32 bit GC references (!LJ_GC64):
225**
175** Internal tags overlap the MSW of a number object (must be a double). 226** Internal tags overlap the MSW of a number object (must be a double).
176** Interpreted as a double these are special NaNs. The FPU only generates 227** Interpreted as a double these are special NaNs. The FPU only generates
177** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available 228** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -186,6 +237,18 @@ typedef const TValue cTValue;
186** int (LJ_DUALNUM)| itype | int | 237** int (LJ_DUALNUM)| itype | int |
187** number -------double------ 238** number -------double------
188** 239**
240** Format for 64 bit GC references (LJ_GC64):
241**
242** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
243** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
244** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
245**
246** ------MSW------.------LSW------
247** primitive types |1..1|itype|1..................1|
248** GC objects/lightud |1..1|itype|-------GCRef--------|
249** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
250** number ------------double-------------
251**
189** ORDER LJ_T 252** ORDER LJ_T
190** Primitive types nil/false/true must be first, lightuserdata next. 253** Primitive types nil/false/true must be first, lightuserdata next.
191** GC objects are at the end, table/userdata must be lowest. 254** GC objects are at the end, table/userdata must be lowest.
@@ -208,7 +271,7 @@ typedef const TValue cTValue;
208#define LJ_TNUMX (~13u) 271#define LJ_TNUMX (~13u)
209 272
210/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ 273/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
211#if LJ_64 274#if LJ_64 && !LJ_GC64
212#define LJ_TISNUM 0xfffeffffu 275#define LJ_TISNUM 0xfffeffffu
213#else 276#else
214#define LJ_TISNUM LJ_TNUMX 277#define LJ_TISNUM LJ_TNUMX
@@ -218,6 +281,10 @@ typedef const TValue cTValue;
218#define LJ_TISGCV (LJ_TSTR+1) 281#define LJ_TISGCV (LJ_TSTR+1)
219#define LJ_TISTABUD LJ_TTAB 282#define LJ_TISTABUD LJ_TTAB
220 283
284#if LJ_GC64
285#define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
286#endif
287
221/* -- String object ------------------------------------------------------- */ 288/* -- String object ------------------------------------------------------- */
222 289
223/* String object header. String payload follows. */ 290/* String object header. String payload follows. */
@@ -291,6 +358,9 @@ typedef struct GCproto {
291 uint8_t numparams; /* Number of parameters. */ 358 uint8_t numparams; /* Number of parameters. */
292 uint8_t framesize; /* Fixed frame size. */ 359 uint8_t framesize; /* Fixed frame size. */
293 MSize sizebc; /* Number of bytecode instructions. */ 360 MSize sizebc; /* Number of bytecode instructions. */
361#if LJ_GC64
362 uint32_t unused_gc64;
363#endif
294 GCRef gclist; 364 GCRef gclist;
295 MRef k; /* Split constant array (points to the middle). */ 365 MRef k; /* Split constant array (points to the middle). */
296 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ 366 MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -402,7 +472,9 @@ typedef struct Node {
402 TValue val; /* Value object. Must be first field. */ 472 TValue val; /* Value object. Must be first field. */
403 TValue key; /* Key object. */ 473 TValue key; /* Key object. */
404 MRef next; /* Hash chain. */ 474 MRef next; /* Hash chain. */
475#if !LJ_GC64
405 MRef freetop; /* Top of free elements (stored in t->node[0]). */ 476 MRef freetop; /* Top of free elements (stored in t->node[0]). */
477#endif
406} Node; 478} Node;
407 479
408LJ_STATIC_ASSERT(offsetof(Node, val) == 0); 480LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -417,12 +489,22 @@ typedef struct GCtab {
417 MRef node; /* Hash part. */ 489 MRef node; /* Hash part. */
418 uint32_t asize; /* Size of array part (keys [0, asize-1]). */ 490 uint32_t asize; /* Size of array part (keys [0, asize-1]). */
419 uint32_t hmask; /* Hash part mask (size of hash part - 1). */ 491 uint32_t hmask; /* Hash part mask (size of hash part - 1). */
492#if LJ_GC64
493 MRef freetop; /* Top of free elements. */
494#endif
420} GCtab; 495} GCtab;
421 496
422#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) 497#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
423#define tabref(r) (&gcref((r))->tab) 498#define tabref(r) (&gcref((r))->tab)
424#define noderef(r) (mref((r), Node)) 499#define noderef(r) (mref((r), Node))
425#define nextnode(n) (mref((n)->next, Node)) 500#define nextnode(n) (mref((n)->next, Node))
501#if LJ_GC64
502#define getfreetop(t, n) (noderef((t)->freetop))
503#define setfreetop(t, n, v) (setmref((t)->freetop, (v)))
504#else
505#define getfreetop(t, n) (noderef((n)->freetop))
506#define setfreetop(t, n, v) (setmref((n)->freetop, (v)))
507#endif
426 508
427/* -- State objects ------------------------------------------------------- */ 509/* -- State objects ------------------------------------------------------- */
428 510
@@ -489,8 +571,8 @@ typedef enum {
489#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) 571#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
490 572
491typedef struct GCState { 573typedef struct GCState {
492 MSize total; /* Memory currently allocated. */ 574 GCSize total; /* Memory currently allocated. */
493 MSize threshold; /* Memory threshold. */ 575 GCSize threshold; /* Memory threshold. */
494 uint8_t currentwhite; /* Current white color. */ 576 uint8_t currentwhite; /* Current white color. */
495 uint8_t state; /* GC state. */ 577 uint8_t state; /* GC state. */
496 uint8_t nocdatafin; /* No cdata finalizer called. */ 578 uint8_t nocdatafin; /* No cdata finalizer called. */
@@ -502,9 +584,9 @@ typedef struct GCState {
502 GCRef grayagain; /* List of objects for atomic traversal. */ 584 GCRef grayagain; /* List of objects for atomic traversal. */
503 GCRef weak; /* List of weak tables (to be cleared). */ 585 GCRef weak; /* List of weak tables (to be cleared). */
504 GCRef mmudata; /* List of userdata (to be finalized). */ 586 GCRef mmudata; /* List of userdata (to be finalized). */
587 GCSize debt; /* Debt (how much GC is behind schedule). */
588 GCSize estimate; /* Estimate of memory actually in use. */
505 MSize stepmul; /* Incremental GC step granularity. */ 589 MSize stepmul; /* Incremental GC step granularity. */
506 MSize debt; /* Debt (how much GC is behind schedule). */
507 MSize estimate; /* Estimate of memory actually in use. */
508 MSize pause; /* Pause between successive GC cycles. */ 590 MSize pause; /* Pause between successive GC cycles. */
509} GCState; 591} GCState;
510 592
@@ -516,8 +598,8 @@ typedef struct global_State {
516 lua_Alloc allocf; /* Memory allocator. */ 598 lua_Alloc allocf; /* Memory allocator. */
517 void *allocd; /* Memory allocator data. */ 599 void *allocd; /* Memory allocator data. */
518 GCState gc; /* Garbage collector. */ 600 GCState gc; /* Garbage collector. */
519 SBuf tmpbuf; /* Temporary buffer for string concatenation. */ 601 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
520 Node nilnode; /* Fallback 1-element hash part (nil key and value). */ 602 SBuf tmpbuf; /* Temporary string buffer. */
521 GCstr strempty; /* Empty string. */ 603 GCstr strempty; /* Empty string. */
522 uint8_t stremptyz; /* Zero terminator of empty string. */ 604 uint8_t stremptyz; /* Zero terminator of empty string. */
523 uint8_t hookmask; /* Hook mask. */ 605 uint8_t hookmask; /* Hook mask. */
@@ -526,17 +608,17 @@ typedef struct global_State {
526 GCRef mainthref; /* Link to main thread. */ 608 GCRef mainthref; /* Link to main thread. */
527 TValue registrytv; /* Anchor for registry. */ 609 TValue registrytv; /* Anchor for registry. */
528 TValue tmptv, tmptv2; /* Temporary TValues. */ 610 TValue tmptv, tmptv2; /* Temporary TValues. */
611 Node nilnode; /* Fallback 1-element hash part (nil key and value). */
529 GCupval uvhead; /* Head of double-linked list of all open upvalues. */ 612 GCupval uvhead; /* Head of double-linked list of all open upvalues. */
530 int32_t hookcount; /* Instruction hook countdown. */ 613 int32_t hookcount; /* Instruction hook countdown. */
531 int32_t hookcstart; /* Start count for instruction hook counter. */ 614 int32_t hookcstart; /* Start count for instruction hook counter. */
532 lua_Hook hookf; /* Hook function. */ 615 lua_Hook hookf; /* Hook function. */
533 lua_CFunction wrapf; /* Wrapper for C function calls. */ 616 lua_CFunction wrapf; /* Wrapper for C function calls. */
534 lua_CFunction panic; /* Called as a last resort for errors. */ 617 lua_CFunction panic; /* Called as a last resort for errors. */
535 volatile int32_t vmstate; /* VM state or current JIT code trace number. */
536 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ 618 BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
537 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ 619 BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
538 GCRef jit_L; /* Current JIT code lua_State or NULL. */ 620 GCRef cur_L; /* Currently executing lua_State. */
539 MRef jit_base; /* Current JIT code L->base. */ 621 MRef jit_base; /* Current JIT code L->base or NULL. */
540 MRef ctype_state; /* Pointer to C type state. */ 622 MRef ctype_state; /* Pointer to C type state. */
541 GCRef gcroot[GCROOT_MAX]; /* GC roots. */ 623 GCRef gcroot[GCROOT_MAX]; /* GC roots. */
542} global_State; 624} global_State;
@@ -553,9 +635,11 @@ typedef struct global_State {
553#define HOOK_ACTIVE_SHIFT 4 635#define HOOK_ACTIVE_SHIFT 4
554#define HOOK_VMEVENT 0x20 636#define HOOK_VMEVENT 0x20
555#define HOOK_GC 0x40 637#define HOOK_GC 0x40
638#define HOOK_PROFILE 0x80
556#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) 639#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
557#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) 640#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
558#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) 641#define hook_entergc(g) \
642 ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE)
559#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) 643#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
560#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) 644#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
561#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) 645#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
@@ -583,7 +667,13 @@ struct lua_State {
583#define registry(L) (&G(L)->registrytv) 667#define registry(L) (&G(L)->registrytv)
584 668
585/* Macros to access the currently executing (Lua) function. */ 669/* Macros to access the currently executing (Lua) function. */
670#if LJ_GC64
671#define curr_func(L) (&gcval(L->base-2)->fn)
672#elif LJ_FR2
673#define curr_func(L) (&gcref((L->base-2)->gcr)->fn)
674#else
586#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) 675#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
676#endif
587#define curr_funcisL(L) (isluafunc(curr_func(L))) 677#define curr_funcisL(L) (isluafunc(curr_func(L)))
588#define curr_proto(L) (funcproto(curr_func(L))) 678#define curr_proto(L) (funcproto(curr_func(L)))
589#define curr_topL(L) (L->base + curr_proto(L)->framesize) 679#define curr_topL(L) (L->base + curr_proto(L)->framesize)
@@ -647,12 +737,17 @@ typedef union GCobj {
647#endif 737#endif
648 738
649/* Macros to test types. */ 739/* Macros to test types. */
740#if LJ_GC64
741#define itype(o) ((uint32_t)((o)->it64 >> 47))
742#define tvisnil(o) ((o)->it64 == -1)
743#else
650#define itype(o) ((o)->it) 744#define itype(o) ((o)->it)
651#define tvisnil(o) (itype(o) == LJ_TNIL) 745#define tvisnil(o) (itype(o) == LJ_TNIL)
746#endif
652#define tvisfalse(o) (itype(o) == LJ_TFALSE) 747#define tvisfalse(o) (itype(o) == LJ_TFALSE)
653#define tvistrue(o) (itype(o) == LJ_TTRUE) 748#define tvistrue(o) (itype(o) == LJ_TTRUE)
654#define tvisbool(o) (tvisfalse(o) || tvistrue(o)) 749#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
655#if LJ_64 750#if LJ_64 && !LJ_GC64
656#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) 751#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2)
657#else 752#else
658#define tvislightud(o) (itype(o) == LJ_TLIGHTUD) 753#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
@@ -686,7 +781,7 @@ typedef union GCobj {
686#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) 781#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
687 782
688/* Macros to convert type ids. */ 783/* Macros to convert type ids. */
689#if LJ_64 784#if LJ_64 && !LJ_GC64
690#define itypemap(o) \ 785#define itypemap(o) \
691 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) 786 (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
692#else 787#else
@@ -694,8 +789,12 @@ typedef union GCobj {
694#endif 789#endif
695 790
696/* Macros to get tagged values. */ 791/* Macros to get tagged values. */
792#if LJ_GC64
793#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
794#else
697#define gcval(o) (gcref((o)->gcr)) 795#define gcval(o) (gcref((o)->gcr))
698#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) 796#endif
797#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
699#if LJ_64 798#if LJ_64
700#define lightudV(o) \ 799#define lightudV(o) \
701 check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) 800 check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff)))
@@ -714,13 +813,23 @@ typedef union GCobj {
714#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) 813#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i)
715 814
716/* Macros to set tagged values. */ 815/* Macros to set tagged values. */
816#if LJ_GC64
817#define setitype(o, i) ((o)->it = ((i) << 15))
818#define setnilV(o) ((o)->it64 = -1)
819#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
820#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
821#else
717#define setitype(o, i) ((o)->it = (i)) 822#define setitype(o, i) ((o)->it = (i))
718#define setnilV(o) ((o)->it = LJ_TNIL) 823#define setnilV(o) ((o)->it = LJ_TNIL)
719#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) 824#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x))
825#define setpriV(o, i) (setitype((o), (i)))
826#endif
720 827
721static LJ_AINLINE void setlightudV(TValue *o, void *p) 828static LJ_AINLINE void setlightudV(TValue *o, void *p)
722{ 829{
723#if LJ_64 830#if LJ_GC64
831 o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
832#elif LJ_64
724 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); 833 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
725#else 834#else
726 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); 835 setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
@@ -730,20 +839,39 @@ static LJ_AINLINE void setlightudV(TValue *o, void *p)
730#if LJ_64 839#if LJ_64
731#define checklightudptr(L, p) \ 840#define checklightudptr(L, p) \
732 (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) 841 (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
842#else
843#define checklightudptr(L, p) (p)
844#endif
845
846#if LJ_FR2
847#define contptr(f) ((void *)(f))
848#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
849#elif LJ_64
850#define contptr(f) \
851 ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
733#define setcont(o, f) \ 852#define setcont(o, f) \
734 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) 853 ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
735#else 854#else
736#define checklightudptr(L, p) (p) 855#define contptr(f) ((void *)(f))
737#define setcont(o, f) setlightudV((o), (void *)(f)) 856#define setcont(o, f) setlightudV((o), contptr(f))
738#endif 857#endif
739 858
740#define tvchecklive(L, o) \ 859#define tvchecklive(L, o) \
741 UNUSED(L), lua_assert(!tvisgcv(o) || \ 860 UNUSED(L), lua_assert(!tvisgcv(o) || \
742 ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) 861 ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o))))
743 862
744static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) 863static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
745{ 864{
746 setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); 865#if LJ_GC64
866 setgcreft(o->gcr, v, itype);
867#else
868 setgcref(o->gcr, v); setitype(o, itype);
869#endif
870}
871
872static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
873{
874 setgcVraw(o, v, it); tvchecklive(L, o);
747} 875}
748 876
749#define define_setV(name, type, tag) \ 877#define define_setV(name, type, tag) \
@@ -797,6 +925,9 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
797 925
798#if LJ_SOFTFP 926#if LJ_SOFTFP
799LJ_ASMF int32_t lj_vm_tobit(double x); 927LJ_ASMF int32_t lj_vm_tobit(double x);
928#if LJ_TARGET_MIPS64
929LJ_ASMF int32_t lj_vm_tointg(double x);
930#endif
800#endif 931#endif
801 932
802static LJ_AINLINE int32_t lj_num2bit(lua_Number n) 933static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
@@ -810,11 +941,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
810#endif 941#endif
811} 942}
812 943
813#if LJ_TARGET_X86 && !defined(__SSE2__)
814#define lj_num2int(n) lj_num2bit((n))
815#else
816#define lj_num2int(n) ((int32_t)(n)) 944#define lj_num2int(n) ((int32_t)(n))
817#endif
818 945
819/* 946/*
820** This must match the JIT backend behavior. In particular for archs 947** This must match the JIT backend behavior. In particular for archs
@@ -859,6 +986,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
859#define lj_typename(o) (lj_obj_itypename[itypemap(o)]) 986#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
860 987
861/* Compare two objects without calling metamethods. */ 988/* Compare two objects without calling metamethods. */
862LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); 989LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
990LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(cTValue *o);
863 991
864#endif 992#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 928d3852..b4d05a26 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
14 14
15#if LJ_HASJIT 15#if LJ_HASJIT
16 16
17#include "lj_buf.h"
17#include "lj_str.h" 18#include "lj_str.h"
18#include "lj_tab.h" 19#include "lj_tab.h"
19#include "lj_ir.h" 20#include "lj_ir.h"
20#include "lj_jit.h" 21#include "lj_jit.h"
22#include "lj_ircall.h"
21#include "lj_iropt.h" 23#include "lj_iropt.h"
22#include "lj_trace.h" 24#include "lj_trace.h"
23#if LJ_HASFFI 25#if LJ_HASFFI
24#include "lj_ctype.h" 26#include "lj_ctype.h"
25#endif
26#include "lj_carith.h" 27#include "lj_carith.h"
28#endif
27#include "lj_vm.h" 29#include "lj_vm.h"
28#include "lj_strscan.h" 30#include "lj_strscan.h"
31#include "lj_strfmt.h"
29 32
30/* Here's a short description how the FOLD engine processes instructions: 33/* Here's a short description how the FOLD engine processes instructions:
31** 34**
@@ -133,8 +136,8 @@
133/* Some local macros to save typing. Undef'd at the end. */ 136/* Some local macros to save typing. Undef'd at the end. */
134#define IR(ref) (&J->cur.ir[(ref)]) 137#define IR(ref) (&J->cur.ir[(ref)])
135#define fins (&J->fold.ins) 138#define fins (&J->fold.ins)
136#define fleft (&J->fold.left) 139#define fleft (J->fold.left)
137#define fright (&J->fold.right) 140#define fright (J->fold.right)
138#define knumleft (ir_knum(fleft)->n) 141#define knumleft (ir_knum(fleft)->n)
139#define knumright (ir_knum(fright)->n) 142#define knumright (ir_knum(fright)->n)
140 143
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
155 158
156/* Barrier to prevent folding across a GC step. 159/* Barrier to prevent folding across a GC step.
157** GC steps can only happen at the head of a trace and at LOOP. 160** GC steps can only happen at the head of a trace and at LOOP.
158** And the GC is only driven forward if there is at least one allocation. 161** And the GC is only driven forward if there's at least one allocation.
159*/ 162*/
160#define gcstep_barrier(J, ref) \ 163#define gcstep_barrier(J, ref) \
161 ((ref) < J->chain[IR_LOOP] && \ 164 ((ref) < J->chain[IR_LOOP] && \
162 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ 165 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
163 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ 166 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
164 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) 167 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
168 J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
165 169
166/* -- Constant folding for FP numbers ------------------------------------- */ 170/* -- Constant folding for FP numbers ------------------------------------- */
167 171
@@ -169,8 +173,6 @@ LJFOLD(ADD KNUM KNUM)
169LJFOLD(SUB KNUM KNUM) 173LJFOLD(SUB KNUM KNUM)
170LJFOLD(MUL KNUM KNUM) 174LJFOLD(MUL KNUM KNUM)
171LJFOLD(DIV KNUM KNUM) 175LJFOLD(DIV KNUM KNUM)
172LJFOLD(NEG KNUM KNUM)
173LJFOLD(ABS KNUM KNUM)
174LJFOLD(ATAN2 KNUM KNUM) 176LJFOLD(ATAN2 KNUM KNUM)
175LJFOLD(LDEXP KNUM KNUM) 177LJFOLD(LDEXP KNUM KNUM)
176LJFOLD(MIN KNUM KNUM) 178LJFOLD(MIN KNUM KNUM)
@@ -183,6 +185,15 @@ LJFOLDF(kfold_numarith)
183 return lj_ir_knum(J, y); 185 return lj_ir_knum(J, y);
184} 186}
185 187
188LJFOLD(NEG KNUM FLOAD)
189LJFOLD(ABS KNUM FLOAD)
190LJFOLDF(kfold_numabsneg)
191{
192 lua_Number a = knumleft;
193 lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
194 return lj_ir_knum(J, y);
195}
196
186LJFOLD(LDEXP KNUM KINT) 197LJFOLD(LDEXP KNUM KINT)
187LJFOLDF(kfold_ldexp) 198LJFOLDF(kfold_ldexp)
188{ 199{
@@ -336,15 +347,18 @@ LJFOLDF(kfold_intcomp0)
336static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) 347static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
337{ 348{
338 switch (op) { 349 switch (op) {
339#if LJ_64 || LJ_HASFFI 350#if LJ_HASFFI
340 case IR_ADD: k1 += k2; break; 351 case IR_ADD: k1 += k2; break;
341 case IR_SUB: k1 -= k2; break; 352 case IR_SUB: k1 -= k2; break;
342#endif
343#if LJ_HASFFI
344 case IR_MUL: k1 *= k2; break; 353 case IR_MUL: k1 *= k2; break;
345 case IR_BAND: k1 &= k2; break; 354 case IR_BAND: k1 &= k2; break;
346 case IR_BOR: k1 |= k2; break; 355 case IR_BOR: k1 |= k2; break;
347 case IR_BXOR: k1 ^= k2; break; 356 case IR_BXOR: k1 ^= k2; break;
357 case IR_BSHL: k1 <<= (k2 & 63); break;
358 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
359 case IR_BSAR: k1 >>= (k2 & 63); break;
360 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
361 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
348#endif 362#endif
349 default: UNUSED(k2); lua_assert(0); break; 363 default: UNUSED(k2); lua_assert(0); break;
350 } 364 }
@@ -392,20 +406,10 @@ LJFOLD(BROL KINT64 KINT)
392LJFOLD(BROR KINT64 KINT) 406LJFOLD(BROR KINT64 KINT)
393LJFOLDF(kfold_int64shift) 407LJFOLDF(kfold_int64shift)
394{ 408{
395#if LJ_HASFFI || LJ_64 409#if LJ_HASFFI
396 uint64_t k = ir_k64(fleft)->u64; 410 uint64_t k = ir_k64(fleft)->u64;
397 int32_t sh = (fright->i & 63); 411 int32_t sh = (fright->i & 63);
398 switch ((IROp)fins->o) { 412 return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
399 case IR_BSHL: k <<= sh; break;
400#if LJ_HASFFI
401 case IR_BSHR: k >>= sh; break;
402 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
403 case IR_BROL: k = lj_rol(k, sh); break;
404 case IR_BROR: k = lj_ror(k, sh); break;
405#endif
406 default: lua_assert(0); break;
407 }
408 return INT64FOLD(k);
409#else 413#else
410 UNUSED(J); lua_assert(0); return FAILFOLD; 414 UNUSED(J); lua_assert(0); return FAILFOLD;
411#endif 415#endif
@@ -510,7 +514,7 @@ LJFOLDF(kfold_strref_snew)
510 PHIBARRIER(ir); 514 PHIBARRIER(ir);
511 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ 515 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
512 fins->op1 = str; 516 fins->op1 = str;
513 fins->ot = IRT(IR_STRREF, IRT_P32); 517 fins->ot = IRT(IR_STRREF, IRT_PGC);
514 return RETRYFOLD; 518 return RETRYFOLD;
515 } 519 }
516 } 520 }
@@ -528,6 +532,180 @@ LJFOLDF(kfold_strcmp)
528 return NEXTFOLD; 532 return NEXTFOLD;
529} 533}
530 534
535/* -- Constant folding and forwarding for buffers ------------------------- */
536
537/*
538** Buffer ops perform stores, but their effect is limited to the buffer
539** itself. Also, buffer ops are chained: a use of an op implies a use of
540** all other ops up the chain. Conversely, if an op is unused, all ops
541** up the chain can go unused. This largely eliminates the need to treat
542** them as stores.
543**
544** Alas, treating them as normal (IRM_N) ops doesn't work, because they
545** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
546** or if FOLD is disabled.
547**
548** The compromise is to declare them as loads, emit them like stores and
549** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
550** fragments left over from CSE are eliminated by DCE.
551*/
552
553/* BUFHDR is emitted like a store, see below. */
554
555LJFOLD(BUFPUT BUFHDR BUFSTR)
556LJFOLDF(bufput_append)
557{
558 /* New buffer, no other buffer op in between and same buffer? */
559 if ((J->flags & JIT_F_OPT_FWD) &&
560 !(fleft->op2 & IRBUFHDR_APPEND) &&
561 fleft->prev == fright->op2 &&
562 fleft->op1 == IR(fright->op2)->op1) {
563 IRRef ref = fins->op1;
564 IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND); /* Modify BUFHDR. */
565 IR(ref)->op1 = fright->op1;
566 return ref;
567 }
568 return EMITFOLD; /* Always emit, CSE later. */
569}
570
571LJFOLD(BUFPUT any any)
572LJFOLDF(bufput_kgc)
573{
574 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
575 GCstr *s2 = ir_kstr(fright);
576 if (s2->len == 0) { /* Empty string? */
577 return LEFTFOLD;
578 } else {
579 if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
580 !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
581 GCstr *s1 = ir_kstr(IR(fleft->op2));
582 IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
583 /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
584 IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
585 return fins->op1;
586 }
587 }
588 }
589 return EMITFOLD; /* Always emit, CSE later. */
590}
591
592LJFOLD(BUFSTR any any)
593LJFOLDF(bufstr_kfold_cse)
594{
595 lua_assert(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
596 fleft->o == IR_CALLL);
597 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
598 if (fleft->o == IR_BUFHDR) { /* No put operations? */
599 if (!(fleft->op2 & IRBUFHDR_APPEND)) /* Empty buffer? */
600 return lj_ir_kstr(J, &J2G(J)->strempty);
601 fins->op1 = fleft->op1;
602 fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
603 return CSEFOLD;
604 } else if (fleft->o == IR_BUFPUT) {
605 IRIns *irb = IR(fleft->op1);
606 if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
607 return fleft->op2; /* Shortcut for a single put operation. */
608 }
609 }
610 /* Try to CSE the whole chain. */
611 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
612 IRRef ref = J->chain[IR_BUFSTR];
613 while (ref) {
614 IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
615 while (ira->o == irb->o && ira->op2 == irb->op2) {
616 lua_assert(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
617 ira->o == IR_CALLL || ira->o == IR_CARG);
618 if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
619 return ref; /* CSE succeeded. */
620 if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
621 break;
622 ira = IR(ira->op1);
623 irb = IR(irb->op1);
624 }
625 ref = irs->prev;
626 }
627 }
628 return EMITFOLD; /* No CSE possible. */
629}
630
631LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
632LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
633LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
634LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
635LJFOLDF(bufput_kfold_op)
636{
637 if (irref_isk(fleft->op2)) {
638 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
639 SBuf *sb = lj_buf_tmp_(J->L);
640 sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
641 ir_kstr(IR(fleft->op2)));
642 fins->o = IR_BUFPUT;
643 fins->op1 = fleft->op1;
644 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
645 return RETRYFOLD;
646 }
647 return EMITFOLD; /* Always emit, CSE later. */
648}
649
650LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
651LJFOLDF(bufput_kfold_rep)
652{
653 if (irref_isk(fleft->op2)) {
654 IRIns *irc = IR(fleft->op1);
655 if (irref_isk(irc->op2)) {
656 SBuf *sb = lj_buf_tmp_(J->L);
657 sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
658 fins->o = IR_BUFPUT;
659 fins->op1 = irc->op1;
660 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
661 return RETRYFOLD;
662 }
663 }
664 return EMITFOLD; /* Always emit, CSE later. */
665}
666
667LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
668LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
669LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
670LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
671LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
672LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
673LJFOLDF(bufput_kfold_fmt)
674{
675 IRIns *irc = IR(fleft->op1);
676 lua_assert(irref_isk(irc->op2)); /* SFormat must be const. */
677 if (irref_isk(fleft->op2)) {
678 SFormat sf = (SFormat)IR(irc->op2)->i;
679 IRIns *ira = IR(fleft->op2);
680 SBuf *sb = lj_buf_tmp_(J->L);
681 switch (fins->op2) {
682 case IRCALL_lj_strfmt_putfxint:
683 sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
684 break;
685 case IRCALL_lj_strfmt_putfstr:
686 sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
687 break;
688 case IRCALL_lj_strfmt_putfchar:
689 sb = lj_strfmt_putfchar(sb, sf, ira->i);
690 break;
691 case IRCALL_lj_strfmt_putfnum_int:
692 case IRCALL_lj_strfmt_putfnum_uint:
693 case IRCALL_lj_strfmt_putfnum:
694 default: {
695 const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
696 sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
697 ir_knum(ira)->n);
698 break;
699 }
700 }
701 fins->o = IR_BUFPUT;
702 fins->op1 = irc->op1;
703 fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
704 return RETRYFOLD;
705 }
706 return EMITFOLD; /* Always emit, CSE later. */
707}
708
531/* -- Constant folding of pointer arithmetic ------------------------------ */ 709/* -- Constant folding of pointer arithmetic ------------------------------ */
532 710
533LJFOLD(ADD KGC KINT) 711LJFOLD(ADD KGC KINT)
@@ -648,27 +826,22 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
648LJFOLDF(kfold_conv_knum_int_num) 826LJFOLDF(kfold_conv_knum_int_num)
649{ 827{
650 lua_Number n = knumleft; 828 lua_Number n = knumleft;
651 if (!(fins->op2 & IRCONV_TRUNC)) { 829 int32_t k = lj_num2int(n);
652 int32_t k = lj_num2int(n); 830 if (irt_isguard(fins->t) && n != (lua_Number)k) {
653 if (irt_isguard(fins->t) && n != (lua_Number)k) { 831 /* We're about to create a guard which always fails, like CONV +1.5.
654 /* We're about to create a guard which always fails, like CONV +1.5. 832 ** Some pathological loops cause this during LICM, e.g.:
655 ** Some pathological loops cause this during LICM, e.g.: 833 ** local x,k,t = 0,1.5,{1,[1.5]=2}
656 ** local x,k,t = 0,1.5,{1,[1.5]=2} 834 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
657 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end 835 ** assert(x == 300)
658 ** assert(x == 300) 836 */
659 */ 837 return FAILFOLD;
660 return FAILFOLD;
661 }
662 return INTFOLD(k);
663 } else {
664 return INTFOLD((int32_t)n);
665 } 838 }
839 return INTFOLD(k);
666} 840}
667 841
668LJFOLD(CONV KNUM IRCONV_U32_NUM) 842LJFOLD(CONV KNUM IRCONV_U32_NUM)
669LJFOLDF(kfold_conv_knum_u32_num) 843LJFOLDF(kfold_conv_knum_u32_num)
670{ 844{
671 lua_assert((fins->op2 & IRCONV_TRUNC));
672#ifdef _MSC_VER 845#ifdef _MSC_VER
673 { /* Workaround for MSVC bug. */ 846 { /* Workaround for MSVC bug. */
674 volatile uint32_t u = (uint32_t)knumleft; 847 volatile uint32_t u = (uint32_t)knumleft;
@@ -682,27 +855,27 @@ LJFOLDF(kfold_conv_knum_u32_num)
682LJFOLD(CONV KNUM IRCONV_I64_NUM) 855LJFOLD(CONV KNUM IRCONV_I64_NUM)
683LJFOLDF(kfold_conv_knum_i64_num) 856LJFOLDF(kfold_conv_knum_i64_num)
684{ 857{
685 lua_assert((fins->op2 & IRCONV_TRUNC));
686 return INT64FOLD((uint64_t)(int64_t)knumleft); 858 return INT64FOLD((uint64_t)(int64_t)knumleft);
687} 859}
688 860
689LJFOLD(CONV KNUM IRCONV_U64_NUM) 861LJFOLD(CONV KNUM IRCONV_U64_NUM)
690LJFOLDF(kfold_conv_knum_u64_num) 862LJFOLDF(kfold_conv_knum_u64_num)
691{ 863{
692 lua_assert((fins->op2 & IRCONV_TRUNC));
693 return INT64FOLD(lj_num2u64(knumleft)); 864 return INT64FOLD(lj_num2u64(knumleft));
694} 865}
695 866
696LJFOLD(TOSTR KNUM) 867LJFOLD(TOSTR KNUM any)
697LJFOLDF(kfold_tostr_knum) 868LJFOLDF(kfold_tostr_knum)
698{ 869{
699 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); 870 return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
700} 871}
701 872
702LJFOLD(TOSTR KINT) 873LJFOLD(TOSTR KINT any)
703LJFOLDF(kfold_tostr_kint) 874LJFOLDF(kfold_tostr_kint)
704{ 875{
705 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); 876 return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
877 lj_strfmt_int(J->L, fleft->i) :
878 lj_strfmt_char(J->L, fleft->i));
706} 879}
707 880
708LJFOLD(STRTO KGC) 881LJFOLD(STRTO KGC)
@@ -750,13 +923,13 @@ LJFOLDF(shortcut_round)
750 return NEXTFOLD; 923 return NEXTFOLD;
751} 924}
752 925
753LJFOLD(ABS ABS KNUM) 926LJFOLD(ABS ABS FLOAD)
754LJFOLDF(shortcut_left) 927LJFOLDF(shortcut_left)
755{ 928{
756 return LEFTFOLD; /* f(g(x)) ==> g(x) */ 929 return LEFTFOLD; /* f(g(x)) ==> g(x) */
757} 930}
758 931
759LJFOLD(ABS NEG KNUM) 932LJFOLD(ABS NEG FLOAD)
760LJFOLDF(shortcut_dropleft) 933LJFOLDF(shortcut_dropleft)
761{ 934{
762 PHIBARRIER(fleft); 935 PHIBARRIER(fleft);
@@ -837,8 +1010,10 @@ LJFOLDF(simplify_nummuldiv_k)
837 if (n == 1.0) { /* x o 1 ==> x */ 1010 if (n == 1.0) { /* x o 1 ==> x */
838 return LEFTFOLD; 1011 return LEFTFOLD;
839 } else if (n == -1.0) { /* x o -1 ==> -x */ 1012 } else if (n == -1.0) { /* x o -1 ==> -x */
1013 IRRef op1 = fins->op1;
1014 fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
1015 fins->op1 = op1;
840 fins->o = IR_NEG; 1016 fins->o = IR_NEG;
841 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
842 return RETRYFOLD; 1017 return RETRYFOLD;
843 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ 1018 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
844 fins->o = IR_ADD; 1019 fins->o = IR_ADD;
@@ -1205,7 +1380,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1205 ** But this is mainly intended for simple address arithmetic. 1380 ** But this is mainly intended for simple address arithmetic.
1206 ** Also it's easier for the backend to optimize the original multiplies. 1381 ** Also it's easier for the backend to optimize the original multiplies.
1207 */ 1382 */
1208 if (k == 1) { /* i * 1 ==> i */ 1383 if (k == 0) { /* i * 0 ==> 0 */
1384 return RIGHTFOLD;
1385 } else if (k == 1) { /* i * 1 ==> i */
1209 return LEFTFOLD; 1386 return LEFTFOLD;
1210 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ 1387 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1211 fins->o = IR_BSHL; 1388 fins->o = IR_BSHL;
@@ -1218,9 +1395,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
1218LJFOLD(MUL any KINT) 1395LJFOLD(MUL any KINT)
1219LJFOLDF(simplify_intmul_k32) 1396LJFOLDF(simplify_intmul_k32)
1220{ 1397{
1221 if (fright->i == 0) /* i * 0 ==> 0 */ 1398 if (fright->i >= 0)
1222 return INTFOLD(0);
1223 else if (fright->i > 0)
1224 return simplify_intmul_k(J, fright->i); 1399 return simplify_intmul_k(J, fright->i);
1225 return NEXTFOLD; 1400 return NEXTFOLD;
1226} 1401}
@@ -1228,14 +1403,13 @@ LJFOLDF(simplify_intmul_k32)
1228LJFOLD(MUL any KINT64) 1403LJFOLD(MUL any KINT64)
1229LJFOLDF(simplify_intmul_k64) 1404LJFOLDF(simplify_intmul_k64)
1230{ 1405{
1231 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ 1406#if LJ_HASFFI
1232 return INT64FOLD(0); 1407 if (ir_kint64(fright)->u64 < 0x80000000u)
1233#if LJ_64
1234 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1235 else if (ir_kint64(fright)->u64 < 0x80000000u)
1236 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); 1408 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1237#endif
1238 return NEXTFOLD; 1409 return NEXTFOLD;
1410#else
1411 UNUSED(J); lua_assert(0); return FAILFOLD;
1412#endif
1239} 1413}
1240 1414
1241LJFOLD(MOD any KINT) 1415LJFOLD(MOD any KINT)
@@ -1491,6 +1665,14 @@ LJFOLDF(simplify_shiftk_andk)
1491 fins->op2 = (IRRef1)lj_ir_kint(J, k); 1665 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1492 fins->ot = IRTI(IR_BAND); 1666 fins->ot = IRTI(IR_BAND);
1493 return RETRYFOLD; 1667 return RETRYFOLD;
1668 } else if (irk->o == IR_KINT64) {
1669 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, fright->i, (IROp)fins->o);
1670 IROpT ot = fleft->ot;
1671 fins->op1 = fleft->op1;
1672 fins->op1 = (IRRef1)lj_opt_fold(J);
1673 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1674 fins->ot = ot;
1675 return RETRYFOLD;
1494 } 1676 }
1495 return NEXTFOLD; 1677 return NEXTFOLD;
1496} 1678}
@@ -1506,6 +1688,47 @@ LJFOLDF(simplify_andk_shiftk)
1506 return NEXTFOLD; 1688 return NEXTFOLD;
1507} 1689}
1508 1690
/* Drop a redundant inner BOR/BAND with a 32 bit constant when the outer
** BAND/BOR with another constant makes the inner operation irrelevant.
*/
1691LJFOLD(BAND BOR KINT)
1692LJFOLD(BOR BAND KINT)
1693LJFOLDF(simplify_andor_k)
1694{
1695 IRIns *irk = IR(fleft->op2);
1696 PHIBARRIER(fleft);
1697 if (irk->o == IR_KINT) {
/* Combine the inner constant (k1) and outer constant (k2) using the
** OUTER operation (fins->o).
*/
1698 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1699 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1700 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1701 if (k == (fins->o == IR_BAND ? 0 : -1)) {
/* Bypass the inner instruction and refold the simplified one. */
1702 fins->op1 = fleft->op1;
1703 return RETRYFOLD;
1704 }
1705 }
1706 return NEXTFOLD;
1707}
1708
/* 64 bit variant of the BAND/BOR constant simplification: drops the inner
** BOR/BAND when combining both 64 bit constants with the outer operation
** yields all-zero (outer BAND) or all-one (outer BOR) bits.
** Only compiled in with LJ_HASFFI; otherwise it asserts and fails the fold.
*/
1709LJFOLD(BAND BOR KINT64)
1710LJFOLD(BOR BAND KINT64)
1711LJFOLDF(simplify_andor_k64)
1712{
1713#if LJ_HASFFI
1714 IRIns *irk = IR(fleft->op2);
1715 PHIBARRIER(fleft);
1716 if (irk->o == IR_KINT64) {
/* Combine inner (k1) and outer (k2) constants with the OUTER operation. */
1717 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
1718 ir_k64(fright)->u64, (IROp)fins->o);
1719 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1720 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1721 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
/* Bypass the inner instruction and refold the simplified one. */
1722 fins->op1 = fleft->op1;
1723 return RETRYFOLD;
1724 }
1725 }
1726 return NEXTFOLD;
1727#else
1728 UNUSED(J); lua_assert(0); return FAILFOLD;
1729#endif
1730}
1731
1509/* -- Reassociation ------------------------------------------------------- */ 1732/* -- Reassociation ------------------------------------------------------- */
1510 1733
1511LJFOLD(ADD ADD KINT) 1734LJFOLD(ADD ADD KINT)
@@ -1535,7 +1758,7 @@ LJFOLD(BOR BOR KINT64)
1535LJFOLD(BXOR BXOR KINT64) 1758LJFOLD(BXOR BXOR KINT64)
1536LJFOLDF(reassoc_intarith_k64) 1759LJFOLDF(reassoc_intarith_k64)
1537{ 1760{
1538#if LJ_HASFFI || LJ_64 1761#if LJ_HASFFI
1539 IRIns *irk = IR(fleft->op2); 1762 IRIns *irk = IR(fleft->op2);
1540 if (irk->o == IR_KINT64) { 1763 if (irk->o == IR_KINT64) {
1541 uint64_t k = kfold_int64arith(ir_k64(irk)->u64, 1764 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
@@ -1953,6 +2176,7 @@ LJFOLDF(fwd_href_tdup)
1953** an aliased table, as it may invalidate all of the pointers and fields. 2176** an aliased table, as it may invalidate all of the pointers and fields.
1954** Only HREF needs the NEWREF check -- AREF and HREFK already depend on 2177** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1955** FLOADs. And NEWREF itself is treated like a store (see below). 2178** FLOADs. And NEWREF itself is treated like a store (see below).
2179** LREF is constant (per trace) since coroutine switches are not inlined.
1956*/ 2180*/
1957LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) 2181LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1958LJFOLDF(fload_tab_tnew_asize) 2182LJFOLDF(fload_tab_tnew_asize)
@@ -2016,6 +2240,14 @@ LJFOLDF(fload_str_len_snew)
2016 return NEXTFOLD; 2240 return NEXTFOLD;
2017} 2241}
2018 2242
/* Loading the string length of a TOSTR in IRTOSTR_CHAR mode is folded to
** the integer constant 1. Only done when the FOLD optimization is enabled.
*/
2243LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
2244LJFOLDF(fload_str_len_tostr)
2245{
2246 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
2247 return INTFOLD(1);
/* Any other TOSTR mode: let the following fold rules handle it. */
2248 return NEXTFOLD;
2249}
2250
2019/* The C type ID of cdata objects is immutable. */ 2251/* The C type ID of cdata objects is immutable. */
2020LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) 2252LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
2021LJFOLDF(fload_cdata_typeid_kgc) 2253LJFOLDF(fload_cdata_typeid_kgc)
@@ -2062,6 +2294,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
2062} 2294}
2063 2295
2064LJFOLD(FLOAD any IRFL_STR_LEN) 2296LJFOLD(FLOAD any IRFL_STR_LEN)
2297LJFOLD(FLOAD any IRFL_FUNC_ENV)
2298LJFOLD(FLOAD any IRFL_THREAD_ENV)
2065LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) 2299LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
2066LJFOLD(FLOAD any IRFL_CDATA_PTR) 2300LJFOLD(FLOAD any IRFL_CDATA_PTR)
2067LJFOLD(FLOAD any IRFL_CDATA_INT) 2301LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2127,6 +2361,17 @@ LJFOLDF(barrier_tnew_tdup)
2127 return DROPFOLD; 2361 return DROPFOLD;
2128} 2362}
2129 2363
2364/* -- Profiling ----------------------------------------------------------- */
2365
/* Avoid emitting two IR_PROF instructions directly after each other. */
2366LJFOLD(PROF any any)
2367LJFOLDF(prof)
2368{
/* Head of the IR_PROF chain. */
2369 IRRef ref = J->chain[IR_PROF];
/* J->cur.nins is the ref the next instruction would get, so ref+1 == nins
** means that IR_PROF is the last instruction emitted so far. Reuse it.
*/
2370 if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
2371 return ref;
2372 return EMITFOLD;
2373}
2374
2130/* -- Stores and allocations ---------------------------------------------- */ 2375/* -- Stores and allocations ---------------------------------------------- */
2131 2376
2132/* Stores and allocations cannot be folded or passed on to CSE in general. 2377/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2149,8 +2394,9 @@ LJFOLD(XSTORE any any)
2149LJFOLDX(lj_opt_dse_xstore) 2394LJFOLDX(lj_opt_dse_xstore)
2150 2395
2151LJFOLD(NEWREF any any) /* Treated like a store. */ 2396LJFOLD(NEWREF any any) /* Treated like a store. */
2152LJFOLD(CALLS any any) 2397LJFOLD(CALLA any any)
2153LJFOLD(CALLL any any) /* Safeguard fallback. */ 2398LJFOLD(CALLL any any) /* Safeguard fallback. */
2399LJFOLD(CALLS any any)
2154LJFOLD(CALLXS any any) 2400LJFOLD(CALLXS any any)
2155LJFOLD(XBAR) 2401LJFOLD(XBAR)
2156LJFOLD(RETF any any) /* Modifies BASE. */ 2402LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2158,6 +2404,7 @@ LJFOLD(TNEW any any)
2158LJFOLD(TDUP any) 2404LJFOLD(TDUP any)
2159LJFOLD(CNEW any any) 2405LJFOLD(CNEW any any)
2160LJFOLD(XSNEW any any) 2406LJFOLD(XSNEW any any)
2407LJFOLD(BUFHDR any any)
2161LJFOLDX(lj_ir_emit) 2408LJFOLDX(lj_ir_emit)
2162 2409
2163/* ------------------------------------------------------------------------ */ 2410/* ------------------------------------------------------------------------ */
@@ -2209,10 +2456,14 @@ retry:
2209 if (fins->op1 >= J->cur.nk) { 2456 if (fins->op1 >= J->cur.nk) {
2210 key += (uint32_t)IR(fins->op1)->o << 10; 2457 key += (uint32_t)IR(fins->op1)->o << 10;
2211 *fleft = *IR(fins->op1); 2458 *fleft = *IR(fins->op1);
2459 if (fins->op1 < REF_TRUE)
2460 fleft[1] = IR(fins->op1)[1];
2212 } 2461 }
2213 if (fins->op2 >= J->cur.nk) { 2462 if (fins->op2 >= J->cur.nk) {
2214 key += (uint32_t)IR(fins->op2)->o; 2463 key += (uint32_t)IR(fins->op2)->o;
2215 *fright = *IR(fins->op2); 2464 *fright = *IR(fins->op2);
2465 if (fins->op2 < REF_TRUE)
2466 fright[1] = IR(fins->op2)[1];
2216 } else { 2467 } else {
2217 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ 2468 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2218 } 2469 }
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index d5e1eb13..c5919ca0 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_iropt.h" 17#include "lj_iropt.h"
@@ -254,9 +254,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
254 J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); 254 J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
255} 255}
256 256
/* State handed to loop_unroll() via the protected call in lj_opt_loop().
** The substitution table is kept here so that lj_opt_loop() can free it
** after the protected call returns, whether or not an error occurred.
*/
257typedef struct LoopState {
/* JIT compiler state. */
258 jit_State *J;
/* Substitution table, allocated by loop_unroll() (NULL until then). */
259 IRRef1 *subst;
/* Number of IRRef1 elements in subst. */
260 MSize sizesubst;
261} LoopState;
262
257/* Unroll loop. */ 263/* Unroll loop. */
258static void loop_unroll(jit_State *J) 264static void loop_unroll(LoopState *lps)
259{ 265{
266 jit_State *J = lps->J;
260 IRRef1 phi[LJ_MAX_PHI]; 267 IRRef1 phi[LJ_MAX_PHI];
261 uint32_t nphi = 0; 268 uint32_t nphi = 0;
262 IRRef1 *subst; 269 IRRef1 *subst;
@@ -265,13 +272,13 @@ static void loop_unroll(jit_State *J)
265 SnapEntry *loopmap, *psentinel; 272 SnapEntry *loopmap, *psentinel;
266 IRRef ins, invar; 273 IRRef ins, invar;
267 274
268 /* Use temp buffer for substitution table. 275 /* Allocate substitution table.
269 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 276 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
270 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
271 */ 277 */
272 invar = J->cur.nins; 278 invar = J->cur.nins;
273 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 279 lps->sizesubst = invar - REF_BIAS;
274 (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; 280 lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
281 subst = lps->subst - REF_BIAS;
275 subst[REF_BASE] = REF_BASE; 282 subst[REF_BASE] = REF_BASE;
276 283
277 /* LOOP separates the pre-roll from the loop body. */ 284 /* LOOP separates the pre-roll from the loop body. */
@@ -396,7 +403,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
396static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) 403static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
397{ 404{
398 UNUSED(L); UNUSED(dummy); 405 UNUSED(L); UNUSED(dummy);
399 loop_unroll((jit_State *)ud); 406 loop_unroll((LoopState *)ud);
400 return NULL; 407 return NULL;
401} 408}
402 409
@@ -406,7 +413,13 @@ int lj_opt_loop(jit_State *J)
406 IRRef nins = J->cur.nins; 413 IRRef nins = J->cur.nins;
407 SnapNo nsnap = J->cur.nsnap; 414 SnapNo nsnap = J->cur.nsnap;
408 MSize nsnapmap = J->cur.nsnapmap; 415 MSize nsnapmap = J->cur.nsnapmap;
409 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 416 LoopState lps;
417 int errcode;
418 lps.J = J;
419 lps.subst = NULL;
420 lps.sizesubst = 0;
421 errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
422 lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
410 if (LJ_UNLIKELY(errcode)) { 423 if (LJ_UNLIKELY(errcode)) {
411 lua_State *L = J->L; 424 lua_State *L = J->L;
412 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ 425 if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index 281f29ad..079f7cfe 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -17,12 +17,13 @@
17#include "lj_ir.h" 17#include "lj_ir.h"
18#include "lj_jit.h" 18#include "lj_jit.h"
19#include "lj_iropt.h" 19#include "lj_iropt.h"
20#include "lj_ircall.h"
20 21
21/* Some local macros to save typing. Undef'd at the end. */ 22/* Some local macros to save typing. Undef'd at the end. */
22#define IR(ref) (&J->cur.ir[(ref)]) 23#define IR(ref) (&J->cur.ir[(ref)])
23#define fins (&J->fold.ins) 24#define fins (&J->fold.ins)
24#define fleft (&J->fold.left) 25#define fleft (J->fold.left)
25#define fright (&J->fold.right) 26#define fright (J->fold.right)
26 27
27/* 28/*
28** Caveat #1: return value is not always a TRef -- only use with tref_ref(). 29** Caveat #1: return value is not always a TRef -- only use with tref_ref().
@@ -309,7 +310,21 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
309 return 1; /* No conflict. Can fold to niltv. */ 310 return 1; /* No conflict. Can fold to niltv. */
310} 311}
311 312
312/* Check whether there's no aliasing NEWREF for the left operand. */ 313/* Check whether there's no aliasing table.clear. */
/* Scans the IR_CALLS chain above 'lim' for a call to lj_tab_clear whose
** table operand is 'ta' or may alias it.
** Returns 0 if such a call is found, 1 otherwise.
*/
314static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
315{
316 IRRef ref = J->chain[IR_CALLS];
/* Follow the chain via ->prev until reaching the search limit. */
317 while (ref > lim) {
318 IRIns *calls = IR(ref);
/* Anything but a definite ALIAS_NO is treated as a conflict. */
319 if (calls->op2 == IRCALL_lj_tab_clear &&
320 (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
321 return 0; /* Conflict. */
322 ref = calls->prev;
323 }
324 return 1; /* No conflict. Can safely FOLD/CSE. */
325}
326
327/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
313int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) 328int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
314{ 329{
315 IRRef ta = fins->op1; 330 IRRef ta = fins->op1;
@@ -320,7 +335,7 @@ int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
320 return 0; /* Conflict. */ 335 return 0; /* Conflict. */
321 ref = newref->prev; 336 ref = newref->prev;
322 } 337 }
323 return 1; /* No conflict. Can safely FOLD/CSE. */ 338 return fwd_aa_tab_clear(J, lim, ta);
324} 339}
325 340
326/* ASTORE/HSTORE elimination. */ 341/* ASTORE/HSTORE elimination. */
@@ -855,6 +870,10 @@ TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
855 ref = store->prev; 870 ref = store->prev;
856 } 871 }
857 872
873 /* Search for aliasing table.clear. */
874 if (!fwd_aa_tab_clear(J, lim, tab))
875 return lj_ir_emit(J);
876
858 /* Try to find a matching load. Below the conflicting store, if any. */ 877 /* Try to find a matching load. Below the conflicting store, if any. */
859 return lj_opt_cselim(J, lim); 878 return lj_opt_cselim(J, lim);
860} 879}
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 28d3c255..ef0599c9 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -555,7 +555,7 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
555 return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); 555 return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
556 rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); 556 rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
557 } 557 }
558 return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); 558 return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
559} 559}
560 560
561/* Narrowing of modulo operator. */ 561/* Narrowing of modulo operator. */
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index df7f58af..c5323b11 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -165,8 +165,8 @@ static void sink_remark_phi(jit_State *J)
165/* Sweep instructions and tag sunken allocations and stores. */ 165/* Sweep instructions and tag sunken allocations and stores. */
166static void sink_sweep_ins(jit_State *J) 166static void sink_sweep_ins(jit_State *J)
167{ 167{
168 IRIns *ir, *irfirst = IR(J->cur.nk); 168 IRIns *ir, *irbase = IR(REF_BASE);
169 for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { 169 for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
170 switch (ir->o) { 170 switch (ir->o) {
171 case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { 171 case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
172 IRIns *ira = sink_checkalloc(J, ir); 172 IRIns *ira = sink_checkalloc(J, ir);
@@ -216,6 +216,13 @@ static void sink_sweep_ins(jit_State *J)
216 break; 216 break;
217 } 217 }
218 } 218 }
219 for (ir = IR(J->cur.nk); ir < irbase; ir++) {
220 irt_clearmark(ir->t);
221 ir->prev = REGSP_INIT;
222 /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
223 if (irt_is64(ir->t) && ir->o != IR_KNULL)
224 ir++;
225 }
219} 226}
220 227
221/* Allocation sinking and store sinking. 228/* Allocation sinking and store sinking.
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index a517fa8a..ee7cf0f9 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -8,14 +8,15 @@
8 8
9#include "lj_obj.h" 9#include "lj_obj.h"
10 10
11#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) 11#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
12 12
13#include "lj_err.h" 13#include "lj_err.h"
14#include "lj_str.h" 14#include "lj_buf.h"
15#include "lj_ir.h" 15#include "lj_ir.h"
16#include "lj_jit.h" 16#include "lj_jit.h"
17#include "lj_ircall.h" 17#include "lj_ircall.h"
18#include "lj_iropt.h" 18#include "lj_iropt.h"
19#include "lj_dispatch.h"
19#include "lj_vm.h" 20#include "lj_vm.h"
20 21
21/* SPLIT pass: 22/* SPLIT pass:
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
139 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 140 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
140 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 141 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
141} 142}
143#endif
142 144
143/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ 145/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
144static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, 146static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
155 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); 157 ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
156 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); 158 return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
157} 159}
158#endif
159 160
160/* Emit a CALLN with two split 64 bit arguments. */ 161/* Emit a CALLN with two split 64 bit arguments. */
161static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, 162static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
192 nref = ir->op1; 193 nref = ir->op1;
193 if (ofs == 0) return nref; 194 if (ofs == 0) return nref;
194 } 195 }
195 return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); 196 return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
197}
198
199#if LJ_HASFFI
/* Split a 64 bit shift or rotate into 32 bit operations.
** Returns the reference for the hiword of the result. The loword reference
** is left in ir->prev. Constant shift counts are expanded inline;
** anything else becomes a call emitted via split_call_li().
*/
200static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
201 IRIns *oir, IRIns *nir, IRIns *ir)
202{
203 IROp op = ir->o;
204 IRRef kref = nir->op2;
205 if (irref_isk(kref)) { /* Optimize constant shifts. */
/* Only the low 6 bits of the shift count are used. */
206 int32_t k = (IR(kref)->i & 63);
207 IRRef lo = nir->op1, hi = hisubst[ir->op1];
208 if (op == IR_BROL || op == IR_BROR) {
/* Turn a rotate right into the equivalent rotate left. */
209 if (op == IR_BROR) k = (-k & 63);
/* Rotating by 32 or more swaps the two words; rotate by the rest. */
210 if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
211 if (k == 0) {
/* Nothing left to shift: undo the last emit and pass the words through. */
212 passthrough:
213 J->cur.nins--;
214 ir->prev = lo;
215 return hi;
216 } else {
/* Rotate left by 0 < k < 32, with (-k & 31) == 32-k:
**   lo' = (lo << k) | (hi >> (32-k))
**   hi' = (hi << k) | (lo >> (32-k))
*/
217 TRef k1, k2;
218 IRRef t1, t2, t3, t4;
219 J->cur.nins--;
220 k1 = lj_ir_kint(J, k);
221 k2 = lj_ir_kint(J, (-k & 31));
222 t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
223 t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
224 t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
225 t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
226 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
227 return split_emit(J, IRTI(IR_BOR), t2, t3);
228 }
229 } else if (k == 0) {
230 goto passthrough;
231 } else if (k < 32) {
232 if (op == IR_BSHL) {
/* hi' = (hi << k) | (lo >> (32-k)). ir->prev is left unchanged here. */
233 IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
234 IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
235 return split_emit(J, IRTI(IR_BOR), t1, t2);
236 } else {
/* lo' = (loword shift, forced to BSHR) | (hi << (32-k)); hi' = hi op k. */
237 IRRef t1 = ir->prev, t2;
238 lua_assert(op == IR_BSHR || op == IR_BSAR);
239 nir->o = IR_BSHR;
240 t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
241 ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
242 return split_emit(J, IRTI(op), hi, kref);
243 }
244 } else {
/* Shift by 32 or more: one result word depends only on the other input
** word, the remaining one is a constant or a sign fill.
*/
245 if (op == IR_BSHL) {
/* The hiword comes from the old loword (k == 32: unshifted, emit undone;
** otherwise the ref held in ir->prev). The new loword is constant 0.
*/
246 if (k == 32)
247 J->cur.nins--;
248 else
249 lo = ir->prev;
250 ir->prev = lj_ir_kint(J, 0);
251 return lo;
252 } else {
253 lua_assert(op == IR_BSHR || op == IR_BSAR);
/* The loword comes from the old hiword: unshifted for k == 32 (emit
** undone), otherwise nir is redirected to operate on hi.
*/
254 if (k == 32) {
255 J->cur.nins--;
256 ir->prev = hi;
257 } else {
258 nir->op1 = hi;
259 }
/* Hiword: 0 for a logical shift, hi >> 31 (arithmetic) for BSAR. */
260 if (op == IR_BSHR)
261 return lj_ir_kint(J, 0);
262 else
263 return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
264 }
265 }
266 }
/* Non-constant shift count: emit a call. The call id is derived from the
** opcode offset relative to IR_BSHL -- assumes the shift/rotate opcodes and
** the call ids starting at IRCALL_lj_carith_shl64 are in the same order.
*/
267 return split_call_li(J, hisubst, oir, ir,
268 op - IR_BSHL + IRCALL_lj_carith_shl64);
269}
197 270
/* Split a 64 bit BAND/BOR/BXOR into two 32 bit operations.
** Returns the reference for the hiword of the result. Each half is
** simplified separately if its right operand is the constant 0 or -1.
*/
271static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
272 IRIns *nir, IRIns *ir)
273{
274 IROp op = ir->o;
275 IRRef hi, kref = nir->op2;
276 if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
277 int32_t k = IR(kref)->i;
278 if (k == 0 || k == -1) {
/* Invert k for BAND, so k == 0 uniformly means "identity operation"
** (x & -1, x | 0, x ^ 0) for all three opcodes.
*/
279 if (op == IR_BAND) k = ~k;
280 if (k == 0) {
/* Identity: undo the last emit, the loword is the left operand. */
281 J->cur.nins--;
282 ir->prev = nir->op1;
283 } else if (op == IR_BXOR) {
/* x ^ -1 ==> ~x: turn the loword instruction into a BNOT. */
284 nir->o = IR_BNOT;
285 nir->op2 = 0;
286 } else {
/* x & 0 or x | -1: the result is the constant itself. */
287 J->cur.nins--;
288 ir->prev = kref;
289 }
290 }
291 }
/* Same simplifications for the hiwords. */
292 hi = hisubst[ir->op1];
293 kref = hisubst[ir->op2];
294 if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
295 int32_t k = IR(kref)->i;
296 if (k == 0 || k == -1) {
297 if (op == IR_BAND) k = ~k;
298 if (k == 0) {
299 return hi;
300 } else if (op == IR_BXOR) {
301 return split_emit(J, IRTI(IR_BNOT), hi, 0);
302 } else {
303 return kref;
304 }
305 }
306 }
/* General case: emit the operation on the two hiwords. */
307 return split_emit(J, IRTI(op), hi, kref);
308}
309#endif
310
198/* Substitute references of a snapshot. */ 311/* Substitute references of a snapshot. */
199static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) 312static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
200{ 313{
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J)
214 IRRef nins = J->cur.nins, nk = J->cur.nk; 327 IRRef nins = J->cur.nins, nk = J->cur.nk;
215 MSize irlen = nins - nk; 328 MSize irlen = nins - nk;
216 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); 329 MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
217 IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); 330 IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
218 IRRef1 *hisubst; 331 IRRef1 *hisubst;
219 IRRef ref, snref; 332 IRRef ref, snref;
220 SnapShot *snap; 333 SnapShot *snap;
@@ -241,6 +354,8 @@ static void split_ir(jit_State *J)
241 ir->prev = ref; /* Identity substitution for loword. */ 354 ir->prev = ref; /* Identity substitution for loword. */
242 hisubst[ref] = 0; 355 hisubst[ref] = 0;
243 } 356 }
357 if (irt_is64(ir->t) && ir->o != IR_KNULL)
358 ref++;
244 } 359 }
245 360
246 /* Process old IR instructions. */ 361 /* Process old IR instructions. */
@@ -321,7 +436,8 @@ static void split_ir(jit_State *J)
321 nir->o = IR_CONV; /* Pass through loword. */ 436 nir->o = IR_CONV; /* Pass through loword. */
322 nir->op2 = (IRT_INT << 5) | IRT_INT; 437 nir->op2 = (IRT_INT << 5) | IRT_INT;
323 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), 438 hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
324 hisubst[ir->op1], hisubst[ir->op2]); 439 hisubst[ir->op1],
440 lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
325 break; 441 break;
326 case IR_SLOAD: 442 case IR_SLOAD:
327 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ 443 if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */
@@ -336,15 +452,24 @@ static void split_ir(jit_State *J)
336 case IR_STRTO: 452 case IR_STRTO:
337 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); 453 hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
338 break; 454 break;
455 case IR_FLOAD:
456 lua_assert(ir->op1 == REF_NIL);
457 hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
458 nir->op2 += LJ_BE*4;
459 break;
339 case IR_XLOAD: { 460 case IR_XLOAD: {
340 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ 461 IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
341 J->cur.nins--; 462 J->cur.nins--;
342 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ 463 hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */
464#if LJ_BE
465 hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
466 inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
467#endif
343 nref = lj_ir_nextins(J); 468 nref = lj_ir_nextins(J);
344 nir = IR(nref); 469 nir = IR(nref);
345 *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ 470 *nir = inslo; /* Re-emit lo XLOAD. */
346 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
347#if LJ_LE 471#if LJ_LE
472 hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
348 ir->prev = nref; 473 ir->prev = nref;
349#else 474#else
350 ir->prev = hi; hi = nref; 475 ir->prev = hi; hi = nref;
@@ -438,6 +563,19 @@ static void split_ir(jit_State *J)
438 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : 563 irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
439 IRCALL_lj_carith_powu64); 564 IRCALL_lj_carith_powu64);
440 break; 565 break;
566 case IR_BNOT:
567 hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
568 break;
569 case IR_BSWAP:
570 ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
571 hi = nref;
572 break;
573 case IR_BAND: case IR_BOR: case IR_BXOR:
574 hi = split_bitop(J, hisubst, nir, ir);
575 break;
576 case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
577 hi = split_bitshift(J, hisubst, oir, nir, ir);
578 break;
441 case IR_FLOAD: 579 case IR_FLOAD:
442 lua_assert(ir->op2 == IRFL_CDATA_INT64); 580 lua_assert(ir->op2 == IRFL_CDATA_INT64);
443 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); 581 hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 74dd5706..68f3789e 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_debug.h" 15#include "lj_debug.h"
16#include "lj_buf.h"
16#include "lj_str.h" 17#include "lj_str.h"
17#include "lj_tab.h" 18#include "lj_tab.h"
18#include "lj_func.h" 19#include "lj_func.h"
@@ -21,6 +22,7 @@
21#if LJ_HASFFI 22#if LJ_HASFFI
22#include "lj_ctype.h" 23#include "lj_ctype.h"
23#endif 24#endif
25#include "lj_strfmt.h"
24#include "lj_lex.h" 26#include "lj_lex.h"
25#include "lj_parse.h" 27#include "lj_parse.h"
26#include "lj_vm.h" 28#include "lj_vm.h"
@@ -165,12 +167,12 @@ LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
165 167
166LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) 168LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
167{ 169{
168 lj_lex_error(ls, ls->token, em); 170 lj_lex_error(ls, ls->tok, em);
169} 171}
170 172
171LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) 173LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
172{ 174{
173 lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); 175 lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
174} 176}
175 177
176LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) 178LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -660,16 +662,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
660 BCReg idx, func, obj = expr_toanyreg(fs, e); 662 BCReg idx, func, obj = expr_toanyreg(fs, e);
661 expr_free(fs, e); 663 expr_free(fs, e);
662 func = fs->freereg; 664 func = fs->freereg;
663 bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ 665 bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
664 lua_assert(expr_isstrk(key)); 666 lua_assert(expr_isstrk(key));
665 idx = const_str(fs, key); 667 idx = const_str(fs, key);
666 if (idx <= BCMAX_C) { 668 if (idx <= BCMAX_C) {
667 bcreg_reserve(fs, 2); 669 bcreg_reserve(fs, 2+LJ_FR2);
668 bcemit_ABC(fs, BC_TGETS, func, obj, idx); 670 bcemit_ABC(fs, BC_TGETS, func, obj, idx);
669 } else { 671 } else {
670 bcreg_reserve(fs, 3); 672 bcreg_reserve(fs, 3+LJ_FR2);
671 bcemit_AD(fs, BC_KSTR, func+2, idx); 673 bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
672 bcemit_ABC(fs, BC_TGETV, func, obj, func+2); 674 bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
673 fs->freereg--; 675 fs->freereg--;
674 } 676 }
675 e->u.s.info = func; 677 e->u.s.info = func;
@@ -983,7 +985,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
983/* Check and consume optional token. */ 985/* Check and consume optional token. */
984static int lex_opt(LexState *ls, LexToken tok) 986static int lex_opt(LexState *ls, LexToken tok)
985{ 987{
986 if (ls->token == tok) { 988 if (ls->tok == tok) {
987 lj_lex_next(ls); 989 lj_lex_next(ls);
988 return 1; 990 return 1;
989 } 991 }
@@ -993,7 +995,7 @@ static int lex_opt(LexState *ls, LexToken tok)
993/* Check and consume token. */ 995/* Check and consume token. */
994static void lex_check(LexState *ls, LexToken tok) 996static void lex_check(LexState *ls, LexToken tok)
995{ 997{
996 if (ls->token != tok) 998 if (ls->tok != tok)
997 err_token(ls, tok); 999 err_token(ls, tok);
998 lj_lex_next(ls); 1000 lj_lex_next(ls);
999} 1001}
@@ -1007,7 +1009,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1007 } else { 1009 } else {
1008 const char *swhat = lj_lex_token2str(ls, what); 1010 const char *swhat = lj_lex_token2str(ls, what);
1009 const char *swho = lj_lex_token2str(ls, who); 1011 const char *swho = lj_lex_token2str(ls, who);
1010 lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); 1012 lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
1011 } 1013 }
1012 } 1014 }
1013} 1015}
@@ -1016,9 +1018,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
1016static GCstr *lex_str(LexState *ls) 1018static GCstr *lex_str(LexState *ls)
1017{ 1019{
1018 GCstr *s; 1020 GCstr *s;
1019 if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) 1021 if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
1020 err_token(ls, TK_name); 1022 err_token(ls, TK_name);
1021 s = strV(&ls->tokenval); 1023 s = strV(&ls->tokval);
1022 lj_lex_next(ls); 1024 lj_lex_next(ls);
1023 return s; 1025 return s;
1024} 1026}
@@ -1433,78 +1435,46 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
1433 } 1435 }
1434} 1436}
1435 1437
1436/* Resize buffer if needed. */
1437static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
1438{
1439 MSize sz = ls->sb.sz * 2;
1440 while (ls->sb.n + len > sz) sz = sz * 2;
1441 lj_str_resizebuf(ls->L, &ls->sb, sz);
1442}
1443
1444static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
1445{
1446 if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
1447 fs_buf_resize(ls, len);
1448}
1449
1450/* Add string to buffer. */
1451static void fs_buf_str(LexState *ls, const char *str, MSize len)
1452{
1453 char *p = ls->sb.buf + ls->sb.n;
1454 MSize i;
1455 ls->sb.n += len;
1456 for (i = 0; i < len; i++) p[i] = str[i];
1457}
1458
1459/* Add ULEB128 value to buffer. */
1460static void fs_buf_uleb128(LexState *ls, uint32_t v)
1461{
1462 MSize n = ls->sb.n;
1463 uint8_t *p = (uint8_t *)ls->sb.buf;
1464 for (; v >= 0x80; v >>= 7)
1465 p[n++] = (uint8_t)((v & 0x7f) | 0x80);
1466 p[n++] = (uint8_t)v;
1467 ls->sb.n = n;
1468}
1469
1470/* Prepare variable info for prototype. */ 1438/* Prepare variable info for prototype. */
1471static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) 1439static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
1472{ 1440{
1473 VarInfo *vs =ls->vstack, *ve; 1441 VarInfo *vs =ls->vstack, *ve;
1474 MSize i, n; 1442 MSize i, n;
1475 BCPos lastpc; 1443 BCPos lastpc;
1476 lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ 1444 lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
1477 /* Store upvalue names. */ 1445 /* Store upvalue names. */
1478 for (i = 0, n = fs->nuv; i < n; i++) { 1446 for (i = 0, n = fs->nuv; i < n; i++) {
1479 GCstr *s = strref(vs[fs->uvmap[i]].name); 1447 GCstr *s = strref(vs[fs->uvmap[i]].name);
1480 MSize len = s->len+1; 1448 MSize len = s->len+1;
1481 fs_buf_need(ls, len); 1449 char *p = lj_buf_more(&ls->sb, len);
1482 fs_buf_str(ls, strdata(s), len); 1450 p = lj_buf_wmem(p, strdata(s), len);
1451 setsbufP(&ls->sb, p);
1483 } 1452 }
1484 *ofsvar = ls->sb.n; 1453 *ofsvar = sbuflen(&ls->sb);
1485 lastpc = 0; 1454 lastpc = 0;
1486 /* Store local variable names and compressed ranges. */ 1455 /* Store local variable names and compressed ranges. */
1487 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { 1456 for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
1488 if (!gola_isgotolabel(vs)) { 1457 if (!gola_isgotolabel(vs)) {
1489 GCstr *s = strref(vs->name); 1458 GCstr *s = strref(vs->name);
1490 BCPos startpc; 1459 BCPos startpc;
1460 char *p;
1491 if ((uintptr_t)s < VARNAME__MAX) { 1461 if ((uintptr_t)s < VARNAME__MAX) {
1492 fs_buf_need(ls, 1 + 2*5); 1462 p = lj_buf_more(&ls->sb, 1 + 2*5);
1493 ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; 1463 *p++ = (char)(uintptr_t)s;
1494 } else { 1464 } else {
1495 MSize len = s->len+1; 1465 MSize len = s->len+1;
1496 fs_buf_need(ls, len + 2*5); 1466 p = lj_buf_more(&ls->sb, len + 2*5);
1497 fs_buf_str(ls, strdata(s), len); 1467 p = lj_buf_wmem(p, strdata(s), len);
1498 } 1468 }
1499 startpc = vs->startpc; 1469 startpc = vs->startpc;
1500 fs_buf_uleb128(ls, startpc-lastpc); 1470 p = lj_strfmt_wuleb128(p, startpc-lastpc);
1501 fs_buf_uleb128(ls, vs->endpc-startpc); 1471 p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
1472 setsbufP(&ls->sb, p);
1502 lastpc = startpc; 1473 lastpc = startpc;
1503 } 1474 }
1504 } 1475 }
1505 fs_buf_need(ls, 1); 1476 lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
1506 ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ 1477 return sbuflen(&ls->sb);
1507 return ls->sb.n;
1508} 1478}
1509 1479
1510/* Fixup variable info for prototype. */ 1480/* Fixup variable info for prototype. */
@@ -1512,7 +1482,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
1512{ 1482{
1513 setmref(pt->uvinfo, p); 1483 setmref(pt->uvinfo, p);
1514 setmref(pt->varinfo, (char *)p + ofsvar); 1484 setmref(pt->varinfo, (char *)p + ofsvar);
1515 memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ 1485 memcpy(p, sbufB(&ls->sb), sbuflen(&ls->sb)); /* Copy from temp. buffer. */
1516} 1486}
1517#else 1487#else
1518 1488
@@ -1621,7 +1591,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
1621 L->top--; /* Pop table of constants. */ 1591 L->top--; /* Pop table of constants. */
1622 ls->vtop = fs->vbase; /* Reset variable stack. */ 1592 ls->vtop = fs->vbase; /* Reset variable stack. */
1623 ls->fs = fs->prev; 1593 ls->fs = fs->prev;
1624 lua_assert(ls->fs != NULL || ls->token == TK_eof); 1594 lua_assert(ls->fs != NULL || ls->tok == TK_eof);
1625 return pt; 1595 return pt;
1626} 1596}
1627 1597
@@ -1718,10 +1688,9 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
1718static void expr_kvalue(TValue *v, ExpDesc *e) 1688static void expr_kvalue(TValue *v, ExpDesc *e)
1719{ 1689{
1720 if (e->k <= VKTRUE) { 1690 if (e->k <= VKTRUE) {
1721 setitype(v, ~(uint32_t)e->k); 1691 setpriV(v, ~(uint32_t)e->k);
1722 } else if (e->k == VKSTR) { 1692 } else if (e->k == VKSTR) {
1723 setgcref(v->gcr, obj2gco(e->u.sval)); 1693 setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
1724 setitype(v, LJ_TSTR);
1725 } else { 1694 } else {
1726 lua_assert(tvisnumber(expr_numtv(e))); 1695 lua_assert(tvisnumber(expr_numtv(e)));
1727 *v = *expr_numtv(e); 1696 *v = *expr_numtv(e);
@@ -1743,15 +1712,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
1743 bcreg_reserve(fs, 1); 1712 bcreg_reserve(fs, 1);
1744 freg++; 1713 freg++;
1745 lex_check(ls, '{'); 1714 lex_check(ls, '{');
1746 while (ls->token != '}') { 1715 while (ls->tok != '}') {
1747 ExpDesc key, val; 1716 ExpDesc key, val;
1748 vcall = 0; 1717 vcall = 0;
1749 if (ls->token == '[') { 1718 if (ls->tok == '[') {
1750 expr_bracket(ls, &key); /* Already calls expr_toval. */ 1719 expr_bracket(ls, &key); /* Already calls expr_toval. */
1751 if (!expr_isk(&key)) expr_index(fs, e, &key); 1720 if (!expr_isk(&key)) expr_index(fs, e, &key);
1752 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; 1721 if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
1753 lex_check(ls, '='); 1722 lex_check(ls, '=');
1754 } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && 1723 } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
1755 lj_lex_lookahead(ls) == '=') { 1724 lj_lex_lookahead(ls) == '=') {
1756 expr_str(ls, &key); 1725 expr_str(ls, &key);
1757 lex_check(ls, '='); 1726 lex_check(ls, '=');
@@ -1844,11 +1813,11 @@ static BCReg parse_params(LexState *ls, int needself)
1844 lex_check(ls, '('); 1813 lex_check(ls, '(');
1845 if (needself) 1814 if (needself)
1846 var_new_lit(ls, nparams++, "self"); 1815 var_new_lit(ls, nparams++, "self");
1847 if (ls->token != ')') { 1816 if (ls->tok != ')') {
1848 do { 1817 do {
1849 if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1818 if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1850 var_new(ls, nparams++, lex_str(ls)); 1819 var_new(ls, nparams++, lex_str(ls));
1851 } else if (ls->token == TK_dots) { 1820 } else if (ls->tok == TK_dots) {
1852 lj_lex_next(ls); 1821 lj_lex_next(ls);
1853 fs->flags |= PROTO_VARARG; 1822 fs->flags |= PROTO_VARARG;
1854 break; 1823 break;
@@ -1882,7 +1851,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
1882 fs.bclim = pfs->bclim - pfs->pc; 1851 fs.bclim = pfs->bclim - pfs->pc;
1883 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ 1852 bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
1884 parse_chunk(ls); 1853 parse_chunk(ls);
1885 if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); 1854 if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
1886 pt = fs_finish(ls, (ls->lastline = ls->linenumber)); 1855 pt = fs_finish(ls, (ls->lastline = ls->linenumber));
1887 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ 1856 pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
1888 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); 1857 pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1921,13 +1890,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
1921 BCIns ins; 1890 BCIns ins;
1922 BCReg base; 1891 BCReg base;
1923 BCLine line = ls->linenumber; 1892 BCLine line = ls->linenumber;
1924 if (ls->token == '(') { 1893 if (ls->tok == '(') {
1925#if !LJ_52 1894#if !LJ_52
1926 if (line != ls->lastline) 1895 if (line != ls->lastline)
1927 err_syntax(ls, LJ_ERR_XAMBIG); 1896 err_syntax(ls, LJ_ERR_XAMBIG);
1928#endif 1897#endif
1929 lj_lex_next(ls); 1898 lj_lex_next(ls);
1930 if (ls->token == ')') { /* f(). */ 1899 if (ls->tok == ')') { /* f(). */
1931 args.k = VVOID; 1900 args.k = VVOID;
1932 } else { 1901 } else {
1933 expr_list(ls, &args); 1902 expr_list(ls, &args);
@@ -1935,11 +1904,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1935 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ 1904 setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
1936 } 1905 }
1937 lex_match(ls, ')', '(', line); 1906 lex_match(ls, ')', '(', line);
1938 } else if (ls->token == '{') { 1907 } else if (ls->tok == '{') {
1939 expr_table(ls, &args); 1908 expr_table(ls, &args);
1940 } else if (ls->token == TK_string) { 1909 } else if (ls->tok == TK_string) {
1941 expr_init(&args, VKSTR, 0); 1910 expr_init(&args, VKSTR, 0);
1942 args.u.sval = strV(&ls->tokenval); 1911 args.u.sval = strV(&ls->tokval);
1943 lj_lex_next(ls); 1912 lj_lex_next(ls);
1944 } else { 1913 } else {
1945 err_syntax(ls, LJ_ERR_XFUNARG); 1914 err_syntax(ls, LJ_ERR_XFUNARG);
@@ -1948,11 +1917,11 @@ static void parse_args(LexState *ls, ExpDesc *e)
1948 lua_assert(e->k == VNONRELOC); 1917 lua_assert(e->k == VNONRELOC);
1949 base = e->u.s.info; /* Base register for call. */ 1918 base = e->u.s.info; /* Base register for call. */
1950 if (args.k == VCALL) { 1919 if (args.k == VCALL) {
1951 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); 1920 ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
1952 } else { 1921 } else {
1953 if (args.k != VVOID) 1922 if (args.k != VVOID)
1954 expr_tonextreg(fs, &args); 1923 expr_tonextreg(fs, &args);
1955 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); 1924 ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
1956 } 1925 }
1957 expr_init(e, VCALL, bcemit_INS(fs, ins)); 1926 expr_init(e, VCALL, bcemit_INS(fs, ins));
1958 e->u.s.aux = base; 1927 e->u.s.aux = base;
@@ -1965,33 +1934,34 @@ static void expr_primary(LexState *ls, ExpDesc *v)
1965{ 1934{
1966 FuncState *fs = ls->fs; 1935 FuncState *fs = ls->fs;
1967 /* Parse prefix expression. */ 1936 /* Parse prefix expression. */
1968 if (ls->token == '(') { 1937 if (ls->tok == '(') {
1969 BCLine line = ls->linenumber; 1938 BCLine line = ls->linenumber;
1970 lj_lex_next(ls); 1939 lj_lex_next(ls);
1971 expr(ls, v); 1940 expr(ls, v);
1972 lex_match(ls, ')', '(', line); 1941 lex_match(ls, ')', '(', line);
1973 expr_discharge(ls->fs, v); 1942 expr_discharge(ls->fs, v);
1974 } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { 1943 } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
1975 var_lookup(ls, v); 1944 var_lookup(ls, v);
1976 } else { 1945 } else {
1977 err_syntax(ls, LJ_ERR_XSYMBOL); 1946 err_syntax(ls, LJ_ERR_XSYMBOL);
1978 } 1947 }
1979 for (;;) { /* Parse multiple expression suffixes. */ 1948 for (;;) { /* Parse multiple expression suffixes. */
1980 if (ls->token == '.') { 1949 if (ls->tok == '.') {
1981 expr_field(ls, v); 1950 expr_field(ls, v);
1982 } else if (ls->token == '[') { 1951 } else if (ls->tok == '[') {
1983 ExpDesc key; 1952 ExpDesc key;
1984 expr_toanyreg(fs, v); 1953 expr_toanyreg(fs, v);
1985 expr_bracket(ls, &key); 1954 expr_bracket(ls, &key);
1986 expr_index(fs, v, &key); 1955 expr_index(fs, v, &key);
1987 } else if (ls->token == ':') { 1956 } else if (ls->tok == ':') {
1988 ExpDesc key; 1957 ExpDesc key;
1989 lj_lex_next(ls); 1958 lj_lex_next(ls);
1990 expr_str(ls, &key); 1959 expr_str(ls, &key);
1991 bcemit_method(fs, v, &key); 1960 bcemit_method(fs, v, &key);
1992 parse_args(ls, v); 1961 parse_args(ls, v);
1993 } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { 1962 } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
1994 expr_tonextreg(fs, v); 1963 expr_tonextreg(fs, v);
1964 if (LJ_FR2) bcreg_reserve(fs, 1);
1995 parse_args(ls, v); 1965 parse_args(ls, v);
1996 } else { 1966 } else {
1997 break; 1967 break;
@@ -2002,14 +1972,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
2002/* Parse simple expression. */ 1972/* Parse simple expression. */
2003static void expr_simple(LexState *ls, ExpDesc *v) 1973static void expr_simple(LexState *ls, ExpDesc *v)
2004{ 1974{
2005 switch (ls->token) { 1975 switch (ls->tok) {
2006 case TK_number: 1976 case TK_number:
2007 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); 1977 expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
2008 copyTV(ls->L, &v->u.nval, &ls->tokenval); 1978 copyTV(ls->L, &v->u.nval, &ls->tokval);
2009 break; 1979 break;
2010 case TK_string: 1980 case TK_string:
2011 expr_init(v, VKSTR, 0); 1981 expr_init(v, VKSTR, 0);
2012 v->u.sval = strV(&ls->tokenval); 1982 v->u.sval = strV(&ls->tokval);
2013 break; 1983 break;
2014 case TK_nil: 1984 case TK_nil:
2015 expr_init(v, VKNIL, 0); 1985 expr_init(v, VKNIL, 0);
@@ -2097,11 +2067,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
2097static void expr_unop(LexState *ls, ExpDesc *v) 2067static void expr_unop(LexState *ls, ExpDesc *v)
2098{ 2068{
2099 BCOp op; 2069 BCOp op;
2100 if (ls->token == TK_not) { 2070 if (ls->tok == TK_not) {
2101 op = BC_NOT; 2071 op = BC_NOT;
2102 } else if (ls->token == '-') { 2072 } else if (ls->tok == '-') {
2103 op = BC_UNM; 2073 op = BC_UNM;
2104 } else if (ls->token == '#') { 2074 } else if (ls->tok == '#') {
2105 op = BC_LEN; 2075 op = BC_LEN;
2106 } else { 2076 } else {
2107 expr_simple(ls, v); 2077 expr_simple(ls, v);
@@ -2118,7 +2088,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
2118 BinOpr op; 2088 BinOpr op;
2119 synlevel_begin(ls); 2089 synlevel_begin(ls);
2120 expr_unop(ls, v); 2090 expr_unop(ls, v);
2121 op = token2binop(ls->token); 2091 op = token2binop(ls->tok);
2122 while (op != OPR_NOBINOPR && priority[op].left > limit) { 2092 while (op != OPR_NOBINOPR && priority[op].left > limit) {
2123 ExpDesc v2; 2093 ExpDesc v2;
2124 BinOpr nextop; 2094 BinOpr nextop;
@@ -2307,9 +2277,9 @@ static void parse_func(LexState *ls, BCLine line)
2307 lj_lex_next(ls); /* Skip 'function'. */ 2277 lj_lex_next(ls); /* Skip 'function'. */
2308 /* Parse function name. */ 2278 /* Parse function name. */
2309 var_lookup(ls, &v); 2279 var_lookup(ls, &v);
2310 while (ls->token == '.') /* Multiple dot-separated fields. */ 2280 while (ls->tok == '.') /* Multiple dot-separated fields. */
2311 expr_field(ls, &v); 2281 expr_field(ls, &v);
2312 if (ls->token == ':') { /* Optional colon to signify method call. */ 2282 if (ls->tok == ':') { /* Optional colon to signify method call. */
2313 needself = 1; 2283 needself = 1;
2314 expr_field(ls, &v); 2284 expr_field(ls, &v);
2315 } 2285 }
@@ -2322,9 +2292,9 @@ static void parse_func(LexState *ls, BCLine line)
2322/* -- Control transfer statements ----------------------------------------- */ 2292/* -- Control transfer statements ----------------------------------------- */
2323 2293
2324/* Check for end of block. */ 2294/* Check for end of block. */
2325static int endofblock(LexToken token) 2295static int parse_isend(LexToken tok)
2326{ 2296{
2327 switch (token) { 2297 switch (tok) {
2328 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: 2298 case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
2329 return 1; 2299 return 1;
2330 default: 2300 default:
@@ -2339,7 +2309,7 @@ static void parse_return(LexState *ls)
2339 FuncState *fs = ls->fs; 2309 FuncState *fs = ls->fs;
2340 lj_lex_next(ls); /* Skip 'return'. */ 2310 lj_lex_next(ls); /* Skip 'return'. */
2341 fs->flags |= PROTO_HAS_RETURN; 2311 fs->flags |= PROTO_HAS_RETURN;
2342 if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ 2312 if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
2343 ins = BCINS_AD(BC_RET0, 0, 1); 2313 ins = BCINS_AD(BC_RET0, 0, 1);
2344 } else { /* Return with one or more values. */ 2314 } else { /* Return with one or more values. */
2345 ExpDesc e; /* Receives the _last_ expression in the list. */ 2315 ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2405,18 +2375,18 @@ static void parse_label(LexState *ls)
2405 lex_check(ls, TK_label); 2375 lex_check(ls, TK_label);
2406 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ 2376 /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
2407 for (;;) { 2377 for (;;) {
2408 if (ls->token == TK_label) { 2378 if (ls->tok == TK_label) {
2409 synlevel_begin(ls); 2379 synlevel_begin(ls);
2410 parse_label(ls); 2380 parse_label(ls);
2411 synlevel_end(ls); 2381 synlevel_end(ls);
2412 } else if (LJ_52 && ls->token == ';') { 2382 } else if (LJ_52 && ls->tok == ';') {
2413 lj_lex_next(ls); 2383 lj_lex_next(ls);
2414 } else { 2384 } else {
2415 break; 2385 break;
2416 } 2386 }
2417 } 2387 }
2418 /* Trailing label is considered to be outside of scope. */ 2388 /* Trailing label is considered to be outside of scope. */
2419 if (endofblock(ls->token) && ls->token != TK_until) 2389 if (parse_isend(ls->tok) && ls->tok != TK_until)
2420 ls->vstack[idx].slot = fs->bl->nactvar; 2390 ls->vstack[idx].slot = fs->bl->nactvar;
2421 gola_resolve(ls, fs->bl, idx); 2391 gola_resolve(ls, fs->bl, idx);
2422} 2392}
@@ -2572,7 +2542,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
2572 lex_check(ls, TK_in); 2542 lex_check(ls, TK_in);
2573 line = ls->linenumber; 2543 line = ls->linenumber;
2574 assign_adjust(ls, 3, expr_list(ls, &e), &e); 2544 assign_adjust(ls, 3, expr_list(ls, &e), &e);
2575 bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ 2545 /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
2546 bcreg_bump(fs, 3+LJ_FR2);
2576 isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); 2547 isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
2577 var_add(ls, 3); /* Hidden control variables. */ 2548 var_add(ls, 3); /* Hidden control variables. */
2578 lex_check(ls, TK_do); 2549 lex_check(ls, TK_do);
@@ -2600,9 +2571,9 @@ static void parse_for(LexState *ls, BCLine line)
2600 fscope_begin(fs, &bl, FSCOPE_LOOP); 2571 fscope_begin(fs, &bl, FSCOPE_LOOP);
2601 lj_lex_next(ls); /* Skip 'for'. */ 2572 lj_lex_next(ls); /* Skip 'for'. */
2602 varname = lex_str(ls); /* Get first variable name. */ 2573 varname = lex_str(ls); /* Get first variable name. */
2603 if (ls->token == '=') 2574 if (ls->tok == '=')
2604 parse_for_num(ls, varname, line); 2575 parse_for_num(ls, varname, line);
2605 else if (ls->token == ',' || ls->token == TK_in) 2576 else if (ls->tok == ',' || ls->tok == TK_in)
2606 parse_for_iter(ls, varname); 2577 parse_for_iter(ls, varname);
2607 else 2578 else
2608 err_syntax(ls, LJ_ERR_XFOR); 2579 err_syntax(ls, LJ_ERR_XFOR);
@@ -2628,12 +2599,12 @@ static void parse_if(LexState *ls, BCLine line)
2628 BCPos flist; 2599 BCPos flist;
2629 BCPos escapelist = NO_JMP; 2600 BCPos escapelist = NO_JMP;
2630 flist = parse_then(ls); 2601 flist = parse_then(ls);
2631 while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ 2602 while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
2632 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2603 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2633 jmp_tohere(fs, flist); 2604 jmp_tohere(fs, flist);
2634 flist = parse_then(ls); 2605 flist = parse_then(ls);
2635 } 2606 }
2636 if (ls->token == TK_else) { /* Parse optional 'else' block. */ 2607 if (ls->tok == TK_else) { /* Parse optional 'else' block. */
2637 jmp_append(fs, &escapelist, bcemit_jmp(fs)); 2608 jmp_append(fs, &escapelist, bcemit_jmp(fs));
2638 jmp_tohere(fs, flist); 2609 jmp_tohere(fs, flist);
2639 lj_lex_next(ls); /* Skip 'else'. */ 2610 lj_lex_next(ls); /* Skip 'else'. */
@@ -2651,7 +2622,7 @@ static void parse_if(LexState *ls, BCLine line)
2651static int parse_stmt(LexState *ls) 2622static int parse_stmt(LexState *ls)
2652{ 2623{
2653 BCLine line = ls->linenumber; 2624 BCLine line = ls->linenumber;
2654 switch (ls->token) { 2625 switch (ls->tok) {
2655 case TK_if: 2626 case TK_if:
2656 parse_if(ls, line); 2627 parse_if(ls, line);
2657 break; 2628 break;
@@ -2710,7 +2681,7 @@ static void parse_chunk(LexState *ls)
2710{ 2681{
2711 int islast = 0; 2682 int islast = 0;
2712 synlevel_begin(ls); 2683 synlevel_begin(ls);
2713 while (!islast && !endofblock(ls->token)) { 2684 while (!islast && !parse_isend(ls->tok)) {
2714 islast = parse_stmt(ls); 2685 islast = parse_stmt(ls);
2715 lex_opt(ls, ';'); 2686 lex_opt(ls, ';');
2716 lua_assert(ls->fs->framesize >= ls->fs->freereg && 2687 lua_assert(ls->fs->framesize >= ls->fs->freereg &&
@@ -2745,7 +2716,7 @@ GCproto *lj_parse(LexState *ls)
2745 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ 2716 bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
2746 lj_lex_next(ls); /* Read-ahead first token. */ 2717 lj_lex_next(ls); /* Read-ahead first token. */
2747 parse_chunk(ls); 2718 parse_chunk(ls);
2748 if (ls->token != TK_eof) 2719 if (ls->tok != TK_eof)
2749 err_token(ls, TK_eof); 2720 err_token(ls, TK_eof);
2750 pt = fs_finish(ls, ls->linenumber); 2721 pt = fs_finish(ls, ls->linenumber);
2751 L->top--; /* Drop chunkname. */ 2722 L->top--; /* Drop chunkname. */
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..2fe40858
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,368 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_profile_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13#include "lj_buf.h"
14#include "lj_frame.h"
15#include "lj_debug.h"
16#include "lj_dispatch.h"
17#if LJ_HASJIT
18#include "lj_jit.h"
19#include "lj_trace.h"
20#endif
21#include "lj_profile.h"
22
23#include "luajit.h"
24
25#if LJ_PROFILE_SIGPROF
26
27#include <sys/time.h>
28#include <signal.h>
29#define profile_lock(ps) UNUSED(ps)
30#define profile_unlock(ps) UNUSED(ps)
31
32#elif LJ_PROFILE_PTHREAD
33
34#include <pthread.h>
35#include <time.h>
36#if LJ_TARGET_PS3
37#include <sys/timer.h>
38#endif
39#define profile_lock(ps) pthread_mutex_lock(&ps->lock)
40#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock)
41
42#elif LJ_PROFILE_WTHREAD
43
44#define WIN32_LEAN_AND_MEAN
45#if LJ_TARGET_XBOX360
46#include <xtl.h>
47#include <xbox.h>
48#else
49#include <windows.h>
50#endif
51typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
52#define profile_lock(ps) EnterCriticalSection(&ps->lock)
53#define profile_unlock(ps) LeaveCriticalSection(&ps->lock)
54
55#endif
56
57/* Profiler state. */
58typedef struct ProfileState {
59 global_State *g; /* VM state that started the profiler. */
60 luaJIT_profile_callback cb; /* Profiler callback. */
61 void *data; /* Profiler callback data. */
62 SBuf sb; /* String buffer for stack dumps. */
63 int interval; /* Sample interval in milliseconds. */
64 int samples; /* Number of samples for next callback. */
65 int vmstate; /* VM state when profile timer triggered. */
66#if LJ_PROFILE_SIGPROF
67 struct sigaction oldsa; /* Previous SIGPROF state. */
68#elif LJ_PROFILE_PTHREAD
69 pthread_mutex_t lock; /* g->hookmask update lock. */
70 pthread_t thread; /* Timer thread. */
71 int abort; /* Abort timer thread. */
72#elif LJ_PROFILE_WTHREAD
73#if LJ_TARGET_WINDOWS
74 HINSTANCE wmm; /* WinMM library handle. */
75 WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */
76 WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */
77#endif
78 CRITICAL_SECTION lock; /* g->hookmask update lock. */
79 HANDLE thread; /* Timer thread. */
80 int abort; /* Abort timer thread. */
81#endif
82} ProfileState;
83
84/* Sadly, we have to use a static profiler state.
85**
86** The SIGPROF variant needs a static pointer to the global state, anyway.
87** And it would be hard to extend for multiple threads. You can still use
88** multiple VMs in multiple threads, but only profile one at a time.
89*/
90static ProfileState profile_state;
91
92/* Default sample interval in milliseconds. */
93#define LJ_PROFILE_INTERVAL_DEFAULT 10
94
95/* -- Profiler/hook interaction ------------------------------------------- */
96
97#if !LJ_PROFILE_SIGPROF
98void LJ_FASTCALL lj_profile_hook_enter(global_State *g)
99{
100 ProfileState *ps = &profile_state;
101 if (ps->g) {
102 profile_lock(ps);
103 hook_enter(g);
104 profile_unlock(ps);
105 } else {
106 hook_enter(g);
107 }
108}
109
110void LJ_FASTCALL lj_profile_hook_leave(global_State *g)
111{
112 ProfileState *ps = &profile_state;
113 if (ps->g) {
114 profile_lock(ps);
115 hook_leave(g);
116 profile_unlock(ps);
117 } else {
118 hook_leave(g);
119 }
120}
121#endif
122
123/* -- Profile callbacks --------------------------------------------------- */
124
125/* Callback from profile hook (HOOK_PROFILE already cleared). */
126void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
127{
128 ProfileState *ps = &profile_state;
129 global_State *g = G(L);
130 uint8_t mask;
131 profile_lock(ps);
132 mask = (g->hookmask & ~HOOK_PROFILE);
133 if (!(mask & HOOK_VMEVENT)) {
134 int samples = ps->samples;
135 ps->samples = 0;
136 g->hookmask = HOOK_VMEVENT;
137 lj_dispatch_update(g);
138 profile_unlock(ps);
139 ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */
140 profile_lock(ps);
141 mask |= (g->hookmask & HOOK_PROFILE);
142 }
143 g->hookmask = mask;
144 lj_dispatch_update(g);
145 profile_unlock(ps);
146}
147
148/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
149static void profile_trigger(ProfileState *ps)
150{
151 global_State *g = ps->g;
152 uint8_t mask;
153 profile_lock(ps);
154 ps->samples++; /* Always increment number of samples. */
155 mask = g->hookmask;
156 if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */
157 int st = g->vmstate;
158 ps->vmstate = st >= 0 ? 'N' :
159 st == ~LJ_VMST_INTERP ? 'I' :
160 st == ~LJ_VMST_C ? 'C' :
161 st == ~LJ_VMST_GC ? 'G' : 'J';
162 g->hookmask = (mask | HOOK_PROFILE);
163 lj_dispatch_update(g);
164 }
165 profile_unlock(ps);
166}
167
168/* -- OS-specific profile timer handling ---------------------------------- */
169
170#if LJ_PROFILE_SIGPROF
171
172/* SIGPROF handler. */
173static void profile_signal(int sig)
174{
175 UNUSED(sig);
176 profile_trigger(&profile_state);
177}
178
179/* Start profiling timer. */
180static void profile_timer_start(ProfileState *ps)
181{
182 int interval = ps->interval;
183 struct itimerval tm;
184 struct sigaction sa;
185 tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
186 tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
187 setitimer(ITIMER_PROF, &tm, NULL);
188 sa.sa_flags = SA_RESTART;
189 sa.sa_handler = profile_signal;
190 sigemptyset(&sa.sa_mask);
191 sigaction(SIGPROF, &sa, &ps->oldsa);
192}
193
194/* Stop profiling timer. */
195static void profile_timer_stop(ProfileState *ps)
196{
197 struct itimerval tm;
198 tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
199 tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
200 setitimer(ITIMER_PROF, &tm, NULL);
201 sigaction(SIGPROF, &ps->oldsa, NULL);
202}
203
204#elif LJ_PROFILE_PTHREAD
205
206/* POSIX timer thread. */
207static void *profile_thread(ProfileState *ps)
208{
209 int interval = ps->interval;
210#if !LJ_TARGET_PS3
211 struct timespec ts;
212 ts.tv_sec = interval / 1000;
213 ts.tv_nsec = (interval % 1000) * 1000000;
214#endif
215 while (1) {
216#if LJ_TARGET_PS3
217 sys_timer_usleep(interval * 1000);
218#else
219 nanosleep(&ts, NULL);
220#endif
221 if (ps->abort) break;
222 profile_trigger(ps);
223 }
224 return NULL;
225}
226
227/* Start profiling timer thread. */
228static void profile_timer_start(ProfileState *ps)
229{
230 pthread_mutex_init(&ps->lock, 0);
231 ps->abort = 0;
232 pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
233}
234
235/* Stop profiling timer thread. */
236static void profile_timer_stop(ProfileState *ps)
237{
238 ps->abort = 1;
239 pthread_join(ps->thread, NULL);
240 pthread_mutex_destroy(&ps->lock);
241}
242
243#elif LJ_PROFILE_WTHREAD
244
245/* Windows timer thread. */
246static DWORD WINAPI profile_thread(void *psx)
247{
248 ProfileState *ps = (ProfileState *)psx;
249 int interval = ps->interval;
250#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
251 ps->wmm_tbp(interval);
252#endif
253 while (1) {
254 Sleep(interval);
255 if (ps->abort) break;
256 profile_trigger(ps);
257 }
258#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
259 ps->wmm_tep(interval);
260#endif
261 return 0;
262}
263
264/* Start profiling timer thread. */
265static void profile_timer_start(ProfileState *ps)
266{
267#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
268 if (!ps->wmm) { /* Load WinMM library on-demand. */
269 ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
270 if (ps->wmm) {
271 ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
272 ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
273 if (!ps->wmm_tbp || !ps->wmm_tep) {
274 ps->wmm = NULL;
275 return;
276 }
277 }
278 }
279#endif
280 InitializeCriticalSection(&ps->lock);
281 ps->abort = 0;
282 ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
283}
284
285/* Stop profiling timer thread. */
286static void profile_timer_stop(ProfileState *ps)
287{
288 ps->abort = 1;
289 WaitForSingleObject(ps->thread, INFINITE);
290 DeleteCriticalSection(&ps->lock);
291}
292
293#endif
294
295/* -- Public profiling API ------------------------------------------------ */
296
297/* Start profiling. */
298LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
299 luaJIT_profile_callback cb, void *data)
300{
301 ProfileState *ps = &profile_state;
302 int interval = LJ_PROFILE_INTERVAL_DEFAULT;
303 while (*mode) {
304 int m = *mode++;
305 switch (m) {
306 case 'i':
307 interval = 0;
308 while (*mode >= '0' && *mode <= '9')
309 interval = interval * 10 + (*mode++ - '0');
310 if (interval <= 0) interval = 1;
311 break;
312#if LJ_HASJIT
313 case 'l': case 'f':
314 L2J(L)->prof_mode = m;
315 lj_trace_flushall(L);
316 break;
317#endif
318 default: /* Ignore unknown mode chars. */
319 break;
320 }
321 }
322 if (ps->g) {
323 luaJIT_profile_stop(L);
324 if (ps->g) return; /* Profiler in use by another VM. */
325 }
326 ps->g = G(L);
327 ps->interval = interval;
328 ps->cb = cb;
329 ps->data = data;
330 ps->samples = 0;
331 lj_buf_init(L, &ps->sb);
332 profile_timer_start(ps);
333}
334
335/* Stop profiling. */
336LUA_API void luaJIT_profile_stop(lua_State *L)
337{
338 ProfileState *ps = &profile_state;
339 global_State *g = ps->g;
340 if (G(L) == g) { /* Only stop profiler if started by this VM. */
341 profile_timer_stop(ps);
342 g->hookmask &= ~HOOK_PROFILE;
343 lj_dispatch_update(g);
344#if LJ_HASJIT
345 G2J(g)->prof_mode = 0;
346 lj_trace_flushall(L);
347#endif
348 lj_buf_free(g, &ps->sb);
349 setmref(ps->sb.b, NULL);
350 setmref(ps->sb.e, NULL);
351 ps->g = NULL;
352 }
353}
354
355/* Return a compact stack dump. */
356LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
357 int depth, size_t *len)
358{
359 ProfileState *ps = &profile_state;
360 SBuf *sb = &ps->sb;
361 setsbufL(sb, L);
362 lj_buf_reset(sb);
363 lj_debug_dumpstack(L, sb, fmt, depth);
364 *len = (size_t)sbuflen(sb);
365 return sbufB(sb);
366}
367
368#endif
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..db69eb9e
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,21 @@
1/*
2** Low-overhead profiling.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_PROFILE_H
7#define _LJ_PROFILE_H
8
9#include "lj_obj.h"
10
11#if LJ_HASPROFILE
12
13LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
14#if !LJ_PROFILE_SIGPROF
/* The hook enter/leave functions are only declared without LJ_PROFILE_SIGPROF. */
15LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
16LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
17#endif /* !LJ_PROFILE_SIGPROF */
18
19#endif /* LJ_HASPROFILE */
20
21#endif /* _LJ_PROFILE_H */
diff --git a/src/lj_record.c b/src/lj_record.c
index 69822f54..8eec0071 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
20#endif 20#endif
21#include "lj_bc.h" 21#include "lj_bc.h"
22#include "lj_ff.h" 22#include "lj_ff.h"
23#if LJ_HASPROFILE
24#include "lj_debug.h"
25#endif
23#include "lj_ir.h" 26#include "lj_ir.h"
24#include "lj_jit.h" 27#include "lj_jit.h"
25#include "lj_ircall.h" 28#include "lj_ircall.h"
@@ -48,7 +51,7 @@ static void rec_check_ir(jit_State *J)
48{ 51{
49 IRRef i, nins = J->cur.nins, nk = J->cur.nk; 52 IRRef i, nins = J->cur.nins, nk = J->cur.nk;
50 lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); 53 lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
51 for (i = nins-1; i >= nk; i--) { 54 for (i = nk; i < nins; i++) {
52 IRIns *ir = IR(i); 55 IRIns *ir = IR(i);
53 uint32_t mode = lj_ir_mode[ir->o]; 56 uint32_t mode = lj_ir_mode[ir->o];
54 IRRef op1 = ir->op1; 57 IRRef op1 = ir->op1;
@@ -58,7 +61,10 @@ static void rec_check_ir(jit_State *J)
58 case IRMref: lua_assert(op1 >= nk); 61 case IRMref: lua_assert(op1 >= nk);
59 lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; 62 lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
60 case IRMlit: break; 63 case IRMlit: break;
61 case IRMcst: lua_assert(i < REF_BIAS); continue; 64 case IRMcst: lua_assert(i < REF_BIAS);
65 if (irt_is64(ir->t) && ir->o != IR_KNULL)
66 i++;
67 continue;
62 } 68 }
63 switch (irm_op2(mode)) { 69 switch (irm_op2(mode)) {
64 case IRMnone: lua_assert(op2 == 0); break; 70 case IRMnone: lua_assert(op2 == 0); break;
@@ -81,30 +87,48 @@ static void rec_check_slots(jit_State *J)
81 BCReg s, nslots = J->baseslot + J->maxslot; 87 BCReg s, nslots = J->baseslot + J->maxslot;
82 int32_t depth = 0; 88 int32_t depth = 0;
83 cTValue *base = J->L->base - J->baseslot; 89 cTValue *base = J->L->base - J->baseslot;
84 lua_assert(J->baseslot >= 1); 90 lua_assert(J->baseslot >= 1+LJ_FR2);
85 lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); 91 lua_assert(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME));
86 lua_assert(nslots <= LJ_MAX_JSLOTS); 92 lua_assert(nslots <= LJ_MAX_JSLOTS);
87 for (s = 0; s < nslots; s++) { 93 for (s = 0; s < nslots; s++) {
88 TRef tr = J->slot[s]; 94 TRef tr = J->slot[s];
89 if (tr) { 95 if (tr) {
90 cTValue *tv = &base[s]; 96 cTValue *tv = &base[s];
91 IRRef ref = tref_ref(tr); 97 IRRef ref = tref_ref(tr);
92 IRIns *ir; 98 IRIns *ir = NULL; /* Silence compiler. */
93 lua_assert(ref >= J->cur.nk && ref < J->cur.nins); 99 if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
94 ir = IR(ref); 100 lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
95 lua_assert(irt_t(ir->t) == tref_t(tr)); 101 ir = IR(ref);
102 lua_assert(irt_t(ir->t) == tref_t(tr));
103 }
96 if (s == 0) { 104 if (s == 0) {
97 lua_assert(tref_isfunc(tr)); 105 lua_assert(tref_isfunc(tr));
106#if LJ_FR2
107 } else if (s == 1) {
108 lua_assert((tr & ~TREF_FRAME) == 0);
109#endif
98 } else if ((tr & TREF_FRAME)) { 110 } else if ((tr & TREF_FRAME)) {
99 GCfunc *fn = gco2func(frame_gc(tv)); 111 GCfunc *fn = gco2func(frame_gc(tv));
100 BCReg delta = (BCReg)(tv - frame_prev(tv)); 112 BCReg delta = (BCReg)(tv - frame_prev(tv));
113#if LJ_FR2
114 if (ref)
115 lua_assert(ir_knum(ir)->u64 == tv->u64);
116 tr = J->slot[s-1];
117 ir = IR(tref_ref(tr));
118#endif
101 lua_assert(tref_isfunc(tr)); 119 lua_assert(tref_isfunc(tr));
102 if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); 120 if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
103 lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); 121 lua_assert(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
122 : (s == delta + LJ_FR2));
104 depth++; 123 depth++;
105 } else if ((tr & TREF_CONT)) { 124 } else if ((tr & TREF_CONT)) {
125#if LJ_FR2
126 if (ref)
127 lua_assert(ir_knum(ir)->u64 == tv->u64);
128#else
106 lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); 129 lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
107 lua_assert((J->slot[s+1] & TREF_FRAME)); 130#endif
131 lua_assert((J->slot[s+1+LJ_FR2] & TREF_FRAME));
108 depth++; 132 depth++;
109 } else { 133 } else {
110 if (tvisnumber(tv)) 134 if (tvisnumber(tv))
@@ -156,10 +180,10 @@ static TRef sload(jit_State *J, int32_t slot)
156/* Get TRef for current function. */ 180/* Get TRef for current function. */
157static TRef getcurrf(jit_State *J) 181static TRef getcurrf(jit_State *J)
158{ 182{
159 if (J->base[-1]) 183 if (J->base[-1-LJ_FR2])
160 return J->base[-1]; 184 return J->base[-1-LJ_FR2];
161 lua_assert(J->baseslot == 1); 185 lua_assert(J->baseslot == 1+LJ_FR2);
162 return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); 186 return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
163} 187}
164 188
165/* Compare for raw object equality. 189/* Compare for raw object equality.
@@ -230,8 +254,12 @@ static void canonicalize_slots(jit_State *J)
230} 254}
231 255
232/* Stop recording. */ 256/* Stop recording. */
233static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) 257void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
234{ 258{
259#ifdef LUAJIT_ENABLE_TABLE_BUMP
260 if (J->retryrec)
261 lj_trace_err(J, LJ_TRERR_RETRY);
262#endif
235 lj_trace_end(J); 263 lj_trace_end(J);
236 J->cur.linktype = (uint8_t)linktype; 264 J->cur.linktype = (uint8_t)linktype;
237 J->cur.link = (uint16_t)lnk; 265 J->cur.link = (uint16_t)lnk;
@@ -499,8 +527,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
499static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) 527static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
500{ 528{
501 BCReg ra = bc_a(iterins); 529 BCReg ra = bc_a(iterins);
502 lua_assert(J->base[ra] != 0); 530 if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
503 if (!tref_isnil(J->base[ra])) { /* Looping back? */
504 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ 531 J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
505 J->maxslot = ra-1+bc_b(J->pc[-1]); 532 J->maxslot = ra-1+bc_b(J->pc[-1]);
506 J->pc += bc_j(iterins)+1; 533 J->pc += bc_j(iterins)+1;
@@ -538,12 +565,12 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
538/* Handle the case when an interpreted loop op is hit. */ 565/* Handle the case when an interpreted loop op is hit. */
539static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) 566static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
540{ 567{
541 if (J->parent == 0) { 568 if (J->parent == 0 && J->exitno == 0) {
542 if (pc == J->startpc && J->framedepth + J->retdepth == 0) { 569 if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
543 /* Same loop? */ 570 /* Same loop? */
544 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ 571 if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
545 lj_trace_err(J, LJ_TRERR_LLEAVE); 572 lj_trace_err(J, LJ_TRERR_LLEAVE);
546 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ 573 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
547 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ 574 } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
548 /* It's usually better to abort here and wait until the inner loop 575 /* It's usually better to abort here and wait until the inner loop
549 ** is traced. But if the inner loop repeatedly didn't loop back, 576 ** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +595,64 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
568/* Handle the case when an already compiled loop op is hit. */ 595/* Handle the case when an already compiled loop op is hit. */
569static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) 596static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
570{ 597{
571 if (J->parent == 0) { /* Root trace hit an inner loop. */ 598 if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
572 /* Better let the inner loop spawn a side trace back here. */ 599 /* Better let the inner loop spawn a side trace back here. */
573 lj_trace_err(J, LJ_TRERR_LINNER); 600 lj_trace_err(J, LJ_TRERR_LINNER);
574 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ 601 } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
575 J->instunroll = 0; /* Cannot continue across a compiled loop op. */ 602 J->instunroll = 0; /* Cannot continue across a compiled loop op. */
576 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 603 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
577 rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ 604 lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
578 else 605 else
579 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ 606 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
580 } /* Side trace continues across a loop that's left or not entered. */ 607 } /* Side trace continues across a loop that's left or not entered. */
581} 608}
582 609
610/* -- Record profiler hook checks ----------------------------------------- */
611
612#if LJ_HASPROFILE
613
614/* Need to insert profiler hook check?
**
** Returns 1 if a check should be emitted for the instruction at pc in
** prototype pt, 0 otherwise (always 0 when pt is NULL).
** A check is needed when:
**   - pt differs from the previously seen prototype and there was one, or
**   - in 'l' mode, the source line of pc differs from the previous line.
** Side effects: updates J->prev_pt and, depending on the path taken,
** J->prev_line. Callers must only call this while J->prof_mode is 'f' or
** 'l' (asserted).
*/
615static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
616{
617 GCproto *ppt;
618 lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l');
619 if (!pt)
620 return 0;
621 ppt = J->prev_pt;
622 J->prev_pt = pt;
623 if (pt != ppt && ppt) { /* Switched to a different prototype. */
624 J->prev_line = -1; /* Presumably never equals a real line: next line check fires. */
625 return 1;
626 }
627 if (J->prof_mode == 'l') { /* Line granularity: compare source lines. */
628 BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
629 BCLine pline = J->prev_line;
630 J->prev_line = line;
631 if (pline != line)
632 return 1;
633 }
634 return 0;
635}
636
/* Emit a profiler check for the instruction at pc, if one is needed.
** Only active while J->prof_mode is non-zero. When rec_profile_need() asks
** for a check, an IR_PROF instruction is emitted and a snapshot is added.
*/
637static void rec_profile_ins(jit_State *J, const BCIns *pc)
638{
639 if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
640 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
641 lj_snap_add(J);
642 }
643}
644
/* Emit a profiler check for a return (function mode 'f' only).
** Emits an IR_PROF instruction, forgets the previously seen prototype and
** adds a snapshot.
*/
645static void rec_profile_ret(jit_State *J)
646{
647 if (J->prof_mode == 'f') {
648 emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
649 J->prev_pt = NULL; /* rec_profile_need() then sees no previous prototype. */
650 lj_snap_add(J);
651 }
652}
653
654#endif
655
583/* -- Record calls and returns -------------------------------------------- */ 656/* -- Record calls and returns -------------------------------------------- */
584 657
585/* Specialize to the runtime value of the called function or its prototype. */ 658/* Specialize to the runtime value of the called function or its prototype. */
@@ -590,11 +663,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
590 GCproto *pt = funcproto(fn); 663 GCproto *pt = funcproto(fn);
591 /* Too many closures created? Probably not a monomorphic function. */ 664 /* Too many closures created? Probably not a monomorphic function. */
592 if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ 665 if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
593 TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); 666 TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
594 emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); 667 emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
595 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ 668 (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
596 return tr; 669 return tr;
597 } 670 }
671 } else {
672 /* Don't specialize to non-monomorphic builtins. */
673 switch (fn->c.ffid) {
674 case FF_coroutine_wrap_aux:
675 case FF_string_gmatch_aux:
676 /* NYI: io_file_iter doesn't have an ffid, yet. */
677 { /* Specialize to the ffid. */
678 TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
679 emitir(IRTG(IR_EQ, IRT_INT), trid, lj_ir_kint(J, fn->c.ffid));
680 }
681 return tr;
682 default:
683 /* NYI: don't specialize to non-monomorphic C functions. */
684 break;
685 }
598 } 686 }
599 /* Otherwise specialize to the function (closure) value itself. */ 687 /* Otherwise specialize to the function (closure) value itself. */
600 kfunc = lj_ir_kfunc(J, fn); 688 kfunc = lj_ir_kfunc(J, fn);
@@ -607,21 +695,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
607{ 695{
608 RecordIndex ix; 696 RecordIndex ix;
609 TValue *functv = &J->L->base[func]; 697 TValue *functv = &J->L->base[func];
610 TRef *fbase = &J->base[func]; 698 TRef kfunc, *fbase = &J->base[func];
611 ptrdiff_t i; 699 ptrdiff_t i;
612 for (i = 0; i <= nargs; i++) 700 (void)getslot(J, func); /* Ensure func has a reference. */
613 (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ 701 for (i = 1; i <= nargs; i++)
702 (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
614 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ 703 if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
615 ix.tab = fbase[0]; 704 ix.tab = fbase[0];
616 copyTV(J->L, &ix.tabv, functv); 705 copyTV(J->L, &ix.tabv, functv);
617 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) 706 if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
618 lj_trace_err(J, LJ_TRERR_NOMM); 707 lj_trace_err(J, LJ_TRERR_NOMM);
619 for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ 708 for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
620 fbase[i] = fbase[i-1]; 709 fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
710#if LJ_FR2
711 fbase[2] = fbase[0];
712#endif
621 fbase[0] = ix.mobj; /* Replace function. */ 713 fbase[0] = ix.mobj; /* Replace function. */
622 functv = &ix.mobjv; 714 functv = &ix.mobjv;
623 } 715 }
624 fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); 716 kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
717#if LJ_FR2
718 fbase[0] = kfunc;
719 fbase[1] = TREF_FRAME;
720#else
721 fbase[0] = kfunc | TREF_FRAME;
722#endif
625 J->maxslot = (BCReg)nargs; 723 J->maxslot = (BCReg)nargs;
626} 724}
627 725
@@ -631,8 +729,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
631 rec_call_setup(J, func, nargs); 729 rec_call_setup(J, func, nargs);
632 /* Bump frame. */ 730 /* Bump frame. */
633 J->framedepth++; 731 J->framedepth++;
634 J->base += func+1; 732 J->base += func+1+LJ_FR2;
635 J->baseslot += func+1; 733 J->baseslot += func+1+LJ_FR2;
636 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) 734 if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
637 lj_trace_err(J, LJ_TRERR_STACKOV); 735 lj_trace_err(J, LJ_TRERR_STACKOV);
638} 736}
@@ -650,7 +748,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
650 func += cbase; 748 func += cbase;
651 } 749 }
652 /* Move func + args down. */ 750 /* Move func + args down. */
653 memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); 751 if (LJ_FR2 && J->baseslot == 2)
752 J->base[func+1] = TREF_FRAME;
753 memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
654 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ 754 /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
655 /* Tailcalls can form a loop, so count towards the loop unroll limit. */ 755 /* Tailcalls can form a loop, so count towards the loop unroll limit. */
656 if (++J->tailcalled > J->loopunroll) 756 if (++J->tailcalled > J->loopunroll)
@@ -680,6 +780,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
680 return 0; 780 return 0;
681} 781}
682 782
783static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
784
683/* Record return. */ 785/* Record return. */
684void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) 786void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
685{ 787{
@@ -691,7 +793,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
691 BCReg cbase = (BCReg)frame_delta(frame); 793 BCReg cbase = (BCReg)frame_delta(frame);
692 if (--J->framedepth <= 0) 794 if (--J->framedepth <= 0)
693 lj_trace_err(J, LJ_TRERR_NYIRETL); 795 lj_trace_err(J, LJ_TRERR_NYIRETL);
694 lua_assert(J->baseslot > 1); 796 lua_assert(J->baseslot > 1+LJ_FR2);
695 gotresults++; 797 gotresults++;
696 rbase += cbase; 798 rbase += cbase;
697 J->baseslot -= (BCReg)cbase; 799 J->baseslot -= (BCReg)cbase;
@@ -702,19 +804,20 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
702 /* Return to lower frame via interpreter for unhandled cases. */ 804 /* Return to lower frame via interpreter for unhandled cases. */
703 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && 805 if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
704 (!frame_islua(frame) || 806 (!frame_islua(frame) ||
705 (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { 807 (J->parent == 0 && J->exitno == 0 &&
808 !bc_isret(bc_op(J->cur.startins))))) {
706 /* NYI: specialize to frame type and return directly, not via RET*. */ 809 /* NYI: specialize to frame type and return directly, not via RET*. */
707 for (i = 0; i < (ptrdiff_t)rbase; i++) 810 for (i = 0; i < (ptrdiff_t)rbase; i++)
708 J->base[i] = 0; /* Purge dead slots. */ 811 J->base[i] = 0; /* Purge dead slots. */
709 J->maxslot = rbase + (BCReg)gotresults; 812 J->maxslot = rbase + (BCReg)gotresults;
710 rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ 813 lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
711 return; 814 return;
712 } 815 }
713 if (frame_isvarg(frame)) { 816 if (frame_isvarg(frame)) {
714 BCReg cbase = (BCReg)frame_delta(frame); 817 BCReg cbase = (BCReg)frame_delta(frame);
715 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ 818 if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
716 lj_trace_err(J, LJ_TRERR_NYIRETL); 819 lj_trace_err(J, LJ_TRERR_NYIRETL);
717 lua_assert(J->baseslot > 1); 820 lua_assert(J->baseslot > 1+LJ_FR2);
718 rbase += cbase; 821 rbase += cbase;
719 J->baseslot -= (BCReg)cbase; 822 J->baseslot -= (BCReg)cbase;
720 J->base -= cbase; 823 J->base -= cbase;
@@ -724,27 +827,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
724 BCIns callins = *(frame_pc(frame)-1); 827 BCIns callins = *(frame_pc(frame)-1);
725 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; 828 ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
726 BCReg cbase = bc_a(callins); 829 BCReg cbase = bc_a(callins);
727 GCproto *pt = funcproto(frame_func(frame - (cbase+1))); 830 GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
728 if ((pt->flags & PROTO_NOJIT)) 831 if ((pt->flags & PROTO_NOJIT))
729 lj_trace_err(J, LJ_TRERR_CJITOFF); 832 lj_trace_err(J, LJ_TRERR_CJITOFF);
730 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { 833 if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
731 if (check_downrec_unroll(J, pt)) { 834 if (check_downrec_unroll(J, pt)) {
732 J->maxslot = (BCReg)(rbase + gotresults); 835 J->maxslot = (BCReg)(rbase + gotresults);
733 lj_snap_purge(J); 836 lj_snap_purge(J);
734 rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ 837 lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
735 return; 838 return;
736 } 839 }
737 lj_snap_add(J); 840 lj_snap_add(J);
738 } 841 }
739 for (i = 0; i < nresults; i++) /* Adjust results. */ 842 for (i = 0; i < nresults; i++) /* Adjust results. */
740 J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; 843 J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
741 J->maxslot = cbase+(BCReg)nresults; 844 J->maxslot = cbase+(BCReg)nresults;
742 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ 845 if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
743 J->framedepth--; 846 J->framedepth--;
744 lua_assert(J->baseslot > cbase+1); 847 lua_assert(J->baseslot > cbase+1+LJ_FR2);
745 J->baseslot -= cbase+1; 848 J->baseslot -= cbase+1+LJ_FR2;
746 J->base -= cbase+1; 849 J->base -= cbase+1+LJ_FR2;
747 } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { 850 } else if (J->parent == 0 && J->exitno == 0 &&
851 !bc_isret(bc_op(J->cur.startins))) {
748 /* Return to lower frame would leave the loop in a root trace. */ 852 /* Return to lower frame would leave the loop in a root trace. */
749 lj_trace_err(J, LJ_TRERR_LLEAVE); 853 lj_trace_err(J, LJ_TRERR_LLEAVE);
750 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ 854 } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
@@ -752,13 +856,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
752 } else { /* Return to lower frame. Guard for the target we return to. */ 856 } else { /* Return to lower frame. Guard for the target we return to. */
753 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); 857 TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
754 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); 858 TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
755 emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); 859 emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
756 J->retdepth++; 860 J->retdepth++;
757 J->needsnap = 1; 861 J->needsnap = 1;
758 lua_assert(J->baseslot == 1); 862 lua_assert(J->baseslot == 1+LJ_FR2);
759 /* Shift result slots up and clear the slots of the new frame below. */ 863 /* Shift result slots up and clear the slots of the new frame below. */
760 memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); 864 memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
761 memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); 865 memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
762 } 866 }
763 } else if (frame_iscont(frame)) { /* Return to continuation frame. */ 867 } else if (frame_iscont(frame)) { /* Return to continuation frame. */
764 ASMFunction cont = frame_contf(frame); 868 ASMFunction cont = frame_contf(frame);
@@ -767,16 +871,40 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
767 lj_trace_err(J, LJ_TRERR_NYIRETL); 871 lj_trace_err(J, LJ_TRERR_NYIRETL);
768 J->baseslot -= (BCReg)cbase; 872 J->baseslot -= (BCReg)cbase;
769 J->base -= cbase; 873 J->base -= cbase;
770 J->maxslot = cbase-2; 874 J->maxslot = cbase-(2<<LJ_FR2);
771 if (cont == lj_cont_ra) { 875 if (cont == lj_cont_ra) {
772 /* Copy result to destination slot. */ 876 /* Copy result to destination slot. */
773 BCReg dst = bc_a(*(frame_contpc(frame)-1)); 877 BCReg dst = bc_a(*(frame_contpc(frame)-1));
774 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; 878 J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
775 if (dst >= J->maxslot) J->maxslot = dst+1; 879 if (dst >= J->maxslot) {
880 J->maxslot = dst+1;
881 }
776 } else if (cont == lj_cont_nop) { 882 } else if (cont == lj_cont_nop) {
777 /* Nothing to do here. */ 883 /* Nothing to do here. */
778 } else if (cont == lj_cont_cat) { 884 } else if (cont == lj_cont_cat) {
779 lua_assert(0); 885 BCReg bslot = bc_b(*(frame_contpc(frame)-1));
886 TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
887 if (bslot != J->maxslot) { /* Concatenate the remainder. */
888 TValue *b = J->L->base, save; /* Simulate lower frame and result. */
889 J->base[J->maxslot] = tr;
890 copyTV(J->L, &save, b-(2<<LJ_FR2));
891 if (gotresults)
892 copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
893 else
894 setnilV(b-(2<<LJ_FR2));
895 J->L->base = b - cbase;
896 tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
897 b = J->L->base + cbase; /* Undo. */
898 J->L->base = b;
899 copyTV(J->L, b-(2<<LJ_FR2), &save);
900 }
901 if (tr) { /* Store final result. */
902 BCReg dst = bc_a(*(frame_contpc(frame)-1));
903 J->base[dst] = tr;
904 if (dst >= J->maxslot) {
905 J->maxslot = dst+1;
906 }
907 } /* Otherwise continue with another __concat call. */
780 } else { 908 } else {
781 /* Result type already specialized. */ 909 /* Result type already specialized. */
782 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); 910 lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
@@ -784,7 +912,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
784 } else { 912 } else {
785 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ 913 lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
786 } 914 }
787 lua_assert(J->baseslot >= 1); 915 lua_assert(J->baseslot >= 1+LJ_FR2);
788} 916}
789 917
790/* -- Metamethod handling ------------------------------------------------- */ 918/* -- Metamethod handling ------------------------------------------------- */
@@ -792,19 +920,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
792/* Prepare to record call to metamethod. */ 920/* Prepare to record call to metamethod. */
793static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) 921static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
794{ 922{
795 BCReg s, top = curr_proto(J->L)->framesize; 923 BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
796 TRef trcont; 924#if LJ_FR2
797 setcont(&J->L->base[top], cont); 925 J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
798#if LJ_64 926 J->base[top+1] = TREF_CONT;
799 trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
800#else 927#else
801 trcont = lj_ir_kptr(J, (void *)cont); 928 J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
802#endif 929#endif
803 J->base[top] = trcont | TREF_CONT;
804 J->framedepth++; 930 J->framedepth++;
805 for (s = J->maxslot; s < top; s++) 931 for (s = J->maxslot; s < top; s++)
806 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ 932 J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
807 return top+1; 933 return top+1+LJ_FR2;
808} 934}
809 935
810/* Record metamethod lookup. */ 936/* Record metamethod lookup. */
@@ -823,7 +949,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
823 cTValue *mo; 949 cTValue *mo;
824 if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { 950 if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
825 /* Specialize to the C library namespace object. */ 951 /* Specialize to the C library namespace object. */
826 emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); 952 emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
827 } else { 953 } else {
828 /* Specialize to the type of userdata. */ 954 /* Specialize to the type of userdata. */
829 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); 955 TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
@@ -852,7 +978,13 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
852 } 978 }
853 /* The cdata metatable is treated as immutable. */ 979 /* The cdata metatable is treated as immutable. */
854 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; 980 if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
981#if LJ_GC64
982 /* TODO: fix ARM32 asm_fload(), so we can use this for all archs. */
983 ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
984 GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
985#else
855 ix->mt = mix.tab = lj_ir_ktab(J, mt); 986 ix->mt = mix.tab = lj_ir_ktab(J, mt);
987#endif
856 goto nocheck; 988 goto nocheck;
857 } 989 }
858 ix->mt = mt ? mix.tab : TREF_NIL; 990 ix->mt = mt ? mix.tab : TREF_NIL;
@@ -879,12 +1011,12 @@ nocheck:
879static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) 1011static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
880{ 1012{
881 /* Set up metamethod call first to save ix->tab and ix->tabv. */ 1013 /* Set up metamethod call first to save ix->tab and ix->tabv. */
882 BCReg func = rec_mm_prep(J, lj_cont_ra); 1014 BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
883 TRef *base = J->base + func; 1015 TRef *base = J->base + func;
884 TValue *basev = J->L->base + func; 1016 TValue *basev = J->L->base + func;
885 base[1] = ix->tab; base[2] = ix->key; 1017 base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
886 copyTV(J->L, basev+1, &ix->tabv); 1018 copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
887 copyTV(J->L, basev+2, &ix->keyv); 1019 copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
888 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ 1020 if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
889 if (mm != MM_unm) { 1021 if (mm != MM_unm) {
890 ix->tab = ix->key; 1022 ix->tab = ix->key;
@@ -896,6 +1028,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
896 } 1028 }
897ok: 1029ok:
898 base[0] = ix->mobj; 1030 base[0] = ix->mobj;
1031#if LJ_FR2
1032 base[1] = 0;
1033#endif
899 copyTV(J->L, basev+0, &ix->mobjv); 1034 copyTV(J->L, basev+0, &ix->mobjv);
900 lj_record_call(J, func, 2); 1035 lj_record_call(J, func, 2);
901 return 0; /* No result yet. */ 1036 return 0; /* No result yet. */
@@ -912,6 +1047,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
912 TRef *base = J->base + func; 1047 TRef *base = J->base + func;
913 TValue *basev = J->L->base + func; 1048 TValue *basev = J->L->base + func;
914 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); 1049 base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
1050 base += LJ_FR2;
1051 basev += LJ_FR2;
915 base[1] = tr; copyTV(J->L, basev+1, tv); 1052 base[1] = tr; copyTV(J->L, basev+1, tv);
916#if LJ_52 1053#if LJ_52
917 base[2] = tr; copyTV(J->L, basev+2, tv); 1054 base[2] = tr; copyTV(J->L, basev+2, tv);
@@ -931,10 +1068,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
931static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) 1068static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
932{ 1069{
933 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); 1070 BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
934 TRef *base = J->base + func; 1071 TRef *base = J->base + func + LJ_FR2;
935 TValue *tv = J->L->base + func; 1072 TValue *tv = J->L->base + func + LJ_FR2;
936 base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; 1073 base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
937 copyTV(J->L, tv+0, &ix->mobjv); 1074 copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
938 copyTV(J->L, tv+1, &ix->valv); 1075 copyTV(J->L, tv+1, &ix->valv);
939 copyTV(J->L, tv+2, &ix->keyv); 1076 copyTV(J->L, tv+2, &ix->keyv);
940 lj_record_call(J, func, 2); 1077 lj_record_call(J, func, 2);
@@ -1041,6 +1178,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
1041 1178
1042/* -- Indexed access ------------------------------------------------------ */ 1179/* -- Indexed access ------------------------------------------------------ */
1043 1180
1181#ifdef LUAJIT_ENABLE_TABLE_BUMP
1182/* Bump table allocations in bytecode when they grow during recording. */
1183static void rec_idx_bump(jit_State *J, RecordIndex *ix)
1184{
1185 RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))];
1186 if (tref_ref(ix->tab) == rbc->ref) {
1187 const BCIns *pc = mref(rbc->pc, const BCIns);
1188 GCtab *tb = tabV(&ix->tabv);
1189 uint32_t nhbits;
1190 IRIns *ir;
1191 if (!tvisnil(&ix->keyv))
1192 (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */
1193 nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0;
1194 ir = IR(tref_ref(ix->tab));
1195 if (ir->o == IR_TNEW) {
1196 uint32_t ah = bc_d(*pc);
1197 uint32_t asize = ah & 0x7ff, hbits = ah >> 11;
1198 if (nhbits > hbits) hbits = nhbits;
1199 if (tb->asize > asize) {
1200 asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff;
1201 }
1202 if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */
1203 /* Patch bytecode, but continue recording (for more patching). */
1204 setbc_d(pc, (asize | (hbits<<11)));
1205 /* Patching TNEW operands is only safe if the trace is aborted. */
1206 ir->op1 = asize; ir->op2 = hbits;
1207 J->retryrec = 1; /* Abort the trace at the end of recording. */
1208 }
1209 } else if (ir->o == IR_TDUP) {
1210 GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc)));
1211 /* Grow template table, but preserve keys with nil values. */
1212 if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) ||
1213 (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) {
1214 Node *node = noderef(tpl->node);
1215 uint32_t i, hmask = tpl->hmask, asize;
1216 TValue *array;
1217 for (i = 0; i <= hmask; i++) {
1218 if (!tvisnil(&node[i].key) && tvisnil(&node[i].val))
1219 settabV(J->L, &node[i].val, tpl);
1220 }
1221 if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) {
1222 TValue *o = lj_tab_set(J->L, tpl, &ix->keyv);
1223 if (tvisnil(o)) settabV(J->L, o, tpl);
1224 }
1225 lj_tab_resize(J->L, tpl, tb->asize, nhbits);
1226 node = noderef(tpl->node);
1227 hmask = tpl->hmask;
1228 for (i = 0; i <= hmask; i++) {
1229 /* This is safe, since template tables only hold immutable values. */
1230 if (tvistab(&node[i].val))
1231 setnilV(&node[i].val);
1232 }
1233 /* The shape of the table may have changed. Clean up array part, too. */
1234 asize = tpl->asize;
1235 array = tvref(tpl->array);
1236 for (i = 0; i < asize; i++) {
1237 if (tvistab(&array[i]))
1238 setnilV(&array[i]);
1239 }
1240 J->retryrec = 1; /* Abort the trace at the end of recording. */
1241 }
1242 }
1243 }
1244}
1245#endif
1246
1044/* Record bounds-check. */ 1247/* Record bounds-check. */
1045static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) 1248static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1046{ 1249{
@@ -1080,11 +1283,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1080} 1283}
1081 1284
1082/* Record indexed key lookup. */ 1285/* Record indexed key lookup. */
1083static TRef rec_idx_key(jit_State *J, RecordIndex *ix) 1286static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
1287 IRType1 *rbguard)
1084{ 1288{
1085 TRef key; 1289 TRef key;
1086 GCtab *t = tabV(&ix->tabv); 1290 GCtab *t = tabV(&ix->tabv);
1087 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ 1291 ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
1292 *rbref = 0;
1293 rbguard->irt = 0;
1088 1294
1089 /* Integer keys are looked up in the array part first. */ 1295 /* Integer keys are looked up in the array part first. */
1090 key = ix->key; 1296 key = ix->key;
@@ -1098,8 +1304,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1098 if ((MSize)k < t->asize) { /* Currently an array key? */ 1304 if ((MSize)k < t->asize) { /* Currently an array key? */
1099 TRef arrayref; 1305 TRef arrayref;
1100 rec_idx_abc(J, asizeref, ikey, t->asize); 1306 rec_idx_abc(J, asizeref, ikey, t->asize);
1101 arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); 1307 arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
1102 return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); 1308 return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
1103 } else { /* Currently not in array (may be an array extension)? */ 1309 } else { /* Currently not in array (may be an array extension)? */
1104 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ 1310 emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
1105 if (k == 0 && tref_isk(key)) 1311 if (k == 0 && tref_isk(key))
@@ -1134,16 +1340,18 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1134 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); 1340 MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
1135 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && 1341 if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
1136 hslot <= 65535*(MSize)sizeof(Node)) { 1342 hslot <= 65535*(MSize)sizeof(Node)) {
1137 TRef node, kslot; 1343 TRef node, kslot, hm;
1138 TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); 1344 *rbref = J->cur.nins; /* Mark possible rollback point. */
1345 *rbguard = J->guardemit;
1346 hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
1139 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); 1347 emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
1140 node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); 1348 node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
1141 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); 1349 kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
1142 return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); 1350 return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
1143 } 1351 }
1144 } 1352 }
1145 /* Fall back to a regular hash lookup. */ 1353 /* Fall back to a regular hash lookup. */
1146 return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); 1354 return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
1147} 1355}
1148 1356
1149/* Determine whether a key is NOT one of the fast metamethod names. */ 1357/* Determine whether a key is NOT one of the fast metamethod names. */
@@ -1168,6 +1376,8 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1168{ 1376{
1169 TRef xref; 1377 TRef xref;
1170 IROp xrefop, loadop; 1378 IROp xrefop, loadop;
1379 IRRef rbref;
1380 IRType1 rbguard;
1171 cTValue *oldv; 1381 cTValue *oldv;
1172 1382
1173 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ 1383 while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
@@ -1178,10 +1388,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1178 handlemm: 1388 handlemm:
1179 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ 1389 if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
1180 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); 1390 BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
1181 TRef *base = J->base + func; 1391 TRef *base = J->base + func + LJ_FR2;
1182 TValue *tv = J->L->base + func; 1392 TValue *tv = J->L->base + func + LJ_FR2;
1183 base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; 1393 base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
1184 setfuncV(J->L, tv+0, funcV(&ix->mobjv)); 1394 setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
1185 copyTV(J->L, tv+1, &ix->tabv); 1395 copyTV(J->L, tv+1, &ix->tabv);
1186 copyTV(J->L, tv+2, &ix->keyv); 1396 copyTV(J->L, tv+2, &ix->keyv);
1187 if (ix->val) { 1397 if (ix->val) {
@@ -1213,7 +1423,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1213 } 1423 }
1214 1424
1215 /* Record the key lookup. */ 1425 /* Record the key lookup. */
1216 xref = rec_idx_key(J, ix); 1426 xref = rec_idx_key(J, ix, &rbref, &rbguard);
1217 xrefop = IR(tref_ref(xref))->o; 1427 xrefop = IR(tref_ref(xref))->o;
1218 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; 1428 loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
1219 /* The lj_meta_tset() inconsistency is gone, but better play safe. */ 1429 /* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1223,11 +1433,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1223 IRType t = itype2irt(oldv); 1433 IRType t = itype2irt(oldv);
1224 TRef res; 1434 TRef res;
1225 if (oldv == niltvg(J2G(J))) { 1435 if (oldv == niltvg(J2G(J))) {
1226 emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1436 emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1227 res = TREF_NIL; 1437 res = TREF_NIL;
1228 } else { 1438 } else {
1229 res = emitir(IRTG(loadop, t), xref, 0); 1439 res = emitir(IRTG(loadop, t), xref, 0);
1230 } 1440 }
1441 if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */
1442 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1443 J->guardemit = rbguard;
1444 }
1231 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) 1445 if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
1232 goto handlemm; 1446 goto handlemm;
1233 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ 1447 if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1235,6 +1449,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1235 } else { /* Indexed store. */ 1449 } else { /* Indexed store. */
1236 GCtab *mt = tabref(tabV(&ix->tabv)->metatable); 1450 GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
1237 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); 1451 int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
1452 if (tref_ref(xref) < rbref) { /* HREFK forwarded? */
1453 lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
1454 J->guardemit = rbguard;
1455 }
1238 if (tvisnil(oldv)) { /* Previous value was nil? */ 1456 if (tvisnil(oldv)) { /* Previous value was nil? */
1239 /* Need to duplicate the hasmm check for the early guards. */ 1457 /* Need to duplicate the hasmm check for the early guards. */
1240 int hasmm = 0; 1458 int hasmm = 0;
@@ -1245,7 +1463,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1245 if (hasmm) 1463 if (hasmm)
1246 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ 1464 emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
1247 else if (xrefop == IR_HREF) 1465 else if (xrefop == IR_HREF)
1248 emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), 1466 emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
1249 xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1467 xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1250 if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { 1468 if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
1251 lua_assert(hasmm); 1469 lua_assert(hasmm);
@@ -1256,13 +1474,17 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1256 TRef key = ix->key; 1474 TRef key = ix->key;
1257 if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ 1475 if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
1258 key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); 1476 key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
1259 xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); 1477 xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
1260 keybarrier = 0; /* NEWREF already takes care of the key barrier. */ 1478 keybarrier = 0; /* NEWREF already takes care of the key barrier. */
1479#ifdef LUAJIT_ENABLE_TABLE_BUMP
1480 if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
1481 rec_idx_bump(J, ix);
1482#endif
1261 } 1483 }
1262 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { 1484 } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
1263 /* Cannot derive that the previous value was non-nil, must do checks. */ 1485 /* Cannot derive that the previous value was non-nil, must do checks. */
1264 if (xrefop == IR_HREF) /* Guard against store to niltv. */ 1486 if (xrefop == IR_HREF) /* Guard against store to niltv. */
1265 emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); 1487 emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1266 if (ix->idxchain) { /* Metamethod lookup required? */ 1488 if (ix->idxchain) { /* Metamethod lookup required? */
1267 /* A check for NULL metatable is cheaper (hoistable) than a load. */ 1489 /* A check for NULL metatable is cheaper (hoistable) than a load. */
1268 if (!mt) { 1490 if (!mt) {
@@ -1284,7 +1506,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1284 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); 1506 emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
1285 /* Invalidate neg. metamethod cache for stores with certain string keys. */ 1507 /* Invalidate neg. metamethod cache for stores with certain string keys. */
1286 if (!nommstr(J, ix->key)) { 1508 if (!nommstr(J, ix->key)) {
1287 TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); 1509 TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
1288 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); 1510 emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
1289 } 1511 }
1290 J->needsnap = 1; 1512 J->needsnap = 1;
@@ -1292,6 +1514,31 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1292 } 1514 }
1293} 1515}
1294 1516
1517static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
1518{
1519 RecordIndex ix;
1520 cTValue *basev = J->L->base;
1521 GCtab *t = tabV(&basev[ra-1]);
1522 settabV(J->L, &ix.tabv, t);
1523 ix.tab = getslot(J, ra-1);
1524 ix.idxchain = 0;
1525#ifdef LUAJIT_ENABLE_TABLE_BUMP
1526 if ((J->flags & JIT_F_OPT_SINK)) {
1527 if (t->asize < i+rn-ra)
1528 lj_tab_reasize(J->L, t, i+rn-ra);
1529 setnilV(&ix.keyv);
1530 rec_idx_bump(J, &ix);
1531 }
1532#endif
1533 for (; ra < rn; i++, ra++) {
1534 setintV(&ix.keyv, i);
1535 ix.key = lj_ir_kint(J, i);
1536 copyTV(J->L, &ix.valv, &basev[ra]);
1537 ix.val = getslot(J, ra);
1538 lj_record_idx(J, &ix);
1539 }
1540}
1541
1295/* -- Upvalue access ------------------------------------------------------ */ 1542/* -- Upvalue access ------------------------------------------------------ */
1296 1543
1297/* Check whether upvalue is immutable and ok to constify. */ 1544/* Check whether upvalue is immutable and ok to constify. */
@@ -1334,7 +1581,11 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
1334 goto noconstify; 1581 goto noconstify;
1335 kfunc = lj_ir_kfunc(J, J->fn); 1582 kfunc = lj_ir_kfunc(J, J->fn);
1336 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); 1583 emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
1337 J->base[-1] = TREF_FRAME | kfunc; 1584#if LJ_FR2
1585 J->base[-2] = kfunc;
1586#else
1587 J->base[-1] = kfunc | TREF_FRAME;
1588#endif
1338 fn = kfunc; 1589 fn = kfunc;
1339 } 1590 }
1340 tr = lj_record_constify(J, uvval(uvp)); 1591 tr = lj_record_constify(J, uvval(uvp));
@@ -1345,16 +1596,16 @@ noconstify:
1345 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ 1596 /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
1346 uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); 1597 uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
1347 if (!uvp->closed) { 1598 if (!uvp->closed) {
1348 uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); 1599 uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
1349 /* In current stack? */ 1600 /* In current stack? */
1350 if (uvval(uvp) >= tvref(J->L->stack) && 1601 if (uvval(uvp) >= tvref(J->L->stack) &&
1351 uvval(uvp) < tvref(J->L->maxstack)) { 1602 uvval(uvp) < tvref(J->L->maxstack)) {
1352 int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); 1603 int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
1353 if (slot >= 0) { /* Aliases an SSA slot? */ 1604 if (slot >= 0) { /* Aliases an SSA slot? */
1354 emitir(IRTG(IR_EQ, IRT_P32), 1605 emitir(IRTG(IR_EQ, IRT_PGC),
1355 REF_BASE, 1606 REF_BASE,
1356 emitir(IRT(IR_ADD, IRT_P32), uref, 1607 emitir(IRT(IR_ADD, IRT_PGC), uref,
1357 lj_ir_kint(J, (slot - 1) * -8))); 1608 lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
1358 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ 1609 slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
1359 if (val == 0) { 1610 if (val == 0) {
1360 return getslot(J, slot); 1611 return getslot(J, slot);
@@ -1365,12 +1616,12 @@ noconstify:
1365 } 1616 }
1366 } 1617 }
1367 } 1618 }
1368 emitir(IRTG(IR_UGT, IRT_P32), 1619 emitir(IRTG(IR_UGT, IRT_PGC),
1369 emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), 1620 emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
1370 lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); 1621 lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
1371 } else { 1622 } else {
1372 needbarrier = 1; 1623 needbarrier = 1;
1373 uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); 1624 uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
1374 } 1625 }
1375 if (val == 0) { /* Upvalue load */ 1626 if (val == 0) { /* Upvalue load */
1376 IRType t = itype2irt(uvval(uvp)); 1627 IRType t = itype2irt(uvval(uvp));
@@ -1409,9 +1660,9 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
1409 if (count + J->tailcalled > J->param[JIT_P_recunroll]) { 1660 if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1410 J->pc++; 1661 J->pc++;
1411 if (J->framedepth + J->retdepth == 0) 1662 if (J->framedepth + J->retdepth == 0)
1412 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ 1663 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
1413 else 1664 else
1414 rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ 1665 lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
1415 } 1666 }
1416 } else { 1667 } else {
1417 if (count > J->param[JIT_P_callunroll]) { 1668 if (count > J->param[JIT_P_callunroll]) {
@@ -1445,11 +1696,14 @@ static void rec_func_setup(jit_State *J)
1445static void rec_func_vararg(jit_State *J) 1696static void rec_func_vararg(jit_State *J)
1446{ 1697{
1447 GCproto *pt = J->pt; 1698 GCproto *pt = J->pt;
1448 BCReg s, fixargs, vframe = J->maxslot+1; 1699 BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
1449 lua_assert((pt->flags & PROTO_VARARG)); 1700 lua_assert((pt->flags & PROTO_VARARG));
1450 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) 1701 if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
1451 lj_trace_err(J, LJ_TRERR_STACKOV); 1702 lj_trace_err(J, LJ_TRERR_STACKOV);
1452 J->base[vframe-1] = J->base[-1]; /* Copy function up. */ 1703 J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
1704#if LJ_FR2
1705 J->base[vframe-1] = TREF_FRAME;
1706#endif
1453 /* Copy fixarg slots up and set their original slots to nil. */ 1707 /* Copy fixarg slots up and set their original slots to nil. */
1454 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; 1708 fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
1455 for (s = 0; s < fixargs; s++) { 1709 for (s = 0; s < fixargs; s++) {
@@ -1485,9 +1739,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
1485 } 1739 }
1486 J->instunroll = 0; /* Cannot continue across a compiled function. */ 1740 J->instunroll = 0; /* Cannot continue across a compiled function. */
1487 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) 1741 if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1488 rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ 1742 lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
1489 else 1743 else
1490 rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ 1744 lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
1491} 1745}
1492 1746
1493/* -- Vararg handling ----------------------------------------------------- */ 1747/* -- Vararg handling ----------------------------------------------------- */
@@ -1511,8 +1765,10 @@ static int select_detect(jit_State *J)
1511static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) 1765static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1512{ 1766{
1513 int32_t numparams = J->pt->numparams; 1767 int32_t numparams = J->pt->numparams;
1514 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; 1768 ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
1515 lua_assert(frame_isvarg(J->L->base-1)); 1769 lua_assert(frame_isvarg(J->L->base-1));
1770 if (LJ_FR2 && dst > J->maxslot)
1771 J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */
1516 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ 1772 if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
1517 ptrdiff_t i; 1773 ptrdiff_t i;
1518 if (nvararg < 0) nvararg = 0; 1774 if (nvararg < 0) nvararg = 0;
@@ -1523,10 +1779,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1523 J->maxslot = dst + (BCReg)nresults; 1779 J->maxslot = dst + (BCReg)nresults;
1524 } 1780 }
1525 for (i = 0; i < nresults; i++) 1781 for (i = 0; i < nresults; i++)
1526 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; 1782 J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
1527 } else { /* Unknown number of varargs passed to trace. */ 1783 } else { /* Unknown number of varargs passed to trace. */
1528 TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); 1784 TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
1529 int32_t frofs = 8*(1+numparams)+FRAME_VARG; 1785 int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
1530 if (nresults >= 0) { /* Known fixed number of results. */ 1786 if (nresults >= 0) { /* Known fixed number of results. */
1531 ptrdiff_t i; 1787 ptrdiff_t i;
1532 if (nvararg > 0) { 1788 if (nvararg > 0) {
@@ -1535,12 +1791,13 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1535 if (nvararg >= nresults) 1791 if (nvararg >= nresults)
1536 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); 1792 emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
1537 else 1793 else
1538 emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); 1794 emitir(IRTGI(IR_EQ), fr,
1539 vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 1795 lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
1540 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 1796 vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1797 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8));
1541 for (i = 0; i < nload; i++) { 1798 for (i = 0; i < nload; i++) {
1542 IRType t = itype2irt(&J->L->base[i-1-nvararg]); 1799 IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
1543 TRef aref = emitir(IRT(IR_AREF, IRT_P32), 1800 TRef aref = emitir(IRT(IR_AREF, IRT_PGC),
1544 vbase, lj_ir_kint(J, (int32_t)i)); 1801 vbase, lj_ir_kint(J, (int32_t)i));
1545 TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 1802 TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
1546 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ 1803 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1586,15 +1843,16 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1586 } 1843 }
1587 if (idx != 0 && idx <= nvararg) { 1844 if (idx != 0 && idx <= nvararg) {
1588 IRType t; 1845 IRType t;
1589 TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); 1846 TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
1590 vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); 1847 vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
1591 t = itype2irt(&J->L->base[idx-2-nvararg]); 1848 lj_ir_kint(J, frofs-(8<<LJ_FR2)));
1592 aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); 1849 t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
1850 aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
1593 tr = emitir(IRTG(IR_VLOAD, t), aref, 0); 1851 tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
1594 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ 1852 if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
1595 } 1853 }
1596 J->base[dst-2] = tr; 1854 J->base[dst-2-LJ_FR2] = tr;
1597 J->maxslot = dst-1; 1855 J->maxslot = dst-1-LJ_FR2;
1598 J->bcskip = 2; /* Skip CALLM + select. */ 1856 J->bcskip = 2; /* Skip CALLM + select. */
1599 } else { 1857 } else {
1600 nyivarg: 1858 nyivarg:
@@ -1612,8 +1870,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
1612{ 1870{
1613 uint32_t asize = ah & 0x7ff; 1871 uint32_t asize = ah & 0x7ff;
1614 uint32_t hbits = ah >> 11; 1872 uint32_t hbits = ah >> 11;
1873 TRef tr;
1615 if (asize == 0x7ff) asize = 0x801; 1874 if (asize == 0x7ff) asize = 0x801;
1616 return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); 1875 tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1876#ifdef LUAJIT_ENABLE_TABLE_BUMP
1877 J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr);
1878 setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc);
1879 setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
1880#endif
1881 return tr;
1882}
1883
1884/* -- Concatenation ------------------------------------------------------- */
1885
1886static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
1887{
1888 TRef *top = &J->base[topslot];
1889 TValue savetv[5];
1890 BCReg s;
1891 RecordIndex ix;
1892 lua_assert(baseslot < topslot);
1893 for (s = baseslot; s <= topslot; s++)
1894 (void)getslot(J, s); /* Ensure all arguments have a reference. */
1895 if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
1896 TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
1897 /* First convert numbers to strings. */
1898 for (trp = top; trp >= base; trp--) {
1899 if (tref_isnumber(*trp))
1900 *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
1901 tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
1902 else if (!tref_isstr(*trp))
1903 break;
1904 }
1905 xbase = ++trp;
1906 tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
1907 lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
1908 do {
1909 tr = emitir(IRT(IR_BUFPUT, IRT_PGC), tr, *trp++);
1910 } while (trp <= top);
1911 tr = emitir(IRT(IR_BUFSTR, IRT_STR), tr, hdr);
1912 J->maxslot = (BCReg)(xbase - J->base);
1913 if (xbase == base) return tr; /* Return simple concatenation result. */
1914 /* Pass partial result. */
1915 topslot = J->maxslot--;
1916 *xbase = tr;
1917 top = xbase;
1918 setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
1919 } else {
1920 J->maxslot = topslot-1;
1921 copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
1922 }
1923 copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
1924 ix.tab = top[-1];
1925 ix.key = top[0];
1926 memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
1927 rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */
1928 memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
1929 return 0; /* No result yet. */
1617} 1930}
1618 1931
1619/* -- Record bytecode ops ------------------------------------------------- */ 1932/* -- Record bytecode ops ------------------------------------------------- */
@@ -1634,7 +1947,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
1634 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); 1947 const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
1635 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 1948 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1636 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ 1949 /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
1950#if LJ_FR2
1951 SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
1952 uint64_t pcbase;
1953 memcpy(&pcbase, flink, sizeof(uint64_t));
1954 pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
1955 memcpy(flink, &pcbase, sizeof(uint64_t));
1956#else
1637 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); 1957 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
1958#endif
1638 J->needsnap = 1; 1959 J->needsnap = 1;
1639 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); 1960 if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
1640 lj_snap_shrink(J); /* Shrink last snapshot if possible. */ 1961 lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@@ -1654,7 +1975,7 @@ void lj_record_ins(jit_State *J)
1654 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { 1975 if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
1655 switch (J->postproc) { 1976 switch (J->postproc) {
1656 case LJ_POST_FIXCOMP: /* Fixup comparison. */ 1977 case LJ_POST_FIXCOMP: /* Fixup comparison. */
1657 pc = frame_pc(&J2G(J)->tmptv); 1978 pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
1658 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); 1979 rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
1659 /* fallthrough */ 1980 /* fallthrough */
1660 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ 1981 case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */
@@ -1722,6 +2043,10 @@ void lj_record_ins(jit_State *J)
1722 rec_check_ir(J); 2043 rec_check_ir(J);
1723#endif 2044#endif
1724 2045
2046#if LJ_HASPROFILE
2047 rec_profile_ins(J, pc);
2048#endif
2049
1725 /* Keep a copy of the runtime values of var/num/str operands. */ 2050 /* Keep a copy of the runtime values of var/num/str operands. */
1726#define rav (&ix.valv) 2051#define rav (&ix.valv)
1727#define rbv (&ix.tabv) 2052#define rbv (&ix.tabv)
@@ -1748,7 +2073,7 @@ void lj_record_ins(jit_State *J)
1748 switch (bcmode_c(op)) { 2073 switch (bcmode_c(op)) {
1749 case BCMvar: 2074 case BCMvar:
1750 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; 2075 copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1751 case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; 2076 case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
1752 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); 2077 case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
1753 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : 2078 copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
1754 lj_ir_knumint(J, numV(tv)); } break; 2079 lj_ir_knumint(J, numV(tv)); } break;
@@ -1843,6 +2168,18 @@ void lj_record_ins(jit_State *J)
1843 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ 2168 J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
1844 break; 2169 break;
1845 2170
2171 case BC_ISTYPE: case BC_ISNUM:
2172 /* These coercions need to correspond with lj_meta_istype(). */
2173 if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
2174 ra = lj_opt_narrow_toint(J, ra);
2175 else if (rc == ~LJ_TNUMX+2)
2176 ra = lj_ir_tonum(J, ra);
2177 else if (rc == ~LJ_TSTR+1)
2178 ra = lj_ir_tostr(J, ra);
2179 /* else: type specialization suffices. */
2180 J->base[bc_a(ins)] = ra;
2181 break;
2182
1846 /* -- Unary ops --------------------------------------------------------- */ 2183 /* -- Unary ops --------------------------------------------------------- */
1847 2184
1848 case BC_NOT: 2185 case BC_NOT:
@@ -1906,11 +2243,23 @@ void lj_record_ins(jit_State *J)
1906 rc = rec_mm_arith(J, &ix, MM_pow); 2243 rc = rec_mm_arith(J, &ix, MM_pow);
1907 break; 2244 break;
1908 2245
2246 /* -- Miscellaneous ops ------------------------------------------------- */
2247
2248 case BC_CAT:
2249 rc = rec_cat(J, rb, rc);
2250 break;
2251
1909 /* -- Constant and move ops --------------------------------------------- */ 2252 /* -- Constant and move ops --------------------------------------------- */
1910 2253
1911 case BC_MOV: 2254 case BC_MOV:
1912 /* Clear gap of method call to avoid resurrecting previous refs. */ 2255 /* Clear gap of method call to avoid resurrecting previous refs. */
1913 if (ra > J->maxslot) J->base[ra-1] = 0; 2256 if (ra > J->maxslot) {
2257#if LJ_FR2
2258 memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
2259#else
2260 J->base[ra-1] = 0;
2261#endif
2262 }
1914 break; 2263 break;
1915 case BC_KSTR: case BC_KNUM: case BC_KPRI: 2264 case BC_KSTR: case BC_KNUM: case BC_KPRI:
1916 break; 2265 break;
@@ -1918,6 +2267,8 @@ void lj_record_ins(jit_State *J)
1918 rc = lj_ir_kint(J, (int32_t)(int16_t)rc); 2267 rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
1919 break; 2268 break;
1920 case BC_KNIL: 2269 case BC_KNIL:
2270 if (LJ_FR2 && ra > J->maxslot)
2271 J->base[ra-1] = 0;
1921 while (ra <= rc) 2272 while (ra <= rc)
1922 J->base[ra++] = TREF_NIL; 2273 J->base[ra++] = TREF_NIL;
1923 if (rc >= J->maxslot) J->maxslot = rc+1; 2274 if (rc >= J->maxslot) J->maxslot = rc+1;
@@ -1954,6 +2305,14 @@ void lj_record_ins(jit_State *J)
1954 ix.idxchain = LJ_MAX_IDXCHAIN; 2305 ix.idxchain = LJ_MAX_IDXCHAIN;
1955 rc = lj_record_idx(J, &ix); 2306 rc = lj_record_idx(J, &ix);
1956 break; 2307 break;
2308 case BC_TGETR: case BC_TSETR:
2309 ix.idxchain = 0;
2310 rc = lj_record_idx(J, &ix);
2311 break;
2312
2313 case BC_TSETM:
2314 rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
2315 break;
1957 2316
1958 case BC_TNEW: 2317 case BC_TNEW:
1959 rc = rec_tnew(J, rc); 2318 rc = rec_tnew(J, rc);
@@ -1961,33 +2320,38 @@ void lj_record_ins(jit_State *J)
1961 case BC_TDUP: 2320 case BC_TDUP:
1962 rc = emitir(IRTG(IR_TDUP, IRT_TAB), 2321 rc = emitir(IRTG(IR_TDUP, IRT_TAB),
1963 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); 2322 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
2323#ifdef LUAJIT_ENABLE_TABLE_BUMP
2324 J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc);
2325 setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc);
2326 setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
2327#endif
1964 break; 2328 break;
1965 2329
1966 /* -- Calls and vararg handling ----------------------------------------- */ 2330 /* -- Calls and vararg handling ----------------------------------------- */
1967 2331
1968 case BC_ITERC: 2332 case BC_ITERC:
1969 J->base[ra] = getslot(J, ra-3); 2333 J->base[ra] = getslot(J, ra-3);
1970 J->base[ra+1] = getslot(J, ra-2); 2334 J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
1971 J->base[ra+2] = getslot(J, ra-1); 2335 J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
1972 { /* Do the actual copy now because lj_record_call needs the values. */ 2336 { /* Do the actual copy now because lj_record_call needs the values. */
1973 TValue *b = &J->L->base[ra]; 2337 TValue *b = &J->L->base[ra];
1974 copyTV(J->L, b, b-3); 2338 copyTV(J->L, b, b-3);
1975 copyTV(J->L, b+1, b-2); 2339 copyTV(J->L, b+1+LJ_FR2, b-2);
1976 copyTV(J->L, b+2, b-1); 2340 copyTV(J->L, b+2+LJ_FR2, b-1);
1977 } 2341 }
1978 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2342 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1979 break; 2343 break;
1980 2344
1981 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ 2345 /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
1982 case BC_CALLM: 2346 case BC_CALLM:
1983 rc = (BCReg)(J->L->top - J->L->base) - ra; 2347 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
1984 /* fallthrough */ 2348 /* fallthrough */
1985 case BC_CALL: 2349 case BC_CALL:
1986 lj_record_call(J, ra, (ptrdiff_t)rc-1); 2350 lj_record_call(J, ra, (ptrdiff_t)rc-1);
1987 break; 2351 break;
1988 2352
1989 case BC_CALLMT: 2353 case BC_CALLMT:
1990 rc = (BCReg)(J->L->top - J->L->base) - ra; 2354 rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
1991 /* fallthrough */ 2355 /* fallthrough */
1992 case BC_CALLT: 2356 case BC_CALLT:
1993 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); 2357 lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
@@ -2004,6 +2368,9 @@ void lj_record_ins(jit_State *J)
2004 rc = (BCReg)(J->L->top - J->L->base) - ra + 1; 2368 rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
2005 /* fallthrough */ 2369 /* fallthrough */
2006 case BC_RET: case BC_RET0: case BC_RET1: 2370 case BC_RET: case BC_RET0: case BC_RET1:
2371#if LJ_HASPROFILE
2372 rec_profile_ret(J);
2373#endif
2007 lj_record_ret(J, ra, (ptrdiff_t)rc-1); 2374 lj_record_ret(J, ra, (ptrdiff_t)rc-1);
2008 break; 2375 break;
2009 2376
@@ -2016,7 +2383,7 @@ void lj_record_ins(jit_State *J)
2016 case BC_JFORI: 2383 case BC_JFORI:
2017 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); 2384 lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
2018 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ 2385 if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
2019 rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); 2386 lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2020 /* Continue tracing if the loop is not entered. */ 2387 /* Continue tracing if the loop is not entered. */
2021 break; 2388 break;
2022 2389
@@ -2083,10 +2450,8 @@ void lj_record_ins(jit_State *J)
2083 /* fallthrough */ 2450 /* fallthrough */
2084 case BC_ITERN: 2451 case BC_ITERN:
2085 case BC_ISNEXT: 2452 case BC_ISNEXT:
2086 case BC_CAT:
2087 case BC_UCLO: 2453 case BC_UCLO:
2088 case BC_FNEW: 2454 case BC_FNEW:
2089 case BC_TSETM:
2090 setintV(&J->errinfo, (int32_t)op); 2455 setintV(&J->errinfo, (int32_t)op);
2091 lj_trace_err_info(J, LJ_TRERR_NYIBC); 2456 lj_trace_err_info(J, LJ_TRERR_NYIBC);
2092 break; 2457 break;
@@ -2095,15 +2460,21 @@ void lj_record_ins(jit_State *J)
2095 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ 2460 /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
2096 if (bcmode_a(op) == BCMdst && rc) { 2461 if (bcmode_a(op) == BCMdst && rc) {
2097 J->base[ra] = rc; 2462 J->base[ra] = rc;
2098 if (ra >= J->maxslot) J->maxslot = ra+1; 2463 if (ra >= J->maxslot) {
2464#if LJ_FR2
2465 if (ra > J->maxslot) J->base[ra-1] = 0;
2466#endif
2467 J->maxslot = ra+1;
2468 }
2099 } 2469 }
2100 2470
2101#undef rav 2471#undef rav
2102#undef rbv 2472#undef rbv
2103#undef rcv 2473#undef rcv
2104 2474
2105 /* Limit the number of recorded IR instructions. */ 2475 /* Limit the number of recorded IR instructions and constants. */
2106 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) 2476 if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
2477 J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
2107 lj_trace_err(J, LJ_TRERR_TRACEOV); 2478 lj_trace_err(J, LJ_TRERR_TRACEOV);
2108} 2479}
2109 2480
@@ -2152,6 +2523,12 @@ static const BCIns *rec_setup_root(jit_State *J)
2152 J->maxslot = J->pt->numparams; 2523 J->maxslot = J->pt->numparams;
2153 pc++; 2524 pc++;
2154 break; 2525 break;
2526 case BC_CALLM:
2527 case BC_CALL:
2528 case BC_ITERC:
2529 /* No bytecode range check for stitched traces. */
2530 pc++;
2531 break;
2155 default: 2532 default:
2156 lua_assert(0); 2533 lua_assert(0);
2157 break; 2534 break;
@@ -2167,11 +2544,14 @@ void lj_record_setup(jit_State *J)
2167 /* Initialize state related to current trace. */ 2544 /* Initialize state related to current trace. */
2168 memset(J->slot, 0, sizeof(J->slot)); 2545 memset(J->slot, 0, sizeof(J->slot));
2169 memset(J->chain, 0, sizeof(J->chain)); 2546 memset(J->chain, 0, sizeof(J->chain));
2547#ifdef LUAJIT_ENABLE_TABLE_BUMP
2548 memset(J->rbchash, 0, sizeof(J->rbchash));
2549#endif
2170 memset(J->bpropcache, 0, sizeof(J->bpropcache)); 2550 memset(J->bpropcache, 0, sizeof(J->bpropcache));
2171 J->scev.idx = REF_NIL; 2551 J->scev.idx = REF_NIL;
2172 setmref(J->scev.pc, NULL); 2552 setmref(J->scev.pc, NULL);
2173 2553
2174 J->baseslot = 1; /* Invoking function is at base[-1]. */ 2554 J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
2175 J->base = J->slot + J->baseslot; 2555 J->base = J->slot + J->baseslot;
2176 J->maxslot = 0; 2556 J->maxslot = 0;
2177 J->framedepth = 0; 2557 J->framedepth = 0;
@@ -2186,7 +2566,7 @@ void lj_record_setup(jit_State *J)
2186 J->bc_extent = ~(MSize)0; 2566 J->bc_extent = ~(MSize)0;
2187 2567
2188 /* Emit instructions for fixed references. Also triggers initial IR alloc. */ 2568 /* Emit instructions for fixed references. Also triggers initial IR alloc. */
2189 emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); 2569 emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
2190 for (i = 0; i <= 2; i++) { 2570 for (i = 0; i <= 2; i++) {
2191 IRIns *ir = IR(REF_NIL-i); 2571 IRIns *ir = IR(REF_NIL-i);
2192 ir->i = 0; 2572 ir->i = 0;
@@ -2220,7 +2600,7 @@ void lj_record_setup(jit_State *J)
2220 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || 2600 if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2221 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + 2601 T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2222 J->param[JIT_P_tryside]) { 2602 J->param[JIT_P_tryside]) {
2223 rec_stop(J, LJ_TRLINK_INTERP, 0); 2603 lj_record_stop(J, LJ_TRLINK_INTERP, 0);
2224 } 2604 }
2225 } else { /* Root trace. */ 2605 } else { /* Root trace. */
2226 J->cur.root = 0; 2606 J->cur.root = 0;
@@ -2232,9 +2612,15 @@ void lj_record_setup(jit_State *J)
2232 lj_snap_add(J); 2612 lj_snap_add(J);
2233 if (bc_op(J->cur.startins) == BC_FORL) 2613 if (bc_op(J->cur.startins) == BC_FORL)
2234 rec_for_loop(J, J->pc-1, &J->scev, 1); 2614 rec_for_loop(J, J->pc-1, &J->scev, 1);
2615 else if (bc_op(J->cur.startins) == BC_ITERC)
2616 J->startpc = NULL;
2235 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) 2617 if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2236 lj_trace_err(J, LJ_TRERR_STACKOV); 2618 lj_trace_err(J, LJ_TRERR_STACKOV);
2237 } 2619 }
2620#if LJ_HASPROFILE
2621 J->prev_pt = NULL;
2622 J->prev_line = -1;
2623#endif
2238#ifdef LUAJIT_ENABLE_CHECKHOOK 2624#ifdef LUAJIT_ENABLE_CHECKHOOK
2239 /* Regularly check for instruction/line hooks from compiled code and 2625 /* Regularly check for instruction/line hooks from compiled code and
2240 ** exit to the interpreter if the hooks are set. 2626 ** exit to the interpreter if the hooks are set.
diff --git a/src/lj_record.h b/src/lj_record.h
index 8ef8b2a7..e7d24fae 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,6 +28,7 @@ typedef struct RecordIndex {
28 28
29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, 29LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
30 cTValue *av, cTValue *bv); 30 cTValue *av, cTValue *bv);
31LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
31LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); 32LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
32 33
33LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); 34LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
diff --git a/src/lj_snap.c b/src/lj_snap.c
index de8068ac..a47c0e3e 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -68,10 +68,22 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
68 for (s = 0; s < nslots; s++) { 68 for (s = 0; s < nslots; s++) {
69 TRef tr = J->slot[s]; 69 TRef tr = J->slot[s];
70 IRRef ref = tref_ref(tr); 70 IRRef ref = tref_ref(tr);
71#if LJ_FR2
72 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
73 if ((tr & TREF_FRAME))
74 map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
75 continue;
76 }
77 if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
78 cTValue *base = J->L->base - J->baseslot;
79 tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
80 ref = tref_ref(tr);
81 }
82#endif
71 if (ref) { 83 if (ref) {
72 SnapEntry sn = SNAP_TR(s, tr); 84 SnapEntry sn = SNAP_TR(s, tr);
73 IRIns *ir = &J->cur.ir[ref]; 85 IRIns *ir = &J->cur.ir[ref];
74 if (!(sn & (SNAP_CONT|SNAP_FRAME)) && 86 if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
75 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { 87 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
76 /* No need to snapshot unmodified non-inherited slots. */ 88 /* No need to snapshot unmodified non-inherited slots. */
77 if (!(ir->op2 & IRSLOAD_INHERIT)) 89 if (!(ir->op2 & IRSLOAD_INHERIT))
@@ -81,7 +93,7 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
81 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) 93 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
82 sn |= SNAP_NORESTORE; 94 sn |= SNAP_NORESTORE;
83 } 95 }
84 if (LJ_SOFTFP && irt_isnum(ir->t)) 96 if (LJ_SOFTFP32 && irt_isnum(ir->t))
85 sn |= SNAP_SOFTFPNUM; 97 sn |= SNAP_SOFTFPNUM;
86 map[n++] = sn; 98 map[n++] = sn;
87 } 99 }
@@ -90,32 +102,51 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
90} 102}
91 103
92/* Add frame links at the end of the snapshot. */ 104/* Add frame links at the end of the snapshot. */
93static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) 105static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
94{ 106{
95 cTValue *frame = J->L->base - 1; 107 cTValue *frame = J->L->base - 1;
96 cTValue *lim = J->L->base - J->baseslot; 108 cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
97 cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; 109 GCfunc *fn = frame_func(frame);
110 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
111#if LJ_FR2
112 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
113 lua_assert(2 <= J->baseslot && J->baseslot <= 257);
114 memcpy(map, &pcbase, sizeof(uint64_t));
115#else
98 MSize f = 0; 116 MSize f = 0;
99 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ 117 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
118#endif
100 while (frame > lim) { /* Backwards traversal of all frames above base. */ 119 while (frame > lim) { /* Backwards traversal of all frames above base. */
101 if (frame_islua(frame)) { 120 if (frame_islua(frame)) {
121#if !LJ_FR2
102 map[f++] = SNAP_MKPC(frame_pc(frame)); 122 map[f++] = SNAP_MKPC(frame_pc(frame));
123#endif
103 frame = frame_prevl(frame); 124 frame = frame_prevl(frame);
104 } else if (frame_iscont(frame)) { 125 } else if (frame_iscont(frame)) {
126#if !LJ_FR2
105 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 127 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
106 map[f++] = SNAP_MKPC(frame_contpc(frame)); 128 map[f++] = SNAP_MKPC(frame_contpc(frame));
129#endif
107 frame = frame_prevd(frame); 130 frame = frame_prevd(frame);
108 } else { 131 } else {
109 lua_assert(!frame_isc(frame)); 132 lua_assert(!frame_isc(frame));
133#if !LJ_FR2
110 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); 134 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
135#endif
111 frame = frame_prevd(frame); 136 frame = frame_prevd(frame);
112 continue; 137 continue;
113 } 138 }
114 if (frame + funcproto(frame_func(frame))->framesize > ftop) 139 if (frame + funcproto(frame_func(frame))->framesize > ftop)
115 ftop = frame + funcproto(frame_func(frame))->framesize; 140 ftop = frame + funcproto(frame_func(frame))->framesize;
116 } 141 }
142 *topslot = (uint8_t)(ftop - lim);
143#if LJ_FR2
144 lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
145 return 2;
146#else
117 lua_assert(f == (MSize)(1 + J->framedepth)); 147 lua_assert(f == (MSize)(1 + J->framedepth));
118 return (BCReg)(ftop - lim); 148 return f;
149#endif
119} 150}
120 151
121/* Take a snapshot of the current stack. */ 152/* Take a snapshot of the current stack. */
@@ -125,16 +156,16 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
125 MSize nent; 156 MSize nent;
126 SnapEntry *p; 157 SnapEntry *p;
127 /* Conservative estimate. */ 158 /* Conservative estimate. */
128 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); 159 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
129 p = &J->cur.snapmap[nsnapmap]; 160 p = &J->cur.snapmap[nsnapmap];
130 nent = snapshot_slots(J, p, nslots); 161 nent = snapshot_slots(J, p, nslots);
131 snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); 162 snap->nent = (uint8_t)nent;
163 nent += snapshot_framelinks(J, p + nent, &snap->topslot);
132 snap->mapofs = (uint32_t)nsnapmap; 164 snap->mapofs = (uint32_t)nsnapmap;
133 snap->ref = (IRRef1)J->cur.nins; 165 snap->ref = (IRRef1)J->cur.nins;
134 snap->nent = (uint8_t)nent;
135 snap->nslots = (uint8_t)nslots; 166 snap->nslots = (uint8_t)nslots;
136 snap->count = 0; 167 snap->count = 0;
137 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth); 168 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
138} 169}
139 170
140/* Add or merge a snapshot. */ 171/* Add or merge a snapshot. */
@@ -143,8 +174,8 @@ void lj_snap_add(jit_State *J)
143 MSize nsnap = J->cur.nsnap; 174 MSize nsnap = J->cur.nsnap;
144 MSize nsnapmap = J->cur.nsnapmap; 175 MSize nsnapmap = J->cur.nsnapmap;
145 /* Merge if no ins. inbetween or if requested and no guard inbetween. */ 176 /* Merge if no ins. inbetween or if requested and no guard inbetween. */
146 if (J->mergesnap ? !irt_isguard(J->guardemit) : 177 if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
147 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { 178 (J->mergesnap && !irt_isguard(J->guardemit))) {
148 if (nsnap == 1) { /* But preserve snap #0 PC. */ 179 if (nsnap == 1) { /* But preserve snap #0 PC. */
149 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); 180 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
150 goto nomerge; 181 goto nomerge;
@@ -237,7 +268,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
237 case BCMbase: 268 case BCMbase:
238 if (op >= BC_CALLM && op <= BC_VARG) { 269 if (op >= BC_CALLM && op <= BC_VARG) {
239 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? 270 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
240 maxslot : (bc_a(ins) + bc_c(ins)); 271 maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
272 if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
241 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); 273 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
242 for (; s < top; s++) USE_SLOT(s); 274 for (; s < top; s++) USE_SLOT(s);
243 for (; s < maxslot; s++) DEF_SLOT(s); 275 for (; s < maxslot; s++) DEF_SLOT(s);
@@ -281,8 +313,8 @@ void lj_snap_shrink(jit_State *J)
281 MSize n, m, nlim, nent = snap->nent; 313 MSize n, m, nlim, nent = snap->nent;
282 uint8_t udf[SNAP_USEDEF_SLOTS]; 314 uint8_t udf[SNAP_USEDEF_SLOTS];
283 BCReg maxslot = J->maxslot; 315 BCReg maxslot = J->maxslot;
284 BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
285 BCReg baseslot = J->baseslot; 316 BCReg baseslot = J->baseslot;
317 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
286 maxslot += baseslot; 318 maxslot += baseslot;
287 minslot += baseslot; 319 minslot += baseslot;
288 snap->nslots = (uint8_t)maxslot; 320 snap->nslots = (uint8_t)maxslot;
@@ -342,7 +374,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
342 break; 374 break;
343 } 375 }
344 } 376 }
345 } else if (LJ_SOFTFP && ir->o == IR_HIOP) { 377 } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
346 ref++; 378 ref++;
347 } else if (ir->o == IR_PVAL) { 379 } else if (ir->o == IR_PVAL) {
348 ref = ir->op1 + REF_BIAS; 380 ref = ir->op1 + REF_BIAS;
@@ -368,8 +400,8 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
368 case IR_KPRI: return TREF_PRI(irt_type(ir->t)); 400 case IR_KPRI: return TREF_PRI(irt_type(ir->t));
369 case IR_KINT: return lj_ir_kint(J, ir->i); 401 case IR_KINT: return lj_ir_kint(J, ir->i);
370 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); 402 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
371 case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); 403 case IR_KNUM: case IR_KINT64:
372 case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); 404 return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
373 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ 405 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
374 default: lua_assert(0); return TREF_NIL; break; 406 default: lua_assert(0); return TREF_NIL; break;
375 } 407 }
@@ -442,7 +474,11 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
442 goto setslot; 474 goto setslot;
443 bloomset(seen, ref); 475 bloomset(seen, ref);
444 if (irref_isk(ref)) { 476 if (irref_isk(ref)) {
445 tr = snap_replay_const(J, ir); 477 /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
478 if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
479 tr = 0;
480 else
481 tr = snap_replay_const(J, ir);
446 } else if (!regsp_used(ir->prev)) { 482 } else if (!regsp_used(ir->prev)) {
447 pass23 = 1; 483 pass23 = 1;
448 lua_assert(s != 0); 484 lua_assert(s != 0);
@@ -450,13 +486,13 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
450 } else { 486 } else {
451 IRType t = irt_type(ir->t); 487 IRType t = irt_type(ir->t);
452 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; 488 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
453 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; 489 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
454 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); 490 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
455 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); 491 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
456 } 492 }
457 setslot: 493 setslot:
458 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ 494 J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
459 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); 495 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
460 if ((sn & SNAP_FRAME)) 496 if ((sn & SNAP_FRAME))
461 J->baseslot = s+1; 497 J->baseslot = s+1;
462 } 498 }
@@ -484,7 +520,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
484 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { 520 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
485 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) 521 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
486 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); 522 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
487 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && 523 else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
488 irs+1 < irlast && (irs+1)->o == IR_HIOP) 524 irs+1 < irlast && (irs+1)->o == IR_HIOP)
489 snap_pref(J, T, map, nent, seen, (irs+1)->op2); 525 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
490 } 526 }
@@ -543,17 +579,16 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
543 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); 579 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
544 val = snap_pref(J, T, map, nent, seen, irc->op1); 580 val = snap_pref(J, T, map, nent, seen, irc->op1);
545 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); 581 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
546 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && 582 } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
547 irs+1 < irlast && (irs+1)->o == IR_HIOP) { 583 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
548 IRType t = IRT_I64; 584 IRType t = IRT_I64;
549 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) 585 if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
550 t = IRT_NUM; 586 t = IRT_NUM;
551 lj_needsplit(J); 587 lj_needsplit(J);
552 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { 588 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
553 uint64_t k = (uint32_t)T->ir[irs->op2].i + 589 uint64_t k = (uint32_t)T->ir[irs->op2].i +
554 ((uint64_t)T->ir[(irs+1)->op2].i << 32); 590 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
555 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, 591 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
556 lj_ir_k64_find(J, k));
557 } else { 592 } else {
558 val = emitir_raw(IRT(IR_HIOP, t), val, 593 val = emitir_raw(IRT(IR_HIOP, t), val,
559 snap_pref(J, T, map, nent, seen, (irs+1)->op2)); 594 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@@ -600,17 +635,18 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
600 int32_t *sps = &ex->spill[regsp_spill(rs)]; 635 int32_t *sps = &ex->spill[regsp_spill(rs)];
601 if (irt_isinteger(t)) { 636 if (irt_isinteger(t)) {
602 setintV(o, *sps); 637 setintV(o, *sps);
603#if !LJ_SOFTFP 638#if !LJ_SOFTFP32
604 } else if (irt_isnum(t)) { 639 } else if (irt_isnum(t)) {
605 o->u64 = *(uint64_t *)sps; 640 o->u64 = *(uint64_t *)sps;
606#endif 641#endif
607 } else if (LJ_64 && irt_islightud(t)) { 642#if LJ_64 && !LJ_GC64
643 } else if (irt_islightud(t)) {
608 /* 64 bit lightuserdata which may escape already has the tag bits. */ 644 /* 64 bit lightuserdata which may escape already has the tag bits. */
609 o->u64 = *(uint64_t *)sps; 645 o->u64 = *(uint64_t *)sps;
646#endif
610 } else { 647 } else {
611 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 648 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
612 setgcrefi(o->gcr, *sps); 649 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
613 setitype(o, irt_toitype(t));
614 } 650 }
615 } else { /* Restore from register. */ 651 } else { /* Restore from register. */
616 Reg r = regsp_reg(rs); 652 Reg r = regsp_reg(rs);
@@ -624,14 +660,19 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
624#if !LJ_SOFTFP 660#if !LJ_SOFTFP
625 } else if (irt_isnum(t)) { 661 } else if (irt_isnum(t)) {
626 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 662 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
663#elif LJ_64 /* && LJ_SOFTFP */
664 } else if (irt_isnum(t)) {
665 o->u64 = ex->gpr[r-RID_MIN_GPR];
627#endif 666#endif
628 } else if (LJ_64 && irt_islightud(t)) { 667#if LJ_64 && !LJ_GC64
629 /* 64 bit lightuserdata which may escape already has the tag bits. */ 668 } else if (irt_is64(t)) {
669 /* 64 bit values that already have the tag bits. */
630 o->u64 = ex->gpr[r-RID_MIN_GPR]; 670 o->u64 = ex->gpr[r-RID_MIN_GPR];
671#endif
672 } else if (irt_ispri(t)) {
673 setpriV(o, irt_toitype(t));
631 } else { 674 } else {
632 if (!irt_ispri(t)) 675 setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
633 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
634 setitype(o, irt_toitype(t));
635 } 676 }
636 } 677 }
637} 678}
@@ -647,8 +688,8 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
647 int32_t *src; 688 int32_t *src;
648 uint64_t tmp; 689 uint64_t tmp;
649 if (irref_isk(ref)) { 690 if (irref_isk(ref)) {
650 if (ir->o == IR_KNUM || ir->o == IR_KINT64) { 691 if (ir_isk64(ir)) {
651 src = mref(ir->ptr, int32_t); 692 src = (int32_t *)&ir[1];
652 } else if (sz == 8) { 693 } else if (sz == 8) {
653 tmp = (uint64_t)(uint32_t)ir->i; 694 tmp = (uint64_t)(uint32_t)ir->i;
654 src = (int32_t *)&tmp; 695 src = (int32_t *)&tmp;
@@ -685,8 +726,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
685#else 726#else
686 if (LJ_BE && sz == 4) src++; 727 if (LJ_BE && sz == 4) src++;
687#endif 728#endif
688 } 729 } else
689#endif 730#endif
731 if (LJ_64 && LJ_BE && sz == 4) src++;
690 } 732 }
691 } 733 }
692 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); 734 lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
@@ -708,8 +750,9 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
708 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { 750 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
709 CTState *cts = ctype_cts(J->L); 751 CTState *cts = ctype_cts(J->L);
710 CTypeID id = (CTypeID)T->ir[ir->op1].i; 752 CTypeID id = (CTypeID)T->ir[ir->op1].i;
711 CTSize sz = lj_ctype_size(cts, id); 753 CTSize sz;
712 GCcdata *cd = lj_cdata_new(cts, id, sz); 754 CTInfo info = lj_ctype_info(cts, id, &sz);
755 GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
713 setcdataV(J->L, o, cd); 756 setcdataV(J->L, o, cd);
714 if (ir->o == IR_CNEWI) { 757 if (ir->o == IR_CNEWI) {
715 uint8_t *p = (uint8_t *)cdataptr(cd); 758 uint8_t *p = (uint8_t *)cdataptr(cd);
@@ -773,7 +816,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
773 val = lj_tab_set(J->L, t, &tmp); 816 val = lj_tab_set(J->L, t, &tmp);
774 /* NOBARRIER: The table is new (marked white). */ 817 /* NOBARRIER: The table is new (marked white). */
775 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); 818 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
776 if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { 819 if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
777 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); 820 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
778 val->u32.hi = tmp.u32.lo; 821 val->u32.hi = tmp.u32.lo;
779 } 822 }
@@ -791,11 +834,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
791 SnapShot *snap = &T->snap[snapno]; 834 SnapShot *snap = &T->snap[snapno];
792 MSize n, nent = snap->nent; 835 MSize n, nent = snap->nent;
793 SnapEntry *map = &T->snapmap[snap->mapofs]; 836 SnapEntry *map = &T->snapmap[snap->mapofs];
794 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; 837#if !LJ_FR2 || defined(LUA_USE_ASSERT)
795 int32_t ftsz0; 838 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
839#endif
840#if !LJ_FR2
841 ptrdiff_t ftsz0;
842#endif
796 TValue *frame; 843 TValue *frame;
797 BloomFilter rfilt = snap_renamefilter(T, snapno); 844 BloomFilter rfilt = snap_renamefilter(T, snapno);
798 const BCIns *pc = snap_pc(map[nent]); 845 const BCIns *pc = snap_pc(&map[nent]);
799 lua_State *L = J->L; 846 lua_State *L = J->L;
800 847
801 /* Set interpreter PC to the next PC to get correct error messages. */ 848 /* Set interpreter PC to the next PC to get correct error messages. */
@@ -808,8 +855,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
808 } 855 }
809 856
810 /* Fill stack slots with data from the registers and spill slots. */ 857 /* Fill stack slots with data from the registers and spill slots. */
811 frame = L->base-1; 858 frame = L->base-1-LJ_FR2;
859#if !LJ_FR2
812 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ 860 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
861#endif
813 for (n = 0; n < nent; n++) { 862 for (n = 0; n < nent; n++) {
814 SnapEntry sn = map[n]; 863 SnapEntry sn = map[n];
815 if (!(sn & SNAP_NORESTORE)) { 864 if (!(sn & SNAP_NORESTORE)) {
@@ -828,17 +877,22 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
828 continue; 877 continue;
829 } 878 }
830 snap_restoreval(J, T, ex, snapno, rfilt, ref, o); 879 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
831 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { 880 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
832 TValue tmp; 881 TValue tmp;
833 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); 882 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
834 o->u32.hi = tmp.u32.lo; 883 o->u32.hi = tmp.u32.lo;
884#if !LJ_FR2
835 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 885 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
836 /* Overwrite tag with frame link. */ 886 /* Overwrite tag with frame link. */
837 o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; 887 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
838 L->base = o+1; 888 L->base = o+1;
889#endif
839 } 890 }
840 } 891 }
841 } 892 }
893#if LJ_FR2
894 L->base += (map[nent+LJ_BE] & 0xff);
895#endif
842 lua_assert(map + nent == flinks); 896 lua_assert(map + nent == flinks);
843 897
844 /* Compute current stack top. */ 898 /* Compute current stack top. */
diff --git a/src/lj_state.c b/src/lj_state.c
index ab064266..dc82e260 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
12#include "lj_obj.h" 12#include "lj_obj.h"
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_err.h" 14#include "lj_err.h"
15#include "lj_buf.h"
15#include "lj_str.h" 16#include "lj_str.h"
16#include "lj_tab.h" 17#include "lj_tab.h"
17#include "lj_func.h" 18#include "lj_func.h"
@@ -26,6 +27,7 @@
26#include "lj_vm.h" 27#include "lj_vm.h"
27#include "lj_lex.h" 28#include "lj_lex.h"
28#include "lj_alloc.h" 29#include "lj_alloc.h"
30#include "luajit.h"
29 31
30/* -- Stack handling ------------------------------------------------------ */ 32/* -- Stack handling ------------------------------------------------------ */
31 33
@@ -47,6 +49,7 @@
47** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 49** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
48** slots above top, but then mobj is always a function. So we can get by 50** slots above top, but then mobj is always a function. So we can get by
49** with 5 extra slots. 51** with 5 extra slots.
52** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
50*/ 53*/
51 54
52/* Resize stack slots and adjust pointers in state. */ 55/* Resize stack slots and adjust pointers in state. */
@@ -59,7 +62,7 @@ static void resizestack(lua_State *L, MSize n)
59 GCobj *up; 62 GCobj *up;
60 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); 63 lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1);
61 st = (TValue *)lj_mem_realloc(L, tvref(L->stack), 64 st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
62 (MSize)(L->stacksize*sizeof(TValue)), 65 (MSize)(oldsize*sizeof(TValue)),
63 (MSize)(realsize*sizeof(TValue))); 66 (MSize)(realsize*sizeof(TValue)));
64 setmref(L->stack, st); 67 setmref(L->stack, st);
65 delta = (char *)st - (char *)oldst; 68 delta = (char *)st - (char *)oldst;
@@ -67,12 +70,12 @@ static void resizestack(lua_State *L, MSize n)
67 while (oldsize < realsize) /* Clear new slots. */ 70 while (oldsize < realsize) /* Clear new slots. */
68 setnilV(st + oldsize++); 71 setnilV(st + oldsize++);
69 L->stacksize = realsize; 72 L->stacksize = realsize;
73 if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
74 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
70 L->base = (TValue *)((char *)L->base + delta); 75 L->base = (TValue *)((char *)L->base + delta);
71 L->top = (TValue *)((char *)L->top + delta); 76 L->top = (TValue *)((char *)L->top + delta);
72 for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) 77 for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
73 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); 78 setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
74 if (obj2gco(L) == gcref(G(L)->jit_L))
75 setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
76} 79}
77 80
78/* Relimit stack after error, in case the limit was overdrawn. */ 81/* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +92,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
89 return; /* Avoid stack shrinking while handling stack overflow. */ 92 return; /* Avoid stack shrinking while handling stack overflow. */
90 if (4*used < L->stacksize && 93 if (4*used < L->stacksize &&
91 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && 94 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
92 obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ 95 /* Don't shrink stack of live trace. */
96 (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
93 resizestack(L, L->stacksize >> 1); 97 resizestack(L, L->stacksize >> 1);
94} 98}
95 99
@@ -125,8 +129,9 @@ static void stack_init(lua_State *L1, lua_State *L)
125 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; 129 L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
126 stend = st + L1->stacksize; 130 stend = st + L1->stacksize;
127 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); 131 setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
128 L1->base = L1->top = st+1; 132 setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */
129 setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ 133 if (LJ_FR2) setnilV(st++);
134 L1->base = L1->top = st;
130 while (st < stend) /* Clear new slots. */ 135 while (st < stend) /* Clear new slots. */
131 setnilV(st++); 136 setnilV(st++);
132} 137}
@@ -164,7 +169,7 @@ static void close_state(lua_State *L)
164 lj_ctype_freestate(g); 169 lj_ctype_freestate(g);
165#endif 170#endif
166 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); 171 lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
167 lj_str_freebuf(g, &g->tmpbuf); 172 lj_buf_free(g, &g->tmpbuf);
168 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 173 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
169 lua_assert(g->gc.total == sizeof(GG_State)); 174 lua_assert(g->gc.total == sizeof(GG_State));
170#ifndef LUAJIT_USE_SYSMALLOC 175#ifndef LUAJIT_USE_SYSMALLOC
@@ -175,7 +180,7 @@ static void close_state(lua_State *L)
175 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); 180 g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
176} 181}
177 182
178#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) 183#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
179lua_State *lj_state_newstate(lua_Alloc f, void *ud) 184lua_State *lj_state_newstate(lua_Alloc f, void *ud)
180#else 185#else
181LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) 186LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
@@ -184,7 +189,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
184 GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); 189 GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
185 lua_State *L = &GG->L; 190 lua_State *L = &GG->L;
186 global_State *g = &GG->g; 191 global_State *g = &GG->g;
187 if (GG == NULL || !checkptr32(GG)) return NULL; 192 if (GG == NULL || !checkptrGC(GG)) return NULL;
188 memset(GG, 0, sizeof(GG_State)); 193 memset(GG, 0, sizeof(GG_State));
189 L->gct = ~LJ_TTHREAD; 194 L->gct = ~LJ_TTHREAD;
190 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ 195 L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
@@ -202,8 +207,10 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
202 setnilV(registry(L)); 207 setnilV(registry(L));
203 setnilV(&g->nilnode.val); 208 setnilV(&g->nilnode.val);
204 setnilV(&g->nilnode.key); 209 setnilV(&g->nilnode.key);
210#if !LJ_GC64
205 setmref(g->nilnode.freetop, &g->nilnode); 211 setmref(g->nilnode.freetop, &g->nilnode);
206 lj_str_initbuf(&g->tmpbuf); 212#endif
213 lj_buf_init(NULL, &g->tmpbuf);
207 g->gc.state = GCSpause; 214 g->gc.state = GCSpause;
208 setgcref(g->gc.root, obj2gco(L)); 215 setgcref(g->gc.root, obj2gco(L));
209 setmref(g->gc.sweep, &g->gc.root); 216 setmref(g->gc.sweep, &g->gc.root);
@@ -217,7 +224,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
217 close_state(L); 224 close_state(L);
218 return NULL; 225 return NULL;
219 } 226 }
220 L->status = 0; 227 L->status = LUA_OK;
221 return L; 228 return L;
222} 229}
223 230
@@ -236,6 +243,10 @@ LUA_API void lua_close(lua_State *L)
236 global_State *g = G(L); 243 global_State *g = G(L);
237 int i; 244 int i;
238 L = mainthread(g); /* Only the main thread can be closed. */ 245 L = mainthread(g); /* Only the main thread can be closed. */
246#if LJ_HASPROFILE
247 luaJIT_profile_stop(L);
248#endif
249 setgcrefnull(g->cur_L);
239 lj_func_closeuv(L, tvref(L->stack)); 250 lj_func_closeuv(L, tvref(L->stack));
240 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ 251 lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
241#if LJ_HASJIT 252#if LJ_HASJIT
@@ -245,10 +256,10 @@ LUA_API void lua_close(lua_State *L)
245#endif 256#endif
246 for (i = 0;;) { 257 for (i = 0;;) {
247 hook_enter(g); 258 hook_enter(g);
248 L->status = 0; 259 L->status = LUA_OK;
260 L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
249 L->cframe = NULL; 261 L->cframe = NULL;
250 L->base = L->top = tvref(L->stack) + 1; 262 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) {
251 if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
252 if (++i >= 10) break; 263 if (++i >= 10) break;
253 lj_gc_separateudata(g, 1); /* Separate udata again. */ 264 lj_gc_separateudata(g, 1); /* Separate udata again. */
254 if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ 265 if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */
@@ -263,7 +274,7 @@ lua_State *lj_state_new(lua_State *L)
263 lua_State *L1 = lj_mem_newobj(L, lua_State); 274 lua_State *L1 = lj_mem_newobj(L, lua_State);
264 L1->gct = ~LJ_TTHREAD; 275 L1->gct = ~LJ_TTHREAD;
265 L1->dummy_ffid = FF_C; 276 L1->dummy_ffid = FF_C;
266 L1->status = 0; 277 L1->status = LUA_OK;
267 L1->stacksize = 0; 278 L1->stacksize = 0;
268 setmref(L1->stack, NULL); 279 setmref(L1->stack, NULL);
269 L1->cframe = NULL; 280 L1->cframe = NULL;
@@ -279,6 +290,8 @@ lua_State *lj_state_new(lua_State *L)
279void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) 290void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
280{ 291{
281 lua_assert(L != mainthread(g)); 292 lua_assert(L != mainthread(g));
293 if (obj2gco(L) == gcref(g->cur_L))
294 setgcrefnull(g->cur_L);
282 lj_func_closeuv(L, tvref(L->stack)); 295 lj_func_closeuv(L, tvref(L->stack));
283 lua_assert(gcref(L->openupval) == NULL); 296 lua_assert(gcref(L->openupval) == NULL);
284 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); 297 lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
diff --git a/src/lj_state.h b/src/lj_state.h
index 18afe55c..9a8c7d93 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -28,7 +28,7 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
28 28
29LJ_FUNC lua_State *lj_state_new(lua_State *L); 29LJ_FUNC lua_State *lj_state_new(lua_State *L);
30LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); 30LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
31#if LJ_64 31#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
32LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); 32LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud);
33#endif 33#endif
34 34
diff --git a/src/lj_str.c b/src/lj_str.c
index 279c5cc3..ec74afa5 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,13 +1,8 @@
1/* 1/*
2** String handling. 2** String handling.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4**
5** Portions taken verbatim or adapted from the Lua interpreter.
6** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
7*/ 4*/
8 5
9#include <stdio.h>
10
11#define lj_str_c 6#define lj_str_c
12#define LUA_CORE 7#define LUA_CORE
13 8
@@ -15,10 +10,9 @@
15#include "lj_gc.h" 10#include "lj_gc.h"
16#include "lj_err.h" 11#include "lj_err.h"
17#include "lj_str.h" 12#include "lj_str.h"
18#include "lj_state.h"
19#include "lj_char.h" 13#include "lj_char.h"
20 14
21/* -- String interning ---------------------------------------------------- */ 15/* -- String helpers ------------------------------------------------------ */
22 16
23/* Ordered compare of strings. Assumes string data is 4-byte aligned. */ 17/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
24int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) 18int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -64,6 +58,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
64 return 0; 58 return 0;
65} 59}
66 60
61/* Find fixed string p inside string s. */
/*
** s/slen: text to search and its length. p/plen: string to look for and
** its length. Returns a pointer to the first occurrence of p inside s,
** s itself for an empty p, or NULL if p does not occur (or is longer than s).
*/
62const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
63{
64 if (plen <= slen) {
65 if (plen == 0) {
66 return s;
67 } else {
/* Split p into its first byte c and the remaining plen bytes. */
68 int c = *(const uint8_t *)p++;
/* From here on slen counts the remaining candidate start positions. */
69 plen--; slen -= plen;
70 while (slen) {
/* Locate the next candidate by its first byte, then compare the rest. */
71 const char *q = (const char *)memchr(s, c, slen);
72 if (!q) break;
73 if (memcmp(q+1, p, plen) == 0) return q;
/* Mismatch: continue the search just behind the rejected candidate. */
74 q++; slen -= (MSize)(q-s); s = q;
75 }
76 }
77 }
78 return NULL;
79}
80
81/* Check whether a string has a pattern matching character. */
82int lj_str_haspattern(GCstr *s)
83{
84 const char *p = strdata(s), *q = p + s->len;
85 while (p < q) {
86 int c = *(const uint8_t *)p++;
87 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
88 return 1; /* Found a pattern matching char. */
89 }
90 return 0; /* No pattern matching chars found. */
91}
92
93/* -- String interning ---------------------------------------------------- */
94
67/* Resize the string hash table (grow and shrink). */ 95/* Resize the string hash table (grow and shrink). */
68void lj_str_resize(lua_State *L, MSize newmask) 96void lj_str_resize(lua_State *L, MSize newmask)
69{ 97{
@@ -167,173 +195,3 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
167 lj_mem_free(g, s, sizestring(s)); 195 lj_mem_free(g, s, sizestring(s));
168} 196}
169 197
170/* -- Type conversions ---------------------------------------------------- */
171
172/* Print number to buffer. Canonicalizes non-finite values. */
173size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
174{
175 if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
176 lua_Number n = o->n;
177#if __BIONIC__
178 if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
179#endif
180 return (size_t)lua_number2str(s, n);
181 } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
182 s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
183 } else if ((o->u32.hi & 0x80000000) == 0) {
184 s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
185 } else {
186 s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
187 }
188}
189
190/* Print integer to buffer. Returns pointer to start. */
191char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
192{
193 uint32_t u = (uint32_t)(k < 0 ? -k : k);
194 p += 1+10;
195 do { *--p = (char)('0' + u % 10); } while (u /= 10);
196 if (k < 0) *--p = '-';
197 return p;
198}
199
200/* Convert number to string. */
201GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
202{
203 char buf[LJ_STR_NUMBUF];
204 size_t len = lj_str_bufnum(buf, (TValue *)np);
205 return lj_str_new(L, buf, len);
206}
207
208/* Convert integer to string. */
209GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
210{
211 char s[1+10];
212 char *p = lj_str_bufint(s, k);
213 return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
214}
215
216GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
217{
218 return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
219}
220
221/* -- String formatting --------------------------------------------------- */
222
223static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
224{
225 char *p;
226 MSize i;
227 if (sb->n + len > sb->sz) {
228 MSize sz = sb->sz * 2;
229 while (sb->n + len > sz) sz = sz * 2;
230 lj_str_resizebuf(L, sb, sz);
231 }
232 p = sb->buf + sb->n;
233 sb->n += len;
234 for (i = 0; i < len; i++) p[i] = str[i];
235}
236
237static void addchar(lua_State *L, SBuf *sb, int c)
238{
239 if (sb->n + 1 > sb->sz) {
240 MSize sz = sb->sz * 2;
241 lj_str_resizebuf(L, sb, sz);
242 }
243 sb->buf[sb->n++] = (char)c;
244}
245
246/* Push formatted message as a string object to Lua stack. va_list variant. */
247const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
248{
249 SBuf *sb = &G(L)->tmpbuf;
250 lj_str_needbuf(L, sb, (MSize)strlen(fmt));
251 lj_str_resetbuf(sb);
252 for (;;) {
253 const char *e = strchr(fmt, '%');
254 if (e == NULL) break;
255 addstr(L, sb, fmt, (MSize)(e-fmt));
256 /* This function only handles %s, %c, %d, %f and %p formats. */
257 switch (e[1]) {
258 case 's': {
259 const char *s = va_arg(argp, char *);
260 if (s == NULL) s = "(null)";
261 addstr(L, sb, s, (MSize)strlen(s));
262 break;
263 }
264 case 'c':
265 addchar(L, sb, va_arg(argp, int));
266 break;
267 case 'd': {
268 char buf[LJ_STR_INTBUF];
269 char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
270 addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
271 break;
272 }
273 case 'f': {
274 char buf[LJ_STR_NUMBUF];
275 TValue tv;
276 MSize len;
277 tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
278 len = (MSize)lj_str_bufnum(buf, &tv);
279 addstr(L, sb, buf, len);
280 break;
281 }
282 case 'p': {
283#define FMTP_CHARS (2*sizeof(ptrdiff_t))
284 char buf[2+FMTP_CHARS];
285 ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
286 ptrdiff_t i, lasti = 2+FMTP_CHARS;
287 if (p == 0) {
288 addstr(L, sb, "NULL", 4);
289 break;
290 }
291#if LJ_64
292 /* Shorten output for 64 bit pointers. */
293 lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
294#endif
295 buf[0] = '0';
296 buf[1] = 'x';
297 for (i = lasti-1; i >= 2; i--, p >>= 4)
298 buf[i] = "0123456789abcdef"[(p & 15)];
299 addstr(L, sb, buf, (MSize)lasti);
300 break;
301 }
302 case '%':
303 addchar(L, sb, '%');
304 break;
305 default:
306 addchar(L, sb, '%');
307 addchar(L, sb, e[1]);
308 break;
309 }
310 fmt = e+2;
311 }
312 addstr(L, sb, fmt, (MSize)strlen(fmt));
313 setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
314 incr_top(L);
315 return strVdata(L->top - 1);
316}
317
318/* Push formatted message as a string object to Lua stack. Vararg variant. */
319const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
320{
321 const char *msg;
322 va_list argp;
323 va_start(argp, fmt);
324 msg = lj_str_pushvf(L, fmt, argp);
325 va_end(argp);
326 return msg;
327}
328
329/* -- Buffer handling ----------------------------------------------------- */
330
331char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
332{
333 if (sz > sb->sz) {
334 if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
335 lj_str_resizebuf(L, sb, sz);
336 }
337 return sb->buf;
338}
339
diff --git a/src/lj_str.h b/src/lj_str.h
index e7687cb1..2e9bfc1d 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,8 +10,13 @@
10 10
11#include "lj_obj.h" 11#include "lj_obj.h"
12 12
13/* String interning. */ 13/* String helpers. */
14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); 14LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
15LJ_FUNC const char *lj_str_find(const char *s, const char *f,
16 MSize slen, MSize flen);
17LJ_FUNC int lj_str_haspattern(GCstr *s);
18
19/* String interning. */
15LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); 20LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
16LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); 21LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
17LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); 22LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
@@ -19,32 +24,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
19#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) 24#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
20#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) 25#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
21 26
22/* Type conversions. */
23LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
24LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
25LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
26LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
27LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
28
29#define LJ_STR_INTBUF (1+10)
30#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
31
32/* String formatting. */
33LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
34LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
35#if defined(__GNUC__)
36 __attribute__ ((format (printf, 2, 3)))
37#endif
38 ;
39
40/* Resizable string buffers. Struct definition in lj_obj.h. */
41LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
42
43#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
44#define lj_str_resetbuf(sb) ((sb)->n = 0)
45#define lj_str_resizebuf(L, sb, size) \
46 ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
47 (sb)->sz = (size))
48#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
49
50#endif 27#endif
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..8f968d32
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,472 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#include <stdio.h>
7
8#define lj_strfmt_c
9#define LUA_CORE
10
11#include "lj_obj.h"
12#include "lj_buf.h"
13#include "lj_str.h"
14#include "lj_state.h"
15#include "lj_char.h"
16#include "lj_strfmt.h"
17
18/* -- Format parser ------------------------------------------------------- */
19
/*
** Conversion lookup table, indexed by (conversion character - 'A') and
** covering 'A'..'x'. A zero entry marks a character that is not a supported
** conversion. The per-row character ranges below assume ASCII.
*/
20static const uint8_t strfmt_map[('x'-'A')+1] = {
21 STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, /* 'A'..'M' */
22 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, /* 'N'..'Z' */
23 0,0,0,0,0,0, /* The six characters between 'Z' and 'a'. */
24 STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, /* 'a'..'m' */
25 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X /* 'n'..'x' */
26};
27
/*
** Parse the next element of a format string.
**
** Scanning starts at fs->p and stops at fs->e. Each call returns one of:
** - STRFMT_LIT: a run of literal text, described by fs->str and fs->len.
**   A "%%" sequence ends the run and contributes a single '%' to it.
** - A format indicator: the conversion type from strfmt_map combined with
**   the parsed flags, width and precision (and STRFMT_F_UPPER for an
**   uppercase conversion character). fs->p is advanced past the conversion.
** - STRFMT_ERR: unsupported conversion. fs->str/fs->len cover the offending
**   specifier and fs->p is set to fs->e, so parsing ends.
** - STRFMT_EOF: nothing is left to parse.
**
** NOTE(review): p[1] and the flag/width/precision scanning are not checked
** against fs->e; presumably the format string has a terminator byte behind
** fs->e -- confirm against the callers.
*/
28SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
29{
30 const uint8_t *p = fs->p, *e = fs->e;
31 fs->str = (const char *)p;
32 for (; p < e; p++) {
33 if (*p == '%') { /* Escape char? */
34 if (p[1] == '%') { /* '%%'? */
/* Literal ends behind the first '%', parsing resumes behind the second. */
35 fs->p = ++p+1;
36 goto retlit;
37 } else {
38 SFormat sf = 0;
39 uint32_t c;
/* Pending literal text in front of the specifier is returned first. */
40 if (p != (const uint8_t *)fs->str)
41 break;
/* Only characters in the range ' '..'0' can be flags. */
42 for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
43 /* Parse flags. */
44 if (*p == '-') sf |= STRFMT_F_LEFT;
45 else if (*p == '+') sf |= STRFMT_F_PLUS;
46 else if (*p == '0') sf |= STRFMT_F_ZERO;
47 else if (*p == ' ') sf |= STRFMT_F_SPACE;
48 else if (*p == '#') sf |= STRFMT_F_ALT;
49 else break;
50 }
/* At most two width digits are consumed. */
51 if ((uint32_t)*p - '0' < 10) { /* Parse width. */
52 uint32_t width = (uint32_t)*p++ - '0';
53 if ((uint32_t)*p - '0' < 10)
54 width = (uint32_t)*p++ - '0' + width*10;
55 sf |= (width << STRFMT_SH_WIDTH);
56 }
/* At most two precision digits; stored as prec+1, so 0 means "not given". */
57 if (*p == '.') { /* Parse precision. */
58 uint32_t prec = 0;
59 p++;
60 if ((uint32_t)*p - '0' < 10) {
61 prec = (uint32_t)*p++ - '0';
62 if ((uint32_t)*p - '0' < 10)
63 prec = (uint32_t)*p++ - '0' + prec*10;
64 }
65 sf |= ((prec+1) << STRFMT_SH_PREC);
66 }
67 /* Parse conversion. */
68 c = (uint32_t)*p - 'A';
69 if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
70 uint32_t sx = strfmt_map[c];
71 if (sx) {
72 fs->p = p+1;
/* Bit 0x20 of c is clear for the uppercase conversion characters. */
73 return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
74 }
75 }
76 /* Return error location. */
/* Include the offending character unless it is a control character. */
77 if (*p >= 32) p++;
78 fs->len = (MSize)(p - (const uint8_t *)fs->str);
79 fs->p = fs->e;
80 return STRFMT_ERR;
81 }
82 }
83 }
84 fs->p = p;
85retlit:
86 fs->len = (MSize)(p - (const uint8_t *)fs->str);
87 return fs->len ? STRFMT_LIT : STRFMT_EOF;
88}
89
90/* -- Raw conversions ----------------------------------------------------- */
91
92#define WINT_R(x, sh, sc) \
93 { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
94
95/* Write integer to buffer. */
96char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
97{
98 uint32_t u = (uint32_t)k;
99 if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
100 if (u < 10000) {
101 if (u < 10) goto dig1;
102 if (u < 100) goto dig2;
103 if (u < 1000) goto dig3;
104 } else {
105 uint32_t v = u / 10000; u -= v * 10000;
106 if (v < 10000) {
107 if (v < 10) goto dig5;
108 if (v < 100) goto dig6;
109 if (v < 1000) goto dig7;
110 } else {
111 uint32_t w = v / 10000; v -= w * 10000;
112 if (w >= 10) WINT_R(w, 10, 10)
113 *p++ = (char)('0'+w);
114 }
115 WINT_R(v, 23, 1000)
116 dig7: WINT_R(v, 12, 100)
117 dig6: WINT_R(v, 10, 10)
118 dig5: *p++ = (char)('0'+v);
119 }
120 WINT_R(u, 23, 1000)
121 dig3: WINT_R(u, 12, 100)
122 dig2: WINT_R(u, 10, 10)
123 dig1: *p++ = (char)('0'+u);
124 return p;
125}
126#undef WINT_R
127
128/* Write pointer to buffer. */
129char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
130{
131 ptrdiff_t x = (ptrdiff_t)v;
132 MSize i, n = STRFMT_MAXBUF_PTR;
133 if (x == 0) {
134 *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
135 return p;
136 }
137#if LJ_64
138 /* Shorten output for 64 bit pointers. */
139 n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
140#endif
141 p[0] = '0';
142 p[1] = 'x';
143 for (i = n-1; i >= 2; i--, x >>= 4)
144 p[i] = "0123456789abcdef"[(x & 15)];
145 return p+n;
146}
147
148/* Write ULEB128 to buffer. */
149char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
150{
151 for (; v >= 0x80; v >>= 7)
152 *p++ = (char)((v & 0x7f) | 0x80);
153 *p++ = (char)v;
154 return p;
155}
156
157/* Return string or write number to tmp buffer and return pointer to start. */
158const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
159{
160 SBuf *sb;
161 if (tvisstr(o)) {
162 *lenp = strV(o)->len;
163 return strVdata(o);
164 } else if (tvisint(o)) {
165 sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
166 } else if (tvisnum(o)) {
167 sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
168 } else {
169 return NULL;
170 }
171 *lenp = sbuflen(sb);
172 return sbufB(sb);
173}
174
175/* -- Unformatted conversions to buffer ----------------------------------- */
176
177/* Add integer to buffer. */
178SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
179{
180 setsbufP(sb, lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k));
181 return sb;
182}
183
184#if LJ_HASJIT
185/* Add number to buffer. */
/* Only compiled if LJ_HASJIT is set. Forwards o->n with the STRFMT_G14 format. */
186SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
187{
188 return lj_strfmt_putfnum(sb, STRFMT_G14, o->n);
189}
190#endif
191
192SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
193{
194 setsbufP(sb, lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v));
195 return sb;
196}
197
198/* Add quoted string to buffer. */
/*
** The string is wrapped in double quotes. A double quote, backslash or
** newline is preceded by a backslash. Other control characters are written
** as a backslash followed by their decimal code. All remaining bytes are
** copied unchanged.
*/
199SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
200{
201 const char *s = strdata(str);
202 MSize len = str->len;
203 lj_buf_putb(sb, '"');
204 while (len--) {
205 uint32_t c = (uint32_t)(uint8_t)*s++;
/* Worst case per input byte: backslash plus three decimal digits. */
206 char *p = lj_buf_more(sb, 4);
207 if (c == '"' || c == '\\' || c == '\n') {
208 *p++ = '\\';
209 } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
210 uint32_t d;
211 *p++ = '\\';
/*
** Use all three digits if the code needs them or if the next input byte
** is a digit, which would otherwise read as part of the escape.
** NOTE(review): for the last byte *s is the byte behind the string data;
** presumably a terminator -- confirm.
*/
212 if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
213 *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
214 goto tens;
215 } else if (c >= 10) {
216 tens:
/* d is c/10 for the values below 100 that can reach this point. */
217 d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);
218 }
/* Turn the remaining ones digit into its character. */
219 c += '0';
220 }
221 *p++ = (char)c;
222 setsbufP(sb, p);
223 }
224 lj_buf_putb(sb, '"');
225 return sb;
226}
227
228/* -- Formatted conversions to buffer ------------------------------------- */
229
230/* Add formatted char to buffer. */
231SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
232{
233 MSize width = STRFMT_WIDTH(sf);
234 char *p = lj_buf_more(sb, width > 1 ? width : 1);
235 if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
236 while (width-- > 1) *p++ = ' ';
237 if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
238 setsbufP(sb, p);
239 return sb;
240}
241
242/* Add formatted string to buffer. */
243SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
244{
245 MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
246 MSize width = STRFMT_WIDTH(sf);
247 char *p = lj_buf_more(sb, width > len ? width : len);
248 if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
249 while (width-- > len) *p++ = ' ';
250 if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
251 setsbufP(sb, p);
252 return sb;
253}
254
255/* Add formatted signed/unsigned integer to buffer. */
256SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
257{
258 char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *p;
259#ifdef LUA_USE_ASSERT
260 char *ps;
261#endif
262 MSize prefix = 0, len, prec, pprec, width, need;
263
264 /* Figure out signed prefixes. */
265 if (STRFMT_TYPE(sf) == STRFMT_INT) {
266 if ((int64_t)k < 0) {
267 k = (uint64_t)-(int64_t)k;
268 prefix = 256 + '-';
269 } else if ((sf & STRFMT_F_PLUS)) {
270 prefix = 256 + '+';
271 } else if ((sf & STRFMT_F_SPACE)) {
272 prefix = 256 + ' ';
273 }
274 }
275
276 /* Convert number and store to fixed-size buffer in reverse order. */
277 prec = STRFMT_PREC(sf);
278 if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
279 if (k == 0) { /* Special-case zero argument. */
280 if (prec != 0 ||
281 (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
282 *--q = '0';
283 } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
284 uint32_t k2;
285 while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
286 k2 = (uint32_t)k;
287 do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
288 } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
289 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
290 "0123456789abcdef";
291 do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
292 if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
293 } else { /* Octal. */
294 do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
295 if ((sf & STRFMT_F_ALT)) *--q = '0';
296 }
297
298 /* Calculate sizes. */
299 len = (MSize)(buf + sizeof(buf) - q);
300 if ((int32_t)len >= (int32_t)prec) prec = len;
301 width = STRFMT_WIDTH(sf);
302 pprec = prec + (prefix >> 8);
303 need = width > pprec ? width : pprec;
304 p = lj_buf_more(sb, need);
305#ifdef LUA_USE_ASSERT
306 ps = p;
307#endif
308
309 /* Format number with leading/trailing whitespace and zeros. */
310 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
311 while (width-- > pprec) *p++ = ' ';
312 if (prefix) {
313 if ((char)prefix >= 'X') *p++ = '0';
314 *p++ = (char)prefix;
315 }
316 if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
317 while (width-- > pprec) *p++ = '0';
318 while (prec-- > len) *p++ = '0';
319 while (q < buf + sizeof(buf)) *p++ = *q++; /* Add number itself. */
320 if ((sf & STRFMT_F_LEFT))
321 while (width-- > pprec) *p++ = ' ';
322
323 lua_assert(need == (MSize)(p - ps));
324 setsbufP(sb, p);
325 return sb;
326}
327
328/* Add number formatted as signed integer to buffer. */
329SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
330{
331 int64_t k = (int64_t)n;
332 if (checki32(k) && sf == STRFMT_INT)
333 return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
334 else
335 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
336}
337
338/* Add number formatted as unsigned integer to buffer. */
339SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
340{
341 int64_t k;
342 if (n >= 9223372036854775808.0)
343 k = (int64_t)(n - 18446744073709551616.0);
344 else
345 k = (int64_t)n;
346 return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
347}
348
349/* -- Conversions to strings ---------------------------------------------- */
350
351/* Convert integer to string. */
352GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
353{
354 char buf[STRFMT_MAXBUF_INT];
355 MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
356 return lj_str_new(L, buf, len);
357}
358
359/* Convert integer or number to string. */
360GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
361{
362 return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
363}
364
#if LJ_HASJIT
/* Convert char value to string. The result is a string of length 1. */
GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
{
  char ch = (char)c;
  return lj_str_new(L, &ch, 1);
}
#endif
374
375/* Raw conversion of object to string. */
376GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
377{
378 if (tvisstr(o)) {
379 return strV(o);
380 } else if (tvisnumber(o)) {
381 return lj_strfmt_number(L, o);
382 } else if (tvisnil(o)) {
383 return lj_str_newlit(L, "nil");
384 } else if (tvisfalse(o)) {
385 return lj_str_newlit(L, "false");
386 } else if (tvistrue(o)) {
387 return lj_str_newlit(L, "true");
388 } else {
389 char buf[8+2+2+16], *p = buf;
390 p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
391 *p++ = ':'; *p++ = ' ';
392 if (tvisfunc(o) && isffunc(funcV(o))) {
393 p = lj_buf_wmem(p, "builtin#", 8);
394 p = lj_strfmt_wint(p, funcV(o)->c.ffid);
395 } else {
396 p = lj_strfmt_wptr(p, lj_obj_ptr(o));
397 }
398 return lj_str_new(L, buf, (size_t)(p - buf));
399 }
400}
401
402/* -- Internal string formatting ------------------------------------------ */
403
404/*
405** These functions are only used for lua_pushfstring(), lua_pushvfstring()
406** and for internal string formatting (e.g. error messages). Caveat: unlike
407** string.format(), only a limited subset of formats and flags are supported!
408**
409** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
410** - %d %u %o %x with full formatting, 32 bit integers only.
411** - %f and other FP formats are really %.14g.
412** - %s %c %p without formatting.
413*/
414
415/* Push formatted message as a string object to Lua stack. va_list variant. */
/*
** The format string is parsed element by element. Each conversion consumes
** one argument from argp and appends its text to the temporary buffer of L.
** The resulting string object is pushed on the Lua stack and a pointer to
** its character data is returned.
*/
416const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
417{
418 SBuf *sb = lj_buf_tmp_(L);
419 FormatState fs;
420 SFormat sf;
421 GCstr *str;
422 lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
423 while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
424 switch (STRFMT_TYPE(sf)) {
425 case STRFMT_LIT:
426 lj_buf_putmem(sb, fs.str, fs.len);
427 break;
/* Integer arguments are fetched as 32 bit values and widened to 64 bit. */
428 case STRFMT_INT:
429 lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
430 break;
431 case STRFMT_UINT:
432 lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
433 break;
/* Every FP conversion uses STRFMT_G14; flags, width and precision in sf are ignored. */
434 case STRFMT_NUM:
435 lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
436 break;
437 case STRFMT_STR: {
438 const char *s = va_arg(argp, char *);
/* A NULL string argument is printed as a placeholder. */
439 if (s == NULL) s = "(null)";
440 lj_buf_putmem(sb, s, (MSize)strlen(s));
441 break;
442 }
443 case STRFMT_CHAR:
444 lj_buf_putb(sb, va_arg(argp, int));
445 break;
446 case STRFMT_PTR:
447 lj_strfmt_putptr(sb, va_arg(argp, void *));
448 break;
/* Unsupported conversion: emit a '?' and trip the assertion. */
449 case STRFMT_ERR:
450 default:
451 lj_buf_putb(sb, '?');
452 lua_assert(0);
453 break;
454 }
455 }
456 str = lj_buf_str(L, sb);
457 setstrV(L, L->top, str);
458 incr_top(L);
459 return strdata(str);
460}
461
462/* Push formatted message as a string object to Lua stack. Vararg variant. */
463const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
464{
465 const char *msg;
466 va_list argp;
467 va_start(argp, fmt);
468 msg = lj_strfmt_pushvf(L, fmt, argp);
469 va_end(argp);
470 return msg;
471}
472
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..339f8e15
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,125 @@
1/*
2** String formatting.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_STRFMT_H
7#define _LJ_STRFMT_H
8
9#include "lj_obj.h"
10
/*
** A format indicator packs one parsed conversion into 32 bits. As laid out
** by the STRFMT_* macros in this header: bits 0-3 hold the FormatType,
** bits 4-5 a type-specific subtype, bits 8-13 the flags, bits 16-23 the
** field width and bits 24-31 the precision plus one.
*/
11typedef uint32_t SFormat; /* Format indicator. */
12
13/* Format parser state. */
/* p and e delimit the not yet parsed part of the format string. */
/* str and len describe the literal run read back for STRFMT_LIT results. */
14typedef struct FormatState {
15 const uint8_t *p; /* Current format string pointer. */
16 const uint8_t *e; /* End of format string. */
17 const char *str; /* Returned literal string. */
18 MSize len; /* Size of literal string. */
19} FormatState;
20
21/* Format types (max. 16). */
/* The limit of 16 follows from STRFMT_TYPE(), which masks with 15. */
22typedef enum FormatType {
23 STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
24 STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
25} FormatType;
26
27/* Format subtypes (bits are reused). */
/*
** The subtype lives in bits 4-5 and its meaning depends on the base type,
** e.g. 0x0010 is HEX for STRFMT_UINT, FP_E for STRFMT_NUM and QUOTED for
** STRFMT_STR. Note that STRFMT_T_FP_G == STRFMT_T_FP_E|STRFMT_T_FP_F, so
** testing a single FP bit also matches %g.
*/
28#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
29#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
30#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
31#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
32#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
33#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
34#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
35
36/* Format flags. */
/* Presumably these map to the printf flags '-', '+', '0', ' ' and '#'; */
/* UPPER is used by the number formatter to select upper-case output. */
37#define STRFMT_F_LEFT 0x0100
38#define STRFMT_F_PLUS 0x0200
39#define STRFMT_F_ZERO 0x0400
40#define STRFMT_F_SPACE 0x0800
41#define STRFMT_F_ALT 0x1000
42#define STRFMT_F_UPPER 0x2000
43
44/* Format indicator fields. */
45#define STRFMT_SH_WIDTH 16
46#define STRFMT_SH_PREC 24
47
/*
** Field accessors. The precision is stored biased by one: a stored field of
** zero makes STRFMT_PREC() yield (unsigned)-1, which callers test as a
** negative int32_t to detect "no precision given".
*/
48#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
49#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
50#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
51#define STRFMT_FP(sf) (((sf) >> 4) & 3)
52
53/* Formats for conversion characters. */
54#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
55#define STRFMT_C (STRFMT_CHAR)
56#define STRFMT_D (STRFMT_INT)
57#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
58#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
59#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
60#define STRFMT_I STRFMT_D
61#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
62#define STRFMT_P (STRFMT_PTR)
63#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
64#define STRFMT_S (STRFMT_STR)
65#define STRFMT_U (STRFMT_UINT)
66#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
/* %.14g: the precision 14 is stored as 14+1 because of the bias. */
67#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC))
68
69/* Maximum buffer sizes for conversions. */
70#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */
71#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */
72#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */
73#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */
74
75/* Format parser. */
76LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
77
78static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
79{
80 fs->p = (const uint8_t *)p;
81 fs->e = (const uint8_t *)p + len;
82 lua_assert(*fs->e == 0); /* Must be NUL-terminated (may have NULs inside). */
83}
84
85/* Raw conversions. */
86LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
87LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
88LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
89LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp);
90
91/* Unformatted conversions to buffer. */
92LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
93#if LJ_HASJIT
94LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
95#endif
96LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
97LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
98
99/* Formatted conversions to buffer. */
100LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
101LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
102LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
103LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
104LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
105LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
106
107/* Conversions to strings. */
108LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
109LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
110LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
111#if LJ_HASJIT
112LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
113#endif
114LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
115
116/* Internal string formatting. */
117LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
118 va_list argp);
119LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
120#ifdef __GNUC__
121 __attribute__ ((format (printf, 2, 3)))
122#endif
123 ;
124
125#endif
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c
new file mode 100644
index 00000000..36b11dc0
--- /dev/null
+++ b/src/lj_strfmt_num.c
@@ -0,0 +1,592 @@
1/*
2** String formatting for floating-point numbers.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4** Contributed by Peter Cawley.
5*/
6
7#include <stdio.h>
8
9#define lj_strfmt_num_c
10#define LUA_CORE
11
12#include "lj_obj.h"
13#include "lj_buf.h"
14#include "lj_str.h"
15#include "lj_strfmt.h"
16
17/* -- Precomputed tables -------------------------------------------------- */
18
19/* Rescale factors to push the exponent of a number towards zero. */
/*
** The list has 32 entries; both tables are indexed with the 11 bit biased
** binary exponent shifted right by 6. P(X) entries scale small numbers up
** by 10^X, N(X) entries scale large numbers down by 10^-X, and P(0) means
** that no rescaling is done for that exponent range.
*/
20#define RESCALE_EXPONENTS(P, N) \
21 P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \
22 P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \
23 N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \
24 N(251), N(270), N(289)
25
/* Token pasting builds the floating-point literals 1e+0X and 1e-0X. */
26#define ONE_E_P(X) 1e+0 ## X
27#define ONE_E_N(X) 1e-0 ## X
/* rescale_e: decimal exponent bias to undo the scaling (-X for P, +X for N). */
28static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) };
/* rescale_n: the matching multiplier (10^X for P, 10^-X for N). */
29static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) };
30#undef ONE_E_N
31#undef ONE_E_P
32
33/*
34** For p in range -70 through 57, this table encodes pairs (m, e) such that
35** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds.
**
** The 128 pairs are stored flat as m, e, m, e, ... so pair number idx starts
** at four_ulp_m_e[2*idx]. m is stored in an int8_t and must be cast to
** uint8_t before use, so negative entries stand for values 128..255.
36*/
37static const int8_t four_ulp_m_e[] = {
38 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19,
39 -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16,
40 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14,
41 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3,
42 -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7,
43 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103,
44 -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3,
45 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2,
46 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4,
47 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34,
48 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10,
49 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13,
50 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15,
51 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17
52};
53
/*
** min(2^32-1, 10^e-1) for e in range 0 through 10.
**
** Entry e is the largest value that has at most e decimal digits, saturated
** to UINT32_MAX for e = 10. For e <= 9, adding one to an entry gives 10^e.
** The table is only ever read, so it is const like the other lookup tables
** in this file.
*/
static const uint32_t ndigits_dec_threshold[] = {
  0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
  9999999U, 99999999U, 999999999U, 0xffffffffU
};
59
60/* -- Helper functions ---------------------------------------------------- */
61
62/* Compute the number of digits in the decimal representation of x. */
63static MSize ndigits_dec(uint32_t x)
64{
65 MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */
66 return t + (x > ndigits_dec_threshold[t]);
67}
68
/*
** Emit the leading decimal digit of x to *p++ and reduce x modulo sc.
** The quotient x/sc is computed without a division: x is multiplied by
** ceil(2^sh/sc) and shifted right by sh.
*/
#define WINT_R(x, sh, sc) \
  do { \
    uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; \
    *p++ = (char)('0'+d); \
    x -= d*sc; \
  } while (0)

/*
** Write 9-digit unsigned integer to buffer.
** Always writes exactly nine characters (with leading zeroes) at p and
** returns the position after the last one.
*/
static char *lj_strfmt_wuint9(char *p, uint32_t u)
{
  uint32_t upper = u / 10000;          /* Everything above the low 4 digits. */
  uint32_t low4 = u - upper * 10000;   /* Low 4 digits. */
  uint32_t top = upper / 10000;        /* Most significant digit. */
  uint32_t mid4 = upper - top * 10000; /* Middle 4 digits. */
  *p++ = (char)('0'+top);
  WINT_R(mid4, 23, 1000);
  WINT_R(mid4, 12, 100);
  WINT_R(mid4, 10, 10);
  *p++ = (char)('0'+mid4);
  WINT_R(low4, 23, 1000);
  WINT_R(low4, 12, 100);
  WINT_R(low4, 10, 10);
  *p++ = (char)('0'+low4);
  return p;
}
#undef WINT_R
91
92/* -- Extended precision arithmetic --------------------------------------- */
93
94/*
95** The "nd" format is a fixed-precision decimal representation for numbers. It
96** consists of up to 64 uint32_t values, with each uint32_t storing a value
97** in the range [0, 1e9). A number in "nd" format consists of three variables:
98**
99** uint32_t nd[64];
100** uint32_t ndlo;
101** uint32_t ndhi;
102**
103** The integral part of the number is stored in nd[0 ... ndhi], the value of
104** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of
105** the number is zero, ndlo is zero. Otherwise, the fractional part is stored
106** in nd[ndlo ... 63], the value of which is taken to be
107** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}.
108**
109** If the array part had 128 elements rather than 64, then every double would
110** have an exact representation in "nd" format. With 64 elements, all integral
111** doubles have an exact representation, and all non-integral doubles have
112** enough digits to make both %.99e and %.99f do the right thing.
113*/
114
/*
** Parameters for nd_mul2k(). ND_MUL2K_MAX_SHIFT is the largest shift applied
** to a limb in one pass; ND_MUL2K_DIV1E9 divides the shifted 64 bit value by
** 10^9 to obtain the carry into the next limb.
**
** The non-64 bit variant avoids a 64 bit division: since 10^9 == 2^9 *
** 1953125, it shifts right by 9, truncates to 32 bits and divides by
** 1953125. Presumably the smaller shift of 11 is what keeps the shifted
** value within 32 bits after the >> 9 -- confirm.
*/
115#if LJ_64
116#define ND_MUL2K_MAX_SHIFT 29
117#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000))
118#else
119#define ND_MUL2K_MAX_SHIFT 11
120#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125)
121#endif
122
123/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */
/*
** nd/ndhi: the number in "nd" format (integral limbs nd[0..ndhi]).
** k: binary exponent to multiply by; applied in passes of at most
**   ND_MUL2K_MAX_SHIFT bits.
** carry_in: value merged into the lowest limb on the first pass.
** sf: format indicator; only used for the performance hack below.
** Returns the new ndhi (it grows whenever a pass carries out of the top limb).
*/
124static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k,
125 uint32_t carry_in, SFormat sf)
126{
127 uint32_t i, ndlo = 0, start = 1;
128 /* Performance hacks. */
/* For large shifts in formats other than %f, start is derived from ndhi */
/* and the precision. Once start catches up with ndlo, every new top limb */
/* also advances the local ndlo, so the lowest limbs are no longer updated. */
/* NOTE(review): presumably those limbs cannot affect the requested digits. */
129 if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) {
130 start = ndhi - (STRFMT_PREC(sf) + 17) / 8;
131 }
132 /* Real logic. */
133 while (k >= ND_MUL2K_MAX_SHIFT) {
134 for (i = ndlo; i <= ndhi; i++) {
/* Shift the limb, merge the carry, then split into limb and new carry. */
135 uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in;
136 carry_in = ND_MUL2K_DIV1E9(val);
137 nd[i] = (uint32_t)val - carry_in * 1000000000;
138 }
139 if (carry_in) {
140 nd[++ndhi] = carry_in; carry_in = 0;
141 if (start++ == ndlo) ++ndlo;
142 }
143 k -= ND_MUL2K_MAX_SHIFT;
144 }
/* Handle the remaining shift of fewer than ND_MUL2K_MAX_SHIFT bits. */
145 if (k) {
146 for (i = ndlo; i <= ndhi; i++) {
147 uint64_t val = ((uint64_t)nd[i] << k) | carry_in;
148 carry_in = ND_MUL2K_DIV1E9(val);
149 nd[i] = (uint32_t)val - carry_in * 1000000000;
150 }
151 if (carry_in) nd[++ndhi] = carry_in;
152 }
153 return ndhi;
154}
155
156/* Divide nd by 2^k (ndlo is assumed to be zero). */
/*
** nd/ndhi: the number in "nd" format; the array is treated as a ring of 64
**   limbs (indices are masked with 0x3f), so fractional limbs are added by
**   stepping ndlo downwards from 0 to 63, 62, ...
** k: binary exponent to divide by; applied 9 bits at a time, then the rest.
** sf: format indicator; only used to bound the work (stop1/stop2).
** Returns the new ndlo. The local ndhi may shrink here, but it is not
** returned to the caller.
*/
157static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf)
158{
159 uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0;
160 /* Performance hacks. */
/* Single limb: zero stays zero, and trailing zero bits are shifted out */
/* directly before falling back to the general loop. */
161 if (!ndhi) {
162 if (!nd[0]) {
163 return 0;
164 } else {
165 uint32_t s = lj_ffs(nd[0]);
166 if (s >= k) { nd[0] >>= k; return 0; }
167 nd[0] >>= s; k -= s;
168 }
169 }
/* For larger shifts, compute limb indices at which ndlo stops growing. */
/* NOTE(review): presumably limbs below these cannot affect the requested */
/* precision; floorlog2 estimates the magnitude at ~29 bits per limb. */
170 if (k > 18) {
171 if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) {
172 stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9;
173 } else {
174 int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k;
175 int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114);
176 stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9;
177 stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8;
178 }
179 }
180 /* Real logic. */
/* Divide by 2^9 per pass, walking from the top limb down to ndlo. The low */
/* 9 bits of a limb carry into the next lower limb scaled by 10^9/2^9, */
/* which is exactly 1953125. */
181 while (k >= 9) {
182 uint32_t i = ndhi, carry = 0;
183 for (;;) {
184 uint32_t val = nd[i];
185 nd[i] = (val >> 9) + carry;
186 carry = (val & 0x1ff) * 1953125;
187 if (i == ndlo) break;
188 i = (i - 1) & 0x3f;
189 }
/* Unless a stop index was reached, a remaining carry opens a new lowest */
/* limb. A top limb that became zero is dropped in either case. */
190 if (ndlo != stop1 && ndlo != stop2) {
191 if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
192 if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
193 } else if (!nd[ndhi]) {
194 if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
195 else return ndlo;
196 }
197 k -= 9;
198 }
/* Remaining shift of 1..8 bits: same scheme with mul = 10^9 / 2^k. */
199 if (k) {
200 uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0;
201 for (;;) {
202 uint32_t val = nd[i];
203 nd[i] = (val >> k) + carry;
204 carry = (val & mask) * mul;
205 if (i == ndlo) break;
206 i = (i - 1) & 0x3f;
207 }
208 if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
209 }
210 return ndlo;
211}
212
213/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */
/*
** The addend is placed in the limb that holds decimal position e and the
** carry is propagated upwards through the 64 limb ring. If the carry leaves
** the top limb, a new top limb with value 1 is appended.
** Returns the (possibly incremented) ndhi.
**
** NOTE(review): the rescale check in lj_strfmt_wfnum() passes m values taken
** from four_ulp_m_e (cast to uint8_t), which exceed the 0..9 range stated
** above -- confirm that the stated range is only meant for rounding calls.
*/
214static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e)
215{
216 uint32_t i, carry;
/* Pick limb i and scale m by 10^(e mod 9); the threshold table entry */
/* plus one is that power of ten. */
217 if (e >= 0) {
218 i = (uint32_t)e/9;
219 carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1);
220 } else {
/* (e-8)/9 truncates towards zero, which yields floor(e/9) for e < 0. */
/* Fractional limbs are addressed from the end of the array (64 + f). */
221 int32_t f = (e-8)/9;
222 i = (uint32_t)(64 + f);
223 carry = m * (ndigits_dec_threshold[e - f*9] + 1);
224 }
225 for (;;) {
226 uint32_t val = nd[i] + carry;
227 if (LJ_UNLIKELY(val >= 1000000000)) {
228 val -= 1000000000;
229 nd[i] = val;
230 if (LJ_UNLIKELY(i == ndhi)) {
231 ndhi = (ndhi + 1) & 0x3f;
232 nd[ndhi] = 1;
233 break;
234 }
235 carry = 1;
236 i = (i + 1) & 0x3f;
237 } else {
238 nd[i] = val;
239 break;
240 }
241 }
242 return ndhi;
243}
244
245/* Test whether two "nd" values are equal in their most significant digits. */
/*
** nd/ndhi: the value to test, with its most significant limb at nd[ndhi].
** ref: points at the saved most significant limb of the reference value;
**   its next lower limbs are read at ref[-1] and ref[-2].
** hilen: number of decimal digits in the most significant limb.
** prec: number of leading digits that must match.
** Returns non-zero if the first prec digits are identical and the digit
** following them falls on the same side of '5' in both values.
*/
246static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
247 MSize prec)
248{
249 char nd9[9], ref9[9];
/* Whole limbs covered by prec are compared directly, at most two of them. */
250 if (hilen <= prec) {
251 if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
252 prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f;
253 if (prec >= 9) {
254 if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
255 prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f;
256 }
257 } else {
/* The top limb is printed with 9 digits below, so skip its leading zeroes. */
258 prec -= hilen - 9;
259 }
260 lua_assert(prec < 9);
/* Compare the partially covered limb digit by digit in decimal form. */
261 lj_strfmt_wuint9(nd9, nd[ndhi]);
262 lj_strfmt_wuint9(ref9, *ref);
263 return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5');
264}
265
266/* -- Formatted conversions to buffer ------------------------------------- */
267
268/* Write formatted floating-point number to either sb or p. */
/*
** sb: destination buffer; only used when p is NULL, in which case space is
**   reserved with lj_buf_more() and the output is written there.
** sf: format indicator (subtype %a/%e/%f/%g, flags, width, precision).
** n: the number to format.
** p: optional caller-provided output position. The caller is responsible
**   for providing enough room.
** Returns the position after the last character written.
**
** Three paths: non-finite values (inf/nan), hexadecimal %a, and the decimal
** formats %e/%f/%g, which first convert n to the "nd" format.
*/
269static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
270{
271 MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len;
272 TValue t;
273 t.n = n;
/* Exponent bits all set (sign shifted out): infinity or NaN. */
274 if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) {
275 /* Handle non-finite values uniformly for %a, %e, %f, %g. */
/* ch packs three characters; XOR with 0x202020 flips them to upper case. */
276 int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
277 if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) {
278 ch ^= ('n' << 16) | ('a' << 8) | 'n';
279 if ((sf & STRFMT_F_SPACE)) prefix = ' ';
280 } else {
281 ch ^= ('i' << 16) | ('n' << 8) | 'f';
282 if ((t.u32.hi & 0x80000000)) prefix = '-';
283 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
284 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
285 }
286 len = 3 + (prefix != 0);
287 if (!p) p = lj_buf_more(sb, width > len ? width : len);
288 if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
289 if (prefix) *p++ = prefix;
290 *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
291 } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) {
292 /* %a */
/* hexdig[16] is the exponent character and hexdig[17] the 'x' of "0x". */
293 const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX"
294 : "0123456789abcdefpx";
295 int32_t e = (t.u32.hi >> 20) & 0x7ff;
296 char prefix = 0, eprefix = '+';
297 if (t.u32.hi & 0x80000000) prefix = '-';
298 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
299 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
300 t.u32.hi &= 0xfffff;
301 if (e) {
302 t.u32.hi |= 0x100000;
303 e -= 1023;
304 } else if (t.u32.lo | t.u32.hi) {
305 /* Non-zero denormal - normalise it. */
306 uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo);
307 e = -1022 - shift;
308 t.u64 <<= shift;
309 }
310 /* abs(n) == t.u64 * 2^(e - 52) */
311 /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */
312 if ((int32_t)prec < 0) {
313 /* Default precision: use smallest precision giving exact result. */
314 prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4;
315 } else if (prec < 13) {
316 /* Precision is sufficiently low as to maybe require rounding. */
317 t.u64 += (((uint64_t)1) << (51 - prec*4));
318 }
319 if (e < 0) {
320 eprefix = '-';
321 e = -e;
322 }
323 len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0)
324 + ((prec | (sf & STRFMT_F_ALT)) != 0);
325 if (!p) p = lj_buf_more(sb, width > len ? width : len);
326 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
327 while (width-- > len) *p++ = ' ';
328 }
329 if (prefix) *p++ = prefix;
330 *p++ = '0';
331 *p++ = hexdig[17]; /* x or X */
332 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
333 while (width-- > len) *p++ = '0';
334 }
335 *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */
336 if ((prec | (sf & STRFMT_F_ALT))) {
337 /* Emit fractional part. */
338 char *q = p + 1 + prec;
339 *p = '.';
340 if (prec < 13) t.u64 >>= (52 - prec*4);
341 else while (prec > 13) p[prec--] = '0';
342 while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; }
343 p = q;
344 }
345 *p++ = hexdig[16]; /* p or P */
346 *p++ = eprefix; /* + or - */
347 p = lj_strfmt_wint(p, e);
348 } else {
349 /* %e or %f or %g - begin by converting n to "nd" format. */
350 uint32_t nd[64];
351 uint32_t ndhi = 0, ndlo, i;
352 int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0;
353 char prefix = 0, *q;
354 if (t.u32.hi & 0x80000000) prefix = '-';
355 else if ((sf & STRFMT_F_PLUS)) prefix = '+';
356 else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
357 prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */
358 if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) {
359 /* %g - decrement precision if non-zero (to make it like %e). */
360 prec--;
361 prec ^= (uint32_t)((int32_t)prec >> 31);
362 }
/* Fast path for %e/%g with low precision: scale n by a power of ten */
/* first. The result is verified further below; if the check fails, */
/* control returns to rescale_failed and the exact conversion is used. */
363 if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) {
364 /* Precision is sufficiently low that rescaling will probably work. */
365 if ((ndebias = rescale_e[e >> 6])) {
366 t.n = n * rescale_n[e >> 6];
367 if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10;
368 t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */
369 nd[0] = 0x100000 | (t.u32.hi & 0xfffff);
370 e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29);
371 goto load_t_lo; rescale_failed:
372 t.n = n;
373 e = (t.u32.hi >> 20) & 0x7ff;
374 ndebias = ndhi = 0;
375 }
376 }
/* Exact path: load the mantissa into nd[0], with the implicit leading */
/* bit unless the number is denormal. */
377 nd[0] = t.u32.hi & 0xfffff;
378 if (e == 0) e++; else nd[0] |= 0x100000;
379 e -= 1043;
380 if (t.u32.lo) {
381 e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo:
382#if ND_MUL2K_MAX_SHIFT >= 29
383 nd[0] = (nd[0] << 3) | (t.u32.lo >> 29);
384 ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf);
385#elif ND_MUL2K_MAX_SHIFT >= 11
386 ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf);
387 ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf);
388 ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf);
389#else
390#error "ND_MUL2K_MAX_SHIFT too small"
391#endif
392 }
/* Apply the binary exponent: multiply for e >= 0, divide otherwise. */
393 if (e >= 0) {
394 ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf);
395 ndlo = 0;
396 } else {
397 ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf);
398 if (ndhi && !nd[ndhi]) ndhi--;
399 }
400 /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */
401 if ((sf & STRFMT_T_FP_E)) {
402 /* %e or %g - assume %e and start by calculating nd's exponent (nde). */
403 char eprefix = '+';
404 int32_t nde = -1;
405 MSize hilen;
/* No integral part: find the highest non-zero fractional limb instead. */
406 if (ndlo && !nd[ndhi]) {
407 ndhi = 64; do {} while (!nd[--ndhi]);
408 nde -= 64 * 9;
409 }
410 hilen = ndigits_dec(nd[ndhi]);
411 nde += ndhi * 9 + hilen;
412 if (ndebias) {
413 /*
414 ** Rescaling was performed, but this introduced some error, and might
415 ** have pushed us across a rounding boundary. We check whether this
416 ** error affected the result by introducing even more error (2ulp in
417 ** either direction), and seeing whether a rounding boundary was
418 ** crossed. Having already converted the -2ulp case, we save off its
419 ** most significant digits, convert the +2ulp case, and compare them.
420 */
421 int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
422 + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
423 const int8_t *m_e = four_ulp_m_e + eidx * 2;
424 lua_assert(0 <= eidx && eidx < 128);
/* nd[31..33] serve as scratch space for the saved top three limbs. */
425 nd[33] = nd[ndhi];
426 nd[32] = nd[(ndhi - 1) & 0x3f];
427 nd[31] = nd[(ndhi - 2) & 0x3f];
428 nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]);
429 if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) {
430 goto rescale_failed;
431 }
432 }
433 if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) {
434 /* Precision is sufficiently low as to maybe require rounding. */
435 ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1);
436 nde += (hilen != ndigits_dec(nd[ndhi]));
437 }
438 nde += ndebias;
439 if ((sf & STRFMT_T_FP_F)) {
440 /* %g */
441 if ((int32_t)prec >= nde && nde >= -4) {
442 if (nde < 0) ndhi = 0;
443 prec -= nde;
444 goto g_format_like_f;
445 } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) {
446 /* Decrease precision in order to strip trailing zeroes. */
447 char tail[9];
448 uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9;
449 if (prec >= maxprec) prec = maxprec;
450 else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f;
451 i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10;
452 lj_strfmt_wuint9(tail, nd[ndlo]);
453 while (prec && tail[--i] == '0') {
454 prec--;
455 if (!i) {
456 if (ndlo == ndhi) { prec = 0; break; }
457 lj_strfmt_wuint9(tail, nd[++ndlo]);
458 i = 9;
459 }
460 }
461 }
462 }
463 if (nde < 0) {
464 /* Make nde non-negative. */
465 eprefix = '-';
466 nde = -nde;
467 }
468 len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10)
469 + ((prec | (sf & STRFMT_F_ALT)) != 0);
470 if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5);
471 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
472 while (width-- > len) *p++ = ' ';
473 }
474 if (prefix) *p++ = prefix;
475 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
476 while (width-- > len) *p++ = '0';
477 }
/* The top limb is written one position to the right, so that its first */
/* digit can be moved in front of the decimal point. */
478 q = lj_strfmt_wint(p + 1, nd[ndhi]);
479 p[0] = p[1]; /* Put leading digit in the correct place. */
480 if ((prec | (sf & STRFMT_F_ALT))) {
481 /* Emit fractional part. */
482 p[1] = '.'; p += 2;
483 prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */
484 /* Then emit chunks of 9 digits (this may emit 8 digits too many). */
485 for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) {
486 i = (i - 1) & 0x3f;
487 p = lj_strfmt_wuint9(p, nd[i]);
488 }
489 if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) {
490 /* %g (and not %#g) - strip trailing zeroes. */
491 p += (int32_t)prec & ((int32_t)prec >> 31);
492 while (p[-1] == '0') p--;
493 if (p[-1] == '.') p--;
494 } else {
495 /* %e (or %#g) - emit trailing zeroes. */
496 while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
497 p += (int32_t)prec;
498 }
499 } else {
500 p++;
501 }
502 *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e';
503 *p++ = eprefix; /* + or - */
504 if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */
505 p = lj_strfmt_wint(p, nde);
506 } else {
507 /* %f (or, shortly, %g in %f style) */
508 if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) {
509 /* Precision is sufficiently low as to maybe require rounding. */
510 ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1);
511 }
512 g_format_like_f:
513 if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) {
514 /* Decrease precision in order to strip trailing zeroes. */
515 if (ndlo) {
516 /* nd has a fractional part; we need to look at its digits. */
517 char tail[9];
518 uint32_t maxprec = (64 - ndlo) * 9;
519 if (prec >= maxprec) prec = maxprec;
520 else ndlo = 64 - (prec + 8) / 9;
521 i = prec - ((63 - ndlo) * 9);
522 lj_strfmt_wuint9(tail, nd[ndlo]);
523 while (prec && tail[--i] == '0') {
524 prec--;
525 if (!i) {
526 if (ndlo == 63) { prec = 0; break; }
527 lj_strfmt_wuint9(tail, nd[++ndlo]);
528 i = 9;
529 }
530 }
531 } else {
532 /* nd has no fractional part, so precision goes straight to zero. */
533 prec = 0;
534 }
535 }
536 len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0)
537 + ((prec | (sf & STRFMT_F_ALT)) != 0);
538 if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8);
539 if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
540 while (width-- > len) *p++ = ' ';
541 }
542 if (prefix) *p++ = prefix;
543 if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
544 while (width-- > len) *p++ = '0';
545 }
546 /* Emit integer part. */
547 p = lj_strfmt_wint(p, nd[ndhi]);
548 i = ndhi;
549 while (i) p = lj_strfmt_wuint9(p, nd[--i]);
550 if ((prec | (sf & STRFMT_F_ALT))) {
551 /* Emit fractional part. */
552 *p++ = '.';
553 /* Emit chunks of 9 digits (this may emit 8 digits too many). */
554 while ((int32_t)prec > 0 && i != ndlo) {
555 i = (i - 1) & 0x3f;
556 p = lj_strfmt_wuint9(p, nd[i]);
557 prec -= 9;
558 }
559 if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) {
560 /* %g (and not %#g) - strip trailing zeroes. */
561 p += (int32_t)prec & ((int32_t)prec >> 31);
562 while (p[-1] == '0') p--;
563 if (p[-1] == '.') p--;
564 } else {
565 /* %f (or %#g) - emit trailing zeroes. */
566 while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
567 p += (int32_t)prec;
568 }
569 }
570 }
571 }
/* Left-justified output is padded with spaces on the right. */
572 if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
573 return p;
574}
575
576/* Add formatted floating-point number to buffer. */
577SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
578{
579 setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL));
580 return sb;
581}
582
583/* -- Conversions to strings ---------------------------------------------- */
584
585/* Convert number to string. */
586GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
587{
588 char buf[STRFMT_MAXBUF_NUM];
589 MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf);
590 return lj_str_new(L, buf, len);
591}
592
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 8614facd..948c84a7 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -140,7 +140,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
140 break; 140 break;
141 } 141 }
142 142
143 /* Reduce range then convert to double. */ 143 /* Reduce range, then convert to double. */
144 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } 144 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
145 strscan_double(x, o, ex2, neg); 145 strscan_double(x, o, ex2, neg);
146 return fmt; 146 return fmt;
@@ -326,6 +326,49 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
326 return fmt; 326 return fmt;
327} 327}
328 328
329/* Parse binary number. */
330static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
331 StrScanFmt fmt, uint32_t opt,
332 int32_t ex2, int32_t neg, uint32_t dig)
333{
334 uint64_t x = 0;
335 uint32_t i;
336
337 if (ex2 || dig > 64) return STRSCAN_ERROR;
338
339 /* Scan binary digits. */
340 for (i = dig; i; i--, p++) {
341 if ((*p & ~1) != '0') return STRSCAN_ERROR;
342 x = (x << 1) | (*p & 1);
343 }
344
345 /* Format-specific handling. */
346 switch (fmt) {
347 case STRSCAN_INT:
348 if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
349 o->i = neg ? -(int32_t)x : (int32_t)x;
350 return STRSCAN_INT; /* Fast path for 32 bit integers. */
351 }
352 if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
353 /* fallthrough */
354 case STRSCAN_U32:
355 if (dig > 32) return STRSCAN_ERROR;
356 o->i = neg ? -(int32_t)x : (int32_t)x;
357 return STRSCAN_U32;
358 case STRSCAN_I64:
359 case STRSCAN_U64:
360 o->u64 = neg ? (uint64_t)-(int64_t)x : x;
361 return fmt;
362 default:
363 break;
364 }
365
366 /* Reduce range, then convert to double. */
367 if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
368 strscan_double(x, o, ex2, neg);
369 return fmt;
370}
371
329/* Scan string containing a number. Returns format. Returns value in o. */ 372/* Scan string containing a number. Returns format. Returns value in o. */
330StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) 373StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
331{ 374{
@@ -364,8 +407,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
364 407
365 /* Determine base and skip leading zeros. */ 408 /* Determine base and skip leading zeros. */
366 if (LJ_UNLIKELY(*p <= '0')) { 409 if (LJ_UNLIKELY(*p <= '0')) {
367 if (*p == '0' && casecmp(p[1], 'x')) 410 if (*p == '0') {
368 base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; 411 if (casecmp(p[1], 'x'))
412 base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
413 else if (casecmp(p[1], 'b'))
414 base = 2, cmask = LJ_CHAR_DIGIT, p += 2;
415 }
369 for ( ; ; p++) { 416 for ( ; ; p++) {
370 if (*p == '0') { 417 if (*p == '0') {
371 hasdig = 1; 418 hasdig = 1;
@@ -403,7 +450,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
403 } 450 }
404 451
405 /* Parse exponent. */ 452 /* Parse exponent. */
406 if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { 453 if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
407 uint32_t xx; 454 uint32_t xx;
408 int negx = 0; 455 int negx = 0;
409 fmt = STRSCAN_NUM; p++; 456 fmt = STRSCAN_NUM; p++;
@@ -459,6 +506,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
459 return strscan_oct(sp, o, fmt, neg, dig); 506 return strscan_oct(sp, o, fmt, neg, dig);
460 if (base == 16) 507 if (base == 16)
461 fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); 508 fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig);
509 else if (base == 2)
510 fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig);
462 else 511 else
463 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); 512 fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
464 513
diff --git a/src/lj_tab.c b/src/lj_tab.c
index a45ddaca..dcd24d31 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -28,8 +28,12 @@ static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
28 28
29#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) 29#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
30#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) 30#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
31#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) 31#if LJ_GC64
32#define hashgcref(t, r) \
33 hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
34#else
32#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) 35#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
36#endif
33 37
34/* Hash an arbitrary key and return its anchor position in the hash table. */ 38/* Hash an arbitrary key and return its anchor position in the hash table. */
35static Node *hashkey(const GCtab *t, cTValue *key) 39static Node *hashkey(const GCtab *t, cTValue *key)
@@ -58,8 +62,8 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
58 lj_err_msg(L, LJ_ERR_TABOV); 62 lj_err_msg(L, LJ_ERR_TABOV);
59 hsize = 1u << hbits; 63 hsize = 1u << hbits;
60 node = lj_mem_newvec(L, hsize, Node); 64 node = lj_mem_newvec(L, hsize, Node);
61 setmref(node->freetop, &node[hsize]);
62 setmref(t->node, node); 65 setmref(t->node, node);
66 setfreetop(t, node, &node[hsize]);
63 t->hmask = hsize-1; 67 t->hmask = hsize-1;
64} 68}
65 69
@@ -98,6 +102,7 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
98 GCtab *t; 102 GCtab *t;
99 /* First try to colocate the array part. */ 103 /* First try to colocate the array part. */
100 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { 104 if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
105 Node *nilnode;
101 lua_assert((sizeof(GCtab) & 7) == 0); 106 lua_assert((sizeof(GCtab) & 7) == 0);
102 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); 107 t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
103 t->gct = ~LJ_TTAB; 108 t->gct = ~LJ_TTAB;
@@ -107,8 +112,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
107 setgcrefnull(t->metatable); 112 setgcrefnull(t->metatable);
108 t->asize = asize; 113 t->asize = asize;
109 t->hmask = 0; 114 t->hmask = 0;
110 setmref(t->node, &G(L)->nilnode); 115 nilnode = &G(L)->nilnode;
116 setmref(t->node, nilnode);
117#if LJ_GC64
118 setmref(t->freetop, nilnode);
119#endif
111 } else { /* Otherwise separately allocate the array part. */ 120 } else { /* Otherwise separately allocate the array part. */
121 Node *nilnode;
112 t = lj_mem_newobj(L, GCtab); 122 t = lj_mem_newobj(L, GCtab);
113 t->gct = ~LJ_TTAB; 123 t->gct = ~LJ_TTAB;
114 t->nomm = (uint8_t)~0; 124 t->nomm = (uint8_t)~0;
@@ -117,7 +127,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
117 setgcrefnull(t->metatable); 127 setgcrefnull(t->metatable);
118 t->asize = 0; /* In case the array allocation fails. */ 128 t->asize = 0; /* In case the array allocation fails. */
119 t->hmask = 0; 129 t->hmask = 0;
120 setmref(t->node, &G(L)->nilnode); 130 nilnode = &G(L)->nilnode;
131 setmref(t->node, nilnode);
132#if LJ_GC64
133 setmref(t->freetop, nilnode);
134#endif
121 if (asize > 0) { 135 if (asize > 0) {
122 if (asize > LJ_MAX_ASIZE) 136 if (asize > LJ_MAX_ASIZE)
123 lj_err_msg(L, LJ_ERR_TABOV); 137 lj_err_msg(L, LJ_ERR_TABOV);
@@ -149,6 +163,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
149 return t; 163 return t;
150} 164}
151 165
166/* The API of this function conforms to lua_createtable().
**
** a: requested number of array elements. A positive value is passed on to
**    lj_tab_new() as an array size of a+1; zero or a negative value
**    requests no array part.
** h: requested number of hash elements, converted to a number of hash
**    bits with hsize2hbits().
** Returns the table created by lj_tab_new().
**
** NOTE(review): a+1 is evaluated as int32_t before the cast, so
** a == INT32_MAX would overflow -- confirm callers cannot pass it.
*/
167GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
168{
169 return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
170}
171
152#if LJ_HASJIT 172#if LJ_HASJIT
153GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) 173GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
154{ 174{
@@ -185,7 +205,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
185 Node *node = noderef(t->node); 205 Node *node = noderef(t->node);
186 Node *knode = noderef(kt->node); 206 Node *knode = noderef(kt->node);
187 ptrdiff_t d = (char *)node - (char *)knode; 207 ptrdiff_t d = (char *)node - (char *)knode;
188 setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); 208 setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
189 for (i = 0; i <= hmask; i++) { 209 for (i = 0; i <= hmask; i++) {
190 Node *kn = &knode[i]; 210 Node *kn = &knode[i];
191 Node *n = &node[i]; 211 Node *n = &node[i];
@@ -198,6 +218,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
198 return t; 218 return t;
199} 219}
200 220
221/* Clear a table.
**
** Runs clearapart() on the table. If the table has a hash part
** (hmask > 0), the free-node pointer is reset before clearhpart() runs.
*/
222void LJ_FASTCALL lj_tab_clear(GCtab *t)
223{
224 clearapart(t);
 /* hmask == 0 is what newtab() sets when t->node refers to the shared
 ** nilnode, so there is no free pointer to reset in that case.
 */
225 if (t->hmask > 0) {
226 Node *node = noderef(t->node);
 /* One past the last node: the same position newhpart() starts from
 ** (&node[hsize] with hmask == hsize-1).
 */
227 setfreetop(t, node, &node[t->hmask+1]);
228 clearhpart(t);
229 }
230}
231
201/* Free a table. */ 232/* Free a table. */
202void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) 233void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
203{ 234{
@@ -214,7 +245,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
214/* -- Table resizing ------------------------------------------------------ */ 245/* -- Table resizing ------------------------------------------------------ */
215 246
216/* Resize a table to fit the new array/hash part sizes. */ 247/* Resize a table to fit the new array/hash part sizes. */
217static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) 248void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
218{ 249{
219 Node *oldnode = noderef(t->node); 250 Node *oldnode = noderef(t->node);
220 uint32_t oldasize = t->asize; 251 uint32_t oldasize = t->asize;
@@ -247,6 +278,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
247 } else { 278 } else {
248 global_State *g = G(L); 279 global_State *g = G(L);
249 setmref(t->node, &g->nilnode); 280 setmref(t->node, &g->nilnode);
281#if LJ_GC64
282 setmref(t->freetop, &g->nilnode);
283#endif
250 t->hmask = 0; 284 t->hmask = 0;
251 } 285 }
252 if (asize < oldasize) { /* Array part shrinks? */ 286 if (asize < oldasize) { /* Array part shrinks? */
@@ -348,7 +382,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
348 asize += countint(ek, bins); 382 asize += countint(ek, bins);
349 na = bestasize(bins, &asize); 383 na = bestasize(bins, &asize);
350 total -= na; 384 total -= na;
351 resizetab(L, t, asize, hsize2hbits(total)); 385 lj_tab_resize(L, t, asize, hsize2hbits(total));
352} 386}
353 387
354#if LJ_HASFFI 388#if LJ_HASFFI
@@ -360,7 +394,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t)
360 394
361void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) 395void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
362{ 396{
363 resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); 397 lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
364} 398}
365 399
366/* -- Table getters ------------------------------------------------------- */ 400/* -- Table getters ------------------------------------------------------- */
@@ -428,7 +462,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
428 Node *n = hashkey(t, key); 462 Node *n = hashkey(t, key);
429 if (!tvisnil(&n->val) || t->hmask == 0) { 463 if (!tvisnil(&n->val) || t->hmask == 0) {
430 Node *nodebase = noderef(t->node); 464 Node *nodebase = noderef(t->node);
431 Node *collide, *freenode = noderef(nodebase->freetop); 465 Node *collide, *freenode = getfreetop(t, nodebase);
432 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); 466 lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
433 do { 467 do {
434 if (freenode == nodebase) { /* No free node found? */ 468 if (freenode == nodebase) { /* No free node found? */
@@ -436,7 +470,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
436 return lj_tab_set(L, t, key); /* Retry key insertion. */ 470 return lj_tab_set(L, t, key); /* Retry key insertion. */
437 } 471 }
438 } while (!tvisnil(&(--freenode)->key)); 472 } while (!tvisnil(&(--freenode)->key));
439 setmref(nodebase->freetop, freenode); 473 setfreetop(t, nodebase, freenode);
440 lua_assert(freenode != &G(L)->nilnode); 474 lua_assert(freenode != &G(L)->nilnode);
441 collide = hashkey(t, &n->key); 475 collide = hashkey(t, &n->key);
442 if (collide != n) { /* Colliding node not the main node? */ 476 if (collide != n) { /* Colliding node not the main node? */
diff --git a/src/lj_tab.h b/src/lj_tab.h
index dc3c8dc1..597c94b2 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -34,14 +34,17 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) 34#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
35 35
36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); 36LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
37LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
37#if LJ_HASJIT 38#if LJ_HASJIT
38LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); 39LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
39#endif 40#endif
40LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); 41LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
42LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
41LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); 43LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
42#if LJ_HASFFI 44#if LJ_HASFFI
43LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); 45LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
44#endif 46#endif
47LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits);
45LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); 48LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
46 49
47/* Caveat: all getters except lj_tab_get() can return NULL! */ 50/* Caveat: all getters except lj_tab_get() can return NULL! */
@@ -53,7 +56,7 @@ LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
53/* Caveat: all setters require a write barrier for the stored value. */ 56/* Caveat: all setters require a write barrier for the stored value. */
54 57
55LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); 58LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
56LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); 59LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
57LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); 60LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
58LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); 61LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
59 62
diff --git a/src/lj_target.h b/src/lj_target.h
index a8182596..47c960bc 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
55/* Bitset for registers. 32 registers suffice for most architectures. 55/* Bitset for registers. 32 registers suffice for most architectures.
56** Note that one set holds bits for both GPRs and FPRs. 56** Note that one set holds bits for both GPRs and FPRs.
57*/ 57*/
58#if LJ_TARGET_PPC || LJ_TARGET_MIPS 58#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
59typedef uint64_t RegSet; 59typedef uint64_t RegSet;
60#else 60#else
61typedef uint32_t RegSet; 61typedef uint32_t RegSet;
@@ -69,7 +69,7 @@ typedef uint32_t RegSet;
69#define rset_set(rs, r) (rs |= RID2RSET(r)) 69#define rset_set(rs, r) (rs |= RID2RSET(r))
70#define rset_clear(rs, r) (rs &= ~RID2RSET(r)) 70#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
71#define rset_exclude(rs, r) (rs & ~RID2RSET(r)) 71#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
72#if LJ_TARGET_PPC || LJ_TARGET_MIPS 72#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
73#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) 73#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
74#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) 74#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
75#else 75#else
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
138#include "lj_target_x86.h" 138#include "lj_target_x86.h"
139#elif LJ_TARGET_ARM 139#elif LJ_TARGET_ARM
140#include "lj_target_arm.h" 140#include "lj_target_arm.h"
141#elif LJ_TARGET_ARM64
142#include "lj_target_arm64.h"
141#elif LJ_TARGET_PPC 143#elif LJ_TARGET_PPC
142#include "lj_target_ppc.h" 144#include "lj_target_ppc.h"
143#elif LJ_TARGET_MIPS 145#elif LJ_TARGET_MIPS
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index 4d292dc9..48e50fe9 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -243,10 +243,6 @@ typedef enum ARMIns {
243 ARMI_VCVT_S32_F64 = 0xeebd0bc0, 243 ARMI_VCVT_S32_F64 = 0xeebd0bc0,
244 ARMI_VCVT_U32_F32 = 0xeebc0ac0, 244 ARMI_VCVT_U32_F32 = 0xeebc0ac0,
245 ARMI_VCVT_U32_F64 = 0xeebc0bc0, 245 ARMI_VCVT_U32_F64 = 0xeebc0bc0,
246 ARMI_VCVTR_S32_F32 = 0xeebd0a40,
247 ARMI_VCVTR_S32_F64 = 0xeebd0b40,
248 ARMI_VCVTR_U32_F32 = 0xeebc0a40,
249 ARMI_VCVTR_U32_F64 = 0xeebc0b40,
250 ARMI_VCVT_F32_S32 = 0xeeb80ac0, 246 ARMI_VCVT_F32_S32 = 0xeeb80ac0,
251 ARMI_VCVT_F64_S32 = 0xeeb80bc0, 247 ARMI_VCVT_F64_S32 = 0xeeb80bc0,
252 ARMI_VCVT_F32_U32 = 0xeeb80a40, 248 ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 00000000..d729e178
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,332 @@
1/*
2** Definitions for ARM64 CPUs.
3** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#ifndef _LJ_TARGET_ARM64_H
7#define _LJ_TARGET_ARM64_H
8
9/* -- Register IDs -------------------------------------------------------- */
10
/* X-macro lists of register names: each list applies the macro passed as _
** to every register name, in ID order. GPRDEF and FPRDEF hold 32 names
** each; VRIDDEF is empty for this target.
*/
11#define GPRDEF(_) \
12 _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
13 _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
14 _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
15 _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
16#define FPRDEF(_) \
17 _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
18 _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
19 _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
20 _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
21#define VRIDDEF(_)
22
/* Expands a register name to its enum entry RID_<name>. */
23#define RIDENUM(name) RID_##name,
24
/* Register IDs: GPRs occupy IDs 0-31, FPRs follow directly after them. */
25enum {
26 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
27 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
28 RID_MAX, /* Number of register IDs: 32 GPRs + 32 FPRs. */
29 RID_TMP = RID_LR, /* Alias: shares its ID with LR. */
30 RID_ZERO = RID_SP, /* Alias: shares its ID (31) with SP. */
31
32 /* Calling conventions. */
33 RID_RET = RID_X0,
34 RID_FPRET = RID_D0,
35
36 /* These definitions must match with the *.dasc file(s): */
37 RID_BASE = RID_X19, /* Interpreter BASE. */
38 RID_LPC = RID_X21, /* Interpreter PC. */
39 RID_GL = RID_X22, /* Interpreter GL. */
40 RID_LREG = RID_X23, /* Interpreter L. */
41
42 /* Register ranges [min, max) and number of registers. */
43 RID_MIN_GPR = RID_X0,
44 RID_MAX_GPR = RID_SP+1,
45 RID_MIN_FPR = RID_MAX_GPR,
46 RID_MAX_FPR = RID_D31+1,
47 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
48 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
49};
50
51#define RID_NUM_KREF RID_NUM_GPR
52#define RID_MIN_KREF RID_X0
53
54/* -- Register sets ------------------------------------------------------- */
55
56/* Make use of all registers, except for x18, fp, lr, sp and the register
** holding the interpreter GL (RID_GL).
*/
57#define RSET_FIXED \
58 (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\
59 RID2RSET(RID_GL))
60#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
61#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
62#define RSET_ALL (RSET_GPR|RSET_FPR)
63#define RSET_INIT RSET_ALL
64
65/* lr is an implicit scratch register. */
/* Scratch GPRs are x0-x17. */
66#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
/* Scratch FPRs are d0-d7 and d16-d31; d8-d15 are left out of the set. */
67#define RSET_SCRATCH_FPR \
68 (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
69#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
/* Argument registers: x0-x7 for GPRs and d0-d7 for FPRs, eight of each. */
70#define REGARG_FIRSTGPR RID_X0
71#define REGARG_LASTGPR RID_X7
72#define REGARG_NUMGPR 8
73#define REGARG_FIRSTFPR RID_D0
74#define REGARG_LASTFPR RID_D7
75#define REGARG_NUMFPR 8
76
77/* -- Spill slots --------------------------------------------------------- */
78
79/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
80**
81** SPS_FIXED: Available fixed spill slots in interpreter frame.
82** This definition must match with the vm_arm64.dasc file.
83** Pre-allocate some slots to avoid sp adjust in every root trace.
84**
85** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
86*/
87#define SPS_FIXED 4
88#define SPS_FIRST 2
89
90#define SPOFS_TMP 0
91
92#define sps_scale(slot) (4 * (int32_t)(slot))
93#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
94
95/* -- Exit state ---------------------------------------------------------- */
96
97/* This definition must match with the *.dasc file(s). */
98typedef struct {
99 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
100 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
101 int32_t spill[256]; /* Spill slots. */
102} ExitState;
103
104/* Highest exit + 1 indicates stack check. */
105#define EXITSTATE_CHECKEXIT 1
106
107/* Return the address of a per-trace exit stub.
**
** p: pointer to instruction words; the exitstub_trace_addr() macro passes
**    the trace's mcode + szmcode.
** exitno: exit number, used as an index in instruction words.
**
** Leading NOP words are skipped first. 0x1f2003d5 is 0xd503201f (A64I_NOP)
** with its bytes swapped, for hosts where LJ_LE is false.
** NOTE(review): the fixed offset of 3 words presumably skips a stub header
** emitted by the assembler backend -- confirm against that code.
*/
108static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
109{
110 while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
111 return p + 3 + exitno;
112}
113/* Avoid dependence on lj_jit.h if only including lj_target.h. */
114#define exitstub_trace_addr(T, exitno) \
115 exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
116
117/* -- Instructions -------------------------------------------------------- */
118
119/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
120#if LJ_BE
121#define A64I_LE(x) (lj_bswap(x))
122#else
123#define A64I_LE(x) (x)
124#endif
125
126/* Instruction fields.
**
** Each macro shifts an operand to its bit position within an instruction
** word. A64F_S26, A64F_S19 and A64F_S14 first mask the value to 26, 19 and
** 14 bits, so negative values can be passed. The other macros do not mask:
** the caller must pass a value that already fits the field.
** A64F_EX and A64F_EXSH also set the A64I_EX bit.
** A64F_LSL16 takes a shift amount and stores it divided by 16.
*/
127#define A64F_D(r) (r)
128#define A64F_N(r) ((r) << 5)
129#define A64F_A(r) ((r) << 10)
130#define A64F_M(r) ((r) << 16)
131#define A64F_IMMS(x) ((x) << 10)
132#define A64F_IMMR(x) ((x) << 16)
133#define A64F_U16(x) ((x) << 5)
134#define A64F_U12(x) ((x) << 10)
135#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
136#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
137#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
138#define A64F_S9(x) ((x) << 12)
139#define A64F_BIT(x) ((x) << 19)
140#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
141#define A64F_EX(ex) (A64I_EX | ((ex) << 13))
142#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
143#define A64F_FP8(x) ((x) << 13)
144#define A64F_CC(cc) ((cc) << 12)
145#define A64F_LSL16(x) (((x) / 16) << 21)
146#define A64F_BSH(sh) ((sh) << 10)
147
148/* Check for valid field range.
**
** Adds 2^(b-1) to x and tests that nothing remains above the low b bits,
** i.e. x is meant to lie in [-2^(b-1), 2^(b-1)).
** NOTE(review): b is not parenthesized in (b-1), so pass a plain constant
** or identifier. The constant 1 is an int, so b must stay below the width
** of int. Rejecting values below the range relies on >> keeping the sign
** of a negative left operand -- confirm for the supported compilers.
*/
149#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
150
/* A64 instruction words with their operand fields left at zero, plus a few
** modifier bits at the top of the list (A64I_S, A64I_X, A64I_EX, ...).
** Operands are placed with the A64F_* field macros.
** Many w/x name pairs differ only by the A64I_X bit (e.g. A64I_ADDw and
** A64I_ADDx); some pairs differ in further bits (e.g. A64I_ASRw and
** A64I_ASRx), so each form is listed explicitly.
*/
151typedef enum A64Ins {
152 A64I_S = 0x20000000,
153 A64I_X = 0x80000000,
154 A64I_EX = 0x00200000,
155 A64I_ON = 0x00200000,
156 A64I_K12 = 0x1a000000,
157 A64I_K13 = 0x18000000,
158 A64I_LS_U = 0x01000000,
159 A64I_LS_S = 0x00800000,
160 A64I_LS_R = 0x01200800,
161 A64I_LS_SH = 0x00001000,
162 A64I_LS_UXTWx = 0x00004000,
163 A64I_LS_SXTWx = 0x0000c000,
164 A64I_LS_SXTXx = 0x0000e000,
165 A64I_LS_LSLx = 0x00006000,
166
167 A64I_ADDw = 0x0b000000,
168 A64I_ADDx = 0x8b000000,
169 A64I_ADDSw = 0x2b000000,
170 A64I_ADDSx = 0xab000000,
171 A64I_NEGw = 0x4b0003e0,
172 A64I_NEGx = 0xcb0003e0,
173 A64I_SUBw = 0x4b000000,
174 A64I_SUBx = 0xcb000000,
175 A64I_SUBSw = 0x6b000000,
176 A64I_SUBSx = 0xeb000000,
177
178 A64I_MULw = 0x1b007c00,
179 A64I_MULx = 0x9b007c00,
180 A64I_SMULL = 0x9b207c00,
181
182 A64I_ANDw = 0x0a000000,
183 A64I_ANDx = 0x8a000000,
184 A64I_ANDSw = 0x6a000000,
185 A64I_ANDSx = 0xea000000,
186 A64I_EORw = 0x4a000000,
187 A64I_EORx = 0xca000000,
188 A64I_ORRw = 0x2a000000,
189 A64I_ORRx = 0xaa000000,
190 A64I_TSTw = 0x6a00001f,
191 A64I_TSTx = 0xea00001f,
192
193 A64I_CMPw = 0x6b00001f,
194 A64I_CMPx = 0xeb00001f,
195 A64I_CMNw = 0x2b00001f,
196 A64I_CMNx = 0xab00001f,
197 A64I_CCMPw = 0x7a400000,
198 A64I_CCMPx = 0xfa400000,
199 A64I_CSELw = 0x1a800000,
200 A64I_CSELx = 0x9a800000,
201
202 A64I_ASRw = 0x13007c00,
203 A64I_ASRx = 0x9340fc00,
204 A64I_LSLx = 0xd3400000,
205 A64I_LSRx = 0xd340fc00,
206 A64I_SHRw = 0x1ac02000,
207 A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */
208 A64I_REVw = 0x5ac00800,
209 A64I_REVx = 0xdac00c00,
210
211 A64I_EXTRw = 0x13800000,
212 A64I_EXTRx = 0x93c00000,
213 A64I_SBFMw = 0x13000000,
214 A64I_SBFMx = 0x93400000,
215 A64I_SXTBw = 0x13001c00,
216 A64I_SXTHw = 0x13003c00,
217 A64I_SXTW = 0x93407c00,
218 A64I_UBFMw = 0x53000000,
219 A64I_UBFMx = 0xd3400000,
220 A64I_UXTBw = 0x53001c00,
221 A64I_UXTHw = 0x53003c00,
222
223 A64I_MOVw = 0x2a0003e0,
224 A64I_MOVx = 0xaa0003e0,
225 A64I_MVNw = 0x2a2003e0,
226 A64I_MVNx = 0xaa2003e0,
227 A64I_MOVKw = 0x72800000,
228 A64I_MOVKx = 0xf2800000,
229 A64I_MOVZw = 0x52800000,
230 A64I_MOVZx = 0xd2800000,
231 A64I_MOVNw = 0x12800000,
232 A64I_MOVNx = 0x92800000,
233
234 A64I_LDRB = 0x39400000,
235 A64I_LDRH = 0x79400000,
236 A64I_LDRw = 0xb9400000,
237 A64I_LDRx = 0xf9400000,
238 A64I_LDRLw = 0x18000000,
239 A64I_LDRLx = 0x58000000,
240 A64I_STRB = 0x39000000,
241 A64I_STRH = 0x79000000,
242 A64I_STRw = 0xb9000000,
243 A64I_STRx = 0xf9000000,
244 A64I_STPw = 0x29000000,
245 A64I_STPx = 0xa9000000,
246 A64I_LDPw = 0x29400000,
247 A64I_LDPx = 0xa9400000,
248
249 A64I_B = 0x14000000,
250 A64I_BCC = 0x54000000,
251 A64I_BL = 0x94000000,
252 A64I_BR = 0xd61f0000,
253 A64I_BLR = 0xd63f0000,
254 A64I_TBZ = 0x36000000,
255 A64I_TBNZ = 0x37000000,
256 A64I_CBZ = 0x34000000,
257 A64I_CBNZ = 0x35000000,
258
259 A64I_NOP = 0xd503201f,
260
261 /* FP */
262 A64I_FADDd = 0x1e602800,
263 A64I_FSUBd = 0x1e603800,
264 A64I_FMADDd = 0x1f400000,
265 A64I_FMSUBd = 0x1f408000,
266 A64I_FNMADDd = 0x1f600000,
267 A64I_FNMSUBd = 0x1f608000,
268 A64I_FMULd = 0x1e600800,
269 A64I_FDIVd = 0x1e601800,
270 A64I_FNEGd = 0x1e614000,
271 A64I_FABS = 0x1e60c000,
272 A64I_FSQRTd = 0x1e61c000,
273 A64I_LDRs = 0xbd400000,
274 A64I_LDRd = 0xfd400000,
275 A64I_STRs = 0xbd000000,
276 A64I_STRd = 0xfd000000,
277 A64I_LDPs = 0x2d400000,
278 A64I_LDPd = 0x6d400000,
279 A64I_STPs = 0x2d000000,
280 A64I_STPd = 0x6d000000,
281 A64I_FCMPd = 0x1e602000,
282 A64I_FCMPZd = 0x1e602008,
283 A64I_FCSELd = 0x1e600c00,
284 A64I_FRINTMd = 0x1e654000,
285 A64I_FRINTPd = 0x1e64c000,
286 A64I_FRINTZd = 0x1e65c000,
287
288 A64I_FCVT_F32_F64 = 0x1e624000,
289 A64I_FCVT_F64_F32 = 0x1e22c000,
290 A64I_FCVT_F32_S32 = 0x1e220000,
291 A64I_FCVT_F64_S32 = 0x1e620000,
292 A64I_FCVT_F32_U32 = 0x1e230000,
293 A64I_FCVT_F64_U32 = 0x1e630000,
294 A64I_FCVT_F32_S64 = 0x9e220000,
295 A64I_FCVT_F64_S64 = 0x9e620000,
296 A64I_FCVT_F32_U64 = 0x9e230000,
297 A64I_FCVT_F64_U64 = 0x9e630000,
298 A64I_FCVT_S32_F64 = 0x1e780000,
299 A64I_FCVT_S32_F32 = 0x1e380000,
300 A64I_FCVT_U32_F64 = 0x1e790000,
301 A64I_FCVT_U32_F32 = 0x1e390000,
302 A64I_FCVT_S64_F64 = 0x9e780000,
303 A64I_FCVT_S64_F32 = 0x9e380000,
304 A64I_FCVT_U64_F64 = 0x9e790000,
305 A64I_FCVT_U64_F32 = 0x9e390000,
306
307 A64I_FMOV_S = 0x1e204000,
308 A64I_FMOV_D = 0x1e604000,
309 A64I_FMOV_R_S = 0x1e260000,
310 A64I_FMOV_S_R = 0x1e270000,
311 A64I_FMOV_R_D = 0x9e660000,
312 A64I_FMOV_D_R = 0x9e670000,
313 A64I_FMOV_DI = 0x1e601000,
314} A64Ins;
315
/* Shift types, numbered 0-3 in declaration order.
** NOTE(review): presumably the sh operand of A64F_SH() -- confirm at the
** call sites.
*/
316typedef enum A64Shift {
317 A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
318} A64Shift;
319
/* Extend types, numbered 0-7 in declaration order: the unsigned forms
** come first, followed by the signed forms.
** NOTE(review): presumably the ex operand of A64F_EX()/A64F_EXSH() --
** confirm at the call sites.
*/
320typedef enum A64Extend {
321 A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX,
322 A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX,
323} A64Extend;
324
325/* ARM condition codes, numbered 0-14 in declaration order.
** CC_HS and CC_LO are alternate names for CC_CS and CC_CC.
*/
326typedef enum A64CC {
327 CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
328 CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
329 CC_HS = CC_CS, CC_LO = CC_CC
330} A64CC;
331
332#endif
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index 4bbdc743..6e436967 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -13,11 +13,15 @@
13 _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ 13 _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
14 _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ 14 _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \
15 _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) 15 _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA)
16#if LJ_SOFTFP
17#define FPRDEF(_)
18#else
16#define FPRDEF(_) \ 19#define FPRDEF(_) \
17 _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ 20 _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
18 _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ 21 _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
19 _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ 22 _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
20 _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) 23 _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
24#endif
21#define VRIDDEF(_) 25#define VRIDDEF(_)
22 26
23#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
@@ -39,7 +43,11 @@ enum {
39 RID_RETHI = RID_R2, 43 RID_RETHI = RID_R2,
40 RID_RETLO = RID_R3, 44 RID_RETLO = RID_R3,
41#endif 45#endif
46#if LJ_SOFTFP
47 RID_FPRET = RID_R2,
48#else
42 RID_FPRET = RID_F0, 49 RID_FPRET = RID_F0,
50#endif
43 RID_CFUNCADDR = RID_R25, 51 RID_CFUNCADDR = RID_R25,
44 52
45 /* These definitions must match with the *.dasc file(s): */ 53 /* These definitions must match with the *.dasc file(s): */
@@ -52,8 +60,12 @@ enum {
52 /* Register ranges [min, max) and number of registers. */ 60 /* Register ranges [min, max) and number of registers. */
53 RID_MIN_GPR = RID_R0, 61 RID_MIN_GPR = RID_R0,
54 RID_MAX_GPR = RID_RA+1, 62 RID_MAX_GPR = RID_RA+1,
55 RID_MIN_FPR = RID_F0, 63 RID_MIN_FPR = RID_MAX_GPR,
64#if LJ_SOFTFP
65 RID_MAX_FPR = RID_MIN_FPR,
66#else
56 RID_MAX_FPR = RID_F31+1, 67 RID_MAX_FPR = RID_F31+1,
68#endif
57 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, 69 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
58 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ 70 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */
59}; 71};
@@ -68,28 +80,60 @@ enum {
68 (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ 80 (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
69 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) 81 RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP))
70#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) 82#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
83#if LJ_SOFTFP
84#define RSET_FPR 0
85#else
86#if LJ_32
71#define RSET_FPR \ 87#define RSET_FPR \
72 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ 88 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
73 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ 89 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
74 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ 90 RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
75 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) 91 RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
76#define RSET_ALL (RSET_GPR|RSET_FPR) 92#else
77#define RSET_INIT RSET_ALL 93#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
94#endif
95#endif
96#define RSET_ALL (RSET_GPR|RSET_FPR)
97#define RSET_INIT RSET_ALL
78 98
79#define RSET_SCRATCH_GPR \ 99#define RSET_SCRATCH_GPR \
80 (RSET_RANGE(RID_R1, RID_R15+1)|\ 100 (RSET_RANGE(RID_R1, RID_R15+1)|\
81 RID2RSET(RID_R24)|RID2RSET(RID_R25)) 101 RID2RSET(RID_R24)|RID2RSET(RID_R25))
102#if LJ_SOFTFP
103#define RSET_SCRATCH_FPR 0
104#else
105#if LJ_32
82#define RSET_SCRATCH_FPR \ 106#define RSET_SCRATCH_FPR \
83 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ 107 (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
84 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ 108 RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
85 RID2RSET(RID_F16)|RID2RSET(RID_F18)) 109 RID2RSET(RID_F16)|RID2RSET(RID_F18))
110#else
111#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
112#endif
113#endif
86#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) 114#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
87#define REGARG_FIRSTGPR RID_R4 115#define REGARG_FIRSTGPR RID_R4
116#if LJ_32
88#define REGARG_LASTGPR RID_R7 117#define REGARG_LASTGPR RID_R7
89#define REGARG_NUMGPR 4 118#define REGARG_NUMGPR 4
119#else
120#define REGARG_LASTGPR RID_R11
121#define REGARG_NUMGPR 8
122#endif
123#if LJ_ABI_SOFTFP
124#define REGARG_FIRSTFPR 0
125#define REGARG_LASTFPR 0
126#define REGARG_NUMFPR 0
127#else
90#define REGARG_FIRSTFPR RID_F12 128#define REGARG_FIRSTFPR RID_F12
129#if LJ_32
91#define REGARG_LASTFPR RID_F14 130#define REGARG_LASTFPR RID_F14
92#define REGARG_NUMFPR 2 131#define REGARG_NUMFPR 2
132#else
133#define REGARG_LASTFPR RID_F19
134#define REGARG_NUMFPR 8
135#endif
136#endif
93 137
94/* -- Spill slots --------------------------------------------------------- */ 138/* -- Spill slots --------------------------------------------------------- */
95 139
@@ -100,7 +144,11 @@ enum {
100** 144**
101** SPS_FIRST: First spill slot for general use. 145** SPS_FIRST: First spill slot for general use.
102*/ 146*/
147#if LJ_32
103#define SPS_FIXED 5 148#define SPS_FIXED 5
149#else
150#define SPS_FIXED 4
151#endif
104#define SPS_FIRST 4 152#define SPS_FIRST 4
105 153
106#define SPOFS_TMP 0 154#define SPOFS_TMP 0
@@ -112,8 +160,10 @@ enum {
112 160
113/* This definition must match with the *.dasc file(s). */ 161/* This definition must match with the *.dasc file(s). */
114typedef struct { 162typedef struct {
163#if !LJ_SOFTFP
115 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 164 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
116 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 165#endif
166 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
117 int32_t spill[256]; /* Spill slots. */ 167 int32_t spill[256]; /* Spill slots. */
118} ExitState; 168} ExitState;
119 169
@@ -142,52 +192,85 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
142#define MIPSF_F(r) ((r) << 6) 192#define MIPSF_F(r) ((r) << 6)
143#define MIPSF_A(n) ((n) << 6) 193#define MIPSF_A(n) ((n) << 6)
144#define MIPSF_M(n) ((n) << 11) 194#define MIPSF_M(n) ((n) << 11)
195#define MIPSF_L(n) ((n) << 6)
145 196
146typedef enum MIPSIns { 197typedef enum MIPSIns {
198 MIPSI_D = 0x38,
199 MIPSI_DV = 0x10,
200 MIPSI_D32 = 0x3c,
147 /* Integer instructions. */ 201 /* Integer instructions. */
148 MIPSI_MOVE = 0x00000021, 202 MIPSI_MOVE = 0x00000025,
149 MIPSI_NOP = 0x00000000, 203 MIPSI_NOP = 0x00000000,
150 204
151 MIPSI_LI = 0x24000000, 205 MIPSI_LI = 0x24000000,
152 MIPSI_LU = 0x34000000, 206 MIPSI_LU = 0x34000000,
153 MIPSI_LUI = 0x3c000000, 207 MIPSI_LUI = 0x3c000000,
154 208
155 MIPSI_ADDIU = 0x24000000, 209 MIPSI_AND = 0x00000024,
156 MIPSI_ANDI = 0x30000000, 210 MIPSI_ANDI = 0x30000000,
211 MIPSI_OR = 0x00000025,
157 MIPSI_ORI = 0x34000000, 212 MIPSI_ORI = 0x34000000,
213 MIPSI_XOR = 0x00000026,
158 MIPSI_XORI = 0x38000000, 214 MIPSI_XORI = 0x38000000,
215 MIPSI_NOR = 0x00000027,
216
217 MIPSI_SLT = 0x0000002a,
218 MIPSI_SLTU = 0x0000002b,
159 MIPSI_SLTI = 0x28000000, 219 MIPSI_SLTI = 0x28000000,
160 MIPSI_SLTIU = 0x2c000000, 220 MIPSI_SLTIU = 0x2c000000,
161 221
162 MIPSI_ADDU = 0x00000021, 222 MIPSI_ADDU = 0x00000021,
223 MIPSI_ADDIU = 0x24000000,
224 MIPSI_SUB = 0x00000022,
163 MIPSI_SUBU = 0x00000023, 225 MIPSI_SUBU = 0x00000023,
226
227#if !LJ_TARGET_MIPSR6
164 MIPSI_MUL = 0x70000002, 228 MIPSI_MUL = 0x70000002,
165 MIPSI_AND = 0x00000024, 229 MIPSI_DIV = 0x0000001a,
166 MIPSI_OR = 0x00000025, 230 MIPSI_DIVU = 0x0000001b,
167 MIPSI_XOR = 0x00000026, 231
168 MIPSI_NOR = 0x00000027,
169 MIPSI_SLT = 0x0000002a,
170 MIPSI_SLTU = 0x0000002b,
171 MIPSI_MOVZ = 0x0000000a, 232 MIPSI_MOVZ = 0x0000000a,
172 MIPSI_MOVN = 0x0000000b, 233 MIPSI_MOVN = 0x0000000b,
234 MIPSI_MFHI = 0x00000010,
235 MIPSI_MFLO = 0x00000012,
236 MIPSI_MULT = 0x00000018,
237#else
238 MIPSI_MUL = 0x00000098,
239 MIPSI_MUH = 0x000000d8,
240 MIPSI_DIV = 0x0000009a,
241 MIPSI_DIVU = 0x0000009b,
242
243 MIPSI_SELEQZ = 0x00000035,
244 MIPSI_SELNEZ = 0x00000037,
245#endif
173 246
174 MIPSI_SLL = 0x00000000, 247 MIPSI_SLL = 0x00000000,
175 MIPSI_SRL = 0x00000002, 248 MIPSI_SRL = 0x00000002,
176 MIPSI_SRA = 0x00000003, 249 MIPSI_SRA = 0x00000003,
177 MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ 250 MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
251 MIPSI_DROTR = 0x0020003a,
252 MIPSI_DROTR32 = 0x0020003e,
178 MIPSI_SLLV = 0x00000004, 253 MIPSI_SLLV = 0x00000004,
179 MIPSI_SRLV = 0x00000006, 254 MIPSI_SRLV = 0x00000006,
180 MIPSI_SRAV = 0x00000007, 255 MIPSI_SRAV = 0x00000007,
181 MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ 256 MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
257 MIPSI_DROTRV = 0x00000056,
182 258
183 MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ 259 MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
184 MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ 260 MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
185 MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ 261 MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
262 MIPSI_DSBH = 0x7c0000a4,
186 263
187 MIPSI_B = 0x10000000, 264 MIPSI_B = 0x10000000,
188 MIPSI_J = 0x08000000, 265 MIPSI_J = 0x08000000,
189 MIPSI_JAL = 0x0c000000, 266 MIPSI_JAL = 0x0c000000,
267#if !LJ_TARGET_MIPSR6
268 MIPSI_JALX = 0x74000000,
190 MIPSI_JR = 0x00000008, 269 MIPSI_JR = 0x00000008,
270#else
271 MIPSI_JR = 0x00000009,
272 MIPSI_BALC = 0xe8000000,
273#endif
191 MIPSI_JALR = 0x0000f809, 274 MIPSI_JALR = 0x0000f809,
192 275
193 MIPSI_BEQ = 0x10000000, 276 MIPSI_BEQ = 0x10000000,
@@ -199,7 +282,9 @@ typedef enum MIPSIns {
199 282
200 /* Load/store instructions. */ 283 /* Load/store instructions. */
201 MIPSI_LW = 0x8c000000, 284 MIPSI_LW = 0x8c000000,
285 MIPSI_LD = 0xdc000000,
202 MIPSI_SW = 0xac000000, 286 MIPSI_SW = 0xac000000,
287 MIPSI_SD = 0xfc000000,
203 MIPSI_LB = 0x80000000, 288 MIPSI_LB = 0x80000000,
204 MIPSI_SB = 0xa0000000, 289 MIPSI_SB = 0xa0000000,
205 MIPSI_LH = 0x84000000, 290 MIPSI_LH = 0x84000000,
@@ -211,11 +296,69 @@ typedef enum MIPSIns {
211 MIPSI_LDC1 = 0xd4000000, 296 MIPSI_LDC1 = 0xd4000000,
212 MIPSI_SDC1 = 0xf4000000, 297 MIPSI_SDC1 = 0xf4000000,
213 298
299 /* MIPS64 instructions. */
300 MIPSI_DADD = 0x0000002c,
301 MIPSI_DADDU = 0x0000002d,
302 MIPSI_DADDIU = 0x64000000,
303 MIPSI_DSUB = 0x0000002e,
304 MIPSI_DSUBU = 0x0000002f,
305#if !LJ_TARGET_MIPSR6
306 MIPSI_DDIV = 0x0000001e,
307 MIPSI_DDIVU = 0x0000001f,
308 MIPSI_DMULT = 0x0000001c,
309 MIPSI_DMULTU = 0x0000001d,
310#else
311 MIPSI_DDIV = 0x0000009e,
312 MIPSI_DMOD = 0x000000de,
313 MIPSI_DDIVU = 0x0000009f,
314 MIPSI_DMODU = 0x000000df,
315 MIPSI_DMUL = 0x0000009c,
316 MIPSI_DMUH = 0x000000dc,
317#endif
318
319 MIPSI_DSLL = 0x00000038,
320 MIPSI_DSRL = 0x0000003a,
321 MIPSI_DSLLV = 0x00000014,
322 MIPSI_DSRLV = 0x00000016,
323 MIPSI_DSRA = 0x0000003b,
324 MIPSI_DSRAV = 0x00000017,
325 MIPSI_DSRA32 = 0x0000003f,
326 MIPSI_DSLL32 = 0x0000003c,
327 MIPSI_DSRL32 = 0x0000003e,
328 MIPSI_DSHD = 0x7c000164,
329
330 MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU,
331 MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU,
332 MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
333 MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
334 MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
335#if LJ_TARGET_MIPSR6
336 MIPSI_LSA = 0x00000005,
337 MIPSI_DLSA = 0x00000015,
338 MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA,
339#endif
340
341 /* Extract/insert instructions. */
342 MIPSI_DEXTM = 0x7c000001,
343 MIPSI_DEXTU = 0x7c000002,
344 MIPSI_DEXT = 0x7c000003,
345 MIPSI_DINSM = 0x7c000005,
346 MIPSI_DINSU = 0x7c000006,
347 MIPSI_DINS = 0x7c000007,
348
349 MIPSI_FLOOR_D = 0x4620000b,
350
214 /* FP instructions. */ 351 /* FP instructions. */
215 MIPSI_MOV_S = 0x46000006, 352 MIPSI_MOV_S = 0x46000006,
216 MIPSI_MOV_D = 0x46200006, 353 MIPSI_MOV_D = 0x46200006,
354#if !LJ_TARGET_MIPSR6
217 MIPSI_MOVT_D = 0x46210011, 355 MIPSI_MOVT_D = 0x46210011,
218 MIPSI_MOVF_D = 0x46200011, 356 MIPSI_MOVF_D = 0x46200011,
357#else
358 MIPSI_MIN_D = 0x4620001C,
359 MIPSI_MAX_D = 0x4620001E,
360 MIPSI_SEL_D = 0x46200010,
361#endif
219 362
220 MIPSI_ABS_D = 0x46200005, 363 MIPSI_ABS_D = 0x46200005,
221 MIPSI_NEG_D = 0x46200007, 364 MIPSI_NEG_D = 0x46200007,
@@ -235,23 +378,37 @@ typedef enum MIPSIns {
235 MIPSI_CVT_W_D = 0x46200024, 378 MIPSI_CVT_W_D = 0x46200024,
236 MIPSI_CVT_S_W = 0x46800020, 379 MIPSI_CVT_S_W = 0x46800020,
237 MIPSI_CVT_D_W = 0x46800021, 380 MIPSI_CVT_D_W = 0x46800021,
381 MIPSI_CVT_S_L = 0x46a00020,
382 MIPSI_CVT_D_L = 0x46a00021,
238 383
239 MIPSI_TRUNC_W_S = 0x4600000d, 384 MIPSI_TRUNC_W_S = 0x4600000d,
240 MIPSI_TRUNC_W_D = 0x4620000d, 385 MIPSI_TRUNC_W_D = 0x4620000d,
386 MIPSI_TRUNC_L_S = 0x46000009,
387 MIPSI_TRUNC_L_D = 0x46200009,
241 MIPSI_FLOOR_W_S = 0x4600000f, 388 MIPSI_FLOOR_W_S = 0x4600000f,
242 MIPSI_FLOOR_W_D = 0x4620000f, 389 MIPSI_FLOOR_W_D = 0x4620000f,
243 390
244 MIPSI_MFC1 = 0x44000000, 391 MIPSI_MFC1 = 0x44000000,
245 MIPSI_MTC1 = 0x44800000, 392 MIPSI_MTC1 = 0x44800000,
393 MIPSI_DMTC1 = 0x44a00000,
394 MIPSI_DMFC1 = 0x44200000,
246 395
396#if !LJ_TARGET_MIPSR6
247 MIPSI_BC1F = 0x45000000, 397 MIPSI_BC1F = 0x45000000,
248 MIPSI_BC1T = 0x45010000, 398 MIPSI_BC1T = 0x45010000,
249
250 MIPSI_C_EQ_D = 0x46200032, 399 MIPSI_C_EQ_D = 0x46200032,
400 MIPSI_C_OLT_S = 0x46000034,
251 MIPSI_C_OLT_D = 0x46200034, 401 MIPSI_C_OLT_D = 0x46200034,
252 MIPSI_C_ULT_D = 0x46200035, 402 MIPSI_C_ULT_D = 0x46200035,
253 MIPSI_C_OLE_D = 0x46200036, 403 MIPSI_C_OLE_D = 0x46200036,
254 MIPSI_C_ULE_D = 0x46200037, 404 MIPSI_C_ULE_D = 0x46200037,
405#else
406 MIPSI_BC1EQZ = 0x45200000,
407 MIPSI_BC1NEZ = 0x45a00000,
408 MIPSI_CMP_EQ_D = 0x46a00002,
409 MIPSI_CMP_LT_S = 0x46800004,
410 MIPSI_CMP_LT_D = 0x46a00004,
411#endif
255 412
256} MIPSIns; 413} MIPSIns;
257 414
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index 580995d5..c7d4c229 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -104,7 +104,7 @@ enum {
104/* This definition must match with the *.dasc file(s). */ 104/* This definition must match with the *.dasc file(s). */
105typedef struct { 105typedef struct {
106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ 106 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
107 int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ 107 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
108 int32_t spill[256]; /* Spill slots. */ 108 int32_t spill[256]; /* Spill slots. */
109} ExitState; 109} ExitState;
110 110
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 8a96cbf2..71c930fe 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -22,7 +22,7 @@
22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) 22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
23#endif 23#endif
24#define VRIDDEF(_) \ 24#define VRIDDEF(_) \
25 _(MRM) 25 _(MRM) _(RIP)
26 26
27#define RIDENUM(name) RID_##name, 27#define RIDENUM(name) RID_##name,
28 28
@@ -31,8 +31,10 @@ enum {
31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ 31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
32 RID_MAX, 32 RID_MAX,
33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
34 RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */
34 35
35 /* Calling conventions. */ 36 /* Calling conventions. */
37 RID_SP = RID_ESP,
36 RID_RET = RID_EAX, 38 RID_RET = RID_EAX,
37#if LJ_64 39#if LJ_64
38 RID_FPRET = RID_XMM0, 40 RID_FPRET = RID_XMM0,
@@ -62,8 +64,10 @@ enum {
62 64
63/* -- Register sets ------------------------------------------------------- */ 65/* -- Register sets ------------------------------------------------------- */
64 66
65/* Make use of all registers, except the stack pointer. */ 67/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
66#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) 68#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
69 - RID2RSET(RID_ESP) \
70 - LJ_GC64*RID2RSET(RID_DISPATCH))
67#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) 71#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
68#define RSET_ALL (RSET_GPR|RSET_FPR) 72#define RSET_ALL (RSET_GPR|RSET_FPR)
69#define RSET_INIT RSET_ALL 73#define RSET_INIT RSET_ALL
@@ -131,7 +135,11 @@ enum {
131#define SPS_FIXED (4*2) 135#define SPS_FIXED (4*2)
132#define SPS_FIRST (4*2) /* Don't use callee register save area. */ 136#define SPS_FIRST (4*2) /* Don't use callee register save area. */
133#else 137#else
138#if LJ_GC64
139#define SPS_FIXED 2
140#else
134#define SPS_FIXED 4 141#define SPS_FIXED 4
142#endif
135#define SPS_FIRST 2 143#define SPS_FIRST 2
136#endif 144#endif
137#else 145#else
@@ -184,12 +192,18 @@ typedef struct {
184#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) 192#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
185#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) 193#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
186 194
195#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
196#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
197#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
198#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
199
187/* This list of x86 opcodes is not intended to be complete. Opcodes are only 200/* This list of x86 opcodes is not intended to be complete. Opcodes are only
188** included when needed. Take a look at DynASM or jit.dis_x86 to see the 201** included when needed. Take a look at DynASM or jit.dis_x86 to see the
189** whole mess. 202** whole mess.
190*/ 203*/
191typedef enum { 204typedef enum {
192 /* Fixed length opcodes. XI_* prefix. */ 205 /* Fixed length opcodes. XI_* prefix. */
206 XI_O16 = 0x66,
193 XI_NOP = 0x90, 207 XI_NOP = 0x90,
194 XI_XCHGa = 0x90, 208 XI_XCHGa = 0x90,
195 XI_CALL = 0xe8, 209 XI_CALL = 0xe8,
@@ -207,6 +221,7 @@ typedef enum {
207 XI_PUSHi8 = 0x6a, 221 XI_PUSHi8 = 0x6a,
208 XI_TESTb = 0x84, 222 XI_TESTb = 0x84,
209 XI_TEST = 0x85, 223 XI_TEST = 0x85,
224 XI_INT3 = 0xcc,
210 XI_MOVmi = 0xc7, 225 XI_MOVmi = 0xc7,
211 XI_GROUP5 = 0xff, 226 XI_GROUP5 = 0xff,
212 227
@@ -226,7 +241,14 @@ typedef enum {
226 XI_FSCALE = 0xfdd9, 241 XI_FSCALE = 0xfdd9,
227 XI_FYL2X = 0xf1d9, 242 XI_FYL2X = 0xf1d9,
228 243
244 /* VEX-encoded instructions. XV_* prefix. */
245 XV_RORX = XV_f20f3a(f0),
246 XV_SARX = XV_f30f38(f7),
247 XV_SHLX = XV_660f38(f7),
248 XV_SHRX = XV_f20f38(f7),
249
229 /* Variable-length opcodes. XO_* prefix. */ 250 /* Variable-length opcodes. XO_* prefix. */
251 XO_OR = XO_(0b),
230 XO_MOV = XO_(8b), 252 XO_MOV = XO_(8b),
231 XO_MOVto = XO_(89), 253 XO_MOVto = XO_(89),
232 XO_MOVtow = XO_66(89), 254 XO_MOVtow = XO_66(89),
@@ -277,10 +299,8 @@ typedef enum {
277 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ 299 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
278 XO_UCOMISD = XO_660f(2e), 300 XO_UCOMISD = XO_660f(2e),
279 XO_CVTSI2SD = XO_f20f(2a), 301 XO_CVTSI2SD = XO_f20f(2a),
280 XO_CVTSD2SI = XO_f20f(2d),
281 XO_CVTTSD2SI= XO_f20f(2c), 302 XO_CVTTSD2SI= XO_f20f(2c),
282 XO_CVTSI2SS = XO_f30f(2a), 303 XO_CVTSI2SS = XO_f30f(2a),
283 XO_CVTSS2SI = XO_f30f(2d),
284 XO_CVTTSS2SI= XO_f30f(2c), 304 XO_CVTTSS2SI= XO_f30f(2c),
285 XO_CVTSS2SD = XO_f30f(5a), 305 XO_CVTSS2SD = XO_f30f(5a),
286 XO_CVTSD2SS = XO_f20f(5a), 306 XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 311baa73..a43c8c4e 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -117,15 +117,26 @@ static void perftools_addtrace(GCtrace *T)
117} 117}
118#endif 118#endif
119 119
120/* Allocate space for copy of trace. */ 120/* Allocate space for copy of T. */
121static GCtrace *trace_save_alloc(jit_State *J) 121GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
122{ 122{
123 size_t sztr = ((sizeof(GCtrace)+7)&~7); 123 size_t sztr = ((sizeof(GCtrace)+7)&~7);
124 size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); 124 size_t szins = (T->nins-T->nk)*sizeof(IRIns);
125 size_t sz = sztr + szins + 125 size_t sz = sztr + szins +
126 J->cur.nsnap*sizeof(SnapShot) + 126 T->nsnap*sizeof(SnapShot) +
127 J->cur.nsnapmap*sizeof(SnapEntry); 127 T->nsnapmap*sizeof(SnapEntry);
128 return lj_mem_newt(J->L, (MSize)sz, GCtrace); 128 GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
129 char *p = (char *)T2 + sztr;
130 T2->gct = ~LJ_TTRACE;
131 T2->marked = 0;
132 T2->traceno = 0;
133 T2->ir = (IRIns *)p - T->nk;
134 T2->nins = T->nins;
135 T2->nk = T->nk;
136 T2->nsnap = T->nsnap;
137 T2->nsnapmap = T->nsnapmap;
138 memcpy(p, T->ir + T->nk, szins);
139 return T2;
129} 140}
130 141
131/* Save current trace by copying and compacting it. */ 142/* Save current trace by copying and compacting it. */
@@ -139,12 +150,12 @@ static void trace_save(jit_State *J, GCtrace *T)
139 setgcrefp(J2G(J)->gc.root, T); 150 setgcrefp(J2G(J)->gc.root, T);
140 newwhite(J2G(J), T); 151 newwhite(J2G(J), T);
141 T->gct = ~LJ_TTRACE; 152 T->gct = ~LJ_TTRACE;
142 T->ir = (IRIns *)p - J->cur.nk; 153 T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
143 memcpy(p, J->cur.ir+J->cur.nk, szins);
144 p += szins; 154 p += szins;
145 TRACE_APPENDVEC(snap, nsnap, SnapShot) 155 TRACE_APPENDVEC(snap, nsnap, SnapShot)
146 TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) 156 TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
147 J->cur.traceno = 0; 157 J->cur.traceno = 0;
158 J->curfinal = NULL;
148 setgcrefp(J->trace[T->traceno], T); 159 setgcrefp(J->trace[T->traceno], T);
149 lj_gc_barriertrace(J2G(J), T->traceno); 160 lj_gc_barriertrace(J2G(J), T->traceno);
150 lj_gdbjit_addtrace(J, T); 161 lj_gdbjit_addtrace(J, T);
@@ -274,7 +285,7 @@ int lj_trace_flushall(lua_State *L)
274 if (T->root == 0) 285 if (T->root == 0)
275 trace_flushroot(J, T); 286 trace_flushroot(J, T);
276 lj_gdbjit_deltrace(J, T); 287 lj_gdbjit_deltrace(J, T);
277 T->traceno = 0; 288 T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */
278 setgcrefnull(J->trace[i]); 289 setgcrefnull(J->trace[i]);
279 } 290 }
280 } 291 }
@@ -296,13 +307,42 @@ void lj_trace_initstate(global_State *g)
296{ 307{
297 jit_State *J = G2J(g); 308 jit_State *J = G2J(g);
298 TValue *tv; 309 TValue *tv;
299 /* Initialize SIMD constants. */ 310
311 /* Initialize aligned SIMD constants. */
300 tv = LJ_KSIMD(J, LJ_KSIMD_ABS); 312 tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
301 tv[0].u64 = U64x(7fffffff,ffffffff); 313 tv[0].u64 = U64x(7fffffff,ffffffff);
302 tv[1].u64 = U64x(7fffffff,ffffffff); 314 tv[1].u64 = U64x(7fffffff,ffffffff);
303 tv = LJ_KSIMD(J, LJ_KSIMD_NEG); 315 tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
304 tv[0].u64 = U64x(80000000,00000000); 316 tv[0].u64 = U64x(80000000,00000000);
305 tv[1].u64 = U64x(80000000,00000000); 317 tv[1].u64 = U64x(80000000,00000000);
318
319 /* Initialize 32/64 bit constants. */
320#if LJ_TARGET_X86ORX64
321 J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
322#if LJ_32
323 J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
324#endif
325 J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
326 J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
327#endif
328#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
329 J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
330#endif
331#if LJ_TARGET_PPC
332 J->k32[LJ_K32_2P52_2P31] = 0x59800004;
333 J->k32[LJ_K32_2P52] = 0x59800000;
334#endif
335#if LJ_TARGET_PPC || LJ_TARGET_MIPS
336 J->k32[LJ_K32_2P31] = 0x4f000000;
337#endif
338#if LJ_TARGET_MIPS
339 J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
340#if LJ_64
341 J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
342 J->k32[LJ_K32_2P63] = 0x5f000000;
343 J->k32[LJ_K32_M2P64] = 0xdf800000;
344#endif
345#endif
306} 346}
307 347
308/* Free everything associated with the JIT compiler state. */ 348/* Free everything associated with the JIT compiler state. */
@@ -317,7 +357,6 @@ void lj_trace_freestate(global_State *g)
317 } 357 }
318#endif 358#endif
319 lj_mcode_free(J); 359 lj_mcode_free(J);
320 lj_ir_k64_freeall(J);
321 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); 360 lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
322 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); 361 lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
323 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); 362 lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
@@ -367,7 +406,7 @@ static void trace_start(jit_State *J)
367 TraceNo traceno; 406 TraceNo traceno;
368 407
369 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ 408 if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
370 if (J->parent == 0) { 409 if (J->parent == 0 && J->exitno == 0) {
371 /* Lazy bytecode patching to disable hotcount events. */ 410 /* Lazy bytecode patching to disable hotcount events. */
372 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || 411 lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
373 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); 412 bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
@@ -401,6 +440,8 @@ static void trace_start(jit_State *J)
401 J->guardemit.irt = 0; 440 J->guardemit.irt = 0;
402 J->postproc = LJ_POST_NONE; 441 J->postproc = LJ_POST_NONE;
403 lj_resetsplit(J); 442 lj_resetsplit(J);
443 J->retryrec = 0;
444 J->ktrace = 0;
404 setgcref(J->cur.startpt, obj2gco(J->pt)); 445 setgcref(J->cur.startpt, obj2gco(J->pt));
405 446
406 L = J->L; 447 L = J->L;
@@ -412,6 +453,12 @@ static void trace_start(jit_State *J)
412 if (J->parent) { 453 if (J->parent) {
413 setintV(L->top++, J->parent); 454 setintV(L->top++, J->parent);
414 setintV(L->top++, J->exitno); 455 setintV(L->top++, J->exitno);
456 } else {
457 BCOp op = bc_op(*J->pc);
458 if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) {
459 setintV(L->top++, J->exitno); /* Parent of stitched trace. */
460 setintV(L->top++, -1);
461 }
415 } 462 }
416 ); 463 );
417 lj_record_setup(J); 464 lj_record_setup(J);
@@ -424,7 +471,7 @@ static void trace_stop(jit_State *J)
424 BCOp op = bc_op(J->cur.startins); 471 BCOp op = bc_op(J->cur.startins);
425 GCproto *pt = &gcref(J->cur.startpt)->pt; 472 GCproto *pt = &gcref(J->cur.startpt)->pt;
426 TraceNo traceno = J->cur.traceno; 473 TraceNo traceno = J->cur.traceno;
427 GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ 474 GCtrace *T = J->curfinal;
428 lua_State *L; 475 lua_State *L;
429 476
430 switch (op) { 477 switch (op) {
@@ -461,6 +508,12 @@ static void trace_stop(jit_State *J)
461 root->nextside = (TraceNo1)traceno; 508 root->nextside = (TraceNo1)traceno;
462 } 509 }
463 break; 510 break;
511 case BC_CALLM:
512 case BC_CALL:
513 case BC_ITERC:
514 /* Trace stitching: patch link of previous trace. */
515 traceref(J, J->exitno)->link = traceno;
516 break;
464 default: 517 default:
465 lua_assert(0); 518 lua_assert(0);
466 break; 519 break;
@@ -475,6 +528,7 @@ static void trace_stop(jit_State *J)
475 lj_vmevent_send(L, TRACE, 528 lj_vmevent_send(L, TRACE,
476 setstrV(L, L->top++, lj_str_newlit(L, "stop")); 529 setstrV(L, L->top++, lj_str_newlit(L, "stop"));
477 setintV(L->top++, traceno); 530 setintV(L->top++, traceno);
531 setfuncV(L, L->top++, J->fn);
478 ); 532 );
479} 533}
480 534
@@ -502,6 +556,10 @@ static int trace_abort(jit_State *J)
502 556
503 J->postproc = LJ_POST_NONE; 557 J->postproc = LJ_POST_NONE;
504 lj_mcode_abort(J); 558 lj_mcode_abort(J);
559 if (J->curfinal) {
560 lj_trace_free(J2G(J), J->curfinal);
561 J->curfinal = NULL;
562 }
505 if (tvisnumber(L->top-1)) 563 if (tvisnumber(L->top-1))
506 e = (TraceError)numberVint(L->top-1); 564 e = (TraceError)numberVint(L->top-1);
507 if (e == LJ_TRERR_MCODELM) { 565 if (e == LJ_TRERR_MCODELM) {
@@ -510,8 +568,17 @@ static int trace_abort(jit_State *J)
510 return 1; /* Retry ASM with new MCode area. */ 568 return 1; /* Retry ASM with new MCode area. */
511 } 569 }
512 /* Penalize or blacklist starting bytecode instruction. */ 570 /* Penalize or blacklist starting bytecode instruction. */
513 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) 571 if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
514 penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); 572 if (J->exitno == 0) {
573 BCIns *startpc = mref(J->cur.startpc, BCIns);
574 if (e == LJ_TRERR_RETRY)
575 hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */
576 else
577 penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e);
578 } else {
579 traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
580 }
581 }
515 582
516 /* Is there anything to abort? */ 583 /* Is there anything to abort? */
517 traceno = J->cur.traceno; 584 traceno = J->cur.traceno;
@@ -680,6 +747,7 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
680{ 747{
681 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; 748 SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
682 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && 749 if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
750 isluafunc(curr_func(J->L)) &&
683 snap->count != SNAPCOUNT_DONE && 751 snap->count != SNAPCOUNT_DONE &&
684 ++snap->count >= J->param[JIT_P_hotexit]) { 752 ++snap->count >= J->param[JIT_P_hotexit]) {
685 lua_assert(J->state == LJ_TRACE_IDLE); 753 lua_assert(J->state == LJ_TRACE_IDLE);
@@ -689,6 +757,20 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
689 } 757 }
690} 758}
691 759
760/* Stitch a new trace to the previous trace. */
761void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
762{
763 /* Only start a new trace if not recording or inside __gc call or vmevent. */
764 if (J->state == LJ_TRACE_IDLE &&
765 !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
766 J->parent = 0; /* Have to treat it like a root trace. */
767 /* J->exitno is set to the invoking trace. */
768 J->state = LJ_TRACE_START;
769 lj_trace_ins(J, pc);
770 }
771}
772
773
692/* Tiny struct to pass data to protected call. */ 774/* Tiny struct to pass data to protected call. */
693typedef struct ExitDataCP { 775typedef struct ExitDataCP {
694 jit_State *J; 776 jit_State *J;
@@ -775,17 +857,20 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
775 if (errcode) 857 if (errcode)
776 return -errcode; /* Return negated error code. */ 858 return -errcode; /* Return negated error code. */
777 859
778 lj_vmevent_send(L, TEXIT, 860 if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
779 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); 861 lj_vmevent_send(L, TEXIT,
780 setintV(L->top++, J->parent); 862 lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
781 setintV(L->top++, J->exitno); 863 setintV(L->top++, J->parent);
782 trace_exit_regs(L, ex); 864 setintV(L->top++, J->exitno);
783 ); 865 trace_exit_regs(L, ex);
866 );
784 867
785 pc = exd.pc; 868 pc = exd.pc;
786 cf = cframe_raw(L->cframe); 869 cf = cframe_raw(L->cframe);
787 setcframe_pc(cf, pc); 870 setcframe_pc(cf, pc);
788 if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { 871 if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
872 /* Just exit to interpreter. */
873 } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
789 if (!(G(L)->hookmask & HOOK_GC)) 874 if (!(G(L)->hookmask & HOOK_GC))
790 lj_gc_step(L); /* Exited because of GC: drive GC forward. */ 875 lj_gc_step(L); /* Exited because of GC: drive GC forward. */
791 } else { 876 } else {
@@ -809,7 +894,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
809 ERRNO_RESTORE 894 ERRNO_RESTORE
810 switch (bc_op(*pc)) { 895 switch (bc_op(*pc)) {
811 case BC_CALLM: case BC_CALLMT: 896 case BC_CALLM: case BC_CALLMT:
812 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); 897 return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2);
813 case BC_RETM: 898 case BC_RETM:
814 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); 899 return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
815 case BC_TSETM: 900 case BC_TSETM:
diff --git a/src/lj_trace.h b/src/lj_trace.h
index 460f10a1..93d7aea1 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); 23LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
24 24
25/* Trace management. */ 25/* Trace management. */
26LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
26LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); 27LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
27LJ_FUNC void lj_trace_reenableproto(GCproto *pt); 28LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
28LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); 29LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
@@ -34,6 +35,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
34/* Event handling. */ 35/* Event handling. */
35LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); 36LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
36LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); 37LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
38LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
37LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); 39LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
38 40
39/* Signal asynchronous abort of trace or end of trace. */ 41/* Signal asynchronous abort of trace or end of trace. */
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index ecba11a6..0156a664 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,10 +7,12 @@
7 7
8/* Recording. */ 8/* Recording. */
9TREDEF(RECERR, "error thrown or hook called during recording") 9TREDEF(RECERR, "error thrown or hook called during recording")
10TREDEF(TRACEUV, "trace too short")
10TREDEF(TRACEOV, "trace too long") 11TREDEF(TRACEOV, "trace too long")
11TREDEF(STACKOV, "trace too deep") 12TREDEF(STACKOV, "trace too deep")
12TREDEF(SNAPOV, "too many snapshots") 13TREDEF(SNAPOV, "too many snapshots")
13TREDEF(BLACKL, "blacklisted") 14TREDEF(BLACKL, "blacklisted")
15TREDEF(RETRY, "retry recording")
14TREDEF(NYIBC, "NYI: bytecode %d") 16TREDEF(NYIBC, "NYI: bytecode %d")
15 17
16/* Recording loop ops. */ 18/* Recording loop ops. */
@@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
23TREDEF(CJITOFF, "JIT compilation disabled for function") 25TREDEF(CJITOFF, "JIT compilation disabled for function")
24TREDEF(CUNROLL, "call unroll limit reached") 26TREDEF(CUNROLL, "call unroll limit reached")
25TREDEF(DOWNREC, "down-recursion, restarting") 27TREDEF(DOWNREC, "down-recursion, restarting")
26TREDEF(NYICF, "NYI: C function %s")
27TREDEF(NYIFF, "NYI: FastFunc %s")
28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") 28TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
29TREDEF(NYIRETL, "NYI: return to lower frame") 29TREDEF(NYIRETL, "NYI: return to lower frame")
30 30
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 5b10adf3..5a7bc392 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -17,6 +17,10 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); 17LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
18LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); 18LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
19LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); 19LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
20#if LJ_ABI_WIN && LJ_TARGET_X86
21LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
22 void *unwinder, int errcode);
23#endif
20LJ_ASMF void lj_vm_unwind_c_eh(void); 24LJ_ASMF void lj_vm_unwind_c_eh(void);
21LJ_ASMF void lj_vm_unwind_ff_eh(void); 25LJ_ASMF void lj_vm_unwind_ff_eh(void);
22#if LJ_TARGET_X86ORX64 26#if LJ_TARGET_X86ORX64
@@ -43,13 +47,14 @@ LJ_ASMF void lj_vm_record(void);
43LJ_ASMF void lj_vm_inshook(void); 47LJ_ASMF void lj_vm_inshook(void);
44LJ_ASMF void lj_vm_rethook(void); 48LJ_ASMF void lj_vm_rethook(void);
45LJ_ASMF void lj_vm_callhook(void); 49LJ_ASMF void lj_vm_callhook(void);
50LJ_ASMF void lj_vm_profhook(void);
46 51
47/* Trace exit handling. */ 52/* Trace exit handling. */
48LJ_ASMF void lj_vm_exit_handler(void); 53LJ_ASMF void lj_vm_exit_handler(void);
49LJ_ASMF void lj_vm_exit_interp(void); 54LJ_ASMF void lj_vm_exit_interp(void);
50 55
51/* Internal math helper functions. */ 56/* Internal math helper functions. */
52#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC 57#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
53#define lj_vm_floor floor 58#define lj_vm_floor floor
54#define lj_vm_ceil ceil 59#define lj_vm_ceil ceil
55#else 60#else
@@ -60,23 +65,26 @@ LJ_ASMF double lj_vm_floor_sf(double);
60LJ_ASMF double lj_vm_ceil_sf(double); 65LJ_ASMF double lj_vm_ceil_sf(double);
61#endif 66#endif
62#endif 67#endif
63#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 68#ifdef LUAJIT_NO_LOG2
64LJ_ASMF double lj_vm_log2(double); 69LJ_ASMF double lj_vm_log2(double);
65#else 70#else
66#define lj_vm_log2 log2 71#define lj_vm_log2 log2
67#endif 72#endif
73#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS)
74LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
75#endif
68 76
69#if LJ_HASJIT 77#if LJ_HASJIT
70#if LJ_TARGET_X86ORX64 78#if LJ_TARGET_X86ORX64
71LJ_ASMF void lj_vm_floor_sse(void); 79LJ_ASMF void lj_vm_floor_sse(void);
72LJ_ASMF void lj_vm_ceil_sse(void); 80LJ_ASMF void lj_vm_ceil_sse(void);
73LJ_ASMF void lj_vm_trunc_sse(void); 81LJ_ASMF void lj_vm_trunc_sse(void);
74LJ_ASMF void lj_vm_exp_x87(void);
75LJ_ASMF void lj_vm_exp2_x87(void);
76LJ_ASMF void lj_vm_pow_sse(void);
77LJ_ASMF void lj_vm_powi_sse(void); 82LJ_ASMF void lj_vm_powi_sse(void);
83#define lj_vm_powi NULL
78#else 84#else
79#if LJ_TARGET_PPC 85LJ_ASMF double lj_vm_powi(double, int32_t);
86#endif
87#if LJ_TARGET_PPC || LJ_TARGET_ARM64
80#define lj_vm_trunc trunc 88#define lj_vm_trunc trunc
81#else 89#else
82LJ_ASMF double lj_vm_trunc(double); 90LJ_ASMF double lj_vm_trunc(double);
@@ -84,14 +92,11 @@ LJ_ASMF double lj_vm_trunc(double);
84LJ_ASMF double lj_vm_trunc_sf(double); 92LJ_ASMF double lj_vm_trunc_sf(double);
85#endif 93#endif
86#endif 94#endif
87LJ_ASMF double lj_vm_powi(double, int32_t);
88#ifdef LUAJIT_NO_EXP2 95#ifdef LUAJIT_NO_EXP2
89LJ_ASMF double lj_vm_exp2(double); 96LJ_ASMF double lj_vm_exp2(double);
90#else 97#else
91#define lj_vm_exp2 exp2 98#define lj_vm_exp2 exp2
92#endif 99#endif
93#endif
94LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
95#if LJ_HASFFI 100#if LJ_HASFFI
96LJ_ASMF int lj_vm_errno(void); 101LJ_ASMF int lj_vm_errno(void);
97#endif 102#endif
@@ -104,8 +109,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
104LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ 109LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
105LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ 110LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
106LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ 111LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
107 112LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
108enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
109 113
110/* Start of the ASM code. */ 114/* Start of the ASM code. */
111LJ_ASMF char lj_vm_asm_begin[]; 115LJ_ASMF char lj_vm_asm_begin[];
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index 1d496748..8b442a44 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
27 if (tv && tvisfunc(tv)) { 27 if (tv && tvisfunc(tv)) {
28 lj_state_checkstack(L, LUA_MINSTACK); 28 lj_state_checkstack(L, LUA_MINSTACK);
29 setfuncV(L, L->top++, funcV(tv)); 29 setfuncV(L, L->top++, funcV(tv));
30 if (LJ_FR2) setnilV(L->top++);
30 return savestack(L, L->top); 31 return savestack(L, L->top);
31 } 32 }
32 } 33 }
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index 50a2cbba..2a41bcaa 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -13,16 +13,29 @@
13#include "lj_ir.h" 13#include "lj_ir.h"
14#include "lj_vm.h" 14#include "lj_vm.h"
15 15
16/* -- Helper functions for generated machine code ------------------------- */ 16/* -- Wrapper functions --------------------------------------------------- */
17 17
18#if LJ_TARGET_X86ORX64 18#if LJ_TARGET_X86 && __ELF__ && __PIC__
19/* Wrapper functions to avoid linker issues on OSX. */ 19/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
20LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } 20LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
21LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } 21LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
22LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } 22LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
23LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
24LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
25LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
26LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
27LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
28LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
29LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
30LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
31LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
32LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
33LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
34LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
23#endif 35#endif
24 36
25#if !LJ_TARGET_X86ORX64 37/* -- Helper functions for generated machine code ------------------------- */
38
26double lj_vm_foldarith(double x, double y, int op) 39double lj_vm_foldarith(double x, double y, int op)
27{ 40{
28 switch (op) { 41 switch (op) {
@@ -43,6 +56,19 @@ double lj_vm_foldarith(double x, double y, int op)
43 default: return x; 56 default: return x;
44 } 57 }
45} 58}
59
60#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
61int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
62{
63 uint32_t y, ua, ub;
64 lua_assert(b != 0); /* This must be checked before using this function. */
65 ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
66 ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
67 y = ua % ub;
68 if (y != 0 && (a^b) < 0) y = y - ub;
69 if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
70 return (int32_t)y;
71}
46#endif 72#endif
47 73
48#if LJ_HASJIT 74#if LJ_HASJIT
@@ -61,20 +87,6 @@ double lj_vm_exp2(double a)
61} 87}
62#endif 88#endif
63 89
64#if !(LJ_TARGET_ARM || LJ_TARGET_PPC)
65int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
66{
67 uint32_t y, ua, ub;
68 lua_assert(b != 0); /* This must be checked before using this function. */
69 ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
70 ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
71 y = ua % ub;
72 if (y != 0 && (a^b) < 0) y = y - ub;
73 if (((int32_t)y^b) < 0) y = (uint32_t)-(int32_t)y;
74 return (int32_t)y;
75}
76#endif
77
78#if !LJ_TARGET_X86ORX64 90#if !LJ_TARGET_X86ORX64
79/* Unsigned x^k. */ 91/* Unsigned x^k. */
80static double lj_vm_powui(double x, uint32_t k) 92static double lj_vm_powui(double x, uint32_t k)
@@ -107,6 +119,7 @@ double lj_vm_powi(double x, int32_t k)
107 else 119 else
108 return 1.0 / lj_vm_powui(x, (uint32_t)-k); 120 return 1.0 / lj_vm_powui(x, (uint32_t)-k);
109} 121}
122#endif
110 123
111/* Computes fpm(x) for extended math functions. */ 124/* Computes fpm(x) for extended math functions. */
112double lj_vm_foldfpm(double x, int fpm) 125double lj_vm_foldfpm(double x, int fpm)
@@ -128,7 +141,6 @@ double lj_vm_foldfpm(double x, int fpm)
128 } 141 }
129 return 0; 142 return 0;
130} 143}
131#endif
132 144
133#if LJ_HASFFI 145#if LJ_HASFFI
134int lj_vm_errno(void) 146int lj_vm_errno(void)
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 21b46314..39542981 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -33,6 +33,7 @@
33#include "lj_char.c" 33#include "lj_char.c"
34#include "lj_bc.c" 34#include "lj_bc.c"
35#include "lj_obj.c" 35#include "lj_obj.c"
36#include "lj_buf.c"
36#include "lj_str.c" 37#include "lj_str.c"
37#include "lj_tab.c" 38#include "lj_tab.c"
38#include "lj_func.c" 39#include "lj_func.c"
@@ -44,7 +45,10 @@
44#include "lj_vmevent.c" 45#include "lj_vmevent.c"
45#include "lj_vmmath.c" 46#include "lj_vmmath.c"
46#include "lj_strscan.c" 47#include "lj_strscan.c"
48#include "lj_strfmt.c"
49#include "lj_strfmt_num.c"
47#include "lj_api.c" 50#include "lj_api.c"
51#include "lj_profile.c"
48#include "lj_lex.c" 52#include "lj_lex.c"
49#include "lj_parse.c" 53#include "lj_parse.c"
50#include "lj_bcread.c" 54#include "lj_bcread.c"
diff --git a/src/lua.h b/src/lua.h
index c83fd3bb..850bd796 100644
--- a/src/lua.h
+++ b/src/lua.h
@@ -39,7 +39,8 @@
39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) 39#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i))
40 40
41 41
42/* thread status; 0 is OK */ 42/* thread status */
43#define LUA_OK 0
43#define LUA_YIELD 1 44#define LUA_YIELD 1
44#define LUA_ERRRUN 2 45#define LUA_ERRRUN 2
45#define LUA_ERRSYNTAX 3 46#define LUA_ERRSYNTAX 3
@@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L);
226#define LUA_GCSTEP 5 227#define LUA_GCSTEP 5
227#define LUA_GCSETPAUSE 6 228#define LUA_GCSETPAUSE 6
228#define LUA_GCSETSTEPMUL 7 229#define LUA_GCSETSTEPMUL 7
230#define LUA_GCISRUNNING 9
229 231
230LUA_API int (lua_gc) (lua_State *L, int what, int data); 232LUA_API int (lua_gc) (lua_State *L, int what, int data);
231 233
@@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n);
346LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); 348LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2);
347LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, 349LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt,
348 const char *chunkname, const char *mode); 350 const char *chunkname, const char *mode);
351LUA_API const lua_Number *lua_version (lua_State *L);
352LUA_API void lua_copy (lua_State *L, int fromidx, int toidx);
353LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum);
354LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum);
355
356/* From Lua 5.3. */
357LUA_API int lua_isyieldable (lua_State *L);
349 358
350 359
351struct lua_Debug { 360struct lua_Debug {
diff --git a/src/luaconf.h b/src/luaconf.h
index 20feaca8..d422827a 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -37,7 +37,7 @@
37#endif 37#endif
38#define LUA_LROOT "/usr/local" 38#define LUA_LROOT "/usr/local"
39#define LUA_LUADIR "/lua/5.1/" 39#define LUA_LUADIR "/lua/5.1/"
40#define LUA_LJDIR "/luajit-2.0.5/" 40#define LUA_LJDIR "/luajit-2.1.0-beta3/"
41 41
42#ifdef LUA_ROOT 42#ifdef LUA_ROOT
43#define LUA_JROOT LUA_ROOT 43#define LUA_JROOT LUA_ROOT
@@ -79,7 +79,7 @@
79#define LUA_IGMARK "-" 79#define LUA_IGMARK "-"
80#define LUA_PATH_CONFIG \ 80#define LUA_PATH_CONFIG \
81 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ 81 LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
82 LUA_EXECDIR "\n" LUA_IGMARK 82 LUA_EXECDIR "\n" LUA_IGMARK "\n"
83 83
84/* Quoting in error messages. */ 84/* Quoting in error messages. */
85#define LUA_QL(x) "'" x "'" 85#define LUA_QL(x) "'" x "'"
@@ -92,10 +92,6 @@
92#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ 92#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
93#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ 93#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
94 94
95/* Compatibility with older library function names. */
96#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
97#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
98
99/* Configuration for the frontend (the luajit executable). */ 95/* Configuration for the frontend (the luajit executable). */
100#if defined(luajit_c) 96#if defined(luajit_c)
101#define LUA_PROGNAME "luajit" /* Fallback frontend name. */ 97#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
diff --git a/src/luajit.c b/src/luajit.c
index 3901762d..53902480 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -61,8 +61,9 @@ static void laction(int i)
61 61
62static void print_usage(void) 62static void print_usage(void)
63{ 63{
64 fprintf(stderr, 64 fputs("usage: ", stderr);
65 "usage: %s [options]... [script [args]...].\n" 65 fputs(progname, stderr);
66 fputs(" [options]... [script [args]...].\n"
66 "Available options are:\n" 67 "Available options are:\n"
67 " -e chunk Execute string " LUA_QL("chunk") ".\n" 68 " -e chunk Execute string " LUA_QL("chunk") ".\n"
68 " -l name Require library " LUA_QL("name") ".\n" 69 " -l name Require library " LUA_QL("name") ".\n"
@@ -73,16 +74,14 @@ static void print_usage(void)
73 " -v Show version information.\n" 74 " -v Show version information.\n"
74 " -E Ignore environment variables.\n" 75 " -E Ignore environment variables.\n"
75 " -- Stop handling options.\n" 76 " -- Stop handling options.\n"
76 " - Execute stdin and stop handling options.\n" 77 " - Execute stdin and stop handling options.\n", stderr);
77 ,
78 progname);
79 fflush(stderr); 78 fflush(stderr);
80} 79}
81 80
82static void l_message(const char *pname, const char *msg) 81static void l_message(const char *pname, const char *msg)
83{ 82{
84 if (pname) fprintf(stderr, "%s: ", pname); 83 if (pname) { fputs(pname, stderr); fputc(':', stderr); fputc(' ', stderr); }
85 fprintf(stderr, "%s\n", msg); 84 fputs(msg, stderr); fputc('\n', stderr);
86 fflush(stderr); 85 fflush(stderr);
87} 86}
88 87
@@ -125,7 +124,7 @@ static int docall(lua_State *L, int narg, int clear)
125#endif 124#endif
126 lua_remove(L, base); /* remove traceback function */ 125 lua_remove(L, base); /* remove traceback function */
127 /* force a complete garbage collection in case of errors */ 126 /* force a complete garbage collection in case of errors */
128 if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); 127 if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0);
129 return status; 128 return status;
130} 129}
131 130
@@ -154,22 +153,15 @@ static void print_jit_status(lua_State *L)
154 lua_settop(L, 0); /* clear stack */ 153 lua_settop(L, 0); /* clear stack */
155} 154}
156 155
157static int getargs(lua_State *L, char **argv, int n) 156static void createargtable(lua_State *L, char **argv, int argc, int argf)
158{ 157{
159 int narg;
160 int i; 158 int i;
161 int argc = 0; 159 lua_createtable(L, argc - argf, argf);
162 while (argv[argc]) argc++; /* count total number of arguments */
163 narg = argc - (n + 1); /* number of arguments to the script */
164 luaL_checkstack(L, narg + 3, "too many arguments to script");
165 for (i = n+1; i < argc; i++)
166 lua_pushstring(L, argv[i]);
167 lua_createtable(L, narg, n + 1);
168 for (i = 0; i < argc; i++) { 160 for (i = 0; i < argc; i++) {
169 lua_pushstring(L, argv[i]); 161 lua_pushstring(L, argv[i]);
170 lua_rawseti(L, -2, i - n); 162 lua_rawseti(L, -2, i - argf);
171 } 163 }
172 return narg; 164 lua_setglobal(L, "arg");
173} 165}
174 166
175static int dofile(lua_State *L, const char *name) 167static int dofile(lua_State *L, const char *name)
@@ -258,9 +250,9 @@ static void dotty(lua_State *L)
258 const char *oldprogname = progname; 250 const char *oldprogname = progname;
259 progname = NULL; 251 progname = NULL;
260 while ((status = loadline(L)) != -1) { 252 while ((status = loadline(L)) != -1) {
261 if (status == 0) status = docall(L, 0, 0); 253 if (status == LUA_OK) status = docall(L, 0, 0);
262 report(L, status); 254 report(L, status);
263 if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ 255 if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */
264 lua_getglobal(L, "print"); 256 lua_getglobal(L, "print");
265 lua_insert(L, 1); 257 lua_insert(L, 1);
266 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) 258 if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
@@ -275,21 +267,30 @@ static void dotty(lua_State *L)
275 progname = oldprogname; 267 progname = oldprogname;
276} 268}
277 269
278static int handle_script(lua_State *L, char **argv, int n) 270static int handle_script(lua_State *L, char **argx)
279{ 271{
280 int status; 272 int status;
281 const char *fname; 273 const char *fname = argx[0];
282 int narg = getargs(L, argv, n); /* collect arguments */ 274 if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
283 lua_setglobal(L, "arg");
284 fname = argv[n];
285 if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
286 fname = NULL; /* stdin */ 275 fname = NULL; /* stdin */
287 status = luaL_loadfile(L, fname); 276 status = luaL_loadfile(L, fname);
288 lua_insert(L, -(narg+1)); 277 if (status == LUA_OK) {
289 if (status == 0) 278 /* Fetch args from arg table. LUA_INIT or -e might have changed them. */
279 int narg = 0;
280 lua_getglobal(L, "arg");
281 if (lua_istable(L, -1)) {
282 do {
283 narg++;
284 lua_rawgeti(L, -narg, narg);
285 } while (!lua_isnil(L, -1));
286 lua_pop(L, 1);
287 lua_remove(L, -narg);
288 narg--;
289 } else {
290 lua_pop(L, 1);
291 }
290 status = docall(L, narg, 0); 292 status = docall(L, narg, 0);
291 else 293 }
292 lua_pop(L, narg);
293 return report(L, status); 294 return report(L, status);
294} 295}
295 296
@@ -386,7 +387,8 @@ static int dobytecode(lua_State *L, char **argv)
386 } 387 }
387 for (argv++; *argv != NULL; narg++, argv++) 388 for (argv++; *argv != NULL; narg++, argv++)
388 lua_pushstring(L, *argv); 389 lua_pushstring(L, *argv);
389 return report(L, lua_pcall(L, narg, 0, 0)); 390 report(L, lua_pcall(L, narg, 0, 0));
391 return -1;
390} 392}
391 393
392/* check that argument has no extra characters at the end */ 394/* check that argument has no extra characters at the end */
@@ -407,7 +409,7 @@ static int collectargs(char **argv, int *flags)
407 switch (argv[i][1]) { /* Check option. */ 409 switch (argv[i][1]) { /* Check option. */
408 case '-': 410 case '-':
409 notail(argv[i]); 411 notail(argv[i]);
410 return (argv[i+1] != NULL ? i+1 : 0); 412 return i+1;
411 case '\0': 413 case '\0':
412 return i; 414 return i;
413 case 'i': 415 case 'i':
@@ -433,23 +435,23 @@ static int collectargs(char **argv, int *flags)
433 case 'b': /* LuaJIT extension */ 435 case 'b': /* LuaJIT extension */
434 if (*flags) return -1; 436 if (*flags) return -1;
435 *flags |= FLAGS_EXEC; 437 *flags |= FLAGS_EXEC;
436 return 0; 438 return i+1;
437 case 'E': 439 case 'E':
438 *flags |= FLAGS_NOENV; 440 *flags |= FLAGS_NOENV;
439 break; 441 break;
440 default: return -1; /* invalid option */ 442 default: return -1; /* invalid option */
441 } 443 }
442 } 444 }
443 return 0; 445 return i;
444} 446}
445 447
446static int runargs(lua_State *L, char **argv, int n) 448static int runargs(lua_State *L, char **argv, int argn)
447{ 449{
448 int i; 450 int i;
449 for (i = 1; i < n; i++) { 451 for (i = 1; i < argn; i++) {
450 if (argv[i] == NULL) continue; 452 if (argv[i] == NULL) continue;
451 lua_assert(argv[i][0] == '-'); 453 lua_assert(argv[i][0] == '-');
452 switch (argv[i][1]) { /* option */ 454 switch (argv[i][1]) {
453 case 'e': { 455 case 'e': {
454 const char *chunk = argv[i] + 2; 456 const char *chunk = argv[i] + 2;
455 if (*chunk == '\0') chunk = argv[++i]; 457 if (*chunk == '\0') chunk = argv[++i];
@@ -463,10 +465,10 @@ static int runargs(lua_State *L, char **argv, int n)
463 if (*filename == '\0') filename = argv[++i]; 465 if (*filename == '\0') filename = argv[++i];
464 lua_assert(filename != NULL); 466 lua_assert(filename != NULL);
465 if (dolibrary(L, filename)) 467 if (dolibrary(L, filename))
466 return 1; /* stop if file fails */ 468 return 1;
467 break; 469 break;
468 } 470 }
469 case 'j': { /* LuaJIT extension */ 471 case 'j': { /* LuaJIT extension. */
470 const char *cmd = argv[i] + 2; 472 const char *cmd = argv[i] + 2;
471 if (*cmd == '\0') cmd = argv[++i]; 473 if (*cmd == '\0') cmd = argv[++i];
472 lua_assert(cmd != NULL); 474 lua_assert(cmd != NULL);
@@ -474,16 +476,16 @@ static int runargs(lua_State *L, char **argv, int n)
474 return 1; 476 return 1;
475 break; 477 break;
476 } 478 }
477 case 'O': /* LuaJIT extension */ 479 case 'O': /* LuaJIT extension. */
478 if (dojitopt(L, argv[i] + 2)) 480 if (dojitopt(L, argv[i] + 2))
479 return 1; 481 return 1;
480 break; 482 break;
481 case 'b': /* LuaJIT extension */ 483 case 'b': /* LuaJIT extension. */
482 return dobytecode(L, argv+i); 484 return dobytecode(L, argv+i);
483 default: break; 485 default: break;
484 } 486 }
485 } 487 }
486 return 0; 488 return LUA_OK;
487} 489}
488 490
489static int handle_luainit(lua_State *L) 491static int handle_luainit(lua_State *L)
@@ -494,7 +496,7 @@ static int handle_luainit(lua_State *L)
494 const char *init = getenv(LUA_INIT); 496 const char *init = getenv(LUA_INIT);
495#endif 497#endif
496 if (init == NULL) 498 if (init == NULL)
497 return 0; /* status OK */ 499 return LUA_OK;
498 else if (init[0] == '@') 500 else if (init[0] == '@')
499 return dofile(L, init+1); 501 return dofile(L, init+1);
500 else 502 else
@@ -511,45 +513,57 @@ static int pmain(lua_State *L)
511{ 513{
512 struct Smain *s = &smain; 514 struct Smain *s = &smain;
513 char **argv = s->argv; 515 char **argv = s->argv;
514 int script; 516 int argn;
515 int flags = 0; 517 int flags = 0;
516 globalL = L; 518 globalL = L;
517 if (argv[0] && argv[0][0]) progname = argv[0]; 519 if (argv[0] && argv[0][0]) progname = argv[0];
518 LUAJIT_VERSION_SYM(); /* linker-enforced version check */ 520
519 script = collectargs(argv, &flags); 521 LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
520 if (script < 0) { /* invalid args? */ 522
523 argn = collectargs(argv, &flags);
524 if (argn < 0) { /* Invalid args? */
521 print_usage(); 525 print_usage();
522 s->status = 1; 526 s->status = 1;
523 return 0; 527 return 0;
524 } 528 }
529
525 if ((flags & FLAGS_NOENV)) { 530 if ((flags & FLAGS_NOENV)) {
526 lua_pushboolean(L, 1); 531 lua_pushboolean(L, 1);
527 lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); 532 lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
528 } 533 }
529 lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ 534
530 luaL_openlibs(L); /* open libraries */ 535 /* Stop collector during library initialization. */
536 lua_gc(L, LUA_GCSTOP, 0);
537 luaL_openlibs(L);
531 lua_gc(L, LUA_GCRESTART, -1); 538 lua_gc(L, LUA_GCRESTART, -1);
539
540 createargtable(L, argv, s->argc, argn);
541
532 if (!(flags & FLAGS_NOENV)) { 542 if (!(flags & FLAGS_NOENV)) {
533 s->status = handle_luainit(L); 543 s->status = handle_luainit(L);
534 if (s->status != 0) return 0; 544 if (s->status != LUA_OK) return 0;
535 } 545 }
546
536 if ((flags & FLAGS_VERSION)) print_version(); 547 if ((flags & FLAGS_VERSION)) print_version();
537 s->status = runargs(L, argv, (script > 0) ? script : s->argc); 548
538 if (s->status != 0) return 0; 549 s->status = runargs(L, argv, argn);
539 if (script) { 550 if (s->status != LUA_OK) return 0;
540 s->status = handle_script(L, argv, script); 551
541 if (s->status != 0) return 0; 552 if (s->argc > argn) {
553 s->status = handle_script(L, argv + argn);
554 if (s->status != LUA_OK) return 0;
542 } 555 }
556
543 if ((flags & FLAGS_INTERACTIVE)) { 557 if ((flags & FLAGS_INTERACTIVE)) {
544 print_jit_status(L); 558 print_jit_status(L);
545 dotty(L); 559 dotty(L);
546 } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { 560 } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
547 if (lua_stdin_is_tty()) { 561 if (lua_stdin_is_tty()) {
548 print_version(); 562 print_version();
549 print_jit_status(L); 563 print_jit_status(L);
550 dotty(L); 564 dotty(L);
551 } else { 565 } else {
552 dofile(L, NULL); /* executes stdin as a file */ 566 dofile(L, NULL); /* Executes stdin as a file. */
553 } 567 }
554 } 568 }
555 return 0; 569 return 0;
@@ -558,7 +572,7 @@ static int pmain(lua_State *L)
558int main(int argc, char **argv) 572int main(int argc, char **argv)
559{ 573{
560 int status; 574 int status;
561 lua_State *L = lua_open(); /* create state */ 575 lua_State *L = lua_open();
562 if (L == NULL) { 576 if (L == NULL) {
563 l_message(argv[0], "cannot create state: not enough memory"); 577 l_message(argv[0], "cannot create state: not enough memory");
564 return EXIT_FAILURE; 578 return EXIT_FAILURE;
@@ -568,6 +582,6 @@ int main(int argc, char **argv)
568 status = lua_cpcall(L, pmain, NULL); 582 status = lua_cpcall(L, pmain, NULL);
569 report(L, status); 583 report(L, status);
570 lua_close(L); 584 lua_close(L);
571 return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; 585 return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
572} 586}
573 587
diff --git a/src/luajit.h b/src/luajit.h
index 5f5b3887..600031a1 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
30 30
31#include "lua.h" 31#include "lua.h"
32 32
33#define LUAJIT_VERSION "LuaJIT 2.0.5" 33#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3"
34#define LUAJIT_VERSION_NUM 20005 /* Version 2.0.5 = 02.00.05. */ 34#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
35#define LUAJIT_VERSION_SYM luaJIT_version_2_0_5 35#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3
36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2020 Mike Pall" 36#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2020 Mike Pall"
37#define LUAJIT_URL "http://luajit.org/" 37#define LUAJIT_URL "http://luajit.org/"
38 38
@@ -64,6 +64,15 @@ enum {
64/* Control the JIT engine. */ 64/* Control the JIT engine. */
65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); 65LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
66 66
67/* Low-overhead profiling API. */
68typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
69 int samples, int vmstate);
70LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
71 luaJIT_profile_callback cb, void *data);
72LUA_API void luaJIT_profile_stop(lua_State *L);
73LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
74 int depth, size_t *len);
75
67/* Enforce (dynamic) linker error for version mismatches. Call from main. */ 76/* Enforce (dynamic) linker error for version mismatches. Call from main. */
68LUA_API void LUAJIT_VERSION_SYM(void); 77LUA_API void LUAJIT_VERSION_SYM(void);
69 78
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 499b5f12..ae035dc6 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -5,6 +5,7 @@
5@rem Then cd to this directory and run this script. Use the following 5@rem Then cd to this directory and run this script. Use the following
6@rem options (in order), if needed. The default is a dynamic release build. 6@rem options (in order), if needed. The default is a dynamic release build.
7@rem 7@rem
8@rem nogc64 disable LJ_GC64 mode for x64
8@rem debug emit debug symbols 9@rem debug emit debug symbols
9@rem amalg amalgamated build 10@rem amalg amalgamated build
10@rem static static linkage 11@rem static static linkage
@@ -20,6 +21,7 @@
20@set LJLIB=lib /nologo /nodefaultlib 21@set LJLIB=lib /nologo /nodefaultlib
21@set DASMDIR=..\dynasm 22@set DASMDIR=..\dynasm
22@set DASM=%DASMDIR%\dynasm.lua 23@set DASM=%DASMDIR%\dynasm.lua
24@set DASC=vm_x64.dasc
23@set LJDLLNAME=lua51.dll 25@set LJDLLNAME=lua51.dll
24@set LJLIBNAME=lua51.lib 26@set LJLIBNAME=lua51.lib
25@set BUILDTYPE=release 27@set BUILDTYPE=release
@@ -36,10 +38,17 @@ if exist minilua.exe.manifest^
36@set LJARCH=x64 38@set LJARCH=x64
37@minilua 39@minilua
38@if errorlevel 8 goto :X64 40@if errorlevel 8 goto :X64
41@set DASC=vm_x86.dasc
39@set DASMFLAGS=-D WIN -D JIT -D FFI 42@set DASMFLAGS=-D WIN -D JIT -D FFI
40@set LJARCH=x86 43@set LJARCH=x86
44@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
41:X64 45:X64
42minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 46@if "%1" neq "nogc64" goto :GC64
47@shift
48@set DASC=vm_x86.dasc
49@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
50:GC64
51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
43@if errorlevel 1 goto :BAD 52@if errorlevel 1 goto :BAD
44 53
45%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c 54%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
@@ -68,6 +77,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
68@shift 77@shift
69@set BUILDTYPE=debug 78@set BUILDTYPE=debug
70@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS% 79@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS%
80@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no
71:NODEBUG 81:NODEBUG
72@set LJLINK=%LJLINK% /%BUILDTYPE% 82@set LJLINK=%LJLINK% /%BUILDTYPE%
73@if "%1"=="amalg" goto :AMALGDLL 83@if "%1"=="amalg" goto :AMALGDLL
diff --git a/src/ps4build.bat b/src/ps4build.bat
index 337a44fa..e4a7defe 100644
--- a/src/ps4build.bat
+++ b/src/ps4build.bat
@@ -2,7 +2,19 @@
2@rem Donated to the public domain. 2@rem Donated to the public domain.
3@rem 3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler) 4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem or "VS2015 x64 Native Tools Command Prompt".
6@rem
5@rem Then cd to this directory and run this script. 7@rem Then cd to this directory and run this script.
8@rem
9@rem Recommended invocation:
10@rem
11@rem ps4build release build, amalgamated, 64-bit GC
12@rem ps4build debug debug build, amalgamated, 64-bit GC
13@rem
14@rem Additional command-line options (not generally recommended):
15@rem
16@rem gc32 (before debug) 32-bit GC
17@rem noamalg (after debug) non-amalgamated build
6 18
7@if not defined INCLUDE goto :FAIL 19@if not defined INCLUDE goto :FAIL
8@if not defined SCE_ORBIS_SDK_DIR goto :FAIL 20@if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -15,6 +27,14 @@
15@set DASMDIR=..\dynasm 27@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua 28@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c 29@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
30@set GC64=-DLUAJIT_ENABLE_GC64
31@set DASC=vm_x64.dasc
32
33@if "%1" neq "gc32" goto :NOGC32
34@shift
35@set GC64=
36@set DASC=vm_x86.dasc
37:NOGC32
18 38
19%LJCOMPILE% host\minilua.c 39%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD 40@if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^
28@if not errorlevel 8 goto :FAIL 48@if not errorlevel 8 goto :FAIL
29 49
30@set DASMFLAGS=-D P64 -D NO_UNWIND 50@set DASMFLAGS=-D P64 -D NO_UNWIND
31minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc 51minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
32@if errorlevel 1 goto :BAD 52@if errorlevel 1 goto :BAD
33 53
34%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c 54%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
35@if errorlevel 1 goto :BAD 55@if errorlevel 1 goto :BAD
36%LJLINK% /out:buildvm.exe buildvm*.obj 56%LJLINK% /out:buildvm.exe buildvm*.obj
37@if errorlevel 1 goto :BAD 57@if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
54@if errorlevel 1 goto :BAD 74@if errorlevel 1 goto :BAD
55 75
56@rem ---- Cross compiler ---- 76@rem ---- Cross compiler ----
57@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI 77@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
58@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus 78@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
59@set INCLUDE="" 79@set INCLUDE=""
60 80
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s
63@if "%1" neq "debug" goto :NODEBUG 83@if "%1" neq "debug" goto :NODEBUG
64@shift 84@shift
65@set LJCOMPILE=%LJCOMPILE% -g -O0 85@set LJCOMPILE=%LJCOMPILE% -g -O0
66@set TARGETLIB=libluajitD.a 86@set TARGETLIB=libluajitD_ps4.a
67goto :BUILD 87goto :BUILD
68:NODEBUG 88:NODEBUG
69@set LJCOMPILE=%LJCOMPILE% -O2 89@set LJCOMPILE=%LJCOMPILE% -O2
70@set TARGETLIB=libluajit.a 90@set TARGETLIB=libluajit_ps4.a
71:BUILD 91:BUILD
72del %TARGETLIB% 92del %TARGETLIB%
73@if "%1"=="amalg" goto :AMALG 93@if "%1" neq "noamalg" goto :AMALG
74for %%f in (lj_*.c lib_*.c) do ( 94for %%f in (lj_*.c lib_*.c) do (
75 %LJCOMPILE% %%f 95 %LJCOMPILE% %%f
76 @if errorlevel 1 goto :BAD 96 @if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index c5e0498e..edefac32 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
99|.type NODE, Node 99|.type NODE, Node
100|.type NARGS8, int 100|.type NARGS8, int
101|.type TRACE, GCtrace 101|.type TRACE, GCtrace
102|.type SBUF, SBuf
102| 103|
103|//----------------------------------------------------------------------- 104|//-----------------------------------------------------------------------
104| 105|
@@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx)
372 | st_vmstate CARG2 373 | st_vmstate CARG2
373 | b ->vm_returnc 374 | b ->vm_returnc
374 | 375 |
376 |->vm_unwind_ext: // Complete external unwind.
377#if !LJ_NO_UNWIND
378 | push {r0, r1, r2, lr}
379 | bl extern _Unwind_Complete
380 | ldr r0, [sp]
381 | bl extern _Unwind_DeleteException
382 | pop {r0, r1, r2, lr}
383 | mov r0, r1
384 | bx r2
385#endif
386 |
375 |//----------------------------------------------------------------------- 387 |//-----------------------------------------------------------------------
376 |//-- Grow stack for calls ----------------------------------------------- 388 |//-- Grow stack for calls -----------------------------------------------
377 |//----------------------------------------------------------------------- 389 |//-----------------------------------------------------------------------
@@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx)
418 | add CARG2, sp, #CFRAME_RESUME 430 | add CARG2, sp, #CFRAME_RESUME
419 | ldrb CARG1, L->status 431 | ldrb CARG1, L->status
420 | str CARG3, SAVE_ERRF 432 | str CARG3, SAVE_ERRF
421 | str CARG2, L->cframe 433 | str L, SAVE_PC // Any value outside of bytecode is ok.
422 | str CARG3, SAVE_CFRAME 434 | str CARG3, SAVE_CFRAME
423 | cmp CARG1, #0 435 | cmp CARG1, #0
424 | str L, SAVE_PC // Any value outside of bytecode is ok. 436 | str CARG2, L->cframe
425 | beq >3 437 | beq >3
426 | 438 |
427 | // Resume after yield (like a return). 439 | // Resume after yield (like a return).
440 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
428 | mov RA, BASE 441 | mov RA, BASE
429 | ldr BASE, L->base 442 | ldr BASE, L->base
430 | ldr CARG1, L->top 443 | ldr CARG1, L->top
@@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx)
458 | str CARG3, SAVE_NRES 471 | str CARG3, SAVE_NRES
459 | mov L, CARG1 472 | mov L, CARG1
460 | str CARG1, SAVE_L 473 | str CARG1, SAVE_L
461 | mov BASE, CARG2
462 | str sp, L->cframe // Add our C frame to cframe chain.
463 | ldr DISPATCH, L->glref // Setup pointer to dispatch table. 474 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
475 | mov BASE, CARG2
464 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 476 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
465 | str RC, SAVE_CFRAME 477 | str RC, SAVE_CFRAME
466 | add DISPATCH, DISPATCH, #GG_G2DISP 478 | add DISPATCH, DISPATCH, #GG_G2DISP
479 | str sp, L->cframe // Add our C frame to cframe chain.
467 | 480 |
468 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 481 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
482 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
469 | ldr RB, L->base // RB = old base (for vmeta_call). 483 | ldr RB, L->base // RB = old base (for vmeta_call).
470 | ldr CARG1, L->top 484 | ldr CARG1, L->top
471 | mov MASKR8, #255 485 | mov MASKR8, #255
@@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx)
491 | mov L, CARG1 505 | mov L, CARG1
492 | ldr RA, L:CARG1->stack 506 | ldr RA, L:CARG1->stack
493 | str CARG1, SAVE_L 507 | str CARG1, SAVE_L
508 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
494 | ldr RB, L->top 509 | ldr RB, L->top
495 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. 510 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
496 | ldr RC, L->cframe 511 | ldr RC, L->cframe
512 | add DISPATCH, DISPATCH, #GG_G2DISP
497 | sub RA, RA, RB // Compute -savestack(L, L->top). 513 | sub RA, RA, RB // Compute -savestack(L, L->top).
498 | str sp, L->cframe // Add our C frame to cframe chain.
499 | mov RB, #0 514 | mov RB, #0
500 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. 515 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
501 | str RB, SAVE_ERRF // No error function. 516 | str RB, SAVE_ERRF // No error function.
502 | str RC, SAVE_CFRAME 517 | str RC, SAVE_CFRAME
518 | str sp, L->cframe // Add our C frame to cframe chain.
519 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
503 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) 520 | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
504 | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
505 | movs BASE, CRET1 521 | movs BASE, CRET1
506 | mov PC, #FRAME_CP 522 | mov PC, #FRAME_CP
507 | add DISPATCH, DISPATCH, #GG_G2DISP
508 | bne <3 // Else continue with the call. 523 | bne <3 // Else continue with the call.
509 | b ->vm_leave_cp // No base? Just remove C frame. 524 | b ->vm_leave_cp // No base? Just remove C frame.
510 | 525 |
@@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx)
614 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 629 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
615 | b ->vm_call_dispatch_f 630 | b ->vm_call_dispatch_f
616 | 631 |
632 |->vmeta_tgetr:
633 | .IOS mov RC, BASE
634 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
635 | // Returns cTValue * or NULL.
636 | .IOS mov BASE, RC
637 | cmp CRET1, #0
638 | ldrdne CARG12, [CRET1]
639 | mvneq CARG2, #~LJ_TNIL
640 | b ->BC_TGETR_Z
641 |
617 |//----------------------------------------------------------------------- 642 |//-----------------------------------------------------------------------
618 | 643 |
619 |->vmeta_tsets1: 644 |->vmeta_tsets1:
@@ -671,6 +696,15 @@ static void build_subroutines(BuildCtx *ctx)
671 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. 696 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
672 | b ->vm_call_dispatch_f 697 | b ->vm_call_dispatch_f
673 | 698 |
699 |->vmeta_tsetr:
700 | str BASE, L->base
701 | .IOS mov RC, BASE
702 | str PC, SAVE_PC
703 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
704 | // Returns TValue *.
705 | .IOS mov BASE, RC
706 | b ->BC_TSETR_Z
707 |
674 |//-- Comparison metamethods --------------------------------------------- 708 |//-- Comparison metamethods ---------------------------------------------
675 | 709 |
676 |->vmeta_comp: 710 |->vmeta_comp:
@@ -735,6 +769,17 @@ static void build_subroutines(BuildCtx *ctx)
735 | b <3 769 | b <3
736 |.endif 770 |.endif
737 | 771 |
772 |->vmeta_istype:
773 | sub PC, PC, #4
774 | str BASE, L->base
775 | mov CARG1, L
776 | lsr CARG2, RA, #3
777 | mov CARG3, RC
778 | str PC, SAVE_PC
779 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
780 | .IOS ldr BASE, L->base
781 | b ->cont_nop
782 |
738 |//-- Arithmetic metamethods --------------------------------------------- 783 |//-- Arithmetic metamethods ---------------------------------------------
739 | 784 |
740 |->vmeta_arith_vn: 785 |->vmeta_arith_vn:
@@ -1052,7 +1097,7 @@ static void build_subroutines(BuildCtx *ctx)
1052 | ffgccheck 1097 | ffgccheck
1053 | mov CARG1, L 1098 | mov CARG1, L
1054 | mov CARG2, BASE 1099 | mov CARG2, BASE
1055 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1100 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1056 | // Returns GCstr *. 1101 | // Returns GCstr *.
1057 | ldr BASE, L->base 1102 | ldr BASE, L->base
1058 | mvn CARG2, #~LJ_TSTR 1103 | mvn CARG2, #~LJ_TSTR
@@ -1230,9 +1275,10 @@ static void build_subroutines(BuildCtx *ctx)
1230 | ldr CARG3, L:RA->base 1275 | ldr CARG3, L:RA->base
1231 | mv_vmstate CARG2, INTERP 1276 | mv_vmstate CARG2, INTERP
1232 | ldr CARG4, L:RA->top 1277 | ldr CARG4, L:RA->top
1233 | st_vmstate CARG2
1234 | cmp CRET1, #LUA_YIELD 1278 | cmp CRET1, #LUA_YIELD
1235 | ldr BASE, L->base 1279 | ldr BASE, L->base
1280 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
1281 | st_vmstate CARG2
1236 | bhi >8 1282 | bhi >8
1237 | subs RC, CARG4, CARG3 1283 | subs RC, CARG4, CARG3
1238 | ldr CARG1, L->maxstack 1284 | ldr CARG1, L->maxstack
@@ -1500,19 +1546,6 @@ static void build_subroutines(BuildCtx *ctx)
1500 | math_extern2 atan2 1546 | math_extern2 atan2
1501 | math_extern2 fmod 1547 | math_extern2 fmod
1502 | 1548 |
1503 |->ff_math_deg:
1504 |.if FPU
1505 | .ffunc_d math_rad
1506 | vldr d1, CFUNC:CARG3->upvalue[0]
1507 | vmul.f64 d0, d0, d1
1508 | b ->fff_resd
1509 |.else
1510 | .ffunc_n math_rad
1511 | ldrd CARG34, CFUNC:CARG3->upvalue[0]
1512 | bl extern __aeabi_dmul
1513 | b ->fff_restv
1514 |.endif
1515 |
1516 |.if HFABI 1549 |.if HFABI
1517 | .ffunc math_ldexp 1550 | .ffunc math_ldexp
1518 | ldr CARG4, [BASE, #4] 1551 | ldr CARG4, [BASE, #4]
@@ -1687,12 +1720,6 @@ static void build_subroutines(BuildCtx *ctx)
1687 | 1720 |
1688 |//-- String library ----------------------------------------------------- 1721 |//-- String library -----------------------------------------------------
1689 | 1722 |
1690 |.ffunc_1 string_len
1691 | checkstr CARG2, ->fff_fallback
1692 | ldr CARG1, STR:CARG1->len
1693 | mvn CARG2, #~LJ_TISNUM
1694 | b ->fff_restv
1695 |
1696 |.ffunc string_byte // Only handle the 1-arg case here. 1723 |.ffunc string_byte // Only handle the 1-arg case here.
1697 | ldrd CARG12, [BASE] 1724 | ldrd CARG12, [BASE]
1698 | ldr PC, [BASE, FRAME_PC] 1725 | ldr PC, [BASE, FRAME_PC]
@@ -1725,6 +1752,7 @@ static void build_subroutines(BuildCtx *ctx)
1725 | mov CARG1, L 1752 | mov CARG1, L
1726 | str PC, SAVE_PC 1753 | str PC, SAVE_PC
1727 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 1754 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1755 |->fff_resstr:
1728 | // Returns GCstr *. 1756 | // Returns GCstr *.
1729 | ldr BASE, L->base 1757 | ldr BASE, L->base
1730 | mvn CARG2, #~LJ_TSTR 1758 | mvn CARG2, #~LJ_TSTR
@@ -1768,91 +1796,28 @@ static void build_subroutines(BuildCtx *ctx)
1768 | mvn CARG2, #~LJ_TSTR 1796 | mvn CARG2, #~LJ_TSTR
1769 | b ->fff_restv 1797 | b ->fff_restv
1770 | 1798 |
1771 |.ffunc string_rep // Only handle the 1-char case inline. 1799 |.macro ffstring_op, name
1772 | ffgccheck 1800 | .ffunc string_ .. name
1773 | ldrd CARG12, [BASE]
1774 | ldrd CARG34, [BASE, #8]
1775 | cmp NARGS8:RC, #16
1776 | bne ->fff_fallback // Exactly 2 arguments
1777 | checktp CARG2, LJ_TSTR
1778 | checktpeq CARG4, LJ_TISNUM
1779 | bne ->fff_fallback
1780 | subs CARG4, CARG3, #1
1781 | ldr CARG2, STR:CARG1->len
1782 | blt ->fff_emptystr // Count <= 0?
1783 | cmp CARG2, #1
1784 | blo ->fff_emptystr // Zero-length string?
1785 | bne ->fff_fallback // Fallback for > 1-char strings.
1786 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1787 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1788 | ldr CARG1, STR:CARG1[1]
1789 | cmp RB, CARG3
1790 | blo ->fff_fallback
1791 |1: // Fill buffer with char.
1792 | strb CARG1, [CARG2, CARG4]
1793 | subs CARG4, CARG4, #1
1794 | bge <1
1795 | b ->fff_newstr
1796 |
1797 |.ffunc string_reverse
1798 | ffgccheck 1801 | ffgccheck
1799 | ldrd CARG12, [BASE] 1802 | ldr CARG3, [BASE, #4]
1800 | cmp NARGS8:RC, #8 1803 | cmp NARGS8:RC, #8
1804 | ldr STR:CARG2, [BASE]
1801 | blo ->fff_fallback 1805 | blo ->fff_fallback
1802 | checkstr CARG2, ->fff_fallback 1806 | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
1803 | ldr CARG3, STR:CARG1->len 1807 | checkstr CARG3, ->fff_fallback
1804 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] 1808 | ldr CARG4, SBUF:CARG1->b
1805 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] 1809 | str BASE, L->base
1806 | mov CARG4, CARG3 1810 | str PC, SAVE_PC
1807 | add CARG1, STR:CARG1, #sizeof(GCstr) 1811 | str L, SBUF:CARG1->L
1808 | cmp RB, CARG3 1812 | str CARG4, SBUF:CARG1->p
1809 | blo ->fff_fallback 1813 | bl extern lj_buf_putstr_ .. name
1810 |1: // Reverse string copy. 1814 | bl extern lj_buf_tostr
1811 | ldrb RB, [CARG1], #1 1815 | b ->fff_resstr
1812 | subs CARG4, CARG4, #1
1813 | blt ->fff_newstr
1814 | strb RB, [CARG2, CARG4]
1815 | b <1
1816 |
1817 |.macro ffstring_case, name, lo
1818 | .ffunc name
1819 | ffgccheck
1820 | ldrd CARG12, [BASE]
1821 | cmp NARGS8:RC, #8
1822 | blo ->fff_fallback
1823 | checkstr CARG2, ->fff_fallback
1824 | ldr CARG3, STR:CARG1->len
1825 | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
1826 | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
1827 | mov CARG4, #0
1828 | add CARG1, STR:CARG1, #sizeof(GCstr)
1829 | cmp RB, CARG3
1830 | blo ->fff_fallback
1831 |1: // ASCII case conversion.
1832 | ldrb RB, [CARG1, CARG4]
1833 | cmp CARG4, CARG3
1834 | bhs ->fff_newstr
1835 | sub RC, RB, #lo
1836 | cmp RC, #26
1837 | eorlo RB, RB, #0x20
1838 | strb RB, [CARG2, CARG4]
1839 | add CARG4, CARG4, #1
1840 | b <1
1841 |.endmacro 1816 |.endmacro
1842 | 1817 |
1843 |ffstring_case string_lower, 65 1818 |ffstring_op reverse
1844 |ffstring_case string_upper, 97 1819 |ffstring_op lower
1845 | 1820 |ffstring_op upper
1846 |//-- Table library ------------------------------------------------------
1847 |
1848 |.ffunc_1 table_getn
1849 | checktab CARG2, ->fff_fallback
1850 | .IOS mov RA, BASE
1851 | bl extern lj_tab_len // (GCtab *t)
1852 | // Returns uint32_t (but less than 2^31).
1853 | .IOS mov BASE, RA
1854 | mvn CARG2, #~LJ_TISNUM
1855 | b ->fff_restv
1856 | 1821 |
1857 |//-- Bit library -------------------------------------------------------- 1822 |//-- Bit library --------------------------------------------------------
1858 | 1823 |
@@ -2127,6 +2092,66 @@ static void build_subroutines(BuildCtx *ctx)
2127 | ldr INS, [PC, #-4] 2092 | ldr INS, [PC, #-4]
2128 | bx CRET1 2093 | bx CRET1
2129 | 2094 |
2095 |->cont_stitch: // Trace stitching.
2096 |.if JIT
2097 | // RA = resultptr, CARG4 = meta base
2098 | ldr RB, SAVE_MULTRES
2099 | ldr INS, [PC, #-4]
2100 | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
2101 | subs RB, RB, #8
2102 | decode_RA8 RC, INS // Call base.
2103 | beq >2
2104 |1: // Move results down.
2105 | ldrd CARG12, [RA]
2106 | add RA, RA, #8
2107 | subs RB, RB, #8
2108 | strd CARG12, [BASE, RC]
2109 | add RC, RC, #8
2110 | bne <1
2111 |2:
2112 | decode_RA8 RA, INS
2113 | decode_RB8 RB, INS
2114 | add RA, RA, RB
2115 |3:
2116 | cmp RA, RC
2117 | mvn CARG2, #~LJ_TNIL
2118 | bhi >9 // More results wanted?
2119 |
2120 | ldrh RA, TRACE:CARG3->traceno
2121 | ldrh RC, TRACE:CARG3->link
2122 | cmp RC, RA
2123 | beq ->cont_nop // Blacklisted.
2124 | cmp RC, #0
2125 | bne =>BC_JLOOP // Jump to stitched trace.
2126 |
2127 | // Stitch a new trace to the previous trace.
2128 | str RA, [DISPATCH, #DISPATCH_J(exitno)]
2129 | str L, [DISPATCH, #DISPATCH_J(L)]
2130 | str BASE, L->base
2131 | sub CARG1, DISPATCH, #-GG_DISP2J
2132 | mov CARG2, PC
2133 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2134 | ldr BASE, L->base
2135 | b ->cont_nop
2136 |
2137 |9: // Fill up results with nil.
2138 | strd CARG12, [BASE, RC]
2139 | add RC, RC, #8
2140 | b <3
2141 |.endif
2142 |
2143 |->vm_profhook: // Dispatch target for profiler hook.
2144#if LJ_HASPROFILE
2145 | mov CARG1, L
2146 | str BASE, L->base
2147 | mov CARG2, PC
2148 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2149 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2150 | ldr BASE, L->base
2151 | sub PC, PC, #4
2152 | b ->cont_nop
2153#endif
2154 |
2130 |//----------------------------------------------------------------------- 2155 |//-----------------------------------------------------------------------
2131 |//-- Trace exit handler ------------------------------------------------- 2156 |//-- Trace exit handler -------------------------------------------------
2132 |//----------------------------------------------------------------------- 2157 |//-----------------------------------------------------------------------
@@ -2151,14 +2176,14 @@ static void build_subroutines(BuildCtx *ctx)
2151 | add CARG1, CARG1, CARG2, asr #6 2176 | add CARG1, CARG1, CARG2, asr #6
2152 | ldr CARG2, [lr, #4] // Load exit stub group offset. 2177 | ldr CARG2, [lr, #4] // Load exit stub group offset.
2153 | sub CARG1, CARG1, lr 2178 | sub CARG1, CARG1, lr
2154 | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] 2179 | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
2155 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 2180 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
2156 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 2181 | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
2157 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] 2182 | str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
2158 | mov CARG4, #0 2183 | mov CARG4, #0
2159 | str L, [DISPATCH, #DISPATCH_J(L)]
2160 | str BASE, L->base 2184 | str BASE, L->base
2161 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] 2185 | str L, [DISPATCH, #DISPATCH_J(L)]
2186 | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
2162 | sub CARG1, DISPATCH, #-GG_DISP2J 2187 | sub CARG1, DISPATCH, #-GG_DISP2J
2163 | mov CARG2, sp 2188 | mov CARG2, sp
2164 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 2189 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
@@ -2177,13 +2202,14 @@ static void build_subroutines(BuildCtx *ctx)
2177 | ldr L, SAVE_L 2202 | ldr L, SAVE_L
2178 |1: 2203 |1:
2179 | cmp CARG1, #0 2204 | cmp CARG1, #0
2180 | blt >3 // Check for error from exit. 2205 | blt >9 // Check for error from exit.
2181 | lsl RC, CARG1, #3 2206 | lsl RC, CARG1, #3
2182 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] 2207 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
2183 | str RC, SAVE_MULTRES 2208 | str RC, SAVE_MULTRES
2184 | mov CARG3, #0 2209 | mov CARG3, #0
2210 | str BASE, L->base
2185 | ldr CARG2, LFUNC:CARG2->field_pc 2211 | ldr CARG2, LFUNC:CARG2->field_pc
2186 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] 2212 | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
2187 | mv_vmstate CARG4, INTERP 2213 | mv_vmstate CARG4, INTERP
2188 | ldr KBASE, [CARG2, #PC2PROTO(k)] 2214 | ldr KBASE, [CARG2, #PC2PROTO(k)]
2189 | // Modified copy of ins_next which handles function header dispatch, too. 2215 | // Modified copy of ins_next which handles function header dispatch, too.
@@ -2192,15 +2218,32 @@ static void build_subroutines(BuildCtx *ctx)
2192 | ldr INS, [PC], #4 2218 | ldr INS, [PC], #4
2193 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. 2219 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
2194 | st_vmstate CARG4 2220 | st_vmstate CARG4
2221 | cmp OP, #BC_FUNCC+2 // Fast function?
2222 | bhs >4
2223 |2:
2195 | cmp OP, #BC_FUNCF // Function header? 2224 | cmp OP, #BC_FUNCF // Function header?
2196 | ldr OP, [DISPATCH, OP, lsl #2] 2225 | ldr OP, [DISPATCH, OP, lsl #2]
2197 | decode_RA8 RA, INS 2226 | decode_RA8 RA, INS
2198 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. 2227 | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
2199 | subhs RC, RC, #8 2228 | subhs RC, RC, #8
2200 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 2229 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
2230 | ldrhs CARG3, [BASE, FRAME_FUNC]
2201 | bx OP 2231 | bx OP
2202 | 2232 |
2203 |3: // Rethrow error from the right C frame. 2233 |4: // Check frame below fast function.
2234 | ldr CARG1, [BASE, FRAME_PC]
2235 | ands CARG2, CARG1, #FRAME_TYPE
2236 | bne <2 // Trace stitching continuation?
2237 | // Otherwise set KBASE for Lua function below fast function.
2238 | ldr CARG3, [CARG1, #-4]
2239 | decode_RA8 CARG1, CARG3
2240 | sub CARG2, BASE, CARG1
2241 | ldr LFUNC:CARG3, [CARG2, #-16]
2242 | ldr CARG3, LFUNC:CARG3->field_pc
2243 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2244 | b <2
2245 |
2246 |9: // Rethrow error from the right C frame.
2204 | rsb CARG2, CARG1, #0 2247 | rsb CARG2, CARG1, #0
2205 | mov CARG1, L 2248 | mov CARG1, L
2206 | bl extern lj_err_throw // (lua_State *L, int errcode) 2249 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -2833,6 +2876,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2833 | ins_next 2876 | ins_next
2834 break; 2877 break;
2835 2878
2879 case BC_ISTYPE:
2880 | // RA = src*8, RC = -type
2881 | ldrd CARG12, [BASE, RA]
2882 | ins_next1
2883 | cmn CARG2, RC
2884 | ins_next2
2885 | bne ->vmeta_istype
2886 | ins_next3
2887 break;
2888 case BC_ISNUM:
2889 | // RA = src*8, RC = -(TISNUM-1)
2890 | ldrd CARG12, [BASE, RA]
2891 | ins_next1
2892 | checktp CARG2, LJ_TISNUM
2893 | ins_next2
2894 | bhs ->vmeta_istype
2895 | ins_next3
2896 break;
2897
2836 /* -- Unary ops --------------------------------------------------------- */ 2898 /* -- Unary ops --------------------------------------------------------- */
2837 2899
2838 case BC_MOV: 2900 case BC_MOV:
@@ -3503,6 +3565,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3503 | bne <1 // 'no __index' flag set: done. 3565 | bne <1 // 'no __index' flag set: done.
3504 | b ->vmeta_tgetb 3566 | b ->vmeta_tgetb
3505 break; 3567 break;
3568 case BC_TGETR:
3569 | decode_RB8 RB, INS
3570 | decode_RC8 RC, INS
3571 | // RA = dst*8, RB = table*8, RC = key*8
3572 | ldr TAB:CARG1, [BASE, RB]
3573 | ldr CARG2, [BASE, RC]
3574 | ldr CARG4, TAB:CARG1->array
3575 | ldr CARG3, TAB:CARG1->asize
3576 | add CARG4, CARG4, CARG2, lsl #3
3577 | cmp CARG2, CARG3 // In array part?
3578 | bhs ->vmeta_tgetr
3579 | ldrd CARG12, [CARG4]
3580 |->BC_TGETR_Z:
3581 | ins_next1
3582 | ins_next2
3583 | strd CARG12, [BASE, RA]
3584 | ins_next3
3585 break;
3506 3586
3507 case BC_TSETV: 3587 case BC_TSETV:
3508 | decode_RB8 RB, INS 3588 | decode_RB8 RB, INS
@@ -3673,6 +3753,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3673 | barrierback TAB:CARG1, INS, CARG3 3753 | barrierback TAB:CARG1, INS, CARG3
3674 | b <2 3754 | b <2
3675 break; 3755 break;
3756 case BC_TSETR:
3757 | decode_RB8 RB, INS
3758 | decode_RC8 RC, INS
3759 | // RA = src*8, RB = table*8, RC = key*8
3760 | ldr TAB:CARG2, [BASE, RB]
3761 | ldr CARG3, [BASE, RC]
3762 | ldrb INS, TAB:CARG2->marked
3763 | ldr CARG1, TAB:CARG2->array
3764 | ldr CARG4, TAB:CARG2->asize
3765 | tst INS, #LJ_GC_BLACK // isblack(table)
3766 | add CARG1, CARG1, CARG3, lsl #3
3767 | bne >7
3768 |2:
3769 | cmp CARG3, CARG4 // In array part?
3770 | bhs ->vmeta_tsetr
3771 |->BC_TSETR_Z:
3772 | ldrd CARG34, [BASE, RA]
3773 | ins_next1
3774 | ins_next2
3775 | strd CARG34, [CARG1]
3776 | ins_next3
3777 |
3778 |7: // Possible table write barrier for the value. Skip valiswhite check.
3779 | barrierback TAB:CARG2, INS, RB
3780 | b <2
3781 break;
3676 3782
3677 case BC_TSETM: 3783 case BC_TSETM:
3678 | // RA = base*8 (table at base-1), RC = num_const (start index) 3784 | // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -4270,7 +4376,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4270 | st_vmstate CARG2 4376 | st_vmstate CARG2
4271 | ldr RA, TRACE:RC->mcode 4377 | ldr RA, TRACE:RC->mcode
4272 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] 4378 | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
4273 | str L, [DISPATCH, #DISPATCH_GL(jit_L)] 4379 | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
4274 | bx RA 4380 | bx RA
4275 |.endif 4381 |.endif
4276 break; 4382 break;
@@ -4388,6 +4494,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4388 | ldr BASE, L->base 4494 | ldr BASE, L->base
4389 | mv_vmstate CARG3, INTERP 4495 | mv_vmstate CARG3, INTERP
4390 | ldr CRET2, L->top 4496 | ldr CRET2, L->top
4497 | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
4391 | lsl RC, CRET1, #3 4498 | lsl RC, CRET1, #3
4392 | st_vmstate CARG3 4499 | st_vmstate CARG3
4393 | ldr PC, [BASE, FRAME_PC] 4500 | ldr PC, [BASE, FRAME_PC]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
new file mode 100644
index 00000000..edceb549
--- /dev/null
+++ b/src/vm_arm64.dasc
@@ -0,0 +1,3988 @@
1|// Low-level VM code for ARM64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch arm64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|// Note: The ragged indentation of the instructions is intentional.
14|// The starting columns indicate data dependencies.
15|
16|//-----------------------------------------------------------------------
17|
18|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
19|//
20|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
21|// x18 is reserved on most platforms. Don't use it, save it or restore it.
22|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
23|// depending on the instruction.
24|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
25|//
26|// x0-x7/v0-v7 hold parameters and results.
27|
28|// Fixed register assignments for the interpreter.
29|
30|// The following must be C callee-save.
31|.define BASE, x19 // Base of current Lua stack frame.
32|.define KBASE, x20 // Constants of current Lua function.
33|.define PC, x21 // Next PC.
34|.define GLREG, x22 // Global state.
35|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
36|.define TISNUM, x24 // Constant LJ_TISNUM << 47.
37|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15.
38|.define TISNIL, x26 // Constant -1LL.
39|.define fp, x29 // Yes, we have to maintain a frame pointer.
40|
41|.define ST_INTERP, w26 // Constant -1.
42|
43|// The following temporaries are not saved across C calls, except for RA/RC.
44|.define RA, x27
45|.define RC, x28
46|.define RB, x17
47|.define RAw, w27
48|.define RCw, w28
49|.define RBw, w17
50|.define INS, x16
51|.define INSw, w16
52|.define ITYPE, x15
53|.define TMP0, x8
54|.define TMP1, x9
55|.define TMP2, x10
56|.define TMP3, x11
57|.define TMP0w, w8
58|.define TMP1w, w9
59|.define TMP2w, w10
60|.define TMP3w, w11
61|
62|// Calling conventions. Also used as temporaries.
63|.define CARG1, x0
64|.define CARG2, x1
65|.define CARG3, x2
66|.define CARG4, x3
67|.define CARG5, x4
68|.define CARG1w, w0
69|.define CARG2w, w1
70|.define CARG3w, w2
71|.define CARG4w, w3
72|.define CARG5w, w4
73|
74|.define FARG1, d0
75|.define FARG2, d1
76|
77|.define CRET1, x0
78|.define CRET1w, w0
79|
80|// Stack layout while in interpreter. Must match with lj_frame.h.
81|
82|.define CFRAME_SPACE, 208
83|//----- 16 byte aligned, <-- sp entering interpreter
84|// Unused [sp, #204] // 32 bit values
85|.define SAVE_NRES, [sp, #200]
86|.define SAVE_ERRF, [sp, #196]
87|.define SAVE_MULTRES, [sp, #192]
88|.define TMPD, [sp, #184] // 64 bit values
89|.define SAVE_L, [sp, #176]
90|.define SAVE_PC, [sp, #168]
91|.define SAVE_CFRAME, [sp, #160]
92|.define SAVE_FPR_, 96 // 96+8*8: 64 bit FPR saves
93|.define SAVE_GPR_, 16 // 16+10*8: 64 bit GPR saves
94|.define SAVE_LR, [sp, #8]
95|.define SAVE_FP, [sp]
96|//----- 16 byte aligned, <-- sp while in interpreter.
97|
98|.define TMPDofs, #184
99|
100|.macro save_, gpr1, gpr2, fpr1, fpr2 // Store one FPR pair and one GPR pair into the C frame.
101| stp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8] // FPR slot offset is indexed from d8.
102| stp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8] // GPR slot offset is indexed from x19.
103|.endmacro
104|.macro rest_, gpr1, gpr2, fpr1, fpr2 // Reload one FPR pair and one GPR pair using the same slot math as save_.
105| ldp d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(fpr1-8)*8]
106| ldp x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(gpr1-19)*8]
107|.endmacro
108|
109|.macro saveregs // Allocate the C frame and store fp/lr, x19-x28 and d8-d15 in it.
110| stp fp, lr, [sp, #-CFRAME_SPACE]! // Pre-decrement sp by CFRAME_SPACE; fp/lr end up at [sp] and [sp, #8].
111| add fp, sp, #0 // fp = sp.
112| stp x19, x20, [sp, # SAVE_GPR_] // First GPR pair; the remaining pairs go through save_.
113| save_ 21, 22, 8, 9
114| save_ 23, 24, 10, 11
115| save_ 25, 26, 12, 13
116| save_ 27, 28, 14, 15
117|.endmacro
118|.macro restoreregs // Reload everything stored by saveregs and release the C frame.
119| ldp x19, x20, [sp, # SAVE_GPR_]
120| rest_ 21, 22, 8, 9
121| rest_ 23, 24, 10, 11
122| rest_ 25, 26, 12, 13
123| rest_ 27, 28, 14, 15
124| ldp fp, lr, [sp], # CFRAME_SPACE // Post-increment: reload fp/lr, then sp += CFRAME_SPACE.
125|.endmacro
126|
127|// Type definitions. Some of these are only used for documentation.
128|.type L, lua_State, LREG
129|.type GL, global_State, GLREG
130|.type TVALUE, TValue
131|.type GCOBJ, GCobj
132|.type STR, GCstr
133|.type TAB, GCtab
134|.type LFUNC, GCfuncL
135|.type CFUNC, GCfuncC
136|.type PROTO, GCproto
137|.type UPVAL, GCupval
138|.type NODE, Node
139|.type NARGS8, int
140|.type TRACE, GCtrace
141|.type SBUF, SBuf
142|
143|//-----------------------------------------------------------------------
144|
145|// Trap for not-yet-implemented parts.
146|.macro NYI; brk; .endmacro
147|
148|//-----------------------------------------------------------------------
149|
150|// Access to frame relative to BASE.
151|.define FRAME_FUNC, #-16
152|.define FRAME_PC, #-8
153|
154|// Endian-specific defines.
155|.if ENDIAN_LE
156|.define LO, 0
157|.define OFS_RD, 2
158|.define OFS_RB, 3
159|.define OFS_RA, 1
160|.define OFS_OP, 0
161|.else
162|.define LO, 4
163|.define OFS_RD, 0
164|.define OFS_RB, 0
165|.define OFS_RA, 2
166|.define OFS_OP, 3
167|.endif
168|
169|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro // dst = bits 8..15 of ins.
170|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro // dst = bits 24..31 of ins.
171|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro // dst = bits 16..23 of ins.
172|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro // dst = bits 16..31 of ins.
173|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro // dst = (src & 0xff) << 3.
174|
175|// Instruction decode+dispatch.
176|.macro ins_NEXT // Fetch the instruction at PC, decode RA and RC from it, and jump to its handler.
177| ldr INSw, [PC], #4 // Load the 32 bit instruction, then PC += 4.
178| add TMP1, GL, INS, uxtb #3 // TMP1 = GL + (low 8 bits of INS)*8.
179| decode_RA RA, INS
180| ldr TMP0, [TMP1, #GG_G2DISP] // Branch target is read from offset GG_G2DISP relative to TMP1.
181| decode_RD RC, INS
182| br TMP0
183|.endmacro
184|
185|// Instruction footer.
186|.if 1
187| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
188| .define ins_next, ins_NEXT
189| .define ins_next_, ins_NEXT
190|.else
191| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
192| // Affects only certain kinds of benchmarks (and only with -j off).
193| .macro ins_next
194| b ->ins_next
195| .endmacro
196| .macro ins_next_
197| ->ins_next:
198| ins_NEXT
199| .endmacro
200|.endif
201|
202|// Call decode and dispatch.
203|.macro ins_callt // Load the callee PC, fetch its first instruction and dispatch on it.
204| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
205| ldr PC, LFUNC:CARG3->pc // PC = pc field of the function in CARG3.
206| ldr INSw, [PC], #4 // Load the first instruction, then PC += 4.
207| add TMP1, GL, INS, uxtb #3 // TMP1 = GL + (low 8 bits of INS)*8.
208| decode_RA RA, INS
209| ldr TMP0, [TMP1, #GG_G2DISP]
210| add RA, BASE, RA, lsl #3 // RA = BASE + A*8.
211| br TMP0
212|.endmacro
213|
214|.macro ins_call // Like ins_callt, but first stores the caller PC into the new frame.
215| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
216| str PC, [BASE, FRAME_PC] // Establishes the FRAME_PC(BASE) = PC precondition of ins_callt.
217| ins_callt
218|.endmacro
219|
220|//-----------------------------------------------------------------------
221|
222|// Macros to check the TValue type and extract the GCobj. Branch on failure.
223|.macro checktp, reg, tp, target // In-place variant: reg is overwritten with its masked value.
224| asr ITYPE, reg, #47 // ITYPE = reg >> 47 (arithmetic shift). Clobbers ITYPE.
225| cmn ITYPE, #-tp // Flags for ITYPE + (-tp): Z is set iff ITYPE == tp.
226| and reg, reg, #LJ_GCVMASK // Non-flag-setting and: the result of cmn is still live for bne.
227| bne target
228|.endmacro
229|.macro checktp, dst, reg, tp, target // Variant with separate destination: reg itself is left unchanged.
230| asr ITYPE, reg, #47
231| cmn ITYPE, #-tp
232| and dst, reg, #LJ_GCVMASK
233| bne target
234|.endmacro
235|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
236|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
237|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
238|.macro checkint, reg, target // Branch unless the upper 32 bits of reg equal TISNUMhi.
239| cmp TISNUMhi, reg, lsr #32
240| bne target
241|.endmacro
242|.macro checknum, reg, target // Branch if TISNUMhi <= upper 32 bits of reg (unsigned); equality branches.
243| cmp TISNUMhi, reg, lsr #32
244| bls target
245|.endmacro
246|.macro checknumber, reg, target // Branch if TISNUMhi < upper 32 bits of reg (unsigned); equality passes.
247| cmp TISNUMhi, reg, lsr #32
248| blo target
249|.endmacro
250|
251|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro // reg = ~(0x8000 << 32).
252|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro // reg = ~(0x0001 << 48).
253|
254#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
255|
256#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
257|
258|.macro hotcheck, delta // Subtract delta from a 16 bit counter selected by PC. Clobbers CARG1/CARG2, leaves the flags of subs.
259| lsr CARG1, PC, #1
260| and CARG1, CARG1, #126 // CARG1 = (PC >> 1) & 126: an even byte offset in 0..126.
261| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT // Make the offset relative to GL.
262| ldrh CARG2w, [GL, CARG1] // Zero-extended 16 bit load.
263| subs CARG2, CARG2, #delta
264| strh CARG2w, [GL, CARG1] // Write back the low 16 bits.
265|.endmacro
266|
267|.macro hotloop
268| hotcheck HOTCOUNT_LOOP
269| blo ->vm_hotloop // Taken when the counter was below HOTCOUNT_LOOP (subs borrowed).
270|.endmacro
271|
272|.macro hotcall
273| hotcheck HOTCOUNT_CALL
274| blo ->vm_hotcall // Taken when the counter was below HOTCOUNT_CALL (subs borrowed).
275|.endmacro
276|
277|// Set current VM state.
278|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro // reg = ~LJ_VMST_<st>.
279|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro // GL->vmstate = reg.
280|
281|// Move table write barrier back. Overwrites mark and tmp.
282|.macro barrierback, tab, mark, tmp
283| ldr tmp, GL->gc.grayagain // tmp = previous value of GL->gc.grayagain.
284| and mark, mark, #~LJ_GC_BLACK // black2gray(tab)
285| str tab, GL->gc.grayagain // GL->gc.grayagain now points at tab.
286| strb mark, tab->marked // Store the mark byte with LJ_GC_BLACK cleared.
287| str tmp, tab->gclist // Chain the previous grayagain value behind tab.
288|.endmacro
289|
290|//-----------------------------------------------------------------------
291
292#if !LJ_DUALNUM
293#error "Only dual-number mode supported for ARM64 target"
294#endif
295
296/* Generate subroutines used by opcodes and other parts of the VM. */
297/* The .code_sub section should be last to help static branch prediction. */
298static void build_subroutines(BuildCtx *ctx)
299{
300 |.code_sub
301 |
302 |//-----------------------------------------------------------------------
303 |//-- Return handling ----------------------------------------------------
304 |//-----------------------------------------------------------------------
305 |
306 |->vm_returnp:
307 | // See vm_return. Also: RB = previous base.
308 | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0?
309 |
310 | // Return from pcall or xpcall fast func.
311 | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
312 | mov_true TMP0
313 | mov BASE, RB
314 | // Prepending may overwrite the pcall frame, so do it at the end.
315 | str TMP0, [RA, #-8]! // Prepend true to results.
316 |
317 |->vm_returnc:
318 | adds RC, RC, #8 // RC = (nresults+1)*8.
319 | mov CRET1, #LUA_YIELD // mov does not touch the flags set by adds.
320 | beq ->vm_unwind_c_eh // RC became 0: leave via the unwinder pad with CRET1 = LUA_YIELD.
321 | str RCw, SAVE_MULTRES
322 | ands CARG1, PC, #FRAME_TYPE // CARG1 = PC & FRAME_TYPE, as vm_return expects.
323 | beq ->BC_RET_Z // Handle regular return to Lua.
324 |
325 |->vm_return:
326 | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
327 | // CARG1 = PC & FRAME_TYPE
328 | and RB, PC, #~FRAME_TYPEP
329 | cmp CARG1, #FRAME_C
330 | sub RB, BASE, RB // RB = previous base.
331 | bne ->vm_returnp // Frame type is not FRAME_C.
332 |
333 | str RB, L->base
334 | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
335 | mv_vmstate TMP0w, C
336 | sub BASE, BASE, #16 // Results are copied to 16 bytes below the old base.
337 | subs TMP2, RC, #8 // TMP2 = nresults*8; zero means nothing to copy.
338 | st_vmstate TMP0w
339 | beq >2
340 |1: // Copy results, 8 bytes per iteration.
341 | subs TMP2, TMP2, #8
342 | ldr TMP0, [RA], #8
343 | str TMP0, [BASE], #8
344 | bne <1
345 |2:
346 | cmp RC, CARG2, lsl #3 // More/less results wanted?
347 | bne >6
348 |3:
349 | str BASE, L->top // Store new top.
350 |
351 |->vm_leave_cp:
352 | ldr RC, SAVE_CFRAME // Restore previous C frame.
353 | mov CRET1, #0 // Ok return status for vm_pcall.
354 | str RC, L->cframe
355 |
356 |->vm_leave_unw:
357 | restoreregs
358 | ret
359 |
360 |6:
361 | bgt >7 // Less results wanted?
362 | // More results wanted. Check stack size and fill up results with nil.
363 | ldr CARG3, L->maxstack
364 | cmp BASE, CARG3
365 | bhs >8
366 | str TISNIL, [BASE], #8 // Append one nil, BASE += 8.
367 | add RC, RC, #8
368 | b <2 // Re-check the result count.
369 |
370 |7: // Less results wanted.
371 | cbz CARG2, <3 // LUA_MULTRET+1 case?
372 | sub CARG1, RC, CARG2, lsl #3 // CARG1 = RC - CARG2*8 = surplus bytes.
373 | sub BASE, BASE, CARG1 // Shrink top.
374 | b <3
375 |
376 |8: // Corner case: need to grow stack for filling up results.
377 | // This can happen if:
378 | // - A C function grows the stack (a lot).
379 | // - The GC shrinks the stack in between.
380 | // - A return back from a lua_call() with (high) nresults adjustment.
381 | str BASE, L->top // Save current top held in BASE (yes).
382 | mov CARG1, L
383 | bl extern lj_state_growstack // (lua_State *L, int n)
384 | ldr BASE, L->top // Need the (realloced) L->top in BASE.
385 | ldrsw CARG2, SAVE_NRES // Reload nresults+1 for the comparison at label 2.
386 | b <2
387 |
388 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
389 | // (void *cframe, int errcode)
390 | mov sp, CARG1 // sp = cframe.
391 | mov CRET1, CARG2 // Return value = errcode.
392 |->vm_unwind_c_eh: // Landing pad for external unwinder.
393 | ldr L, SAVE_L
394 | mv_vmstate TMP0w, C
395 | ldr GL, L->glref // Reload GL so st_vmstate can address GL->vmstate.
396 | st_vmstate TMP0w
397 | b ->vm_leave_unw // Restore registers and return; L->cframe is not rewritten on this path.
398 |
399 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
400 | // (void *cframe)
401 | and sp, CARG1, #CFRAME_RAWMASK // sp = cframe masked with CFRAME_RAWMASK.
402 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
403 | ldr L, SAVE_L
404 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Set up the constant registers TISNUM, TISNUMhi and TISNIL.
405 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
406 | movn TISNIL, #0 // TISNIL = ~0.
407 | mov RC, #16 // 2 results: false + error message.
408 | ldr BASE, L->base
409 | ldr GL, L->glref // Setup pointer to global state.
410 | mov_false TMP0
411 | sub RA, BASE, #8 // Results start at BASE-8.
412 | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
413 | str TMP0, [BASE, #-8] // Prepend false to error message.
414 | st_vmstate ST_INTERP
415 | b ->vm_returnc
416 |
417 |//-----------------------------------------------------------------------
418 |//-- Grow stack for calls -----------------------------------------------
419 |//-----------------------------------------------------------------------
420 |
421 |->vm_growstack_c: // Grow stack for C function.
422 | // CARG1 = L
423 | mov CARG2, #LUA_MINSTACK // n = LUA_MINSTACK.
424 | b >2
425 |
426 |->vm_growstack_l: // Grow stack for Lua function.
427 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
428 | add RC, BASE, RC // RC = BASE + nargs*8.
429 | sub RA, RA, BASE // RA = framesize*8.
430 | mov CARG1, L
431 | stp BASE, RC, L->base // Pair store starting at L->base: base, then top (see note at label 2).
432 | add PC, PC, #4 // Must point after first instruction.
433 | lsr CARG2, RA, #3 // n = framesize.
434 |2:
435 | // L->base = new base, L->top = top
436 | str PC, SAVE_PC
437 | bl extern lj_state_growstack // (lua_State *L, int n)
438 | ldp BASE, RC, L->base // Reload base and top after the call.
439 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
440 | sub NARGS8:RC, RC, BASE // RC = top - base.
441 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK // Mask the loaded frame slot with LJ_GCVMASK.
442 | // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
443 | ins_callt // Just retry the call.
444 |
445 |//-----------------------------------------------------------------------
446 |//-- Entry points into the assembler VM ---------------------------------
447 |//-----------------------------------------------------------------------
448 |
449 |->vm_resume: // Setup C frame and resume thread.
450 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
451 | saveregs
452 | mov L, CARG1
453 | ldr GL, L->glref // Setup pointer to global state.
454 | mov BASE, CARG2
455 | str L, SAVE_L
456 | mov PC, #FRAME_CP // Frame type consumed at label 3 (PC = ftype).
457 | str wzr, SAVE_NRES // nres1 = 0.
458 | add TMP0, sp, #CFRAME_RESUME // TMP0 = sp + CFRAME_RESUME, stored into L->cframe below.
459 | ldrb TMP1w, L->status
460 | str wzr, SAVE_ERRF // ef = 0.
461 | str L, SAVE_PC // Any value outside of bytecode is ok.
462 | str xzr, SAVE_CFRAME // Zero the saved cframe slot.
463 | str TMP0, L->cframe
464 | cbz TMP1w, >3 // status == 0: continue at label 3, the shared entry point defined after vm_call.
465 |
466 | // Resume after yield (like a return).
467 | str L, GL->cur_L
468 | mov RA, BASE // RA = the base argument passed in.
469 | ldp BASE, CARG1, L->base // Load L->base and the field following it.
470 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Materialize the constant registers TISNUM,
471 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 // TISNUMhi
472 | ldr PC, [BASE, FRAME_PC]
473 | strb wzr, L->status // Clear the thread status.
474 | movn TISNIL, #0 // and TISNIL (all bits set).
475 | sub RC, CARG1, BASE
476 | ands CARG1, PC, #FRAME_TYPE // Extract the frame type bits; Z set if they are zero.
477 | add RC, RC, #8
478 | st_vmstate ST_INTERP
479 | str RCw, SAVE_MULTRES
480 | beq ->BC_RET_Z // Frame type bits zero: return via BC_RET_Z.
481 | b ->vm_return // Otherwise use the generic vm_return path.
482 |
483 |->vm_pcall: // Setup protected C frame and enter VM.
484 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
485 | saveregs
486 | mov PC, #FRAME_CP // Frame type for the protected call.
487 | str CARG4w, SAVE_ERRF // Save ef (low 32 bits).
488 | b >1
489 |
490 |->vm_call: // Setup C frame and enter VM.
491 | // (lua_State *L, TValue *base, int nres1)
492 | saveregs
493 | mov PC, #FRAME_C // Frame type for the unprotected call.
494 |
495 |1: // Entry point for vm_pcall above (PC = ftype).
496 | ldr RC, L:CARG1->cframe // Previous head of the cframe chain.
497 | str CARG3w, SAVE_NRES
498 | mov L, CARG1
499 | str CARG1, SAVE_L
500 | ldr GL, L->glref // Setup pointer to global state.
501 | mov BASE, CARG2
502 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
503 | str RC, SAVE_CFRAME // Remember the previous cframe in our frame.
504 | str fp, L->cframe // Add our C frame to cframe chain.
505 |
506 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
507 | str L, GL->cur_L
508 | ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
509 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Materialize the constant registers TISNUM,
510 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 // TISNUMhi
511 | add PC, PC, BASE
512 | movn TISNIL, #0 // and TISNIL (all bits set).
513 | sub PC, PC, RB // PC = frame delta + frame type
514 | sub NARGS8:RC, CARG1, BASE // nargs*8 = (field after L->base) - new base.
515 | st_vmstate ST_INTERP
516 |
517 |->vm_call_dispatch:
518 | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
519 | ldr CARG3, [BASE, FRAME_FUNC]
520 | checkfunc CARG3, ->vmeta_call // Not a function: resolve __call via vmeta_call.
521 |
522 |->vm_call_dispatch_f:
523 | ins_call
524 | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
525 |
526 |->vm_cpcall: // Setup protected C frame, call C.
527 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
528 | saveregs
529 | mov L, CARG1
530 | ldr RA, L:CARG1->stack
531 | str CARG1, SAVE_L
532 | ldr GL, L->glref // Setup pointer to global state.
533 | ldr RB, L->top
534 | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | ldr RC, L->cframe // Previous head of the cframe chain.
536 | sub RA, RA, RB // Compute -savestack(L, L->top).
537 | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
538 | str wzr, SAVE_ERRF // No error function.
539 | str RC, SAVE_CFRAME // Remember the previous cframe in our frame.
540 | str fp, L->cframe // Add our C frame to cframe chain.
541 | str L, GL->cur_L
542 | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
543 | mov BASE, CRET1 // The callee's return value is used as the new base (or NULL).
544 | mov PC, #FRAME_CP
545 | cbnz BASE, <3 // Else continue with the call.
546 | b ->vm_leave_cp // No base? Just remove C frame.
547 |
548 |//-----------------------------------------------------------------------
549 |//-- Metamethod handling ------------------------------------------------
550 |//-----------------------------------------------------------------------
551 |
552 |//-- Continuation dispatch ----------------------------------------------
553 |
554 |->cont_dispatch:
555 | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
556 | ldr LFUNC:CARG3, [RB, FRAME_FUNC] // Function slot of the caller frame (RB becomes BASE below).
557 | ldr CARG1, [BASE, #-32] // Get continuation.
558 | mov CARG4, BASE // Keep the meta base in CARG4.
559 | mov BASE, RB // Restore caller BASE.
560 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
561 |.if FFI
562 | cmp CARG1, #1 // Continuation values 0 and 1 are special, see label 1 below.
563 |.endif
564 | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
565 | ldr CARG3, LFUNC:CARG3->pc
566 | add TMP0, RA, RC
567 | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
568 |.if FFI
569 | bls >1 // cont <= 1 (flags from the cmp above).
570 |.endif
571 | ldr KBASE, [CARG3, #PC2PROTO(k)]
572 | // BASE = base, RA = resultptr, CARG4 = meta base
573 | br CARG1 // Jump to the continuation.
574 |
575 |.if FFI
576 |1:
577 | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
578 | // cont = 0: tailcall from C function.
579 | sub CARG4, CARG4, #32
580 | sub RC, CARG4, BASE
581 | b ->vm_call_tail
582 |.endif
583 |
584 |->cont_cat: // RA = resultptr, CARG4 = meta base
585 | ldr INSw, [PC, #-4] // Re-fetch the instruction word preceding PC.
586 | sub CARG2, CARG4, #32
587 | ldr TMP0, [RA] // Load the result value.
588 | str BASE, L->base
589 | decode_RB RB, INS
590 | decode_RA RA, INS
591 | add TMP1, BASE, RB, lsl #3 // TMP1 = &BASE[RB].
592 | subs TMP1, CARG2, TMP1 // Byte distance from &BASE[RB] to CARG2; Z set if zero.
593 | beq >1 // Distance is zero: store the result and finish at label 1.
594 | str TMP0, [CARG2] // Otherwise store the result at CARG2
595 | lsr CARG3, TMP1, #3 // and pass the distance in slots in CARG3.
596 | b ->BC_CAT_Z
597 |
598 |1:
599 | str TMP0, [BASE, RA, lsl #3] // Store the result into BASE[RA].
600 | b ->cont_nop
601 |
602 |//-- Table indexing metamethods -----------------------------------------
603 |
604 |->vmeta_tgets1: // o = &BASE[RB], key = string in RC.
605 | movn CARG4, #~LJ_TSTR
606 | add CARG2, BASE, RB, lsl #3
607 | add CARG4, STR:RC, CARG4, lsl #47 // Build the tagged string key in CARG4.
608 | b >2
609 |
610 |->vmeta_tgets: // Object in CARG2; CARG4 is stored as the key below (presumably set up by the caller).
611 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 // Insert the LJ_TTAB tag bits into CARG2.
612 | str CARG2, GL->tmptv // Spill the tagged value to GL->tmptv
613 | add CARG2, GL, #offsetof(global_State, tmptv) // and pass its address as o.
614 |2:
615 | add CARG3, sp, TMPDofs // k = address of the TMPD stack slot.
616 | str CARG4, TMPD // Store the key there.
617 | b >1
618 |
619 |->vmeta_tgetb: // RB = table, RC = index
620 | add RC, RC, TISNUM // Add TISNUM to the index to form the key value.
621 | add CARG2, BASE, RB, lsl #3
622 | add CARG3, sp, TMPDofs // k = address of the TMPD stack slot.
623 | str RC, TMPD
624 | b >1
625 |
626 |->vmeta_tgetv: // RB = table, RC = key
627 | add CARG2, BASE, RB, lsl #3 // o = &BASE[RB].
628 | add CARG3, BASE, RC, lsl #3 // k = &BASE[RC].
629 |1:
630 | str BASE, L->base
631 | mov CARG1, L
632 | str PC, SAVE_PC
633 | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
634 | // Returns TValue * (finished) or NULL (metamethod).
635 | cbz CRET1, >3
636 | ldr TMP0, [CRET1] // Finished: copy the result value
637 | str TMP0, [BASE, RA, lsl #3] // into BASE[RA].
638 | ins_next
639 |
640 |3: // Call __index metamethod.
641 | // BASE = base, L->top = new base, stack = cont/func/t/k
642 | sub TMP1, BASE, #FRAME_CONT
643 | ldr BASE, L->top
644 | mov NARGS8:RC, #16 // 2 args for func(t, k).
645 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
646 | str PC, [BASE, #-24] // [cont|PC]
647 | sub PC, BASE, TMP1 // PC = (new base - old base) + FRAME_CONT.
648 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
649 | b ->vm_call_dispatch_f
650 |
651 |->vmeta_tgetr: // Raw integer-key get fallback; CARG1 (the table) is presumably set by the caller.
652 | sxtw CARG2, TMP1w // key = sign-extended 32 bit value from TMP1.
653 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
654 | // Returns cTValue * or NULL.
655 | mov TMP0, TISNIL // Default result is nil.
656 | cbz CRET1, ->BC_TGETR_Z // NULL: continue with nil.
657 | ldr TMP0, [CRET1] // Otherwise load the value found.
658 | b ->BC_TGETR_Z
659 |
660 |//-----------------------------------------------------------------------
661 |
662 |->vmeta_tsets1: // o = &BASE[RB], key = string in RC.
663 | movn CARG4, #~LJ_TSTR
664 | add CARG2, BASE, RB, lsl #3
665 | add CARG4, STR:RC, CARG4, lsl #47 // Build the tagged string key in CARG4.
666 | b >2
667 |
668 |->vmeta_tsets: // Object in CARG2; CARG4 is stored as the key below (presumably set up by the caller).
669 | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 // Insert the LJ_TTAB tag bits into CARG2.
670 | str CARG2, GL->tmptv // Spill the tagged value to GL->tmptv
671 | add CARG2, GL, #offsetof(global_State, tmptv) // and pass its address as o.
672 |2:
673 | add CARG3, sp, TMPDofs // k = address of the TMPD stack slot.
674 | str CARG4, TMPD // Store the key there.
675 | b >1
676 |
677 |->vmeta_tsetb: // RB = table, RC = index
678 | add RC, RC, TISNUM // Add TISNUM to the index to form the key value.
679 | add CARG2, BASE, RB, lsl #3
680 | add CARG3, sp, TMPDofs // k = address of the TMPD stack slot.
681 | str RC, TMPD
682 | b >1
683 |
684 |->vmeta_tsetv: // o = &BASE[RB], k = &BASE[RC].
685 | add CARG2, BASE, RB, lsl #3
686 | add CARG3, BASE, RC, lsl #3
687 |1:
688 | str BASE, L->base
689 | mov CARG1, L
690 | str PC, SAVE_PC
691 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
692 | // Returns TValue * (finished) or NULL (metamethod).
693 | ldr TMP0, [BASE, RA, lsl #3] // Load the value to store from BASE[RA].
694 | cbz CRET1, >3
695 | // NOBARRIER: lj_meta_tset ensures the table is not black.
696 | str TMP0, [CRET1] // Finished: store the value into the returned slot.
697 | ins_next
698 |
699 |3: // Call __newindex metamethod.
700 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
701 | sub TMP1, BASE, #FRAME_CONT
702 | ldr BASE, L->top
703 | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
704 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
705 | str TMP0, [BASE, #16] // Copy value to third argument.
706 | str PC, [BASE, #-24] // [cont|PC]
707 | sub PC, BASE, TMP1 // PC = (new base - old base) + FRAME_CONT.
708 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
709 | b ->vm_call_dispatch_f
710 |
711 |->vmeta_tsetr: // Raw integer-key set fallback; CARG1/CARG2 are not set here (presumably by the caller -- TODO confirm).
712 | sxtw CARG3, TMP1w // key = sign-extended 32 bit value from TMP1.
713 | str BASE, L->base
714 | str PC, SAVE_PC
715 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
716 | // Returns TValue *.
717 | b ->BC_TSETR_Z
718 |
719 |//-- Comparison metamethods ---------------------------------------------
720 |
721 |->vmeta_comp: // Comparison fallback: o1 = &BASE[RA], o2 = &BASE[RC].
722 | add CARG2, BASE, RA, lsl #3
723 | sub PC, PC, #4 // Step PC back by one instruction word.
724 | add CARG3, BASE, RC, lsl #3
725 | str BASE, L->base
726 | mov CARG1, L
727 | str PC, SAVE_PC
728 | uxtb CARG4w, INSw // op = low byte of the instruction word.
729 | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
730 | // Returns 0/1 or TValue * (metamethod).
731 |3:
732 | cmp CRET1, #1
733 | bhi ->vmeta_binop // > 1: a TValue * was returned, call the metamethod.
734 |4:
735 | ldrh RBw, [PC, # OFS_RD] // Load the 16 bit RD operand of the instruction at PC.
736 | add PC, PC, #4
737 | add RB, PC, RB, lsl #2
738 | sub RB, RB, #0x20000 // RB = PC + RD*4 - 0x20000 (candidate branch target).
739 | csel PC, PC, RB, lo // lo: keep the fallthrough PC, otherwise take RB.
740 |->cont_nop:
741 | ins_next
742 |
743 |->cont_ra: // RA = resultptr
744 | ldr INSw, [PC, #-4] // Re-fetch the instruction word preceding PC.
745 | ldr TMP0, [RA]
746 | decode_RA TMP1, INS
747 | str TMP0, [BASE, TMP1, lsl #3] // Store the result into BASE[RA operand].
748 | b ->cont_nop
749 |
750 |->cont_condt: // RA = resultptr
751 | ldr TMP0, [RA]
752 | mov_true TMP1
753 | cmp TMP1, TMP0 // Branch if result is true.
754 | b <4
755 |
756 |->cont_condf: // RA = resultptr
757 | ldr TMP0, [RA]
758 | mov_false TMP1
759 | cmp TMP0, TMP1 // Branch if result is false.
760 | b <4
761 |
762 |->vmeta_equal:
763 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
764 | and TAB:CARG3, CARG3, #LJ_GCVMASK // Mask CARG3 with LJ_GCVMASK to get the object pointer.
765 | sub PC, PC, #4 // Step PC back by one instruction word.
766 | str BASE, L->base
767 | mov CARG1, L
768 | str PC, SAVE_PC
769 | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
770 | // Returns 0/1 or TValue * (metamethod).
771 | b <3 // Shared result handling at label 3 in vmeta_comp.
772 |
773 |->vmeta_equal_cd: // Body is only assembled when FFI is enabled.
774 |.if FFI
775 | sub PC, PC, #4 // Step PC back by one instruction word.
776 | str BASE, L->base
777 | mov CARG1, L
778 | mov CARG2, INS // op = the instruction word.
779 | str PC, SAVE_PC
780 | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
781 | // Returns 0/1 or TValue * (metamethod).
782 | b <3 // Shared result handling at label 3 in vmeta_comp.
783 |.endif
784 |
785 |->vmeta_istype: // Fallback that calls lj_meta_istype with the RA and RC operands.
786 | sub PC, PC, #4 // Step PC back by one instruction word.
787 | str BASE, L->base
788 | mov CARG1, L
789 | mov CARG2, RA // ra
790 | mov CARG3, RC // tp
791 | str PC, SAVE_PC
792 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
793 | b ->cont_nop
794 |
795 |//-- Arithmetic metamethods ---------------------------------------------
796 |
797 |->vmeta_arith_vn: // rb = &BASE[RB], rc = &KBASE[RC].
798 | add CARG3, BASE, RB, lsl #3
799 | add CARG4, KBASE, RC, lsl #3
800 | b >1
801 |
802 |->vmeta_arith_nv: // Operands swapped: rb = &KBASE[RC], rc = &BASE[RB].
803 | add CARG4, BASE, RB, lsl #3
804 | add CARG3, KBASE, RC, lsl #3
805 | b >1
806 |
807 |->vmeta_unm: // Unary: both operand pointers are &BASE[RC].
808 | add CARG3, BASE, RC, lsl #3
809 | mov CARG4, CARG3
810 | b >1
811 |
812 |->vmeta_arith_vv: // rb = &BASE[RB], rc = &BASE[RC].
813 | add CARG3, BASE, RB, lsl #3
814 | add CARG4, BASE, RC, lsl #3
815 |1:
816 | uxtb CARG5w, INSw // op = low byte of the instruction word.
817 | add CARG2, BASE, RA, lsl #3 // ra = &BASE[RA].
818 | str BASE, L->base
819 | mov CARG1, L
820 | str PC, SAVE_PC
821 | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
822 | // Returns NULL (finished) or TValue * (metamethod).
823 | cbz CRET1, ->cont_nop
824 |
825 | // Call metamethod for binary op.
826 |->vmeta_binop:
827 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
828 | sub TMP1, CRET1, BASE // TMP1 = new base - old base.
829 | str PC, [CRET1, #-24] // [cont|PC]
830 | add PC, TMP1, #FRAME_CONT // PC = frame delta + FRAME_CONT.
831 | mov BASE, CRET1
832 | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
833 | b ->vm_call_dispatch
834 |
835 |->vmeta_len: // Length fallback: o = &BASE[RC].
836 | add CARG2, BASE, RC, lsl #3
837#if LJ_52
838 | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types).
839#endif
840 | str BASE, L->base
841 | mov CARG1, L
842 | str PC, SAVE_PC
843 | bl extern lj_meta_len // (lua_State *L, TValue *o)
844 | // Returns NULL (retry) or TValue * (metamethod base).
845#if LJ_52
846 | cbnz CRET1, ->vmeta_binop // Binop call for compatibility.
847 | mov TAB:CARG1, TAB:RC // NULL (retry): restore the saved table and retry at BC_LEN_Z.
848 | b ->BC_LEN_Z
849#else
850 | b ->vmeta_binop // Binop call for compatibility.
851#endif
852 |
853 |//-- Call metamethod ----------------------------------------------------
854 |
855 |->vmeta_call: // Resolve and call __call metamethod.
856 | // RB = old base, BASE = new base, RC = nargs*8
857 | mov CARG1, L
858 | str RB, L->base // This is the callers base!
859 | sub CARG2, BASE, #16 // func = BASE - 16.
860 | str PC, SAVE_PC
861 | add CARG3, BASE, NARGS8:RC // top = BASE + nargs*8.
862 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
863 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
864 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
865 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
866 | ins_call
867 |
868 |->vmeta_callt: // Resolve __call for BC_CALLT.
869 | // BASE = old base, RA = new base, RC = nargs*8
870 | mov CARG1, L
871 | str BASE, L->base
872 | sub CARG2, RA, #16 // func = RA - 16.
873 | str PC, SAVE_PC
874 | add CARG3, RA, NARGS8:RC // top = RA + nargs*8.
875 | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
876 | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here.
877 | ldr PC, [BASE, FRAME_PC]
878 | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
879 | and LFUNC:CARG3, TMP1, #LJ_GCVMASK
880 | b ->BC_CALLT2_Z
881 |
882 |//-- Argument coercion for 'for' statement ------------------------------
883 |
884 |->vmeta_for: // Calls lj_meta_for on the slots at RA, then re-dispatches the loop instruction.
885 | mov CARG1, L
886 | str BASE, L->base
887 | mov CARG2, RA // base = RA.
888 | str PC, SAVE_PC
889 | bl extern lj_meta_for // (lua_State *L, TValue *base)
890 | ldr INSw, [PC, #-4] // Re-fetch the instruction word preceding PC.
891 |.if JIT
892 | uxtb TMP0w, INSw // Opcode = low byte of the instruction word.
893 |.endif
894 | decode_RA RA, INS
895 | decode_RD RC, INS
896 |.if JIT
897 | cmp TMP0, #BC_JFORI
898 | beq =>BC_JFORI // Opcode is BC_JFORI: continue there.
899 |.endif
900 | b =>BC_FORI // Otherwise continue at BC_FORI.
901 |
902 |//-----------------------------------------------------------------------
903 |//-- Fast functions -----------------------------------------------------
904 |//-----------------------------------------------------------------------
905 |
906 |.macro .ffunc, name // Emit only the ->ff_<name> label.
907 |->ff_ .. name:
908 |.endmacro
909 |
910 |.macro .ffunc_1, name // Label + load 1st arg into CARG1; fallback if fewer than 1 arg.
911 |->ff_ .. name:
912 | ldr CARG1, [BASE]
913 | cmp NARGS8:RC, #8
914 | blo ->fff_fallback
915 |.endmacro
916 |
917 |.macro .ffunc_2, name // Label + load first 2 args into CARG1/CARG2; fallback if fewer than 2 args.
918 |->ff_ .. name:
919 | ldp CARG1, CARG2, [BASE]
920 | cmp NARGS8:RC, #16
921 | blo ->fff_fallback
922 |.endmacro
923 |
924 |.macro .ffunc_n, name // Like .ffunc_1, but also loads the arg into FARG1 and requires checknum to pass.
925 | .ffunc name
926 | ldr CARG1, [BASE]
927 | cmp NARGS8:RC, #8
928 | ldr FARG1, [BASE]
929 | blo ->fff_fallback
930 | checknum CARG1, ->fff_fallback
931 |.endmacro
932 |
933 |.macro .ffunc_nn, name // Two-argument variant of .ffunc_n (CARG1/CARG2 and FARG1/FARG2).
934 | .ffunc name
935 | ldp CARG1, CARG2, [BASE]
936 | cmp NARGS8:RC, #16
937 | ldp FARG1, FARG2, [BASE]
938 | blo ->fff_fallback
939 | checknum CARG1, ->fff_fallback
940 | checknum CARG2, ->fff_fallback
941 |.endmacro
942 |
943 |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
944 |.macro ffgccheck
945 | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total.
946 | cmp CARG1, CARG2
947 | blt >1 // total < threshold: skip the GC step.
948 | bl ->fff_gcstep
949 |1:
950 |.endmacro
951 |
952 |//-- Base library: checks -----------------------------------------------
953 |
954 |.ffunc_1 assert // Fast path for assert(): returns all arguments unless the fallback is taken.
955 | ldr PC, [BASE, FRAME_PC]
956 | mov_false TMP1
957 | cmp CARG1, TMP1
958 | bhs ->fff_fallback // 1st arg compares >= the false constant (unsigned): use fallback.
959 | str CARG1, [BASE, #-16] // First result goes to BASE-16.
960 | sub RB, BASE, #8
961 | subs RA, NARGS8:RC, #8 // RA = bytes of remaining arguments.
962 | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
963 | cbz RA, ->fff_res // Done if exactly 1 argument.
964 |1:
965 | ldr CARG1, [RB, #16] // Copy each remaining argument down by 16 bytes
966 | sub RA, RA, #8
967 | str CARG1, [RB], #8 // (post-increment RB by one slot).
968 | cbnz RA, <1
969 | b ->fff_res
970 |
971 |.ffunc_1 type // Fast path for type(): result is loaded from the C function's upvalue array.
972 | mov TMP0, #~LJ_TISNUM
973 | asr ITYPE, CARG1, #47 // ITYPE = value >> 47 (arithmetic).
974 | cmn ITYPE, #~LJ_TISNUM
975 | csinv TMP1, TMP0, ITYPE, lo // lo: TMP1 = ~LJ_TISNUM, otherwise TMP1 = ~ITYPE.
976 | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 // Bias by the upvalue array offset in slots.
977 | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] // Result = upvalue[TMP1] of the function in CARG3.
978 | b ->fff_restv
979 |
980 |//-- Base library: getters and setters ---------------------------------
981 |
982 |.ffunc_1 getmetatable // Fast path for getmetatable(): returns mt.__metatable if set, else the metatable, else nil.
983 | asr ITYPE, CARG1, #47
984 | cmn ITYPE, #-LJ_TTAB
985 | ccmn ITYPE, #-LJ_TUDATA, #4, ne // eq afterwards if the arg is a table or userdata.
986 | and TAB:CARG1, CARG1, #LJ_GCVMASK
987 | bne >6 // Other types: use the per-type base metatable.
988 |1: // Field metatable must be at same offset for GCtab and GCudata!
989 | ldr TAB:RB, TAB:CARG1->metatable
990 |2:
991 | mov CARG1, TISNIL // Default result: nil (no metatable).
992 | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
993 | cbz TAB:RB, ->fff_restv
994 | ldr TMP1w, TAB:RB->hmask
995 | ldr TMP2w, STR:RC->hash
996 | ldr NODE:CARG3, TAB:RB->node
997 | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
998 | add TMP1, TMP1, TMP1, lsl #1
999 | movn CARG4, #~LJ_TSTR
1000 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
1001 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
1002 |3: // Rearranged logic, because we expect _not_ to find the key.
1003 | ldp CARG1, TMP0, NODE:CARG3->val // CARG1 = node value, TMP0 = node key.
1004 | ldr NODE:CARG3, NODE:CARG3->next
1005 | cmp TMP0, CARG4
1006 | beq >5
1007 | cbnz NODE:CARG3, <3
1008 |4:
1009 | mov CARG1, RB // Use metatable as default result.
1010 | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
1011 | b ->fff_restv
1012 |5: // Key found. A nil value must be treated like a missing key.
1013 | cmp CARG1, TISNIL // Test the value; TMP0 is the key here and can never be nil.
1014 | bne ->fff_restv
1015 | b <4
1016 |
1017 |6:
1018 | movn TMP0, #~LJ_TISNUM
1019 | cmp ITYPE, TMP0
1020 | csel ITYPE, ITYPE, TMP0, hs // Clamp ITYPE to LJ_TISNUM from below (unsigned).
1021 | sub TMP1, GL, ITYPE, lsl #3
1022 | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8]
1023 | b <2
1024 |
1025 |.ffunc_2 setmetatable
1026 | // Fast path: no mt for table yet and not clearing the mt.
1027 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback // 1st arg must be a table; TMP1 is used as the table pointer below.
1028 | ldr TAB:TMP0, TAB:TMP1->metatable // Current metatable.
1029 | asr ITYPE, CARG2, #47
1030 | ldrb TMP2w, TAB:TMP1->marked
1031 | cmn ITYPE, #-LJ_TTAB // 2nd arg must be a table, too.
1032 | and TAB:CARG2, CARG2, #LJ_GCVMASK
1033 | ccmp TAB:TMP0, #0, #0, eq // ... and the current metatable must be NULL.
1034 | bne ->fff_fallback
1035 | str TAB:CARG2, TAB:TMP1->metatable
1036 | tbz TMP2w, #2, ->fff_restv // isblack(table)
1037 | barrierback TAB:TMP1, TMP2w, TMP0
1038 | b ->fff_restv
1039 |
1040 |.ffunc rawget // Fast path for rawget(t, k).
1041 | ldr CARG2, [BASE]
1042 | cmp NARGS8:RC, #16
1043 | blo ->fff_fallback // Fewer than 2 args: use fallback.
1044 | checktab CARG2, ->fff_fallback
1045 | mov CARG1, L
1046 | add CARG3, BASE, #8 // key = address of the 2nd argument slot.
1047 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1048 | // Returns cTValue *.
1049 | ldr CARG1, [CRET1] // Load the value from the returned slot.
1050 | b ->fff_restv
1051 |
1052 |//-- Base library: conversions ------------------------------------------
1053 |
1054 |.ffunc tonumber
1055 | // Only handles the number case inline (without a base argument).
1056 | ldr CARG1, [BASE]
1057 | cmp NARGS8:RC, #8
1058 | bne ->fff_fallback // Anything but exactly 1 arg: use fallback.
1059 | checknumber CARG1, ->fff_fallback
1060 | b ->fff_restv // Return the argument unchanged.
1061 |
1062 |.ffunc_1 tostring
1063 | // Only handles the string or number case inline.
1064 | asr ITYPE, CARG1, #47
1065 | cmn ITYPE, #-LJ_TSTR
1066 | // A __tostring method in the string base metatable is ignored.
1067 | beq ->fff_restv // String argument: return it unchanged.
1068 | // Handle numbers inline, unless a number base metatable is present.
1069 | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
1070 | str BASE, L->base
1071 | cmn ITYPE, #-LJ_TISNUM
1072 | ccmp TMP1, #0, #0, ls // If ls (number): also require the base metatable to be NULL.
1073 | str PC, SAVE_PC // Redundant (but a defined value).
1074 | bne ->fff_fallback
1075 | ffgccheck
1076 | mov CARG1, L
1077 | mov CARG2, BASE // o = address of the 1st argument slot.
1078 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1079 | // Returns GCstr *.
1080 | movn TMP1, #~LJ_TSTR
1081 | ldr BASE, L->base
1082 | add CARG1, CARG1, TMP1, lsl #47 // Tag the returned GCstr * as a string value.
1083 | b ->fff_restv
1084 |
1085 |//-- Base library: iterators -------------------------------------------
1086 |
1087 |.ffunc_1 next // Fast path for next(t [, k]).
1088 | checktp CARG2, CARG1, LJ_TTAB, ->fff_fallback // 1st arg must be a table; pointer goes to CARG2.
1089 | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
1090 | ldr PC, [BASE, FRAME_PC]
1091 | stp BASE, BASE, L->base // Add frame since C call can throw.
1092 | mov CARG1, L
1093 | add CARG3, BASE, #8 // key = address of the 2nd argument slot.
1094 | str PC, SAVE_PC
1095 | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1096 | // Returns 0 at end of traversal.
1097 | str TISNIL, [BASE, #-16] // Pre-store nil as the single result.
1098 | cbz CRET1, ->fff_res1 // End of traversal: return nil.
1099 | ldp CARG1, CARG2, [BASE, #8] // Copy key and value to results.
1100 | mov RC, #(2+1)*8 // 2 results.
1101 | stp CARG1, CARG2, [BASE, #-16]
1102 | b ->fff_res
1103 |
1104 |.ffunc_1 pairs // Fast path for pairs(t): returns upvalue[0], t, nil.
1105 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1106#if LJ_52
1107 | ldr TAB:CARG2, TAB:TMP1->metatable
1108#endif
1109 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1110 | ldr PC, [BASE, FRAME_PC]
1111#if LJ_52
1112 | cbnz TAB:CARG2, ->fff_fallback // LJ_52: tables with a metatable use the fallback.
1113#endif
1114 | mov RC, #(3+1)*8 // 3 results.
1115 | stp CARG1, TISNIL, [BASE, #-8] // Results 2 and 3: the table and nil.
1116 | str CFUNC:CARG4, [BASE, #-16] // Result 1: upvalue[0].
1117 | b ->fff_res
1118 |
1119 |.ffunc_2 ipairs_aux // Iterator step for ipairs: (t, i) -> i+1, t[i+1], or nothing if t[i+1] is nil.
1120 | checktab CARG1, ->fff_fallback
1121 | checkint CARG2, ->fff_fallback
1122 | ldr TMP1w, TAB:CARG1->asize
1123 | ldr CARG3, TAB:CARG1->array
1124 | ldr TMP0w, TAB:CARG1->hmask
1125 | add CARG2w, CARG2w, #1 // i = i + 1 (32 bit).
1126 | cmp CARG2w, TMP1w
1127 | ldr PC, [BASE, FRAME_PC]
1128 | add TMP2, CARG2, TISNUM // Add TISNUM to form the new index value.
1129 | mov RC, #(0+1)*8 // Default: 0 results.
1130 | str TMP2, [BASE, #-16] // Result 1: the new index.
1131 | bhs >2 // Not in array part?
1132 | ldr TMP0, [CARG3, CARG2, lsl #3] // Load array[i].
1133 |1:
1134 | mov TMP1, #(2+1)*8
1135 | cmp TMP0, TISNIL
1136 | str TMP0, [BASE, #-8] // Result 2: the value.
1137 | csel RC, RC, TMP1, eq // nil value: keep 0 results, else 2 results.
1138 | b ->fff_res
1139 |2: // Check for empty hash part first. Otherwise call C function.
1140 | cbz TMP0w, ->fff_res
1141 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
1142 | // Returns cTValue * or NULL.
1143 | cbz CRET1, ->fff_res // NULL: return 0 results.
1144 | ldr TMP0, [CRET1]
1145 | b <1
1146 |
1147 |.ffunc_1 ipairs // Fast path for ipairs(t): returns upvalue[0], t, and the TISNUM constant (integer 0).
1148 | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
1149#if LJ_52
1150 | ldr TAB:CARG2, TAB:TMP1->metatable
1151#endif
1152 | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
1153 | ldr PC, [BASE, FRAME_PC]
1154#if LJ_52
1155 | cbnz TAB:CARG2, ->fff_fallback // LJ_52: tables with a metatable use the fallback.
1156#endif
1157 | mov RC, #(3+1)*8 // 3 results.
1158 | stp CARG1, TISNUM, [BASE, #-8] // Results 2 and 3: the table and TISNUM.
1159 | str CFUNC:CARG4, [BASE, #-16] // Result 1: upvalue[0].
1160 | b ->fff_res
1161 |
1162 |//-- Base library: catch errors ----------------------------------------
1163 |
1164 |.ffunc pcall // Fast path for pcall(f, ...): sets up a FRAME_PCALL frame and dispatches the call.
1165 | ldrb TMP0w, GL->hookmask
1166 | subs NARGS8:RC, NARGS8:RC, #8 // Remove f from the argument count.
1167 | blo ->fff_fallback // No arguments at all: use fallback.
1168 | mov RB, BASE // RB = old base for vm_call_dispatch.
1169 | add BASE, BASE, #16 // New base is two slots up.
1170 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // Extract the hook-active bit.
1171 | add PC, TMP0, #16+FRAME_PCALL // PC = frame delta (16) + FRAME_PCALL + hook-active bit.
1172 | beq ->vm_call_dispatch // No extra arguments (Z from subs): nothing to move.
1173 |1:
1174 | add TMP2, BASE, NARGS8:RC
1175 |2:
1176 | ldr TMP0, [TMP2, #-16] // Move the arguments up by one slot, from the top down
1177 | str TMP0, [TMP2, #-8]! // (pre-decrement TMP2 by one slot).
1178 | cmp TMP2, BASE
1179 | bne <2
1180 | b ->vm_call_dispatch
1181 |
1182 |.ffunc xpcall // Fast path for xpcall(f, traceback, ...).
1183 | ldp CARG1, CARG2, [BASE]
1184 | ldrb TMP0w, GL->hookmask
1185 | subs NARGS8:TMP1, NARGS8:RC, #16 // Argument bytes beyond f and traceback.
1186 | blo ->fff_fallback // Fewer than 2 arguments: use fallback.
1187 | mov RB, BASE // RB = old base for vm_call_dispatch.
1188 | asr ITYPE, CARG2, #47
1189 | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 // Extract the hook-active bit.
1190 | cmn ITYPE, #-LJ_TFUNC
1191 | add PC, TMP0, #24+FRAME_PCALL // PC = frame delta (24) + FRAME_PCALL + hook-active bit.
1192 | bne ->fff_fallback // Traceback must be a function.
1193 | mov NARGS8:RC, NARGS8:TMP1
1194 | add BASE, BASE, #24 // New base is three slots up.
1195 | stp CARG2, CARG1, [RB] // Swap function and traceback.
1196 | cbz NARGS8:RC, ->vm_call_dispatch
1197 | b <1 // Move the extra arguments with the loop in pcall above.
1198 |
1199 |//-- Coroutine library --------------------------------------------------
1200 |
1201 |.macro coroutine_resume_wrap, resume // Shared body: resume != 0 emits coroutine.resume, 0 emits the coroutine.wrap helper.
1202 |.if resume
1203 |.ffunc_1 coroutine_resume
1204 | checktp CARG1, LJ_TTHREAD, ->fff_fallback // 1st arg must be a thread.
1205 |.else
1206 |.ffunc coroutine_wrap_aux
1207 | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr // The thread is taken from upvalue[0].
1208 | and L:CARG1, CARG1, #LJ_GCVMASK
1209 |.endif
1210 | ldr PC, [BASE, FRAME_PC]
1211 | str BASE, L->base
1212 | ldp RB, CARG2, L:CARG1->base // RB = co->base, CARG2 = field following it (co->top, see str at line 1232).
1213 | ldrb TMP1w, L:CARG1->status
1214 | add TMP0, CARG2, TMP1
1215 | str PC, SAVE_PC
1216 | cmp TMP0, RB
1217 | beq ->fff_fallback // co->top + status == co->base: use fallback.
1218 | cmp TMP1, #LUA_YIELD
1219 | add TMP0, CARG2, #8
1220 | csel CARG2, CARG2, TMP0, hs // status >= LUA_YIELD: keep CARG2, else CARG2 += 8.
1221 | ldr CARG4, L:CARG1->maxstack
1222 | add CARG3, CARG2, NARGS8:RC // Prospective new co->top.
1223 | ldr RB, L:CARG1->cframe
1224 | ccmp CARG3, CARG4, #2, ls // Chained conditions: status <= LUA_YIELD, new top <= maxstack,
1225 | ccmp RB, #0, #2, ls // and co->cframe == 0; otherwise hi is set.
1226 | bhi ->fff_fallback
1227 |.if resume
1228 | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
1229 | add BASE, BASE, #8
1230 | sub NARGS8:RC, NARGS8:RC, #8
1231 |.endif
1232 | str CARG3, L:CARG1->top
1233 | str BASE, L->top
1234 | cbz NARGS8:RC, >3 // No arguments to move. NOTE(review): RB is 0 here since the cframe check above passed.
1235 |2: // Move args to coroutine.
1236 | ldr TMP0, [BASE, RB]
1237 | cmp RB, NARGS8:RC
1238 | str TMP0, [CARG2, RB]
1239 | add RB, RB, #8
1240 | bne <2
1241 |3:
1242 | mov CARG3, #0
1243 | mov L:RA, L:CARG1 // Keep the coroutine's lua_State in RA across the call.
1244 | mov CARG4, #0
1245 | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1246 | // Returns thread status.
1247 |4:
1248 | ldp CARG3, CARG4, L:RA->base // CARG3 = co->base, CARG4 = co->top.
1249 | cmp CRET1, #LUA_YIELD
1250 | ldr BASE, L->base
1251 | str L, GL->cur_L // Make the resuming thread current again.
1252 | st_vmstate ST_INTERP
1253 | bhi >8 // status > LUA_YIELD: error.
1254 | sub RC, CARG4, CARG3 // RC = bytes of results (co->top - co->base).
1255 | ldr CARG1, L->maxstack
1256 | add CARG2, BASE, RC
1257 | cbz RC, >6 // No results?
1258 | cmp CARG2, CARG1
1259 | mov RB, #0
1260 | bhi >9 // Need to grow stack?
1261 |
1262 | sub CARG4, RC, #8 // Offset of the last result.
1263 | str CARG3, L:RA->top // Clear coroutine stack.
1264 |5: // Move results from coroutine.
1265 | ldr TMP0, [CARG3, RB]
1266 | cmp RB, CARG4
1267 | str TMP0, [BASE, RB]
1268 | add RB, RB, #8
1269 | bne <5
1270 |6:
1271 |.if resume
1272 | mov_true TMP1
1273 | add RC, RC, #16 // Account for the prepended boolean plus the +1 bias.
1274 |7:
1275 | str TMP1, [BASE, #-8] // Prepend true/false to results.
1276 | sub RA, BASE, #8
1277 |.else
1278 | mov RA, BASE
1279 | add RC, RC, #8 // RC = (nresults+1)*8.
1280 |.endif
1281 | ands CARG1, PC, #FRAME_TYPE // Extract the frame type bits; Z set if they are zero.
1282 | str PC, SAVE_PC
1283 | str RCw, SAVE_MULTRES
1284 | beq ->BC_RET_Z
1285 | b ->vm_return
1286 |
1287 |8: // Coroutine returned with error (at co->top-1).
1288 |.if resume
1289 | ldr TMP0, [CARG4, #-8]! // Load the error value and step co->top back by one slot.
1290 | mov_false TMP1
1291 | mov RC, #(2+1)*8 // 2 results: false + error message.
1292 | str CARG4, L:RA->top // Remove error from coroutine stack.
1293 | str TMP0, [BASE] // Copy error message.
1294 | b <7
1295 |.else
1296 | mov CARG1, L
1297 | mov CARG2, L:RA
1298 | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1299 | // Never returns.
1300 |.endif
1301 |
1302 |9: // Handle stack expansion on return from yield.
1303 | mov CARG1, L
1304 | lsr CARG2, RC, #3 // n = number of result slots.
1305 | bl extern lj_state_growstack // (lua_State *L, int n)
1306 | mov CRET1, #0 // Use status 0 and redo the result handling at label 4.
1307 | b <4
1308 |.endmacro
1309 |
1310 | coroutine_resume_wrap 1 // coroutine.resume
1311 | coroutine_resume_wrap 0 // coroutine.wrap
1312 |
1313 |.ffunc coroutine_yield // Fast path for coroutine.yield(...).
1314 | ldr TMP0, L->cframe
1315 | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8 (new top).
1316 | mov CRET1, #LUA_YIELD // Return value and new thread status.
1317 | stp BASE, TMP1, L->base // Pair store into L->base and the field following it.
1318 | tbz TMP0, #0, ->fff_fallback // Bit 0 of L->cframe clear: use fallback.
1319 | str xzr, L->cframe // Clear L->cframe.
1320 | strb CRET1w, L->status // Mark the thread as yielded.
1321 | b ->vm_leave_unw
1322 |
1323 |//-- Math library -------------------------------------------------------
1324 |
1325 |.macro math_round, func, round // Emits ff_math_<func>; 'round' is the FP rounding instruction applied to d0.
1326 | .ffunc math_ .. func
1327 | ldr CARG1, [BASE]
1328 | cmp NARGS8:RC, #8
1329 | ldr d0, [BASE] // Same argument, loaded as a double.
1330 | blo ->fff_fallback // No argument: use fallback.
1331 | cmp TISNUMhi, CARG1, lsr #32 // Compare TISNUMhi against the high 32 bits of the argument.
1332 | beq ->fff_restv // Equal (integer tag): return the argument unchanged.
1333 | blo ->fff_fallback // High word above TISNUMhi: use fallback (presumably a non-number tag).
1334 | round d0, d0
1335 | b ->fff_resn
1336 |.endmacro
1337 |
1338 | math_round floor, frintm // math.floor via frintm.
1339 | math_round ceil, frintp // math.ceil via frintp.
1340 |
1341 |.ffunc_1 math_abs // Fast path for math.abs(); falls through into the shared result code below.
1342 | checknumber CARG1, ->fff_fallback
1343 | and CARG1, CARG1, #U64x(7fffffff,ffffffff) // Clear the top (sign) bit.
1344 | bne ->fff_restv // Flags come from checknumber (macro not shown); presumably ne = not an integer.
1345 | eor CARG2w, CARG1w, CARG1w, asr #31 // Integer abs: (x ^ s) - s with s = x >> 31.
1346 | movz CARG3, #0x41e0, lsl #48 // 2^31.
1347 | subs CARG1w, CARG2w, CARG1w, asr #31
1348 | add CARG1, CARG1, TISNUM // Add TISNUM to the 32 bit result.
1349 | csel CARG1, CARG1, CARG3, pl // Negative result (input was -2^31): use 2^31 as a double instead.
1350 | // Fallthrough.
1351 |
1352 |->fff_restv:
1353 | // CARG1 = TValue result.
1354 | ldr PC, [BASE, FRAME_PC]
1355 | str CARG1, [BASE, #-16] // Single result goes to BASE-16.
1356 |->fff_res1:
1357 | // PC = return.
1358 | mov RC, #(1+1)*8 // 1 result.
1359 |->fff_res:
1360 | // RC = (nresults+1)*8, PC = return.
1361 | ands CARG1, PC, #FRAME_TYPE // Extract the frame type bits; Z set if they are zero.
1362 | str RCw, SAVE_MULTRES
1363 | sub RA, BASE, #16 // Results start at BASE-16.
1364 | bne ->vm_return // Non-zero frame type: generic return path.
1365 | ldr INSw, [PC, #-4] // Fetch the calling instruction.
1366 | decode_RB RB, INS
1367 |5:
1368 | cmp RC, RB, lsl #3 // More results expected?
1369 | blo >6
1370 | decode_RA TMP1, INS
1371 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1372 | sub BASE, RA, TMP1, lsl #3
1373 | ins_next
1374 |
1375 |6: // Fill up results with nil.
1376 | add TMP1, RA, RC
1377 | add RC, RC, #8
1378 | str TISNIL, [TMP1, #-8]
1379 | b <5
1380 |
1381 |.macro math_extern, func // Emits ff_math_<func>: one number argument, calls the extern C function 'func'.
1382 | .ffunc_n math_ .. func
1383 | bl extern func
1384 | b ->fff_resn // Return d0 as the number result.
1385 |.endmacro
1386 |
1387 |.macro math_extern2, func // Same as math_extern, for functions taking two number arguments.
1388 | .ffunc_nn math_ .. func
1389 | bl extern func
1390 | b ->fff_resn // Return d0 as the number result.
1391 |.endmacro
1392 |
1393 |.ffunc_n math_sqrt // Fast path for math.sqrt(); falls through into fff_resn.
1394 | fsqrt d0, d0
1395 |->fff_resn: // Shared exit: d0 = number result.
1396 | ldr PC, [BASE, FRAME_PC]
1397 | str d0, [BASE, #-16] // Single result goes to BASE-16.
1398 | b ->fff_res1
1399 |
1400 |.ffunc math_log // Fast path for math.log(x) with a single argument.
1401 | ldr CARG1, [BASE]
1402 | cmp NARGS8:RC, #8
1403 | ldr FARG1, [BASE]
1404 | bne ->fff_fallback // Need exactly 1 argument.
1405 | checknum CARG1, ->fff_fallback
1406 | bl extern log
1407 | b ->fff_resn
1408 |
1409 | math_extern log10 // One-argument math functions forwarded to the extern C function of the same name.
1410 | math_extern exp
1411 | math_extern sin
1412 | math_extern cos
1413 | math_extern tan
1414 | math_extern asin
1415 | math_extern acos
1416 | math_extern atan
1417 | math_extern sinh
1418 | math_extern cosh
1419 | math_extern tanh
1420 | math_extern2 pow // Two-argument math functions.
1421 | math_extern2 atan2
1422 | math_extern2 fmod
1423 |
1424 |.ffunc_2 math_ldexp // Fast path for math.ldexp(x, exp) with a number x and an integer exp.
1425 | ldr FARG1, [BASE] // x as a double.
1426 | checknum CARG1, ->fff_fallback
1427 | checkint CARG2, ->fff_fallback
1428 | sxtw CARG1, CARG2w // exp = sign-extended low 32 bits of the 2nd argument.
1429 | bl extern ldexp // (double x, int exp)
1430 | b ->fff_resn
1431 |
1432 |.ffunc_n math_frexp // Fast path for math.frexp(x): returns d0 and the integer written to TMPD.
1433 | add CARG1, sp, TMPDofs // Output pointer = address of the TMPD stack slot.
1434 | bl extern frexp
1435 | ldr CARG2w, TMPD // Load the 32 bit value written by frexp.
1436 | ldr PC, [BASE, FRAME_PC]
1437 | str d0, [BASE, #-16] // Result 1: d0.
1438 | mov RC, #(2+1)*8 // 2 results.
1439 | add CARG2, CARG2, TISNUM // Add TISNUM to form the integer value.
1440 | str CARG2, [BASE, #-8] // Result 2.
1441 | b ->fff_res
1442 |
1443 |.ffunc_n math_modf // Fast path for math.modf(x): two results.
1444 | sub CARG1, BASE, #16 // Output pointer = first result slot (BASE-16), written by modf.
1445 | ldr PC, [BASE, FRAME_PC]
1446 | bl extern modf
1447 | mov RC, #(2+1)*8 // 2 results.
1448 | str d0, [BASE, #-8] // Result 2: the value returned in d0.
1449 | b ->fff_res
1450 |
1451 |.macro math_minmax, name, cond, fcond // Emits ff_<name>; cond selects for integers, fcond for doubles.
1452 | .ffunc_1 name
1453 | add RB, BASE, RC // RB = end of the arguments.
1454 | add RA, BASE, #8 // RA = address of the 2nd argument.
1455 | checkint CARG1, >4 // 1st arg not an integer: continue at label 4.
1456 |1: // Handle integers.
1457 | ldr CARG2, [RA]
1458 | cmp RA, RB
1459 | bhs ->fff_restv // All arguments consumed: return CARG1.
1460 | checkint CARG2, >3
1461 | cmp CARG1w, CARG2w
1462 | add RA, RA, #8
1463 | csel CARG1, CARG2, CARG1, cond // cond true: take the new argument.
1464 | b <1
1465 |3: // Convert intermediate result to number and continue below.
1466 | scvtf d0, CARG1w
1467 | blo ->fff_fallback // Flags still come from the checkint above (scvtf does not set flags).
1468 | ldr d1, [RA]
1469 | b >6
1470 |
1471 |4:
1472 | ldr d0, [BASE] // 1st arg as a double.
1473 | blo ->fff_fallback // Flags still come from the checkint at the top.
1474 |5: // Handle numbers.
1475 | ldr CARG2, [RA]
1476 | ldr d1, [RA] // Same argument, loaded as a double.
1477 | cmp RA, RB
1478 | bhs ->fff_resn // All arguments consumed: return d0.
1479 | checknum CARG2, >7
1480 |6:
1481 | fcmp d0, d1
1482 | add RA, RA, #8
1483 | fcsel d0, d1, d0, fcond // fcond true: take the new argument.
1484 | b <5
1485 |7: // Convert integer to number and continue above.
1486 | scvtf d1, CARG2w
1487 | blo ->fff_fallback // Flags still come from the checknum above.
1488 | b <6
1489 |.endmacro
1490 |
1491 | math_minmax math_min, gt, hi // math.min
1492 | math_minmax math_max, lt, lo // math.max
1493 |
1494 |//-- String library -----------------------------------------------------
1495 |
1496 |.ffunc string_byte // Only handle the 1-arg case here.
1497 | ldp PC, CARG1, [BASE, FRAME_PC] // Load the frame PC and the adjacent 1st argument in one go.
1498 | cmp NARGS8:RC, #8
1499 | asr ITYPE, CARG1, #47
1500 | ccmn ITYPE, #-LJ_TSTR, #0, eq // Exactly 1 arg and it must be a string.
1501 | and STR:CARG1, CARG1, #LJ_GCVMASK
1502 | bne ->fff_fallback
1503 | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
1504 | ldr CARG3w, STR:CARG1->len
1505 | add TMP0, TMP0, TISNUM // Add TISNUM to the byte to form the integer result.
1506 | str TMP0, [BASE, #-16]
1507 | mov RC, #(0+1)*8 // 0 results for the empty string.
1508 | cbz CARG3, ->fff_res
1509 | b ->fff_res1 // Otherwise 1 result.
1510 |
1511 |.ffunc string_char // Only handle the 1-arg case here.
1512 | ffgccheck
1513 | ldp PC, CARG1, [BASE, FRAME_PC] // Load the frame PC and the adjacent 1st argument in one go.
1514 | cmp CARG1w, #255 // Low 32 bits must be <= 255 (unsigned).
1515 | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument.
1516 | bne ->fff_fallback
1517 | checkint CARG1, ->fff_fallback
1518 | mov CARG3, #1 // len = 1.
1519 | // Point to the char inside the integer in the stack slot.
1520 |.if ENDIAN_LE
1521 | mov CARG2, BASE
1522 |.else
1523 | add CARG2, BASE, #7
1524 |.endif
1525 |->fff_newstr:
1526 | // CARG2 = str, CARG3 = len.
1527 | str BASE, L->base
1528 | mov CARG1, L
1529 | str PC, SAVE_PC
1530 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
1531 |->fff_resstr:
1532 | // Returns GCstr *.
1533 | ldr BASE, L->base
1534 | movn TMP1, #~LJ_TSTR
1535 | add CARG1, CARG1, TMP1, lsl #47 // Tag the GCstr * as a string value.
1536 | b ->fff_restv
1537 |
1538 |.ffunc string_sub
1539 | ffgccheck
1540 | ldr CARG1, [BASE]
1541 | ldr CARG3, [BASE, #16]
1542 | cmp NARGS8:RC, #16
1543 | movn RB, #0 // Default end = -1, used when exactly two arguments are given.
1544 | beq >1
1545 | blo ->fff_fallback // Fewer than two arguments.
1546 | checkint CARG3, ->fff_fallback
1547 | sxtw RB, CARG3w // end = sign-extended third argument.
1548 |1:
1549 | ldr CARG2, [BASE, #8]
1550 | checkstr CARG1, ->fff_fallback
1551 | ldr TMP1w, STR:CARG1->len
1552 | checkint CARG2, ->fff_fallback
1553 | sxtw CARG2, CARG2w
1554 | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
1555 | add TMP2, RB, TMP1
1556 | cmp RB, #0
1557 | add TMP0, CARG2, TMP1
1558 | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1
1559 | cmp CARG2, #0
1560 | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1
1561 | cmp RB, #0
1562 | csel RB, RB, xzr, ge // if (end < 0) end = 0
1563 | cmp CARG2, #1
1564 | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1
1565 | cmp RB, TMP1
1566 | csel RB, RB, TMP1, le // if (end > len) end = len
1567 | add CARG1, STR:CARG1, #sizeof(GCstr)-1 // Bias by -1 so that adding the 1-based start yields the first byte.
1568 | subs CARG3, RB, CARG2 // len = end - start
1569 | add CARG2, CARG1, CARG2 // CARG2 = pointer to the first byte of the substring.
1570 | add CARG3, CARG3, #1 // len += 1
1571 | bge ->fff_newstr // Flags are from the subs above: end >= start, i.e. at least one byte.
1572 | add STR:CARG1, GL, #offsetof(global_State, strempty) // Otherwise return the strempty object embedded in global_State.
1573 | movn TMP1, #~LJ_TSTR // TMP1 = LJ_TSTR.
1574 | add CARG1, CARG1, TMP1, lsl #47 // Tag the pointer as a string.
1575 | b ->fff_restv
1576 |
1577 |.macro ffstring_op, name // Emits fast function string_<name>, implemented via lj_buf_putstr_<name>.
1578 | .ffunc string_ .. name
1579 | ffgccheck
1580 | ldr CARG2, [BASE]
1581 | cmp NARGS8:RC, #8
1582 | asr ITYPE, CARG2, #47
1583 | ccmn ITYPE, #-LJ_TSTR, #0, hs // eq only if nargs*8 >= 8 and the tag is LJ_TSTR.
1584 | and STR:CARG2, CARG2, #LJ_GCVMASK
1585 | bne ->fff_fallback
1586 | ldr TMP0, GL->tmpbuf.b
1587 | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) // CARG1 = &g->tmpbuf, CARG2 = the string.
1588 | str BASE, L->base
1589 | str PC, SAVE_PC
1590 | str L, GL->tmpbuf.L
1591 | str TMP0, GL->tmpbuf.p // Set tmpbuf.p to the value loaded from tmpbuf.b.
1592 | bl extern lj_buf_putstr_ .. name
1593 | bl extern lj_buf_tostr // Called with whatever the previous call left in the argument registers.
1594 | b ->fff_resstr
1595 |.endmacro
1596 |
1597 |ffstring_op reverse
1598 |ffstring_op lower
1599 |ffstring_op upper
1600 |
1601 |//-- Bit library --------------------------------------------------------
1602 |
1603 |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3
1604 |->vm_tobit_fb:
1605 | bls ->fff_fallback // Tests flags set by the caller's checkint; NOTE(review): presumably 'not a number' -- confirm against checkint.
1606 | add CARG2, CARG1, CARG1 // CARG2 = raw bits << 1: drops the sign bit.
1607 | mov CARG3, #1076
1608 | sub CARG3, CARG3, CARG2, lsr #53 // CARG3 = 1076 - biased exponent = right-shift count.
1609 | cmp CARG3, #53
1610 | bhi >1 // Shift count outside 0..53 (unsigned compare): result is 0.
1611 | and CARG2, CARG2, #U64x(001fffff,ffffffff) // Keep the mantissa (shifted left by one).
1612 | orr CARG2, CARG2, #U64x(00200000,00000000) // Insert the implicit leading one.
1613 | cmp CARG1, #0 // mi if the sign bit of the input is set.
1614 | lsr CARG2, CARG2, CARG3
1615 | cneg CARG1w, CARG2w, mi // Negate the 32-bit result for negative inputs.
1616 | br lr // Return to the address the caller placed in lr via adr.
1617 |1:
1618 | mov CARG1w, #0
1619 | br lr
1620 |
1621 |.macro .ffunc_bit, name // Prologue of a one-argument bit.* fast function: leaves the argument as an integer in CARG1w.
1622 | .ffunc_1 bit_..name
1623 | adr lr, >1 // Return address for vm_tobit_fb, which ends in br lr.
1624 | checkint CARG1, ->vm_tobit_fb
1625 |1:
1626 |.endmacro
1627 |
1628 |.macro .ffunc_bit_op, name, ins // Variadic bit op: folds all arguments into TMP0w with instruction 'ins'.
1629 | .ffunc_bit name
1630 | mov RA, #8 // RA = byte offset of the second argument.
1631 | mov TMP0w, CARG1w // Accumulator starts with the first argument.
1632 | adr lr, >2 // vm_tobit_fb returns straight to the accumulate step.
1633 |1:
1634 | ldr CARG1, [BASE, RA]
1635 | cmp RA, NARGS8:RC
1636 | add RA, RA, #8
1637 | bge >9 // All arguments consumed; label 9 is defined in bit_tobit below.
1638 | checkint CARG1, ->vm_tobit_fb
1639 |2:
1640 | ins TMP0w, TMP0w, CARG1w
1641 | b <1
1642 |.endmacro
1643 |
1644 |.ffunc_bit_op band, and
1645 |.ffunc_bit_op bor, orr
1646 |.ffunc_bit_op bxor, eor
1647 |
1648 |.ffunc_bit tobit
1649 | mov TMP0w, CARG1w // Writing the w register zero-extends into TMP0.
1650 |9: // Label reused by .ffunc_bit_op users.
1651 | add CARG1, TMP0, TISNUM // Box the 32-bit result by adding the integer tag held in TISNUM.
1652 | b ->fff_restv
1653 |
1654 |.ffunc_bit bswap
1655 | rev TMP0w, CARG1w // Byte-reverse the 32-bit value.
1656 | add CARG1, TMP0, TISNUM
1657 | b ->fff_restv
1658 |
1659 |.ffunc_bit bnot
1660 | mvn TMP0w, CARG1w // Bitwise NOT of the 32-bit value.
1661 | add CARG1, TMP0, TISNUM
1662 | b ->fff_restv
1663 |
1664 |.macro .ffunc_bit_sh, name, ins, shmod // Two-argument shift/rotate; shmod != 0 negates the shift count first.
1665 | .ffunc bit_..name
1666 | ldp TMP0, CARG1, [BASE] // TMP0 = first argument (value), CARG1 = second argument (count).
1667 | cmp NARGS8:RC, #16
1668 | blo ->fff_fallback // Fewer than two arguments.
1669 | adr lr, >1
1670 | checkint CARG1, ->vm_tobit_fb // Convert the count first.
1671 |1:
1672 |.if shmod == 0
1673 | mov TMP1, CARG1
1674 |.else
1675 | neg TMP1, CARG1 // Negated count: lets rol be implemented with the ror instruction.
1676 |.endif
1677 | mov CARG1, TMP0
1678 | adr lr, >2
1679 | checkint CARG1, ->vm_tobit_fb // Then convert the value to shift.
1680 |2:
1681 | ins TMP0w, CARG1w, TMP1w
1682 | add CARG1, TMP0, TISNUM // Box the 32-bit result as an integer.
1683 | b ->fff_restv
1684 |.endmacro
1685 |
1686 |.ffunc_bit_sh lshift, lsl, 0
1687 |.ffunc_bit_sh rshift, lsr, 0
1688 |.ffunc_bit_sh arshift, asr, 0
1689 |.ffunc_bit_sh rol, ror, 1
1690 |.ffunc_bit_sh ror, ror, 0
1691 |
1692 |//-----------------------------------------------------------------------
1693 |
1694 |->fff_fallback: // Call fast function fallback handler.
1695 | // BASE = new base, RC = nargs*8
1696 | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC.
1697 | ldr TMP2, L->maxstack
1698 | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top below.
1699 | stp BASE, TMP1, L->base // One stp writes L->base and the field following it.
1700 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1701 | add TMP1, TMP1, #8*LUA_MINSTACK // Required stack limit: top + LUA_MINSTACK slots.
1702 | ldr CARG3, CFUNC:CARG3->f // C function pointer of the fallback.
1703 | str PC, SAVE_PC // Redundant (but a defined value).
1704 | cmp TMP1, TMP2
1705 | mov CARG1, L
1706 | bhi >5 // Need to grow stack.
1707 | blr CARG3 // (lua_State *L)
1708 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
1709 | ldr BASE, L->base
1710 | cmp CRET1w, #0
1711 | lsl RC, CRET1, #3 // RC = return value * 8.
1712 | sub RA, BASE, #16
1713 | bgt ->fff_res // Returned nresults+1?
1714 |1: // Returned 0 or -1: retry fast path.
1715 | ldr CARG1, L->top
1716 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1717 | sub NARGS8:RC, CARG1, BASE // Recompute nargs*8 from L->top.
1718 | bne ->vm_call_tail // Returned -1?
1719 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1720 | ins_callt // Returned 0: retry fast path.
1721 |
1722 |// Reconstruct previous base for vmeta_call during tailcall.
1723 |->vm_call_tail:
1724 | ands TMP0, PC, #FRAME_TYPE // Z set when the frame type bits of PC are zero.
1725 | and TMP1, PC, #~FRAME_TYPEP // TMP1 = PC with the FRAME_TYPEP bits cleared; used as the delta unless replaced below.
1726 | bne >3
1727 | ldrb RAw, [PC, #-4+OFS_RA] // Frame type 0: take the RA operand of the instruction before PC.
1728 | lsl RA, RA, #3
1729 | add TMP1, RA, #16 // Delta = RA*8 + 16.
1730 |3:
1731 | sub RB, BASE, TMP1 // RB = BASE - delta.
1732 | b ->vm_call_dispatch // Resolve again for tailcall.
1733 |
1734 |5: // Grow stack for fallback handler.
1735 | mov CARG2, #LUA_MINSTACK
1736 | bl extern lj_state_growstack // (lua_State *L, int n)
1737 | ldr BASE, L->base
1738 | cmp CARG1, CARG1 // Set zero-flag to force retry.
1739 | b <1 // Label 1 above; with Z set its bne ->vm_call_tail falls through to ins_callt.
1740 |
1741 |->fff_gcstep: // Call GC step function.
1742 | // BASE = new base, RC = nargs*8
1743 | add CARG2, BASE, NARGS8:RC // Calculate L->top.
1744 | mov RA, lr // Keep the return address in RA: the bl below overwrites lr.
1745 | stp BASE, CARG2, L->base
1746 | str PC, SAVE_PC // Redundant (but a defined value).
1747 | mov CARG1, L
1748 | bl extern lj_gc_step // (lua_State *L)
1749 | ldp BASE, CARG2, L->base // Reload base and top after the C call.
1750 | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
1751 | mov lr, RA // Help return address predictor.
1752 | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8.
1753 | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
1754 | ret
1755 |
1756 |//-----------------------------------------------------------------------
1757 |//-- Special dispatch targets -------------------------------------------
1758 |//-----------------------------------------------------------------------
1759 |
1760 |->vm_record: // Dispatch target for recording phase.
1761 |.if JIT
1762 | ldrb CARG1w, GL->hookmask
1763 | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
1764 | bne >5 // Label 5 (in vm_rethook below): re-dispatch to the static instruction.
1765 | // Decrement the hookcount for consistency, but always do the call.
1766 | ldr CARG2w, GL->hookcount
1767 | tst CARG1, #HOOK_ACTIVE
1768 | bne >1 // Forward label 1 is the lj_dispatch_ins call sequence in vm_inshook.
1769 | sub CARG2w, CARG2w, #1
1770 | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
1771 | beq >1
1772 | str CARG2w, GL->hookcount // The decremented count is stored only when a line or count hook is set.
1773 | b >1
1774 |.endif
1775 |
1776 |->vm_rethook: // Dispatch target for return hooks.
1777 | ldrb TMP2w, GL->hookmask
1778 | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
1779 |5: // Re-dispatch to static ins.
1780 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] // NOTE(review): relies on TMP1 = GL + opcode*8 from the dispatching code (same form as at label 4 below) -- confirm.
1781 | br TMP0
1782 |
1783 |->vm_inshook: // Dispatch target for instr/line hooks.
1784 | ldrb TMP2w, GL->hookmask
1785 | ldr TMP3w, GL->hookcount
1786 | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
1787 | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
1788 | beq <5 // Neither line nor count hook set: plain re-dispatch.
1789 | sub TMP3w, TMP3w, #1
1790 | str TMP3w, GL->hookcount
1791 | cbz TMP3w, >1 // Count reached zero: call the dispatcher.
1792 | tbz TMP2w, #LUA_HOOKLINE, <5 // Line hook bit clear: plain re-dispatch.
1793 |1:
1794 | mov CARG1, L
1795 | str BASE, L->base
1796 | mov CARG2, PC
1797 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1798 | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
1799 |3:
1800 | ldr BASE, L->base // Reload BASE from L->base after the C call.
1801 |4: // Re-dispatch to static ins.
1802 | ldr INSw, [PC, #-4] // Re-fetch the instruction word located just before PC.
1803 | add TMP1, GL, INS, uxtb #3 // TMP1 = GL + opcode*8 (opcode is the low byte of INS).
1804 | decode_RA RA, INS
1805 | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
1806 | decode_RD RC, INS
1807 | br TMP0
1808 |
1809 |->cont_hook: // Continue from hook yield.
1810 | ldr CARG1, [CARG4, #-40]
1811 | add PC, PC, #4 // Advance PC by one instruction; label 4 re-reads the word at PC-4.
1812 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
1813 | b <4 // Label 4 in vm_inshook above: re-dispatch to the static instruction.
1814 |
1815 |->vm_hotloop: // Hot loop counter underflow.
1816 |.if JIT
1817 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
1818 | add CARG1, GL, #GG_G2DISP+GG_DISP2J // CARG1 = jit_State * argument.
1819 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1820 | str PC, SAVE_PC
1821 | ldr CARG3, LFUNC:CARG3->pc
1822 | mov CARG2, PC
1823 | str L, [GL, #GL_J(L)]
1824 | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] // Frame size of the current prototype.
1825 | str BASE, L->base
1826 | add CARG3, BASE, CARG3, lsl #3 // top = BASE + framesize*8.
1827 | str CARG3, L->top
1828 | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1829 | b <3 // Label 3 in vm_inshook: reload BASE and re-dispatch.
1830 |.endif
1831 |
1832 |->vm_callhook: // Dispatch target for call hooks.
1833 | mov CARG2, PC
1834 |.if JIT
1835 | b >1 // Skip the hot-call marker below and share the common tail.
1836 |.endif
1837 |
1838 |->vm_hotcall: // Hot call counter underflow.
1839 |.if JIT
1840 | orr CARG2, PC, #1 // pc argument with its low bit set; NOTE(review): presumably how lj_dispatch_call tells a hot call from a hook -- confirm.
1841 |1:
1842 |.endif
1843 | add TMP1, BASE, NARGS8:RC // TMP1 = BASE + nargs*8, stored as L->top.
1844 | str PC, SAVE_PC
1845 | mov CARG1, L
1846 | sub RA, RA, BASE // Keep RA as an offset from BASE across the call; rebased below.
1847 | stp BASE, TMP1, L->base
1848 | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
1849 | // Returns ASMFunction.
1850 | ldp BASE, TMP1, L->base // Reload base and top after the C call.
1851 | str xzr, SAVE_PC // Invalidate for subsequent line hook.
1852 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
1853 | add RA, BASE, RA // RA is an absolute pointer again, relative to the reloaded BASE.
1854 | sub NARGS8:RC, TMP1, BASE // Recompute nargs*8.
1855 | ldr INSw, [PC, #-4]
1856 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
1857 | br CRET1 // Jump to the ASMFunction returned by lj_dispatch_call.
1858 |
1859 |->cont_stitch: // Trace stitching.
1860 |.if JIT
1861 | // RA = resultptr, CARG4 = meta base
1862 | ldr RBw, SAVE_MULTRES
1863 | ldr INSw, [PC, #-4]
1864 | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
1865 | subs RB, RB, #8 // RB = SAVE_MULTRES - 8; zero skips the copy loop.
1866 | decode_RA RC, INS // Call base.
1867 | and CARG3, CARG3, #LJ_GCVMASK
1868 | beq >2
1869 |1: // Move results down.
1870 | ldr CARG1, [RA]
1871 | add RA, RA, #8
1872 | subs RB, RB, #8
1873 | str CARG1, [BASE, RC, lsl #3]
1874 | add RC, RC, #1 // RC counts destination slots.
1875 | bne <1
1876 |2:
1877 | decode_RA RA, INS
1878 | decode_RB RB, INS
1879 | add RA, RA, RB // RA = RA operand + RB operand of the instruction before PC.
1880 |3:
1881 | cmp RA, RC
1882 | bhi >9 // More results wanted?
1883 |
1884 | ldrh RAw, TRACE:CARG3->traceno
1885 | ldrh RCw, TRACE:CARG3->link
1886 | cmp RCw, RAw
1887 | beq ->cont_nop // Blacklisted.
1888 | cmp RCw, #0
1889 | bne =>BC_JLOOP // Jump to stitched trace.
1890 |
1891 | // Stitch a new trace to the previous trace.
1892 | mov CARG1, #GL_J(exitno)
1893 | str RAw, [GL, CARG1] // The previous trace's traceno is stored into the exitno field.
1894 | mov CARG1, #GL_J(L)
1895 | str L, [GL, CARG1]
1896 | str BASE, L->base
1897 | add CARG1, GL, #GG_G2J
1898 | mov CARG2, PC
1899 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
1900 | ldr BASE, L->base
1901 | b ->cont_nop
1902 |
1903 |9: // Fill up results with nil.
1904 | str TISNIL, [BASE, RC, lsl #3]
1905 | add RC, RC, #1
1906 | b <3 // Re-check at label 3 whether more slots are wanted.
1907 |.endif
1908 |
1909 |->vm_profhook: // Dispatch target for profiler hook.
1910#if LJ_HASPROFILE
1911 | mov CARG1, L
1912 | str BASE, L->base
1913 | mov CARG2, PC
1914 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
1915 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
1916 | ldr BASE, L->base // Reload BASE from L->base after the C call.
1917 | sub PC, PC, #4 // Step PC back by one instruction before continuing at cont_nop.
1918 | b ->cont_nop
1919#endif
1920 |
1921 |//-----------------------------------------------------------------------
1922 |//-- Trace exit handler -------------------------------------------------
1923 |//-----------------------------------------------------------------------
1924 |
1925 |.macro savex_, a, b // Save FP pair d<a>/d<b> at sp+a*8 and GPR pair x<a>/x<b> at sp+(32+a)*8.
1926 | stp d..a, d..b, [sp, #a*8]
1927 | stp x..a, x..b, [sp, #32*8+a*8]
1928 |.endmacro
1929 |
1930 |->vm_exit_handler:
1931 |.if JIT
1932 | sub sp, sp, #(64*8) // Room for 32 FP slots followed by 32 GPR slots.
1933 | savex_, 0, 1
1934 | savex_, 2, 3
1935 | savex_, 4, 5
1936 | savex_, 6, 7
1937 | savex_, 8, 9
1938 | savex_, 10, 11
1939 | savex_, 12, 13
1940 | savex_, 14, 15
1941 | savex_, 16, 17
1942 | savex_, 18, 19
1943 | savex_, 20, 21
1944 | savex_, 22, 23
1945 | savex_, 24, 25
1946 | savex_, 26, 27
1947 | savex_, 28, 29
1948 | stp d30, d31, [sp, #30*8] // Only the FP half of pair 30/31; GPR slots 30/31 are written explicitly below.
1949 | ldr CARG1, [sp, #64*8] // Load original value of lr.
1950 | add CARG3, sp, #64*8 // Recompute original value of sp.
1951 | mv_vmstate CARG4w, EXIT
1952 | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP.
1953 | sub CARG1, CARG1, lr // Exit number = (original lr - lr)/4 - 2, computed over the next few instructions.
1954 | ldr L, GL->cur_L
1955 | lsr CARG1, CARG1, #2
1956 | ldr BASE, GL->jit_base
1957 | sub CARG1, CARG1, #2
1958 | ldr CARG2w, [lr] // Load trace number.
1959 | st_vmstate CARG4w
1960 |.if ENDIAN_BE
1961 | rev32 CARG2, CARG2
1962 |.endif
1963 | str BASE, L->base
1964 | ubfx CARG2w, CARG2w, #5, #16 // Extract the 16-bit field at bits 5..20 of the word loaded from [lr].
1965 | str CARG1w, [GL, #GL_J(exitno)]
1966 | str CARG2w, [GL, #GL_J(parent)]
1967 | str L, [GL, #GL_J(L)]
1968 | str xzr, GL->jit_base
1969 | add CARG1, GL, #GG_G2J
1970 | mov CARG2, sp // The register save area just built is passed as the ExitState pointer.
1971 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
1972 | // Returns MULTRES (unscaled) or negated error code.
1973 | ldr CARG2, L->cframe
1974 | ldr BASE, L->base
1975 | and sp, CARG2, #CFRAME_RAWMASK // sp = L->cframe masked with CFRAME_RAWMASK; this discards the save area.
1976 | ldr PC, SAVE_PC // Get SAVE_PC.
1977 | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
1978 | b >1 // Enter vm_exit_interp past its own load of L.
1979 |.endif
1980 |
1981 |->vm_exit_interp:
1982 | // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
1983 |.if JIT
1984 | ldr L, SAVE_L
1985 |1:
1986 | cmp CARG1w, #0
1987 | blt >9 // Check for error from exit.
1988 | lsl RC, CARG1, #3 // RC = MULTRES*8.
1989 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
1990 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Re-materialize the constant registers TISNUM, TISNUMhi and TISNIL.
1991 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
1992 | movn TISNIL, #0
1993 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
1994 | str RCw, SAVE_MULTRES
1995 | str BASE, L->base
1996 | ldr CARG2, LFUNC:CARG2->pc
1997 | str xzr, GL->jit_base
1998 | mv_vmstate CARG4w, INTERP
1999 | ldr KBASE, [CARG2, #PC2PROTO(k)] // KBASE = constants of the current function's prototype.
2000 | // Modified copy of ins_next which handles function header dispatch, too.
2001 | ldrb RBw, [PC, # OFS_OP]
2002 | ldr INSw, [PC], #4 // Post-indexed load: fetch the instruction, then advance PC by 4.
2003 | st_vmstate CARG4w
2004 | cmp RBw, #BC_FUNCC+2 // Fast function?
2005 | add TMP1, GL, INS, uxtb #3
2006 | bhs >4
2007 |2:
2008 | cmp RBw, #BC_FUNCF // Function header?
2009 | add TMP0, GL, RB, uxtb #3
2010 | ldr RB, [TMP0, #GG_G2DISP] // RB = dispatch table entry for the opcode.
2011 | decode_RA RA, INS
2012 | lsr TMP0, INS, #16
2013 | csel RC, TMP0, RC, lo // Ordinary instruction: RC = upper 16 bits of INS; otherwise keep MULTRES*8.
2014 | blo >5
2015 | ldr CARG3, [BASE, FRAME_FUNC]
2016 | sub RC, RC, #8
2017 | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8
2018 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2019 |5:
2020 | br RB
2021 |
2022 |4: // Check frame below fast function.
2023 | ldr CARG1, [BASE, FRAME_PC]
2024 | ands CARG2, CARG1, #FRAME_TYPE
2025 | bne <2 // Trace stitching continuation?
2026 | // Otherwise set KBASE for Lua function below fast function.
2027 | ldr CARG3w, [CARG1, #-4] // Instruction before the frame's saved PC.
2028 | decode_RA CARG1, CARG3
2029 | sub CARG2, BASE, CARG1, lsl #3 // Step down by that instruction's RA operand * 8.
2030 | ldr LFUNC:CARG3, [CARG2, #-32]
2031 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2032 | ldr CARG3, LFUNC:CARG3->pc
2033 | ldr KBASE, [CARG3, #PC2PROTO(k)]
2034 | b <2
2035 |
2036 |9: // Rethrow error from the right C frame.
2037 | neg CARG2, CARG1 // Undo the negation of the error code.
2038 | mov CARG1, L
2039 | bl extern lj_err_throw // (lua_State *L, int errcode)
2040 |.endif
2041 |
2042 |//-----------------------------------------------------------------------
2043 |//-- Math helper functions ----------------------------------------------
2044 |//-----------------------------------------------------------------------
2045 |
2046 | // int lj_vm_modi(int dividend, int divisor);
2047 |->vm_modi:
2048 | eor CARG4w, CARG1w, CARG2w
2049 | cmp CARG4w, #0 // mi if dividend and divisor have different signs; flags stay live until the ccmp.
2050 | eor CARG3w, CARG1w, CARG1w, asr #31 // CARG3 = abs(dividend), finished by the sub below.
2051 | eor CARG4w, CARG2w, CARG2w, asr #31 // CARG4 = abs(divisor), finished by the sub below.
2052 | sub CARG3w, CARG3w, CARG1w, asr #31
2053 | sub CARG4w, CARG4w, CARG2w, asr #31
2054 | udiv CARG1w, CARG3w, CARG4w
2055 | msub CARG1w, CARG1w, CARG4w, CARG3w // CARG1 = abs(dividend) mod abs(divisor).
2056 | ccmp CARG1w, #0, #4, mi // Signs differ: eq iff remainder == 0; same signs: force eq (NZCV = 4).
2057 | sub CARG3w, CARG1w, CARG4w
2058 | csel CARG1w, CARG1w, CARG3w, eq // Signs differ and remainder != 0: remainder -= abs(divisor).
2059 | eor CARG3w, CARG1w, CARG2w
2060 | cmp CARG3w, #0
2061 | cneg CARG1w, CARG1w, mi // Negate when the sign bits of result and divisor differ.
2062 | ret
2063 |
2064 |//-----------------------------------------------------------------------
2065 |//-- Miscellaneous functions --------------------------------------------
2066 |//-----------------------------------------------------------------------
2067 |
2068 |//-----------------------------------------------------------------------
2069 |//-- FFI helper functions -----------------------------------------------
2070 |//-----------------------------------------------------------------------
2071 |
2072 |// Handler for callback functions.
2073 |// Callback slot number in w9, g in x10. Saveregs is performed by the handler itself.
2074 |->vm_ffi_callback:
2075 |.if FFI
2076 |.type CTSTATE, CTState, PC
2077 | saveregs
2078 | ldr CTSTATE, GL:x10->ctype_state
2079 | mov GL, x10
2080 | add x10, sp, # CFRAME_SPACE // x10 = sp + CFRAME_SPACE, recorded as cb.stack below.
2081 | str w9, CTSTATE->cb.slot
2082 | stp x0, x1, CTSTATE->cb.gpr[0] // Stash argument registers x0-x7 and d0-d7 in the callback state.
2083 | stp d0, d1, CTSTATE->cb.fpr[0]
2084 | stp x2, x3, CTSTATE->cb.gpr[2]
2085 | stp d2, d3, CTSTATE->cb.fpr[2]
2086 | stp x4, x5, CTSTATE->cb.gpr[4]
2087 | stp d4, d5, CTSTATE->cb.fpr[4]
2088 | stp x6, x7, CTSTATE->cb.gpr[6]
2089 | stp d6, d7, CTSTATE->cb.fpr[6]
2090 | str x10, CTSTATE->cb.stack
2091 | mov CARG1, CTSTATE
2092 | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
2093 | mov CARG2, sp
2094 | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
2095 | // Returns lua_State *.
2096 | ldp BASE, RC, L:CRET1->base // BASE = L->base, RC = the field following it.
2097 | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 // Set up the constant registers TISNUM, TISNUMhi and TISNIL.
2098 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
2099 | movn TISNIL, #0
2100 | mov L, CRET1
2101 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
2102 | sub RC, RC, BASE // RC = byte distance from BASE to the second loaded field.
2103 | st_vmstate ST_INTERP
2104 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
2105 | ins_callt
2106 |.endif
2107 |
2108 |->cont_ffi_callback: // Return from FFI callback.
2109 |.if FFI
2110 | ldr CTSTATE, GL->ctype_state
2111 | stp BASE, CARG4, L->base // Store BASE to L->base and CARG4 to the field following it.
2112 | str L, CTSTATE->L
2113 | mov CARG1, CTSTATE
2114 | mov CARG2, RA
2115 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2116 | ldp x0, x1, CTSTATE->cb.gpr[0] // Load the return registers from the callback state.
2117 | ldp d0, d1, CTSTATE->cb.fpr[0]
2118 | b ->vm_leave_unw
2119 |.endif
2120 |
2121 |->vm_ffi_call: // Call C function via FFI.
2122 | // Caveat: needs special frame unwinding, see below.
2123 |.if FFI
2124 | .type CCSTATE, CCallState, x19
2125 | stp fp, lr, [sp, #-32]! // Push a 32-byte frame holding fp and lr.
2126 | add fp, sp, #0
2127 | str CCSTATE, [sp, #16] // Save x19 (aliased as CCSTATE) in the frame; restored before ret.
2128 | mov CCSTATE, x0
2129 | ldr TMP0w, CCSTATE:x0->spadj
2130 | ldrb TMP1w, CCSTATE->nsp
2131 | add TMP2, CCSTATE, #offsetof(CCallState, stack)
2132 | subs TMP1, TMP1, #1 // TMP1 = index of the last stack slot; negative when nsp == 0.
2133 | ldr TMP3, CCSTATE->func
2134 | sub sp, fp, TMP0 // Reserve spadj bytes below the frame.
2135 | bmi >2
2136 |1: // Copy stack slots
2137 | ldr TMP0, [TMP2, TMP1, lsl #3]
2138 | str TMP0, [sp, TMP1, lsl #3]
2139 | subs TMP1, TMP1, #1
2140 | bpl <1
2141 |2:
2142 | ldp x0, x1, CCSTATE->gpr[0] // Load x0-x7 and d0-d7 from the call state.
2143 | ldp d0, d1, CCSTATE->fpr[0]
2144 | ldp x2, x3, CCSTATE->gpr[2]
2145 | ldp d2, d3, CCSTATE->fpr[2]
2146 | ldp x4, x5, CCSTATE->gpr[4]
2147 | ldp d4, d5, CCSTATE->fpr[4]
2148 | ldp x6, x7, CCSTATE->gpr[6]
2149 | ldp d6, d7, CCSTATE->fpr[6]
2150 | ldr x8, CCSTATE->retp // x8 = retp field of the call state.
2151 | blr TMP3
2152 | mov sp, fp // Drop the outgoing stack area.
2153 | stp x0, x1, CCSTATE->gpr[0] // Write back x0/x1 and d0-d3 into the call state.
2154 | stp d0, d1, CCSTATE->fpr[0]
2155 | stp d2, d3, CCSTATE->fpr[2]
2156 | ldr CCSTATE, [sp, #16]
2157 | ldp fp, lr, [sp], #32
2158 | ret
2159 |.endif
2160 |// Note: vm_ffi_call must be the last function in this object file!
2161 |
2162 |//-----------------------------------------------------------------------
2163}
2164
2165/* Generate the code for a single instruction. */
2166static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2167{
2168 int vk = 0;
2169 |=>defop:
2170
2171 switch (op) {
2172
2173 /* -- Comparison ops ---------------------------------------------------- */
2174
2175 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2176
2177 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2178 | // RA = src1, RC = src2, JMP with RC = target
2179 | ldr CARG1, [BASE, RA, lsl #3]
2180 | ldrh RBw, [PC, # OFS_RD] // 16-bit D operand of the instruction at PC (the following JMP, per the comment above).
2181 | ldr CARG2, [BASE, RC, lsl #3]
2182 | add PC, PC, #4 // Skip that instruction.
2183 | add RB, PC, RB, lsl #2
2184 | sub RB, RB, #0x20000 // RB = PC + (D - 0x8000)*4: the branch target.
2185 | checkint CARG1, >3
2186 | checkint CARG2, >4
2187 | cmp CARG1w, CARG2w // Both operands are integers: signed 32-bit compare.
2188 if (op == BC_ISLT) {
2189 | csel PC, RB, PC, lt
2190 } else if (op == BC_ISGE) {
2191 | csel PC, RB, PC, ge
2192 } else if (op == BC_ISLE) {
2193 | csel PC, RB, PC, le
2194 } else {
2195 | csel PC, RB, PC, gt
2196 }
2197 |1:
2198 | ins_next
2199 |
2200 |3: // RA not int.
2201 | ldr FARG1, [BASE, RA, lsl #3] // ldr leaves the flags from checkint intact for the blo below.
2202 | blo ->vmeta_comp
2203 | ldr FARG2, [BASE, RC, lsl #3]
2204 | cmp TISNUMhi, CARG2, lsr #32 // Compare the upper word of src2 against TISNUMhi.
2205 | bhi >5
2206 | bne ->vmeta_comp
2207 | // RA number, RC int.
2208 | scvtf FARG2, CARG2w
2209 | b >5
2210 |
2211 |4: // RA int, RC not int
2212 | ldr FARG2, [BASE, RC, lsl #3]
2213 | blo ->vmeta_comp
2214 | // RA int, RC number.
2215 | scvtf FARG1, CARG1w
2216 |
2217 |5: // RA number, RC number
2218 | fcmp FARG1, FARG2
2219 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2220 if (op == BC_ISLT) {
2221 | csel PC, RB, PC, lo
2222 } else if (op == BC_ISGE) {
2223 | csel PC, RB, PC, hs
2224 } else if (op == BC_ISLE) {
2225 | csel PC, RB, PC, ls
2226 } else {
2227 | csel PC, RB, PC, hi
2228 }
2229 | b <1
2230 break;
2231
2232 case BC_ISEQV: case BC_ISNEV:
2233 vk = op == BC_ISEQV;
2234 | // RA = src1, RC = src2, JMP with RC = target
2235 | ldr CARG1, [BASE, RA, lsl #3]
2236 | add RC, BASE, RC, lsl #3 // RC becomes the address of src2 (BC_ISEQN_Z/BC_ISNEN_Z reload through it).
2237 | ldrh RBw, [PC, # OFS_RD]
2238 | ldr CARG3, [RC]
2239 | add PC, PC, #4
2240 | add RB, PC, RB, lsl #2
2241 | sub RB, RB, #0x20000 // RB = PC + (D - 0x8000)*4: the branch target.
2242 | asr ITYPE, CARG3, #47
2243 | cmn ITYPE, #-LJ_TISNUM
2244 if (vk) {
2245 | bls ->BC_ISEQN_Z // src2 is a number: continue in the numeric comparison code.
2246 } else {
2247 | bls ->BC_ISNEN_Z
2248 }
2249 | // RC is not a number.
2250 | asr TMP0, CARG1, #47 // TMP0 = tag of src1.
2251 |.if FFI
2252 | // Check if RC or RA is a cdata.
2253 | cmn ITYPE, #-LJ_TCDATA
2254 | ccmn TMP0, #-LJ_TCDATA, #4, ne // eq if either tag is LJ_TCDATA (NZCV = 4 forces eq when the first test already matched).
2255 | beq ->vmeta_equal_cd
2256 |.endif
2257 | cmp CARG1, CARG3
2258 | bne >2
2259 | // Tag and value are equal.
2260 if (vk) {
2261 |->BC_ISEQV_Z:
2262 | mov PC, RB // Perform branch.
2263 }
2264 |1:
2265 | ins_next
2266 |
2267 |2: // Check if the tags are the same and it's a table or userdata.
2268 | cmp ITYPE, TMP0
2269 | ccmn ITYPE, #-LJ_TISTABUD, #2, eq // Different tags give NZCV = 2 (C set, Z clear), which satisfies hi below.
2270 if (vk) {
2271 | bhi <1
2272 } else {
2273 | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction.
2274 }
2275 | // Different tables or userdatas. Need to check __eq metamethod.
2276 | // Field metatable must be at same offset for GCtab and GCudata!
2277 | and TAB:CARG2, CARG1, #LJ_GCVMASK
2278 | ldr TAB:TMP2, TAB:CARG2->metatable
2279 if (vk) {
2280 | cbz TAB:TMP2, <1 // No metatable?
2281 | ldrb TMP1w, TAB:TMP2->nomm
2282 | mov CARG4, #0 // ne = 0
2283 | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done.
2284 } else {
2285 | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable?
2286 | ldrb TMP1w, TAB:TMP2->nomm
2287 | mov CARG4, #1 // ne = 1.
2288 | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done.
2289 }
2290 | b ->vmeta_equal
2291 break;
2292
2293 case BC_ISEQS: case BC_ISNES:
2294 vk = op == BC_ISEQS;
2295 | // RA = src, RC = str_const (~), JMP with RC = target
2296 | ldr CARG1, [BASE, RA, lsl #3]
2297 | mvn RC, RC // Complement the operand to get the (negative) index relative to KBASE.
2298 | ldrh RBw, [PC, # OFS_RD]
2299 | ldr CARG2, [KBASE, RC, lsl #3]
2300 | add PC, PC, #4
2301 | movn TMP0, #~LJ_TSTR // TMP0 = LJ_TSTR.
2302 |.if FFI
2303 | asr ITYPE, CARG1, #47
2304 |.endif
2305 | add RB, PC, RB, lsl #2
2306 | add CARG2, CARG2, TMP0, lsl #47 // Tag the constant so one 64-bit compare checks tag and pointer together.
2307 | sub RB, RB, #0x20000
2308 |.if FFI
2309 | cmn ITYPE, #-LJ_TCDATA
2310 | beq ->vmeta_equal_cd // src is a cdata: handled by the cdata comparison path.
2311 |.endif
2312 | cmp CARG1, CARG2
2313 if (vk) {
2314 | csel PC, RB, PC, eq
2315 } else {
2316 | csel PC, RB, PC, ne
2317 }
2318 | ins_next
2319 break;
2320
2321 case BC_ISEQN: case BC_ISNEN:
2322 vk = op == BC_ISEQN;
2323 | // RA = src, RC = num_const (~), JMP with RC = target
2324 | ldr CARG1, [BASE, RA, lsl #3]
2325 | add RC, KBASE, RC, lsl #3 // RC = address of the numeric constant.
2326 | ldrh RBw, [PC, # OFS_RD]
2327 | ldr CARG3, [RC]
2328 | add PC, PC, #4
2329 | add RB, PC, RB, lsl #2
2330 | sub RB, RB, #0x20000 // RB = PC + (D - 0x8000)*4: the branch target.
2331 if (vk) {
2332 |->BC_ISEQN_Z:
2333 } else {
2334 |->BC_ISNEN_Z:
2335 }
2336 | checkint CARG1, >4
2337 | checkint CARG3, >6
2338 | cmp CARG1w, CARG3w
2339 |1:
2340 if (vk) {
2341 | csel PC, RB, PC, eq
2342 |2:
2343 } else {
2344 |2:
2345 | csel PC, RB, PC, ne // For ISNEN label 2 sits before the csel, so arriving with ne takes the branch.
2346 }
2347 |3:
2348 | ins_next
2349 |
2350 |4: // RA not int.
2351 |.if FFI
2352 | blo >7
2353 |.else
2354 | blo <2
2355 |.endif
2356 | ldr FARG1, [BASE, RA, lsl #3]
2357 | ldr FARG2, [RC]
2358 | cmp TISNUMhi, CARG3, lsr #32 // Compare the upper word of the constant against TISNUMhi.
2359 | bne >5
2360 | // RA number, RC int.
2361 | scvtf FARG2, CARG3w
2362 |5:
2363 | // RA number, RC number.
2364 | fcmp FARG1, FARG2
2365 | b <1
2366 |
2367 |6: // RA int, RC number
2368 | ldr FARG2, [RC]
2369 | scvtf FARG1, CARG1w
2370 | fcmp FARG1, FARG2
2371 | b <1
2372 |
2373 |.if FFI
2374 |7:
2375 | asr ITYPE, CARG1, #47
2376 | cmn ITYPE, #-LJ_TCDATA
2377 | bne <2 // Not a cdata: continue at label 2 with ne set.
2378 | b ->vmeta_equal_cd
2379 |.endif
2380 break;
2381
2382 case BC_ISEQP: case BC_ISNEP:
2383 vk = op == BC_ISEQP;
2384 | // RA = src, RC = primitive_type (~), JMP with RC = target
2385 | ldr TMP0, [BASE, RA, lsl #3]
2386 | ldrh RBw, [PC, # OFS_RD]
2387 | add PC, PC, #4
2388 | add RC, RC, #1 // RC + 1 equals the negated tag (-~RC), so cmn against the tag yields eq on a match.
2389 | add RB, PC, RB, lsl #2
2390 |.if FFI
2391 | asr ITYPE, TMP0, #47
2392 | cmn ITYPE, #-LJ_TCDATA
2393 | beq ->vmeta_equal_cd
2394 | cmn RC, ITYPE
2395 |.else
2396 | cmn RC, TMP0, asr #47
2397 |.endif
2398 | sub RB, RB, #0x20000 // Plain sub does not disturb the flags used by the csel below.
2399 if (vk) {
2400 | csel PC, RB, PC, eq
2401 } else {
2402 | csel PC, RB, PC, ne
2403 }
2404 | ins_next
2405 break;
2406
2407 /* -- Unary test and copy ops ------------------------------------------- */
2408
2409 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2410 | // RA = dst or unused, RC = src, JMP with RC = target
2411 | ldrh RBw, [PC, # OFS_RD]
2412 | ldr TMP0, [BASE, RC, lsl #3]
2413 | add PC, PC, #4
2414 | mov_false TMP1
2415 | add RB, PC, RB, lsl #2
2416 | cmp TMP0, TMP1 // Unsigned compare of the value against the constant produced by mov_false.
2417 | sub RB, RB, #0x20000
2418 if (op == BC_ISTC || op == BC_IST) {
2419 if (op == BC_ISTC) {
2420 | csel RA, RA, RC, lo // When the test fails, redirect the store below to src itself (rewrites the same value).
2421 }
2422 | csel PC, RB, PC, lo // Branch when the value is below the 'false' constant.
2423 } else {
2424 if (op == BC_ISFC) {
2425 | csel RA, RA, RC, hs
2426 }
2427 | csel PC, RB, PC, hs // Branch when the value is at or above the 'false' constant.
2428 }
2429 if (op == BC_ISTC || op == BC_ISFC) {
2430 | str TMP0, [BASE, RA, lsl #3] // Copy src to the slot selected above.
2431 }
2432 | ins_next
2433 break;
2434
2435 case BC_ISTYPE:
2436 | // RA = src, RC = -type
2437 | ldr TMP0, [BASE, RA, lsl #3]
2438 | cmn RC, TMP0, asr #47 // RC + tag == 0 exactly when the tag equals the expected type.
2439 | bne ->vmeta_istype
2440 | ins_next
2441 break;
2442 case BC_ISNUM:
2443 | // RA = src, RC = -(TISNUM-1)
2444 | ldr TMP0, [BASE, RA, lsl #3] // RA is a slot index, so scale it by 8 like BC_ISTYPE and every other handler do.
2445 | checknum TMP0, ->vmeta_istype // Branches to ->vmeta_istype when the slot fails the number check.
2446 | ins_next
2447 break;
2448
2449 /* -- Unary ops --------------------------------------------------------- */
2450
2451 case BC_MOV:
2452 | // RA = dst, RC = src
2453 | ldr TMP0, [BASE, RC, lsl #3] // Copy the whole 64-bit slot from src ...
2454 | str TMP0, [BASE, RA, lsl #3] // ... to dst.
2455 | ins_next
2456 break;
2457 case BC_NOT:
2458 | // RA = dst, RC = src
2459 | ldr TMP0, [BASE, RC, lsl #3]
2460 | mov_false TMP1
2461 | mov_true TMP2
2462 | cmp TMP0, TMP1 // Unsigned compare against the 'false' constant.
2463 | csel TMP0, TMP1, TMP2, lo // Below it: result is false; otherwise the result is true.
2464 | str TMP0, [BASE, RA, lsl #3]
2465 | ins_next
2466 break;
2467 case BC_UNM:
2468 | // RA = dst, RC = src
2469 | ldr TMP0, [BASE, RC, lsl #3]
2470 | asr ITYPE, TMP0, #47
2471 | cmn ITYPE, #-LJ_TISNUM
2472 | bhi ->vmeta_unm
2473 | eor TMP0, TMP0, #U64x(80000000,00000000) // Flip bit 63: negates an FP number; eor leaves the flags alone.
2474 | bne >5 // Tag != LJ_TISNUM: the flipped value is stored as is.
2475 | negs TMP0w, TMP0w // Integer: 32-bit negate (clears the upper half) and set V on overflow.
2476 | movz CARG3, #0x41e0, lsl #48 // 2^31.
2477 | add TMP0, TMP0, TISNUM // Re-box as an integer.
2478 | csel TMP0, TMP0, CARG3, vc // On overflow store the number 2^31 instead.
2479 |5:
2480 | str TMP0, [BASE, RA, lsl #3]
2481 | ins_next
2482 break;
2483 case BC_LEN:
2484 | // RA = dst, RC = src
2485 | ldr CARG1, [BASE, RC, lsl #3]
2486 | asr ITYPE, CARG1, #47
2487 | cmn ITYPE, #-LJ_TSTR
2488 | and CARG1, CARG1, #LJ_GCVMASK // Strip the tag; 'and' does not alter the flags tested next.
2489 | bne >2
2490 | ldr CARG1w, STR:CARG1->len // String: the length is read straight from the GCstr.
2491 |1:
2492 | add CARG1, CARG1, TISNUM // Box the length as an integer.
2493 | str CARG1, [BASE, RA, lsl #3]
2494 | ins_next
2495 |
2496 |2:
2497 | cmn ITYPE, #-LJ_TTAB
2498 | bne ->vmeta_len // Neither string nor table.
2499#if LJ_52
2500 | ldr TAB:CARG2, TAB:CARG1->metatable
2501 | cbnz TAB:CARG2, >9 // Table has a metatable: check for __len at label 9.
2502 |3:
2503#endif
2504 |->BC_LEN_Z:
2505 | bl extern lj_tab_len // (GCtab *t)
2506 | // Returns uint32_t (but less than 2^31).
2507 | b <1
2508 |
2509#if LJ_52
2510 |9:
2511 | ldrb TMP1w, TAB:CARG2->nomm
2512 | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done.
2513 | b ->vmeta_len
2514#endif
2515 break;
2516
2517 /* -- Binary ops -------------------------------------------------------- */
2518
2519 |.macro ins_arithcheck_int, target // Branch to target unless both CARG1 and CARG2 pass checkint.
2520 | checkint CARG1, target
2521 | checkint CARG2, target
2522 |.endmacro
2523 |
2524 |.macro ins_arithcheck_num, target // Branch to target unless both CARG1 and CARG2 pass checknum.
2525 | checknum CARG1, target
2526 | checknum CARG2, target
2527 |.endmacro
2528 |
2529 |.macro ins_arithcheck_nzdiv, target // Branch to target when the 32-bit divisor in CARG2w is zero.
2530 | cbz CARG2w, target
2531 |.endmacro
2532 |
2533 |.macro ins_arithhead // Sets vk (0, 1 or 2 by opcode group) and decodes RB and the 8-bit RC.
2534 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2535 ||if (vk == 1) {
2536 | and RC, RC, #255
2537 | decode_RB RB, INS
2538 ||} else {
2539 | decode_RB RB, INS
2540 | and RC, RC, #255
2541 ||}
2542 |.endmacro
2543 |
2544 |.macro ins_arithload, reg1, reg2 // Load both operands according to vk; case 1 swaps constant and slot.
2545 | // RA = dst, RB = src1, RC = src2 | num_const
2546 ||switch (vk) {
2547 ||case 0:
2548 | ldr reg1, [BASE, RB, lsl #3]
2549 | ldr reg2, [KBASE, RC, lsl #3]
2550 || break;
2551 ||case 1:
2552 | ldr reg1, [KBASE, RC, lsl #3]
2553 | ldr reg2, [BASE, RB, lsl #3]
2554 || break;
2555 ||default:
2556 | ldr reg1, [BASE, RB, lsl #3]
2557 | ldr reg2, [BASE, RC, lsl #3]
2558 || break;
2559 ||}
2560 |.endmacro
2561 |
2562 |.macro ins_arithfallback, ins // Apply 'ins' (a branch or a check macro) to the vmeta_arith_* target matching vk.
2563 ||switch (vk) {
2564 ||case 0:
2565 | ins ->vmeta_arith_vn
2566 || break;
2567 ||case 1:
2568 | ins ->vmeta_arith_nv
2569 || break;
2570 ||default:
2571 | ins ->vmeta_arith_vv
2572 || break;
2573 ||}
2574 |.endmacro
2575 |
2576 |.macro ins_arithmod, res, reg1, reg2 // res = reg1 - floor(reg1/reg2)*reg2; clobbers d2.
2577 | fdiv d2, reg1, reg2
2578 | frintm d2, d2 // Round toward minus infinity.
2579 | fmsub res, d2, reg2, reg1
2580 |.endmacro
2581 |
2582 |.macro ins_arithdn, intins, fpins // Integer fast path with a number path at label 5.
2583 | ins_arithhead
2584 | ins_arithload CARG1, CARG2
2585 | ins_arithcheck_int >5
2586 |.if "intins" == "smull"
2587 | smull CARG1, CARG1w, CARG2w // Full 64-bit signed product.
2588 | cmp CARG1, CARG1, sxtw // ne when the product does not fit in 32 bits.
2589 | mov CARG1w, CARG1w // Zero-extend the low word; mov does not change the flags.
2590 | ins_arithfallback bne
2591 |.elif "intins" == "ins_arithmodi"
2592 | ins_arithfallback ins_arithcheck_nzdiv // Zero divisor goes to the fallback.
2593 | bl ->vm_modi
2594 |.else
2595 | intins CARG1w, CARG1w, CARG2w
2596 | ins_arithfallback bvs // Signed overflow goes to the fallback.
2597 |.endif
2598 | add CARG1, CARG1, TISNUM // Box the 32-bit result as an integer.
2599 | str CARG1, [BASE, RA, lsl #3]
2600 |4:
2601 | ins_next
2602 |
2603 |5: // FP variant.
2604 | ins_arithload FARG1, FARG2 // Reload the operands as doubles; CARG1/CARG2 still hold the raw bits for the check.
2605 | ins_arithfallback ins_arithcheck_num
2606 | fpins FARG1, FARG1, FARG2
2607 | str FARG1, [BASE, RA, lsl #3]
2608 | b <4
2609 |.endmacro
2610 |
2611 |.macro ins_arithfp, fpins // Number-only variant: no integer fast path.
2612 | ins_arithhead
2613 | ins_arithload CARG1, CARG2 // Raw bits for the type check ...
2614 | ins_arithload FARG1, FARG2 // ... and the same operands as doubles for the operation.
2615 | ins_arithfallback ins_arithcheck_num
2616 |.if "fpins" == "fpow"
2617 | bl extern pow
2618 |.else
2619 | fpins FARG1, FARG1, FARG2
2620 |.endif
2621 | str FARG1, [BASE, RA, lsl #3]
2622 | ins_next
2623 |.endmacro
2624
2625 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2626 | ins_arithdn adds, fadd // adds sets the overflow flag tested by the bvs fallback.
2627 break;
2628 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2629 | ins_arithdn subs, fsub
2630 break;
2631 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2632 | ins_arithdn smull, fmul // Selects the smull branch of ins_arithdn.
2633 break;
2634 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2635 | ins_arithfp fdiv // Division has no integer fast path.
2636 break;
2637 case BC_MODVN: case BC_MODNV: case BC_MODVV:
2638 | ins_arithdn ins_arithmodi, ins_arithmod // Integer path calls ->vm_modi; number path uses the ins_arithmod macro.
2639 break;
2640 case BC_POW:
2641 | // NYI: (partial) integer arithmetic.
2642 | ins_arithfp fpow // Selects the 'bl extern pow' branch of ins_arithfp.
2643 break;
2644
2645 case BC_CAT: /* Concatenate slots RB..RC via lj_meta_cat and store the result in RA. */
2646 | decode_RB RB, INS
2647 | and RC, RC, #255
2648 | // RA = dst, RB = src_start, RC = src_end
2649 | str BASE, L->base
2650 | sub CARG3, RC, RB // left = src_end - src_start
2651 | add CARG2, BASE, RC, lsl #3 // top-1 = address of slot RC
2652 |->BC_CAT_Z:
2653 | // RA = dst, CARG2 = top-1, CARG3 = left
2654 | mov CARG1, L
2655 | str PC, SAVE_PC
2656 | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2657 | // Returns NULL (finished) or TValue * (metamethod).
2658 | ldrb RBw, [PC, #-4+OFS_RB] // Reload the RB operand from the instruction word at PC-4.
2659 | ldr BASE, L->base // Reload BASE after the call.
2660 | cbnz CRET1, ->vmeta_binop // Non-NULL result: continue with the metamethod.
2661 | ldr TMP0, [BASE, RB, lsl #3] // Finished: result is in slot RB.
2662 | str TMP0, [BASE, RA, lsl #3] // Copy result to RA.
2663 | ins_next
2664 break;
2665
2666 /* -- Constant ops ------------------------------------------------------ */
2667
2668 case BC_KSTR: /* Store a tagged string constant in slot RA. */
2669 | // RA = dst, RC = str_const (~)
2670 | mvn RC, RC // Undo the complement (~) of the constant index.
2671 | ldr TMP0, [KBASE, RC, lsl #3] // Load the constant from KBASE[RC].
2672 | movn TMP1, #~LJ_TSTR // TMP1 = LJ_TSTR (movn writes the inverse of its immediate).
2673 | add TMP0, TMP0, TMP1, lsl #47 // Add the type tag, shifted to bit 47 and up.
2674 | str TMP0, [BASE, RA, lsl #3]
2675 | ins_next
2676 break;
2677 case BC_KCDATA: /* Store a tagged cdata constant in slot RA; emits no code unless FFI is set. */
2678 |.if FFI
2679 | // RA = dst, RC = cdata_const (~)
2680 | mvn RC, RC // Undo the complement (~) of the constant index.
2681 | ldr TMP0, [KBASE, RC, lsl #3] // Load the constant from KBASE[RC].
2682 | movn TMP1, #~LJ_TCDATA // TMP1 = LJ_TCDATA (movn writes the inverse of its immediate).
2683 | add TMP0, TMP0, TMP1, lsl #47 // Add the type tag, shifted to bit 47 and up.
2684 | str TMP0, [BASE, RA, lsl #3]
2685 | ins_next
2686 |.endif
2687 break;
2688 case BC_KSHORT: /* Store a sign-extended 16 bit literal as a tagged integer in slot RA. */
2689 | // RA = dst, RC = int16_literal
2690 | sxth RCw, RCw // Sign-extend 16 -> 32 bits; the upper 32 bits of RC become zero.
2691 | add TMP0, RC, TISNUM // Add TISNUM to form the tagged integer.
2692 | str TMP0, [BASE, RA, lsl #3]
2693 | ins_next
2694 break;
2695 case BC_KNUM: /* Copy a number constant to slot RA. */
2696 | // RA = dst, RC = num_const
2697 | ldr TMP0, [KBASE, RC, lsl #3] // Constant is copied verbatim, no tagging.
2698 | str TMP0, [BASE, RA, lsl #3]
2699 | ins_next
2700 break;
2701 case BC_KPRI: /* Store a primitive value built from the type number in RC in slot RA. */
2702 | // RA = dst, RC = primitive_type (~)
2703 | mvn TMP0, RC, lsl #47 // TMP0 = ~(RC << 47)
2704 | str TMP0, [BASE, RA, lsl #3]
2705 | ins_next
2706 break;
2707 case BC_KNIL: /* Fill slots RA..RC (inclusive) with nil. */
2708 | // RA = base, RC = end
2709 | add RA, BASE, RA, lsl #3 // RA = address of first slot.
2710 | add RC, BASE, RC, lsl #3 // RC = address of last slot.
2711 | str TISNIL, [RA], #8 // First store is unconditional.
2712 |1:
2713 | cmp RA, RC
2714 | str TISNIL, [RA], #8 // Store happens before the branch: at least two slots are written.
2715 | blt <1 // Loop while the slot just written was below the end slot.
2716 | ins_next_
2717 break;
2718
2719 /* -- Upvalue and function ops ------------------------------------------ */
2720
2721 case BC_UGET: /* Copy the value of upvalue RC to slot RA. */
2722 | // RA = dst, RC = uvnum
2723 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] // Tagged function of the current frame.
2724 | add RC, RC, #offsetof(GCfuncL, uvptr)/8 // Index of uvptr[uvnum] in 8 byte units.
2725 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2726 | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] // uv = func->uvptr[uvnum]
2727 | ldr CARG2, UPVAL:CARG2->v // Pointer to the upvalue's value.
2728 | ldr TMP0, [CARG2]
2729 | str TMP0, [BASE, RA, lsl #3]
2730 | ins_next
2731 break;
2732 case BC_USETV: /* Store slot RC into upvalue RA, with a GC write barrier where needed. */
2733 | // RA = uvnum, RC = src
2734 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] // Tagged function of the current frame.
2735 | add RA, RA, #offsetof(GCfuncL, uvptr)/8 // Index of uvptr[uvnum] in 8 byte units.
2736 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2737 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] // uv = func->uvptr[uvnum]
2738 | ldr CARG3, [BASE, RC, lsl #3] // Value to store.
2739 | ldr CARG2, UPVAL:CARG1->v // Pointer to the upvalue's value (also the tv argument below).
2740 | ldrb TMP2w, UPVAL:CARG1->marked
2741 | ldrb TMP0w, UPVAL:CARG1->closed
2742 | asr ITYPE, CARG3, #47 // Extract the type tag of the value.
2743 | str CARG3, [CARG2] // Store first; the barrier check follows.
2744 | add ITYPE, ITYPE, #-LJ_TISGCV
2745 | tst TMP2w, #LJ_GC_BLACK // isblack(uv)
2746 | ccmp TMP0w, #0, #4, ne // && uv->closed
2747 | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v)
2748 | bhi >2 // All three conditions hold: check the value's color.
2749 |1:
2750 | ins_next
2751 |
2752 |2: // Check if new value is white.
2753 | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK // Strip the tag to get the GC object pointer.
2754 | ldrb TMP1w, GCOBJ:CARG3->gch.marked
2755 | tst TMP1w, #LJ_GC_WHITES // iswhite(v)
2756 | beq <1 // Not white: no barrier needed.
2757 | // Crossed a write barrier. Move the barrier forward.
2758 | mov CARG1, GL
2759 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2760 | b <1
2761 break;
2762 case BC_USETS: /* Store a string constant into upvalue RA, with a GC write barrier where needed. */
2763 | // RA = uvnum, RC = str_const (~)
2764 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] // Tagged function of the current frame.
2765 | add RA, RA, #offsetof(GCfuncL, uvptr)/8 // Index of uvptr[uvnum] in 8 byte units.
2766 | mvn RC, RC // Undo the complement (~) of the constant index.
2767 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2768 | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] // uv = func->uvptr[uvnum]
2769 | ldr STR:CARG3, [KBASE, RC, lsl #3] // String constant from KBASE[RC].
2770 | movn TMP0, #~LJ_TSTR // TMP0 = LJ_TSTR (movn writes the inverse of its immediate).
2771 | ldr CARG2, UPVAL:CARG1->v // Pointer to the upvalue's value (also the tv argument below).
2772 | ldrb TMP2w, UPVAL:CARG1->marked
2773 | add TMP0, STR:CARG3, TMP0, lsl #47 // Tagged string value.
2774 | ldrb TMP1w, STR:CARG3->marked
2775 | str TMP0, [CARG2] // Store first; the barrier check follows.
2776 | tbnz TMP2w, #2, >2 // isblack(uv)
2777 |1:
2778 | ins_next
2779 |
2780 |2: // Check if string is white and ensure upvalue is closed.
2781 | ldrb TMP0w, UPVAL:CARG1->closed
2782 | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
2783 | ccmp TMP0w, #0, #4, ne // && uv->closed
2784 | beq <1 // Not white or not closed: no barrier needed.
2785 | // Crossed a write barrier. Move the barrier forward.
2786 | mov CARG1, GL
2787 | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
2788 | b <1
2789 break;
2790 case BC_USETN: /* Store a number constant into upvalue RA; no barrier code is emitted. */
2791 | // RA = uvnum, RC = num_const
2792 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] // Tagged function of the current frame.
2793 | add RA, RA, #offsetof(GCfuncL, uvptr)/8 // Index of uvptr[uvnum] in 8 byte units.
2794 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2795 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] // uv = func->uvptr[uvnum]
2796 | ldr TMP0, [KBASE, RC, lsl #3] // Number constant from KBASE[RC].
2797 | ldr CARG2, UPVAL:CARG2->v // Pointer to the upvalue's value.
2798 | str TMP0, [CARG2]
2799 | ins_next
2800 break;
2801 case BC_USETP: /* Store a primitive value into upvalue RA; no barrier code is emitted. */
2802 | // RA = uvnum, RC = primitive_type (~)
2803 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] // Tagged function of the current frame.
2804 | add RA, RA, #offsetof(GCfuncL, uvptr)/8 // Index of uvptr[uvnum] in 8 byte units.
2805 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2806 | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] // uv = func->uvptr[uvnum]
2807 | mvn TMP0, RC, lsl #47 // TMP0 = ~(RC << 47)
2808 | ldr CARG2, UPVAL:CARG2->v // Pointer to the upvalue's value.
2809 | str TMP0, [CARG2]
2810 | ins_next
2811 break;
2812
2813 case BC_UCLO: /* Close open upvalues from slot RA upward (if any), then jump to the target. */
2814 | // RA = level, RC = target
2815 | ldr CARG3, L->openupval
2816 | add RC, PC, RC, lsl #2
2817 | str BASE, L->base
2818 | sub PC, RC, #0x20000 // PC = PC + RC*4 - 0x20000 (RC is a biased jump operand).
2819 | cbz CARG3, >1 // L->openupval is NULL: nothing to close.
2820 | mov CARG1, L
2821 | add CARG2, BASE, RA, lsl #3 // level = address of slot RA.
2822 | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
2823 | ldr BASE, L->base // Reload BASE after the call.
2824 |1:
2825 | ins_next
2826 break;
2827
2828 case BC_FNEW: /* Create a closure from a prototype constant and store it in slot RA. */
2829 | // RA = dst, RC = proto_const (~) (holding function prototype)
2830 | mvn RC, RC // Undo the complement (~) of the constant index.
2831 | str BASE, L->base
2832 | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Current function becomes the parent.
2833 | str PC, SAVE_PC
2834 | ldr CARG2, [KBASE, RC, lsl #3] // Prototype from KBASE[RC].
2835 | mov CARG1, L
2836 | and LFUNC:CARG3, CARG3, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2837 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2838 | bl extern lj_func_newL_gc
2839 | // Returns GCfuncL *.
2840 | ldr BASE, L->base // Reload BASE after the call.
2841 | movn TMP0, #~LJ_TFUNC // TMP0 = LJ_TFUNC (movn writes the inverse of its immediate).
2842 | add CRET1, CRET1, TMP0, lsl #47 // Tag the returned pointer as a function.
2843 | str CRET1, [BASE, RA, lsl #3]
2844 | ins_next
2845 break;
2846
2847 /* -- Table ops --------------------------------------------------------- */
2848
2849 case BC_TNEW: /* Create a new table (TNEW) or duplicate a template table (TDUP) into slot RA. */
2850 case BC_TDUP:
2851 | // RA = dst, RC = (hbits|asize) | tab_const (~)
2852 | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total.
2853 | str BASE, L->base
2854 | str PC, SAVE_PC
2855 | mov CARG1, L
2856 | cmp CARG3, CARG4
2857 | bhs >5 // total >= threshold: run a GC step first.
2858 |1:
2859 if (op == BC_TNEW) {
2860 | and CARG2, RC, #0x7ff // asize = low 11 bits of RC.
2861 | lsr CARG3, RC, #11 // hbits = remaining upper bits of RC.
2862 | cmp CARG2, #0x7ff
2863 | mov TMP0, #0x801
2864 | csel CARG2, CARG2, TMP0, ne // asize == 0x7ff is replaced by 0x801.
2865 | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2866 | // Returns GCtab *.
2867 } else {
2868 | mvn RC, RC // Undo the complement (~) of the constant index.
2869 | ldr CARG2, [KBASE, RC, lsl #3] // Template table from KBASE[RC].
2870 | bl extern lj_tab_dup // (lua_State *L, Table *kt)
2871 | // Returns GCtab *.
2872 }
2873 | ldr BASE, L->base // Reload BASE after the call.
2874 | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 // Insert the table tag into the top 16 bits.
2875 | str CRET1, [BASE, RA, lsl #3]
2876 | ins_next
2877 |
2878 |5:
2879 | bl extern lj_gc_step_fixtop // (lua_State *L)
2880 | mov CARG1, L // Set up CARG1 again for the allocation call at 1:.
2881 | b <1
2882 break;
2883
2884 case BC_GGET: /* Global get/set: index the function's env table with a string constant. */
2885 | // RA = dst, RC = str_const (~)
2886 case BC_GSET:
2887 | // RA = src, RC = str_const (~)
2888 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] // Tagged function of the current frame.
2889 | mvn RC, RC // Undo the complement (~) of the constant index.
2890 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
2891 | ldr TAB:CARG2, LFUNC:CARG1->env // Table to index = func->env.
2892 | ldr STR:RC, [KBASE, RC, lsl #3] // Key string from KBASE[RC].
2893 if (op == BC_GGET) {
2894 | b ->BC_TGETS_Z // Continue in the shared string-key get path.
2895 } else {
2896 | b ->BC_TSETS_Z // Continue in the shared string-key set path.
2897 }
2898 break;
2899
2900 case BC_TGETV: /* Table get with a variable key: array fast path, string keys via BC_TGETS_Z. */
2901 | decode_RB RB, INS
2902 | and RC, RC, #255
2903 | // RA = dst, RB = table, RC = key
2904 | ldr CARG2, [BASE, RB, lsl #3]
2905 | ldr TMP1, [BASE, RC, lsl #3]
2906 | checktab CARG2, ->vmeta_tgetv
2907 | checkint TMP1, >9 // Integer key?
2908 | ldr CARG3, TAB:CARG2->array
2909 | ldr CARG1w, TAB:CARG2->asize
2910 | add CARG3, CARG3, TMP1, uxtw #3 // Address of array[key]; uses only the low 32 bits of the key.
2911 | cmp TMP1w, CARG1w // In array part?
2912 | bhs ->vmeta_tgetv // Unsigned compare: negative keys are out of range, too.
2913 | ldr TMP0, [CARG3]
2914 | cmp TMP0, TISNIL
2915 | beq >5
2916 |1:
2917 | str TMP0, [BASE, RA, lsl #3]
2918 | ins_next
2919 |
2920 |5: // Check for __index if table value is nil.
2921 | ldr TAB:CARG1, TAB:CARG2->metatable
2922 | cbz TAB:CARG1, <1 // No metatable: done.
2923 | ldrb TMP1w, TAB:CARG1->nomm
2924 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
2925 | b ->vmeta_tgetv
2926 |
2927 |9:
2928 | asr ITYPE, TMP1, #47 // Extract the type tag of the key.
2929 | cmn ITYPE, #-LJ_TSTR // String key?
2930 | bne ->vmeta_tgetv
2931 | and STR:RC, TMP1, #LJ_GCVMASK // Strip the tag to get the GCstr pointer.
2932 | b ->BC_TGETS_Z
2933 break;
2934 case BC_TGETS: /* Table get with a string constant key: walk the hash chain for the key. */
2935 | decode_RB RB, INS
2936 | and RC, RC, #255
2937 | // RA = dst, RB = table, RC = str_const (~)
2938 | ldr CARG2, [BASE, RB, lsl #3]
2939 | mvn RC, RC // Undo the complement (~) of the constant index.
2940 | ldr STR:RC, [KBASE, RC, lsl #3]
2941 | checktab CARG2, ->vmeta_tgets1
2942 |->BC_TGETS_Z:
2943 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
2944 | ldr TMP1w, TAB:CARG2->hmask
2945 | ldr TMP2w, STR:RC->hash
2946 | ldr NODE:CARG3, TAB:CARG2->node
2947 | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
2948 | add TMP1, TMP1, TMP1, lsl #1 // idx*3
2949 | movn CARG4, #~LJ_TSTR // CARG4 = LJ_TSTR (movn writes the inverse of its immediate).
2950 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
2951 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
2952 |1:
2953 | ldp TMP0, CARG1, NODE:CARG3->val // TMP0 = node value, CARG1 = the following 8 bytes (compared as the key).
2954 | ldr NODE:CARG3, NODE:CARG3->next // Preload the next node in the chain.
2955 | cmp CARG1, CARG4
2956 | bne >4
2957 | cmp TMP0, TISNIL
2958 | beq >5
2959 |3:
2960 | str TMP0, [BASE, RA, lsl #3]
2961 | ins_next
2962 |
2963 |4: // Follow hash chain.
2964 | cbnz NODE:CARG3, <1
2965 | // End of hash chain: key not found, nil result.
2966 | mov TMP0, TISNIL
2967 |
2968 |5: // Check for __index if table value is nil.
2969 | ldr TAB:CARG1, TAB:CARG2->metatable
2970 | cbz TAB:CARG1, <3 // No metatable: done.
2971 | ldrb TMP1w, TAB:CARG1->nomm
2972 | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done.
2973 | b ->vmeta_tgets
2974 break;
2975 case BC_TGETB: /* Table get with an 8 bit literal index: array part only, else vmeta_tgetb. */
2976 | decode_RB RB, INS
2977 | and RC, RC, #255
2978 | // RA = dst, RB = table, RC = index
2979 | ldr CARG2, [BASE, RB, lsl #3]
2980 | checktab CARG2, ->vmeta_tgetb
2981 | ldr CARG3, TAB:CARG2->array
2982 | ldr CARG1w, TAB:CARG2->asize
2983 | add CARG3, CARG3, RC, lsl #3 // Address of array[index].
2984 | cmp RCw, CARG1w // In array part?
2985 | bhs ->vmeta_tgetb // index >= asize: not in the array part.
2986 | ldr TMP0, [CARG3]
2987 | cmp TMP0, TISNIL
2988 | beq >5
2989 |1:
2990 | str TMP0, [BASE, RA, lsl #3]
2991 | ins_next
2992 |
2993 |5: // Check for __index if table value is nil.
2994 | ldr TAB:CARG1, TAB:CARG2->metatable
2995 | cbz TAB:CARG1, <1 // No metatable: done.
2996 | ldrb TMP1w, TAB:CARG1->nomm
2997 | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
2998 | b ->vmeta_tgetb
2999 break;
3000 case BC_TGETR: /* Raw array get: no type checks on table or key, no nil/metatable check. */
3001 | decode_RB RB, INS
3002 | and RC, RC, #255
3003 | // RA = dst, RB = table, RC = key
3004 | ldr CARG1, [BASE, RB, lsl #3]
3005 | ldr TMP1, [BASE, RC, lsl #3]
3006 | and TAB:CARG1, CARG1, #LJ_GCVMASK // Strip the tag to get the GCtab pointer.
3007 | ldr CARG3, TAB:CARG1->array
3008 | ldr TMP2w, TAB:CARG1->asize
3009 | add CARG3, CARG3, TMP1w, uxtw #3 // Address of array[key]; uses only the low 32 bits of the key.
3010 | cmp TMP1w, TMP2w // In array part?
3011 | bhs ->vmeta_tgetr // Unsigned compare: out of range goes to vmeta_tgetr.
3012 | ldr TMP0, [CARG3]
3013 |->BC_TGETR_Z: // Entry with the value already in TMP0.
3014 | str TMP0, [BASE, RA, lsl #3]
3015 | ins_next
3016 break;
3017
3018 case BC_TSETV: /* Table set with a variable key: array fast path, string keys via BC_TSETS_Z. */
3019 | decode_RB RB, INS
3020 | and RC, RC, #255
3021 | // RA = src, RB = table, RC = key
3022 | ldr CARG2, [BASE, RB, lsl #3]
3023 | ldr TMP1, [BASE, RC, lsl #3]
3024 | checktab CARG2, ->vmeta_tsetv
3025 | checkint TMP1, >9 // Integer key?
3026 | ldr CARG3, TAB:CARG2->array
3027 | ldr CARG1w, TAB:CARG2->asize
3028 | add CARG3, CARG3, TMP1, uxtw #3 // Address of array[key]; uses only the low 32 bits of the key.
3029 | cmp TMP1w, CARG1w // In array part?
3030 | bhs ->vmeta_tsetv // Unsigned compare: negative keys are out of range, too.
3031 | ldr TMP1, [CARG3] // Previous value in the array slot.
3032 | ldr TMP0, [BASE, RA, lsl #3] // Value to store.
3033 | ldrb TMP2w, TAB:CARG2->marked
3034 | cmp TMP1, TISNIL // Previous value is nil?
3035 | beq >5
3036 |1:
3037 | str TMP0, [CARG3]
3038 | tbnz TMP2w, #2, >7 // isblack(table)
3039 |2:
3040 | ins_next
3041 |
3042 |5: // Check for __newindex if previous value is nil.
3043 | ldr TAB:CARG1, TAB:CARG2->metatable
3044 | cbz TAB:CARG1, <1 // No metatable: done.
3045 | ldrb TMP1w, TAB:CARG1->nomm
3046 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
3047 | b ->vmeta_tsetv
3048 |
3049 |7: // Possible table write barrier for the value. Skip valiswhite check.
3050 | barrierback TAB:CARG2, TMP2w, TMP1
3051 | b <2
3052 |
3053 |9:
3054 | asr ITYPE, TMP1, #47 // Extract the type tag of the key.
3055 | cmn ITYPE, #-LJ_TSTR // String key?
3056 | bne ->vmeta_tsetv
3057 | and STR:RC, TMP1, #LJ_GCVMASK // Strip the tag to get the GCstr pointer.
3058 | b ->BC_TSETS_Z
3059 break;
3060 case BC_TSETS: /* Table set with a string constant key: update an existing node or add a new key. */
3061 | decode_RB RB, INS
3062 | and RC, RC, #255
3063 | // RA = src, RB = table, RC = str_const (~)
3064 | ldr CARG2, [BASE, RB, lsl #3]
3065 | mvn RC, RC // Undo the complement (~) of the constant index.
3066 | ldr STR:RC, [KBASE, RC, lsl #3]
3067 | checktab CARG2, ->vmeta_tsets1
3068 |->BC_TSETS_Z:
3069 | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
3070 | ldr TMP1w, TAB:CARG2->hmask
3071 | ldr TMP2w, STR:RC->hash
3072 | ldr NODE:CARG3, TAB:CARG2->node
3073 | and TMP1w, TMP1w, TMP2w // idx = str->hash & tab->hmask
3074 | add TMP1, TMP1, TMP1, lsl #1 // idx*3
3075 | movn CARG4, #~LJ_TSTR // CARG4 = LJ_TSTR (movn writes the inverse of its immediate).
3076 | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
3077 | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
3078 | strb wzr, TAB:CARG2->nomm // Clear metamethod cache.
3079 |1:
3080 | ldp TMP1, CARG1, NODE:CARG3->val // TMP1 = node value, CARG1 = the following 8 bytes (compared as the key).
3081 | ldr NODE:TMP3, NODE:CARG3->next // Preload the next node in the chain.
3082 | ldrb TMP2w, TAB:CARG2->marked
3083 | cmp CARG1, CARG4
3084 | bne >5
3085 | ldr TMP0, [BASE, RA, lsl #3] // Value to store.
3086 | cmp TMP1, TISNIL // Previous value is nil?
3087 | beq >4
3088 |2:
3089 | str TMP0, NODE:CARG3->val
3090 | tbnz TMP2w, #2, >7 // isblack(table)
3091 |3:
3092 | ins_next
3093 |
3094 |4: // Check for __newindex if previous value is nil.
3095 | ldr TAB:CARG1, TAB:CARG2->metatable
3096 | cbz TAB:CARG1, <2 // No metatable: done.
3097 | ldrb TMP1w, TAB:CARG1->nomm
3098 | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done.
3099 | b ->vmeta_tsets
3100 |
3101 |5: // Follow hash chain.
3102 | mov NODE:CARG3, NODE:TMP3
3103 | cbnz NODE:TMP3, <1
3104 | // End of hash chain: key not found, add a new one.
3105 |
3106 | // But check for __newindex first.
3107 | ldr TAB:CARG1, TAB:CARG2->metatable
3108 | cbz TAB:CARG1, >6 // No metatable: continue.
3109 | ldrb TMP1w, TAB:CARG1->nomm
3110 | // 'no __newindex' flag NOT set: check.
3111 | tbz TMP1w, #MM_newindex, ->vmeta_tsets
3112 |6:
3113 | movn TMP1, #~LJ_TSTR // TMP1 = LJ_TSTR (movn writes the inverse of its immediate).
3114 | str PC, SAVE_PC
3115 | add TMP0, STR:RC, TMP1, lsl #47 // Tagged key string.
3116 | str BASE, L->base
3117 | mov CARG1, L
3118 | str TMP0, TMPD // Spill the tagged key to the TMPD slot ...
3119 | add CARG3, sp, TMPDofs // ... and pass its address as the key argument.
3120 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3121 | // Returns TValue *.
3122 | ldr BASE, L->base // Reload BASE after the call.
3123 | ldr TMP0, [BASE, RA, lsl #3] // Value to store.
3124 | str TMP0, [CRET1]
3125 | b <3 // No 2nd write barrier needed.
3126 |
3127 |7: // Possible table write barrier for the value. Skip valiswhite check.
3128 | barrierback TAB:CARG2, TMP2w, TMP1
3129 | b <3
3130 break;
3131 case BC_TSETB: /* Table set with an 8 bit literal index: array part only, else vmeta_tsetb. */
3132 | decode_RB RB, INS
3133 | and RC, RC, #255
3134 | // RA = src, RB = table, RC = index
3135 | ldr CARG2, [BASE, RB, lsl #3]
3136 | checktab CARG2, ->vmeta_tsetb
3137 | ldr CARG3, TAB:CARG2->array
3138 | ldr CARG1w, TAB:CARG2->asize
3139 | add CARG3, CARG3, RC, lsl #3 // Address of array[index].
3140 | cmp RCw, CARG1w // In array part?
3141 | bhs ->vmeta_tsetb // index >= asize: not in the array part.
3142 | ldr TMP1, [CARG3] // Previous value in the array slot.
3143 | ldr TMP0, [BASE, RA, lsl #3] // Value to store.
3144 | ldrb TMP2w, TAB:CARG2->marked
3145 | cmp TMP1, TISNIL // Previous value is nil?
3146 | beq >5
3147 |1:
3148 | str TMP0, [CARG3]
3149 | tbnz TMP2w, #2, >7 // isblack(table)
3150 |2:
3151 | ins_next
3152 |
3153 |5: // Check for __newindex if previous value is nil.
3154 | ldr TAB:CARG1, TAB:CARG2->metatable
3155 | cbz TAB:CARG1, <1 // No metatable: done.
3156 | ldrb TMP1w, TAB:CARG1->nomm
3157 | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
3158 | b ->vmeta_tsetb
3159 |
3160 |7: // Possible table write barrier for the value. Skip valiswhite check.
3161 | barrierback TAB:CARG2, TMP2w, TMP1
3162 | b <2
3163 break;
3164 case BC_TSETR: /* Raw array set: no type checks on table or key, no metatable check. */
3165 | decode_RB RB, INS
3166 | and RC, RC, #255
3167 | // RA = src, RB = table, RC = key
3168 | ldr CARG2, [BASE, RB, lsl #3]
3169 | ldr TMP1, [BASE, RC, lsl #3]
3170 | and TAB:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCtab pointer.
3171 | ldr CARG1, TAB:CARG2->array
3172 | ldrb TMP2w, TAB:CARG2->marked
3173 | ldr CARG4w, TAB:CARG2->asize
3174 | add CARG1, CARG1, TMP1, uxtw #3 // Address of array[key]; uses only the low 32 bits of the key.
3175 | tbnz TMP2w, #2, >7 // isblack(table)
3176 |2:
3177 | cmp TMP1w, CARG4w // In array part?
3178 | bhs ->vmeta_tsetr // Unsigned compare: out of range goes to vmeta_tsetr.
3179 |->BC_TSETR_Z: // Entry with the destination address already in CARG1.
3180 | ldr TMP0, [BASE, RA, lsl #3] // Value to store.
3181 | str TMP0, [CARG1]
3182 | ins_next
3183 |
3184 |7: // Possible table write barrier for the value. Skip valiswhite check.
3185 | barrierback TAB:CARG2, TMP2w, TMP0 // Unlike the other set opcodes, the barrier is handled before the store.
3186 | b <2
3187 break;
3188
3189 case BC_TSETM: /* Copy the MULTRES result slots into the array part of the table at RA-1. */
3190 | // RA = base (table at base-1), RC = num_const (start index)
3191 | add RA, BASE, RA, lsl #3
3192 |1:
3193 | ldr RBw, SAVE_MULTRES
3194 | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
3195 | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
3196 | sub RB, RB, #8 // RB = number of bytes to copy.
3197 | cbz RB, >4 // Nothing to copy?
3198 | and TAB:CARG2, CARG2, #LJ_GCVMASK // Strip the tag to get the GCtab pointer.
3199 | ldr CARG1w, TAB:CARG2->asize
3200 | add CARG3w, TMP1w, RBw, lsr #3 // CARG3 = start index + number of slots.
3201 | ldr CARG4, TAB:CARG2->array
3202 | cmp CARG3, CARG1
3203 | add RB, RA, RB // RB = end of the source slots.
3204 | bhi >5 // Needed size exceeds asize: resize first.
3205 | add TMP1, CARG4, TMP1w, uxtw #3 // Destination = &array[start index].
3206 | ldrb TMP2w, TAB:CARG2->marked
3207 |3: // Copy result slots to table.
3208 | ldr TMP0, [RA], #8
3209 | str TMP0, [TMP1], #8
3210 | cmp RA, RB
3211 | blo <3
3212 | tbnz TMP2w, #2, >7 // isblack(table)
3213 |4:
3214 | ins_next
3215 |
3216 |5: // Need to resize array part.
3217 | str BASE, L->base
3218 | mov CARG1, L // CARG2 (table) and CARG3 (needed size) are already set up above.
3219 | str PC, SAVE_PC
3220 | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3221 | // Must not reallocate the stack.
3222 | b <1 // Restart: reloads everything except RA.
3223 |
3224 |7: // Possible table write barrier for any value. Skip valiswhite check.
3225 | barrierback TAB:CARG2, TMP2w, TMP1
3226 | b <4
3227 break;
3228
3229 /* -- Calls and vararg handling ----------------------------------------- */
3230
3231 case BC_CALLM: /* Call with extra arguments from the previous multi-result (MULTRES). */
3232 | // RA = base, (RB = nresults+1,) RC = extra_nargs
3233 | ldr TMP0w, SAVE_MULTRES
3234 | decode_RC8RD NARGS8:RC, RC
3235 | add NARGS8:RC, NARGS8:RC, TMP0 // Add the saved MULTRES value to the decoded argument size.
3236 | b ->BC_CALL_Z // Continue in the shared call path.
3237 break;
3238 case BC_CALL: /* Call the function in slot RA with a fixed number of arguments. */
3239 | decode_RC8RD NARGS8:RC, RC
3240 | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
3241 |->BC_CALL_Z:
3242 | mov RB, BASE // Save old BASE for vmeta_call.
3243 | add BASE, BASE, RA, lsl #3 // Address of slot RA (the callee).
3244 | ldr CARG3, [BASE]
3245 | sub NARGS8:RC, NARGS8:RC, #8 // RC = nargs*8
3246 | add BASE, BASE, #16 // New BASE is two slots above the callee slot.
3247 | checkfunc CARG3, ->vmeta_call // Not a function: go to vmeta_call.
3248 | ins_call
3249 break;
3250
3251 case BC_CALLMT: /* Tail call with extra arguments from the previous multi-result (MULTRES). */
3252 | // RA = base, (RB = 0,) RC = extra_nargs
3253 | ldr TMP0w, SAVE_MULTRES
3254 | add NARGS8:RC, TMP0, RC, lsl #3 // RC = MULTRES + extra_nargs*8
3255 | b ->BC_CALLT1_Z // Continue in the shared tail call path.
3256 break;
3257 case BC_CALLT: /* Tail call: copy callee and arguments down into the current frame, then call. */
3258 | lsl NARGS8:RC, RC, #3
3259 | // RA = base, (RB = 0,) RC = (nargs+1)*8
3260 |->BC_CALLT1_Z:
3261 | add RA, BASE, RA, lsl #3 // Address of slot RA (the callee).
3262 | ldr TMP1, [RA]
3263 | sub NARGS8:RC, NARGS8:RC, #8 // RC = nargs*8
3264 | add RA, RA, #16 // RA = first argument, two slots above the callee slot.
3265 | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt // Not a function: go to vmeta_callt.
3266 | ldr PC, [BASE, FRAME_PC] // Frame link of the current frame.
3267 |->BC_CALLT2_Z:
3268 | mov RB, #0 // Copy offset; also used as the zero source for csel at 7:.
3269 | ldrb TMP2w, LFUNC:CARG3->ffid
3270 | tst PC, #FRAME_TYPE
3271 | bne >7 // Frame type bits set: not a plain Lua frame.
3272 |1:
3273 | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
3274 | cbz NARGS8:RC, >3 // No arguments to copy.
3275 |2:
3276 | ldr TMP0, [RA, RB]
3277 | add TMP1, RB, #8
3278 | cmp TMP1, NARGS8:RC
3279 | str TMP0, [BASE, RB] // Move one argument down to the current frame.
3280 | mov RB, TMP1
3281 | bne <2
3282 |3:
3283 | cmp TMP2, #1 // (> FF_C) Calling a fast function?
3284 | bhi >5
3285 |4:
3286 | ins_callt
3287 |
3288 |5: // Tailcall to a fast function with a Lua frame below.
3289 | ldrb RAw, [PC, #-4+OFS_RA] // RA operand of the instruction word at PC-4.
3290 | sub CARG1, BASE, RA, lsl #3
3291 | ldr LFUNC:CARG1, [CARG1, #-32] // Function of the frame below.
3292 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
3293 | ldr CARG1, LFUNC:CARG1->pc
3294 | ldr KBASE, [CARG1, #PC2PROTO(k)] // Set KBASE from that function's prototype.
3295 | b <4
3296 |
3297 |7: // Tailcall from a vararg function.
3298 | eor PC, PC, #FRAME_VARG
3299 | tst PC, #FRAME_TYPEP // Vararg frame below?
3300 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3301 | bne <1
3302 | sub BASE, BASE, PC // Relocate BASE down by the frame delta.
3303 | ldr PC, [BASE, FRAME_PC]
3304 | tst PC, #FRAME_TYPE
3305 | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
3306 | b <1
3307 break;
3308
3309 case BC_ITERC: /* Call the iterator: copy callable, state and control var up, then call. */
3310 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3311 | add RA, BASE, RA, lsl #3
3312 | ldr CARG3, [RA, #-24] // Callable is three slots below RA.
3313 | mov RB, BASE // Save old BASE for vmeta_call.
3314 | ldp CARG1, CARG2, [RA, #-16] // State and control var.
3315 | add BASE, RA, #16 // New BASE is two slots above the callee slot.
3316 | mov NARGS8:RC, #16 // Iterators get 2 arguments.
3317 | str CARG3, [RA] // Copy callable.
3318 | stp CARG1, CARG2, [RA, #16] // Copy state and control var.
3319 | checkfunc CARG3, ->vmeta_call // Not a function: go to vmeta_call.
3320 | ins_call
3321 break;
3322
3323 case BC_ITERN: /* Specialized table traversal: array part first, then hash part. */
3324 | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
3325 |.if JIT
3326 | // NYI: add hotloop, record BC_ITERN.
3327 |.endif
3328 | add RA, BASE, RA, lsl #3
3329 | ldr TAB:RB, [RA, #-16] // Table being traversed.
3330 | ldrh TMP3w, [PC, # OFS_RD] // RD operand of the instruction at PC (the one following ITERN).
3331 | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
3332 | add PC, PC, #4 // Step over the following instruction.
3333 | add TMP3, PC, TMP3, lsl #2
3334 | and TAB:RB, RB, #LJ_GCVMASK // Strip the tag to get the GCtab pointer.
3335 | sub TMP3, TMP3, #0x20000 // TMP3 = branch target (RD is a biased jump operand).
3336 | ldr TMP1w, TAB:RB->asize
3337 | ldr CARG2, TAB:RB->array
3338 |1: // Traverse array part.
3339 | subs RC, CARG1, TMP1 // RC = index - asize, reused as the hash index at 5:.
3340 | add CARG3, CARG2, CARG1, lsl #3 // Address of array[index].
3341 | bhs >5 // Index points after array part?
3342 | ldr TMP0, [CARG3]
3343 | cmp TMP0, TISNIL
3344 | cinc CARG1, CARG1, eq // Skip holes in array part.
3345 | beq <1
3346 | add CARG1, CARG1, TISNUM // Tagged integer key.
3347 | stp CARG1, TMP0, [RA] // Store key and value to slots RA and RA+1.
3348 | add CARG1, CARG1, #1 // Next index; only the low 32 bits are stored below.
3349 |3:
3350 | str CARG1w, [RA, #-8+LO] // Update control var.
3351 | mov PC, TMP3 // Take the branch.
3352 |4:
3353 | ins_next
3354 |
3355 |5: // Traverse hash part.
3356 | ldr TMP2w, TAB:RB->hmask
3357 | ldr NODE:RB, TAB:RB->node
3358 |6:
3359 | add CARG1, RC, RC, lsl #1 // idx*3
3360 | cmp RC, TMP2 // End of iteration? Branch to ITERN+1.
3361 | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
3362 | bhi <4
3363 | ldp TMP0, CARG1, NODE:CARG3->val // TMP0 = node value, CARG1 = the following 8 bytes (stored as the key).
3364 | cmp TMP0, TISNIL
3365 | add RC, RC, #1
3366 | beq <6 // Skip holes in hash part.
3367 | stp CARG1, TMP0, [RA] // Store key and value to slots RA and RA+1.
3368 | add CARG1, RC, TMP1 // New control index = asize + next hash index.
3369 | b <3
3370 break;
3371
3372 case BC_ISNEXT: /* Verify the iterator setup suits ITERN, else despecialize; then jump to the target. */
3373 | // RA = base, RC = target (points to ITERN)
3374 | add RA, BASE, RA, lsl #3
3375 | ldr CFUNC:CARG1, [RA, #-24] // Iterator callable.
3376 | add RC, PC, RC, lsl #2
3377 | ldp TAB:CARG3, CARG4, [RA, #-16] // State (expected: table) and control var (expected: nil).
3378 | sub RC, RC, #0x20000 // RC = branch target (biased jump operand).
3379 | checkfunc CFUNC:CARG1, >5
3380 | asr TMP0, TAB:CARG3, #47 // Extract the type tag of the state.
3381 | ldrb TMP1w, CFUNC:CARG1->ffid
3382 | cmn TMP0, #-LJ_TTAB // State is a table?
3383 | ccmp CARG4, TISNIL, #0, eq // && control var is nil
3384 | ccmp TMP1w, #FF_next_N, #0, eq // && callable has ffid FF_next_N
3385 | bne >5
3386 | mov TMP0w, #0xfffe7fff
3387 | lsl TMP0, TMP0, #32 // TMP0 = 0xfffe7fff00000000: low word (traversal index) is 0.
3388 | str TMP0, [RA, #-8] // Initialize control var.
3389 |1:
3390 | mov PC, RC
3391 | ins_next
3392 |
3393 |5: // Despecialize bytecode if any of the checks fail.
3394 | mov TMP0, #BC_JMP
3395 | mov TMP1, #BC_ITERC
3396 | strb TMP0w, [PC, #-4+OFS_OP] // Patch the opcode at PC-4 (this instruction) to JMP.
3397 | strb TMP1w, [RC, # OFS_OP] // Patch the opcode at the target to ITERC.
3398 | b <1
3399 break;
3400
3401 case BC_VARG: /* Copy vararg slots to RA..: a fixed count (RB != 0) or all of them (RB == 0). */
3402 | decode_RB RB, INS
3403 | and RC, RC, #255
3404 | // RA = base, RB = (nresults+1), RC = numparams
3405 | ldr TMP1, [BASE, FRAME_PC] // Frame link word of the current frame.
3406 | add RC, BASE, RC, lsl #3
3407 | add RA, BASE, RA, lsl #3
3408 | add RC, RC, #FRAME_VARG
3409 | add TMP2, RA, RB, lsl #3
3410 | sub RC, RC, TMP1 // RC = vbase
3411 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
3412 | sub TMP3, BASE, #16 // TMP3 = vtop
3413 | cbz RB, >5 // RB == 0: copy all varargs.
3414 | sub TMP2, TMP2, #16 // TMP2 = address of the last destination slot.
3415 |1: // Copy vararg slots to destination slots.
3416 | cmp RC, TMP3
3417 | ldr TMP0, [RC], #8
3418 | csel TMP0, TMP0, TISNIL, lo // Use nil once the source is at or above vtop.
3419 | cmp RA, TMP2
3420 | str TMP0, [RA], #8
3421 | blo <1
3422 |2:
3423 | ins_next
3424 |
3425 |5: // Copy all varargs.
3426 | ldr TMP0, L->maxstack
3427 | subs TMP2, TMP3, RC // TMP2 = vtop - vbase (bytes).
3428 | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
3429 | add RB, RB, #8
3430 | add TMP1, RA, TMP2 // End of the destination range.
3431 | str RBw, SAVE_MULTRES
3432 | ble <2 // Nothing to copy.
3433 | cmp TMP1, TMP0
3434 | bhi >7 // Destination end is above L->maxstack: grow the stack.
3435 |6:
3436 | ldr TMP0, [RC], #8
3437 | str TMP0, [RA], #8
3438 | cmp RC, TMP3
3439 | blo <6
3440 | b <2
3441 |
3442 |7: // Grow stack for varargs.
3443 | lsr CARG2, TMP2, #3 // n = number of slots to copy.
3444 | stp BASE, RA, L->base // Save BASE and RA to L->base and the field following it.
3445 | mov CARG1, L
3446 | sub RC, RC, BASE // Need delta, because BASE may change.
3447 | str PC, SAVE_PC
3448 | bl extern lj_state_growstack // (lua_State *L, int n)
3449 | ldp BASE, RA, L->base // Reload both after the call.
3450 | add RC, BASE, RC // Rebuild vbase from the new BASE.
3451 | sub TMP3, BASE, #16 // Recompute vtop.
3452 | b <6
3453 break;
3454
3455 /* -- Returns ----------------------------------------------------------- */
3456
3457 case BC_RETM: /* Return with extra results from the previous multi-result (MULTRES). */
3458 | // RA = results, RC = extra results
3459 | ldr TMP0w, SAVE_MULTRES
3460 | ldr PC, [BASE, FRAME_PC] // Frame link of the current frame.
3461 | add RA, BASE, RA, lsl #3 // RA = address of the first result.
3462 | add RC, TMP0, RC, lsl #3 // RC = MULTRES + extra*8
3463 | b ->BC_RETM_Z // Continue in the shared return path.
3464 break;
3465
3466 case BC_RET: /* General return: copy results down and resume the caller. */
3467 | // RA = results, RC = nresults+1
3468 | ldr PC, [BASE, FRAME_PC] // Frame link of the current frame.
3469 | lsl RC, RC, #3 // RC = (nresults+1)*8
3470 | add RA, BASE, RA, lsl #3 // RA = address of the first result.
3471 |->BC_RETM_Z:
3472 | str RCw, SAVE_MULTRES
3473 |1:
3474 | ands CARG1, PC, #FRAME_TYPE
3475 | eor CARG2, PC, #FRAME_VARG // Precomputed for the test at BC_RETV2_Z.
3476 | bne ->BC_RETV2_Z // Frame type bits set: non-standard return.
3477 |
3478 |->BC_RET_Z:
3479 | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
3480 | ldr INSw, [PC, #-4] // Instruction word before the return PC.
3481 | subs TMP1, RC, #8 // TMP1 = nresults*8
3482 | sub CARG3, BASE, #16
3483 | beq >3 // No results to copy.
3484 |2:
3485 | ldr TMP0, [RA], #8
3486 | add BASE, BASE, #8
3487 | sub TMP1, TMP1, #8
3488 | str TMP0, [BASE, #-24] // Results land starting at old BASE-16.
3489 | cbnz TMP1, <2
3490 |3:
3491 | decode_RA RA, INS
3492 | sub CARG4, CARG3, RA, lsl #3 // Caller's BASE = old BASE-16 - RA*8.
3493 | decode_RB RB, INS
3494 | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
3495 |5:
3496 | cmp RC, RB, lsl #3 // More results expected?
3497 | blo >6
3498 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
3499 | mov BASE, CARG4
3500 | ldr CARG2, LFUNC:CARG1->pc
3501 | ldr KBASE, [CARG2, #PC2PROTO(k)] // Set KBASE from the caller's prototype.
3502 | ins_next
3503 |
3504 |6: // Fill up results with nil.
3505 | add BASE, BASE, #8
3506 | add RC, RC, #8
3507 | str TISNIL, [BASE, #-24]
3508 | b <5
3509 |
3510 |->BC_RETV1_Z: // Non-standard return case.
3511 | add RA, BASE, RA, lsl #3 // Entry from RET0/RET1, where RA is still a slot number.
3512 |->BC_RETV2_Z:
3513 | tst CARG2, #FRAME_TYPEP
3514 | bne ->vm_return // Not a vararg frame: leave via vm_return.
3515 | // Return from vararg function: relocate BASE down.
3516 | sub BASE, BASE, CARG2
3517 | ldr PC, [BASE, FRAME_PC]
3518 | b <1 // Retry with the frame below.
3519 break;
3520
3521 case BC_RET0: case BC_RET1: /* Return with zero or one result, without a copy loop. */
3522 | // RA = results, RC = nresults+1
3523 | ldr PC, [BASE, FRAME_PC] // Frame link of the current frame.
3524 | lsl RC, RC, #3 // RC = (nresults+1)*8
3525 | str RCw, SAVE_MULTRES
3526 | ands CARG1, PC, #FRAME_TYPE
3527 | eor CARG2, PC, #FRAME_VARG // Precomputed for the test at BC_RETV2_Z.
3528 | bne ->BC_RETV1_Z // Frame type bits set: non-standard return.
3529 | ldr INSw, [PC, #-4] // Instruction word before the return PC.
3530 if (op == BC_RET1) {
3531 | ldr TMP0, [BASE, RA, lsl #3] // The single result.
3532 }
3533 | sub CARG4, BASE, #16 // Destination of the first result.
3534 | decode_RA RA, INS
3535 | sub BASE, CARG4, RA, lsl #3 // Caller's BASE = old BASE-16 - RA*8.
3536 if (op == BC_RET1) {
3537 | str TMP0, [CARG4], #8
3538 }
3539 | decode_RB RB, INS
3540 | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
3541 |5:
3542 | cmp RC, RB, lsl #3 // More results expected?
3543 | blo >6
3544 | and LFUNC:CARG1, CARG1, #LJ_GCVMASK // Strip the tag to get the GCfuncL pointer.
3545 | ldr CARG2, LFUNC:CARG1->pc
3546 | ldr KBASE, [CARG2, #PC2PROTO(k)] // Set KBASE from the caller's prototype.
3547 | ins_next
3548 |
3549 |6: // Fill up results with nil.
3550 | add RC, RC, #8
3551 | str TISNIL, [CARG4], #8
3552 | b <5
3553 break;
3554
3555 /* -- Loops and branches ------------------------------------------------ */
3556
3557 |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] // Loop index slot; the T variant addresses the same slot at byte offset +4.
3558 |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] // Stop value slot.
3559 |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] // Step value slot.
3560 |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] // Fourth slot: receives a copy of the index (see the FOR opcodes).
3561
3562 case BC_FORL: /* Numeric for loop back-edge: hot loop check only. */
3563 |.if JIT
3564 | hotloop
3565 |.endif
3566 | // Fall through. Assumes BC_IFORL follows.
3567 break;
3568
3569 case BC_JFORI: /* Numeric for loop: init (FORI/JFORI) and back-edge (IFORL/JFORL) variants. */
3570 case BC_JFORL:
3571#if !LJ_HASJIT
3572 break;
3573#endif
3574 case BC_FORI:
3575 case BC_IFORL:
3576 | // RA = base, RC = target (after end of loop or start of loop)
3577 vk = (op == BC_IFORL || op == BC_JFORL); /* vk != 0: back-edge variant, adds the step to the index. */
3578 | add RA, BASE, RA, lsl #3
3579 | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP
3580 | ldr CARG3, FOR_STEP // CARG3 = STEP
3581 if (op != BC_JFORL) {
3582 | add RC, PC, RC, lsl #2
3583 | sub RC, RC, #0x20000 // RC = branch target (biased jump operand).
3584 }
3585 | checkint CARG1, >5 // Index is not an integer: use the FP loop.
3586 if (!vk) {
3587 | checkint CARG2, ->vmeta_for
3588 | checkint CARG3, ->vmeta_for
3589 | tbnz CARG3w, #31, >4 // Negative step?
3590 | cmp CARG1w, CARG2w
3591 } else {
3592 | adds CARG1w, CARG1w, CARG3w // idx += step
3593 | bvs >2 // Signed overflow: continue at 2: without storing or branching.
3594 | add TMP0, CARG1, TISNUM // Tagged new index.
3595 | tbnz CARG3w, #31, >4 // Negative step?
3596 | cmp CARG1w, CARG2w
3597 }
3598 |1:
3599 if (op == BC_FORI) {
3600 | csel PC, RC, PC, gt // idx > stop: jump to the target.
3601 } else if (op == BC_JFORI) {
3602 | mov PC, RC
3603 | ldrh RCw, [RC, #-4+OFS_RD] // RD operand of the instruction before the target.
3604 } else if (op == BC_IFORL) {
3605 | csel PC, RC, PC, le // idx <= stop: jump to the target.
3606 }
3607 if (vk) {
3608 | str TMP0, FOR_IDX
3609 | str TMP0, FOR_EXT
3610 } else {
3611 | str CARG1, FOR_EXT
3612 }
3613 if (op == BC_JFORI || op == BC_JFORL) {
3614 | ble =>BC_JLOOP // idx <= stop: continue at BC_JLOOP.
3615 }
3616 |2:
3617 | ins_next
3618 |
3619 |4: // Invert check for negative step.
3620 | cmp CARG2w, CARG1w
3621 | b <1
3622 |
3623 |5: // FP loop.
3624 | ldp d0, d1, FOR_IDX // d0 = IDX, d1 = STOP. ldp leaves the flags from checkint intact.
3625 | blo ->vmeta_for // Uses the flags left by checkint (macro not shown here).
3626 if (!vk) {
3627 | checknum CARG2, ->vmeta_for
3628 | checknum CARG3, ->vmeta_for
3629 | str d0, FOR_EXT
3630 } else {
3631 | ldr d2, FOR_STEP
3632 | fadd d0, d0, d2 // idx += step
3633 }
3634 | tbnz CARG3, #63, >7 // Sign bit of the step set?
3635 | fcmp d0, d1
3636 |6:
3637 if (vk) {
3638 | str d0, FOR_IDX
3639 | str d0, FOR_EXT
3640 }
3641 if (op == BC_FORI) {
3642 | csel PC, RC, PC, hi // 'hi' after fcmp: greater than or unordered.
3643 } else if (op == BC_JFORI) {
3644 | ldrh RCw, [RC, #-4+OFS_RD] // RD operand of the instruction before the target.
3645 | bls =>BC_JLOOP
3646 } else if (op == BC_IFORL) {
3647 | csel PC, RC, PC, ls // 'ls' after fcmp: less than or equal.
3648 } else {
3649 | bls =>BC_JLOOP
3650 }
3651 | b <2
3652 |
3653 |7: // Invert check for negative step.
3654 | fcmp d1, d0
3655 | b <6
3656 break;
3657
3658 case BC_ITERL: /* Iterator loop back-edge: hot loop check only. */
3659 |.if JIT
3660 | hotloop
3661 |.endif
3662 | // Fall through. Assumes BC_IITERL follows.
3663 break;
3664
3665 case BC_JITERL: /* Iterator loop back-edge: continue while the value in slot RA is not nil. */
3666#if !LJ_HASJIT
3667 break;
3668#endif
3669 case BC_IITERL:
3670 | // RA = base, RC = target
3671 | ldr CARG1, [BASE, RA, lsl #3] // Value in slot RA.
3672 | add TMP1, BASE, RA, lsl #3
3673 | cmp CARG1, TISNIL
3674 | beq >1 // Stop if iterator returned nil.
3675 if (op == BC_JITERL) {
3676 | str CARG1, [TMP1, #-8] // Save it as the control var in slot RA-1.
3677 | b =>BC_JLOOP
3678 } else {
3679 | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch.
3680 | sub PC, TMP0, #0x20000 // RC is a biased jump operand.
3681 | str CARG1, [TMP1, #-8]
3682 }
3683 |1:
3684 | ins_next
3685 break;
3686
3687 case BC_LOOP: /* Generic loop marker: hot loop check only. */
3688 | // RA = base, RC = target (loop extent)
3689 | // Note: RA/RC is only used by trace recorder to determine scope/extent
3690 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
3691 |.if JIT
3692 | hotloop
3693 |.endif
3694 | // Fall through. Assumes BC_ILOOP follows.
3695 break;
3696
3697 case BC_ILOOP: /* Loop marker without hot loop check: just dispatch the next instruction. */
3698 | // RA = base, RC = target (loop extent)
3699 | ins_next
3700 break;
3701
3702 case BC_JLOOP: /* Enter the machine code of trace RC; emits no code unless JIT is set. */
3703 |.if JIT
3704 | // RA = base (ignored), RC = traceno
3705 | ldr CARG1, [GL, #GL_J(trace)] // Trace array.
3706 | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0.
3707 | ldr TRACE:RC, [CARG1, RC, lsl #3] // Trace object for traceno.
3708 | st_vmstate CARG2w
3709 | ldr RA, TRACE:RC->mcode // Entry point of the trace's machine code.
3710 | str BASE, GL->jit_base
3711 | str L, GL->tmpbuf.L
3712 | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace.
3713 | br RA
3714 |.endif
3715 break;
3716
3717 case BC_JMP: /* Unconditional jump. */
3718 | // RA = base (only used by trace recorder), RC = target
3719 | add RC, PC, RC, lsl #2
3720 | sub PC, RC, #0x20000 // PC = PC + RC*4 - 0x20000 (RC is a biased jump operand).
3721 | ins_next
3722 break;
3723
3724 /* -- Function headers -------------------------------------------------- */
3725
3726 case BC_FUNCF:
3727 |.if JIT
3728 | hotcall
3729 |.endif
3730 case BC_FUNCV: /* NYI: compiled vararg functions. */
3731 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
3732 break;
3733
3734 case BC_JFUNCF:
3735#if !LJ_HASJIT
3736 break;
3737#endif
3738 case BC_IFUNCF:
3739 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3740 | ldr CARG1, L->maxstack
3741 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3742 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3743 | cmp RA, CARG1
3744 | bhi ->vm_growstack_l
3745 |2:
3746 | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters.
3747 | blo >3
3748 if (op == BC_JFUNCF) {
3749 | decode_RD RC, INS
3750 | b =>BC_JLOOP
3751 } else {
3752 | ins_next
3753 }
3754 |
3755 |3: // Clear missing parameters.
3756 | str TISNIL, [BASE, NARGS8:RC]
3757 | add NARGS8:RC, NARGS8:RC, #8
3758 | b <2
3759 break;
3760
3761 case BC_JFUNCV:
3762#if !LJ_HASJIT
3763 break;
3764#endif
3765 | NYI // NYI: compiled vararg functions
3766 break; /* NYI: compiled vararg functions. */
3767
3768 case BC_IFUNCV:
3769 | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
3770 | ldr CARG1, L->maxstack
3771 | movn TMP0, #~LJ_TFUNC
3772 | add TMP2, BASE, RC
3773 | add LFUNC:CARG3, CARG3, TMP0, lsl #47
3774 | add RA, RA, RC
3775 | add TMP0, RC, #16+FRAME_VARG
3776 | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
3777 | ldr KBASE, [PC, #-4+PC2PROTO(k)]
3778 | cmp RA, CARG1
3779 | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
3780 | bhs ->vm_growstack_l
3781 | sub RC, TMP2, #16
3782 | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
3783 | mov RA, BASE
3784 | mov BASE, TMP2
3785 | cbz TMP1, >2
3786 |1:
3787 | cmp RA, RC // Less args than parameters?
3788 | bhs >3
3789 | ldr TMP0, [RA]
3790 | sub TMP1, TMP1, #1
3791 | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC).
3792 | str TMP0, [TMP2], #8
3793 | cbnz TMP1, <1
3794 |2:
3795 | ins_next
3796 |
3797 |3:
3798 | sub TMP1, TMP1, #1
3799 | str TISNIL, [TMP2], #8
3800 | cbz TMP1, <2
3801 | b <3
3802 break;
3803
3804 case BC_FUNCC:
3805 case BC_FUNCCW:
3806 | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
3807 if (op == BC_FUNCC) {
3808 | ldr CARG4, CFUNC:CARG3->f
3809 } else {
3810 | ldr CARG4, GL->wrapf
3811 }
3812 | add CARG2, RA, NARGS8:RC
3813 | ldr CARG1, L->maxstack
3814 | add RC, BASE, NARGS8:RC
3815 | cmp CARG2, CARG1
3816 | stp BASE, RC, L->base
3817 if (op == BC_FUNCCW) {
3818 | ldr CARG2, CFUNC:CARG3->f
3819 }
3820 | mv_vmstate TMP0w, C
3821 | mov CARG1, L
3822 | bhi ->vm_growstack_c // Need to grow stack.
3823 | st_vmstate TMP0w
3824 | blr CARG4 // (lua_State *L [, lua_CFunction f])
3825 | // Returns nresults.
3826 | ldp BASE, TMP1, L->base
3827 | str L, GL->cur_L
3828 | sbfiz RC, CRET1, #3, #32
3829 | st_vmstate ST_INTERP
3830 | ldr PC, [BASE, FRAME_PC]
3831 | sub RA, TMP1, RC // RA = L->top - nresults*8
3832 | b ->vm_returnc
3833 break;
3834
3835 /* ---------------------------------------------------------------------- */
3836
3837 default:
3838 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3839 exit(2);
3840 break;
3841 }
3842}
3843
3844static int build_backend(BuildCtx *ctx)
3845{
3846 int op;
3847
3848 dasm_growpc(Dst, BC__MAX);
3849
3850 build_subroutines(ctx);
3851
3852 |.code_op
3853 for (op = 0; op < BC__MAX; op++)
3854 build_ins(ctx, (BCOp)op, op);
3855
3856 return BC__MAX;
3857}
3858
3859/* Emit pseudo frame-info for all assembler functions. */
3860static void emit_asm_debug(BuildCtx *ctx)
3861{
3862 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
3863 int i, cf = CFRAME_SIZE >> 3;
3864 switch (ctx->mode) {
3865 case BUILD_elfasm:
3866 fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
3867 fprintf(ctx->fp,
3868 ".Lframe0:\n"
3869 "\t.long .LECIE0-.LSCIE0\n"
3870 ".LSCIE0:\n"
3871 "\t.long 0xffffffff\n"
3872 "\t.byte 0x1\n"
3873 "\t.string \"\"\n"
3874 "\t.uleb128 0x1\n"
3875 "\t.sleb128 -8\n"
3876 "\t.byte 30\n" /* Return address is in lr. */
3877 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3878 "\t.align 3\n"
3879 ".LECIE0:\n\n");
3880 fprintf(ctx->fp,
3881 ".LSFDE0:\n"
3882 "\t.long .LEFDE0-.LASFDE0\n"
3883 ".LASFDE0:\n"
3884 "\t.long .Lframe0\n"
3885 "\t.quad .Lbegin\n"
3886 "\t.quad %d\n"
3887 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3888 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3889 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
3890 fcofs, CFRAME_SIZE, cf, cf-1);
3891 for (i = 19; i <= 28; i++) /* offset x19-x28 */
3892 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
3893 for (i = 8; i <= 15; i++) /* offset d8-d15 */
3894 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3895 64+i, cf-i-4);
3896 fprintf(ctx->fp,
3897 "\t.align 3\n"
3898 ".LEFDE0:\n\n");
3899#if LJ_HASFFI
3900 fprintf(ctx->fp,
3901 ".LSFDE1:\n"
3902 "\t.long .LEFDE1-.LASFDE1\n"
3903 ".LASFDE1:\n"
3904 "\t.long .Lframe0\n"
3905 "\t.quad lj_vm_ffi_call\n"
3906 "\t.quad %d\n"
3907 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3908 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3909 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3910 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3911 "\t.align 3\n"
3912 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
3913#endif
3914 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
3915 fprintf(ctx->fp,
3916 ".Lframe1:\n"
3917 "\t.long .LECIE1-.LSCIE1\n"
3918 ".LSCIE1:\n"
3919 "\t.long 0\n"
3920 "\t.byte 0x1\n"
3921 "\t.string \"zPR\"\n"
3922 "\t.uleb128 0x1\n"
3923 "\t.sleb128 -8\n"
3924 "\t.byte 30\n" /* Return address is in lr. */
3925 "\t.uleb128 6\n" /* augmentation length */
3926 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3927 "\t.long lj_err_unwind_dwarf-.\n"
3928 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3929 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3930 "\t.align 3\n"
3931 ".LECIE1:\n\n");
3932 fprintf(ctx->fp,
3933 ".LSFDE2:\n"
3934 "\t.long .LEFDE2-.LASFDE2\n"
3935 ".LASFDE2:\n"
3936 "\t.long .LASFDE2-.Lframe1\n"
3937 "\t.long .Lbegin-.\n"
3938 "\t.long %d\n"
3939 "\t.uleb128 0\n" /* augmentation length */
3940 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
3941 "\t.byte 0x9d\n\t.uleb128 %d\n" /* offset fp */
3942 "\t.byte 0x9e\n\t.uleb128 %d\n", /* offset lr */
3943 fcofs, CFRAME_SIZE, cf, cf-1);
3944 for (i = 19; i <= 28; i++) /* offset x19-x28 */
3945 fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, cf-i+17);
3946 for (i = 8; i <= 15; i++) /* offset d8-d15 */
3947 fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
3948 64+i, cf-i-4);
3949 fprintf(ctx->fp,
3950 "\t.align 3\n"
3951 ".LEFDE2:\n\n");
3952#if LJ_HASFFI
3953 fprintf(ctx->fp,
3954 ".Lframe2:\n"
3955 "\t.long .LECIE2-.LSCIE2\n"
3956 ".LSCIE2:\n"
3957 "\t.long 0\n"
3958 "\t.byte 0x1\n"
3959 "\t.string \"zR\"\n"
3960 "\t.uleb128 0x1\n"
3961 "\t.sleb128 -8\n"
3962 "\t.byte 30\n" /* Return address is in lr. */
3963 "\t.uleb128 1\n" /* augmentation length */
3964 "\t.byte 0x1b\n" /* pcrel|sdata4 */
3965 "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */
3966 "\t.align 3\n"
3967 ".LECIE2:\n\n");
3968 fprintf(ctx->fp,
3969 ".LSFDE3:\n"
3970 "\t.long .LEFDE3-.LASFDE3\n"
3971 ".LASFDE3:\n"
3972 "\t.long .LASFDE3-.Lframe2\n"
3973 "\t.long lj_vm_ffi_call-.\n"
3974 "\t.long %d\n"
3975 "\t.uleb128 0\n" /* augmentation length */
3976 "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */
3977 "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */
3978 "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */
3979 "\t.byte 0x93\n\t.uleb128 2\n" /* offset x19 */
3980 "\t.align 3\n"
3981 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
3982#endif
3983 break;
3984 default:
3985 break;
3986 }
3987}
3988
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index e6b53e0d..37506139 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
1|// Low-level VM code for MIPS CPUs. 1|// Low-level VM code for MIPS CPUs.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// MIPS soft-float support contributed by Djordje Kovacevic and
6|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
4| 7|
5|.arch mips 8|.arch mips
6|.section code_op, code_sub 9|.section code_op, code_sub
@@ -18,6 +21,12 @@
18|// Fixed register assignments for the interpreter. 21|// Fixed register assignments for the interpreter.
19|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra 22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
20| 23|
24|.macro .FPU, a, b
25|.if FPU
26| a, b
27|.endif
28|.endmacro
29|
21|// The following must be C callee-save (but BASE is often refetched). 30|// The following must be C callee-save (but BASE is often refetched).
22|.define BASE, r16 // Base of current Lua stack frame. 31|.define BASE, r16 // Base of current Lua stack frame.
23|.define KBASE, r17 // Constants of current Lua function. 32|.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
25|.define DISPATCH, r19 // Opcode dispatch table. 34|.define DISPATCH, r19 // Opcode dispatch table.
26|.define LREG, r20 // Register holding lua_State (also in SAVE_L). 35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
27|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. 36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
28|// NYI: r22 currently unused.
29| 37|
30|.define JGL, r30 // On-trace: global_State + 32768. 38|.define JGL, r30 // On-trace: global_State + 32768.
31| 39|
32|// Constants for type-comparisons, stores and conversions. C callee-save. 40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNUM, r22
33|.define TISNIL, r30 42|.define TISNIL, r30
43|.if FPU
34|.define TOBIT, f30 // 2^52 + 2^51. 44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
35| 46|
36|// The following temporaries are not saved across C calls, except for RA. 47|// The following temporaries are not saved across C calls, except for RA.
37|.define RA, r23 // Callee-save. 48|.define RA, r23 // Callee-save.
@@ -46,7 +57,7 @@
46|.define TMP2, r14 57|.define TMP2, r14
47|.define TMP3, r15 58|.define TMP3, r15
48| 59|
49|// Calling conventions. 60|// MIPS o32 calling convention.
50|.define CFUNCADDR, r25 61|.define CFUNCADDR, r25
51|.define CARG1, r4 62|.define CARG1, r4
52|.define CARG2, r5 63|.define CARG2, r5
@@ -56,13 +67,33 @@
56|.define CRET1, r2 67|.define CRET1, r2
57|.define CRET2, r3 68|.define CRET2, r3
58| 69|
70|.if ENDIAN_LE
71|.define SFRETLO, CRET1
72|.define SFRETHI, CRET2
73|.define SFARG1LO, CARG1
74|.define SFARG1HI, CARG2
75|.define SFARG2LO, CARG3
76|.define SFARG2HI, CARG4
77|.else
78|.define SFRETLO, CRET2
79|.define SFRETHI, CRET1
80|.define SFARG1LO, CARG2
81|.define SFARG1HI, CARG1
82|.define SFARG2LO, CARG4
83|.define SFARG2HI, CARG3
84|.endif
85|
86|.if FPU
59|.define FARG1, f12 87|.define FARG1, f12
60|.define FARG2, f14 88|.define FARG2, f14
61| 89|
62|.define FRET1, f0 90|.define FRET1, f0
63|.define FRET2, f2 91|.define FRET2, f2
92|.endif
64| 93|
65|// Stack layout while in interpreter. Must match with lj_frame.h. 94|// Stack layout while in interpreter. Must match with lj_frame.h.
95|.if FPU // MIPS32 hard-float.
96|
66|.define CFRAME_SPACE, 112 // Delta for sp. 97|.define CFRAME_SPACE, 112 // Delta for sp.
67| 98|
68|.define SAVE_ERRF, 124(sp) // 32 bit C frame info. 99|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
72|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. 103|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
73|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. 104|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
74|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. 105|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
106|
107|.else // MIPS32 soft-float
108|
109|.define CFRAME_SPACE, 64 // Delta for sp.
110|
111|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
112|.define SAVE_NRES, 72(sp)
113|.define SAVE_CFRAME, 68(sp)
114|.define SAVE_L, 64(sp)
115|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
116|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
117|
118|.endif
119|
75|.define SAVE_PC, 20(sp) 120|.define SAVE_PC, 20(sp)
76|.define ARG5, 16(sp) 121|.define ARG5, 16(sp)
77|.define CSAVE_4, 12(sp) 122|.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
83|.define ARG5_OFS, 16 128|.define ARG5_OFS, 16
84|.define SAVE_MULTRES, ARG5 129|.define SAVE_MULTRES, ARG5
85| 130|
131|//-----------------------------------------------------------------------
132|
86|.macro saveregs 133|.macro saveregs
87| addiu sp, sp, -CFRAME_SPACE 134| addiu sp, sp, -CFRAME_SPACE
88| sw ra, SAVE_GPR_+9*4(sp) 135| sw ra, SAVE_GPR_+9*4(sp)
89| sw r30, SAVE_GPR_+8*4(sp) 136| sw r30, SAVE_GPR_+8*4(sp)
90| sdc1 f30, SAVE_FPR_+5*8(sp) 137| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
91| sw r23, SAVE_GPR_+7*4(sp) 138| sw r23, SAVE_GPR_+7*4(sp)
92| sw r22, SAVE_GPR_+6*4(sp) 139| sw r22, SAVE_GPR_+6*4(sp)
93| sdc1 f28, SAVE_FPR_+4*8(sp) 140| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
94| sw r21, SAVE_GPR_+5*4(sp) 141| sw r21, SAVE_GPR_+5*4(sp)
95| sw r20, SAVE_GPR_+4*4(sp) 142| sw r20, SAVE_GPR_+4*4(sp)
96| sdc1 f26, SAVE_FPR_+3*8(sp) 143| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
97| sw r19, SAVE_GPR_+3*4(sp) 144| sw r19, SAVE_GPR_+3*4(sp)
98| sw r18, SAVE_GPR_+2*4(sp) 145| sw r18, SAVE_GPR_+2*4(sp)
99| sdc1 f24, SAVE_FPR_+2*8(sp) 146| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
100| sw r17, SAVE_GPR_+1*4(sp) 147| sw r17, SAVE_GPR_+1*4(sp)
101| sw r16, SAVE_GPR_+0*4(sp) 148| sw r16, SAVE_GPR_+0*4(sp)
102| sdc1 f22, SAVE_FPR_+1*8(sp) 149| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
103| sdc1 f20, SAVE_FPR_+0*8(sp) 150| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
104|.endmacro 151|.endmacro
105| 152|
106|.macro restoreregs_ret 153|.macro restoreregs_ret
107| lw ra, SAVE_GPR_+9*4(sp) 154| lw ra, SAVE_GPR_+9*4(sp)
108| lw r30, SAVE_GPR_+8*4(sp) 155| lw r30, SAVE_GPR_+8*4(sp)
109| ldc1 f30, SAVE_FPR_+5*8(sp) 156| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
110| lw r23, SAVE_GPR_+7*4(sp) 157| lw r23, SAVE_GPR_+7*4(sp)
111| lw r22, SAVE_GPR_+6*4(sp) 158| lw r22, SAVE_GPR_+6*4(sp)
112| ldc1 f28, SAVE_FPR_+4*8(sp) 159| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
113| lw r21, SAVE_GPR_+5*4(sp) 160| lw r21, SAVE_GPR_+5*4(sp)
114| lw r20, SAVE_GPR_+4*4(sp) 161| lw r20, SAVE_GPR_+4*4(sp)
115| ldc1 f26, SAVE_FPR_+3*8(sp) 162| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
116| lw r19, SAVE_GPR_+3*4(sp) 163| lw r19, SAVE_GPR_+3*4(sp)
117| lw r18, SAVE_GPR_+2*4(sp) 164| lw r18, SAVE_GPR_+2*4(sp)
118| ldc1 f24, SAVE_FPR_+2*8(sp) 165| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
119| lw r17, SAVE_GPR_+1*4(sp) 166| lw r17, SAVE_GPR_+1*4(sp)
120| lw r16, SAVE_GPR_+0*4(sp) 167| lw r16, SAVE_GPR_+0*4(sp)
121| ldc1 f22, SAVE_FPR_+1*8(sp) 168| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
122| ldc1 f20, SAVE_FPR_+0*8(sp) 169| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
123| jr ra 170| jr ra
124| addiu sp, sp, CFRAME_SPACE 171| addiu sp, sp, CFRAME_SPACE
125|.endmacro 172|.endmacro
@@ -138,6 +185,7 @@
138|.type NODE, Node 185|.type NODE, Node
139|.type NARGS8, int 186|.type NARGS8, int
140|.type TRACE, GCtrace 187|.type TRACE, GCtrace
188|.type SBUF, SBuf
141| 189|
142|//----------------------------------------------------------------------- 190|//-----------------------------------------------------------------------
143| 191|
@@ -152,13 +200,23 @@
152|//----------------------------------------------------------------------- 200|//-----------------------------------------------------------------------
153| 201|
154|// Endian-specific defines. 202|// Endian-specific defines.
155|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) 203|.if ENDIAN_LE
156|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) 204|.define FRAME_PC, -4
157|.define HI, LJ_ENDIAN_SELECT(4,0) 205|.define FRAME_FUNC, -8
158|.define LO, LJ_ENDIAN_SELECT(0,4) 206|.define HI, 4
159|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) 207|.define LO, 0
160|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) 208|.define OFS_RD, 2
161|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) 209|.define OFS_RA, 1
210|.define OFS_OP, 0
211|.else
212|.define FRAME_PC, -8
213|.define FRAME_FUNC, -4
214|.define HI, 0
215|.define LO, 4
216|.define OFS_RD, 0
217|.define OFS_RA, 2
218|.define OFS_OP, 3
219|.endif
162| 220|
163|// Instruction decode. 221|// Instruction decode.
164|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro 222|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
353 |. sll TMP2, TMP2, 3 411 |. sll TMP2, TMP2, 3
354 |1: 412 |1:
355 | addiu TMP1, TMP1, -8 413 | addiu TMP1, TMP1, -8
356 | ldc1 f0, 0(RA) 414 | lw SFRETHI, HI(RA)
415 | lw SFRETLO, LO(RA)
357 | addiu RA, RA, 8 416 | addiu RA, RA, 8
358 | sdc1 f0, 0(BASE) 417 | sw SFRETHI, HI(BASE)
418 | sw SFRETLO, LO(BASE)
359 | bnez TMP1, <1 419 | bnez TMP1, <1
360 |. addiu BASE, BASE, 8 420 |. addiu BASE, BASE, 8
361 | 421 |
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
424 | and sp, CARG1, AT 484 | and sp, CARG1, AT
425 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 485 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
426 | lw L, SAVE_L 486 | lw L, SAVE_L
427 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 487 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
488 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
428 | li TISNIL, LJ_TNIL 489 | li TISNIL, LJ_TNIL
429 | lw BASE, L->base 490 | lw BASE, L->base
430 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 491 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
431 | mtc1 TMP3, TOBIT 492 | .FPU mtc1 TMP3, TOBIT
432 | li TMP1, LJ_TFALSE 493 | li TMP1, LJ_TFALSE
433 | li_vmstate INTERP 494 | li_vmstate INTERP
434 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. 495 | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
435 | cvt.d.s TOBIT, TOBIT 496 | .FPU cvt.d.s TOBIT, TOBIT
436 | addiu RA, BASE, -8 // Results start at BASE-8. 497 | addiu RA, BASE, -8 // Results start at BASE-8.
437 | addiu DISPATCH, DISPATCH, GG_G2DISP 498 | addiu DISPATCH, DISPATCH, GG_G2DISP
438 | sw TMP1, HI(RA) // Prepend false to error message. 499 | sw TMP1, HI(RA) // Prepend false to error message.
@@ -486,21 +547,23 @@ static void build_subroutines(BuildCtx *ctx)
486 | addiu DISPATCH, DISPATCH, GG_G2DISP 547 | addiu DISPATCH, DISPATCH, GG_G2DISP
487 | sw r0, SAVE_NRES 548 | sw r0, SAVE_NRES
488 | sw r0, SAVE_ERRF 549 | sw r0, SAVE_ERRF
489 | sw TMP0, L->cframe 550 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
490 | sw r0, SAVE_CFRAME 551 | sw r0, SAVE_CFRAME
491 | beqz TMP1, >3 552 | beqz TMP1, >3
492 |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 553 |. sw TMP0, L->cframe
493 | 554 |
494 | // Resume after yield (like a return). 555 | // Resume after yield (like a return).
556 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
495 | move RA, BASE 557 | move RA, BASE
496 | lw BASE, L->base 558 | lw BASE, L->base
559 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
497 | lw TMP1, L->top 560 | lw TMP1, L->top
498 | lw PC, FRAME_PC(BASE) 561 | lw PC, FRAME_PC(BASE)
499 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 562 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
500 | subu RD, TMP1, BASE 563 | subu RD, TMP1, BASE
501 | mtc1 TMP3, TOBIT 564 | .FPU mtc1 TMP3, TOBIT
502 | sb r0, L->status 565 | sb r0, L->status
503 | cvt.d.s TOBIT, TOBIT 566 | .FPU cvt.d.s TOBIT, TOBIT
504 | li_vmstate INTERP 567 | li_vmstate INTERP
505 | addiu RD, RD, 8 568 | addiu RD, RD, 8
506 | st_vmstate 569 | st_vmstate
@@ -525,25 +588,27 @@ static void build_subroutines(BuildCtx *ctx)
525 | 588 |
526 |1: // Entry point for vm_pcall above (PC = ftype). 589 |1: // Entry point for vm_pcall above (PC = ftype).
527 | lw TMP1, L:CARG1->cframe 590 | lw TMP1, L:CARG1->cframe
528 | sw CARG3, SAVE_NRES
529 | move L, CARG1 591 | move L, CARG1
530 | sw CARG1, SAVE_L 592 | sw CARG3, SAVE_NRES
531 | move BASE, CARG2
532 | sw sp, L->cframe // Add our C frame to cframe chain.
533 | lw DISPATCH, L->glref // Setup pointer to dispatch table. 593 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
594 | sw CARG1, SAVE_L
595 | move BASE, CARG2
596 | addiu DISPATCH, DISPATCH, GG_G2DISP
534 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 597 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
535 | sw TMP1, SAVE_CFRAME 598 | sw TMP1, SAVE_CFRAME
536 | addiu DISPATCH, DISPATCH, GG_G2DISP 599 | sw sp, L->cframe // Add our C frame to cframe chain.
537 | 600 |
538 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 601 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
602 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
539 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). 603 | lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
540 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 604 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
605 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
541 | lw TMP1, L->top 606 | lw TMP1, L->top
542 | mtc1 TMP3, TOBIT 607 | .FPU mtc1 TMP3, TOBIT
543 | addu PC, PC, BASE 608 | addu PC, PC, BASE
544 | subu NARGS8:RC, TMP1, BASE 609 | subu NARGS8:RC, TMP1, BASE
545 | subu PC, PC, TMP2 // PC = frame delta + frame type 610 | subu PC, PC, TMP2 // PC = frame delta + frame type
546 | cvt.d.s TOBIT, TOBIT 611 | .FPU cvt.d.s TOBIT, TOBIT
547 | li_vmstate INTERP 612 | li_vmstate INTERP
548 | li TISNIL, LJ_TNIL 613 | li TISNIL, LJ_TNIL
549 | st_vmstate 614 | st_vmstate
@@ -566,20 +631,21 @@ static void build_subroutines(BuildCtx *ctx)
566 | lw TMP0, L:CARG1->stack 631 | lw TMP0, L:CARG1->stack
567 | sw CARG1, SAVE_L 632 | sw CARG1, SAVE_L
568 | lw TMP1, L->top 633 | lw TMP1, L->top
634 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
569 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. 635 | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
570 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 636 | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
571 | lw TMP1, L->cframe 637 | lw TMP1, L->cframe
572 | sw sp, L->cframe // Add our C frame to cframe chain. 638 | addiu DISPATCH, DISPATCH, GG_G2DISP
573 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 639 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
574 | sw r0, SAVE_ERRF // No error function. 640 | sw r0, SAVE_ERRF // No error function.
575 | move CFUNCADDR, CARG4 641 | sw TMP1, SAVE_CFRAME
642 | sw sp, L->cframe // Add our C frame to cframe chain.
643 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
576 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) 644 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
577 |. sw TMP1, SAVE_CFRAME 645 |. move CFUNCADDR, CARG4
578 | move BASE, CRET1 646 | move BASE, CRET1
579 | lw DISPATCH, L->glref // Setup pointer to dispatch table.
580 | li PC, FRAME_CP
581 | bnez CRET1, <3 // Else continue with the call. 647 | bnez CRET1, <3 // Else continue with the call.
582 |. addiu DISPATCH, DISPATCH, GG_G2DISP 648 |. li PC, FRAME_CP
583 | b ->vm_leave_cp // No base? Just remove C frame. 649 | b ->vm_leave_cp // No base? Just remove C frame.
584 |. nop 650 |. nop
585 | 651 |
@@ -624,7 +690,8 @@ static void build_subroutines(BuildCtx *ctx)
624 |->cont_cat: // RA = resultptr, RB = meta base 690 |->cont_cat: // RA = resultptr, RB = meta base
625 | lw INS, -4(PC) 691 | lw INS, -4(PC)
626 | addiu CARG2, RB, -16 692 | addiu CARG2, RB, -16
627 | ldc1 f0, 0(RA) 693 | lw SFRETHI, HI(RA)
694 | lw SFRETLO, LO(RA)
628 | decode_RB8a MULTRES, INS 695 | decode_RB8a MULTRES, INS
629 | decode_RA8a RA, INS 696 | decode_RA8a RA, INS
630 | decode_RB8b MULTRES 697 | decode_RB8b MULTRES
@@ -632,11 +699,13 @@ static void build_subroutines(BuildCtx *ctx)
632 | addu TMP1, BASE, MULTRES 699 | addu TMP1, BASE, MULTRES
633 | sw BASE, L->base 700 | sw BASE, L->base
634 | subu CARG3, CARG2, TMP1 701 | subu CARG3, CARG2, TMP1
702 | sw SFRETHI, HI(CARG2)
635 | bne TMP1, CARG2, ->BC_CAT_Z 703 | bne TMP1, CARG2, ->BC_CAT_Z
636 |. sdc1 f0, 0(CARG2) 704 |. sw SFRETLO, LO(CARG2)
637 | addu RA, BASE, RA 705 | addu RA, BASE, RA
706 | sw SFRETHI, HI(RA)
638 | b ->cont_nop 707 | b ->cont_nop
639 |. sdc1 f0, 0(RA) 708 |. sw SFRETLO, LO(RA)
640 | 709 |
641 |//-- Table indexing metamethods ----------------------------------------- 710 |//-- Table indexing metamethods -----------------------------------------
642 | 711 |
@@ -659,10 +728,9 @@ static void build_subroutines(BuildCtx *ctx)
659 |. sw TMP1, HI(CARG3) 728 |. sw TMP1, HI(CARG3)
660 | 729 |
661 |->vmeta_tgetb: // TMP0 = index 730 |->vmeta_tgetb: // TMP0 = index
662 | mtc1 TMP0, f0
663 | cvt.d.w f0, f0
664 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 731 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
665 | sdc1 f0, 0(CARG3) 732 | sw TMP0, LO(CARG3)
733 | sw TISNUM, HI(CARG3)
666 | 734 |
667 |->vmeta_tgetv: 735 |->vmeta_tgetv:
668 |1: 736 |1:
@@ -674,9 +742,11 @@ static void build_subroutines(BuildCtx *ctx)
674 | // Returns TValue * (finished) or NULL (metamethod). 742 | // Returns TValue * (finished) or NULL (metamethod).
675 | beqz CRET1, >3 743 | beqz CRET1, >3
676 |. addiu TMP1, BASE, -FRAME_CONT 744 |. addiu TMP1, BASE, -FRAME_CONT
677 | ldc1 f0, 0(CRET1) 745 | lw SFARG1HI, HI(CRET1)
746 | lw SFARG2HI, LO(CRET1)
678 | ins_next1 747 | ins_next1
679 | sdc1 f0, 0(RA) 748 | sw SFARG1HI, HI(RA)
749 | sw SFARG2HI, LO(RA)
680 | ins_next2 750 | ins_next2
681 | 751 |
682 |3: // Call __index metamethod. 752 |3: // Call __index metamethod.
@@ -688,6 +758,17 @@ static void build_subroutines(BuildCtx *ctx)
688 | b ->vm_call_dispatch_f 758 | b ->vm_call_dispatch_f
689 |. li NARGS8:RC, 16 // 2 args for func(t, k). 759 |. li NARGS8:RC, 16 // 2 args for func(t, k).
690 | 760 |
761 |->vmeta_tgetr:
762 | load_got lj_tab_getinth
763 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
764 |. nop
765 | // Returns cTValue * or NULL.
766 | beqz CRET1, ->BC_TGETR_Z
767 |. move SFARG2HI, TISNIL
768 | lw SFARG2HI, HI(CRET1)
769 | b ->BC_TGETR_Z
770 |. lw SFARG2LO, LO(CRET1)
771 |
691 |//----------------------------------------------------------------------- 772 |//-----------------------------------------------------------------------
692 | 773 |
693 |->vmeta_tsets1: 774 |->vmeta_tsets1:
@@ -709,10 +790,9 @@ static void build_subroutines(BuildCtx *ctx)
709 |. sw TMP1, HI(CARG3) 790 |. sw TMP1, HI(CARG3)
710 | 791 |
711 |->vmeta_tsetb: // TMP0 = index 792 |->vmeta_tsetb: // TMP0 = index
712 | mtc1 TMP0, f0
713 | cvt.d.w f0, f0
714 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 793 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
715 | sdc1 f0, 0(CARG3) 794 | sw TMP0, LO(CARG3)
795 | sw TISNUM, HI(CARG3)
716 | 796 |
717 |->vmeta_tsetv: 797 |->vmeta_tsetv:
718 |1: 798 |1:
@@ -722,11 +802,13 @@ static void build_subroutines(BuildCtx *ctx)
722 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 802 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
723 |. move CARG1, L 803 |. move CARG1, L
724 | // Returns TValue * (finished) or NULL (metamethod). 804 | // Returns TValue * (finished) or NULL (metamethod).
805 | lw SFARG1HI, HI(RA)
725 | beqz CRET1, >3 806 | beqz CRET1, >3
726 |. ldc1 f0, 0(RA) 807 |. lw SFARG1LO, LO(RA)
727 | // NOBARRIER: lj_meta_tset ensures the table is not black. 808 | // NOBARRIER: lj_meta_tset ensures the table is not black.
728 | ins_next1 809 | ins_next1
729 | sdc1 f0, 0(CRET1) 810 | sw SFARG1HI, HI(CRET1)
811 | sw SFARG1LO, LO(CRET1)
730 | ins_next2 812 | ins_next2
731 | 813 |
732 |3: // Call __newindex metamethod. 814 |3: // Call __newindex metamethod.
@@ -736,14 +818,27 @@ static void build_subroutines(BuildCtx *ctx)
736 | sw PC, -16+HI(BASE) // [cont|PC] 818 | sw PC, -16+HI(BASE) // [cont|PC]
737 | subu PC, BASE, TMP1 819 | subu PC, BASE, TMP1
738 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 820 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
739 | sdc1 f0, 16(BASE) // Copy value to third argument. 821 | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
822 | sw SFARG1LO, 16+LO(BASE)
740 | b ->vm_call_dispatch_f 823 | b ->vm_call_dispatch_f
741 |. li NARGS8:RC, 24 // 3 args for func(t, k, v) 824 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
742 | 825 |
826 |->vmeta_tsetr:
827 | load_got lj_tab_setinth
828 | sw BASE, L->base
829 | sw PC, SAVE_PC
830 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
831 |. move CARG1, L
832 | // Returns TValue *.
833 | b ->BC_TSETR_Z
834 |. nop
835 |
743 |//-- Comparison metamethods --------------------------------------------- 836 |//-- Comparison metamethods ---------------------------------------------
744 | 837 |
745 |->vmeta_comp: 838 |->vmeta_comp:
746 | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. 839 | // RA/RD point to o1/o2.
840 | move CARG2, RA
841 | move CARG3, RD
747 | load_got lj_meta_comp 842 | load_got lj_meta_comp
748 | addiu PC, PC, -4 843 | addiu PC, PC, -4
749 | sw BASE, L->base 844 | sw BASE, L->base
@@ -769,11 +864,13 @@ static void build_subroutines(BuildCtx *ctx)
769 | 864 |
770 |->cont_ra: // RA = resultptr 865 |->cont_ra: // RA = resultptr
771 | lbu TMP1, -4+OFS_RA(PC) 866 | lbu TMP1, -4+OFS_RA(PC)
772 | ldc1 f0, 0(RA) 867 | lw SFRETHI, HI(RA)
868 | lw SFRETLO, LO(RA)
773 | sll TMP1, TMP1, 3 869 | sll TMP1, TMP1, 3
774 | addu TMP1, BASE, TMP1 870 | addu TMP1, BASE, TMP1
871 | sw SFRETHI, HI(TMP1)
775 | b ->cont_nop 872 | b ->cont_nop
776 |. sdc1 f0, 0(TMP1) 873 |. sw SFRETLO, LO(TMP1)
777 | 874 |
778 |->cont_condt: // RA = resultptr 875 |->cont_condt: // RA = resultptr
779 | lw TMP0, HI(RA) 876 | lw TMP0, HI(RA)
@@ -788,8 +885,11 @@ static void build_subroutines(BuildCtx *ctx)
788 |. addiu TMP2, AT, -1 // Branch if result is false. 885 |. addiu TMP2, AT, -1 // Branch if result is false.
789 | 886 |
790 |->vmeta_equal: 887 |->vmeta_equal:
791 | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. 888 | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
792 | load_got lj_meta_equal 889 | load_got lj_meta_equal
890 | move CARG2, SFARG1LO
891 | move CARG3, SFARG2LO
892 | move CARG4, TMP0
793 | addiu PC, PC, -4 893 | addiu PC, PC, -4
794 | sw BASE, L->base 894 | sw BASE, L->base
795 | sw PC, SAVE_PC 895 | sw PC, SAVE_PC
@@ -813,17 +913,31 @@ static void build_subroutines(BuildCtx *ctx)
813 |. nop 913 |. nop
814 |.endif 914 |.endif
815 | 915 |
916 |->vmeta_istype:
917 | load_got lj_meta_istype
918 | addiu PC, PC, -4
919 | sw BASE, L->base
920 | srl CARG2, RA, 3
921 | srl CARG3, RD, 3
922 | sw PC, SAVE_PC
923 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
924 |. move CARG1, L
925 | b ->cont_nop
926 |. nop
927 |
816 |//-- Arithmetic metamethods --------------------------------------------- 928 |//-- Arithmetic metamethods ---------------------------------------------
817 | 929 |
818 |->vmeta_unm: 930 |->vmeta_unm:
819 | move CARG4, CARG3 931 | move RC, RB
820 | 932 |
821 |->vmeta_arith: 933 |->vmeta_arith:
822 | load_got lj_meta_arith 934 | load_got lj_meta_arith
823 | decode_OP1 TMP0, INS 935 | decode_OP1 TMP0, INS
824 | sw BASE, L->base 936 | sw BASE, L->base
825 | sw PC, SAVE_PC
826 | move CARG2, RA 937 | move CARG2, RA
938 | sw PC, SAVE_PC
939 | move CARG3, RB
940 | move CARG4, RC
827 | sw TMP0, ARG5 941 | sw TMP0, ARG5
828 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) 942 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
829 |. move CARG1, L 943 |. move CARG1, L
@@ -931,40 +1045,52 @@ static void build_subroutines(BuildCtx *ctx)
931 | 1045 |
932 |.macro .ffunc_1, name 1046 |.macro .ffunc_1, name
933 |->ff_ .. name: 1047 |->ff_ .. name:
1048 | lw SFARG1HI, HI(BASE)
934 | beqz NARGS8:RC, ->fff_fallback 1049 | beqz NARGS8:RC, ->fff_fallback
935 |. lw CARG3, HI(BASE) 1050 |. lw SFARG1LO, LO(BASE)
936 | lw CARG1, LO(BASE)
937 |.endmacro 1051 |.endmacro
938 | 1052 |
939 |.macro .ffunc_2, name 1053 |.macro .ffunc_2, name
940 |->ff_ .. name: 1054 |->ff_ .. name:
941 | sltiu AT, NARGS8:RC, 16 1055 | sltiu AT, NARGS8:RC, 16
942 | lw CARG3, HI(BASE) 1056 | lw SFARG1HI, HI(BASE)
943 | bnez AT, ->fff_fallback 1057 | bnez AT, ->fff_fallback
944 |. lw CARG4, 8+HI(BASE) 1058 |. lw SFARG2HI, 8+HI(BASE)
945 | lw CARG1, LO(BASE) 1059 | lw SFARG1LO, LO(BASE)
946 | lw CARG2, 8+LO(BASE) 1060 | lw SFARG2LO, 8+LO(BASE)
947 |.endmacro 1061 |.endmacro
948 | 1062 |
949 |.macro .ffunc_n, name // Caveat: has delay slot! 1063 |.macro .ffunc_n, name // Caveat: has delay slot!
950 |->ff_ .. name: 1064 |->ff_ .. name:
951 | lw CARG3, HI(BASE) 1065 | lw SFARG1HI, HI(BASE)
1066 |.if FPU
1067 | ldc1 FARG1, 0(BASE)
1068 |.else
1069 | lw SFARG1LO, LO(BASE)
1070 |.endif
952 | beqz NARGS8:RC, ->fff_fallback 1071 | beqz NARGS8:RC, ->fff_fallback
953 |. ldc1 FARG1, 0(BASE) 1072 |. sltiu AT, SFARG1HI, LJ_TISNUM
954 | sltiu AT, CARG3, LJ_TISNUM
955 | beqz AT, ->fff_fallback 1073 | beqz AT, ->fff_fallback
956 |.endmacro 1074 |.endmacro
957 | 1075 |
958 |.macro .ffunc_nn, name // Caveat: has delay slot! 1076 |.macro .ffunc_nn, name // Caveat: has delay slot!
959 |->ff_ .. name: 1077 |->ff_ .. name:
960 | sltiu AT, NARGS8:RC, 16 1078 | sltiu AT, NARGS8:RC, 16
961 | lw CARG3, HI(BASE) 1079 | lw SFARG1HI, HI(BASE)
962 | bnez AT, ->fff_fallback 1080 | bnez AT, ->fff_fallback
963 |. lw CARG4, 8+HI(BASE) 1081 |. lw SFARG2HI, 8+HI(BASE)
964 | ldc1 FARG1, 0(BASE) 1082 | sltiu TMP0, SFARG1HI, LJ_TISNUM
965 | ldc1 FARG2, 8(BASE) 1083 |.if FPU
966 | sltiu TMP0, CARG3, LJ_TISNUM 1084 | ldc1 FARG1, 0(BASE)
967 | sltiu TMP1, CARG4, LJ_TISNUM 1085 |.else
1086 | lw SFARG1LO, LO(BASE)
1087 |.endif
1088 | sltiu TMP1, SFARG2HI, LJ_TISNUM
1089 |.if FPU
1090 | ldc1 FARG2, 8(BASE)
1091 |.else
1092 | lw SFARG2LO, 8+LO(BASE)
1093 |.endif
968 | and TMP0, TMP0, TMP1 1094 | and TMP0, TMP0, TMP1
969 | beqz TMP0, ->fff_fallback 1095 | beqz TMP0, ->fff_fallback
970 |.endmacro 1096 |.endmacro
@@ -980,52 +1106,54 @@ static void build_subroutines(BuildCtx *ctx)
980 |//-- Base library: checks ----------------------------------------------- 1106 |//-- Base library: checks -----------------------------------------------
981 | 1107 |
982 |.ffunc_1 assert 1108 |.ffunc_1 assert
983 | sltiu AT, CARG3, LJ_TISTRUECOND 1109 | sltiu AT, SFARG1HI, LJ_TISTRUECOND
984 | beqz AT, ->fff_fallback 1110 | beqz AT, ->fff_fallback
985 |. addiu RA, BASE, -8 1111 |. addiu RA, BASE, -8
986 | lw PC, FRAME_PC(BASE) 1112 | lw PC, FRAME_PC(BASE)
987 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1113 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
988 | addu TMP2, RA, NARGS8:RC 1114 | addu TMP2, RA, NARGS8:RC
989 | sw CARG3, HI(RA) 1115 | sw SFARG1HI, HI(RA)
990 | addiu TMP1, BASE, 8 1116 | addiu TMP1, BASE, 8
991 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. 1117 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
992 |. sw CARG1, LO(RA) 1118 |. sw SFARG1LO, LO(RA)
993 |1: 1119 |1:
994 | ldc1 f0, 0(TMP1) 1120 | lw SFRETHI, HI(TMP1)
995 | sdc1 f0, -8(TMP1) 1121 | lw SFRETLO, LO(TMP1)
1122 | sw SFRETHI, -8+HI(TMP1)
1123 | sw SFRETLO, -8+LO(TMP1)
996 | bne TMP1, TMP2, <1 1124 | bne TMP1, TMP2, <1
997 |. addiu TMP1, TMP1, 8 1125 |. addiu TMP1, TMP1, 8
998 | b ->fff_res 1126 | b ->fff_res
999 |. nop 1127 |. nop
1000 | 1128 |
1001 |.ffunc type 1129 |.ffunc type
1002 | lw CARG3, HI(BASE) 1130 | lw SFARG1HI, HI(BASE)
1003 | li TMP1, LJ_TISNUM
1004 | beqz NARGS8:RC, ->fff_fallback 1131 | beqz NARGS8:RC, ->fff_fallback
1005 |. sltiu TMP0, CARG3, LJ_TISNUM 1132 |. sltiu TMP0, SFARG1HI, LJ_TISNUM
1006 | movz TMP1, CARG3, TMP0 1133 | movn SFARG1HI, TISNUM, TMP0
1007 | not TMP1, TMP1 1134 | not TMP1, SFARG1HI
1008 | sll TMP1, TMP1, 3 1135 | sll TMP1, TMP1, 3
1009 | addu TMP1, CFUNC:RB, TMP1 1136 | addu TMP1, CFUNC:RB, TMP1
1010 | b ->fff_resn 1137 | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
1011 |. ldc1 FRET1, CFUNC:TMP1->upvalue 1138 | b ->fff_restv
1139 |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
1012 | 1140 |
1013 |//-- Base library: getters and setters --------------------------------- 1141 |//-- Base library: getters and setters ---------------------------------
1014 | 1142 |
1015 |.ffunc_1 getmetatable 1143 |.ffunc_1 getmetatable
1016 | li AT, LJ_TTAB 1144 | li AT, LJ_TTAB
1017 | bne CARG3, AT, >6 1145 | bne SFARG1HI, AT, >6
1018 |. li AT, LJ_TUDATA 1146 |. li AT, LJ_TUDATA
1019 |1: // Field metatable must be at same offset for GCtab and GCudata! 1147 |1: // Field metatable must be at same offset for GCtab and GCudata!
1020 | lw TAB:CARG1, TAB:CARG1->metatable 1148 | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
1021 |2: 1149 |2:
1022 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) 1150 | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1023 | beqz TAB:CARG1, ->fff_restv 1151 | beqz TAB:SFARG1LO, ->fff_restv
1024 |. li CARG3, LJ_TNIL 1152 |. li SFARG1HI, LJ_TNIL
1025 | lw TMP0, TAB:CARG1->hmask 1153 | lw TMP0, TAB:SFARG1LO->hmask
1026 | li CARG3, LJ_TTAB // Use metatable as default result. 1154 | li SFARG1HI, LJ_TTAB // Use metatable as default result.
1027 | lw TMP1, STR:RC->hash 1155 | lw TMP1, STR:RC->hash
1028 | lw NODE:TMP2, TAB:CARG1->node 1156 | lw NODE:TMP2, TAB:SFARG1LO->node
1029 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 1157 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1030 | sll TMP0, TMP1, 5 1158 | sll TMP0, TMP1, 5
1031 | sll TMP1, TMP1, 3 1159 | sll TMP1, TMP1, 3
@@ -1037,7 +1165,7 @@ static void build_subroutines(BuildCtx *ctx)
1037 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 1165 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
1038 | lw NODE:TMP3, NODE:TMP2->next 1166 | lw NODE:TMP3, NODE:TMP2->next
1039 | bne CARG4, AT, >4 1167 | bne CARG4, AT, >4
1040 |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 1168 |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
1041 | beq TMP0, STR:RC, >5 1169 | beq TMP0, STR:RC, >5
1042 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) 1170 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
1043 |4: 1171 |4:
@@ -1046,36 +1174,35 @@ static void build_subroutines(BuildCtx *ctx)
1046 | b <3 1174 | b <3
1047 |. nop 1175 |. nop
1048 |5: 1176 |5:
1049 | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. 1177 | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
1050 |. nop 1178 |. nop
1051 | move CARG3, CARG2 // Return value of mt.__metatable. 1179 | move SFARG1HI, CARG3 // Return value of mt.__metatable.
1052 | b ->fff_restv 1180 | b ->fff_restv
1053 |. move CARG1, TMP1 1181 |. move SFARG1LO, TMP1
1054 | 1182 |
1055 |6: 1183 |6:
1056 | beq CARG3, AT, <1 1184 | beq SFARG1HI, AT, <1
1057 |. sltiu TMP0, CARG3, LJ_TISNUM 1185 |. sltu AT, TISNUM, SFARG1HI
1058 | li TMP1, LJ_TISNUM 1186 | movz SFARG1HI, TISNUM, AT
1059 | movz TMP1, CARG3, TMP0 1187 | not TMP1, SFARG1HI
1060 | not TMP1, TMP1
1061 | sll TMP1, TMP1, 2 1188 | sll TMP1, TMP1, 2
1062 | addu TMP1, DISPATCH, TMP1 1189 | addu TMP1, DISPATCH, TMP1
1063 | b <2 1190 | b <2
1064 |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) 1191 |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
1065 | 1192 |
1066 |.ffunc_2 setmetatable 1193 |.ffunc_2 setmetatable
1067 | // Fast path: no mt for table yet and not clearing the mt. 1194 | // Fast path: no mt for table yet and not clearing the mt.
1068 | li AT, LJ_TTAB 1195 | li AT, LJ_TTAB
1069 | bne CARG3, AT, ->fff_fallback 1196 | bne SFARG1HI, AT, ->fff_fallback
1070 |. addiu CARG4, CARG4, -LJ_TTAB 1197 |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
1071 | lw TAB:TMP1, TAB:CARG1->metatable 1198 | lw TAB:TMP1, TAB:SFARG1LO->metatable
1072 | lbu TMP3, TAB:CARG1->marked 1199 | lbu TMP3, TAB:SFARG1LO->marked
1073 | or AT, CARG4, TAB:TMP1 1200 | or AT, SFARG2HI, TAB:TMP1
1074 | bnez AT, ->fff_fallback 1201 | bnez AT, ->fff_fallback
1075 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) 1202 |. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
1076 | beqz AT, ->fff_restv 1203 | beqz AT, ->fff_restv
1077 |. sw TAB:CARG2, TAB:CARG1->metatable 1204 |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
1078 | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv 1205 | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
1079 | 1206 |
1080 |.ffunc rawget 1207 |.ffunc rawget
1081 | lw CARG4, HI(BASE) 1208 | lw CARG4, HI(BASE)
@@ -1089,44 +1216,44 @@ static void build_subroutines(BuildCtx *ctx)
1089 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1216 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1090 |. move CARG1, L 1217 |. move CARG1, L
1091 | // Returns cTValue *. 1218 | // Returns cTValue *.
1092 | b ->fff_resn 1219 | lw SFARG1HI, HI(CRET1)
1093 |. ldc1 FRET1, 0(CRET1) 1220 | b ->fff_restv
1221 |. lw SFARG1LO, LO(CRET1)
1094 | 1222 |
1095 |//-- Base library: conversions ------------------------------------------ 1223 |//-- Base library: conversions ------------------------------------------
1096 | 1224 |
1097 |.ffunc tonumber 1225 |.ffunc tonumber
1098 | // Only handles the number case inline (without a base argument). 1226 | // Only handles the number case inline (without a base argument).
1099 | lw CARG1, HI(BASE) 1227 | lw CARG1, HI(BASE)
1100 | xori AT, NARGS8:RC, 8 1228 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1101 | sltiu CARG1, CARG1, LJ_TISNUM 1229 | sltu TMP0, TISNUM, CARG1
1102 | movn CARG1, r0, AT 1230 | or AT, AT, TMP0
1103 | beqz CARG1, ->fff_fallback // Exactly one number argument. 1231 | bnez AT, ->fff_fallback
1104 |. ldc1 FRET1, 0(BASE) 1232 |. lw SFARG1HI, HI(BASE)
1105 | b ->fff_resn 1233 | b ->fff_restv
1106 |. nop 1234 |. lw SFARG1LO, LO(BASE)
1107 | 1235 |
1108 |.ffunc_1 tostring 1236 |.ffunc_1 tostring
1109 | // Only handles the string or number case inline. 1237 | // Only handles the string or number case inline.
1110 | li AT, LJ_TSTR 1238 | li AT, LJ_TSTR
1111 | // A __tostring method in the string base metatable is ignored. 1239 | // A __tostring method in the string base metatable is ignored.
1112 | beq CARG3, AT, ->fff_restv // String key? 1240 | beq SFARG1HI, AT, ->fff_restv // String key?
1113 | // Handle numbers inline, unless a number base metatable is present. 1241 | // Handle numbers inline, unless a number base metatable is present.
1114 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) 1242 |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1115 | sltiu TMP0, CARG3, LJ_TISNUM 1243 | sltu TMP0, TISNUM, SFARG1HI
1116 | sltiu TMP1, TMP1, 1 1244 | or TMP0, TMP0, TMP1
1117 | and TMP0, TMP0, TMP1 1245 | bnez TMP0, ->fff_fallback
1118 | beqz TMP0, ->fff_fallback
1119 |. sw BASE, L->base // Add frame since C call can throw. 1246 |. sw BASE, L->base // Add frame since C call can throw.
1120 | ffgccheck 1247 | ffgccheck
1121 |. sw PC, SAVE_PC // Redundant (but a defined value). 1248 |. sw PC, SAVE_PC // Redundant (but a defined value).
1122 | load_got lj_str_fromnum 1249 | load_got lj_strfmt_number
1123 | move CARG1, L 1250 | move CARG1, L
1124 | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) 1251 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1125 |. move CARG2, BASE 1252 |. move CARG2, BASE
1126 | // Returns GCstr *. 1253 | // Returns GCstr *.
1127 | li CARG3, LJ_TSTR 1254 | li SFARG1HI, LJ_TSTR
1128 | b ->fff_restv 1255 | b ->fff_restv
1129 |. move CARG1, CRET1 1256 |. move SFARG1LO, CRET1
1130 | 1257 |
1131 |//-- Base library: iterators ------------------------------------------- 1258 |//-- Base library: iterators -------------------------------------------
1132 | 1259 |
@@ -1148,31 +1275,38 @@ static void build_subroutines(BuildCtx *ctx)
1148 |. move CARG1, L 1275 |. move CARG1, L
1149 | // Returns 0 at end of traversal. 1276 | // Returns 0 at end of traversal.
1150 | beqz CRET1, ->fff_restv // End of traversal: return nil. 1277 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1151 |. li CARG3, LJ_TNIL 1278 |. li SFARG1HI, LJ_TNIL
1152 | ldc1 f0, 8(BASE) // Copy key and value to results. 1279 | lw TMP0, 8+HI(BASE)
1280 | lw TMP1, 8+LO(BASE)
1153 | addiu RA, BASE, -8 1281 | addiu RA, BASE, -8
1154 | ldc1 f2, 16(BASE) 1282 | lw TMP2, 16+HI(BASE)
1155 | li RD, (2+1)*8 1283 | lw TMP3, 16+LO(BASE)
1156 | sdc1 f0, 0(RA) 1284 | sw TMP0, HI(RA)
1285 | sw TMP1, LO(RA)
1286 | sw TMP2, 8+HI(RA)
1287 | sw TMP3, 8+LO(RA)
1157 | b ->fff_res 1288 | b ->fff_res
1158 |. sdc1 f2, 8(RA) 1289 |. li RD, (2+1)*8
1159 | 1290 |
1160 |.ffunc_1 pairs 1291 |.ffunc_1 pairs
1161 | li AT, LJ_TTAB 1292 | li AT, LJ_TTAB
1162 | bne CARG3, AT, ->fff_fallback 1293 | bne SFARG1HI, AT, ->fff_fallback
1163 |. lw PC, FRAME_PC(BASE) 1294 |. lw PC, FRAME_PC(BASE)
1164#if LJ_52 1295#if LJ_52
1165 | lw TAB:TMP2, TAB:CARG1->metatable 1296 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1166 | ldc1 f0, CFUNC:RB->upvalue[0] 1297 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1298 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1167 | bnez TAB:TMP2, ->fff_fallback 1299 | bnez TAB:TMP2, ->fff_fallback
1168#else 1300#else
1169 | ldc1 f0, CFUNC:RB->upvalue[0] 1301 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1302 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1170#endif 1303#endif
1171 |. addiu RA, BASE, -8 1304 |. addiu RA, BASE, -8
1172 | sw TISNIL, 8+HI(BASE) 1305 | sw TISNIL, 8+HI(BASE)
1173 | li RD, (3+1)*8 1306 | sw TMP0, HI(RA)
1307 | sw TMP1, LO(RA)
1174 | b ->fff_res 1308 | b ->fff_res
1175 |. sdc1 f0, 0(RA) 1309 |. li RD, (3+1)*8
1176 | 1310 |
1177 |.ffunc ipairs_aux 1311 |.ffunc ipairs_aux
1178 | sltiu AT, NARGS8:RC, 16 1312 | sltiu AT, NARGS8:RC, 16
@@ -1180,35 +1314,32 @@ static void build_subroutines(BuildCtx *ctx)
1180 | lw TAB:CARG1, LO(BASE) 1314 | lw TAB:CARG1, LO(BASE)
1181 | lw CARG4, 8+HI(BASE) 1315 | lw CARG4, 8+HI(BASE)
1182 | bnez AT, ->fff_fallback 1316 | bnez AT, ->fff_fallback
1183 |. ldc1 FARG2, 8(BASE) 1317 |. addiu CARG3, CARG3, -LJ_TTAB
1184 | addiu CARG3, CARG3, -LJ_TTAB 1318 | xor CARG4, CARG4, TISNUM
1185 | sltiu AT, CARG4, LJ_TISNUM 1319 | and AT, CARG3, CARG4
1186 | li TMP0, 1 1320 | bnez AT, ->fff_fallback
1187 | movn AT, r0, CARG3
1188 | mtc1 TMP0, FARG1
1189 | beqz AT, ->fff_fallback
1190 |. lw PC, FRAME_PC(BASE) 1321 |. lw PC, FRAME_PC(BASE)
1191 | cvt.w.d FRET1, FARG2 1322 | lw TMP2, 8+LO(BASE)
1192 | cvt.d.w FARG1, FARG1
1193 | lw TMP0, TAB:CARG1->asize 1323 | lw TMP0, TAB:CARG1->asize
1194 | lw TMP1, TAB:CARG1->array 1324 | lw TMP1, TAB:CARG1->array
1195 | mfc1 TMP2, FRET1
1196 | addiu RA, BASE, -8
1197 | add.d FARG2, FARG2, FARG1
1198 | addiu TMP2, TMP2, 1 1325 | addiu TMP2, TMP2, 1
1326 | sw TISNUM, -8+HI(BASE)
1199 | sltu AT, TMP2, TMP0 1327 | sltu AT, TMP2, TMP0
1328 | sw TMP2, -8+LO(BASE)
1329 | beqz AT, >2 // Not in array part?
1330 |. addiu RA, BASE, -8
1200 | sll TMP3, TMP2, 3 1331 | sll TMP3, TMP2, 3
1201 | addu TMP3, TMP1, TMP3 1332 | addu TMP3, TMP1, TMP3
1202 | beqz AT, >2 // Not in array part? 1333 | lw TMP1, HI(TMP3)
1203 |. sdc1 FARG2, 0(RA) 1334 | lw TMP2, LO(TMP3)
1204 | lw TMP2, HI(TMP3)
1205 | ldc1 f0, 0(TMP3)
1206 |1: 1335 |1:
1207 | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. 1336 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1208 |. li RD, (0+1)*8 1337 |. li RD, (0+1)*8
1209 | li RD, (2+1)*8 1338 | sw TMP1, 8+HI(RA)
1339 | sw TMP2, 8+LO(RA)
1210 | b ->fff_res 1340 | b ->fff_res
1211 |. sdc1 f0, 8(RA) 1341 |. li RD, (2+1)*8
1342 |
1212 |2: // Check for empty hash part first. Otherwise call C function. 1343 |2: // Check for empty hash part first. Otherwise call C function.
1213 | lw TMP0, TAB:CARG1->hmask 1344 | lw TMP0, TAB:CARG1->hmask
1214 | load_got lj_tab_getinth 1345 | load_got lj_tab_getinth
@@ -1219,27 +1350,30 @@ static void build_subroutines(BuildCtx *ctx)
1219 | // Returns cTValue * or NULL. 1350 | // Returns cTValue * or NULL.
1220 | beqz CRET1, ->fff_res 1351 | beqz CRET1, ->fff_res
1221 |. li RD, (0+1)*8 1352 |. li RD, (0+1)*8
1222 | lw TMP2, HI(CRET1) 1353 | lw TMP1, HI(CRET1)
1223 | b <1 1354 | b <1
1224 |. ldc1 f0, 0(CRET1) 1355 |. lw TMP2, LO(CRET1)
1225 | 1356 |
1226 |.ffunc_1 ipairs 1357 |.ffunc_1 ipairs
1227 | li AT, LJ_TTAB 1358 | li AT, LJ_TTAB
1228 | bne CARG3, AT, ->fff_fallback 1359 | bne SFARG1HI, AT, ->fff_fallback
1229 |. lw PC, FRAME_PC(BASE) 1360 |. lw PC, FRAME_PC(BASE)
1230#if LJ_52 1361#if LJ_52
1231 | lw TAB:TMP2, TAB:CARG1->metatable 1362 | lw TAB:TMP2, TAB:SFARG1LO->metatable
1232 | ldc1 f0, CFUNC:RB->upvalue[0] 1363 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1364 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1233 | bnez TAB:TMP2, ->fff_fallback 1365 | bnez TAB:TMP2, ->fff_fallback
1234#else 1366#else
1235 | ldc1 f0, CFUNC:RB->upvalue[0] 1367 | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
1368 | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
1236#endif 1369#endif
1237 |. addiu RA, BASE, -8 1370 |. addiu RA, BASE, -8
1238 | sw r0, 8+HI(BASE) 1371 | sw TISNUM, 8+HI(BASE)
1239 | sw r0, 8+LO(BASE) 1372 | sw r0, 8+LO(BASE)
1240 | li RD, (3+1)*8 1373 | sw TMP0, HI(RA)
1374 | sw TMP1, LO(RA)
1241 | b ->fff_res 1375 | b ->fff_res
1242 |. sdc1 f0, 0(RA) 1376 |. li RD, (3+1)*8
1243 | 1377 |
1244 |//-- Base library: catch errors ---------------------------------------- 1378 |//-- Base library: catch errors ----------------------------------------
1245 | 1379 |
@@ -1259,8 +1393,9 @@ static void build_subroutines(BuildCtx *ctx)
1259 | sltiu AT, NARGS8:RC, 16 1393 | sltiu AT, NARGS8:RC, 16
1260 | lw CARG4, 8+HI(BASE) 1394 | lw CARG4, 8+HI(BASE)
1261 | bnez AT, ->fff_fallback 1395 | bnez AT, ->fff_fallback
1262 |. ldc1 FARG2, 8(BASE) 1396 |. lw CARG3, 8+LO(BASE)
1263 | ldc1 FARG1, 0(BASE) 1397 | lw CARG1, LO(BASE)
1398 | lw CARG2, HI(BASE)
1264 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1399 | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1265 | li AT, LJ_TFUNC 1400 | li AT, LJ_TFUNC
1266 | move TMP2, BASE 1401 | move TMP2, BASE
@@ -1268,9 +1403,11 @@ static void build_subroutines(BuildCtx *ctx)
1268 | addiu BASE, BASE, 16 1403 | addiu BASE, BASE, 16
1269 | // Remember active hook before pcall. 1404 | // Remember active hook before pcall.
1270 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT 1405 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1271 | sdc1 FARG2, 0(TMP2) // Swap function and traceback. 1406 | sw CARG3, LO(TMP2) // Swap function and traceback.
1407 | sw CARG4, HI(TMP2)
1272 | andi TMP3, TMP3, 1 1408 | andi TMP3, TMP3, 1
1273 | sdc1 FARG1, 8(TMP2) 1409 | sw CARG1, 8+LO(TMP2)
1410 | sw CARG2, 8+HI(TMP2)
1274 | addiu PC, TMP3, 16+FRAME_PCALL 1411 | addiu PC, TMP3, 16+FRAME_PCALL
1275 | b ->vm_call_dispatch 1412 | b ->vm_call_dispatch
1276 |. addiu NARGS8:RC, NARGS8:RC, -16 1413 |. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1279,7 +1416,10 @@ static void build_subroutines(BuildCtx *ctx)
1279 | 1416 |
1280 |.macro coroutine_resume_wrap, resume 1417 |.macro coroutine_resume_wrap, resume
1281 |.if resume 1418 |.if resume
1282 |.ffunc_1 coroutine_resume 1419 |.ffunc coroutine_resume
1420 | lw CARG3, HI(BASE)
1421 | beqz NARGS8:RC, ->fff_fallback
1422 |. lw CARG1, LO(BASE)
1283 | li AT, LJ_TTHREAD 1423 | li AT, LJ_TTHREAD
1284 | bne CARG3, AT, ->fff_fallback 1424 | bne CARG3, AT, ->fff_fallback
1285 |.else 1425 |.else
@@ -1314,11 +1454,13 @@ static void build_subroutines(BuildCtx *ctx)
1314 | move CARG3, CARG2 1454 | move CARG3, CARG2
1315 | sw BASE, L->top 1455 | sw BASE, L->top
1316 |2: // Move args to coroutine. 1456 |2: // Move args to coroutine.
1317 | ldc1 f0, 0(BASE) 1457 | lw SFRETHI, HI(BASE)
1458 | lw SFRETLO, LO(BASE)
1318 | sltu AT, BASE, TMP1 1459 | sltu AT, BASE, TMP1
1319 | beqz AT, >3 1460 | beqz AT, >3
1320 |. addiu BASE, BASE, 8 1461 |. addiu BASE, BASE, 8
1321 | sdc1 f0, 0(CARG3) 1462 | sw SFRETHI, HI(CARG3)
1463 | sw SFRETLO, LO(CARG3)
1322 | b <2 1464 | b <2
1323 |. addiu CARG3, CARG3, 8 1465 |. addiu CARG3, CARG3, 8
1324 |3: 1466 |3:
@@ -1331,6 +1473,7 @@ static void build_subroutines(BuildCtx *ctx)
1331 | lw TMP3, L:RA->top 1473 | lw TMP3, L:RA->top
1332 | li_vmstate INTERP 1474 | li_vmstate INTERP
1333 | lw BASE, L->base 1475 | lw BASE, L->base
1476 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
1334 | st_vmstate 1477 | st_vmstate
1335 | beqz AT, >8 1478 | beqz AT, >8
1336 |. subu RD, TMP3, TMP2 1479 |. subu RD, TMP3, TMP2
@@ -1343,10 +1486,12 @@ static void build_subroutines(BuildCtx *ctx)
1343 | sw TMP2, L:RA->top // Clear coroutine stack. 1486 | sw TMP2, L:RA->top // Clear coroutine stack.
1344 | move TMP1, BASE 1487 | move TMP1, BASE
1345 |5: // Move results from coroutine. 1488 |5: // Move results from coroutine.
1346 | ldc1 f0, 0(TMP2) 1489 | lw SFRETHI, HI(TMP2)
1490 | lw SFRETLO, LO(TMP2)
1347 | addiu TMP2, TMP2, 8 1491 | addiu TMP2, TMP2, 8
1348 | sltu AT, TMP2, TMP3 1492 | sltu AT, TMP2, TMP3
1349 | sdc1 f0, 0(TMP1) 1493 | sw SFRETHI, HI(TMP1)
1494 | sw SFRETLO, LO(TMP1)
1350 | bnez AT, <5 1495 | bnez AT, <5
1351 |. addiu TMP1, TMP1, 8 1496 |. addiu TMP1, TMP1, 8
1352 |6: 1497 |6:
@@ -1371,12 +1516,14 @@ static void build_subroutines(BuildCtx *ctx)
1371 |.if resume 1516 |.if resume
1372 | addiu TMP3, TMP3, -8 1517 | addiu TMP3, TMP3, -8
1373 | li TMP1, LJ_TFALSE 1518 | li TMP1, LJ_TFALSE
1374 | ldc1 f0, 0(TMP3) 1519 | lw SFRETHI, HI(TMP3)
1520 | lw SFRETLO, LO(TMP3)
1375 | sw TMP3, L:RA->top // Remove error from coroutine stack. 1521 | sw TMP3, L:RA->top // Remove error from coroutine stack.
1376 | li RD, (2+1)*8 1522 | li RD, (2+1)*8
1377 | sw TMP1, -8+HI(BASE) // Prepend false to results. 1523 | sw TMP1, -8+HI(BASE) // Prepend false to results.
1378 | addiu RA, BASE, -8 1524 | addiu RA, BASE, -8
1379 | sdc1 f0, 0(BASE) // Copy error message. 1525 | sw SFRETHI, HI(BASE) // Copy error message.
1526 | sw SFRETLO, LO(BASE)
1380 | b <7 1527 | b <7
1381 |. andi TMP0, PC, FRAME_TYPE 1528 |. andi TMP0, PC, FRAME_TYPE
1382 |.else 1529 |.else
@@ -1412,20 +1559,29 @@ static void build_subroutines(BuildCtx *ctx)
1412 | 1559 |
1413 |//-- Math library ------------------------------------------------------- 1560 |//-- Math library -------------------------------------------------------
1414 | 1561 |
1415 |.ffunc_n math_abs 1562 |.ffunc_1 math_abs
1416 |. abs.d FRET1, FARG1 1563 | bne SFARG1HI, TISNUM, >1
1417 |->fff_resn: 1564 |. sra TMP0, SFARG1LO, 31
1418 | lw PC, FRAME_PC(BASE) 1565 | xor TMP1, SFARG1LO, TMP0
1419 | addiu RA, BASE, -8 1566 | subu SFARG1LO, TMP1, TMP0
1420 | b ->fff_res1 1567 | bgez SFARG1LO, ->fff_restv
1421 |. sdc1 FRET1, -8(BASE) 1568 |. nop
1569 | lui SFARG1HI, 0x41e0 // 2^31 as a double.
1570 | b ->fff_restv
1571 |. li SFARG1LO, 0
1572 |1:
1573 | sltiu AT, SFARG1HI, LJ_TISNUM
1574 | beqz AT, ->fff_fallback
1575 |. sll SFARG1HI, SFARG1HI, 1
1576 | srl SFARG1HI, SFARG1HI, 1
1577 |// fallthrough
1422 | 1578 |
1423 |->fff_restv: 1579 |->fff_restv:
1424 | // CARG3/CARG1 = TValue result. 1580 | // SFARG1LO/SFARG1HI = TValue result.
1425 | lw PC, FRAME_PC(BASE) 1581 | lw PC, FRAME_PC(BASE)
1426 | sw CARG3, -8+HI(BASE) 1582 | sw SFARG1HI, -8+HI(BASE)
1427 | addiu RA, BASE, -8 1583 | addiu RA, BASE, -8
1428 | sw CARG1, -8+LO(BASE) 1584 | sw SFARG1LO, -8+LO(BASE)
1429 |->fff_res1: 1585 |->fff_res1:
1430 | // RA = results, PC = return. 1586 | // RA = results, PC = return.
1431 | li RD, (1+1)*8 1587 | li RD, (1+1)*8
@@ -1454,15 +1610,19 @@ static void build_subroutines(BuildCtx *ctx)
1454 |. sw TISNIL, -8+HI(TMP1) 1610 |. sw TISNIL, -8+HI(TMP1)
1455 | 1611 |
1456 |.macro math_extern, func 1612 |.macro math_extern, func
1457 |->ff_math_ .. func: 1613 | .ffunc math_ .. func
1458 | lw CARG3, HI(BASE) 1614 | lw SFARG1HI, HI(BASE)
1459 | beqz NARGS8:RC, ->fff_fallback 1615 | beqz NARGS8:RC, ->fff_fallback
1460 |. load_got func 1616 |. load_got func
1461 | sltiu AT, CARG3, LJ_TISNUM 1617 | sltiu AT, SFARG1HI, LJ_TISNUM
1462 | beqz AT, ->fff_fallback 1618 | beqz AT, ->fff_fallback
1463 |. nop 1619 |.if FPU
1464 | call_extern
1465 |. ldc1 FARG1, 0(BASE) 1620 |. ldc1 FARG1, 0(BASE)
1621 |.else
1622 |. lw SFARG1LO, LO(BASE)
1623 |.endif
1624 | call_extern
1625 |. nop
1466 | b ->fff_resn 1626 | b ->fff_resn
1467 |. nop 1627 |. nop
1468 |.endmacro 1628 |.endmacro
@@ -1476,10 +1636,22 @@ static void build_subroutines(BuildCtx *ctx)
1476 |. nop 1636 |. nop
1477 |.endmacro 1637 |.endmacro
1478 | 1638 |
1639 |// TODO: Return integer type if result is integer (own sf implementation).
1479 |.macro math_round, func 1640 |.macro math_round, func
1480 | .ffunc_n math_ .. func 1641 |->ff_math_ .. func:
1481 |. nop 1642 | lw SFARG1HI, HI(BASE)
1643 | beqz NARGS8:RC, ->fff_fallback
1644 |. lw SFARG1LO, LO(BASE)
1645 | beq SFARG1HI, TISNUM, ->fff_restv
1646 |. sltu AT, SFARG1HI, TISNUM
1647 | beqz AT, ->fff_fallback
1648 |.if FPU
1649 |. ldc1 FARG1, 0(BASE)
1482 | bal ->vm_ .. func 1650 | bal ->vm_ .. func
1651 |.else
1652 |. load_got func
1653 | call_extern
1654 |.endif
1483 |. nop 1655 |. nop
1484 | b ->fff_resn 1656 | b ->fff_resn
1485 |. nop 1657 |. nop
@@ -1489,15 +1661,19 @@ static void build_subroutines(BuildCtx *ctx)
1489 | math_round ceil 1661 | math_round ceil
1490 | 1662 |
1491 |.ffunc math_log 1663 |.ffunc math_log
1492 | lw CARG3, HI(BASE)
1493 | li AT, 8 1664 | li AT, 8
1494 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1665 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1495 |. load_got log 1666 |. lw SFARG1HI, HI(BASE)
1496 | sltiu AT, CARG3, LJ_TISNUM 1667 | sltiu AT, SFARG1HI, LJ_TISNUM
1497 | beqz AT, ->fff_fallback 1668 | beqz AT, ->fff_fallback
1498 |. nop 1669 |. load_got log
1670 |.if FPU
1499 | call_extern 1671 | call_extern
1500 |. ldc1 FARG1, 0(BASE) 1672 |. ldc1 FARG1, 0(BASE)
1673 |.else
1674 | call_extern
1675 |. lw SFARG1LO, LO(BASE)
1676 |.endif
1501 | b ->fff_resn 1677 | b ->fff_resn
1502 |. nop 1678 |. nop
1503 | 1679 |
@@ -1516,23 +1692,43 @@ static void build_subroutines(BuildCtx *ctx)
1516 | math_extern2 atan2 1692 | math_extern2 atan2
1517 | math_extern2 fmod 1693 | math_extern2 fmod
1518 | 1694 |
1695 |.if FPU
1519 |.ffunc_n math_sqrt 1696 |.ffunc_n math_sqrt
1520 |. sqrt.d FRET1, FARG1 1697 |. sqrt.d FRET1, FARG1
1521 | b ->fff_resn 1698 |// fallthrough to ->fff_resn
1522 |. nop 1699 |.else
1700 | math_extern sqrt
1701 |.endif
1702 |
1703 |->fff_resn:
1704 | lw PC, FRAME_PC(BASE)
1705 | addiu RA, BASE, -8
1706 |.if FPU
1707 | b ->fff_res1
1708 |. sdc1 FRET1, -8(BASE)
1709 |.else
1710 | sw SFRETHI, -8+HI(BASE)
1711 | b ->fff_res1
1712 |. sw SFRETLO, -8+LO(BASE)
1713 |.endif
1523 | 1714 |
1524 |->ff_math_deg:
1525 |.ffunc_n math_rad
1526 |. ldc1 FARG2, CFUNC:RB->upvalue[0]
1527 | b ->fff_resn
1528 |. mul.d FRET1, FARG1, FARG2
1529 | 1715 |
1530 |.ffunc_nn math_ldexp 1716 |.ffunc math_ldexp
1531 | cvt.w.d FARG2, FARG2 1717 | sltiu AT, NARGS8:RC, 16
1718 | lw SFARG1HI, HI(BASE)
1719 | bnez AT, ->fff_fallback
1720 |. lw CARG4, 8+HI(BASE)
1721 | bne CARG4, TISNUM, ->fff_fallback
1532 | load_got ldexp 1722 | load_got ldexp
1533 | mfc1 CARG3, FARG2 1723 |. sltu AT, SFARG1HI, TISNUM
1724 | beqz AT, ->fff_fallback
1725 |.if FPU
1726 |. ldc1 FARG1, 0(BASE)
1727 |.else
1728 |. lw SFARG1LO, LO(BASE)
1729 |.endif
1534 | call_extern 1730 | call_extern
1535 |. nop 1731 |. lw CARG3, 8+LO(BASE)
1536 | b ->fff_resn 1732 | b ->fff_resn
1537 |. nop 1733 |. nop
1538 | 1734 |
@@ -1543,10 +1739,17 @@ static void build_subroutines(BuildCtx *ctx)
1543 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) 1739 |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
1544 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) 1740 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
1545 | addiu RA, BASE, -8 1741 | addiu RA, BASE, -8
1742 |.if FPU
1546 | mtc1 TMP1, FARG2 1743 | mtc1 TMP1, FARG2
1547 | sdc1 FRET1, 0(RA) 1744 | sdc1 FRET1, 0(RA)
1548 | cvt.d.w FARG2, FARG2 1745 | cvt.d.w FARG2, FARG2
1549 | sdc1 FARG2, 8(RA) 1746 | sdc1 FARG2, 8(RA)
1747 |.else
1748 | sw SFRETLO, LO(RA)
1749 | sw SFRETHI, HI(RA)
1750 | sw TMP1, 8+LO(RA)
1751 | sw TISNUM, 8+HI(RA)
1752 |.endif
1550 | b ->fff_res 1753 | b ->fff_res
1551 |. li RD, (2+1)*8 1754 |. li RD, (2+1)*8
1552 | 1755 |
@@ -1556,49 +1759,101 @@ static void build_subroutines(BuildCtx *ctx)
1556 | call_extern 1759 | call_extern
1557 |. addiu CARG3, BASE, -8 1760 |. addiu CARG3, BASE, -8
1558 | addiu RA, BASE, -8 1761 | addiu RA, BASE, -8
1762 |.if FPU
1559 | sdc1 FRET1, 0(BASE) 1763 | sdc1 FRET1, 0(BASE)
1764 |.else
1765 | sw SFRETLO, LO(BASE)
1766 | sw SFRETHI, HI(BASE)
1767 |.endif
1560 | b ->fff_res 1768 | b ->fff_res
1561 |. li RD, (2+1)*8 1769 |. li RD, (2+1)*8
1562 | 1770 |
1563 |.macro math_minmax, name, ismax 1771 |.macro math_minmax, name, intins, fpins
1564 |->ff_ .. name: 1772 | .ffunc_1 name
1565 | lw CARG3, HI(BASE) 1773 | addu TMP3, BASE, NARGS8:RC
1566 | beqz NARGS8:RC, ->fff_fallback 1774 | bne SFARG1HI, TISNUM, >5
1567 |. ldc1 FRET1, 0(BASE) 1775 |. addiu TMP2, BASE, 8
1568 | sltiu AT, CARG3, LJ_TISNUM 1776 |1: // Handle integers.
1777 |. lw SFARG2HI, HI(TMP2)
1778 | beq TMP2, TMP3, ->fff_restv
1779 |. lw SFARG2LO, LO(TMP2)
1780 | bne SFARG2HI, TISNUM, >3
1781 |. slt AT, SFARG1LO, SFARG2LO
1782 | intins SFARG1LO, SFARG2LO, AT
1783 | b <1
1784 |. addiu TMP2, TMP2, 8
1785 |
1786 |3: // Convert intermediate result to number and continue with number loop.
1787 | sltiu AT, SFARG2HI, LJ_TISNUM
1569 | beqz AT, ->fff_fallback 1788 | beqz AT, ->fff_fallback
1570 |. addu TMP2, BASE, NARGS8:RC 1789 |.if FPU
1571 | addiu TMP1, BASE, 8 1790 |. mtc1 SFARG1LO, FRET1
1572 | beq TMP1, TMP2, ->fff_resn 1791 | cvt.d.w FRET1, FRET1
1573 |1: 1792 | b >7
1574 |. lw CARG3, HI(TMP1) 1793 |. ldc1 FARG1, 0(TMP2)
1575 | ldc1 FARG1, 0(TMP1) 1794 |.else
1576 | addiu TMP1, TMP1, 8 1795 |. nop
1577 | sltiu AT, CARG3, LJ_TISNUM 1796 | bal ->vm_sfi2d_1
1797 |. nop
1798 | b >7
1799 |. nop
1800 |.endif
1801 |
1802 |5:
1803 |. sltiu AT, SFARG1HI, LJ_TISNUM
1578 | beqz AT, ->fff_fallback 1804 | beqz AT, ->fff_fallback
1579 |.if ismax 1805 |.if FPU
1580 |. c.olt.d FARG1, FRET1 1806 |. ldc1 FRET1, 0(BASE)
1807 |.endif
1808 |
1809 |6: // Handle numbers.
1810 |. lw SFARG2HI, HI(TMP2)
1811 |.if FPU
1812 | beq TMP2, TMP3, ->fff_resn
1581 |.else 1813 |.else
1582 |. c.olt.d FRET1, FARG1 1814 | beq TMP2, TMP3, ->fff_restv
1583 |.endif 1815 |.endif
1584 | bne TMP1, TMP2, <1 1816 |. sltiu AT, SFARG2HI, LJ_TISNUM
1585 |. movf.d FRET1, FARG1 1817 | beqz AT, >8
1586 | b ->fff_resn 1818 |.if FPU
1819 |. ldc1 FARG1, 0(TMP2)
1820 |.else
1821 |. lw SFARG2LO, LO(TMP2)
1822 |.endif
1823 |7:
1824 |.if FPU
1825 | c.olt.d FRET1, FARG1
1826 | fpins FRET1, FARG1
1827 |.else
1828 | bal ->vm_sfcmpolt
1587 |. nop 1829 |. nop
1830 | intins SFARG1LO, SFARG2LO, CRET1
1831 | intins SFARG1HI, SFARG2HI, CRET1
1832 |.endif
1833 | b <6
1834 |. addiu TMP2, TMP2, 8
1835 |
1836 |8: // Convert integer to number and continue with number loop.
1837 | bne SFARG2HI, TISNUM, ->fff_fallback
1838 |.if FPU
1839 |. lwc1 FARG1, LO(TMP2)
1840 | b <7
1841 |. cvt.d.w FARG1, FARG1
1842 |.else
1843 |. nop
1844 | bal ->vm_sfi2d_2
1845 |. nop
1846 | b <7
1847 |. nop
1848 |.endif
1849 |
1588 |.endmacro 1850 |.endmacro
1589 | 1851 |
1590 | math_minmax math_min, 0 1852 | math_minmax math_min, movz, movf.d
1591 | math_minmax math_max, 1 1853 | math_minmax math_max, movn, movt.d
1592 | 1854 |
1593 |//-- String library ----------------------------------------------------- 1855 |//-- String library -----------------------------------------------------
1594 | 1856 |
1595 |.ffunc_1 string_len
1596 | li AT, LJ_TSTR
1597 | bne CARG3, AT, ->fff_fallback
1598 |. nop
1599 | b ->fff_resi
1600 |. lw CRET1, STR:CARG1->len
1601 |
1602 |.ffunc string_byte // Only handle the 1-arg case here. 1857 |.ffunc string_byte // Only handle the 1-arg case here.
1603 | lw CARG3, HI(BASE) 1858 | lw CARG3, HI(BASE)
1604 | lw STR:CARG1, LO(BASE) 1859 | lw STR:CARG1, LO(BASE)
@@ -1608,33 +1863,31 @@ static void build_subroutines(BuildCtx *ctx)
1608 | bnez AT, ->fff_fallback // Need exactly 1 string argument. 1863 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1609 |. nop 1864 |. nop
1610 | lw TMP0, STR:CARG1->len 1865 | lw TMP0, STR:CARG1->len
1611 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1612 | addiu RA, BASE, -8 1866 | addiu RA, BASE, -8
1867 | lw PC, FRAME_PC(BASE)
1613 | sltu RD, r0, TMP0 1868 | sltu RD, r0, TMP0
1614 | mtc1 TMP1, f0 1869 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1615 | addiu RD, RD, 1 1870 | addiu RD, RD, 1
1616 | cvt.d.w f0, f0
1617 | lw PC, FRAME_PC(BASE)
1618 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 1871 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1872 | sw TISNUM, HI(RA)
1619 | b ->fff_res 1873 | b ->fff_res
1620 |. sdc1 f0, 0(RA) 1874 |. sw TMP1, LO(RA)
1621 | 1875 |
1622 |.ffunc string_char // Only handle the 1-arg case here. 1876 |.ffunc string_char // Only handle the 1-arg case here.
1623 | ffgccheck 1877 | ffgccheck
1624 |. nop 1878 |. nop
1625 | lw CARG3, HI(BASE) 1879 | lw CARG3, HI(BASE)
1626 | ldc1 FARG1, 0(BASE) 1880 | lw CARG1, LO(BASE)
1627 | li AT, 8 1881 | li TMP1, 255
1628 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. 1882 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1629 |. sltiu AT, CARG3, LJ_TISNUM 1883 | xor TMP0, CARG3, TISNUM // Integer.
1630 | beqz AT, ->fff_fallback 1884 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1885 | or AT, AT, TMP0
1886 | or AT, AT, TMP1
1887 | bnez AT, ->fff_fallback
1631 |. li CARG3, 1 1888 |. li CARG3, 1
1632 | cvt.w.d FARG1, FARG1
1633 | addiu CARG2, sp, ARG5_OFS 1889 | addiu CARG2, sp, ARG5_OFS
1634 | sltiu AT, TMP0, 256 1890 | sb CARG1, ARG5
1635 | mfc1 TMP0, FARG1
1636 | beqz AT, ->fff_fallback
1637 |. sw TMP0, ARG5
1638 |->fff_newstr: 1891 |->fff_newstr:
1639 | load_got lj_str_new 1892 | load_got lj_str_new
1640 | sw BASE, L->base 1893 | sw BASE, L->base
@@ -1643,35 +1896,30 @@ static void build_subroutines(BuildCtx *ctx)
1643 |. move CARG1, L 1896 |. move CARG1, L
1644 | // Returns GCstr *. 1897 | // Returns GCstr *.
1645 | lw BASE, L->base 1898 | lw BASE, L->base
1646 | move CARG1, CRET1 1899 |->fff_resstr:
1900 | move SFARG1LO, CRET1
1647 | b ->fff_restv 1901 | b ->fff_restv
1648 |. li CARG3, LJ_TSTR 1902 |. li SFARG1HI, LJ_TSTR
1649 | 1903 |
1650 |.ffunc string_sub 1904 |.ffunc string_sub
1651 | ffgccheck 1905 | ffgccheck
1652 |. nop 1906 |. nop
1653 | addiu AT, NARGS8:RC, -16 1907 | addiu AT, NARGS8:RC, -16
1654 | lw CARG3, 16+HI(BASE) 1908 | lw CARG3, 16+HI(BASE)
1655 | ldc1 f0, 16(BASE)
1656 | lw TMP0, HI(BASE) 1909 | lw TMP0, HI(BASE)
1657 | lw STR:CARG1, LO(BASE) 1910 | lw STR:CARG1, LO(BASE)
1658 | bltz AT, ->fff_fallback 1911 | bltz AT, ->fff_fallback
1659 | lw CARG2, 8+HI(BASE) 1912 |. lw CARG2, 8+HI(BASE)
1660 | ldc1 f2, 8(BASE)
1661 | beqz AT, >1 1913 | beqz AT, >1
1662 |. li CARG4, -1 1914 |. li CARG4, -1
1663 | cvt.w.d f0, f0 1915 | bne CARG3, TISNUM, ->fff_fallback
1664 | sltiu AT, CARG3, LJ_TISNUM 1916 |. lw CARG4, 16+LO(BASE)
1665 | beqz AT, ->fff_fallback
1666 |. mfc1 CARG4, f0
1667 |1: 1917 |1:
1668 | sltiu AT, CARG2, LJ_TISNUM 1918 | bne CARG2, TISNUM, ->fff_fallback
1669 | beqz AT, ->fff_fallback
1670 |. li AT, LJ_TSTR 1919 |. li AT, LJ_TSTR
1671 | cvt.w.d f2, f2
1672 | bne TMP0, AT, ->fff_fallback 1920 | bne TMP0, AT, ->fff_fallback
1673 |. lw CARG2, STR:CARG1->len 1921 |. lw CARG3, 8+LO(BASE)
1674 | mfc1 CARG3, f2 1922 | lw CARG2, STR:CARG1->len
1675 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end 1923 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1676 | slt AT, CARG4, r0 1924 | slt AT, CARG4, r0
1677 | addiu TMP0, CARG2, 1 1925 | addiu TMP0, CARG2, 1
@@ -1693,139 +1941,130 @@ static void build_subroutines(BuildCtx *ctx)
1693 | bgez CARG3, ->fff_newstr 1941 | bgez CARG3, ->fff_newstr
1694 |. addiu CARG3, CARG3, 1 // len++ 1942 |. addiu CARG3, CARG3, 1 // len++
1695 |->fff_emptystr: // Return empty string. 1943 |->fff_emptystr: // Return empty string.
1696 | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) 1944 | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
1697 | b ->fff_restv 1945 | b ->fff_restv
1698 |. li CARG3, LJ_TSTR 1946 |. li SFARG1HI, LJ_TSTR
1699 |
1700 |.ffunc string_rep // Only handle the 1-char case inline.
1701 | ffgccheck
1702 |. nop
1703 | lw TMP0, HI(BASE)
1704 | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
1705 | lw CARG4, 8+HI(BASE)
1706 | lw STR:CARG1, LO(BASE)
1707 | addiu TMP0, TMP0, -LJ_TSTR
1708 | ldc1 f0, 8(BASE)
1709 | or AT, AT, TMP0
1710 | bnez AT, ->fff_fallback
1711 |. sltiu AT, CARG4, LJ_TISNUM
1712 | cvt.w.d f0, f0
1713 | beqz AT, ->fff_fallback
1714 |. lw TMP0, STR:CARG1->len
1715 | mfc1 CARG3, f0
1716 | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1717 | li AT, 1
1718 | blez CARG3, ->fff_emptystr // Count <= 0?
1719 |. sltu AT, AT, TMP0
1720 | beqz TMP0, ->fff_emptystr // Zero length string?
1721 |. sltu TMP0, TMP1, CARG3
1722 | or AT, AT, TMP0
1723 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1724 | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
1725 |. lbu TMP0, STR:CARG1[1]
1726 | addu TMP2, CARG2, CARG3
1727 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1728 | addiu TMP2, TMP2, -1
1729 | sltu AT, CARG2, TMP2
1730 | bnez AT, <1
1731 |. sb TMP0, 0(TMP2)
1732 | b ->fff_newstr
1733 |. nop
1734 |
1735 |.ffunc string_reverse
1736 | ffgccheck
1737 |. nop
1738 | lw CARG3, HI(BASE)
1739 | lw STR:CARG1, LO(BASE)
1740 | beqz NARGS8:RC, ->fff_fallback
1741 |. li AT, LJ_TSTR
1742 | bne CARG3, AT, ->fff_fallback
1743 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
1744 | lw CARG3, STR:CARG1->len
1745 | addiu CARG1, STR:CARG1, #STR
1746 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
1747 | sltu AT, TMP1, CARG3
1748 | bnez AT, ->fff_fallback
1749 |. addu TMP3, CARG1, CARG3
1750 | addu CARG4, CARG2, CARG3
1751 |1: // Reverse string copy.
1752 | lbu TMP1, 0(CARG1)
1753 | sltu AT, CARG1, TMP3
1754 | beqz AT, ->fff_newstr
1755 |. addiu CARG1, CARG1, 1
1756 | addiu CARG4, CARG4, -1
1757 | b <1
1758 | sb TMP1, 0(CARG4)
1759 | 1947 |
1760 |.macro ffstring_case, name, lo 1948 |.macro ffstring_op, name
1761 | .ffunc name 1949 | .ffunc string_ .. name
1762 | ffgccheck 1950 | ffgccheck
1763 |. nop 1951 |. nop
1764 | lw CARG3, HI(BASE) 1952 | lw CARG3, HI(BASE)
1765 | lw STR:CARG1, LO(BASE) 1953 | lw STR:CARG2, LO(BASE)
1766 | beqz NARGS8:RC, ->fff_fallback 1954 | beqz NARGS8:RC, ->fff_fallback
1767 |. li AT, LJ_TSTR 1955 |. li AT, LJ_TSTR
1768 | bne CARG3, AT, ->fff_fallback 1956 | bne CARG3, AT, ->fff_fallback
1769 |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 1957 |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
1770 | lw CARG3, STR:CARG1->len 1958 | load_got lj_buf_putstr_ .. name
1771 | addiu CARG1, STR:CARG1, #STR 1959 | lw TMP0, SBUF:CARG1->b
1772 | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 1960 | sw L, SBUF:CARG1->L
1773 | sltu AT, TMP1, CARG3 1961 | sw BASE, L->base
1774 | bnez AT, ->fff_fallback 1962 | sw TMP0, SBUF:CARG1->p
1775 |. addu TMP3, CARG1, CARG3 1963 | call_intern extern lj_buf_putstr_ .. name
1776 | move CARG4, CARG2 1964 |. sw PC, SAVE_PC
1777 |1: // ASCII case conversion. 1965 | load_got lj_buf_tostr
1778 | lbu TMP1, 0(CARG1) 1966 | call_intern lj_buf_tostr
1779 | sltu AT, CARG1, TMP3 1967 |. move SBUF:CARG1, SBUF:CRET1
1780 | beqz AT, ->fff_newstr 1968 | b ->fff_resstr
1781 |. addiu TMP0, TMP1, -lo 1969 |. lw BASE, L->base
1782 | xori TMP2, TMP1, 0x20
1783 | sltiu AT, TMP0, 26
1784 | movn TMP1, TMP2, AT
1785 | addiu CARG1, CARG1, 1
1786 | sb TMP1, 0(CARG4)
1787 | b <1
1788 |. addiu CARG4, CARG4, 1
1789 |.endmacro 1970 |.endmacro
1790 | 1971 |
1791 |ffstring_case string_lower, 65 1972 |ffstring_op reverse
1792 |ffstring_case string_upper, 97 1973 |ffstring_op lower
1974 |ffstring_op upper
1793 | 1975 |
1794 |//-- Table library ------------------------------------------------------ 1976 |//-- Bit library --------------------------------------------------------
1795 | 1977 |
1796 |.ffunc_1 table_getn 1978 |->vm_tobit_fb:
1797 | li AT, LJ_TTAB 1979 | beqz TMP1, ->fff_fallback
1798 | bne CARG3, AT, ->fff_fallback 1980 |.if FPU
1799 |. load_got lj_tab_len 1981 |. ldc1 FARG1, 0(BASE)
1800 | call_intern lj_tab_len // (GCtab *t) 1982 | add.d FARG1, FARG1, TOBIT
1801 |. nop 1983 | jr ra
1802 | // Returns uint32_t (but less than 2^31). 1984 |. mfc1 CRET1, FARG1
1803 | b ->fff_resi 1985 |.else
1986 |// FP number to bit conversion for soft-float.
1987 |->vm_tobit:
1988 | sll TMP0, SFARG1HI, 1
1989 | lui AT, 0x0020
1990 | addu TMP0, TMP0, AT
1991 | slt AT, TMP0, r0
1992 | movz SFARG1LO, r0, AT
1993 | beqz AT, >2
1994 |. li TMP1, 0x3e0
1995 | not TMP1, TMP1
1996 | sra TMP0, TMP0, 21
1997 | subu TMP0, TMP1, TMP0
1998 | slt AT, TMP0, r0
1999 | bnez AT, >1
2000 |. sll TMP1, SFARG1HI, 11
2001 | lui AT, 0x8000
2002 | or TMP1, TMP1, AT
2003 | srl AT, SFARG1LO, 21
2004 | or TMP1, TMP1, AT
2005 | slt AT, SFARG1HI, r0
2006 | beqz AT, >2
2007 |. srlv SFARG1LO, TMP1, TMP0
2008 | subu SFARG1LO, r0, SFARG1LO
2009 |2:
2010 | jr ra
2011 |. move CRET1, SFARG1LO
2012 |1:
2013 | addiu TMP0, TMP0, 21
2014 | srlv TMP1, SFARG1LO, TMP0
2015 | li AT, 20
2016 | subu TMP0, AT, TMP0
2017 | sll SFARG1LO, SFARG1HI, 12
2018 | sllv AT, SFARG1LO, TMP0
2019 | or SFARG1LO, TMP1, AT
2020 | slt AT, SFARG1HI, r0
2021 | beqz AT, <2
1804 |. nop 2022 |. nop
1805 | 2023 | jr ra
1806 |//-- Bit library -------------------------------------------------------- 2024 |. subu CRET1, r0, SFARG1LO
2025 |.endif
1807 | 2026 |
1808 |.macro .ffunc_bit, name 2027 |.macro .ffunc_bit, name
1809 | .ffunc_n bit_..name 2028 | .ffunc_1 bit_..name
1810 |. add.d FARG1, FARG1, TOBIT 2029 | beq SFARG1HI, TISNUM, >6
1811 | mfc1 CRET1, FARG1 2030 |. move CRET1, SFARG1LO
2031 | bal ->vm_tobit_fb
2032 |. sltu TMP1, SFARG1HI, TISNUM
2033 |6:
1812 |.endmacro 2034 |.endmacro
1813 | 2035 |
1814 |.macro .ffunc_bit_op, name, ins 2036 |.macro .ffunc_bit_op, name, ins
1815 | .ffunc_bit name 2037 | .ffunc_bit name
1816 | addiu TMP1, BASE, 8 2038 | addiu TMP2, BASE, 8
1817 | addu TMP2, BASE, NARGS8:RC 2039 | addu TMP3, BASE, NARGS8:RC
1818 |1: 2040 |1:
1819 | lw CARG4, HI(TMP1) 2041 | lw SFARG1HI, HI(TMP2)
1820 | beq TMP1, TMP2, ->fff_resi 2042 | beq TMP2, TMP3, ->fff_resi
1821 |. ldc1 FARG1, 0(TMP1) 2043 |. lw SFARG1LO, LO(TMP2)
1822 | sltiu AT, CARG4, LJ_TISNUM 2044 |.if FPU
1823 | beqz AT, ->fff_fallback 2045 | bne SFARG1HI, TISNUM, >2
1824 | add.d FARG1, FARG1, TOBIT 2046 |. addiu TMP2, TMP2, 8
1825 | mfc1 CARG2, FARG1
1826 | ins CRET1, CRET1, CARG2
1827 | b <1 2047 | b <1
1828 |. addiu TMP1, TMP1, 8 2048 |. ins CRET1, CRET1, SFARG1LO
2049 |2:
2050 | ldc1 FARG1, -8(TMP2)
2051 | sltu TMP1, SFARG1HI, TISNUM
2052 | beqz TMP1, ->fff_fallback
2053 |. add.d FARG1, FARG1, TOBIT
2054 | mfc1 SFARG1LO, FARG1
2055 | b <1
2056 |. ins CRET1, CRET1, SFARG1LO
2057 |.else
2058 | beq SFARG1HI, TISNUM, >2
2059 |. move CRET2, CRET1
2060 | bal ->vm_tobit_fb
2061 |. sltu TMP1, SFARG1HI, TISNUM
2062 | move SFARG1LO, CRET2
2063 |2:
2064 | ins CRET1, CRET1, SFARG1LO
2065 | b <1
2066 |. addiu TMP2, TMP2, 8
2067 |.endif
1829 |.endmacro 2068 |.endmacro
1830 | 2069 |
1831 |.ffunc_bit_op band, and 2070 |.ffunc_bit_op band, and
@@ -1849,24 +2088,28 @@ static void build_subroutines(BuildCtx *ctx)
1849 |. not CRET1, CRET1 2088 |. not CRET1, CRET1
1850 | 2089 |
1851 |.macro .ffunc_bit_sh, name, ins, shmod 2090 |.macro .ffunc_bit_sh, name, ins, shmod
1852 | .ffunc_nn bit_..name 2091 | .ffunc_2 bit_..name
1853 |. add.d FARG1, FARG1, TOBIT 2092 | beq SFARG1HI, TISNUM, >1
1854 | add.d FARG2, FARG2, TOBIT 2093 |. nop
1855 | mfc1 CARG1, FARG1 2094 | bal ->vm_tobit_fb
1856 | mfc1 CARG2, FARG2 2095 |. sltu TMP1, SFARG1HI, TISNUM
2096 | move SFARG1LO, CRET1
2097 |1:
2098 | bne SFARG2HI, TISNUM, ->fff_fallback
2099 |. nop
1857 |.if shmod == 1 2100 |.if shmod == 1
1858 | li AT, 32 2101 | li AT, 32
1859 | subu TMP0, AT, CARG2 2102 | subu TMP0, AT, SFARG2LO
1860 | sllv CARG2, CARG1, CARG2 2103 | sllv SFARG2LO, SFARG1LO, SFARG2LO
1861 | srlv CARG1, CARG1, TMP0 2104 | srlv SFARG1LO, SFARG1LO, TMP0
1862 |.elif shmod == 2 2105 |.elif shmod == 2
1863 | li AT, 32 2106 | li AT, 32
1864 | subu TMP0, AT, CARG2 2107 | subu TMP0, AT, SFARG2LO
1865 | srlv CARG2, CARG1, CARG2 2108 | srlv SFARG2LO, SFARG1LO, SFARG2LO
1866 | sllv CARG1, CARG1, TMP0 2109 | sllv SFARG1LO, SFARG1LO, TMP0
1867 |.endif 2110 |.endif
1868 | b ->fff_resi 2111 | b ->fff_resi
1869 |. ins CRET1, CARG1, CARG2 2112 |. ins CRET1, SFARG1LO, SFARG2LO
1870 |.endmacro 2113 |.endmacro
1871 | 2114 |
1872 |.ffunc_bit_sh lshift, sllv, 0 2115 |.ffunc_bit_sh lshift, sllv, 0
@@ -1878,9 +2121,11 @@ static void build_subroutines(BuildCtx *ctx)
1878 | 2121 |
1879 |.ffunc_bit tobit 2122 |.ffunc_bit tobit
1880 |->fff_resi: 2123 |->fff_resi:
1881 | mtc1 CRET1, FRET1 2124 | lw PC, FRAME_PC(BASE)
1882 | b ->fff_resn 2125 | addiu RA, BASE, -8
1883 |. cvt.d.w FRET1, FRET1 2126 | sw TISNUM, -8+HI(BASE)
2127 | b ->fff_res1
2128 |. sw CRET1, -8+LO(BASE)
1884 | 2129 |
1885 |//----------------------------------------------------------------------- 2130 |//-----------------------------------------------------------------------
1886 | 2131 |
@@ -2067,19 +2312,96 @@ static void build_subroutines(BuildCtx *ctx)
2067 | jr CRET1 2312 | jr CRET1
2068 |. lw INS, -4(PC) 2313 |. lw INS, -4(PC)
2069 | 2314 |
2315 |->cont_stitch: // Trace stitching.
2316 |.if JIT
2317 | // RA = resultptr, RB = meta base
2318 | lw INS, -4(PC)
2319 | lw TMP2, -24+LO(RB) // Save previous trace.
2320 | decode_RA8a RC, INS
2321 | addiu AT, MULTRES, -8
2322 | decode_RA8b RC
2323 | beqz AT, >2
2324 |. addu RC, BASE, RC // Call base.
2325 |1: // Move results down.
2326 | lw SFRETHI, HI(RA)
2327 | lw SFRETLO, LO(RA)
2328 | addiu AT, AT, -8
2329 | addiu RA, RA, 8
2330 | sw SFRETHI, HI(RC)
2331 | sw SFRETLO, LO(RC)
2332 | bnez AT, <1
2333 |. addiu RC, RC, 8
2334 |2:
2335 | decode_RA8a RA, INS
2336 | decode_RB8a RB, INS
2337 | decode_RA8b RA
2338 | decode_RB8b RB
2339 | addu RA, RA, RB
2340 | addu RA, BASE, RA
2341 |3:
2342 | sltu AT, RC, RA
2343 | bnez AT, >9 // More results wanted?
2344 |. nop
2345 |
2346 | lhu TMP3, TRACE:TMP2->traceno
2347 | lhu RD, TRACE:TMP2->link
2348 | beq RD, TMP3, ->cont_nop // Blacklisted.
2349 |. load_got lj_dispatch_stitch
2350 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2351 |. sll RD, RD, 3
2352 |
2353 | // Stitch a new trace to the previous trace.
2354 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2355 | sw L, DISPATCH_J(L)(DISPATCH)
2356 | sw BASE, L->base
2357 | addiu CARG1, DISPATCH, GG_DISP2J
2358 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2359 |. move CARG2, PC
2360 | b ->cont_nop
2361 |. lw BASE, L->base
2362 |
2363 |9:
2364 | sw TISNIL, HI(RC)
2365 | b <3
2366 |. addiu RC, RC, 8
2367 |.endif
2368 |
2369 |->vm_profhook: // Dispatch target for profiler hook.
2370#if LJ_HASPROFILE
2371 | load_got lj_dispatch_profile
2372 | sw MULTRES, SAVE_MULTRES
2373 | move CARG2, PC
2374 | sw BASE, L->base
2375 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2376 |. move CARG1, L
2377 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2378 | addiu PC, PC, -4
2379 | b ->cont_nop
2380 |. lw BASE, L->base
2381#endif
2382 |
2070 |//----------------------------------------------------------------------- 2383 |//-----------------------------------------------------------------------
2071 |//-- Trace exit handler ------------------------------------------------- 2384 |//-- Trace exit handler -------------------------------------------------
2072 |//----------------------------------------------------------------------- 2385 |//-----------------------------------------------------------------------
2073 | 2386 |
2074 |.macro savex_, a, b 2387 |.macro savex_, a, b
2388 |.if FPU
2075 | sdc1 f..a, 16+a*8(sp) 2389 | sdc1 f..a, 16+a*8(sp)
2076 | sw r..a, 16+32*8+a*4(sp) 2390 | sw r..a, 16+32*8+a*4(sp)
2077 | sw r..b, 16+32*8+b*4(sp) 2391 | sw r..b, 16+32*8+b*4(sp)
2392 |.else
2393 | sw r..a, 16+a*4(sp)
2394 | sw r..b, 16+b*4(sp)
2395 |.endif
2078 |.endmacro 2396 |.endmacro
2079 | 2397 |
2080 |->vm_exit_handler: 2398 |->vm_exit_handler:
2081 |.if JIT 2399 |.if JIT
2400 |.if FPU
2082 | addiu sp, sp, -(16+32*8+32*4) 2401 | addiu sp, sp, -(16+32*8+32*4)
2402 |.else
2403 | addiu sp, sp, -(16+32*4)
2404 |.endif
2083 | savex_ 0, 1 2405 | savex_ 0, 1
2084 | savex_ 2, 3 2406 | savex_ 2, 3
2085 | savex_ 4, 5 2407 | savex_ 4, 5
@@ -2094,25 +2416,34 @@ static void build_subroutines(BuildCtx *ctx)
2094 | savex_ 22, 23 2416 | savex_ 22, 23
2095 | savex_ 24, 25 2417 | savex_ 24, 25
2096 | savex_ 26, 27 2418 | savex_ 26, 27
2419 |.if FPU
2097 | sdc1 f28, 16+28*8(sp) 2420 | sdc1 f28, 16+28*8(sp)
2098 | sw r28, 16+32*8+28*4(sp)
2099 | sdc1 f30, 16+30*8(sp) 2421 | sdc1 f30, 16+30*8(sp)
2422 | sw r28, 16+32*8+28*4(sp)
2100 | sw r30, 16+32*8+30*4(sp) 2423 | sw r30, 16+32*8+30*4(sp)
2101 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. 2424 | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
2425 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2426 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
2427 |.else
2428 | sw r28, 16+28*4(sp)
2429 | sw r30, 16+30*4(sp)
2430 | sw r0, 16+31*4(sp) // Clear RID_TMP.
2431 | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
2432 | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
2433 |.endif
2102 | li_vmstate EXIT 2434 | li_vmstate EXIT
2103 | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
2104 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2435 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2105 | lw TMP1, 0(TMP2) // Load exit number. 2436 | lw TMP1, 0(TMP2) // Load exit number.
2106 | st_vmstate 2437 | st_vmstate
2107 | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. 2438 | lw L, DISPATCH_GL(cur_L)(DISPATCH)
2108 | lw L, DISPATCH_GL(jit_L)(DISPATCH) 2439 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2109 | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
2110 | load_got lj_trace_exit 2440 | load_got lj_trace_exit
2111 | sw L, DISPATCH_J(L)(DISPATCH) 2441 | sw L, DISPATCH_J(L)(DISPATCH)
2112 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. 2442 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2443 | sw BASE, L->base
2113 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 2444 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2114 | addiu CARG1, DISPATCH, GG_DISP2J 2445 | addiu CARG1, DISPATCH, GG_DISP2J
2115 | sw BASE, L->base 2446 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2116 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) 2447 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2117 |. addiu CARG2, sp, 16 2448 |. addiu CARG2, sp, 16
2118 | // Returns MULTRES (unscaled) or negated error code. 2449 | // Returns MULTRES (unscaled) or negated error code.
@@ -2128,19 +2459,21 @@ static void build_subroutines(BuildCtx *ctx)
2128 |.if JIT 2459 |.if JIT
2129 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. 2460 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2130 | lw L, SAVE_L 2461 | lw L, SAVE_L
2131 | addiu DISPATCH, JGL, -GG_DISP2G-32768 2462 | addiu DISPATCH, JGL, -GG_DISP2G-32768
2463 | sw BASE, L->base
2132 |1: 2464 |1:
2133 | bltz CRET1, >3 // Check for error from exit. 2465 | bltz CRET1, >9 // Check for error from exit.
2134 |. lw LFUNC:TMP1, FRAME_FUNC(BASE) 2466 |. lw LFUNC:RB, FRAME_FUNC(BASE)
2135 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2467 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2136 | sll MULTRES, CRET1, 3 2468 | sll MULTRES, CRET1, 3
2137 | li TISNIL, LJ_TNIL 2469 | li TISNIL, LJ_TNIL
2470 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2138 | sw MULTRES, SAVE_MULTRES 2471 | sw MULTRES, SAVE_MULTRES
2139 | mtc1 TMP3, TOBIT 2472 | .FPU mtc1 TMP3, TOBIT
2140 | lw TMP1, LFUNC:TMP1->pc 2473 | lw TMP1, LFUNC:RB->pc
2141 | sw r0, DISPATCH_GL(jit_L)(DISPATCH) 2474 | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
2142 | lw KBASE, PC2PROTO(k)(TMP1) 2475 | lw KBASE, PC2PROTO(k)(TMP1)
2143 | cvt.d.s TOBIT, TOBIT 2476 | .FPU cvt.d.s TOBIT, TOBIT
2144 | // Modified copy of ins_next which handles function header dispatch, too. 2477 | // Modified copy of ins_next which handles function header dispatch, too.
2145 | lw INS, 0(PC) 2478 | lw INS, 0(PC)
2146 | addiu PC, PC, 4 2479 | addiu PC, PC, 4
@@ -2148,7 +2481,7 @@ static void build_subroutines(BuildCtx *ctx)
2148 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) 2481 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2149 | decode_OP4a TMP1, INS 2482 | decode_OP4a TMP1, INS
2150 | decode_OP4b TMP1 2483 | decode_OP4b TMP1
2151 | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? 2484 | sltiu TMP2, TMP1, BC_FUNCF*4
2152 | addu TMP0, DISPATCH, TMP1 2485 | addu TMP0, DISPATCH, TMP1
2153 | decode_RD8a RD, INS 2486 | decode_RD8a RD, INS
2154 | lw AT, 0(TMP0) 2487 | lw AT, 0(TMP0)
@@ -2158,11 +2491,27 @@ static void build_subroutines(BuildCtx *ctx)
2158 | jr AT 2491 | jr AT
2159 |. decode_RD8b RD 2492 |. decode_RD8b RD
2160 |2: 2493 |2:
2494 | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
2495 | bnez TMP2, >3
2496 |. lw TMP1, FRAME_PC(BASE)
2497 | // Check frame below fast function.
2498 | andi TMP0, TMP1, FRAME_TYPE
2499 | bnez TMP0, >3 // Trace stitching continuation?
2500 |. nop
2501 | // Otherwise set KBASE for Lua function below fast function.
2502 | lw TMP2, -4(TMP1)
2503 | decode_RA8a TMP0, TMP2
2504 | decode_RA8b TMP0
2505 | subu TMP1, BASE, TMP0
2506 | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
2507 | lw TMP1, LFUNC:TMP2->pc
2508 | lw KBASE, PC2PROTO(k)(TMP1)
2509 |3:
2161 | addiu RC, MULTRES, -8 2510 | addiu RC, MULTRES, -8
2162 | jr AT 2511 | jr AT
2163 |. addu RA, RA, BASE 2512 |. addu RA, RA, BASE
2164 | 2513 |
2165 |3: // Rethrow error from the right C frame. 2514 |9: // Rethrow error from the right C frame.
2166 | load_got lj_err_throw 2515 | load_got lj_err_throw
2167 | negu CARG2, CRET1 2516 | negu CARG2, CRET1
2168 | call_intern lj_err_throw // (lua_State *L, int errcode) 2517 | call_intern lj_err_throw // (lua_State *L, int errcode)
@@ -2173,8 +2522,9 @@ static void build_subroutines(BuildCtx *ctx)
2173 |//-- Math helper functions ---------------------------------------------- 2522 |//-- Math helper functions ----------------------------------------------
2174 |//----------------------------------------------------------------------- 2523 |//-----------------------------------------------------------------------
2175 | 2524 |
2525 |// Hard-float round to integer.
2176 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. 2526 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2177 |.macro vm_round, func 2527 |.macro vm_round_hf, func
2178 | lui TMP0, 0x4330 // Hiword of 2^52 (double). 2528 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2179 | mtc1 r0, f4 2529 | mtc1 r0, f4
2180 | mtc1 TMP0, f5 2530 | mtc1 TMP0, f5
@@ -2216,6 +2566,12 @@ static void build_subroutines(BuildCtx *ctx)
2216 |. mov.d FRET1, FARG1 2566 |. mov.d FRET1, FARG1
2217 |.endmacro 2567 |.endmacro
2218 | 2568 |
2569 |.macro vm_round, func
2570 |.if FPU
2571 | vm_round_hf, func
2572 |.endif
2573 |.endmacro
2574 |
2219 |->vm_floor: 2575 |->vm_floor:
2220 | vm_round floor 2576 | vm_round floor
2221 |->vm_ceil: 2577 |->vm_ceil:
@@ -2225,6 +2581,178 @@ static void build_subroutines(BuildCtx *ctx)
2225 | vm_round trunc 2581 | vm_round trunc
2226 |.endif 2582 |.endif
2227 | 2583 |
2584 |// Soft-float integer to number conversion.
2585 |.macro sfi2d, AHI, ALO
2586 |.if not FPU
2587 | beqz ALO, >9 // Handle zero first.
2588 |. sra TMP0, ALO, 31
2589 | xor TMP1, ALO, TMP0
2590 | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2591 | clz AHI, TMP1
2592 | andi TMP0, TMP0, 0x800 // Mask sign bit.
2593 | li AT, 0x3ff+31-1
2594 | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
2595 | subu AHI, AT, AHI // Exponent - 1 in AHI.
2596 | sll ALO, TMP1, 21
2597 | or AHI, AHI, TMP0 // Sign | Exponent.
2598 | srl TMP1, TMP1, 11
2599 | sll AHI, AHI, 20 // Align left.
2600 | jr ra
2601 |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
2602 |9:
2603 | jr ra
2604 |. li AHI, 0
2605 |.endif
2606 |.endmacro
2607 |
2608 |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
2609 |->vm_sfi2d_1:
2610 | sfi2d SFARG1HI, SFARG1LO
2611 |
2612 |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
2613 |->vm_sfi2d_2:
2614 | sfi2d SFARG2HI, SFARG2LO
2615 |
2616 |// Soft-float comparison. Equivalent to c.eq.d.
2617 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2618 |->vm_sfcmpeq:
2619 |.if not FPU
2620 | sll AT, SFARG1HI, 1
2621 | sll TMP0, SFARG2HI, 1
2622 | or CRET1, SFARG1LO, SFARG2LO
2623 | or TMP1, AT, TMP0
2624 | or TMP1, TMP1, CRET1
2625 | beqz TMP1, >8 // Both args +-0: return 1.
2626 |. sltu CRET1, r0, SFARG1LO
2627 | lui TMP1, 0xffe0
2628 | addu AT, AT, CRET1
2629 | sltu CRET1, r0, SFARG2LO
2630 | sltu AT, TMP1, AT
2631 | addu TMP0, TMP0, CRET1
2632 | sltu TMP0, TMP1, TMP0
2633 | or TMP1, AT, TMP0
2634 | bnez TMP1, >9 // Either arg is NaN: return 0;
2635 |. xor TMP0, SFARG1HI, SFARG2HI
2636 | xor TMP1, SFARG1LO, SFARG2LO
2637 | or AT, TMP0, TMP1
2638 | jr ra
2639 |. sltiu CRET1, AT, 1 // Same values: return 1.
2640 |8:
2641 | jr ra
2642 |. li CRET1, 1
2643 |9:
2644 | jr ra
2645 |. li CRET1, 0
2646 |.endif
2647 |
2648 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2649 |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2650 |->vm_sfcmpult:
2651 |.if not FPU
2652 | b >1
2653 |. li CRET2, 1
2654 |.endif
2655 |
2656 |->vm_sfcmpolt:
2657 |.if not FPU
2658 | li CRET2, 0
2659 |1:
2660 | sll AT, SFARG1HI, 1
2661 | sll TMP0, SFARG2HI, 1
2662 | or CRET1, SFARG1LO, SFARG2LO
2663 | or TMP1, AT, TMP0
2664 | or TMP1, TMP1, CRET1
2665 | beqz TMP1, >8 // Both args +-0: return 0.
2666 |. sltu CRET1, r0, SFARG1LO
2667 | lui TMP1, 0xffe0
2668 | addu AT, AT, CRET1
2669 | sltu CRET1, r0, SFARG2LO
2670 | sltu AT, TMP1, AT
2671 | addu TMP0, TMP0, CRET1
2672 | sltu TMP0, TMP1, TMP0
2673 | or TMP1, AT, TMP0
2674 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
2675 |. and AT, SFARG1HI, SFARG2HI
2676 | bltz AT, >5 // Both args negative?
2677 |. nop
2678 | beq SFARG1HI, SFARG2HI, >8
2679 |. sltu CRET1, SFARG1LO, SFARG2LO
2680 | jr ra
2681 |. slt CRET1, SFARG1HI, SFARG2HI
2682 |5: // Swap conditions if both operands are negative.
2683 | beq SFARG1HI, SFARG2HI, >8
2684 |. sltu CRET1, SFARG2LO, SFARG1LO
2685 | jr ra
2686 |. slt CRET1, SFARG2HI, SFARG1HI
2687 |8:
2688 | jr ra
2689 |. nop
2690 |9:
2691 | jr ra
2692 |. move CRET1, CRET2
2693 |.endif
2694 |
2695 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2696 |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2697 |->vm_sfcmpolex:
2698 |.if not FPU
2699 | sll AT, SFARG1HI, 1
2700 | sll TMP0, SFARG2HI, 1
2701 | or CRET1, SFARG1LO, SFARG2LO
2702 | or TMP1, AT, TMP0
2703 | or TMP1, TMP1, CRET1
2704 | beqz TMP1, >8 // Both args +-0: return 1.
2705 |. sltu CRET1, r0, SFARG1LO
2706 | lui TMP1, 0xffe0
2707 | addu AT, AT, CRET1
2708 | sltu CRET1, r0, SFARG2LO
2709 | sltu AT, TMP1, AT
2710 | addu TMP0, TMP0, CRET1
2711 | sltu TMP0, TMP1, TMP0
2712 | or TMP1, AT, TMP0
2713 | bnez TMP1, >9 // Either arg is NaN: return 0;
2714 |. and AT, SFARG1HI, SFARG2HI
2715 | xor AT, AT, TMP3
2716 | bltz AT, >5 // Both args negative?
2717 |. nop
2718 | beq SFARG1HI, SFARG2HI, >6
2719 |. sltu CRET1, SFARG2LO, SFARG1LO
2720 | jr ra
2721 |. slt CRET1, SFARG2HI, SFARG1HI
2722 |5: // Swap conditions if both operands are negative.
2723 | beq SFARG1HI, SFARG2HI, >6
2724 |. sltu CRET1, SFARG1LO, SFARG2LO
2725 | slt CRET1, SFARG1HI, SFARG2HI
2726 |6:
2727 | jr ra
2728 |. nop
2729 |8:
2730 | jr ra
2731 |. li CRET1, 1
2732 |9:
2733 | jr ra
2734 |. li CRET1, 0
2735 |.endif
2736 |
2737 |.macro sfmin_max, name, intins
2738 |->vm_sf .. name:
2739 |.if JIT and not FPU
2740 | move TMP2, ra
2741 | bal ->vm_sfcmpolt
2742 |. nop
2743 | move TMP0, CRET1
2744 | move SFRETHI, SFARG1HI
2745 | move SFRETLO, SFARG1LO
2746 | move ra, TMP2
2747 | intins SFRETHI, SFARG2HI, TMP0
2748 | jr ra
2749 |. intins SFRETLO, SFARG2LO, TMP0
2750 |.endif
2751 |.endmacro
2752 |
2753 | sfmin_max min, movz
2754 | sfmin_max max, movn
2755 |
2228 |//----------------------------------------------------------------------- 2756 |//-----------------------------------------------------------------------
2229 |//-- Miscellaneous functions -------------------------------------------- 2757 |//-- Miscellaneous functions --------------------------------------------
2230 |//----------------------------------------------------------------------- 2758 |//-----------------------------------------------------------------------
@@ -2244,10 +2772,10 @@ static void build_subroutines(BuildCtx *ctx)
2244 | sw r1, CTSTATE->cb.slot 2772 | sw r1, CTSTATE->cb.slot
2245 | sw CARG1, CTSTATE->cb.gpr[0] 2773 | sw CARG1, CTSTATE->cb.gpr[0]
2246 | sw CARG2, CTSTATE->cb.gpr[1] 2774 | sw CARG2, CTSTATE->cb.gpr[1]
2247 | sdc1 FARG1, CTSTATE->cb.fpr[0] 2775 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
2248 | sw CARG3, CTSTATE->cb.gpr[2] 2776 | sw CARG3, CTSTATE->cb.gpr[2]
2249 | sw CARG4, CTSTATE->cb.gpr[3] 2777 | sw CARG4, CTSTATE->cb.gpr[3]
2250 | sdc1 FARG2, CTSTATE->cb.fpr[1] 2778 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2251 | addiu TMP0, sp, CFRAME_SPACE+16 2779 | addiu TMP0, sp, CFRAME_SPACE+16
2252 | sw TMP0, CTSTATE->cb.stack 2780 | sw TMP0, CTSTATE->cb.stack
2253 | sw r0, SAVE_PC // Any value outside of bytecode is ok. 2781 | sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2257,15 +2785,16 @@ static void build_subroutines(BuildCtx *ctx)
2257 | // Returns lua_State *. 2785 | // Returns lua_State *.
2258 | lw BASE, L:CRET1->base 2786 | lw BASE, L:CRET1->base
2259 | lw RC, L:CRET1->top 2787 | lw RC, L:CRET1->top
2788 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2260 | move L, CRET1 2789 | move L, CRET1
2261 | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 2790 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2262 | lw LFUNC:RB, FRAME_FUNC(BASE) 2791 | lw LFUNC:RB, FRAME_FUNC(BASE)
2263 | mtc1 TMP3, TOBIT 2792 | .FPU mtc1 TMP3, TOBIT
2264 | li_vmstate INTERP 2793 | li_vmstate INTERP
2265 | li TISNIL, LJ_TNIL 2794 | li TISNIL, LJ_TNIL
2266 | subu RC, RC, BASE 2795 | subu RC, RC, BASE
2267 | st_vmstate 2796 | st_vmstate
2268 | cvt.d.s TOBIT, TOBIT 2797 | .FPU cvt.d.s TOBIT, TOBIT
2269 | ins_callt 2798 | ins_callt
2270 |.endif 2799 |.endif
2271 | 2800 |
@@ -2279,11 +2808,11 @@ static void build_subroutines(BuildCtx *ctx)
2279 | move CARG2, RA 2808 | move CARG2, RA
2280 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) 2809 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2281 |. move CARG1, CTSTATE 2810 |. move CARG1, CTSTATE
2811 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
2282 | lw CRET1, CTSTATE->cb.gpr[0] 2812 | lw CRET1, CTSTATE->cb.gpr[0]
2283 | ldc1 FRET1, CTSTATE->cb.fpr[0] 2813 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2284 | lw CRET2, CTSTATE->cb.gpr[1]
2285 | b ->vm_leave_unw 2814 | b ->vm_leave_unw
2286 |. ldc1 FRET2, CTSTATE->cb.fpr[1] 2815 |. lw CRET2, CTSTATE->cb.gpr[1]
2287 |.endif 2816 |.endif
2288 | 2817 |
2289 |->vm_ffi_call: // Call C function via FFI. 2818 |->vm_ffi_call: // Call C function via FFI.
@@ -2315,8 +2844,8 @@ static void build_subroutines(BuildCtx *ctx)
2315 | lw CARG2, CCSTATE->gpr[1] 2844 | lw CARG2, CCSTATE->gpr[1]
2316 | lw CARG3, CCSTATE->gpr[2] 2845 | lw CARG3, CCSTATE->gpr[2]
2317 | lw CARG4, CCSTATE->gpr[3] 2846 | lw CARG4, CCSTATE->gpr[3]
2318 | ldc1 FARG1, CCSTATE->fpr[0] 2847 | .FPU ldc1 FARG1, CCSTATE->fpr[0]
2319 | ldc1 FARG2, CCSTATE->fpr[1] 2848 | .FPU ldc1 FARG2, CCSTATE->fpr[1]
2320 | jalr CFUNCADDR 2849 | jalr CFUNCADDR
2321 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 2850 |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2322 | lw CCSTATE:TMP1, -12(r16) 2851 | lw CCSTATE:TMP1, -12(r16)
@@ -2324,8 +2853,13 @@ static void build_subroutines(BuildCtx *ctx)
2324 | lw ra, -4(r16) 2853 | lw ra, -4(r16)
2325 | sw CRET1, CCSTATE:TMP1->gpr[0] 2854 | sw CRET1, CCSTATE:TMP1->gpr[0]
2326 | sw CRET2, CCSTATE:TMP1->gpr[1] 2855 | sw CRET2, CCSTATE:TMP1->gpr[1]
2856 |.if FPU
2327 | sdc1 FRET1, CCSTATE:TMP1->fpr[0] 2857 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2328 | sdc1 FRET2, CCSTATE:TMP1->fpr[1] 2858 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2859 |.else
2860 | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
2861 | sw CARG2, CCSTATE:TMP1->gpr[3]
2862 |.endif
2329 | move sp, r16 2863 | move sp, r16
2330 | jr ra 2864 | jr ra
2331 |. move r16, TMP2 2865 |. move r16, TMP2
@@ -2349,82 +2883,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2349 2883
2350 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 2884 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2351 | // RA = src1*8, RD = src2*8, JMP with RD = target 2885 | // RA = src1*8, RD = src2*8, JMP with RD = target
2352 | addu CARG2, BASE, RA 2886 |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
2353 | addu CARG3, BASE, RD 2887 | addu RA, BASE, RA
2354 | lw TMP0, HI(CARG2) 2888 | addu RD, BASE, RD
2355 | lw TMP1, HI(CARG3) 2889 | lw RAHI, HI(RA)
2356 | ldc1 f0, 0(CARG2) 2890 | lw RDHI, HI(RD)
2357 | ldc1 f2, 0(CARG3)
2358 | sltiu TMP0, TMP0, LJ_TISNUM
2359 | sltiu TMP1, TMP1, LJ_TISNUM
2360 | lhu TMP2, OFS_RD(PC) 2891 | lhu TMP2, OFS_RD(PC)
2361 | and TMP0, TMP0, TMP1
2362 | addiu PC, PC, 4 2892 | addiu PC, PC, 4
2363 | beqz TMP0, ->vmeta_comp 2893 | bne RAHI, TISNUM, >2
2364 |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) 2894 |. lw RALO, LO(RA)
2365 | decode_RD4b TMP2 2895 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2366 | addu TMP2, TMP2, TMP1 2896 | lw RDLO, LO(RD)
2367 if (op == BC_ISLT || op == BC_ISGE) { 2897 | bne RDHI, TISNUM, >5
2368 | c.olt.d f0, f2 2898 |. decode_RD4b TMP2
2369 } else { 2899 | slt AT, SFARG1LO, SFARG2LO
2370 | c.ole.d f0, f2 2900 | addu TMP2, TMP2, TMP3
2371 } 2901 | movop TMP2, r0, AT
2372 if (op == BC_ISLT || op == BC_ISLE) {
2373 | movf TMP2, r0
2374 } else {
2375 | movt TMP2, r0
2376 }
2377 | addu PC, PC, TMP2
2378 |1: 2902 |1:
2903 | addu PC, PC, TMP2
2379 | ins_next 2904 | ins_next
2905 |
2906 |2: // RA is not an integer.
2907 | sltiu AT, RAHI, LJ_TISNUM
2908 | beqz AT, ->vmeta_comp
2909 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2910 | sltiu AT, RDHI, LJ_TISNUM
2911 |.if FPU
2912 | ldc1 FRA, 0(RA)
2913 | ldc1 FRD, 0(RD)
2914 |.else
2915 | lw RDLO, LO(RD)
2916 |.endif
2917 | beqz AT, >4
2918 |. decode_RD4b TMP2
2919 |3: // RA and RD are both numbers.
2920 |.if FPU
2921 | fcomp f20, f22
2922 | addu TMP2, TMP2, TMP3
2923 | b <1
2924 |. fmovop TMP2, r0
2925 |.else
2926 | bal sfcomp
2927 |. addu TMP2, TMP2, TMP3
2928 | b <1
2929 |. movop TMP2, r0, CRET1
2930 |.endif
2931 |
2932 |4: // RA is a number, RD is not a number.
2933 | bne RDHI, TISNUM, ->vmeta_comp
2934 | // RA is a number, RD is an integer. Convert RD to a number.
2935 |.if FPU
2936 |. lwc1 FRD, LO(RD)
2937 | b <3
2938 |. cvt.d.w FRD, FRD
2939 |.else
2940 |. nop
2941 |.if "RDHI" == "SFARG1HI"
2942 | bal ->vm_sfi2d_1
2943 |.else
2944 | bal ->vm_sfi2d_2
2945 |.endif
2946 |. nop
2947 | b <3
2948 |. nop
2949 |.endif
2950 |
2951 |5: // RA is an integer, RD is not an integer
2952 | sltiu AT, RDHI, LJ_TISNUM
2953 | beqz AT, ->vmeta_comp
2954 | // RA is an integer, RD is a number. Convert RA to a number.
2955 |.if FPU
2956 |. mtc1 RALO, FRA
2957 | ldc1 FRD, 0(RD)
2958 | b <3
2959 | cvt.d.w FRA, FRA
2960 |.else
2961 |. nop
2962 |.if "RAHI" == "SFARG1HI"
2963 | bal ->vm_sfi2d_1
2964 |.else
2965 | bal ->vm_sfi2d_2
2966 |.endif
2967 |. nop
2968 | b <3
2969 |. nop
2970 |.endif
2971 |.endmacro
2972 |
2973 if (op == BC_ISLT) {
2974 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
2975 } else if (op == BC_ISGE) {
2976 | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
2977 } else if (op == BC_ISLE) {
2978 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
2979 } else {
2980 | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
2981 }
2380 break; 2982 break;
2381 2983
2382 case BC_ISEQV: case BC_ISNEV: 2984 case BC_ISEQV: case BC_ISNEV:
2383 vk = op == BC_ISEQV; 2985 vk = op == BC_ISEQV;
2384 | // RA = src1*8, RD = src2*8, JMP with RD = target 2986 | // RA = src1*8, RD = src2*8, JMP with RD = target
2385 | addu RA, BASE, RA 2987 | addu RA, BASE, RA
2386 | addiu PC, PC, 4 2988 | addiu PC, PC, 4
2387 | lw TMP0, HI(RA)
2388 | ldc1 f0, 0(RA)
2389 | addu RD, BASE, RD 2989 | addu RD, BASE, RD
2990 | lw SFARG1HI, HI(RA)
2390 | lhu TMP2, -4+OFS_RD(PC) 2991 | lhu TMP2, -4+OFS_RD(PC)
2391 | lw TMP1, HI(RD) 2992 | lw SFARG2HI, HI(RD)
2392 | ldc1 f2, 0(RD)
2393 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 2993 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2394 | sltiu AT, TMP0, LJ_TISNUM 2994 | sltu AT, TISNUM, SFARG1HI
2395 | sltiu CARG1, TMP1, LJ_TISNUM 2995 | sltu TMP0, TISNUM, SFARG2HI
2396 | decode_RD4b TMP2 2996 | or AT, AT, TMP0
2397 | and AT, AT, CARG1
2398 | beqz AT, >5
2399 |. addu TMP2, TMP2, TMP3
2400 | c.eq.d f0, f2
2401 if (vk) { 2997 if (vk) {
2402 | movf TMP2, r0 2998 | beqz AT, ->BC_ISEQN_Z
2403 } else { 2999 } else {
2404 | movt TMP2, r0 3000 | beqz AT, ->BC_ISNEN_Z
2405 } 3001 }
2406 |1: 3002 |. decode_RD4b TMP2
2407 | addu PC, PC, TMP2 3003 | // Either or both types are not numbers.
2408 | ins_next 3004 | lw SFARG1LO, LO(RA)
2409 |5: // Either or both types are not numbers. 3005 | lw SFARG2LO, LO(RD)
2410 | lw CARG2, LO(RA) 3006 | addu TMP2, TMP2, TMP3
2411 | lw CARG3, LO(RD)
2412 |.if FFI 3007 |.if FFI
2413 | li TMP3, LJ_TCDATA 3008 | li TMP3, LJ_TCDATA
2414 | beq TMP0, TMP3, ->vmeta_equal_cd 3009 | beq SFARG1HI, TMP3, ->vmeta_equal_cd
2415 |.endif 3010 |.endif
2416 |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? 3011 |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
2417 |.if FFI 3012 |.if FFI
2418 | beq TMP1, TMP3, ->vmeta_equal_cd 3013 | beq SFARG2HI, TMP3, ->vmeta_equal_cd
2419 |.endif 3014 |.endif
2420 |. xor TMP3, CARG2, CARG3 // Same tv? 3015 |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
2421 | xor TMP1, TMP1, TMP0 // Same type? 3016 | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
2422 | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? 3017 | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
2423 | movz TMP3, r0, AT // Ignore tv if primitive. 3018 | movz TMP3, r0, AT // Ignore tv if primitive.
2424 | movn CARG1, r0, TMP1 // Tab/ud and same type? 3019 | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
2425 | or AT, TMP1, TMP3 // Same type && (pri||same tv). 3020 | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
2426 | movz CARG1, r0, AT 3021 | movz TMP0, r0, AT
2427 | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. 3022 | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
2428 if (vk) { 3023 if (vk) {
2429 |. movn TMP2, r0, AT 3024 |. movn TMP2, r0, AT
2430 } else { 3025 } else {
@@ -2432,15 +3027,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2432 } 3027 }
2433 | // Different tables or userdatas. Need to check __eq metamethod. 3028 | // Different tables or userdatas. Need to check __eq metamethod.
2434 | // Field metatable must be at same offset for GCtab and GCudata! 3029 | // Field metatable must be at same offset for GCtab and GCudata!
2435 | lw TAB:TMP1, TAB:CARG2->metatable 3030 | lw TAB:TMP1, TAB:SFARG1LO->metatable
2436 | beqz TAB:TMP1, <1 // No metatable? 3031 | beqz TAB:TMP1, >1 // No metatable?
2437 |. nop 3032 |. nop
2438 | lbu TMP1, TAB:TMP1->nomm 3033 | lbu TMP1, TAB:TMP1->nomm
2439 | andi TMP1, TMP1, 1<<MM_eq 3034 | andi TMP1, TMP1, 1<<MM_eq
2440 | bnez TMP1, <1 // Or 'no __eq' flag set? 3035 | bnez TMP1, >1 // Or 'no __eq' flag set?
2441 |. nop 3036 |. nop
2442 | b ->vmeta_equal // Handle __eq metamethod. 3037 | b ->vmeta_equal // Handle __eq metamethod.
2443 |. li CARG4, 1-vk // ne = 0 or 1. 3038 |. li TMP0, 1-vk // ne = 0 or 1.
3039 |1:
3040 | addu PC, PC, TMP2
3041 | ins_next
2444 break; 3042 break;
2445 3043
2446 case BC_ISEQS: case BC_ISNES: 3044 case BC_ISEQS: case BC_ISNES:
@@ -2477,38 +3075,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2477 vk = op == BC_ISEQN; 3075 vk = op == BC_ISEQN;
2478 | // RA = src*8, RD = num_const*8, JMP with RD = target 3076 | // RA = src*8, RD = num_const*8, JMP with RD = target
2479 | addu RA, BASE, RA 3077 | addu RA, BASE, RA
2480 | addiu PC, PC, 4 3078 | addu RD, KBASE, RD
2481 | lw TMP0, HI(RA) 3079 | lw SFARG1HI, HI(RA)
2482 | ldc1 f0, 0(RA) 3080 | lw SFARG2HI, HI(RD)
2483 | addu RD, KBASE, RD 3081 | lhu TMP2, OFS_RD(PC)
2484 | lhu TMP2, -4+OFS_RD(PC) 3082 | addiu PC, PC, 4
2485 | ldc1 f2, 0(RD)
2486 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3083 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2487 | sltiu AT, TMP0, LJ_TISNUM
2488 | decode_RD4b TMP2 3084 | decode_RD4b TMP2
2489 |.if FFI
2490 | beqz AT, >5
2491 |.else
2492 | beqz AT, >1
2493 |.endif
2494 |. addu TMP2, TMP2, TMP3
2495 | c.eq.d f0, f2
2496 if (vk) { 3085 if (vk) {
2497 | movf TMP2, r0 3086 |->BC_ISEQN_Z:
2498 | addu PC, PC, TMP2 3087 } else {
3088 |->BC_ISNEN_Z:
3089 }
3090 | bne SFARG1HI, TISNUM, >3
3091 |. lw SFARG1LO, LO(RA)
3092 | lw SFARG2LO, LO(RD)
3093 | addu TMP2, TMP2, TMP3
3094 | bne SFARG2HI, TISNUM, >6
3095 |. xor AT, SFARG1LO, SFARG2LO
3096 if (vk) {
3097 | movn TMP2, r0, AT
2499 |1: 3098 |1:
3099 | addu PC, PC, TMP2
3100 |2:
2500 } else { 3101 } else {
2501 | movt TMP2, r0 3102 | movz TMP2, r0, AT
2502 |1: 3103 |1:
3104 |2:
2503 | addu PC, PC, TMP2 3105 | addu PC, PC, TMP2
2504 } 3106 }
2505 | ins_next 3107 | ins_next
3108 |
3109 |3: // RA is not an integer.
3110 | sltiu AT, SFARG1HI, LJ_TISNUM
2506 |.if FFI 3111 |.if FFI
2507 |5: 3112 | beqz AT, >8
2508 | li AT, LJ_TCDATA 3113 |.else
2509 | beq TMP0, AT, ->vmeta_equal_cd 3114 | beqz AT, <2
3115 |.endif
3116 |. addu TMP2, TMP2, TMP3
3117 | sltiu AT, SFARG2HI, LJ_TISNUM
3118 |.if FPU
3119 | ldc1 f20, 0(RA)
3120 | ldc1 f22, 0(RD)
3121 |.endif
3122 | beqz AT, >5
3123 |. lw SFARG2LO, LO(RD)
3124 |4: // RA and RD are both numbers.
3125 |.if FPU
3126 | c.eq.d f20, f22
3127 | b <1
3128 if (vk) {
3129 |. movf TMP2, r0
3130 } else {
3131 |. movt TMP2, r0
3132 }
3133 |.else
3134 | bal ->vm_sfcmpeq
2510 |. nop 3135 |. nop
2511 | b <1 3136 | b <1
3137 if (vk) {
3138 |. movz TMP2, r0, CRET1
3139 } else {
3140 |. movn TMP2, r0, CRET1
3141 }
3142 |.endif
3143 |
3144 |5: // RA is a number, RD is not a number.
3145 |.if FFI
3146 | bne SFARG2HI, TISNUM, >9
3147 |.else
3148 | bne SFARG2HI, TISNUM, <2
3149 |.endif
3150 | // RA is a number, RD is an integer. Convert RD to a number.
3151 |.if FPU
3152 |. lwc1 f22, LO(RD)
3153 | b <4
3154 |. cvt.d.w f22, f22
3155 |.else
3156 |. nop
3157 | bal ->vm_sfi2d_2
3158 |. nop
3159 | b <4
3160 |. nop
3161 |.endif
3162 |
3163 |6: // RA is an integer, RD is not an integer
3164 | sltiu AT, SFARG2HI, LJ_TISNUM
3165 |.if FFI
3166 | beqz AT, >9
3167 |.else
3168 | beqz AT, <2
3169 |.endif
3170 | // RA is an integer, RD is a number. Convert RA to a number.
3171 |.if FPU
3172 |. mtc1 SFARG1LO, f20
3173 | ldc1 f22, 0(RD)
3174 | b <4
3175 | cvt.d.w f20, f20
3176 |.else
3177 |. nop
3178 | bal ->vm_sfi2d_1
3179 |. nop
3180 | b <4
3181 |. nop
3182 |.endif
3183 |
3184 |.if FFI
3185 |8:
3186 | li AT, LJ_TCDATA
3187 | bne SFARG1HI, AT, <2
3188 |. nop
3189 | b ->vmeta_equal_cd
3190 |. nop
3191 |9:
3192 | li AT, LJ_TCDATA
3193 | bne SFARG2HI, AT, <2
3194 |. nop
3195 | b ->vmeta_equal_cd
2512 |. nop 3196 |. nop
2513 |.endif 3197 |.endif
2514 break; 3198 break;
@@ -2560,7 +3244,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2560 | addu PC, PC, TMP2 3244 | addu PC, PC, TMP2
2561 } else { 3245 } else {
2562 | sltiu TMP0, TMP0, LJ_TISTRUECOND 3246 | sltiu TMP0, TMP0, LJ_TISTRUECOND
2563 | ldc1 f0, 0(RD) 3247 | lw SFRETHI, HI(RD)
3248 | lw SFRETLO, LO(RD)
2564 if (op == BC_ISTC) { 3249 if (op == BC_ISTC) {
2565 | beqz TMP0, >1 3250 | beqz TMP0, >1
2566 } else { 3251 } else {
@@ -2570,22 +3255,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2570 | decode_RD4b TMP2 3255 | decode_RD4b TMP2
2571 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 3256 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
2572 | addu TMP2, TMP2, TMP3 3257 | addu TMP2, TMP2, TMP3
2573 | sdc1 f0, 0(RA) 3258 | sw SFRETHI, HI(RA)
3259 | sw SFRETLO, LO(RA)
2574 | addu PC, PC, TMP2 3260 | addu PC, PC, TMP2
2575 |1: 3261 |1:
2576 } 3262 }
2577 | ins_next 3263 | ins_next
2578 break; 3264 break;
2579 3265
3266 case BC_ISTYPE:
3267 | // RA = src*8, RD = -type*8
3268 | addu TMP2, BASE, RA
3269 | srl TMP1, RD, 3
3270 | lw TMP0, HI(TMP2)
3271 | ins_next1
3272 | addu AT, TMP0, TMP1
3273 | bnez AT, ->vmeta_istype
3274 |. ins_next2
3275 break;
3276 case BC_ISNUM:
3277 | // RA = src*8, RD = -(TISNUM-1)*8
3278 | addu TMP2, BASE, RA
3279 | lw TMP0, HI(TMP2)
3280 | ins_next1
3281 | sltiu AT, TMP0, LJ_TISNUM
3282 | beqz AT, ->vmeta_istype
3283 |. ins_next2
3284 break;
3285
2580 /* -- Unary ops --------------------------------------------------------- */ 3286 /* -- Unary ops --------------------------------------------------------- */
2581 3287
2582 case BC_MOV: 3288 case BC_MOV:
2583 | // RA = dst*8, RD = src*8 3289 | // RA = dst*8, RD = src*8
2584 | addu RD, BASE, RD 3290 | addu RD, BASE, RD
2585 | addu RA, BASE, RA 3291 | addu RA, BASE, RA
2586 | ldc1 f0, 0(RD) 3292 | lw SFRETHI, HI(RD)
3293 | lw SFRETLO, LO(RD)
2587 | ins_next1 3294 | ins_next1
2588 | sdc1 f0, 0(RA) 3295 | sw SFRETHI, HI(RA)
3296 | sw SFRETLO, LO(RA)
2589 | ins_next2 3297 | ins_next2
2590 break; 3298 break;
2591 case BC_NOT: 3299 case BC_NOT:
@@ -2602,16 +3310,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2602 break; 3310 break;
2603 case BC_UNM: 3311 case BC_UNM:
2604 | // RA = dst*8, RD = src*8 3312 | // RA = dst*8, RD = src*8
2605 | addu CARG3, BASE, RD 3313 | addu RB, BASE, RD
3314 | lw SFARG1HI, HI(RB)
2606 | addu RA, BASE, RA 3315 | addu RA, BASE, RA
2607 | lw TMP0, HI(CARG3) 3316 | bne SFARG1HI, TISNUM, >2
2608 | ldc1 f0, 0(CARG3) 3317 |. lw SFARG1LO, LO(RB)
2609 | sltiu AT, TMP0, LJ_TISNUM 3318 | lui TMP1, 0x8000
2610 | beqz AT, ->vmeta_unm 3319 | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
2611 |. neg.d f0, f0 3320 |. negu SFARG1LO, SFARG1LO
3321 |1:
2612 | ins_next1 3322 | ins_next1
2613 | sdc1 f0, 0(RA) 3323 | sw SFARG1HI, HI(RA)
3324 | sw SFARG1LO, LO(RA)
2614 | ins_next2 3325 | ins_next2
3326 |2:
3327 | sltiu AT, SFARG1HI, LJ_TISNUM
3328 | beqz AT, ->vmeta_unm
3329 |. lui TMP1, 0x8000
3330 | b <1
3331 |. xor SFARG1HI, SFARG1HI, TMP1
2615 break; 3332 break;
2616 case BC_LEN: 3333 case BC_LEN:
2617 | // RA = dst*8, RD = src*8 3334 | // RA = dst*8, RD = src*8
@@ -2622,12 +3339,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2622 | li AT, LJ_TSTR 3339 | li AT, LJ_TSTR
2623 | bne TMP0, AT, >2 3340 | bne TMP0, AT, >2
2624 |. li AT, LJ_TTAB 3341 |. li AT, LJ_TTAB
2625 | lw CRET1, STR:CARG1->len 3342 | lw CRET1, STR:CARG1->len
2626 |1: 3343 |1:
2627 | mtc1 CRET1, f0
2628 | cvt.d.w f0, f0
2629 | ins_next1 3344 | ins_next1
2630 | sdc1 f0, 0(RA) 3345 | sw TISNUM, HI(RA)
3346 | sw CRET1, LO(RA)
2631 | ins_next2 3347 | ins_next2
2632 |2: 3348 |2:
2633 | bne TMP0, AT, ->vmeta_len 3349 | bne TMP0, AT, ->vmeta_len
@@ -2658,104 +3374,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2658 3374
2659 /* -- Binary ops -------------------------------------------------------- */ 3375 /* -- Binary ops -------------------------------------------------------- */
2660 3376
2661 |.macro ins_arithpre 3377 |.macro fpmod, a, b, c
3378 | bal ->vm_floor // floor(b/c)
3379 |. div.d FARG1, b, c
3380 | mul.d a, FRET1, c
3381 | sub.d a, b, a // b - floor(b/c)*c
3382 |.endmacro
3383
3384 |.macro sfpmod
3385 | addiu sp, sp, -16
3386 |
3387 | load_got __divdf3
3388 | sw SFARG1HI, HI(sp)
3389 | sw SFARG1LO, LO(sp)
3390 | sw SFARG2HI, 8+HI(sp)
3391 | call_extern
3392 |. sw SFARG2LO, 8+LO(sp)
3393 |
3394 | load_got floor
3395 | move SFARG1HI, SFRETHI
3396 | call_extern
3397 |. move SFARG1LO, SFRETLO
3398 |
3399 | load_got __muldf3
3400 | move SFARG1HI, SFRETHI
3401 | move SFARG1LO, SFRETLO
3402 | lw SFARG2HI, 8+HI(sp)
3403 | call_extern
3404 |. lw SFARG2LO, 8+LO(sp)
3405 |
3406 | load_got __subdf3
3407 | lw SFARG1HI, HI(sp)
3408 | lw SFARG1LO, LO(sp)
3409 | move SFARG2HI, SFRETHI
3410 | call_extern
3411 |. move SFARG2LO, SFRETLO
3412 |
3413 | addiu sp, sp, 16
3414 |.endmacro
3415
3416 |.macro ins_arithpre, label
2662 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3417 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2663 | decode_RB8a RB, INS
2664 | decode_RB8b RB
2665 | decode_RDtoRC8 RC, RD
2666 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 3418 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
2667 ||switch (vk) { 3419 ||switch (vk) {
2668 ||case 0: 3420 ||case 0:
2669 | addu CARG3, BASE, RB 3421 | decode_RB8a RB, INS
2670 | addu CARG4, KBASE, RC 3422 | decode_RB8b RB
2671 | lw TMP1, HI(CARG3) 3423 | decode_RDtoRC8 RC, RD
2672 | ldc1 f20, 0(CARG3) 3424 | // RA = dst*8, RB = src1*8, RC = num_const*8
2673 | ldc1 f22, 0(CARG4) 3425 | addu RB, BASE, RB
2674 | sltiu AT, TMP1, LJ_TISNUM 3426 |.if "label" ~= "none"
3427 | b label
3428 |.endif
3429 |. addu RC, KBASE, RC
2675 || break; 3430 || break;
2676 ||case 1: 3431 ||case 1:
2677 | addu CARG4, BASE, RB 3432 | decode_RB8a RC, INS
2678 | addu CARG3, KBASE, RC 3433 | decode_RB8b RC
2679 | lw TMP1, HI(CARG4) 3434 | decode_RDtoRC8 RB, RD
2680 | ldc1 f22, 0(CARG4) 3435 | // RA = dst*8, RB = num_const*8, RC = src1*8
2681 | ldc1 f20, 0(CARG3) 3436 | addu RC, BASE, RC
2682 | sltiu AT, TMP1, LJ_TISNUM 3437 |.if "label" ~= "none"
3438 | b label
3439 |.endif
3440 |. addu RB, KBASE, RB
2683 || break; 3441 || break;
2684 ||default: 3442 ||default:
2685 | addu CARG3, BASE, RB 3443 | decode_RB8a RB, INS
2686 | addu CARG4, BASE, RC 3444 | decode_RB8b RB
2687 | lw TMP1, HI(CARG3) 3445 | decode_RDtoRC8 RC, RD
2688 | lw TMP2, HI(CARG4) 3446 | // RA = dst*8, RB = src1*8, RC = src2*8
2689 | ldc1 f20, 0(CARG3) 3447 | addu RB, BASE, RB
2690 | ldc1 f22, 0(CARG4) 3448 |.if "label" ~= "none"
2691 | sltiu AT, TMP1, LJ_TISNUM 3449 | b label
2692 | sltiu TMP0, TMP2, LJ_TISNUM 3450 |.endif
2693 | and AT, AT, TMP0 3451 |. addu RC, BASE, RC
2694 || break; 3452 || break;
2695 ||} 3453 ||}
2696 | beqz AT, ->vmeta_arith
2697 |. addu RA, BASE, RA
2698 |.endmacro 3454 |.endmacro
2699 | 3455 |
2700 |.macro fpmod, a, b, c 3456 |.macro ins_arith, intins, fpins, fpcall, label
2701 |->BC_MODVN_Z: 3457 | ins_arithpre none
2702 | bal ->vm_floor // floor(b/c)
2703 |. div.d FARG1, b, c
2704 | mul.d a, FRET1, c
2705 | sub.d a, b, a // b - floor(b/c)*c
2706 |.endmacro
2707 | 3458 |
2708 |.macro ins_arith, ins 3459 |.if "label" ~= "none"
2709 | ins_arithpre 3460 |label:
2710 |.if "ins" == "fpmod_" 3461 |.endif
2711 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3462 |
2712 |. nop 3463 | lw SFARG1HI, HI(RB)
3464 | lw SFARG2HI, HI(RC)
3465 |
3466 |.if "intins" ~= "div"
3467 |
3468 | // Check for two integers.
3469 | lw SFARG1LO, LO(RB)
3470 | bne SFARG1HI, TISNUM, >5
3471 |. lw SFARG2LO, LO(RC)
3472 | bne SFARG2HI, TISNUM, >5
3473 |
3474 |.if "intins" == "addu"
3475 |. intins CRET1, SFARG1LO, SFARG2LO
3476 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
3477 | xor TMP2, CRET1, SFARG2LO
3478 | and TMP1, TMP1, TMP2
3479 | bltz TMP1, ->vmeta_arith
3480 |. addu RA, BASE, RA
3481 |.elif "intins" == "subu"
3482 |. intins CRET1, SFARG1LO, SFARG2LO
3483 | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
3484 | xor TMP2, SFARG1LO, SFARG2LO
3485 | and TMP1, TMP1, TMP2
3486 | bltz TMP1, ->vmeta_arith
3487 |. addu RA, BASE, RA
3488 |.elif "intins" == "mult"
3489 |. intins SFARG1LO, SFARG2LO
3490 | mflo CRET1
3491 | mfhi TMP2
3492 | sra TMP1, CRET1, 31
3493 | bne TMP1, TMP2, ->vmeta_arith
3494 |. addu RA, BASE, RA
2713 |.else 3495 |.else
2714 | ins f0, f20, f22 3496 |. load_got lj_vm_modi
3497 | beqz SFARG2LO, ->vmeta_arith
3498 |. addu RA, BASE, RA
3499 |.if ENDIAN_BE
3500 | move CARG1, SFARG1LO
3501 |.endif
3502 | call_extern
3503 |. move CARG2, SFARG2LO
3504 |.endif
3505 |
3506 | ins_next1
3507 | sw TISNUM, HI(RA)
3508 | sw CRET1, LO(RA)
3509 |3:
3510 | ins_next2
3511 |
3512 |.elif not FPU
3513 |
3514 | lw SFARG1LO, LO(RB)
3515 | lw SFARG2LO, LO(RC)
3516 |
3517 |.endif
3518 |
3519 |5: // Check for two numbers.
3520 | .FPU ldc1 f20, 0(RB)
3521 | sltiu AT, SFARG1HI, LJ_TISNUM
3522 | sltiu TMP0, SFARG2HI, LJ_TISNUM
3523 | .FPU ldc1 f22, 0(RC)
3524 | and AT, AT, TMP0
3525 | beqz AT, ->vmeta_arith
3526 |. addu RA, BASE, RA
3527 |
3528 |.if FPU
3529 | fpins FRET1, f20, f22
3530 |.elif "fpcall" == "sfpmod"
3531 | sfpmod
3532 |.else
3533 | load_got fpcall
3534 | call_extern
3535 |. nop
3536 |.endif
3537 |
2715 | ins_next1 3538 | ins_next1
2716 | sdc1 f0, 0(RA) 3539 |.if not FPU
3540 | sw SFRETHI, HI(RA)
3541 |.endif
3542 |.if "intins" ~= "div"
3543 | b <3
3544 |.endif
3545 |.if FPU
3546 |. sdc1 FRET1, 0(RA)
3547 |.else
3548 |. sw SFRETLO, LO(RA)
3549 |.endif
3550 |.if "intins" == "div"
2717 | ins_next2 3551 | ins_next2
2718 |.endif 3552 |.endif
3553 |
2719 |.endmacro 3554 |.endmacro
2720 3555
2721 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3556 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2722 | ins_arith add.d 3557 | ins_arith addu, add.d, __adddf3, none
2723 break; 3558 break;
2724 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3559 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2725 | ins_arith sub.d 3560 | ins_arith subu, sub.d, __subdf3, none
2726 break; 3561 break;
2727 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3562 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2728 | ins_arith mul.d 3563 | ins_arith mult, mul.d, __muldf3, none
3564 break;
3565 case BC_DIVVN:
3566 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
2729 break; 3567 break;
2730 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3568 case BC_DIVNV: case BC_DIVVV:
2731 | ins_arith div.d 3569 | ins_arithpre ->BC_DIVVN_Z
2732 break; 3570 break;
2733 case BC_MODVN: 3571 case BC_MODVN:
2734 | ins_arith fpmod 3572 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
2735 break; 3573 break;
2736 case BC_MODNV: case BC_MODVV: 3574 case BC_MODNV: case BC_MODVV:
2737 | ins_arith fpmod_ 3575 | ins_arithpre ->BC_MODVN_Z
2738 break; 3576 break;
2739 case BC_POW: 3577 case BC_POW:
2740 | decode_RB8a RB, INS 3578 | ins_arithpre none
2741 | decode_RB8b RB 3579 | lw SFARG1HI, HI(RB)
2742 | decode_RDtoRC8 RC, RD 3580 | lw SFARG2HI, HI(RC)
2743 | addu CARG3, BASE, RB 3581 | sltiu AT, SFARG1HI, LJ_TISNUM
2744 | addu CARG4, BASE, RC 3582 | sltiu TMP0, SFARG2HI, LJ_TISNUM
2745 | lw TMP1, HI(CARG3)
2746 | lw TMP2, HI(CARG4)
2747 | ldc1 FARG1, 0(CARG3)
2748 | ldc1 FARG2, 0(CARG4)
2749 | sltiu AT, TMP1, LJ_TISNUM
2750 | sltiu TMP0, TMP2, LJ_TISNUM
2751 | and AT, AT, TMP0 3583 | and AT, AT, TMP0
2752 | load_got pow 3584 | load_got pow
2753 | beqz AT, ->vmeta_arith 3585 | beqz AT, ->vmeta_arith
2754 |. addu RA, BASE, RA 3586 |. addu RA, BASE, RA
3587 |.if FPU
3588 | ldc1 FARG1, 0(RB)
3589 | ldc1 FARG2, 0(RC)
3590 |.else
3591 | lw SFARG1LO, LO(RB)
3592 | lw SFARG2LO, LO(RC)
3593 |.endif
2755 | call_extern 3594 | call_extern
2756 |. nop 3595 |. nop
2757 | ins_next1 3596 | ins_next1
3597 |.if FPU
2758 | sdc1 FRET1, 0(RA) 3598 | sdc1 FRET1, 0(RA)
3599 |.else
3600 | sw SFRETHI, HI(RA)
3601 | sw SFRETLO, LO(RA)
3602 |.endif
2759 | ins_next2 3603 | ins_next2
2760 break; 3604 break;
2761 3605
@@ -2778,10 +3622,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2778 | bnez CRET1, ->vmeta_binop 3622 | bnez CRET1, ->vmeta_binop
2779 |. lw BASE, L->base 3623 |. lw BASE, L->base
2780 | addu RB, BASE, MULTRES 3624 | addu RB, BASE, MULTRES
2781 | ldc1 f0, 0(RB) 3625 | lw SFRETHI, HI(RB)
3626 | lw SFRETLO, LO(RB)
2782 | addu RA, BASE, RA 3627 | addu RA, BASE, RA
2783 | ins_next1 3628 | ins_next1
2784 | sdc1 f0, 0(RA) // Copy result from RB to RA. 3629 | sw SFRETHI, HI(RA)
3630 | sw SFRETLO, LO(RA)
2785 | ins_next2 3631 | ins_next2
2786 break; 3632 break;
2787 3633
@@ -2816,20 +3662,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2816 case BC_KSHORT: 3662 case BC_KSHORT:
2817 | // RA = dst*8, RD = int16_literal*8 3663 | // RA = dst*8, RD = int16_literal*8
2818 | sra RD, INS, 16 3664 | sra RD, INS, 16
2819 | mtc1 RD, f0
2820 | addu RA, BASE, RA 3665 | addu RA, BASE, RA
2821 | cvt.d.w f0, f0
2822 | ins_next1 3666 | ins_next1
2823 | sdc1 f0, 0(RA) 3667 | sw TISNUM, HI(RA)
3668 | sw RD, LO(RA)
2824 | ins_next2 3669 | ins_next2
2825 break; 3670 break;
2826 case BC_KNUM: 3671 case BC_KNUM:
2827 | // RA = dst*8, RD = num_const*8 3672 | // RA = dst*8, RD = num_const*8
2828 | addu RD, KBASE, RD 3673 | addu RD, KBASE, RD
2829 | addu RA, BASE, RA 3674 | addu RA, BASE, RA
2830 | ldc1 f0, 0(RD) 3675 | lw SFRETHI, HI(RD)
3676 | lw SFRETLO, LO(RD)
2831 | ins_next1 3677 | ins_next1
2832 | sdc1 f0, 0(RA) 3678 | sw SFRETHI, HI(RA)
3679 | sw SFRETLO, LO(RA)
2833 | ins_next2 3680 | ins_next2
2834 break; 3681 break;
2835 case BC_KPRI: 3682 case BC_KPRI:
@@ -2865,9 +3712,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2865 | lw UPVAL:RB, LFUNC:RD->uvptr 3712 | lw UPVAL:RB, LFUNC:RD->uvptr
2866 | ins_next1 3713 | ins_next1
2867 | lw TMP1, UPVAL:RB->v 3714 | lw TMP1, UPVAL:RB->v
2868 | ldc1 f0, 0(TMP1) 3715 | lw SFRETHI, HI(TMP1)
3716 | lw SFRETLO, LO(TMP1)
2869 | addu RA, BASE, RA 3717 | addu RA, BASE, RA
2870 | sdc1 f0, 0(RA) 3718 | sw SFRETHI, HI(RA)
3719 | sw SFRETLO, LO(RA)
2871 | ins_next2 3720 | ins_next2
2872 break; 3721 break;
2873 case BC_USETV: 3722 case BC_USETV:
@@ -2876,26 +3725,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2876 | srl RA, RA, 1 3725 | srl RA, RA, 1
2877 | addu RD, BASE, RD 3726 | addu RD, BASE, RD
2878 | addu RA, RA, LFUNC:RB 3727 | addu RA, RA, LFUNC:RB
2879 | ldc1 f0, 0(RD)
2880 | lw UPVAL:RB, LFUNC:RA->uvptr 3728 | lw UPVAL:RB, LFUNC:RA->uvptr
3729 | lw SFRETHI, HI(RD)
3730 | lw SFRETLO, LO(RD)
2881 | lbu TMP3, UPVAL:RB->marked 3731 | lbu TMP3, UPVAL:RB->marked
2882 | lw CARG2, UPVAL:RB->v 3732 | lw CARG2, UPVAL:RB->v
2883 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 3733 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
2884 | lbu TMP0, UPVAL:RB->closed 3734 | lbu TMP0, UPVAL:RB->closed
2885 | lw TMP2, HI(RD) 3735 | sw SFRETHI, HI(CARG2)
2886 | sdc1 f0, 0(CARG2) 3736 | sw SFRETLO, LO(CARG2)
2887 | li AT, LJ_GC_BLACK|1 3737 | li AT, LJ_GC_BLACK|1
2888 | or TMP3, TMP3, TMP0 3738 | or TMP3, TMP3, TMP0
2889 | beq TMP3, AT, >2 // Upvalue is closed and black? 3739 | beq TMP3, AT, >2 // Upvalue is closed and black?
2890 |. addiu TMP2, TMP2, -(LJ_TNUMX+1) 3740 |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
2891 |1: 3741 |1:
2892 | ins_next 3742 | ins_next
2893 | 3743 |
2894 |2: // Check if new value is collectable. 3744 |2: // Check if new value is collectable.
2895 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) 3745 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
2896 | beqz AT, <1 // tvisgcv(v) 3746 | beqz AT, <1 // tvisgcv(v)
2897 |. lw TMP1, LO(RD) 3747 |. nop
2898 | lbu TMP3, GCOBJ:TMP1->gch.marked 3748 | lbu TMP3, GCOBJ:SFRETLO->gch.marked
2899 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) 3749 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
2900 | beqz TMP3, <1 3750 | beqz TMP3, <1
2901 |. load_got lj_gc_barrieruv 3751 |. load_got lj_gc_barrieruv
@@ -2943,11 +3793,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2943 | srl RA, RA, 1 3793 | srl RA, RA, 1
2944 | addu RD, KBASE, RD 3794 | addu RD, KBASE, RD
2945 | addu RA, RA, LFUNC:RB 3795 | addu RA, RA, LFUNC:RB
2946 | ldc1 f0, 0(RD) 3796 | lw UPVAL:RB, LFUNC:RA->uvptr
2947 | lw UPVAL:RB, LFUNC:RA->uvptr 3797 | lw SFRETHI, HI(RD)
3798 | lw SFRETLO, LO(RD)
3799 | lw TMP1, UPVAL:RB->v
2948 | ins_next1 3800 | ins_next1
2949 | lw TMP1, UPVAL:RB->v 3801 | sw SFRETHI, HI(TMP1)
2950 | sdc1 f0, 0(TMP1) 3802 | sw SFRETLO, LO(TMP1)
2951 | ins_next2 3803 | ins_next2
2952 break; 3804 break;
2953 case BC_USETP: 3805 case BC_USETP:
@@ -2957,10 +3809,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2957 | srl TMP0, RD, 3 3809 | srl TMP0, RD, 3
2958 | addu RA, RA, LFUNC:RB 3810 | addu RA, RA, LFUNC:RB
2959 | not TMP0, TMP0 3811 | not TMP0, TMP0
2960 | lw UPVAL:RB, LFUNC:RA->uvptr 3812 | lw UPVAL:RB, LFUNC:RA->uvptr
2961 | ins_next1 3813 | ins_next1
2962 | lw TMP1, UPVAL:RB->v 3814 | lw TMP1, UPVAL:RB->v
2963 | sw TMP0, HI(TMP1) 3815 | sw TMP0, HI(TMP1)
2964 | ins_next2 3816 | ins_next2
2965 break; 3817 break;
2966 3818
@@ -2996,8 +3848,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2996 | li TMP0, LJ_TFUNC 3848 | li TMP0, LJ_TFUNC
2997 | ins_next1 3849 | ins_next1
2998 | addu RA, BASE, RA 3850 | addu RA, BASE, RA
2999 | sw TMP0, HI(RA)
3000 | sw LFUNC:CRET1, LO(RA) 3851 | sw LFUNC:CRET1, LO(RA)
3852 | sw TMP0, HI(RA)
3001 | ins_next2 3853 | ins_next2
3002 break; 3854 break;
3003 3855
@@ -3078,31 +3930,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3078 | lw TMP2, HI(CARG3) 3930 | lw TMP2, HI(CARG3)
3079 | lw TAB:RB, LO(CARG2) 3931 | lw TAB:RB, LO(CARG2)
3080 | li AT, LJ_TTAB 3932 | li AT, LJ_TTAB
3081 | ldc1 f0, 0(CARG3)
3082 | bne TMP1, AT, ->vmeta_tgetv 3933 | bne TMP1, AT, ->vmeta_tgetv
3083 |. addu RA, BASE, RA 3934 |. addu RA, BASE, RA
3084 | sltiu AT, TMP2, LJ_TISNUM 3935 | bne TMP2, TISNUM, >5
3085 | beqz AT, >5 3936 |. lw RC, LO(CARG3)
3086 |. li AT, LJ_TSTR 3937 | lw TMP0, TAB:RB->asize
3087 |
3088 | // Convert number key to integer, check for integerness and range.
3089 | cvt.w.d f2, f0
3090 | lw TMP0, TAB:RB->asize
3091 | mfc1 TMP2, f2
3092 | cvt.d.w f4, f2
3093 | lw TMP1, TAB:RB->array 3938 | lw TMP1, TAB:RB->array
3094 | c.eq.d f0, f4 3939 | sltu AT, RC, TMP0
3095 | sltu AT, TMP2, TMP0 3940 | sll TMP2, RC, 3
3096 | movf AT, r0
3097 | sll TMP2, TMP2, 3
3098 | beqz AT, ->vmeta_tgetv // Integer key and in array part? 3941 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
3099 |. addu TMP2, TMP1, TMP2 3942 |. addu TMP2, TMP1, TMP2
3100 | lw TMP0, HI(TMP2) 3943 | lw SFRETHI, HI(TMP2)
3101 | beq TMP0, TISNIL, >2 3944 | beq SFRETHI, TISNIL, >2
3102 |. ldc1 f0, 0(TMP2) 3945 |. lw SFRETLO, LO(TMP2)
3103 |1: 3946 |1:
3104 | ins_next1 3947 | ins_next1
3105 | sdc1 f0, 0(RA) 3948 | sw SFRETHI, HI(RA)
3949 | sw SFRETLO, LO(RA)
3106 | ins_next2 3950 | ins_next2
3107 | 3951 |
3108 |2: // Check for __index if table value is nil. 3952 |2: // Check for __index if table value is nil.
@@ -3117,8 +3961,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3117 |. nop 3961 |. nop
3118 | 3962 |
3119 |5: 3963 |5:
3964 | li AT, LJ_TSTR
3120 | bne TMP2, AT, ->vmeta_tgetv 3965 | bne TMP2, AT, ->vmeta_tgetv
3121 |. lw STR:RC, LO(CARG3) 3966 |. nop
3122 | b ->BC_TGETS_Z // String key? 3967 | b ->BC_TGETS_Z // String key?
3123 |. nop 3968 |. nop
3124 break; 3969 break;
@@ -3150,18 +3995,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3150 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 3995 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3151 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 3996 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
3152 | lw NODE:TMP1, NODE:TMP2->next 3997 | lw NODE:TMP1, NODE:TMP2->next
3153 | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) 3998 | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
3154 | addiu CARG1, CARG1, -LJ_TSTR 3999 | addiu CARG1, CARG1, -LJ_TSTR
3155 | xor TMP0, TMP0, STR:RC 4000 | xor TMP0, TMP0, STR:RC
3156 | or AT, CARG1, TMP0 4001 | or AT, CARG1, TMP0
3157 | bnez AT, >4 4002 | bnez AT, >4
3158 |. lw TAB:TMP3, TAB:RB->metatable 4003 |. lw TAB:TMP3, TAB:RB->metatable
3159 | beq CARG2, TISNIL, >5 // Key found, but nil value? 4004 | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
3160 |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) 4005 |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
3161 |3: 4006 |3:
3162 | ins_next1 4007 | ins_next1
3163 | sw CARG2, HI(RA) 4008 | sw SFRETHI, HI(RA)
3164 | sw CARG1, LO(RA) 4009 | sw SFRETLO, LO(RA)
3165 | ins_next2 4010 | ins_next2
3166 | 4011 |
3167 |4: // Follow hash chain. 4012 |4: // Follow hash chain.
@@ -3171,7 +4016,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3171 | 4016 |
3172 |5: // Check for __index if table value is nil. 4017 |5: // Check for __index if table value is nil.
3173 | beqz TAB:TMP3, <3 // No metatable: done. 4018 | beqz TAB:TMP3, <3 // No metatable: done.
3174 |. li CARG2, LJ_TNIL 4019 |. li SFRETHI, LJ_TNIL
3175 | lbu TMP0, TAB:TMP3->nomm 4020 | lbu TMP0, TAB:TMP3->nomm
3176 | andi TMP0, TMP0, 1<<MM_index 4021 | andi TMP0, TMP0, 1<<MM_index
3177 | bnez TMP0, <3 // 'no __index' flag set: done. 4022 | bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3196,12 +4041,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3196 | sltu AT, TMP0, TMP1 4041 | sltu AT, TMP0, TMP1
3197 | beqz AT, ->vmeta_tgetb 4042 | beqz AT, ->vmeta_tgetb
3198 |. addu RC, TMP2, RC 4043 |. addu RC, TMP2, RC
3199 | lw TMP1, HI(RC) 4044 | lw SFRETHI, HI(RC)
3200 | beq TMP1, TISNIL, >5 4045 | beq SFRETHI, TISNIL, >5
3201 |. ldc1 f0, 0(RC) 4046 |. lw SFRETLO, LO(RC)
3202 |1: 4047 |1:
3203 | ins_next1 4048 | ins_next1
3204 | sdc1 f0, 0(RA) 4049 | sw SFRETHI, HI(RA)
4050 | sw SFRETLO, LO(RA)
3205 | ins_next2 4051 | ins_next2
3206 | 4052 |
3207 |5: // Check for __index if table value is nil. 4053 |5: // Check for __index if table value is nil.
@@ -3212,9 +4058,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3212 | andi TMP1, TMP1, 1<<MM_index 4058 | andi TMP1, TMP1, 1<<MM_index
3213 | bnez TMP1, <1 // 'no __index' flag set: done. 4059 | bnez TMP1, <1 // 'no __index' flag set: done.
3214 |. nop 4060 |. nop
3215 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4061 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
3216 |. nop 4062 |. nop
3217 break; 4063 break;
4064 case BC_TGETR:
4065 | // RA = dst*8, RB = table*8, RC = key*8
4066 | decode_RB8a RB, INS
4067 | decode_RB8b RB
4068 | decode_RDtoRC8 RC, RD
4069 | addu RB, BASE, RB
4070 | addu RC, BASE, RC
4071 | lw TAB:CARG1, LO(RB)
4072 | lw CARG2, LO(RC)
4073 | addu RA, BASE, RA
4074 | lw TMP0, TAB:CARG1->asize
4075 | lw TMP1, TAB:CARG1->array
4076 | sltu AT, CARG2, TMP0
4077 | sll TMP2, CARG2, 3
4078 | beqz AT, ->vmeta_tgetr // In array part?
4079 |. addu CRET1, TMP1, TMP2
4080 | lw SFARG2HI, HI(CRET1)
4081 | lw SFARG2LO, LO(CRET1)
4082 |->BC_TGETR_Z:
4083 | ins_next1
4084 | sw SFARG2HI, HI(RA)
4085 | sw SFARG2LO, LO(RA)
4086 | ins_next2
4087 break;
3218 4088
3219 case BC_TSETV: 4089 case BC_TSETV:
3220 | // RA = src*8, RB = table*8, RC = key*8 4090 | // RA = src*8, RB = table*8, RC = key*8
@@ -3227,33 +4097,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3227 | lw TMP2, HI(CARG3) 4097 | lw TMP2, HI(CARG3)
3228 | lw TAB:RB, LO(CARG2) 4098 | lw TAB:RB, LO(CARG2)
3229 | li AT, LJ_TTAB 4099 | li AT, LJ_TTAB
3230 | ldc1 f0, 0(CARG3)
3231 | bne TMP1, AT, ->vmeta_tsetv 4100 | bne TMP1, AT, ->vmeta_tsetv
3232 |. addu RA, BASE, RA 4101 |. addu RA, BASE, RA
3233 | sltiu AT, TMP2, LJ_TISNUM 4102 | bne TMP2, TISNUM, >5
3234 | beqz AT, >5 4103 |. lw RC, LO(CARG3)
3235 |. li AT, LJ_TSTR 4104 | lw TMP0, TAB:RB->asize
3236 |
3237 | // Convert number key to integer, check for integerness and range.
3238 | cvt.w.d f2, f0
3239 | lw TMP0, TAB:RB->asize
3240 | mfc1 TMP2, f2
3241 | cvt.d.w f4, f2
3242 | lw TMP1, TAB:RB->array 4105 | lw TMP1, TAB:RB->array
3243 | c.eq.d f0, f4 4106 | sltu AT, RC, TMP0
3244 | sltu AT, TMP2, TMP0 4107 | sll TMP2, RC, 3
3245 | movf AT, r0
3246 | sll TMP2, TMP2, 3
3247 | beqz AT, ->vmeta_tsetv // Integer key and in array part? 4108 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
3248 |. addu TMP1, TMP1, TMP2 4109 |. addu TMP1, TMP1, TMP2
3249 | lbu TMP3, TAB:RB->marked
3250 | lw TMP0, HI(TMP1) 4110 | lw TMP0, HI(TMP1)
4111 | lbu TMP3, TAB:RB->marked
4112 | lw SFRETHI, HI(RA)
3251 | beq TMP0, TISNIL, >3 4113 | beq TMP0, TISNIL, >3
3252 |. ldc1 f0, 0(RA) 4114 |. lw SFRETLO, LO(RA)
3253 |1: 4115 |1:
3254 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4116 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
3255 | bnez AT, >7 4117 | sw SFRETHI, HI(TMP1)
3256 |. sdc1 f0, 0(TMP1) 4118 | bnez AT, >7
4119 |. sw SFRETLO, LO(TMP1)
3257 |2: 4120 |2:
3258 | ins_next 4121 | ins_next
3259 | 4122 |
@@ -3269,8 +4132,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3269 |. nop 4132 |. nop
3270 | 4133 |
3271 |5: 4134 |5:
4135 | li AT, LJ_TSTR
3272 | bne TMP2, AT, ->vmeta_tsetv 4136 | bne TMP2, AT, ->vmeta_tsetv
3273 |. lw STR:RC, LO(CARG3) 4137 |. nop
3274 | b ->BC_TSETS_Z // String key? 4138 | b ->BC_TSETS_Z // String key?
3275 |. nop 4139 |. nop
3276 | 4140 |
@@ -3302,7 +4166,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3302 | sll TMP1, TMP1, 3 4166 | sll TMP1, TMP1, 3
3303 | subu TMP1, TMP0, TMP1 4167 | subu TMP1, TMP0, TMP1
3304 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) 4168 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4169 |.if FPU
3305 | ldc1 f20, 0(RA) 4170 | ldc1 f20, 0(RA)
4171 |.else
4172 | lw SFRETHI, HI(RA)
4173 | lw SFRETLO, LO(RA)
4174 |.endif
3306 |1: 4175 |1:
3307 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) 4176 | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
3308 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) 4177 | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3316,8 +4185,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3316 |. lw TAB:TMP0, TAB:RB->metatable 4185 |. lw TAB:TMP0, TAB:RB->metatable
3317 |2: 4186 |2:
3318 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4187 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4188 |.if FPU
3319 | bnez AT, >7 4189 | bnez AT, >7
3320 |. sdc1 f20, NODE:TMP2->val 4190 |. sdc1 f20, NODE:TMP2->val
4191 |.else
4192 | sw SFRETHI, NODE:TMP2->val.u32.hi
4193 | bnez AT, >7
4194 |. sw SFRETLO, NODE:TMP2->val.u32.lo
4195 |.endif
3321 |3: 4196 |3:
3322 | ins_next 4197 | ins_next
3323 | 4198 |
@@ -3355,8 +4230,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3355 |. move CARG1, L 4230 |. move CARG1, L
3356 | // Returns TValue *. 4231 | // Returns TValue *.
3357 | lw BASE, L->base 4232 | lw BASE, L->base
4233 |.if FPU
3358 | b <3 // No 2nd write barrier needed. 4234 | b <3 // No 2nd write barrier needed.
3359 |. sdc1 f20, 0(CRET1) 4235 |. sdc1 f20, 0(CRET1)
4236 |.else
4237 | lw SFARG1HI, HI(RA)
4238 | lw SFARG1LO, LO(RA)
4239 | sw SFARG1HI, HI(CRET1)
4240 | b <3 // No 2nd write barrier needed.
4241 |. sw SFARG1LO, LO(CRET1)
4242 |.endif
3360 | 4243 |
3361 |7: // Possible table write barrier for the value. Skip valiswhite check. 4244 |7: // Possible table write barrier for the value. Skip valiswhite check.
3362 | barrierback TAB:RB, TMP3, TMP0, <3 4245 | barrierback TAB:RB, TMP3, TMP0, <3
@@ -3381,11 +4264,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3381 | lw TMP1, HI(RC) 4264 | lw TMP1, HI(RC)
3382 | lbu TMP3, TAB:RB->marked 4265 | lbu TMP3, TAB:RB->marked
3383 | beq TMP1, TISNIL, >5 4266 | beq TMP1, TISNIL, >5
3384 |. ldc1 f0, 0(RA)
3385 |1: 4267 |1:
4268 |. lw SFRETHI, HI(RA)
4269 | lw SFRETLO, LO(RA)
3386 | andi AT, TMP3, LJ_GC_BLACK // isblack(table) 4270 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4271 | sw SFRETHI, HI(RC)
3387 | bnez AT, >7 4272 | bnez AT, >7
3388 |. sdc1 f0, 0(RC) 4273 |. sw SFRETLO, LO(RC)
3389 |2: 4274 |2:
3390 | ins_next 4275 | ins_next
3391 | 4276 |
@@ -3397,12 +4282,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3397 | andi TMP1, TMP1, 1<<MM_newindex 4282 | andi TMP1, TMP1, 1<<MM_newindex
3398 | bnez TMP1, <1 // 'no __newindex' flag set: done. 4283 | bnez TMP1, <1 // 'no __newindex' flag set: done.
3399 |. nop 4284 |. nop
3400 | b ->vmeta_tsetb // Caveat: preserve TMP0! 4285 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
3401 |. nop 4286 |. nop
3402 | 4287 |
3403 |7: // Possible table write barrier for the value. Skip valiswhite check. 4288 |7: // Possible table write barrier for the value. Skip valiswhite check.
3404 | barrierback TAB:RB, TMP3, TMP0, <2 4289 | barrierback TAB:RB, TMP3, TMP0, <2
3405 break; 4290 break;
4291 case BC_TSETR:
4292 | // RA = dst*8, RB = table*8, RC = key*8
4293 | decode_RB8a RB, INS
4294 | decode_RB8b RB
4295 | decode_RDtoRC8 RC, RD
4296 | addu CARG1, BASE, RB
4297 | addu CARG3, BASE, RC
4298 | lw TAB:CARG2, LO(CARG1)
4299 | lw CARG3, LO(CARG3)
4300 | lbu TMP3, TAB:CARG2->marked
4301 | lw TMP0, TAB:CARG2->asize
4302 | lw TMP1, TAB:CARG2->array
4303 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4304 | bnez AT, >7
4305 |. addu RA, BASE, RA
4306 |2:
4307 | sltu AT, CARG3, TMP0
4308 | sll TMP2, CARG3, 3
4309 | beqz AT, ->vmeta_tsetr // In array part?
4310 |. addu CRET1, TMP1, TMP2
4311 |->BC_TSETR_Z:
4312 | lw SFARG1HI, HI(RA)
4313 | lw SFARG1LO, LO(RA)
4314 | ins_next1
4315 | sw SFARG1HI, HI(CRET1)
4316 | sw SFARG1LO, LO(CRET1)
4317 | ins_next2
4318 |
4319 |7: // Possible table write barrier for the value. Skip valiswhite check.
4320 | barrierback TAB:CARG2, TMP3, CRET1, <2
4321 break;
3406 4322
3407 case BC_TSETM: 4323 case BC_TSETM:
3408 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4324 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3425,10 +4341,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3425 | addu TMP1, TMP1, CARG1 4341 | addu TMP1, TMP1, CARG1
3426 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4342 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
3427 |3: // Copy result slots to table. 4343 |3: // Copy result slots to table.
3428 | ldc1 f0, 0(RA) 4344 | lw SFRETHI, HI(RA)
4345 | lw SFRETLO, LO(RA)
3429 | addiu RA, RA, 8 4346 | addiu RA, RA, 8
3430 | sltu AT, RA, TMP2 4347 | sltu AT, RA, TMP2
3431 | sdc1 f0, 0(TMP1) 4348 | sw SFRETHI, HI(TMP1)
4349 | sw SFRETLO, LO(TMP1)
3432 | bnez AT, <3 4350 | bnez AT, <3
3433 |. addiu TMP1, TMP1, 8 4351 |. addiu TMP1, TMP1, 8
3434 | bnez TMP0, >7 4352 | bnez TMP0, >7
@@ -3503,10 +4421,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3503 | beqz NARGS8:RC, >3 4421 | beqz NARGS8:RC, >3
3504 |. move TMP3, NARGS8:RC 4422 |. move TMP3, NARGS8:RC
3505 |2: 4423 |2:
3506 | ldc1 f0, 0(RA) 4424 | lw SFRETHI, HI(RA)
4425 | lw SFRETLO, LO(RA)
3507 | addiu RA, RA, 8 4426 | addiu RA, RA, 8
3508 | addiu TMP3, TMP3, -8 4427 | addiu TMP3, TMP3, -8
3509 | sdc1 f0, 0(TMP2) 4428 | sw SFRETHI, HI(TMP2)
4429 | sw SFRETLO, LO(TMP2)
3510 | bnez TMP3, <2 4430 | bnez TMP3, <2
3511 |. addiu TMP2, TMP2, 8 4431 |. addiu TMP2, TMP2, 8
3512 |3: 4432 |3:
@@ -3543,12 +4463,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3543 | li AT, LJ_TFUNC 4463 | li AT, LJ_TFUNC
3544 | lw TMP1, -24+HI(BASE) 4464 | lw TMP1, -24+HI(BASE)
3545 | lw LFUNC:RB, -24+LO(BASE) 4465 | lw LFUNC:RB, -24+LO(BASE)
3546 | ldc1 f2, -8(BASE) 4466 | lw SFARG1HI, -16+HI(BASE)
3547 | ldc1 f0, -16(BASE) 4467 | lw SFARG1LO, -16+LO(BASE)
4468 | lw SFARG2HI, -8+HI(BASE)
4469 | lw SFARG2LO, -8+LO(BASE)
3548 | sw TMP1, HI(BASE) // Copy callable. 4470 | sw TMP1, HI(BASE) // Copy callable.
3549 | sw LFUNC:RB, LO(BASE) 4471 | sw LFUNC:RB, LO(BASE)
3550 | sdc1 f2, 16(BASE) // Copy control var. 4472 | sw SFARG1HI, 8+HI(BASE) // Copy state.
3551 | sdc1 f0, 8(BASE) // Copy state. 4473 | sw SFARG1LO, 8+LO(BASE)
4474 | sw SFARG2HI, 16+HI(BASE) // Copy control var.
4475 | sw SFARG2LO, 16+LO(BASE)
3552 | addiu BASE, BASE, 8 4476 | addiu BASE, BASE, 8
3553 | bne TMP1, AT, ->vmeta_call 4477 | bne TMP1, AT, ->vmeta_call
3554 |. li NARGS8:RC, 16 // Iterators get 2 arguments. 4478 |. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3571,20 +4495,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3571 | beqz AT, >5 // Index points after array part? 4495 | beqz AT, >5 // Index points after array part?
3572 |. sll TMP3, RC, 3 4496 |. sll TMP3, RC, 3
3573 | addu TMP3, TMP1, TMP3 4497 | addu TMP3, TMP1, TMP3
3574 | lw TMP2, HI(TMP3) 4498 | lw SFARG1HI, HI(TMP3)
3575 | ldc1 f0, 0(TMP3) 4499 | lw SFARG1LO, LO(TMP3)
3576 | mtc1 RC, f2
3577 | lhu RD, -4+OFS_RD(PC) 4500 | lhu RD, -4+OFS_RD(PC)
3578 | beq TMP2, TISNIL, <1 // Skip holes in array part. 4501 | sw TISNUM, HI(RA)
4502 | sw RC, LO(RA)
4503 | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
3579 |. addiu RC, RC, 1 4504 |. addiu RC, RC, 1
3580 | cvt.d.w f2, f2 4505 | sw SFARG1HI, 8+HI(RA)
4506 | sw SFARG1LO, 8+LO(RA)
3581 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4507 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3582 | sdc1 f0, 8(RA)
3583 | decode_RD4b RD 4508 | decode_RD4b RD
3584 | addu RD, RD, TMP3 4509 | addu RD, RD, TMP3
3585 | sw RC, -8+LO(RA) // Update control var. 4510 | sw RC, -8+LO(RA) // Update control var.
3586 | addu PC, PC, RD 4511 | addu PC, PC, RD
3587 | sdc1 f2, 0(RA)
3588 |3: 4512 |3:
3589 | ins_next 4513 | ins_next
3590 | 4514 |
@@ -3599,18 +4523,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3599 | sll RB, RC, 3 4523 | sll RB, RC, 3
3600 | subu TMP3, TMP3, RB 4524 | subu TMP3, TMP3, RB
3601 | addu NODE:TMP3, TMP3, TMP2 4525 | addu NODE:TMP3, TMP3, TMP2
3602 | lw RB, HI(NODE:TMP3) 4526 | lw SFARG1HI, NODE:TMP3->val.u32.hi
3603 | ldc1 f0, 0(NODE:TMP3) 4527 | lw SFARG1LO, NODE:TMP3->val.u32.lo
3604 | lhu RD, -4+OFS_RD(PC) 4528 | lhu RD, -4+OFS_RD(PC)
3605 | beq RB, TISNIL, <6 // Skip holes in hash part. 4529 | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
3606 |. addiu RC, RC, 1 4530 |. addiu RC, RC, 1
3607 | ldc1 f2, NODE:TMP3->key 4531 | lw SFARG2HI, NODE:TMP3->key.u32.hi
4532 | lw SFARG2LO, NODE:TMP3->key.u32.lo
3608 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) 4533 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3609 | sdc1 f0, 8(RA) 4534 | sw SFARG1HI, 8+HI(RA)
4535 | sw SFARG1LO, 8+LO(RA)
3610 | addu RC, RC, TMP0 4536 | addu RC, RC, TMP0
3611 | decode_RD4b RD 4537 | decode_RD4b RD
3612 | addu RD, RD, TMP3 4538 | addu RD, RD, TMP3
3613 | sdc1 f2, 0(RA) 4539 | sw SFARG2HI, HI(RA)
4540 | sw SFARG2LO, LO(RA)
3614 | addu PC, PC, RD 4541 | addu PC, PC, RD
3615 | b <3 4542 | b <3
3616 |. sw RC, -8+LO(RA) // Update control var. 4543 |. sw RC, -8+LO(RA) // Update control var.
@@ -3690,9 +4617,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3690 | bnez AT, >7 4617 | bnez AT, >7
3691 |. addiu MULTRES, TMP1, 8 4618 |. addiu MULTRES, TMP1, 8
3692 |6: 4619 |6:
3693 | ldc1 f0, 0(RC) 4620 | lw SFRETHI, HI(RC)
4621 | lw SFRETLO, LO(RC)
3694 | addiu RC, RC, 8 4622 | addiu RC, RC, 8
3695 | sdc1 f0, 0(RA) 4623 | sw SFRETHI, HI(RA)
4624 | sw SFRETLO, LO(RA)
3696 | sltu AT, RC, TMP3 4625 | sltu AT, RC, TMP3
3697 | bnez AT, <6 // More vararg slots? 4626 | bnez AT, <6 // More vararg slots?
3698 |. addiu RA, RA, 8 4627 |. addiu RA, RA, 8
@@ -3748,10 +4677,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3748 | beqz RC, >3 4677 | beqz RC, >3
3749 |. subu BASE, TMP2, TMP0 4678 |. subu BASE, TMP2, TMP0
3750 |2: 4679 |2:
3751 | ldc1 f0, 0(RA) 4680 | lw SFRETHI, HI(RA)
4681 | lw SFRETLO, LO(RA)
3752 | addiu RA, RA, 8 4682 | addiu RA, RA, 8
3753 | addiu RC, RC, -8 4683 | addiu RC, RC, -8
3754 | sdc1 f0, 0(TMP2) 4684 | sw SFRETHI, HI(TMP2)
4685 | sw SFRETLO, LO(TMP2)
3755 | bnez RC, <2 4686 | bnez RC, <2
3756 |. addiu TMP2, TMP2, 8 4687 |. addiu TMP2, TMP2, 8
3757 |3: 4688 |3:
@@ -3792,14 +4723,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3792 | lw INS, -4(PC) 4723 | lw INS, -4(PC)
3793 | addiu TMP2, BASE, -8 4724 | addiu TMP2, BASE, -8
3794 if (op == BC_RET1) { 4725 if (op == BC_RET1) {
3795 | ldc1 f0, 0(RA) 4726 | lw SFRETHI, HI(RA)
4727 | lw SFRETLO, LO(RA)
3796 } 4728 }
3797 | decode_RB8a RB, INS 4729 | decode_RB8a RB, INS
3798 | decode_RA8a RA, INS 4730 | decode_RA8a RA, INS
3799 | decode_RB8b RB 4731 | decode_RB8b RB
3800 | decode_RA8b RA 4732 | decode_RA8b RA
3801 if (op == BC_RET1) { 4733 if (op == BC_RET1) {
3802 | sdc1 f0, 0(TMP2) 4734 | sw SFRETHI, HI(TMP2)
4735 | sw SFRETLO, LO(TMP2)
3803 } 4736 }
3804 | subu BASE, TMP2, RA 4737 | subu BASE, TMP2, RA
3805 |5: 4738 |5:
@@ -3841,69 +4774,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3841 | // RA = base*8, RD = target (after end of loop or start of loop) 4774 | // RA = base*8, RD = target (after end of loop or start of loop)
3842 vk = (op == BC_IFORL || op == BC_JFORL); 4775 vk = (op == BC_IFORL || op == BC_JFORL);
3843 | addu RA, BASE, RA 4776 | addu RA, BASE, RA
3844 if (vk) { 4777 | lw SFARG1HI, FORL_IDX*8+HI(RA)
3845 | ldc1 f0, FORL_IDX*8(RA) 4778 | lw SFARG1LO, FORL_IDX*8+LO(RA)
3846 | ldc1 f4, FORL_STEP*8(RA)
3847 | ldc1 f2, FORL_STOP*8(RA)
3848 | lw TMP3, FORL_STEP*8+HI(RA)
3849 | add.d f0, f0, f4
3850 | sdc1 f0, FORL_IDX*8(RA)
3851 } else {
3852 | lw TMP1, FORL_IDX*8+HI(RA)
3853 | lw TMP3, FORL_STEP*8+HI(RA)
3854 | lw TMP2, FORL_STOP*8+HI(RA)
3855 | sltiu TMP1, TMP1, LJ_TISNUM
3856 | sltiu TMP0, TMP3, LJ_TISNUM
3857 | sltiu TMP2, TMP2, LJ_TISNUM
3858 | and TMP1, TMP1, TMP0
3859 | and TMP1, TMP1, TMP2
3860 | ldc1 f0, FORL_IDX*8(RA)
3861 | beqz TMP1, ->vmeta_for
3862 |. ldc1 f2, FORL_STOP*8(RA)
3863 }
3864 if (op != BC_JFORL) { 4779 if (op != BC_JFORL) {
3865 | srl RD, RD, 1 4780 | srl RD, RD, 1
3866 | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) 4781 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4782 | addu TMP2, RD, TMP2
4783 }
4784 if (!vk) {
4785 | lw SFARG2HI, FORL_STOP*8+HI(RA)
4786 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4787 | bne SFARG1HI, TISNUM, >5
4788 |. lw SFRETHI, FORL_STEP*8+HI(RA)
4789 | xor AT, SFARG2HI, TISNUM
4790 | lw SFRETLO, FORL_STEP*8+LO(RA)
4791 | xor TMP0, SFRETHI, TISNUM
4792 | or AT, AT, TMP0
4793 | bnez AT, ->vmeta_for
4794 |. slt AT, SFRETLO, r0
4795 | slt CRET1, SFARG2LO, SFARG1LO
4796 | slt TMP1, SFARG1LO, SFARG2LO
4797 | movn CRET1, TMP1, AT
4798 } else {
4799 | bne SFARG1HI, TISNUM, >5
4800 |. lw SFARG2LO, FORL_STEP*8+LO(RA)
4801 | lw SFRETLO, FORL_STOP*8+LO(RA)
4802 | move TMP3, SFARG1LO
4803 | addu SFARG1LO, SFARG1LO, SFARG2LO
4804 | xor TMP0, SFARG1LO, TMP3
4805 | xor TMP1, SFARG1LO, SFARG2LO
4806 | and TMP0, TMP0, TMP1
4807 | slt TMP1, SFARG1LO, SFRETLO
4808 | slt CRET1, SFRETLO, SFARG1LO
4809 | slt AT, SFARG2LO, r0
4810 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
4811 | movn CRET1, TMP1, AT
4812 | or CRET1, CRET1, TMP0
4813 }
4814 |1:
4815 if (op == BC_FORI) {
4816 | movz TMP2, r0, CRET1
4817 | addu PC, PC, TMP2
4818 } else if (op == BC_JFORI) {
4819 | addu PC, PC, TMP2
4820 | lhu RD, -4+OFS_RD(PC)
4821 } else if (op == BC_IFORL) {
4822 | movn TMP2, r0, CRET1
4823 | addu PC, PC, TMP2
3867 } 4824 }
3868 | c.le.d 0, f0, f2 4825 if (vk) {
3869 | c.le.d 1, f2, f0 4826 | sw SFARG1HI, FORL_IDX*8+HI(RA)
3870 | sdc1 f0, FORL_EXT*8(RA) 4827 | sw SFARG1LO, FORL_IDX*8+LO(RA)
4828 }
4829 | ins_next1
4830 | sw SFARG1HI, FORL_EXT*8+HI(RA)
4831 | sw SFARG1LO, FORL_EXT*8+LO(RA)
4832 |2:
3871 if (op == BC_JFORI) { 4833 if (op == BC_JFORI) {
3872 | li TMP1, 1 4834 | beqz CRET1, =>BC_JLOOP
3873 | li TMP2, 1
3874 | addu TMP0, RD, TMP0
3875 | slt TMP3, TMP3, r0
3876 | movf TMP1, r0, 0
3877 | addu PC, PC, TMP0
3878 | movf TMP2, r0, 1
3879 | lhu RD, -4+OFS_RD(PC)
3880 | movn TMP1, TMP2, TMP3
3881 | bnez TMP1, =>BC_JLOOP
3882 |. decode_RD8b RD 4835 |. decode_RD8b RD
3883 } else if (op == BC_JFORL) { 4836 } else if (op == BC_JFORL) {
3884 | li TMP1, 1 4837 | beqz CRET1, =>BC_JLOOP
3885 | li TMP2, 1 4838 }
3886 | slt TMP3, TMP3, r0 4839 | ins_next2
3887 | movf TMP1, r0, 0 4840 |
3888 | movf TMP2, r0, 1 4841 |5: // FP loop.
3889 | movn TMP1, TMP2, TMP3 4842 |.if FPU
3890 | bnez TMP1, =>BC_JLOOP 4843 if (!vk) {
4844 | ldc1 f0, FORL_IDX*8(RA)
4845 | ldc1 f2, FORL_STOP*8(RA)
4846 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4847 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4848 | sltiu AT, SFRETHI, LJ_TISNUM
4849 | and TMP0, TMP0, TMP1
4850 | and AT, AT, TMP0
4851 | beqz AT, ->vmeta_for
4852 |. slt TMP3, SFRETHI, r0
4853 | c.ole.d 0, f0, f2
4854 | c.ole.d 1, f2, f0
4855 | li CRET1, 1
4856 | movt CRET1, r0, 0
4857 | movt AT, r0, 1
4858 | b <1
4859 |. movn CRET1, AT, TMP3
4860 } else {
4861 | ldc1 f0, FORL_IDX*8(RA)
4862 | ldc1 f4, FORL_STEP*8(RA)
4863 | ldc1 f2, FORL_STOP*8(RA)
4864 | lw SFARG2HI, FORL_STEP*8+HI(RA)
4865 | add.d f0, f0, f4
4866 | c.ole.d 0, f0, f2
4867 | c.ole.d 1, f2, f0
4868 | slt TMP3, SFARG2HI, r0
4869 | li CRET1, 1
4870 | li AT, 1
4871 | movt CRET1, r0, 0
4872 | movt AT, r0, 1
4873 | movn CRET1, AT, TMP3
4874 if (op == BC_IFORL) {
4875 | movn TMP2, r0, CRET1
4876 | addu PC, PC, TMP2
4877 }
4878 | sdc1 f0, FORL_IDX*8(RA)
4879 | ins_next1
4880 | b <2
4881 |. sdc1 f0, FORL_EXT*8(RA)
4882 }
4883 |.else
4884 if (!vk) {
4885 | sltiu TMP0, SFARG1HI, LJ_TISNUM
4886 | sltiu TMP1, SFARG2HI, LJ_TISNUM
4887 | sltiu AT, SFRETHI, LJ_TISNUM
4888 | and TMP0, TMP0, TMP1
4889 | and AT, AT, TMP0
4890 | beqz AT, ->vmeta_for
4891 |. nop
4892 | bal ->vm_sfcmpolex
4893 |. move TMP3, SFRETHI
4894 | b <1
3891 |. nop 4895 |. nop
3892 } else { 4896 } else {
3893 | addu TMP1, RD, TMP0 4897 | lw SFARG2HI, FORL_STEP*8+HI(RA)
3894 | slt TMP3, TMP3, r0 4898 | load_got __adddf3
3895 | move TMP2, TMP1 4899 | call_extern
3896 if (op == BC_FORI) { 4900 |. sw TMP2, ARG5
3897 | movt TMP1, r0, 0 4901 | lw SFARG2HI, FORL_STOP*8+HI(RA)
3898 | movt TMP2, r0, 1 4902 | lw SFARG2LO, FORL_STOP*8+LO(RA)
4903 | move SFARG1HI, SFRETHI
4904 | move SFARG1LO, SFRETLO
4905 | bal ->vm_sfcmpolex
4906 |. lw TMP3, FORL_STEP*8+HI(RA)
4907 if ( op == BC_JFORL ) {
4908 | lhu RD, -4+OFS_RD(PC)
4909 | lw TMP2, ARG5
4910 | b <1
4911 |. decode_RD8b RD
3899 } else { 4912 } else {
3900 | movf TMP1, r0, 0 4913 | b <1
3901 | movf TMP2, r0, 1 4914 |. lw TMP2, ARG5
3902 } 4915 }
3903 | movn TMP1, TMP2, TMP3
3904 | addu PC, PC, TMP1
3905 } 4916 }
3906 | ins_next 4917 |.endif
3907 break; 4918 break;
3908 4919
3909 case BC_ITERL: 4920 case BC_ITERL:
@@ -3962,8 +4973,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3962 | sw AT, DISPATCH_GL(vmstate)(DISPATCH) 4973 | sw AT, DISPATCH_GL(vmstate)(DISPATCH)
3963 | lw TRACE:TMP2, 0(TMP1) 4974 | lw TRACE:TMP2, 0(TMP1)
3964 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) 4975 | sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
3965 | sw L, DISPATCH_GL(jit_L)(DISPATCH)
3966 | lw TMP2, TRACE:TMP2->mcode 4976 | lw TMP2, TRACE:TMP2->mcode
4977 | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
3967 | jr TMP2 4978 | jr TMP2
3968 |. addiu JGL, DISPATCH, GG_DISP2G+32768 4979 |. addiu JGL, DISPATCH, GG_DISP2G+32768
3969 |.endif 4980 |.endif
@@ -4089,6 +5100,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4089 | li_vmstate INTERP 5100 | li_vmstate INTERP
4090 | lw PC, FRAME_PC(BASE) // Fetch PC of caller. 5101 | lw PC, FRAME_PC(BASE) // Fetch PC of caller.
4091 | subu RA, TMP1, RD // RA = L->top - nresults*8 5102 | subu RA, TMP1, RD // RA = L->top - nresults*8
5103 | sw L, DISPATCH_GL(cur_L)(DISPATCH)
4092 | b ->vm_returnc 5104 | b ->vm_returnc
4093 |. st_vmstate 5105 |. st_vmstate
4094 break; 5106 break;
@@ -4151,8 +5163,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4151 fcofs, CFRAME_SIZE); 5163 fcofs, CFRAME_SIZE);
4152 for (i = 23; i >= 16; i--) 5164 for (i = 23; i >= 16; i--)
4153 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5165 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5166#if !LJ_SOFTFP
4154 for (i = 30; i >= 20; i -= 2) 5167 for (i = 30; i >= 20; i -= 2)
4155 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5168 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5169#endif
4156 fprintf(ctx->fp, 5170 fprintf(ctx->fp,
4157 "\t.align 2\n" 5171 "\t.align 2\n"
4158 ".LEFDE0:\n\n"); 5172 ".LEFDE0:\n\n");
@@ -4204,8 +5218,10 @@ static void emit_asm_debug(BuildCtx *ctx)
4204 fcofs, CFRAME_SIZE); 5218 fcofs, CFRAME_SIZE);
4205 for (i = 23; i >= 16; i--) 5219 for (i = 23; i >= 16; i--)
4206 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); 5220 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
5221#if !LJ_SOFTFP
4207 for (i = 30; i >= 20; i -= 2) 5222 for (i = 30; i >= 20; i -= 2)
4208 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); 5223 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
5224#endif
4209 fprintf(ctx->fp, 5225 fprintf(ctx->fp,
4210 "\t.align 2\n" 5226 "\t.align 2\n"
4211 ".LEFDE2:\n\n"); 5227 ".LEFDE2:\n\n");
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
new file mode 100644
index 00000000..4ae19b7d
--- /dev/null
+++ b/src/vm_mips64.dasc
@@ -0,0 +1,5424 @@
1|// Low-level VM code for MIPS64 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4|//
5|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
6|// Sponsored by Cisco Systems, Inc.
7|
8|.arch mips64
9|.section code_op, code_sub
10|
11|.actionlist build_actionlist
12|.globals GLOB_
13|.globalnames globnames
14|.externnames extnames
15|
16|// Note: The ragged indentation of the instructions is intentional.
17|// The starting columns indicate data dependencies.
18|
19|//-----------------------------------------------------------------------
20|
21|// Fixed register assignments for the interpreter.
22|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
23|
24|.macro .FPU, a, b // Emit the two-operand statement `a, b` only in FPU builds.
25|.if FPU
26| a, b
27|.endif // When FPU is not set the macro expands to nothing.
28|.endmacro
29|
30|// The following must be C callee-save (but BASE is often refetched).
31|.define BASE, r16 // Base of current Lua stack frame.
32|.define KBASE, r17 // Constants of current Lua function.
33|.define PC, r18 // Next PC.
34|.define DISPATCH, r19 // Opcode dispatch table.
35|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
36|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
37|
38|.define JGL, r30 // On-trace: global_State + 32768.
39|
40|// Constants for type-comparisons, stores and conversions. C callee-save.
41|.define TISNIL, r30 // Note: same register as JGL above.
42|.define TISNUM, r22
43|.if FPU
44|.define TOBIT, f30 // 2^52 + 2^51.
45|.endif
46|
47|// The following temporaries are not saved across C calls, except for RA.
48|.define RA, r23 // Callee-save.
49|.define RB, r8 // Same register as CARG5.
50|.define RC, r9 // Same register as CARG6.
51|.define RD, r10 // Same register as CARG7.
52|.define INS, r11 // Same register as CARG8.
53|
54|.define AT, r1 // Assembler temporary.
55|.define TMP0, r12
56|.define TMP1, r13
57|.define TMP2, r14
58|.define TMP3, r15
59|
60|// MIPS n64 calling convention.
61|.define CFUNCADDR, r25 // Call target register loaded by load_got, used by call_extern/jmp_extern.
62|.define CARG1, r4
63|.define CARG2, r5
64|.define CARG3, r6
65|.define CARG4, r7
66|.define CARG5, r8 // Same register as RB.
67|.define CARG6, r9 // Same register as RC.
68|.define CARG7, r10 // Same register as RD.
69|.define CARG8, r11 // Same register as INS.
70|
71|.define CRET1, r2
72|.define CRET2, r3
73|
74|.if FPU // Floating-point argument/return/temp names exist only in FPU builds.
75|.define FARG1, f12
76|.define FARG2, f13
77|.define FARG3, f14
78|.define FARG4, f15
79|.define FARG5, f16
80|.define FARG6, f17
81|.define FARG7, f18
82|.define FARG8, f19
83|
84|.define FRET1, f0
85|.define FRET2, f2
86|
87|.define FTMP0, f20
88|.define FTMP1, f21
89|.define FTMP2, f22
90|.endif
91|
92|// Stack layout while in interpreter. Must match with lj_frame.h.
93|.if FPU // MIPS64 hard-float.
94|
95|.define CFRAME_SPACE, 192 // Delta for sp.
96|
97|//----- 16 byte aligned, <-- sp entering interpreter
98|.define SAVE_ERRF, 188(sp) // 32 bit values.
99|.define SAVE_NRES, 184(sp)
100|.define SAVE_CFRAME, 176(sp) // 64 bit values.
101|.define SAVE_L, 168(sp)
102|.define SAVE_PC, 160(sp)
103|//----- 16 byte aligned
104|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves.
105|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves.
106|
107|.else // MIPS64 soft-float
108|
109|.define CFRAME_SPACE, 128 // Delta for sp.
110|
111|//----- 16 byte aligned, <-- sp entering interpreter
112|.define SAVE_ERRF, 124(sp) // 32 bit values.
113|.define SAVE_NRES, 120(sp)
114|.define SAVE_CFRAME, 112(sp) // 64 bit values.
115|.define SAVE_L, 104(sp)
116|.define SAVE_PC, 96(sp)
117|//----- 16 byte aligned
118|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves.
119|
120|.endif
121|
122|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code.
123|.define TMPD, 0(sp)
124|//----- 16 byte aligned
125|
126|.define TMPD_OFS, 0 // Numeric sp offset of the TMPD slot.
127|
128|.define SAVE_MULTRES, TMPD // SAVE_MULTRES shares the TMPD slot.
129|
130|//-----------------------------------------------------------------------
131|
132|.macro saveregs // Allocate the C frame and store ra, r30, r16-r23 (plus f24-f31 in FPU builds).
133| daddiu sp, sp, -CFRAME_SPACE // Reserve CFRAME_SPACE bytes of stack.
134| sd ra, SAVE_GPR_+9*8(sp) // GPR save slots: 9 = ra, 8 = r30, 7..0 = r23..r16.
135| sd r30, SAVE_GPR_+8*8(sp)
136| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) // FPR save slots 7..0 = f31..f24; emitted only when FPU is set.
137| sd r23, SAVE_GPR_+7*8(sp)
138| .FPU sdc1 f30, SAVE_FPR_+6*8(sp)
139| sd r22, SAVE_GPR_+6*8(sp)
140| .FPU sdc1 f29, SAVE_FPR_+5*8(sp)
141| sd r21, SAVE_GPR_+5*8(sp)
142| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
143| sd r20, SAVE_GPR_+4*8(sp)
144| .FPU sdc1 f27, SAVE_FPR_+3*8(sp)
145| sd r19, SAVE_GPR_+3*8(sp)
146| .FPU sdc1 f26, SAVE_FPR_+2*8(sp)
147| sd r18, SAVE_GPR_+2*8(sp)
148| .FPU sdc1 f25, SAVE_FPR_+1*8(sp)
149| sd r17, SAVE_GPR_+1*8(sp)
150| .FPU sdc1 f24, SAVE_FPR_+0*8(sp)
151| sd r16, SAVE_GPR_+0*8(sp)
152|.endmacro
153|
154|.macro restoreregs_ret // Reload everything stored by saveregs, then return through ra.
155| ld ra, SAVE_GPR_+9*8(sp) // Same slot assignment as in saveregs.
156| ld r30, SAVE_GPR_+8*8(sp)
157| ld r23, SAVE_GPR_+7*8(sp)
158| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) // FPR reloads are emitted only when FPU is set.
159| ld r22, SAVE_GPR_+6*8(sp)
160| .FPU ldc1 f30, SAVE_FPR_+6*8(sp)
161| ld r21, SAVE_GPR_+5*8(sp)
162| .FPU ldc1 f29, SAVE_FPR_+5*8(sp)
163| ld r20, SAVE_GPR_+4*8(sp)
164| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
165| ld r19, SAVE_GPR_+3*8(sp)
166| .FPU ldc1 f27, SAVE_FPR_+3*8(sp)
167| ld r18, SAVE_GPR_+2*8(sp)
168| .FPU ldc1 f26, SAVE_FPR_+2*8(sp)
169| ld r17, SAVE_GPR_+1*8(sp)
170| .FPU ldc1 f25, SAVE_FPR_+1*8(sp)
171| ld r16, SAVE_GPR_+0*8(sp)
172| .FPU ldc1 f24, SAVE_FPR_+0*8(sp)
173| jr ra
174| daddiu sp, sp, CFRAME_SPACE // Placed after jr (delay slot): releases the frame reserved by saveregs.
175|.endmacro
176|
177|// Type definitions. Some of these are only used for documentation. Only L names a register (LREG) in its definition.
178|.type L, lua_State, LREG
179|.type GL, global_State
180|.type TVALUE, TValue
181|.type GCOBJ, GCobj
182|.type STR, GCstr
183|.type TAB, GCtab
184|.type LFUNC, GCfuncL
185|.type CFUNC, GCfuncC
186|.type PROTO, GCproto
187|.type UPVAL, GCupval
188|.type NODE, Node
189|.type NARGS8, int
190|.type TRACE, GCtrace
191|.type SBUF, SBuf
192|
193|//-----------------------------------------------------------------------
194|
195|// Trap for not-yet-implemented parts. Emits the literal word 0xf0f0f0f0.
196|.macro NYI; .long 0xf0f0f0f0; .endmacro
197|
198|// Macros to mark delay slots. `.` re-emits its arguments unchanged; the four overloads accept 1 to 4 comma-separated operands.
199|.macro ., a; a; .endmacro
200|.macro ., a,b; a,b; .endmacro
201|.macro ., a,b,c; a,b,c; .endmacro
202|.macro ., a,b,c,d; a,b,c,d; .endmacro
203|
204|.define FRAME_PC, -8 // Offset from BASE of the slot where ins_call stores the caller PC.
205|.define FRAME_FUNC, -16 // NOTE(review): presumably the offset from BASE of the function slot -- confirm against lj_frame.h.
206|
207|//-----------------------------------------------------------------------
208|
209|// Endian-specific defines. HI/LO: byte offsets of the two 32-bit halves of an 8-byte slot; OFS_*: byte offsets of fields inside a 32-bit instruction word.
210|.if ENDIAN_LE
211|.define HI, 4
212|.define LO, 0
213|.define OFS_RD, 2
214|.define OFS_RA, 1
215|.define OFS_OP, 0
216|.else
217|.define HI, 0
218|.define LO, 4
219|.define OFS_RD, 0
220|.define OFS_RA, 2
221|.define OFS_OP, 3
222|.endif
223|
224|// Instruction decode. The `a` macros shift or mask a field out of ins; the `b` macros finish the scaling in place.
225|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro // dst = low byte of ins (opcode).
226|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro // Same extraction as decode_OP1.
227|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro // dst *= 8.
228|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro // With decode_RC8b: bits 16..23 of ins, times 8.
229|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro
230|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro // dst *= 4.
231|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro // With decode_RA8b: bits 8..15 of ins, times 8.
232|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro
233|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro // With decode_RB8b: bits 24..31 of ins, times 8.
234|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro
235|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro // With decode_RD8b: bits 16..31 of ins, times 8.
236|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro
237|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro // Keeps bits 3..10 of src, i.e. the low byte of a value already scaled by 8.
238|
239|// Instruction fetch.
240|.macro ins_NEXT1
241| lw INS, 0(PC) // Load the 32-bit instruction word at PC.
242| daddiu PC, PC, 4 // Advance PC to the following instruction.
243|.endmacro
244|// Instruction decode+dispatch. Expects the fetched word in INS (see ins_NEXT1).
245|.macro ins_NEXT2
246| decode_OP8a TMP1, INS
247| decode_OP8b TMP1 // TMP1 = opcode*8.
248| daddu TMP0, DISPATCH, TMP1 // TMP0 = address of the 8-byte dispatch entry.
249| decode_RD8a RD, INS
250| ld AT, 0(TMP0) // AT = handler address read from the dispatch table.
251| decode_RA8a RA, INS
252| decode_RD8b RD // RD = (ins >> 16)*8.
253| jr AT
254| decode_RA8b RA // Placed after jr (delay slot): RA = RA field*8.
255|.endmacro
256|.macro ins_NEXT // Full fetch + decode + dispatch sequence.
257| ins_NEXT1
258| ins_NEXT2
259|.endmacro
260|
261|// Instruction footer. Selects how the ins_next* names used by the bytecode handlers are defined.
262|.if 1 // Constant condition: the replicated-dispatch variant below is the one assembled.
263| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
264| .define ins_next, ins_NEXT
265| .define ins_next_, ins_NEXT
266| .define ins_next1, ins_NEXT1
267| .define ins_next2, ins_NEXT2
268|.else
269| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
270| // Affects only certain kinds of benchmarks (and only with -j off).
271| .macro ins_next
272| b ->ins_next
273| .endmacro
274| .macro ins_next1 // Intentionally empty in this variant.
275| .endmacro
276| .macro ins_next2
277| b ->ins_next
278| .endmacro
279| .macro ins_next_
280| ->ins_next: // Defines the shared dispatch label targeted by the branches above.
281| ins_NEXT
282| .endmacro
283|.endif
284|
285|// Call decode and dispatch: enter the callee at its first bytecode instruction.
286|.macro ins_callt // Clobbers TMP0 and TMP1.
287| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
288| ld PC, LFUNC:RB->pc // PC = the callee's pc field.
289| lw INS, 0(PC)
290| daddiu PC, PC, 4
291| decode_OP8a TMP1, INS
292| decode_RA8a RA, INS
293| decode_OP8b TMP1
294| decode_RA8b RA
295| daddu TMP0, DISPATCH, TMP1
296| ld TMP0, 0(TMP0) // TMP0 = handler address for the callee's first opcode.
297| jr TMP0
298| daddu RA, RA, BASE // After jr (delay slot): RA = BASE + RA*8. Unlike ins_NEXT2, RD is not decoded.
299|.endmacro
300|
301|.macro ins_call // Same as ins_callt, but first stores the caller PC into the new frame.
302| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
303| sd PC, FRAME_PC(BASE)
304| ins_callt
305|.endmacro
306|
307|//-----------------------------------------------------------------------
308|
309|.macro branch_RD // PC += RD/2 + (lui-built bias constant). Clobbers TMP0 and AT.
310| srl TMP0, RD, 1 // With RD*8 as left by ins_NEXT2 this is the jump operand scaled by 4.
311| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) // AT = upper halfword of the negated bias. NOTE(review): exact only if the low 16 bits of BCBIAS_J*4 are zero -- confirm.
312| addu TMP0, TMP0, AT // 32-bit add; the 64-bit add to PC follows.
313| daddu PC, PC, TMP0
314|.endmacro
315|
316|// Displacements from DISPATCH to a global_State field (DISPATCH_GL), a jit_State field (DISPATCH_J) or a GOT slot (DISPATCH_GOT). Assumes DISPATCH is relative to GL.
317#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
318#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
319#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch))
320#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
321|// PC2PROTO(field): offset of a GCproto field relative to the end of the GCproto struct (offsetof minus sizeof, hence negative).
322#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
323|
324|.macro load_got, func // Load the GOT slot for func (at DISPATCH_GOT(func) off DISPATCH) into CFUNCADDR.
325| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH)
326|.endmacro
327|// A direct 'bal' (variant commented out below) would be much faster. Sadly, there's no easy way to force the required code layout.
328|// .macro call_intern, func; bal extern func; .endmacro
329|.macro call_intern, func; jalr CFUNCADDR; .endmacro // The func argument is ignored: needs a prior load_got. The caller supplies the delay-slot instruction.
330|.macro call_extern; jalr CFUNCADDR; .endmacro // Call through CFUNCADDR. The caller supplies the delay-slot instruction.
331|.macro jmp_extern; jr CFUNCADDR; .endmacro // Jump (no link) through CFUNCADDR. The caller supplies the delay-slot instruction.
332|
333|.macro hotcheck, delta, target // Decrement the hot counter selected by PC by delta; branch to target if it went negative. Clobbers TMP1 and TMP2.
334| dsrl TMP1, PC, 1
335| andi TMP1, TMP1, 126 // Even byte offset in 0..126, i.e. one of 64 halfword counters.
336| daddu TMP1, TMP1, DISPATCH
337| lhu TMP2, GG_DISP2HOT(TMP1)
338| addiu TMP2, TMP2, -delta
339| bltz TMP2, target
340|. sh TMP2, GG_DISP2HOT(TMP1) // Delay slot: the new count is stored whether or not the branch is taken.
341|.endmacro
342|
343|.macro hotloop // Hot-counter check with HOTCOUNT_LOOP; goes to ->vm_hotloop on underflow.
344| hotcheck HOTCOUNT_LOOP, ->vm_hotloop
345|.endmacro
346|
347|.macro hotcall // Hot-counter check with HOTCOUNT_CALL; goes to ->vm_hotcall on underflow.
348| hotcheck HOTCOUNT_CALL, ->vm_hotcall
349|.endmacro
350|
351|// Set current VM state. Uses TMP0. li_vmstate loads ~LJ_VMST_<st> into TMP0; st_vmstate stores TMP0 as a 32-bit word to the vmstate field of global_State.
352|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
353|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
354|
355|// Move table write barrier back: clear the black bit of tab, push tab onto the gc.grayagain list, then branch to target. Overwrites mark and tmp.
356|.macro barrierback, tab, mark, tmp, target // mark must hold the current tab->marked byte on entry.
357| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) // tmp = previous list head.
358| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab)
359| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) // tab becomes the new list head.
360| sb mark, tab->marked
361| b target
362|. sd tmp, tab->gclist // Delay slot: link tab to the previous head.
363|.endmacro
364|
365|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. The one-argument form works in place; the two-argument form writes to dst and leaves reg intact.
366|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro
367|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro
368|
369|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. The bits come from the low end of tp; the lower 47 bits of dst are kept.
370|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro
371|
372|// Extract (negative) type tag: an arithmetic shift right by 47 leaves the sign-extended upper 17 bits of src in dst.
373|.macro gettp, dst, src; dsra dst, src, 47; .endmacro
374|
375|// Macros to check the TValue type and extract the GCobj. Branch on failure. All of them clobber AT.
376|.macro checktp, reg, tp, target // In-place form. tp is the negated expected tag (e.g. -LJ_TSTR).
377| gettp AT, reg
378| daddiu AT, AT, tp // AT = tag + tp: zero only if the tag matches.
379| bnez AT, target
380|. cleartp reg // Delay slot: reg is untagged on both the match and the failure path.
381|.endmacro
382|.macro checktp, dst, reg, tp, target // Same check, but the untagged pointer goes to dst and reg keeps its tag.
383| gettp AT, reg
384| daddiu AT, AT, tp
385| bnez AT, target
386|. cleartp dst, reg // Delay slot: dst is written on both paths.
387|.endmacro
388|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro // Branch unless reg is tagged LJ_TSTR.
389|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro // Branch unless reg is tagged LJ_TTAB.
390|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro // Branch unless reg is tagged LJ_TFUNC.
391|.macro checkint, reg, target // Caveat: has delay slot! The instruction following the macro fills it; reg keeps its tag.
392| gettp AT, reg
393| bne AT, TISNUM, target // Branch if the tag differs from TISNUM.
394|.endmacro
395|.macro checknum, reg, target // Caveat: has delay slot! The instruction following the macro fills it; reg keeps its tag.
396| gettp AT, reg
397| sltiu AT, AT, LJ_TISNUM // AT = 1 if the tag is below LJ_TISNUM (unsigned compare).
398| beqz AT, target // Branch if it is not.
399|.endmacro
400|
401|.macro mov_false, reg // reg = ~(0x8000 << 32) = 0xffff7fffffffffff, the 'false' constant per the macro name.
402| lu reg, 0x8000 // NOTE(review): lu presumably loads the immediate zero-extended -- confirm in the DynASM MIPS backend.
403| dsll reg, reg, 32
404| not reg, reg
405|.endmacro
406|.macro mov_true, reg // reg = ~(1 << 48) = 0xfffeffffffffffff, the 'true' constant per the macro name.
407| li reg, 0x0001
408| dsll reg, reg, 48
409| not reg, reg
410|.endmacro
411|
412|//-----------------------------------------------------------------------
413
414/* Generate subroutines used by opcodes and other parts of the VM. */
415/* The .code_sub section should be last to help static branch prediction. */
416static void build_subroutines(BuildCtx *ctx)
417{
418 |.code_sub
419 |
420 |//-----------------------------------------------------------------------
421 |//-- Return handling ----------------------------------------------------
422 |//-----------------------------------------------------------------------
423 |
424 |->vm_returnp:
425 | // See vm_return. Also: TMP2 = previous base.
426 | andi AT, PC, FRAME_P
427 | beqz AT, ->cont_dispatch
428 |
429 | // Return from pcall or xpcall fast func.
430 |. mov_true TMP1
431 | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
432 | move BASE, TMP2 // Restore caller base.
433 | // Prepending may overwrite the pcall frame, so do it at the end.
434 | sd TMP1, -8(RA) // Prepend true to results.
435 | daddiu RA, RA, -8
436 |
437 |->vm_returnc:
438 | addiu RD, RD, 8 // RD = (nresults+1)*8.
439 | andi TMP0, PC, FRAME_TYPE
440 | beqz RD, ->vm_unwind_c_eh
441 |. li CRET1, LUA_YIELD
442 | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
443 |. move MULTRES, RD
444 |
445 |->vm_return:
446 | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
447 | // TMP0 = PC & FRAME_TYPE
448 | li TMP2, -8
449 | xori AT, TMP0, FRAME_C
450 | and TMP2, PC, TMP2
451 | bnez AT, ->vm_returnp
452 | dsubu TMP2, BASE, TMP2 // TMP2 = previous base.
453 |
454 | addiu TMP1, RD, -8
455 | sd TMP2, L->base
456 | li_vmstate C
457 | lw TMP2, SAVE_NRES
458 | daddiu BASE, BASE, -16
459 | st_vmstate
460 | beqz TMP1, >2
461 |. sll TMP2, TMP2, 3
462 |1:
463 | addiu TMP1, TMP1, -8
464 | ld CRET1, 0(RA)
465 | daddiu RA, RA, 8
466 | sd CRET1, 0(BASE)
467 | bnez TMP1, <1
468 |. daddiu BASE, BASE, 8
469 |
470 |2:
471 | bne TMP2, RD, >6
472 |3:
473 |. sd BASE, L->top // Store new top.
474 |
475 |->vm_leave_cp:
476 | ld TMP0, SAVE_CFRAME // Restore previous C frame.
477 | move CRET1, r0 // Ok return status for vm_pcall.
478 | sd TMP0, L->cframe
479 |
480 |->vm_leave_unw:
481 | restoreregs_ret
482 |
483 |6:
484 | ld TMP1, L->maxstack
485 | slt AT, TMP2, RD
486 | bnez AT, >7 // Less results wanted?
487 | // More results wanted. Check stack size and fill up results with nil.
488 |. slt AT, BASE, TMP1
489 | beqz AT, >8
490 |. nop
491 | sd TISNIL, 0(BASE)
492 | addiu RD, RD, 8
493 | b <2
494 |. daddiu BASE, BASE, 8
495 |
496 |7: // Less results wanted.
497 | subu TMP0, RD, TMP2
498 | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
499 |.if MIPSR6
500 | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
501 | seleqz BASE, BASE, TMP2
502 | b <3
503 |. or BASE, BASE, TMP0
504 |.else
505 | b <3
506 |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
507 |.endif
508 |
509 |8: // Corner case: need to grow stack for filling up results.
510 | // This can happen if:
511 | // - A C function grows the stack (a lot).
512 | // - The GC shrinks the stack in between.
513 | // - A return back from a lua_call() with (high) nresults adjustment.
514 | load_got lj_state_growstack
515 | move MULTRES, RD
516 | srl CARG2, TMP2, 3
517 | call_intern lj_state_growstack // (lua_State *L, int n)
518 |. move CARG1, L
519 | lw TMP2, SAVE_NRES
520 | ld BASE, L->top // Need the (realloced) L->top in BASE.
521 | move RD, MULTRES
522 | b <2
523 |. sll TMP2, TMP2, 3
524 |
525 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
526 | // (void *cframe, int errcode)
527 | move sp, CARG1
528 | move CRET1, CARG2
529 |->vm_unwind_c_eh: // Landing pad for external unwinder.
530 | ld L, SAVE_L
531 | li TMP0, ~LJ_VMST_C
532 | ld GL:TMP1, L->glref
533 | b ->vm_leave_unw
534 |. sw TMP0, GL:TMP1->vmstate
535 |
536 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
537 | // (void *cframe)
538 | li AT, -4
539 | and sp, CARG1, AT
540 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
541 | ld L, SAVE_L
542 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
543 | li TISNIL, LJ_TNIL
544 | li TISNUM, LJ_TISNUM
545 | ld BASE, L->base
546 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
547 | .FPU mtc1 TMP3, TOBIT
548 | mov_false TMP1
549 | li_vmstate INTERP
550 | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame.
551 | .FPU cvt.d.s TOBIT, TOBIT
552 | daddiu RA, BASE, -8 // Results start at BASE-8.
553 | daddiu DISPATCH, DISPATCH, GG_G2DISP
554 | sd TMP1, 0(RA) // Prepend false to error message.
555 | st_vmstate
556 | b ->vm_returnc
557 |. li RD, 16 // 2 results: false + error message.
558 |
559 |//-----------------------------------------------------------------------
560 |//-- Grow stack for calls -----------------------------------------------
561 |//-----------------------------------------------------------------------
562 |
563 |->vm_growstack_c: // Grow stack for C function.
564 | b >2
565 |. li CARG2, LUA_MINSTACK
566 |
567 |->vm_growstack_l: // Grow stack for Lua function.
568 | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
569 | daddu RC, BASE, RC
570 | dsubu RA, RA, BASE
571 | sd BASE, L->base
572 | daddiu PC, PC, 4 // Must point after first instruction.
573 | sd RC, L->top
574 | srl CARG2, RA, 3
575 |2:
576 | // L->base = new base, L->top = top
577 | load_got lj_state_growstack
578 | sd PC, SAVE_PC
579 | call_intern lj_state_growstack // (lua_State *L, int n)
580 |. move CARG1, L
581 | ld BASE, L->base
582 | ld RC, L->top
583 | ld LFUNC:RB, FRAME_FUNC(BASE)
584 | dsubu RC, RC, BASE
585 | cleartp LFUNC:RB
586 | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
587 | ins_callt // Just retry the call.
588 |
589 |//-----------------------------------------------------------------------
590 |//-- Entry points into the assembler VM ---------------------------------
591 |//-----------------------------------------------------------------------
592 |
593 |->vm_resume: // Setup C frame and resume thread.
594 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
595 | saveregs
596 | move L, CARG1
597 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
598 | move BASE, CARG2
599 | lbu TMP1, L->status
600 | sd L, SAVE_L
601 | li PC, FRAME_CP
602 | daddiu TMP0, sp, CFRAME_RESUME
603 | daddiu DISPATCH, DISPATCH, GG_G2DISP
604 | sw r0, SAVE_NRES
605 | sw r0, SAVE_ERRF
606 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
607 | sd r0, SAVE_CFRAME
608 | beqz TMP1, >3
609 |. sd TMP0, L->cframe
610 |
611 | // Resume after yield (like a return).
612 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
613 | move RA, BASE
614 | ld BASE, L->base
615 | ld TMP1, L->top
616 | ld PC, FRAME_PC(BASE)
617 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
618 | dsubu RD, TMP1, BASE
619 | .FPU mtc1 TMP3, TOBIT
620 | sb r0, L->status
621 | .FPU cvt.d.s TOBIT, TOBIT
622 | li_vmstate INTERP
623 | daddiu RD, RD, 8
624 | st_vmstate
625 | move MULTRES, RD
626 | andi TMP0, PC, FRAME_TYPE
627 | li TISNIL, LJ_TNIL
628 | beqz TMP0, ->BC_RET_Z
629 |. li TISNUM, LJ_TISNUM
630 | b ->vm_return
631 |. nop
632 |
633 |->vm_pcall: // Setup protected C frame and enter VM.
634 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
635 | saveregs
636 | sw CARG4, SAVE_ERRF
637 | b >1
638 |. li PC, FRAME_CP
639 |
640 |->vm_call: // Setup C frame and enter VM.
641 | // (lua_State *L, TValue *base, int nres1)
642 | saveregs
643 | li PC, FRAME_C
644 |
645 |1: // Entry point for vm_pcall above (PC = ftype).
646 | ld TMP1, L:CARG1->cframe
647 | move L, CARG1
648 | sw CARG3, SAVE_NRES
649 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
650 | sd CARG1, SAVE_L
651 | move BASE, CARG2
652 | daddiu DISPATCH, DISPATCH, GG_G2DISP
653 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
654 | sd TMP1, SAVE_CFRAME
655 | sd sp, L->cframe // Add our C frame to cframe chain.
656 |
657 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
658 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
659 | ld TMP2, L->base // TMP2 = old base (used in vmeta_call).
660 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
661 | ld TMP1, L->top
662 | .FPU mtc1 TMP3, TOBIT
663 | daddu PC, PC, BASE
664 | dsubu NARGS8:RC, TMP1, BASE
665 | li TISNUM, LJ_TISNUM
666 | dsubu PC, PC, TMP2 // PC = frame delta + frame type
667 | .FPU cvt.d.s TOBIT, TOBIT
668 | li_vmstate INTERP
669 | li TISNIL, LJ_TNIL
670 | st_vmstate
671 |
672 |->vm_call_dispatch:
673 | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
674 | ld LFUNC:RB, FRAME_FUNC(BASE)
675 | checkfunc LFUNC:RB, ->vmeta_call
676 |
677 |->vm_call_dispatch_f:
678 | ins_call
679 | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
680 |
681 |->vm_cpcall: // Setup protected C frame, call C.
682 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
683 | saveregs
684 | move L, CARG1
685 | ld TMP0, L:CARG1->stack
686 | sd CARG1, SAVE_L
687 | ld TMP1, L->top
688 | ld DISPATCH, L->glref // Setup pointer to dispatch table.
689 | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
690 | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
691 | ld TMP1, L->cframe
692 | daddiu DISPATCH, DISPATCH, GG_G2DISP
693 | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
694 | sw r0, SAVE_ERRF // No error function.
695 | sd TMP1, SAVE_CFRAME
696 | sd sp, L->cframe // Add our C frame to cframe chain.
697 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
698 | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
699 |. move CFUNCADDR, CARG4
700 | move BASE, CRET1
701 | bnez CRET1, <3 // Else continue with the call.
702 |. li PC, FRAME_CP
703 | b ->vm_leave_cp // No base? Just remove C frame.
704 |. nop
705 |
706 |//-----------------------------------------------------------------------
707 |//-- Metamethod handling ------------------------------------------------
708 |//-----------------------------------------------------------------------
709 |
710 |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
711 |// stack, so BASE doesn't need to be reloaded across these calls.
712 |
713 |//-- Continuation dispatch ----------------------------------------------
714 |
715 |->cont_dispatch:
716 | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
717 | ld TMP0, -32(BASE) // Continuation.
718 | move RB, BASE
719 | move BASE, TMP2 // Restore caller BASE.
720 | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
721 |.if FFI
722 | sltiu AT, TMP0, 2
723 |.endif
724 | ld PC, -24(RB) // Restore PC from [cont|PC].
725 | cleartp LFUNC:TMP1
726 | daddu TMP2, RA, RD
727 | ld TMP1, LFUNC:TMP1->pc
728 |.if FFI
729 | bnez AT, >1
730 |.endif
731 |. sd TISNIL, -8(TMP2) // Ensure one valid arg.
732 | // BASE = base, RA = resultptr, RB = meta base
733 | jr TMP0 // Jump to continuation.
734 |. ld KBASE, PC2PROTO(k)(TMP1)
735 |
736 |.if FFI
737 |1:
738 | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
739 | // cont = 0: tailcall from C function.
740 |. daddiu TMP1, RB, -32
741 | b ->vm_call_tail
742 |. dsubu RC, TMP1, BASE
743 |.endif
744 |
745 |->cont_cat: // RA = resultptr, RB = meta base
746 | lw INS, -4(PC)
747 | daddiu CARG2, RB, -32
748 | ld CRET1, 0(RA)
749 | decode_RB8a MULTRES, INS
750 | decode_RA8a RA, INS
751 | decode_RB8b MULTRES
752 | decode_RA8b RA
753 | daddu TMP1, BASE, MULTRES
754 | sd BASE, L->base
755 | dsubu CARG3, CARG2, TMP1
756 | bne TMP1, CARG2, ->BC_CAT_Z
757 |. sd CRET1, 0(CARG2)
758 | daddu RA, BASE, RA
759 | b ->cont_nop
760 |. sd CRET1, 0(RA)
761 |
762 |//-- Table indexing metamethods -----------------------------------------
763 |
764 |->vmeta_tgets1:
765 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
766 | li TMP0, LJ_TSTR
767 | settp STR:RC, TMP0
768 | b >1
769 |. sd STR:RC, 0(CARG3)
770 |
771 |->vmeta_tgets:
772 | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
773 | li TMP0, LJ_TTAB
774 | li TMP1, LJ_TSTR
775 | settp TAB:RB, TMP0
776 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
777 | sd TAB:RB, 0(CARG2)
778 | settp STR:RC, TMP1
779 | b >1
780 |. sd STR:RC, 0(CARG3)
781 |
782 |->vmeta_tgetb: // TMP0 = index
783 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
784 | settp TMP0, TISNUM
785 | sd TMP0, 0(CARG3)
786 |
787 |->vmeta_tgetv:
788 |1:
789 | load_got lj_meta_tget
790 | sd BASE, L->base
791 | sd PC, SAVE_PC
792 | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
793 |. move CARG1, L
794 | // Returns TValue * (finished) or NULL (metamethod).
795 | beqz CRET1, >3
796 |. daddiu TMP1, BASE, -FRAME_CONT
797 | ld CARG1, 0(CRET1)
798 | ins_next1
799 | sd CARG1, 0(RA)
800 | ins_next2
801 |
802 |3: // Call __index metamethod.
803 | // BASE = base, L->top = new base, stack = cont/func/t/k
804 | ld BASE, L->top
805 | sd PC, -24(BASE) // [cont|PC]
806 | dsubu PC, BASE, TMP1
807 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
808 | cleartp LFUNC:RB
809 | b ->vm_call_dispatch_f
810 |. li NARGS8:RC, 16 // 2 args for func(t, k).
811 |
812 |->vmeta_tgetr:
813 | load_got lj_tab_getinth
814 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
815 |. nop
816 | // Returns cTValue * or NULL.
817 | beqz CRET1, ->BC_TGETR_Z
818 |. move CARG2, TISNIL
819 | b ->BC_TGETR_Z
820 |. ld CARG2, 0(CRET1)
821 |
822 |//-----------------------------------------------------------------------
823 |
824 |->vmeta_tsets1:
825 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
826 | li TMP0, LJ_TSTR
827 | settp STR:RC, TMP0
828 | b >1
829 |. sd STR:RC, 0(CARG3)
830 |
831 |->vmeta_tsets:
832 | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
833 | li TMP0, LJ_TTAB
834 | li TMP1, LJ_TSTR
835 | settp TAB:RB, TMP0
836 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
837 | sd TAB:RB, 0(CARG2)
838 | settp STR:RC, TMP1
839 | b >1
840 |. sd STR:RC, 0(CARG3)
841 |
842 |->vmeta_tsetb: // TMP0 = index
843 | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
844 | settp TMP0, TISNUM
845 | sd TMP0, 0(CARG3)
846 |
847 |->vmeta_tsetv:
848 |1:
849 | load_got lj_meta_tset
850 | sd BASE, L->base
851 | sd PC, SAVE_PC
852 | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
853 |. move CARG1, L
854 | // Returns TValue * (finished) or NULL (metamethod).
855 | beqz CRET1, >3
856 |. ld CARG1, 0(RA)
857 | // NOBARRIER: lj_meta_tset ensures the table is not black.
858 | ins_next1
859 | sd CARG1, 0(CRET1)
860 | ins_next2
861 |
862 |3: // Call __newindex metamethod.
863 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
864 | daddiu TMP1, BASE, -FRAME_CONT
865 | ld BASE, L->top
866 | sd PC, -24(BASE) // [cont|PC]
867 | dsubu PC, BASE, TMP1
868 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
869 | cleartp LFUNC:RB
870 | sd CARG1, 16(BASE) // Copy value to third argument.
871 | b ->vm_call_dispatch_f
872 |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
873 |
874 |->vmeta_tsetr:
875 | load_got lj_tab_setinth
876 | sd BASE, L->base
877 | sd PC, SAVE_PC
878 | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
879 |. move CARG1, L
880 | // Returns TValue *.
881 | b ->BC_TSETR_Z
882 |. nop
883 |
884 |//-- Comparison metamethods ---------------------------------------------
885 |
886 |->vmeta_comp:
887 | // RA/RD point to o1/o2.
888 | move CARG2, RA
889 | move CARG3, RD
890 | load_got lj_meta_comp
891 | daddiu PC, PC, -4
892 | sd BASE, L->base
893 | sd PC, SAVE_PC
894 | decode_OP1 CARG4, INS
895 | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
896 |. move CARG1, L
897 | // Returns 0/1 or TValue * (metamethod).
898 |3:
899 | sltiu AT, CRET1, 2
900 | beqz AT, ->vmeta_binop
901 | negu TMP2, CRET1
902 |4:
903 | lhu RD, OFS_RD(PC)
904 | daddiu PC, PC, 4
905 | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
906 | sll RD, RD, 2
907 | addu RD, RD, TMP1
908 | and RD, RD, TMP2
909 | daddu PC, PC, RD
910 |->cont_nop:
911 | ins_next
912 |
913 |->cont_ra: // RA = resultptr
914 | lbu TMP1, -4+OFS_RA(PC)
915 | ld CRET1, 0(RA)
916 | sll TMP1, TMP1, 3
917 | daddu TMP1, BASE, TMP1
918 | b ->cont_nop
919 |. sd CRET1, 0(TMP1)
920 |
921 |->cont_condt: // RA = resultptr
922 | ld TMP0, 0(RA)
923 | gettp TMP0, TMP0
924 | sltiu AT, TMP0, LJ_TISTRUECOND
925 | b <4
926 |. negu TMP2, AT // Branch if result is true.
927 |
928 |->cont_condf: // RA = resultptr
929 | ld TMP0, 0(RA)
930 | gettp TMP0, TMP0
931 | sltiu AT, TMP0, LJ_TISTRUECOND
932 | b <4
933 |. addiu TMP2, AT, -1 // Branch if result is false.
934 |
935 |->vmeta_equal:
936 | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
937 | load_got lj_meta_equal
938 | cleartp LFUNC:CARG3, CARG2
939 | cleartp LFUNC:CARG2, CARG1
940 | move CARG4, TMP0
941 | daddiu PC, PC, -4
942 | sd BASE, L->base
943 | sd PC, SAVE_PC
944 | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
945 |. move CARG1, L
946 | // Returns 0/1 or TValue * (metamethod).
947 | b <3
948 |. nop
949 |
950 |->vmeta_equal_cd:
951 |.if FFI
952 | load_got lj_meta_equal_cd
953 | move CARG2, INS
954 | daddiu PC, PC, -4
955 | sd BASE, L->base
956 | sd PC, SAVE_PC
957 | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op)
958 |. move CARG1, L
959 | // Returns 0/1 or TValue * (metamethod).
960 | b <3
961 |. nop
962 |.endif
963 |
964 |->vmeta_istype:
965 | load_got lj_meta_istype
966 | daddiu PC, PC, -4
967 | sd BASE, L->base
968 | srl CARG2, RA, 3
969 | srl CARG3, RD, 3
970 | sd PC, SAVE_PC
971 | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
972 |. move CARG1, L
973 | b ->cont_nop
974 |. nop
975 |
976 |//-- Arithmetic metamethods ---------------------------------------------
977 |
978 |->vmeta_unm:
979 | move RC, RB
980 |
981 |->vmeta_arith:
982 | load_got lj_meta_arith
983 | sd BASE, L->base
984 | move CARG2, RA
985 | sd PC, SAVE_PC
986 | move CARG3, RB
987 | move CARG4, RC
988 | decode_OP1 CARG5, INS // CARG5 == RB.
989 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
990 |. move CARG1, L
991 | // Returns NULL (finished) or TValue * (metamethod).
992 | beqz CRET1, ->cont_nop
993 |. nop
994 |
995 | // Call metamethod for binary op.
996 |->vmeta_binop:
997 | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
998 | dsubu TMP1, CRET1, BASE
999 | sd PC, -24(CRET1) // [cont|PC]
1000 | move TMP2, BASE
1001 | daddiu PC, TMP1, FRAME_CONT
1002 | move BASE, CRET1
1003 | b ->vm_call_dispatch
1004 |. li NARGS8:RC, 16 // 2 args for func(o1, o2).
1005 |
1006 |->vmeta_len:
1007 | // CARG2 already set by BC_LEN.
1008#if LJ_52
1009 | move MULTRES, CARG1
1010#endif
1011 | load_got lj_meta_len
1012 | sd BASE, L->base
1013 | sd PC, SAVE_PC
1014 | call_intern lj_meta_len // (lua_State *L, TValue *o)
1015 |. move CARG1, L
1016 | // Returns NULL (retry) or TValue * (metamethod base).
1017#if LJ_52
1018 | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
1019 |. nop
1020 | b ->BC_LEN_Z
1021 |. move CARG1, MULTRES
1022#else
1023 | b ->vmeta_binop // Binop call for compatibility.
1024 |. nop
1025#endif
1026 |
1027 |//-- Call metamethod ----------------------------------------------------
1028 |
1029 |->vmeta_call: // Resolve and call __call metamethod.
1030 | // TMP2 = old base, BASE = new base, RC = nargs*8
1031 | load_got lj_meta_call
1032 | sd TMP2, L->base // This is the callers base!
1033 | daddiu CARG2, BASE, -16
1034 | sd PC, SAVE_PC
1035 | daddu CARG3, BASE, RC
1036 | move MULTRES, NARGS8:RC
1037 | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1038 |. move CARG1, L
1039 | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
1040 | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
1041 | cleartp LFUNC:RB
1042 | ins_call
1043 |
1044 |->vmeta_callt: // Resolve __call for BC_CALLT.
1045 | // BASE = old base, RA = new base, RC = nargs*8
1046 | load_got lj_meta_call
1047 | sd BASE, L->base
1048 | daddiu CARG2, RA, -16
1049 | sd PC, SAVE_PC
1050 | daddu CARG3, RA, RC
1051 | move MULTRES, NARGS8:RC
1052 | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1053 |. move CARG1, L
1054 | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
1055 | ld TMP1, FRAME_PC(BASE)
1056 | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
1057 | b ->BC_CALLT_Z
1058 |. cleartp LFUNC:CARG3, RB
1059 |
1060 |//-- Argument coercion for 'for' statement ------------------------------
1061 |
1062 |->vmeta_for:
1063 | load_got lj_meta_for
1064 | sd BASE, L->base
1065 | move CARG2, RA
1066 | sd PC, SAVE_PC
1067 | move MULTRES, INS
1068 | call_intern lj_meta_for // (lua_State *L, TValue *base)
1069 |. move CARG1, L
1070 |.if JIT
1071 | decode_OP1 TMP0, MULTRES
1072 | li AT, BC_JFORI
1073 |.endif
1074 | decode_RA8a RA, MULTRES
1075 | decode_RD8a RD, MULTRES
1076 | decode_RA8b RA
1077 |.if JIT
1078 | beq TMP0, AT, =>BC_JFORI
1079 |. decode_RD8b RD
1080 | b =>BC_FORI
1081 |. nop
1082 |.else
1083 | b =>BC_FORI
1084 |. decode_RD8b RD
1085 |.endif
1086 |
1087 |//-----------------------------------------------------------------------
1088 |//-- Fast functions -----------------------------------------------------
1089 |//-----------------------------------------------------------------------
1090 |
1091 |.macro .ffunc, name
1092 |->ff_ .. name:
1093 |.endmacro
1094 |
1095 |.macro .ffunc_1, name
1096 |->ff_ .. name:
1097 | beqz NARGS8:RC, ->fff_fallback
1098 |. ld CARG1, 0(BASE)
1099 |.endmacro
1100 |
1101 |.macro .ffunc_2, name
1102 |->ff_ .. name:
1103 | sltiu AT, NARGS8:RC, 16
1104 | ld CARG1, 0(BASE)
1105 | bnez AT, ->fff_fallback
1106 |. ld CARG2, 8(BASE)
1107 |.endmacro
1108 |
1109 |.macro .ffunc_n, name // Caveat: has delay slot!
1110 |->ff_ .. name:
1111 | ld CARG1, 0(BASE)
1112 | beqz NARGS8:RC, ->fff_fallback
1113 | // Either ldc1 or the 1st instruction of checknum is in the delay slot.
1114 | .FPU ldc1 FARG1, 0(BASE)
1115 | checknum CARG1, ->fff_fallback
1116 |.endmacro
1117 |
1118 |.macro .ffunc_nn, name // Caveat: has delay slot!
1119 |->ff_ .. name:
1120 | ld CARG1, 0(BASE)
1121 | sltiu AT, NARGS8:RC, 16
1122 | ld CARG2, 8(BASE)
1123 | bnez AT, ->fff_fallback
1124 |. gettp TMP0, CARG1
1125 | gettp TMP1, CARG2
1126 | sltiu TMP0, TMP0, LJ_TISNUM
1127 | sltiu TMP1, TMP1, LJ_TISNUM
1128 | .FPU ldc1 FARG1, 0(BASE)
1129 | and TMP0, TMP0, TMP1
1130 | .FPU ldc1 FARG2, 8(BASE)
1131 | beqz TMP0, ->fff_fallback
1132 |.endmacro
1133 |
1134 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
1135 |// MIPSR6: no delay slot, but a forbidden slot.
1136 |.macro ffgccheck
1137 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
1138 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
1139 | dsubu AT, TMP0, TMP1
1140 |.if MIPSR6
1141 | bgezalc AT, ->fff_gcstep
1142 |.else
1143 | bgezal AT, ->fff_gcstep
1144 |.endif
1145 |.endmacro
1146 |
1147 |//-- Base library: checks -----------------------------------------------
1148 |.ffunc_1 assert
1149 | gettp AT, CARG1
1150 | sltiu AT, AT, LJ_TISTRUECOND
1151 | beqz AT, ->fff_fallback
1152 |. daddiu RA, BASE, -16
1153 | ld PC, FRAME_PC(BASE)
1154 | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1155 | daddu TMP2, RA, RD
1156 | daddiu TMP1, BASE, 8
1157 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
1158 |. sd CARG1, 0(RA)
1159 |1:
1160 | ld CRET1, 0(TMP1)
1161 | sd CRET1, -16(TMP1)
1162 | bne TMP1, TMP2, <1
1163 |. daddiu TMP1, TMP1, 8
1164 | b ->fff_res
1165 |. nop
1166 |
1167 |.ffunc_1 type
1168 | gettp TMP0, CARG1
1169 | sltu TMP1, TISNUM, TMP0
1170 | not TMP2, TMP0
1171 | li TMP3, ~LJ_TISNUM
1172 |.if MIPSR6
1173 | selnez TMP2, TMP2, TMP1
1174 | seleqz TMP3, TMP3, TMP1
1175 | or TMP2, TMP2, TMP3
1176 |.else
1177 | movz TMP2, TMP3, TMP1
1178 |.endif
1179 | dsll TMP2, TMP2, 3
1180 | daddu TMP2, CFUNC:RB, TMP2
1181 | b ->fff_restv
1182 |. ld CARG1, CFUNC:TMP2->upvalue
1183 |
1184 |//-- Base library: getters and setters ---------------------------------
1185 |
1186 |.ffunc_1 getmetatable
1187 | gettp TMP2, CARG1
1188 | daddiu TMP0, TMP2, -LJ_TTAB
1189 | daddiu TMP1, TMP2, -LJ_TUDATA
1190 |.if MIPSR6
1191 | selnez TMP0, TMP1, TMP0
1192 |.else
1193 | movn TMP0, TMP1, TMP0
1194 |.endif
1195 | bnez TMP0, >6
1196 |. cleartp TAB:CARG1
1197 |1: // Field metatable must be at same offset for GCtab and GCudata!
1198 | ld TAB:RB, TAB:CARG1->metatable
1199 |2:
1200 | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
1201 | beqz TAB:RB, ->fff_restv
1202 |. li CARG1, LJ_TNIL
1203 | lw TMP0, TAB:RB->hmask
1204 | lw TMP1, STR:RC->hash
1205 | ld NODE:TMP2, TAB:RB->node
1206 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
1207 | dsll TMP0, TMP1, 5
1208 | dsll TMP1, TMP1, 3
1209 | dsubu TMP1, TMP0, TMP1
1210 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
1211 | li CARG4, LJ_TSTR
1212 | settp STR:RC, CARG4 // Tagged key to look for.
1213 |3: // Rearranged logic, because we expect _not_ to find the key.
1214 | ld TMP0, NODE:TMP2->key
1215 | ld CARG1, NODE:TMP2->val
1216 | ld NODE:TMP2, NODE:TMP2->next
1217 | beq RC, TMP0, >5
1218 |. li AT, LJ_TTAB
1219 | bnez NODE:TMP2, <3
1220 |. nop
1221 |4:
1222 | move CARG1, RB
1223 | b ->fff_restv // Not found, keep default result.
1224 |. settp CARG1, AT
1225 |5:
1226 | bne CARG1, TISNIL, ->fff_restv
1227 |. nop
1228 | b <4 // Ditto for nil value.
1229 |. nop
1230 |
1231 |6:
1232 | sltiu AT, TMP2, LJ_TISNUM
1233 |.if MIPSR6
1234 | selnez TMP0, TISNUM, AT
1235 | seleqz AT, TMP2, AT
1236 | or TMP2, TMP0, AT
1237 |.else
1238 | movn TMP2, TISNUM, AT
1239 |.endif
1240 | dsll TMP2, TMP2, 3
1241 | dsubu TMP0, DISPATCH, TMP2
1242 | b <2
1243 |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0)
1244 |
1245 |.ffunc_2 setmetatable
1246 | // Fast path: no mt for table yet and not clearing the mt.
1247 | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1248 | gettp TMP3, CARG2
1249 | ld TAB:TMP0, TAB:TMP1->metatable
1250 | lbu TMP2, TAB:TMP1->marked
1251 | daddiu AT, TMP3, -LJ_TTAB
1252 | cleartp TAB:CARG2
1253 | or AT, AT, TAB:TMP0
1254 | bnez AT, ->fff_fallback
1255 |. andi AT, TMP2, LJ_GC_BLACK // isblack(table)
1256 | beqz AT, ->fff_restv
1257 |. sd TAB:CARG2, TAB:TMP1->metatable
1258 | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
1259 |
1260 |.ffunc rawget
1261 | ld CARG2, 0(BASE)
1262 | sltiu AT, NARGS8:RC, 16
1263 | load_got lj_tab_get
1264 | gettp TMP0, CARG2
1265 | cleartp CARG2
1266 | daddiu TMP0, TMP0, -LJ_TTAB
1267 | or AT, AT, TMP0
1268 | bnez AT, ->fff_fallback
1269 |. daddiu CARG3, BASE, 8
1270 | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1271 |. move CARG1, L
1272 | b ->fff_restv
1273 |. ld CARG1, 0(CRET1)
1274 |
1275 |//-- Base library: conversions ------------------------------------------
1276 |
1277 |.ffunc tonumber
1278 | // Only handles the number case inline (without a base argument).
1279 | ld CARG1, 0(BASE)
1280 | xori AT, NARGS8:RC, 8 // Exactly one number argument.
1281 | gettp TMP1, CARG1
1282 | sltu TMP0, TISNUM, TMP1
1283 | or AT, AT, TMP0
1284 | bnez AT, ->fff_fallback
1285 |. nop
1286 | b ->fff_restv
1287 |. nop
1288 |
1289 |.ffunc_1 tostring // Fast path for tostring(x): strings are returned as-is, numbers are formatted via lj_strfmt_number.
1290 | // Only handles the string or number case inline.
1291 | gettp TMP0, CARG1
1292 | daddiu AT, TMP0, -LJ_TSTR
1293 | // A __tostring method in the string base metatable is ignored.
1294 | beqz AT, ->fff_restv // String argument? Return it unchanged.
1295 | // Handle numbers inline, unless a number base metatable is present.
1296 |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
1297 | sltu TMP0, TISNUM, TMP0 // TMP0 = 1 if the tag is (unsigned) above TISNUM.
1298 | or TMP0, TMP0, TMP1 // Non-zero if not accepted as a number or a base metatable is set.
1299 | bnez TMP0, ->fff_fallback
1300 |. sd BASE, L->base // Add frame since C call can throw.
1301 |.if MIPSR6
1302 | sd PC, SAVE_PC // Redundant (but a defined value).
1303 | ffgccheck
1304 |.else
1305 | ffgccheck
1306 |. sd PC, SAVE_PC // Redundant (but a defined value).
1307 |.endif
1308 | load_got lj_strfmt_number
1309 | move CARG1, L
1310 | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
1311 |. move CARG2, BASE
1312 | // Returns GCstr *.
1313 | li AT, LJ_TSTR
1314 | settp CRET1, AT // Tag the returned pointer with LJ_TSTR.
1315 | b ->fff_restv
1316 |. move CARG1, CRET1
1317 |
1318 |//-- Base library: iterators -------------------------------------------
1319 |
1320 |.ffunc_1 next // Fast path for next(t [, k]): calls lj_tab_next and returns nil or a key/value pair.
1321 | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback
1322 | daddu TMP2, BASE, NARGS8:RC // TMP2 = slot just past the last passed argument.
1323 | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil.
1324 | ld PC, FRAME_PC(BASE)
1325 | load_got lj_tab_next
1326 | sd BASE, L->base // Add frame since C call can throw.
1327 | sd BASE, L->top // Dummy frame length is ok.
1328 | daddiu CARG3, BASE, 8 // CARG3 = address of arg 2 (the key slot).
1329 | sd PC, SAVE_PC
1330 | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1331 |. move CARG1, L
1332 | // Returns 0 at end of traversal.
1333 | beqz CRET1, ->fff_restv // End of traversal: return nil.
1334 |. move CARG1, TISNIL
1335 | ld TMP0, 8(BASE)
1336 | daddiu RA, BASE, -16
1337 | ld TMP2, 16(BASE)
1338 | sd TMP0, 0(RA) // Copy the two slots at BASE+8 and BASE+16 to the result area.
1339 | sd TMP2, 8(RA)
1340 | b ->fff_res
1341 |. li RD, (2+1)*8 // Two results.
1342 |
1343 |.ffunc_1 pairs // Fast path for pairs(t): returns upvalue[0], the table and nil.
1344 | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1345 | ld PC, FRAME_PC(BASE)
1346#if LJ_52
1347 | ld TAB:TMP2, TAB:TMP1->metatable
1348 | ld TMP0, CFUNC:RB->upvalue[0]
1349 | bnez TAB:TMP2, ->fff_fallback // Tables with a metatable take the fallback when LJ_52 is set.
1350#else
1351 | ld TMP0, CFUNC:RB->upvalue[0]
1352#endif
1353 |. daddiu RA, BASE, -16
1354 | sd TISNIL, 0(BASE) // Third result: nil.
1355 | sd CARG1, -8(BASE) // Second result: the table argument.
1356 | sd TMP0, 0(RA) // First result: upvalue[0] of this C function.
1357 | b ->fff_res
1358 |. li RD, (3+1)*8 // Three results.
1359 |
1360 |.ffunc_2 ipairs_aux // Iterator step for ipairs: (t, i) -> (i+1, t[i+1]) or no results at a nil entry.
1361 | checktab CARG1, ->fff_fallback
1362 | checkint CARG2, ->fff_fallback
1363 |. lw TMP0, TAB:CARG1->asize
1364 | ld TMP1, TAB:CARG1->array
1365 | ld PC, FRAME_PC(BASE)
1366 | sextw TMP2, CARG2
1367 | addiu TMP2, TMP2, 1 // TMP2 = next index.
1368 | sltu AT, TMP2, TMP0 // AT = 1 if the next index is below asize (unsigned).
1369 | daddiu RA, BASE, -16
1370 | zextw TMP0, TMP2
1371 | settp TMP0, TISNUM // TMP0 = next index tagged with TISNUM.
1372 | beqz AT, >2 // Not in array part?
1373 |. sd TMP0, 0(RA) // First result: the new index.
1374 | dsll TMP3, TMP2, 3 // Index * 8 = byte offset into the array.
1375 | daddu TMP3, TMP1, TMP3
1376 | ld TMP1, 0(TMP3)
1377 |1:
1378 | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
1379 |. li RD, (0+1)*8
1380 | sd TMP1, -8(BASE) // Second result: the value.
1381 | b ->fff_res
1382 |. li RD, (2+1)*8
1383 |2: // Check for empty hash part first. Otherwise call C function.
1384 | lw TMP0, TAB:CARG1->hmask
1385 | load_got lj_tab_getinth
1386 | beqz TMP0, ->fff_res
1387 |. li RD, (0+1)*8
1388 | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
1389 |. move CARG2, TMP2
1390 | // Returns cTValue * or NULL.
1391 | beqz CRET1, ->fff_res
1392 |. li RD, (0+1)*8
1393 | b <1
1394 |. ld TMP1, 0(CRET1) // Load the value and rejoin the nil check at 1.
1395 |
1396 |.ffunc_1 ipairs // Fast path for ipairs(t): returns upvalue[0], the table and an initial control value.
1397 | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
1398 | ld PC, FRAME_PC(BASE)
1399#if LJ_52
1400 | ld TAB:TMP2, TAB:TMP1->metatable
1401 | ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
1402 | bnez TAB:TMP2, ->fff_fallback // Tables with a metatable take the fallback when LJ_52 is set.
1403#else
1404 | ld TMP0, CFUNC:RB->upvalue[0]
1405#endif
1406 | daddiu RA, BASE, -16
1407 | dsll AT, TISNUM, 47 // NOTE(review): presumably the tagged integer 0 (tag in the upper bits, zero payload) -- confirm.
1408 | sd CARG1, -8(BASE) // Second result: the table argument.
1409 | sd AT, 0(BASE) // Third result: the initial control value.
1410 | sd CFUNC:TMP0, 0(RA) // First result: upvalue[0] of this C function.
1411 | b ->fff_res
1412 |. li RD, (3+1)*8 // Three results.
1413 |
1414 |//-- Base library: catch errors ----------------------------------------
1415 |
1416 |.ffunc pcall // Fast path for pcall(f, ...): sets up a FRAME_PCALL frame and dispatches the call.
1417 | daddiu NARGS8:RC, NARGS8:RC, -8 // Drop the function from the argument count.
1418 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
1419 | bltz NARGS8:RC, ->fff_fallback // No arguments at all: take the fallback.
1420 |. move TMP2, BASE
1421 | daddiu BASE, BASE, 16
1422 | // Remember active hook before pcall.
1423 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1424 | andi TMP3, TMP3, 1
1425 | daddiu PC, TMP3, 16+FRAME_PCALL // PC = frame delta + frame type + hook-active bit.
1426 | beqz NARGS8:RC, ->vm_call_dispatch
1427 |1: // Also entered from xpcall via "b <1"; the line below then runs as a normal instruction.
1428 |. daddu TMP0, BASE, NARGS8:RC
1429 |2: // Shift the arguments up by one slot, from the highest slot downwards.
1430 | ld TMP1, -16(TMP0)
1431 | sd TMP1, -8(TMP0)
1432 | daddiu TMP0, TMP0, -8
1433 | bne TMP0, BASE, <2
1434 |. nop
1435 | b ->vm_call_dispatch
1436 |. nop
1437 |
1438 |.ffunc xpcall // Fast path for xpcall(f, handler, ...): swaps f and handler, then reuses pcall's argument-shift loop.
1439 | daddiu NARGS8:TMP0, NARGS8:RC, -16 // Argument count without f and the handler.
1440 | ld CARG1, 0(BASE)
1441 | ld CARG2, 8(BASE)
1442 | bltz NARGS8:TMP0, ->fff_fallback // Fewer than 2 arguments: take the fallback.
1443 |. lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) // Must be TMP3: it is shifted/masked below (same register as in pcall).
1444 | gettp AT, CARG2
1445 | daddiu AT, AT, -LJ_TFUNC
1446 | bnez AT, ->fff_fallback // Traceback must be a function.
1447 |. move TMP2, BASE
1448 | move NARGS8:RC, NARGS8:TMP0
1449 | daddiu BASE, BASE, 24
1450 | // Remember active hook before pcall.
1451 | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
1452 | sd CARG2, 0(TMP2) // Swap function and traceback.
1453 | andi TMP3, TMP3, 1
1454 | sd CARG1, 8(TMP2)
1455 | beqz NARGS8:RC, ->vm_call_dispatch
1456 |. daddiu PC, TMP3, 24+FRAME_PCALL // PC = frame delta + frame type + hook-active bit.
1457 | b <1 // Continue in pcall's argument-shift loop (local label 1 above).
1458 |. nop
1459 |
1460 |//-- Coroutine library --------------------------------------------------
1461 |
1462 |.macro coroutine_resume_wrap, resume // Emits coroutine_resume (resume=1) or coroutine_wrap_aux (resume=0).
1463 |.if resume
1464 |.ffunc_1 coroutine_resume
1465 | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
1466 |.else
1467 |.ffunc coroutine_wrap_aux
1468 | ld L:CARG1, CFUNC:RB->upvalue[0].gcr // The coroutine is taken from upvalue[0] of this C function.
1469 | cleartp L:CARG1
1470 |.endif
1471 | lbu TMP0, L:CARG1->status
1472 | ld TMP1, L:CARG1->cframe
1473 | ld CARG2, L:CARG1->top
1474 | ld TMP2, L:CARG1->base
1475 | addiu AT, TMP0, -LUA_YIELD // AT = status - LUA_YIELD.
1476 | daddu CARG3, CARG2, TMP0 // CARG3 = top + status (equals base only if base == top and status == 0).
1477 | daddiu TMP3, CARG2, 8
1478 |.if MIPSR6
1479 | seleqz CARG2, CARG2, AT
1480 | selnez TMP3, TMP3, AT
1481 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1482 |. or CARG2, TMP3, CARG2 // CARG2 = top+8 if AT != 0, else top.
1483 |.else
1484 | bgtz AT, ->fff_fallback // st > LUA_YIELD?
1485 |. movn CARG2, TMP3, AT // CARG2 = top+8 if AT != 0, else top.
1486 |.endif
1487 | xor TMP2, TMP2, CARG3
1488 | bnez TMP1, ->fff_fallback // cframe != 0?
1489 |. or AT, TMP2, TMP0
1490 | ld TMP0, L:CARG1->maxstack
1491 | beqz AT, ->fff_fallback // base == top && st == 0?
1492 |. ld PC, FRAME_PC(BASE)
1493 | daddu TMP2, CARG2, NARGS8:RC // TMP2 = prospective new top of the coroutine stack.
1494 | sltu AT, TMP0, TMP2
1495 | bnez AT, ->fff_fallback // Stack overflow?
1496 |. sd PC, SAVE_PC
1497 | sd BASE, L->base
1498 |1:
1499 |.if resume
1500 | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC.
1501 | daddiu NARGS8:RC, NARGS8:RC, -8
1502 | daddiu TMP2, TMP2, -8
1503 |.endif
1504 | sd TMP2, L:CARG1->top
1505 | daddu TMP1, BASE, NARGS8:RC // TMP1 = end of the arguments to copy.
1506 | move CARG3, CARG2
1507 | sd BASE, L->top
1508 |2: // Move args to coroutine.
1509 | ld CRET1, 0(BASE)
1510 | sltu AT, BASE, TMP1
1511 | beqz AT, >3
1512 |. daddiu BASE, BASE, 8
1513 | sd CRET1, 0(CARG3)
1514 | b <2
1515 |. daddiu CARG3, CARG3, 8
1516 |3:
1517 | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1518 |. move L:RA, L:CARG1 // Keep the coroutine pointer in RA across the call.
1519 | // Returns thread status.
1520 |4:
1521 | ld TMP2, L:RA->base
1522 | sltiu AT, CRET1, LUA_YIELD+1 // AT = 1 if status <= LUA_YIELD (no error).
1523 | ld TMP3, L:RA->top
1524 | li_vmstate INTERP
1525 | ld BASE, L->base
1526 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
1527 | st_vmstate
1528 | beqz AT, >8
1529 |. dsubu RD, TMP3, TMP2 // RD = size in bytes of the results on the coroutine stack.
1530 | ld TMP0, L->maxstack
1531 | beqz RD, >6 // No results?
1532 |. daddu TMP1, BASE, RD
1533 | sltu AT, TMP0, TMP1
1534 | bnez AT, >9 // Need to grow stack?
1535 |. daddu TMP3, TMP2, RD
1536 | sd TMP2, L:RA->top // Clear coroutine stack.
1537 | move TMP1, BASE
1538 |5: // Move results from coroutine.
1539 | ld CRET1, 0(TMP2)
1540 | daddiu TMP2, TMP2, 8
1541 | sltu AT, TMP2, TMP3
1542 | sd CRET1, 0(TMP1)
1543 | bnez AT, <5
1544 |. daddiu TMP1, TMP1, 8
1545 |6:
1546 | andi TMP0, PC, FRAME_TYPE
1547 |.if resume
1548 | mov_true TMP1
1549 | daddiu RA, BASE, -8
1550 | sd TMP1, -8(BASE) // Prepend true to results.
1551 | daddiu RD, RD, 16
1552 |.else
1553 | move RA, BASE
1554 | daddiu RD, RD, 8
1555 |.endif
1556 |7:
1557 | sd PC, SAVE_PC
1558 | beqz TMP0, ->BC_RET_Z // Frame type bits are zero: return through BC_RET_Z.
1559 |. move MULTRES, RD
1560 | b ->vm_return
1561 |. nop
1562 |
1563 |8: // Coroutine returned with error (at co->top-1).
1564 |.if resume
1565 | daddiu TMP3, TMP3, -8
1566 | mov_false TMP1
1567 | ld CRET1, 0(TMP3)
1568 | sd TMP3, L:RA->top // Remove error from coroutine stack.
1569 | li RD, (2+1)*8
1570 | sd TMP1, -8(BASE) // Prepend false to results.
1571 | daddiu RA, BASE, -8
1572 | sd CRET1, 0(BASE) // Copy error message.
1573 | b <7
1574 |. andi TMP0, PC, FRAME_TYPE
1575 |.else
1576 | load_got lj_ffh_coroutine_wrap_err
1577 | move CARG2, L:RA
1578 | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1579 |. move CARG1, L
1580 |.endif // NOTE(review): no branch follows the call above; presumably it does not return -- confirm.
1581 |
1582 |9: // Handle stack expansion on return from yield.
1583 | load_got lj_state_growstack
1584 | srl CARG2, RD, 3 // Number of result slots = RD / 8.
1585 | call_intern lj_state_growstack // (lua_State *L, int n)
1586 |. move CARG1, L
1587 | b <4
1588 |. li CRET1, 0 // Retry the result handling at 4 with status 0.
1589 |.endmacro
1590 |
1591 | coroutine_resume_wrap 1 // coroutine.resume
1592 | coroutine_resume_wrap 0 // coroutine.wrap
1593 |
1594 |.ffunc coroutine_yield // Fast path for coroutine.yield(...): leaves via vm_leave_unw with status LUA_YIELD.
1595 | ld TMP0, L->cframe
1596 | daddu TMP1, BASE, NARGS8:RC
1597 | sd BASE, L->base
1598 | andi TMP0, TMP0, CFRAME_RESUME // Test the CFRAME_RESUME bit of the cframe value.
1599 | sd TMP1, L->top // top = end of the passed arguments.
1600 | beqz TMP0, ->fff_fallback // Bit not set: take the fallback.
1601 |. li CRET1, LUA_YIELD
1602 | sd r0, L->cframe // Clear L->cframe.
1603 | b ->vm_leave_unw
1604 |. sb CRET1, L->status // status = LUA_YIELD.
1605 |
1606 |//-- Math library -------------------------------------------------------
1607 |
1608 |.ffunc_1 math_abs // Fast path for math.abs(x) on integers and FP numbers; falls through into fff_restv.
1609 | gettp CARG2, CARG1
1610 | daddiu AT, CARG2, -LJ_TISNUM
1611 | bnez AT, >1 // Tag is not LJ_TISNUM: handle as FP number at 1.
1612 |. sextw TMP1, CARG1
1613 | sra TMP0, TMP1, 31 // Extract sign.
1614 | xor TMP1, TMP1, TMP0
1615 | dsubu CARG1, TMP1, TMP0 // |x| = (x ^ sign) - sign.
1616 | dsll TMP3, CARG1, 32 // Move bit 31 of the result into the sign position.
1617 | bgez TMP3, ->fff_restv // Bit 31 clear: result fits, return it tagged as integer.
1618 |. settp CARG1, TISNUM
1619 | li CARG1, 0x41e0 // 2^31 as a double.
1620 | b ->fff_restv
1621 |. dsll CARG1, CARG1, 48
1622 |1:
1623 | sltiu AT, CARG2, LJ_TISNUM // AT = 1 if the tag is (unsigned) below LJ_TISNUM.
1624 | beqz AT, ->fff_fallback
1625 |. dextm CARG1, CARG1, 0, 30 // NOTE(review): presumably keeps bits 0..62, i.e. clears the sign bit -- confirm operand encoding.
1626 |// fallthrough
1627 |
1628 |->fff_restv: // Common fast-function return paths: one TValue (fff_restv), one stored result (fff_res1), N results (fff_res).
1629 | // CARG1 = TValue result.
1630 | ld PC, FRAME_PC(BASE)
1631 | daddiu RA, BASE, -16
1632 | sd CARG1, -16(BASE)
1633 |->fff_res1:
1634 | // RA = results, PC = return.
1635 | li RD, (1+1)*8
1636 |->fff_res:
1637 | // RA = results, RD = (nresults+1)*8, PC = return.
1638 | andi TMP0, PC, FRAME_TYPE
1639 | bnez TMP0, ->vm_return // Non-zero frame type bits: return through vm_return.
1640 |. move MULTRES, RD
1641 | lw INS, -4(PC) // Fetch the instruction word just before PC.
1642 | decode_RB8a RB, INS
1643 | decode_RB8b RB
1644 |5:
1645 | sltu AT, RD, RB
1646 | bnez AT, >6 // More results expected?
1647 |. decode_RA8a TMP0, INS
1648 | decode_RA8b TMP0
1649 | ins_next1
1650 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1651 | dsubu BASE, RA, TMP0
1652 | ins_next2
1653 |
1654 |6: // Fill up results with nil.
1655 | daddu TMP1, RA, RD
1656 | daddiu RD, RD, 8
1657 | b <5
1658 |. sd TISNIL, -8(TMP1) // Store nil at RA + old RD - 8.
1659 |
1660 |.macro math_extern, func // Emits ff_math_<func>: calls the external function <func>, then returns via fff_resn.
1661 | .ffunc_n math_ .. func
1662 | load_got func
1663 | call_extern
1664 |. nop
1665 | b ->fff_resn
1666 |. nop
1667 |.endmacro
1668 |
1669 |.macro math_extern2, func // Like math_extern, but uses the .ffunc_nn prologue instead of .ffunc_n.
1670 | .ffunc_nn math_ .. func
1671 |. load_got func
1672 | call_extern
1673 |. nop
1674 | b ->fff_resn
1675 |. nop
1676 |.endmacro
1677 |
1678 |// TODO: Return integer type if result is integer (own sf implementation).
1679 |.macro math_round, func // Emits ff_math_<func>: integers are returned unchanged, FP numbers are rounded via vm_<func> (FPU) or the external <func>.
1680 |->ff_math_ .. func:
1681 | ld CARG1, 0(BASE)
1682 | beqz NARGS8:RC, ->fff_fallback // No arguments: take the fallback.
1683 |. gettp TMP0, CARG1
1684 | beq TMP0, TISNUM, ->fff_restv // Tag equals TISNUM: return the argument as-is.
1685 |. sltu AT, TMP0, TISNUM
1686 | beqz AT, ->fff_fallback // Tag not below TISNUM: take the fallback.
1687 |.if FPU
1688 |. ldc1 FARG1, 0(BASE)
1689 | bal ->vm_ .. func
1690 |. nop
1691 |.else
1692 |. load_got func
1693 | call_extern
1694 |. nop
1695 |.endif
1696 | b ->fff_resn
1697 |. nop
1698 |.endmacro
1699 |
1700 | math_round floor
1701 | math_round ceil
1702 |
1703 |.ffunc math_log // Fast path for math.log(x) with exactly one number argument; calls the external log.
1704 | li AT, 8
1705 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
1706 |. ld CARG1, 0(BASE)
1707 | checknum CARG1, ->fff_fallback
1708 |. load_got log
1709 |.if FPU
1710 | call_extern
1711 |. ldc1 FARG1, 0(BASE) // With FPU the argument is passed in FARG1.
1712 |.else
1713 | call_extern
1714 |. nop
1715 |.endif
1716 | b ->fff_resn
1717 |. nop
1718 |
1719 | math_extern log10 // Instantiations of math_extern (.ffunc_n prologue).
1720 | math_extern exp
1721 | math_extern sin
1722 | math_extern cos
1723 | math_extern tan
1724 | math_extern asin
1725 | math_extern acos
1726 | math_extern atan
1727 | math_extern sinh
1728 | math_extern cosh
1729 | math_extern tanh
1730 | math_extern2 pow // Instantiations of math_extern2 (.ffunc_nn prologue).
1731 | math_extern2 atan2
1732 | math_extern2 fmod
1733 |
1734 |.if FPU // math.sqrt: inline sqrt.d with an FPU, otherwise a math_extern wrapper around sqrt.
1735 |.ffunc_n math_sqrt
1736 |. sqrt.d FRET1, FARG1
1737 |// fallthrough to ->fff_resn
1738 |.else
1739 | math_extern sqrt
1740 |.endif
1741 |
1742 |->fff_resn: // Return one number result: FRET1 with an FPU, CRET1 otherwise.
1743 | ld PC, FRAME_PC(BASE)
1744 | daddiu RA, BASE, -16
1745 | b ->fff_res1
1746 |.if FPU
1747 |. sdc1 FRET1, 0(RA)
1748 |.else
1749 |. sd CRET1, 0(RA)
1750 |.endif
1751 |
1752 |
1753 |.ffunc_2 math_ldexp // Fast path for math.ldexp(x, e) with a number and an integer; calls the external ldexp.
1754 | checknum CARG1, ->fff_fallback
1755 | checkint CARG2, ->fff_fallback
1756 |. load_got ldexp
1757 | .FPU ldc1 FARG1, 0(BASE)
1758 | call_extern
1759 |. lw CARG2, 8+LO(BASE) // CARG2 = 32-bit word at offset LO of arg 2.
1760 | b ->fff_resn
1761 |. nop
1762 |
1763 |.ffunc_n math_frexp // Fast path for math.frexp(x): calls the external frexp and returns two results.
1764 | load_got frexp
1765 | ld PC, FRAME_PC(BASE)
1766 | call_extern
1767 |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) // Second argument: address of g->tmptv, used as output slot.
1768 | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) // Read back the 32-bit value written to tmptv.
1769 | daddiu RA, BASE, -16
1770 |.if FPU
1771 | mtc1 TMP1, FARG2
1772 | sdc1 FRET1, 0(RA)
1773 | cvt.d.w FARG2, FARG2 // Convert the 32-bit integer to a double.
1774 | sdc1 FARG2, 8(RA)
1775 |.else
1776 | sd CRET1, 0(RA)
1777 | zextw TMP1, TMP1
1778 | settp TMP1, TISNUM // Without FPU the second result is stored tagged with TISNUM.
1779 | sd TMP1, 8(RA)
1780 |.endif
1781 | b ->fff_res
1782 |. li RD, (2+1)*8 // Two results.
1783 |
1784 |.ffunc_n math_modf // Fast path for math.modf(x): calls the external modf and returns two results.
1785 | load_got modf
1786 | ld PC, FRAME_PC(BASE)
1787 | call_extern
1788 |. daddiu CARG2, BASE, -16 // Second argument: address of the first result slot, written by the callee.
1789 | daddiu RA, BASE, -16
1790 |.if FPU
1791 | sdc1 FRET1, -8(BASE) // The function's return value becomes the second result.
1792 |.else
1793 | sd CRET1, -8(BASE) // The function's return value becomes the second result.
1794 |.endif
1795 | b ->fff_res
1796 |. li RD, (2+1)*8 // Two results.
1797 |
1798 |.macro math_minmax, name, intins, intinsc, fpins // Emits math.min/math.max: integer loop (1), number loop (6/7), mixed-type conversions (3, 8).
1799 | .ffunc_1 name
1800 | daddu TMP3, BASE, NARGS8:RC // TMP3 = end of arguments.
1801 | checkint CARG1, >5
1802 |. daddiu TMP2, BASE, 8 // TMP2 = pointer to the next argument.
1803 |1: // Handle integers.
1804 | beq TMP2, TMP3, ->fff_restv // All arguments consumed: return CARG1.
1805 |. ld CARG2, 0(TMP2)
1806 | checkint CARG2, >3
1807 |. sextw CARG1, CARG1
1808 | lw CARG2, LO(TMP2)
1809 |. slt AT, CARG1, CARG2 // AT = 1 if accumulator < next argument.
1810 |.if MIPSR6
1811 | intins TMP1, CARG2, AT
1812 | intinsc CARG1, CARG1, AT
1813 | or CARG1, CARG1, TMP1
1814 |.else
1815 | intins CARG1, CARG2, AT // Conditionally replace the accumulator (movz for min, movn for max).
1816 |.endif
1817 | daddiu TMP2, TMP2, 8
1818 | zextw CARG1, CARG1
1819 | b <1
1820 |. settp CARG1, TISNUM // Re-tag the accumulator with TISNUM.
1821 |
1822 |3: // Convert intermediate result to number and continue with number loop.
1823 | checknum CARG2, ->fff_fallback
1824 |.if FPU
1825 |. mtc1 CARG1, FRET1
1826 | cvt.d.w FRET1, FRET1
1827 | b >7
1828 |. ldc1 FARG1, 0(TMP2)
1829 |.else
1830 |. nop
1831 | bal ->vm_sfi2d_1
1832 |. nop
1833 | b >7
1834 |. nop
1835 |.endif
1836 |
1837 |5:
1838 | .FPU ldc1 FRET1, 0(BASE)
1839 | checknum CARG1, ->fff_fallback
1840 |6: // Handle numbers.
1841 |. ld CARG2, 0(TMP2)
1842 | beq TMP2, TMP3, ->fff_resn // All arguments consumed: return the number accumulator.
1843 |.if FPU
1844 | ldc1 FARG1, 0(TMP2)
1845 |.else
1846 | move CRET1, CARG1
1847 |.endif
1848 | checknum CARG2, >8
1849 |. nop
1850 |7:
1851 |.if FPU
1852 |.if MIPSR6
1853 | fpins FRET1, FRET1, FARG1
1854 |.else
1855 | c.olt.d FRET1, FARG1 // Ordered less-than compare of accumulator and next argument.
1856 | fpins FRET1, FARG1 // Conditional FP move (movf.d for min, movt.d for max).
1857 |.endif
1858 |.else
1859 | bal ->vm_sfcmpolt
1860 |. nop
1861 |.if MIPSR6
1862 | intins AT, CARG2, CRET1
1863 | intinsc CARG1, CARG1, CRET1
1864 | or CARG1, CARG1, AT
1865 |.else
1866 | intins CARG1, CARG2, CRET1
1867 |.endif
1868 |.endif
1869 | b <6
1870 |. daddiu TMP2, TMP2, 8
1871 |
1872 |8: // Convert integer to number and continue with number loop.
1873 | checkint CARG2, ->fff_fallback
1874 |.if FPU
1875 |. lwc1 FARG1, LO(TMP2)
1876 | b <7
1877 |. cvt.d.w FARG1, FARG1
1878 |.else
1879 |. lw CARG2, LO(TMP2)
1880 | bal ->vm_sfi2d_2
1881 |. nop
1882 | b <7
1883 |. nop
1884 |.endif
1885 |
1886 |.endmacro
1887 |
1888 |.if MIPSR6 // R6 uses select pairs and min.d/max.d; pre-R6 uses conditional moves ("_" fills the unused intinsc slot).
1889 | math_minmax math_min, seleqz, selnez, min.d
1890 | math_minmax math_max, selnez, seleqz, max.d
1891 |.else
1892 | math_minmax math_min, movz, _, movf.d
1893 | math_minmax math_max, movn, _, movt.d
1894 |.endif
1895 |
1896 |//-- String library -----------------------------------------------------
1897 |
1898 |.ffunc string_byte // Only handle the 1-arg case here.
1899 | ld CARG1, 0(BASE)
1900 | gettp TMP0, CARG1
1901 | xori AT, NARGS8:RC, 8 // Zero iff exactly one argument.
1902 | daddiu TMP0, TMP0, -LJ_TSTR // Zero iff the tag equals LJ_TSTR.
1903 | or AT, AT, TMP0
1904 | bnez AT, ->fff_fallback // Need exactly 1 string argument.
1905 |. cleartp STR:CARG1
1906 | lw TMP0, STR:CARG1->len
1907 | daddiu RA, BASE, -16
1908 | ld PC, FRAME_PC(BASE)
1909 | sltu RD, r0, TMP0 // RD = (len != 0).
1910 | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
1911 | addiu RD, RD, 1
1912 | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
1913 | settp TMP1, TISNUM // Tag the byte value with TISNUM.
1914 | b ->fff_res
1915 |. sd TMP1, 0(RA)
1916 |
1917 |.ffunc string_char // Only handle the 1-arg case here.
1918 | ffgccheck
1919 |.if not MIPSR6
1920 |. nop
1921 |.endif
1922 | ld CARG1, 0(BASE)
1923 | gettp TMP0, CARG1
1924 | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
1925 | daddiu TMP0, TMP0, -LJ_TISNUM // Integer.
1926 | li TMP1, 255
1927 | sextw CARG1, CARG1
1928 | or AT, AT, TMP0
1929 | sltu TMP1, TMP1, CARG1 // !(255 < n).
1930 | or AT, AT, TMP1
1931 | bnez AT, ->fff_fallback
1932 |. li CARG3, 1 // String length = 1.
1933 | daddiu CARG2, sp, TMPD_OFS // CARG2 = address of the temporary at sp+TMPD_OFS.
1934 | sb CARG1, TMPD // Store the character byte into the temporary.
1935 |->fff_newstr: // Shared tail: CARG2 = char *, CARG3 = length; creates the string via lj_str_new.
1936 | load_got lj_str_new
1937 | sd BASE, L->base
1938 | sd PC, SAVE_PC
1939 | call_intern lj_str_new // (lua_State *L, char *str, size_t l)
1940 |. move CARG1, L
1941 | // Returns GCstr *.
1942 | ld BASE, L->base
1943 |->fff_resstr: // Shared tail: CRET1 = GCstr *; tags it with LJ_TSTR and returns it.
1944 | li AT, LJ_TSTR
1945 | settp CRET1, AT
1946 | b ->fff_restv
1947 |. move CARG1, CRET1
1948 |
1949 |.ffunc string_sub // Fast path for string.sub(s, start [, end]) with integer indexes.
1950 | ffgccheck
1951 |.if not MIPSR6
1952 |. nop
1953 |.endif
1954 | addiu AT, NARGS8:RC, -16
1955 | ld TMP0, 0(BASE)
1956 | bltz AT, ->fff_fallback // Fewer than 2 arguments: take the fallback.
1957 |. gettp TMP3, TMP0
1958 | cleartp STR:CARG1, TMP0
1959 | ld CARG2, 8(BASE)
1960 | beqz AT, >1 // Exactly 2 arguments: end defaults to -1.
1961 |. li CARG4, -1
1962 | ld CARG3, 16(BASE)
1963 | checkint CARG3, ->fff_fallback
1964 |. sextw CARG4, CARG3
1965 |1:
1966 | checkint CARG2, ->fff_fallback
1967 |. li AT, LJ_TSTR
1968 | bne TMP3, AT, ->fff_fallback // First argument must carry the LJ_TSTR tag.
1969 |. sextw CARG3, CARG2
1970 | lw CARG2, STR:CARG1->len
1971 | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
1972 | slt AT, CARG4, r0
1973 | addiu TMP0, CARG2, 1 // TMP0 = len+1.
1974 | addu TMP1, CARG4, TMP0
1975 | slt TMP3, CARG3, r0
1976 |.if MIPSR6
1977 | seleqz CARG4, CARG4, AT
1978 | selnez TMP1, TMP1, AT
1979 | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
1980 |.else
1981 | movn CARG4, TMP1, AT // if (end < 0) end += len+1
1982 |.endif
1983 | addu TMP1, CARG3, TMP0
1984 |.if MIPSR6
1985 | selnez TMP1, TMP1, TMP3
1986 | seleqz CARG3, CARG3, TMP3
1987 | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
1988 | li TMP2, 1
1989 | slt AT, CARG4, r0
1990 | slt TMP3, r0, CARG3
1991 | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
1992 | selnez CARG3, CARG3, TMP3
1993 | seleqz TMP2, TMP2, TMP3
1994 | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
1995 | slt AT, CARG2, CARG4
1996 | seleqz CARG4, CARG4, AT
1997 | selnez CARG2, CARG2, AT
1998 | or CARG4, CARG2, CARG4 // if (end > len) end = len
1999 |.else
2000 | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
2001 | li TMP2, 1
2002 | slt AT, CARG4, r0
2003 | slt TMP3, r0, CARG3
2004 | movn CARG4, r0, AT // if (end < 0) end = 0
2005 | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
2006 | slt AT, CARG2, CARG4
2007 | movn CARG4, CARG2, AT // if (end > len) end = len
2008 |.endif
2009 | daddu CARG2, STR:CARG1, CARG3
2010 | subu CARG3, CARG4, CARG3 // len = end - start
2011 | daddiu CARG2, CARG2, sizeof(GCstr)-1 // CARG2 = str + start + sizeof(GCstr) - 1: address of the first selected byte.
2012 | bgez CARG3, ->fff_newstr
2013 |. addiu CARG3, CARG3, 1 // len++
2014 |->fff_emptystr: // Return empty string.
2015 | li AT, LJ_TSTR
2016 | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
2017 | b ->fff_restv
2018 |. settp CARG1, AT
2019 |
2020 |.macro ffstring_op, name // Emits ff_string_<name>: runs lj_buf_putstr_<name> on the global tmpbuf, then lj_buf_tostr.
2021 | .ffunc string_ .. name
2022 | ffgccheck
2023 |. nop
2024 | beqz NARGS8:RC, ->fff_fallback // No arguments: take the fallback.
2025 |. ld CARG2, 0(BASE)
2026 | checkstr STR:CARG2, ->fff_fallback
2027 | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
2028 | load_got lj_buf_putstr_ .. name
2029 | ld TMP0, SBUF:CARG1->b
2030 | sd L, SBUF:CARG1->L
2031 | sd BASE, L->base
2032 | sd TMP0, SBUF:CARG1->p // Reset the buffer: p = b.
2033 | call_intern extern lj_buf_putstr_ .. name
2034 |. sd PC, SAVE_PC
2035 | load_got lj_buf_tostr
2036 | call_intern lj_buf_tostr
2037 |. move SBUF:CARG1, SBUF:CRET1 // Pass the first call's return value on as the buffer argument.
2038 | b ->fff_resstr
2039 |. ld BASE, L->base
2040 |.endmacro
2041 |
2042 |ffstring_op reverse
2043 |ffstring_op lower
2044 |ffstring_op upper
2045 |
2046 |//-- Bit library --------------------------------------------------------
2047 |
2048 |->vm_tobit_fb: // Number-to-bit conversion helper for the bit library. TMP1 == 0 on entry means fallback; result in CRET1.
2049 | beqz TMP1, ->fff_fallback
2050 |.if FPU
2051 |. ldc1 FARG1, 0(BASE)
2052 | add.d FARG1, FARG1, TOBIT // Add the TOBIT constant; the low 32 bits of the sum are then taken as the result.
2053 | mfc1 CRET1, FARG1
2054 | jr ra
2055 |. zextw CRET1, CRET1
2056 |.else
2057 |// FP number to bit conversion for soft-float.
2058 |->vm_tobit:
2059 | dsll TMP0, CARG1, 1 // Shift out the sign bit.
2060 | li CARG3, 1076
2061 | dsrl AT, TMP0, 53 // AT = biased exponent.
2062 | dsubu CARG3, CARG3, AT // CARG3 = 1076 - exponent, used as right-shift count below.
2063 | sltiu AT, CARG3, 54
2064 | beqz AT, >1 // Shift count outside 0..53: result is 0.
2065 |. dextm TMP0, TMP0, 0, 20
2066 | dinsu TMP0, AT, 21, 21 // NOTE(review): presumably inserts the implicit leading mantissa bit (AT == 1 here) -- confirm operand encoding.
2067 | slt AT, CARG1, r0 // AT = 1 if the input is negative.
2068 | dsrlv CRET1, TMP0, CARG3
2069 | dsubu TMP0, r0, CRET1 // TMP0 = negated result.
2070 |.if MIPSR6
2071 | selnez TMP0, TMP0, AT
2072 | seleqz CRET1, CRET1, AT
2073 | or CRET1, CRET1, TMP0
2074 |.else
2075 | movn CRET1, TMP0, AT // Use the negated result for negative inputs.
2076 |.endif
2077 | jr ra
2078 |. zextw CRET1, CRET1
2079 |1:
2080 | jr ra
2081 |. move CRET1, r0
2082 |
2083 |// FP number to int conversion with a check for soft-float.
2084 |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
2085 |->vm_tointg: // In: CARG1 = FP number bits. Out: CRET1 = integer, CRET2 = 0 iff the conversion is exact and in range.
2086 |.if JIT
2087 | dsll CRET2, CARG1, 1 // Shift out the sign bit.
2088 | beqz CRET2, >2 // +-0: return 0 with CRET2 == 0.
2089 |. li TMP0, 1076
2090 | dsrl AT, CRET2, 53 // AT = biased exponent.
2091 | dsubu TMP0, TMP0, AT // TMP0 = 1076 - exponent, used as right-shift count below.
2092 | sltiu AT, TMP0, 54
2093 | beqz AT, >1 // Shift count outside 0..53: fail with CRET2 = 1.
2094 |. dextm CRET2, CRET2, 0, 20
2095 | dinsu CRET2, AT, 21, 21
2096 | slt AT, CARG1, r0 // AT = 1 if the input is negative.
2097 | dsrlv CRET1, CRET2, TMP0
2098 | dsubu CARG1, r0, CRET1 // CARG1 = negated result.
2099 |.if MIPSR6
2100 | seleqz CRET1, CRET1, AT
2101 | selnez CARG1, CARG1, AT
2102 | or CRET1, CRET1, CARG1
2103 |.else
2104 | movn CRET1, CARG1, AT // Use the negated result for negative inputs.
2105 |.endif
2106 | li CARG1, 64
2107 | subu TMP0, CARG1, TMP0
2108 | dsllv CRET2, CRET2, TMP0 // Integer check.
2109 | sextw AT, CRET1
2110 | xor AT, CRET1, AT // Range check.
2112 |.if MIPSR6 // Return exactly once, after merging both checks: CRET2 = CRET2 ? CRET2 : AT.
2113 | seleqz AT, AT, CRET2
2114 | selnez CRET2, CRET2, CRET2
2115 | jr ra
2116 |. or CRET2, AT, CRET2
2117 |.else
2118 | jr ra
2119 |. movz CRET2, AT, CRET2 // If the integer check passed (CRET2 == 0), report the range check instead.
2120 |.endif
2121 |1:
2122 | jr ra
2123 |. li CRET2, 1
2124 |2:
2125 | jr ra
2126 |. move CRET1, r0
2127 |.endif
2128 |.endif
2129 |
2130 |.macro .ffunc_bit, name // Prologue for bit.* functions: leaves arg 1 as a zero-extended 32-bit value in CRET1.
2131 | .ffunc_1 bit_..name
2132 | gettp TMP0, CARG1
2133 | beq TMP0, TISNUM, >6 // Tag equals TISNUM: use the low 32 bits directly.
2134 |. zextw CRET1, CARG1
2135 | bal ->vm_tobit_fb
2136 |. sltiu TMP1, TMP0, LJ_TISNUM // TMP1 = 0 makes vm_tobit_fb branch to the fallback.
2137 |6:
2138 |.endmacro
2139 |
2140 |.macro .ffunc_bit_op, name, bins // Emits bit.<name>: folds all arguments into CRET1 with the instruction <bins>.
2141 | .ffunc_bit name
2142 | daddiu TMP2, BASE, 8 // TMP2 = pointer to the next argument.
2143 | daddu TMP3, BASE, NARGS8:RC // TMP3 = end of arguments.
2144 |1:
2145 | beq TMP2, TMP3, ->fff_resi // All arguments consumed: return CRET1 as integer.
2146 |. ld CARG1, 0(TMP2)
2147 | gettp TMP0, CARG1
2148 |.if FPU
2149 | bne TMP0, TISNUM, >2
2150 |. daddiu TMP2, TMP2, 8
2151 | zextw CARG1, CARG1
2152 | b <1
2153 |. bins CRET1, CRET1, CARG1
2154 |2:
2155 | ldc1 FARG1, -8(TMP2) // TMP2 was already advanced, hence the -8 offset.
2156 | sltiu AT, TMP0, LJ_TISNUM
2157 | beqz AT, ->fff_fallback
2158 |. add.d FARG1, FARG1, TOBIT
2159 | mfc1 CARG1, FARG1
2160 | zextw CARG1, CARG1
2161 | b <1
2162 |. bins CRET1, CRET1, CARG1
2163 |.else
2164 | beq TMP0, TISNUM, >2
2165 |. move CRET2, CRET1 // Save the accumulator across the helper call.
2166 | bal ->vm_tobit_fb
2167 |. sltiu TMP1, TMP0, LJ_TISNUM
2168 | move CARG1, CRET2
2169 |2:
2170 | zextw CARG1, CARG1
2171 | bins CRET1, CRET1, CARG1
2172 | b <1
2173 |. daddiu TMP2, TMP2, 8
2174 |.endif
2175 |.endmacro
2176 |
2177 |.ffunc_bit_op band, and
2178 |.ffunc_bit_op bor, or
2179 |.ffunc_bit_op bxor, xor
2180 |
2181 |.ffunc_bit bswap // bit.bswap(x): combines shifted/masked pieces of CRET1 into the result.
2182 | dsrl TMP0, CRET1, 8
2183 | dsrl TMP1, CRET1, 24 // TMP1 = value >> 24.
2184 | andi TMP2, TMP0, 0xff00 // TMP2 = (value >> 8) & 0xff00.
2185 | dins TMP1, CRET1, 24, 31
2186 | dins TMP2, TMP0, 16, 23
2187 | b ->fff_resi
2188 |. or CRET1, TMP1, TMP2
2189 |
2190 |.ffunc_bit bnot // bit.bnot(x): bitwise complement, truncated to 32 bits.
2191 | not CRET1, CRET1
2192 | b ->fff_resi
2193 |. zextw CRET1, CRET1
2194 |
2195 |.macro .ffunc_bit_sh, name, shins, shmod // Emits bit.<name>(x, n) using shift instruction <shins>; shmod == 1 negates the count first.
2196 | .ffunc_2 bit_..name
2197 | gettp TMP0, CARG1
2198 | beq TMP0, TISNUM, >1
2199 |. nop
2200 | bal ->vm_tobit_fb
2201 |. sltiu TMP1, TMP0, LJ_TISNUM // TMP1 = 0 makes vm_tobit_fb branch to the fallback.
2202 | move CARG1, CRET1
2203 |1:
2204 | gettp TMP0, CARG2
2205 | bne TMP0, TISNUM, ->fff_fallback // The shift count must carry the TISNUM tag.
2206 |. zextw CARG2, CARG2
2207 | sextw CARG1, CARG1
2208 |.if shmod == 1
2209 | negu CARG2, CARG2
2210 |.endif
2211 | shins CRET1, CARG1, CARG2
2212 | b ->fff_resi
2213 |. zextw CRET1, CRET1
2214 |.endmacro
2215 |
2216 |.ffunc_bit_sh lshift, sllv, 0
2217 |.ffunc_bit_sh rshift, srlv, 0
2218 |.ffunc_bit_sh arshift, srav, 0
2219 |.ffunc_bit_sh rol, rotrv, 1 // Rotate left = rotate right by the negated count.
2220 |.ffunc_bit_sh ror, rotrv, 0
2221 |
2222 |.ffunc_bit tobit // bit.tobit(x): the prologue already left the converted value in CRET1.
2223 |->fff_resi: // Return CRET1 as a single result tagged with TISNUM.
2224 | ld PC, FRAME_PC(BASE)
2225 | daddiu RA, BASE, -16
2226 | settp CRET1, TISNUM
2227 | b ->fff_res1
2228 |. sd CRET1, -16(BASE)
2229 |
2230 |//-----------------------------------------------------------------------
2231 |->fff_fallback: // Call fast function fallback handler.
2232 | // BASE = new base, RB = CFUNC, RC = nargs*8
2233 | ld TMP3, CFUNC:RB->f
2234 | daddu TMP1, BASE, NARGS8:RC // TMP1 = end of arguments (new top).
2235 | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC.
2236 | daddiu TMP0, TMP1, 8*LUA_MINSTACK
2237 | ld TMP2, L->maxstack
2238 | sd PC, SAVE_PC // Redundant (but a defined value).
2239 | sltu AT, TMP2, TMP0 // AT = 1 if top + LUA_MINSTACK slots exceeds maxstack.
2240 | sd BASE, L->base
2241 | sd TMP1, L->top
2242 | bnez AT, >5 // Need to grow stack.
2243 |. move CFUNCADDR, TMP3
2244 | jalr TMP3 // (lua_State *L)
2245 |. move CARG1, L
2246 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2247 | ld BASE, L->base
2248 | sll RD, CRET1, 3 // RD = return value * 8.
2249 | bgtz CRET1, ->fff_res // Returned nresults+1?
2250 |. daddiu RA, BASE, -16
2251 |1: // Returned 0 or -1: retry fast path.
2252 | ld LFUNC:RB, FRAME_FUNC(BASE)
2253 | ld TMP0, L->top
2254 | cleartp LFUNC:RB
2255 | bnez CRET1, ->vm_call_tail // Returned -1?
2256 |. dsubu NARGS8:RC, TMP0, BASE // Recompute nargs*8 from L->top.
2257 | ins_callt // Returned 0: retry fast path.
2258 |
2259 |// Reconstruct previous base for vmeta_call during tailcall.
2260 |->vm_call_tail:
2261 | andi TMP0, PC, FRAME_TYPE
2262 | li AT, -4
2263 | bnez TMP0, >3 // Non-zero frame type bits: delta is PC with the low two bits cleared.
2264 |. and TMP1, PC, AT
2265 | lbu TMP1, OFS_RA(PC)
2266 | sll TMP1, TMP1, 3
2267 | addiu TMP1, TMP1, 16 // Otherwise delta = RA operand * 8 + 16.
2268 |3:
2269 | b ->vm_call_dispatch // Resolve again for tailcall.
2270 |. dsubu TMP2, BASE, TMP1
2271 |
2272 |5: // Grow stack for fallback handler.
2273 | load_got lj_state_growstack
2274 | li CARG2, LUA_MINSTACK
2275 | call_intern lj_state_growstack // (lua_State *L, int n)
2276 |. move CARG1, L
2277 | ld BASE, L->base
2278 | b <1
2279 |. li CRET1, 0 // Force retry.
2280 |
2281 |->fff_gcstep: // Call GC step function.
2282 | // BASE = new base, RC = nargs*8
2283 | move MULTRES, ra // Preserve the return address across the C call.
2284 | load_got lj_gc_step
2285 | sd BASE, L->base
2286 | daddu TMP0, BASE, NARGS8:RC
2287 | sd PC, SAVE_PC // Redundant (but a defined value).
2288 | sd TMP0, L->top
2289 | call_intern lj_gc_step // (lua_State *L)
2290 |. move CARG1, L
2291 | ld BASE, L->base
2292 | move ra, MULTRES
2293 | ld TMP0, L->top
2294 | ld CFUNC:RB, FRAME_FUNC(BASE) // Reload the function from the frame slot.
2295 | cleartp CFUNC:RB
2296 | jr ra
2297 |. dsubu NARGS8:RC, TMP0, BASE // Recompute nargs*8 from L->top.
2298 |
2299 |//-----------------------------------------------------------------------
2300 |//-- Special dispatch targets -------------------------------------------
2301 |//-----------------------------------------------------------------------
2302 |
2303 |->vm_record: // Dispatch target for recording phase.
2304 |.if JIT
2305 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2306 | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent.
2307 | bnez AT, >5
2308 | // Decrement the hookcount for consistency, but always do the call.
2309 |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2310 | andi AT, TMP3, HOOK_ACTIVE
2311 | bnez AT, >1 // Hook already active: skip the hookcount update.
2312 |. addiu TMP2, TMP2, -1
2313 | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2314 | beqz AT, >1 // Neither line nor count hook set: skip the hookcount update.
2315 |. nop
2316 | b >1
2317 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) // Store the decremented hookcount.
2318 |.endif
2319 |
2320 |->vm_rethook: // Dispatch target for return hooks.
2321 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2322 | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
2323 | beqz AT, >1 // Not active: continue at label 1 (inside vm_inshook) to call the hook dispatcher.
2324 |5: // Re-dispatch to static ins.
2325 |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
2326 | jr AT
2327 |. nop
2328 |
2329 |->vm_inshook: // Dispatch target for instr/line hooks.
2330 | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
2331 | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2332 | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
2333 | bnez AT, <5
2334 |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
2335 | beqz AT, <5 // Neither line nor count hook set: re-dispatch to the static instruction.
2336 |. addiu TMP2, TMP2, -1
2337 | beqz TMP2, >1 // Hookcount reached zero: call the dispatcher.
2338 |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
2339 | andi AT, TMP3, LUA_MASKLINE
2340 | beqz AT, <5 // No line hook: re-dispatch to the static instruction.
2341 |1:
2342 |. load_got lj_dispatch_ins
2343 | sw MULTRES, SAVE_MULTRES
2344 | move CARG2, PC
2345 | sd BASE, L->base
2346 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2347 | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2348 |. move CARG1, L
2349 |3:
2350 | ld BASE, L->base
2351 |4: // Re-dispatch to static ins.
2352 | lw INS, -4(PC)
2353 | decode_OP8a TMP1, INS
2354 | decode_OP8b TMP1
2355 | daddu TMP0, DISPATCH, TMP1
2356 | decode_RD8a RD, INS
2357 | ld AT, GG_DISP2STATIC(TMP0) // Load the static handler address for this opcode.
2358 | decode_RA8a RA, INS
2359 | decode_RD8b RD
2360 | jr AT
2361 | decode_RA8b RA
2362 |
2363 |->cont_hook: // Continue from hook yield.
2364 | daddiu PC, PC, 4 // Advance PC by one instruction word; label 4 re-reads the instruction at -4(PC).
2365 | b <4
2366 |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins.
2367 |
2368 |->vm_hotloop: // Hot loop counter underflow.
2369 |.if JIT
2370 | ld LFUNC:TMP1, FRAME_FUNC(BASE)
2371 | daddiu CARG1, DISPATCH, GG_DISP2J // CARG1 = jit_State pointer (DISPATCH + GG_DISP2J).
2372 | cleartp LFUNC:TMP1
2373 | sd PC, SAVE_PC
2374 | ld TMP1, LFUNC:TMP1->pc
2375 | move CARG2, PC
2376 | sd L, DISPATCH_J(L)(DISPATCH)
2377 | lbu TMP1, PC2PROTO(framesize)(TMP1) // Frame size of the current function's prototype.
2378 | load_got lj_trace_hot
2379 | sd BASE, L->base
2380 | dsll TMP1, TMP1, 3
2381 | daddu TMP1, BASE, TMP1 // TMP1 = BASE + framesize*8.
2382 | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc)
2383 |. sd TMP1, L->top
2384 | b <3 // Reload BASE and re-dispatch (label 3 in vm_inshook).
2385 |. nop
2386 |.endif
2387 |
2388 |
2389 |->vm_callhook: // Dispatch target for call hooks.
2390 |.if JIT
2391 | b >1
2392 |.endif
2393 |. move CARG2, PC // Call hook passes the plain PC.
2394 |
2395 |->vm_hotcall: // Hot call counter underflow.
2396 |.if JIT
2397 | ori CARG2, PC, 1 // Hot call passes PC with bit 0 set.
2398 |1:
2399 |.endif
2400 | load_got lj_dispatch_call
2401 | daddu TMP0, BASE, RC
2402 | sd PC, SAVE_PC
2403 | sd BASE, L->base
2404 | dsubu RA, RA, BASE // Keep RA as an offset from BASE across the call.
2405 | sd TMP0, L->top
2406 | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2407 |. move CARG1, L
2408 | // Returns ASMFunction.
2409 | ld BASE, L->base
2410 | ld TMP0, L->top
2411 | sd r0, SAVE_PC // Invalidate for subsequent line hook.
2412 | dsubu NARGS8:RC, TMP0, BASE
2413 | daddu RA, BASE, RA // Rebuild RA from the reloaded BASE.
2414 | ld LFUNC:RB, FRAME_FUNC(BASE)
2415 | cleartp LFUNC:RB
2416 | jr CRET1 // Jump to the returned function.
2417 |. lw INS, -4(PC)
2418 |
2419 |->cont_stitch: // Trace stitching.
2420 |.if JIT
2421 | // RA = resultptr, RB = meta base
2422 | lw INS, -4(PC)
2423 | ld TRACE:TMP2, -40(RB) // Save previous trace.
2424 | decode_RA8a RC, INS
2425 | daddiu AT, MULTRES, -8 // AT = number of result bytes (MULTRES minus 8).
2426 | cleartp TRACE:TMP2
2427 | decode_RA8b RC
2428 | beqz AT, >2
2429 |. daddu RC, BASE, RC // Call base.
2430 |1: // Move results down.
2431 | ld CARG1, 0(RA)
2432 | daddiu AT, AT, -8
2433 | daddiu RA, RA, 8
2434 | sd CARG1, 0(RC)
2435 | bnez AT, <1
2436 |. daddiu RC, RC, 8
2437 |2:
2438 | decode_RA8a RA, INS
2439 | decode_RB8a RB, INS
2440 | decode_RA8b RA
2441 | decode_RB8b RB
2442 | daddu RA, RA, RB
2443 | daddu RA, BASE, RA // RA = BASE + RA operand + RB operand: end of the expected results.
2444 |3:
2445 | sltu AT, RC, RA
2446 | bnez AT, >9 // More results wanted?
2447 |. nop
2448 |
2449 | lhu TMP3, TRACE:TMP2->traceno
2450 | lhu RD, TRACE:TMP2->link
2451 | beq RD, TMP3, ->cont_nop // Blacklisted.
2452 |. load_got lj_dispatch_stitch
2453 | bnez RD, =>BC_JLOOP // Jump to stitched trace.
2454 |. sll RD, RD, 3
2455 |
2456 | // Stitch a new trace to the previous trace.
2457 | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
2458 | sd L, DISPATCH_J(L)(DISPATCH)
2459 | sd BASE, L->base
2460 | daddiu CARG1, DISPATCH, GG_DISP2J
2461 | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2462 |. move CARG2, PC
2463 | b ->cont_nop
2464 |. ld BASE, L->base
2465 |
2466 |9: // Fill the remaining expected result slots with nil.
2467 | sd TISNIL, 0(RC)
2468 | b <3
2469 |. daddiu RC, RC, 8
2470 |.endif
2471 |
2472 |->vm_profhook: // Dispatch target for profiler hook.
2473#if LJ_HASPROFILE
2474 | load_got lj_dispatch_profile
2475 | sw MULTRES, SAVE_MULTRES
2476 | move CARG2, PC
2477 | sd BASE, L->base
2478 | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2479 |. move CARG1, L
2480 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2481 | daddiu PC, PC, -4 // Step PC back by one instruction word.
2482 | b ->cont_nop
2483 |. ld BASE, L->base
2484#endif
2485 |
2486 |//-----------------------------------------------------------------------
2487 |//-- Trace exit handler -------------------------------------------------
2488 |//-----------------------------------------------------------------------
2489 |
2490 |.macro savex_, a, b
2491 |.if FPU
2492 | sdc1 f..a, a*8(sp) // FPR a goes to slot a of the first 32*8-byte area.
2493 | sdc1 f..b, b*8(sp) // Likewise for FPR b.
2494 | sd r..a, 32*8+a*8(sp) // GPR a goes to slot a of the second area (offset 32*8).
2495 | sd r..b, 32*8+b*8(sp) // Likewise for GPR b.
2496 |.else
2497 | sd r..a, a*8(sp) // No FPU: only GPRs are saved, starting at offset 0.
2498 | sd r..b, b*8(sp)
2499 |.endif
2500 |.endmacro
2501 |
2502 |->vm_exit_handler:
2503 |.if JIT
2504 |.if FPU
2505 | daddiu sp, sp, -(32*8+32*8) // Reserve 32 FPR slots plus 32 GPR slots.
2506 |.else
2507 | daddiu sp, sp, -(32*8) // Reserve 32 GPR slots only.
2508 |.endif
2509 | savex_ 0, 1
2510 | savex_ 2, 3
2511 | savex_ 4, 5
2512 | savex_ 6, 7
2513 | savex_ 8, 9
2514 | savex_ 10, 11
2515 | savex_ 12, 13
2516 | savex_ 14, 15
2517 | savex_ 16, 17
2518 | savex_ 18, 19
2519 | savex_ 20, 21
2520 | savex_ 22, 23
2521 | savex_ 24, 25
2522 | savex_ 26, 27
2523 | savex_ 28, 30 // Registers 29 and 31 are handled separately below.
2524 |.if FPU
2525 | sdc1 f29, 29*8(sp)
2526 | sdc1 f31, 31*8(sp)
2527 | sd r0, 32*8+31*8(sp) // Clear RID_TMP.
2528 | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp.
2529 | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP
2530 |.else
2531 | sd r0, 31*8(sp) // Clear RID_TMP.
2532 | daddiu TMP2, sp, 32*8 // Recompute original value of sp.
2533 | sd TMP2, 29*8(sp) // Store sp in RID_SP
2534 |.endif
2535 | li_vmstate EXIT
2536 | daddiu DISPATCH, JGL, -GG_DISP2G-32768 // Derive DISPATCH from JGL.
2537 | lw TMP1, 0(TMP2) // Load exit number.
2538 | st_vmstate
2539 | ld L, DISPATCH_GL(cur_L)(DISPATCH)
2540 | ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
2541 | load_got lj_trace_exit
2542 | sd L, DISPATCH_J(L)(DISPATCH)
2543 | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
2544 | sd BASE, L->base
2545 | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
2546 | daddiu CARG1, DISPATCH, GG_DISP2J
2547 | sd r0, DISPATCH_GL(jit_base)(DISPATCH) // Zero the jit_base field.
2548 | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
2549 |. move CARG2, sp // Delay slot: ex = start of the register save area.
2550 | // Returns MULTRES (unscaled) or negated error code.
2551 | ld TMP1, L->cframe
2552 | li AT, -4 // Mask that clears the two low bits.
2553 | ld BASE, L->base
2554 | and sp, TMP1, AT // sp = L->cframe with its two low bits cleared.
2555 | ld PC, SAVE_PC // Get SAVE_PC.
2556 | b >1 // Continue at the next label 1 (inside ->vm_exit_interp).
2557 |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield).
2558 |.endif
2559 |->vm_exit_interp:
2560 |.if JIT
2561 | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
2562 | ld L, SAVE_L
2563 | daddiu DISPATCH, JGL, -GG_DISP2G-32768 // Derive DISPATCH from JGL.
2564 | sd BASE, L->base
2565 |1: // Also entered directly from ->vm_exit_handler.
2566 | bltz CRET1, >9 // Check for error from exit.
2567 |. ld LFUNC:RB, FRAME_FUNC(BASE)
2568 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2569 | dsll MULTRES, CRET1, 3 // MULTRES = CRET1*8 (scale the unscaled count).
2570 | cleartp LFUNC:RB
2571 | sw MULTRES, SAVE_MULTRES
2572 | li TISNIL, LJ_TNIL
2573 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2574 | .FPU mtc1 TMP3, TOBIT
2575 | ld TMP1, LFUNC:RB->pc
2576 | sd r0, DISPATCH_GL(jit_base)(DISPATCH) // Zero the jit_base field.
2577 | ld KBASE, PC2PROTO(k)(TMP1)
2578 | .FPU cvt.d.s TOBIT, TOBIT // Widen the float constant to double.
2579 | // Modified copy of ins_next which handles function header dispatch, too.
2580 | lw INS, 0(PC)
2581 | daddiu PC, PC, 4
2582 | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
2583 | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
2584 | decode_OP8a TMP1, INS
2585 | decode_OP8b TMP1
2586 | sltiu TMP2, TMP1, BC_FUNCF*8 // TMP2 = 1 if the opcode is below BC_FUNCF.
2587 | daddu TMP0, DISPATCH, TMP1
2588 | decode_RD8a RD, INS
2589 | ld AT, 0(TMP0) // AT = dispatch table entry for this opcode.
2590 | decode_RA8a RA, INS
2591 | beqz TMP2, >2 // Opcode >= BC_FUNCF: take the function header path at 2.
2592 |. decode_RA8b RA
2593 | jr AT // Otherwise dispatch right away.
2594 |. decode_RD8b RD
2595 |2:
2596 | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
2597 | bnez TMP2, >3
2598 |. ld TMP1, FRAME_PC(BASE)
2599 | // Check frame below fast function.
2600 | andi TMP0, TMP1, FRAME_TYPE
2601 | bnez TMP0, >3 // Trace stitching continuation?
2602 |. nop
2603 | // Otherwise set KBASE for Lua function below fast function.
2604 | lw TMP2, -4(TMP1)
2605 | decode_RA8a TMP0, TMP2
2606 | decode_RA8b TMP0
2607 | dsubu TMP1, BASE, TMP0
2608 | ld LFUNC:TMP2, -32(TMP1)
2609 | cleartp LFUNC:TMP2
2610 | ld TMP1, LFUNC:TMP2->pc
2611 | ld KBASE, PC2PROTO(k)(TMP1)
2612 |3:
2613 | daddiu RC, MULTRES, -8 // RC = MULTRES-8.
2614 | jr AT
2615 |. daddu RA, RA, BASE
2616 |
2617 |9: // Rethrow error from the right C frame.
2618 | load_got lj_err_throw
2619 | negu CARG2, CRET1 // errcode = -CRET1 (CRET1 holds the negated code).
2620 | call_intern lj_err_throw // (lua_State *L, int errcode)
2621 |. move CARG1, L
2622 |.endif
2623 |
2624 |//-----------------------------------------------------------------------
2625 |//-- Math helper functions ----------------------------------------------
2626 |//-----------------------------------------------------------------------
2627 |
2628 |// Hard-float round to integer.
2629 |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
2630 |// MIPSR6: Modifies FTMP1, too.
2631 |.macro vm_round_hf, func
2632 | lui TMP0, 0x4330 // Hiword of 2^52 (double).
2633 | dsll TMP0, TMP0, 32
2634 | dmtc1 TMP0, f4 // f4 = 2^52.
2635 | abs.d FRET2, FARG1 // |x|
2636 | dmfc1 AT, FARG1 // AT = raw bits of x (sign tested below).
2637 |.if MIPSR6
2638 | cmp.lt.d FTMP1, FRET2, f4
2639 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2640 | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
2641 |.else
2642 | c.olt.d 0, FRET2, f4
2643 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
2644 | bc1f 0, >1 // Truncate only if |x| < 2^52.
2645 |.endif
2646 |. sub.d FRET1, FRET1, f4 // Delay slot shared by both branch variants above.
2647 | slt AT, AT, r0 // AT = 1 if the raw bits of x are negative (sign bit set).
2648 |.if "func" == "ceil"
2649 | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0.
2650 |.else
2651 | lui TMP0, 0x3ff0 // Hiword of +1 (double).
2652 |.endif
2653 |.if "func" == "trunc"
2654 | dsll TMP0, TMP0, 32
2655 | dmtc1 TMP0, f4 // f4 = +1.
2656 |.if MIPSR6
2657 | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
2658 | sub.d FRET2, FRET1, f4
2659 | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
2660 | dmtc1 AT, FRET1
2661 | neg.d FRET2, FTMP1
2662 | jr ra
2663 |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
2664 |.else
2665 | c.olt.d 0, FRET2, FRET1 // |x| < result?
2666 | sub.d FRET2, FRET1, f4
2667 | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
2668 | neg.d FRET2, FRET1
2669 | jr ra
2670 |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
2671 |.endif
2672 |.else
2673 | neg.d FRET2, FRET1
2674 | dsll TMP0, TMP0, 32
2675 | dmtc1 TMP0, f4 // f4 = +1 (floor) or -1 (ceil).
2676 |.if MIPSR6
2677 | dmtc1 AT, FTMP1
2678 | sel.d FTMP1, FRET1, FRET2
2679 |.if "func" == "ceil"
2680 | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
2681 |.else
2682 | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
2683 |.endif
2684 | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
2685 | jr ra
2686 |. sel.d FRET1, FTMP1, FRET2
2687 |.else
2688 | movn.d FRET1, FRET2, AT // Merge sign bit back in.
2689 |.if "func" == "ceil"
2690 | c.olt.d 0, FRET1, FARG1 // x > result?
2691 |.else
2692 | c.olt.d 0, FARG1, FRET1 // x < result?
2693 |.endif
2694 | sub.d FRET2, FRET1, f4 // If yes, subtract +-1.
2695 | jr ra
2696 |. movt.d FRET1, FRET2, 0
2697 |.endif
2698 |.endif
2699 |1: // Reached when the |x| < 2^52 compare is false: return x unchanged.
2700 | jr ra
2701 |. mov.d FRET1, FARG1
2702 |.endmacro
2703 |
2704 |.macro vm_round, func
2705 |.if FPU
2706 | vm_round_hf, func
2707 |.endif
2708 |.endmacro
2709 |
2710 |->vm_floor: // Body is emitted only when FPU is set (see vm_round).
2711 | vm_round floor
2712 |->vm_ceil: // Body is emitted only when FPU is set (see vm_round).
2713 | vm_round ceil
2714 |->vm_trunc: // Label always exists; body needs both JIT and FPU.
2715 |.if JIT
2716 | vm_round trunc
2717 |.endif
2718 |
2719 |// Soft-float integer to number conversion.
2720 |.macro sfi2d, ARG
2721 |.if not FPU
2722 | beqz ARG, >9 // Handle zero first.
2723 |. sra TMP0, ARG, 31 // Delay slot: TMP0 = ARG arithmetically shifted right by 31 (sign mask).
2724 | xor TMP1, ARG, TMP0
2725 | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1.
2726 | dclz ARG, TMP1 // ARG = number of leading zero bits in TMP1.
2727 | addiu ARG, ARG, -11 // Shift count: leading zeros minus 11.
2728 | li AT, 0x3ff+63-11-1
2729 | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1.
2730 | subu ARG, AT, ARG // Exponent - 1.
2731 | ins ARG, TMP0, 11, 11 // Sign | Exponent.
2732 | dsll ARG, ARG, 52 // Align left.
2733 | jr ra
2734 |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent.
2735 |9: // Zero input: return with ARG unchanged.
2736 | jr ra
2737 |. nop
2738 |.endif
2739 |.endmacro
2740 |
2741 |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1.
2742 |->vm_sfi2d_1:
2743 | sfi2d CARG1
2744 |
2745 |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1.
2746 |->vm_sfi2d_2:
2747 | sfi2d CARG2
2748 |
2749 |// Soft-float comparison. Equivalent to c.eq.d.
2750 |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2751 |->vm_sfcmpeq:
2752 |.if not FPU
2753 | dsll AT, CARG1, 1 // Shift the sign bit out of arg 1.
2754 | dsll TMP0, CARG2, 1 // Shift the sign bit out of arg 2.
2755 | or TMP1, AT, TMP0
2756 | beqz TMP1, >8 // Both args +-0: return 1.
2757 |. lui TMP1, 0xffe0
2758 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0000000000000.
2759 | sltu AT, TMP1, AT // Arg 1 (sign shifted out) above that bound?
2760 | sltu TMP0, TMP1, TMP0 // Arg 2 (sign shifted out) above that bound?
2761 | or TMP1, AT, TMP0
2762 | bnez TMP1, >9 // Either arg is NaN: return 0.
2763 |. xor AT, CARG1, CARG2 // Delay slot: zero iff the bit patterns are identical.
2764 | jr ra
2765 |. sltiu CRET1, AT, 1 // Same values: return 1.
2766 |8:
2767 | jr ra
2768 |. li CRET1, 1
2769 |9:
2770 | jr ra
2771 |. li CRET1, 0
2772 |.endif
2773 |
2774 |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
2775 |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
2776 |->vm_sfcmpult:
2777 |.if not FPU
2778 | b >1 // Share the body below, entering at label 1.
2779 |. li CRET2, 1 // Delay slot: result to return when either arg is NaN.
2780 |.endif
2781 |
2782 |->vm_sfcmpolt:
2783 |.if not FPU
2784 | li CRET2, 0 // Result to return when either arg is NaN.
2785 |1:
2786 | dsll AT, CARG1, 1 // Shift the sign bit out of arg 1.
2787 | dsll TMP0, CARG2, 1 // Shift the sign bit out of arg 2.
2788 | or TMP1, AT, TMP0
2789 | beqz TMP1, >8 // Both args +-0: return 0.
2790 |. lui TMP1, 0xffe0
2791 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0000000000000.
2792 | sltu AT, TMP1, AT
2793 | sltu TMP0, TMP1, TMP0
2794 | or TMP1, AT, TMP0
2795 | bnez TMP1, >9 // Either arg is NaN: return 0 or 1 (value preset in CRET2).
2796 |. and AT, CARG1, CARG2 // Delay slot: sign bit of AT set iff both sign bits are set.
2797 | bltz AT, >5 // Both args negative?
2798 |. nop
2799 | jr ra
2800 |. slt CRET1, CARG1, CARG2 // Signed integer compare of the raw bit patterns.
2801 |5: // Swap conditions if both operands are negative.
2802 | jr ra
2803 |. slt CRET1, CARG2, CARG1
2804 |8:
2805 | jr ra
2806 |. li CRET1, 0
2807 |9:
2808 | jr ra
2809 |. move CRET1, CRET2
2810 |.endif
2811 |
2812 |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
2813 |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
2814 |->vm_sfcmpolex:
2815 |.if not FPU
2816 | dsll AT, CARG1, 1 // Shift the sign bit out of arg 1.
2817 | dsll TMP0, CARG2, 1 // Shift the sign bit out of arg 2.
2818 | or TMP1, AT, TMP0
2819 | beqz TMP1, >8 // Both args +-0: return 1.
2820 |. lui TMP1, 0xffe0
2821 | dsll TMP1, TMP1, 32 // TMP1 = 0xffe0000000000000.
2822 | sltu AT, TMP1, AT
2823 | sltu TMP0, TMP1, TMP0
2824 | or TMP1, AT, TMP0
2825 | bnez TMP1, >9 // Either arg is NaN: return 0.
2826 |. and AT, CARG1, CARG2
2827 | xor AT, AT, TMP3 // The sign bit of TMP3 inverts the test below.
2828 | bltz AT, >5 // Both args negative (sense flipped by the sign bit of TMP3)?
2829 |. nop
2830 | jr ra
2831 |. slt CRET1, CARG2, CARG1
2832 |5: // Swap conditions if both operands are negative.
2833 | jr ra
2834 |. slt CRET1, CARG1, CARG2
2835 |8:
2836 | jr ra
2837 |. li CRET1, 1
2838 |9:
2839 | jr ra
2840 |. li CRET1, 0
2841 |.endif
2842 |
2843 |.macro sfmin_max, name, intins, intinsc
2844 |->vm_sf .. name:
2845 |.if JIT and not FPU
2846 | move TMP2, ra // Keep ra in TMP2 across the nested bal.
2847 | bal ->vm_sfcmpolt
2848 |. nop
2849 | move ra, TMP2 // Restore the return address.
2850 | move TMP0, CRET1 // TMP0 = comparison result from vm_sfcmpolt.
2851 | move CRET1, CARG1 // Start with CARG1 as the result.
2852 |.if MIPSR6
2853 | intins CRET1, CRET1, TMP0 // Select on TMP0: keep or zero CARG1.
2854 | intinsc TMP0, CARG2, TMP0 // Complementary select: keep or zero CARG2.
2855 | jr ra
2856 |. or CRET1, CRET1, TMP0 // Combine the two selections.
2857 |.else
2858 | jr ra
2859 |. intins CRET1, CARG2, TMP0 // Conditionally replace the result with CARG2.
2860 |.endif
2861 |.endif
2862 |.endmacro
2863 |
2864 |.if MIPSR6
2865 | sfmin_max min, selnez, seleqz
2866 | sfmin_max max, seleqz, selnez
2867 |.else
2868 | sfmin_max min, movz, _
2869 | sfmin_max max, movn, _
2870 |.endif
2871 |
2872 |//-----------------------------------------------------------------------
2873 |//-- Miscellaneous functions --------------------------------------------
2874 |//-----------------------------------------------------------------------
2875 |
2876 |//-----------------------------------------------------------------------
2877 |//-- FFI helper functions -----------------------------------------------
2878 |//-----------------------------------------------------------------------
2879 |
2880 |// Handler for callback functions. Callback slot number in r1, g in r2.
2881 |->vm_ffi_callback:
2882 |.if FFI
2883 |.type CTSTATE, CTState, PC
2884 | saveregs
2885 | ld CTSTATE, GL:r2->ctype_state
2886 | daddiu DISPATCH, r2, GG_G2DISP // Derive DISPATCH from g.
2887 | load_got lj_ccallback_enter
2888 | sw r1, CTSTATE->cb.slot // Record the callback slot number.
2889 | sd CARG1, CTSTATE->cb.gpr[0] // Store CARG1..CARG8 to cb.gpr[0..7] ...
2890 | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] // ... and, with FPU, FARG1..FARG8 to cb.fpr[0..7].
2891 | sd CARG2, CTSTATE->cb.gpr[1]
2892 | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
2893 | sd CARG3, CTSTATE->cb.gpr[2]
2894 | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2]
2895 | sd CARG4, CTSTATE->cb.gpr[3]
2896 | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3]
2897 | sd CARG5, CTSTATE->cb.gpr[4]
2898 | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4]
2899 | sd CARG6, CTSTATE->cb.gpr[5]
2900 | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5]
2901 | sd CARG7, CTSTATE->cb.gpr[6]
2902 | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6]
2903 | sd CARG8, CTSTATE->cb.gpr[7]
2904 | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7]
2905 | daddiu TMP0, sp, CFRAME_SPACE
2906 | sd TMP0, CTSTATE->cb.stack // cb.stack = sp + CFRAME_SPACE.
2907 | sd r0, SAVE_PC // Any value outside of bytecode is ok.
2908 | move CARG2, sp
2909 | call_intern lj_ccallback_enter // (CTState *cts, void *cf)
2910 |. move CARG1, CTSTATE
2911 | // Returns lua_State *.
2912 | ld BASE, L:CRET1->base
2913 | ld RC, L:CRET1->top
2914 | move L, CRET1
2915 | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2916 | ld LFUNC:RB, FRAME_FUNC(BASE)
2917 | .FPU mtc1 TMP3, TOBIT
2918 | li TISNIL, LJ_TNIL
2919 | li TISNUM, LJ_TISNUM
2920 | li_vmstate INTERP
2921 | subu RC, RC, BASE // RC = top - base.
2922 | cleartp LFUNC:RB
2923 | st_vmstate
2924 | .FPU cvt.d.s TOBIT, TOBIT // Widen the float constant to double.
2925 | ins_callt
2926 |.endif
2927 |
2928 |->cont_ffi_callback: // Return from FFI callback.
2929 |.if FFI
2930 | load_got lj_ccallback_leave
2931 | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
2932 | sd BASE, L->base
2933 | sd RB, L->top
2934 | sd L, CTSTATE->L
2935 | move CARG2, RA // o = RA.
2936 | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
2937 |. move CARG1, CTSTATE
2938 | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] // Load return registers from cb.fpr[0..1] / cb.gpr[0..1].
2939 | ld CRET1, CTSTATE->cb.gpr[0]
2940 | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
2941 | b ->vm_leave_unw
2942 |. ld CRET2, CTSTATE->cb.gpr[1]
2943 |.endif
2944 |
2945 |->vm_ffi_call: // Call C function via FFI.
2946 | // Caveat: needs special frame unwinding, see below.
2947 |.if FFI
2948 | .type CCSTATE, CCallState, CARG1
2949 | lw TMP1, CCSTATE->spadj
2950 | lbu CARG2, CCSTATE->nsp
2951 | move TMP2, sp // TMP2 = sp on entry.
2952 | dsubu sp, sp, TMP1 // Lower sp by spadj.
2953 | sd ra, -8(TMP2) // Save ra, r16 and CCSTATE just below the entry sp.
2954 | sll CARG2, CARG2, 3 // CARG2 = nsp*8 (byte length of the copy below).
2955 | sd r16, -16(TMP2)
2956 | sd CCSTATE, -24(TMP2)
2957 | move r16, TMP2 // r16 = entry sp; used to locate the saves after the call.
2958 | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) // Copy source.
2959 | move TMP2, sp // Copy destination.
2960 | beqz CARG2, >2 // Nothing to copy.
2961 |. daddu TMP3, TMP1, CARG2 // Delay slot: TMP3 = end of the copy source.
2962 |1: // Copy CCallState.stack to the new stack area, 8 bytes per iteration.
2963 | ld TMP0, 0(TMP1)
2964 | daddiu TMP1, TMP1, 8
2965 | sltu AT, TMP1, TMP3
2966 | sd TMP0, 0(TMP2)
2967 | bnez AT, <1
2968 |. daddiu TMP2, TMP2, 8
2969 |2:
2970 | ld CFUNCADDR, CCSTATE->func
2971 | .FPU ldc1 FARG1, CCSTATE->gpr[0] // FP argument registers are loaded from the same gpr[] slots.
2972 | ld CARG2, CCSTATE->gpr[1]
2973 | .FPU ldc1 FARG2, CCSTATE->gpr[1]
2974 | ld CARG3, CCSTATE->gpr[2]
2975 | .FPU ldc1 FARG3, CCSTATE->gpr[2]
2976 | ld CARG4, CCSTATE->gpr[3]
2977 | .FPU ldc1 FARG4, CCSTATE->gpr[3]
2978 | ld CARG5, CCSTATE->gpr[4]
2979 | .FPU ldc1 FARG5, CCSTATE->gpr[4]
2980 | ld CARG6, CCSTATE->gpr[5]
2981 | .FPU ldc1 FARG6, CCSTATE->gpr[5]
2982 | ld CARG7, CCSTATE->gpr[6]
2983 | .FPU ldc1 FARG7, CCSTATE->gpr[6]
2984 | ld CARG8, CCSTATE->gpr[7]
2985 | .FPU ldc1 FARG8, CCSTATE->gpr[7]
2986 | jalr CFUNCADDR
2987 |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
2988 | ld CCSTATE:TMP1, -24(r16) // Reload the saved CCSTATE pointer.
2989 | ld TMP2, -16(r16) // Saved r16, restored in the final delay slot.
2990 | ld ra, -8(r16)
2991 | sd CRET1, CCSTATE:TMP1->gpr[0]
2992 | sd CRET2, CCSTATE:TMP1->gpr[1]
2993 |.if FPU
2994 | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
2995 | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
2996 |.else
2997 | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float.
2998 |.endif
2999 | move sp, r16 // Restore the entry sp.
3000 | jr ra
3001 |. move r16, TMP2
3002 |.endif
3003 |// Note: vm_ffi_call must be the last function in this object file!
3004 |
3005 |//-----------------------------------------------------------------------
3006}
3007
3008/* Generate the code for a single instruction. */
3009static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3010{
3011 int vk = 0;
3012 |=>defop:
3013
3014 switch (op) {
3015
3016 /* -- Comparison ops ---------------------------------------------------- */
3017
3018 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3019
3020 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3021 | // RA = src1*8, RD = src2*8, JMP with RD = target
3022 |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp
3023 | daddu RA, BASE, RA
3024 | daddu RD, BASE, RD
3025 | ld ARGRA, 0(RA)
3026 | ld ARGRD, 0(RD)
3027 | lhu TMP2, OFS_RD(PC) // Fetch the RD field of the instruction at PC (the JMP).
3028 | gettp CARG3, ARGRA
3029 | gettp CARG4, ARGRD
3030 | bne CARG3, TISNUM, >2
3031 |. daddiu PC, PC, 4 // Delay slot: step PC past that instruction.
3032 | bne CARG4, TISNUM, >5
3033 |. decode_RD4b TMP2
3034 | sextw ARGRA, ARGRA
3035 | sextw ARGRD, ARGRD
3036 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) // Bias term derived from BCBIAS_J.
3037 | slt AT, CARG1, CARG2 // Always CARG1 < CARG2; operand order is chosen via ARGRA/ARGRD.
3038 | addu TMP2, TMP2, TMP3
3039 |.if MIPSR6
3040 | movop TMP2, TMP2, AT
3041 |.else
3042 | movop TMP2, r0, AT
3043 |.endif
3044 |1:
3045 | daddu PC, PC, TMP2 // Apply the (possibly zeroed) jump offset.
3046 | ins_next
3047 |
3048 |2: // RA is not an integer.
3049 | sltiu AT, CARG3, LJ_TISNUM
3050 | beqz AT, ->vmeta_comp
3051 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3052 | sltiu AT, CARG4, LJ_TISNUM
3053 | beqz AT, >4
3054 |. decode_RD4b TMP2
3055 |.if FPU
3056 | ldc1 FRA, 0(RA)
3057 | ldc1 FRD, 0(RD)
3058 |.endif
3059 |3: // RA and RD are both numbers.
3060 |.if FPU
3061 |.if MIPSR6
3062 | fcomp FTMP0, FTMP0, FTMP2
3063 | addu TMP2, TMP2, TMP3
3064 | mfc1 TMP3, FTMP0
3065 | b <1
3066 |. fmovop TMP2, TMP2, TMP3
3067 |.else
3068 | fcomp FTMP0, FTMP2
3069 | addu TMP2, TMP2, TMP3
3070 | b <1
3071 |. fmovop TMP2, r0
3072 |.endif
3073 |.else
3074 | bal sfcomp // Soft-float: call the comparison helper passed as sfcomp.
3075 |. addu TMP2, TMP2, TMP3
3076 | b <1
3077 |.if MIPSR6
3078 |. movop TMP2, TMP2, CRET1
3079 |.else
3080 |. movop TMP2, r0, CRET1
3081 |.endif
3082 |.endif
3083 |
3084 |4: // RA is a number, RD is not a number.
3085 | bne CARG4, TISNUM, ->vmeta_comp
3086 | // RA is a number, RD is an integer. Convert RD to a number.
3087 |.if FPU
3088 |. lwc1 FRD, LO(RD)
3089 | ldc1 FRA, 0(RA)
3090 | b <3
3091 |. cvt.d.w FRD, FRD
3092 |.else
3093 |.if "ARGRD" == "CARG1"
3094 |. sextw CARG1, CARG1
3095 | bal ->vm_sfi2d_1
3096 |. nop
3097 |.else
3098 |. sextw CARG2, CARG2
3099 | bal ->vm_sfi2d_2
3100 |. nop
3101 |.endif
3102 | b <3
3103 |. nop
3104 |.endif
3105 |
3106 |5: // RA is an integer, RD is not an integer
3107 | sltiu AT, CARG4, LJ_TISNUM
3108 | beqz AT, ->vmeta_comp
3109 |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3110 | // RA is an integer, RD is a number. Convert RA to a number.
3111 |.if FPU
3112 | lwc1 FRA, LO(RA)
3113 | ldc1 FRD, 0(RD)
3114 | b <3
3115 | cvt.d.w FRA, FRA
3116 |.else
3117 |.if "ARGRA" == "CARG1"
3118 | bal ->vm_sfi2d_1
3119 |. sextw CARG1, CARG1
3120 |.else
3121 | bal ->vm_sfi2d_2
3122 |. sextw CARG2, CARG2
3123 |.endif
3124 | b <3
3125 |. nop
3126 |.endif
3127 |.endmacro
3128 |
3129 |.if MIPSR6
3130 if (op == BC_ISLT) {
3131 | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
3132 } else if (op == BC_ISGE) {
3133 | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
3134 } else if (op == BC_ISLE) {
3135 | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
3136 } else {
3137 | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
3138 }
3139 |.else
3140 if (op == BC_ISLT) {
3141 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
3142 } else if (op == BC_ISGE) {
3143 | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
3144 } else if (op == BC_ISLE) {
3145 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
3146 } else {
3147 | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
3148 }
3149 |.endif
3150 break;
3151
3152 case BC_ISEQV: case BC_ISNEV:
3153 vk = op == BC_ISEQV;
3154 | // RA = src1*8, RD = src2*8, JMP with RD = target
3155 | daddu RA, BASE, RA
3156 | daddiu PC, PC, 4
3157 | daddu RD, BASE, RD
3158 | ld CARG1, 0(RA)
3159 | lhu TMP2, -4+OFS_RD(PC) // -4 compensates for the PC increment above.
3160 | ld CARG2, 0(RD)
3161 | gettp CARG3, CARG1
3162 | gettp CARG4, CARG2
3163 | sltu AT, TISNUM, CARG3 // AT = 1 if the tag of RA is above TISNUM (unsigned).
3164 | sltu TMP1, TISNUM, CARG4 // TMP1 = 1 if the tag of RD is above TISNUM (unsigned).
3165 | or AT, AT, TMP1
3166 if (vk) {
3167 | beqz AT, ->BC_ISEQN_Z // Neither tag is above TISNUM: use the numeric compare.
3168 } else {
3169 | beqz AT, ->BC_ISNEN_Z // Neither tag is above TISNUM: use the numeric compare.
3170 }
3171 | // Either or both types are not numbers.
3172 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3173 |.if FFI
3174 |. li AT, LJ_TCDATA
3175 | beq CARG3, AT, ->vmeta_equal_cd
3176 |.endif
3177 | decode_RD4b TMP2
3178 |.if FFI
3179 | beq CARG4, AT, ->vmeta_equal_cd
3180 |. nop
3181 |.endif
3182 | bne CARG1, CARG2, >2
3183 |. addu TMP2, TMP2, TMP3
3184 | // Tag and value are equal.
3185 if (vk) {
3186 |->BC_ISEQV_Z:
3187 | daddu PC, PC, TMP2
3188 }
3189 |1:
3190 | ins_next
3191 |
3192 |2: // Check if the tags are the same and it's a table or userdata.
3193 | xor AT, CARG3, CARG4 // Same type?
3194 | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
3195 |.if MIPSR6
3196 | seleqz TMP0, TMP0, AT
3197 |.else
3198 | movn TMP0, r0, AT
3199 |.endif
3200 if (vk) {
3201 | beqz TMP0, <1
3202 } else {
3203 | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
3204 }
3205 | // Different tables or userdatas. Need to check __eq metamethod.
3206 | // Field metatable must be at same offset for GCtab and GCudata!
3207 |. cleartp TAB:TMP1, CARG1
3208 | ld TAB:TMP3, TAB:TMP1->metatable
3209 if (vk) {
3210 | beqz TAB:TMP3, <1 // No metatable?
3211 |. nop
3212 | lbu TMP3, TAB:TMP3->nomm
3213 | andi TMP3, TMP3, 1<<MM_eq
3214 | bnez TMP3, >1 // Or 'no __eq' flag set?
3215 } else {
3216 | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
3217 |. nop
3218 | lbu TMP3, TAB:TMP3->nomm
3219 | andi TMP3, TMP3, 1<<MM_eq
3220 | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
3221 }
3222 |. nop
3223 | b ->vmeta_equal // Handle __eq metamethod.
3224 |. li TMP0, 1-vk // ne = 0 or 1.
3225 break;
3226
3227 case BC_ISEQS: case BC_ISNES:
3228 vk = op == BC_ISEQS;
3229 | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
3230 | daddu RA, BASE, RA
3231 | daddiu PC, PC, 4
3232 | ld CARG1, 0(RA)
3233 | dsubu RD, KBASE, RD
3234 | lhu TMP2, -4+OFS_RD(PC) // -4 compensates for the PC increment above.
3235 | ld CARG2, -8(RD) // KBASE-8-str_const*8
3236 |.if FFI
3237 | gettp TMP0, CARG1
3238 | li AT, LJ_TCDATA
3239 |.endif
3240 | li TMP1, LJ_TSTR
3241 | decode_RD4b TMP2
3242 |.if FFI
3243 | beq TMP0, AT, ->vmeta_equal_cd
3244 |.endif
3245 |. settp CARG2, TMP1 // Tag the loaded constant with LJ_TSTR.
3246 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3247 | xor TMP1, CARG1, CARG2 // Zero iff the two tagged values are identical.
3248 | addu TMP2, TMP2, TMP3
3249 |.if MIPSR6
3250 if (vk) {
3251 | seleqz TMP2, TMP2, TMP1
3252 } else {
3253 | selnez TMP2, TMP2, TMP1
3254 }
3255 |.else
3256 if (vk) {
3257 | movn TMP2, r0, TMP1 // ISEQS: zero the offset when the values differ.
3258 } else {
3259 | movz TMP2, r0, TMP1 // ISNES: zero the offset when the values are equal.
3260 }
3261 |.endif
3262 | daddu PC, PC, TMP2
3263 | ins_next
3264 break;
3265
3266 case BC_ISEQN: case BC_ISNEN:
3267 vk = op == BC_ISEQN;
3268 | // RA = src*8, RD = num_const*8, JMP with RD = target
3269 | daddu RA, BASE, RA
3270 | daddu RD, KBASE, RD
3271 | ld CARG1, 0(RA)
3272 | ld CARG2, 0(RD)
3273 | lhu TMP2, OFS_RD(PC)
3274 | gettp CARG3, CARG1
3275 | gettp CARG4, CARG2
3276 | daddiu PC, PC, 4
3277 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3278 if (vk) {
3279 |->BC_ISEQN_Z: // Also a branch target of BC_ISEQV.
3280 } else {
3281 |->BC_ISNEN_Z: // Also a branch target of BC_ISNEV.
3282 }
3283 | bne CARG3, TISNUM, >3
3284 |. decode_RD4b TMP2
3285 | bne CARG4, TISNUM, >6
3286 |. addu TMP2, TMP2, TMP3
3287 | xor AT, CARG1, CARG2 // Both integers: zero iff the tagged values are identical.
3288 |.if MIPSR6
3289 if (vk) {
3290 | seleqz TMP2, TMP2, AT
3291 |1:
3292 | daddu PC, PC, TMP2
3293 |2: // ISEQN: label 2 skips the PC adjustment.
3294 } else {
3295 | selnez TMP2, TMP2, AT
3296 |1:
3297 |2: // ISNEN: label 2 still applies the PC adjustment.
3298 | daddu PC, PC, TMP2
3299 }
3300 |.else
3301 if (vk) {
3302 | movn TMP2, r0, AT
3303 |1:
3304 | daddu PC, PC, TMP2
3305 |2: // ISEQN: label 2 skips the PC adjustment.
3306 } else {
3307 | movz TMP2, r0, AT
3308 |1:
3309 |2: // ISNEN: label 2 still applies the PC adjustment.
3310 | daddu PC, PC, TMP2
3311 }
3312 |.endif
3313 | ins_next
3314 |
3315 |3: // RA is not an integer.
3316 | sltu AT, CARG3, TISNUM
3317 |.if FFI
3318 | beqz AT, >8
3319 |.else
3320 | beqz AT, <2
3321 |.endif
3322 |. addu TMP2, TMP2, TMP3
3323 | sltu AT, CARG4, TISNUM
3324 |.if FPU
3325 | ldc1 FTMP0, 0(RA)
3326 | ldc1 FTMP2, 0(RD)
3327 |.endif
3328 | beqz AT, >5
3329 |. nop
3330 |4: // RA and RD are both numbers.
3331 |.if FPU
3332 |.if MIPSR6
3333 | cmp.eq.d FTMP0, FTMP0, FTMP2
3334 | dmfc1 TMP1, FTMP0
3335 | b <1
3336 if (vk) {
3337 |. selnez TMP2, TMP2, TMP1
3338 } else {
3339 |. seleqz TMP2, TMP2, TMP1
3340 }
3341 |.else
3342 | c.eq.d FTMP0, FTMP2
3343 | b <1
3344 if (vk) {
3345 |. movf TMP2, r0
3346 } else {
3347 |. movt TMP2, r0
3348 }
3349 |.endif
3350 |.else
3351 | bal ->vm_sfcmpeq // Soft-float: equality result arrives in CRET1.
3352 |. nop
3353 | b <1
3354 |.if MIPSR6
3355 if (vk) {
3356 |. selnez TMP2, TMP2, CRET1
3357 } else {
3358 |. seleqz TMP2, TMP2, CRET1
3359 }
3360 |.else
3361 if (vk) {
3362 |. movz TMP2, r0, CRET1
3363 } else {
3364 |. movn TMP2, r0, CRET1
3365 }
3366 |.endif
3367 |.endif
3368 |
3369 |5: // RA is a number, RD is not a number.
3370 |.if FFI
3371 | bne CARG4, TISNUM, >9
3372 |.else
3373 | bne CARG4, TISNUM, <2
3374 |.endif
3375 | // RA is a number, RD is an integer. Convert RD to a number.
3376 |.if FPU
3377 |. lwc1 FTMP2, LO(RD)
3378 | b <4
3379 |. cvt.d.w FTMP2, FTMP2
3380 |.else
3381 |. sextw CARG2, CARG2
3382 | bal ->vm_sfi2d_2
3383 |. nop
3384 | b <4
3385 |. nop
3386 |.endif
3387 |
3388 |6: // RA is an integer, RD is not an integer
3389 | sltu AT, CARG4, TISNUM
3390 |.if FFI
3391 | beqz AT, >9
3392 |.else
3393 | beqz AT, <2
3394 |.endif
3395 | // RA is an integer, RD is a number. Convert RA to a number.
3396 |.if FPU
3397 |. lwc1 FTMP0, LO(RA)
3398 | ldc1 FTMP2, 0(RD)
3399 | b <4
3400 | cvt.d.w FTMP0, FTMP0
3401 |.else
3402 |. sextw CARG1, CARG1
3403 | bal ->vm_sfi2d_1
3404 |. nop
3405 | b <4
3406 |. nop
3407 |.endif
3408 |
3409 |.if FFI
3410 |8: // RA is neither integer nor number: cdata goes to vmeta_equal_cd, else to 2.
3411 | li AT, LJ_TCDATA
3412 | bne CARG3, AT, <2
3413 |. nop
3414 | b ->vmeta_equal_cd
3415 |. nop
3416 |9: // Same check for the tag of RD.
3417 | li AT, LJ_TCDATA
3418 | bne CARG4, AT, <2
3419 |. nop
3420 | b ->vmeta_equal_cd
3421 |. nop
3422 |.endif
3423 break;
3424
3425 case BC_ISEQP: case BC_ISNEP:
3426 vk = op == BC_ISEQP;
3427 | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
3428 | daddu RA, BASE, RA
3429 | srl TMP1, RD, 3 // Undo the *8 scaling of RD.
3430 | ld TMP0, 0(RA)
3431 | lhu TMP2, OFS_RD(PC)
3432 | not TMP1, TMP1 // Complement it, giving the tag value to compare against.
3433 | gettp TMP0, TMP0
3434 | daddiu PC, PC, 4
3435 |.if FFI
3436 | li AT, LJ_TCDATA
3437 | beq TMP0, AT, ->vmeta_equal_cd
3438 |.endif
3439 |. xor TMP0, TMP0, TMP1 // Zero iff the tag of RA matches.
3440 | decode_RD4b TMP2
3441 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3442 | addu TMP2, TMP2, TMP3
3443 |.if MIPSR6
3444 if (vk) {
3445 | seleqz TMP2, TMP2, TMP0
3446 } else {
3447 | selnez TMP2, TMP2, TMP0
3448 }
3449 |.else
3450 if (vk) {
3451 | movn TMP2, r0, TMP0 // ISEQP: zero the offset on mismatch.
3452 } else {
3453 | movz TMP2, r0, TMP0 // ISNEP: zero the offset on match.
3454 }
3455 |.endif
3456 | daddu PC, PC, TMP2
3457 | ins_next
3458 break;
3459
3460 /* -- Unary test and copy ops ------------------------------------------- */
3461
3462 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3463 | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
3464 | daddu RD, BASE, RD
3465 | lhu TMP2, OFS_RD(PC)
3466 | ld TMP0, 0(RD)
3467 | daddiu PC, PC, 4
3468 | gettp TMP0, TMP0
3469 | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0 = 1 if tag < LJ_TISTRUECOND (unsigned).
3470 if (op == BC_IST || op == BC_ISF) {
3471 | decode_RD4b TMP2
3472 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3473 | addu TMP2, TMP2, TMP3
3474 |.if MIPSR6
3475 if (op == BC_IST) {
3476 | selnez TMP2, TMP2, TMP0;
3477 } else {
3478 | seleqz TMP2, TMP2, TMP0;
3479 }
3480 |.else
3481 if (op == BC_IST) {
3482 | movz TMP2, r0, TMP0 // IST: zero the offset when TMP0 == 0.
3483 } else {
3484 | movn TMP2, r0, TMP0 // ISF: zero the offset when TMP0 != 0.
3485 }
3486 |.endif
3487 | daddu PC, PC, TMP2
3488 } else {
3489 | ld CRET1, 0(RD) // Reload the full source value for the copy.
3490 if (op == BC_ISTC) {
3491 | beqz TMP0, >1 // ISTC: skip copy and jump when TMP0 == 0.
3492 } else {
3493 | bnez TMP0, >1 // ISFC: skip copy and jump when TMP0 != 0.
3494 }
3495 |. daddu RA, BASE, RA
3496 | decode_RD4b TMP2
3497 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
3498 | addu TMP2, TMP2, TMP3
3499 | sd CRET1, 0(RA) // Copy the source value to the destination slot.
3500 | daddu PC, PC, TMP2
3501 |1:
3502 }
3503 | ins_next
3504 break;
3505
3506 case BC_ISTYPE:
3507 | // RA = src*8, RD = -type*8
3508 | daddu TMP2, BASE, RA
3509 | srl TMP1, RD, 3 // Undo the *8 scaling of RD.
3510 | ld TMP0, 0(TMP2)
3511 | ins_next1
3512 | gettp TMP0, TMP0
3513 | daddu AT, TMP0, TMP1 // Tag plus operand; a nonzero sum takes the branch below.
3514 | bnez AT, ->vmeta_istype
3515 |. ins_next2
3516 break;
3517 case BC_ISNUM:
3518 | // RA = src*8, RD = -(TISNUM-1)*8
3519 | daddu TMP2, BASE, RA
3520 | ld TMP0, 0(TMP2) // Load the value to be checked.
3521 | ins_next1
3522 | checknum TMP0, ->vmeta_istype // Hand off to vmeta_istype when the check fails.
3523 |. ins_next2
3524 break;
3525
3526 /* -- Unary ops --------------------------------------------------------- */
3527
3528 case BC_MOV:
3529 | // RA = dst*8, RD = src*8
3530 | daddu RD, BASE, RD
3531 | daddu RA, BASE, RA
3532 | ld CRET1, 0(RD) // Load the 8-byte source slot.
3533 | ins_next1
3534 | sd CRET1, 0(RA) // Store it to the destination slot.
3535 | ins_next2
3536 break;
3537 case BC_NOT:
3538 | // RA = dst*8, RD = src*8
3539 | daddu RD, BASE, RD
3540 | daddu RA, BASE, RA
3541 | ld TMP0, 0(RD)
3542 | li AT, LJ_TTRUE
3543 | gettp TMP0, TMP0
3544 | sltu TMP0, AT, TMP0 // TMP0 = 1 if the tag is above LJ_TTRUE (unsigned), else 0.
3545 | addiu TMP0, TMP0, 1 // TMP0 = 1 or 2.
3546 | dsll TMP0, TMP0, 47 // Move it into the tag position (bit 47 upwards).
3547 | not TMP0, TMP0 // Complement to form the result value.
3548 | ins_next1
3549 | sd TMP0, 0(RA)
3550 | ins_next2
3551 break;
3552 case BC_UNM:
3553 | // RA = dst*8, RD = src*8
3554 | daddu RB, BASE, RD
3555 | ld CARG1, 0(RB)
3556 | daddu RA, BASE, RA
3557 | gettp CARG3, CARG1
3558 | bne CARG3, TISNUM, >2 // Not an integer: try the number path at 2.
3559 |. lui TMP1, 0x8000 // Delay slot: TMP1 = 0x8000 << 16, used on both paths.
3560 | sextw CARG1, CARG1
3561 | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
3562 |. negu CARG1, CARG1
3563 | zextw CARG1, CARG1
3564 | settp CARG1, TISNUM // Re-tag the negated value as an integer.
3565 |1:
3566 | ins_next1
3567 | sd CARG1, 0(RA)
3568 | ins_next2
3569 |2:
3570 | sltiu AT, CARG3, LJ_TISNUM
3571 | beqz AT, ->vmeta_unm // Not a number either: use the meta handler.
3572 |. dsll TMP1, TMP1, 32 // Delay slot: shift the constant up to bit 63.
3573 | b <1
3574 |. xor CARG1, CARG1, TMP1 // Flip the sign bit of the double.
3575 break;
3576 case BC_LEN:
3577 | // RA = dst*8, RD = src*8
3578 | daddu CARG2, BASE, RD
3579 | daddu RA, BASE, RA
3580 | ld TMP0, 0(CARG2)
3581 | gettp TMP1, TMP0
3582 | daddiu AT, TMP1, -LJ_TSTR // Zero iff the tag is LJ_TSTR.
3583 | bnez AT, >2 // Not a string: check for a table at 2.
3584 |. cleartp STR:CARG1, TMP0 // Delay slot: untagged pointer in CARG1 (used on both paths).
3585 | lw CRET1, STR:CARG1->len
3586 |1:
3587 | settp CRET1, TISNUM // Tag the length as an integer.
3588 | ins_next1
3589 | sd CRET1, 0(RA)
3590 | ins_next2
3591 |2:
3592 | daddiu AT, TMP1, -LJ_TTAB // Zero iff the tag is LJ_TTAB.
3593 | bnez AT, ->vmeta_len
3594 |. nop
3595#if LJ_52
3596 | ld TAB:TMP2, TAB:CARG1->metatable
3597 | bnez TAB:TMP2, >9 // Has a metatable: check its flags at 9.
3598 |. nop
3599 |3:
3600#endif
3601 |->BC_LEN_Z:
3602 | load_got lj_tab_len
3603 | call_intern lj_tab_len // (GCtab *t)
3604 |. nop
3605 | // Returns uint32_t (but less than 2^31).
3606 | b <1
3607 |. nop
3608#if LJ_52
3609 |9:
3610 | lbu TMP0, TAB:TMP2->nomm
3611 | andi TMP0, TMP0, 1<<MM_len
3612 | bnez TMP0, <3 // 'no __len' flag set: done.
3613 |. nop
3614 | b ->vmeta_len
3615 |. nop
3616#endif
3617 break;
3618
3619 /* -- Binary ops -------------------------------------------------------- */
3620
3621 |.macro fpmod, a, b, c
3622 | bal ->vm_floor // floor(b/c)
3623 |. div.d FARG1, b, c // Delay slot: FARG1 = b/c, the argument of vm_floor.
3624 | mul.d a, FRET1, c // a = floor(b/c)*c.
3625 | sub.d a, b, a // b - floor(b/c)*c
3626 |.endmacro
3627
3628 |.macro sfpmod
3629 | daddiu sp, sp, -16 // Reserve two 8-byte slots for CARG1 and CARG2.
3630 |
3631 | load_got __divdf3
3632 | sd CARG1, 0(sp)
3633 | call_extern // __divdf3 with the incoming CARG1, CARG2.
3634 |. sd CARG2, 8(sp)
3635 |
3636 | load_got floor
3637 | call_extern // floor() on the quotient.
3638 |. move CARG1, CRET1
3639 |
3640 | load_got __muldf3
3641 | move CARG1, CRET1
3642 | call_extern // __muldf3: floor result times the saved CARG2.
3643 |. ld CARG2, 8(sp)
3644 |
3645 | load_got __subdf3
3646 | ld CARG1, 0(sp)
3647 | call_extern // __subdf3: saved CARG1 minus that product.
3648 |. move CARG2, CRET1
3649 |
3650 | daddiu sp, sp, 16 // Release the two slots.
3651 |.endmacro
3652
3653 |.macro ins_arithpre, label
3654 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3655 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3656 ||switch (vk) {
3657 ||case 0:
3658 | decode_RB8a RB, INS
3659 | decode_RB8b RB
3660 | decode_RDtoRC8 RC, RD
3661 | // RA = dst*8, RB = src1*8, RC = num_const*8
3662 | daddu RB, BASE, RB
3663 |.if "label" ~= "none"
3664 | b label
3665 |.endif
3666 |. daddu RC, KBASE, RC // Follows the branch only when a label is given.
3667 || break;
3668 ||case 1:
3669 | decode_RB8a RC, INS
3670 | decode_RB8b RC
3671 | decode_RDtoRC8 RB, RD
3672 | // RA = dst*8, RB = num_const*8, RC = src1*8
3673 | daddu RC, BASE, RC
3674 |.if "label" ~= "none"
3675 | b label
3676 |.endif
3677 |. daddu RB, KBASE, RB // Follows the branch only when a label is given.
3678 || break;
3679 ||default:
3680 | decode_RB8a RB, INS
3681 | decode_RB8b RB
3682 | decode_RDtoRC8 RC, RD
3683 | // RA = dst*8, RB = src1*8, RC = src2*8
3684 | daddu RB, BASE, RB
3685 |.if "label" ~= "none"
3686 | b label
3687 |.endif
3688 |. daddu RC, BASE, RC // Follows the branch only when a label is given.
3689 || break;
3690 ||}
3691 |.endmacro
3692 |
3693 |.macro ins_arith, intins, fpins, fpcall, label // Integer path (except div), then number path, else ->vmeta_arith.
3694 | ins_arithpre none // Decode only; the optional label is defined right below.
3695 |
3696 |.if "label" ~= "none"
3697 |label:
3698 |.endif
3699 |
3700 |// Used in 5.
3701 | ld CARG1, 0(RB)
3702 | ld CARG2, 0(RC)
3703 | gettp TMP0, CARG1
3704 | gettp TMP1, CARG2
3705 |
3706 |.if "intins" ~= "div" // Division gets no integer path.
3707 |
3708 | // Check for two integers.
3709 | sextw CARG3, CARG1
3710 | bne TMP0, TISNUM, >5
3711 |. sextw CARG4, CARG2
3712 | bne TMP1, TISNUM, >5 // The first instruction of each variant below fills this delay slot.
3713 |
3714 |.if "intins" == "addu"
3715 |. intins CRET1, CARG3, CARG4
3716 | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
3717 | xor TMP2, CRET1, CARG4
3718 | and TMP1, TMP1, TMP2
3719 | bltz TMP1, ->vmeta_arith
3720 |. daddu RA, BASE, RA
3721 |.elif "intins" == "subu"
3722 |. intins CRET1, CARG3, CARG4
3723 | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
3724 | xor TMP2, CARG3, CARG4
3725 | and TMP1, TMP1, TMP2
3726 | bltz TMP1, ->vmeta_arith
3727 |. daddu RA, BASE, RA
3728 |.elif "intins" == "mult"
3729 |.if MIPSR6
3730 |. nop
3731 | mul CRET1, CARG3, CARG4 // R6: low and high parts come from separate instructions.
3732 | muh TMP2, CARG3, CARG4
3733 |.else
3734 |. intins CARG3, CARG4
3735 | mflo CRET1
3736 | mfhi TMP2
3737 |.endif
3738 | sra TMP1, CRET1, 31 // Sign of the low part, replicated.
3739 | bne TMP1, TMP2, ->vmeta_arith // High part differs from the sign fill: overflow.
3740 |. daddu RA, BASE, RA
3741 |.else // Any other intins (modi in this file): call lj_vm_modi.
3742 |. load_got lj_vm_modi
3743 | beqz CARG4, ->vmeta_arith // A zero divisor is not handled here.
3744 |. daddu RA, BASE, RA
3745 | move CARG1, CARG3
3746 | call_extern
3747 |. move CARG2, CARG4
3748 |.endif
3749 |
3750 | zextw CRET1, CRET1 // Prepare the result and tag it with TISNUM.
3751 | settp CRET1, TISNUM
3752 | ins_next1
3753 | sd CRET1, 0(RA)
3754 |3: // Shared dispatch tail, also reached from the number path.
3755 | ins_next2
3756 |
3757 |.endif
3758 |
3759 |5: // Check for two numbers.
3760 | .FPU ldc1 FTMP0, 0(RB)
3761 | sltu AT, TMP0, TISNUM // AT = 1 if the first operand's tag is below TISNUM.
3762 | sltu TMP0, TMP1, TISNUM // Same test for the second operand.
3763 | .FPU ldc1 FTMP2, 0(RC)
3764 | and AT, AT, TMP0
3765 | beqz AT, ->vmeta_arith // At least one operand failed the check.
3766 |. daddu RA, BASE, RA
3767 |
3768 |.if FPU
3769 | fpins FRET1, FTMP0, FTMP2
3770 |.elif "fpcall" == "sfpmod"
3771 | sfpmod
3772 |.else
3773 | load_got fpcall // Soft-float: operands are still in CARG1/CARG2 from the loads above.
3774 | call_extern
3775 |. nop
3776 |.endif
3777 |
3778 | ins_next1
3779 |.if "intins" ~= "div"
3780 | b <3 // Reuse ins_next2 of the integer path; the store below fills the delay slot.
3781 |.endif
3782 |.if FPU
3783 |. sdc1 FRET1, 0(RA)
3784 |.else
3785 |. sd CRET1, 0(RA)
3786 |.endif
3787 |.if "intins" == "div"
3788 | ins_next2 // No integer path exists for div, so the tail is emitted here.
3789 |.endif
3790 |
3791 |.endmacro
3792
3793 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* ins_arith selects the operand form from op. */
3794 | ins_arith addu, add.d, __adddf3, none
3795 break;
3796 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3797 | ins_arith subu, sub.d, __subdf3, none
3798 break;
3799 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3800 | ins_arith mult, mul.d, __muldf3, none
3801 break;
3802 case BC_DIVVN: /* Emits the division body and defines ->BC_DIVVN_Z. */
3803 | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
3804 break;
3805 case BC_DIVNV: case BC_DIVVV: /* Only decode operands, then branch to ->BC_DIVVN_Z. */
3806 | ins_arithpre ->BC_DIVVN_Z
3807 break;
3808 case BC_MODVN: /* Emits the modulo body and defines ->BC_MODVN_Z. */
3809 | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
3810 break;
3811 case BC_MODNV: case BC_MODVV: /* Only decode operands, then branch to ->BC_MODVN_Z. */
3812 | ins_arithpre ->BC_MODVN_Z
3813 break;
3814 case BC_POW: /* Calls pow if both operand tags are below LJ_TISNUM, else ->vmeta_arith. */
3815 | ins_arithpre none
3816 | ld CARG1, 0(RB)
3817 | ld CARG2, 0(RC)
3818 | gettp TMP0, CARG1
3819 | gettp TMP1, CARG2
3820 | sltiu TMP0, TMP0, LJ_TISNUM // 1 if the type tag is below LJ_TISNUM.
3821 | sltiu TMP1, TMP1, LJ_TISNUM
3822 | and AT, TMP0, TMP1 // Both operands must pass.
3823 | load_got pow
3824 | beqz AT, ->vmeta_arith
3825 |. daddu RA, BASE, RA
3826 |.if FPU
3827 | ldc1 FARG1, 0(RB) // With an FPU the arguments go in FP registers.
3828 | ldc1 FARG2, 0(RC)
3829 |.endif
3830 | call_extern
3831 |. nop
3832 | ins_next1
3833 |.if FPU
3834 | sdc1 FRET1, 0(RA)
3835 |.else
3836 | sd CRET1, 0(RA)
3837 |.endif
3838 | ins_next2
3839 break;
3840
3841 case BC_CAT: /* Calls lj_meta_cat on slots RB..RC, then copies slot RB to RA. */
3842 | // RA = dst*8, RB = src_start*8, RC = src_end*8
3843 | decode_RB8a RB, INS
3844 | decode_RB8b RB
3845 | decode_RDtoRC8 RC, RD
3846 | dsubu CARG3, RC, RB // (src_end-src_start)*8
3847 | sd BASE, L->base
3848 | daddu CARG2, BASE, RC // top = address of the last source slot.
3849 | move MULTRES, RB // Keep src_start*8 across the call.
3850 |->BC_CAT_Z:
3851 | load_got lj_meta_cat
3852 | srl CARG3, CARG3, 3 // left = src_end-src_start
3853 | sd PC, SAVE_PC
3854 | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left)
3855 |. move CARG1, L
3856 | // Returns NULL (finished) or TValue * (metamethod).
3857 | bnez CRET1, ->vmeta_binop
3858 |. ld BASE, L->base // Delay slot: BASE is reloaded on both paths.
3859 | daddu RB, BASE, MULTRES
3860 | ld CRET1, 0(RB) // Fetch the result from the first source slot.
3861 | daddu RA, BASE, RA
3862 | ins_next1
3863 | sd CRET1, 0(RA)
3864 | ins_next2
3865 break;
3866
3867 /* -- Constant ops ------------------------------------------------------ */
3868
3869 case BC_KSTR: /* Store a string constant, tagged with LJ_TSTR, in the dst slot. */
3870 | // RA = dst*8, RD = str_const*8 (~)
3871 | dsubu TMP1, KBASE, RD // These constants are addressed downwards from KBASE.
3872 | ins_next1
3873 | li TMP2, LJ_TSTR
3874 | ld TMP0, -8(TMP1) // KBASE-8-str_const*8
3875 | daddu RA, BASE, RA
3876 | settp TMP0, TMP2 // Apply the string tag to the loaded value.
3877 | sd TMP0, 0(RA)
3878 | ins_next2
3879 break;
3880 case BC_KCDATA: /* Like BC_KSTR, but tags with LJ_TCDATA. Empty unless FFI is set. */
3881 |.if FFI
3882 | // RA = dst*8, RD = cdata_const*8 (~)
3883 | dsubu TMP1, KBASE, RD
3884 | ins_next1
3885 | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
3886 | li TMP2, LJ_TCDATA
3887 | daddu RA, BASE, RA
3888 | settp TMP0, TMP2 // Apply the cdata tag to the loaded value.
3889 | sd TMP0, 0(RA)
3890 | ins_next2
3891 |.endif
3892 break;
3893 case BC_KSHORT: /* Store a 16 bit literal as a TISNUM-tagged value in the dst slot. */
3894 | // RA = dst*8, RD = int16_literal*8
3895 | sra RD, INS, 16 // Arithmetic shift: take the literal from the upper half of INS, keeping its sign.
3896 | daddu RA, BASE, RA
3897 | zextw RD, RD
3898 | ins_next1
3899 | settp RD, TISNUM
3900 | sd RD, 0(RA)
3901 | ins_next2
3902 break;
3903 case BC_KNUM: /* Copy a constant from KBASE+RD to the dst slot unchanged. */
3904 | // RA = dst*8, RD = num_const*8
3905 | daddu RD, KBASE, RD
3906 | daddu RA, BASE, RA
3907 | ld CRET1, 0(RD) // Copied as a raw 64 bit value.
3908 | ins_next1
3909 | sd CRET1, 0(RA)
3910 | ins_next2
3911 break;
3912 case BC_KPRI: /* Build a primitive value from the operand and store it in the dst slot. */
3913 | // RA = dst*8, RD = primitive_type*8 (~)
3914 | daddu RA, BASE, RA
3915 | dsll TMP0, RD, 44 // RD is already type*8, so this equals type << 47.
3916 | not TMP0, TMP0 // Invert all bits to form the stored value.
3917 | ins_next1
3918 | sd TMP0, 0(RA)
3919 | ins_next2
3920 break;
3921 case BC_KNIL: /* Store TISNIL to every slot from base up to and including end. */
3922 | // RA = base*8, RD = end*8
3923 | daddu RA, BASE, RA
3924 | sd TISNIL, 0(RA) // First slot is written outside the loop.
3925 | daddiu RA, RA, 8
3926 | daddu RD, BASE, RD
3927 |1:
3928 | sd TISNIL, 0(RA)
3929 | slt AT, RA, RD // Continue while the slot just written is below end.
3930 | bnez AT, <1
3931 |. daddiu RA, RA, 8
3932 | ins_next_
3933 break;
3934
3935 /* -- Upvalue and function ops ------------------------------------------ */
3936
3937 case BC_UGET: /* Copy the value an upvalue points to into the dst slot. */
3938 | // RA = dst*8, RD = uvnum*8
3939 | ld LFUNC:RB, FRAME_FUNC(BASE) // Current function from the frame.
3940 | daddu RA, BASE, RA
3941 | cleartp LFUNC:RB
3942 | daddu RD, RD, LFUNC:RB // Index the uvptr array via uvnum*8.
3943 | ld UPVAL:RB, LFUNC:RD->uvptr
3944 | ins_next1
3945 | ld TMP1, UPVAL:RB->v // Pointer to the upvalue's value.
3946 | ld CRET1, 0(TMP1)
3947 | sd CRET1, 0(RA)
3948 | ins_next2
3949 break;
3950 case BC_USETV: /* Store a stack slot into an upvalue, with a GC barrier check. */
3951 | // RA = uvnum*8, RD = src*8
3952 | ld LFUNC:RB, FRAME_FUNC(BASE)
3953 | daddu RD, BASE, RD
3954 | cleartp LFUNC:RB
3955 | daddu RA, RA, LFUNC:RB
3956 | ld UPVAL:RB, LFUNC:RA->uvptr
3957 | ld CRET1, 0(RD) // Value to store.
3958 | lbu TMP3, UPVAL:RB->marked
3959 | ld CARG2, UPVAL:RB->v // Destination; doubles as the tv argument of the barrier call.
3960 | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3961 | lbu TMP0, UPVAL:RB->closed
3962 | gettp TMP2, CRET1
3963 | sd CRET1, 0(CARG2) // The store happens before the barrier check.
3964 | li AT, LJ_GC_BLACK|1
3965 | or TMP3, TMP3, TMP0 // Combine the black bit with the closed flag.
3966 | beq TMP3, AT, >2 // Upvalue is closed and black?
3967 |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) // Delay slot: bias the tag for the range check at 2.
3968 |1:
3969 | ins_next
3970 |
3971 |2: // Check if new value is collectable.
3972 | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
3973 | beqz AT, <1 // tvisgcv(v)
3974 |. cleartp GCOBJ:CRET1, CRET1
3975 | lbu TMP3, GCOBJ:CRET1->gch.marked
3976 | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
3977 | beqz TMP3, <1
3978 |. load_got lj_gc_barrieruv
3979 | // Crossed a write barrier. Move the barrier forward.
3980 | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
3981 |. daddiu CARG1, DISPATCH, GG_DISP2G
3982 | b <1
3983 |. nop
3984 break;
3985 case BC_USETS: /* Store a tagged string constant into an upvalue, with a GC barrier check. */
3986 | // RA = uvnum*8, RD = str_const*8 (~)
3987 | ld LFUNC:RB, FRAME_FUNC(BASE)
3988 | dsubu TMP1, KBASE, RD
3989 | cleartp LFUNC:RB
3990 | daddu RA, RA, LFUNC:RB
3991 | ld UPVAL:RB, LFUNC:RA->uvptr
3992 | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
3993 | lbu TMP2, UPVAL:RB->marked
3994 | ld CARG2, UPVAL:RB->v // Destination; doubles as the tv argument of the barrier call.
3995 | lbu TMP3, STR:TMP1->marked // Read before the string pointer gets its tag below.
3996 | andi AT, TMP2, LJ_GC_BLACK // isblack(uv)
3997 | lbu TMP2, UPVAL:RB->closed
3998 | li TMP0, LJ_TSTR
3999 | settp TMP1, TMP0
4000 | bnez AT, >2
4001 |. sd TMP1, 0(CARG2) // Delay slot: the store happens on both paths.
4002 |1:
4003 | ins_next
4004 |
4005 |2: // Check if string is white and ensure upvalue is closed.
4006 | beqz TMP2, <1
4007 |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str)
4008 | beqz AT, <1
4009 |. load_got lj_gc_barrieruv
4010 | // Crossed a write barrier. Move the barrier forward.
4011 | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
4012 |. daddiu CARG1, DISPATCH, GG_DISP2G
4013 | b <1
4014 |. nop
4015 break;
4016 case BC_USETN: /* Store a constant from KBASE+RD into an upvalue. No GC barrier code is emitted. */
4017 | // RA = uvnum*8, RD = num_const*8
4018 | ld LFUNC:RB, FRAME_FUNC(BASE)
4019 | daddu RD, KBASE, RD
4020 | cleartp LFUNC:RB
4021 | daddu RA, RA, LFUNC:RB
4022 | ld UPVAL:RB, LFUNC:RA->uvptr
4023 | ld CRET1, 0(RD) // Copied as a raw 64 bit value.
4024 | ld TMP1, UPVAL:RB->v
4025 | ins_next1
4026 | sd CRET1, 0(TMP1)
4027 | ins_next2
4028 break;
4029 case BC_USETP: /* Store a primitive value into an upvalue. No GC barrier code is emitted. */
4030 | // RA = uvnum*8, RD = primitive_type*8 (~)
4031 | ld LFUNC:RB, FRAME_FUNC(BASE)
4032 | dsll TMP0, RD, 44 // RD is already type*8, so this equals type << 47.
4033 | cleartp LFUNC:RB
4034 | daddu RA, RA, LFUNC:RB
4035 | not TMP0, TMP0 // Invert all bits to form the stored value.
4036 | ld UPVAL:RB, LFUNC:RA->uvptr
4037 | ins_next1
4038 | ld TMP1, UPVAL:RB->v
4039 | sd TMP0, 0(TMP1)
4040 | ins_next2
4041 break;
4042
4043 case BC_UCLO: /* Jump to target; call lj_func_closeuv first if L->openupval is non-NULL. */
4044 | // RA = level*8, RD = target
4045 | ld TMP2, L->openupval
4046 | branch_RD // Do this first since RD is not saved.
4047 | load_got lj_func_closeuv
4048 | sd BASE, L->base
4049 | beqz TMP2, >1 // No open upvalues: skip the call.
4050 |. move CARG1, L
4051 | call_intern lj_func_closeuv // (lua_State *L, TValue *level)
4052 |. daddu CARG2, BASE, RA
4053 | ld BASE, L->base
4054 |1:
4055 | ins_next
4056 break;
4057
4058 case BC_FNEW: /* Create a closure via lj_func_newL_gc and store it, tagged LJ_TFUNC, in dst. */
4059 | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
4060 | load_got lj_func_newL_gc
4061 | dsubu TMP1, KBASE, RD
4062 | ld CARG3, FRAME_FUNC(BASE) // Parent is the current function.
4063 | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8
4064 | sd BASE, L->base
4065 | sd PC, SAVE_PC
4066 | cleartp CARG3
4067 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
4068 | call_intern lj_func_newL_gc
4069 |. move CARG1, L
4070 | // Returns GCfuncL *.
4071 | li TMP0, LJ_TFUNC
4072 | ld BASE, L->base
4073 | ins_next1
4074 | settp CRET1, TMP0
4075 | daddu RA, BASE, RA // Computed after the call, from the reloaded BASE.
4076 | sd CRET1, 0(RA)
4077 | ins_next2
4078 break;
4079
4080 /* -- Table ops --------------------------------------------------------- */
4081
4082 case BC_TNEW: /* Create (TNEW) or duplicate (TDUP) a table; runs a GC step first if needed. */
4083 case BC_TDUP:
4084 | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
4085 | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
4086 | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
4087 | sd BASE, L->base
4088 | sd PC, SAVE_PC
4089 | sltu AT, TMP0, TMP1
4090 | beqz AT, >5 // gc.total >= gc.threshold: do a GC step first.
4091 |1:
4092 if (op == BC_TNEW) {
4093 | load_got lj_tab_new
4094 | srl CARG2, RD, 3
4095 | andi CARG2, CARG2, 0x7ff // asize = low 11 bits of the operand.
4096 | li TMP0, 0x801
4097 | addiu AT, CARG2, -0x7ff // AT == 0 iff asize == 0x7ff.
4098 | srl CARG3, RD, 14 // hbits = operand >> 11 (RD is operand*8).
4099 |.if MIPSR6
4100 | seleqz TMP0, TMP0, AT
4101 | selnez CARG2, CARG2, AT
4102 | or CARG2, CARG2, TMP0
4103 |.else
4104 | movz CARG2, TMP0, AT // An asize of 0x7ff is replaced by 0x801.
4105 |.endif
4106 | // (lua_State *L, int32_t asize, uint32_t hbits)
4107 | call_intern lj_tab_new
4108 |. move CARG1, L
4109 | // Returns Table *.
4110 } else {
4111 | load_got lj_tab_dup
4112 | dsubu TMP1, KBASE, RD
4113 | move CARG1, L
4114 | call_intern lj_tab_dup // (lua_State *L, Table *kt)
4115 |. ld CARG2, -8(TMP1) // KBASE-8-tab_const*8
4116 | // Returns Table *.
4117 }
4118 | li TMP0, LJ_TTAB
4119 | ld BASE, L->base
4120 | ins_next1
4121 | daddu RA, BASE, RA
4122 | settp CRET1, TMP0
4123 | sd CRET1, 0(RA)
4124 | ins_next2
4125 |5:
4126 | load_got lj_gc_step_fixtop
4127 | move MULTRES, RD // Keep RD in MULTRES across the call.
4128 | call_intern lj_gc_step_fixtop // (lua_State *L)
4129 |. move CARG1, L
4130 | b <1
4131 |. move RD, MULTRES
4132 break;
4133
4134 case BC_GGET: /* Global get/set: string-keyed access on the current function's env table. */
4135 | // RA = dst*8, RD = str_const*8 (~)
4136 case BC_GSET:
4137 | // RA = src*8, RD = str_const*8 (~)
4138 | ld LFUNC:TMP2, FRAME_FUNC(BASE)
4139 | dsubu TMP1, KBASE, RD
4140 | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8
4141 | cleartp LFUNC:TMP2
4142 | ld TAB:RB, LFUNC:TMP2->env // RB and RC are now set up as the *_Z entry points expect.
4143 if (op == BC_GGET) {
4144 | b ->BC_TGETS_Z
4145 } else {
4146 | b ->BC_TSETS_Z
4147 }
4148 |. daddu RA, BASE, RA // Delay slot shared by both branches.
4149 break;
4150
4151 case BC_TGETV: /* Table get with a variable key: array fast path, string keys go to BC_TGETS_Z. */
4152 | // RA = dst*8, RB = table*8, RC = key*8
4153 | decode_RB8a RB, INS
4154 | decode_RB8b RB
4155 | decode_RDtoRC8 RC, RD
4156 | daddu CARG2, BASE, RB
4157 | daddu CARG3, BASE, RC
4158 | ld TAB:RB, 0(CARG2)
4159 | ld TMP2, 0(CARG3)
4160 | daddu RA, BASE, RA
4161 | checktab TAB:RB, ->vmeta_tgetv
4162 | gettp TMP3, TMP2
4163 | bne TMP3, TISNUM, >5 // Integer key?
4164 |. lw TMP0, TAB:RB->asize
4165 | sextw TMP2, TMP2
4166 | ld TMP1, TAB:RB->array
4167 | sltu AT, TMP2, TMP0 // Unsigned compare of the key against asize.
4168 | sll TMP2, TMP2, 3
4169 | beqz AT, ->vmeta_tgetv // Integer key and in array part?
4170 |. daddu TMP2, TMP1, TMP2
4171 | ld AT, 0(TMP2)
4172 | beq AT, TISNIL, >2
4173 |. ld CRET1, 0(TMP2)
4174 |1:
4175 | ins_next1
4176 | sd CRET1, 0(RA)
4177 | ins_next2
4178 |
4179 |2: // Check for __index if table value is nil.
4180 | ld TAB:TMP2, TAB:RB->metatable
4181 | beqz TAB:TMP2, <1 // No metatable: done.
4182 |. nop
4183 | lbu TMP0, TAB:TMP2->nomm
4184 | andi TMP0, TMP0, 1<<MM_index
4185 | bnez TMP0, <1 // 'no __index' flag set: done.
4186 |. nop
4187 | b ->vmeta_tgetv
4188 |. nop
4189 |
4190 |5:
4191 | li AT, LJ_TSTR
4192 | bne TMP3, AT, ->vmeta_tgetv
4193 |. cleartp RC, TMP2 // Delay slot: RC = key with the tag cleared, as BC_TGETS_Z expects.
4194 | b ->BC_TGETS_Z // String key?
4195 |. nop
4196 break;
4197 case BC_TGETS: /* Table get with a string constant key: walks the hash chain inline. */
4198 | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
4199 | decode_RB8a RB, INS
4200 | decode_RB8b RB
4201 | decode_RC8a RC, INS
4202 | daddu CARG2, BASE, RB
4203 | decode_RC8b RC
4204 | ld TAB:RB, 0(CARG2)
4205 | dsubu CARG3, KBASE, RC
4206 | daddu RA, BASE, RA
4207 | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8
4208 | checktab TAB:RB, ->vmeta_tgets1
4209 |->BC_TGETS_Z:
4210 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
4211 | lw TMP0, TAB:RB->hmask
4212 | lw TMP1, STR:RC->hash
4213 | ld NODE:TMP2, TAB:RB->node
4214 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
4215 | sll TMP0, TMP1, 5
4216 | sll TMP1, TMP1, 3
4217 | subu TMP1, TMP0, TMP1 // idx*32 - idx*8 = idx*24
4218 | li TMP3, LJ_TSTR
4219 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4220 | settp STR:RC, TMP3 // Tagged key to look for.
4221 |1:
4222 | ld CARG1, NODE:TMP2->key
4223 | ld CRET1, NODE:TMP2->val
4224 | ld NODE:TMP1, NODE:TMP2->next
4225 | bne CARG1, RC, >4
4226 |. ld TAB:TMP3, TAB:RB->metatable // Delay slot: loaded on both paths, used at 5.
4227 | beq CRET1, TISNIL, >5 // Key found, but nil value?
4228 |. nop
4229 |3:
4230 | ins_next1
4231 | sd CRET1, 0(RA)
4232 | ins_next2
4233 |
4234 |4: // Follow hash chain.
4235 | bnez NODE:TMP1, <1
4236 |. move NODE:TMP2, NODE:TMP1
4237 | // End of hash chain: key not found, nil result.
4238 |
4239 |5: // Check for __index if table value is nil.
4240 | beqz TAB:TMP3, <3 // No metatable: done.
4241 |. move CRET1, TISNIL // Delay slot: result is nil (needed when falling in from 4).
4242 | lbu TMP0, TAB:TMP3->nomm
4243 | andi TMP0, TMP0, 1<<MM_index
4244 | bnez TMP0, <3 // 'no __index' flag set: done.
4245 |. nop
4246 | b ->vmeta_tgets
4247 |. nop
4248 break;
4249 case BC_TGETB: /* Table get with a literal index operand: array part only. */
4250 | // RA = dst*8, RB = table*8, RC = index*8
4251 | decode_RB8a RB, INS
4252 | decode_RB8b RB
4253 | daddu CARG2, BASE, RB
4254 | decode_RDtoRC8 RC, RD
4255 | ld TAB:RB, 0(CARG2)
4256 | daddu RA, BASE, RA
4257 | srl TMP0, RC, 3 // TMP0 = index
4258 | checktab TAB:RB, ->vmeta_tgetb
4259 | lw TMP1, TAB:RB->asize
4260 | ld TMP2, TAB:RB->array
4261 | sltu AT, TMP0, TMP1
4262 | beqz AT, ->vmeta_tgetb // Index is outside the array part.
4263 |. daddu RC, TMP2, RC // RC is index*8, i.e. already the byte offset.
4264 | ld AT, 0(RC)
4265 | beq AT, TISNIL, >5
4266 |. ld CRET1, 0(RC)
4267 |1:
4268 | ins_next1
4269 | sd CRET1, 0(RA)
4270 | ins_next2
4271 |
4272 |5: // Check for __index if table value is nil.
4273 | ld TAB:TMP2, TAB:RB->metatable
4274 | beqz TAB:TMP2, <1 // No metatable: done.
4275 |. nop
4276 | lbu TMP1, TAB:TMP2->nomm
4277 | andi TMP1, TMP1, 1<<MM_index
4278 | bnez TMP1, <1 // 'no __index' flag set: done.
4279 |. nop
4280 | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
4281 |. nop
4282 break;
4283 case BC_TGETR: /* Array-part get without type or nil checks; out of range goes to ->vmeta_tgetr. */
4284 | // RA = dst*8, RB = table*8, RC = key*8
4285 | decode_RB8a RB, INS
4286 | decode_RB8b RB
4287 | decode_RDtoRC8 RC, RD
4288 | daddu RB, BASE, RB
4289 | daddu RC, BASE, RC
4290 | ld TAB:CARG1, 0(RB)
4291 | lw CARG2, LO(RC) // Only the low word of the key slot is read.
4292 | daddu RA, BASE, RA
4293 | cleartp TAB:CARG1 // The tag is cleared without being checked.
4294 | lw TMP0, TAB:CARG1->asize
4295 | ld TMP1, TAB:CARG1->array
4296 | sltu AT, CARG2, TMP0
4297 | sll TMP2, CARG2, 3
4298 | beqz AT, ->vmeta_tgetr // In array part?
4299 |. daddu CRET1, TMP1, TMP2
4300 | ld CARG2, 0(CRET1)
4301 |->BC_TGETR_Z:
4302 | ins_next1
4303 | sd CARG2, 0(RA) // Stores CARG2; code entering at BC_TGETR_Z must provide it.
4304 | ins_next2
4305 break;
4306
4307 case BC_TSETV: /* Table set with a variable key: array fast path, string keys go to BC_TSETS_Z. */
4308 | // RA = src*8, RB = table*8, RC = key*8
4309 | decode_RB8a RB, INS
4310 | decode_RB8b RB
4311 | decode_RDtoRC8 RC, RD
4312 | daddu CARG2, BASE, RB
4313 | daddu CARG3, BASE, RC
4314 | ld RB, 0(CARG2)
4315 | ld TMP2, 0(CARG3)
4316 | daddu RA, BASE, RA
4317 | checktab RB, ->vmeta_tsetv
4318 | checkint TMP2, >5
4319 |. sextw RC, TMP2
4320 | lw TMP0, TAB:RB->asize
4321 | ld TMP1, TAB:RB->array
4322 | sltu AT, RC, TMP0 // Unsigned compare of the key against asize.
4323 | sll TMP2, RC, 3
4324 | beqz AT, ->vmeta_tsetv // Integer key and in array part?
4325 |. daddu TMP1, TMP1, TMP2
4326 | ld TMP0, 0(TMP1) // Previous value of the array slot.
4327 | lbu TMP3, TAB:RB->marked
4328 | beq TMP0, TISNIL, >3
4329 |. ld CRET1, 0(RA) // Delay slot: value to store.
4330 |1:
4331 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4332 | bnez AT, >7
4333 |. sd CRET1, 0(TMP1) // Delay slot: the store happens on both paths.
4334 |2:
4335 | ins_next
4336 |
4337 |3: // Check for __newindex if previous value is nil.
4338 | ld TAB:TMP2, TAB:RB->metatable
4339 | beqz TAB:TMP2, <1 // No metatable: done.
4340 |. nop
4341 | lbu TMP2, TAB:TMP2->nomm
4342 | andi TMP2, TMP2, 1<<MM_newindex
4343 | bnez TMP2, <1 // 'no __newindex' flag set: done.
4344 |. nop
4345 | b ->vmeta_tsetv
4346 |. nop
4347 |
4348 |5:
4349 | gettp AT, TMP2
4350 | daddiu AT, AT, -LJ_TSTR
4351 | bnez AT, ->vmeta_tsetv
4352 |. nop
4353 | b ->BC_TSETS_Z // String key?
4354 |. cleartp STR:RC, TMP2 // Delay slot: RC = key with the tag cleared, as BC_TSETS_Z expects.
4355 |
4356 |7: // Possible table write barrier for the value. Skip valiswhite check.
4357 | barrierback TAB:RB, TMP3, TMP0, <2
4358 break;
4359 case BC_TSETS: /* Table set with a string constant key: inline hash lookup, new keys via lj_tab_newkey. */
4360 | // RA = src*8, RB = table*8, RC = str_const*8 (~)
4361 | decode_RB8a RB, INS
4362 | decode_RB8b RB
4363 | daddu CARG2, BASE, RB
4364 | decode_RC8a RC, INS
4365 | ld TAB:RB, 0(CARG2)
4366 | decode_RC8b RC
4367 | dsubu CARG3, KBASE, RC
4368 | ld RC, -8(CARG3) // KBASE-8-str_const*8
4369 | daddu RA, BASE, RA
4370 | cleartp STR:RC
4371 | checktab TAB:RB, ->vmeta_tsets1
4372 |->BC_TSETS_Z:
4373 | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
4374 | lw TMP0, TAB:RB->hmask
4375 | lw TMP1, STR:RC->hash
4376 | ld NODE:TMP2, TAB:RB->node
4377 | sb r0, TAB:RB->nomm // Clear metamethod cache.
4378 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
4379 | sll TMP0, TMP1, 5
4380 | sll TMP1, TMP1, 3
4381 | subu TMP1, TMP0, TMP1 // idx*32 - idx*8 = idx*24
4382 | li TMP3, LJ_TSTR
4383 | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
4384 | settp STR:RC, TMP3 // Tagged key to look for.
4385 |.if FPU
4386 | ldc1 FTMP0, 0(RA) // Value to store, held in an FP register.
4387 |.else
4388 | ld CRET1, 0(RA)
4389 |.endif
4390 |1:
4391 | ld TMP0, NODE:TMP2->key
4392 | ld CARG2, NODE:TMP2->val
4393 | ld NODE:TMP1, NODE:TMP2->next
4394 | bne TMP0, RC, >5
4395 |. lbu TMP3, TAB:RB->marked
4396 | beq CARG2, TISNIL, >4 // Key found, but nil value?
4397 |. ld TAB:TMP0, TAB:RB->metatable
4398 |2:
4399 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4400 | bnez AT, >7
4401 |.if FPU
4402 |. sdc1 FTMP0, NODE:TMP2->val
4403 |.else
4404 |. sd CRET1, NODE:TMP2->val
4405 |.endif
4406 |3:
4407 | ins_next
4408 |
4409 |4: // Check for __newindex if previous value is nil.
4410 | beqz TAB:TMP0, <2 // No metatable: done.
4411 |. nop
4412 | lbu TMP0, TAB:TMP0->nomm
4413 | andi TMP0, TMP0, 1<<MM_newindex
4414 | bnez TMP0, <2 // 'no __newindex' flag set: done.
4415 |. nop
4416 | b ->vmeta_tsets
4417 |. nop
4418 |
4419 |5: // Follow hash chain.
4420 | bnez NODE:TMP1, <1
4421 |. move NODE:TMP2, NODE:TMP1
4422 | // End of hash chain: key not found, add a new one
4423 |
4424 | // But check for __newindex first.
4425 | ld TAB:TMP2, TAB:RB->metatable
4426 | beqz TAB:TMP2, >6 // No metatable: continue.
4427 |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) // Delay slot: CARG3 = address of the tmptv slot.
4428 | lbu TMP0, TAB:TMP2->nomm
4429 | andi TMP0, TMP0, 1<<MM_newindex
4430 | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4431 |6:
4432 | load_got lj_tab_newkey
4433 | sd RC, 0(CARG3) // Store the tagged key in tmptv; CARG3 is the k argument.
4434 | sd BASE, L->base
4435 | move CARG2, TAB:RB
4436 | sd PC, SAVE_PC
4437 | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4438 |. move CARG1, L
4439 | // Returns TValue *.
4440 | ld BASE, L->base
4441 |.if FPU
4442 | b <3 // No 2nd write barrier needed.
4443 |. sdc1 FTMP0, 0(CRET1)
4444 |.else
4445 | ld CARG1, 0(RA) // The value is reloaded from the source slot after the call.
4446 | b <3 // No 2nd write barrier needed.
4447 |. sd CARG1, 0(CRET1)
4448 |.endif
4449 |
4450 |7: // Possible table write barrier for the value. Skip valiswhite check.
4451 | barrierback TAB:RB, TMP3, TMP0, <3
4452 break;
4453 case BC_TSETB: /* Table set with a literal index operand: array part only. */
4454 | // RA = src*8, RB = table*8, RC = index*8
4455 | decode_RB8a RB, INS
4456 | decode_RB8b RB
4457 | daddu CARG2, BASE, RB
4458 | decode_RDtoRC8 RC, RD
4459 | ld TAB:RB, 0(CARG2)
4460 | daddu RA, BASE, RA
4461 | srl TMP0, RC, 3 // TMP0 = index
4462 | checktab RB, ->vmeta_tsetb
4463 | lw TMP1, TAB:RB->asize
4464 | ld TMP2, TAB:RB->array
4465 | sltu AT, TMP0, TMP1
4466 | beqz AT, ->vmeta_tsetb // Index is outside the array part.
4467 |. daddu RC, TMP2, RC // RC is index*8, i.e. already the byte offset.
4468 | ld TMP1, 0(RC) // Previous value of the array slot.
4469 | lbu TMP3, TAB:RB->marked
4470 | beq TMP1, TISNIL, >5
4471 |1: // Label sits on the delay-slot instruction, so re-entry from 5 also loads the value.
4472 |. ld CRET1, 0(RA)
4473 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4474 | bnez AT, >7
4475 |. sd CRET1, 0(RC) // Delay slot: the store happens on both paths.
4476 |2:
4477 | ins_next
4478 |
4479 |5: // Check for __newindex if previous value is nil.
4480 | ld TAB:TMP2, TAB:RB->metatable
4481 | beqz TAB:TMP2, <1 // No metatable: done.
4482 |. nop
4483 | lbu TMP1, TAB:TMP2->nomm
4484 | andi TMP1, TMP1, 1<<MM_newindex
4485 | bnez TMP1, <1 // 'no __newindex' flag set: done.
4486 |. nop
4487 | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
4488 |. nop
4489 |
4490 |7: // Possible table write barrier for the value. Skip valiswhite check.
4491 | barrierback TAB:RB, TMP3, TMP0, <2
4492 break;
4493 case BC_TSETR: /* Array-part set without type or nil checks; out of range goes to ->vmeta_tsetr. */
4494 | // RA = src*8, RB = table*8, RC = key*8
4495 | decode_RB8a RB, INS
4496 | decode_RB8b RB
4497 | decode_RDtoRC8 RC, RD
4498 | daddu CARG1, BASE, RB
4499 | daddu CARG3, BASE, RC
4500 | ld TAB:CARG2, 0(CARG1)
4501 | lw CARG3, LO(CARG3) // Only the low word of the key slot is read.
4502 | cleartp TAB:CARG2 // The tag is cleared without being checked.
4503 | lbu TMP3, TAB:CARG2->marked
4504 | lw TMP0, TAB:CARG2->asize
4505 | ld TMP1, TAB:CARG2->array
4506 | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
4507 | bnez AT, >7 // Unlike the other TSET* ops, the barrier check comes before the store.
4508 |. daddu RA, BASE, RA
4509 |2:
4510 | sltu AT, CARG3, TMP0
4511 | sll TMP2, CARG3, 3
4512 | beqz AT, ->vmeta_tsetr // In array part?
4513 |. daddu CRET1, TMP1, TMP2
4514 |->BC_TSETR_Z:
4515 | ld CARG1, 0(RA) // Value to store, read from the source slot.
4516 | ins_next1
4517 | sd CARG1, 0(CRET1) // CRET1 = destination; code entering at BC_TSETR_Z must provide it.
4518 | ins_next2
4519 |
4520 |7: // Possible table write barrier for the value. Skip valiswhite check.
4521 | barrierback TAB:CARG2, TMP3, CRET1, <2
4522 break;
4523
4524 case BC_TSETM: /* Copy MULTRES result slots into the array part, resizing it if needed. */
4525 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
4526 | daddu RA, BASE, RA
4527 |1:
4528 | daddu TMP3, KBASE, RD
4529 | ld TAB:CARG2, -8(RA) // Guaranteed to be a table.
4530 | addiu TMP0, MULTRES, -8 // Byte size of the slots to copy.
4531 | lw TMP3, LO(TMP3) // Integer constant is in lo-word.
4532 | beqz TMP0, >4 // Nothing to copy?
4533 |. srl CARG3, TMP0, 3 // Delay slot: number of slots.
4534 | cleartp CARG2
4535 | addu CARG3, CARG3, TMP3 // start index + count; also the nasize argument at 5.
4536 | lw TMP2, TAB:CARG2->asize
4537 | sll TMP1, TMP3, 3
4538 | lbu TMP3, TAB:CARG2->marked
4539 | ld CARG1, TAB:CARG2->array
4540 | sltu AT, TMP2, CARG3
4541 | bnez AT, >5 // asize < start index + count: resize first.
4542 |. daddu TMP2, RA, TMP0 // Delay slot: TMP2 = end of the source slots.
4543 | daddu TMP1, TMP1, CARG1 // TMP1 = address of the first destination slot.
4544 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4545 |3: // Copy result slots to table.
4546 | ld CRET1, 0(RA)
4547 | daddiu RA, RA, 8
4548 | sltu AT, RA, TMP2
4549 | sd CRET1, 0(TMP1)
4550 | bnez AT, <3
4551 |. daddiu TMP1, TMP1, 8
4552 | bnez TMP0, >7
4553 |. nop
4554 |4:
4555 | ins_next
4556 |
4557 |5: // Need to resize array part.
4558 | load_got lj_tab_reasize
4559 | sd BASE, L->base
4560 | sd PC, SAVE_PC
4561 | move BASE, RD // Park RD in BASE across the call.
4562 | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
4563 |. move CARG1, L
4564 | // Must not reallocate the stack.
4565 | move RD, BASE
4566 | b <1
4567 |. ld BASE, L->base // Reload BASE for lack of a saved register.
4568 |
4569 |7: // Possible table write barrier for any value. Skip valiswhite check.
4570 | barrierback TAB:CARG2, TMP3, TMP0, <4
4571 break;
4572
4573 /* -- Calls and vararg handling ----------------------------------------- */
4574
4575 case BC_CALLM: /* Add MULTRES to the argument count, then continue at ->BC_CALL_Z. */
4576 | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
4577 | decode_RDtoRC8 NARGS8:RC, RD
4578 | b ->BC_CALL_Z
4579 |. addu NARGS8:RC, NARGS8:RC, MULTRES // Delay slot: extra_nargs*8 + MULTRES.
4580 break;
4581 case BC_CALL: /* Call the value in slot RA; values failing checkfunc go to ->vmeta_call. */
4582 | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
4583 | decode_RDtoRC8 NARGS8:RC, RD
4584 |->BC_CALL_Z:
4585 | move TMP2, BASE // Keep the old BASE in TMP2.
4586 | daddu BASE, BASE, RA
4587 | ld LFUNC:RB, 0(BASE)
4588 | daddiu BASE, BASE, 16 // New BASE is 16 bytes (two slots) above the function slot.
4589 | addiu NARGS8:RC, NARGS8:RC, -8 // (nargs+1)*8 -> nargs*8
4590 | checkfunc RB, ->vmeta_call
4591 | ins_call
4592 break;
4593
4594 case BC_CALLMT: /* Add MULTRES to RD, then fall through into the BC_CALLT code. */
4595 | // RA = base*8, (RB = 0,) RC = extra_nargs*8
4596 | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD.
4597 | // Fall through. Assumes BC_CALLT follows.
4598 break;
4599 case BC_CALLT: /* Tail call: move function and arguments down to the current frame, then call. */
4600 | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
4601 | daddu RA, BASE, RA
4602 | ld RB, 0(RA)
4603 | move NARGS8:RC, RD
4604 | ld TMP1, FRAME_PC(BASE)
4605 | daddiu RA, RA, 16 // RA = first argument (two slots above the function slot).
4606 | addiu NARGS8:RC, NARGS8:RC, -8 // (nargs+1)*8 -> nargs*8
4607 | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt
4608 |->BC_CALLT_Z:
4609 | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
4610 | lbu TMP3, LFUNC:CARG3->ffid
4611 | bnez TMP0, >7
4612 |. xori TMP2, TMP1, FRAME_VARG // Delay slot: input for the frame test at 7.
4613 |1:
4614 | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
4615 | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function?
4616 | move TMP2, BASE
4617 | move RB, CARG3
4618 | beqz NARGS8:RC, >3
4619 |. move TMP3, NARGS8:RC // Delay slot: TMP3 = remaining bytes to copy.
4620 |2:
4621 | ld CRET1, 0(RA)
4622 | daddiu RA, RA, 8
4623 | addiu TMP3, TMP3, -8
4624 | sd CRET1, 0(TMP2)
4625 | bnez TMP3, <2
4626 |. daddiu TMP2, TMP2, 8
4627 |3:
4628 | or TMP0, TMP0, AT
4629 | beqz TMP0, >5
4630 |. nop
4631 |4:
4632 | ins_callt
4633 |
4634 |5: // Tailcall to a fast function with a Lua frame below.
4635 | lw INS, -4(TMP1) // Instruction word just before the frame's saved PC.
4636 | decode_RA8a RA, INS
4637 | decode_RA8b RA
4638 | dsubu TMP1, BASE, RA
4639 | ld TMP1, -32(TMP1)
4640 | cleartp LFUNC:TMP1
4641 | ld TMP1, LFUNC:TMP1->pc
4642 | b <4
4643 |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
4644 |
4645 |7: // Tailcall from a vararg function.
4646 | andi AT, TMP2, FRAME_TYPEP
4647 | bnez AT, <1 // Vararg frame below?
4648 |. dsubu TMP2, BASE, TMP2 // Relocate BASE down.
4649 | move BASE, TMP2
4650 | ld TMP1, FRAME_PC(TMP2)
4651 | b <1
4652 |. andi TMP0, TMP1, FRAME_TYPE // Delay slot: recompute TMP0 for the relocated frame.
4653 break;
4654
4655 case BC_ITERC: /* Copy callable, state and control var up from below RA, then call with 2 args. */
4656 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
4657 | move TMP2, BASE // Save old BASE for vmeta_call.
4658 | daddu BASE, BASE, RA
4659 | ld RB, -24(BASE)
4660 | ld CARG1, -16(BASE)
4661 | ld CARG2, -8(BASE)
4662 | li NARGS8:RC, 16 // Iterators get 2 arguments.
4663 | sd RB, 0(BASE) // Copy callable.
4664 | sd CARG1, 16(BASE) // Copy state.
4665 | sd CARG2, 24(BASE) // Copy control var.
4666 | daddiu BASE, BASE, 16 // New BASE is 16 bytes (two slots) above the callable.
4667 | checkfunc RB, ->vmeta_call
4668 | ins_call
4669 break;
4670
4671 case BC_ITERN: /* Inline table traversal: array part first, then the hash part. */
4672 | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
4673 |.if JIT
4674 | // NYI: add hotloop, record BC_ITERN.
4675 |.endif
4676 | daddu RA, BASE, RA
4677 | ld TAB:RB, -16(RA)
4678 | lw RC, -8+LO(RA) // Get index from control var.
4679 | cleartp TAB:RB
4680 | daddiu PC, PC, 4
4681 | lw TMP0, TAB:RB->asize
4682 | ld TMP1, TAB:RB->array
4683 | dsll CARG3, TISNUM, 47 // Shifted TISNUM, or'ed onto the index below.
4684 |1: // Traverse array part.
4685 | sltu AT, RC, TMP0
4686 | beqz AT, >5 // Index points after array part?
4687 |. sll TMP3, RC, 3
4688 | daddu TMP3, TMP1, TMP3
4689 | ld CARG1, 0(TMP3)
4690 | lhu RD, -4+OFS_RD(PC)
4691 | or TMP2, RC, CARG3 // Key = index combined with shifted TISNUM.
4692 | beq CARG1, TISNIL, <1 // Skip holes in array part.
4693 |. addiu RC, RC, 1
4694 | sd TMP2, 0(RA) // Key goes to slot RA.
4695 | sd CARG1, 8(RA) // Value goes to slot RA+1.
4696 | or TMP0, RC, CARG3
4697 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4698 | decode_RD4b RD
4699 | daddu RD, RD, TMP3
4700 | sw TMP0, -8+LO(RA) // Update control var.
4701 | daddu PC, PC, RD // Add the computed offset to PC.
4702 |3:
4703 | ins_next
4704 |
4705 |5: // Traverse hash part.
4706 | lw TMP1, TAB:RB->hmask
4707 | subu RC, RC, TMP0 // Hash index = control index - asize.
4708 | ld TMP2, TAB:RB->node
4709 |6:
4710 | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1.
4711 | bnez AT, <3
4712 |. sll TMP3, RC, 5
4713 | sll RB, RC, 3
4714 | subu TMP3, TMP3, RB // idx*32 - idx*8 = idx*24
4715 | daddu NODE:TMP3, TMP3, TMP2
4716 | ld CARG1, 0(NODE:TMP3) // Loaded from offset 0 of the node; stored to slot RA+1 below.
4717 | lhu RD, -4+OFS_RD(PC)
4718 | beq CARG1, TISNIL, <6 // Skip holes in hash part.
4719 |. addiu RC, RC, 1
4720 | ld CARG2, NODE:TMP3->key
4721 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
4722 | sd CARG1, 8(RA)
4723 | addu RC, RC, TMP0 // Back to a control index (hash index + asize).
4724 | decode_RD4b RD
4725 | addu RD, RD, TMP3
4726 | sd CARG2, 0(RA)
4727 | daddu PC, PC, RD
4728 | b <3
4729 |. sw RC, -8+LO(RA) // Update control var.
4730 break;
4731
4732 case BC_ISNEXT: /* Verify the iterator setup for ITERN; otherwise patch bytecode to JMP/ITERC. */
4733 | // RA = base*8, RD = target (points to ITERN)
4734 | daddu RA, BASE, RA
4735 | srl TMP0, RD, 1
4736 | ld CFUNC:CARG1, -24(RA)
4737 | daddu TMP0, PC, TMP0
4738 | ld CARG2, -16(RA)
4739 | ld CARG3, -8(RA)
4740 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4741 | checkfunc CFUNC:CARG1, >5
4742 | gettp CARG2, CARG2
4743 | daddiu CARG2, CARG2, -LJ_TTAB // Zero iff the slot at RA-16 has tag LJ_TTAB.
4744 | lbu TMP1, CFUNC:CARG1->ffid
4745 | daddiu CARG3, CARG3, -LJ_TNIL // Zero iff the slot at RA-8 equals LJ_TNIL.
4746 | or AT, CARG2, CARG3
4747 | daddiu TMP1, TMP1, -FF_next_N // Zero iff the function's ffid is FF_next_N.
4748 | or AT, AT, TMP1 // All three differences must be zero.
4749 | bnez AT, >5
4750 |. lui TMP1, 0xfffe
4751 | daddu PC, TMP0, TMP2 // Jump to the target.
4752 | ori TMP1, TMP1, 0x7fff
4753 | dsll TMP1, TMP1, 32 // TMP1 = 0xfffe7fff00000000, i.e. the low word is 0.
4754 | sd TMP1, -8(RA) // Initialize the control var.
4755 |1:
4756 | ins_next
4757 |5: // Despecialize bytecode if any of the checks fail.
4758 | li TMP3, BC_JMP
4759 | li TMP1, BC_ITERC
4760 | sb TMP3, -4+OFS_OP(PC) // Patch the opcode of the current instruction.
4761 | daddu PC, TMP0, TMP2
4762 | b <1
4763 |. sb TMP1, OFS_OP(PC) // Delay slot: patch the opcode at the target.
4764 break;
4765
4766 case BC_VARG: /* Copy vararg slots to RA: a fixed count padded with nil, or all of them. */
4767 | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
4768 | ld TMP0, FRAME_PC(BASE)
4769 | decode_RDtoRC8 RC, RD
4770 | decode_RB8a RB, INS
4771 | daddu RC, BASE, RC
4772 | decode_RB8b RB
4773 | daddu RA, BASE, RA
4774 | daddiu RC, RC, FRAME_VARG
4775 | daddu TMP2, RA, RB
4776 | daddiu TMP3, BASE, -16 // TMP3 = vtop
4777 | dsubu RC, RC, TMP0 // RC = vbase
4778 | // Note: RC may now be even _above_ BASE if nargs was < numparams.
4779 | beqz RB, >5 // Copy all varargs?
4780 |. dsubu TMP1, TMP3, RC // Delay slot: byte size of the vararg area (may be <= 0).
4781 | daddiu TMP2, TMP2, -16 // TMP2 = address of the last destination slot.
4782 |1: // Copy vararg slots to destination slots.
4783 | ld CARG1, 0(RC)
4784 | sltu AT, RC, TMP3 // Still below vtop?
4785 | daddiu RC, RC, 8
4786 |.if MIPSR6
4787 | selnez CARG1, CARG1, AT
4788 | seleqz AT, TISNIL, AT
4789 | or CARG1, CARG1, AT
4790 |.else
4791 | movz CARG1, TISNIL, AT // Past vtop: substitute nil.
4792 |.endif
4793 | sd CARG1, 0(RA)
4794 | sltu AT, RA, TMP2
4795 | bnez AT, <1
4796 |. daddiu RA, RA, 8
4797 |3:
4798 | ins_next
4799 |
4800 |5: // Copy all varargs.
4801 | ld TMP0, L->maxstack
4802 | blez TMP1, <3 // No vararg slots?
4803 |. li MULTRES, 8 // MULTRES = (0+1)*8
4804 | daddu TMP2, RA, TMP1
4805 | sltu AT, TMP0, TMP2
4806 | bnez AT, >7 // Destination end exceeds L->maxstack: grow the stack.
4807 |. daddiu MULTRES, TMP1, 8 // Delay slot: MULTRES = vararg bytes + 8.
4808 |6:
4809 | ld CRET1, 0(RC)
4810 | daddiu RC, RC, 8
4811 | sd CRET1, 0(RA)
4812 | sltu AT, RC, TMP3
4813 | bnez AT, <6 // More vararg slots?
4814 |. daddiu RA, RA, 8
4815 | b <3
4816 |. nop
4817 |
4818 |7: // Grow stack for varargs.
4819 | load_got lj_state_growstack
4820 | sd RA, L->top
4821 | dsubu RA, RA, BASE // Turn RA into an offset from BASE.
4822 | sd BASE, L->base
4823 | dsubu BASE, RC, BASE // Need delta, because BASE may change.
4824 | sd PC, SAVE_PC
4825 | srl CARG2, TMP1, 3 // n = number of vararg slots.
4826 | call_intern lj_state_growstack // (lua_State *L, int n)
4827 |. move CARG1, L
4828 | move RC, BASE // Recover the vbase delta parked in BASE.
4829 | ld BASE, L->base
4830 | daddu RA, BASE, RA // Rebuild the pointers from the reloaded BASE.
4831 | daddu RC, BASE, RC
4832 | b <6
4833 |. daddiu TMP3, BASE, -16 // Delay slot: recompute vtop.
4834 break;
4835
4836 /* -- Returns ----------------------------------------------------------- */
4837
4838 case BC_RETM:
4839 | // RA = results*8, RD = extra_nresults*8
4840 | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
4841 | // Fall through. Assumes BC_RET follows.
4842 break;
4843
4844 case BC_RET:
4845 | // RA = results*8, RD = (nresults+1)*8
4846 | ld PC, FRAME_PC(BASE)
4847 | daddu RA, BASE, RA
4848 | move MULTRES, RD
4849 |1:
4850 | andi TMP0, PC, FRAME_TYPE
4851 | bnez TMP0, ->BC_RETV_Z
4852 |. xori TMP1, PC, FRAME_VARG
4853 |
4854 |->BC_RET_Z:
4855 | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
4856 | lw INS, -4(PC)
4857 | daddiu TMP2, BASE, -16
4858 | daddiu RC, RD, -8
4859 | decode_RA8a TMP0, INS
4860 | decode_RB8a RB, INS
4861 | decode_RA8b TMP0
4862 | decode_RB8b RB
4863 | daddu TMP3, TMP2, RB
4864 | beqz RC, >3
4865 |. dsubu BASE, TMP2, TMP0
4866 |2:
4867 | ld CRET1, 0(RA)
4868 | daddiu RA, RA, 8
4869 | daddiu RC, RC, -8
4870 | sd CRET1, 0(TMP2)
4871 | bnez RC, <2
4872 |. daddiu TMP2, TMP2, 8
4873 |3:
4874 | daddiu TMP3, TMP3, -8
4875 |5:
4876 | sltu AT, TMP2, TMP3
4877 | bnez AT, >6
4878 |. ld LFUNC:TMP1, FRAME_FUNC(BASE)
4879 | ins_next1
4880 | cleartp LFUNC:TMP1
4881 | ld TMP1, LFUNC:TMP1->pc
4882 | ld KBASE, PC2PROTO(k)(TMP1)
4883 | ins_next2
4884 |
4885 |6: // Fill up results with nil.
4886 | sd TISNIL, 0(TMP2)
4887 | b <5
4888 |. daddiu TMP2, TMP2, 8
4889 |
4890 |->BC_RETV_Z: // Non-standard return case.
4891 | andi TMP2, TMP1, FRAME_TYPEP
4892 | bnez TMP2, ->vm_return
4893 |. nop
4894 | // Return from vararg function: relocate BASE down.
4895 | dsubu BASE, BASE, TMP1
4896 | b <1
4897 |. ld PC, FRAME_PC(BASE)
4898 break;
4899
4900 case BC_RET0: case BC_RET1:
4901 | // RA = results*8, RD = (nresults+1)*8
4902 | ld PC, FRAME_PC(BASE)
4903 | daddu RA, BASE, RA
4904 | move MULTRES, RD
4905 | andi TMP0, PC, FRAME_TYPE
4906 | bnez TMP0, ->BC_RETV_Z
4907 |. xori TMP1, PC, FRAME_VARG
4908 | lw INS, -4(PC)
4909 | daddiu TMP2, BASE, -16
4910 if (op == BC_RET1) {
4911 | ld CRET1, 0(RA)
4912 }
4913 | decode_RB8a RB, INS
4914 | decode_RA8a RA, INS
4915 | decode_RB8b RB
4916 | decode_RA8b RA
4917 | dsubu BASE, TMP2, RA
4918 if (op == BC_RET1) {
4919 | sd CRET1, 0(TMP2)
4920 }
4921 |5:
4922 | sltu AT, RD, RB
4923 | bnez AT, >6
4924 |. ld TMP1, FRAME_FUNC(BASE)
4925 | ins_next1
4926 | cleartp LFUNC:TMP1
4927 | ld TMP1, LFUNC:TMP1->pc
4928 | ld KBASE, PC2PROTO(k)(TMP1)
4929 | ins_next2
4930 |
4931 |6: // Fill up results with nil.
4932 | daddiu TMP2, TMP2, 8
4933 | daddiu RD, RD, 8
4934 | b <5
4935 if (op == BC_RET1) {
4936 |. sd TISNIL, 0(TMP2)
4937 } else {
4938 |. sd TISNIL, -8(TMP2)
4939 }
4940 break;
4941
4942 /* -- Loops and branches ------------------------------------------------ */
4943
4944 case BC_FORL:
4945 |.if JIT
4946 | hotloop
4947 |.endif
4948 | // Fall through. Assumes BC_IFORL follows.
4949 break;
4950
4951 case BC_JFORI:
4952 case BC_JFORL:
4953#if !LJ_HASJIT
4954 break;
4955#endif
4956 case BC_FORI:
4957 case BC_IFORL:
4958 | // RA = base*8, RD = target (after end of loop or start of loop)
4959 vk = (op == BC_IFORL || op == BC_JFORL);
4960 | daddu RA, BASE, RA
4961 | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type
4962 | gettp CARG3, CARG1
4963 if (op != BC_JFORL) {
4964 | srl RD, RD, 1
4965 | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
4966 | daddu TMP2, RD, TMP2
4967 }
4968 if (!vk) {
4969 | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type
4970 | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type
4971 | gettp CARG4, CARG2
4972 | bne CARG3, TISNUM, >5
4973 |. gettp CRET2, CRET1
4974 | bne CARG4, TISNUM, ->vmeta_for
4975 |. sextw CARG3, CARG1
4976 | bne CRET2, TISNUM, ->vmeta_for
4977 |. sextw CARG2, CARG2
4978 | dext AT, CRET1, 31, 0
4979 | slt CRET1, CARG2, CARG3
4980 | slt TMP1, CARG3, CARG2
4981 |.if MIPSR6
4982 | selnez TMP1, TMP1, AT
4983 | seleqz CRET1, CRET1, AT
4984 | or CRET1, CRET1, TMP1
4985 |.else
4986 | movn CRET1, TMP1, AT
4987 |.endif
4988 } else {
4989 | bne CARG3, TISNUM, >5
4990 |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
4991 | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type
4992 | sextw TMP3, CARG1
4993 | sextw CARG2, CARG2
4994 | sextw CRET1, CRET1
4995 | addu CARG1, TMP3, CARG2
4996 | xor TMP0, CARG1, TMP3
4997 | xor TMP1, CARG1, CARG2
4998 | and TMP0, TMP0, TMP1
4999 | slt TMP1, CARG1, CRET1
5000 | slt CRET1, CRET1, CARG1
5001 | slt AT, CARG2, r0
5002 | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
5003 |.if MIPSR6
5004 | selnez TMP1, TMP1, AT
5005 | seleqz CRET1, CRET1, AT
5006 | or CRET1, CRET1, TMP1
5007 |.else
5008 | movn CRET1, TMP1, AT
5009 |.endif
5010 | or CRET1, CRET1, TMP0
5011 | zextw CARG1, CARG1
5012 | settp CARG1, TISNUM
5013 }
5014 |1:
5015 if (op == BC_FORI) {
5016 |.if MIPSR6
5017 | selnez TMP2, TMP2, CRET1
5018 |.else
5019 | movz TMP2, r0, CRET1
5020 |.endif
5021 | daddu PC, PC, TMP2
5022 } else if (op == BC_JFORI) {
5023 | daddu PC, PC, TMP2
5024 | lhu RD, -4+OFS_RD(PC)
5025 } else if (op == BC_IFORL) {
5026 |.if MIPSR6
5027 | seleqz TMP2, TMP2, CRET1
5028 |.else
5029 | movn TMP2, r0, CRET1
5030 |.endif
5031 | daddu PC, PC, TMP2
5032 }
5033 if (vk) {
5034 | sd CARG1, FORL_IDX*8(RA)
5035 }
5036 | ins_next1
5037 | sd CARG1, FORL_EXT*8(RA)
5038 |2:
5039 if (op == BC_JFORI) {
5040 | beqz CRET1, =>BC_JLOOP
5041 |. decode_RD8b RD
5042 } else if (op == BC_JFORL) {
5043 | beqz CRET1, =>BC_JLOOP
5044 }
5045 | ins_next2
5046 |
5047 |5: // FP loop.
5048 |.if FPU
5049 if (!vk) {
5050 | ldc1 f0, FORL_IDX*8(RA)
5051 | ldc1 f2, FORL_STOP*8(RA)
5052 | sltiu TMP0, CARG3, LJ_TISNUM
5053 | sltiu TMP1, CARG4, LJ_TISNUM
5054 | sltiu AT, CRET2, LJ_TISNUM
5055 | ld TMP3, FORL_STEP*8(RA)
5056 | and TMP0, TMP0, TMP1
5057 | and AT, AT, TMP0
5058 | beqz AT, ->vmeta_for
5059 |. slt TMP3, TMP3, r0
5060 |.if MIPSR6
5061 | dmtc1 TMP3, FTMP2
5062 | cmp.lt.d FTMP0, f0, f2
5063 | cmp.lt.d FTMP1, f2, f0
5064 | sel.d FTMP2, FTMP1, FTMP0
5065 | b <1
5066 |. dmfc1 CRET1, FTMP2
5067 |.else
5068 | c.ole.d 0, f0, f2
5069 | c.ole.d 1, f2, f0
5070 | li CRET1, 1
5071 | movt CRET1, r0, 0
5072 | movt AT, r0, 1
5073 | b <1
5074 |. movn CRET1, AT, TMP3
5075 |.endif
5076 } else {
5077 | ldc1 f0, FORL_IDX*8(RA)
5078 | ldc1 f4, FORL_STEP*8(RA)
5079 | ldc1 f2, FORL_STOP*8(RA)
5080 | ld TMP3, FORL_STEP*8(RA)
5081 | add.d f0, f0, f4
5082 |.if MIPSR6
5083 | slt TMP3, TMP3, r0
5084 | dmtc1 TMP3, FTMP2
5085 | cmp.lt.d FTMP0, f0, f2
5086 | cmp.lt.d FTMP1, f2, f0
5087 | sel.d FTMP2, FTMP1, FTMP0
5088 | dmfc1 CRET1, FTMP2
5089 if (op == BC_IFORL) {
5090 | seleqz TMP2, TMP2, CRET1
5091 | daddu PC, PC, TMP2
5092 }
5093 |.else
5094 | c.ole.d 0, f0, f2
5095 | c.ole.d 1, f2, f0
5096 | slt TMP3, TMP3, r0
5097 | li CRET1, 1
5098 | li AT, 1
5099 | movt CRET1, r0, 0
5100 | movt AT, r0, 1
5101 | movn CRET1, AT, TMP3
5102 if (op == BC_IFORL) {
5103 | movn TMP2, r0, CRET1
5104 | daddu PC, PC, TMP2
5105 }
5106 |.endif
5107 | sdc1 f0, FORL_IDX*8(RA)
5108 | ins_next1
5109 | b <2
5110 |. sdc1 f0, FORL_EXT*8(RA)
5111 }
5112 |.else
5113 if (!vk) {
5114 | sltiu TMP0, CARG3, LJ_TISNUM
5115 | sltiu TMP1, CARG4, LJ_TISNUM
5116 | sltiu AT, CRET2, LJ_TISNUM
5117 | and TMP0, TMP0, TMP1
5118 | and AT, AT, TMP0
5119 | beqz AT, ->vmeta_for
5120 |. nop
5121 | bal ->vm_sfcmpolex
5122 |. lw TMP3, FORL_STEP*8+HI(RA)
5123 | b <1
5124 |. nop
5125 } else {
5126 | load_got __adddf3
5127 | call_extern
5128 |. sw TMP2, TMPD
5129 | ld CARG2, FORL_STOP*8(RA)
5130 | move CARG1, CRET1
5131 if ( op == BC_JFORL ) {
5132 | lhu RD, -4+OFS_RD(PC)
5133 | decode_RD8b RD
5134 }
5135 | bal ->vm_sfcmpolex
5136 |. lw TMP3, FORL_STEP*8+HI(RA)
5137 | b <1
5138 |. lw TMP2, TMPD
5139 }
5140 |.endif
5141 break;
5142
5143 case BC_ITERL:
5144 |.if JIT
5145 | hotloop
5146 |.endif
5147 | // Fall through. Assumes BC_IITERL follows.
5148 break;
5149
5150 case BC_JITERL:
5151#if !LJ_HASJIT
5152 break;
5153#endif
5154 case BC_IITERL:
5155 | // RA = base*8, RD = target
5156 | daddu RA, BASE, RA
5157 | ld TMP1, 0(RA)
5158 | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
5159 |. nop
5160 if (op == BC_JITERL) {
5161 | b =>BC_JLOOP
5162 |. sd TMP1, -8(RA)
5163 } else {
5164 | branch_RD // Otherwise save control var + branch.
5165 | sd TMP1, -8(RA)
5166 }
5167 |1:
5168 | ins_next
5169 break;
5170
5171 case BC_LOOP:
5172 | // RA = base*8, RD = target (loop extent)
5173 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5174 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5175 |.if JIT
5176 | hotloop
5177 |.endif
5178 | // Fall through. Assumes BC_ILOOP follows.
5179 break;
5180
5181 case BC_ILOOP:
5182 | // RA = base*8, RD = target (loop extent)
5183 | ins_next
5184 break;
5185
5186 case BC_JLOOP:
5187 |.if JIT
5188 | // RA = base*8 (ignored), RD = traceno*8
5189 | ld TMP1, DISPATCH_J(trace)(DISPATCH)
5190 | li AT, 0
5191 | daddu TMP1, TMP1, RD
5192 | // Traces on MIPS don't store the trace number, so use 0.
5193 | sd AT, DISPATCH_GL(vmstate)(DISPATCH)
5194 | ld TRACE:TMP2, 0(TMP1)
5195 | sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
5196 | ld TMP2, TRACE:TMP2->mcode
5197 | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
5198 | jr TMP2
5199 |. daddiu JGL, DISPATCH, GG_DISP2G+32768
5200 |.endif
5201 break;
5202
5203 case BC_JMP:
5204 | // RA = base*8 (only used by trace recorder), RD = target
5205 | branch_RD
5206 | ins_next
5207 break;
5208
5209 /* -- Function headers -------------------------------------------------- */
5210
5211 case BC_FUNCF:
5212 |.if JIT
5213 | hotcall
5214 |.endif
5215 case BC_FUNCV: /* NYI: compiled vararg functions. */
5216 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
5217 break;
5218
5219 case BC_JFUNCF:
5220#if !LJ_HASJIT
5221 break;
5222#endif
5223 case BC_IFUNCF:
5224 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5225 | ld TMP2, L->maxstack
5226 | lbu TMP1, -4+PC2PROTO(numparams)(PC)
5227 | ld KBASE, -4+PC2PROTO(k)(PC)
5228 | sltu AT, TMP2, RA
5229 | bnez AT, ->vm_growstack_l
5230 |. sll TMP1, TMP1, 3
5231 if (op != BC_JFUNCF) {
5232 | ins_next1
5233 }
5234 |2:
5235 | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters.
5236 | bnez AT, >3
5237 |. daddu AT, BASE, NARGS8:RC
5238 if (op == BC_JFUNCF) {
5239 | decode_RD8a RD, INS
5240 | b =>BC_JLOOP
5241 |. decode_RD8b RD
5242 } else {
5243 | ins_next2
5244 }
5245 |
5246 |3: // Clear missing parameters.
5247 | sd TISNIL, 0(AT)
5248 | b <2
5249 |. addiu NARGS8:RC, NARGS8:RC, 8
5250 break;
5251
5252 case BC_JFUNCV:
5253#if !LJ_HASJIT
5254 break;
5255#endif
5256 | NYI // NYI: compiled vararg functions
5257 break; /* NYI: compiled vararg functions. */
5258
5259 case BC_IFUNCV:
5260 | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
5261 | li TMP0, LJ_TFUNC
5262 | daddu TMP1, BASE, RC
5263 | ld TMP2, L->maxstack
5264 | settp LFUNC:RB, TMP0
5265 | daddu TMP0, RA, RC
5266 | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
5267 | daddiu TMP3, RC, 16+FRAME_VARG
5268 | sltu AT, TMP0, TMP2
5269 | ld KBASE, -4+PC2PROTO(k)(PC)
5270 | beqz AT, ->vm_growstack_l
5271 |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
5272 | lbu TMP2, -4+PC2PROTO(numparams)(PC)
5273 | move RA, BASE
5274 | move RC, TMP1
5275 | ins_next1
5276 | beqz TMP2, >3
5277 |. daddiu BASE, TMP1, 16
5278 |1:
5279 | ld TMP0, 0(RA)
5280 | sltu AT, RA, RC // Less args than parameters?
5281 | move CARG1, TMP0
5282 |.if MIPSR6
5283 | selnez TMP0, TMP0, AT
5284 | seleqz TMP3, TISNIL, AT
5285 | or TMP0, TMP0, TMP3
5286 | seleqz TMP3, CARG1, AT
5287 | selnez CARG1, TISNIL, AT
5288 | or CARG1, CARG1, TMP3
5289 |.else
5290 | movz TMP0, TISNIL, AT // Clear missing parameters.
5291 | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
5292 |.endif
5293 | addiu TMP2, TMP2, -1
5294 | sd TMP0, 16(TMP1)
5295 | daddiu TMP1, TMP1, 8
5296 | sd CARG1, 0(RA)
5297 | bnez TMP2, <1
5298 |. daddiu RA, RA, 8
5299 |3:
5300 | ins_next2
5301 break;
5302
5303 case BC_FUNCC:
5304 case BC_FUNCCW:
5305 | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
5306 if (op == BC_FUNCC) {
5307 | ld CFUNCADDR, CFUNC:RB->f
5308 } else {
5309 | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH)
5310 }
5311 | daddu TMP1, RA, NARGS8:RC
5312 | ld TMP2, L->maxstack
5313 | daddu RC, BASE, NARGS8:RC
5314 | sd BASE, L->base
5315 | sltu AT, TMP2, TMP1
5316 | sd RC, L->top
5317 | li_vmstate C
5318 if (op == BC_FUNCCW) {
5319 | ld CARG2, CFUNC:RB->f
5320 }
5321 | bnez AT, ->vm_growstack_c // Need to grow stack.
5322 |. move CARG1, L
5323 | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f])
5324 |. st_vmstate
5325 | // Returns nresults.
5326 | ld BASE, L->base
5327 | sll RD, CRET1, 3
5328 | ld TMP1, L->top
5329 | li_vmstate INTERP
5330 | ld PC, FRAME_PC(BASE) // Fetch PC of caller.
5331 | dsubu RA, TMP1, RD // RA = L->top - nresults*8
5332 | sd L, DISPATCH_GL(cur_L)(DISPATCH)
5333 | b ->vm_returnc
5334 |. st_vmstate
5335 break;
5336
5337 /* ---------------------------------------------------------------------- */
5338
5339 default:
5340 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
5341 exit(2);
5342 break;
5343 }
5344}
5345
/*
** Backend entry point: emit the shared subroutines followed by the code
** for every bytecode opcode.
**
** ctx: build context, passed through to build_subroutines() and build_ins().
** Returns BC__MAX, the same count that is passed to dasm_growpc() below.
*/
5346static int build_backend(BuildCtx *ctx)
5347{
5348 int op;
5349
/* Size DynASM's pc-label table to BC__MAX entries (opcode handlers are
** referenced as =>BC_xxx labels, e.g. =>BC_JLOOP in build_ins). */
5350 dasm_growpc(Dst, BC__MAX);
5351
/* Subroutines are emitted first, before switching to the opcode section. */
5352 build_subroutines(ctx);
5353
/* Everything emitted from here on goes into the .code_op section. */
5354 |.code_op
/* One build_ins() call per opcode, in ascending opcode order. The third
** argument repeats the opcode number -- presumably the pc-label index
** for that handler; confirm against the head of build_ins(). */
5355 for (op = 0; op < BC__MAX; op++)
5356 build_ins(ctx, (BCOp)op, op);
5357
5358 return BC__MAX;
5359}
5360
5361/* Emit pseudo frame-info for all assembler functions. */
/*
** Writes a hand-assembled .debug_frame section (one CIE plus one or two
** FDEs) as assembler text to ctx->fp. Output is produced only when
** ctx->mode is BUILD_elfasm; every other mode emits nothing.
**
** FDE0 starts at .Lbegin and spans fcofs bytes, i.e. everything up to
** lj_vm_ffi_call. FDE1 (only with LJ_HASFFI) starts at lj_vm_ffi_call and
** spans the remaining ctx->codesz - fcofs bytes.
*/
5362static void emit_asm_debug(BuildCtx *ctx)
5363{
/* Byte offset of the vm_ffi_call global from the start of the code. */
5364 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
5365 int i;
5366 switch (ctx->mode) {
5367 case BUILD_elfasm:
5368 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
/* CIE: id 0xffffffff, version 1, empty augmentation string, code
** alignment 1, data alignment -4, return address column 31, then
** opcode 0x0c (DW_CFA_def_cfa) with register 29 and offset 0. */
5369 fprintf(ctx->fp,
5370 ".Lframe0:\n"
5371 "\t.4byte .LECIE0-.LSCIE0\n"
5372 ".LSCIE0:\n"
5373 "\t.4byte 0xffffffff\n"
5374 "\t.byte 0x1\n"
5375 "\t.string \"\"\n"
5376 "\t.uleb128 0x1\n"
5377 "\t.sleb128 -4\n"
5378 "\t.byte 31\n"
5379 "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n"
5380 "\t.align 2\n"
5381 ".LECIE0:\n\n");
/* FDE0: 8-byte start address (.Lbegin) and 8-byte length (fcofs).
** 0x0e (DW_CFA_def_cfa_offset) sets the CFA offset to CFRAME_SIZE;
** 0x9f and 0x9e are DW_CFA_offset records for registers 31 and 30. */
5382 fprintf(ctx->fp,
5383 ".LSFDE0:\n"
5384 "\t.4byte .LEFDE0-.LASFDE0\n"
5385 ".LASFDE0:\n"
5386 "\t.4byte .Lframe0\n"
5387 "\t.8byte .Lbegin\n"
5388 "\t.8byte %d\n"
5389 "\t.byte 0xe\n\t.uleb128 %d\n"
5390 "\t.byte 0x9f\n\t.sleb128 2*5\n"
5391 "\t.byte 0x9e\n\t.sleb128 2*6\n",
5392 fcofs, CFRAME_SIZE);
/* DW_CFA_offset records (0x80+reg) for registers 23 down to 16. */
5393 for (i = 23; i >= 16; i--)
5394 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i));
5395#if !LJ_SOFTFP
/* Hard-float builds only: the same record for DWARF registers 32+31
** down to 32+24 -- presumably FPRs f31..f24; confirm the DWARF
** register numbering for this target. */
5396 for (i = 31; i >= 24; i--)
5397 fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i));
5398#endif
5399 fprintf(ctx->fp,
5400 "\t.align 2\n"
5401 ".LEFDE0:\n\n");
5402#if LJ_HASFFI
/* FDE1 for lj_vm_ffi_call: DW_CFA_offset records for registers 31 and
** 16, then 0x0d (DW_CFA_def_cfa_register) selecting register 0x10.
** NOTE(review): the start address and range use .4byte here, whereas
** FDE0 above uses .8byte for the same two fields -- confirm this is
** intended for a 64 bit target. */
5403 fprintf(ctx->fp,
5404 ".LSFDE1:\n"
5405 "\t.4byte .LEFDE1-.LASFDE1\n"
5406 ".LASFDE1:\n"
5407 "\t.4byte .Lframe0\n"
5408 "\t.4byte lj_vm_ffi_call\n"
5409 "\t.4byte %d\n"
5410 "\t.byte 0x9f\n\t.uleb128 2*1\n"
5411 "\t.byte 0x90\n\t.uleb128 2*2\n"
5412 "\t.byte 0xd\n\t.uleb128 0x10\n"
5413 "\t.align 2\n"
5414 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
5415#endif
5416#if !LJ_NO_UNWIND
/* Nothing further is emitted when unwinding is enabled (not yet implemented). */
5417 /* NYI */
5418#endif
5419 break;
5420 default:
5421 break;
5422 }
5423}
5424
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 6b973d4e..4299e266 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
1|// Low-level VM code for PowerPC CPUs. 1|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
2|// Bytecode interpreter, fast functions and helper functions. 2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h 3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4| 4|
@@ -18,7 +18,6 @@
18|// DynASM defines used by the PPC port: 18|// DynASM defines used by the PPC port:
19|// 19|//
20|// P64 64 bit pointers (only for GPR64 testing). 20|// P64 64 bit pointers (only for GPR64 testing).
21|// Note: a full PPC64 _LP64 port is not planned.
22|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). 21|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
23|// Affects reg saves, stack layout, carry/overflow/dot flags etc. 22|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
24|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). 23|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -103,6 +102,18 @@
103|// Fixed register assignments for the interpreter. 102|// Fixed register assignments for the interpreter.
104|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) 103|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
105| 104|
105|.macro .FPU, a, b
106|.if FPU
107| a, b
108|.endif
109|.endmacro
110|
111|.macro .FPU, a, b, c
112|.if FPU
113| a, b, c
114|.endif
115|.endmacro
116|
106|// The following must be C callee-save (but BASE is often refetched). 117|// The following must be C callee-save (but BASE is often refetched).
107|.define BASE, r14 // Base of current Lua stack frame. 118|.define BASE, r14 // Base of current Lua stack frame.
108|.define KBASE, r15 // Constants of current Lua function. 119|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +127,10 @@
116|.define TISNUM, r22 127|.define TISNUM, r22
117|.define TISNIL, r23 128|.define TISNIL, r23
118|.define ZERO, r24 129|.define ZERO, r24
130|.if FPU
119|.define TOBIT, f30 // 2^52 + 2^51. 131|.define TOBIT, f30 // 2^52 + 2^51.
120|.define TONUM, f31 // 2^52 + 2^51 + 2^31. 132|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
133|.endif
121| 134|
122|// The following temporaries are not saved across C calls, except for RA. 135|// The following temporaries are not saved across C calls, except for RA.
123|.define RA, r20 // Callee-save. 136|.define RA, r20 // Callee-save.
@@ -133,6 +146,7 @@
133| 146|
134|// Saved temporaries. 147|// Saved temporaries.
135|.define SAVE0, r21 148|.define SAVE0, r21
149|.define SAVE1, r25
136| 150|
137|// Calling conventions. 151|// Calling conventions.
138|.define CARG1, r3 152|.define CARG1, r3
@@ -141,8 +155,10 @@
141|.define CARG4, r6 // Overlaps TMP3. 155|.define CARG4, r6 // Overlaps TMP3.
142|.define CARG5, r7 // Overlaps INS. 156|.define CARG5, r7 // Overlaps INS.
143| 157|
158|.if FPU
144|.define FARG1, f1 159|.define FARG1, f1
145|.define FARG2, f2 160|.define FARG2, f2
161|.endif
146| 162|
147|.define CRET1, r3 163|.define CRET1, r3
148|.define CRET2, r4 164|.define CRET2, r4
@@ -213,10 +229,16 @@
213|.endif 229|.endif
214|.else 230|.else
215| 231|
232|.if FPU
216|.define SAVE_LR, 276(sp) 233|.define SAVE_LR, 276(sp)
217|.define CFRAME_SPACE, 272 // Delta for sp. 234|.define CFRAME_SPACE, 272 // Delta for sp.
218|// Back chain for sp: 272(sp) <-- sp entering interpreter 235|// Back chain for sp: 272(sp) <-- sp entering interpreter
219|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. 236|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
237|.else
238|.define SAVE_LR, 132(sp)
239|.define CFRAME_SPACE, 128 // Delta for sp.
240|// Back chain for sp: 128(sp) <-- sp entering interpreter
241|.endif
220|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. 242|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
221|.define SAVE_CR, 52(sp) // 32 bit CR save. 243|.define SAVE_CR, 52(sp) // 32 bit CR save.
222|.define SAVE_ERRF, 48(sp) // 32 bit C frame info. 244|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +248,25 @@
226|.define SAVE_PC, 32(sp) 248|.define SAVE_PC, 32(sp)
227|.define SAVE_MULTRES, 28(sp) 249|.define SAVE_MULTRES, 28(sp)
228|.define UNUSED1, 24(sp) 250|.define UNUSED1, 24(sp)
251|.if FPU
229|.define TMPD_LO, 20(sp) 252|.define TMPD_LO, 20(sp)
230|.define TMPD_HI, 16(sp) 253|.define TMPD_HI, 16(sp)
231|.define TONUM_LO, 12(sp) 254|.define TONUM_LO, 12(sp)
232|.define TONUM_HI, 8(sp) 255|.define TONUM_HI, 8(sp)
256|.else
257|.define SFSAVE_4, 20(sp)
258|.define SFSAVE_3, 16(sp)
259|.define SFSAVE_2, 12(sp)
260|.define SFSAVE_1, 8(sp)
261|.endif
233|// Next frame lr: 4(sp) 262|// Next frame lr: 4(sp)
234|// Back chain for sp: 0(sp) <-- sp while in interpreter 263|// Back chain for sp: 0(sp) <-- sp while in interpreter
235| 264|
265|.if FPU
236|.define TMPD_BLO, 23(sp) 266|.define TMPD_BLO, 23(sp)
237|.define TMPD, TMPD_HI 267|.define TMPD, TMPD_HI
238|.define TONUM_D, TONUM_HI 268|.define TONUM_D, TONUM_HI
269|.endif
239| 270|
240|.endif 271|.endif
241| 272|
@@ -245,7 +276,7 @@
245|.else 276|.else
246| stw r..reg, SAVE_GPR_+(reg-14)*4(sp) 277| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
247|.endif 278|.endif
248| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 279| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
249|.endmacro 280|.endmacro
250|.macro rest_, reg 281|.macro rest_, reg
251|.if GPR64 282|.if GPR64
@@ -253,7 +284,7 @@
253|.else 284|.else
254| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) 285| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
255|.endif 286|.endif
256| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) 287| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
257|.endmacro 288|.endmacro
258| 289|
259|.macro saveregs 290|.macro saveregs
@@ -316,19 +347,14 @@
316|.type NODE, Node 347|.type NODE, Node
317|.type NARGS8, int 348|.type NARGS8, int
318|.type TRACE, GCtrace 349|.type TRACE, GCtrace
350|.type SBUF, SBuf
319| 351|
320|//----------------------------------------------------------------------- 352|//-----------------------------------------------------------------------
321| 353|
322|// These basic macros should really be part of DynASM.
323|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
324|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
325|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
326|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
327|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
328|
329|// Trap for not-yet-implemented parts. 354|// Trap for not-yet-implemented parts.
330|.macro NYI; tw 4, sp, sp; .endmacro 355|.macro NYI; tw 4, sp, sp; .endmacro
331| 356|
357|.if FPU
332|// int/FP conversions. 358|// int/FP conversions.
333|.macro tonum_i, freg, reg 359|.macro tonum_i, freg, reg
334| xoris reg, reg, 0x8000 360| xoris reg, reg, 0x8000
@@ -352,6 +378,7 @@
352|.macro toint, reg, freg 378|.macro toint, reg, freg
353| toint reg, freg, freg 379| toint reg, freg, freg
354|.endmacro 380|.endmacro
381|.endif
355| 382|
356|//----------------------------------------------------------------------- 383|//-----------------------------------------------------------------------
357| 384|
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx)
539 | beq >2 566 | beq >2
540 |1: 567 |1:
541 | addic. TMP1, TMP1, -8 568 | addic. TMP1, TMP1, -8
569 |.if FPU
542 | lfd f0, 0(RA) 570 | lfd f0, 0(RA)
571 |.else
572 | lwz CARG1, 0(RA)
573 | lwz CARG2, 4(RA)
574 |.endif
543 | addi RA, RA, 8 575 | addi RA, RA, 8
576 |.if FPU
544 | stfd f0, 0(BASE) 577 | stfd f0, 0(BASE)
578 |.else
579 | stw CARG1, 0(BASE)
580 | stw CARG2, 4(BASE)
581 |.endif
545 | addi BASE, BASE, 8 582 | addi BASE, BASE, 8
546 | bney <1 583 | bney <1
547 | 584 |
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx)
619 | .toc ld TOCREG, SAVE_TOC 656 | .toc ld TOCREG, SAVE_TOC
620 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 657 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
621 | lp BASE, L->base 658 | lp BASE, L->base
622 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 659 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
623 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 660 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
624 | li ZERO, 0 661 | li ZERO, 0
625 | stw TMP3, TMPD 662 | .FPU stw TMP3, TMPD
626 | li TMP1, LJ_TFALSE 663 | li TMP1, LJ_TFALSE
627 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 664 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
628 | li TISNIL, LJ_TNIL 665 | li TISNIL, LJ_TNIL
629 | li_vmstate INTERP 666 | li_vmstate INTERP
630 | lfs TOBIT, TMPD 667 | .FPU lfs TOBIT, TMPD
631 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. 668 | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
632 | la RA, -8(BASE) // Results start at BASE-8. 669 | la RA, -8(BASE) // Results start at BASE-8.
633 | stw TMP3, TMPD 670 | .FPU stw TMP3, TMPD
634 | addi DISPATCH, DISPATCH, GG_G2DISP 671 | addi DISPATCH, DISPATCH, GG_G2DISP
635 | stw TMP1, 0(RA) // Prepend false to error message. 672 | stw TMP1, 0(RA) // Prepend false to error message.
636 | li RD, 16 // 2 results: false + error message. 673 | li RD, 16 // 2 results: false + error message.
637 | st_vmstate 674 | st_vmstate
638 | lfs TONUM, TMPD 675 | .FPU lfs TONUM, TMPD
639 | b ->vm_returnc 676 | b ->vm_returnc
640 | 677 |
641 |//----------------------------------------------------------------------- 678 |//-----------------------------------------------------------------------
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx)
684 | stw CARG3, SAVE_NRES 721 | stw CARG3, SAVE_NRES
685 | cmplwi TMP1, 0 722 | cmplwi TMP1, 0
686 | stw CARG3, SAVE_ERRF 723 | stw CARG3, SAVE_ERRF
687 | stp TMP0, L->cframe
688 | stp CARG3, SAVE_CFRAME 724 | stp CARG3, SAVE_CFRAME
689 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 725 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
726 | stp TMP0, L->cframe
690 | beq >3 727 | beq >3
691 | 728 |
692 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
693 | mr RA, BASE 731 | mr RA, BASE
694 | lp BASE, L->base 732 | lp BASE, L->base
695 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 733 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
696 | lp TMP1, L->top 734 | lp TMP1, L->top
697 | lwz PC, FRAME_PC(BASE) 735 | lwz PC, FRAME_PC(BASE)
698 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 736 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
699 | stb CARG3, L->status 737 | stb CARG3, L->status
700 | stw TMP3, TMPD 738 | .FPU stw TMP3, TMPD
701 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 739 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
702 | lfs TOBIT, TMPD 740 | .FPU lfs TOBIT, TMPD
703 | sub RD, TMP1, BASE 741 | sub RD, TMP1, BASE
704 | stw TMP3, TMPD 742 | .FPU stw TMP3, TMPD
705 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 743 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
706 | addi RD, RD, 8 744 | addi RD, RD, 8
707 | stw TMP0, TONUM_HI 745 | .FPU stw TMP0, TONUM_HI
708 | li_vmstate INTERP 746 | li_vmstate INTERP
709 | li ZERO, 0 747 | li ZERO, 0
710 | st_vmstate 748 | st_vmstate
711 | andix. TMP0, PC, FRAME_TYPE 749 | andix. TMP0, PC, FRAME_TYPE
712 | mr MULTRES, RD 750 | mr MULTRES, RD
713 | lfs TONUM, TMPD 751 | .FPU lfs TONUM, TMPD
714 | li TISNIL, LJ_TNIL 752 | li TISNIL, LJ_TNIL
715 | beq ->BC_RET_Z 753 | beq ->BC_RET_Z
716 | b ->vm_return 754 | b ->vm_return
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx)
729 | 767 |
730 |1: // Entry point for vm_pcall above (PC = ftype). 768 |1: // Entry point for vm_pcall above (PC = ftype).
731 | lp TMP1, L:CARG1->cframe 769 | lp TMP1, L:CARG1->cframe
732 | stw CARG3, SAVE_NRES
733 | mr L, CARG1 770 | mr L, CARG1
734 | stw CARG1, SAVE_L 771 | stw CARG3, SAVE_NRES
735 | mr BASE, CARG2
736 | stp sp, L->cframe // Add our C frame to cframe chain.
737 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 772 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
773 | stw CARG1, SAVE_L
774 | mr BASE, CARG2
775 | addi DISPATCH, DISPATCH, GG_G2DISP
738 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 776 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
739 | stp TMP1, SAVE_CFRAME 777 | stp TMP1, SAVE_CFRAME
740 | addi DISPATCH, DISPATCH, GG_G2DISP 778 | stp sp, L->cframe // Add our C frame to cframe chain.
741 | 779 |
742 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). 780 |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
781 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
743 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). 782 | lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
744 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 783 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
745 | lp TMP1, L->top 784 | lp TMP1, L->top
746 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 785 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
747 | add PC, PC, BASE 786 | add PC, PC, BASE
748 | stw TMP3, TMPD 787 | .FPU stw TMP3, TMPD
749 | li ZERO, 0 788 | li ZERO, 0
750 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 789 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
751 | lfs TOBIT, TMPD 790 | .FPU lfs TOBIT, TMPD
752 | sub PC, PC, TMP2 // PC = frame delta + frame type 791 | sub PC, PC, TMP2 // PC = frame delta + frame type
753 | stw TMP3, TMPD 792 | .FPU stw TMP3, TMPD
754 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 793 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
755 | sub NARGS8:RC, TMP1, BASE 794 | sub NARGS8:RC, TMP1, BASE
756 | stw TMP0, TONUM_HI 795 | .FPU stw TMP0, TONUM_HI
757 | li_vmstate INTERP 796 | li_vmstate INTERP
758 | lfs TONUM, TMPD 797 | .FPU lfs TONUM, TMPD
759 | li TISNIL, LJ_TNIL 798 | li TISNIL, LJ_TNIL
760 | st_vmstate 799 | st_vmstate
761 | 800 |
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx)
776 | lwz TMP0, L:CARG1->stack 815 | lwz TMP0, L:CARG1->stack
777 | stw CARG1, SAVE_L 816 | stw CARG1, SAVE_L
778 | lp TMP1, L->top 817 | lp TMP1, L->top
818 | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
779 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. 819 | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
780 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). 820 | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
781 | lp TMP1, L->cframe 821 | lp TMP1, L->cframe
782 | stp sp, L->cframe // Add our C frame to cframe chain. 822 | addi DISPATCH, DISPATCH, GG_G2DISP
783 | .toc lp CARG4, 0(CARG4) 823 | .toc lp CARG4, 0(CARG4)
784 | li TMP2, 0 824 | li TMP2, 0
785 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. 825 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
786 | stw TMP2, SAVE_ERRF // No error function. 826 | stw TMP2, SAVE_ERRF // No error function.
787 | stp TMP1, SAVE_CFRAME 827 | stp TMP1, SAVE_CFRAME
828 | stp sp, L->cframe // Add our C frame to cframe chain.
829 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
788 | mtctr CARG4 830 | mtctr CARG4
789 | bctrl // (lua_State *L, lua_CFunction func, void *ud) 831 | bctrl // (lua_State *L, lua_CFunction func, void *ud)
790 |.if PPE 832 |.if PPE
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
793 |.else 835 |.else
794 | mr. BASE, CRET1 836 | mr. BASE, CRET1
795 |.endif 837 |.endif
796 | lwz DISPATCH, L->glref // Setup pointer to dispatch table. 838 | li PC, FRAME_CP
797 | li PC, FRAME_CP
798 | addi DISPATCH, DISPATCH, GG_G2DISP
799 | bne <3 // Else continue with the call. 839 | bne <3 // Else continue with the call.
800 | b ->vm_leave_cp // No base? Just remove C frame. 840 | b ->vm_leave_cp // No base? Just remove C frame.
801 | 841 |
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx)
842 | lwz INS, -4(PC) 882 | lwz INS, -4(PC)
843 | subi CARG2, RB, 16 883 | subi CARG2, RB, 16
844 | decode_RB8 SAVE0, INS 884 | decode_RB8 SAVE0, INS
885 |.if FPU
845 | lfd f0, 0(RA) 886 | lfd f0, 0(RA)
887 |.else
888 | lwz TMP2, 0(RA)
889 | lwz TMP3, 4(RA)
890 |.endif
846 | add TMP1, BASE, SAVE0 891 | add TMP1, BASE, SAVE0
847 | stp BASE, L->base 892 | stp BASE, L->base
848 | cmplw TMP1, CARG2 893 | cmplw TMP1, CARG2
849 | sub CARG3, CARG2, TMP1 894 | sub CARG3, CARG2, TMP1
850 | decode_RA8 RA, INS 895 | decode_RA8 RA, INS
896 |.if FPU
851 | stfd f0, 0(CARG2) 897 | stfd f0, 0(CARG2)
898 |.else
899 | stw TMP2, 0(CARG2)
900 | stw TMP3, 4(CARG2)
901 |.endif
852 | bney ->BC_CAT_Z 902 | bney ->BC_CAT_Z
903 |.if FPU
853 | stfdx f0, BASE, RA 904 | stfdx f0, BASE, RA
905 |.else
906 | stwux TMP2, RA, BASE
907 | stw TMP3, 4(RA)
908 |.endif
854 | b ->cont_nop 909 | b ->cont_nop
855 | 910 |
856 |//-- Table indexing metamethods ----------------------------------------- 911 |//-- Table indexing metamethods -----------------------------------------
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx)
903 | // Returns TValue * (finished) or NULL (metamethod). 958 | // Returns TValue * (finished) or NULL (metamethod).
904 | cmplwi CRET1, 0 959 | cmplwi CRET1, 0
905 | beq >3 960 | beq >3
961 |.if FPU
906 | lfd f0, 0(CRET1) 962 | lfd f0, 0(CRET1)
963 |.else
964 | lwz TMP0, 0(CRET1)
965 | lwz TMP1, 4(CRET1)
966 |.endif
907 | ins_next1 967 | ins_next1
968 |.if FPU
908 | stfdx f0, BASE, RA 969 | stfdx f0, BASE, RA
970 |.else
971 | stwux TMP0, RA, BASE
972 | stw TMP1, 4(RA)
973 |.endif
909 | ins_next2 974 | ins_next2
910 | 975 |
911 |3: // Call __index metamethod. 976 |3: // Call __index metamethod.
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx)
918 | li NARGS8:RC, 16 // 2 args for func(t, k). 983 | li NARGS8:RC, 16 // 2 args for func(t, k).
919 | b ->vm_call_dispatch_f 984 | b ->vm_call_dispatch_f
920 | 985 |
986 |->vmeta_tgetr:
987 | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
988 | // Returns cTValue * or NULL.
989 | cmplwi CRET1, 0
990 | beq >1
991 |.if FPU
992 | lfd f14, 0(CRET1)
993 |.else
994 | lwz SAVE0, 0(CRET1)
995 | lwz SAVE1, 4(CRET1)
996 |.endif
997 | b ->BC_TGETR_Z
998 |1:
999 | stwx TISNIL, BASE, RA
1000 | b ->cont_nop
1001 |
921 |//----------------------------------------------------------------------- 1002 |//-----------------------------------------------------------------------
922 | 1003 |
923 |->vmeta_tsets1: 1004 |->vmeta_tsets1:
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx)
967 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) 1048 | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
968 | // Returns TValue * (finished) or NULL (metamethod). 1049 | // Returns TValue * (finished) or NULL (metamethod).
969 | cmplwi CRET1, 0 1050 | cmplwi CRET1, 0
1051 |.if FPU
970 | lfdx f0, BASE, RA 1052 | lfdx f0, BASE, RA
1053 |.else
1054 | lwzux TMP2, RA, BASE
1055 | lwz TMP3, 4(RA)
1056 |.endif
971 | beq >3 1057 | beq >3
972 | // NOBARRIER: lj_meta_tset ensures the table is not black. 1058 | // NOBARRIER: lj_meta_tset ensures the table is not black.
973 | ins_next1 1059 | ins_next1
1060 |.if FPU
974 | stfd f0, 0(CRET1) 1061 | stfd f0, 0(CRET1)
1062 |.else
1063 | stw TMP2, 0(CRET1)
1064 | stw TMP3, 4(CRET1)
1065 |.endif
975 | ins_next2 1066 | ins_next2
976 | 1067 |
977 |3: // Call __newindex metamethod. 1068 |3: // Call __newindex metamethod.
@@ -982,9 +1073,27 @@ static void build_subroutines(BuildCtx *ctx)
982 | add PC, TMP1, BASE 1073 | add PC, TMP1, BASE
983 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. 1074 | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
984 | li NARGS8:RC, 24 // 3 args for func(t, k, v) 1075 | li NARGS8:RC, 24 // 3 args for func(t, k, v)
1076 |.if FPU
985 | stfd f0, 16(BASE) // Copy value to third argument. 1077 | stfd f0, 16(BASE) // Copy value to third argument.
1078 |.else
1079 | stw TMP2, 16(BASE)
1080 | stw TMP3, 20(BASE)
1081 |.endif
986 | b ->vm_call_dispatch_f 1082 | b ->vm_call_dispatch_f
987 | 1083 |
1084 |->vmeta_tsetr:
1085 | stp BASE, L->base
1086 | stw PC, SAVE_PC
1087 | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1088 | // Returns TValue *.
1089 |.if FPU
1090 | stfd f14, 0(CRET1)
1091 |.else
1092 | stw SAVE0, 0(CRET1)
1093 | stw SAVE1, 4(CRET1)
1094 |.endif
1095 | b ->cont_nop
1096 |
988 |//-- Comparison metamethods --------------------------------------------- 1097 |//-- Comparison metamethods ---------------------------------------------
989 | 1098 |
990 |->vmeta_comp: 1099 |->vmeta_comp:
@@ -1021,9 +1130,19 @@ static void build_subroutines(BuildCtx *ctx)
1021 | 1130 |
1022 |->cont_ra: // RA = resultptr 1131 |->cont_ra: // RA = resultptr
1023 | lwz INS, -4(PC) 1132 | lwz INS, -4(PC)
1133 |.if FPU
1024 | lfd f0, 0(RA) 1134 | lfd f0, 0(RA)
1135 |.else
1136 | lwz CARG1, 0(RA)
1137 | lwz CARG2, 4(RA)
1138 |.endif
1025 | decode_RA8 TMP1, INS 1139 | decode_RA8 TMP1, INS
1140 |.if FPU
1026 | stfdx f0, BASE, TMP1 1141 | stfdx f0, BASE, TMP1
1142 |.else
1143 | stwux CARG1, TMP1, BASE
1144 | stw CARG2, 4(TMP1)
1145 |.endif
1027 | b ->cont_nop 1146 | b ->cont_nop
1028 | 1147 |
1029 |->cont_condt: // RA = resultptr 1148 |->cont_condt: // RA = resultptr
@@ -1063,6 +1182,16 @@ static void build_subroutines(BuildCtx *ctx)
1063 | b <3 1182 | b <3
1064 |.endif 1183 |.endif
1065 | 1184 |
1185 |->vmeta_istype:
1186 | subi PC, PC, 4
1187 | stp BASE, L->base
1188 | srwi CARG2, RA, 3
1189 | mr CARG1, L
1190 | srwi CARG3, RD, 3
1191 | stw PC, SAVE_PC
1192 | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1193 | b ->cont_nop
1194 |
1066 |//-- Arithmetic metamethods --------------------------------------------- 1195 |//-- Arithmetic metamethods ---------------------------------------------
1067 | 1196 |
1068 |->vmeta_arith_nv: 1197 |->vmeta_arith_nv:
@@ -1219,22 +1348,32 @@ static void build_subroutines(BuildCtx *ctx)
1219 |.macro .ffunc_n, name 1348 |.macro .ffunc_n, name
1220 |->ff_ .. name: 1349 |->ff_ .. name:
1221 | cmplwi NARGS8:RC, 8 1350 | cmplwi NARGS8:RC, 8
1222 | lwz CARG3, 0(BASE) 1351 | lwz CARG1, 0(BASE)
1352 |.if FPU
1223 | lfd FARG1, 0(BASE) 1353 | lfd FARG1, 0(BASE)
1354 |.else
1355 | lwz CARG2, 4(BASE)
1356 |.endif
1224 | blt ->fff_fallback 1357 | blt ->fff_fallback
1225 | checknum CARG3; bge ->fff_fallback 1358 | checknum CARG1; bge ->fff_fallback
1226 |.endmacro 1359 |.endmacro
1227 | 1360 |
1228 |.macro .ffunc_nn, name 1361 |.macro .ffunc_nn, name
1229 |->ff_ .. name: 1362 |->ff_ .. name:
1230 | cmplwi NARGS8:RC, 16 1363 | cmplwi NARGS8:RC, 16
1231 | lwz CARG3, 0(BASE) 1364 | lwz CARG1, 0(BASE)
1365 |.if FPU
1232 | lfd FARG1, 0(BASE) 1366 | lfd FARG1, 0(BASE)
1233 | lwz CARG4, 8(BASE) 1367 | lwz CARG3, 8(BASE)
1234 | lfd FARG2, 8(BASE) 1368 | lfd FARG2, 8(BASE)
1369 |.else
1370 | lwz CARG2, 4(BASE)
1371 | lwz CARG3, 8(BASE)
1372 | lwz CARG4, 12(BASE)
1373 |.endif
1235 | blt ->fff_fallback 1374 | blt ->fff_fallback
1375 | checknum CARG1; bge ->fff_fallback
1236 | checknum CARG3; bge ->fff_fallback 1376 | checknum CARG3; bge ->fff_fallback
1237 | checknum CARG4; bge ->fff_fallback
1238 |.endmacro 1377 |.endmacro
1239 | 1378 |
1240 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. 1379 |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1255,14 +1394,21 @@ static void build_subroutines(BuildCtx *ctx)
1255 | bge cr1, ->fff_fallback 1394 | bge cr1, ->fff_fallback
1256 | stw CARG3, 0(RA) 1395 | stw CARG3, 0(RA)
1257 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. 1396 | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
1397 | addi TMP1, BASE, 8
1398 | add TMP2, RA, NARGS8:RC
1258 | stw CARG1, 4(RA) 1399 | stw CARG1, 4(RA)
1259 | beq ->fff_res // Done if exactly 1 argument. 1400 | beq ->fff_res // Done if exactly 1 argument.
1260 | li TMP1, 8
1261 | subi RC, RC, 8
1262 |1: 1401 |1:
1263 | cmplw TMP1, RC 1402 | cmplw TMP1, TMP2
1264 | lfdx f0, BASE, TMP1 1403 |.if FPU
1265 | stfdx f0, RA, TMP1 1404 | lfd f0, 0(TMP1)
1405 | stfd f0, 0(TMP1)
1406 |.else
1407 | lwz CARG1, 0(TMP1)
1408 | lwz CARG2, 4(TMP1)
1409 | stw CARG1, -8(TMP1)
1410 | stw CARG2, -4(TMP1)
1411 |.endif
1266 | addi TMP1, TMP1, 8 1412 | addi TMP1, TMP1, 8
1267 | bney <1 1413 | bney <1
1268 | b ->fff_res 1414 | b ->fff_res
@@ -1277,8 +1423,14 @@ static void build_subroutines(BuildCtx *ctx)
1277 | orc TMP1, TMP2, TMP0 1423 | orc TMP1, TMP2, TMP0
1278 | addi TMP1, TMP1, ~LJ_TISNUM+1 1424 | addi TMP1, TMP1, ~LJ_TISNUM+1
1279 | slwi TMP1, TMP1, 3 1425 | slwi TMP1, TMP1, 3
1426 |.if FPU
1280 | la TMP2, CFUNC:RB->upvalue 1427 | la TMP2, CFUNC:RB->upvalue
1281 | lfdx FARG1, TMP2, TMP1 1428 | lfdx FARG1, TMP2, TMP1
1429 |.else
1430 | add TMP1, CFUNC:RB, TMP1
1431 | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
1432 | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
1433 |.endif
1282 | b ->fff_resn 1434 | b ->fff_resn
1283 | 1435 |
1284 |//-- Base library: getters and setters --------------------------------- 1436 |//-- Base library: getters and setters ---------------------------------
@@ -1356,7 +1508,12 @@ static void build_subroutines(BuildCtx *ctx)
1356 | mr CARG1, L 1508 | mr CARG1, L
1357 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1509 | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1358 | // Returns cTValue *. 1510 | // Returns cTValue *.
1511 |.if FPU
1359 | lfd FARG1, 0(CRET1) 1512 | lfd FARG1, 0(CRET1)
1513 |.else
1514 | lwz CARG2, 4(CRET1)
1515 | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
1516 |.endif
1360 | b ->fff_resn 1517 | b ->fff_resn
1361 | 1518 |
1362 |//-- Base library: conversions ------------------------------------------ 1519 |//-- Base library: conversions ------------------------------------------
@@ -1365,7 +1522,11 @@ static void build_subroutines(BuildCtx *ctx)
1365 | // Only handles the number case inline (without a base argument). 1522 | // Only handles the number case inline (without a base argument).
1366 | cmplwi NARGS8:RC, 8 1523 | cmplwi NARGS8:RC, 8
1367 | lwz CARG1, 0(BASE) 1524 | lwz CARG1, 0(BASE)
1525 |.if FPU
1368 | lfd FARG1, 0(BASE) 1526 | lfd FARG1, 0(BASE)
1527 |.else
1528 | lwz CARG2, 4(BASE)
1529 |.endif
1369 | bne ->fff_fallback // Exactly one argument. 1530 | bne ->fff_fallback // Exactly one argument.
1370 | checknum CARG1; bgt ->fff_fallback 1531 | checknum CARG1; bgt ->fff_fallback
1371 | b ->fff_resn 1532 | b ->fff_resn
@@ -1387,9 +1548,9 @@ static void build_subroutines(BuildCtx *ctx)
1387 | mr CARG1, L 1548 | mr CARG1, L
1388 | mr CARG2, BASE 1549 | mr CARG2, BASE
1389 |.if DUALNUM 1550 |.if DUALNUM
1390 | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) 1551 | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
1391 |.else 1552 |.else
1392 | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) 1553 | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1393 |.endif 1554 |.endif
1394 | // Returns GCstr *. 1555 | // Returns GCstr *.
1395 | li CARG3, LJ_TSTR 1556 | li CARG3, LJ_TSTR
@@ -1416,12 +1577,23 @@ static void build_subroutines(BuildCtx *ctx)
1416 | cmplwi CRET1, 0 1577 | cmplwi CRET1, 0
1417 | li CARG3, LJ_TNIL 1578 | li CARG3, LJ_TNIL
1418 | beq ->fff_restv // End of traversal: return nil. 1579 | beq ->fff_restv // End of traversal: return nil.
1419 | lfd f0, 8(BASE) // Copy key and value to results.
1420 | la RA, -8(BASE) 1580 | la RA, -8(BASE)
1581 |.if FPU
1582 | lfd f0, 8(BASE) // Copy key and value to results.
1421 | lfd f1, 16(BASE) 1583 | lfd f1, 16(BASE)
1422 | stfd f0, 0(RA) 1584 | stfd f0, 0(RA)
1423 | li RD, (2+1)*8
1424 | stfd f1, 8(RA) 1585 | stfd f1, 8(RA)
1586 |.else
1587 | lwz CARG1, 8(BASE)
1588 | lwz CARG2, 12(BASE)
1589 | lwz CARG3, 16(BASE)
1590 | lwz CARG4, 20(BASE)
1591 | stw CARG1, 0(RA)
1592 | stw CARG2, 4(RA)
1593 | stw CARG3, 8(RA)
1594 | stw CARG4, 12(RA)
1595 |.endif
1596 | li RD, (2+1)*8
1425 | b ->fff_res 1597 | b ->fff_res
1426 | 1598 |
1427 |.ffunc_1 pairs 1599 |.ffunc_1 pairs
@@ -1430,17 +1602,32 @@ static void build_subroutines(BuildCtx *ctx)
1430 | bne ->fff_fallback 1602 | bne ->fff_fallback
1431#if LJ_52 1603#if LJ_52
1432 | lwz TAB:TMP2, TAB:CARG1->metatable 1604 | lwz TAB:TMP2, TAB:CARG1->metatable
1605 |.if FPU
1433 | lfd f0, CFUNC:RB->upvalue[0] 1606 | lfd f0, CFUNC:RB->upvalue[0]
1607 |.else
1608 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1609 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1610 |.endif
1434 | cmplwi TAB:TMP2, 0 1611 | cmplwi TAB:TMP2, 0
1435 | la RA, -8(BASE) 1612 | la RA, -8(BASE)
1436 | bne ->fff_fallback 1613 | bne ->fff_fallback
1437#else 1614#else
1615 |.if FPU
1438 | lfd f0, CFUNC:RB->upvalue[0] 1616 | lfd f0, CFUNC:RB->upvalue[0]
1617 |.else
1618 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1619 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1620 |.endif
1439 | la RA, -8(BASE) 1621 | la RA, -8(BASE)
1440#endif 1622#endif
1441 | stw TISNIL, 8(BASE) 1623 | stw TISNIL, 8(BASE)
1442 | li RD, (3+1)*8 1624 | li RD, (3+1)*8
1625 |.if FPU
1443 | stfd f0, 0(RA) 1626 | stfd f0, 0(RA)
1627 |.else
1628 | stw TMP0, 0(RA)
1629 | stw TMP1, 4(RA)
1630 |.endif
1444 | b ->fff_res 1631 | b ->fff_res
1445 | 1632 |
1446 |.ffunc ipairs_aux 1633 |.ffunc ipairs_aux
@@ -1486,14 +1673,24 @@ static void build_subroutines(BuildCtx *ctx)
1486 | stfd FARG2, 0(RA) 1673 | stfd FARG2, 0(RA)
1487 |.endif 1674 |.endif
1488 | ble >2 // Not in array part? 1675 | ble >2 // Not in array part?
1676 |.if FPU
1489 | lwzx TMP2, TMP1, TMP3 1677 | lwzx TMP2, TMP1, TMP3
1490 | lfdx f0, TMP1, TMP3 1678 | lfdx f0, TMP1, TMP3
1679 |.else
1680 | lwzux TMP2, TMP1, TMP3
1681 | lwz TMP3, 4(TMP1)
1682 |.endif
1491 |1: 1683 |1:
1492 | checknil TMP2 1684 | checknil TMP2
1493 | li RD, (0+1)*8 1685 | li RD, (0+1)*8
1494 | beq ->fff_res // End of iteration, return 0 results. 1686 | beq ->fff_res // End of iteration, return 0 results.
1495 | li RD, (2+1)*8 1687 | li RD, (2+1)*8
1688 |.if FPU
1496 | stfd f0, 8(RA) 1689 | stfd f0, 8(RA)
1690 |.else
1691 | stw TMP2, 8(RA)
1692 | stw TMP3, 12(RA)
1693 |.endif
1497 | b ->fff_res 1694 | b ->fff_res
1498 |2: // Check for empty hash part first. Otherwise call C function. 1695 |2: // Check for empty hash part first. Otherwise call C function.
1499 | lwz TMP0, TAB:CARG1->hmask 1696 | lwz TMP0, TAB:CARG1->hmask
@@ -1507,7 +1704,11 @@ static void build_subroutines(BuildCtx *ctx)
1507 | li RD, (0+1)*8 1704 | li RD, (0+1)*8
1508 | beq ->fff_res 1705 | beq ->fff_res
1509 | lwz TMP2, 0(CRET1) 1706 | lwz TMP2, 0(CRET1)
1707 |.if FPU
1510 | lfd f0, 0(CRET1) 1708 | lfd f0, 0(CRET1)
1709 |.else
1710 | lwz TMP3, 4(CRET1)
1711 |.endif
1511 | b <1 1712 | b <1
1512 | 1713 |
1513 |.ffunc_1 ipairs 1714 |.ffunc_1 ipairs
@@ -1516,12 +1717,22 @@ static void build_subroutines(BuildCtx *ctx)
1516 | bne ->fff_fallback 1717 | bne ->fff_fallback
1517#if LJ_52 1718#if LJ_52
1518 | lwz TAB:TMP2, TAB:CARG1->metatable 1719 | lwz TAB:TMP2, TAB:CARG1->metatable
1720 |.if FPU
1519 | lfd f0, CFUNC:RB->upvalue[0] 1721 | lfd f0, CFUNC:RB->upvalue[0]
1722 |.else
1723 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1724 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1725 |.endif
1520 | cmplwi TAB:TMP2, 0 1726 | cmplwi TAB:TMP2, 0
1521 | la RA, -8(BASE) 1727 | la RA, -8(BASE)
1522 | bne ->fff_fallback 1728 | bne ->fff_fallback
1523#else 1729#else
1730 |.if FPU
1524 | lfd f0, CFUNC:RB->upvalue[0] 1731 | lfd f0, CFUNC:RB->upvalue[0]
1732 |.else
1733 | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
1734 | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
1735 |.endif
1525 | la RA, -8(BASE) 1736 | la RA, -8(BASE)
1526#endif 1737#endif
1527 |.if DUALNUM 1738 |.if DUALNUM
@@ -1531,7 +1742,12 @@ static void build_subroutines(BuildCtx *ctx)
1531 |.endif 1742 |.endif
1532 | stw ZERO, 12(BASE) 1743 | stw ZERO, 12(BASE)
1533 | li RD, (3+1)*8 1744 | li RD, (3+1)*8
1745 |.if FPU
1534 | stfd f0, 0(RA) 1746 | stfd f0, 0(RA)
1747 |.else
1748 | stw TMP0, 0(RA)
1749 | stw TMP1, 4(RA)
1750 |.endif
1535 | b ->fff_res 1751 | b ->fff_res
1536 | 1752 |
1537 |//-- Base library: catch errors ---------------------------------------- 1753 |//-- Base library: catch errors ----------------------------------------
@@ -1550,19 +1766,32 @@ static void build_subroutines(BuildCtx *ctx)
1550 | 1766 |
1551 |.ffunc xpcall 1767 |.ffunc xpcall
1552 | cmplwi NARGS8:RC, 16 1768 | cmplwi NARGS8:RC, 16
1553 | lwz CARG4, 8(BASE) 1769 | lwz CARG3, 8(BASE)
1770 |.if FPU
1554 | lfd FARG2, 8(BASE) 1771 | lfd FARG2, 8(BASE)
1555 | lfd FARG1, 0(BASE) 1772 | lfd FARG1, 0(BASE)
1773 |.else
1774 | lwz CARG1, 0(BASE)
1775 | lwz CARG2, 4(BASE)
1776 | lwz CARG4, 12(BASE)
1777 |.endif
1556 | blt ->fff_fallback 1778 | blt ->fff_fallback
1557 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) 1779 | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
1558 | mr TMP2, BASE 1780 | mr TMP2, BASE
1559 | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. 1781 | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
1560 | la BASE, 16(BASE) 1782 | la BASE, 16(BASE)
1561 | // Remember active hook before pcall. 1783 | // Remember active hook before pcall.
1562 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 1784 | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
1785 |.if FPU
1563 | stfd FARG2, 0(TMP2) // Swap function and traceback. 1786 | stfd FARG2, 0(TMP2) // Swap function and traceback.
1564 | subi NARGS8:RC, NARGS8:RC, 16
1565 | stfd FARG1, 8(TMP2) 1787 | stfd FARG1, 8(TMP2)
1788 |.else
1789 | stw CARG3, 0(TMP2)
1790 | stw CARG4, 4(TMP2)
1791 | stw CARG1, 8(TMP2)
1792 | stw CARG2, 12(TMP2)
1793 |.endif
1794 | subi NARGS8:RC, NARGS8:RC, 16
1566 | addi PC, TMP1, 16+FRAME_PCALL 1795 | addi PC, TMP1, 16+FRAME_PCALL
1567 | b ->vm_call_dispatch 1796 | b ->vm_call_dispatch
1568 | 1797 |
@@ -1605,9 +1834,21 @@ static void build_subroutines(BuildCtx *ctx)
1605 | stp BASE, L->top 1834 | stp BASE, L->top
1606 |2: // Move args to coroutine. 1835 |2: // Move args to coroutine.
1607 | cmpw TMP1, NARGS8:RC 1836 | cmpw TMP1, NARGS8:RC
1837 |.if FPU
1608 | lfdx f0, BASE, TMP1 1838 | lfdx f0, BASE, TMP1
1839 |.else
1840 | add CARG3, BASE, TMP1
1841 | lwz TMP2, 0(CARG3)
1842 | lwz TMP3, 4(CARG3)
1843 |.endif
1609 | beq >3 1844 | beq >3
1845 |.if FPU
1610 | stfdx f0, CARG2, TMP1 1846 | stfdx f0, CARG2, TMP1
1847 |.else
1848 | add CARG3, CARG2, TMP1
1849 | stw TMP2, 0(CARG3)
1850 | stw TMP3, 4(CARG3)
1851 |.endif
1611 | addi TMP1, TMP1, 8 1852 | addi TMP1, TMP1, 8
1612 | b <2 1853 | b <2
1613 |3: 1854 |3:
@@ -1622,6 +1863,7 @@ static void build_subroutines(BuildCtx *ctx)
1622 | lp TMP3, L:SAVE0->top 1863 | lp TMP3, L:SAVE0->top
1623 | li_vmstate INTERP 1864 | li_vmstate INTERP
1624 | lp BASE, L->base 1865 | lp BASE, L->base
1866 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
1625 | st_vmstate 1867 | st_vmstate
1626 | bgt >8 1868 | bgt >8
1627 | sub RD, TMP3, TMP2 1869 | sub RD, TMP3, TMP2
@@ -1637,8 +1879,17 @@ static void build_subroutines(BuildCtx *ctx)
1637 | stp TMP2, L:SAVE0->top // Clear coroutine stack. 1879 | stp TMP2, L:SAVE0->top // Clear coroutine stack.
1638 |5: // Move results from coroutine. 1880 |5: // Move results from coroutine.
1639 | cmplw TMP1, TMP3 1881 | cmplw TMP1, TMP3
1882 |.if FPU
1640 | lfdx f0, TMP2, TMP1 1883 | lfdx f0, TMP2, TMP1
1641 | stfdx f0, BASE, TMP1 1884 | stfdx f0, BASE, TMP1
1885 |.else
1886 | add CARG3, TMP2, TMP1
1887 | lwz CARG1, 0(CARG3)
1888 | lwz CARG2, 4(CARG3)
1889 | add CARG3, BASE, TMP1
1890 | stw CARG1, 0(CARG3)
1891 | stw CARG2, 4(CARG3)
1892 |.endif
1642 | addi TMP1, TMP1, 8 1893 | addi TMP1, TMP1, 8
1643 | bne <5 1894 | bne <5
1644 |6: 1895 |6:
@@ -1663,12 +1914,22 @@ static void build_subroutines(BuildCtx *ctx)
1663 | andix. TMP0, PC, FRAME_TYPE 1914 | andix. TMP0, PC, FRAME_TYPE
1664 | la TMP3, -8(TMP3) 1915 | la TMP3, -8(TMP3)
1665 | li TMP1, LJ_TFALSE 1916 | li TMP1, LJ_TFALSE
1917 |.if FPU
1666 | lfd f0, 0(TMP3) 1918 | lfd f0, 0(TMP3)
1919 |.else
1920 | lwz CARG1, 0(TMP3)
1921 | lwz CARG2, 4(TMP3)
1922 |.endif
1667 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. 1923 | stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
1668 | li RD, (2+1)*8 1924 | li RD, (2+1)*8
1669 | stw TMP1, -8(BASE) // Prepend false to results. 1925 | stw TMP1, -8(BASE) // Prepend false to results.
1670 | la RA, -8(BASE) 1926 | la RA, -8(BASE)
1927 |.if FPU
1671 | stfd f0, 0(BASE) // Copy error message. 1928 | stfd f0, 0(BASE) // Copy error message.
1929 |.else
1930 | stw CARG1, 0(BASE) // Copy error message.
1931 | stw CARG2, 4(BASE)
1932 |.endif
1672 | b <7 1933 | b <7
1673 |.else 1934 |.else
1674 | mr CARG1, L 1935 | mr CARG1, L
@@ -1847,7 +2108,12 @@ static void build_subroutines(BuildCtx *ctx)
1847 | lus CARG1, 0x8000 // -(2^31). 2108 | lus CARG1, 0x8000 // -(2^31).
1848 | beqy ->fff_resi 2109 | beqy ->fff_resi
1849 |5: 2110 |5:
2111 |.if FPU
1850 | lfd FARG1, 0(BASE) 2112 | lfd FARG1, 0(BASE)
2113 |.else
2114 | lwz CARG1, 0(BASE)
2115 | lwz CARG2, 4(BASE)
2116 |.endif
1851 | blex func 2117 | blex func
1852 | b ->fff_resn 2118 | b ->fff_resn
1853 |.endmacro 2119 |.endmacro
@@ -1871,10 +2137,14 @@ static void build_subroutines(BuildCtx *ctx)
1871 | 2137 |
1872 |.ffunc math_log 2138 |.ffunc math_log
1873 | cmplwi NARGS8:RC, 8 2139 | cmplwi NARGS8:RC, 8
1874 | lwz CARG3, 0(BASE) 2140 | lwz CARG1, 0(BASE)
1875 | lfd FARG1, 0(BASE)
1876 | bne ->fff_fallback // Need exactly 1 argument. 2141 | bne ->fff_fallback // Need exactly 1 argument.
1877 | checknum CARG3; bge ->fff_fallback 2142 | checknum CARG1; bge ->fff_fallback
2143 |.if FPU
2144 | lfd FARG1, 0(BASE)
2145 |.else
2146 | lwz CARG2, 4(BASE)
2147 |.endif
1878 | blex log 2148 | blex log
1879 | b ->fff_resn 2149 | b ->fff_resn
1880 | 2150 |
@@ -1893,26 +2163,27 @@ static void build_subroutines(BuildCtx *ctx)
1893 | math_extern2 atan2 2163 | math_extern2 atan2
1894 | math_extern2 fmod 2164 | math_extern2 fmod
1895 | 2165 |
1896 |->ff_math_deg:
1897 |.ffunc_n math_rad
1898 | lfd FARG2, CFUNC:RB->upvalue[0]
1899 | fmul FARG1, FARG1, FARG2
1900 | b ->fff_resn
1901 |
1902 |.if DUALNUM 2166 |.if DUALNUM
1903 |.ffunc math_ldexp 2167 |.ffunc math_ldexp
1904 | cmplwi NARGS8:RC, 16 2168 | cmplwi NARGS8:RC, 16
1905 | lwz CARG3, 0(BASE) 2169 | lwz TMP0, 0(BASE)
2170 |.if FPU
1906 | lfd FARG1, 0(BASE) 2171 | lfd FARG1, 0(BASE)
1907 | lwz CARG4, 8(BASE) 2172 |.else
2173 | lwz CARG1, 0(BASE)
2174 | lwz CARG2, 4(BASE)
2175 |.endif
2176 | lwz TMP1, 8(BASE)
1908 |.if GPR64 2177 |.if GPR64
1909 | lwz CARG2, 12(BASE) 2178 | lwz CARG2, 12(BASE)
1910 |.else 2179 |.elif FPU
1911 | lwz CARG1, 12(BASE) 2180 | lwz CARG1, 12(BASE)
2181 |.else
2182 | lwz CARG3, 12(BASE)
1912 |.endif 2183 |.endif
1913 | blt ->fff_fallback 2184 | blt ->fff_fallback
1914 | checknum CARG3; bge ->fff_fallback 2185 | checknum TMP0; bge ->fff_fallback
1915 | checknum CARG4; bne ->fff_fallback 2186 | checknum TMP1; bne ->fff_fallback
1916 |.else 2187 |.else
1917 |.ffunc_nn math_ldexp 2188 |.ffunc_nn math_ldexp
1918 |.if GPR64 2189 |.if GPR64
@@ -1927,8 +2198,10 @@ static void build_subroutines(BuildCtx *ctx)
1927 |.ffunc_n math_frexp 2198 |.ffunc_n math_frexp
1928 |.if GPR64 2199 |.if GPR64
1929 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) 2200 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
1930 |.else 2201 |.elif FPU
1931 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) 2202 | la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
2203 |.else
2204 | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
1932 |.endif 2205 |.endif
1933 | lwz PC, FRAME_PC(BASE) 2206 | lwz PC, FRAME_PC(BASE)
1934 | blex frexp 2207 | blex frexp
@@ -1937,7 +2210,12 @@ static void build_subroutines(BuildCtx *ctx)
1937 |.if not DUALNUM 2210 |.if not DUALNUM
1938 | tonum_i FARG2, TMP1 2211 | tonum_i FARG2, TMP1
1939 |.endif 2212 |.endif
2213 |.if FPU
1940 | stfd FARG1, 0(RA) 2214 | stfd FARG1, 0(RA)
2215 |.else
2216 | stw CRET1, 0(RA)
2217 | stw CRET2, 4(RA)
2218 |.endif
1941 | li RD, (2+1)*8 2219 | li RD, (2+1)*8
1942 |.if DUALNUM 2220 |.if DUALNUM
1943 | stw TISNUM, 8(RA) 2221 | stw TISNUM, 8(RA)
@@ -1950,13 +2228,20 @@ static void build_subroutines(BuildCtx *ctx)
1950 |.ffunc_n math_modf 2228 |.ffunc_n math_modf
1951 |.if GPR64 2229 |.if GPR64
1952 | la CARG2, -8(BASE) 2230 | la CARG2, -8(BASE)
1953 |.else 2231 |.elif FPU
1954 | la CARG1, -8(BASE) 2232 | la CARG1, -8(BASE)
2233 |.else
2234 | la CARG3, -8(BASE)
1955 |.endif 2235 |.endif
1956 | lwz PC, FRAME_PC(BASE) 2236 | lwz PC, FRAME_PC(BASE)
1957 | blex modf 2237 | blex modf
1958 | la RA, -8(BASE) 2238 | la RA, -8(BASE)
2239 |.if FPU
1959 | stfd FARG1, 0(BASE) 2240 | stfd FARG1, 0(BASE)
2241 |.else
2242 | stw CRET1, 0(BASE)
2243 | stw CRET2, 4(BASE)
2244 |.endif
1960 | li RD, (2+1)*8 2245 | li RD, (2+1)*8
1961 | b ->fff_res 2246 | b ->fff_res
1962 | 2247 |
@@ -1964,13 +2249,13 @@ static void build_subroutines(BuildCtx *ctx)
1964 |.if DUALNUM 2249 |.if DUALNUM
1965 | .ffunc_1 name 2250 | .ffunc_1 name
1966 | checknum CARG3 2251 | checknum CARG3
1967 | addi TMP1, BASE, 8 2252 | addi SAVE0, BASE, 8
1968 | add TMP2, BASE, NARGS8:RC 2253 | add SAVE1, BASE, NARGS8:RC
1969 | bne >4 2254 | bne >4
1970 |1: // Handle integers. 2255 |1: // Handle integers.
1971 | lwz CARG4, 0(TMP1) 2256 | lwz CARG4, 0(SAVE0)
1972 | cmplw cr1, TMP1, TMP2 2257 | cmplw cr1, SAVE0, SAVE1
1973 | lwz CARG2, 4(TMP1) 2258 | lwz CARG2, 4(SAVE0)
1974 | bge cr1, ->fff_resi 2259 | bge cr1, ->fff_resi
1975 | checknum CARG4 2260 | checknum CARG4
1976 | xoris TMP0, CARG1, 0x8000 2261 | xoris TMP0, CARG1, 0x8000
@@ -1987,36 +2272,76 @@ static void build_subroutines(BuildCtx *ctx)
1987 |.if GPR64 2272 |.if GPR64
1988 | rldicl CARG1, CARG1, 0, 32 2273 | rldicl CARG1, CARG1, 0, 32
1989 |.endif 2274 |.endif
1990 | addi TMP1, TMP1, 8 2275 | addi SAVE0, SAVE0, 8
1991 | b <1 2276 | b <1
1992 |3: 2277 |3:
1993 | bge ->fff_fallback 2278 | bge ->fff_fallback
1994 | // Convert intermediate result to number and continue below. 2279 | // Convert intermediate result to number and continue below.
2280 |.if FPU
1995 | tonum_i FARG1, CARG1 2281 | tonum_i FARG1, CARG1
1996 | lfd FARG2, 0(TMP1) 2282 | lfd FARG2, 0(SAVE0)
2283 |.else
2284 | mr CARG2, CARG1
2285 | bl ->vm_sfi2d_1
2286 | lwz CARG3, 0(SAVE0)
2287 | lwz CARG4, 4(SAVE0)
2288 |.endif
1997 | b >6 2289 | b >6
1998 |4: 2290 |4:
2291 |.if FPU
1999 | lfd FARG1, 0(BASE) 2292 | lfd FARG1, 0(BASE)
2293 |.else
2294 | lwz CARG1, 0(BASE)
2295 | lwz CARG2, 4(BASE)
2296 |.endif
2000 | bge ->fff_fallback 2297 | bge ->fff_fallback
2001 |5: // Handle numbers. 2298 |5: // Handle numbers.
2002 | lwz CARG4, 0(TMP1) 2299 | lwz CARG3, 0(SAVE0)
2003 | cmplw cr1, TMP1, TMP2 2300 | cmplw cr1, SAVE0, SAVE1
2004 | lfd FARG2, 0(TMP1) 2301 |.if FPU
2302 | lfd FARG2, 0(SAVE0)
2303 |.else
2304 | lwz CARG4, 4(SAVE0)
2305 |.endif
2005 | bge cr1, ->fff_resn 2306 | bge cr1, ->fff_resn
2006 | checknum CARG4; bge >7 2307 | checknum CARG3; bge >7
2007 |6: 2308 |6:
2309 | addi SAVE0, SAVE0, 8
2310 |.if FPU
2008 | fsub f0, FARG1, FARG2 2311 | fsub f0, FARG1, FARG2
2009 | addi TMP1, TMP1, 8
2010 |.if ismax 2312 |.if ismax
2011 | fsel FARG1, f0, FARG1, FARG2 2313 | fsel FARG1, f0, FARG1, FARG2
2012 |.else 2314 |.else
2013 | fsel FARG1, f0, FARG2, FARG1 2315 | fsel FARG1, f0, FARG2, FARG1
2014 |.endif 2316 |.endif
2317 |.else
2318 | stw CARG1, SFSAVE_1
2319 | stw CARG2, SFSAVE_2
2320 | stw CARG3, SFSAVE_3
2321 | stw CARG4, SFSAVE_4
2322 | blex __ledf2
2323 | cmpwi CRET1, 0
2324 |.if ismax
2325 | blt >8
2326 |.else
2327 | bge >8
2328 |.endif
2329 | lwz CARG1, SFSAVE_1
2330 | lwz CARG2, SFSAVE_2
2331 | b <5
2332 |8:
2333 | lwz CARG1, SFSAVE_3
2334 | lwz CARG2, SFSAVE_4
2335 |.endif
2015 | b <5 2336 | b <5
2016 |7: // Convert integer to number and continue above. 2337 |7: // Convert integer to number and continue above.
2017 | lwz CARG2, 4(TMP1) 2338 | lwz CARG3, 4(SAVE0)
2018 | bne ->fff_fallback 2339 | bne ->fff_fallback
2019 | tonum_i FARG2, CARG2 2340 |.if FPU
2341 | tonum_i FARG2, CARG3
2342 |.else
2343 | bl ->vm_sfi2d_2
2344 |.endif
2020 | b <6 2345 | b <6
2021 |.else 2346 |.else
2022 | .ffunc_n name 2347 | .ffunc_n name
@@ -2044,11 +2369,6 @@ static void build_subroutines(BuildCtx *ctx)
2044 | 2369 |
2045 |//-- String library ----------------------------------------------------- 2370 |//-- String library -----------------------------------------------------
2046 | 2371 |
2047 |.ffunc_1 string_len
2048 | checkstr CARG3; bne ->fff_fallback
2049 | lwz CRET1, STR:CARG1->len
2050 | b ->fff_resi
2051 |
2052 |.ffunc string_byte // Only handle the 1-arg case here. 2372 |.ffunc string_byte // Only handle the 1-arg case here.
2053 | cmplwi NARGS8:RC, 8 2373 | cmplwi NARGS8:RC, 8
2054 | lwz CARG3, 0(BASE) 2374 | lwz CARG3, 0(BASE)
@@ -2103,6 +2423,7 @@ static void build_subroutines(BuildCtx *ctx)
2103 | stp BASE, L->base 2423 | stp BASE, L->base
2104 | stw PC, SAVE_PC 2424 | stw PC, SAVE_PC
2105 | bl extern lj_str_new // (lua_State *L, char *str, size_t l) 2425 | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
2426 |->fff_resstr:
2106 | // Returns GCstr *. 2427 | // Returns GCstr *.
2107 | lp BASE, L->base 2428 | lp BASE, L->base
2108 | li CARG3, LJ_TSTR 2429 | li CARG3, LJ_TSTR
@@ -2180,114 +2501,29 @@ static void build_subroutines(BuildCtx *ctx)
2180 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) 2501 | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
2181 | b <3 2502 | b <3
2182 | 2503 |
2183 |.ffunc string_rep // Only handle the 1-char case inline. 2504 |.macro ffstring_op, name
2184 | ffgccheck 2505 | .ffunc string_ .. name
2185 | cmplwi NARGS8:RC, 16
2186 | lwz TMP0, 0(BASE)
2187 | lwz STR:CARG1, 4(BASE)
2188 | lwz CARG4, 8(BASE)
2189 |.if DUALNUM
2190 | lwz CARG3, 12(BASE)
2191 |.else
2192 | lfd FARG2, 8(BASE)
2193 |.endif
2194 | bne ->fff_fallback // Exactly 2 arguments.
2195 | checkstr TMP0; bne ->fff_fallback
2196 |.if DUALNUM
2197 | checknum CARG4; bne ->fff_fallback
2198 |.else
2199 | checknum CARG4; bge ->fff_fallback
2200 | toint CARG3, FARG2
2201 |.endif
2202 | lwz TMP0, STR:CARG1->len
2203 | cmpwi CARG3, 0
2204 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2205 | ble >2 // Count <= 0? (or non-int)
2206 | cmplwi TMP0, 1
2207 | subi TMP2, CARG3, 1
2208 | blt >2 // Zero length string?
2209 | cmplw cr1, TMP1, CARG3
2210 | bne ->fff_fallback // Fallback for > 1-char strings.
2211 | lbz TMP0, STR:CARG1[1]
2212 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2213 | blt cr1, ->fff_fallback
2214 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2215 | cmplwi TMP2, 0
2216 | stbx TMP0, CARG2, TMP2
2217 | subi TMP2, TMP2, 1
2218 | bne <1
2219 | b ->fff_newstr
2220 |2: // Return empty string.
2221 | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
2222 | li CARG3, LJ_TSTR
2223 | b ->fff_restv
2224 |
2225 |.ffunc string_reverse
2226 | ffgccheck
2227 | cmplwi NARGS8:RC, 8
2228 | lwz CARG3, 0(BASE)
2229 | lwz STR:CARG1, 4(BASE)
2230 | blt ->fff_fallback
2231 | checkstr CARG3
2232 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
2233 | bne ->fff_fallback
2234 | lwz CARG3, STR:CARG1->len
2235 | la CARG1, #STR(STR:CARG1)
2236 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
2237 | li TMP2, 0
2238 | cmplw TMP1, CARG3
2239 | subi TMP3, CARG3, 1
2240 | blt ->fff_fallback
2241 |1: // Reverse string copy.
2242 | cmpwi TMP3, 0
2243 | lbzx TMP1, CARG1, TMP2
2244 | blty ->fff_newstr
2245 | stbx TMP1, CARG2, TMP3
2246 | subi TMP3, TMP3, 1
2247 | addi TMP2, TMP2, 1
2248 | b <1
2249 |
2250 |.macro ffstring_case, name, lo
2251 | .ffunc name
2252 | ffgccheck 2506 | ffgccheck
2253 | cmplwi NARGS8:RC, 8 2507 | cmplwi NARGS8:RC, 8
2254 | lwz CARG3, 0(BASE) 2508 | lwz CARG3, 0(BASE)
2255 | lwz STR:CARG1, 4(BASE) 2509 | lwz STR:CARG2, 4(BASE)
2256 | blt ->fff_fallback 2510 | blt ->fff_fallback
2257 | checkstr CARG3 2511 | checkstr CARG3
2258 | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) 2512 | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
2259 | bne ->fff_fallback 2513 | bne ->fff_fallback
2260 | lwz CARG3, STR:CARG1->len 2514 | lwz TMP0, SBUF:CARG1->b
2261 | la CARG1, #STR(STR:CARG1) 2515 | stw L, SBUF:CARG1->L
2262 | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) 2516 | stp BASE, L->base
2263 | cmplw TMP1, CARG3 2517 | stw PC, SAVE_PC
2264 | li TMP2, 0 2518 | stw TMP0, SBUF:CARG1->p
2265 | blt ->fff_fallback 2519 | bl extern lj_buf_putstr_ .. name
2266 |1: // ASCII case conversion. 2520 | bl extern lj_buf_tostr
2267 | cmplw TMP2, CARG3 2521 | b ->fff_resstr
2268 | lbzx TMP1, CARG1, TMP2
2269 | bgey ->fff_newstr
2270 | subi TMP0, TMP1, lo
2271 | xori TMP3, TMP1, 0x20
2272 | addic TMP0, TMP0, -26
2273 | subfe TMP3, TMP3, TMP3
2274 | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
2275 | xor TMP1, TMP1, TMP3
2276 | stbx TMP1, CARG2, TMP2
2277 | addi TMP2, TMP2, 1
2278 | b <1
2279 |.endmacro 2522 |.endmacro
2280 | 2523 |
2281 |ffstring_case string_lower, 65 2524 |ffstring_op reverse
2282 |ffstring_case string_upper, 97 2525 |ffstring_op lower
2283 | 2526 |ffstring_op upper
2284 |//-- Table library ------------------------------------------------------
2285 |
2286 |.ffunc_1 table_getn
2287 | checktab CARG3; bne ->fff_fallback
2288 | bl extern lj_tab_len // (GCtab *t)
2289 | // Returns uint32_t (but less than 2^31).
2290 | b ->fff_resi
2291 | 2527 |
2292 |//-- Bit library -------------------------------------------------------- 2528 |//-- Bit library --------------------------------------------------------
2293 | 2529 |
@@ -2305,28 +2541,37 @@ static void build_subroutines(BuildCtx *ctx)
2305 | 2541 |
2306 |.macro .ffunc_bit_op, name, ins 2542 |.macro .ffunc_bit_op, name, ins
2307 | .ffunc_bit name 2543 | .ffunc_bit name
2308 | addi TMP1, BASE, 8 2544 | addi SAVE0, BASE, 8
2309 | add TMP2, BASE, NARGS8:RC 2545 | add SAVE1, BASE, NARGS8:RC
2310 |1: 2546 |1:
2311 | lwz CARG4, 0(TMP1) 2547 | lwz CARG4, 0(SAVE0)
2312 | cmplw cr1, TMP1, TMP2 2548 | cmplw cr1, SAVE0, SAVE1
2313 |.if DUALNUM 2549 |.if DUALNUM
2314 | lwz CARG2, 4(TMP1) 2550 | lwz CARG2, 4(SAVE0)
2315 |.else 2551 |.else
2316 | lfd FARG1, 0(TMP1) 2552 | lfd FARG1, 0(SAVE0)
2317 |.endif 2553 |.endif
2318 | bgey cr1, ->fff_resi 2554 | bgey cr1, ->fff_resi
2319 | checknum CARG4 2555 | checknum CARG4
2320 |.if DUALNUM 2556 |.if DUALNUM
2557 |.if FPU
2321 | bnel ->fff_bitop_fb 2558 | bnel ->fff_bitop_fb
2322 |.else 2559 |.else
2560 | beq >3
2561 | stw CARG1, SFSAVE_1
2562 | bl ->fff_bitop_fb
2563 | mr CARG2, CARG1
2564 | lwz CARG1, SFSAVE_1
2565 |3:
2566 |.endif
2567 |.else
2323 | fadd FARG1, FARG1, TOBIT 2568 | fadd FARG1, FARG1, TOBIT
2324 | bge ->fff_fallback 2569 | bge ->fff_fallback
2325 | stfd FARG1, TMPD 2570 | stfd FARG1, TMPD
2326 | lwz CARG2, TMPD_LO 2571 | lwz CARG2, TMPD_LO
2327 |.endif 2572 |.endif
2328 | ins CARG1, CARG1, CARG2 2573 | ins CARG1, CARG1, CARG2
2329 | addi TMP1, TMP1, 8 2574 | addi SAVE0, SAVE0, 8
2330 | b <1 2575 | b <1
2331 |.endmacro 2576 |.endmacro
2332 | 2577 |
@@ -2348,7 +2593,14 @@ static void build_subroutines(BuildCtx *ctx)
2348 |.macro .ffunc_bit_sh, name, ins, shmod 2593 |.macro .ffunc_bit_sh, name, ins, shmod
2349 |.if DUALNUM 2594 |.if DUALNUM
2350 | .ffunc_2 bit_..name 2595 | .ffunc_2 bit_..name
2596 |.if FPU
2351 | checknum CARG3; bnel ->fff_tobit_fb 2597 | checknum CARG3; bnel ->fff_tobit_fb
2598 |.else
2599 | checknum CARG3; beq >1
2600 | bl ->fff_tobit_fb
2601 | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
2602 |1:
2603 |.endif
2352 | // Note: no inline conversion from number for 2nd argument! 2604 | // Note: no inline conversion from number for 2nd argument!
2353 | checknum CARG4; bne ->fff_fallback 2605 | checknum CARG4; bne ->fff_fallback
2354 |.else 2606 |.else
@@ -2385,27 +2637,77 @@ static void build_subroutines(BuildCtx *ctx)
2385 |->fff_resn: 2637 |->fff_resn:
2386 | lwz PC, FRAME_PC(BASE) 2638 | lwz PC, FRAME_PC(BASE)
2387 | la RA, -8(BASE) 2639 | la RA, -8(BASE)
2640 |.if FPU
2388 | stfd FARG1, -8(BASE) 2641 | stfd FARG1, -8(BASE)
2642 |.else
2643 | stw CARG1, -8(BASE)
2644 | stw CARG2, -4(BASE)
2645 |.endif
2389 | b ->fff_res1 2646 | b ->fff_res1
2390 | 2647 |
2391 |// Fallback FP number to bit conversion. 2648 |// Fallback FP number to bit conversion.
2392 |->fff_tobit_fb: 2649 |->fff_tobit_fb:
2393 |.if DUALNUM 2650 |.if DUALNUM
2651 |.if FPU
2394 | lfd FARG1, 0(BASE) 2652 | lfd FARG1, 0(BASE)
2395 | bgt ->fff_fallback 2653 | bgt ->fff_fallback
2396 | fadd FARG1, FARG1, TOBIT 2654 | fadd FARG1, FARG1, TOBIT
2397 | stfd FARG1, TMPD 2655 | stfd FARG1, TMPD
2398 | lwz CARG1, TMPD_LO 2656 | lwz CARG1, TMPD_LO
2399 | blr 2657 | blr
2658 |.else
2659 | bgt ->fff_fallback
2660 | mr CARG2, CARG1
2661 | mr CARG1, CARG3
2662 |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
2663 |->vm_tobit:
2664 | slwi TMP2, CARG1, 1
2665 | addis TMP2, TMP2, 0x0020
2666 | cmpwi TMP2, 0
2667 | bge >2
2668 | li TMP1, 0x3e0
2669 | srawi TMP2, TMP2, 21
2670 | not TMP1, TMP1
2671 | sub. TMP2, TMP1, TMP2
2672 | cmpwi cr7, CARG1, 0
2673 | blt >1
2674 | slwi TMP1, CARG1, 11
2675 | srwi TMP0, CARG2, 21
2676 | oris TMP1, TMP1, 0x8000
2677 | or TMP1, TMP1, TMP0
2678 | srw CARG1, TMP1, TMP2
2679 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2680 | neg CARG1, CARG1
2681 | blr
2682 |1:
2683 | addi TMP2, TMP2, 21
2684 | srw TMP1, CARG2, TMP2
2685 | slwi CARG2, CARG1, 12
2686 | subfic TMP2, TMP2, 20
2687 | slw TMP0, CARG2, TMP2
2688 | or CARG1, TMP1, TMP0
2689 | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
2690 | neg CARG1, CARG1
2691 | blr
2692 |2:
2693 | li CARG1, 0
2694 | blr
2695 |.endif
2400 |.endif 2696 |.endif
2401 |->fff_bitop_fb: 2697 |->fff_bitop_fb:
2402 |.if DUALNUM 2698 |.if DUALNUM
2403 | lfd FARG1, 0(TMP1) 2699 |.if FPU
2700 | lfd FARG1, 0(SAVE0)
2404 | bgt ->fff_fallback 2701 | bgt ->fff_fallback
2405 | fadd FARG1, FARG1, TOBIT 2702 | fadd FARG1, FARG1, TOBIT
2406 | stfd FARG1, TMPD 2703 | stfd FARG1, TMPD
2407 | lwz CARG2, TMPD_LO 2704 | lwz CARG2, TMPD_LO
2408 | blr 2705 | blr
2706 |.else
2707 | bgt ->fff_fallback
2708 | mr CARG1, CARG4
2709 | b ->vm_tobit
2710 |.endif
2409 |.endif 2711 |.endif
2410 | 2712 |
2411 |//----------------------------------------------------------------------- 2713 |//-----------------------------------------------------------------------
@@ -2589,15 +2891,88 @@ static void build_subroutines(BuildCtx *ctx)
2589 | mtctr CRET1 2891 | mtctr CRET1
2590 | bctr 2892 | bctr
2591 | 2893 |
2894 |->cont_stitch: // Trace stitching.
2895 |.if JIT
2896 | // RA = resultptr, RB = meta base
2897 | lwz INS, -4(PC)
2898 | lwz TRACE:TMP2, -20(RB) // Save previous trace.
2899 | addic. TMP1, MULTRES, -8
2900 | decode_RA8 RC, INS // Call base.
2901 | beq >2
2902 |1: // Move results down.
2903 |.if FPU
2904 | lfd f0, 0(RA)
2905 |.else
2906 | lwz CARG1, 0(RA)
2907 | lwz CARG2, 4(RA)
2908 |.endif
2909 | addic. TMP1, TMP1, -8
2910 | addi RA, RA, 8
2911 |.if FPU
2912 | stfdx f0, BASE, RC
2913 |.else
2914 | add CARG3, BASE, RC
2915 | stw CARG1, 0(CARG3)
2916 | stw CARG2, 4(CARG3)
2917 |.endif
2918 | addi RC, RC, 8
2919 | bne <1
2920 |2:
2921 | decode_RA8 RA, INS
2922 | decode_RB8 RB, INS
2923 | add RA, RA, RB
2924 |3:
2925 | cmplw RA, RC
2926 | bgt >9 // More results wanted?
2927 |
2928 | lhz TMP3, TRACE:TMP2->traceno
2929 | lhz RD, TRACE:TMP2->link
2930 | cmpw RD, TMP3
2931 | cmpwi cr1, RD, 0
2932 | beq ->cont_nop // Blacklisted.
2933 | slwi RD, RD, 3
2934 | bne cr1, =>BC_JLOOP // Jump to stitched trace.
2935 |
2936 | // Stitch a new trace to the previous trace.
2937 | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
2938 | stp L, DISPATCH_J(L)(DISPATCH)
2939 | stp BASE, L->base
2940 | addi CARG1, DISPATCH, GG_DISP2J
2941 | mr CARG2, PC
2942 | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2943 | lp BASE, L->base
2944 | b ->cont_nop
2945 |
2946 |9:
2947 | stwx TISNIL, BASE, RC
2948 | addi RC, RC, 8
2949 | b <3
2950 |.endif
2951 |
2952 |->vm_profhook: // Dispatch target for profiler hook.
2953#if LJ_HASPROFILE
2954 | mr CARG1, L
2955 | stw MULTRES, SAVE_MULTRES
2956 | mr CARG2, PC
2957 | stp BASE, L->base
2958 | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2959 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2960 | lp BASE, L->base
2961 | subi PC, PC, 4
2962 | b ->cont_nop
2963#endif
2964 |
2592 |//----------------------------------------------------------------------- 2965 |//-----------------------------------------------------------------------
2593 |//-- Trace exit handler ------------------------------------------------- 2966 |//-- Trace exit handler -------------------------------------------------
2594 |//----------------------------------------------------------------------- 2967 |//-----------------------------------------------------------------------
2595 | 2968 |
2596 |.macro savex_, a, b, c, d 2969 |.macro savex_, a, b, c, d
2970 |.if FPU
2597 | stfd f..a, 16+a*8(sp) 2971 | stfd f..a, 16+a*8(sp)
2598 | stfd f..b, 16+b*8(sp) 2972 | stfd f..b, 16+b*8(sp)
2599 | stfd f..c, 16+c*8(sp) 2973 | stfd f..c, 16+c*8(sp)
2600 | stfd f..d, 16+d*8(sp) 2974 | stfd f..d, 16+d*8(sp)
2975 |.endif
2601 |.endmacro 2976 |.endmacro
2602 | 2977 |
2603 |->vm_exit_handler: 2978 |->vm_exit_handler:
@@ -2623,16 +2998,16 @@ static void build_subroutines(BuildCtx *ctx)
2623 | savex_ 20,21,22,23 2998 | savex_ 20,21,22,23
2624 | lhz CARG4, 2(CARG3) // Load trace number. 2999 | lhz CARG4, 2(CARG3) // Load trace number.
2625 | savex_ 24,25,26,27 3000 | savex_ 24,25,26,27
2626 | lwz L, DISPATCH_GL(jit_L)(DISPATCH) 3001 | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
2627 | savex_ 28,29,30,31 3002 | savex_ 28,29,30,31
2628 | sub CARG3, TMP0, CARG3 // Compute exit number. 3003 | sub CARG3, TMP0, CARG3 // Compute exit number.
2629 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) 3004 | lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
2630 | srwi CARG3, CARG3, 2 3005 | srwi CARG3, CARG3, 2
2631 | stw L, DISPATCH_J(L)(DISPATCH) 3006 | stp L, DISPATCH_J(L)(DISPATCH)
2632 | subi CARG3, CARG3, 2 3007 | subi CARG3, CARG3, 2
2633 | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
2634 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
2635 | stp BASE, L->base 3008 | stp BASE, L->base
3009 | stw CARG4, DISPATCH_J(parent)(DISPATCH)
3010 | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
2636 | addi CARG1, DISPATCH, GG_DISP2J 3011 | addi CARG1, DISPATCH, GG_DISP2J
2637 | stw CARG3, DISPATCH_J(exitno)(DISPATCH) 3012 | stw CARG3, DISPATCH_J(exitno)(DISPATCH)
2638 | addi CARG2, sp, 16 3013 | addi CARG2, sp, 16
@@ -2656,28 +3031,29 @@ static void build_subroutines(BuildCtx *ctx)
2656 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. 3031 | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
2657 | lwz L, SAVE_L 3032 | lwz L, SAVE_L
2658 | addi DISPATCH, JGL, -GG_DISP2G-32768 3033 | addi DISPATCH, JGL, -GG_DISP2G-32768
3034 | stp BASE, L->base
2659 |1: 3035 |1:
2660 | cmpwi CARG1, 0 3036 | cmpwi CARG1, 0
2661 | blt >3 // Check for error from exit. 3037 | blt >9 // Check for error from exit.
2662 | lwz LFUNC:TMP1, FRAME_FUNC(BASE) 3038 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2663 | slwi MULTRES, CARG1, 3 3039 | slwi MULTRES, CARG1, 3
2664 | li TMP2, 0 3040 | li TMP2, 0
2665 | stw MULTRES, SAVE_MULTRES 3041 | stw MULTRES, SAVE_MULTRES
2666 | lwz TMP1, LFUNC:TMP1->pc 3042 | lwz TMP1, LFUNC:RB->pc
2667 | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) 3043 | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
2668 | lwz KBASE, PC2PROTO(k)(TMP1) 3044 | lwz KBASE, PC2PROTO(k)(TMP1)
2669 | // Setup type comparison constants. 3045 | // Setup type comparison constants.
2670 | li TISNUM, LJ_TISNUM 3046 | li TISNUM, LJ_TISNUM
2671 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3047 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2672 | stw TMP3, TMPD 3048 | .FPU stw TMP3, TMPD
2673 | li ZERO, 0 3049 | li ZERO, 0
2674 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3050 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2675 | lfs TOBIT, TMPD 3051 | .FPU lfs TOBIT, TMPD
2676 | stw TMP3, TMPD 3052 | .FPU stw TMP3, TMPD
2677 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3053 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2678 | li TISNIL, LJ_TNIL 3054 | li TISNIL, LJ_TNIL
2679 | stw TMP0, TONUM_HI 3055 | .FPU stw TMP0, TONUM_HI
2680 | lfs TONUM, TMPD 3056 | .FPU lfs TONUM, TMPD
2681 | // Modified copy of ins_next which handles function header dispatch, too. 3057 | // Modified copy of ins_next which handles function header dispatch, too.
2682 | lwz INS, 0(PC) 3058 | lwz INS, 0(PC)
2683 | addi PC, PC, 4 3059 | addi PC, PC, 4
@@ -2694,11 +3070,25 @@ static void build_subroutines(BuildCtx *ctx)
2694 | decode_RC8 RC, INS 3070 | decode_RC8 RC, INS
2695 | bctr 3071 | bctr
2696 |2: 3072 |2:
3073 | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
3074 | blt >3
3075 | // Check frame below fast function.
3076 | lwz TMP1, FRAME_PC(BASE)
3077 | andix. TMP0, TMP1, FRAME_TYPE
3078 | bney >3 // Trace stitching continuation?
3079 | // Otherwise set KBASE for Lua function below fast function.
3080 | lwz TMP2, -4(TMP1)
3081 | decode_RA8 TMP0, TMP2
3082 | sub TMP1, BASE, TMP0
3083 | lwz LFUNC:TMP2, -12(TMP1)
3084 | lwz TMP1, LFUNC:TMP2->pc
3085 | lwz KBASE, PC2PROTO(k)(TMP1)
3086 |3:
2697 | subi RC, MULTRES, 8 3087 | subi RC, MULTRES, 8
2698 | add RA, RA, BASE 3088 | add RA, RA, BASE
2699 | bctr 3089 | bctr
2700 | 3090 |
2701 |3: // Rethrow error from the right C frame. 3091 |9: // Rethrow error from the right C frame.
2702 | neg CARG2, CARG1 3092 | neg CARG2, CARG1
2703 | mr CARG1, L 3093 | mr CARG1, L
2704 | bl extern lj_err_throw // (lua_State *L, int errcode) 3094 | bl extern lj_err_throw // (lua_State *L, int errcode)
@@ -2708,7 +3098,35 @@ static void build_subroutines(BuildCtx *ctx)
2708 |//-- Math helper functions ---------------------------------------------- 3098 |//-- Math helper functions ----------------------------------------------
2709 |//----------------------------------------------------------------------- 3099 |//-----------------------------------------------------------------------
2710 | 3100 |
2711 |// NYI: Use internal implementations of floor, ceil, trunc. 3101 |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
3102 |
3103 |.macro sfi2d, AHI, ALO
3104 |.if not FPU
3105 | mr. AHI, ALO
3106 | bclr 12, 2 // Handle zero first.
3107 | srawi TMP0, ALO, 31
3108 | xor TMP1, ALO, TMP0
3109 | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
3110 | cntlzw AHI, TMP1
3111 | andix. TMP0, TMP0, 0x800 // Mask sign bit.
3112 | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
3113 | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
3114 | slwi ALO, TMP1, 21
3115 | or AHI, AHI, TMP0 // Sign | Exponent.
3116 | srwi TMP1, TMP1, 11
3117 | slwi AHI, AHI, 20 // Align left.
3118 | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
3119 | blr
3120 |.endif
3121 |.endmacro
3122 |
3123 |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
3124 |->vm_sfi2d_1:
3125 | sfi2d CARG1, CARG2
3126 |
3127 |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
3128 |->vm_sfi2d_2:
3129 | sfi2d CARG3, CARG4
2712 | 3130 |
2713 |->vm_modi: 3131 |->vm_modi:
2714 | divwo. TMP0, CARG1, CARG2 3132 | divwo. TMP0, CARG1, CARG2
@@ -2776,21 +3194,21 @@ static void build_subroutines(BuildCtx *ctx)
2776 | addi DISPATCH, r12, GG_G2DISP 3194 | addi DISPATCH, r12, GG_G2DISP
2777 | stw r11, CTSTATE->cb.slot 3195 | stw r11, CTSTATE->cb.slot
2778 | stw r3, CTSTATE->cb.gpr[0] 3196 | stw r3, CTSTATE->cb.gpr[0]
2779 | stfd f1, CTSTATE->cb.fpr[0] 3197 | .FPU stfd f1, CTSTATE->cb.fpr[0]
2780 | stw r4, CTSTATE->cb.gpr[1] 3198 | stw r4, CTSTATE->cb.gpr[1]
2781 | stfd f2, CTSTATE->cb.fpr[1] 3199 | .FPU stfd f2, CTSTATE->cb.fpr[1]
2782 | stw r5, CTSTATE->cb.gpr[2] 3200 | stw r5, CTSTATE->cb.gpr[2]
2783 | stfd f3, CTSTATE->cb.fpr[2] 3201 | .FPU stfd f3, CTSTATE->cb.fpr[2]
2784 | stw r6, CTSTATE->cb.gpr[3] 3202 | stw r6, CTSTATE->cb.gpr[3]
2785 | stfd f4, CTSTATE->cb.fpr[3] 3203 | .FPU stfd f4, CTSTATE->cb.fpr[3]
2786 | stw r7, CTSTATE->cb.gpr[4] 3204 | stw r7, CTSTATE->cb.gpr[4]
2787 | stfd f5, CTSTATE->cb.fpr[4] 3205 | .FPU stfd f5, CTSTATE->cb.fpr[4]
2788 | stw r8, CTSTATE->cb.gpr[5] 3206 | stw r8, CTSTATE->cb.gpr[5]
2789 | stfd f6, CTSTATE->cb.fpr[5] 3207 | .FPU stfd f6, CTSTATE->cb.fpr[5]
2790 | stw r9, CTSTATE->cb.gpr[6] 3208 | stw r9, CTSTATE->cb.gpr[6]
2791 | stfd f7, CTSTATE->cb.fpr[6] 3209 | .FPU stfd f7, CTSTATE->cb.fpr[6]
2792 | stw r10, CTSTATE->cb.gpr[7] 3210 | stw r10, CTSTATE->cb.gpr[7]
2793 | stfd f8, CTSTATE->cb.fpr[7] 3211 | .FPU stfd f8, CTSTATE->cb.fpr[7]
2794 | addi TMP0, sp, CFRAME_SPACE+8 3212 | addi TMP0, sp, CFRAME_SPACE+8
2795 | stw TMP0, CTSTATE->cb.stack 3213 | stw TMP0, CTSTATE->cb.stack
2796 | mr CARG1, CTSTATE 3214 | mr CARG1, CTSTATE
@@ -2801,21 +3219,21 @@ static void build_subroutines(BuildCtx *ctx)
2801 | lp BASE, L:CRET1->base 3219 | lp BASE, L:CRET1->base
2802 | li TISNUM, LJ_TISNUM // Setup type comparison constants. 3220 | li TISNUM, LJ_TISNUM // Setup type comparison constants.
2803 | lp RC, L:CRET1->top 3221 | lp RC, L:CRET1->top
2804 | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). 3222 | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
2805 | li ZERO, 0 3223 | li ZERO, 0
2806 | mr L, CRET1 3224 | mr L, CRET1
2807 | stw TMP3, TMPD 3225 | .FPU stw TMP3, TMPD
2808 | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) 3226 | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
2809 | lwz LFUNC:RB, FRAME_FUNC(BASE) 3227 | lwz LFUNC:RB, FRAME_FUNC(BASE)
2810 | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). 3228 | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
2811 | stw TMP0, TONUM_HI 3229 | .FPU stw TMP0, TONUM_HI
2812 | li TISNIL, LJ_TNIL 3230 | li TISNIL, LJ_TNIL
2813 | li_vmstate INTERP 3231 | li_vmstate INTERP
2814 | lfs TOBIT, TMPD 3232 | .FPU lfs TOBIT, TMPD
2815 | stw TMP3, TMPD 3233 | .FPU stw TMP3, TMPD
2816 | sub RC, RC, BASE 3234 | sub RC, RC, BASE
2817 | st_vmstate 3235 | st_vmstate
2818 | lfs TONUM, TMPD 3236 | .FPU lfs TONUM, TMPD
2819 | ins_callt 3237 | ins_callt
2820 |.endif 3238 |.endif
2821 | 3239 |
@@ -2829,7 +3247,7 @@ static void build_subroutines(BuildCtx *ctx)
2829 | mr CARG2, RA 3247 | mr CARG2, RA
2830 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) 3248 | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
2831 | lwz CRET1, CTSTATE->cb.gpr[0] 3249 | lwz CRET1, CTSTATE->cb.gpr[0]
2832 | lfd FARG1, CTSTATE->cb.fpr[0] 3250 | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
2833 | lwz CRET2, CTSTATE->cb.gpr[1] 3251 | lwz CRET2, CTSTATE->cb.gpr[1]
2834 | b ->vm_leave_unw 3252 | b ->vm_leave_unw
2835 |.endif 3253 |.endif
@@ -2863,14 +3281,14 @@ static void build_subroutines(BuildCtx *ctx)
2863 | bge <1 3281 | bge <1
2864 |2: 3282 |2:
2865 | bney cr1, >3 3283 | bney cr1, >3
2866 | lfd f1, CCSTATE->fpr[0] 3284 | .FPU lfd f1, CCSTATE->fpr[0]
2867 | lfd f2, CCSTATE->fpr[1] 3285 | .FPU lfd f2, CCSTATE->fpr[1]
2868 | lfd f3, CCSTATE->fpr[2] 3286 | .FPU lfd f3, CCSTATE->fpr[2]
2869 | lfd f4, CCSTATE->fpr[3] 3287 | .FPU lfd f4, CCSTATE->fpr[3]
2870 | lfd f5, CCSTATE->fpr[4] 3288 | .FPU lfd f5, CCSTATE->fpr[4]
2871 | lfd f6, CCSTATE->fpr[5] 3289 | .FPU lfd f6, CCSTATE->fpr[5]
2872 | lfd f7, CCSTATE->fpr[6] 3290 | .FPU lfd f7, CCSTATE->fpr[6]
2873 | lfd f8, CCSTATE->fpr[7] 3291 | .FPU lfd f8, CCSTATE->fpr[7]
2874 |3: 3292 |3:
2875 | lp TMP0, CCSTATE->func 3293 | lp TMP0, CCSTATE->func
2876 | lwz CARG2, CCSTATE->gpr[1] 3294 | lwz CARG2, CCSTATE->gpr[1]
@@ -2887,7 +3305,7 @@ static void build_subroutines(BuildCtx *ctx)
2887 | lwz TMP2, -4(r14) 3305 | lwz TMP2, -4(r14)
2888 | lwz TMP0, 4(r14) 3306 | lwz TMP0, 4(r14)
2889 | stw CARG1, CCSTATE:TMP1->gpr[0] 3307 | stw CARG1, CCSTATE:TMP1->gpr[0]
2890 | stfd FARG1, CCSTATE:TMP1->fpr[0] 3308 | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
2891 | stw CARG2, CCSTATE:TMP1->gpr[1] 3309 | stw CARG2, CCSTATE:TMP1->gpr[1]
2892 | mtlr TMP0 3310 | mtlr TMP0
2893 | stw CARG3, CCSTATE:TMP1->gpr[2] 3311 | stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2916,19 +3334,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2916 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: 3334 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2917 | // RA = src1*8, RD = src2*8, JMP with RD = target 3335 | // RA = src1*8, RD = src2*8, JMP with RD = target
2918 |.if DUALNUM 3336 |.if DUALNUM
2919 | lwzux TMP0, RA, BASE 3337 | lwzux CARG1, RA, BASE
2920 | addi PC, PC, 4 3338 | addi PC, PC, 4
2921 | lwz CARG2, 4(RA) 3339 | lwz CARG2, 4(RA)
2922 | lwzux TMP1, RD, BASE 3340 | lwzux CARG3, RD, BASE
2923 | lwz TMP2, -4(PC) 3341 | lwz TMP2, -4(PC)
2924 | checknum cr0, TMP0 3342 | checknum cr0, CARG1
2925 | lwz CARG3, 4(RD) 3343 | lwz CARG4, 4(RD)
2926 | decode_RD4 TMP2, TMP2 3344 | decode_RD4 TMP2, TMP2
2927 | checknum cr1, TMP1 3345 | checknum cr1, CARG3
2928 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3346 | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
2929 | bne cr0, >7 3347 | bne cr0, >7
2930 | bne cr1, >8 3348 | bne cr1, >8
2931 | cmpw CARG2, CARG3 3349 | cmpw CARG2, CARG4
2932 if (op == BC_ISLT) { 3350 if (op == BC_ISLT) {
2933 | bge >2 3351 | bge >2
2934 } else if (op == BC_ISGE) { 3352 } else if (op == BC_ISGE) {
@@ -2939,28 +3357,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2939 | ble >2 3357 | ble >2
2940 } 3358 }
2941 |1: 3359 |1:
2942 | add PC, PC, TMP2 3360 | add PC, PC, SAVE0
2943 |2: 3361 |2:
2944 | ins_next 3362 | ins_next
2945 | 3363 |
2946 |7: // RA is not an integer. 3364 |7: // RA is not an integer.
2947 | bgt cr0, ->vmeta_comp 3365 | bgt cr0, ->vmeta_comp
2948 | // RA is a number. 3366 | // RA is a number.
2949 | lfd f0, 0(RA) 3367 | .FPU lfd f0, 0(RA)
2950 | bgt cr1, ->vmeta_comp 3368 | bgt cr1, ->vmeta_comp
2951 | blt cr1, >4 3369 | blt cr1, >4
2952 | // RA is a number, RD is an integer. 3370 | // RA is a number, RD is an integer.
2953 | tonum_i f1, CARG3 3371 |.if FPU
3372 | tonum_i f1, CARG4
3373 |.else
3374 | bl ->vm_sfi2d_2
3375 |.endif
2954 | b >5 3376 | b >5
2955 | 3377 |
2956 |8: // RA is an integer, RD is not an integer. 3378 |8: // RA is an integer, RD is not an integer.
2957 | bgt cr1, ->vmeta_comp 3379 | bgt cr1, ->vmeta_comp
2958 | // RA is an integer, RD is a number. 3380 | // RA is an integer, RD is a number.
3381 |.if FPU
2959 | tonum_i f0, CARG2 3382 | tonum_i f0, CARG2
3383 |.else
3384 | bl ->vm_sfi2d_1
3385 |.endif
2960 |4: 3386 |4:
2961 | lfd f1, 0(RD) 3387 | .FPU lfd f1, 0(RD)
2962 |5: 3388 |5:
3389 |.if FPU
2963 | fcmpu cr0, f0, f1 3390 | fcmpu cr0, f0, f1
3391 |.else
3392 | blex __ledf2
3393 | cmpwi CRET1, 0
3394 |.endif
2964 if (op == BC_ISLT) { 3395 if (op == BC_ISLT) {
2965 | bge <2 3396 | bge <2
2966 } else if (op == BC_ISGE) { 3397 } else if (op == BC_ISGE) {
@@ -3008,42 +3439,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3008 vk = op == BC_ISEQV; 3439 vk = op == BC_ISEQV;
3009 | // RA = src1*8, RD = src2*8, JMP with RD = target 3440 | // RA = src1*8, RD = src2*8, JMP with RD = target
3010 |.if DUALNUM 3441 |.if DUALNUM
3011 | lwzux TMP0, RA, BASE 3442 | lwzux CARG1, RA, BASE
3012 | addi PC, PC, 4 3443 | addi PC, PC, 4
3013 | lwz CARG2, 4(RA) 3444 | lwz CARG2, 4(RA)
3014 | lwzux TMP1, RD, BASE 3445 | lwzux CARG3, RD, BASE
3015 | checknum cr0, TMP0 3446 | checknum cr0, CARG1
3016 | lwz TMP2, -4(PC) 3447 | lwz SAVE0, -4(PC)
3017 | checknum cr1, TMP1 3448 | checknum cr1, CARG3
3018 | decode_RD4 TMP2, TMP2 3449 | decode_RD4 SAVE0, SAVE0
3019 | lwz CARG3, 4(RD) 3450 | lwz CARG4, 4(RD)
3020 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt 3451 | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
3021 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3452 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3022 if (vk) { 3453 if (vk) {
3023 | ble cr7, ->BC_ISEQN_Z 3454 | ble cr7, ->BC_ISEQN_Z
3024 } else { 3455 } else {
3025 | ble cr7, ->BC_ISNEN_Z 3456 | ble cr7, ->BC_ISNEN_Z
3026 } 3457 }
3027 |.else 3458 |.else
3028 | lwzux TMP0, RA, BASE 3459 | lwzux CARG1, RA, BASE
3029 | lwz TMP2, 0(PC) 3460 | lwz SAVE0, 0(PC)
3030 | lfd f0, 0(RA) 3461 | lfd f0, 0(RA)
3031 | addi PC, PC, 4 3462 | addi PC, PC, 4
3032 | lwzux TMP1, RD, BASE 3463 | lwzux CARG3, RD, BASE
3033 | checknum cr0, TMP0 3464 | checknum cr0, CARG1
3034 | decode_RD4 TMP2, TMP2 3465 | decode_RD4 SAVE0, SAVE0
3035 | lfd f1, 0(RD) 3466 | lfd f1, 0(RD)
3036 | checknum cr1, TMP1 3467 | checknum cr1, CARG3
3037 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3468 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3038 | bge cr0, >5 3469 | bge cr0, >5
3039 | bge cr1, >5 3470 | bge cr1, >5
3040 | fcmpu cr0, f0, f1 3471 | fcmpu cr0, f0, f1
3041 if (vk) { 3472 if (vk) {
3042 | bne >1 3473 | bne >1
3043 | add PC, PC, TMP2 3474 | add PC, PC, SAVE0
3044 } else { 3475 } else {
3045 | beq >1 3476 | beq >1
3046 | add PC, PC, TMP2 3477 | add PC, PC, SAVE0
3047 } 3478 }
3048 |1: 3479 |1:
3049 | ins_next 3480 | ins_next
@@ -3051,36 +3482,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3051 |5: // Either or both types are not numbers. 3482 |5: // Either or both types are not numbers.
3052 |.if not DUALNUM 3483 |.if not DUALNUM
3053 | lwz CARG2, 4(RA) 3484 | lwz CARG2, 4(RA)
3054 | lwz CARG3, 4(RD) 3485 | lwz CARG4, 4(RD)
3055 |.endif 3486 |.endif
3056 |.if FFI 3487 |.if FFI
3057 | cmpwi cr7, TMP0, LJ_TCDATA 3488 | cmpwi cr7, CARG1, LJ_TCDATA
3058 | cmpwi cr5, TMP1, LJ_TCDATA 3489 | cmpwi cr5, CARG3, LJ_TCDATA
3059 |.endif 3490 |.endif
3060 | not TMP3, TMP0 3491 | not TMP2, CARG1
3061 | cmplw TMP0, TMP1 3492 | cmplw CARG1, CARG3
3062 | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? 3493 | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
3063 |.if FFI 3494 |.if FFI
3064 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq 3495 | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
3065 |.endif 3496 |.endif
3066 | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? 3497 | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
3067 |.if FFI 3498 |.if FFI
3068 | beq cr7, ->vmeta_equal_cd 3499 | beq cr7, ->vmeta_equal_cd
3069 |.endif 3500 |.endif
3070 | cmplw cr5, CARG2, CARG3 3501 | cmplw cr5, CARG2, CARG4
3071 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. 3502 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
3072 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. 3503 | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
3073 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. 3504 | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
3074 | mr SAVE0, PC 3505 | mr SAVE1, PC
3075 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. 3506 | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
3076 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. 3507 | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
3077 if (vk) { 3508 if (vk) {
3078 | bne cr0, >6 3509 | bne cr0, >6
3079 | add PC, PC, TMP2 3510 | add PC, PC, SAVE0
3080 |6: 3511 |6:
3081 } else { 3512 } else {
3082 | beq cr0, >6 3513 | beq cr0, >6
3083 | add PC, PC, TMP2 3514 | add PC, PC, SAVE0
3084 |6: 3515 |6:
3085 } 3516 }
3086 |.if DUALNUM 3517 |.if DUALNUM
@@ -3095,6 +3526,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3095 | 3526 |
3096 | // Different tables or userdatas. Need to check __eq metamethod. 3527 | // Different tables or userdatas. Need to check __eq metamethod.
3097 | // Field metatable must be at same offset for GCtab and GCudata! 3528 | // Field metatable must be at same offset for GCtab and GCudata!
3529 | mr CARG3, CARG4
3098 | lwz TAB:TMP2, TAB:CARG2->metatable 3530 | lwz TAB:TMP2, TAB:CARG2->metatable
3099 | li CARG4, 1-vk // ne = 0 or 1. 3531 | li CARG4, 1-vk // ne = 0 or 1.
3100 | cmplwi TAB:TMP2, 0 3532 | cmplwi TAB:TMP2, 0
@@ -3102,7 +3534,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3102 | lbz TMP2, TAB:TMP2->nomm 3534 | lbz TMP2, TAB:TMP2->nomm
3103 | andix. TMP2, TMP2, 1<<MM_eq 3535 | andix. TMP2, TMP2, 1<<MM_eq
3104 | bne <1 // Or 'no __eq' flag set? 3536 | bne <1 // Or 'no __eq' flag set?
3105 | mr PC, SAVE0 // Restore old PC. 3537 | mr PC, SAVE1 // Restore old PC.
3106 | b ->vmeta_equal // Handle __eq metamethod. 3538 | b ->vmeta_equal // Handle __eq metamethod.
3107 break; 3539 break;
3108 3540
@@ -3143,16 +3575,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3143 vk = op == BC_ISEQN; 3575 vk = op == BC_ISEQN;
3144 | // RA = src*8, RD = num_const*8, JMP with RD = target 3576 | // RA = src*8, RD = num_const*8, JMP with RD = target
3145 |.if DUALNUM 3577 |.if DUALNUM
3146 | lwzux TMP0, RA, BASE 3578 | lwzux CARG1, RA, BASE
3147 | addi PC, PC, 4 3579 | addi PC, PC, 4
3148 | lwz CARG2, 4(RA) 3580 | lwz CARG2, 4(RA)
3149 | lwzux TMP1, RD, KBASE 3581 | lwzux CARG3, RD, KBASE
3150 | checknum cr0, TMP0 3582 | checknum cr0, CARG1
3151 | lwz TMP2, -4(PC) 3583 | lwz SAVE0, -4(PC)
3152 | checknum cr1, TMP1 3584 | checknum cr1, CARG3
3153 | decode_RD4 TMP2, TMP2 3585 | decode_RD4 SAVE0, SAVE0
3154 | lwz CARG3, 4(RD) 3586 | lwz CARG4, 4(RD)
3155 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3587 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3156 if (vk) { 3588 if (vk) {
3157 |->BC_ISEQN_Z: 3589 |->BC_ISEQN_Z:
3158 } else { 3590 } else {
@@ -3160,7 +3592,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3160 } 3592 }
3161 | bne cr0, >7 3593 | bne cr0, >7
3162 | bne cr1, >8 3594 | bne cr1, >8
3163 | cmpw CARG2, CARG3 3595 | cmpw CARG2, CARG4
3164 |4: 3596 |4:
3165 |.else 3597 |.else
3166 if (vk) { 3598 if (vk) {
@@ -3168,20 +3600,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3168 } else { 3600 } else {
3169 |->BC_ISNEN_Z: // Dummy label. 3601 |->BC_ISNEN_Z: // Dummy label.
3170 } 3602 }
3171 | lwzx TMP0, BASE, RA 3603 | lwzx CARG1, BASE, RA
3172 | addi PC, PC, 4 3604 | addi PC, PC, 4
3173 | lfdx f0, BASE, RA 3605 | lfdx f0, BASE, RA
3174 | lwz TMP2, -4(PC) 3606 | lwz SAVE0, -4(PC)
3175 | lfdx f1, KBASE, RD 3607 | lfdx f1, KBASE, RD
3176 | decode_RD4 TMP2, TMP2 3608 | decode_RD4 SAVE0, SAVE0
3177 | checknum TMP0 3609 | checknum CARG1
3178 | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) 3610 | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
3179 | bge >3 3611 | bge >3
3180 | fcmpu cr0, f0, f1 3612 | fcmpu cr0, f0, f1
3181 |.endif 3613 |.endif
3182 if (vk) { 3614 if (vk) {
3183 | bne >1 3615 | bne >1
3184 | add PC, PC, TMP2 3616 | add PC, PC, SAVE0
3185 |1: 3617 |1:
3186 |.if not FFI 3618 |.if not FFI
3187 |3: 3619 |3:
@@ -3192,13 +3624,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3192 |.if not FFI 3624 |.if not FFI
3193 |3: 3625 |3:
3194 |.endif 3626 |.endif
3195 | add PC, PC, TMP2 3627 | add PC, PC, SAVE0
3196 |2: 3628 |2:
3197 } 3629 }
3198 | ins_next 3630 | ins_next
3199 |.if FFI 3631 |.if FFI
3200 |3: 3632 |3:
3201 | cmpwi TMP0, LJ_TCDATA 3633 | cmpwi CARG1, LJ_TCDATA
3202 | beq ->vmeta_equal_cd 3634 | beq ->vmeta_equal_cd
3203 | b <1 3635 | b <1
3204 |.endif 3636 |.endif
@@ -3206,18 +3638,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3206 |7: // RA is not an integer. 3638 |7: // RA is not an integer.
3207 | bge cr0, <3 3639 | bge cr0, <3
3208 | // RA is a number. 3640 | // RA is a number.
3209 | lfd f0, 0(RA) 3641 | .FPU lfd f0, 0(RA)
3210 | blt cr1, >1 3642 | blt cr1, >1
3211 | // RA is a number, RD is an integer. 3643 | // RA is a number, RD is an integer.
3212 | tonum_i f1, CARG3 3644 |.if FPU
3645 | tonum_i f1, CARG4
3646 |.else
3647 | bl ->vm_sfi2d_2
3648 |.endif
3213 | b >2 3649 | b >2
3214 | 3650 |
3215 |8: // RA is an integer, RD is a number. 3651 |8: // RA is an integer, RD is a number.
3652 |.if FPU
3216 | tonum_i f0, CARG2 3653 | tonum_i f0, CARG2
3654 |.else
3655 | bl ->vm_sfi2d_1
3656 |.endif
3217 |1: 3657 |1:
3218 | lfd f1, 0(RD) 3658 | .FPU lfd f1, 0(RD)
3219 |2: 3659 |2:
3660 |.if FPU
3220 | fcmpu cr0, f0, f1 3661 | fcmpu cr0, f0, f1
3662 |.else
3663 | blex __ledf2
3664 | cmpwi CRET1, 0
3665 |.endif
3221 | b <4 3666 | b <4
3222 |.endif 3667 |.endif
3223 break; 3668 break;
@@ -3272,7 +3717,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3272 | add PC, PC, TMP2 3717 | add PC, PC, TMP2
3273 } else { 3718 } else {
3274 | li TMP1, LJ_TFALSE 3719 | li TMP1, LJ_TFALSE
3720 |.if FPU
3275 | lfdx f0, BASE, RD 3721 | lfdx f0, BASE, RD
3722 |.else
3723 | lwzux CARG1, RD, BASE
3724 | lwz CARG2, 4(RD)
3725 |.endif
3276 | cmplw TMP0, TMP1 3726 | cmplw TMP0, TMP1
3277 if (op == BC_ISTC) { 3727 if (op == BC_ISTC) {
3278 | bge >1 3728 | bge >1
@@ -3281,20 +3731,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3281 } 3731 }
3282 | addis PC, PC, -(BCBIAS_J*4 >> 16) 3732 | addis PC, PC, -(BCBIAS_J*4 >> 16)
3283 | decode_RD4 TMP2, INS 3733 | decode_RD4 TMP2, INS
3734 |.if FPU
3284 | stfdx f0, BASE, RA 3735 | stfdx f0, BASE, RA
3736 |.else
3737 | stwux CARG1, RA, BASE
3738 | stw CARG2, 4(RA)
3739 |.endif
3285 | add PC, PC, TMP2 3740 | add PC, PC, TMP2
3286 |1: 3741 |1:
3287 } 3742 }
3288 | ins_next 3743 | ins_next
3289 break; 3744 break;
3290 3745
3746 case BC_ISTYPE:
3747 | // RA = src*8, RD = -type*8
3748 | lwzx TMP0, BASE, RA
3749 | srwi TMP1, RD, 3
3750 | ins_next1
3751 |.if not PPE and not GPR64
3752 | add. TMP0, TMP0, TMP1
3753 |.else
3754 | neg TMP1, TMP1
3755 | cmpw TMP0, TMP1
3756 |.endif
3757 | bne ->vmeta_istype
3758 | ins_next2
3759 break;
3760 case BC_ISNUM:
3761 | // RA = src*8, RD = -(TISNUM-1)*8
3762 | lwzx TMP0, BASE, RA
3763 | ins_next1
3764 | checknum TMP0
3765 | bge ->vmeta_istype
3766 | ins_next2
3767 break;
3768
3291 /* -- Unary ops --------------------------------------------------------- */ 3769 /* -- Unary ops --------------------------------------------------------- */
3292 3770
3293 case BC_MOV: 3771 case BC_MOV:
3294 | // RA = dst*8, RD = src*8 3772 | // RA = dst*8, RD = src*8
3295 | ins_next1 3773 | ins_next1
3774 |.if FPU
3296 | lfdx f0, BASE, RD 3775 | lfdx f0, BASE, RD
3297 | stfdx f0, BASE, RA 3776 | stfdx f0, BASE, RA
3777 |.else
3778 | lwzux TMP0, RD, BASE
3779 | lwz TMP1, 4(RD)
3780 | stwux TMP0, RA, BASE
3781 | stw TMP1, 4(RA)
3782 |.endif
3298 | ins_next2 3783 | ins_next2
3299 break; 3784 break;
3300 case BC_NOT: 3785 case BC_NOT:
@@ -3396,44 +3881,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3396 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3881 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3397 ||switch (vk) { 3882 ||switch (vk) {
3398 ||case 0: 3883 ||case 0:
3399 | lwzx TMP1, BASE, RB 3884 | lwzx CARG1, BASE, RB
3400 | .if DUALNUM 3885 | .if DUALNUM
3401 | lwzx TMP2, KBASE, RC 3886 | lwzx CARG3, KBASE, RC
3402 | .endif 3887 | .endif
3888 | .if FPU
3403 | lfdx f14, BASE, RB 3889 | lfdx f14, BASE, RB
3404 | lfdx f15, KBASE, RC 3890 | lfdx f15, KBASE, RC
3891 | .else
3892 | add TMP1, BASE, RB
3893 | add TMP2, KBASE, RC
3894 | lwz CARG2, 4(TMP1)
3895 | lwz CARG4, 4(TMP2)
3896 | .endif
3405 | .if DUALNUM 3897 | .if DUALNUM
3406 | checknum cr0, TMP1 3898 | checknum cr0, CARG1
3407 | checknum cr1, TMP2 3899 | checknum cr1, CARG3
3408 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3900 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3409 | bge ->vmeta_arith_vn 3901 | bge ->vmeta_arith_vn
3410 | .else 3902 | .else
3411 | checknum TMP1; bge ->vmeta_arith_vn 3903 | checknum CARG1; bge ->vmeta_arith_vn
3412 | .endif 3904 | .endif
3413 || break; 3905 || break;
3414 ||case 1: 3906 ||case 1:
3415 | lwzx TMP1, BASE, RB 3907 | lwzx CARG1, BASE, RB
3416 | .if DUALNUM 3908 | .if DUALNUM
3417 | lwzx TMP2, KBASE, RC 3909 | lwzx CARG3, KBASE, RC
3418 | .endif 3910 | .endif
3911 | .if FPU
3419 | lfdx f15, BASE, RB 3912 | lfdx f15, BASE, RB
3420 | lfdx f14, KBASE, RC 3913 | lfdx f14, KBASE, RC
3914 | .else
3915 | add TMP1, BASE, RB
3916 | add TMP2, KBASE, RC
3917 | lwz CARG2, 4(TMP1)
3918 | lwz CARG4, 4(TMP2)
3919 | .endif
3421 | .if DUALNUM 3920 | .if DUALNUM
3422 | checknum cr0, TMP1 3921 | checknum cr0, CARG1
3423 | checknum cr1, TMP2 3922 | checknum cr1, CARG3
3424 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3923 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3425 | bge ->vmeta_arith_nv 3924 | bge ->vmeta_arith_nv
3426 | .else 3925 | .else
3427 | checknum TMP1; bge ->vmeta_arith_nv 3926 | checknum CARG1; bge ->vmeta_arith_nv
3428 | .endif 3927 | .endif
3429 || break; 3928 || break;
3430 ||default: 3929 ||default:
3431 | lwzx TMP1, BASE, RB 3930 | lwzx CARG1, BASE, RB
3432 | lwzx TMP2, BASE, RC 3931 | lwzx CARG3, BASE, RC
3932 | .if FPU
3433 | lfdx f14, BASE, RB 3933 | lfdx f14, BASE, RB
3434 | lfdx f15, BASE, RC 3934 | lfdx f15, BASE, RC
3435 | checknum cr0, TMP1 3935 | .else
3436 | checknum cr1, TMP2 3936 | add TMP1, BASE, RB
3937 | add TMP2, BASE, RC
3938 | lwz CARG2, 4(TMP1)
3939 | lwz CARG4, 4(TMP2)
3940 | .endif
3941 | checknum cr0, CARG1
3942 | checknum cr1, CARG3
3437 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 3943 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3438 | bge ->vmeta_arith_vv 3944 | bge ->vmeta_arith_vv
3439 || break; 3945 || break;
@@ -3467,48 +3973,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3467 | fsub a, b, a // b - floor(b/c)*c 3973 | fsub a, b, a // b - floor(b/c)*c
3468 |.endmacro 3974 |.endmacro
3469 | 3975 |
3976 |.macro sfpmod
3977 |->BC_MODVN_Z:
3978 | stw CARG1, SFSAVE_1
3979 | stw CARG2, SFSAVE_2
3980 | mr SAVE0, CARG3
3981 | mr SAVE1, CARG4
3982 | blex __divdf3
3983 | blex floor
3984 | mr CARG3, SAVE0
3985 | mr CARG4, SAVE1
3986 | blex __muldf3
3987 | mr CARG3, CRET1
3988 | mr CARG4, CRET2
3989 | lwz CARG1, SFSAVE_1
3990 | lwz CARG2, SFSAVE_2
3991 | blex __subdf3
3992 |.endmacro
3993 |
3470 |.macro ins_arithfp, fpins 3994 |.macro ins_arithfp, fpins
3471 | ins_arithpre 3995 | ins_arithpre
3472 |.if "fpins" == "fpmod_" 3996 |.if "fpins" == "fpmod_"
3473 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3997 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3474 |.else 3998 |.elif FPU
3475 | fpins f0, f14, f15 3999 | fpins f0, f14, f15
3476 | ins_next1 4000 | ins_next1
3477 | stfdx f0, BASE, RA 4001 | stfdx f0, BASE, RA
3478 | ins_next2 4002 | ins_next2
4003 |.else
4004 | blex __divdf3 // Only soft-float div uses this macro.
4005 | ins_next1
4006 | stwux CRET1, RA, BASE
4007 | stw CRET2, 4(RA)
4008 | ins_next2
3479 |.endif 4009 |.endif
3480 |.endmacro 4010 |.endmacro
3481 | 4011 |
3482 |.macro ins_arithdn, intins, fpins 4012 |.macro ins_arithdn, intins, fpins, fpcall
3483 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 4013 | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
3484 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 4014 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3485 ||switch (vk) { 4015 ||switch (vk) {
3486 ||case 0: 4016 ||case 0:
3487 | lwzux TMP1, RB, BASE 4017 | lwzux CARG1, RB, BASE
3488 | lwzux TMP2, RC, KBASE 4018 | lwzux CARG3, RC, KBASE
3489 | lwz CARG1, 4(RB) 4019 | lwz CARG2, 4(RB)
3490 | checknum cr0, TMP1 4020 | checknum cr0, CARG1
3491 | lwz CARG2, 4(RC) 4021 | lwz CARG4, 4(RC)
4022 | checknum cr1, CARG3
3492 || break; 4023 || break;
3493 ||case 1: 4024 ||case 1:
3494 | lwzux TMP1, RB, BASE 4025 | lwzux CARG3, RB, BASE
3495 | lwzux TMP2, RC, KBASE 4026 | lwzux CARG1, RC, KBASE
3496 | lwz CARG2, 4(RB) 4027 | lwz CARG4, 4(RB)
3497 | checknum cr0, TMP1 4028 | checknum cr0, CARG3
3498 | lwz CARG1, 4(RC) 4029 | lwz CARG2, 4(RC)
4030 | checknum cr1, CARG1
3499 || break; 4031 || break;
3500 ||default: 4032 ||default:
3501 | lwzux TMP1, RB, BASE 4033 | lwzux CARG1, RB, BASE
3502 | lwzux TMP2, RC, BASE 4034 | lwzux CARG3, RC, BASE
3503 | lwz CARG1, 4(RB) 4035 | lwz CARG2, 4(RB)
3504 | checknum cr0, TMP1 4036 | checknum cr0, CARG1
3505 | lwz CARG2, 4(RC) 4037 | lwz CARG4, 4(RC)
4038 | checknum cr1, CARG3
3506 || break; 4039 || break;
3507 ||} 4040 ||}
3508 | checknum cr1, TMP2
3509 | bne >5 4041 | bne >5
3510 | bne cr1, >5 4042 | bne cr1, >5
3511 | intins CARG1, CARG1, CARG2 4043 |.if "intins" == "intmod"
4044 | mr CARG1, CARG2
4045 | mr CARG2, CARG4
4046 |.endif
4047 | intins CARG1, CARG2, CARG4
3512 | bso >4 4048 | bso >4
3513 |1: 4049 |1:
3514 | ins_next1 4050 | ins_next1
@@ -3520,29 +4056,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3520 | checkov TMP0, <1 // Ignore unrelated overflow. 4056 | checkov TMP0, <1 // Ignore unrelated overflow.
3521 | ins_arithfallback b 4057 | ins_arithfallback b
3522 |5: // FP variant. 4058 |5: // FP variant.
4059 |.if FPU
3523 ||if (vk == 1) { 4060 ||if (vk == 1) {
3524 | lfd f15, 0(RB) 4061 | lfd f15, 0(RB)
3525 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3526 | lfd f14, 0(RC) 4062 | lfd f14, 0(RC)
3527 ||} else { 4063 ||} else {
3528 | lfd f14, 0(RB) 4064 | lfd f14, 0(RB)
3529 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3530 | lfd f15, 0(RC) 4065 | lfd f15, 0(RC)
3531 ||} 4066 ||}
4067 |.endif
4068 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3532 | ins_arithfallback bge 4069 | ins_arithfallback bge
3533 |.if "fpins" == "fpmod_" 4070 |.if "fpins" == "fpmod_"
3534 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 4071 | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3535 |.else 4072 |.else
4073 |.if FPU
3536 | fpins f0, f14, f15 4074 | fpins f0, f14, f15
3537 | ins_next1
3538 | stfdx f0, BASE, RA 4075 | stfdx f0, BASE, RA
4076 |.else
4077 |.if "fpcall" == "sfpmod"
4078 | sfpmod
4079 |.else
4080 | blex fpcall
4081 |.endif
4082 | stwux CRET1, RA, BASE
4083 | stw CRET2, 4(RA)
4084 |.endif
4085 | ins_next1
3539 | b <2 4086 | b <2
3540 |.endif 4087 |.endif
3541 |.endmacro 4088 |.endmacro
3542 | 4089 |
3543 |.macro ins_arith, intins, fpins 4090 |.macro ins_arith, intins, fpins, fpcall
3544 |.if DUALNUM 4091 |.if DUALNUM
3545 | ins_arithdn intins, fpins 4092 | ins_arithdn intins, fpins, fpcall
3546 |.else 4093 |.else
3547 | ins_arithfp fpins 4094 | ins_arithfp fpins
3548 |.endif 4095 |.endif
@@ -3557,9 +4104,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3557 | addo. TMP0, TMP0, TMP3 4104 | addo. TMP0, TMP0, TMP3
3558 | add y, a, b 4105 | add y, a, b
3559 |.endmacro 4106 |.endmacro
3560 | ins_arith addo32., fadd 4107 | ins_arith addo32., fadd, __adddf3
3561 |.else 4108 |.else
3562 | ins_arith addo., fadd 4109 | ins_arith addo., fadd, __adddf3
3563 |.endif 4110 |.endif
3564 break; 4111 break;
3565 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 4112 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3571,36 +4118,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3571 | subo. TMP0, TMP0, TMP3 4118 | subo. TMP0, TMP0, TMP3
3572 | sub y, a, b 4119 | sub y, a, b
3573 |.endmacro 4120 |.endmacro
3574 | ins_arith subo32., fsub 4121 | ins_arith subo32., fsub, __subdf3
3575 |.else 4122 |.else
3576 | ins_arith subo., fsub 4123 | ins_arith subo., fsub, __subdf3
3577 |.endif 4124 |.endif
3578 break; 4125 break;
3579 case BC_MULVN: case BC_MULNV: case BC_MULVV: 4126 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3580 | ins_arith mullwo., fmul 4127 | ins_arith mullwo., fmul, __muldf3
3581 break; 4128 break;
3582 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 4129 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3583 | ins_arithfp fdiv 4130 | ins_arithfp fdiv
3584 break; 4131 break;
3585 case BC_MODVN: 4132 case BC_MODVN:
3586 | ins_arith intmod, fpmod 4133 | ins_arith intmod, fpmod, sfpmod
3587 break; 4134 break;
3588 case BC_MODNV: case BC_MODVV: 4135 case BC_MODNV: case BC_MODVV:
3589 | ins_arith intmod, fpmod_ 4136 | ins_arith intmod, fpmod_, sfpmod
3590 break; 4137 break;
3591 case BC_POW: 4138 case BC_POW:
3592 | // NYI: (partial) integer arithmetic. 4139 | // NYI: (partial) integer arithmetic.
3593 | lwzx TMP1, BASE, RB 4140 | lwzx CARG1, BASE, RB
4141 | lwzx CARG3, BASE, RC
4142 |.if FPU
3594 | lfdx FARG1, BASE, RB 4143 | lfdx FARG1, BASE, RB
3595 | lwzx TMP2, BASE, RC
3596 | lfdx FARG2, BASE, RC 4144 | lfdx FARG2, BASE, RC
3597 | checknum cr0, TMP1 4145 |.else
3598 | checknum cr1, TMP2 4146 | add TMP1, BASE, RB
4147 | add TMP2, BASE, RC
4148 | lwz CARG2, 4(TMP1)
4149 | lwz CARG4, 4(TMP2)
4150 |.endif
4151 | checknum cr0, CARG1
4152 | checknum cr1, CARG3
3599 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 4153 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
3600 | bge ->vmeta_arith_vv 4154 | bge ->vmeta_arith_vv
3601 | blex pow 4155 | blex pow
3602 | ins_next1 4156 | ins_next1
4157 |.if FPU
3603 | stfdx FARG1, BASE, RA 4158 | stfdx FARG1, BASE, RA
4159 |.else
4160 | stwux CARG1, RA, BASE
4161 | stw CARG2, 4(RA)
4162 |.endif
3604 | ins_next2 4163 | ins_next2
3605 break; 4164 break;
3606 4165
@@ -3620,8 +4179,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3620 | lp BASE, L->base 4179 | lp BASE, L->base
3621 | bne ->vmeta_binop 4180 | bne ->vmeta_binop
3622 | ins_next1 4181 | ins_next1
4182 |.if FPU
3623 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. 4183 | lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
3624 | stfdx f0, BASE, RA 4184 | stfdx f0, BASE, RA
4185 |.else
4186 | lwzux TMP0, SAVE0, BASE
4187 | lwz TMP1, 4(SAVE0)
4188 | stwux TMP0, RA, BASE
4189 | stw TMP1, 4(RA)
4190 |.endif
3625 | ins_next2 4191 | ins_next2
3626 break; 4192 break;
3627 4193
@@ -3684,8 +4250,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3684 case BC_KNUM: 4250 case BC_KNUM:
3685 | // RA = dst*8, RD = num_const*8 4251 | // RA = dst*8, RD = num_const*8
3686 | ins_next1 4252 | ins_next1
4253 |.if FPU
3687 | lfdx f0, KBASE, RD 4254 | lfdx f0, KBASE, RD
3688 | stfdx f0, BASE, RA 4255 | stfdx f0, BASE, RA
4256 |.else
4257 | lwzux TMP0, RD, KBASE
4258 | lwz TMP1, 4(RD)
4259 | stwux TMP0, RA, BASE
4260 | stw TMP1, 4(RA)
4261 |.endif
3689 | ins_next2 4262 | ins_next2
3690 break; 4263 break;
3691 case BC_KPRI: 4264 case BC_KPRI:
@@ -3718,8 +4291,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3718 | lwzx UPVAL:RB, LFUNC:RB, RD 4291 | lwzx UPVAL:RB, LFUNC:RB, RD
3719 | ins_next1 4292 | ins_next1
3720 | lwz TMP1, UPVAL:RB->v 4293 | lwz TMP1, UPVAL:RB->v
4294 |.if FPU
3721 | lfd f0, 0(TMP1) 4295 | lfd f0, 0(TMP1)
3722 | stfdx f0, BASE, RA 4296 | stfdx f0, BASE, RA
4297 |.else
4298 | lwz TMP2, 0(TMP1)
4299 | lwz TMP3, 4(TMP1)
4300 | stwux TMP2, RA, BASE
4301 | stw TMP3, 4(RA)
4302 |.endif
3723 | ins_next2 4303 | ins_next2
3724 break; 4304 break;
3725 case BC_USETV: 4305 case BC_USETV:
@@ -3727,14 +4307,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3727 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4307 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3728 | srwi RA, RA, 1 4308 | srwi RA, RA, 1
3729 | addi RA, RA, offsetof(GCfuncL, uvptr) 4309 | addi RA, RA, offsetof(GCfuncL, uvptr)
4310 |.if FPU
3730 | lfdux f0, RD, BASE 4311 | lfdux f0, RD, BASE
4312 |.else
4313 | lwzux CARG1, RD, BASE
4314 | lwz CARG3, 4(RD)
4315 |.endif
3731 | lwzx UPVAL:RB, LFUNC:RB, RA 4316 | lwzx UPVAL:RB, LFUNC:RB, RA
3732 | lbz TMP3, UPVAL:RB->marked 4317 | lbz TMP3, UPVAL:RB->marked
3733 | lwz CARG2, UPVAL:RB->v 4318 | lwz CARG2, UPVAL:RB->v
3734 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) 4319 | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
3735 | lbz TMP0, UPVAL:RB->closed 4320 | lbz TMP0, UPVAL:RB->closed
3736 | lwz TMP2, 0(RD) 4321 | lwz TMP2, 0(RD)
4322 |.if FPU
3737 | stfd f0, 0(CARG2) 4323 | stfd f0, 0(CARG2)
4324 |.else
4325 | stw CARG1, 0(CARG2)
4326 | stw CARG3, 4(CARG2)
4327 |.endif
3738 | cmplwi cr1, TMP0, 0 4328 | cmplwi cr1, TMP0, 0
3739 | lwz TMP1, 4(RD) 4329 | lwz TMP1, 4(RD)
3740 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 4330 | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3790,11 +4380,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3790 | lwz LFUNC:RB, FRAME_FUNC(BASE) 4380 | lwz LFUNC:RB, FRAME_FUNC(BASE)
3791 | srwi RA, RA, 1 4381 | srwi RA, RA, 1
3792 | addi RA, RA, offsetof(GCfuncL, uvptr) 4382 | addi RA, RA, offsetof(GCfuncL, uvptr)
4383 |.if FPU
3793 | lfdx f0, KBASE, RD 4384 | lfdx f0, KBASE, RD
4385 |.else
4386 | lwzux TMP2, RD, KBASE
4387 | lwz TMP3, 4(RD)
4388 |.endif
3794 | lwzx UPVAL:RB, LFUNC:RB, RA 4389 | lwzx UPVAL:RB, LFUNC:RB, RA
3795 | ins_next1 4390 | ins_next1
3796 | lwz TMP1, UPVAL:RB->v 4391 | lwz TMP1, UPVAL:RB->v
4392 |.if FPU
3797 | stfd f0, 0(TMP1) 4393 | stfd f0, 0(TMP1)
4394 |.else
4395 | stw TMP2, 0(TMP1)
4396 | stw TMP3, 4(TMP1)
4397 |.endif
3798 | ins_next2 4398 | ins_next2
3799 break; 4399 break;
3800 case BC_USETP: 4400 case BC_USETP:
@@ -3942,11 +4542,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3942 |.endif 4542 |.endif
3943 | ble ->vmeta_tgetv // Integer key and in array part? 4543 | ble ->vmeta_tgetv // Integer key and in array part?
3944 | lwzx TMP0, TMP1, TMP2 4544 | lwzx TMP0, TMP1, TMP2
4545 |.if FPU
3945 | lfdx f14, TMP1, TMP2 4546 | lfdx f14, TMP1, TMP2
4547 |.else
4548 | lwzux SAVE0, TMP1, TMP2
4549 | lwz SAVE1, 4(TMP1)
4550 |.endif
3946 | checknil TMP0; beq >2 4551 | checknil TMP0; beq >2
3947 |1: 4552 |1:
3948 | ins_next1 4553 | ins_next1
4554 |.if FPU
3949 | stfdx f14, BASE, RA 4555 | stfdx f14, BASE, RA
4556 |.else
4557 | stwux SAVE0, RA, BASE
4558 | stw SAVE1, 4(RA)
4559 |.endif
3950 | ins_next2 4560 | ins_next2
3951 | 4561 |
3952 |2: // Check for __index if table value is nil. 4562 |2: // Check for __index if table value is nil.
@@ -4022,12 +4632,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4022 | lwz TMP1, TAB:RB->asize 4632 | lwz TMP1, TAB:RB->asize
4023 | lwz TMP2, TAB:RB->array 4633 | lwz TMP2, TAB:RB->array
4024 | cmplw TMP0, TMP1; bge ->vmeta_tgetb 4634 | cmplw TMP0, TMP1; bge ->vmeta_tgetb
4635 |.if FPU
4025 | lwzx TMP1, TMP2, RC 4636 | lwzx TMP1, TMP2, RC
4026 | lfdx f0, TMP2, RC 4637 | lfdx f0, TMP2, RC
4638 |.else
4639 | lwzux TMP1, TMP2, RC
4640 | lwz TMP3, 4(TMP2)
4641 |.endif
4027 | checknil TMP1; beq >5 4642 | checknil TMP1; beq >5
4028 |1: 4643 |1:
4029 | ins_next1 4644 | ins_next1
4645 |.if FPU
4030 | stfdx f0, BASE, RA 4646 | stfdx f0, BASE, RA
4647 |.else
4648 | stwux TMP1, RA, BASE
4649 | stw TMP3, 4(RA)
4650 |.endif
4031 | ins_next2 4651 | ins_next2
4032 | 4652 |
4033 |5: // Check for __index if table value is nil. 4653 |5: // Check for __index if table value is nil.
@@ -4039,6 +4659,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4039 | bne <1 // 'no __index' flag set: done. 4659 | bne <1 // 'no __index' flag set: done.
4040 | b ->vmeta_tgetb // Caveat: preserve TMP0! 4660 | b ->vmeta_tgetb // Caveat: preserve TMP0!
4041 break; 4661 break;
4662 case BC_TGETR:
4663 | // RA = dst*8, RB = table*8, RC = key*8
4664 | add RB, BASE, RB
4665 | lwz TAB:CARG1, 4(RB)
4666 |.if DUALNUM
4667 | add RC, BASE, RC
4668 | lwz TMP0, TAB:CARG1->asize
4669 | lwz CARG2, 4(RC)
4670 | lwz TMP1, TAB:CARG1->array
4671 |.else
4672 | lfdx f0, BASE, RC
4673 | lwz TMP0, TAB:CARG1->asize
4674 | toint CARG2, f0
4675 | lwz TMP1, TAB:CARG1->array
4676 |.endif
4677 | cmplw TMP0, CARG2
4678 | slwi TMP2, CARG2, 3
4679 | ble ->vmeta_tgetr // In array part?
4680 |.if FPU
4681 | lfdx f14, TMP1, TMP2
4682 |.else
4683 | lwzux SAVE0, TMP2, TMP1
4684 | lwz SAVE1, 4(TMP2)
4685 |.endif
4686 |->BC_TGETR_Z:
4687 | ins_next1
4688 |.if FPU
4689 | stfdx f14, BASE, RA
4690 |.else
4691 | stwux SAVE0, RA, BASE
4692 | stw SAVE1, 4(RA)
4693 |.endif
4694 | ins_next2
4695 break;
4042 4696
4043 case BC_TSETV: 4697 case BC_TSETV:
4044 | // RA = src*8, RB = table*8, RC = key*8 4698 | // RA = src*8, RB = table*8, RC = key*8
@@ -4077,11 +4731,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4077 | ble ->vmeta_tsetv // Integer key and in array part? 4731 | ble ->vmeta_tsetv // Integer key and in array part?
4078 | lwzx TMP2, TMP1, TMP0 4732 | lwzx TMP2, TMP1, TMP0
4079 | lbz TMP3, TAB:RB->marked 4733 | lbz TMP3, TAB:RB->marked
4734 |.if FPU
4080 | lfdx f14, BASE, RA 4735 | lfdx f14, BASE, RA
4736 |.else
4737 | add SAVE1, BASE, RA
4738 | lwz SAVE0, 0(SAVE1)
4739 | lwz SAVE1, 4(SAVE1)
4740 |.endif
4081 | checknil TMP2; beq >3 4741 | checknil TMP2; beq >3
4082 |1: 4742 |1:
4083 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) 4743 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4744 |.if FPU
4084 | stfdx f14, TMP1, TMP0 4745 | stfdx f14, TMP1, TMP0
4746 |.else
4747 | stwux SAVE0, TMP1, TMP0
4748 | stw SAVE1, 4(TMP1)
4749 |.endif
4085 | bne >7 4750 | bne >7
4086 |2: 4751 |2:
4087 | ins_next 4752 | ins_next
@@ -4122,7 +4787,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4122 | lwz NODE:TMP2, TAB:RB->node 4787 | lwz NODE:TMP2, TAB:RB->node
4123 | stb ZERO, TAB:RB->nomm // Clear metamethod cache. 4788 | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
4124 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask 4789 | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
4790 |.if FPU
4125 | lfdx f14, BASE, RA 4791 | lfdx f14, BASE, RA
4792 |.else
4793 | add CARG2, BASE, RA
4794 | lwz SAVE0, 0(CARG2)
4795 | lwz SAVE1, 4(CARG2)
4796 |.endif
4126 | slwi TMP0, TMP1, 5 4797 | slwi TMP0, TMP1, 5
4127 | slwi TMP1, TMP1, 3 4798 | slwi TMP1, TMP1, 3
4128 | sub TMP1, TMP0, TMP1 4799 | sub TMP1, TMP0, TMP1
@@ -4138,7 +4809,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4138 | checknil CARG2; beq >4 // Key found, but nil value? 4809 | checknil CARG2; beq >4 // Key found, but nil value?
4139 |2: 4810 |2:
4140 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4811 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4812 |.if FPU
4141 | stfd f14, NODE:TMP2->val 4813 | stfd f14, NODE:TMP2->val
4814 |.else
4815 | stw SAVE0, NODE:TMP2->val.u32.hi
4816 | stw SAVE1, NODE:TMP2->val.u32.lo
4817 |.endif
4142 | bne >7 4818 | bne >7
4143 |3: 4819 |3:
4144 | ins_next 4820 | ins_next
@@ -4177,7 +4853,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4177 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) 4853 | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4178 | // Returns TValue *. 4854 | // Returns TValue *.
4179 | lp BASE, L->base 4855 | lp BASE, L->base
4856 |.if FPU
4180 | stfd f14, 0(CRET1) 4857 | stfd f14, 0(CRET1)
4858 |.else
4859 | stw SAVE0, 0(CRET1)
4860 | stw SAVE1, 4(CRET1)
4861 |.endif
4181 | b <3 // No 2nd write barrier needed. 4862 | b <3 // No 2nd write barrier needed.
4182 | 4863 |
4183 |7: // Possible table write barrier for the value. Skip valiswhite check. 4864 |7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4194,13 +4875,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4194 | lwz TMP2, TAB:RB->array 4875 | lwz TMP2, TAB:RB->array
4195 | lbz TMP3, TAB:RB->marked 4876 | lbz TMP3, TAB:RB->marked
4196 | cmplw TMP0, TMP1 4877 | cmplw TMP0, TMP1
4878 |.if FPU
4197 | lfdx f14, BASE, RA 4879 | lfdx f14, BASE, RA
4880 |.else
4881 | add CARG2, BASE, RA
4882 | lwz SAVE0, 0(CARG2)
4883 | lwz SAVE1, 4(CARG2)
4884 |.endif
4198 | bge ->vmeta_tsetb 4885 | bge ->vmeta_tsetb
4199 | lwzx TMP1, TMP2, RC 4886 | lwzx TMP1, TMP2, RC
4200 | checknil TMP1; beq >5 4887 | checknil TMP1; beq >5
4201 |1: 4888 |1:
4202 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4889 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4890 |.if FPU
4203 | stfdx f14, TMP2, RC 4891 | stfdx f14, TMP2, RC
4892 |.else
4893 | stwux SAVE0, RC, TMP2
4894 | stw SAVE1, 4(RC)
4895 |.endif
4204 | bne >7 4896 | bne >7
4205 |2: 4897 |2:
4206 | ins_next 4898 | ins_next
@@ -4218,6 +4910,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4218 | barrierback TAB:RB, TMP3, TMP0 4910 | barrierback TAB:RB, TMP3, TMP0
4219 | b <2 4911 | b <2
4220 break; 4912 break;
4913 case BC_TSETR:
4914 | // RA = dst*8, RB = table*8, RC = key*8
4915 | add RB, BASE, RB
4916 | lwz TAB:CARG2, 4(RB)
4917 |.if DUALNUM
4918 | add RC, BASE, RC
4919 | lbz TMP3, TAB:CARG2->marked
4920 | lwz TMP0, TAB:CARG2->asize
4921 | lwz CARG3, 4(RC)
4922 | lwz TMP1, TAB:CARG2->array
4923 |.else
4924 | lfdx f0, BASE, RC
4925 | lbz TMP3, TAB:CARG2->marked
4926 | lwz TMP0, TAB:CARG2->asize
4927 | toint CARG3, f0
4928 | lwz TMP1, TAB:CARG2->array
4929 |.endif
4930 | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
4931 | bne >7
4932 |2:
4933 | cmplw TMP0, CARG3
4934 | slwi TMP2, CARG3, 3
4935 |.if FPU
4936 | lfdx f14, BASE, RA
4937 |.else
4938 | lwzux SAVE0, RA, BASE
4939 | lwz SAVE1, 4(RA)
4940 |.endif
4941 | ble ->vmeta_tsetr // In array part?
4942 | ins_next1
4943 |.if FPU
4944 | stfdx f14, TMP1, TMP2
4945 |.else
4946 | stwux SAVE0, TMP1, TMP2
4947 | stw SAVE1, 4(TMP1)
4948 |.endif
4949 | ins_next2
4950 |
4951 |7: // Possible table write barrier for the value. Skip valiswhite check.
4952 | barrierback TAB:CARG2, TMP3, TMP2
4953 | b <2
4954 break;
4955
4221 4956
4222 case BC_TSETM: 4957 case BC_TSETM:
4223 | // RA = base*8 (table at base-1), RD = num_const*8 (start index) 4958 | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4240,10 +4975,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4240 | add TMP1, TMP1, TMP0 4975 | add TMP1, TMP1, TMP0
4241 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) 4976 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
4242 |3: // Copy result slots to table. 4977 |3: // Copy result slots to table.
4978 |.if FPU
4243 | lfd f0, 0(RA) 4979 | lfd f0, 0(RA)
4980 |.else
4981 | lwz SAVE0, 0(RA)
4982 | lwz SAVE1, 4(RA)
4983 |.endif
4244 | addi RA, RA, 8 4984 | addi RA, RA, 8
4245 | cmpw cr1, RA, TMP2 4985 | cmpw cr1, RA, TMP2
4986 |.if FPU
4246 | stfd f0, 0(TMP1) 4987 | stfd f0, 0(TMP1)
4988 |.else
4989 | stw SAVE0, 0(TMP1)
4990 | stw SAVE1, 4(TMP1)
4991 |.endif
4247 | addi TMP1, TMP1, 8 4992 | addi TMP1, TMP1, 8
4248 | blt cr1, <3 4993 | blt cr1, <3
4249 | bne >7 4994 | bne >7
@@ -4310,9 +5055,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4310 | beq cr1, >3 5055 | beq cr1, >3
4311 |2: 5056 |2:
4312 | addi TMP3, TMP2, 8 5057 | addi TMP3, TMP2, 8
5058 |.if FPU
4313 | lfdx f0, RA, TMP2 5059 | lfdx f0, RA, TMP2
5060 |.else
5061 | add CARG3, RA, TMP2
5062 | lwz CARG1, 0(CARG3)
5063 | lwz CARG2, 4(CARG3)
5064 |.endif
4314 | cmplw cr1, TMP3, NARGS8:RC 5065 | cmplw cr1, TMP3, NARGS8:RC
5066 |.if FPU
4315 | stfdx f0, BASE, TMP2 5067 | stfdx f0, BASE, TMP2
5068 |.else
5069 | stwux CARG1, TMP2, BASE
5070 | stw CARG2, 4(TMP2)
5071 |.endif
4316 | mr TMP2, TMP3 5072 | mr TMP2, TMP3
4317 | bne cr1, <2 5073 | bne cr1, <2
4318 |3: 5074 |3:
@@ -4345,14 +5101,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4345 | add BASE, BASE, RA 5101 | add BASE, BASE, RA
4346 | lwz TMP1, -24(BASE) 5102 | lwz TMP1, -24(BASE)
4347 | lwz LFUNC:RB, -20(BASE) 5103 | lwz LFUNC:RB, -20(BASE)
5104 |.if FPU
4348 | lfd f1, -8(BASE) 5105 | lfd f1, -8(BASE)
4349 | lfd f0, -16(BASE) 5106 | lfd f0, -16(BASE)
5107 |.else
5108 | lwz CARG1, -8(BASE)
5109 | lwz CARG2, -4(BASE)
5110 | lwz CARG3, -16(BASE)
5111 | lwz CARG4, -12(BASE)
5112 |.endif
4350 | stw TMP1, 0(BASE) // Copy callable. 5113 | stw TMP1, 0(BASE) // Copy callable.
4351 | stw LFUNC:RB, 4(BASE) 5114 | stw LFUNC:RB, 4(BASE)
4352 | checkfunc TMP1 5115 | checkfunc TMP1
4353 | stfd f1, 16(BASE) // Copy control var.
4354 | li NARGS8:RC, 16 // Iterators get 2 arguments. 5116 | li NARGS8:RC, 16 // Iterators get 2 arguments.
5117 |.if FPU
5118 | stfd f1, 16(BASE) // Copy control var.
4355 | stfdu f0, 8(BASE) // Copy state. 5119 | stfdu f0, 8(BASE) // Copy state.
5120 |.else
5121 | stw CARG1, 16(BASE) // Copy control var.
5122 | stw CARG2, 20(BASE)
5123 | stwu CARG3, 8(BASE) // Copy state.
5124 | stw CARG4, 4(BASE)
5125 |.endif
4356 | bne ->vmeta_call 5126 | bne ->vmeta_call
4357 | ins_call 5127 | ins_call
4358 break; 5128 break;
@@ -4373,7 +5143,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4373 | slwi TMP3, RC, 3 5143 | slwi TMP3, RC, 3
4374 | bge >5 // Index points after array part? 5144 | bge >5 // Index points after array part?
4375 | lwzx TMP2, TMP1, TMP3 5145 | lwzx TMP2, TMP1, TMP3
5146 |.if FPU
4376 | lfdx f0, TMP1, TMP3 5147 | lfdx f0, TMP1, TMP3
5148 |.else
5149 | lwzux CARG1, TMP3, TMP1
5150 | lwz CARG2, 4(TMP3)
5151 |.endif
4377 | checknil TMP2 5152 | checknil TMP2
4378 | lwz INS, -4(PC) 5153 | lwz INS, -4(PC)
4379 | beq >4 5154 | beq >4
@@ -4385,7 +5160,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4385 |.endif 5160 |.endif
4386 | addi RC, RC, 1 5161 | addi RC, RC, 1
4387 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) 5162 | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
5163 |.if FPU
4388 | stfd f0, 8(RA) 5164 | stfd f0, 8(RA)
5165 |.else
5166 | stw CARG1, 8(RA)
5167 | stw CARG2, 12(RA)
5168 |.endif
4389 | decode_RD4 TMP1, INS 5169 | decode_RD4 TMP1, INS
4390 | stw RC, -4(RA) // Update control var. 5170 | stw RC, -4(RA) // Update control var.
4391 | add PC, TMP1, TMP3 5171 | add PC, TMP1, TMP3
@@ -4410,17 +5190,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4410 | slwi RB, RC, 3 5190 | slwi RB, RC, 3
4411 | sub TMP3, TMP3, RB 5191 | sub TMP3, TMP3, RB
4412 | lwzx RB, TMP2, TMP3 5192 | lwzx RB, TMP2, TMP3
5193 |.if FPU
4413 | lfdx f0, TMP2, TMP3 5194 | lfdx f0, TMP2, TMP3
5195 |.else
5196 | add CARG3, TMP2, TMP3
5197 | lwz CARG1, 0(CARG3)
5198 | lwz CARG2, 4(CARG3)
5199 |.endif
4414 | add NODE:TMP3, TMP2, TMP3 5200 | add NODE:TMP3, TMP2, TMP3
4415 | checknil RB 5201 | checknil RB
4416 | lwz INS, -4(PC) 5202 | lwz INS, -4(PC)
4417 | beq >7 5203 | beq >7
5204 |.if FPU
4418 | lfd f1, NODE:TMP3->key 5205 | lfd f1, NODE:TMP3->key
5206 |.else
5207 | lwz CARG3, NODE:TMP3->key.u32.hi
5208 | lwz CARG4, NODE:TMP3->key.u32.lo
5209 |.endif
4419 | addis TMP2, PC, -(BCBIAS_J*4 >> 16) 5210 | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
5211 |.if FPU
4420 | stfd f0, 8(RA) 5212 | stfd f0, 8(RA)
5213 |.else
5214 | stw CARG1, 8(RA)
5215 | stw CARG2, 12(RA)
5216 |.endif
4421 | add RC, RC, TMP0 5217 | add RC, RC, TMP0
4422 | decode_RD4 TMP1, INS 5218 | decode_RD4 TMP1, INS
5219 |.if FPU
4423 | stfd f1, 0(RA) 5220 | stfd f1, 0(RA)
5221 |.else
5222 | stw CARG3, 0(RA)
5223 | stw CARG4, 4(RA)
5224 |.endif
4424 | addi RC, RC, 1 5225 | addi RC, RC, 1
4425 | add PC, TMP1, TMP2 5226 | add PC, TMP1, TMP2
4426 | stw RC, -4(RA) // Update control var. 5227 | stw RC, -4(RA) // Update control var.
@@ -4486,9 +5287,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4486 | subi TMP2, TMP2, 16 5287 | subi TMP2, TMP2, 16
4487 | ble >2 // No vararg slots? 5288 | ble >2 // No vararg slots?
4488 |1: // Copy vararg slots to destination slots. 5289 |1: // Copy vararg slots to destination slots.
5290 |.if FPU
4489 | lfd f0, 0(RC) 5291 | lfd f0, 0(RC)
5292 |.else
5293 | lwz CARG1, 0(RC)
5294 | lwz CARG2, 4(RC)
5295 |.endif
4490 | addi RC, RC, 8 5296 | addi RC, RC, 8
5297 |.if FPU
4491 | stfd f0, 0(RA) 5298 | stfd f0, 0(RA)
5299 |.else
5300 | stw CARG1, 0(RA)
5301 | stw CARG2, 4(RA)
5302 |.endif
4492 | cmplw RA, TMP2 5303 | cmplw RA, TMP2
4493 | cmplw cr1, RC, TMP3 5304 | cmplw cr1, RC, TMP3
4494 | bge >3 // All destination slots filled? 5305 | bge >3 // All destination slots filled?
@@ -4511,9 +5322,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4511 | addi MULTRES, TMP1, 8 5322 | addi MULTRES, TMP1, 8
4512 | bgt >7 5323 | bgt >7
4513 |6: 5324 |6:
5325 |.if FPU
4514 | lfd f0, 0(RC) 5326 | lfd f0, 0(RC)
5327 |.else
5328 | lwz CARG1, 0(RC)
5329 | lwz CARG2, 4(RC)
5330 |.endif
4515 | addi RC, RC, 8 5331 | addi RC, RC, 8
5332 |.if FPU
4516 | stfd f0, 0(RA) 5333 | stfd f0, 0(RA)
5334 |.else
5335 | stw CARG1, 0(RA)
5336 | stw CARG2, 4(RA)
5337 |.endif
4517 | cmplw RC, TMP3 5338 | cmplw RC, TMP3
4518 | addi RA, RA, 8 5339 | addi RA, RA, 8
4519 | blt <6 // More vararg slots? 5340 | blt <6 // More vararg slots?
@@ -4564,14 +5385,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4564 | li TMP1, 0 5385 | li TMP1, 0
4565 |2: 5386 |2:
4566 | addi TMP3, TMP1, 8 5387 | addi TMP3, TMP1, 8
5388 |.if FPU
4567 | lfdx f0, RA, TMP1 5389 | lfdx f0, RA, TMP1
5390 |.else
5391 | add CARG3, RA, TMP1
5392 | lwz CARG1, 0(CARG3)
5393 | lwz CARG2, 4(CARG3)
5394 |.endif
4568 | cmpw TMP3, RC 5395 | cmpw TMP3, RC
5396 |.if FPU
4569 | stfdx f0, TMP2, TMP1 5397 | stfdx f0, TMP2, TMP1
5398 |.else
5399 | add CARG3, TMP2, TMP1
5400 | stw CARG1, 0(CARG3)
5401 | stw CARG2, 4(CARG3)
5402 |.endif
4570 | beq >3 5403 | beq >3
4571 | addi TMP1, TMP3, 8 5404 | addi TMP1, TMP3, 8
5405 |.if FPU
4572 | lfdx f1, RA, TMP3 5406 | lfdx f1, RA, TMP3
5407 |.else
5408 | add CARG3, RA, TMP3
5409 | lwz CARG1, 0(CARG3)
5410 | lwz CARG2, 4(CARG3)
5411 |.endif
4573 | cmpw TMP1, RC 5412 | cmpw TMP1, RC
5413 |.if FPU
4574 | stfdx f1, TMP2, TMP3 5414 | stfdx f1, TMP2, TMP3
5415 |.else
5416 | add CARG3, TMP2, TMP3
5417 | stw CARG1, 0(CARG3)
5418 | stw CARG2, 4(CARG3)
5419 |.endif
4575 | bne <2 5420 | bne <2
4576 |3: 5421 |3:
4577 |5: 5422 |5:
@@ -4613,8 +5458,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4613 | subi TMP2, BASE, 8 5458 | subi TMP2, BASE, 8
4614 | decode_RB8 RB, INS 5459 | decode_RB8 RB, INS
4615 if (op == BC_RET1) { 5460 if (op == BC_RET1) {
5461 |.if FPU
4616 | lfd f0, 0(RA) 5462 | lfd f0, 0(RA)
4617 | stfd f0, 0(TMP2) 5463 | stfd f0, 0(TMP2)
5464 |.else
5465 | lwz CARG1, 0(RA)
5466 | lwz CARG2, 4(RA)
5467 | stw CARG1, 0(TMP2)
5468 | stw CARG2, 4(TMP2)
5469 |.endif
4618 } 5470 }
4619 |5: 5471 |5:
4620 | cmplw RB, RD 5472 | cmplw RB, RD
@@ -4675,11 +5527,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4675 |4: 5527 |4:
4676 | stw CARG1, FORL_IDX*8+4(RA) 5528 | stw CARG1, FORL_IDX*8+4(RA)
4677 } else { 5529 } else {
4678 | lwz TMP3, FORL_STEP*8(RA) 5530 | lwz SAVE0, FORL_STEP*8(RA)
4679 | lwz CARG3, FORL_STEP*8+4(RA) 5531 | lwz CARG3, FORL_STEP*8+4(RA)
4680 | lwz TMP2, FORL_STOP*8(RA) 5532 | lwz TMP2, FORL_STOP*8(RA)
4681 | lwz CARG2, FORL_STOP*8+4(RA) 5533 | lwz CARG2, FORL_STOP*8+4(RA)
4682 | cmplw cr7, TMP3, TISNUM 5534 | cmplw cr7, SAVE0, TISNUM
4683 | cmplw cr1, TMP2, TISNUM 5535 | cmplw cr1, TMP2, TISNUM
4684 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq 5536 | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
4685 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq 5537 | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4722,41 +5574,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4722 if (vk) { 5574 if (vk) {
4723 |.if DUALNUM 5575 |.if DUALNUM
4724 |9: // FP loop. 5576 |9: // FP loop.
5577 |.if FPU
4725 | lfd f1, FORL_IDX*8(RA) 5578 | lfd f1, FORL_IDX*8(RA)
4726 |.else 5579 |.else
5580 | lwz CARG1, FORL_IDX*8(RA)
5581 | lwz CARG2, FORL_IDX*8+4(RA)
5582 |.endif
5583 |.else
4727 | lfdux f1, RA, BASE 5584 | lfdux f1, RA, BASE
4728 |.endif 5585 |.endif
5586 |.if FPU
4729 | lfd f3, FORL_STEP*8(RA) 5587 | lfd f3, FORL_STEP*8(RA)
4730 | lfd f2, FORL_STOP*8(RA) 5588 | lfd f2, FORL_STOP*8(RA)
4731 | lwz TMP3, FORL_STEP*8(RA)
4732 | fadd f1, f1, f3 5589 | fadd f1, f1, f3
4733 | stfd f1, FORL_IDX*8(RA) 5590 | stfd f1, FORL_IDX*8(RA)
5591 |.else
5592 | lwz CARG3, FORL_STEP*8(RA)
5593 | lwz CARG4, FORL_STEP*8+4(RA)
5594 | mr SAVE1, RD
5595 | blex __adddf3
5596 | mr RD, SAVE1
5597 | stw CRET1, FORL_IDX*8(RA)
5598 | stw CRET2, FORL_IDX*8+4(RA)
5599 | lwz CARG3, FORL_STOP*8(RA)
5600 | lwz CARG4, FORL_STOP*8+4(RA)
5601 |.endif
5602 | lwz SAVE0, FORL_STEP*8(RA)
4734 } else { 5603 } else {
4735 |.if DUALNUM 5604 |.if DUALNUM
4736 |9: // FP loop. 5605 |9: // FP loop.
4737 |.else 5606 |.else
4738 | lwzux TMP1, RA, BASE 5607 | lwzux TMP1, RA, BASE
4739 | lwz TMP3, FORL_STEP*8(RA) 5608 | lwz SAVE0, FORL_STEP*8(RA)
4740 | lwz TMP2, FORL_STOP*8(RA) 5609 | lwz TMP2, FORL_STOP*8(RA)
4741 | cmplw cr0, TMP1, TISNUM 5610 | cmplw cr0, TMP1, TISNUM
4742 | cmplw cr7, TMP3, TISNUM 5611 | cmplw cr7, SAVE0, TISNUM
4743 | cmplw cr1, TMP2, TISNUM 5612 | cmplw cr1, TMP2, TISNUM
4744 |.endif 5613 |.endif
5614 |.if FPU
4745 | lfd f1, FORL_IDX*8(RA) 5615 | lfd f1, FORL_IDX*8(RA)
5616 |.else
5617 | lwz CARG1, FORL_IDX*8(RA)
5618 | lwz CARG2, FORL_IDX*8+4(RA)
5619 |.endif
4746 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt 5620 | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
4747 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt 5621 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
5622 |.if FPU
4748 | lfd f2, FORL_STOP*8(RA) 5623 | lfd f2, FORL_STOP*8(RA)
5624 |.else
5625 | lwz CARG3, FORL_STOP*8(RA)
5626 | lwz CARG4, FORL_STOP*8+4(RA)
5627 |.endif
4749 | bge ->vmeta_for 5628 | bge ->vmeta_for
4750 } 5629 }
4751 | cmpwi cr6, TMP3, 0 5630 | cmpwi cr6, SAVE0, 0
4752 if (op != BC_JFORL) { 5631 if (op != BC_JFORL) {
4753 | srwi RD, RD, 1 5632 | srwi RD, RD, 1
4754 } 5633 }
5634 |.if FPU
4755 | stfd f1, FORL_EXT*8(RA) 5635 | stfd f1, FORL_EXT*8(RA)
5636 |.else
5637 | stw CARG1, FORL_EXT*8(RA)
5638 | stw CARG2, FORL_EXT*8+4(RA)
5639 |.endif
4756 if (op != BC_JFORL) { 5640 if (op != BC_JFORL) {
4757 | add RD, PC, RD 5641 | add RD, PC, RD
4758 } 5642 }
5643 |.if FPU
4759 | fcmpu cr0, f1, f2 5644 | fcmpu cr0, f1, f2
5645 |.else
5646 | mr SAVE1, RD
5647 | blex __ledf2
5648 | cmpwi CRET1, 0
5649 | mr RD, SAVE1
5650 |.endif
4760 if (op == BC_JFORI) { 5651 if (op == BC_JFORI) {
4761 | addis PC, RD, -(BCBIAS_J*4 >> 16) 5652 | addis PC, RD, -(BCBIAS_J*4 >> 16)
4762 } 5653 }
@@ -4859,8 +5750,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4859 | lp TMP2, TRACE:TMP2->mcode 5750 | lp TMP2, TRACE:TMP2->mcode
4860 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) 5751 | stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
4861 | mtctr TMP2 5752 | mtctr TMP2
4862 | stw L, DISPATCH_GL(jit_L)(DISPATCH)
4863 | addi JGL, DISPATCH, GG_DISP2G+32768 5753 | addi JGL, DISPATCH, GG_DISP2G+32768
5754 | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
4864 | bctr 5755 | bctr
4865 |.endif 5756 |.endif
4866 break; 5757 break;
@@ -4995,6 +5886,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4995 | lp TMP1, L->top 5886 | lp TMP1, L->top
4996 | li_vmstate INTERP 5887 | li_vmstate INTERP
4997 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. 5888 | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
5889 | stw L, DISPATCH_GL(cur_L)(DISPATCH)
4998 | sub RA, TMP1, RD // RA = L->top - nresults*8 5890 | sub RA, TMP1, RD // RA = L->top - nresults*8
4999 | st_vmstate 5891 | st_vmstate
5000 | b ->vm_returnc 5892 | b ->vm_returnc
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..a5749b17
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4909 @@
1|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x64
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|.if WIN
16|.define X64WIN, 1 // Windows/x64 calling conventions.
17|.endif
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, rdx // Not C callee-save, refetched anyway.
22|.if X64WIN
23|.define KBASE, rdi // Must be C callee-save.
24|.define PC, rsi // Must be C callee-save.
25|.define DISPATCH, rbx // Must be C callee-save.
26|.define KBASEd, edi // 32 bit names for the three registers above.
27|.define PCd, esi
28|.define DISPATCHd, ebx
29|.else
30|.define KBASE, r15 // Must be C callee-save.
31|.define PC, rbx // Must be C callee-save.
32|.define DISPATCH, r14 // Must be C callee-save.
33|.define KBASEd, r15d // 32 bit names for the three registers above.
34|.define PCd, ebx
35|.define DISPATCHd, r14d
36|.endif
37|
38|.define RA, rcx // Same register as CARG1 (WIN64) or CARG4 (POSIX).
39|.define RAd, ecx
40|.define RAH, ch
41|.define RAL, cl
42|.define RB, rbp // Must be rbp (C callee-save).
43|.define RBd, ebp
44|.define RC, rax // Must be rax.
45|.define RCd, eax
46|.define RCW, ax
47|.define RCH, ah
48|.define RCL, al
49|.define OP, RBd // The opcode is decoded into RB's register (see ins_NEXT).
50|.define RD, RC // RD and RC name the same register (rax).
51|.define RDd, RCd
52|.define RDW, RCW
53|.define RDL, RCL
54|.define TMPR, r10
55|.define TMPRd, r10d
56|.define ITYPE, r11 // Receives type tags. Written by settp and the check* macros.
57|.define ITYPEd, r11d
58|
59|.if X64WIN
60|.define CARG1, rcx // x64/WIN64 C call arguments.
61|.define CARG2, rdx
62|.define CARG3, r8
63|.define CARG4, r9
64|.define CARG1d, ecx
65|.define CARG2d, edx
66|.define CARG3d, r8d
67|.define CARG4d, r9d
68|.else
69|.define CARG1, rdi // x64/POSIX C call arguments.
70|.define CARG2, rsi
71|.define CARG3, rdx
72|.define CARG4, rcx
73|.define CARG5, r8
74|.define CARG6, r9
75|.define CARG1d, edi
76|.define CARG2d, esi
77|.define CARG3d, edx
78|.define CARG4d, ecx
79|.define CARG5d, r8d
80|.define CARG6d, r9d
81|.endif
82|
83|// Type definitions. Some of these are only used for documentation.
84|.type L, lua_State
85|.type GL, global_State
86|.type TVALUE, TValue
87|.type GCOBJ, GCobj
88|.type STR, GCstr
89|.type TAB, GCtab
90|.type LFUNC, GCfuncL
91|.type CFUNC, GCfuncC
92|.type PROTO, GCproto
93|.type UPVAL, GCupval
94|.type NODE, Node
95|.type NARGS, int
96|.type TRACE, GCtrace
97|.type SBUF, SBuf
98|
99|// Stack layout while in interpreter. Must match with lj_frame.h.
100|//-----------------------------------------------------------------------
101|.if X64WIN // x64/Windows stack layout
102|
103|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
104|.macro saveregs_ // Save rdi/rsi/rbx, then reserve CFRAME_SPACE. rbp is not pushed here.
105| push rdi; push rsi; push rbx
106| sub rsp, CFRAME_SPACE
107|.endmacro
108|.macro saveregs // Full prologue: push rbp first, then saveregs_.
109| push rbp; saveregs_
110|.endmacro
111|.macro restoreregs // Epilogue: undoes saveregs in reverse order.
112| add rsp, CFRAME_SPACE
113| pop rbx; pop rsi; pop rdi; pop rbp
114|.endmacro
115|
116|.define SAVE_CFRAME, aword [rsp+aword*13]
117|.define SAVE_PC, aword [rsp+aword*12]
118|.define SAVE_L, aword [rsp+aword*11]
119|.define SAVE_ERRF, dword [rsp+dword*21]
120|.define SAVE_NRES, dword [rsp+dword*20]
121|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
122|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
123|.define SAVE_R4, aword [rsp+aword*8]
124|.define SAVE_R3, aword [rsp+aword*7]
125|.define SAVE_R2, aword [rsp+aword*6]
126|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
127|.define ARG5, aword [rsp+aword*4]
128|.define CSAVE_4, aword [rsp+aword*3]
129|.define CSAVE_3, aword [rsp+aword*2]
130|.define CSAVE_2, aword [rsp+aword*1]
131|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
132|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
133|
134|.define ARG5d, dword [rsp+dword*8]
135|.define TMP1, ARG5 // TMP1 overlaps ARG5
136|.define TMP1d, ARG5d
137|.define TMP1hi, dword [rsp+dword*9]
138|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
139|
140|//-----------------------------------------------------------------------
141|.else // x64/POSIX stack layout
142|
143|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
144|.macro saveregs_ // Save rbx/r15/r14 (and r13/r12 if NO_UNWIND), then reserve CFRAME_SPACE.
145| push rbx; push r15; push r14
146|.if NO_UNWIND // Two extra saves. They occupy the SAVE_RU2/SAVE_RU1 slots.
147| push r13; push r12
148|.endif
149| sub rsp, CFRAME_SPACE
150|.endmacro
151|.macro saveregs // Full prologue: push rbp first, then saveregs_.
152| push rbp; saveregs_
153|.endmacro
154|.macro restoreregs // Epilogue: undoes saveregs in reverse order.
155| add rsp, CFRAME_SPACE
156|.if NO_UNWIND
157| pop r12; pop r13
158|.endif
159| pop r14; pop r15; pop rbx; pop rbp
160|.endmacro
161|
162|//----- 16 byte aligned,
163|.if NO_UNWIND
164|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
165|.define SAVE_R4, aword [rsp+aword*10]
166|.define SAVE_R3, aword [rsp+aword*9]
167|.define SAVE_R2, aword [rsp+aword*8]
168|.define SAVE_R1, aword [rsp+aword*7]
169|.define SAVE_RU2, aword [rsp+aword*6]
170|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
171|.else
172|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
173|.define SAVE_R4, aword [rsp+aword*8]
174|.define SAVE_R3, aword [rsp+aword*7]
175|.define SAVE_R2, aword [rsp+aword*6]
176|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
177|.endif
178|.define SAVE_CFRAME, aword [rsp+aword*4]
179|.define SAVE_PC, aword [rsp+aword*3]
180|.define SAVE_L, aword [rsp+aword*2]
181|.define SAVE_ERRF, dword [rsp+dword*3]
182|.define SAVE_NRES, dword [rsp+dword*2]
183|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
184|//----- 16 byte aligned
185|
186|.define TMP1d, dword [rsp]
187|.define TMP1hi, dword [rsp+dword*1]
188|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
189|
190|.endif
191|
192|//-----------------------------------------------------------------------
193|
194|// Instruction headers. They run after ins_NEXT, which leaves RA and RC (= RD) set.
195|.macro ins_A; .endmacro // Empty: nothing more to decode.
196|.macro ins_AD; .endmacro // Empty: nothing more to decode.
197|.macro ins_AJ; .endmacro // Empty: nothing more to decode.
198|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro // RB = high byte of RC, then RC = low byte of RC.
199|.macro ins_AB_; movzx RBd, RCH; .endmacro // RB = high byte of RC. RC is left as is.
200|.macro ins_A_C; movzx RCd, RCL; .endmacro // RC = low byte of RC.
201|.macro ins_AND; not RD; .endmacro // RD = ~RD.
202|
203|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
204|.macro ins_NEXT
205| mov RCd, [PC] // Fetch the 32 bit instruction word.
206| movzx RAd, RCH // RA = bits 8-15.
207| movzx OP, RCL // OP = bits 0-7. OP is RBd, so RB is overwritten.
208| add PC, 4 // PC now points after the fetched instruction.
209| shr RCd, 16 // RC (= RD) = bits 16-31.
210| jmp aword [DISPATCH+OP*8] // Indirect jump through the table at DISPATCH.
211|.endmacro
212|
213|// Instruction footer.
214|.if 1
215| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
216| .define ins_next, ins_NEXT
217| .define ins_next_, ins_NEXT
218|.else
219| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
220| // Affects only certain kinds of benchmarks (and only with -j off).
221| // Around 10%-30% slower on Core2, much slower on P4.
222| .macro ins_next
223| jmp ->ins_next
224| .endmacro
225| .macro ins_next_
226| ->ins_next:
227| ins_NEXT
228| .endmacro
229|.endif
230|
231|// Call decode and dispatch.
232|.macro ins_callt
233| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
234| mov PC, LFUNC:RB->pc // PC = bytecode pointer stored in the function object.
235| mov RAd, [PC] // Fetch the first instruction word.
236| movzx OP, RAL // OP = bits 0-7. OP is RBd, so LFUNC:RB is gone after this.
237| movzx RAd, RAH // RA = bits 8-15.
238| add PC, 4 // PC now points after the first instruction.
239| jmp aword [DISPATCH+OP*8] // Dispatch. RC/RD is not touched and still holds nargs+1.
240|.endmacro
241|
242|.macro ins_call
243| // BASE = new base, RB = LFUNC, RD = nargs+1
244| mov [BASE-8], PC // Store the current PC in the slot below the new base.
245| ins_callt
246|.endmacro
247|
248|//-----------------------------------------------------------------------
249|
250|// Macros to clear or set tags. The tag occupies the bits from bit 47 upwards.
251|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro // Zero the top 17 bits, keep the low 47.
252|.macro settp, reg, tp // In-place form: reg |= tp<<47. Clobbers ITYPE.
253| mov64 ITYPE, ((uint64_t)tp<<47)
254| or reg, ITYPE
255|.endmacro
256|.macro settp, dst, reg, tp // Three-operand form: dst = reg | tp<<47. dst is written first, so it must not be reg.
257| mov64 dst, ((uint64_t)tp<<47)
258| or dst, reg
259|.endmacro
260|.macro setint, reg // settp with LJ_TISNUM, in-place form (clobbers ITYPE).
261| settp reg, LJ_TISNUM
262|.endmacro
263|.macro setint, dst, reg // settp with LJ_TISNUM, three-operand form.
264| settp dst, reg, LJ_TISNUM
265|.endmacro
266|
267|// Macros to test operand types. All of them clobber ITYPE and the flags.
268|.macro checktp_nc, reg, tp, target // Jump to target unless the tag of reg equals tp. reg is not modified.
269| mov ITYPE, reg
270| sar ITYPE, 47 // ITYPE = sign-extended tag.
271| cmp ITYPEd, tp
272| jne target
273|.endmacro
274|.macro checktp, reg, tp, target // Like checktp_nc, but also strips the tag from reg on both paths.
275| mov ITYPE, reg
276| cleartp reg // Executed before the branch, so reg is untagged either way.
277| sar ITYPE, 47
278| cmp ITYPEd, tp
279| jne target
280|.endmacro
281|.macro checktptp, src, tp, target // Same instruction sequence as checktp_nc.
282| mov ITYPE, src
283| sar ITYPE, 47
284| cmp ITYPEd, tp
285| jne target
286|.endmacro
287|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
288|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
289|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
290|
291|.macro checknumx, reg, target, jump // Compare the tag with LJ_TISNUM and branch using the given jump mnemonic.
292| mov ITYPE, reg
293| sar ITYPE, 47
294| cmp ITYPEd, LJ_TISNUM
295| jump target
296|.endmacro
297|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro // Jump if tag != LJ_TISNUM.
298|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro // Same expansion as checkint.
299|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro // Jump if tag >= LJ_TISNUM (unsigned).
300|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro // Same expansion as checknum.
301|.macro checknumber, src, target; checknumx src, target, ja; .endmacro // Jump if tag > LJ_TISNUM (unsigned).
302|
303|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro // All bits set except bit 47.
304|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro // All bits set except bit 48.
305|
306|// These operands must be used with movzx.
307|.define PC_OP, byte [PC-4]
308|.define PC_RA, byte [PC-3]
309|.define PC_RB, byte [PC-1]
310|.define PC_RC, byte [PC-2]
311|.define PC_RD, word [PC-2]
312|
313|.macro branchPC, reg
314| lea PC, [PC+reg*4-BCBIAS_J*4]
315|.endmacro
316|
317|// Assumes DISPATCH is relative to GL.
318#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
319#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
320|
321#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
322|
323|// Decrement hashed hotcount and trigger trace recorder on counter underflow.
324|.macro hotloop, reg
325| mov reg, PCd
326| shr reg, 1
327| and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK, used as a byte offset below.
328| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // Decrement the 16 bit counter.
329| jb ->vm_hotloop // Taken when the subtraction borrows.
330|.endmacro
331|
332|.macro hotcall, reg // Same as hotloop, but with HOTCOUNT_CALL and ->vm_hotcall.
333| mov reg, PCd
334| shr reg, 1
335| and reg, HOTCOUNT_PCMASK
336| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
337| jb ->vm_hotcall
338|.endmacro
339|
340|// Set current VM state. st is pasted onto LJ_VMST_ to form the constant name.
341|.macro set_vmstate, st
342| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // The bitwise complement of the constant is stored.
343|.endmacro
344|
345|.macro fpop1; fstp st1; .endmacro
346|
347|// Synthesize SSE FP constants. Each macro builds the bit pattern in tmp and moves it to reg.
348|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
349| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp // All bits set except the top (sign) bit.
350|.endmacro
351|
352|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
353| mov64 tmp, U64x(val,00000000); movd reg, tmp // val is the upper 32 bits, the lower 32 bits are zero.
354|.endmacro
355|
356|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
357| sseconst_hi reg, tmp, 80000000
358|.endmacro
359|.macro sseconst_1, reg, tmp // Synthesize 1.0.
360| sseconst_hi reg, tmp, 3ff00000
361|.endmacro
362|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
363| sseconst_hi reg, tmp, bff00000
364|.endmacro
365|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
366| sseconst_hi reg, tmp, 43300000
367|.endmacro
368|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
369| sseconst_hi reg, tmp, 43380000
370|.endmacro
371|
372|// Move table write barrier back. Overwrites reg.
373|.macro barrierback, tab, reg
374| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
375| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current head of the gc.grayagain list.
376| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
377| mov tab->gclist, reg // Link tab to the old head.
378|.endmacro
379|
380|//-----------------------------------------------------------------------
381
382/* Generate subroutines used by opcodes and other parts of the VM. */
383/* The .code_sub section should be last to help static branch prediction. */
384static void build_subroutines(BuildCtx *ctx)
385{
386 |.code_sub
387 |
388 |//-----------------------------------------------------------------------
389 |//-- Return handling ----------------------------------------------------
390 |//-----------------------------------------------------------------------
391 |
392 |->vm_returnp: // Return through a pcall frame or a continuation. Falls through into vm_returnc.
393 | test PCd, FRAME_P
394 | jz ->cont_dispatch // FRAME_P bit clear: handled by cont_dispatch.
395 |
396 | // Return from pcall or xpcall fast func.
397 | and PC, -8 // Clear the low 3 bits of PC, leaving the frame delta.
398 | sub BASE, PC // Restore caller base.
399 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
400 | mov PC, [BASE-8] // Fetch PC of previous frame.
401 | // Prepending may overwrite the pcall frame, so do it at the end.
402 | mov_true ITYPE
403 | mov aword [BASE+RA], ITYPE // Prepend true to results.
404 |
405 |->vm_returnc: // Return with RD = nresults on entry. Falls through into vm_return.
406 | add RDd, 1 // RD = nresults+1
407 | jz ->vm_unwind_yield // The sum is zero only if RDd was -1 on entry.
408 | mov MULTRES, RDd
409 | test PC, FRAME_TYPE
410 | jz ->BC_RET_Z // Handle regular return to Lua.
411 |
412 |->vm_return: // Return to a non-Lua frame: either to C or via vm_returnp.
413 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
414 | xor PC, FRAME_C // Flip the FRAME_C bits so that a C frame tests as zero below.
415 | test PCd, FRAME_TYPE
416 | jnz ->vm_returnp // Not a C frame.
417 |
418 | // Return to C.
419 | set_vmstate C
420 | and PC, -8 // Clear the low 3 bits, leaving the frame delta.
421 | sub PC, BASE // PC = delta - BASE.
422 | neg PC // Previous base = BASE - delta.
423 |
424 | sub RDd, 1 // RD = nresults.
425 | jz >2 // Nothing to move.
426 |1: // Move results down.
427 | mov RB, [BASE+RA] // Load one result slot.
428 | mov [BASE-16], RB // Store it 16 bytes below the advancing BASE.
429 | add BASE, 8 // BASE advances one slot per result moved.
430 | sub RDd, 1
431 | jnz <1
432 |2:
433 | mov L:RB, SAVE_L // RB was used as the copy temporary above.
434 | mov L:RB->base, PC // L->base = previous base.
435 |3:
436 | mov RDd, MULTRES // RD = nresults+1 (reloaded from the stack slot).
437 | mov RAd, SAVE_NRES // RA = wanted nresults+1
438 |4:
439 | cmp RAd, RDd
440 | jne >6 // More/less results wanted?
441 |5:
442 | sub BASE, 16 // The next result slot would be written at BASE-16.
443 | mov L:RB->top, BASE
444 |
445 |->vm_leave_cp:
446 | mov RA, SAVE_CFRAME // Restore previous C frame.
447 | mov L:RB->cframe, RA
448 | xor eax, eax // Ok return status for vm_pcall.
449 |
450 |->vm_leave_unw:
451 | restoreregs // Undo saveregs.
452 | ret
453 |
454 |6:
455 | jb >7 // Less results wanted? (Flags are still those of the cmp at 4:.)
456 | // More results wanted. Check stack size and fill up results with nil.
457 | cmp BASE, L:RB->maxstack
458 | ja >8 // BASE is above L->maxstack: grow the stack first.
459 | mov aword [BASE-16], LJ_TNIL // Append one nil result.
460 | add BASE, 8
461 | add RDd, 1
462 | jmp <4 // Compare again.
463 |
464 |7: // Less results wanted.
465 | test RAd, RAd
466 | jz <5 // But check for LUA_MULTRET+1.
467 | sub RA, RD // Negative result!
468 | lea BASE, [BASE+RA*8] // Correct top.
469 | jmp <5
470 |
471 |8: // Corner case: need to grow stack for filling up results.
472 | // This can happen if:
473 | // - A C function grows the stack (a lot).
474 | // - The GC shrinks the stack in between.
475 | // - A return back from a lua_call() with (high) nresults adjustment.
476 | mov L:RB->top, BASE // Save current top held in BASE (yes).
477 | mov MULTRES, RDd // Need to fill only remainder with nil.
478 | mov CARG2d, RAd // n = wanted nresults+1.
479 | mov CARG1, L:RB
480 | call extern lj_state_growstack // (lua_State *L, int n)
481 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
482 | jmp <3 // 3: reloads RD and RA from MULTRES and SAVE_NRES.
483 |
484 |->vm_unwind_yield: // Return LUA_YIELD as the status via the unwind exit path.
485 | mov al, LUA_YIELD // Only al is written. On the path from vm_returnc eax is zero at this point.
486 | jmp ->vm_unwind_c_eh
487 |
488 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
489 | // (void *cframe, int errcode)
490 | mov eax, CARG2d // Error return status for vm_pcall.
491 | mov rsp, CARG1 // rsp = cframe. The SAVE_* slots are addressed relative to it from here on.
492 |->vm_unwind_c_eh: // Landing pad for external unwinder.
493 | mov L:RB, SAVE_L
494 | mov GL:RB, L:RB->glref // RB = global state, reached via L->glref.
495 | mov dword GL:RB->vmstate, ~LJ_VMST_C // Same value as set_vmstate C, but stored via GL instead of DISPATCH.
496 | jmp ->vm_leave_unw // Restore registers and return with the status in eax.
497 |
498 |->vm_unwind_rethrow: // Leave the VM frame and tail-call lj_err_throw(L, errcode) with errcode taken from eax.
499 |.if not X64WIN // NOTE(review): with X64WIN this label has no body and falls through into vm_unwind_ff. Presumably it is never reached there; confirm.
500 | mov CARG1, SAVE_L
501 | mov CARG2d, eax // errcode = eax on entry.
502 | restoreregs // Pop the VM's C frame before the jump.
503 | jmp extern lj_err_throw // (lua_State *L, int errcode)
504 |.endif
505 |
506 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
507 | // (void *cframe)
508 | and CARG1, CFRAME_RAWMASK // Mask the cframe pointer before using it as rsp.
509 | mov rsp, CARG1
510 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
511 | mov L:RB, SAVE_L
512 | mov RDd, 1+1 // Really 1+2 results, incr. later.
513 | mov BASE, L:RB->base
514 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
515 | add DISPATCH, GG_G2DISP
516 | mov PC, [BASE-8] // Fetch PC of previous frame.
517 | mov_false RA
518 | mov RB, [BASE] // RB = value at [BASE] (the error message). L:RB is no longer valid.
519 | mov [BASE-16], RA // Prepend false to error message.
520 | mov [BASE-8], RB // Move the message down one slot, over the slot just read into PC.
521 | mov RA, -16 // Results start at BASE+RA = BASE-16.
522 | set_vmstate INTERP
523 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
524 |
525 |//-----------------------------------------------------------------------
526 |//-- Grow stack for calls -----------------------------------------------
527 |//-----------------------------------------------------------------------
528 |
529 |->vm_growstack_c: // Grow stack for C function.
530 | mov CARG2d, LUA_MINSTACK // n = LUA_MINSTACK.
531 | jmp >2
532 |
533 |->vm_growstack_v: // Grow stack for vararg Lua function.
534 | sub RD, 16 // LJ_FR2
535 | jmp >1
536 |
537 |->vm_growstack_f: // Grow stack for fixarg Lua function.
538 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
539 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8. Stored as L->top below.
540 |1:
541 | movzx RAd, byte [PC-4+PC2PROTO(framesize)] // RA = framesize byte of the GCproto, addressed relative to PC.
542 | add PC, 4 // Must point after first instruction.
543 | mov L:RB->base, BASE
544 | mov L:RB->top, RD
545 | mov SAVE_PC, PC
546 | mov CARG2, RA // n = framesize. On WIN64 CARG2 is rdx, i.e. BASE, which is reloaded after the call.
547 |2:
548 | // RB = L, L->base = new base, L->top = top
549 | mov CARG1, L:RB
550 | call extern lj_state_growstack // (lua_State *L, int n)
551 | mov BASE, L:RB->base // Reload BASE and top from L after the call.
552 | mov RD, L:RB->top
553 | mov LFUNC:RB, [BASE-16] // Reload the function from the slot at BASE-16.
554 | cleartp LFUNC:RB
555 | sub RD, BASE
556 | shr RDd, 3 // RD = (top - base) / 8 = nargs.
557 | add NARGS:RDd, 1
558 | // BASE = new base, RB = LFUNC, RD = nargs+1
559 | ins_callt // Just retry the call.
560 |
561 |//-----------------------------------------------------------------------
562 |//-- Entry points into the assembler VM ---------------------------------
563 |//-----------------------------------------------------------------------
564 |
565 |->vm_resume: // Setup C frame and resume thread.
566 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
567 | saveregs
568 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
569 | mov SAVE_L, CARG1
570 | mov RA, CARG2 // RA = base.
571 | mov PCd, FRAME_CP
572 | xor RDd, RDd // RD = 0. Used below to zero the frame slots and to test L->status.
573 | lea KBASE, [esp+CFRAME_RESUME] // KBASE = stack pointer + CFRAME_RESUME. NOTE(review): written with esp while vm_call uses rsp; confirm the assembler emits a 64 bit address here.
574 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
575 | add DISPATCH, GG_G2DISP
576 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
577 | mov SAVE_CFRAME, RD // Previous C frame = 0.
578 | mov SAVE_NRES, RDd // nres1 = 0.
579 | mov SAVE_ERRF, RDd // ef = 0.
580 | mov L:RB->cframe, KBASE
581 | cmp byte L:RB->status, RDL // L->status == 0?
582 | je >2 // Initial resume (like a call).
583 |
584 | // Resume after yield (like a return).
585 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
586 | set_vmstate INTERP
587 | mov byte L:RB->status, RDL // L->status = 0.
588 | mov BASE, L:RB->base
589 | mov RD, L:RB->top
590 | sub RD, RA
591 | shr RDd, 3 // RD = (top - RA) / 8.
592 | add RDd, 1 // RD = nresults+1
593 | sub RA, BASE // RA = resultofs
594 | mov PC, [BASE-8] // PC = value stored in the slot below BASE.
595 | mov MULTRES, RDd
596 | test PCd, FRAME_TYPE
597 | jz ->BC_RET_Z // Frame type bits are zero: return to Lua.
598 | jmp ->vm_return
599 |
600 |->vm_pcall: // Setup protected C frame and enter VM.
601 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
602 | saveregs
603 | mov PCd, FRAME_CP
604 | mov SAVE_ERRF, CARG4d // Only the low 32 bits of ef are stored.
605 | jmp >1
606 |
607 |->vm_call: // Setup C frame and enter VM.
608 | // (lua_State *L, TValue *base, int nres1)
609 | saveregs
610 | mov PCd, FRAME_C
611 |
612 |1: // Entry point for vm_pcall above (PC = ftype).
613 | mov SAVE_NRES, CARG3d
614 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
615 | mov SAVE_L, CARG1
616 | mov RA, CARG2 // RA = base.
617 |
618 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
619 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
620 | mov SAVE_CFRAME, KBASE // Remember the previous C frame.
621 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
622 | add DISPATCH, GG_G2DISP
623 | mov L:RB->cframe, rsp // The current stack pointer identifies the new C frame.
624 |
625 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
626 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
627 | set_vmstate INTERP
628 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
629 | add PC, RA
630 | sub PC, BASE // PC = frame delta + frame type
631 |
632 | mov RD, L:RB->top
633 | sub RD, RA
634 | shr NARGS:RDd, 3 // RD = (top - new base) / 8 = nargs.
635 | add NARGS:RDd, 1 // RD = nargs+1
636 |
637 |->vm_call_dispatch:
638 | mov LFUNC:RB, [RA-16] // Load the callee from the slot at RA-16.
639 | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
640 |
641 |->vm_call_dispatch_f:
642 | mov BASE, RA // Switch to the new base.
643 | ins_call // Stores PC at [BASE-8] and dispatches the callee's first instruction.
644 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
645 |
646 |->vm_cpcall: // Setup protected C frame, call C.
647 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
648 | saveregs
649 | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
650 | mov SAVE_L, CARG1
651 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
652 |
653 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
654 | sub KBASE, L:RB->top
655 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
656 | mov SAVE_ERRF, 0 // No error function.
657 | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
658 | add DISPATCH, GG_G2DISP
659 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
660 |
661 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
662 | mov SAVE_CFRAME, KBASE
663 | mov L:RB->cframe, rsp
664 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
665 |
666 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud). CARG1-CARG3 are unchanged since entry.
667 | // TValue * (new base) or NULL returned in rax (RC).
668 | test RC, RC
669 | jz ->vm_leave_cp // No base? Just remove C frame.
670 | mov RA, RC // RA = new base.
671 | mov PCd, FRAME_CP
672 | jmp <2 // Else continue with the call.
673 |
674 |//-----------------------------------------------------------------------
675 |//-- Metamethod handling ------------------------------------------------
676 |//-----------------------------------------------------------------------
677 |
678 |//-- Continuation dispatch ----------------------------------------------
679 |
680 |->cont_dispatch: // Return from a metamethod call: restore the caller's state and jump to the stored continuation.
681 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
682 | add RA, BASE // RA = address of the first result.
683 | and PC, -8 // Clear the low 3 bits, leaving the frame delta.
684 | mov RB, BASE // RB = meta base.
685 | sub BASE, PC // Restore caller BASE.
686 | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
687 | mov RC, RA // ... in [RC]
688 | mov PC, [RB-24] // Restore PC from [cont|PC].
689 | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
690 |.if FFI
691 | cmp RA, 1
692 | jbe >1 // cont is 0 or 1: special cases, handled at 1: below.
693 |.endif
694 | mov LFUNC:KBASE, [BASE-16] // Load the function of the restored frame.
695 | cleartp LFUNC:KBASE
696 | mov KBASE, LFUNC:KBASE->pc
697 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = k field of the GCproto, addressed relative to the bytecode pointer.
698 | // BASE = base, RC = result, RB = meta base
699 | jmp RA // Jump to continuation.
700 |
701 |.if FFI
702 |1:
703 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
704 | // cont = 0: Tail call from C function.
705 | sub RB, BASE
706 | shr RBd, 3 // RB = (meta base - base) / 8.
707 | lea RDd, [RBd-3] // RD = RB - 3.
708 | jmp ->vm_call_tail
709 |.endif
710 |
711 |->cont_cat: // BASE = base, RC = result, RB = mbase
712 | movzx RAd, PC_RB // RA = B operand byte of the current instruction.
713 | sub RB, 32 // RB = mbase - 32.
714 | lea RA, [BASE+RA*8] // RA = address of slot B.
715 | sub RA, RB
716 | je ->cont_ra // RB is exactly slot B: just store the result.
717 | neg RA
718 | shr RAd, 3 // RA = (RB - address of slot B) / 8.
719 |.if X64WIN
720 | mov CARG3d, RAd // Must precede the CARG1 load: CARG1 is rcx, i.e. RA.
721 | mov L:CARG1, SAVE_L
722 | mov L:CARG1->base, BASE
723 | mov RC, [RC] // Copy the result value ...
724 | mov [RB], RC // ... into the slot at RB.
725 | mov CARG2, RB
726 |.else
727 | mov L:CARG1, SAVE_L
728 | mov L:CARG1->base, BASE // Must precede the CARG3 store: CARG3 is rdx, i.e. BASE.
729 | mov CARG3d, RAd
730 | mov RA, [RC] // Copy the result value ...
731 | mov [RB], RA // ... into the slot at RB.
732 | mov CARG2, RB
733 |.endif
734 | jmp ->BC_CAT_Z // Continue with CARG1 = L, CARG2 = RB, CARG3 = slot count.
735 |
736 |//-- Table indexing metamethods -----------------------------------------
737 |
738 |->vmeta_tgets: // Entry with a string key in STR:RC and the table in TAB:RB.
739 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
740 | mov TMP1, STR:RC // Build the tagged key in the TMP1 stack slot.
741 | lea RC, TMP1 // RC = address of the key.
742 | cmp PC_OP, BC_GGET
743 | jne >1 // Not BC_GGET: reload t from operand B.
744 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
745 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
746 | mov [RB], TAB:RA
747 | jmp >2
748 |
749 |->vmeta_tgetb: // Entry with the key given as the byte operand C.
750 | movzx RCd, PC_RC
751 |.if DUALNUM
752 | setint RC // Tag the key with LJ_TISNUM.
753 | mov TMP1, RC
754 |.else
755 | cvtsi2sd xmm0, RCd // Convert the key to a double.
756 | movsd TMP1, xmm0
757 |.endif
758 | lea RC, TMP1 // RC = address of the key.
759 | jmp >1
760 |
761 |->vmeta_tgetv: // Entry with the key in stack slot C.
762 | movzx RCd, PC_RC // Reload TValue *k from RC.
763 | lea RC, [BASE+RC*8]
764 |1:
765 | movzx RBd, PC_RB // Reload TValue *t from RB.
766 | lea RB, [BASE+RB*8]
767 |2:
768 | mov L:CARG1, SAVE_L
769 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
770 | mov CARG2, RB // o = address of the table value.
771 | mov CARG3, RC // k = address of the key.
772 | mov L:RB, L:CARG1 // Keep L in RB for use after the call.
773 | mov SAVE_PC, PC
774 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
775 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
776 | mov BASE, L:RB->base // Reload BASE from L.
777 | test RC, RC
778 | jz >3
779 |->cont_ra: // BASE = base, RC = result
780 | movzx RAd, PC_RA
781 | mov RB, [RC]
782 | mov [BASE+RA*8], RB // Copy the result into slot A.
783 | ins_next
784 |
785 |3: // Call __index metamethod.
786 | // BASE = base, L->top = new base, stack = cont/func/t/k
787 | mov RA, L:RB->top // RA = new base.
788 | mov [RA-24], PC // [cont|PC]
789 | lea PC, [RA+FRAME_CONT]
790 | sub PC, BASE // PC = frame delta + FRAME_CONT.
791 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
792 | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
793 | cleartp LFUNC:RB
794 | jmp ->vm_call_dispatch_f
795 |
796 |->vmeta_tgetr: // Fetch t[key] via lj_tab_getinth. Table in TAB:RB, integer key in RCd.
797 | mov CARG1, TAB:RB
798 | mov RB, BASE // Save BASE.
799 | mov CARG2d, RCd // Caveat: CARG2 == BASE
800 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
801 | // cTValue * or NULL returned in rax (RC).
802 | movzx RAd, PC_RA // Reload operand A.
803 | mov BASE, RB // Restore BASE.
804 | test RC, RC
805 | jnz ->BC_TGETR_Z // Non-NULL: continue with the slot pointer in RC.
806 | mov ITYPE, LJ_TNIL // NULL: continue with nil in ITYPE.
807 | jmp ->BC_TGETR2_Z
808 |
809 |//-----------------------------------------------------------------------
810 |
811 |->vmeta_tsets: // Entry with a string key in STR:RC and the table in TAB:RB.
812 | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
813 | mov TMP1, STR:RC // Build the tagged key in the TMP1 stack slot.
814 | lea RC, TMP1 // RC = address of the key.
815 | cmp PC_OP, BC_GSET
816 | jne >1 // Not BC_GSET: reload t from operand B.
817 | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
818 | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
819 | mov [RB], TAB:RA
820 | jmp >2
821 |
822 |->vmeta_tsetb: // Entry with the key given as the byte operand C.
823 | movzx RCd, PC_RC
824 |.if DUALNUM
825 | setint RC // Tag the key with LJ_TISNUM.
826 | mov TMP1, RC
827 |.else
828 | cvtsi2sd xmm0, RCd // Convert the key to a double.
829 | movsd TMP1, xmm0
830 |.endif
831 | lea RC, TMP1 // RC = address of the key.
832 | jmp >1
833 |
834 |->vmeta_tsetv: // Entry with the key in stack slot C.
835 | movzx RCd, PC_RC // Reload TValue *k from RC.
836 | lea RC, [BASE+RC*8]
837 |1:
838 | movzx RBd, PC_RB // Reload TValue *t from RB.
839 | lea RB, [BASE+RB*8]
840 |2:
841 | mov L:CARG1, SAVE_L
842 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
843 | mov CARG2, RB // o = address of the table value.
844 | mov CARG3, RC // k = address of the key.
845 | mov L:RB, L:CARG1 // Keep L in RB for use after the call.
846 | mov SAVE_PC, PC
847 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
848 | // TValue * (finished) or NULL (metamethod) returned in rax (RC).
849 | mov BASE, L:RB->base // Reload BASE from L.
850 | test RC, RC
851 | jz >3
852 | // NOBARRIER: lj_meta_tset ensures the table is not black.
853 | movzx RAd, PC_RA
854 | mov RB, [BASE+RA*8] // Load the value from slot A ...
855 | mov [RC], RB // ... and store it through the returned pointer.
856 |->cont_nop: // BASE = base, (RC = result)
857 | ins_next
858 |
859 |3: // Call __newindex metamethod.
860 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
861 | mov RA, L:RB->top // RA = new base.
862 | mov [RA-24], PC // [cont|PC]
863 | movzx RCd, PC_RA
864 | // Copy value to third argument.
865 | mov RB, [BASE+RC*8]
866 | mov [RA+16], RB
867 | lea PC, [RA+FRAME_CONT]
868 | sub PC, BASE // PC = frame delta + FRAME_CONT.
869 | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
870 | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
871 | cleartp LFUNC:RB
872 | jmp ->vm_call_dispatch_f
873 |
874 |->vmeta_tsetr: // Get the slot for t[key] via lj_tab_setinth. Table in TAB:RB, integer key in RCd.
875 |.if X64WIN
876 | mov L:CARG1, SAVE_L
877 | mov CARG3d, RCd
878 | mov L:CARG1->base, BASE
879 | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. (After the swap CARG2 = table and RB = saved BASE.)
880 |.else
881 | mov L:CARG1, SAVE_L
882 | mov CARG2, TAB:RB
883 | mov L:CARG1->base, BASE
884 | mov RB, BASE // Save BASE.
885 | mov CARG3d, RCd // Caveat: CARG3 == BASE.
886 |.endif
887 | mov SAVE_PC, PC
888 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
889 | // TValue * returned in rax (RC).
890 | movzx RAd, PC_RA // Reload operand A.
891 | mov BASE, RB // Restore BASE.
892 | jmp ->BC_TSETR_Z
893 |
894 |//-- Comparison metamethods ---------------------------------------------
895 |
896 |->vmeta_comp: // Comparison fallback: calls lj_meta_comp on the operands in slots RA and RD.
897 | movzx RDd, PC_RD
898 | movzx RAd, PC_RA
899 | mov L:RB, SAVE_L
900 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
901 |.if X64WIN
902 | lea CARG3, [BASE+RD*8]
903 | lea CARG2, [BASE+RA*8] // Written last of the two since it replaces BASE here.
904 |.else
905 | lea CARG2, [BASE+RA*8]
906 | lea CARG3, [BASE+RD*8] // Written last of the two since it replaces BASE here.
907 |.endif
908 | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
909 | movzx CARG4d, PC_OP
910 | mov SAVE_PC, PC
911 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
912 | // 0/1 or TValue * (metamethod) returned in eax (RC).
913 |3: // Shared result handling, also entered from vmeta_equal and vmeta_equal_cd.
914 | mov BASE, L:RB->base
915 | cmp RC, 1
916 | ja ->vmeta_binop // Result above 1: a TValue *, so call the metamethod.
917 |4: // Entered with flags set: below = skip the branch, otherwise take it.
918 | lea PC, [PC+4] // Advance PC by 4 using lea, which leaves the flags untouched.
919 | jb >6 // Below (result 0 when coming from label 3): no branch.
920 |5: // Take the branch encoded in the RD operand at PC.
921 | movzx RDd, PC_RD
922 | branchPC RD
923 |6:
924 | ins_next
925 |
926 |->cont_condt: // BASE = base, RC = result. Branches when the value at [RC] is true.
927 | add PC, 4 // Same +4 PC adjustment as label 4 of vmeta_comp.
928 | mov ITYPE, [RC]
929 | sar ITYPE, 47 // Keep only the type tag from the top bits of the value.
930 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
931 | jb <5 // True: take the branch (label 5 in vmeta_comp).
932 | jmp <6 // Otherwise continue with the next instruction (label 6 in vmeta_comp).
933 |
934 |->cont_condf: // BASE = base, RC = result. Branches when the value at [RC] is false.
935 | mov ITYPE, [RC]
936 | sar ITYPE, 47 // Keep only the type tag from the top bits of the value.
937 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
938 | jmp <4 // Label 4 in vmeta_comp consumes these flags: below (true) skips the branch.
939 |
940 |->vmeta_equal: // Equality fallback: calls lj_meta_equal, then reuses the result handling of vmeta_comp.
941 | cleartp TAB:RD // Presumably strips the type tag so RD is a plain object pointer.
942 | sub PC, 4 // Step PC back by 4. Label 4 of vmeta_comp adds it again.
943 |.if X64WIN
944 | mov CARG3, RD
945 | mov CARG4d, RBd
946 | mov L:RB, SAVE_L
947 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
948 | mov CARG2, RA
949 | mov CARG1, L:RB // Caveat: CARG1 == RA.
950 |.else
951 | mov CARG2, RA
952 | mov CARG4d, RBd // Caveat: CARG4 == RA.
953 | mov L:RB, SAVE_L
954 | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
955 | mov CARG3, RD
956 | mov CARG1, L:RB
957 |.endif
958 | mov SAVE_PC, PC
959 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
960 | // 0/1 or TValue * (metamethod) returned in eax (RC).
961 | jmp <3 // Continue at label 3 of vmeta_comp.
962 |
963 |->vmeta_equal_cd: // Equality fallback via lj_meta_equal_cd. The body is only assembled with FFI.
964 |.if FFI
965 | sub PC, 4 // Step PC back by 4. Label 4 of vmeta_comp adds it again.
966 | mov L:RB, SAVE_L
967 | mov L:RB->base, BASE
968 | mov CARG1, L:RB
969 | mov CARG2d, dword [PC-4] // Pass the 32 bit instruction word just before the adjusted PC.
970 | mov SAVE_PC, PC
971 | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
972 | // 0/1 or TValue * (metamethod) returned in eax (RC).
973 | jmp <3 // Continue at label 3 of vmeta_comp.
974 |.endif
975 |
976 |->vmeta_istype: // Type check fallback: calls lj_meta_istype with the RA and RD operands.
977 | mov L:RB, SAVE_L
978 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
979 | mov CARG2d, RAd
980 | mov CARG3d, RDd
981 | mov L:CARG1, L:RB
982 | mov SAVE_PC, PC
983 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
984 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
985 | jmp <6 // Continue with the next instruction (label 6 in vmeta_comp).
986 |
987 |//-- Arithmetic metamethods ---------------------------------------------
988 |
989 |->vmeta_arith_vno: // Like vmeta_arith_vn, but reloads RB/RC from the instruction under DUALNUM.
990 |.if DUALNUM
991 | movzx RBd, PC_RB
992 | movzx RCd, PC_RC
993 |.endif
994 |->vmeta_arith_vn: // Operands: slot RB (via label 1) and constant RC.
995 | lea RC, [KBASE+RC*8] // RC = address of the constant operand.
996 | jmp >1
997 |
998 |->vmeta_arith_nvo: // Like vmeta_arith_nv, but reloads RB/RC from the instruction under DUALNUM.
999 |.if DUALNUM
1000 | movzx RBd, PC_RB
1001 | movzx RCd, PC_RC
1002 |.endif
1003 |->vmeta_arith_nv: // Operands swapped: constant RC first, slot RB second.
1004 | lea TMPR, [KBASE+RC*8] // Address of the constant operand.
1005 | lea RC, [BASE+RB*8] // Address of the slot operand.
1006 | mov RB, TMPR // The constant becomes the first operand (passed as rb).
1007 | jmp >2
1008 |
1009 |->vmeta_unm: // Unary minus: the slot RD is passed as both operands.
1010 | lea RC, [BASE+RD*8]
1011 | mov RB, RC
1012 | jmp >2
1013 |
1014 |->vmeta_arith_vvo: // Like vmeta_arith_vv, but reloads RB/RC from the instruction under DUALNUM.
1015 |.if DUALNUM
1016 | movzx RBd, PC_RB
1017 | movzx RCd, PC_RC
1018 |.endif
1019 |->vmeta_arith_vv: // Operands: slot RB and slot RC.
1020 | lea RC, [BASE+RC*8]
1021 |1: // RC already holds an address. Convert the slot index in RB to an address.
1022 | lea RB, [BASE+RB*8]
1023 |2: // RB and RC are operand addresses. Convert the index in RA to an address too.
1024 | lea RA, [BASE+RA*8]
1025 |.if X64WIN
1026 | mov CARG3, RB
1027 | mov CARG4, RC
1028 | movzx RCd, PC_OP
1029 | mov ARG5d, RCd // Pass the opcode as the 5th argument.
1030 | mov L:RB, SAVE_L
1031 | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
1032 | mov CARG2, RA
1033 | mov CARG1, L:RB // Caveat: CARG1 == RA.
1034 |.else
1035 | movzx CARG5d, PC_OP // Pass the opcode as the 5th argument.
1036 | mov CARG2, RA
1037 | mov CARG4, RC // Caveat: CARG4 == RA.
1038 | mov L:CARG1, SAVE_L
1039 | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
1040 | mov CARG3, RB
1041 | mov L:RB, L:CARG1 // Keep L in RB so L->base can be reloaded after the call.
1042 |.endif
1043 | mov SAVE_PC, PC
1044 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1045 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1046 | mov BASE, L:RB->base
1047 | test RC, RC
1048 | jz ->cont_nop // NULL: finished, continue with the next instruction.
1049 |
1050 | // Call metamethod for binary op.
1051 |->vmeta_binop:
1052 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1053 | mov RA, RC // RA = new base.
1054 | sub RC, BASE // RC = distance from the old base to the new base.
1055 | mov [RA-24], PC // [cont|PC]
1056 | lea PC, [RC+FRAME_CONT] // PC = that distance plus the FRAME_CONT marker.
1057 | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
1058 | jmp ->vm_call_dispatch
1059 |
1060 |->vmeta_len: // Length fallback: calls lj_meta_len on the operand in slot RD.
1061 | movzx RDd, PC_RD
1062 | mov L:RB, SAVE_L
1063 | mov L:RB->base, BASE
1064 | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
1065 | mov L:CARG1, L:RB
1066 | mov SAVE_PC, PC
1067 | call extern lj_meta_len // (lua_State *L, TValue *o)
1068 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1069 | mov BASE, L:RB->base
1070#if LJ_52
1071 | test RC, RC
1072 | jne ->vmeta_binop // Binop call for compatibility.
1073 | movzx RDd, PC_RD // NULL result: reload the operand and retry at BC_LEN_Z.
1074 | mov TAB:CARG1, [BASE+RD*8]
1075 | cleartp TAB:CARG1
1076 | jmp ->BC_LEN_Z
1077#else
1078 | jmp ->vmeta_binop // Binop call for compatibility.
1079#endif
1080 |
1081 |//-- Call metamethod ----------------------------------------------------
1082 |
1083 |->vmeta_call_ra: // Entry with RA as a slot index: convert it to the new base address first.
1084 | lea RA, [BASE+RA*8+16]
1085 |->vmeta_call: // Resolve and call __call metamethod.
1086 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1087 | mov TMP1d, NARGS:RDd // Save RA, RC for us.
1088 | mov RB, RA
1089 |.if X64WIN
1090 | mov L:TMPR, SAVE_L
1091 | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
1092 | lea CARG2, [RA-16] // func = slot 16 bytes below the new base.
1093 | lea CARG3, [RA+NARGS:RD*8-8] // top = just past the last argument.
1094 | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
1095 |.else
1096 | mov L:CARG1, SAVE_L
1097 | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
1098 | lea CARG2, [RA-16] // func = slot 16 bytes below the new base.
1099 | lea CARG3, [RA+NARGS:RD*8-8] // top = just past the last argument.
1100 |.endif
1101 | mov SAVE_PC, PC
1102 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1103 | mov RA, RB // Restore the new base saved before the call.
1104 | mov L:RB, SAVE_L
1105 | mov BASE, L:RB->base
1106 | mov NARGS:RDd, TMP1d // Restore nargs+1.
1107 | mov LFUNC:RB, [RA-16] // Load the function slot as left by lj_meta_call.
1108 | add NARGS:RDd, 1 // One more argument than before (presumably the original callee, TODO confirm).
1109 | // This is fragile. L->base must not move, KBASE must always be defined.
1110 | cmp KBASE, BASE // Continue with CALLT if flag set.
1111 | je ->BC_CALLT_Z
1112 | cleartp LFUNC:RB
1113 | mov BASE, RA
1114 | ins_call // Otherwise call resolved metamethod.
1115 |
1116 |//-- Argument coercion for 'for' statement ------------------------------
1117 |
1118 |->vmeta_for: // Calls lj_meta_for on the loop slots at RA, then re-dispatches the instruction before PC.
1119 | mov L:RB, SAVE_L
1120 | mov L:RB->base, BASE
1121 | mov CARG2, RA // Caveat: CARG2 == BASE
1122 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
1123 | mov SAVE_PC, PC
1124 | call extern lj_meta_for // (lua_State *L, TValue *base)
1125 | mov BASE, L:RB->base
1126 | mov RCd, [PC-4] // Re-read the 32 bit instruction word before PC.
1127 | movzx RAd, RCH // Second byte of the instruction goes to RA.
1128 | movzx OP, RCL // Lowest byte of the instruction is the opcode.
1129 | shr RCd, 16 // The upper 16 bits stay in RC as the operand.
1130 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1131 |
1132 |//-----------------------------------------------------------------------
1133 |//-- Fast functions -----------------------------------------------------
1134 |//-----------------------------------------------------------------------
1135 |
1136 |.macro .ffunc, name
1137 |->ff_ .. name: // Defines the global label ff_<name> with no argument count check.
1138 |.endmacro
1139 |
1140 |.macro .ffunc_1, name
1141 |->ff_ .. name: // Label ff_<name>, entered with NARGS:RD = nargs+1.
1142 | cmp NARGS:RDd, 1+1; jb ->fff_fallback // Fewer than 1 argument: use the fallback.
1143 |.endmacro
1144 |
1145 |.macro .ffunc_2, name
1146 |->ff_ .. name: // Label ff_<name>, entered with NARGS:RD = nargs+1.
1147 | cmp NARGS:RDd, 2+1; jb ->fff_fallback // Fewer than 2 arguments: use the fallback.
1148 |.endmacro
1149 |
1150 |.macro .ffunc_n, name, op
1151 | .ffunc_1 name
1152 | checknumtp [BASE], ->fff_fallback // First argument must pass the number type check.
1153 | op xmm0, qword [BASE] // Apply op with the first argument as source and xmm0 as destination.
1154 |.endmacro
1155 |
1156 |.macro .ffunc_n, name
1157 | .ffunc_n name, movsd // One-parameter form: plain load of the first argument into xmm0.
1158 |.endmacro
1159 |
1160 |.macro .ffunc_nn, name
1161 | .ffunc_2 name
1162 | checknumtp [BASE], ->fff_fallback // Both arguments must pass the number type check.
1163 | checknumtp [BASE+8], ->fff_fallback
1164 | movsd xmm0, qword [BASE] // xmm0 = first argument.
1165 | movsd xmm1, qword [BASE+8] // xmm1 = second argument.
1166 |.endmacro
1167 |
1168 |// Inlined GC threshold check. Caveat: uses label 1.
1169 |.macro ffgccheck
1170 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1171 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1172 | jb >1 // gc.total below gc.threshold: skip the GC step.
1173 | call ->fff_gcstep
1174 |1:
1175 |.endmacro
1176 |
1177 |//-- Base library: checks -----------------------------------------------
1178 |
1179 |.ffunc_1 assert // Fast path for a true first argument: returns all arguments as results.
1180 | mov ITYPE, [BASE]
1181 | mov RB, ITYPE
1182 | sar ITYPE, 47 // Keep only the type tag of the first argument.
1183 | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback // Not a true value: use the fallback.
1184 | mov PC, [BASE-8] // Load the frame link stored below BASE.
1185 | mov MULTRES, RDd // Result count+1 equals the incoming nargs+1.
1186 | mov RB, [BASE]
1187 | mov [BASE-16], RB // First argument becomes the first result.
1188 | sub RDd, 2 // RD = number of remaining arguments.
1189 | jz >2
1190 | mov RA, BASE
1191 |1: // Move each remaining argument down by two slots.
1192 | add RA, 8
1193 | mov RB, [RA]
1194 | mov [RA-16], RB
1195 | sub RDd, 1
1196 | jnz <1
1197 |2:
1198 | mov RDd, MULTRES // Restore the result count+1 used up by the loop counter.
1199 | jmp ->fff_res_
1200 |
1201 |.ffunc_1 type // Returns a string picked from the upvalue array of this C function by type tag.
1202 | mov RC, [BASE]
1203 | sar RC, 47 // Keep only the type tag of the first argument.
1204 | mov RBd, LJ_TISNUM
1205 | cmp RCd, RBd
1206 | cmovb RCd, RBd // Tags below LJ_TISNUM (unsigned) are clamped to LJ_TISNUM.
1207 | not RCd // The inverted tag is used as the upvalue index.
1208 |2:
1209 | mov CFUNC:RB, [BASE-16] // The function object sits in the slot at BASE-16.
1210 | cleartp CFUNC:RB
1211 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1212 | mov PC, [BASE-8]
1213 | settp STR:RC, LJ_TSTR // Tag the upvalue's pointer as a string.
1214 | mov [BASE-16], STR:RC // Store it as the single result.
1215 | jmp ->fff_res1
1216 |
1217 |//-- Base library: getters and setters ---------------------------------
1218 |
1219 |.ffunc_1 getmetatable // Returns mt.__metatable if present and not nil, else the metatable, else nil.
1220 | mov TAB:RB, [BASE]
1221 | mov PC, [BASE-8]
1222 | checktab TAB:RB, >6 // Not a table: handled at label 6.
1223 |1: // Field metatable must be at same offset for GCtab and GCudata!
1224 | mov TAB:RB, TAB:RB->metatable
1225 |2: // RB = metatable or NULL.
1226 | test TAB:RB, TAB:RB
1227 | mov aword [BASE-16], LJ_TNIL // Default result is nil. The mov keeps the flags from test.
1228 | jz ->fff_res1 // No metatable: return nil.
1229 | settp TAB:RC, TAB:RB, LJ_TTAB
1230 | mov [BASE-16], TAB:RC // Store metatable as default result.
1231 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
1232 | mov RAd, TAB:RB->hmask
1233 | and RAd, STR:RC->hash // Hash slot index = hmask & hash of the key string.
1234 | settp STR:RC, LJ_TSTR // Tagged key, ready for comparing against node keys.
1235 | imul RAd, #NODE
1236 | add NODE:RA, TAB:RB->node // RA = address of the first node in the chain.
1237 |3: // Rearranged logic, because we expect _not_ to find the key.
1238 | cmp NODE:RA->key, STR:RC
1239 | je >5
1240 |4: // Follow the collision chain.
1241 | mov NODE:RA, NODE:RA->next
1242 | test NODE:RA, NODE:RA
1243 | jnz <3
1244 | jmp ->fff_res1 // Not found, keep default result.
1245 |5: // Key found.
1246 | mov RB, NODE:RA->val
1247 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1248 | mov [BASE-16], RB // Return value of mt.__metatable.
1249 | jmp ->fff_res1
1250 |
1251 |6: // Not a table. ITYPE is presumably the type tag left by checktab.
1252 | cmp ITYPEd, LJ_TUDATA; je <1 // Userdata: read its metatable field like a table.
1253 | cmp ITYPEd, LJ_TISNUM; ja >7
1254 | mov ITYPEd, LJ_TISNUM // Tags not above LJ_TISNUM share the LJ_TISNUM entry.
1255 |7:
1256 | not ITYPEd // The inverted tag indexes the base metatable roots.
1257 | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
1258 | jmp <2
1259 |
1260 |.ffunc_2 setmetatable // Fast path only: the table has no metatable yet and the new one is a table.
1261 | mov TAB:RB, [BASE]
1262 | mov TAB:TMPR, TAB:RB // Keep the original tagged value for the result.
1263 | checktab TAB:RB, ->fff_fallback
1264 | // Fast path: no mt for table yet and not clearing the mt.
1265 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1266 | mov TAB:RA, [BASE+8]
1267 | checktab TAB:RA, ->fff_fallback // A second argument that is not a table goes to the fallback.
1268 | mov TAB:RB->metatable, TAB:RA
1269 | mov PC, [BASE-8]
1270 | mov [BASE-16], TAB:TMPR // Return original table.
1271 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1272 | jz >1
1273 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1274 | barrierback TAB:RB, RC
1275 |1:
1276 | jmp ->fff_res1
1277 |
1278 |.ffunc_2 rawget // Returns the slot value that lj_tab_get finds for (table, key).
1279 |.if X64WIN
1280 | mov TAB:RA, [BASE]
1281 | checktab TAB:RA, ->fff_fallback
1282 | mov RB, BASE // Save BASE.
1283 | lea CARG3, [BASE+8] // Key = address of the second argument.
1284 | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
1285 | mov CARG1, SAVE_L
1286 |.else
1287 | mov TAB:CARG2, [BASE]
1288 | checktab TAB:CARG2, ->fff_fallback
1289 | mov RB, BASE // Save BASE.
1290 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1291 | mov CARG1, SAVE_L
1292 |.endif
1293 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1294 | // cTValue * returned in eax (RD).
1295 | mov BASE, RB // Restore BASE.
1296 | // Copy table slot.
1297 | mov RB, [RD]
1298 | mov PC, [BASE-8]
1299 | mov [BASE-16], RB // The slot value is the single result.
1300 | jmp ->fff_res1
1301 |
1302 |//-- Base library: conversions ------------------------------------------
1303 |
1304 |.ffunc tonumber // Returns the argument unchanged when it is already a number.
1305 | // Only handles the number case inline (without a base argument).
1306 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1307 | mov RB, [BASE]
1308 | checknumber RB, ->fff_fallback // Anything but a number goes to the fallback.
1309 | mov PC, [BASE-8]
1310 | mov [BASE-16], RB // The argument itself is the single result.
1311 | jmp ->fff_res1
1312 |
1313 |.ffunc_1 tostring // Strings are returned as is. Numbers are formatted by a C helper.
1314 | // Only handles the string or number case inline.
1315 | mov PC, [BASE-8]
1316 | mov STR:RB, [BASE]
1317 | checktp_nc STR:RB, LJ_TSTR, >3 // Not a string: try the number path at label 3.
1318 | // A __tostring method in the string base metatable is ignored.
1319 |2: // RB = tagged string result.
1320 | mov [BASE-16], STR:RB
1321 | jmp ->fff_res1
1322 |3: // Handle numbers inline, unless a number base metatable is present.
1323 | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
1324 | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1325 | jne ->fff_fallback
1326 | ffgccheck // Caveat: uses label 1.
1327 | mov L:RB, SAVE_L
1328 | mov L:RB->base, BASE // Add frame since C call can throw.
1329 | mov SAVE_PC, PC // Redundant (but a defined value).
1330 |.if not X64WIN
1331 | mov CARG2, BASE // Otherwise: CARG2 == BASE
1332 |.endif
1333 | mov L:CARG1, L:RB
1334 |.if DUALNUM
1335 | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
1336 |.else
1337 | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
1338 |.endif
1339 | // GCstr returned in eax (RD).
1340 | mov BASE, L:RB->base
1341 | settp STR:RB, RD, LJ_TSTR // Tag the returned string and put it in RB for label 2.
1342 | jmp <2
1343 |
1344 |//-- Base library: iterators -------------------------------------------
1345 |
1346 |.ffunc_1 next // Calls lj_tab_next, then returns key and value, or nil at the end of traversal.
1347 | je >2 // Missing 2nd arg?
1348 |1:
1349 |.if X64WIN
1350 | mov RA, [BASE]
1351 | checktab RA, ->fff_fallback
1352 |.else
1353 | mov CARG2, [BASE]
1354 | checktab CARG2, ->fff_fallback
1355 |.endif
1356 | mov L:RB, SAVE_L
1357 | mov L:RB->base, BASE // Add frame since C call can throw.
1358 | mov L:RB->top, BASE // Dummy frame length is ok.
1359 | mov PC, [BASE-8]
1360 |.if X64WIN
1361 | lea CARG3, [BASE+8] // Key = address of the second argument slot.
1362 | mov CARG2, RA // Caveat: CARG2 == BASE.
1363 | mov CARG1, L:RB
1364 |.else
1365 | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
1366 | mov CARG1, L:RB
1367 |.endif
1368 | mov SAVE_PC, PC // Needed for ITERN fallback.
1369 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1370 | // Flag returned in eax (RD).
1371 | mov BASE, L:RB->base
1372 | test RDd, RDd; jz >3 // End of traversal?
1373 | // Copy key and value to results.
1374 | mov RB, [BASE+8]
1375 | mov RD, [BASE+16]
1376 | mov [BASE-16], RB
1377 | mov [BASE-8], RD
1378 |->fff_res2: // Shared exit for two results.
1379 | mov RDd, 1+2
1380 | jmp ->fff_res
1381 |2: // Set missing 2nd arg to nil.
1382 | mov aword [BASE+8], LJ_TNIL
1383 | jmp <1
1384 |3: // End of traversal: return nil.
1385 | mov aword [BASE-16], LJ_TNIL
1386 | jmp ->fff_res1
1387 |
1388 |.ffunc_1 pairs // Returns upvalue[0] of this function, the table and nil.
1389 | mov TAB:RB, [BASE]
1390 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1391 | checktab TAB:RB, ->fff_fallback
1392#if LJ_52
1393 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1394#endif
1395 | mov CFUNC:RD, [BASE-16] // This function object sits in the slot at BASE-16.
1396 | cleartp CFUNC:RD
1397 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1398 | settp CFUNC:RD, LJ_TFUNC // Tag the upvalue as a function.
1399 | mov PC, [BASE-8] // Load the frame link before its slot is overwritten below.
1400 | mov [BASE-16], CFUNC:RD // Result 1: the function from upvalue[0].
1401 | mov [BASE-8], TMPR // Result 2: the table.
1402 | mov aword [BASE], LJ_TNIL // Result 3: nil.
1403 | mov RDd, 1+3
1404 | jmp ->fff_res
1405 |
1406 |.ffunc_2 ipairs_aux // Returns index+1 and t[index+1], or no results when that slot is nil or absent.
1407 | mov TAB:RB, [BASE]
1408 | checktab TAB:RB, ->fff_fallback
1409 |.if DUALNUM
1410 | mov RA, [BASE+8]
1411 | checkint RA, ->fff_fallback
1412 |.else
1413 | checknumtp [BASE+8], ->fff_fallback
1414 | movsd xmm0, qword [BASE+8]
1415 |.endif
1416 | mov PC, [BASE-8]
1417 |.if DUALNUM
1418 | add RAd, 1 // Next index.
1419 | setint ITYPE, RA
1420 | mov [BASE-16], ITYPE // Result 1: the next index as a tagged integer.
1421 |.else
1422 | sseconst_1 xmm1, TMPR // Presumably loads the constant 1.0 into xmm1, TODO confirm.
1423 | addsd xmm0, xmm1 // Next index as a double.
1424 | cvttsd2si RAd, xmm0 // Truncate to an integer for the array lookup.
1425 | movsd qword [BASE-16], xmm0 // Result 1: the next index.
1426 |.endif
1427 | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
1428 | mov RD, TAB:RB->array
1429 | lea RD, [RD+RA*8] // RD = address of the array slot.
1430 |1: // RD = address of the value slot.
1431 | cmp aword [RD], LJ_TNIL; je ->fff_res0 // A nil value ends the iteration.
1432 | // Copy array slot.
1433 | mov RB, [RD]
1434 | mov [BASE-8], RB // Result 2: the value.
1435 | jmp ->fff_res2
1436 |2: // Check for empty hash part first. Otherwise call C function.
1437 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1438 |.if X64WIN
1439 | mov TMPR, BASE // Hold BASE in TMPR while the argument registers are set up.
1440 | mov CARG2d, RAd
1441 | mov CARG1, TAB:RB
1442 | mov RB, TMPR // Save BASE in RB across the call.
1443 |.else
1444 | mov CARG1, TAB:RB
1445 | mov RB, BASE // Save BASE.
1446 | mov CARG2d, RAd // Caveat: CARG2 == BASE
1447 |.endif
1448 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1449 | // cTValue * or NULL returned in eax (RD).
1450 | mov BASE, RB // Restore BASE.
1451 | test RD, RD
1452 | jnz <1 // Slot found: check and copy it at label 1.
1453 |->fff_res0: // Shared exit for zero results.
1454 | mov RDd, 1+0
1455 | jmp ->fff_res
1456 |
1457 |.ffunc_1 ipairs // Returns upvalue[0] of this function, the table and a zero start index.
1458 | mov TAB:RB, [BASE]
1459 | mov TMPR, TAB:RB // Keep the tagged table value for the second result.
1460 | checktab TAB:RB, ->fff_fallback
1461#if LJ_52
1462 | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
1463#endif
1464 | mov CFUNC:RD, [BASE-16] // This function object sits in the slot at BASE-16.
1465 | cleartp CFUNC:RD
1466 | mov CFUNC:RD, CFUNC:RD->upvalue[0]
1467 | settp CFUNC:RD, LJ_TFUNC // Tag the upvalue as a function.
1468 | mov PC, [BASE-8] // Load the frame link before its slot is overwritten below.
1469 | mov [BASE-16], CFUNC:RD // Result 1: the function from upvalue[0].
1470 | mov [BASE-8], TMPR // Result 2: the table.
1471 |.if DUALNUM
1472 | mov64 RD, ((uint64_t)LJ_TISNUM<<47) // Tag LJ_TISNUM with a zero payload, i.e. integer 0.
1473 | mov [BASE], RD
1474 |.else
1475 | mov qword [BASE], 0 // All-zero bits are the double 0.0.
1476 |.endif
1477 | mov RDd, 1+3
1478 | jmp ->fff_res
1479 |
1480 |//-- Base library: catch errors ----------------------------------------
1481 |
1482 |.ffunc_1 pcall // Sets up a FRAME_PCALL frame above the callee and dispatches the call.
1483 | lea RA, [BASE+16] // New base for the called function.
1484 | sub NARGS:RDd, 1 // The function itself is not an argument of the call.
1485 | mov PCd, 16+FRAME_PCALL // Frame link: delta 16 plus the pcall frame type.
1486 |1: // Shared with xpcall. RA = new base, RD = nargs+1, PC = frame link.
1487 | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
1488 | shr RB, HOOK_ACTIVE_SHIFT
1489 | and RB, 1 // Isolate the hook-active bit.
1490 | add PC, RB // Remember active hook before pcall.
1491 | // Note: this does a (harmless) copy of the function to the PC slot, too.
1492 | mov KBASE, RD // KBASE is used as the loop counter here.
1493 |2: // Move function and arguments up by one slot, from the top down.
1494 | mov RB, [RA+KBASE*8-24]
1495 | mov [RA+KBASE*8-16], RB
1496 | sub KBASE, 1
1497 | ja <2
1498 | jmp ->vm_call_dispatch
1499 |
1500 |.ffunc_2 xpcall // Swaps the first two arguments, then joins the shared pcall code.
1501 | mov LFUNC:RA, [BASE+8]
1502 | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback // The second argument must be a function.
1503 | mov LFUNC:RB, [BASE] // Swap function and traceback.
1504 | mov [BASE], LFUNC:RA
1505 | mov [BASE+8], LFUNC:RB
1506 | lea RA, [BASE+24] // New base for the called function.
1507 | sub NARGS:RDd, 2 // Neither of the first two arguments is passed on as an argument.
1508 | mov PCd, 24+FRAME_PCALL // Frame link: delta 24 plus the pcall frame type.
1509 | jmp <1 // Continue at label 1 of pcall.
1510 |
1511 |//-- Coroutine library --------------------------------------------------
1512 |
1513 |.macro coroutine_resume_wrap, resume
1514 |.if resume
1515 |.ffunc_1 coroutine_resume
1516 | mov L:RB, [BASE] // The coroutine is the first argument.
1517 | cleartp L:RB
1518 |.else
1519 |.ffunc coroutine_wrap_aux
1520 | mov CFUNC:RB, [BASE-16]
1521 | cleartp CFUNC:RB
1522 | mov L:RB, CFUNC:RB->upvalue[0].gcr // The coroutine is upvalue[0] of this function.
1523 | cleartp L:RB
1524 |.endif
1525 | mov PC, [BASE-8]
1526 | mov SAVE_PC, PC
1527 | mov TMP1, L:RB // Keep the coroutine pointer in TMP1 for later.
1528 |.if resume
1529 | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
1530 |.endif
1531 | cmp aword L:RB->cframe, 0; jne ->fff_fallback // Fallback if the coroutine has a C frame set.
1532 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback // Fallback if status is above LUA_YIELD.
1533 | mov RA, L:RB->top
1534 | je >1 // Jump if status == LUA_YIELD. Otherwise status is 0 and the code below runs.
1535 | cmp RA, L:RB->base // Check for presence of initial func.
1536 | je ->fff_fallback
1537 | mov PC, [RA-8] // Move initial function up.
1538 | mov [RA], PC
1539 | add RA, 8
1540 |1: // RA = where the arguments go on the coroutine stack.
1541 |.if resume
1542 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1543 |.else
1544 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1545 |.endif
1546 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1547 | mov L:RB->top, PC // New top of the coroutine stack.
1548 |
1549 | mov L:RB, SAVE_L // From here on RB is the current (resuming) state.
1550 | mov L:RB->base, BASE
1551 |.if resume
1552 | add BASE, 8 // Keep resumed thread in stack for GC.
1553 |.endif
1554 | mov L:RB->top, BASE
1555 |.if resume
1556 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1557 |.else
1558 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1559 |.endif
1560 | sub RB, PC // Relative to PC.
1561 |
1562 | cmp PC, RA
1563 | je >3 // Nothing to move.
1564 |2: // Move args to coroutine.
1565 | mov RC, [PC+RB]
1566 | mov [PC-8], RC
1567 | sub PC, 8
1568 | cmp PC, RA
1569 | jne <2
1570 |3:
1571 | mov CARG2, RA
1572 | mov CARG1, TMP1
1573 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1574 |
1575 | mov L:RB, SAVE_L
1576 | mov L:PC, TMP1 // PC now holds the coroutine pointer.
1577 | mov BASE, L:RB->base
1578 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB // Make the resuming state the current one again.
1579 | set_vmstate INTERP
1580 |
1581 | cmp eax, LUA_YIELD
1582 | ja >8 // A status above LUA_YIELD is handled as an error at label 8.
1583 |4: // Results lie between co->base and co->top.
1584 | mov RA, L:PC->base
1585 | mov KBASE, L:PC->top
1586 | mov L:PC->top, RA // Clear coroutine stack.
1587 | mov PC, KBASE
1588 | sub PC, RA // PC = size of the results in bytes.
1589 | je >6 // No results?
1590 | lea RD, [BASE+PC] // End of the results once copied to our stack.
1591 | shr PCd, 3 // PC = number of results.
1592 | cmp RD, L:RB->maxstack
1593 | ja >9 // Need to grow stack?
1594 |
1595 | mov RB, BASE
1596 | sub RB, RA // RB = offset from the source slot to the destination slot.
1597 |5: // Move results from coroutine.
1598 | mov RD, [RA]
1599 | mov [RA+RB], RD
1600 | add RA, 8
1601 | cmp RA, KBASE
1602 | jne <5
1603 |6:
1604 |.if resume
1605 | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
1606 | mov_true ITYPE // Prepend true to results.
1607 | mov [BASE-8], ITYPE
1608 |.else
1609 | lea RDd, [PCd+1] // nresults+1 = 1 + results.
1610 |.endif
1611 |7: // Return RD-1 results starting at BASE+RA.
1612 | mov PC, SAVE_PC
1613 | mov MULTRES, RDd
1614 |.if resume
1615 | mov RA, -8 // Results start at BASE-8, where the status flag was stored.
1616 |.else
1617 | xor RAd, RAd // Results start at BASE.
1618 |.endif
1619 | test PCd, FRAME_TYPE
1620 | jz ->BC_RET_Z // No frame type bits set: use the bytecode return path.
1621 | jmp ->vm_return
1622 |
1623 |8: // Coroutine returned with error (at co->top-1).
1624 |.if resume
1625 | mov_false ITYPE // Prepend false to results.
1626 | mov [BASE-8], ITYPE
1627 | mov RA, L:PC->top
1628 | sub RA, 8
1629 | mov L:PC->top, RA // Clear error from coroutine stack.
1630 | // Copy error message.
1631 | mov RD, [RA]
1632 | mov [BASE], RD
1633 | mov RDd, 1+2 // nresults+1 = 1 + false + error.
1634 | jmp <7
1635 |.else
1636 | mov CARG2, L:PC
1637 | mov CARG1, L:RB
1638 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1639 | // Error function does not return.
1640 |.endif
1641 |
1642 |9: // Handle stack expansion on return from yield.
1643 | mov L:RA, TMP1
1644 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1645 | mov CARG2, PC // n = number of results.
1646 | mov CARG1, L:RB
1647 | call extern lj_state_growstack // (lua_State *L, int n)
1648 | mov L:PC, TMP1 // Reload the coroutine pointer.
1649 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
1650 | jmp <4 // Retry the stack move.
1651 |.endmacro
1652 |
1653 | coroutine_resume_wrap 1 // coroutine.resume
1654 | coroutine_resume_wrap 0 // coroutine.wrap
1655 |
1656 |.ffunc coroutine_yield // Marks the current state as yielded and leaves via vm_leave_unw.
1657 | mov L:RB, SAVE_L
1658 | test aword L:RB->cframe, CFRAME_RESUME
1659 | jz ->fff_fallback // CFRAME_RESUME bit not set in cframe: use the fallback.
1660 | mov L:RB->base, BASE
1661 | lea RD, [BASE+NARGS:RD*8-8] // Top = just past the last argument.
1662 | mov L:RB->top, RD
1663 | xor RDd, RDd
1664 | mov aword L:RB->cframe, RD // Clear L->cframe.
1665 | mov al, LUA_YIELD
1666 | mov byte L:RB->status, al // L->status = LUA_YIELD.
1667 | jmp ->vm_leave_unw
1668 |
1669 |//-- Math library -------------------------------------------------------
1670 |
1671 | .ffunc_1 math_abs // Absolute value. Also defines the shared fff_resi/fff_resRB exits under DUALNUM.
1672 | mov RB, [BASE]
1673 |.if DUALNUM
1674 | checkint RB, >3 // Not an integer: continue at label 3.
1675 | cmp RBd, 0; jns ->fff_resi // Non-negative integer: return it unchanged.
1676 | neg RBd; js >2 // Still negative after negation: the result needs 2^31, see label 2.
1677 |->fff_resbit:
1678 |->fff_resi: // Return the 32 bit integer in RBd.
1679 | setint RB
1680 |->fff_resRB: // Return the tagged value in RB as the single result.
1681 | mov PC, [BASE-8]
1682 | mov [BASE-16], RB
1683 | jmp ->fff_res1
1684 |2:
1685 | mov64 RB, U64x(41e00000,00000000) // 2^31.
1686 | jmp ->fff_resRB
1687 |3:
1688 | ja ->fff_fallback // Flags presumably come from checkint: types above number go to the fallback.
1689 |.else
1690 | checknum RB, ->fff_fallback
1691 |.endif
1692 | shl RB, 1 // Shift the top (sign) bit out ...
1693 | shr RB, 1 // ... and shift a zero back in.
1694 | mov PC, [BASE-8]
1695 | mov [BASE-16], RB
1696 | jmp ->fff_res1
1697 |
1698 |.ffunc_n math_sqrt, sqrtsd // xmm0 = sqrt of the first argument, then falls into the result code.
1699 |->fff_resxmm0: // Return the double in xmm0 as the single result.
1700 | mov PC, [BASE-8]
1701 | movsd qword [BASE-16], xmm0
1702 | // fallthrough
1703 |
1704 |->fff_res1: // One result, already stored at BASE-16.
1705 | mov RDd, 1+1
1706 |->fff_res: // RD = nresults+1.
1707 | mov MULTRES, RDd
1708 |->fff_res_: // Entry for callers that have already set MULTRES.
1709 | test PCd, FRAME_TYPE
1710 | jnz >7 // Any frame type bit set: take the generic return path.
1711 |5:
1712 | cmp PC_RB, RDL // More results expected?
1713 | ja >6
1714 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1715 | movzx RAd, PC_RA
1716 | neg RA
1717 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
1718 | ins_next
1719 |
1720 |6: // Fill up results with nil.
1721 | mov aword [BASE+RD*8-24], LJ_TNIL
1722 | add RD, 1
1723 | jmp <5
1724 |
1725 |7: // Non-standard return case.
1726 | mov RA, -16 // Results start at BASE+RA = BASE-16.
1727 | jmp ->vm_return
1728 |
1729 |.macro math_round, func
1730 | .ffunc math_ .. func // Defines ff_math_<func>, which rounds via the vm_<func>_sse helper.
1731 |.if DUALNUM
1732 | mov RB, [BASE]
1733 | checknumx RB, ->fff_resRB, je // Presumably: an integer argument is returned unchanged, TODO confirm.
1734 | ja ->fff_fallback
1735 |.else
1736 | checknumtp [BASE], ->fff_fallback
1737 |.endif
1738 | movsd xmm0, qword [BASE]
1739 | call ->vm_ .. func .. _sse
1740 |.if DUALNUM
1741 | cvttsd2si RBd, xmm0
1742 | cmp RBd, 0x80000000 // cvttsd2si yields 0x80000000 when the value does not fit.
1743 | jne ->fff_resi // Any other value fits: return it as an integer.
1744 | cvtsi2sd xmm1, RBd // Convert back to see whether 0x80000000 was the exact result.
1745 | ucomisd xmm0, xmm1
1746 | jp ->fff_resxmm0 // Unordered compare: return the double.
1747 | je ->fff_resi // Exact match: the integer result is correct.
1748 |.endif
1749 | jmp ->fff_resxmm0
1750 |.endmacro
1751 |
1752 | math_round floor
1753 | math_round ceil
1754 |
1755 |.ffunc math_log // One-argument case only: returns log(x) via the C function log.
1756 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1757 | checknumtp [BASE], ->fff_fallback
1758 | movsd xmm0, qword [BASE]
1759 | mov RB, BASE // Save BASE across the call.
1760 | call extern log
1761 | mov BASE, RB // Restore BASE.
1762 | jmp ->fff_resxmm0
1763 |
1764 |.macro math_extern, func
1765 | .ffunc_n math_ .. func // Defines ff_math_<func> with the first argument loaded into xmm0.
1766 | mov RB, BASE // Save BASE across the call.
1767 | call extern func
1768 | mov BASE, RB // Restore BASE.
1769 | jmp ->fff_resxmm0 // Return the double left in xmm0.
1770 |.endmacro
1771 |
1772 |.macro math_extern2, func
1773 | .ffunc_nn math_ .. func // Defines ff_math_<func> with the arguments loaded into xmm0 and xmm1.
1774 | mov RB, BASE // Save BASE across the call.
1775 | call extern func
1776 | mov BASE, RB // Restore BASE.
1777 | jmp ->fff_resxmm0 // Return the double left in xmm0.
1778 |.endmacro
1779 |
1780 | math_extern log10
1781 | math_extern exp
1782 | math_extern sin
1783 | math_extern cos
1784 | math_extern tan
1785 | math_extern asin
1786 | math_extern acos
1787 | math_extern atan
1788 | math_extern sinh
1789 | math_extern cosh
1790 | math_extern tanh
1791 | math_extern2 pow
1792 | math_extern2 atan2
1793 | math_extern2 fmod
1794 |
1795 |.ffunc_2 math_ldexp // Computes the result with the x87 fscale instruction.
1796 | checknumtp [BASE], ->fff_fallback
1797 | checknumtp [BASE+8], ->fff_fallback
1798 | fld qword [BASE+8] // Push the second argument (ends up in st1 as the scale).
1799 | fld qword [BASE] // Push the first argument (st0).
1800 | fscale // st0 = st0 scaled by 2 to the power of st1 (truncated).
1801 | fpop1 // Presumably removes the st1 entry and keeps st0, TODO confirm.
1802 | mov PC, [BASE-8]
1803 | fstp qword [BASE-16] // Store the result and pop it off the x87 stack.
1804 | jmp ->fff_res1
1805 |
1806 |.ffunc_n math_frexp // Calls frexp and returns its result plus the exponent it writes to TMP1.
1807 | mov RB, BASE // Save BASE across the call.
1808 |.if X64WIN
1809 | lea CARG2, TMP1 // Caveat: CARG2 == BASE
1810 |.else
1811 | lea CARG1, TMP1 // Pointer argument for the exponent output.
1812 |.endif
1813 | call extern frexp
1814 | mov BASE, RB // Restore BASE.
1815 | mov RBd, TMP1d // Exponent written by frexp.
1816 | mov PC, [BASE-8] // Load the frame link before its slot is overwritten below.
1817 | movsd qword [BASE-16], xmm0 // Result 1: the value returned in xmm0.
1818 |.if DUALNUM
1819 | setint RB
1820 | mov [BASE-8], RB // Result 2: the exponent as a tagged integer.
1821 |.else
1822 | cvtsi2sd xmm1, RBd
1823 | movsd qword [BASE-8], xmm1 // Result 2: the exponent as a double.
1824 |.endif
1825 | mov RDd, 1+2
1826 | jmp ->fff_res
1827 |
1828 |.ffunc_n math_modf // Calls modf, which writes one result straight to BASE-16 and returns the other.
1829 | mov RB, BASE // Save BASE across the call.
1830 |.if X64WIN
1831 | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
1832 |.else
1833 | lea CARG1, [BASE-16] // Pointer argument: the first result slot.
1834 |.endif
1835 | call extern modf
1836 | mov BASE, RB // Restore BASE.
1837 | mov PC, [BASE-8] // Load the frame link before its slot is overwritten below.
1838 | movsd qword [BASE-8], xmm0 // Result 2: the value returned in xmm0.
1839 | mov RDd, 1+2
1840 | jmp ->fff_res
1841 |
1842 |.macro math_minmax, name, cmovop, sseop
1843 | .ffunc name // Folds all arguments with cmovop (integers) or sseop (doubles).
1844 | mov RAd, 2 // RA counts like nargs+1: 2 refers to the second argument.
1845 |.if DUALNUM
1846 | mov RB, [BASE]
1847 | checkint RB, >4 // First argument is not an integer: continue at label 4.
1848 |1: // Handle integers.
1849 | cmp RAd, RDd; jae ->fff_resRB // All arguments consumed: return the integer in RB.
1850 | mov TMPR, [BASE+RA*8-8]
1851 | checkint TMPR, >3
1852 | cmp RBd, TMPRd
1853 | cmovop RB, TMPR // Replace the running result when the condition of cmovop holds.
1854 | add RAd, 1
1855 | jmp <1
1856 |3:
1857 | ja ->fff_fallback
1858 | // Convert intermediate result to number and continue below.
1859 | cvtsi2sd xmm0, RBd
1860 | jmp >6
1861 |4:
1862 | ja ->fff_fallback
1863 |.else
1864 | checknumtp [BASE], ->fff_fallback
1865 |.endif
1866 |
1867 | movsd xmm0, qword [BASE] // Running result as a double.
1868 |5: // Handle numbers or integers.
1869 | cmp RAd, RDd; jae ->fff_resxmm0 // All arguments consumed: return the double in xmm0.
1870 |.if DUALNUM
1871 | mov RB, [BASE+RA*8-8]
1872 | checknumx RB, >6, jb
1873 | ja ->fff_fallback
1874 | cvtsi2sd xmm1, RBd // Integer argument: convert it to a double.
1875 | jmp >7
1876 |.else
1877 | checknumtp [BASE+RA*8-8], ->fff_fallback
1878 |.endif
1879 |6: // Load the current argument as a double.
1880 | movsd xmm1, qword [BASE+RA*8-8]
1881 |7:
1882 | sseop xmm0, xmm1 // Combine the running result with the current argument.
1883 | add RAd, 1
1884 | jmp <5
1885 |.endmacro
1886 |
1887 | math_minmax math_min, cmovg, minsd
1888 | math_minmax math_max, cmovl, maxsd
1889 |
1890 |//-- String library -----------------------------------------------------
1891 |
1892 |.ffunc string_byte // Only handle the 1-arg case here.
1893 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
1894 | mov STR:RB, [BASE]
1895 | checkstr STR:RB, ->fff_fallback
1896 | mov PC, [BASE-8]
1897 | cmp dword STR:RB->len, 1
1898 | jb ->fff_res0 // Return no results for empty string.
1899 | movzx RBd, byte STR:RB[1] // Load the first byte of the string data.
1900 |.if DUALNUM
1901 | jmp ->fff_resi
1902 |.else
1903 | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
1904 |.endif
1905 |
1906 |.ffunc string_char // Only handle the 1-arg case here.
1907 | ffgccheck
1908 | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1909 |.if DUALNUM
1910 | mov RB, [BASE]
1911 | checkint RB, ->fff_fallback
1912 |.else
1913 | checknumtp [BASE], ->fff_fallback
1914 | cvttsd2si RBd, qword [BASE]
1915 |.endif
1916 | cmp RBd, 255; ja ->fff_fallback // The unsigned compare also rejects negative values.
1917 | mov TMP1d, RBd // Store the byte value in the TMP1 stack slot.
1918 | mov TMPRd, 1 // String length is 1.
1919 | lea RD, TMP1 // Points to stack. Little-endian.
1920 |->fff_newstr: // Shared: create a string from RD = data pointer, TMPRd = length.
1921 | mov L:RB, SAVE_L
1922 | mov L:RB->base, BASE
1923 | mov CARG3d, TMPRd // Zero-extended to size_t.
1924 | mov CARG2, RD
1925 | mov CARG1, L:RB
1926 | mov SAVE_PC, PC
1927 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1928 |->fff_resstr: // Shared: return the string pointer in RD. RB must hold L.
1929 | // GCstr * returned in eax (RD).
1930 | mov BASE, L:RB->base
1931 | mov PC, [BASE-8]
1932 | settp STR:RD, LJ_TSTR
1933 | mov [BASE-16], STR:RD
1934 | jmp ->fff_res1
1935 |
1936 |.ffunc string_sub // Clamps start/end to the string, then creates the substring via fff_newstr.
1937 | ffgccheck
1938 | mov TMPRd, -1 // Default end is -1, used when only two arguments are given.
1939 | cmp NARGS:RDd, 1+2; jb ->fff_fallback // Fewer than 2 arguments: use the fallback.
1940 | jna >1 // Exactly 2 arguments: keep the default end.
1941 |.if DUALNUM
1942 | mov TMPR, [BASE+16]
1943 | checkint TMPR, ->fff_fallback
1944 |.else
1945 | checknumtp [BASE+16], ->fff_fallback
1946 | cvttsd2si TMPRd, qword [BASE+16]
1947 |.endif
1948 |1:
1949 | mov STR:RB, [BASE]
1950 | checkstr STR:RB, ->fff_fallback
1951 |.if DUALNUM
1952 | mov ITYPE, [BASE+8]
1953 | mov RAd, ITYPEd // Must clear hiword for lea below.
1954 | sar ITYPE, 47
1955 | cmp ITYPEd, LJ_TISNUM
1956 | jne ->fff_fallback
1957 |.else
1958 | checknumtp [BASE+8], ->fff_fallback
1959 | cvttsd2si RAd, qword [BASE+8]
1960 |.endif
1961 | mov RCd, STR:RB->len
1962 | cmp RCd, TMPRd // len < end? (unsigned compare)
1963 | jb >5
1964 |2: // TMPRd = end, now within 0..len.
1965 | test RAd, RAd // start <= 0?
1966 | jle >7
1967 |3: // RAd = start, now >= 1.
1968 | sub TMPRd, RAd // start > end?
1969 | jl ->fff_emptystr
1970 | lea RD, [STR:RB+RAd+#STR-1] // Data pointer for the 1-based start position.
1971 | add TMPRd, 1 // Length = end - start + 1.
1972 |4:
1973 | jmp ->fff_newstr
1974 |
1975 |5: // Negative end or overflow.
1976 | jl >6 // Signed less as well: end is a positive value beyond len.
1977 | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
1978 | jmp <2
1979 |6: // Overflow.
1980 | mov TMPRd, RCd // end = len
1981 | jmp <2
1982 |
1983 |7: // Negative start or underflow.
1984 | je >8 // start == 0.
1985 | add RAd, RCd // start = start+(len+1)
1986 | add RAd, 1
1987 | jg <3 // start > 0?
1988 |8: // Underflow.
1989 | mov RAd, 1 // start = 1
1990 | jmp <3
1991 |
1992 |->fff_emptystr: // Range underflow.
1993 | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
1994 | jmp <4
1995 |
1996 |.macro ffstring_op, name
1997 | .ffunc_1 string_ .. name // Defines ff_string_<name>, implemented with lj_buf_putstr_<name>.
1998 | ffgccheck
1999 |.if X64WIN
2000 | mov STR:TMPR, [BASE]
2001 | checkstr STR:TMPR, ->fff_fallback
2002 |.else
2003 | mov STR:CARG2, [BASE]
2004 | checkstr STR:CARG2, ->fff_fallback
2005 |.endif
2006 | mov L:RB, SAVE_L
2007 | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // Use the global temporary buffer.
2008 | mov L:RB->base, BASE
2009 |.if X64WIN
2010 | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
2011 |.endif
2012 | mov RC, SBUF:CARG1->b
2013 | mov SBUF:CARG1->L, L:RB
2014 | mov SBUF:CARG1->p, RC // Set the buffer field p to the field b.
2015 | mov SAVE_PC, PC
2016 | call extern lj_buf_putstr_ .. name
2017 | mov CARG1, rax // Pass the value returned in rax on to lj_buf_tostr.
2018 | call extern lj_buf_tostr
2019 | jmp ->fff_resstr // Return the string. RB still holds L as fff_resstr requires.
2020 |.endmacro
2021 |
2022 |ffstring_op reverse
2023 |ffstring_op lower
2024 |ffstring_op upper
2025 |
2026 |//-- Bit library --------------------------------------------------------
2027 |
2028 |.macro .ffunc_bit, name, kind, fdef
2029 | fdef name // Emit the function header with the given .ffunc* macro.
2030 |.if kind == 2
2031 | sseconst_tobit xmm1, RB // kind 2: load the conversion constant up front.
2032 |.endif
2033 |.if DUALNUM
2034 | mov RB, [BASE]
2035 | checkint RB, >1 // Not an integer: convert it at label 1.
2036 |.if kind > 0
2037 | jmp >2 // Integer: skip the conversion.
2038 |.else
2039 | jmp ->fff_resbit // kind 0 with an integer: return it directly.
2040 |.endif
2041 |1:
2042 | ja ->fff_fallback
2043 | movd xmm0, RB
2044 |.else
2045 | checknumtp [BASE], ->fff_fallback
2046 | movsd xmm0, qword [BASE]
2047 |.endif
2048 |.if kind < 2
2049 | sseconst_tobit xmm1, RB
2050 |.endif
2051 | addsd xmm0, xmm1 // Adding the constant leaves the integer value in the low 32 bits.
2052 | movd RBd, xmm0 // RBd = first argument as a 32 bit integer.
2053 |2:
2054 |.endmacro
2055 |
2056 |.macro .ffunc_bit, name, kind
2057 | .ffunc_bit name, kind, .ffunc_1 // Two-parameter form: the header defaults to .ffunc_1.
2058 |.endmacro
2059 |
2060 |.ffunc_bit bit_tobit, 0
2061 | jmp ->fff_resbit // Return the converted integer in RBd.
2062 |
2063 |.macro .ffunc_bit_op, name, ins
2064 | .ffunc_bit name, 2 // RBd = first argument as an integer, xmm1 = conversion constant.
2065 | mov TMPRd, NARGS:RDd // Save for fallback.
2066 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
2067 |1: // Fold the arguments from last to second into RBd.
2068 | cmp RD, BASE
2069 | jbe ->fff_resbit // Back at the first argument: done.
2070 |.if DUALNUM
2071 | mov RA, [RD]
2072 | checkint RA, >2
2073 | ins RBd, RAd
2074 | sub RD, 8
2075 | jmp <1
2076 |2:
2077 | ja ->fff_fallback_bit_op
2078 | movd xmm0, RA
2079 |.else
2080 | checknumtp [RD], ->fff_fallback_bit_op
2081 | movsd xmm0, qword [RD]
2082 |.endif
2083 | addsd xmm0, xmm1 // Same number to integer conversion as in .ffunc_bit.
2084 | movd RAd, xmm0
2085 | ins RBd, RAd
2086 | sub RD, 8
2087 | jmp <1
2088 |.endmacro
2089 |
2090 |.ffunc_bit_op bit_band, and
2091 |.ffunc_bit_op bit_bor, or
2092 |.ffunc_bit_op bit_bxor, xor
2093 |
2094 |.ffunc_bit bit_bswap, 1
2095 | bswap RBd
2096 | jmp ->fff_resbit
2097 |
2098 |.ffunc_bit bit_bnot, 1
2099 | not RBd
2100 |.if DUALNUM
2101 | jmp ->fff_resbit
2102 |.else
2103 |->fff_resbit: // Non-DUALNUM result path for the bit.* fast functions; bit_bnot falls through into it.
2104 | cvtsi2sd xmm0, RBd // Convert the 32 bit integer result in RBd to a double.
2105 | jmp ->fff_resxmm0
2106 |.endif
2107 |
2108 |->fff_fallback_bit_op:
2109 | mov NARGS:RDd, TMPRd // Restore for fallback
2110 | jmp ->fff_fallback
2111 |
2112 |.macro .ffunc_bit_sh, name, ins // Shift/rotate fast function: value ends up in RBd, count in RA, then "ins RBd, cl".
2113 |.if DUALNUM
2114 | .ffunc_bit name, 1, .ffunc_2
2115 | // Note: no inline conversion from number for 2nd argument!
2116 | mov RA, [BASE+8]
2117 | checkint RA, ->fff_fallback
2118 |.else
2119 | .ffunc_nn name
2120 | sseconst_tobit xmm2, RB
2121 | addsd xmm0, xmm2 // xmm0/xmm1 are presumably the two number arguments loaded by .ffunc_nn -- confirm.
2122 | addsd xmm1, xmm2
2123 | movd RBd, xmm0 // Low 32 bits of the sums: value in RBd, count in RAd.
2124 | movd RAd, xmm1
2125 |.endif
2126 | ins RBd, cl // Assumes RA is ecx.
2127 | jmp ->fff_resbit
2128 |.endmacro
2129 |
2130 |.ffunc_bit_sh bit_lshift, shl
2131 |.ffunc_bit_sh bit_rshift, shr
2132 |.ffunc_bit_sh bit_arshift, sar
2133 |.ffunc_bit_sh bit_rol, rol
2134 |.ffunc_bit_sh bit_ror, ror
2135 |
2136 |//-----------------------------------------------------------------------
2137 |
2138 |->fff_fallback_2:
2139 | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
2140 | jmp ->fff_fallback
2141 |->fff_fallback_1:
2142 | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
2143 |->fff_fallback: // Call fast function fallback handler.
2144 | // BASE = new base, RD = nargs+1
2145 | mov L:RB, SAVE_L
2146 | mov PC, [BASE-8] // Fallback may overwrite PC.
2147 | mov SAVE_PC, PC // Redundant (but a defined value).
2148 | mov L:RB->base, BASE
2149 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
2150 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2151 | mov L:RB->top, RD
2152 | mov CFUNC:RD, [BASE-16]
2153 | cleartp CFUNC:RD
2154 | cmp RA, L:RB->maxstack
2155 | ja >5 // Need to grow stack.
2156 | mov CARG1, L:RB
2157 | call aword CFUNC:RD->f // (lua_State *L)
2158 | mov BASE, L:RB->base
2159 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2160 | test RDd, RDd; jg ->fff_res // Returned nresults+1?
2161 |1: // Also entered from the grow-stack path (label 5) with RD = 0.
2162 | mov RA, L:RB->top
2163 | sub RA, BASE
2164 | shr RAd, 3 // RA = number of 8 byte slots between BASE and L->top.
2165 | test RDd, RDd
2166 | lea NARGS:RDd, [RAd+1] // lea and mov leave the flags of the test intact for the jne below.
2167 | mov LFUNC:RB, [BASE-16]
2168 | jne ->vm_call_tail // Returned -1?
2169 | cleartp LFUNC:RB
2170 | ins_callt // Returned 0: retry fast path.
2171 |
2172 |// Reconstruct previous base for vmeta_call during tailcall.
2173 |->vm_call_tail:
2174 | mov RA, BASE // Keep the current BASE in RA; BASE itself is rewound below.
2175 | test PCd, FRAME_TYPE
2176 | jnz >3 // Frame type bits set in PC: use the delta path at label 3.
2177 | movzx RBd, PC_RA
2178 | neg RB
2179 | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
2180 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2181 |3:
2182 | mov RB, PC
2183 | and RB, -8 // Clear the low 3 bits of PC ...
2184 | sub BASE, RB // ... and subtract what is left from BASE.
2185 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2186 |
2187 |5: // Grow stack for fallback handler.
2188 | mov CARG2d, LUA_MINSTACK
2189 | mov CARG1, L:RB
2190 | call extern lj_state_growstack // (lua_State *L, int n)
2191 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
2192 | xor RDd, RDd // Simulate a return 0.
2193 | jmp <1 // Dumb retry (goes through ff first).
2194 |
2195 |->fff_gcstep: // Call GC step function.
2196 | // BASE = new base, RD = nargs+1
2197 | pop RB // Must keep stack at same level.
2198 | mov TMP1, RB // Save return address
2199 | mov L:RB, SAVE_L
2200 | mov SAVE_PC, PC // Redundant (but a defined value).
2201 | mov L:RB->base, BASE
2202 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
2203 | mov CARG1, L:RB
2204 | mov L:RB->top, RD
2205 | call extern lj_gc_step // (lua_State *L)
2206 | mov BASE, L:RB->base
2207 | mov RD, L:RB->top
2208 | sub RD, BASE
2209 | shr RDd, 3
2210 | add NARGS:RDd, 1 // Rebuild RD = nargs+1 from L->top - BASE.
2211 | mov RB, TMP1
2212 | push RB // Restore return address.
2213 | ret
2214 |
2215 |//-----------------------------------------------------------------------
2216 |//-- Special dispatch targets -------------------------------------------
2217 |//-----------------------------------------------------------------------
2218 |
2219 |->vm_record: // Dispatch target for recording phase. Without JIT this label falls through to vm_rethook.
2220 |.if JIT
2221 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2222 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2223 | jnz >5 // Skip the call; just re-dispatch to the static instruction.
2224 | // Decrement the hookcount for consistency, but always do the call.
2225 | test RDL, HOOK_ACTIVE
2226 | jnz >1
2227 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2228 | jz >1
2229 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2230 | jmp >1 // Continue at the lj_dispatch_ins call below.
2231 |.endif
2232 |
2233 |->vm_rethook: // Dispatch target for return hooks.
2234 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2235 | test RDL, HOOK_ACTIVE // Hook already active?
2236 | jnz >5
2237 | jmp >1
2238 |
2239 |->vm_inshook: // Dispatch target for instr/line hooks.
2240 | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
2241 | test RDL, HOOK_ACTIVE // Hook already active?
2242 | jnz >5
2243 |
2244 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2245 | jz >5
2246 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2247 | jz >1 // Count reached zero: call the dispatcher.
2248 | test RDL, LUA_MASKLINE
2249 | jz >5 // Count not expired and no line mask: nothing to call.
2250 |1:
2251 | mov L:RB, SAVE_L
2252 | mov L:RB->base, BASE
2253 | mov CARG2, PC // Caveat: CARG2 == BASE
2254 | mov CARG1, L:RB
2255 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2256 | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
2257 |3: // Also entered from vm_hotloop.
2258 | mov BASE, L:RB->base
2259 |4: // Also entered from cont_hook.
2260 | movzx RAd, PC_RA
2261 |5:
2262 | movzx OP, PC_OP
2263 | movzx RDd, PC_RD
2264 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2265 |
2266 |->cont_hook: // Continue from hook yield.
2267 | add PC, 4
2268 | mov RA, [RB-40]
2269 | mov MULTRES, RAd // Restore MULTRES for *M ins.
2270 | jmp <4 // Reload RA from the instruction and re-dispatch (label 4 in vm_inshook).
2271 |
2272 |->vm_hotloop: // Hot loop counter underflow.
2273 |.if JIT
2274 | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
2275 | cleartp LFUNC:RB
2276 | mov RB, LFUNC:RB->pc
2277 | movzx RDd, byte [RB+PC2PROTO(framesize)]
2278 | lea RD, [BASE+RD*8] // RD = BASE + framesize*8; stored as L->top below.
2279 | mov L:RB, SAVE_L
2280 | mov L:RB->base, BASE
2281 | mov L:RB->top, RD
2282 | mov CARG2, PC
2283 | lea CARG1, [DISPATCH+GG_DISP2J]
2284 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2285 | mov SAVE_PC, PC
2286 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
2287 | jmp <3 // Reload BASE and re-dispatch via label 3 in vm_inshook.
2288 |.endif
2289 |
2290 |->vm_callhook: // Dispatch target for call hooks.
2291 | mov SAVE_PC, PC
2292 |.if JIT
2293 | jmp >1 // Skip the hot call marker; without JIT this simply falls through.
2294 |.endif
2295 |
2296 |->vm_hotcall: // Hot call counter underflow.
2297 |.if JIT
2298 | mov SAVE_PC, PC
2299 | or PC, 1 // Marker for hot call.
2300 |1:
2301 |.endif
2302 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
2303 | mov L:RB, SAVE_L
2304 | mov L:RB->base, BASE
2305 | mov L:RB->top, RD
2306 | mov CARG2, PC
2307 | mov CARG1, L:RB
2308 | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
2309 | // ASMFunction returned in eax/rax (RD).
2310 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2311 |.if JIT
2312 | and PC, -2 // Clear the hot call marker bit again.
2313 |.endif
2314 | mov BASE, L:RB->base
2315 | mov RA, RD // Stash the returned function pointer while RD is recomputed.
2316 | mov RD, L:RB->top
2317 | sub RD, BASE
2318 | mov RB, RA
2319 | movzx RAd, PC_RA
2320 | shr RDd, 3
2321 | add NARGS:RDd, 1 // RD = nargs+1 rebuilt from L->top - BASE.
2322 | jmp RB // Jump to the function returned by lj_dispatch_call.
2323 |
2324 |->cont_stitch: // Trace stitching.
2325 |.if JIT
2326 | // BASE = base, RC = result, RB = mbase
2327 | mov TRACE:ITYPE, [RB-40] // Save previous trace.
2328 | cleartp TRACE:ITYPE
2329 | mov TMPRd, MULTRES
2330 | movzx RAd, PC_RA
2331 | lea RA, [BASE+RA*8] // Call base.
2332 | sub TMPRd, 1 // Loop counter = MULTRES-1.
2333 | jz >2 // Nothing to copy.
2334 |1: // Move results down.
2335 | mov RB, [RC]
2336 | mov [RA], RB
2337 | add RC, 8
2338 | add RA, 8
2339 | sub TMPRd, 1
2340 | jnz <1
2341 |2:
2342 | movzx RCd, PC_RA
2343 | movzx RBd, PC_RB
2344 | add RC, RB
2345 | lea RC, [BASE+RC*8-8] // RC = BASE + (RA+RB-1)*8, the limit checked at label 3.
2346 |3:
2347 | cmp RC, RA
2348 | ja >9 // More results wanted?
2349 |
2350 | test TRACE:ITYPE, TRACE:ITYPE
2351 | jz ->cont_nop // No previous trace.
2352 | movzx RBd, word TRACE:ITYPE->traceno
2353 | movzx RDd, word TRACE:ITYPE->link
2354 | cmp RDd, RBd
2355 | je ->cont_nop // Blacklisted.
2356 | test RDd, RDd
2357 | jne =>BC_JLOOP // Jump to stitched trace.
2358 |
2359 | // Stitch a new trace to the previous trace.
2360 | mov [DISPATCH+DISPATCH_J(exitno)], RB // RB still holds traceno of the previous trace.
2361 | mov L:RB, SAVE_L
2362 | mov L:RB->base, BASE
2363 | mov CARG2, PC
2364 | lea CARG1, [DISPATCH+GG_DISP2J]
2365 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2366 | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
2367 | mov BASE, L:RB->base
2368 | jmp ->cont_nop
2369 |
2370 |9: // Fill up results with nil.
2371 | mov aword [RA], LJ_TNIL
2372 | add RA, 8
2373 | jmp <3
2374 |.endif
2375 |
2376 |->vm_profhook: // Dispatch target for profiler hook. The body is only emitted when LJ_HASPROFILE is set.
2377#if LJ_HASPROFILE
2378 | mov L:RB, SAVE_L
2379 | mov L:RB->base, BASE
2380 | mov CARG2, PC // Caveat: CARG2 == BASE
2381 | mov CARG1, L:RB
2382 | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
2383 | mov BASE, L:RB->base
2384 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2385 | sub PC, 4
2386 | jmp ->cont_nop
2387#endif
2388 |
2389 |//-----------------------------------------------------------------------
2390 |//-- Trace exit handler -------------------------------------------------
2391 |//-----------------------------------------------------------------------
2392 |
2393 |// Called from an exit stub with the exit number on the stack.
2394 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2395 |->vm_exit_handler:
2396 |.if JIT
2397 | push r13; push r12
2398 | push r11; push r10; push r9; push r8
2399 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // 9 pushes so far (72 bytes): rbp = rsp at entry + 16.
2400 | push rbx; push rdx; push rcx; push rax
2401 | movzx RCd, byte [rbp-8] // Reconstruct exit number.
2402 | mov RCH, byte [rbp-16] // [rbp-8] and [rbp-16] are the two slots that were on the stack at entry.
2403 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse those two slots to save r15 and r14.
2404 | // DISPATCH is preserved on-trace in LJ_GC64 mode.
2405 | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2406 | set_vmstate EXIT
2407 | mov [DISPATCH+DISPATCH_J(exitno)], RCd
2408 | mov [DISPATCH+DISPATCH_J(parent)], RAd
2409 |.if X64WIN
2410 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2411 |.else
2412 | sub rsp, 16*8 // Room for SSE regs.
2413 |.endif
2414 | add rbp, -128 // rbp = rsp as it was after the 14 GPR pushes; the SSE regs go right below.
2415 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2416 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2417 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2418 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2419 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2420 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2421 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2422 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2423 | // Caveat: RB is rbp.
2424 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2425 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2426 | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
2427 | mov L:RB->base, BASE
2428 |.if X64WIN
2429 | lea CARG2, [rsp+4*8] // Skip the 4*8 byte save area reserved above.
2430 |.else
2431 | mov CARG2, rsp // CARG2 = address of the saved xmm0, the lowest saved slot.
2432 |.endif
2433 | lea CARG1, [DISPATCH+GG_DISP2J]
2434 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2435 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2436 | // MULTRES or negated error code returned in eax (RD).
2437 | mov RA, L:RB->cframe
2438 | and RA, CFRAME_RAWMASK
2439 | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2440 | mov BASE, L:RB->base
2441 | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
2442 | jmp >1 // Enter vm_exit_interp at its label 1, with RA = masked L->cframe.
2443 |.endif
2444 |->vm_exit_interp:
2445 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2446 |.if JIT
2447 | // Restore additional callee-save registers only used in compiled code.
2448 |.if X64WIN
2449 | lea RA, [rsp+10*16+4*8]
2450 |1: // vm_exit_handler jumps here with RA = masked L->cframe.
2451 | movdqa xmm15, [RA-10*16]
2452 | movdqa xmm14, [RA-9*16]
2453 | movdqa xmm13, [RA-8*16]
2454 | movdqa xmm12, [RA-7*16]
2455 | movdqa xmm11, [RA-6*16]
2456 | movdqa xmm10, [RA-5*16]
2457 | movdqa xmm9, [RA-4*16]
2458 | movdqa xmm8, [RA-3*16]
2459 | movdqa xmm7, [RA-2*16]
2460 | mov rsp, RA // Reposition stack to C frame.
2461 | movdqa xmm6, [RA-1*16]
2462 | mov r15, CSAVE_1
2463 | mov r14, CSAVE_2
2464 | mov r13, CSAVE_3
2465 | mov r12, CSAVE_4
2466 |.else
2467 | lea RA, [rsp+16]
2468 |1: // vm_exit_handler jumps here with RA = masked L->cframe.
2469 | mov r13, [RA-8]
2470 | mov r12, [RA]
2471 | mov rsp, RA // Reposition stack to C frame.
2472 |.endif
2473 | test RDd, RDd; js >9 // Check for error from exit.
2474 | mov L:RB, SAVE_L
2475 | mov MULTRES, RDd
2476 | mov LFUNC:KBASE, [BASE-16]
2477 | cleartp LFUNC:KBASE
2478 | mov KBASE, LFUNC:KBASE->pc
2479 | mov KBASE, [KBASE+PC2PROTO(k)]
2480 | mov L:RB->base, BASE
2481 | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
2482 | set_vmstate INTERP
2483 | // Modified copy of ins_next which handles function header dispatch, too.
2484 | mov RCd, [PC]
2485 | movzx RAd, RCH // RA = bits 8-15 of the instruction word.
2486 | movzx OP, RCL // OP = low byte of the instruction word.
2487 | add PC, 4
2488 | shr RCd, 16 // RC = upper 16 bits of the instruction word.
2489 | cmp OP, BC_FUNCF // Function header?
2490 | jb >3 // No: dispatch with the decoded operand in RC.
2491 | cmp OP, BC_FUNCC+2 // Fast function?
2492 | jae >4
2493 |2:
2494 | mov RCd, MULTRES // RC/RD holds nres+1.
2495 |3:
2496 | jmp aword [DISPATCH+OP*8]
2497 |
2498 |4: // Check frame below fast function.
2499 | mov RC, [BASE-8]
2500 | test RCd, FRAME_TYPE
2501 | jnz <2 // Trace stitching continuation?
2502 | // Otherwise set KBASE for Lua function below fast function.
2503 | movzx RCd, byte [RC-3] // RC was loaded from [BASE-8]; the byte 3 below it is presumably the RA operand of the calling ins -- confirm.
2504 | neg RC
2505 | mov LFUNC:KBASE, [BASE+RC*8-32]
2506 | cleartp LFUNC:KBASE
2507 | mov KBASE, LFUNC:KBASE->pc
2508 | mov KBASE, [KBASE+PC2PROTO(k)]
2509 | jmp <2
2510 |
2511 |9: // Rethrow error from the right C frame.
2512 | neg RD // Undo the negation of the error code.
2513 | mov CARG1, L:RB
2514 | mov CARG2, RD
2515 | call extern lj_err_throw // (lua_State *L, int errcode)
2516 |.endif
2517 |
2518 |//-----------------------------------------------------------------------
2519 |//-- Math helper functions ----------------------------------------------
2520 |//-----------------------------------------------------------------------
2521 |
2522 |// FP value rounding. Called by math.floor/math.ceil fast functions
2523 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2524 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. cond is not referenced in the macro body.
2525 |->name:
2526 |->name .. _sse: // Second global label at the same address.
2527 | sseconst_abs xmm2, RD
2528 | sseconst_2p52 xmm3, RD
2529 | movaps xmm1, xmm0
2530 | andpd xmm1, xmm2 // |x|
2531 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2532 | jbe >1 // Also taken for NaN: an unordered compare sets CF and ZF.
2533 | andnpd xmm2, xmm0 // Isolate sign bit.
2534 |.if mode == 2 // trunc(x)?
2535 | movaps xmm0, xmm1
2536 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2537 | subsd xmm1, xmm3
2538 | sseconst_1 xmm3, RD
2539 | cmpsd xmm0, xmm1, 1 // |x| < result?
2540 | andpd xmm0, xmm3 // Compare mask AND constant: the constant if the compare was true, else 0.
2541 | subsd xmm1, xmm0 // If yes, subtract -1.
2542 | orpd xmm1, xmm2 // Merge sign bit back in.
2543 |.else
2544 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2545 | subsd xmm1, xmm3
2546 | orpd xmm1, xmm2 // Merge sign bit back in.
2547 | .if mode == 1 // ceil(x)?
2548 | sseconst_m1 xmm2, RD // Must subtract -1 to preserve -0.
2549 | cmpsd xmm0, xmm1, 6 // x > result?
2550 | .else // floor(x)?
2551 | sseconst_1 xmm2, RD
2552 | cmpsd xmm0, xmm1, 1 // x < result?
2553 | .endif
2554 | andpd xmm0, xmm2 // Compare mask AND constant: the constant if the compare was true, else 0.
2555 | subsd xmm1, xmm0 // If yes, subtract +-1.
2556 |.endif
2557 | movaps xmm0, xmm1
2558 |1:
2559 | ret
2560 |.endmacro
2561 |
2562 | vm_round vm_floor, 0, 1
2563 | vm_round vm_ceil, 1, JIT
2564 | vm_round vm_trunc, 2, JIT
2565 |
2566 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2567 |->vm_mod:
2568 |// Args in xmm0/xmm1, return value in xmm0.
2569 |// Caveat: xmm0-xmm5 and RC (eax) modified!
2570 | movaps xmm5, xmm0 // Keep x in xmm5; xmm0 becomes the quotient.
2571 | divsd xmm0, xmm1
2572 | sseconst_abs xmm2, RD
2573 | sseconst_2p52 xmm3, RD
2574 | movaps xmm4, xmm0
2575 | andpd xmm4, xmm2 // |x/y|
2576 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
2577 | jbe >1 // Also taken for NaN: an unordered compare sets CF and ZF.
2578 | andnpd xmm2, xmm0 // Isolate sign bit.
2579 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
2580 | subsd xmm4, xmm3
2581 | orpd xmm4, xmm2 // Merge sign bit back in.
2582 | sseconst_1 xmm2, RD
2583 | cmpsd xmm0, xmm4, 1 // x/y < result?
2584 | andpd xmm0, xmm2
2585 | subsd xmm4, xmm0 // If yes, subtract 1.0.
2586 | movaps xmm0, xmm5
2587 | mulsd xmm1, xmm4
2588 | subsd xmm0, xmm1 // Result = x - y*rounded(x/y).
2589 | ret
2590 |1: // Quotient used as is (no rounding step).
2591 | mulsd xmm1, xmm0
2592 | movaps xmm0, xmm5
2593 | subsd xmm0, xmm1 // Result = x - y*(x/y).
2594 | ret
2595 |
2596 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
2597 |->vm_powi_sse:
2598 | cmp eax, 1; jle >6 // i<=1?
2599 | // Now 1 < (unsigned)i <= 0x80000000.
2600 |1: // Handle leading zeros.
2601 | test eax, 1; jnz >2
2602 | mulsd xmm0, xmm0 // Low bit clear: square x and shift the exponent right.
2603 | shr eax, 1
2604 | jmp <1
2605 |2:
2606 | shr eax, 1; jz >5 // No bits left: xmm0 already is the result.
2607 | movaps xmm1, xmm0 // xmm1 accumulates the product, xmm0 keeps being squared.
2608 |3: // Handle trailing bits.
2609 | mulsd xmm0, xmm0
2610 | shr eax, 1; jz >4
2611 | jnc <3 // Bit shifted out was 0: square only.
2612 | mulsd xmm1, xmm0
2613 | jmp <3
2614 |4:
2615 | mulsd xmm0, xmm1
2616 |5:
2617 | ret
2618 |6: // Flags are still those of the cmp eax, 1 above.
2619 | je <5 // x^1 ==> x
2620 | jb >7 // x^0 ==> 1
2621 | neg eax // Remaining case: negative i.
2622 | call <1 // Compute x^(-i) with the loop at label 1.
2623 | sseconst_1 xmm1, RD
2624 | divsd xmm1, xmm0 // Result = 1 / x^(-i).
2625 | movaps xmm0, xmm1
2626 | ret
2627 |7:
2628 | sseconst_1 xmm0, RD
2629 | ret
2630 |
2631 |//-----------------------------------------------------------------------
2632 |//-- Miscellaneous functions --------------------------------------------
2633 |//-----------------------------------------------------------------------
2634 |
2635 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2636 |->vm_cpuid:
2637 | mov eax, CARG1d // eax = f, the value cpuid is executed with.
2638 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // X64WIN: save rsi and point it at res; otherwise the stores below rely on CARG2 already being rsi.
2639 | push rbx // cpuid overwrites ebx; it is restored below.
2640 | xor ecx, ecx // cpuid is executed with ecx = 0.
2641 | cpuid
2642 | mov [rsi], eax // res[0..3] = eax, ebx, ecx, edx.
2643 | mov [rsi+4], ebx
2644 | mov [rsi+8], ecx
2645 | mov [rsi+12], edx
2646 | pop rbx
2647 | .if X64WIN; pop rsi; .endif
2648 | ret
2649 |
2650 |//-----------------------------------------------------------------------
2651 |//-- Assertions ---------------------------------------------------------
2652 |//-----------------------------------------------------------------------
2653 |
2654 |->assert_bad_for_arg_type:
2655#ifdef LUA_USE_ASSERT
2656 | int3
2657#endif
2658 | int3
2659 |
2660 |//-----------------------------------------------------------------------
2661 |//-- FFI helper functions -----------------------------------------------
2662 |//-----------------------------------------------------------------------
2663 |
2664 |// Handler for callback functions. Callback slot number in ah/al.
2665 |->vm_ffi_callback:
2666 |.if FFI
2667 |.type CTSTATE, CTState, PC // CTSTATE is a typed alias for the PC register.
2668 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
2669 | lea DISPATCH, [ebp+GG_G2DISP]
2670 | mov CTSTATE, GL:ebp->ctype_state
2671 | movzx eax, ax // Keep only the 16 bit slot number passed in ah/al.
2672 | mov CTSTATE->cb.slot, eax
2673 | mov CTSTATE->cb.gpr[0], CARG1 // Stash the incoming argument registers in cts->cb.
2674 | mov CTSTATE->cb.gpr[1], CARG2
2675 | mov CTSTATE->cb.gpr[2], CARG3
2676 | mov CTSTATE->cb.gpr[3], CARG4
2677 | movsd qword CTSTATE->cb.fpr[0], xmm0
2678 | movsd qword CTSTATE->cb.fpr[1], xmm1
2679 | movsd qword CTSTATE->cb.fpr[2], xmm2
2680 | movsd qword CTSTATE->cb.fpr[3], xmm3
2681 |.if X64WIN
2682 | lea rax, [rsp+CFRAME_SIZE+4*8] // X64WIN: 4 GPRs/FPRs only, and an extra 4*8 byte offset for cb.stack.
2683 |.else
2684 | lea rax, [rsp+CFRAME_SIZE]
2685 | mov CTSTATE->cb.gpr[4], CARG5 // Non-Windows: two more GPRs and four more FPRs are stashed.
2686 | mov CTSTATE->cb.gpr[5], CARG6
2687 | movsd qword CTSTATE->cb.fpr[4], xmm4
2688 | movsd qword CTSTATE->cb.fpr[5], xmm5
2689 | movsd qword CTSTATE->cb.fpr[6], xmm6
2690 | movsd qword CTSTATE->cb.fpr[7], xmm7
2691 |.endif
2692 | mov CTSTATE->cb.stack, rax
2693 | mov CARG2, rsp
2694 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
2695 | mov CARG1, CTSTATE
2696 | call extern lj_ccallback_enter // (CTState *cts, void *cf)
2697 | // lua_State * returned in eax (RD).
2698 | set_vmstate INTERP
2699 | mov BASE, L:RD->base
2700 | mov RD, L:RD->top
2701 | sub RD, BASE
2702 | mov LFUNC:RB, [BASE-16]
2703 | cleartp LFUNC:RB
2704 | shr RD, 3
2705 | add RD, 1 // RD = (L->top - BASE)/8 + 1.
2706 | ins_callt
2707 |.endif
2708 |
2709 |->cont_ffi_callback: // Return from FFI callback.
2710 |.if FFI
2711 | mov L:RA, SAVE_L
2712 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] // CTSTATE is the typed register alias declared in vm_ffi_callback.
2713 | mov aword CTSTATE->L, L:RA
2714 | mov L:RA->base, BASE
2715 | mov L:RA->top, RB
2716 | mov CARG1, CTSTATE
2717 | mov CARG2, RC
2718 | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
2719 | mov rax, CTSTATE->cb.gpr[0] // Load the C return registers from cts->cb.
2720 | movsd xmm0, qword CTSTATE->cb.fpr[0]
2721 | jmp ->vm_leave_unw
2722 |.endif
2723 |
2724 |->vm_ffi_call: // Call C function via FFI.
2725 | // Caveat: needs special frame unwinding, see below.
2726 |.if FFI
2727 | .type CCSTATE, CCallState, rbx
2728 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame setup; rbx is saved at [rbp-8] and then holds the CCallState.
2729 |
2730 | // Readjust stack.
2731 | mov eax, CCSTATE->spadj
2732 | sub rsp, rax
2733 |
2734 | // Copy stack slots.
2735 | movzx ecx, byte CCSTATE->nsp
2736 | sub ecx, 1
2737 | js >2 // nsp == 0: nothing to copy.
2738 |1: // Copy from the last slot down to slot 0.
2739 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
2740 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
2741 | sub ecx, 1
2742 | jns <1
2743 |2:
2744 |
2745 | movzx eax, byte CCSTATE->nfpr // eax is not modified again before the call; presumably the vararg FP register count for the callee -- confirm.
2746 | mov CARG1, CCSTATE->gpr[0]
2747 | mov CARG2, CCSTATE->gpr[1]
2748 | mov CARG3, CCSTATE->gpr[2]
2749 | mov CARG4, CCSTATE->gpr[3]
2750 |.if not X64WIN
2751 | mov CARG5, CCSTATE->gpr[4]
2752 | mov CARG6, CCSTATE->gpr[5]
2753 |.endif
2754 | test eax, eax; jz >5 // nfpr == 0: skip loading the xmm registers.
2755 | movaps xmm0, CCSTATE->fpr[0]
2756 | movaps xmm1, CCSTATE->fpr[1]
2757 | movaps xmm2, CCSTATE->fpr[2]
2758 | movaps xmm3, CCSTATE->fpr[3]
2759 |.if not X64WIN
2760 | cmp eax, 4; jbe >5 // At most 4 FP registers used: skip xmm4-xmm7.
2761 | movaps xmm4, CCSTATE->fpr[4]
2762 | movaps xmm5, CCSTATE->fpr[5]
2763 | movaps xmm6, CCSTATE->fpr[6]
2764 | movaps xmm7, CCSTATE->fpr[7]
2765 |.endif
2766 |5:
2767 |
2768 | call aword CCSTATE->func
2769 |
2770 | mov CCSTATE->gpr[0], rax // Store the return registers back into the CCallState.
2771 | movaps CCSTATE->fpr[0], xmm0
2772 |.if not X64WIN
2773 | mov CCSTATE->gpr[1], rdx
2774 | movaps CCSTATE->fpr[1], xmm1
2775 |.endif
2776 |
2777 | mov rbx, [rbp-8]; leave; ret // Restore rbx from the slot pushed in the prologue, then tear down the frame.
2778 |.endif
2779 |// Note: vm_ffi_call must be the last function in this object file!
2780 |
2781 |//-----------------------------------------------------------------------
2782}
2783
2784/* Generate the code for a single instruction. */
2785static void build_ins(BuildCtx *ctx, BCOp op, int defop)
2786{
2787 int vk = 0;
2788 |// Note: aligning all instructions does not pay off.
2789 |=>defop:
2790
2791 switch (op) {
2792
2793 /* -- Comparison ops ---------------------------------------------------- */
2794
2795 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2796
2797 |.macro jmp_comp, lt, ge, le, gt, target // Emits exactly one of the four given jump instructions to target, chosen by the C-level op at build time.
2798 ||switch (op) {
2799 ||case BC_ISLT:
2800 | lt target
2801 ||break;
2802 ||case BC_ISGE:
2803 | ge target
2804 ||break;
2805 ||case BC_ISLE:
2806 | le target
2807 ||break;
2808 ||case BC_ISGT:
2809 | gt target
2810 ||break;
2811 ||default: break; /* Shut up GCC. */
2812 ||}
2813 |.endmacro
2814
2815 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2816 | // RA = src1, RD = src2, JMP with RD = target
2817 | ins_AD
2818 | mov ITYPE, [BASE+RA*8]
2819 | mov RB, [BASE+RD*8]
2820 | mov RA, ITYPE // Keep the full values in RA/RD ...
2821 | mov RD, RB
2822 | sar ITYPE, 47 // ... and shift the copies down by 47 bits for the LJ_TISNUM checks below.
2823 | sar RB, 47
2824 |.if DUALNUM
2825 | cmp ITYPEd, LJ_TISNUM; jne >7
2826 | cmp RBd, LJ_TISNUM; jne >8
2827 | add PC, 4
2828 | cmp RAd, RDd
2829 | jmp_comp jge, jl, jg, jle, >9 // Inverted conditions: skip the branch (go to 9) when the comparison is false.
2830 |6:
2831 | movzx RDd, PC_RD
2832 | branchPC RD
2833 |9:
2834 | ins_next
2835 |
2836 |7: // RA is not an integer.
2837 | ja ->vmeta_comp
2838 | // RA is a number.
2839 | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
2840 | // RA is a number, RD is an integer.
2841 | cvtsi2sd xmm0, RDd
2842 | jmp >2
2843 |
2844 |8: // RA is an integer, RD is not an integer.
2845 | ja ->vmeta_comp
2846 | // RA is an integer, RD is a number.
2847 | cvtsi2sd xmm1, RAd
2848 | movd xmm0, RD
2849 | jmp >3
2850 |.else
2851 | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
2852 | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
2853 |.endif
2854 |1:
2855 | movd xmm0, RD // Note the operand order: xmm0 = RD (src2), xmm1 = RA (src1).
2856 |2:
2857 | movd xmm1, RA
2858 |3:
2859 | add PC, 4
2860 | ucomisd xmm0, xmm1
2861 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2862 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2863 |.if DUALNUM
2864 | jmp_comp jbe, ja, jb, jae, <9 // Skip-the-branch conditions for the swapped compare (src2 vs. src1).
2865 | jmp <6 // Otherwise take the branch via label 6.
2866 |.else
2867 | jmp_comp jbe, ja, jb, jae, >1 // Skip-the-branch conditions for the swapped compare (src2 vs. src1).
2868 | movzx RDd, PC_RD
2869 | branchPC RD
2870 |1:
2871 | ins_next
2872 |.endif
2873 break;
2874
2875 case BC_ISEQV: case BC_ISNEV:
2876 vk = op == BC_ISEQV;
2877 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2878 | mov RB, [BASE+RD*8]
2879 | mov ITYPE, [BASE+RA*8]
2880 | add PC, 4
2881 | mov RD, RB
2882 | mov RA, ITYPE
2883 | sar RB, 47
2884 | sar ITYPE, 47
2885 |.if DUALNUM
2886 | cmp RBd, LJ_TISNUM; jne >7
2887 | cmp ITYPEd, LJ_TISNUM; jne >8
2888 | cmp RDd, RAd
2889 if (vk) {
2890 | jne >9
2891 } else {
2892 | je >9
2893 }
2894 | movzx RDd, PC_RD
2895 | branchPC RD
2896 |9:
2897 | ins_next
2898 |
2899 |7: // RD is not an integer.
2900 | ja >5
2901 | // RD is a number.
2902 | movd xmm1, RD
2903 | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
2904 | // RD is a number, RA is an integer.
2905 | cvtsi2sd xmm0, RAd
2906 | jmp >2
2907 |
2908 |8: // RD is an integer, RA is not an integer.
2909 | ja >5
2910 | // RD is an integer, RA is a number.
2911 | cvtsi2sd xmm1, RDd
2912 | jmp >1
2913 |
2914 |.else
2915 | cmp RBd, LJ_TISNUM; jae >5
2916 | cmp ITYPEd, LJ_TISNUM; jae >5
2917 | movd xmm1, RD
2918 |.endif
2919 |1:
2920 | movd xmm0, RA
2921 |2:
2922 | ucomisd xmm0, xmm1
2923 |4:
2924 iseqne_fp:
2925 if (vk) {
2926 | jp >2 // Unordered means not equal.
2927 | jne >2
2928 } else {
2929 | jp >2 // Unordered means not equal.
2930 | je >1
2931 }
2932 iseqne_end:
2933 if (vk) {
2934 |1: // EQ: Branch to the target.
2935 | movzx RDd, PC_RD
2936 | branchPC RD
2937 |2: // NE: Fallthrough to next instruction.
2938 |.if not FFI
2939 |3:
2940 |.endif
2941 } else {
2942 |.if not FFI
2943 |3:
2944 |.endif
2945 |2: // NE: Branch to the target.
2946 | movzx RDd, PC_RD
2947 | branchPC RD
2948 |1: // EQ: Fallthrough to next instruction.
2949 }
2950 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
2951 op == BC_ISEQN || op == BC_ISNEN)) {
2952 | jmp <9
2953 } else {
2954 | ins_next
2955 }
2956 |
2957 if (op == BC_ISEQV || op == BC_ISNEV) {
2958 |5: // Either or both types are not numbers.
2959 |.if FFI
2960 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
2961 | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
2962 |.endif
2963 | cmp RA, RD
2964 | je <1 // Same GCobjs or pvalues?
2965 | cmp RBd, ITYPEd
2966 | jne <2 // Not the same type?
2967 | cmp RBd, LJ_TISTABUD
2968 | ja <2 // Different objects and not table/ud?
2969 |
2970 | // Different tables or userdatas. Need to check __eq metamethod.
2971 | // Field metatable must be at same offset for GCtab and GCudata!
2972 | cleartp TAB:RA
2973 | mov TAB:RB, TAB:RA->metatable
2974 | test TAB:RB, TAB:RB
2975 | jz <2 // No metatable?
2976 | test byte TAB:RB->nomm, 1<<MM_eq
2977 | jnz <2 // Or 'no __eq' flag set?
2978 if (vk) {
2979 | xor RBd, RBd // ne = 0
2980 } else {
2981 | mov RBd, 1 // ne = 1
2982 }
2983 | jmp ->vmeta_equal // Handle __eq metamethod.
2984 } else {
2985 |.if FFI
2986 |3:
2987 | cmp ITYPEd, LJ_TCDATA
2988 if (LJ_DUALNUM && vk) {
2989 | jne <9
2990 } else {
2991 | jne <2
2992 }
2993 | jmp ->vmeta_equal_cd
2994 |.endif
2995 }
2996 break;
2997 case BC_ISEQS: case BC_ISNES:
2998 vk = op == BC_ISEQS;
2999 | ins_AND // RA = src, RD = str const, JMP with RD = target
3000 | mov RB, [BASE+RA*8]
3001 | add PC, 4
3002 | checkstr RB, >3
3003 | cmp RB, [KBASE+RD*8]
3004 iseqne_test:
3005 if (vk) {
3006 | jne >2
3007 } else {
3008 | je >1
3009 }
3010 goto iseqne_end;
3011 case BC_ISEQN: case BC_ISNEN:
3012 vk = op == BC_ISEQN;
3013 | ins_AD // RA = src, RD = num const, JMP with RD = target
3014 | mov RB, [BASE+RA*8]
3015 | add PC, 4
3016 |.if DUALNUM
3017 | checkint RB, >7
3018 | mov RD, [KBASE+RD*8]
3019 | checkint RD, >8
3020 | cmp RBd, RDd
3021 if (vk) {
3022 | jne >9
3023 } else {
3024 | je >9
3025 }
3026 | movzx RDd, PC_RD
3027 | branchPC RD
3028 |9:
3029 | ins_next
3030 |
3031 |7: // RA is not an integer.
3032 | ja >3
3033 | // RA is a number.
3034 | mov RD, [KBASE+RD*8]
3035 | checkint RD, >1
3036 | // RA is a number, RD is an integer.
3037 | cvtsi2sd xmm0, RDd
3038 | jmp >2
3039 |
3040 |8: // RA is an integer, RD is a number.
3041 | cvtsi2sd xmm0, RBd
3042 | movd xmm1, RD
3043 | ucomisd xmm0, xmm1
3044 | jmp >4
3045 |1:
3046 | movd xmm0, RD
3047 |.else
3048 | checknum RB, >3
3049 |1:
3050 | movsd xmm0, qword [KBASE+RD*8]
3051 |.endif
3052 |2:
3053 | ucomisd xmm0, qword [BASE+RA*8]
3054 |4:
3055 goto iseqne_fp;
3056 case BC_ISEQP: case BC_ISNEP:
3057 vk = op == BC_ISEQP;
3058 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3059 | mov RB, [BASE+RA*8]
3060 | sar RB, 47
3061 | add PC, 4
3062 | cmp RBd, RDd
3063 if (!LJ_HASFFI) goto iseqne_test;
3064 if (vk) {
3065 | jne >3
3066 | movzx RDd, PC_RD
3067 | branchPC RD
3068 |2:
3069 | ins_next
3070 |3:
3071 | cmp RBd, LJ_TCDATA; jne <2
3072 | jmp ->vmeta_equal_cd
3073 } else {
3074 | je >2
3075 | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
3076 | movzx RDd, PC_RD
3077 | branchPC RD
3078 |2:
3079 | ins_next
3080 }
3081 break;
3082
3083 /* -- Unary test and copy ops ------------------------------------------- */
3084
3085 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3086 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3087 | mov ITYPE, [BASE+RD*8]
3088 | add PC, 4
3089 if (op == BC_ISTC || op == BC_ISFC) {
3090 | mov RB, ITYPE // Copying variants: keep the full value for the store below.
3091 }
3092 | sar ITYPE, 47
3093 | cmp ITYPEd, LJ_TISTRUECOND
3094 if (op == BC_IST || op == BC_ISTC) {
3095 | jae >1 // IST/ISTC: fall through to the next instruction when the compare is above-or-equal.
3096 } else {
3097 | jb >1 // ISF/ISFC: fall through to the next instruction when the compare is below.
3098 }
3099 if (op == BC_ISTC || op == BC_ISFC) {
3100 | mov [BASE+RA*8], RB // The copy to dst only happens when the branch is taken.
3101 }
3102 | movzx RDd, PC_RD
3103 | branchPC RD
3104 |1: // Fallthrough to the next instruction.
3105 | ins_next
3106 break;
3107
3108 case BC_ISTYPE:
3109 | ins_AD // RA = src, RD = -type
3110 | mov RB, [BASE+RA*8]
3111 | sar RB, 47
3112 | add RBd, RDd
3113 | jne ->vmeta_istype
3114 | ins_next
3115 break;
3116 case BC_ISNUM:
3117 | ins_AD // RA = src, RD = -(TISNUM-1)
3118 | checknumtp [BASE+RA*8], ->vmeta_istype
3119 | ins_next
3120 break;
3121
3122 /* -- Unary ops --------------------------------------------------------- */
3123
3124 case BC_MOV:
3125 | ins_AD // RA = dst, RD = src
3126 | mov RB, [BASE+RD*8]
3127 | mov [BASE+RA*8], RB
3128 | ins_next_
3129 break;
3130 case BC_NOT:
3131 | ins_AD // RA = dst, RD = src
3132 | mov RB, [BASE+RD*8]
3133 | sar RB, 47
3134 | mov RCd, 2
3135 | cmp RB, LJ_TISTRUECOND
3136 | sbb RCd, 0 // RC = 2 - CF, i.e. 1 if RB is (unsigned) below LJ_TISTRUECOND, else 2.
3137 | shl RC, 47
3138 | not RC // Stored value is ~(RC << 47); presumably the false/true tagged values -- confirm against the LJ_T* definitions.
3139 | mov [BASE+RA*8], RC
3140 | ins_next
3141 break;
3142 case BC_UNM:
3143 | ins_AD // RA = dst, RD = src
3144 | mov RB, [BASE+RD*8]
3145 |.if DUALNUM
3146 | checkint RB, >5
3147 | neg RBd
3148 | jo >4 // Negation overflowed: store the FP constant from label 4 instead.
3149 | setint RB
3150 |9:
3151 | mov [BASE+RA*8], RB
3152 | ins_next
3153 |4:
3154 | mov64 RB, U64x(41e00000,00000000) // 2^31.
3155 | jmp <9
3156 |5:
3157 | ja ->vmeta_unm // Uses the flags left by checkint; otherwise fall through to the FP path.
3158 |.else
3159 | checknum RB, ->vmeta_unm
3160 |.endif
3161 | mov64 RD, U64x(80000000,00000000) // Bit 63 only: the sign bit of a double.
3162 | xor RB, RD // Flip the sign bit.
3163 |.if DUALNUM
3164 | jmp <9
3165 |.else
3166 | mov [BASE+RA*8], RB
3167 | ins_next
3168 |.endif
3169 break;
3170 case BC_LEN:
3171 | ins_AD // RA = dst, RD = src
3172 | mov RD, [BASE+RD*8]
3173 | checkstr RD, >2
3174 |.if DUALNUM
3175 | mov RDd, dword STR:RD->len
3176 |1: // Also the re-entry point after lj_tab_len (length in RD).
3177 | setint RD
3178 | mov [BASE+RA*8], RD
3179 |.else
3180 | xorps xmm0, xmm0
3181 | cvtsi2sd xmm0, dword STR:RD->len
3182 |1: // Also the re-entry point after lj_tab_len (length in xmm0).
3183 | movsd qword [BASE+RA*8], xmm0
3184 |.endif
3185 | ins_next
3186 |2: // Not a string.
3187 | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len // ITYPE is presumably left holding the type tag by checkstr -- confirm.
3188 | mov TAB:CARG1, TAB:RD
3189#if LJ_52
3190 | mov TAB:RB, TAB:RD->metatable
3191 | cmp TAB:RB, 0
3192 | jnz >9 // Table has a metatable: check for __len at label 9.
3193 |3:
3194#endif
3195 |->BC_LEN_Z:
3196 | mov RB, BASE // Save BASE.
3197 | call extern lj_tab_len // (GCtab *t)
3198 | // Length of table returned in eax (RD).
3199 |.if DUALNUM
3200 | // Nothing to do.
3201 |.else
3202 | cvtsi2sd xmm0, RDd
3203 |.endif
3204 | mov BASE, RB // Restore BASE.
3205 | movzx RAd, PC_RA // Reload RA from the instruction after the call.
3206 | jmp <1
3207#if LJ_52
3208 |9: // Check for __len.
3209 | test byte TAB:RB->nomm, 1<<MM_len
3210 | jnz <3 // 'no __len' flag set: use the plain table length.
3211 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3212#endif
3213 break;
3214
3215 /* -- Binary ops -------------------------------------------------------- */
3216
3217 |.macro ins_arithpre, sseins, ssereg // FP operand setup: check both operands, load the first into xmm0, then apply 'sseins ssereg, <second operand>'. vk selects the operand layout: 0 -> vmeta_arith_vn, 1 -> vmeta_arith_nv, otherwise vmeta_arith_vv.
3218 | ins_ABC
3219 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3220 ||switch (vk) {
3221 ||case 0:
3222 | checknumtp [BASE+RB*8], ->vmeta_arith_vn
3223 | .if DUALNUM
3224 | checknumtp [KBASE+RC*8], ->vmeta_arith_vn // The constant is only checked in DUALNUM builds.
3225 | .endif
3226 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
3227 | sseins ssereg, qword [KBASE+RC*8] // Second operand: constant RC.
3228 || break;
3229 ||case 1:
3230 | checknumtp [BASE+RB*8], ->vmeta_arith_nv
3231 | .if DUALNUM
3232 | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
3233 | .endif
3234 | movsd xmm0, qword [KBASE+RC*8] // First operand: constant RC.
3235 | sseins ssereg, qword [BASE+RB*8] // Second operand: stack slot RB.
3236 || break;
3237 ||default:
3238 | checknumtp [BASE+RB*8], ->vmeta_arith_vv
3239 | checknumtp [BASE+RC*8], ->vmeta_arith_vv
3240 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
3241 | sseins ssereg, qword [BASE+RC*8] // Second operand: stack slot RC.
3242 || break;
3243 ||}
3244 |.endmacro
3245 |
3246 |.macro ins_arithdn, intins // Integer arithmetic: both operands must pass checkint; a failed check or signed overflow branches to the vmeta_arith_*o label for the operand layout.
3247 | ins_ABC
3248 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3249 ||switch (vk) {
3250 ||case 0:
3251 | mov RB, [BASE+RB*8] // Stack slot operand.
3252 | mov RC, [KBASE+RC*8] // Constant operand.
3253 | checkint RB, ->vmeta_arith_vno
3254 | checkint RC, ->vmeta_arith_vno
3255 | intins RBd, RCd; jo ->vmeta_arith_vno // Result in RB.
3256 || break;
3257 ||case 1:
3258 | mov RB, [BASE+RB*8] // Stack slot operand.
3259 | mov RC, [KBASE+RC*8] // Constant operand.
3260 | checkint RB, ->vmeta_arith_nvo
3261 | checkint RC, ->vmeta_arith_nvo
3262 | intins RCd, RBd; jo ->vmeta_arith_nvo // Operands swapped: result in RC.
3263 || break;
3264 ||default:
3265 | mov RB, [BASE+RB*8]
3266 | mov RC, [BASE+RC*8]
3267 | checkint RB, ->vmeta_arith_vvo
3268 | checkint RC, ->vmeta_arith_vvo
3269 | intins RBd, RCd; jo ->vmeta_arith_vvo // Result in RB.
3270 || break;
3271 ||}
3272 ||if (vk == 1) {
3273 | setint RC // Store from RC, where the swapped-operand case left the result.
3274 | mov [BASE+RA*8], RC
3275 ||} else {
3276 | setint RB
3277 | mov [BASE+RA*8], RB
3278 ||}
3279 | ins_next
3280 |.endmacro
3281 |
3282 |.macro ins_arithpost // Store the FP result held in xmm0 to stack slot RA.
3283 | movsd qword [BASE+RA*8], xmm0
3284 |.endmacro
3285 |
3286 |.macro ins_arith, sseins // One-argument form: FP only. Applies sseins to xmm0, stores xmm0 to slot RA, dispatches.
3287 | ins_arithpre sseins, xmm0
3288 | ins_arithpost
3289 | ins_next
3290 |.endmacro
3291 |
3292 |.macro ins_arith, intins, sseins // Two-argument form: integer variant under DUALNUM, otherwise the FP variant.
3293 |.if DUALNUM
3294 | ins_arithdn intins
3295 |.else
3296 | ins_arith, sseins // Expands the one-argument ins_arith with sseins.
3297 |.endif
3298 |.endmacro
3299
3300 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3301 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Addition, all three operand layouts. */
3302 | ins_arith add, addsd
3303 break;
3304 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Subtraction. */
3305 | ins_arith sub, subsd
3306 break;
3307 case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Multiplication. */
3308 | ins_arith imul, mulsd
3309 break;
3310 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: /* Division. */
3311 | ins_arith divsd // One-argument form: no integer instruction is passed, so the FP path is used.
3312 break;
3313 case BC_MODVN: /* Modulo, slot/constant layout; also hosts the shared tail BC_MODVN_Z. */
3314 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3315 |->BC_MODVN_Z: // Entry point shared with BC_MODNV/BC_MODVV.
3316 | call ->vm_mod
3317 | ins_arithpost // Stores xmm0 to slot RA.
3318 | ins_next
3319 break;
3320 case BC_MODNV: case BC_MODVV: /* Modulo, remaining layouts: load operands, then reuse the BC_MODVN tail. */
3321 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3322 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3323 break;
3324 case BC_POW: /* Exponentiation via the external pow function. */
3325 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand.
3326 | mov RB, BASE // Keep BASE in RB across the call.
3327 | call extern pow
3328 | movzx RAd, PC_RA // Reload the destination operand after the call.
3329 | mov BASE, RB // Restore BASE.
3330 | ins_arithpost // Stores xmm0 to slot RA.
3331 | ins_next
3332 break;
3333
3334 case BC_CAT: /* Concatenation of slots RB..RC via lj_meta_cat; result is copied to slot RA. */
3335 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3336 | mov L:CARG1, SAVE_L
3337 | mov L:CARG1->base, BASE // Publish BASE to L before the call.
3338 | lea CARG2, [BASE+RC*8] // top = address of the last source slot.
3339 | mov CARG3d, RCd
3340 | sub CARG3d, RBd // left = src_end - src_start.
3341 |->BC_CAT_Z:
3342 | mov L:RB, L:CARG1 // Keep L in RB across the call.
3343 | mov SAVE_PC, PC
3344 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3345 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3346 | mov BASE, L:RB->base // Reload BASE from L after the call.
3347 | test RC, RC
3348 | jnz ->vmeta_binop // Non-NULL return: continue with the metamethod.
3349 | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
3350 | movzx RAd, PC_RA
3351 | mov RC, [BASE+RB*8]
3352 | mov [BASE+RA*8], RC
3353 | ins_next
3354 break;
3355
3356 /* -- Constant ops ------------------------------------------------------ */
3357
3358 case BC_KSTR: /* Load a string constant into slot RA. */
3359 | ins_AND // RA = dst, RD = str const (~)
3360 | mov RD, [KBASE+RD*8] // Fetch the constant from the constant table.
3361 | settp RD, LJ_TSTR // Tag it as a string.
3362 | mov [BASE+RA*8], RD
3363 | ins_next
3364 break;
3365 case BC_KCDATA: /* Load a cdata constant into slot RA; no code is emitted unless FFI is enabled. */
3366 |.if FFI
3367 | ins_AND // RA = dst, RD = cdata const (~)
3368 | mov RD, [KBASE+RD*8] // Fetch the constant from the constant table.
3369 | settp RD, LJ_TCDATA // Tag it as cdata.
3370 | mov [BASE+RA*8], RD
3371 | ins_next
3372 |.endif
3373 break;
3374 case BC_KSHORT: /* Load a signed 16-bit literal into slot RA (integer under DUALNUM, double otherwise). */
3375 | ins_AD // RA = dst, RD = signed int16 literal
3376 |.if DUALNUM
3377 | movsx RDd, RDW // Sign-extend literal.
3378 | setint RD
3379 | mov [BASE+RA*8], RD
3380 |.else
3381 | movsx RDd, RDW // Sign-extend literal.
3382 | cvtsi2sd xmm0, RDd // Convert to double.
3383 | movsd qword [BASE+RA*8], xmm0
3384 |.endif
3385 | ins_next
3386 break;
3387 case BC_KNUM: /* Copy a number constant into slot RA. */
3388 | ins_AD // RA = dst, RD = num const
3389 | movsd xmm0, qword [KBASE+RD*8] // Load the constant.
3390 | movsd qword [BASE+RA*8], xmm0 // Store it unchanged.
3391 | ins_next
3392 break;
3393 case BC_KPRI: /* Store a primitive built from the operand RD into slot RA. */
3394 | ins_AD // RA = dst, RD = primitive type (~)
3395 | shl RD, 47 // Move the operand into the tag position ...
3396 | not RD // ... and invert all bits to form the stored value.
3397 | mov [BASE+RA*8], RD
3398 | ins_next
3399 break;
3400 case BC_KNIL: /* Fill slots RA..RD (inclusive) with nil. */
3401 | ins_AD // RA = dst_start, RD = dst_end
3402 | lea RA, [BASE+RA*8+8] // Address of the second slot.
3403 | lea RD, [BASE+RD*8] // Address of the last slot.
3404 | mov RB, LJ_TNIL
3405 | mov [RA-8], RB // Sets minimum 2 slots.
3406 |1:
3407 | mov [RA], RB
3408 | add RA, 8
3409 | cmp RA, RD
3410 | jbe <1 // Loop while RA has not passed the last slot.
3411 | ins_next
3412 break;
3413
3414 /* -- Upvalue and function ops ------------------------------------------ */
3415
3416 case BC_UGET: /* Read upvalue #RD of the current function into slot RA. */
3417 | ins_AD // RA = dst, RD = upvalue #
3418 | mov LFUNC:RB, [BASE-16] // Tagged function value stored below BASE.
3419 | cleartp LFUNC:RB // Strip the type tag to get the pointer.
3420 | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] // uvptr[RD].
3421 | mov RB, UPVAL:RB->v // Pointer to the upvalue's value.
3422 | mov RD, [RB]
3423 | mov [BASE+RA*8], RD
3424 | ins_next
3425 break;
3426 case BC_USETV: /* Store slot RD into upvalue #RA, with a GC write barrier check for closed upvalues. */
3427#define TV2MARKOFS \
3428 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
3429 | ins_AD // RA = upvalue #, RD = src
3430 | mov LFUNC:RB, [BASE-16]
3431 | cleartp LFUNC:RB
3432 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3433 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below; the movs in between leave them intact.
3434 | mov RB, UPVAL:RB->v
3435 | mov RA, [BASE+RD*8]
3436 | mov [RB], RA // Perform the store first.
3437 | jz >1 // Upvalue not closed: no barrier needed.
3438 | // Check barrier for closed upvalue.
3439 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
3440 | jnz >2
3441 |1:
3442 | ins_next
3443 |
3444 |2: // Upvalue is black. Check if new value is collectable and white.
3445 | mov RD, RA
3446 | sar RD, 47 // Type tag of the stored value.
3447 | sub RDd, LJ_TISGCV
3448 | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
3449 | jbe <1 // Not a collectable value: done.
3450 | cleartp GCOBJ:RA
3451 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
3452 | jz <1 // Value is not white: done.
3453 | // Crossed a write barrier. Move the barrier forward.
3454 |.if not X64WIN
3455 | mov CARG2, RB
3456 | mov RB, BASE // Save BASE.
3457 |.else
3458 | xchg CARG2, RB // Save BASE (CARG2 == BASE).
3459 |.endif
3460 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3461 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3462 | mov BASE, RB // Restore BASE.
3463 | jmp <1
3464 break;
3465#undef TV2MARKOFS
3466 case BC_USETS: /* Store a string constant into upvalue #RA, with a GC write barrier check. */
3467 | ins_AND // RA = upvalue #, RD = str const (~)
3468 | mov LFUNC:RB, [BASE-16]
3469 | cleartp LFUNC:RB
3470 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3471 | mov STR:RA, [KBASE+RD*8] // Untagged string pointer, kept for the barrier check.
3472 | mov RD, UPVAL:RB->v
3473 | settp STR:ITYPE, STR:RA, LJ_TSTR // Tagged copy in ITYPE.
3474 | mov [RD], STR:ITYPE
3475 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
3476 | jnz >2
3477 |1:
3478 | ins_next
3479 |
3480 |2: // Check if string is white and ensure upvalue is closed.
3481 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
3482 | jz <1
3483 | cmp byte UPVAL:RB->closed, 0
3484 | jz <1 // Upvalue not closed: no barrier.
3485 | // Crossed a write barrier. Move the barrier forward.
3486 | mov RB, BASE // Save BASE (CARG2 == BASE).
3487 | mov CARG2, RD
3488 | lea GL:CARG1, [DISPATCH+GG_DISP2G]
3489 | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
3490 | mov BASE, RB // Restore BASE.
3491 | jmp <1
3492 break;
3493 case BC_USETN: /* Store a number constant into upvalue #RA; no barrier code is emitted. */
3494 | ins_AD // RA = upvalue #, RD = num const
3495 | mov LFUNC:RB, [BASE-16]
3496 | cleartp LFUNC:RB
3497 | movsd xmm0, qword [KBASE+RD*8] // Load the constant.
3498 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3499 | mov RA, UPVAL:RB->v // Pointer to the upvalue's value.
3500 | movsd qword [RA], xmm0
3501 | ins_next
3502 break;
3503 case BC_USETP: /* Store a primitive built from operand RD into upvalue #RA; no barrier code is emitted. */
3504 | ins_AD // RA = upvalue #, RD = primitive type (~)
3505 | mov LFUNC:RB, [BASE-16]
3506 | cleartp LFUNC:RB
3507 | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
3508 | shl RD, 47 // Move the operand into the tag position ...
3509 | not RD // ... and invert all bits to form the stored value.
3510 | mov RA, UPVAL:RB->v
3511 | mov [RA], RD
3512 | ins_next
3513 break;
3514 case BC_UCLO: /* Branch to the target; if L has open upvalues, call lj_func_closeuv with level = slot RA. */
3515 | ins_AD // RA = level, RD = target
3516 | branchPC RD // Do this first to free RD.
3517 | mov L:RB, SAVE_L
3518 | cmp aword L:RB->openupval, 0
3519 | je >1 // No open upvalues: nothing to close.
3520 | mov L:RB->base, BASE
3521 | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
3522 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3523 | call extern lj_func_closeuv // (lua_State *L, TValue *level)
3524 | mov BASE, L:RB->base // Reload BASE from L after the call.
3525 |1:
3526 | ins_next
3527 break;
3528
3529 case BC_FNEW: /* Create a function object from a prototype constant and store it in slot RA. */
3530 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
3531 | mov L:RB, SAVE_L
3532 | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3533 | mov CARG3, [BASE-16] // Current function becomes the parent argument.
3534 | cleartp CARG3
3535 | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
3536 | mov CARG1, L:RB
3537 | mov SAVE_PC, PC
3538 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
3539 | call extern lj_func_newL_gc
3540 | // GCfuncL * returned in eax (RC).
3541 | mov BASE, L:RB->base // Reload BASE from L after the call.
3542 | movzx RAd, PC_RA // Reload the destination operand.
3543 | settp LFUNC:RC, LJ_TFUNC // Tag the returned pointer as a function.
3544 | mov [BASE+RA*8], LFUNC:RC
3545 | ins_next
3546 break;
3547
3548 /* -- Table ops --------------------------------------------------------- */
3549
3550 case BC_TNEW: /* Create a new table sized from operand RD and store it in slot RA; runs a GC step first if over threshold. */
3551 | ins_AD // RA = dst, RD = hbits|asize
3552 | mov L:RB, SAVE_L
3553 | mov L:RB->base, BASE
3554 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3555 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3556 | mov SAVE_PC, PC // mov leaves the compare flags intact for the jae below.
3557 | jae >5 // gc.total >= gc.threshold: do a GC step at 5.
3558 |1:
3559 | mov CARG3d, RDd
3560 | and RDd, 0x7ff // asize = low 11 bits.
3561 | shr CARG3d, 11 // hbits = remaining upper bits.
3562 | cmp RDd, 0x7ff
3563 | je >3
3564 |2:
3565 | mov L:CARG1, L:RB
3566 | mov CARG2d, RDd
3567 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
3568 | // Table * returned in eax (RC).
3569 | mov BASE, L:RB->base
3570 | movzx RAd, PC_RA // Reload the destination operand.
3571 | settp TAB:RC, LJ_TTAB
3572 | mov [BASE+RA*8], TAB:RC
3573 | ins_next
3574 |3: // Turn 0x7ff into 0x801.
3575 | mov RDd, 0x801
3576 | jmp <2
3577 |5:
3578 | mov L:CARG1, L:RB
3579 | call extern lj_gc_step_fixtop // (lua_State *L)
3580 | movzx RDd, PC_RD // Reload RD after the call.
3581 | jmp <1
3582 break;
3583 case BC_TDUP: /* Duplicate a template table constant into slot RA; runs a GC step first if over threshold. */
3584 | ins_AND // RA = dst, RD = table const (~) (holding template table)
3585 | mov L:RB, SAVE_L
3586 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
3587 | mov SAVE_PC, PC
3588 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
3589 | mov L:RB->base, BASE // mov leaves the compare flags intact for the jae below.
3590 | jae >3 // gc.total >= gc.threshold: do a GC step at 3.
3591 |2:
3592 | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
3593 | mov L:CARG1, L:RB // Caveat: CARG1 == RA
3594 | call extern lj_tab_dup // (lua_State *L, Table *kt)
3595 | // Table * returned in eax (RC).
3596 | mov BASE, L:RB->base
3597 | movzx RAd, PC_RA
3598 | settp TAB:RC, LJ_TTAB
3599 | mov [BASE+RA*8], TAB:RC
3600 | ins_next
3601 |3:
3602 | mov L:CARG1, L:RB
3603 | call extern lj_gc_step_fixtop // (lua_State *L)
3604 | movzx RDd, PC_RD // Need to reload RD.
3605 | not RD // Re-apply the inversion before RD is used as a constant index again.
3606 | jmp <2
3607 break;
3608
3609 case BC_GGET: /* Lookup of a string key in the current function's env table; continues in BC_TGETS_Z. */
3610 | ins_AND // RA = dst, RD = str const (~)
3611 | mov LFUNC:RB, [BASE-16]
3612 | cleartp LFUNC:RB
3613 | mov TAB:RB, LFUNC:RB->env // RB = table, as BC_TGETS_Z expects.
3614 | mov STR:RC, [KBASE+RD*8] // RC = key string, as BC_TGETS_Z expects.
3615 | jmp ->BC_TGETS_Z
3616 break;
3617 case BC_GSET: /* Store under a string key in the current function's env table; continues in BC_TSETS_Z. */
3618 | ins_AND // RA = src, RD = str const (~)
3619 | mov LFUNC:RB, [BASE-16]
3620 | cleartp LFUNC:RB
3621 | mov TAB:RB, LFUNC:RB->env // RB = table, as BC_TSETS_Z expects.
3622 | mov STR:RC, [KBASE+RD*8] // RC = key string, as BC_TSETS_Z expects.
3623 | jmp ->BC_TSETS_Z
3624 break;
3625
3626 case BC_TGETV: /* Table get with a variable key: inline array-part fast path, string keys go to BC_TGETS_Z, the rest to vmeta_tgetv. */
3627 | ins_ABC // RA = dst, RB = table, RC = key
3628 | mov TAB:RB, [BASE+RB*8]
3629 | mov RC, [BASE+RC*8]
3630 | checktab TAB:RB, ->vmeta_tgetv
3631 |
3632 | // Integer key?
3633 |.if DUALNUM
3634 | checkint RC, >5
3635 |.else
3636 | // Convert number to int and back and compare.
3637 | checknum RC, >5
3638 | movd xmm0, RC
3639 | cvttsd2si RCd, xmm0
3640 | cvtsi2sd xmm1, RCd
3641 | ucomisd xmm0, xmm1
3642 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
3643 |.endif
3644 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3645 | jae ->vmeta_tgetv // Not in array part? Use fallback.
3646 | shl RCd, 3 // Index -> byte offset (8 bytes per slot).
3647 | add RC, TAB:RB->array
3648 | // Get array slot.
3649 | mov ITYPE, [RC]
3650 | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
3651 | je >2
3652 |1:
3653 | mov [BASE+RA*8], ITYPE
3654 | ins_next
3655 |
3656 |2: // Check for __index if table value is nil.
3657 | mov TAB:TMPR, TAB:RB->metatable
3658 | test TAB:TMPR, TAB:TMPR
3659 | jz <1 // No metatable: store the nil.
3660 | test byte TAB:TMPR->nomm, 1<<MM_index
3661 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
3662 | jmp <1
3663 |
3664 |5: // String key?
3665 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
3666 | cleartp STR:RC // BC_TGETS_Z expects an untagged GCstr * in RC.
3667 | jmp ->BC_TGETS_Z
3668 break;
3669 case BC_TGETS: /* Table get with a constant string key: inline hash chain lookup. */
3670 | ins_ABC // RA = dst, RB = table, RC = str const (~)
3671 | mov TAB:RB, [BASE+RB*8]
3672 | not RC // Undo the (~) encoding of the constant index.
3673 | mov STR:RC, [KBASE+RC*8]
3674 | checktab TAB:RB, ->vmeta_tgets
3675 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
3676 | mov TMPRd, TAB:RB->hmask
3677 | and TMPRd, STR:RC->hash // Node index = hash & hmask.
3678 | imul TMPRd, #NODE // Scale by the node size.
3679 | add NODE:TMPR, TAB:RB->node // TMPR = first node of the chain.
3680 | settp ITYPE, STR:RC, LJ_TSTR // Tagged key to compare against node keys.
3681 |1:
3682 | cmp NODE:TMPR->key, ITYPE
3683 | jne >4
3684 | // Get node value.
3685 | mov ITYPE, NODE:TMPR->val
3686 | cmp ITYPE, LJ_TNIL
3687 | je >5 // Key found, but nil value?
3688 |2:
3689 | mov [BASE+RA*8], ITYPE
3690 | ins_next
3691 |
3692 |4: // Follow hash chain.
3693 | mov NODE:TMPR, NODE:TMPR->next
3694 | test NODE:TMPR, NODE:TMPR
3695 | jnz <1
3696 | // End of hash chain: key not found, nil result.
3697 | mov ITYPE, LJ_TNIL
3698 |
3699 |5: // Check for __index if table value is nil.
3700 | mov TAB:TMPR, TAB:RB->metatable
3701 | test TAB:TMPR, TAB:TMPR
3702 | jz <2 // No metatable: done.
3703 | test byte TAB:TMPR->nomm, 1<<MM_index
3704 | jnz <2 // 'no __index' flag set: done.
3705 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
3706 break;
3707 case BC_TGETB: /* Table get with a byte literal key: array-part fast path, otherwise vmeta_tgetb. */
3708 | ins_ABC // RA = dst, RB = table, RC = byte literal
3709 | mov TAB:RB, [BASE+RB*8]
3710 | checktab TAB:RB, ->vmeta_tgetb
3711 | cmp RCd, TAB:RB->asize
3712 | jae ->vmeta_tgetb // Not in array part: use fallback.
3713 | shl RCd, 3 // Index -> byte offset (8 bytes per slot).
3714 | add RC, TAB:RB->array
3715 | // Get array slot.
3716 | mov ITYPE, [RC]
3717 | cmp ITYPE, LJ_TNIL
3718 | je >2
3719 |1:
3720 | mov [BASE+RA*8], ITYPE
3721 | ins_next
3722 |
3723 |2: // Check for __index if table value is nil.
3724 | mov TAB:TMPR, TAB:RB->metatable
3725 | test TAB:TMPR, TAB:TMPR
3726 | jz <1 // No metatable: store the nil.
3727 | test byte TAB:TMPR->nomm, 1<<MM_index
3728 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
3729 | jmp <1
3730 break;
3731 case BC_TGETR: /* Array-part get with no type checks on table or key and no metatable lookup in this block. */
3732 | ins_ABC // RA = dst, RB = table, RC = key
3733 | mov TAB:RB, [BASE+RB*8]
3734 | cleartp TAB:RB // Tag is stripped without a checktab.
3735 |.if DUALNUM
3736 | mov RCd, dword [BASE+RC*8] // Low dword of the key slot is used as the index.
3737 |.else
3738 | cvttsd2si RCd, qword [BASE+RC*8] // Truncating double -> int conversion of the key.
3739 |.endif
3740 | cmp RCd, TAB:RB->asize
3741 | jae ->vmeta_tgetr // Not in array part? Use fallback.
3742 | shl RCd, 3
3743 | add RC, TAB:RB->array
3744 | // Get array slot.
3745 |->BC_TGETR_Z: // Global label: entry with RC = slot address.
3746 | mov ITYPE, [RC]
3747 |->BC_TGETR2_Z: // Global label: entry with ITYPE = value to store.
3748 | mov [BASE+RA*8], ITYPE
3749 | ins_next
3750 break;
3751
3752 case BC_TSETV: /* Table set with a variable key: inline array-part fast path, string keys go to BC_TSETS_Z, the rest to vmeta_tsetv. */
3753 | ins_ABC // RA = src, RB = table, RC = key
3754 | mov TAB:RB, [BASE+RB*8]
3755 | mov RC, [BASE+RC*8]
3756 | checktab TAB:RB, ->vmeta_tsetv
3757 |
3758 | // Integer key?
3759 |.if DUALNUM
3760 | checkint RC, >5
3761 |.else
3762 | // Convert number to int and back and compare.
3763 | checknum RC, >5
3764 | movd xmm0, RC
3765 | cvttsd2si RCd, xmm0
3766 | cvtsi2sd xmm1, RCd
3767 | ucomisd xmm0, xmm1
3768 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
3769 |.endif
3770 | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
3771 | jae ->vmeta_tsetv // Not in array part: use fallback.
3772 | shl RCd, 3 // Index -> byte offset (8 bytes per slot).
3773 | add RC, TAB:RB->array
3774 | cmp aword [RC], LJ_TNIL
3775 | je >3 // Previous value is nil?
3776 |1:
3777 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3778 | jnz >7
3779 |2: // Set array slot.
3780 | mov RB, [BASE+RA*8]
3781 | mov [RC], RB
3782 | ins_next
3783 |
3784 |3: // Check for __newindex if previous value is nil.
3785 | mov TAB:TMPR, TAB:RB->metatable
3786 | test TAB:TMPR, TAB:TMPR
3787 | jz <1 // No metatable: plain store.
3788 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3789 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
3790 | jmp <1
3791 |
3792 |5: // String key?
3793 | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
3794 | cleartp STR:RC // BC_TSETS_Z expects an untagged GCstr * in RC.
3795 | jmp ->BC_TSETS_Z
3796 |
3797 |7: // Possible table write barrier for the value. Skip valiswhite check.
3798 | barrierback TAB:RB, TMPR
3799 | jmp <2
3800 break;
3801 case BC_TSETS: /* Table set with a constant string key: inline hash chain lookup; a missing key is added via lj_tab_newkey. */
3802 | ins_ABC // RA = src, RB = table, RC = str const (~)
3803 | mov TAB:RB, [BASE+RB*8]
3804 | not RC // Undo the (~) encoding of the constant index.
3805 | mov STR:RC, [KBASE+RC*8]
3806 | checktab TAB:RB, ->vmeta_tsets
3807 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
3808 | mov TMPRd, TAB:RB->hmask
3809 | and TMPRd, STR:RC->hash // Node index = hash & hmask.
3810 | imul TMPRd, #NODE // Scale by the node size.
3811 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
3812 | add NODE:TMPR, TAB:RB->node // TMPR = first node of the chain.
3813 | settp ITYPE, STR:RC, LJ_TSTR // Tagged key to compare against node keys.
3814 |1:
3815 | cmp NODE:TMPR->key, ITYPE
3816 | jne >5
3817 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3818 | cmp aword [TMPR], LJ_TNIL
3819 | je >4 // Previous value is nil?
3820 |2:
3821 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3822 | jnz >7
3823 |3: // Set node value.
3824 | mov ITYPE, [BASE+RA*8]
3825 | mov [TMPR], ITYPE
3826 | ins_next
3827 |
3828 |4: // Check for __newindex if previous value is nil.
3829 | mov TAB:ITYPE, TAB:RB->metatable
3830 | test TAB:ITYPE, TAB:ITYPE
3831 | jz <2 // No metatable: plain store.
3832 | test byte TAB:ITYPE->nomm, 1<<MM_newindex
3833 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3834 | jmp <2
3835 |
3836 |5: // Follow hash chain.
3837 | mov NODE:TMPR, NODE:TMPR->next
3838 | test NODE:TMPR, NODE:TMPR
3839 | jnz <1
3840 | // End of hash chain: key not found, add a new one.
3841 |
3842 | // But check for __newindex first.
3843 | mov TAB:TMPR, TAB:RB->metatable
3844 | test TAB:TMPR, TAB:TMPR
3845 | jz >6 // No metatable: continue.
3846 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3847 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3848 |6:
3849 | mov TMP1, ITYPE // Spill the tagged key so its address can be passed as TValue *k.
3850 | mov L:CARG1, SAVE_L
3851 | mov L:CARG1->base, BASE
3852 | lea CARG3, TMP1
3853 | mov CARG2, TAB:RB
3854 | mov SAVE_PC, PC
3855 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3856 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3857 | mov L:CARG1, SAVE_L
3858 | mov BASE, L:CARG1->base
3859 | mov TMPR, rax // TMPR = address of the new value slot, as the store at 3 expects.
3860 | movzx RAd, PC_RA // Reload the source operand.
3861 | jmp <2 // Must check write barrier for value.
3862 |
3863 |7: // Possible table write barrier for the value. Skip valiswhite check.
3864 | barrierback TAB:RB, ITYPE
3865 | jmp <3
3866 break;
3867 case BC_TSETB: /* Table set with a byte literal key: array-part fast path, otherwise vmeta_tsetb. */
3868 | ins_ABC // RA = src, RB = table, RC = byte literal
3869 | mov TAB:RB, [BASE+RB*8]
3870 | checktab TAB:RB, ->vmeta_tsetb
3871 | cmp RCd, TAB:RB->asize
3872 | jae ->vmeta_tsetb // Not in array part: use fallback.
3873 | shl RCd, 3 // Index -> byte offset (8 bytes per slot).
3874 | add RC, TAB:RB->array
3875 | cmp aword [RC], LJ_TNIL
3876 | je >3 // Previous value is nil?
3877 |1:
3878 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3879 | jnz >7
3880 |2: // Set array slot.
3881 | mov ITYPE, [BASE+RA*8]
3882 | mov [RC], ITYPE
3883 | ins_next
3884 |
3885 |3: // Check for __newindex if previous value is nil.
3886 | mov TAB:TMPR, TAB:RB->metatable
3887 | test TAB:TMPR, TAB:TMPR
3888 | jz <1 // No metatable: plain store.
3889 | test byte TAB:TMPR->nomm, 1<<MM_newindex
3890 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3891 | jmp <1
3892 |
3893 |7: // Possible table write barrier for the value. Skip valiswhite check.
3894 | barrierback TAB:RB, TMPR
3895 | jmp <2
3896 break;
3897 case BC_TSETR: /* Array-part set with no type checks on table or key and no metatable lookup in this block. */
3898 | ins_ABC // RA = src, RB = table, RC = key
3899 | mov TAB:RB, [BASE+RB*8]
3900 | cleartp TAB:RB // Tag is stripped without a checktab.
3901 |.if DUALNUM
3902 | mov RC, [BASE+RC*8] // Full slot is loaded; only RCd is compared against asize below.
3903 |.else
3904 | cvttsd2si RCd, qword [BASE+RC*8] // Truncating double -> int conversion of the key.
3905 |.endif
3906 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3907 | jnz >7
3908 |2:
3909 | cmp RCd, TAB:RB->asize
3910 | jae ->vmeta_tsetr // Not in array part: use fallback.
3911 | shl RCd, 3
3912 | add RC, TAB:RB->array
3913 | // Set array slot.
3914 |->BC_TSETR_Z: // Global label: entry with RC = slot address.
3915 | mov ITYPE, [BASE+RA*8]
3916 | mov [RC], ITYPE
3917 | ins_next
3918 |
3919 |7: // Possible table write barrier for the value. Skip valiswhite check.
3920 | barrierback TAB:RB, TMPR
3921 | jmp <2
3922 break;
3923
3924 case BC_TSETM: /* Copy MULTRES-1 result slots starting at slot RA into the array part of the table at RA-1, resizing if needed. */
3925 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3926 |1: // Retry point after a resize.
3927 | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
3928 | lea RA, [BASE+RA*8]
3929 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3930 | cleartp TAB:RB
3931 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3932 | jnz >7
3933 |2:
3934 | mov RDd, MULTRES
3935 | sub RDd, 1 // Number of slots to copy.
3936 | jz >4 // Nothing to copy?
3937 | add RDd, TMPRd // Compute needed size.
3938 | cmp RDd, TAB:RB->asize
3939 | ja >5 // Doesn't fit into array part?
3940 | sub RDd, TMPRd // Back to the copy count.
3941 | shl TMPRd, 3 // Start index -> byte offset.
3942 | add TMPR, TAB:RB->array // TMPR = destination address.
3943 |3: // Copy result slots to table.
3944 | mov RB, [RA]
3945 | add RA, 8
3946 | mov [TMPR], RB
3947 | add TMPR, 8
3948 | sub RDd, 1
3949 | jnz <3
3950 |4:
3951 | ins_next
3952 |
3953 |5: // Need to resize array part.
3954 | mov L:CARG1, SAVE_L
3955 | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
3956 | mov CARG2, TAB:RB
3957 | mov CARG3d, RDd // Needed size computed above.
3958 | mov L:RB, L:CARG1 // Keep L in RB across the call.
3959 | mov SAVE_PC, PC
3960 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3961 | mov BASE, L:RB->base
3962 | movzx RAd, PC_RA // Restore RA.
3963 | movzx RDd, PC_RD // Restore RD.
3964 | jmp <1 // Retry.
3965 |
3966 |7: // Possible table write barrier for any value. Skip valiswhite check.
3967 | barrierback TAB:RB, RD
3968 | jmp <2
3969 break;
3970
3971 /* -- Calls and vararg handling ----------------------------------------- */
3972
3973 case BC_CALL: case BC_CALLM: /* Call the function in slot RA; BC_CALLM first adds MULTRES to the argument count. */
3974 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3975 if (op == BC_CALLM) {
3976 | add NARGS:RDd, MULTRES
3977 }
3978 | mov LFUNC:RB, [BASE+RA*8]
3979 | checkfunc LFUNC:RB, ->vmeta_call_ra // Not a function: handled at vmeta_call_ra.
3980 | lea BASE, [BASE+RA*8+16] // New BASE is two slots above the function slot.
3981 | ins_call
3982 break;
3983
3984 case BC_CALLMT: /* Tail call with extra arguments: adjust the count, then continue into BC_CALLT's code. */
3985 | ins_AD // RA = base, RD = extra_nargs
3986 | add NARGS:RDd, MULTRES
3987 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
3988 break; /* Ends only the generator's C case; no jump is emitted here. */
3989 case BC_CALLT: /* Tail call: move function and arguments down to the current frame, then dispatch with ins_callt. */
3990 | ins_AD // RA = base, RD = nargs+1
3991 | lea RA, [BASE+RA*8+16] // RA = address of the first argument.
3992 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3993 | mov LFUNC:RB, [RA-16] // Function value to call.
3994 | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
3995 |->BC_CALLT_Z:
3996 | mov PC, [BASE-8] // Frame link word of the current frame.
3997 | test PCd, FRAME_TYPE
3998 | jnz >7 // Frame type bits set: handled at 7.
3999 |1:
4000 | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
4001 | mov MULTRES, NARGS:RDd // Save the count; RD is used as loop counter.
4002 | sub NARGS:RDd, 1
4003 | jz >3 // No arguments to move.
4004 |2: // Move args down.
4005 | mov RB, [RA]
4006 | add RA, 8
4007 | mov [KBASE], RB
4008 | add KBASE, 8
4009 | sub NARGS:RDd, 1
4010 | jnz <2
4011 |
4012 | mov LFUNC:RB, [BASE-16] // RB was clobbered by the copy loop.
4013 |3:
4014 | cleartp LFUNC:RB
4015 | mov NARGS:RDd, MULTRES // Restore the argument count.
4016 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4017 | ja >5
4018 |4:
4019 | ins_callt
4020 |
4021 |5: // Tailcall to a fast function.
4022 | test PCd, FRAME_TYPE // Lua frame below?
4023 | jnz <4
4024 | movzx RAd, PC_RA
4025 | neg RA
4026 | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
4027 | cleartp LFUNC:KBASE
4028 | mov KBASE, LFUNC:KBASE->pc
4029 | mov KBASE, [KBASE+PC2PROTO(k)]
4030 | jmp <4
4031 |
4032 |7: // Tailcall from a vararg function.
4033 | sub PC, FRAME_VARG
4034 | test PCd, FRAME_TYPEP
4035 | jnz >8 // Vararg frame below?
4036 | sub BASE, PC // Need to relocate BASE/KBASE down.
4037 | mov KBASE, BASE
4038 | mov PC, [BASE-8] // Frame link word of the relocated frame.
4039 | jmp <1
4040 |8:
4041 | add PCd, FRAME_VARG // Undo the subtraction above.
4042 | jmp <1
4043 break;
4044
4045 case BC_ITERC: /* Iterator call: copy callable, state and control var upwards, then perform a regular 2-argument call. */
4046 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4047 | lea RA, [BASE+RA*8+16] // fb = base+2
4048 | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
4049 | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
4050 | mov [RA], RB
4051 | mov [RA+8], RC
4052 | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
4053 | mov [RA-16], LFUNC:RB
4054 | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
4055 | checkfunc LFUNC:RB, ->vmeta_call // Not a function: handled at vmeta_call.
4056 | mov BASE, RA // fb becomes the new BASE.
4057 | ins_call
4058 break;
4059
4060 case BC_ITERN: /* Inline table traversal step: array part first, then hash part; the index lives in the control var at RA-1. */
4061 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4062 |.if JIT
4063 | // NYI: add hotloop, record BC_ITERN.
4064 |.endif
4065 | mov TAB:RB, [BASE+RA*8-16] // Table being traversed (slot RA-2).
4066 | cleartp TAB:RB
4067 | mov RCd, [BASE+RA*8-8] // Get index from control var.
4068 | mov TMPRd, TAB:RB->asize
4069 | add PC, 4 // Advance PC by one 4-byte instruction.
4070 | mov ITYPE, TAB:RB->array
4071 |1: // Traverse array part.
4072 | cmp RCd, TMPRd; jae >5 // Index points after array part?
4073 | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
4074 |.if not DUALNUM
4075 | cvtsi2sd xmm0, RCd // Key as a double.
4076 |.endif
4077 | // Copy array slot to returned value.
4078 | mov RB, [ITYPE+RC*8]
4079 | mov [BASE+RA*8+8], RB
4080 | // Return array index as a numeric key.
4081 |.if DUALNUM
4082 | setint ITYPE, RC
4083 | mov [BASE+RA*8], ITYPE
4084 |.else
4085 | movsd qword [BASE+RA*8], xmm0
4086 |.endif
4087 | add RCd, 1
4088 | mov [BASE+RA*8-8], RCd // Update control var.
4089 |2:
4090 | movzx RDd, PC_RD // Get target from ITERL.
4091 | branchPC RD
4092 |3:
4093 | ins_next
4094 |
4095 |4: // Skip holes in array part.
4096 | add RCd, 1
4097 | jmp <1
4098 |
4099 |5: // Traverse hash part.
4100 | sub RCd, TMPRd // Hash node index = index - asize.
4101 |6:
4102 | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4103 | imul ITYPEd, RCd, #NODE // Scale by the node size.
4104 | add NODE:ITYPE, TAB:RB->node
4105 | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
4106 | lea TMPRd, [RCd+TMPRd+1] // Next control value = node index + asize + 1.
4107 | // Copy key and value from hash slot.
4108 | mov RB, NODE:ITYPE->key
4109 | mov RC, NODE:ITYPE->val
4110 | mov [BASE+RA*8], RB
4111 | mov [BASE+RA*8+8], RC
4112 | mov [BASE+RA*8-8], TMPRd // Update control var.
4113 | jmp <2
4114 |
4115 |7: // Skip holes in hash part.
4116 | add RCd, 1
4117 | jmp <6
4118 break;
4119
4120 case BC_ISNEXT: /* Check that slots RA-3..RA-1 hold (function with ffid FF_next_N, table, nil); otherwise patch the bytecode. */
4121 | ins_AD // RA = base, RD = target (points to ITERN)
4122 | mov CFUNC:RB, [BASE+RA*8-24]
4123 | checkfunc CFUNC:RB, >5
4124 | checktptp [BASE+RA*8-16], LJ_TTAB, >5
4125 | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
4126 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4127 | branchPC RD
4128 | mov64 TMPR, U64x(fffe7fff, 00000000) // Low dword is 0: the index read by the ITERN code starts at 0.
4129 | mov [BASE+RA*8-8], TMPR // Initialize control var.
4130 |1:
4131 | ins_next
4132 |5: // Despecialize bytecode if any of the checks fail.
4133 | mov PC_OP, BC_JMP // NOTE(review): PC_OP presumably addresses this instruction's opcode byte -- confirm.
4134 | branchPC RD
4135 | mov byte [PC], BC_ITERC // Overwrite the opcode byte at the branch target.
4136 | jmp <1
4137 break;
4138
4139 case BC_VARG: /* Copy vararg slots to slots starting at RA: a fixed count (nil-padded) or all of them (sets MULTRES). */
4140 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4141 | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
4142 | lea RA, [BASE+RA*8] // RA = destination address.
4143 | sub TMPR, [BASE-8] // Subtract the frame link word; loads below read [TMPR-16].
4144 | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
4145 | test RB, RB
4146 | jz >5 // Copy all varargs?
4147 | lea RB, [RA+RB*8-8] // RB = end of the destination range.
4148 | cmp TMPR, BASE // No vararg slots?
4149 | jnb >2
4150 |1: // Copy vararg slots to destination slots.
4151 | mov RC, [TMPR-16]
4152 | add TMPR, 8
4153 | mov [RA], RC
4154 | add RA, 8
4155 | cmp RA, RB // All destination slots filled?
4156 | jnb >3
4157 | cmp TMPR, BASE // No more vararg slots?
4158 | jb <1
4159 |2: // Fill up remainder with nil.
4160 | mov aword [RA], LJ_TNIL
4161 | add RA, 8
4162 | cmp RA, RB
4163 | jb <2
4164 |3:
4165 | ins_next
4166 |
4167 |5: // Copy all varargs.
4168 | mov MULTRES, 1 // MULTRES = 0+1
4169 | mov RC, BASE
4170 | sub RC, TMPR // Byte size of the vararg area.
4171 | jbe <3 // No vararg slots?
4172 | mov RBd, RCd
4173 | shr RBd, 3 // Bytes -> slot count.
4174 | add RBd, 1
4175 | mov MULTRES, RBd // MULTRES = #varargs+1
4176 | mov L:RB, SAVE_L
4177 | add RC, RA // End address of the destination range.
4178 | cmp RC, L:RB->maxstack
4179 | ja >7 // Need to grow stack?
4180 |6: // Copy all vararg slots.
4181 | mov RC, [TMPR-16]
4182 | add TMPR, 8
4183 | mov [RA], RC
4184 | add RA, 8
4185 | cmp TMPR, BASE // No more vararg slots?
4186 | jb <6
4187 | jmp <3
4188 |
4189 |7: // Grow stack for varargs.
4190 | mov L:RB->base, BASE
4191 | mov L:RB->top, RA
4192 | mov SAVE_PC, PC
4193 | sub TMPR, BASE // Need delta, because BASE may change.
4194 | mov TMP1hi, TMPRd // Spill the delta across the call.
4195 | mov CARG2d, MULTRES
4196 | sub CARG2d, 1 // n = #varargs.
4197 | mov CARG1, L:RB
4198 | call extern lj_state_growstack // (lua_State *L, int n)
4199 | mov BASE, L:RB->base
4200 | movsxd TMPR, TMP1hi // Reload the delta, sign-extended.
4201 | mov RA, L:RB->top
4202 | add TMPR, BASE // Rebase the source pointer onto the new BASE.
4203 | jmp <6
4204 break;
4205
4206 /* -- Returns ----------------------------------------------------------- */
4207
4208 case BC_RETM: /* Return with extra results: adjust the count, then continue into BC_RET's code. */
4209 | ins_AD // RA = results, RD = extra_nresults
4210 | add RDd, MULTRES // MULTRES >=1, so RD >=1.
4211 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
4212 break; /* Ends only the generator's C case; no jump is emitted here. */
4213
4214 case BC_RET: case BC_RET0: case BC_RET1: /* Return to the caller: move results down, nil-fill missing results, restore the caller's BASE/KBASE. */
4215 | ins_AD // RA = results, RD = nresults+1
4216 if (op != BC_RET0) {
4217 | shl RAd, 3 // Slot index -> byte offset.
4218 }
4219 |1:
4220 | mov PC, [BASE-8] // Frame link word of the current frame.
4221 | mov MULTRES, RDd // Save nresults+1.
4222 | test PCd, FRAME_TYPE // Check frame type marker.
4223 | jnz >7 // Not returning to a fixarg Lua func?
4224 switch (op) {
4225 case BC_RET:
4226 |->BC_RET_Z:
4227 | mov KBASE, BASE // Use KBASE for result move.
4228 | sub RDd, 1
4229 | jz >3 // No results to move.
4230 |2: // Move results down.
4231 | mov RB, [KBASE+RA]
4232 | mov [KBASE-16], RB
4233 | add KBASE, 8
4234 | sub RDd, 1
4235 | jnz <2
4236 |3:
4237 | mov RDd, MULTRES // Note: MULTRES may be >255.
4238 | movzx RBd, PC_RB // So cannot compare with RDL!
4239 |5:
4240 | cmp RBd, RDd // More results expected?
4241 | ja >6
4242 break;
4243 case BC_RET1:
4244 | mov RB, [BASE+RA] // Single result ...
4245 | mov [BASE-16], RB // ... moved to the first result position.
4246 /* fallthrough */
4247 case BC_RET0:
4248 |5:
4249 | cmp PC_RB, RDL // More results expected?
4250 | ja >6
4251 default:
4252 break;
4253 }
4254 | movzx RAd, PC_RA
4255 | neg RA
4256 | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
4257 | mov LFUNC:KBASE, [BASE-16] // Function of the frame being returned to.
4258 | cleartp LFUNC:KBASE
4259 | mov KBASE, LFUNC:KBASE->pc
4260 | mov KBASE, [KBASE+PC2PROTO(k)] // Reload KBASE from that function's prototype.
4261 | ins_next
4262 |
4263 |6: // Fill up results with nil.
4264 if (op == BC_RET) {
4265 | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
4266 | add KBASE, 8
4267 } else {
4268 | mov aword [BASE+RD*8-24], LJ_TNIL
4269 }
4270 | add RD, 1
4271 | jmp <5
4272 |
4273 |7: // Non-standard return case.
4274 | lea RB, [PC-FRAME_VARG]
4275 | test RBd, FRAME_TYPEP
4276 | jnz ->vm_return
4277 | // Return from vararg function: relocate BASE down and RA up.
4278 | sub BASE, RB
4279 if (op != BC_RET0) {
4280 | add RA, RB
4281 }
4282 | jmp <1 // Retry with the relocated frame.
4283 break;
4284
4285 /* -- Loops and branches ------------------------------------------------ */
4286
4287 |.define FOR_IDX, [RA] // Four consecutive 8-byte slots starting at RA; first slot.
4288 |.define FOR_STOP, [RA+8] // Second slot.
4289 |.define FOR_STEP, [RA+16] // Third slot.
4290 |.define FOR_EXT, [RA+24] // Fourth slot.
4291
4292 case BC_FORL: /* Loop back-edge: emits only the hotloop macro (JIT builds), then continues into BC_IFORL's code. */
4293 |.if JIT
4294 | hotloop RBd
4295 |.endif
4296 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
4297 break; /* Ends only the generator's C case; no jump is emitted here. */
4298
4299 case BC_JFORI:
4300 case BC_JFORL:
4301#if !LJ_HASJIT
4302 break;
4303#endif
4304 case BC_FORI:
4305 case BC_IFORL:
4306 vk = (op == BC_IFORL || op == BC_JFORL);
4307 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
4308 | lea RA, [BASE+RA*8]
4309 if (LJ_DUALNUM) {
4310 | mov RB, FOR_IDX
4311 | checkint RB, >9
4312 | mov TMPR, FOR_STOP
4313 if (!vk) {
4314 | checkint TMPR, ->vmeta_for
4315 | mov ITYPE, FOR_STEP
4316 | test ITYPEd, ITYPEd; js >5
4317 | sar ITYPE, 47;
4318 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4319 } else {
4320#ifdef LUA_USE_ASSERT
4321 | checkinttp FOR_STOP, ->assert_bad_for_arg_type
4322 | checkinttp FOR_STEP, ->assert_bad_for_arg_type
4323#endif
4324 | mov ITYPE, FOR_STEP
4325 | test ITYPEd, ITYPEd; js >5
4326 | add RBd, ITYPEd; jo >1
4327 | setint RB
4328 | mov FOR_IDX, RB
4329 }
4330 | cmp RBd, TMPRd
4331 | mov FOR_EXT, RB
4332 if (op == BC_FORI) {
4333 | jle >7
4334 |1:
4335 |6:
4336 | branchPC RD
4337 } else if (op == BC_JFORI) {
4338 | branchPC RD
4339 | movzx RDd, PC_RD
4340 | jle =>BC_JLOOP
4341 |1:
4342 |6:
4343 } else if (op == BC_IFORL) {
4344 | jg >7
4345 |6:
4346 | branchPC RD
4347 |1:
4348 } else {
4349 | jle =>BC_JLOOP
4350 |1:
4351 |6:
4352 }
4353 |7:
4354 | ins_next
4355 |
4356 |5: // Invert check for negative step.
4357 if (!vk) {
4358 | sar ITYPE, 47;
4359 | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
4360 } else {
4361 | add RBd, ITYPEd; jo <1
4362 | setint RB
4363 | mov FOR_IDX, RB
4364 }
4365 | cmp RBd, TMPRd
4366 | mov FOR_EXT, RB
4367 if (op == BC_FORI) {
4368 | jge <7
4369 } else if (op == BC_JFORI) {
4370 | branchPC RD
4371 | movzx RDd, PC_RD
4372 | jge =>BC_JLOOP
4373 } else if (op == BC_IFORL) {
4374 | jl <7
4375 } else {
4376 | jge =>BC_JLOOP
4377 }
4378 | jmp <6
4379 |9: // Fallback to FP variant.
4380 if (!vk) {
4381 | jae ->vmeta_for
4382 }
4383 } else if (!vk) {
4384 | checknumtp FOR_IDX, ->vmeta_for
4385 }
4386 if (!vk) {
4387 | checknumtp FOR_STOP, ->vmeta_for
4388 } else {
4389#ifdef LUA_USE_ASSERT
4390 | checknumtp FOR_STOP, ->assert_bad_for_arg_type
4391 | checknumtp FOR_STEP, ->assert_bad_for_arg_type
4392#endif
4393 }
4394 | mov RB, FOR_STEP
4395 if (!vk) {
4396 | checknum RB, ->vmeta_for
4397 }
4398 | movsd xmm0, qword FOR_IDX
4399 | movsd xmm1, qword FOR_STOP
4400 if (vk) {
4401 | addsd xmm0, qword FOR_STEP
4402 | movsd qword FOR_IDX, xmm0
4403 | test RB, RB; js >3
4404 } else {
4405 | jl >3
4406 }
4407 | ucomisd xmm1, xmm0
4408 |1:
4409 | movsd qword FOR_EXT, xmm0
4410 if (op == BC_FORI) {
4411 |.if DUALNUM
4412 | jnb <7
4413 |.else
4414 | jnb >2
4415 | branchPC RD
4416 |.endif
4417 } else if (op == BC_JFORI) {
4418 | branchPC RD
4419 | movzx RDd, PC_RD
4420 | jnb =>BC_JLOOP
4421 } else if (op == BC_IFORL) {
4422 |.if DUALNUM
4423 | jb <7
4424 |.else
4425 | jb >2
4426 | branchPC RD
4427 |.endif
4428 } else {
4429 | jnb =>BC_JLOOP
4430 }
4431 |.if DUALNUM
4432 | jmp <6
4433 |.else
4434 |2:
4435 | ins_next
4436 |.endif
4437 |
4438 |3: // Invert comparison if step is negative.
4439 | ucomisd xmm0, xmm1
4440 | jmp <1
4441 break;
4442
4443 case BC_ITERL:
4444 |.if JIT
4445 | hotloop RBd
4446 |.endif
4447 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
4448 break;
4449
4450 case BC_JITERL:
4451#if !LJ_HASJIT
4452 break;
4453#endif
4454 case BC_IITERL:
4455 | ins_AJ // RA = base, RD = target
4456 | lea RA, [BASE+RA*8]
4457 | mov RB, [RA]
4458 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
4459 if (op == BC_JITERL) {
4460 | mov [RA-8], RB
4461 | jmp =>BC_JLOOP
4462 } else {
4463 | branchPC RD // Otherwise save control var + branch.
4464 | mov [RA-8], RB
4465 }
4466 |1:
4467 | ins_next
4468 break;
4469
4470 case BC_LOOP:
4471 | ins_A // RA = base, RD = target (loop extent)
4472 | // Note: RA/RD is only used by trace recorder to determine scope/extent
4473 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
4474 |.if JIT
4475 | hotloop RBd
4476 |.endif
4477 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
4478 break;
4479
4480 case BC_ILOOP:
4481 | ins_A // RA = base, RD = target (loop extent)
4482 | ins_next
4483 break;
4484
4485 case BC_JLOOP:
4486 |.if JIT
4487 | ins_AD // RA = base (ignored), RD = traceno
4488 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4489 | mov TRACE:RD, [RA+RD*8]
4490 | mov RD, TRACE:RD->mcode
4491 | mov L:RB, SAVE_L
4492 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
4493 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
4494 | // Save additional callee-save registers only used in compiled code.
4495 |.if X64WIN
4496 | mov CSAVE_4, r12
4497 | mov CSAVE_3, r13
4498 | mov CSAVE_2, r14
4499 | mov CSAVE_1, r15
4500 | mov RA, rsp
4501 | sub rsp, 10*16+4*8
4502 | movdqa [RA-1*16], xmm6
4503 | movdqa [RA-2*16], xmm7
4504 | movdqa [RA-3*16], xmm8
4505 | movdqa [RA-4*16], xmm9
4506 | movdqa [RA-5*16], xmm10
4507 | movdqa [RA-6*16], xmm11
4508 | movdqa [RA-7*16], xmm12
4509 | movdqa [RA-8*16], xmm13
4510 | movdqa [RA-9*16], xmm14
4511 | movdqa [RA-10*16], xmm15
4512 |.else
4513 | sub rsp, 16
4514 | mov [rsp+16], r12
4515 | mov [rsp+8], r13
4516 |.endif
4517 | jmp RD
4518 |.endif
4519 break;
4520
4521 case BC_JMP:
4522 | ins_AJ // RA = unused, RD = target
4523 | branchPC RD
4524 | ins_next
4525 break;
4526
4527 /* -- Function headers -------------------------------------------------- */
4528
4529 /*
4530 ** Reminder: A function may be called with func/args above L->maxstack,
4531 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4532 ** too. This means all FUNC* ops (including fast functions) must check
4533 ** for stack overflow _before_ adding more slots!
4534 */
4535
4536 case BC_FUNCF:
4537 |.if JIT
4538 | hotcall RBd
4539 |.endif
4540 case BC_FUNCV: /* NYI: compiled vararg functions. */
4541 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4542 break;
4543
4544 case BC_JFUNCF:
4545#if !LJ_HASJIT
4546 break;
4547#endif
4548 case BC_IFUNCF:
4549 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4550 | mov KBASE, [PC-4+PC2PROTO(k)]
4551 | mov L:RB, SAVE_L
4552 | lea RA, [BASE+RA*8] // Top of frame.
4553 | cmp RA, L:RB->maxstack
4554 | ja ->vm_growstack_f
4555 | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
4556 | cmp NARGS:RDd, RAd // Check for missing parameters.
4557 | jbe >3
4558 |2:
4559 if (op == BC_JFUNCF) {
4560 | movzx RDd, PC_RD
4561 | jmp =>BC_JLOOP
4562 } else {
4563 | ins_next
4564 }
4565 |
4566 |3: // Clear missing parameters.
4567 | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
4568 | add NARGS:RDd, 1
4569 | cmp NARGS:RDd, RAd
4570 | jbe <3
4571 | jmp <2
4572 break;
4573
4574 case BC_JFUNCV:
4575#if !LJ_HASJIT
4576 break;
4577#endif
4578 | int3 // NYI: compiled vararg functions
4579 break; /* NYI: compiled vararg functions. */
4580
4581 case BC_IFUNCV:
4582 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4583 | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
4584 | lea RD, [BASE+NARGS:RD*8+8]
4585 | mov LFUNC:KBASE, [BASE-16]
4586 | mov [RD-8], RB // Store delta + FRAME_VARG.
4587 | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
4588 | mov L:RB, SAVE_L
4589 | lea RA, [RD+RA*8]
4590 | cmp RA, L:RB->maxstack
4591 | ja ->vm_growstack_v // Need to grow stack.
4592 | mov RA, BASE
4593 | mov BASE, RD
4594 | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
4595 | test RBd, RBd
4596 | jz >2
4597 | add RA, 8
4598 |1: // Copy fixarg slots up to new frame.
4599 | add RA, 8
4600 | cmp RA, BASE
4601 | jnb >3 // Less args than parameters?
4602 | mov KBASE, [RA-16]
4603 | mov [RD], KBASE
4604 | add RD, 8
4605 | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
4606 | sub RBd, 1
4607 | jnz <1
4608 |2:
4609 if (op == BC_JFUNCV) {
4610 | movzx RDd, PC_RD
4611 | jmp =>BC_JLOOP
4612 } else {
4613 | mov KBASE, [PC-4+PC2PROTO(k)]
4614 | ins_next
4615 }
4616 |
4617 |3: // Clear missing parameters.
4618 | mov aword [RD], LJ_TNIL
4619 | add RD, 8
4620 | sub RBd, 1
4621 | jnz <3
4622 | jmp <2
4623 break;
4624
4625 case BC_FUNCC:
4626 case BC_FUNCCW:
4627 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4628 | mov CFUNC:RB, [BASE-16]
4629 | cleartp CFUNC:RB
4630 | mov KBASE, CFUNC:RB->f
4631 | mov L:RB, SAVE_L
4632 | lea RD, [BASE+NARGS:RD*8-8]
4633 | mov L:RB->base, BASE
4634 | lea RA, [RD+8*LUA_MINSTACK]
4635 | cmp RA, L:RB->maxstack
4636 | mov L:RB->top, RD
4637 if (op == BC_FUNCC) {
4638 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4639 } else {
4640 | mov CARG2, KBASE
4641 | mov CARG1, L:RB // Caveat: CARG1 may be RA.
4642 }
4643 | ja ->vm_growstack_c // Need to grow stack.
4644 | set_vmstate C
4645 if (op == BC_FUNCC) {
4646 | call KBASE // (lua_State *L)
4647 } else {
4648 | // (lua_State *L, lua_CFunction f)
4649 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4650 }
4651 | // nresults returned in eax (RD).
4652 | mov BASE, L:RB->base
4653 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
4654 | set_vmstate INTERP
4655 | lea RA, [BASE+RD*8]
4656 | neg RA
4657 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4658 | mov PC, [BASE-8] // Fetch PC of caller.
4659 | jmp ->vm_returnc
4660 break;
4661
4662 /* ---------------------------------------------------------------------- */
4663
4664 default:
4665 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
4666 exit(2);
4667 break;
4668 }
4669}
4670/* Build the backend: emit the subroutines, then one code fragment per bytecode opcode. Returns BC__MAX. */
4671static int build_backend(BuildCtx *ctx)
4672{
4673 int op;
4674 dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX DynASM PC labels (opcodes are used as labels, cf. =>BC_JLOOP). */
4675 build_subroutines(ctx); /* Subroutines are emitted before the switch to the opcode section below. */
4676 |.code_op
4677 for (op = 0; op < BC__MAX; op++) /* Visit every opcode in numeric order. */
4678 build_ins(ctx, (BCOp)op, op);
4679 return BC__MAX; /* Same constant that bounds the loop above. */
4680}
4681
4682/* Emit pseudo frame-info (call-frame CIE/FDE records) for all assembler functions. Only the ELF and Mach-O assembler output modes write anything; other modes are a no-op. */
4683static void emit_asm_debug(BuildCtx *ctx)
4684{
4685 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from ctx->code. */
4686 switch (ctx->mode) {
4687 case BUILD_elfasm: /* ELF assembler output: .debug_frame, plus .eh_frame unless LJ_NO_UNWIND. */
4688 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
4689 fprintf(ctx->fp, /* CIE (.Lframe0) referenced by the .debug_frame FDEs below. */
4690 ".Lframe0:\n"
4691 "\t.long .LECIE0-.LSCIE0\n"
4692 ".LSCIE0:\n"
4693 "\t.long 0xffffffff\n"
4694 "\t.byte 0x1\n"
4695 "\t.string \"\"\n"
4696 "\t.uleb128 0x1\n"
4697 "\t.sleb128 -8\n"
4698 "\t.byte 0x10\n"
4699 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4700 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4701 "\t.align 8\n"
4702 ".LECIE0:\n\n");
4703 fprintf(ctx->fp, /* FDE for the fcofs bytes starting at .Lbegin; CFA offset is CFRAME_SIZE. */
4704 ".LSFDE0:\n"
4705 "\t.long .LEFDE0-.LASFDE0\n"
4706 ".LASFDE0:\n"
4707 "\t.long .Lframe0\n"
4708 "\t.quad .Lbegin\n"
4709 "\t.quad %d\n"
4710 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4711 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4712 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4713 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4714 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4715#if LJ_NO_UNWIND
4716 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
4717 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
4718#endif
4719 "\t.align 8\n"
4720 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
4721#if LJ_HASFFI
4722 fprintf(ctx->fp, /* FDE for lj_vm_ffi_call: the remaining codesz - fcofs bytes. */
4723 ".LSFDE1:\n"
4724 "\t.long .LEFDE1-.LASFDE1\n"
4725 ".LASFDE1:\n"
4726 "\t.long .Lframe0\n"
4727 "\t.quad lj_vm_ffi_call\n"
4728 "\t.quad %d\n"
4729 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4730 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4731 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4732 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4733 "\t.align 8\n"
4734 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
4735#endif
4736#if !LJ_NO_UNWIND
4737#if (defined(__sun__) && defined(__svr4__))
4738 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
4739#else
4740 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
4741#endif
4742 fprintf(ctx->fp, /* .eh_frame CIE (.Lframe1), augmentation "zPR", referencing lj_err_unwind_dwarf. */
4743 ".Lframe1:\n"
4744 "\t.long .LECIE1-.LSCIE1\n"
4745 ".LSCIE1:\n"
4746 "\t.long 0\n"
4747 "\t.byte 0x1\n"
4748 "\t.string \"zPR\"\n"
4749 "\t.uleb128 0x1\n"
4750 "\t.sleb128 -8\n"
4751 "\t.byte 0x10\n"
4752 "\t.uleb128 6\n" /* augmentation length */
4753 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4754 "\t.long lj_err_unwind_dwarf-.\n"
4755 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4756 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4757 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4758 "\t.align 8\n"
4759 ".LECIE1:\n\n");
4760 fprintf(ctx->fp, /* .eh_frame FDE for the fcofs bytes starting at .Lbegin. */
4761 ".LSFDE2:\n"
4762 "\t.long .LEFDE2-.LASFDE2\n"
4763 ".LASFDE2:\n"
4764 "\t.long .LASFDE2-.Lframe1\n"
4765 "\t.long .Lbegin-.\n"
4766 "\t.long %d\n"
4767 "\t.uleb128 0\n" /* augmentation length */
4768 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
4769 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4770 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4771 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
4772 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
4773 "\t.align 8\n"
4774 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
4775#if LJ_HASFFI
4776 fprintf(ctx->fp, /* Second CIE (.Lframe2), augmentation "zR", used only by the lj_vm_ffi_call FDE. */
4777 ".Lframe2:\n"
4778 "\t.long .LECIE2-.LSCIE2\n"
4779 ".LSCIE2:\n"
4780 "\t.long 0\n"
4781 "\t.byte 0x1\n"
4782 "\t.string \"zR\"\n"
4783 "\t.uleb128 0x1\n"
4784 "\t.sleb128 -8\n"
4785 "\t.byte 0x10\n"
4786 "\t.uleb128 1\n" /* augmentation length */
4787 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4788 "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
4789 "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
4790 "\t.align 8\n"
4791 ".LECIE2:\n\n");
4792 fprintf(ctx->fp, /* .eh_frame FDE for lj_vm_ffi_call (codesz - fcofs bytes). */
4793 ".LSFDE3:\n"
4794 "\t.long .LEFDE3-.LASFDE3\n"
4795 ".LASFDE3:\n"
4796 "\t.long .LASFDE3-.Lframe2\n"
4797 "\t.long lj_vm_ffi_call-.\n"
4798 "\t.long %d\n"
4799 "\t.uleb128 0\n" /* augmentation length */
4800 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
4801 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
4802 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
4803 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
4804 "\t.align 8\n"
4805 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
4806#endif
4807#endif
4808 break;
4809#if !LJ_NO_UNWIND
4810 /* Mental note: never let Apple design an assembler.
4811 ** Or a linker. Or a plastic case. But I digress.
4812 */
4813 case BUILD_machasm: { /* Mach-O assembler output: one FDE per non-empty symbol. */
4814#if LJ_HASFFI
4815 int fcsize = 0;
4816#endif
4817 int i;
4818 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
4819 fprintf(ctx->fp, /* CIE (EH_frame1) referenced by every per-symbol FDE in the loop below. */
4820 "EH_frame1:\n"
4821 "\t.set L$set$x,LECIEX-LSCIEX\n"
4822 "\t.long L$set$x\n"
4823 "LSCIEX:\n"
4824 "\t.long 0\n"
4825 "\t.byte 0x1\n"
4826 "\t.ascii \"zPR\\0\"\n"
4827 "\t.byte 0x1\n"
4828 "\t.byte 128-8\n"
4829 "\t.byte 0x10\n"
4830 "\t.byte 6\n" /* augmentation length */
4831 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
4832 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
4833 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4834 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4835 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4836 "\t.align 3\n"
4837 "LECIEX:\n\n");
4838 for (i = 0; i < ctx->nsym; i++) {
4839 const char *name = ctx->sym[i].name;
4840 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; /* NOTE(review): reads sym[nsym] on the last pass; presumably a trailing sentinel entry exists -- confirm. */
4841 if (size == 0) continue; /* No FDE for zero-sized symbols. */
4842#if LJ_HASFFI
4843 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } /* Gets its own CIE/FDE after the loop. */
4844#endif
4845 fprintf(ctx->fp, /* The loop index i is used to make the per-FDE label names unique. */
4846 "%s.eh:\n"
4847 "LSFDE%d:\n"
4848 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
4849 "\t.long L$set$%d\n"
4850 "LASFDE%d:\n"
4851 "\t.long LASFDE%d-EH_frame1\n"
4852 "\t.long %s-.\n"
4853 "\t.long %d\n"
4854 "\t.byte 0\n" /* augmentation length */
4855 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
4856 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4857 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4858 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
4859 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
4860 "\t.align 3\n"
4861 "LEFDE%d:\n\n",
4862 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
4863 }
4864#if LJ_HASFFI
4865 if (fcsize) { /* Only when _lj_vm_ffi_call was found with a non-zero size. */
4866 fprintf(ctx->fp, /* Second CIE (EH_frame2), augmentation "zR", for _lj_vm_ffi_call only. */
4867 "EH_frame2:\n"
4868 "\t.set L$set$y,LECIEY-LSCIEY\n"
4869 "\t.long L$set$y\n"
4870 "LSCIEY:\n"
4871 "\t.long 0\n"
4872 "\t.byte 0x1\n"
4873 "\t.ascii \"zR\\0\"\n"
4874 "\t.byte 0x1\n"
4875 "\t.byte 128-8\n"
4876 "\t.byte 0x10\n"
4877 "\t.byte 1\n" /* augmentation length */
4878 "\t.byte 0x1b\n" /* pcrel|sdata4 */
4879 "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
4880 "\t.byte 0x80+0x10\n\t.byte 0x1\n"
4881 "\t.align 3\n"
4882 "LECIEY:\n\n");
4883 fprintf(ctx->fp, /* FDE for _lj_vm_ffi_call, fcsize bytes long. */
4884 "_lj_vm_ffi_call.eh:\n"
4885 "LSFDEY:\n"
4886 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
4887 "\t.long L$set$yy\n"
4888 "LASFDEY:\n"
4889 "\t.long LASFDEY-EH_frame2\n"
4890 "\t.long _lj_vm_ffi_call-.\n"
4891 "\t.long %d\n"
4892 "\t.byte 0\n" /* augmentation length */
4893 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
4894 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
4895 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
4896 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
4897 "\t.align 3\n"
4898 "LEFDEY:\n\n", fcsize);
4899 }
4900#endif
4901 fprintf(ctx->fp, ".subsections_via_symbols\n");
4902 }
4903 break;
4904#endif
4905 default: /* Difficult for other modes. */
4906 break;
4907 }
4908}
4909
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 2ccc671f..1965b06b 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
18| 18|
19|.if P64 19|.if P64
20|.define X64, 1 20|.define X64, 1
21|.define SSE, 1
22|.if WIN 21|.if WIN
23|.define X64WIN, 1 22|.define X64WIN, 1
24|.endif 23|.endif
@@ -116,24 +115,74 @@
116|.type NODE, Node 115|.type NODE, Node
117|.type NARGS, int 116|.type NARGS, int
118|.type TRACE, GCtrace 117|.type TRACE, GCtrace
118|.type SBUF, SBuf
119| 119|
120|// Stack layout while in interpreter. Must match with lj_frame.h. 120|// Stack layout while in interpreter. Must match with lj_frame.h.
121|//----------------------------------------------------------------------- 121|//-----------------------------------------------------------------------
122|.if not X64 // x86 stack layout. 122|.if not X64 // x86 stack layout.
123| 123|
124|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). 124|.if WIN
125|
126|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
125|.macro saveregs_ 127|.macro saveregs_
126| push edi; push esi; push ebx 128| push edi; push esi; push ebx
129| push extern lj_err_unwind_win
130| fs; push dword [0]
131| fs; mov [0], esp
127| sub esp, CFRAME_SPACE 132| sub esp, CFRAME_SPACE
128|.endmacro 133|.endmacro
129|.macro saveregs 134|.macro restoreregs
130| push ebp; saveregs_ 135| add esp, CFRAME_SPACE
136| fs; pop dword [0]
137| pop edi // Short for esp += 4.
138| pop ebx; pop esi; pop edi; pop ebp
139|.endmacro
140|
141|.else
142|
143|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144|.macro saveregs_
145| push edi; push esi; push ebx
146| sub esp, CFRAME_SPACE
131|.endmacro 147|.endmacro
132|.macro restoreregs 148|.macro restoreregs
133| add esp, CFRAME_SPACE 149| add esp, CFRAME_SPACE
134| pop ebx; pop esi; pop edi; pop ebp 150| pop ebx; pop esi; pop edi; pop ebp
135|.endmacro 151|.endmacro
136| 152|
153|.endif
154|
155|.macro saveregs
156| push ebp; saveregs_
157|.endmacro
158|
159|.if WIN
160|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161|.define SAVE_NRES, aword [esp+aword*18]
162|.define SAVE_CFRAME, aword [esp+aword*17]
163|.define SAVE_L, aword [esp+aword*16]
164|//----- 16 byte aligned, ^^^ arguments from C caller
165|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166|.define SAVE_R4, aword [esp+aword*14]
167|.define SAVE_R3, aword [esp+aword*13]
168|.define SAVE_R2, aword [esp+aword*12]
169|//----- 16 byte aligned
170|.define SAVE_R1, aword [esp+aword*11]
171|.define SEH_FUNC, aword [esp+aword*10]
172|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173|.define UNUSED2, aword [esp+aword*8]
174|//----- 16 byte aligned
175|.define UNUSED1, aword [esp+aword*7]
176|.define SAVE_PC, aword [esp+aword*6]
177|.define TMP2, aword [esp+aword*5]
178|.define TMP1, aword [esp+aword*4]
179|//----- 16 byte aligned
180|.define ARG4, aword [esp+aword*3]
181|.define ARG3, aword [esp+aword*2]
182|.define ARG2, aword [esp+aword*1]
183|.define ARG1, aword [esp] //<-- esp while in interpreter.
184|//----- 16 byte aligned, ^^^ arguments for C callee
185|.else
137|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. 186|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
138|.define SAVE_NRES, aword [esp+aword*14] 187|.define SAVE_NRES, aword [esp+aword*14]
139|.define SAVE_CFRAME, aword [esp+aword*13] 188|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
154|.define ARG2, aword [esp+aword*1] 203|.define ARG2, aword [esp+aword*1]
155|.define ARG1, aword [esp] //<-- esp while in interpreter. 204|.define ARG1, aword [esp] //<-- esp while in interpreter.
156|//----- 16 byte aligned, ^^^ arguments for C callee 205|//----- 16 byte aligned, ^^^ arguments for C callee
206|.endif
157| 207|
158|// FPARGx overlaps ARGx and ARG(x+1) on x86. 208|// FPARGx overlaps ARGx and ARG(x+1) on x86.
159|.define FPARG3, qword [esp+qword*1] 209|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
389| fpop 439| fpop
390|.endmacro 440|.endmacro
391| 441|
392|.macro fdup; fld st0; .endmacro
393|.macro fpop1; fstp st1; .endmacro 442|.macro fpop1; fstp st1; .endmacro
394| 443|
395|// Synthesize SSE FP constants. 444|// Synthesize SSE FP constants.
@@ -555,6 +604,10 @@ static void build_subroutines(BuildCtx *ctx)
555 |.else 604 |.else
556 | mov eax, FCARG2 // Error return status for vm_pcall. 605 | mov eax, FCARG2 // Error return status for vm_pcall.
557 | mov esp, FCARG1 606 | mov esp, FCARG1
607 |.if WIN
608 | lea FCARG1, SEH_NEXT
609 | fs; mov [0], FCARG1
610 |.endif
558 |.endif 611 |.endif
559 |->vm_unwind_c_eh: // Landing pad for external unwinder. 612 |->vm_unwind_c_eh: // Landing pad for external unwinder.
560 | mov L:RB, SAVE_L 613 | mov L:RB, SAVE_L
@@ -578,6 +631,10 @@ static void build_subroutines(BuildCtx *ctx)
578 |.else 631 |.else
579 | and FCARG1, CFRAME_RAWMASK 632 | and FCARG1, CFRAME_RAWMASK
580 | mov esp, FCARG1 633 | mov esp, FCARG1
634 |.if WIN
635 | lea FCARG1, SEH_NEXT
636 | fs; mov [0], FCARG1
637 |.endif
581 |.endif 638 |.endif
582 |->vm_unwind_ff_eh: // Landing pad for external unwinder. 639 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
583 | mov L:RB, SAVE_L 640 | mov L:RB, SAVE_L
@@ -591,6 +648,19 @@ static void build_subroutines(BuildCtx *ctx)
591 | set_vmstate INTERP 648 | set_vmstate INTERP
592 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 649 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
593 | 650 |
651 |.if WIN and not X64
652 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
653 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
654 | mov [esp], FCARG1 // Return value for RtlUnwind.
655 | push FCARG2 // Exception record for RtlUnwind.
656 | push 0 // Ignored by RtlUnwind.
657 | push dword [FCARG1+CFRAME_OFS_SEH]
658 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
659 | mov FCARG1, eax
660 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
661 | ret // Jump to unwinder.
662 |.endif
663 |
594 |//----------------------------------------------------------------------- 664 |//-----------------------------------------------------------------------
595 |//-- Grow stack for calls ----------------------------------------------- 665 |//-- Grow stack for calls -----------------------------------------------
596 |//----------------------------------------------------------------------- 666 |//-----------------------------------------------------------------------
@@ -646,17 +716,18 @@ static void build_subroutines(BuildCtx *ctx)
646 | lea KBASEa, [esp+CFRAME_RESUME] 716 | lea KBASEa, [esp+CFRAME_RESUME]
647 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 717 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
648 | add DISPATCH, GG_G2DISP 718 | add DISPATCH, GG_G2DISP
649 | mov L:RB->cframe, KBASEa
650 | mov SAVE_PC, RD // Any value outside of bytecode is ok. 719 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
651 | mov SAVE_CFRAME, RDa 720 | mov SAVE_CFRAME, RDa
652 |.if X64 721 |.if X64
653 | mov SAVE_NRES, RD 722 | mov SAVE_NRES, RD
654 | mov SAVE_ERRF, RD 723 | mov SAVE_ERRF, RD
655 |.endif 724 |.endif
725 | mov L:RB->cframe, KBASEa
656 | cmp byte L:RB->status, RDL 726 | cmp byte L:RB->status, RDL
657 | je >3 // Initial resume (like a call). 727 | je >2 // Initial resume (like a call).
658 | 728 |
659 | // Resume after yield (like a return). 729 | // Resume after yield (like a return).
730 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
660 | set_vmstate INTERP 731 | set_vmstate INTERP
661 | mov byte L:RB->status, RDL 732 | mov byte L:RB->status, RDL
662 | mov BASE, L:RB->base 733 | mov BASE, L:RB->base
@@ -696,20 +767,19 @@ static void build_subroutines(BuildCtx *ctx)
696 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! 767 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
697 |.endif 768 |.endif
698 | 769 |
770 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
699 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 771 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
700 | mov SAVE_CFRAME, KBASEa 772 | mov SAVE_CFRAME, KBASEa
701 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. 773 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
774 | add DISPATCH, GG_G2DISP
702 |.if X64 775 |.if X64
703 | mov L:RB->cframe, rsp 776 | mov L:RB->cframe, rsp
704 |.else 777 |.else
705 | mov L:RB->cframe, esp 778 | mov L:RB->cframe, esp
706 |.endif 779 |.endif
707 | 780 |
708 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). 781 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
709 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. 782 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
710 | add DISPATCH, GG_G2DISP
711 |
712 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
713 | set_vmstate INTERP 783 | set_vmstate INTERP
714 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). 784 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
715 | add PC, RA 785 | add PC, RA
@@ -747,14 +817,17 @@ static void build_subroutines(BuildCtx *ctx)
747 | 817 |
748 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). 818 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
749 | sub KBASE, L:RB->top 819 | sub KBASE, L:RB->top
820 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
750 | mov SAVE_ERRF, 0 // No error function. 821 | mov SAVE_ERRF, 0 // No error function.
751 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. 822 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
823 | add DISPATCH, GG_G2DISP
752 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). 824 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
753 | 825 |
754 |.if X64 826 |.if X64
755 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. 827 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
756 | mov SAVE_CFRAME, KBASEa 828 | mov SAVE_CFRAME, KBASEa
757 | mov L:RB->cframe, rsp 829 | mov L:RB->cframe, rsp
830 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
758 | 831 |
759 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) 832 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
760 |.else 833 |.else
@@ -765,6 +838,7 @@ static void build_subroutines(BuildCtx *ctx)
765 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. 838 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
766 | mov SAVE_CFRAME, KBASE 839 | mov SAVE_CFRAME, KBASE
767 | mov L:RB->cframe, esp 840 | mov L:RB->cframe, esp
841 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
768 | 842 |
769 | call BASE // (lua_State *L, lua_CFunction func, void *ud) 843 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
770 |.endif 844 |.endif
@@ -872,13 +946,9 @@ static void build_subroutines(BuildCtx *ctx)
872 |.if DUALNUM 946 |.if DUALNUM
873 | mov TMP2, LJ_TISNUM 947 | mov TMP2, LJ_TISNUM
874 | mov TMP1, RC 948 | mov TMP1, RC
875 |.elif SSE 949 |.else
876 | cvtsi2sd xmm0, RC 950 | cvtsi2sd xmm0, RC
877 | movsd TMPQ, xmm0 951 | movsd TMPQ, xmm0
878 |.else
879 | mov ARG4, RC
880 | fild ARG4
881 | fstp TMPQ
882 |.endif 952 |.endif
883 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 953 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
884 | jmp >1 954 | jmp >1
@@ -932,6 +1002,19 @@ static void build_subroutines(BuildCtx *ctx)
932 | mov NARGS:RD, 2+1 // 2 args for func(t, k). 1002 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
933 | jmp ->vm_call_dispatch_f 1003 | jmp ->vm_call_dispatch_f
934 | 1004 |
1005 |->vmeta_tgetr:
1006 | mov FCARG1, TAB:RB
1007 | mov RB, BASE // Save BASE.
1008 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1009 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1010 | // cTValue * or NULL returned in eax (RC).
1011 | movzx RA, PC_RA
1012 | mov BASE, RB // Restore BASE.
1013 | test RC, RC
1014 | jnz ->BC_TGETR_Z
1015 | mov dword [BASE+RA*8+4], LJ_TNIL
1016 | jmp ->BC_TGETR2_Z
1017 |
935 |//----------------------------------------------------------------------- 1018 |//-----------------------------------------------------------------------
936 | 1019 |
937 |->vmeta_tsets: 1020 |->vmeta_tsets:
@@ -951,13 +1034,9 @@ static void build_subroutines(BuildCtx *ctx)
951 |.if DUALNUM 1034 |.if DUALNUM
952 | mov TMP2, LJ_TISNUM 1035 | mov TMP2, LJ_TISNUM
953 | mov TMP1, RC 1036 | mov TMP1, RC
954 |.elif SSE 1037 |.else
955 | cvtsi2sd xmm0, RC 1038 | cvtsi2sd xmm0, RC
956 | movsd TMPQ, xmm0 1039 | movsd TMPQ, xmm0
957 |.else
958 | mov ARG4, RC
959 | fild ARG4
960 | fstp TMPQ
961 |.endif 1040 |.endif
962 | lea RCa, TMPQ // Store temp. TValue in TMPQ. 1041 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
963 | jmp >1 1042 | jmp >1
@@ -1023,6 +1102,33 @@ static void build_subroutines(BuildCtx *ctx)
1023 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). 1102 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1024 | jmp ->vm_call_dispatch_f 1103 | jmp ->vm_call_dispatch_f
1025 | 1104 |
1105 |->vmeta_tsetr:
1106 |.if X64WIN
1107 | mov L:CARG1d, SAVE_L
1108 | mov CARG3d, RC
1109 | mov L:CARG1d->base, BASE
1110 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
1111 |.elif X64
1112 | mov L:CARG1d, SAVE_L
1113 | mov CARG2d, TAB:RB
1114 | mov L:CARG1d->base, BASE
1115 | mov RB, BASE // Save BASE.
1116 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1117 |.else
1118 | mov L:RA, SAVE_L
1119 | mov ARG2, TAB:RB
1120 | mov RB, BASE // Save BASE.
1121 | mov ARG3, RC
1122 | mov ARG1, L:RA
1123 | mov L:RA->base, BASE
1124 |.endif
1125 | mov SAVE_PC, PC
1126 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1127 | // TValue * returned in eax (RC).
1128 | movzx RA, PC_RA
1129 | mov BASE, RB // Restore BASE.
1130 | jmp ->BC_TSETR_Z
1131 |
1026 |//-- Comparison metamethods --------------------------------------------- 1132 |//-- Comparison metamethods ---------------------------------------------
1027 | 1133 |
1028 |->vmeta_comp: 1134 |->vmeta_comp:
@@ -1117,6 +1223,26 @@ static void build_subroutines(BuildCtx *ctx)
1117 | jmp <3 1223 | jmp <3
1118 |.endif 1224 |.endif
1119 | 1225 |
1226 |->vmeta_istype:
1227 |.if X64
1228 | mov L:RB, SAVE_L
1229 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1230 | mov CARG2d, RA
1231 | movzx CARG3d, PC_RD
1232 | mov L:CARG1d, L:RB
1233 |.else
1234 | movzx RD, PC_RD
1235 | mov ARG2, RA
1236 | mov L:RB, SAVE_L
1237 | mov ARG3, RD
1238 | mov ARG1, L:RB
1239 | mov L:RB->base, BASE
1240 |.endif
1241 | mov SAVE_PC, PC
1242 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1243 | mov BASE, L:RB->base
1244 | jmp <6
1245 |
1120 |//-- Arithmetic metamethods --------------------------------------------- 1246 |//-- Arithmetic metamethods ---------------------------------------------
1121 | 1247 |
1122 |->vmeta_arith_vno: 1248 |->vmeta_arith_vno:
@@ -1289,19 +1415,6 @@ static void build_subroutines(BuildCtx *ctx)
1289 | cmp NARGS:RD, 2+1; jb ->fff_fallback 1415 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1290 |.endmacro 1416 |.endmacro
1291 | 1417 |
1292 |.macro .ffunc_n, name
1293 | .ffunc_1 name
1294 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1295 | fld qword [BASE]
1296 |.endmacro
1297 |
1298 |.macro .ffunc_n, name, op
1299 | .ffunc_1 name
1300 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1301 | op
1302 | fld qword [BASE]
1303 |.endmacro
1304 |
1305 |.macro .ffunc_nsse, name, op 1418 |.macro .ffunc_nsse, name, op
1306 | .ffunc_1 name 1419 | .ffunc_1 name
1307 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1420 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1312,14 +1425,6 @@ static void build_subroutines(BuildCtx *ctx)
1312 | .ffunc_nsse name, movsd 1425 | .ffunc_nsse name, movsd
1313 |.endmacro 1426 |.endmacro
1314 | 1427 |
1315 |.macro .ffunc_nn, name
1316 | .ffunc_2 name
1317 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1318 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1319 | fld qword [BASE]
1320 | fld qword [BASE+8]
1321 |.endmacro
1322 |
1323 |.macro .ffunc_nnsse, name 1428 |.macro .ffunc_nnsse, name
1324 | .ffunc_2 name 1429 | .ffunc_2 name
1325 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 1430 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1525,11 +1630,7 @@ static void build_subroutines(BuildCtx *ctx)
1525 |.else 1630 |.else
1526 | jae ->fff_fallback 1631 | jae ->fff_fallback
1527 |.endif 1632 |.endif
1528 |.if SSE
1529 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 1633 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1530 |.else
1531 | fld qword [BASE]; jmp ->fff_resn
1532 |.endif
1533 | 1634 |
1534 |.ffunc_1 tostring 1635 |.ffunc_1 tostring
1535 | // Only handles the string or number case inline. 1636 | // Only handles the string or number case inline.
@@ -1554,9 +1655,9 @@ static void build_subroutines(BuildCtx *ctx)
1554 |.endif 1655 |.endif
1555 | mov L:FCARG1, L:RB 1656 | mov L:FCARG1, L:RB
1556 |.if DUALNUM 1657 |.if DUALNUM
1557 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) 1658 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1558 |.else 1659 |.else
1559 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1660 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1560 |.endif 1661 |.endif
1561 | // GCstr returned in eax (RD). 1662 | // GCstr returned in eax (RD).
1562 | mov BASE, L:RB->base 1663 | mov BASE, L:RB->base
@@ -1647,19 +1748,12 @@ static void build_subroutines(BuildCtx *ctx)
1647 | add RD, 1 1748 | add RD, 1
1648 | mov dword [BASE-4], LJ_TISNUM 1749 | mov dword [BASE-4], LJ_TISNUM
1649 | mov dword [BASE-8], RD 1750 | mov dword [BASE-8], RD
1650 |.elif SSE 1751 |.else
1651 | movsd xmm0, qword [BASE+8] 1752 | movsd xmm0, qword [BASE+8]
1652 | sseconst_1 xmm1, RBa 1753 | sseconst_1 xmm1, RBa
1653 | addsd xmm0, xmm1 1754 | addsd xmm0, xmm1
1654 | cvtsd2si RD, xmm0 1755 | cvttsd2si RD, xmm0
1655 | movsd qword [BASE-8], xmm0 1756 | movsd qword [BASE-8], xmm0
1656 |.else
1657 | fld qword [BASE+8]
1658 | fld1
1659 | faddp st1
1660 | fist ARG1
1661 | fstp qword [BASE-8]
1662 | mov RD, ARG1
1663 |.endif 1757 |.endif
1664 | mov TAB:RB, [BASE] 1758 | mov TAB:RB, [BASE]
1665 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? 1759 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1706,12 +1800,9 @@ static void build_subroutines(BuildCtx *ctx)
1706 |.if DUALNUM 1800 |.if DUALNUM
1707 | mov dword [BASE+12], LJ_TISNUM 1801 | mov dword [BASE+12], LJ_TISNUM
1708 | mov dword [BASE+8], 0 1802 | mov dword [BASE+8], 0
1709 |.elif SSE 1803 |.else
1710 | xorps xmm0, xmm0 1804 | xorps xmm0, xmm0
1711 | movsd qword [BASE+8], xmm0 1805 | movsd qword [BASE+8], xmm0
1712 |.else
1713 | fldz
1714 | fstp qword [BASE+8]
1715 |.endif 1806 |.endif
1716 | mov RD, 1+3 1807 | mov RD, 1+3
1717 | jmp ->fff_res 1808 | jmp ->fff_res
@@ -1818,7 +1909,6 @@ static void build_subroutines(BuildCtx *ctx)
1818 | mov ARG3, RA 1909 | mov ARG3, RA
1819 |.endif 1910 |.endif
1820 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) 1911 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1821 | set_vmstate INTERP
1822 | 1912 |
1823 | mov L:RB, SAVE_L 1913 | mov L:RB, SAVE_L
1824 |.if X64 1914 |.if X64
@@ -1827,6 +1917,9 @@ static void build_subroutines(BuildCtx *ctx)
1827 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. 1917 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1828 |.endif 1918 |.endif
1829 | mov BASE, L:RB->base 1919 | mov BASE, L:RB->base
1920 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1921 | set_vmstate INTERP
1922 |
1830 | cmp eax, LUA_YIELD 1923 | cmp eax, LUA_YIELD
1831 | ja >8 1924 | ja >8
1832 |4: 1925 |4:
@@ -1941,12 +2034,10 @@ static void build_subroutines(BuildCtx *ctx)
1941 |->fff_resi: // Dummy. 2034 |->fff_resi: // Dummy.
1942 |.endif 2035 |.endif
1943 | 2036 |
1944 |.if SSE
1945 |->fff_resn: 2037 |->fff_resn:
1946 | mov PC, [BASE-4] 2038 | mov PC, [BASE-4]
1947 | fstp qword [BASE-8] 2039 | fstp qword [BASE-8]
1948 | jmp ->fff_res1 2040 | jmp ->fff_res1
1949 |.endif
1950 | 2041 |
1951 | .ffunc_1 math_abs 2042 | .ffunc_1 math_abs
1952 |.if DUALNUM 2043 |.if DUALNUM
@@ -1970,8 +2061,6 @@ static void build_subroutines(BuildCtx *ctx)
1970 |.else 2061 |.else
1971 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2062 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1972 |.endif 2063 |.endif
1973 |
1974 |.if SSE
1975 | movsd xmm0, qword [BASE] 2064 | movsd xmm0, qword [BASE]
1976 | sseconst_abs xmm1, RDa 2065 | sseconst_abs xmm1, RDa
1977 | andps xmm0, xmm1 2066 | andps xmm0, xmm1
@@ -1979,15 +2068,6 @@ static void build_subroutines(BuildCtx *ctx)
1979 | mov PC, [BASE-4] 2068 | mov PC, [BASE-4]
1980 | movsd qword [BASE-8], xmm0 2069 | movsd qword [BASE-8], xmm0
1981 | // fallthrough 2070 | // fallthrough
1982 |.else
1983 | fld qword [BASE]
1984 | fabs
1985 | // fallthrough
1986 |->fff_resxmm0: // Dummy.
1987 |->fff_resn:
1988 | mov PC, [BASE-4]
1989 | fstp qword [BASE-8]
1990 |.endif
1991 | 2071 |
1992 |->fff_res1: 2072 |->fff_res1:
1993 | mov RD, 1+1 2073 | mov RD, 1+1
@@ -2014,6 +2094,12 @@ static void build_subroutines(BuildCtx *ctx)
2014 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 2094 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2015 | jmp ->vm_return 2095 | jmp ->vm_return
2016 | 2096 |
2097 |.if X64
2098 |.define fff_resfp, fff_resxmm0
2099 |.else
2100 |.define fff_resfp, fff_resn
2101 |.endif
2102 |
2017 |.macro math_round, func 2103 |.macro math_round, func
2018 | .ffunc math_ .. func 2104 | .ffunc math_ .. func
2019 |.if DUALNUM 2105 |.if DUALNUM
@@ -2024,107 +2110,75 @@ static void build_subroutines(BuildCtx *ctx)
2024 |.else 2110 |.else
2025 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2111 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2026 |.endif 2112 |.endif
2027 |.if SSE
2028 | movsd xmm0, qword [BASE] 2113 | movsd xmm0, qword [BASE]
2029 | call ->vm_ .. func 2114 | call ->vm_ .. func .. _sse
2030 | .if DUALNUM 2115 |.if DUALNUM
2031 | cvtsd2si RB, xmm0 2116 | cvttsd2si RB, xmm0
2032 | cmp RB, 0x80000000 2117 | cmp RB, 0x80000000
2033 | jne ->fff_resi 2118 | jne ->fff_resi
2034 | cvtsi2sd xmm1, RB 2119 | cvtsi2sd xmm1, RB
2035 | ucomisd xmm0, xmm1 2120 | ucomisd xmm0, xmm1
2036 | jp ->fff_resxmm0 2121 | jp ->fff_resxmm0
2037 | je ->fff_resi 2122 | je ->fff_resi
2038 | .endif
2039 | jmp ->fff_resxmm0
2040 |.else
2041 | fld qword [BASE]
2042 | call ->vm_ .. func
2043 | .if DUALNUM
2044 | fist ARG1
2045 | mov RB, ARG1
2046 | cmp RB, 0x80000000; jne >2
2047 | fdup
2048 | fild ARG1
2049 | fcomparepp
2050 | jp ->fff_resn
2051 | jne ->fff_resn
2052 |2:
2053 | fpop
2054 | jmp ->fff_resi
2055 | .else
2056 | jmp ->fff_resn
2057 | .endif
2058 |.endif 2123 |.endif
2124 | jmp ->fff_resxmm0
2059 |.endmacro 2125 |.endmacro
2060 | 2126 |
2061 | math_round floor 2127 | math_round floor
2062 | math_round ceil 2128 | math_round ceil
2063 | 2129 |
2064 |.if SSE
2065 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 2130 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2066 |.else
2067 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2068 |.endif
2069 | 2131 |
2070 |.ffunc math_log 2132 |.ffunc math_log
2071 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. 2133 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2072 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback 2134 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2073 | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn 2135 | movsd xmm0, qword [BASE]
2074 | 2136 |.if not X64
2075 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn 2137 | movsd FPARG1, xmm0
2076 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn 2138 |.endif
2077 | 2139 | mov RB, BASE
2078 |.ffunc_n math_sin; fsin; jmp ->fff_resn 2140 | call extern log
2079 |.ffunc_n math_cos; fcos; jmp ->fff_resn 2141 | mov BASE, RB
2080 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn 2142 | jmp ->fff_resfp
2081 |
2082 |.ffunc_n math_asin
2083 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
2084 | jmp ->fff_resn
2085 |.ffunc_n math_acos
2086 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
2087 | jmp ->fff_resn
2088 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2089 | 2143 |
2090 |.macro math_extern, func 2144 |.macro math_extern, func
2091 |.if SSE
2092 | .ffunc_nsse math_ .. func 2145 | .ffunc_nsse math_ .. func
2093 | .if not X64 2146 |.if not X64
2094 | movsd FPARG1, xmm0 2147 | movsd FPARG1, xmm0
2095 | .endif
2096 |.else
2097 | .ffunc_n math_ .. func
2098 | fstp FPARG1
2099 |.endif 2148 |.endif
2100 | mov RB, BASE 2149 | mov RB, BASE
2101 | call extern lj_vm_ .. func 2150 | call extern func
2102 | mov BASE, RB 2151 | mov BASE, RB
2103 | .if X64 2152 | jmp ->fff_resfp
2104 | jmp ->fff_resxmm0 2153 |.endmacro
2105 | .else 2154 |
2106 | jmp ->fff_resn 2155 |.macro math_extern2, func
2107 | .endif 2156 | .ffunc_nnsse math_ .. func
2157 |.if not X64
2158 | movsd FPARG1, xmm0
2159 | movsd FPARG3, xmm1
2160 |.endif
2161 | mov RB, BASE
2162 | call extern func
2163 | mov BASE, RB
2164 | jmp ->fff_resfp
2108 |.endmacro 2165 |.endmacro
2109 | 2166 |
2167 | math_extern log10
2168 | math_extern exp
2169 | math_extern sin
2170 | math_extern cos
2171 | math_extern tan
2172 | math_extern asin
2173 | math_extern acos
2174 | math_extern atan
2110 | math_extern sinh 2175 | math_extern sinh
2111 | math_extern cosh 2176 | math_extern cosh
2112 | math_extern tanh 2177 | math_extern tanh
2178 | math_extern2 pow
2179 | math_extern2 atan2
2180 | math_extern2 fmod
2113 | 2181 |
2114 |->ff_math_deg:
2115 |.if SSE
2116 |.ffunc_nsse math_rad
2117 | mov CFUNC:RB, [BASE-8]
2118 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2119 | jmp ->fff_resxmm0
2120 |.else
2121 |.ffunc_n math_rad
2122 | mov CFUNC:RB, [BASE-8]
2123 | fmul qword CFUNC:RB->upvalue[0]
2124 | jmp ->fff_resn
2125 |.endif
2126 |
2127 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
2128 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 2182 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
2129 | 2183 |
2130 |.ffunc_1 math_frexp 2184 |.ffunc_1 math_frexp
@@ -2139,65 +2193,34 @@ static void build_subroutines(BuildCtx *ctx)
2139 | cmp RB, 0x00200000; jb >4 2193 | cmp RB, 0x00200000; jb >4
2140 |1: 2194 |1:
2141 | shr RB, 21; sub RB, RC // Extract and unbias exponent. 2195 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2142 |.if SSE
2143 | cvtsi2sd xmm0, RB 2196 | cvtsi2sd xmm0, RB
2144 |.else
2145 | mov TMP1, RB; fild TMP1
2146 |.endif
2147 | mov RB, [BASE-4] 2197 | mov RB, [BASE-4]
2148 | and RB, 0x800fffff // Mask off exponent. 2198 | and RB, 0x800fffff // Mask off exponent.
2149 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 2199 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2150 | mov [BASE-4], RB 2200 | mov [BASE-4], RB
2151 |2: 2201 |2:
2152 |.if SSE
2153 | movsd qword [BASE], xmm0 2202 | movsd qword [BASE], xmm0
2154 |.else
2155 | fstp qword [BASE]
2156 |.endif
2157 | mov RD, 1+2 2203 | mov RD, 1+2
2158 | jmp ->fff_res 2204 | jmp ->fff_res
2159 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. 2205 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2160 |.if SSE
2161 | xorps xmm0, xmm0; jmp <2 2206 | xorps xmm0, xmm0; jmp <2
2162 |.else
2163 | fldz; jmp <2
2164 |.endif
2165 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 2207 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2166 |.if SSE
2167 | movsd xmm0, qword [BASE] 2208 | movsd xmm0, qword [BASE]
2168 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 2209 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2169 | mulsd xmm0, xmm1 2210 | mulsd xmm0, xmm1
2170 | movsd qword [BASE-8], xmm0 2211 | movsd qword [BASE-8], xmm0
2171 |.else
2172 | fld qword [BASE]
2173 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2174 | fstp qword [BASE-8]
2175 |.endif
2176 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 2212 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2177 | 2213 |
2178 |.if SSE
2179 |.ffunc_nsse math_modf 2214 |.ffunc_nsse math_modf
2180 |.else
2181 |.ffunc_n math_modf
2182 |.endif
2183 | mov RB, [BASE+4] 2215 | mov RB, [BASE+4]
2184 | mov PC, [BASE-4] 2216 | mov PC, [BASE-4]
2185 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 2217 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2186 |.if SSE
2187 | movaps xmm4, xmm0 2218 | movaps xmm4, xmm0
2188 | call ->vm_trunc 2219 | call ->vm_trunc_sse
2189 | subsd xmm4, xmm0 2220 | subsd xmm4, xmm0
2190 |1: 2221 |1:
2191 | movsd qword [BASE-8], xmm0 2222 | movsd qword [BASE-8], xmm0
2192 | movsd qword [BASE], xmm4 2223 | movsd qword [BASE], xmm4
2193 |.else
2194 | fdup
2195 | call ->vm_trunc
2196 | fsub st1, st0
2197 |1:
2198 | fstp qword [BASE-8]
2199 | fstp qword [BASE]
2200 |.endif
2201 | mov RC, [BASE-4]; mov RB, [BASE+4] 2224 | mov RC, [BASE-4]; mov RB, [BASE+4]
2202 | xor RC, RB; js >3 // Need to adjust sign? 2225 | xor RC, RB; js >3 // Need to adjust sign?
2203 |2: 2226 |2:
@@ -2207,24 +2230,9 @@ static void build_subroutines(BuildCtx *ctx)
2207 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. 2230 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2208 | jmp <2 2231 | jmp <2
2209 |4: 2232 |4:
2210 |.if SSE
2211 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 2233 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2212 |.else
2213 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2214 |.endif
2215 |
2216 |.ffunc_nnr math_fmod
2217 |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
2218 | fpop1
2219 | jmp ->fff_resn
2220 | 2234 |
2221 |.if SSE 2235 |.macro math_minmax, name, cmovop, sseop
2222 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2223 |.else
2224 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2225 |.endif
2226 |
2227 |.macro math_minmax, name, cmovop, fcmovop, sseop
2228 | .ffunc name 2236 | .ffunc name
2229 | mov RA, 2 2237 | mov RA, 2
2230 | cmp dword [BASE+4], LJ_TISNUM 2238 | cmp dword [BASE+4], LJ_TISNUM
@@ -2241,12 +2249,7 @@ static void build_subroutines(BuildCtx *ctx)
2241 |3: 2249 |3:
2242 | ja ->fff_fallback 2250 | ja ->fff_fallback
2243 | // Convert intermediate result to number and continue below. 2251 | // Convert intermediate result to number and continue below.
2244 |.if SSE
2245 | cvtsi2sd xmm0, RB 2252 | cvtsi2sd xmm0, RB
2246 |.else
2247 | mov TMP1, RB
2248 | fild TMP1
2249 |.endif
2250 | jmp >6 2253 | jmp >6
2251 |4: 2254 |4:
2252 | ja ->fff_fallback 2255 | ja ->fff_fallback
@@ -2254,7 +2257,6 @@ static void build_subroutines(BuildCtx *ctx)
2254 | jae ->fff_fallback 2257 | jae ->fff_fallback
2255 |.endif 2258 |.endif
2256 | 2259 |
2257 |.if SSE
2258 | movsd xmm0, qword [BASE] 2260 | movsd xmm0, qword [BASE]
2259 |5: // Handle numbers or integers. 2261 |5: // Handle numbers or integers.
2260 | cmp RA, RD; jae ->fff_resxmm0 2262 | cmp RA, RD; jae ->fff_resxmm0
@@ -2273,48 +2275,13 @@ static void build_subroutines(BuildCtx *ctx)
2273 | sseop xmm0, xmm1 2275 | sseop xmm0, xmm1
2274 | add RA, 1 2276 | add RA, 1
2275 | jmp <5 2277 | jmp <5
2276 |.else
2277 | fld qword [BASE]
2278 |5: // Handle numbers or integers.
2279 | cmp RA, RD; jae ->fff_resn
2280 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2281 |.if DUALNUM
2282 | jb >6
2283 | ja >9
2284 | fild dword [BASE+RA*8-8]
2285 | jmp >7
2286 |.else
2287 | jae >9
2288 |.endif
2289 |6:
2290 | fld qword [BASE+RA*8-8]
2291 |7:
2292 | fucomi st1; fcmovop st1; fpop1
2293 | add RA, 1
2294 | jmp <5
2295 |.endif
2296 |.endmacro 2278 |.endmacro
2297 | 2279 |
2298 | math_minmax math_min, cmovg, fcmovnbe, minsd 2280 | math_minmax math_min, cmovg, minsd
2299 | math_minmax math_max, cmovl, fcmovbe, maxsd 2281 | math_minmax math_max, cmovl, maxsd
2300 |.if not SSE
2301 |9:
2302 | fpop; jmp ->fff_fallback
2303 |.endif
2304 | 2282 |
2305 |//-- String library ----------------------------------------------------- 2283 |//-- String library -----------------------------------------------------
2306 | 2284 |
2307 |.ffunc_1 string_len
2308 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2309 | mov STR:RB, [BASE]
2310 |.if DUALNUM
2311 | mov RB, dword STR:RB->len; jmp ->fff_resi
2312 |.elif SSE
2313 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2314 |.else
2315 | fild dword STR:RB->len; jmp ->fff_resn
2316 |.endif
2317 |
2318 |.ffunc string_byte // Only handle the 1-arg case here. 2285 |.ffunc string_byte // Only handle the 1-arg case here.
2319 | cmp NARGS:RD, 1+1; jne ->fff_fallback 2286 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2320 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2287 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2325,10 +2292,8 @@ static void build_subroutines(BuildCtx *ctx)
2325 | movzx RB, byte STR:RB[1] 2292 | movzx RB, byte STR:RB[1]
2326 |.if DUALNUM 2293 |.if DUALNUM
2327 | jmp ->fff_resi 2294 | jmp ->fff_resi
2328 |.elif SSE
2329 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2330 |.else 2295 |.else
2331 | mov TMP1, RB; fild TMP1; jmp ->fff_resn 2296 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2332 |.endif 2297 |.endif
2333 | 2298 |
2334 |.ffunc string_char // Only handle the 1-arg case here. 2299 |.ffunc string_char // Only handle the 1-arg case here.
@@ -2340,16 +2305,11 @@ static void build_subroutines(BuildCtx *ctx)
2340 | mov RB, dword [BASE] 2305 | mov RB, dword [BASE]
2341 | cmp RB, 255; ja ->fff_fallback 2306 | cmp RB, 255; ja ->fff_fallback
2342 | mov TMP2, RB 2307 | mov TMP2, RB
2343 |.elif SSE 2308 |.else
2344 | jae ->fff_fallback 2309 | jae ->fff_fallback
2345 | cvttsd2si RB, qword [BASE] 2310 | cvttsd2si RB, qword [BASE]
2346 | cmp RB, 255; ja ->fff_fallback 2311 | cmp RB, 255; ja ->fff_fallback
2347 | mov TMP2, RB 2312 | mov TMP2, RB
2348 |.else
2349 | jae ->fff_fallback
2350 | fld qword [BASE]
2351 | fistp TMP2
2352 | cmp TMP2, 255; ja ->fff_fallback
2353 |.endif 2313 |.endif
2354 |.if X64 2314 |.if X64
2355 | mov TMP3, 1 2315 | mov TMP3, 1
@@ -2370,6 +2330,7 @@ static void build_subroutines(BuildCtx *ctx)
2370 |.endif 2330 |.endif
2371 | mov SAVE_PC, PC 2331 | mov SAVE_PC, PC
2372 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2332 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2333 |->fff_resstr:
2373 | // GCstr * returned in eax (RD). 2334 | // GCstr * returned in eax (RD).
2374 | mov BASE, L:RB->base 2335 | mov BASE, L:RB->base
2375 | mov PC, [BASE-4] 2336 | mov PC, [BASE-4]
@@ -2387,14 +2348,10 @@ static void build_subroutines(BuildCtx *ctx)
2387 | jne ->fff_fallback 2348 | jne ->fff_fallback
2388 | mov RB, dword [BASE+16] 2349 | mov RB, dword [BASE+16]
2389 | mov TMP2, RB 2350 | mov TMP2, RB
2390 |.elif SSE 2351 |.else
2391 | jae ->fff_fallback 2352 | jae ->fff_fallback
2392 | cvttsd2si RB, qword [BASE+16] 2353 | cvttsd2si RB, qword [BASE+16]
2393 | mov TMP2, RB 2354 | mov TMP2, RB
2394 |.else
2395 | jae ->fff_fallback
2396 | fld qword [BASE+16]
2397 | fistp TMP2
2398 |.endif 2355 |.endif
2399 |1: 2356 |1:
2400 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2357 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2409,12 +2366,8 @@ static void build_subroutines(BuildCtx *ctx)
2409 | mov RB, STR:RB->len 2366 | mov RB, STR:RB->len
2410 |.if DUALNUM 2367 |.if DUALNUM
2411 | mov RA, dword [BASE+8] 2368 | mov RA, dword [BASE+8]
2412 |.elif SSE
2413 | cvttsd2si RA, qword [BASE+8]
2414 |.else 2369 |.else
2415 | fld qword [BASE+8] 2370 | cvttsd2si RA, qword [BASE+8]
2416 | fistp ARG3
2417 | mov RA, ARG3
2418 |.endif 2371 |.endif
2419 | mov RC, TMP2 2372 | mov RC, TMP2
2420 | cmp RB, RC // len < end? (unsigned compare) 2373 | cmp RB, RC // len < end? (unsigned compare)
@@ -2458,136 +2411,34 @@ static void build_subroutines(BuildCtx *ctx)
2458 | xor RC, RC // Zero length. Any ptr in RB is ok. 2411 | xor RC, RC // Zero length. Any ptr in RB is ok.
2459 | jmp <4 2412 | jmp <4
2460 | 2413 |
2461 |.ffunc string_rep // Only handle the 1-char case inline. 2414 |.macro ffstring_op, name
2462 | ffgccheck 2415 | .ffunc_1 string_ .. name
2463 | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
2464 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2465 | cmp dword [BASE+12], LJ_TISNUM
2466 | mov STR:RB, [BASE]
2467 |.if DUALNUM
2468 | jne ->fff_fallback
2469 | mov RC, dword [BASE+8]
2470 |.elif SSE
2471 | jae ->fff_fallback
2472 | cvttsd2si RC, qword [BASE+8]
2473 |.else
2474 | jae ->fff_fallback
2475 | fld qword [BASE+8]
2476 | fistp TMP2
2477 | mov RC, TMP2
2478 |.endif
2479 | test RC, RC
2480 | jle ->fff_emptystr // Count <= 0? (or non-int)
2481 | cmp dword STR:RB->len, 1
2482 | jb ->fff_emptystr // Zero length string?
2483 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2484 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
2485 | movzx RA, byte STR:RB[1]
2486 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2487 |.if X64
2488 | mov TMP3, RC
2489 |.else
2490 | mov ARG3, RC
2491 |.endif
2492 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2493 | mov [RB], RAL
2494 | add RB, 1
2495 | sub RC, 1
2496 | jnz <1
2497 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2498 | jmp ->fff_newstr
2499 |
2500 |.ffunc_1 string_reverse
2501 | ffgccheck 2416 | ffgccheck
2502 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback 2417 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2503 | mov STR:RB, [BASE] 2418 | mov L:RB, SAVE_L
2504 | mov RC, STR:RB->len 2419 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
2505 | test RC, RC 2420 | mov L:RB->base, BASE
2506 | jz ->fff_emptystr // Zero length string? 2421 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2507 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2422 | mov RC, SBUF:FCARG1->b
2508 | add RB, #STR 2423 | mov SBUF:FCARG1->L, L:RB
2509 | mov TMP2, PC // Need another temp register. 2424 | mov SBUF:FCARG1->p, RC
2510 |.if X64 2425 | mov SAVE_PC, PC
2511 | mov TMP3, RC 2426 | call extern lj_buf_putstr_ .. name .. @8
2512 |.else 2427 | mov FCARG1, eax
2513 | mov ARG3, RC 2428 | call extern lj_buf_tostr@4
2514 |.endif 2429 | jmp ->fff_resstr
2515 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2516 |1:
2517 | movzx RA, byte [RB]
2518 | add RB, 1
2519 | sub RC, 1
2520 | mov [PC+RC], RAL
2521 | jnz <1
2522 | mov RD, PC
2523 | mov PC, TMP2
2524 | jmp ->fff_newstr
2525 |
2526 |.macro ffstring_case, name, lo, hi
2527 | .ffunc_1 name
2528 | ffgccheck
2529 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2530 | mov STR:RB, [BASE]
2531 | mov RC, STR:RB->len
2532 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2533 | add RB, #STR
2534 | mov TMP2, PC // Need another temp register.
2535 |.if X64
2536 | mov TMP3, RC
2537 |.else
2538 | mov ARG3, RC
2539 |.endif
2540 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2541 | jmp >3
2542 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2543 | movzx RA, byte [RB+RC]
2544 | cmp RA, lo
2545 | jb >2
2546 | cmp RA, hi
2547 | ja >2
2548 | xor RA, 0x20
2549 |2:
2550 | mov [PC+RC], RAL
2551 |3:
2552 | sub RC, 1
2553 | jns <1
2554 | mov RD, PC
2555 | mov PC, TMP2
2556 | jmp ->fff_newstr
2557 |.endmacro 2430 |.endmacro
2558 | 2431 |
2559 |ffstring_case string_lower, 0x41, 0x5a 2432 |ffstring_op reverse
2560 |ffstring_case string_upper, 0x61, 0x7a 2433 |ffstring_op lower
2561 | 2434 |ffstring_op upper
2562 |//-- Table library ------------------------------------------------------
2563 |
2564 |.ffunc_1 table_getn
2565 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2566 | mov RB, BASE // Save BASE.
2567 | mov TAB:FCARG1, [BASE]
2568 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2569 | // Length of table returned in eax (RD).
2570 | mov BASE, RB // Restore BASE.
2571 |.if DUALNUM
2572 | mov RB, RD; jmp ->fff_resi
2573 |.elif SSE
2574 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2575 |.else
2576 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2577 |.endif
2578 | 2435 |
2579 |//-- Bit library -------------------------------------------------------- 2436 |//-- Bit library --------------------------------------------------------
2580 | 2437 |
2581 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2582 |
2583 |.macro .ffunc_bit, name, kind, fdef 2438 |.macro .ffunc_bit, name, kind, fdef
2584 | fdef name 2439 | fdef name
2585 |.if kind == 2 2440 |.if kind == 2
2586 |.if SSE
2587 | sseconst_tobit xmm1, RBa 2441 | sseconst_tobit xmm1, RBa
2588 |.else
2589 | mov TMP1, TOBIT_BIAS
2590 |.endif
2591 |.endif 2442 |.endif
2592 | cmp dword [BASE+4], LJ_TISNUM 2443 | cmp dword [BASE+4], LJ_TISNUM
2593 |.if DUALNUM 2444 |.if DUALNUM
@@ -2603,24 +2454,12 @@ static void build_subroutines(BuildCtx *ctx)
2603 |.else 2454 |.else
2604 | jae ->fff_fallback 2455 | jae ->fff_fallback
2605 |.endif 2456 |.endif
2606 |.if SSE
2607 | movsd xmm0, qword [BASE] 2457 | movsd xmm0, qword [BASE]
2608 |.if kind < 2 2458 |.if kind < 2
2609 | sseconst_tobit xmm1, RBa 2459 | sseconst_tobit xmm1, RBa
2610 |.endif 2460 |.endif
2611 | addsd xmm0, xmm1 2461 | addsd xmm0, xmm1
2612 | movd RB, xmm0 2462 | movd RB, xmm0
2613 |.else
2614 | fld qword [BASE]
2615 |.if kind < 2
2616 | mov TMP1, TOBIT_BIAS
2617 |.endif
2618 | fadd TMP1
2619 | fstp FPARG1
2620 |.if kind > 0
2621 | mov RB, ARG1
2622 |.endif
2623 |.endif
2624 |2: 2463 |2:
2625 |.endmacro 2464 |.endmacro
2626 | 2465 |
@@ -2629,15 +2468,7 @@ static void build_subroutines(BuildCtx *ctx)
2629 |.endmacro 2468 |.endmacro
2630 | 2469 |
2631 |.ffunc_bit bit_tobit, 0 2470 |.ffunc_bit bit_tobit, 0
2632 |.if DUALNUM or SSE
2633 |.if not SSE
2634 | mov RB, ARG1
2635 |.endif
2636 | jmp ->fff_resbit 2471 | jmp ->fff_resbit
2637 |.else
2638 | fild ARG1
2639 | jmp ->fff_resn
2640 |.endif
2641 | 2472 |
2642 |.macro .ffunc_bit_op, name, ins 2473 |.macro .ffunc_bit_op, name, ins
2643 | .ffunc_bit name, 2 2474 | .ffunc_bit name, 2
@@ -2657,17 +2488,10 @@ static void build_subroutines(BuildCtx *ctx)
2657 |.else 2488 |.else
2658 | jae ->fff_fallback_bit_op 2489 | jae ->fff_fallback_bit_op
2659 |.endif 2490 |.endif
2660 |.if SSE
2661 | movsd xmm0, qword [RD] 2491 | movsd xmm0, qword [RD]
2662 | addsd xmm0, xmm1 2492 | addsd xmm0, xmm1
2663 | movd RA, xmm0 2493 | movd RA, xmm0
2664 | ins RB, RA 2494 | ins RB, RA
2665 |.else
2666 | fld qword [RD]
2667 | fadd TMP1
2668 | fstp FPARG1
2669 | ins RB, ARG1
2670 |.endif
2671 | sub RD, 8 2495 | sub RD, 8
2672 | jmp <1 2496 | jmp <1
2673 |.endmacro 2497 |.endmacro
@@ -2684,15 +2508,10 @@ static void build_subroutines(BuildCtx *ctx)
2684 | not RB 2508 | not RB
2685 |.if DUALNUM 2509 |.if DUALNUM
2686 | jmp ->fff_resbit 2510 | jmp ->fff_resbit
2687 |.elif SSE 2511 |.else
2688 |->fff_resbit: 2512 |->fff_resbit:
2689 | cvtsi2sd xmm0, RB 2513 | cvtsi2sd xmm0, RB
2690 | jmp ->fff_resxmm0 2514 | jmp ->fff_resxmm0
2691 |.else
2692 |->fff_resbit:
2693 | mov ARG1, RB
2694 | fild ARG1
2695 | jmp ->fff_resn
2696 |.endif 2515 |.endif
2697 | 2516 |
2698 |->fff_fallback_bit_op: 2517 |->fff_fallback_bit_op:
@@ -2705,22 +2524,13 @@ static void build_subroutines(BuildCtx *ctx)
2705 | // Note: no inline conversion from number for 2nd argument! 2524 | // Note: no inline conversion from number for 2nd argument!
2706 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback 2525 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2707 | mov RA, dword [BASE+8] 2526 | mov RA, dword [BASE+8]
2708 |.elif SSE 2527 |.else
2709 | .ffunc_nnsse name 2528 | .ffunc_nnsse name
2710 | sseconst_tobit xmm2, RBa 2529 | sseconst_tobit xmm2, RBa
2711 | addsd xmm0, xmm2 2530 | addsd xmm0, xmm2
2712 | addsd xmm1, xmm2 2531 | addsd xmm1, xmm2
2713 | movd RB, xmm0 2532 | movd RB, xmm0
2714 | movd RA, xmm1 2533 | movd RA, xmm1
2715 |.else
2716 | .ffunc_nn name
2717 | mov TMP1, TOBIT_BIAS
2718 | fadd TMP1
2719 | fstp FPARG3
2720 | fadd TMP1
2721 | fstp FPARG1
2722 | mov RA, ARG3
2723 | mov RB, ARG1
2724 |.endif 2534 |.endif
2725 | ins RB, cl // Assumes RA is ecx. 2535 | ins RB, cl // Assumes RA is ecx.
2726 | jmp ->fff_resbit 2536 | jmp ->fff_resbit
@@ -2854,7 +2664,7 @@ static void build_subroutines(BuildCtx *ctx)
2854 | mov FCARG2, PC // Caveat: FCARG2 == BASE 2664 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2855 | mov FCARG1, L:RB 2665 | mov FCARG1, L:RB
2856 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 2666 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2857 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) 2667 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2858 |3: 2668 |3:
2859 | mov BASE, L:RB->base 2669 | mov BASE, L:RB->base
2860 |4: 2670 |4:
@@ -2925,6 +2735,79 @@ static void build_subroutines(BuildCtx *ctx)
2925 | add NARGS:RD, 1 2735 | add NARGS:RD, 1
2926 | jmp RBa 2736 | jmp RBa
2927 | 2737 |
2738 |->cont_stitch: // Trace stitching.
2739 |.if JIT
2740 | // BASE = base, RC = result, RB = mbase
2741 | mov TRACE:RA, [RB-24] // Save previous trace.
2742 | mov TMP1, TRACE:RA
2743 | mov TMP3, DISPATCH // Need one more register.
2744 | mov DISPATCH, MULTRES
2745 | movzx RA, PC_RA
2746 | lea RA, [BASE+RA*8] // Call base.
2747 | sub DISPATCH, 1
2748 | jz >2
2749 |1: // Move results down.
2750 |.if X64
2751 | mov RBa, [RC]
2752 | mov [RA], RBa
2753 |.else
2754 | mov RB, [RC]
2755 | mov [RA], RB
2756 | mov RB, [RC+4]
2757 | mov [RA+4], RB
2758 |.endif
2759 | add RC, 8
2760 | add RA, 8
2761 | sub DISPATCH, 1
2762 | jnz <1
2763 |2:
2764 | movzx RC, PC_RA
2765 | movzx RB, PC_RB
2766 | add RC, RB
2767 | lea RC, [BASE+RC*8-8]
2768 |3:
2769 | cmp RC, RA
2770 | ja >9 // More results wanted?
2771 |
2772 | mov DISPATCH, TMP3
2773 | mov TRACE:RD, TMP1 // Get previous trace.
2774 | movzx RB, word TRACE:RD->traceno
2775 | movzx RD, word TRACE:RD->link
2776 | cmp RD, RB
2777 | je ->cont_nop // Blacklisted.
2778 | test RD, RD
2779 | jne =>BC_JLOOP // Jump to stitched trace.
2780 |
2781 | // Stitch a new trace to the previous trace.
2782 | mov [DISPATCH+DISPATCH_J(exitno)], RB
2783 | mov L:RB, SAVE_L
2784 | mov L:RB->base, BASE
2785 | mov FCARG2, PC
2786 | lea FCARG1, [DISPATCH+GG_DISP2J]
2787 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2788 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2789 | mov BASE, L:RB->base
2790 | jmp ->cont_nop
2791 |
2792 |9: // Fill up results with nil.
2793 | mov dword [RA+4], LJ_TNIL
2794 | add RA, 8
2795 | jmp <3
2796 |.endif
2797 |
2798 |->vm_profhook: // Dispatch target for profiler hook.
2799#if LJ_HASPROFILE
2800 | mov L:RB, SAVE_L
2801 | mov L:RB->base, BASE
2802 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2803 | mov FCARG1, L:RB
2804 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2805 | mov BASE, L:RB->base
2806 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2807 | sub PC, 4
2808 | jmp ->cont_nop
2809#endif
2810 |
2928 |//----------------------------------------------------------------------- 2811 |//-----------------------------------------------------------------------
2929 |//-- Trace exit handler ------------------------------------------------- 2812 |//-- Trace exit handler -------------------------------------------------
2930 |//----------------------------------------------------------------------- 2813 |//-----------------------------------------------------------------------
@@ -2977,10 +2860,9 @@ static void build_subroutines(BuildCtx *ctx)
2977 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 2860 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2978 |.endif 2861 |.endif
2979 | // Caveat: RB is ebp. 2862 | // Caveat: RB is ebp.
2980 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] 2863 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2981 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] 2864 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2982 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa 2865 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2983 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2984 | mov L:RB->base, BASE 2866 | mov L:RB->base, BASE
2985 |.if X64WIN 2867 |.if X64WIN
2986 | lea CARG2, [rsp+4*8] 2868 | lea CARG2, [rsp+4*8]
@@ -2990,6 +2872,7 @@ static void build_subroutines(BuildCtx *ctx)
2990 | lea FCARG2, [esp+16] 2872 | lea FCARG2, [esp+16]
2991 |.endif 2873 |.endif
2992 | lea FCARG1, [DISPATCH+GG_DISP2J] 2874 | lea FCARG1, [DISPATCH+GG_DISP2J]
2875 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2993 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) 2876 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2994 | // MULTRES or negated error code returned in eax (RD). 2877 | // MULTRES or negated error code returned in eax (RD).
2995 | mov RAa, L:RB->cframe 2878 | mov RAa, L:RB->cframe
@@ -3036,12 +2919,14 @@ static void build_subroutines(BuildCtx *ctx)
3036 | mov r13, TMPa 2919 | mov r13, TMPa
3037 | mov r12, TMPQ 2920 | mov r12, TMPQ
3038 |.endif 2921 |.endif
3039 | test RD, RD; js >3 // Check for error from exit. 2922 | test RD, RD; js >9 // Check for error from exit.
2923 | mov L:RB, SAVE_L
3040 | mov MULTRES, RD 2924 | mov MULTRES, RD
3041 | mov LFUNC:KBASE, [BASE-8] 2925 | mov LFUNC:KBASE, [BASE-8]
3042 | mov KBASE, LFUNC:KBASE->pc 2926 | mov KBASE, LFUNC:KBASE->pc
3043 | mov KBASE, [KBASE+PC2PROTO(k)] 2927 | mov KBASE, [KBASE+PC2PROTO(k)]
3044 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 2928 | mov L:RB->base, BASE
2929 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
3045 | set_vmstate INTERP 2930 | set_vmstate INTERP
3046 | // Modified copy of ins_next which handles function header dispatch, too. 2931 | // Modified copy of ins_next which handles function header dispatch, too.
3047 | mov RC, [PC] 2932 | mov RC, [PC]
@@ -3050,16 +2935,31 @@ static void build_subroutines(BuildCtx *ctx)
3050 | add PC, 4 2935 | add PC, 4
3051 | shr RC, 16 2936 | shr RC, 16
3052 | cmp OP, BC_FUNCF // Function header? 2937 | cmp OP, BC_FUNCF // Function header?
3053 | jb >2 2938 | jb >3
3054 | mov RC, MULTRES // RC/RD holds nres+1. 2939 | cmp OP, BC_FUNCC+2 // Fast function?
2940 | jae >4
3055 |2: 2941 |2:
2942 | mov RC, MULTRES // RC/RD holds nres+1.
2943 |3:
3056 |.if X64 2944 |.if X64
3057 | jmp aword [DISPATCH+OP*8] 2945 | jmp aword [DISPATCH+OP*8]
3058 |.else 2946 |.else
3059 | jmp aword [DISPATCH+OP*4] 2947 | jmp aword [DISPATCH+OP*4]
3060 |.endif 2948 |.endif
3061 | 2949 |
3062 |3: // Rethrow error from the right C frame. 2950 |4: // Check frame below fast function.
2951 | mov RC, [BASE-4]
2952 | test RC, FRAME_TYPE
2953 | jnz <2 // Trace stitching continuation?
2954 | // Otherwise set KBASE for Lua function below fast function.
2955 | movzx RC, byte [RC-3]
2956 | not RCa
2957 | mov LFUNC:KBASE, [BASE+RC*8-8]
2958 | mov KBASE, LFUNC:KBASE->pc
2959 | mov KBASE, [KBASE+PC2PROTO(k)]
2960 | jmp <2
2961 |
2962 |9: // Rethrow error from the right C frame.
3063 | neg RD 2963 | neg RD
3064 | mov FCARG1, L:RB 2964 | mov FCARG1, L:RB
3065 | mov FCARG2, RD 2965 | mov FCARG2, RD
@@ -3071,27 +2971,18 @@ static void build_subroutines(BuildCtx *ctx)
3071 |//----------------------------------------------------------------------- 2971 |//-----------------------------------------------------------------------
3072 | 2972 |
3073 |// FP value rounding. Called by math.floor/math.ceil fast functions 2973 |// FP value rounding. Called by math.floor/math.ceil fast functions
3074 |// and from JIT code. 2974 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3075 | 2975 |.macro vm_round, name, mode, cond
3076 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. 2976 |->name:
3077 |.macro vm_round_x87, mode1, mode2 2977 |.if not X64 and cond
3078 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. 2978 | movsd xmm0, qword [esp+4]
3079 | mov [esp+8], eax 2979 | call ->name .. _sse
3080 | mov ax, mode1 2980 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
3081 | or ax, [esp+4] 2981 | fld qword [esp+4]
3082 |.if mode2 ~= 0xffff
3083 | and ax, mode2
3084 |.endif
3085 | mov [esp+6], ax
3086 | fldcw word [esp+6]
3087 | frndint
3088 | fldcw word [esp+4]
3089 | mov eax, [esp+8]
3090 | ret 2982 | ret
3091 |.endmacro 2983 |.endif
3092 | 2984 |
3093 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. 2985 |->name .. _sse:
3094 |.macro vm_round_sse, mode
3095 | sseconst_abs xmm2, RDa 2986 | sseconst_abs xmm2, RDa
3096 | sseconst_2p52 xmm3, RDa 2987 | sseconst_2p52 xmm3, RDa
3097 | movaps xmm1, xmm0 2988 | movaps xmm1, xmm0
@@ -3127,22 +3018,12 @@ static void build_subroutines(BuildCtx *ctx)
3127 | ret 3018 | ret
3128 |.endmacro 3019 |.endmacro
3129 | 3020 |
3130 |.macro vm_round, name, ssemode, mode1, mode2 3021 | vm_round vm_floor, 0, 1
3131 |->name: 3022 | vm_round vm_ceil, 1, JIT
3132 |.if not SSE 3023 | vm_round vm_trunc, 2, JIT
3133 | vm_round_x87 mode1, mode2
3134 |.endif
3135 |->name .. _sse:
3136 | vm_round_sse ssemode
3137 |.endmacro
3138 |
3139 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3140 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3141 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3142 | 3024 |
3143 |// FP modulo x%y. Called by BC_MOD* and vm_arith. 3025 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3144 |->vm_mod: 3026 |->vm_mod:
3145 |.if SSE
3146 |// Args in xmm0/xmm1, return value in xmm0. 3027 |// Args in xmm0/xmm1, return value in xmm0.
3147 |// Caveat: xmm0-xmm5 and RC (eax) modified! 3028 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3148 | movaps xmm5, xmm0 3029 | movaps xmm5, xmm0
@@ -3170,172 +3051,6 @@ static void build_subroutines(BuildCtx *ctx)
3170 | movaps xmm0, xmm5 3051 | movaps xmm0, xmm5
3171 | subsd xmm0, xmm1 3052 | subsd xmm0, xmm1
3172 | ret 3053 | ret
3173 |.else
3174 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3175 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3176 | fld st1
3177 | fdiv st1
3178 | fnstcw word [esp+4]
3179 | mov ax, 0x0400
3180 | or ax, [esp+4]
3181 | and ax, 0xf7ff
3182 | mov [esp+6], ax
3183 | fldcw word [esp+6]
3184 | frndint
3185 | fldcw word [esp+4]
3186 | fmulp st1
3187 | fsubp st1
3188 | ret
3189 |.endif
3190 |
3191 |// FP log2(x). Called by math.log(x, base).
3192 |->vm_log2:
3193 |.if X64WIN
3194 | movsd qword [rsp+8], xmm0 // Use scratch area.
3195 | fld1
3196 | fld qword [rsp+8]
3197 | fyl2x
3198 | fstp qword [rsp+8]
3199 | movsd xmm0, qword [rsp+8]
3200 |.elif X64
3201 | movsd qword [rsp-8], xmm0 // Use red zone.
3202 | fld1
3203 | fld qword [rsp-8]
3204 | fyl2x
3205 | fstp qword [rsp-8]
3206 | movsd xmm0, qword [rsp-8]
3207 |.else
3208 | fld1
3209 | fld qword [esp+4]
3210 | fyl2x
3211 |.endif
3212 | ret
3213 |
3214 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3215 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3216 |// Caveat: needs 3 slots on x87 stack!
3217 |->vm_exp_x87:
3218 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3219 |->vm_exp2_x87:
3220 | .if X64WIN
3221 | .define expscratch, dword [rsp+8] // Use scratch area.
3222 | .elif X64
3223 | .define expscratch, dword [rsp-8] // Use red zone.
3224 | .else
3225 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3226 | .endif
3227 | fst expscratch // Caveat: overwrites ARG1.
3228 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3229 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3230 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3231 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3232 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3233 |1:
3234 | ret
3235 |2:
3236 | fpop; fldz; ret
3237 |
3238 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3239 |// and vm_arith.
3240 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3241 |// Caveat: needs 3 slots on x87 stack!
3242 |->vm_pow:
3243 |.if not SSE
3244 | fist dword [esp+4] // Store/reload int before comparison.
3245 | fild dword [esp+4] // Integral exponent used in vm_powi.
3246 | fucomip st1
3247 | jnz >8 // Branch for FP exponents.
3248 | jp >9 // Branch for NaN exponent.
3249 | fpop // Pop y and fallthrough to vm_powi.
3250 |
3251 |// FP/int power function x^i. Arg1/ret on x87 stack.
3252 |// Arg2 (int) on C stack. RC (eax) modified.
3253 |// Caveat: needs 2 slots on x87 stack!
3254 | mov eax, [esp+4]
3255 | cmp eax, 1; jle >6 // i<=1?
3256 | // Now 1 < (unsigned)i <= 0x80000000.
3257 |1: // Handle leading zeros.
3258 | test eax, 1; jnz >2
3259 | fmul st0
3260 | shr eax, 1
3261 | jmp <1
3262 |2:
3263 | shr eax, 1; jz >5
3264 | fdup
3265 |3: // Handle trailing bits.
3266 | fmul st0
3267 | shr eax, 1; jz >4
3268 | jnc <3
3269 | fmul st1, st0
3270 | jmp <3
3271 |4:
3272 | fmulp st1
3273 |5:
3274 | ret
3275 |6:
3276 | je <5 // x^1 ==> x
3277 | jb >7
3278 | fld1; fdivrp st1
3279 | neg eax
3280 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3281 | jmp <1 // x^-i ==> (1/x)^i
3282 |7:
3283 | fpop; fld1 // x^0 ==> 1
3284 | ret
3285 |
3286 |8: // FP/FP power function x^y.
3287 | fst dword [esp+4]
3288 | fxch
3289 | fst dword [esp+8]
3290 | mov eax, [esp+4]; shl eax, 1
3291 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3292 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3293 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3294 | fyl2x
3295 | jmp ->vm_exp2raw
3296 |
3297 |9: // Handle x^NaN.
3298 | fld1
3299 | fucomip st2
3300 | je >1 // 1^NaN ==> 1
3301 | fxch // x^NaN ==> NaN
3302 |1:
3303 | fpop
3304 | ret
3305 |
3306 |2: // Handle x^+-Inf.
3307 | fabs
3308 | fld1
3309 | fucomip st1
3310 | je >3 // +-1^+-Inf ==> 1
3311 | fpop; fabs; fldz; mov eax, 0; setc al
3312 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3313 | fxch
3314 |3:
3315 | fpop1; fabs
3316 | ret
3317 |
3318 |4: // Handle +-0^y or +-Inf^y.
3319 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3320 | fpop; fpop
3321 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3322 | fldz // y < 0, +-Inf^y ==> 0
3323 | ret
3324 |5:
3325 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3326 | fld dword [esp+4]
3327 | ret
3328 |.endif
3329 |
3330 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3331 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3332 |->vm_pow_sse:
3333 | cvtsd2si eax, xmm1
3334 | cvtsi2sd xmm2, eax
3335 | ucomisd xmm1, xmm2
3336 | jnz >8 // Branch for FP exponents.
3337 | jp >9 // Branch for NaN exponent.
3338 | // Fallthrough to vm_powi_sse.
3339 | 3054 |
3340 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. 3055 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3341 |->vm_powi_sse: 3056 |->vm_powi_sse:
@@ -3372,287 +3087,6 @@ static void build_subroutines(BuildCtx *ctx)
3372 | sseconst_1 xmm0, RDa 3087 | sseconst_1 xmm0, RDa
3373 | ret 3088 | ret
3374 | 3089 |
3375 |8: // FP/FP power function x^y.
3376 |.if X64
3377 | movd rax, xmm1; shl rax, 1
3378 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3379 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3380 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3381 | .if X64WIN
3382 | movsd qword [rsp+16], xmm1 // Use scratch area.
3383 | movsd qword [rsp+8], xmm0
3384 | fld qword [rsp+16]
3385 | fld qword [rsp+8]
3386 | .else
3387 | movsd qword [rsp-16], xmm1 // Use red zone.
3388 | movsd qword [rsp-8], xmm0
3389 | fld qword [rsp-16]
3390 | fld qword [rsp-8]
3391 | .endif
3392 |.else
3393 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3394 | movsd qword [esp+4], xmm0
3395 | cmp dword [esp+12], 0; jne >1
3396 | mov eax, [esp+16]; shl eax, 1
3397 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3398 |1:
3399 | cmp dword [esp+4], 0; jne >1
3400 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3401 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3402 |1:
3403 | fld qword [esp+12]
3404 | fld qword [esp+4]
3405 |.endif
3406 | fyl2x // y*log2(x)
3407 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3408 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3409 |.if X64WIN
3410 | fstp qword [rsp+8] // Use scratch area.
3411 | movsd xmm0, qword [rsp+8]
3412 |.elif X64
3413 | fstp qword [rsp-8] // Use red zone.
3414 | movsd xmm0, qword [rsp-8]
3415 |.else
3416 | fstp qword [esp+4] // Needs 8 byte scratch area.
3417 | movsd xmm0, qword [esp+4]
3418 |.endif
3419 | ret
3420 |
3421 |9: // Handle x^NaN.
3422 | sseconst_1 xmm2, RDa
3423 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3424 | movaps xmm0, xmm1 // x^NaN ==> NaN
3425 |1:
3426 | ret
3427 |
3428 |2: // Handle x^+-Inf.
3429 | sseconst_abs xmm2, RDa
3430 | andpd xmm0, xmm2 // |x|
3431 | sseconst_1 xmm2, RDa
3432 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3433 | movmskpd eax, xmm1
3434 | xorps xmm0, xmm0
3435 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3436 |3:
3437 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3438 | ret
3439 |
3440 |4: // Handle +-0^y.
3441 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3442 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3443 | ret
3444 |
3445 |5: // Handle +-Inf^y.
3446 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3447 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3448 | ret
3449 |
3450 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3451 |// Computes fpm(x) for extended math functions. ORDER FPM.
3452 |->vm_foldfpm:
3453 |.if JIT
3454 |.if X64
3455 | .if X64WIN
3456 | .define fpmop, CARG2d
3457 | .else
3458 | .define fpmop, CARG1d
3459 | .endif
3460 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3461 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3462 | sqrtsd xmm0, xmm0; ret
3463 |2:
3464 | .if X64WIN
3465 | movsd qword [rsp+8], xmm0 // Use scratch area.
3466 | fld qword [rsp+8]
3467 | .else
3468 | movsd qword [rsp-8], xmm0 // Use red zone.
3469 | fld qword [rsp-8]
3470 | .endif
3471 | cmp fpmop, 5; ja >2
3472 | .if X64WIN; pop rax; .endif
3473 | je >1
3474 | call ->vm_exp_x87
3475 | .if X64WIN; push rax; .endif
3476 | jmp >7
3477 |1:
3478 | call ->vm_exp2_x87
3479 | .if X64WIN; push rax; .endif
3480 | jmp >7
3481 |2: ; cmp fpmop, 7; je >1; ja >2
3482 | fldln2; fxch; fyl2x; jmp >7
3483 |1: ; fld1; fxch; fyl2x; jmp >7
3484 |2: ; cmp fpmop, 9; je >1; ja >2
3485 | fldlg2; fxch; fyl2x; jmp >7
3486 |1: ; fsin; jmp >7
3487 |2: ; cmp fpmop, 11; je >1; ja >9
3488 | fcos; jmp >7
3489 |1: ; fptan; fpop
3490 |7:
3491 | .if X64WIN
3492 | fstp qword [rsp+8] // Use scratch area.
3493 | movsd xmm0, qword [rsp+8]
3494 | .else
3495 | fstp qword [rsp-8] // Use red zone.
3496 | movsd xmm0, qword [rsp-8]
3497 | .endif
3498 | ret
3499 |.else // x86 calling convention.
3500 | .define fpmop, eax
3501 |.if SSE
3502 | mov fpmop, [esp+12]
3503 | movsd xmm0, qword [esp+4]
3504 | cmp fpmop, 1; je >1; ja >2
3505 | call ->vm_floor; jmp >7
3506 |1: ; call ->vm_ceil; jmp >7
3507 |2: ; cmp fpmop, 3; je >1; ja >2
3508 | call ->vm_trunc; jmp >7
3509 |1:
3510 | sqrtsd xmm0, xmm0
3511 |7:
3512 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3513 | fld qword [esp+4]
3514 | ret
3515 |2: ; fld qword [esp+4]
3516 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3517 |2: ; cmp fpmop, 7; je >1; ja >2
3518 | fldln2; fxch; fyl2x; ret
3519 |1: ; fld1; fxch; fyl2x; ret
3520 |2: ; cmp fpmop, 9; je >1; ja >2
3521 | fldlg2; fxch; fyl2x; ret
3522 |1: ; fsin; ret
3523 |2: ; cmp fpmop, 11; je >1; ja >9
3524 | fcos; ret
3525 |1: ; fptan; fpop; ret
3526 |.else
3527 | mov fpmop, [esp+12]
3528 | fld qword [esp+4]
3529 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
3530 | cmp fpmop, 3; jb ->vm_trunc; ja >2
3531 | fsqrt; ret
3532 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
3533 | cmp fpmop, 7; je >1; ja >2
3534 | fldln2; fxch; fyl2x; ret
3535 |1: ; fld1; fxch; fyl2x; ret
3536 |2: ; cmp fpmop, 9; je >1; ja >2
3537 | fldlg2; fxch; fyl2x; ret
3538 |1: ; fsin; ret
3539 |2: ; cmp fpmop, 11; je >1; ja >9
3540 | fcos; ret
3541 |1: ; fptan; fpop; ret
3542 |.endif
3543 |.endif
3544 |9: ; int3 // Bad fpm.
3545 |.endif
3546 |
3547 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3548 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3549 |// and basic math functions. ORDER ARITH
3550 |->vm_foldarith:
3551 |.if X64
3552 |
3553 | .if X64WIN
3554 | .define foldop, CARG3d
3555 | .else
3556 | .define foldop, CARG1d
3557 | .endif
3558 | cmp foldop, 1; je >1; ja >2
3559 | addsd xmm0, xmm1; ret
3560 |1: ; subsd xmm0, xmm1; ret
3561 |2: ; cmp foldop, 3; je >1; ja >2
3562 | mulsd xmm0, xmm1; ret
3563 |1: ; divsd xmm0, xmm1; ret
3564 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
3565 | cmp foldop, 7; je >1; ja >2
3566 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
3567 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
3568 |2: ; cmp foldop, 9; ja >2
3569 |.if X64WIN
3570 | movsd qword [rsp+8], xmm0 // Use scratch area.
3571 | movsd qword [rsp+16], xmm1
3572 | fld qword [rsp+8]
3573 | fld qword [rsp+16]
3574 |.else
3575 | movsd qword [rsp-8], xmm0 // Use red zone.
3576 | movsd qword [rsp-16], xmm1
3577 | fld qword [rsp-8]
3578 | fld qword [rsp-16]
3579 |.endif
3580 | je >1
3581 | fpatan
3582 |7:
3583 |.if X64WIN
3584 | fstp qword [rsp+8] // Use scratch area.
3585 | movsd xmm0, qword [rsp+8]
3586 |.else
3587 | fstp qword [rsp-8] // Use red zone.
3588 | movsd xmm0, qword [rsp-8]
3589 |.endif
3590 | ret
3591 |1: ; fxch; fscale; fpop1; jmp <7
3592 |2: ; cmp foldop, 11; je >1; ja >9
3593 | minsd xmm0, xmm1; ret
3594 |1: ; maxsd xmm0, xmm1; ret
3595 |9: ; int3 // Bad op.
3596 |
3597 |.elif SSE // x86 calling convention with SSE ops.
3598 |
3599 | .define foldop, eax
3600 | mov foldop, [esp+20]
3601 | movsd xmm0, qword [esp+4]
3602 | movsd xmm1, qword [esp+12]
3603 | cmp foldop, 1; je >1; ja >2
3604 | addsd xmm0, xmm1
3605 |7:
3606 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3607 | fld qword [esp+4]
3608 | ret
3609 |1: ; subsd xmm0, xmm1; jmp <7
3610 |2: ; cmp foldop, 3; je >1; ja >2
3611 | mulsd xmm0, xmm1; jmp <7
3612 |1: ; divsd xmm0, xmm1; jmp <7
3613 |2: ; cmp foldop, 5
3614 | je >1; ja >2
3615 | call ->vm_mod; jmp <7
3616 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3617 |2: ; cmp foldop, 7; je >1; ja >2
3618 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
3619 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
3620 |2: ; cmp foldop, 9; ja >2
3621 | fld qword [esp+4] // Reload from stack
3622 | fld qword [esp+12]
3623 | je >1
3624 | fpatan; ret
3625 |1: ; fxch; fscale; fpop1; ret
3626 |2: ; cmp foldop, 11; je >1; ja >9
3627 | minsd xmm0, xmm1; jmp <7
3628 |1: ; maxsd xmm0, xmm1; jmp <7
3629 |9: ; int3 // Bad op.
3630 |
3631 |.else // x86 calling convention with x87 ops.
3632 |
3633 | mov eax, [esp+20]
3634 | fld qword [esp+4]
3635 | fld qword [esp+12]
3636 | cmp eax, 1; je >1; ja >2
3637 | faddp st1; ret
3638 |1: ; fsubp st1; ret
3639 |2: ; cmp eax, 3; je >1; ja >2
3640 | fmulp st1; ret
3641 |1: ; fdivp st1; ret
3642 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
3643 | cmp eax, 7; je >1; ja >2
3644 | fpop; fchs; ret
3645 |1: ; fpop; fabs; ret
3646 |2: ; cmp eax, 9; je >1; ja >2
3647 | fpatan; ret
3648 |1: ; fxch; fscale; fpop1; ret
3649 |2: ; cmp eax, 11; je >1; ja >9
3650 | fucomi st1; fcmovnbe st1; fpop1; ret
3651 |1: ; fucomi st1; fcmovbe st1; fpop1; ret
3652 |9: ; int3 // Bad op.
3653 |
3654 |.endif
3655 |
3656 |//----------------------------------------------------------------------- 3090 |//-----------------------------------------------------------------------
3657 |//-- Miscellaneous functions -------------------------------------------- 3091 |//-- Miscellaneous functions --------------------------------------------
3658 |//----------------------------------------------------------------------- 3092 |//-----------------------------------------------------------------------
@@ -3663,6 +3097,7 @@ static void build_subroutines(BuildCtx *ctx)
3663 | mov eax, CARG1d 3097 | mov eax, CARG1d
3664 | .if X64WIN; push rsi; mov rsi, CARG2; .endif 3098 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3665 | push rbx 3099 | push rbx
3100 | xor ecx, ecx
3666 | cpuid 3101 | cpuid
3667 | mov [rsi], eax 3102 | mov [rsi], eax
3668 | mov [rsi+4], ebx 3103 | mov [rsi+4], ebx
@@ -3686,6 +3121,7 @@ static void build_subroutines(BuildCtx *ctx)
3686 | mov eax, [esp+4] // Argument 1 is function number. 3121 | mov eax, [esp+4] // Argument 1 is function number.
3687 | push edi 3122 | push edi
3688 | push ebx 3123 | push ebx
3124 | xor ecx, ecx
3689 | cpuid 3125 | cpuid
3690 | mov edi, [esp+16] // Argument 2 is result area. 3126 | mov edi, [esp+16] // Argument 2 is result area.
3691 | mov [edi], eax 3127 | mov [edi], eax
@@ -3963,19 +3399,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3963 | // RA is a number. 3399 | // RA is a number.
3964 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp 3400 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3965 | // RA is a number, RD is an integer. 3401 | // RA is a number, RD is an integer.
3966 |.if SSE
3967 | cvtsi2sd xmm0, dword [BASE+RD*8] 3402 | cvtsi2sd xmm0, dword [BASE+RD*8]
3968 | jmp >2 3403 | jmp >2
3969 |.else
3970 | fld qword [BASE+RA*8]
3971 | fild dword [BASE+RD*8]
3972 | jmp >3
3973 |.endif
3974 | 3404 |
3975 |8: // RA is an integer, RD is not an integer. 3405 |8: // RA is an integer, RD is not an integer.
3976 | ja ->vmeta_comp 3406 | ja ->vmeta_comp
3977 | // RA is an integer, RD is a number. 3407 | // RA is an integer, RD is a number.
3978 |.if SSE
3979 | cvtsi2sd xmm1, dword [BASE+RA*8] 3408 | cvtsi2sd xmm1, dword [BASE+RA*8]
3980 | movsd xmm0, qword [BASE+RD*8] 3409 | movsd xmm0, qword [BASE+RD*8]
3981 | add PC, 4 3410 | add PC, 4
@@ -3983,29 +3412,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3983 | jmp_comp jbe, ja, jb, jae, <9 3412 | jmp_comp jbe, ja, jb, jae, <9
3984 | jmp <6 3413 | jmp <6
3985 |.else 3414 |.else
3986 | fild dword [BASE+RA*8]
3987 | jmp >2
3988 |.endif
3989 |.else
3990 | checknum RA, ->vmeta_comp 3415 | checknum RA, ->vmeta_comp
3991 | checknum RD, ->vmeta_comp 3416 | checknum RD, ->vmeta_comp
3992 |.endif 3417 |.endif
3993 |.if SSE
3994 |1: 3418 |1:
3995 | movsd xmm0, qword [BASE+RD*8] 3419 | movsd xmm0, qword [BASE+RD*8]
3996 |2: 3420 |2:
3997 | add PC, 4 3421 | add PC, 4
3998 | ucomisd xmm0, qword [BASE+RA*8] 3422 | ucomisd xmm0, qword [BASE+RA*8]
3999 |3: 3423 |3:
4000 |.else
4001 |1:
4002 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4003 |2:
4004 | fld qword [BASE+RD*8]
4005 |3:
4006 | add PC, 4
4007 | fcomparepp
4008 |.endif
4009 | // Unordered: all of ZF CF PF set, ordered: PF clear. 3424 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4010 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. 3425 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4011 |.if DUALNUM 3426 |.if DUALNUM
@@ -4045,43 +3460,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4045 | // RD is a number. 3460 | // RD is a number.
4046 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 3461 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4047 | // RD is a number, RA is an integer. 3462 | // RD is a number, RA is an integer.
4048 |.if SSE
4049 | cvtsi2sd xmm0, dword [BASE+RA*8] 3463 | cvtsi2sd xmm0, dword [BASE+RA*8]
4050 |.else
4051 | fild dword [BASE+RA*8]
4052 |.endif
4053 | jmp >2 3464 | jmp >2
4054 | 3465 |
4055 |8: // RD is an integer, RA is not an integer. 3466 |8: // RD is an integer, RA is not an integer.
4056 | ja >5 3467 | ja >5
4057 | // RD is an integer, RA is a number. 3468 | // RD is an integer, RA is a number.
4058 |.if SSE
4059 | cvtsi2sd xmm0, dword [BASE+RD*8] 3469 | cvtsi2sd xmm0, dword [BASE+RD*8]
4060 | ucomisd xmm0, qword [BASE+RA*8] 3470 | ucomisd xmm0, qword [BASE+RA*8]
4061 |.else
4062 | fild dword [BASE+RD*8]
4063 | fld qword [BASE+RA*8]
4064 |.endif
4065 | jmp >4 3471 | jmp >4
4066 | 3472 |
4067 |.else 3473 |.else
4068 | cmp RB, LJ_TISNUM; jae >5 3474 | cmp RB, LJ_TISNUM; jae >5
4069 | checknum RA, >5 3475 | checknum RA, >5
4070 |.endif 3476 |.endif
4071 |.if SSE
4072 |1: 3477 |1:
4073 | movsd xmm0, qword [BASE+RA*8] 3478 | movsd xmm0, qword [BASE+RA*8]
4074 |2: 3479 |2:
4075 | ucomisd xmm0, qword [BASE+RD*8] 3480 | ucomisd xmm0, qword [BASE+RD*8]
4076 |4: 3481 |4:
4077 |.else
4078 |1:
4079 | fld qword [BASE+RA*8]
4080 |2:
4081 | fld qword [BASE+RD*8]
4082 |4:
4083 | fcomparepp
4084 |.endif
4085 iseqne_fp: 3482 iseqne_fp:
4086 if (vk) { 3483 if (vk) {
4087 | jp >2 // Unordered means not equal. 3484 | jp >2 // Unordered means not equal.
@@ -4204,39 +3601,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4204 | // RA is a number. 3601 | // RA is a number.
4205 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 3602 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4206 | // RA is a number, RD is an integer. 3603 | // RA is a number, RD is an integer.
4207 |.if SSE
4208 | cvtsi2sd xmm0, dword [KBASE+RD*8] 3604 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4209 |.else
4210 | fild dword [KBASE+RD*8]
4211 |.endif
4212 | jmp >2 3605 | jmp >2
4213 | 3606 |
4214 |8: // RA is an integer, RD is a number. 3607 |8: // RA is an integer, RD is a number.
4215 |.if SSE
4216 | cvtsi2sd xmm0, dword [BASE+RA*8] 3608 | cvtsi2sd xmm0, dword [BASE+RA*8]
4217 | ucomisd xmm0, qword [KBASE+RD*8] 3609 | ucomisd xmm0, qword [KBASE+RD*8]
4218 |.else
4219 | fild dword [BASE+RA*8]
4220 | fld qword [KBASE+RD*8]
4221 |.endif
4222 | jmp >4 3610 | jmp >4
4223 |.else 3611 |.else
4224 | cmp RB, LJ_TISNUM; jae >3 3612 | cmp RB, LJ_TISNUM; jae >3
4225 |.endif 3613 |.endif
4226 |.if SSE
4227 |1: 3614 |1:
4228 | movsd xmm0, qword [KBASE+RD*8] 3615 | movsd xmm0, qword [KBASE+RD*8]
4229 |2: 3616 |2:
4230 | ucomisd xmm0, qword [BASE+RA*8] 3617 | ucomisd xmm0, qword [BASE+RA*8]
4231 |4: 3618 |4:
4232 |.else
4233 |1:
4234 | fld qword [KBASE+RD*8]
4235 |2:
4236 | fld qword [BASE+RA*8]
4237 |4:
4238 | fcomparepp
4239 |.endif
4240 goto iseqne_fp; 3619 goto iseqne_fp;
4241 case BC_ISEQP: case BC_ISNEP: 3620 case BC_ISEQP: case BC_ISNEP:
4242 vk = op == BC_ISEQP; 3621 vk = op == BC_ISEQP;
@@ -4287,6 +3666,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4287 | ins_next 3666 | ins_next
4288 break; 3667 break;
4289 3668
3669 case BC_ISTYPE:
3670 | ins_AD // RA = src, RD = -type
3671 | add RD, [BASE+RA*8+4]
3672 | jne ->vmeta_istype
3673 | ins_next
3674 break;
3675 case BC_ISNUM:
3676 | ins_AD // RA = src, RD = -(TISNUM-1)
3677 | checknum RA, ->vmeta_istype
3678 | ins_next
3679 break;
3680
4290 /* -- Unary ops --------------------------------------------------------- */ 3681 /* -- Unary ops --------------------------------------------------------- */
4291 3682
4292 case BC_MOV: 3683 case BC_MOV:
@@ -4330,16 +3721,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4330 |.else 3721 |.else
4331 | checknum RD, ->vmeta_unm 3722 | checknum RD, ->vmeta_unm
4332 |.endif 3723 |.endif
4333 |.if SSE
4334 | movsd xmm0, qword [BASE+RD*8] 3724 | movsd xmm0, qword [BASE+RD*8]
4335 | sseconst_sign xmm1, RDa 3725 | sseconst_sign xmm1, RDa
4336 | xorps xmm0, xmm1 3726 | xorps xmm0, xmm1
4337 | movsd qword [BASE+RA*8], xmm0 3727 | movsd qword [BASE+RA*8], xmm0
4338 |.else
4339 | fld qword [BASE+RD*8]
4340 | fchs
4341 | fstp qword [BASE+RA*8]
4342 |.endif
4343 |.if DUALNUM 3728 |.if DUALNUM
4344 | jmp <9 3729 | jmp <9
4345 |.else 3730 |.else
@@ -4355,15 +3740,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4355 |1: 3740 |1:
4356 | mov dword [BASE+RA*8+4], LJ_TISNUM 3741 | mov dword [BASE+RA*8+4], LJ_TISNUM
4357 | mov dword [BASE+RA*8], RD 3742 | mov dword [BASE+RA*8], RD
4358 |.elif SSE 3743 |.else
4359 | xorps xmm0, xmm0 3744 | xorps xmm0, xmm0
4360 | cvtsi2sd xmm0, dword STR:RD->len 3745 | cvtsi2sd xmm0, dword STR:RD->len
4361 |1: 3746 |1:
4362 | movsd qword [BASE+RA*8], xmm0 3747 | movsd qword [BASE+RA*8], xmm0
4363 |.else
4364 | fild dword STR:RD->len
4365 |1:
4366 | fstp qword [BASE+RA*8]
4367 |.endif 3748 |.endif
4368 | ins_next 3749 | ins_next
4369 |2: 3750 |2:
@@ -4381,11 +3762,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4381 | // Length of table returned in eax (RD). 3762 | // Length of table returned in eax (RD).
4382 |.if DUALNUM 3763 |.if DUALNUM
4383 | // Nothing to do. 3764 | // Nothing to do.
4384 |.elif SSE
4385 | cvtsi2sd xmm0, RD
4386 |.else 3765 |.else
4387 | mov ARG1, RD 3766 | cvtsi2sd xmm0, RD
4388 | fild ARG1
4389 |.endif 3767 |.endif
4390 | mov BASE, RB // Restore BASE. 3768 | mov BASE, RB // Restore BASE.
4391 | movzx RA, PC_RA 3769 | movzx RA, PC_RA
@@ -4400,7 +3778,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4400 3778
4401 /* -- Binary ops -------------------------------------------------------- */ 3779 /* -- Binary ops -------------------------------------------------------- */
4402 3780
4403 |.macro ins_arithpre, x87ins, sseins, ssereg 3781 |.macro ins_arithpre, sseins, ssereg
4404 | ins_ABC 3782 | ins_ABC
4405 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); 3783 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4406 ||switch (vk) { 3784 ||switch (vk) {
@@ -4409,37 +3787,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4409 | .if DUALNUM 3787 | .if DUALNUM
4410 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn 3788 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4411 | .endif 3789 | .endif
4412 | .if SSE 3790 | movsd xmm0, qword [BASE+RB*8]
4413 | movsd xmm0, qword [BASE+RB*8] 3791 | sseins ssereg, qword [KBASE+RC*8]
4414 | sseins ssereg, qword [KBASE+RC*8]
4415 | .else
4416 | fld qword [BASE+RB*8]
4417 | x87ins qword [KBASE+RC*8]
4418 | .endif
4419 || break; 3792 || break;
4420 ||case 1: 3793 ||case 1:
4421 | checknum RB, ->vmeta_arith_nv 3794 | checknum RB, ->vmeta_arith_nv
4422 | .if DUALNUM 3795 | .if DUALNUM
4423 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv 3796 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4424 | .endif 3797 | .endif
4425 | .if SSE 3798 | movsd xmm0, qword [KBASE+RC*8]
4426 | movsd xmm0, qword [KBASE+RC*8] 3799 | sseins ssereg, qword [BASE+RB*8]
4427 | sseins ssereg, qword [BASE+RB*8]
4428 | .else
4429 | fld qword [KBASE+RC*8]
4430 | x87ins qword [BASE+RB*8]
4431 | .endif
4432 || break; 3800 || break;
4433 ||default: 3801 ||default:
4434 | checknum RB, ->vmeta_arith_vv 3802 | checknum RB, ->vmeta_arith_vv
4435 | checknum RC, ->vmeta_arith_vv 3803 | checknum RC, ->vmeta_arith_vv
4436 | .if SSE 3804 | movsd xmm0, qword [BASE+RB*8]
4437 | movsd xmm0, qword [BASE+RB*8] 3805 | sseins ssereg, qword [BASE+RC*8]
4438 | sseins ssereg, qword [BASE+RC*8]
4439 | .else
4440 | fld qword [BASE+RB*8]
4441 | x87ins qword [BASE+RC*8]
4442 | .endif
4443 || break; 3806 || break;
4444 ||} 3807 ||}
4445 |.endmacro 3808 |.endmacro
@@ -4477,55 +3840,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4477 |.endmacro 3840 |.endmacro
4478 | 3841 |
4479 |.macro ins_arithpost 3842 |.macro ins_arithpost
4480 |.if SSE
4481 | movsd qword [BASE+RA*8], xmm0 3843 | movsd qword [BASE+RA*8], xmm0
4482 |.else
4483 | fstp qword [BASE+RA*8]
4484 |.endif
4485 |.endmacro 3844 |.endmacro
4486 | 3845 |
4487 |.macro ins_arith, x87ins, sseins 3846 |.macro ins_arith, sseins
4488 | ins_arithpre x87ins, sseins, xmm0 3847 | ins_arithpre sseins, xmm0
4489 | ins_arithpost 3848 | ins_arithpost
4490 | ins_next 3849 | ins_next
4491 |.endmacro 3850 |.endmacro
4492 | 3851 |
4493 |.macro ins_arith, intins, x87ins, sseins 3852 |.macro ins_arith, intins, sseins
4494 |.if DUALNUM 3853 |.if DUALNUM
4495 | ins_arithdn intins 3854 | ins_arithdn intins
4496 |.else 3855 |.else
4497 | ins_arith, x87ins, sseins 3856 | ins_arith, sseins
4498 |.endif 3857 |.endif
4499 |.endmacro 3858 |.endmacro
4500 3859
4501 | // RA = dst, RB = src1 or num const, RC = src2 or num const 3860 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4502 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: 3861 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4503 | ins_arith add, fadd, addsd 3862 | ins_arith add, addsd
4504 break; 3863 break;
4505 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: 3864 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4506 | ins_arith sub, fsub, subsd 3865 | ins_arith sub, subsd
4507 break; 3866 break;
4508 case BC_MULVN: case BC_MULNV: case BC_MULVV: 3867 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4509 | ins_arith imul, fmul, mulsd 3868 | ins_arith imul, mulsd
4510 break; 3869 break;
4511 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: 3870 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4512 | ins_arith fdiv, divsd 3871 | ins_arith divsd
4513 break; 3872 break;
4514 case BC_MODVN: 3873 case BC_MODVN:
4515 | ins_arithpre fld, movsd, xmm1 3874 | ins_arithpre movsd, xmm1
4516 |->BC_MODVN_Z: 3875 |->BC_MODVN_Z:
4517 | call ->vm_mod 3876 | call ->vm_mod
4518 | ins_arithpost 3877 | ins_arithpost
4519 | ins_next 3878 | ins_next
4520 break; 3879 break;
4521 case BC_MODNV: case BC_MODVV: 3880 case BC_MODNV: case BC_MODVV:
4522 | ins_arithpre fld, movsd, xmm1 3881 | ins_arithpre movsd, xmm1
4523 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. 3882 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4524 break; 3883 break;
4525 case BC_POW: 3884 case BC_POW:
4526 | ins_arithpre fld, movsd, xmm1 3885 | ins_arithpre movsd, xmm1
4527 | call ->vm_pow 3886 | mov RB, BASE
3887 |.if not X64
3888 | movsd FPARG1, xmm0
3889 | movsd FPARG3, xmm1
3890 |.endif
3891 | call extern pow
3892 | movzx RA, PC_RA
3893 | mov BASE, RB
3894 |.if X64
4528 | ins_arithpost 3895 | ins_arithpost
3896 |.else
3897 | fstp qword [BASE+RA*8]
3898 |.endif
4529 | ins_next 3899 | ins_next
4530 break; 3900 break;
4531 3901
@@ -4593,25 +3963,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4593 | movsx RD, RDW 3963 | movsx RD, RDW
4594 | mov dword [BASE+RA*8+4], LJ_TISNUM 3964 | mov dword [BASE+RA*8+4], LJ_TISNUM
4595 | mov dword [BASE+RA*8], RD 3965 | mov dword [BASE+RA*8], RD
4596 |.elif SSE 3966 |.else
4597 | movsx RD, RDW // Sign-extend literal. 3967 | movsx RD, RDW // Sign-extend literal.
4598 | cvtsi2sd xmm0, RD 3968 | cvtsi2sd xmm0, RD
4599 | movsd qword [BASE+RA*8], xmm0 3969 | movsd qword [BASE+RA*8], xmm0
4600 |.else
4601 | fild PC_RD // Refetch signed RD from instruction.
4602 | fstp qword [BASE+RA*8]
4603 |.endif 3970 |.endif
4604 | ins_next 3971 | ins_next
4605 break; 3972 break;
4606 case BC_KNUM: 3973 case BC_KNUM:
4607 | ins_AD // RA = dst, RD = num const 3974 | ins_AD // RA = dst, RD = num const
4608 |.if SSE
4609 | movsd xmm0, qword [KBASE+RD*8] 3975 | movsd xmm0, qword [KBASE+RD*8]
4610 | movsd qword [BASE+RA*8], xmm0 3976 | movsd qword [BASE+RA*8], xmm0
4611 |.else
4612 | fld qword [KBASE+RD*8]
4613 | fstp qword [BASE+RA*8]
4614 |.endif
4615 | ins_next 3977 | ins_next
4616 break; 3978 break;
4617 case BC_KPRI: 3979 case BC_KPRI:
@@ -4718,18 +4080,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4718 case BC_USETN: 4080 case BC_USETN:
4719 | ins_AD // RA = upvalue #, RD = num const 4081 | ins_AD // RA = upvalue #, RD = num const
4720 | mov LFUNC:RB, [BASE-8] 4082 | mov LFUNC:RB, [BASE-8]
4721 |.if SSE
4722 | movsd xmm0, qword [KBASE+RD*8] 4083 | movsd xmm0, qword [KBASE+RD*8]
4723 |.else
4724 | fld qword [KBASE+RD*8]
4725 |.endif
4726 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] 4084 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4727 | mov RA, UPVAL:RB->v 4085 | mov RA, UPVAL:RB->v
4728 |.if SSE
4729 | movsd qword [RA], xmm0 4086 | movsd qword [RA], xmm0
4730 |.else
4731 | fstp qword [RA]
4732 |.endif
4733 | ins_next 4087 | ins_next
4734 break; 4088 break;
4735 case BC_USETP: 4089 case BC_USETP:
@@ -4883,18 +4237,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
4883 |.else 4237 |.else
4884 | // Convert number to int and back and compare. 4238 | // Convert number to int and back and compare.
4885 | checknum RC, >5 4239 | checknum RC, >5
4886 |.if SSE
4887 | movsd xmm0, qword [BASE+RC*8] 4240 | movsd xmm0, qword [BASE+RC*8]
4888 | cvtsd2si RC, xmm0 4241 | cvttsd2si RC, xmm0
4889 | cvtsi2sd xmm1, RC 4242 | cvtsi2sd xmm1, RC
4890 | ucomisd xmm0, xmm1 4243 | ucomisd xmm0, xmm1
4891 |.else
4892 | fld qword [BASE+RC*8]
4893 | fist ARG1
4894 | fild ARG1
4895 | fcomparepp
4896 | mov RC, ARG1
4897 |.endif
4898 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. 4244 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4899 |.endif 4245 |.endif
4900 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4246 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5018,6 +4364,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5018 | mov dword [BASE+RA*8+4], LJ_TNIL 4364 | mov dword [BASE+RA*8+4], LJ_TNIL
5019 | jmp <1 4365 | jmp <1
5020 break; 4366 break;
4367 case BC_TGETR:
4368 | ins_ABC // RA = dst, RB = table, RC = key
4369 | mov TAB:RB, [BASE+RB*8]
4370 |.if DUALNUM
4371 | mov RC, dword [BASE+RC*8]
4372 |.else
4373 | cvttsd2si RC, qword [BASE+RC*8]
4374 |.endif
4375 | cmp RC, TAB:RB->asize
4376 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4377 | shl RC, 3
4378 | add RC, TAB:RB->array
4379 | // Get array slot.
4380 |->BC_TGETR_Z:
4381 |.if X64
4382 | mov RBa, [RC]
4383 | mov [BASE+RA*8], RBa
4384 |.else
4385 | mov RB, [RC]
4386 | mov RC, [RC+4]
4387 | mov [BASE+RA*8], RB
4388 | mov [BASE+RA*8+4], RC
4389 |.endif
4390 |->BC_TGETR2_Z:
4391 | ins_next
4392 break;
5021 4393
5022 case BC_TSETV: 4394 case BC_TSETV:
5023 | ins_ABC // RA = src, RB = table, RC = key 4395 | ins_ABC // RA = src, RB = table, RC = key
@@ -5031,18 +4403,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5031 |.else 4403 |.else
5032 | // Convert number to int and back and compare. 4404 | // Convert number to int and back and compare.
5033 | checknum RC, >5 4405 | checknum RC, >5
5034 |.if SSE
5035 | movsd xmm0, qword [BASE+RC*8] 4406 | movsd xmm0, qword [BASE+RC*8]
5036 | cvtsd2si RC, xmm0 4407 | cvttsd2si RC, xmm0
5037 | cvtsi2sd xmm1, RC 4408 | cvtsi2sd xmm1, RC
5038 | ucomisd xmm0, xmm1 4409 | ucomisd xmm0, xmm1
5039 |.else
5040 | fld qword [BASE+RC*8]
5041 | fist ARG1
5042 | fild ARG1
5043 | fcomparepp
5044 | mov RC, ARG1
5045 |.endif
5046 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. 4410 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5047 |.endif 4411 |.endif
5048 | cmp RC, TAB:RB->asize // Takes care of unordered, too. 4412 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5212,6 +4576,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5212 | movzx RA, PC_RA // Restore RA. 4576 | movzx RA, PC_RA // Restore RA.
5213 | jmp <2 4577 | jmp <2
5214 break; 4578 break;
4579 case BC_TSETR:
4580 | ins_ABC // RA = src, RB = table, RC = key
4581 | mov TAB:RB, [BASE+RB*8]
4582 |.if DUALNUM
4583 | mov RC, dword [BASE+RC*8]
4584 |.else
4585 | cvttsd2si RC, qword [BASE+RC*8]
4586 |.endif
4587 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4588 | jnz >7
4589 |2:
4590 | cmp RC, TAB:RB->asize
4591 | jae ->vmeta_tsetr
4592 | shl RC, 3
4593 | add RC, TAB:RB->array
4594 | // Set array slot.
4595 |->BC_TSETR_Z:
4596 |.if X64
4597 | mov RBa, [BASE+RA*8]
4598 | mov [RC], RBa
4599 |.else
4600 | mov RB, [BASE+RA*8+4]
4601 | mov RA, [BASE+RA*8]
4602 | mov [RC+4], RB
4603 | mov [RC], RA
4604 |.endif
4605 | ins_next
4606 |
4607 |7: // Possible table write barrier for the value. Skip valiswhite check.
4608 | barrierback TAB:RB, RA
4609 | movzx RA, PC_RA // Restore RA.
4610 | jmp <2
4611 break;
5215 4612
5216 case BC_TSETM: 4613 case BC_TSETM:
5217 | ins_AD // RA = base (table at base-1), RD = num const (start index) 4614 | ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5405,10 +4802,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5405 |.if DUALNUM 4802 |.if DUALNUM
5406 | mov dword [BASE+RA*8+4], LJ_TISNUM 4803 | mov dword [BASE+RA*8+4], LJ_TISNUM
5407 | mov dword [BASE+RA*8], RC 4804 | mov dword [BASE+RA*8], RC
5408 |.elif SSE
5409 | cvtsi2sd xmm0, RC
5410 |.else 4805 |.else
5411 | fild dword [BASE+RA*8-8] 4806 | cvtsi2sd xmm0, RC
5412 |.endif 4807 |.endif
5413 | // Copy array slot to returned value. 4808 | // Copy array slot to returned value.
5414 |.if X64 4809 |.if X64
@@ -5424,10 +4819,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5424 | // Return array index as a numeric key. 4819 | // Return array index as a numeric key.
5425 |.if DUALNUM 4820 |.if DUALNUM
5426 | // See above. 4821 | // See above.
5427 |.elif SSE
5428 | movsd qword [BASE+RA*8], xmm0
5429 |.else 4822 |.else
5430 | fstp qword [BASE+RA*8] 4823 | movsd qword [BASE+RA*8], xmm0
5431 |.endif 4824 |.endif
5432 | mov [BASE+RA*8-8], RC // Update control var. 4825 | mov [BASE+RA*8-8], RC // Update control var.
5433 |2: 4826 |2:
@@ -5440,9 +4833,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5440 | 4833 |
5441 |4: // Skip holes in array part. 4834 |4: // Skip holes in array part.
5442 | add RC, 1 4835 | add RC, 1
5443 |.if not (DUALNUM or SSE)
5444 | mov [BASE+RA*8-8], RC
5445 |.endif
5446 | jmp <1 4836 | jmp <1
5447 | 4837 |
5448 |5: // Traverse hash part. 4838 |5: // Traverse hash part.
@@ -5776,7 +5166,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5776 if (!vk) { 5166 if (!vk) {
5777 | cmp RB, LJ_TISNUM; jae ->vmeta_for 5167 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5778 } 5168 }
5779 |.if SSE
5780 | movsd xmm0, qword FOR_IDX 5169 | movsd xmm0, qword FOR_IDX
5781 | movsd xmm1, qword FOR_STOP 5170 | movsd xmm1, qword FOR_STOP
5782 if (vk) { 5171 if (vk) {
@@ -5789,22 +5178,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5789 | ucomisd xmm1, xmm0 5178 | ucomisd xmm1, xmm0
5790 |1: 5179 |1:
5791 | movsd qword FOR_EXT, xmm0 5180 | movsd qword FOR_EXT, xmm0
5792 |.else
5793 | fld qword FOR_STOP
5794 | fld qword FOR_IDX
5795 if (vk) {
5796 | fadd qword FOR_STEP // nidx = idx + step
5797 | fst qword FOR_IDX
5798 | fst qword FOR_EXT
5799 | test RB, RB; js >1
5800 } else {
5801 | fst qword FOR_EXT
5802 | jl >1
5803 }
5804 | fxch // Swap lim/(n)idx if step non-negative.
5805 |1:
5806 | fcomparepp
5807 |.endif
5808 if (op == BC_FORI) { 5181 if (op == BC_FORI) {
5809 |.if DUALNUM 5182 |.if DUALNUM
5810 | jnb <7 5183 | jnb <7
@@ -5832,11 +5205,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5832 |2: 5205 |2:
5833 | ins_next 5206 | ins_next
5834 |.endif 5207 |.endif
5835 |.if SSE 5208 |
5836 |3: // Invert comparison if step is negative. 5209 |3: // Invert comparison if step is negative.
5837 | ucomisd xmm0, xmm1 5210 | ucomisd xmm0, xmm1
5838 | jmp <1 5211 | jmp <1
5839 |.endif
5840 break; 5212 break;
5841 5213
5842 case BC_ITERL: 5214 case BC_ITERL:
@@ -5874,7 +5246,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5874 | ins_A // RA = base, RD = target (loop extent) 5246 | ins_A // RA = base, RD = target (loop extent)
5875 | // Note: RA/RD is only used by trace recorder to determine scope/extent 5247 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5876 | // This opcode does NOT jump, its only purpose is to detect a hot loop. 5248 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5877 |.if JIT 5249 |.if JIT
5878 | hotloop RB 5250 | hotloop RB
5879 |.endif 5251 |.endif
5880 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 5252 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5893,7 +5265,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
5893 | mov RDa, TRACE:RD->mcode 5265 | mov RDa, TRACE:RD->mcode
5894 | mov L:RB, SAVE_L 5266 | mov L:RB, SAVE_L
5895 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE 5267 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5896 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB 5268 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5897 | // Save additional callee-save registers only used in compiled code. 5269 | // Save additional callee-save registers only used in compiled code.
5898 |.if X64WIN 5270 |.if X64WIN
5899 | mov TMPQ, r12 5271 | mov TMPQ, r12
@@ -6060,9 +5432,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
6060 | // (lua_State *L, lua_CFunction f) 5432 | // (lua_State *L, lua_CFunction f)
6061 | call aword [DISPATCH+DISPATCH_GL(wrapf)] 5433 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6062 } 5434 }
6063 | set_vmstate INTERP
6064 | // nresults returned in eax (RD). 5435 | // nresults returned in eax (RD).
6065 | mov BASE, L:RB->base 5436 | mov BASE, L:RB->base
5437 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5438 | set_vmstate INTERP
6066 | lea RA, [BASE+RD*8] 5439 | lea RA, [BASE+RD*8]
6067 | neg RA 5440 | neg RA
6068 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 5441 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6382,15 +5755,21 @@ static void emit_asm_debug(BuildCtx *ctx)
6382 "LEFDEY:\n\n", fcsize); 5755 "LEFDEY:\n\n", fcsize);
6383 } 5756 }
6384#endif 5757#endif
6385#if LJ_64 5758#if !LJ_64
6386 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6387#else
6388 fprintf(ctx->fp, 5759 fprintf(ctx->fp,
6389 "\t.non_lazy_symbol_pointer\n" 5760 "\t.non_lazy_symbol_pointer\n"
6390 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" 5761 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6391 ".indirect_symbol _lj_err_unwind_dwarf\n" 5762 ".indirect_symbol _lj_err_unwind_dwarf\n"
6392 ".long 0\n"); 5763 ".long 0\n\n");
5764 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5765 {
5766 const char *const *xn;
5767 for (xn = ctx->extnames; *xn; xn++)
5768 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5769 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5770 }
6393#endif 5771#endif
5772 fprintf(ctx->fp, ".subsections_via_symbols\n");
6394 } 5773 }
6395 break; 5774 break;
6396#endif 5775#endif
diff --git a/src/xb1build.bat b/src/xb1build.bat
new file mode 100644
index 00000000..847e84a5
--- /dev/null
+++ b/src/xb1build.bat
@@ -0,0 +1,101 @@
1@rem Script to build LuaJIT with the Xbox One SDK.
2@rem Donated to the public domain.
3@rem
4@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
5@rem Then cd to this directory and run this script.
6
7@if not defined INCLUDE goto :FAIL
8@if not defined DurangoXDK goto :FAIL
9
10@setlocal
11@echo ---- Host compiler ----
12@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /DLUAJIT_ENABLE_GC64
13@set LJLINK=link /nologo
14@set LJMT=mt /nologo
15@set DASMDIR=..\dynasm
16@set DASM=%DASMDIR%\dynasm.lua
17@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
18
19%LJCOMPILE% host\minilua.c
20@if errorlevel 1 goto :BAD
21%LJLINK% /out:minilua.exe minilua.obj
22@if errorlevel 1 goto :BAD
23if exist minilua.exe.manifest^
24 %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
25
26@rem Error out unless the host compiler is 64 bit
27@minilua
28@if not errorlevel 8 goto :FAIL
29
30@set DASMFLAGS=-D WIN -D FFI -D P64
31minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
32@if errorlevel 1 goto :BAD
33
34%LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
35@if errorlevel 1 goto :BAD
36%LJLINK% /out:buildvm.exe buildvm*.obj
37@if errorlevel 1 goto :BAD
38if exist buildvm.exe.manifest^
39 %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
40
41buildvm -m peobj -o lj_vm.obj
42@if errorlevel 1 goto :BAD
43buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
44@if errorlevel 1 goto :BAD
45buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
46@if errorlevel 1 goto :BAD
47buildvm -m libdef -o lj_libdef.h %ALL_LIB%
48@if errorlevel 1 goto :BAD
49buildvm -m recdef -o lj_recdef.h %ALL_LIB%
50@if errorlevel 1 goto :BAD
51buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
52@if errorlevel 1 goto :BAD
53buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
54@if errorlevel 1 goto :BAD
55
56@echo ---- Cross compiler ----
57
58@set CWD=%cd%
59@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK
60@cd /D "%CWD%"
61@shift
62
63@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO
64@set LJLIB="lib" /nologo
65
66@if "%1"=="debug" (
67 @shift
68 @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od
69 @set LJLINK=%LJLINK% /debug
70) else (
71 @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG
72)
73
74@if "%1"=="amalg" goto :AMALG
75%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
76@if errorlevel 1 goto :BAD
77%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj
78@if errorlevel 1 goto :BAD
79@goto :NOAMALG
80:AMALG
81%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
82@if errorlevel 1 goto :BAD
83%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj
84@if errorlevel 1 goto :BAD
85:NOAMALG
86
87@del *.obj *.manifest minilua.exe buildvm.exe
88@echo.
89@echo === Successfully built LuaJIT for Xbox One ===
90
91@goto :END
92:BAD
93@echo.
94@echo *******************************************************
95@echo *** Build FAILED -- Please check the error messages ***
96@echo *******************************************************
97@goto :END
98:FAIL
99@echo To run this script you must open a "Visual Studio .NET Command Prompt"
100@echo (64 bit host compiler). The Xbox One SDK must be installed, too.
101:END